gob payloads and add list processing
This commit is contained in:
parent
72c5c9e85d
commit
6e059224fd
4 changed files with 158 additions and 77 deletions
36
README.md
36
README.md
|
@ -2,16 +2,12 @@
|
||||||
|
|
||||||
Convert Markdown to Gemini [gemtext](https://gemini.circumlunar.space/docs/gemtext.gmi) markup with
|
Convert Markdown to Gemini [gemtext](https://gemini.circumlunar.space/docs/gemtext.gmi) markup with
|
||||||
Go. Working with streams and pipes for UNIX like behavior utilizing Go channels. Processing streams
|
Go. Working with streams and pipes for UNIX like behavior utilizing Go channels. Processing streams
|
||||||
line by line is deliberately slightly more challenging than it needs to be to play around with go
|
line by line is slightly more complex than it needs to be as I'm playing with channels and state
|
||||||
state machines.
|
machines here.
|
||||||
|
|
||||||
<!-- testing markdown, this should be deleted, below merged -->
|
Internally md2gmi does a 1st pass that constructs the blocks of single lines for gemtext. This is
|
||||||
See the [gemini
|
then streamed to the 2nd pass line by line. The 2nd pass will convert links, fix headings and stream
|
||||||
protocol](https://gemini.circumlunar.space/) and the [protocol
|
line by line to the output sink. The sink is either a file or stdout.
|
||||||
spec](https://gemini.circumlunar.space/docs/specification.gmi).
|
|
||||||
|
|
||||||
Internally md2gmi does a 1st pass that constructs the core layout for gemtext. This is then streamed
|
|
||||||
to the 2nd pass line by line. The 2nd pass will convert links and stream line by line to the output.
|
|
||||||
|
|
||||||
### Usage
|
### Usage
|
||||||
|
|
||||||
|
@ -29,19 +25,31 @@ Usage of ./md2gmi:
|
||||||
cat file.md | md2gmi
|
cat file.md | md2gmi
|
||||||
md2gmi -in file.md -out file.gmi
|
md2gmi -in file.md -out file.gmi
|
||||||
|
|
||||||
The top part of this readme parses to
|
The top part of this readme parses from
|
||||||
|
|
||||||
```md
|
```md
|
||||||
## md2gmi
|
Convert Markdown to Gemini [gemtext](https://gemini.circumlunar.space/docs/gemtext.gmi) markup with
|
||||||
|
Go. Working with streams and pipes for UNIX like behavior utilizing Go channels. Processing streams
|
||||||
|
line by line is slightly more complex than it needs to be as I'm playing with channels and state
|
||||||
|
machines here.
|
||||||
|
|
||||||
Convert Markdown to Gemini gemtext[1] markup with Go. Working with streams and pipes for UNIX like behavior utilizing Go channels. Processing streams line by line is deliberately slightly more challenging than it needs to be to play around with go state machines.
|
> this is
|
||||||
|
a quote
|
||||||
|
|
||||||
|
<!-- testing markdown, this should be deleted, below merged -->
|
||||||
|
See the [gemini
|
||||||
|
protocol](https://gemini.circumlunar.space/) and the [protocol
|
||||||
|
spec](https://gemini.circumlunar.space/docs/specification.gmi).
|
||||||
|
```
|
||||||
|
|
||||||
|
```md
|
||||||
|
Convert Markdown to Gemini gemtext[1] markup with Go. Working with streams and pipes for UNIX like behavior utilizing Go channels. Processing streams line by line is slightly more complex than it needs to be as I'm playing with channels and state machines here.
|
||||||
|
|
||||||
=> https://gemini.circumlunar.space/docs/gemtext.gmi 1: gemtext
|
=> https://gemini.circumlunar.space/docs/gemtext.gmi 1: gemtext
|
||||||
|
|
||||||
|
> this is a quote
|
||||||
See the gemini protocol[1] and the protocol spec[2].
|
See the gemini protocol[1] and the protocol spec[2].
|
||||||
|
|
||||||
=> https://gemini.circumlunar.space/ 1: gemini protocol
|
=> https://gemini.circumlunar.space/ 1: gemini protocol
|
||||||
=> https://gemini.circumlunar.space/docs/specification.gmi 2: protocol spec
|
=> https://gemini.circumlunar.space/docs/specification.gmi 2: protocol spec
|
||||||
|
|
||||||
Internally md2gmi does a 1st pass that constructs the core layout for gemtext. This is then streamed to the 2nd pass line by line. The 2nd pass will convert links and stream line by line to the output.
|
|
||||||
```
|
```
|
48
main.go
48
main.go
|
@ -2,12 +2,44 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"encoding/gob"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type WorkItem struct {
|
||||||
|
index int
|
||||||
|
payload []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func New(index int, payload []byte) WorkItem {
|
||||||
|
w := WorkItem{index: index}
|
||||||
|
var indexBuffer bytes.Buffer
|
||||||
|
encoder := gob.NewEncoder(&indexBuffer)
|
||||||
|
if err := encoder.Encode(payload); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
w.payload = indexBuffer.Bytes()
|
||||||
|
return w
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *WorkItem) Index() int {
|
||||||
|
return w.index
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *WorkItem) Payload() []byte {
|
||||||
|
buf := bytes.NewReader(w.payload)
|
||||||
|
decoder := gob.NewDecoder(buf)
|
||||||
|
var tmp []byte
|
||||||
|
if err := decoder.Decode(&tmp); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return tmp
|
||||||
|
}
|
||||||
|
|
||||||
func reader(in string) (io.Reader, error) {
|
func reader(in string) (io.Reader, error) {
|
||||||
if in != "" {
|
if in != "" {
|
||||||
file, err := os.Open(in)
|
file, err := os.Open(in)
|
||||||
|
@ -46,12 +78,14 @@ func InputStream(r io.Reader) *ir {
|
||||||
return &ir{r: r}
|
return &ir{r: r}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ir) Output() chan []byte {
|
func (m *ir) Output() chan WorkItem {
|
||||||
data := make(chan []byte)
|
data := make(chan WorkItem)
|
||||||
s := bufio.NewScanner(m.r)
|
s := bufio.NewScanner(m.r)
|
||||||
go func() {
|
go func() {
|
||||||
|
i := 0
|
||||||
for s.Scan() {
|
for s.Scan() {
|
||||||
data <- s.Bytes()
|
data <- New(i, s.Bytes())
|
||||||
|
i += 1
|
||||||
}
|
}
|
||||||
close(data)
|
close(data)
|
||||||
}()
|
}()
|
||||||
|
@ -66,9 +100,9 @@ func OutputStream(w io.Writer) *ow {
|
||||||
return &ow{w: w}
|
return &ow{w: w}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ow) Input(data chan []byte) {
|
func (m *ow) Input(data chan WorkItem) {
|
||||||
for b := range data {
|
for b := range data {
|
||||||
write(m.w, b)
|
write(m.w, b.Payload())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,9 +131,9 @@ func main() {
|
||||||
|
|
||||||
//sink.Input(preproc.Process(source.Output()))
|
//sink.Input(preproc.Process(source.Output()))
|
||||||
sink.Input(
|
sink.Input(
|
||||||
RemoveComments(
|
FormatLinks(
|
||||||
FormatHeadings(
|
FormatHeadings(
|
||||||
FormatLinks(
|
RemoveComments(
|
||||||
preproc.Process(source.Output()),
|
preproc.Process(source.Output()),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
|
118
preproc.go
118
preproc.go
|
@ -1,15 +1,23 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"regexp"
|
||||||
|
)
|
||||||
|
|
||||||
// state function
|
// state function
|
||||||
type stateFn func(*fsm, []byte) stateFn
|
type stateFn func(*fsm, []byte) stateFn
|
||||||
|
|
||||||
// state machine
|
// state machine
|
||||||
type fsm struct {
|
type fsm struct {
|
||||||
state stateFn
|
state stateFn
|
||||||
out chan []byte
|
|
||||||
|
i int
|
||||||
|
out chan WorkItem
|
||||||
|
|
||||||
// combining multiple input lines
|
// combining multiple input lines
|
||||||
buffer []byte
|
blockBuffer []byte
|
||||||
|
sendBuffer []byte
|
||||||
// if we have a termination rule to abide, e.g. implied code fences
|
// if we have a termination rule to abide, e.g. implied code fences
|
||||||
pending []byte
|
pending []byte
|
||||||
}
|
}
|
||||||
|
@ -18,42 +26,69 @@ func NewPreproc() *fsm {
|
||||||
return &fsm{}
|
return &fsm{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *fsm) Process(in chan []byte) chan []byte {
|
func (m *fsm) Process(in chan WorkItem) chan WorkItem {
|
||||||
m.out = make(chan []byte)
|
m.out = make(chan WorkItem)
|
||||||
go func() {
|
go func() {
|
||||||
for m.state = normal; m.state != nil; {
|
for m.state = normal; m.state != nil; {
|
||||||
b, ok := <-in
|
b, ok := <-in
|
||||||
if !ok {
|
if !ok {
|
||||||
m.flush()
|
m.blockFlush()
|
||||||
|
m.sync()
|
||||||
close(m.out)
|
close(m.out)
|
||||||
m.state = nil
|
m.state = nil
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
m.state = m.state(m, b)
|
|
||||||
|
// fmt.Printf("i preproc '%v'\n", string(b.Payload()))
|
||||||
|
m.state = m.state(m, b.Payload())
|
||||||
|
m.sync()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
return m.out
|
return m.out
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *fsm) flush() {
|
func (m *fsm) sync() {
|
||||||
|
if len(m.sendBuffer) > 0 {
|
||||||
|
//m.sendBuffer = bytes.TrimSpace(m.sendBuffer)
|
||||||
|
m.sendBuffer = append(m.sendBuffer, '\n')
|
||||||
|
//fmt.Printf("o preproc '%v'\n", string(m.sendBuffer))
|
||||||
|
m.out <- New(m.i, m.sendBuffer)
|
||||||
|
m.sendBuffer = m.sendBuffer[:0]
|
||||||
|
m.i += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *fsm) blockFlush() {
|
||||||
|
// move the accumulated blockBuffer contents into sendBuffer
|
||||||
|
//fmt.Println("block ", string(m.blockBuffer))
|
||||||
|
m.sendBuffer = append(m.sendBuffer, m.blockBuffer...)
|
||||||
|
m.blockBuffer = m.blockBuffer[:0]
|
||||||
|
|
||||||
if len(m.pending) > 0 {
|
if len(m.pending) > 0 {
|
||||||
m.out <- append(m.pending, '\n')
|
m.sendBuffer = append(m.sendBuffer, m.pending...)
|
||||||
|
m.sendBuffer = append(m.sendBuffer, '\n')
|
||||||
m.pending = m.pending[:0]
|
m.pending = m.pending[:0]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func isBlank(data []byte) bool {
|
|
||||||
return len(data) == 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func isHeader(data []byte) bool {
|
|
||||||
return len(data) > 0 && data[0] == '#'
|
|
||||||
}
|
|
||||||
|
|
||||||
func triggerBreak(data []byte) bool {
|
func triggerBreak(data []byte) bool {
|
||||||
return len(data) == 0 || data[len(data)-1] == '.'
|
return len(data) == 0 || data[len(data)-1] == '.'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isTerminated(data []byte) bool {
|
||||||
|
return len(data) > 0 && data[len(data)-1] != '.'
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleList(data []byte) ([]byte, bool) {
|
||||||
|
re := regexp.MustCompile(`([ ]*[-*])`)
|
||||||
|
sub := re.FindSubmatch(data)
|
||||||
|
// if the line is a list item, collapse its indentation and bullet to a single-level "-"
|
||||||
|
if len(sub) > 1 {
|
||||||
|
return bytes.Replace(data, sub[1], []byte("-"), 1), true
|
||||||
|
}
|
||||||
|
return data, false
|
||||||
|
}
|
||||||
|
|
||||||
func isFence(data []byte) bool {
|
func isFence(data []byte) bool {
|
||||||
return len(data) >= 3 && string(data[0:3]) == "```"
|
return len(data) >= 3 && string(data[0:3]) == "```"
|
||||||
}
|
}
|
||||||
|
@ -63,67 +98,62 @@ func needsFence(data []byte) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func normal(m *fsm, data []byte) stateFn {
|
func normal(m *fsm, data []byte) stateFn {
|
||||||
m.flush()
|
if data, isList := handleList(data); isList {
|
||||||
// blank line
|
m.blockBuffer = append(data, '\n')
|
||||||
if isBlank(data) {
|
m.blockFlush()
|
||||||
m.out <- []byte("\n")
|
|
||||||
return normal
|
|
||||||
}
|
|
||||||
// header
|
|
||||||
if isHeader(data) {
|
|
||||||
m.out <- append(data, '\n')
|
|
||||||
return normal
|
return normal
|
||||||
}
|
}
|
||||||
if isFence(data) {
|
if isFence(data) {
|
||||||
m.out <- append(data, '\n')
|
m.blockBuffer = append(data, '\n')
|
||||||
return fence
|
return fence
|
||||||
}
|
}
|
||||||
if needsFence(data) {
|
if needsFence(data) {
|
||||||
m.out <- []byte("```\n")
|
m.blockBuffer = append(m.blockBuffer, []byte("```\n")...)
|
||||||
m.out <- append(data[4:], '\n')
|
m.blockBuffer = append(m.blockBuffer, append(data[4:], '\n')...)
|
||||||
m.pending = []byte("```\n")
|
m.pending = []byte("```\n")
|
||||||
return toFence
|
return toFence
|
||||||
}
|
}
|
||||||
if data[len(data)-1] != '.' {
|
if isTerminated(data) {
|
||||||
m.buffer = append(m.buffer, data...)
|
m.blockBuffer = append(m.blockBuffer, data...)
|
||||||
m.buffer = append(m.buffer, []byte(" ")...)
|
m.blockBuffer = append(m.blockBuffer, ' ')
|
||||||
return paragraph
|
return paragraph
|
||||||
}
|
}
|
||||||
// TODO
|
// TODO
|
||||||
// collapse lists
|
// collapse lists
|
||||||
m.out <- append(data, '\n')
|
m.blockBuffer = append(m.blockBuffer, append(data, '\n')...)
|
||||||
|
m.blockFlush()
|
||||||
return normal
|
return normal
|
||||||
}
|
}
|
||||||
|
|
||||||
func fence(m *fsm, data []byte) stateFn {
|
func fence(m *fsm, data []byte) stateFn {
|
||||||
m.out <- append(data, '\n')
|
m.blockBuffer = append(m.blockBuffer, append(data, '\n')...)
|
||||||
|
// second fence returns to normal
|
||||||
if isFence(data) {
|
if isFence(data) {
|
||||||
|
m.blockFlush()
|
||||||
return normal
|
return normal
|
||||||
}
|
}
|
||||||
return fence
|
return fence
|
||||||
}
|
}
|
||||||
|
|
||||||
func toFence(m *fsm, data []byte) stateFn {
|
func toFence(m *fsm, data []byte) stateFn {
|
||||||
if len(data) >= 3 {
|
|
||||||
m.out <- append(data[4:], '\n')
|
|
||||||
} else {
|
|
||||||
//m.out <- []byte("\n")
|
|
||||||
}
|
|
||||||
if needsFence(data) {
|
if needsFence(data) {
|
||||||
|
m.blockBuffer = append(m.blockBuffer, append(data[4:], '\n')...)
|
||||||
return toFence
|
return toFence
|
||||||
}
|
}
|
||||||
|
m.blockFlush()
|
||||||
|
m.blockBuffer = append(m.blockBuffer, append(data, '\n')...)
|
||||||
return normal
|
return normal
|
||||||
}
|
}
|
||||||
|
|
||||||
func paragraph(m *fsm, data []byte) stateFn {
|
func paragraph(m *fsm, data []byte) stateFn {
|
||||||
if triggerBreak(data) {
|
if triggerBreak(data) {
|
||||||
m.buffer = append(m.buffer, data...)
|
m.blockBuffer = append(m.blockBuffer, data...)
|
||||||
m.out <- append(m.buffer, '\n')
|
m.blockBuffer = bytes.TrimSpace(m.blockBuffer)
|
||||||
m.buffer = m.buffer[:0]
|
m.blockBuffer = append(m.blockBuffer, '\n')
|
||||||
|
m.blockFlush()
|
||||||
return normal
|
return normal
|
||||||
}
|
}
|
||||||
m.buffer = append(m.buffer, data...)
|
m.blockBuffer = append(m.blockBuffer, data...)
|
||||||
m.buffer = append(m.buffer, []byte(" ")...)
|
m.blockBuffer = append(m.blockBuffer, []byte(" ")...)
|
||||||
return paragraph
|
return paragraph
|
||||||
}
|
}
|
||||||
|
|
33
proc.go
33
proc.go
|
@ -6,11 +6,11 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
)
|
)
|
||||||
|
|
||||||
func FormatLinks(in chan []byte) chan []byte {
|
func FormatLinks(in chan WorkItem) chan WorkItem {
|
||||||
out := make(chan []byte)
|
out := make(chan WorkItem)
|
||||||
go func() {
|
go func() {
|
||||||
for b := range in {
|
for b := range in {
|
||||||
out <- formatLinks(b)
|
out <- New(b.Index(), formatLinks(b.Payload()))
|
||||||
}
|
}
|
||||||
close(out)
|
close(out)
|
||||||
}()
|
}()
|
||||||
|
@ -37,33 +37,42 @@ func formatLinks(data []byte) []byte {
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
func RemoveComments(in chan []byte) chan []byte {
|
func RemoveComments(in chan WorkItem) chan WorkItem {
|
||||||
out := make(chan []byte)
|
out := make(chan WorkItem)
|
||||||
go func() {
|
go func() {
|
||||||
re := regexp.MustCompile(`<!--.*-->`)
|
re := regexp.MustCompile(`<!--.*-->`)
|
||||||
for b := range in {
|
for b := range in {
|
||||||
out <- re.ReplaceAll(b, []byte{})
|
data := b.Payload()
|
||||||
|
for _, match := range re.FindAllSubmatch(data, -1) {
|
||||||
|
data = bytes.Replace(data, match[0], []byte(""), 1)
|
||||||
|
}
|
||||||
|
out <- New(b.Index(), append(bytes.TrimSpace(data), '\n'))
|
||||||
|
//out <- New(b.Index(), data)
|
||||||
}
|
}
|
||||||
close(out)
|
close(out)
|
||||||
}()
|
}()
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func FormatHeadings(in chan []byte) chan []byte {
|
func FormatHeadings(in chan WorkItem) chan WorkItem {
|
||||||
out := make(chan []byte)
|
out := make(chan WorkItem)
|
||||||
go func() {
|
go func() {
|
||||||
re := regexp.MustCompile(`^[#]{4,}`)
|
re := regexp.MustCompile(`^[#]{4,}`)
|
||||||
re2 := regexp.MustCompile(`^(#+)[^# ]`)
|
re2 := regexp.MustCompile(`^(#+)[^# ]`)
|
||||||
for b := range in {
|
for b := range in {
|
||||||
// collapse headings of level 4 or deeper to level 3
|
// collapse headings of level 4 or deeper to level 3
|
||||||
b = re.ReplaceAll(b, []byte("###"))
|
data := re.ReplaceAll(b.Payload(), []byte("###"))
|
||||||
// ensure a space follows the leading run of '#' characters
|
// ensure a space follows the leading run of '#' characters
|
||||||
sub := re2.FindSubmatch(b)
|
sub := re2.FindSubmatch(data)
|
||||||
if len(sub) > 0 {
|
if len(sub) > 0 {
|
||||||
b = bytes.Replace(b, sub[1], append(sub[1], []byte(" ")...), 1)
|
data = bytes.Replace(data, sub[1], append(sub[1], []byte(" ")...), 1)
|
||||||
|
}
|
||||||
|
// generally if we deal with a heading, add an extra blank line
|
||||||
|
if bytes.HasPrefix(data, []byte("#")) {
|
||||||
|
data = append(data, '\n')
|
||||||
}
|
}
|
||||||
// writeback
|
// writeback
|
||||||
out <- b
|
out <- New(b.Index(), data)
|
||||||
|
|
||||||
}
|
}
|
||||||
close(out)
|
close(out)
|
||||||
|
|
Loading…
Reference in a new issue