process links and headings

2021-07-04 15:03:37 +08:00 · 2021-07-04 15:03:37 +08:00 · 10ed27bb77
commit 10ed27bb77
parent 5eaedce5e6
3 changed files with 44 additions and 35 deletions
--- a/README.md
+++ b/README.md
@@ -7,18 +7,19 @@ state machines.
 <!-- testing markdown, this should be deleted, below merged -->
 See the [gemini
-protocol](https://gemini.circumlunar.space/).
+protocol](https://gemini.circumlunar.space/) and the [protocol
 spec](https://gemini.circumlunar.space/docs/specification.gmi).
 Internally md2gmi does a 1st pass that constructs the core layout for gemtext. This is then streamed
 to the 2nd pass line by line. The 2nd pass will convert links and stream line by line to the output.
-### Usage
+###Usage
 ```plain
 Usage of ./md2gmi:
-  -in string
+  -f string
        specify a .md (Markdown) file to read from, otherwise stdin (default)
-  -out string
+  -o string
        specify a .gmi (gemtext) file to write to, otherwise stdout (default)
 ```
--- a/preproc.go
+++ b/preproc.go
@@ -80,7 +80,7 @@ func normal(m *fsm, data []byte) stateFn {
 	}
 	if needsFence(data) {
 		m.out <- []byte("```\n")
-		m.out <- append(data, '\n')
+		m.out <- append(data[4:], '\n')
 		m.pending = []byte("```")
 		return toFence
 	}
@@ -105,7 +105,7 @@ func fence(m *fsm, data []byte) stateFn {
 }
 func toFence(m *fsm, data []byte) stateFn {
-	m.out <- append(data, '\n')
+	m.out <- append(data[4:], '\n')
 	if needsFence(data) {
 		return toFence
 	}
--- a/proc.go
+++ b/proc.go
@@ -1,17 +1,14 @@
 package main
-// state function
+import (
-type stateFn2 func(*proc, []byte) stateFn2
+	"bytes"
 	"fmt"
 	"regexp"
 )
 // state machine
 type proc struct {
-	state stateFn2
+	out chan []byte
 	out   chan []byte
 	// combining multiple input lines
 	buffer []byte
 	// if we have a termination rule to abide, e.g. implied code fences
 	pending []byte
 }
 func NewProc() *proc {
@@ -21,32 +18,43 @@ func NewProc() *proc {
 func (m *proc) Process(in chan []byte) chan []byte {
 	m.out = make(chan []byte)
 	go func() {
-		for m.state = line; m.state != nil; {
+		for b := range in {
-			b, ok := <-in
+			m.out <- m.process(b)
 			if !ok {
 				m.flush()
 				close(m.out)
 				m.state = nil
 				continue
 			}
 			m.state = m.state(m, b)
 		}
 		close(m.out)
 	}()
 	return m.out
 }
-func (m *proc) flush() {
+func (m *proc) process(data []byte) []byte {
-	if len(m.pending) > 0 {
+	// find link name and url
-		m.out <- append(m.pending, '\n')
+	var buffer []byte
-		m.pending = m.pending[:0]
+	re := regexp.MustCompile(`\[([^\]*]*)\]\(([^)]*)\)`)
 	for i, match := range re.FindAllSubmatch(data, -1) {
 		replaceWithIndex := append(match[1], fmt.Sprintf("[%d]", i+1)...)
 		data = bytes.Replace(data, match[0], replaceWithIndex, 1)
 		link := fmt.Sprintf("=> %s %d: %s\n", match[2], i+1, match[1])
 		buffer = append(buffer, link...)
 	}
 	if len(buffer) > 0 {
 		data = append(data, []byte("\n")...)
 		data = append(data, buffer...)
 	}
 }
-func line(m *proc, data []byte) stateFn2 {
+	// remove comments
-	// TODO
+	re2 := regexp.MustCompile(`<!--.*-->`)
-	// find links
+	data = re2.ReplaceAll(data, []byte{})
 	// collapse lists
 	m.out <- data
-	return line
+	// collapse headings
 	re3 := regexp.MustCompile(`^[#]{4,}`)
 	data = re3.ReplaceAll(data, []byte("###"))
 	// heading without spacing
 	re4 := regexp.MustCompile(`^(#+)[^# ]`)
 	sub := re4.FindSubmatch(data)
 	if len(sub) > 0 {
 		data = bytes.Replace(data, sub[1], append(sub[1], []byte(" ")...), 1)
 	}
 	return data
 }