process links and headings

2021-07-04 15:03:37 +08:00 · 2021-07-04 15:03:37 +08:00 · 10ed27bb77
commit 10ed27bb77
parent 5eaedce5e6
3 changed files with 44 additions and 35 deletions
--- a/README.md
+++ b/README.md
@@ -7,7 +7,8 @@ state machines.

 <!-- testing markdown, this should be deleted, below merged -->
 See the [gemini
-protocol](https://gemini.circumlunar.space/).
+protocol](https://gemini.circumlunar.space/) and the [protocol
+spec](https://gemini.circumlunar.space/docs/specification.gmi).

 Internally md2gmi does a 1st pass that constructs the core layout for gemtext. This is then streamed
 to the 2nd pass line by line. The 2nd pass will convert links and stream line by line to the output.
@@ -16,9 +17,9 @@ to the 2nd pass line by line. The 2nd pass will convert links and stream line by

 ```plain
 Usage of ./md2gmi:
-  -in string
+  -f string
        specify a .md (Markdown) file to read from, otherwise stdin (default)
-  -out string
+  -o string
        specify a .gmi (gemtext) file to write to, otherwise stdout (default)
 ```

--- a/preproc.go
+++ b/preproc.go
@@ -80,7 +80,7 @@ func normal(m *fsm, data []byte) stateFn {
 	}
 	if needsFence(data) {
 		m.out <- []byte("```\n")
-		m.out <- append(data, '\n')
+		m.out <- append(data[4:], '\n')
 		m.pending = []byte("```")
 		return toFence
 	}
@@ -105,7 +105,7 @@ func fence(m *fsm, data []byte) stateFn {
 }

 func toFence(m *fsm, data []byte) stateFn {
-	m.out <- append(data, '\n')
+	m.out <- append(data[4:], '\n')
 	if needsFence(data) {
 		return toFence
 	}
--- a/proc.go
+++ b/proc.go
@@ -1,17 +1,14 @@
 package main

-// state function
-type stateFn2 func(*proc, []byte) stateFn2
+import (
+	"bytes"
+	"fmt"
+	"regexp"
+)

 // state machine
 type proc struct {
-	state stateFn2
 	out chan []byte
-
-	// combining multiple input lines
-	buffer []byte
-	// if we have a termination rule to abide, e.g. implied code fences
-	pending []byte
 }

 func NewProc() *proc {
@@ -21,32 +18,43 @@ func NewProc() *proc {
 func (m *proc) Process(in chan []byte) chan []byte {
 	m.out = make(chan []byte)
 	go func() {
-		for m.state = line; m.state != nil; {
-			b, ok := <-in
-			if !ok {
-				m.flush()
+		for b := range in {
+			m.out <- m.process(b)
+		}
 		close(m.out)
-				m.state = nil
-				continue
-			}
-			m.state = m.state(m, b)
-		}
 	}()
 	return m.out
 }

-func (m *proc) flush() {
-	if len(m.pending) > 0 {
-		m.out <- append(m.pending, '\n')
-		m.pending = m.pending[:0]
+func (m *proc) process(data []byte) []byte {
+	// find link name and url
+	var buffer []byte
+	re := regexp.MustCompile(`\[([^\]*]*)\]\(([^)]*)\)`)
+	for i, match := range re.FindAllSubmatch(data, -1) {
+		replaceWithIndex := append(match[1], fmt.Sprintf("[%d]", i+1)...)
+		data = bytes.Replace(data, match[0], replaceWithIndex, 1)
+		link := fmt.Sprintf("=> %s %d: %s\n", match[2], i+1, match[1])
+		buffer = append(buffer, link...)
 	}
+	if len(buffer) > 0 {
+		data = append(data, []byte("\n")...)
+		data = append(data, buffer...)
 	}

-func line(m *proc, data []byte) stateFn2 {
-	// TODO
-	// find links
-	// collapse lists
-	m.out <- data
+	// remove comments
+	re2 := regexp.MustCompile(`<!--.*-->`)
+	data = re2.ReplaceAll(data, []byte{})

-	return line
+	// collapse headings
+	re3 := regexp.MustCompile(`^[#]{4,}`)
+	data = re3.ReplaceAll(data, []byte("###"))
+
+	// heading without spacing
+	re4 := regexp.MustCompile(`^(#+)[^# ]`)
+	sub := re4.FindSubmatch(data)
+	if len(sub) > 0 {
+		data = bytes.Replace(data, sub[1], append(sub[1], []byte(" ")...), 1)
+	}
+
+	return data
 }