From 10ed27bb771172f70b5d321008e9c0ddc5d1ef61 Mon Sep 17 00:00:00 2001
From: dre <dre@nox.im>
Date: Sun, 4 Jul 2021 15:03:37 +0800
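Subject: [PATCH] process links and headings

Replace the pass-through second-pass state machine in proc.go with a
per-line process step. It rewrites inline Markdown links as numbered
references followed by gemtext "=>" link lines, strips HTML comments,
collapses headings of four or more '#' down to "###", and inserts the
missing space after heading markers.

In preproc.go, drop the first four bytes of lines emitted into implied
code fences. README.md now documents the -f and -o flags and gains a
second link and an unspaced heading.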
---
 README.md  |  9 ++++----
 preproc.go |  4 ++--
 proc.go    | 66 ++++++++++++++++++++++++++++++------------------------
 3 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/README.md b/README.md
index 79d4cd7..3166c0b 100644
--- a/README.md
+++ b/README.md
@@ -7,18 +7,19 @@ state machines.
 
 <!-- testing markdown, this should be deleted, below merged -->
 See the [gemini
-protocol](https://gemini.circumlunar.space/).
+protocol](https://gemini.circumlunar.space/) and the [protocol
+spec](https://gemini.circumlunar.space/docs/specification.gmi).
 
 Internally md2gmi does a 1st pass that constructs the core layout for gemtext. This is then streamed
 to the 2nd pass line by line. The 2nd pass will convert links and stream line by line to the output.
 
-### Usage
+###Usage
 
 ```plain
 Usage of ./md2gmi:
-  -in string
+  -f string
         specify a .md (Markdown) file to read from, otherwise stdin (default)
-  -out string
+  -o string
         specify a .gmi (gemtext) file to write to, otherwise stdout (default)
 ```
 
diff --git a/preproc.go b/preproc.go
index ebc030b..9f636d6 100644
--- a/preproc.go
+++ b/preproc.go
@@ -80,7 +80,7 @@ func normal(m *fsm, data []byte) stateFn {
 	}
 	if needsFence(data) {
 		m.out <- []byte("```\n")
-		m.out <- append(data, '\n')
+		m.out <- append(data[4:], '\n')
 		m.pending = []byte("```")
 		return toFence
 	}
@@ -105,7 +105,7 @@ func fence(m *fsm, data []byte) stateFn {
 }
 
 func toFence(m *fsm, data []byte) stateFn {
-	m.out <- append(data, '\n')
+	m.out <- append(data[4:], '\n')
 	if needsFence(data) {
 		return toFence
 	}
diff --git a/proc.go b/proc.go
index 44d4094..5d19773 100644
--- a/proc.go
+++ b/proc.go
@@ -1,17 +1,14 @@
 package main
 
-// state function
-type stateFn2 func(*proc, []byte) stateFn2
+import (
+	"bytes"
+	"fmt"
+	"regexp"
+)
 
 // state machine
 type proc struct {
-	state stateFn2
-	out   chan []byte
-
-	// combining multiple input lines
-	buffer []byte
-	// if we have a termination rule to abide, e.g. implied code fences
-	pending []byte
+	out chan []byte
 }
 
 func NewProc() *proc {
@@ -21,32 +18,43 @@ func NewProc() *proc {
 func (m *proc) Process(in chan []byte) chan []byte {
 	m.out = make(chan []byte)
 	go func() {
-		for m.state = line; m.state != nil; {
-			b, ok := <-in
-			if !ok {
-				m.flush()
-				close(m.out)
-				m.state = nil
-				continue
-			}
-			m.state = m.state(m, b)
+		for b := range in {
+			m.out <- m.process(b)
 		}
+		close(m.out)
 	}()
 	return m.out
 }
 
-func (m *proc) flush() {
-	if len(m.pending) > 0 {
-		m.out <- append(m.pending, '\n')
-		m.pending = m.pending[:0]
+func (m *proc) process(data []byte) []byte {
+	// find link name and url
+	var buffer []byte
+	re := regexp.MustCompile(`\[([^\]*]*)\]\(([^)]*)\)`)
+	for i, match := range re.FindAllSubmatch(data, -1) {
+		replaceWithIndex := append(match[1], fmt.Sprintf("[%d]", i+1)...)
+		data = bytes.Replace(data, match[0], replaceWithIndex, 1)
+		link := fmt.Sprintf("=> %s %d: %s\n", match[2], i+1, match[1])
+		buffer = append(buffer, link...)
+	}
+	if len(buffer) > 0 {
+		data = append(data, []byte("\n")...)
+		data = append(data, buffer...)
 	}
-}
 
-func line(m *proc, data []byte) stateFn2 {
-	// TODO
-	// find links
-	// collapse lists
-	m.out <- data
+	// remove comments
+	re2 := regexp.MustCompile(`<!--.*-->`)
+	data = re2.ReplaceAll(data, []byte{})
 
-	return line
+	// collapse headings
+	re3 := regexp.MustCompile(`^[#]{4,}`)
+	data = re3.ReplaceAll(data, []byte("###"))
+
+	// heading without spacing
+	re4 := regexp.MustCompile(`^(#+)[^# ]`)
+	sub := re4.FindSubmatch(data)
+	if len(sub) > 0 {
+		data = bytes.Replace(data, sub[1], append(sub[1], []byte(" ")...), 1)
+	}
+
+	return data
 }