split out pipelines

2021-07-05 13:36:20 +08:00 · 2021-07-05 13:36:20 +08:00 · 02c2abeb51
commit 02c2abeb51
parent d873f4a565
5 changed files with 326 additions and 0 deletions
--- a/mdproc/comments.go
+++ b/mdproc/comments.go
@ -0,0 +1,28 @@
+package mdproc
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// RemoveComments returns a pipeline stage that strips HTML comments
+// (`<!-- ... -->`) from the payload of every item read from in.
+//
+// Each payload is then trimmed of surrounding whitespace and terminated with a
+// single newline before being forwarded under the item's original index. The
+// returned channel is closed once in has been drained.
+func RemoveComments(in chan pipe.StreamItem) chan pipe.StreamItem {
+	out := make(chan pipe.StreamItem)
+
+	go func() {
+		// Non-greedy on purpose: with a greedy `.*` two comments on the same
+		// line would be matched as one span and the text between them lost.
+		re := regexp.MustCompile(`<!--.*?-->`)
+
+		for b := range in {
+			// ReplaceAll returns a fresh slice, so the append below cannot
+			// write into the payload owned by the incoming item.
+			data := re.ReplaceAll(b.Payload(), nil)
+			out <- pipe.NewItem(b.Index(), append(bytes.TrimSpace(data), '\n'))
+		}
+
+		close(out)
+	}()
+
+	return out
+}
--- a/mdproc/headings.go
+++ b/mdproc/headings.go
@ -0,0 +1,37 @@
+package mdproc
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// FormatHeadings returns a pipeline stage that normalises markdown headings in
+// every item read from in:
+//
+//   - a leading run of four or more "#" is shortened to "###",
+//   - a space is inserted after the leading "#" run when the next character is
+//     neither "#" nor a space,
+//   - payloads starting with "#" get an extra trailing newline.
+//
+// Items keep their index; the returned channel is closed when in is drained.
+func FormatHeadings(in chan pipe.StreamItem) chan pipe.StreamItem {
+	out := make(chan pipe.StreamItem)
+
+	go func() {
+		tooDeep := regexp.MustCompile(`^[#]{4,}`)
+		noSpace := regexp.MustCompile(`^(#+)[^# ]`)
+
+		for item := range in {
+			// cap the heading depth at three levels
+			line := tooDeep.ReplaceAll(item.Payload(), []byte("###"))
+
+			// separate the marker from the title text
+			if groups := noSpace.FindSubmatch(line); len(groups) > 0 {
+				hashes := groups[1]
+				line = bytes.Replace(line, hashes, append(hashes, []byte(" ")...), 1)
+			}
+
+			// headings are followed by a blank line
+			if bytes.HasPrefix(line, []byte("#")) {
+				line = append(line, '\n')
+			}
+
+			out <- pipe.NewItem(item.Index(), line)
+		}
+
+		close(out)
+	}()
+
+	return out
+}
--- a/mdproc/hugo.go
+++ b/mdproc/hugo.go
@ -0,0 +1,28 @@
+package mdproc
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// RemoveFrontMatter returns a pipeline stage that deletes every span matching
+// `---.*---` from the payload of each item read from in.
+//
+// The pattern is greedy and "." does not match a newline, so on any single
+// line everything from the first "---" to the last "---" is removed. The
+// result is trimmed of surrounding whitespace, terminated with one newline and
+// forwarded under the original index. The returned channel is closed once in
+// has been drained.
+func RemoveFrontMatter(in chan pipe.StreamItem) chan pipe.StreamItem {
+	out := make(chan pipe.StreamItem)
+
+	go func() {
+		frontMatter := regexp.MustCompile(`---.*---`)
+
+		for item := range in {
+			payload := item.Payload()
+
+			// Spans are located once on the untouched payload; each one is
+			// then cut out at its first occurrence in the working copy.
+			for _, span := range frontMatter.FindAll(payload, -1) {
+				payload = bytes.Replace(payload, span, nil, 1)
+			}
+
+			payload = append(bytes.TrimSpace(payload), '\n')
+			out <- pipe.NewItem(item.Index(), payload)
+		}
+
+		close(out)
+	}()
+
+	return out
+}
--- a/mdproc/links.go
+++ b/mdproc/links.go
@ -0,0 +1,57 @@
+package mdproc
+
+import (
+	"bytes"
+	"fmt"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// FormatLinks returns a pipeline stage that rewrites inline markdown links in
+// the items read from in (see formatLinks) while leaving fenced code alone.
+//
+// An item whose payload starts with a fence marker flips an "inside fence"
+// flag. Items seen while the flag is set are forwarded unchanged; all others,
+// including the item that switches the flag off, go through formatLinks.
+// The returned channel is closed once in has been drained.
+//
+// NOTE(review): this assumes opening and closing fence arrive as separate
+// items; confirm against the stage feeding this one.
+func FormatLinks(in chan pipe.StreamItem) chan pipe.StreamItem {
+	out := make(chan pipe.StreamItem)
+
+	go func() {
+		insideFence := false
+
+		for item := range in {
+			if isFence(item.Payload()) {
+				insideFence = !insideFence
+			}
+
+			payload := item.Payload()
+			if !insideFence {
+				payload = formatLinks(payload)
+			}
+
+			out <- pipe.NewItem(item.Index(), payload)
+		}
+
+		close(out)
+	}()
+
+	return out
+}
+
+// linkPattern matches an inline markdown link or image: an optional leading
+// "!", the link text in square brackets (group 1) and the target in
+// parentheses (group 2). Compiled once instead of on every call.
+var linkPattern = regexp.MustCompile(`!?\[([^\]*]*)\]\(([^)]*)\)`)
+
+// formatLinks replaces every inline link or image in data with its text
+// followed by a running reference number ("text[1]") and appends, after a
+// separating newline, one "=> target n: text" line per link.
+//
+// When data contains no link it is returned unchanged; otherwise a newly
+// allocated slice is returned and data is left untouched.
+func formatLinks(data []byte) []byte {
+	locs := linkPattern.FindAllSubmatchIndex(data, -1)
+	if len(locs) == 0 {
+		return data
+	}
+
+	var text, links bytes.Buffer
+
+	// Build the result in a single pass over the match positions. Replacing
+	// match text by search could hit the "[n]" produced by an earlier
+	// replacement instead of the link that was actually matched.
+	last := 0
+
+	for i, loc := range locs {
+		name, target := data[loc[2]:loc[3]], data[loc[4]:loc[5]]
+
+		text.Write(data[last:loc[0]])
+		// writes to a bytes.Buffer never return an error
+		fmt.Fprintf(&text, "%s[%d]", name, i+1)
+		fmt.Fprintf(&links, "=> %s %d: %s\n", target, i+1, name)
+
+		last = loc[1]
+	}
+
+	text.Write(data[last:])
+	// append links to that paragraph
+	text.WriteByte('\n')
+	text.Write(links.Bytes())
+
+	return text.Bytes()
+}
--- a/mdproc/preproc.go
+++ b/mdproc/preproc.go
@ -0,0 +1,176 @@
+package mdproc
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// stateFn is one state of the preprocessor state machine. It is called with
+// the machine and the payload of the next input item and returns the state
+// that handles the following item.
+type stateFn func(*fsm, []byte) stateFn
+
+// fsm is the preprocessor state machine. It collects input lines into blocks
+// and emits each finished block as one item on out.
+type fsm struct {
+	// state is the function that will handle the next input item.
+	state stateFn
+
+	// i is the index given to the next emitted item; sync increments it after
+	// every send.
+	i   int
+	// out is the channel finished blocks are sent on; created by pipeline.
+	out chan pipe.StreamItem
+
+	// combining multiple input lines
+	// blockBuffer holds the block under construction; blockFlush moves it
+	// into sendBuffer, which sync then emits.
+	blockBuffer []byte
+	sendBuffer  []byte
+	// if we have a termination rule to abide, e.g. implied code fences
+	// (blockFlush appends pending plus a newline after the block, then clears it)
+	pending []byte
+}
+
+// Preproc creates a fresh state machine and returns its pipeline method as a
+// pipe.Pipeline stage.
+func Preproc() pipe.Pipeline {
+	machine := new(fsm)
+
+	return machine.pipeline
+}
+
+// pipeline starts the state machine on in and returns the channel its output
+// is sent on.
+//
+// Beginning in the normal state, every received payload is handed to the
+// current state function, whose result becomes the next state; after each
+// step any finished block is emitted via sync. Once in is closed the
+// remaining buffered data is flushed and emitted, the output channel is
+// closed and the machine stops.
+func (m *fsm) pipeline(in chan pipe.StreamItem) chan pipe.StreamItem {
+	m.out = make(chan pipe.StreamItem)
+
+	go func() {
+		m.state = normal
+
+		for m.state != nil {
+			item, open := <-in
+			if open {
+				m.state = m.state(m, item.Payload())
+				m.sync()
+
+				continue
+			}
+
+			// input exhausted: emit whatever is still buffered, then stop
+			m.blockFlush()
+			m.sync()
+			close(m.out)
+			m.state = nil
+		}
+	}()
+
+	return m.out
+}
+
+// sync emits the content of sendBuffer, if any, as one item on m.out. A
+// newline is appended to the block first and the item index is advanced
+// after the send. It does nothing when sendBuffer is empty.
+func (m *fsm) sync() {
+	if len(m.sendBuffer) == 0 {
+		return
+	}
+
+	m.sendBuffer = append(m.sendBuffer, '\n')
+	m.out <- pipe.NewItem(m.i, m.sendBuffer)
+	// The receiver may still be reading the slice that was just sent
+	// (presumably NewItem keeps a reference rather than copying; not visible
+	// from here). Truncating and reusing it would let the next block
+	// overwrite that payload, so give the buffer up and start a new one.
+	m.sendBuffer = nil
+	m.i++
+}
+
+// blockFlush moves the collected block from blockBuffer to the end of
+// sendBuffer and empties blockBuffer. If a pending terminator is set (e.g.
+// the closing fence of an implied code block) it is appended as well,
+// followed by a newline, and then cleared.
+func (m *fsm) blockFlush() {
+	m.sendBuffer = append(m.sendBuffer, m.blockBuffer...)
+	m.blockBuffer = m.blockBuffer[:0]
+
+	if len(m.pending) == 0 {
+		return
+	}
+
+	m.sendBuffer = append(append(m.sendBuffer, m.pending...), '\n')
+	m.pending = m.pending[:0]
+}
+
+// triggerBreak reports whether data ends the paragraph being collected: it
+// is true for an empty line and for a line whose last byte is '.'.
+func triggerBreak(data []byte) bool {
+	if len(data) == 0 {
+		return true
+	}
+
+	return bytes.HasSuffix(data, []byte("."))
+}
+
+// isTerminated reports whether data is a non-empty line whose last byte is
+// NOT '.'. Despite its name this is the case in which a line is treated as
+// the start of a paragraph that continues on the following lines.
+func isTerminated(data []byte) bool {
+	if n := len(data); n > 0 {
+		return data[n-1] != '.'
+	}
+
+	return false
+}
+
+// listMarker matches optional leading spaces followed by exactly one list
+// marker ("-", "*" or "^") that is followed by a character other than "*" or
+// "-". Group 1 captures the indentation together with the marker. Compiled
+// once here rather than for every input line.
+var listMarker = regexp.MustCompile(`^([ ]*[-*^]{1,1})[^*-]`)
+
+// handleList detects a list item and flattens it to a single level.
+//
+// When data starts with a list marker, the indentation and marker are
+// replaced by a plain "-" and the rewritten line is returned together with
+// true. Otherwise data is returned unchanged together with false.
+func handleList(data []byte) ([]byte, bool) {
+	sub := listMarker.FindSubmatch(data)
+	if len(sub) < 2 {
+		return data, false
+	}
+
+	// if lists, collapse to single level
+	return bytes.Replace(data, sub[1], []byte("-"), 1), true
+}
+
+// isFence reports whether data starts with a code fence marker, i.e. three
+// backticks.
+func isFence(data []byte) bool {
+	return bytes.HasPrefix(data, []byte("```"))
+}
+
+// needsFence reports whether data is indented by at least four spaces, which
+// marks it as a line of an implied (indented) code block.
+func needsFence(data []byte) bool {
+	return bytes.HasPrefix(data, []byte("    "))
+}
+
+// normal is the default state of the preprocessor. It classifies the line in
+// data and picks the state for the next line:
+//
+//   - list item: flattened by handleList and flushed as its own block,
+//   - code fence: buffered, continue in fence,
+//   - line indented by four spaces: an opening fence is inserted, the
+//     un-indented line buffered and a closing fence left pending, continue in
+//     toFence,
+//   - non-empty line not ending in '.': buffered followed by a space, continue
+//     in paragraph,
+//   - anything else (empty line or line ending in '.'): flushed as a block.
+//
+// Every branch appends to blockBuffer rather than overwriting it: toFence
+// leaves the line that ended an implied code block in blockBuffer before
+// returning here, and assigning would drop that line. Appending also copies
+// the payload instead of adopting the caller's slice as the reusable buffer.
+func normal(m *fsm, data []byte) stateFn {
+	if item, isList := handleList(data); isList {
+		m.blockBuffer = append(m.blockBuffer, item...)
+		m.blockBuffer = append(m.blockBuffer, '\n')
+		m.blockFlush()
+
+		return normal
+	}
+
+	if isFence(data) {
+		m.blockBuffer = append(m.blockBuffer, data...)
+		m.blockBuffer = append(m.blockBuffer, '\n')
+
+		return fence
+	}
+
+	if needsFence(data) {
+		m.blockBuffer = append(m.blockBuffer, []byte("```\n")...)
+		m.blockBuffer = append(m.blockBuffer, data[4:]...)
+		m.blockBuffer = append(m.blockBuffer, '\n')
+		// closing fence, emitted by blockFlush once the indented run ends
+		m.pending = []byte("```\n")
+
+		return toFence
+	}
+
+	if isTerminated(data) {
+		m.blockBuffer = append(m.blockBuffer, data...)
+		m.blockBuffer = append(m.blockBuffer, ' ')
+
+		return paragraph
+	}
+
+	m.blockBuffer = append(m.blockBuffer, data...)
+	m.blockBuffer = append(m.blockBuffer, '\n')
+	m.blockFlush()
+
+	return normal
+}
+
+// fence handles lines inside an explicit code fence. Every line is buffered
+// verbatim with a trailing newline; the line carrying the closing fence
+// marker additionally flushes the block and hands control back to normal.
+func fence(m *fsm, data []byte) stateFn {
+	m.blockBuffer = append(m.blockBuffer, data...)
+	m.blockBuffer = append(m.blockBuffer, '\n')
+
+	if !isFence(data) {
+		return fence
+	}
+
+	// second fence returns to normal
+	m.blockFlush()
+
+	return normal
+}
+
+// toFence handles lines of an implied code block (lines indented by four
+// spaces). Indented lines are buffered without their indentation. The first
+// line that is not indented flushes the block, which also emits the pending
+// closing fence, and is then buffered itself before returning to normal.
+//
+// NOTE(review): that terminating line is buffered as-is and does not pass
+// through the checks in normal; confirm this is intended.
+func toFence(m *fsm, data []byte) stateFn {
+	if !needsFence(data) {
+		m.blockFlush()
+		m.blockBuffer = append(m.blockBuffer, data...)
+		m.blockBuffer = append(m.blockBuffer, '\n')
+
+		return normal
+	}
+
+	m.blockBuffer = append(m.blockBuffer, data[4:]...)
+	m.blockBuffer = append(m.blockBuffer, '\n')
+
+	return toFence
+}
+
+// paragraph joins consecutive lines into one block. Each line is appended to
+// the buffer; lines that do not end the paragraph are followed by a space.
+// An empty line or a line ending in '.' ends it: the buffer is trimmed of
+// surrounding whitespace, terminated with a newline and flushed, and control
+// returns to normal.
+func paragraph(m *fsm, data []byte) stateFn {
+	m.blockBuffer = append(m.blockBuffer, data...)
+
+	if !triggerBreak(data) {
+		m.blockBuffer = append(m.blockBuffer, ' ')
+
+		return paragraph
+	}
+
+	m.blockBuffer = append(bytes.TrimSpace(m.blockBuffer), '\n')
+	m.blockFlush()
+
+	return normal
+}