restructure pipelines

This commit is contained in:
dre 2021-07-04 15:50:08 +08:00
parent adde673fa6
commit 72c5c9e85d
4 changed files with 58 additions and 35 deletions

View file

@ -38,7 +38,7 @@ Convert Markdown to Gemini gemtext[1] markup with Go. Working with streams and p
=> https://gemini.circumlunar.space/docs/gemtext.gmi 1: gemtext
See the gemini protocol[1] and the protocol spec[2].
See the gemini protocol[1] and the protocol spec[2].
=> https://gemini.circumlunar.space/ 1: gemini protocol
=> https://gemini.circumlunar.space/docs/specification.gmi 2: protocol spec

11
main.go
View file

@ -94,8 +94,15 @@ func main() {
source := InputStream(r)
sink := OutputStream(w)
preproc := NewPreproc()
proc := NewProc()
//sink.Input(preproc.Process(source.Output()))
sink.Input(proc.Process(preproc.Process(source.Output())))
sink.Input(
RemoveComments(
FormatHeadings(
FormatLinks(
preproc.Process(source.Output()),
),
),
),
)
}

View file

@ -81,7 +81,7 @@ func normal(m *fsm, data []byte) stateFn {
if needsFence(data) {
m.out <- []byte("```\n")
m.out <- append(data[4:], '\n')
m.pending = []byte("```")
m.pending = []byte("```\n")
return toFence
}
if data[len(data)-1] != '.' {
@ -105,7 +105,11 @@ func fence(m *fsm, data []byte) stateFn {
}
func toFence(m *fsm, data []byte) stateFn {
m.out <- append(data[4:], '\n')
if len(data) >= 3 {
m.out <- append(data[4:], '\n')
} else {
//m.out <- []byte("\n")
}
if needsFence(data) {
return toFence
}

72
proc.go
View file

@ -6,55 +6,67 @@ import (
"regexp"
)
// state machine
type proc struct {
out chan []byte
}
func NewProc() *proc {
return &proc{}
}
func (m *proc) Process(in chan []byte) chan []byte {
m.out = make(chan []byte)
func FormatLinks(in chan []byte) chan []byte {
out := make(chan []byte)
go func() {
for b := range in {
m.out <- m.process(b)
out <- formatLinks(b)
}
close(m.out)
close(out)
}()
return m.out
return out
}
func (m *proc) process(data []byte) []byte {
func formatLinks(data []byte) []byte {
// find link name and url
var buffer []byte
re := regexp.MustCompile(`!?\[([^\]*]*)\]\(([^)]*)\)`)
for i, match := range re.FindAllSubmatch(data, -1) {
replaceWithIndex := append(match[1], fmt.Sprintf("[%d]", i+1)...)
data = bytes.Replace(data, match[0], replaceWithIndex, 1)
// append entry to buffer to be added later
link := fmt.Sprintf("=> %s %d: %s\n", match[2], i+1, match[1])
buffer = append(buffer, link...)
}
// append links to that paragraph
if len(buffer) > 0 {
data = append(data, []byte("\n")...)
data = append(data, buffer...)
}
// remove comments
re2 := regexp.MustCompile(`<!--.*-->`)
data = re2.ReplaceAll(data, []byte{})
// collapse headings
re3 := regexp.MustCompile(`^[#]{4,}`)
data = re3.ReplaceAll(data, []byte("###"))
// heading without spacing
re4 := regexp.MustCompile(`^(#+)[^# ]`)
sub := re4.FindSubmatch(data)
if len(sub) > 0 {
data = bytes.Replace(data, sub[1], append(sub[1], []byte(" ")...), 1)
}
return data
}
// RemoveComments is a pipeline stage that strips HTML-style comments
// (<!-- ... -->) from every chunk received on in and forwards the result
// on the returned channel. The returned channel is closed once in has been
// closed and drained.
//
// Each comment is matched lazily, so text sitting between two separate
// comments in the same chunk is preserved. A comment may span several lines
// as long as it begins and ends within a single chunk.
func RemoveComments(in chan []byte) chan []byte {
	out := make(chan []byte)
	go func() {
		defer close(out)
		// (?s) lets '.' cross newlines; '*?' stops at the first "-->" so
		// that "<!-- a --> keep <!-- b -->" does not swallow "keep".
		re := regexp.MustCompile(`(?s)<!--.*?-->`)
		for b := range in {
			out <- re.ReplaceAll(b, nil)
		}
	}()
	return out
}
// FormatHeadings is a pipeline stage that normalizes heading markers at the
// very start of each chunk received on in and forwards the result on the
// returned channel, which is closed once in has been closed and drained.
//
// Two adjustments are made, in order:
//   - a leading run of four or more '#' is collapsed to "###";
//   - if the leading run of '#' is immediately followed by a character other
//     than '#' or a space, a single space is inserted after the run.
func FormatHeadings(in chan []byte) chan []byte {
	out := make(chan []byte)
	tooDeep := regexp.MustCompile(`^[#]{4,}`)
	unspaced := regexp.MustCompile(`^(#+)[^# ]`)

	normalize := func(chunk []byte) []byte {
		chunk = tooDeep.ReplaceAll(chunk, []byte("###"))
		loc := unspaced.FindSubmatchIndex(chunk)
		if loc == nil {
			return chunk
		}
		// loc[3] is the end offset of the captured '#' run; splice one
		// space in right after it.
		end := loc[3]
		return bytes.Join([][]byte{chunk[:end], chunk[end:]}, []byte(" "))
	}

	go func() {
		defer close(out)
		for chunk := range in {
			out <- normalize(chunk)
		}
	}()
	return out
}