split out pipelines

This commit is contained in:
dre 2021-07-05 13:36:20 +08:00
parent d873f4a565
commit 02c2abeb51
5 changed files with 326 additions and 0 deletions

28
mdproc/comments.go Normal file
View file

@ -0,0 +1,28 @@
package mdproc
import (
"bytes"
"regexp"
"github.com/n0x1m/md2gmi/pipe"
)
// RemoveComments returns a pipeline stage that strips HTML comments
// (`<!-- ... -->`) from the payload of every item read from in. The
// remaining text is trimmed of surrounding whitespace, terminated with a
// single newline and emitted under the item's original index. The returned
// channel is closed once in has been drained.
func RemoveComments(in chan pipe.StreamItem) chan pipe.StreamItem {
	out := make(chan pipe.StreamItem)

	go func() {
		defer close(out)

		// The match is non-greedy on purpose: with a greedy `.*` a line
		// holding two comments would also lose the text between them.
		re := regexp.MustCompile(`<!--.*?-->`)

		for b := range in {
			// ReplaceAll returns a fresh copy, so the trim/append below
			// never writes into the payload's backing array.
			data := re.ReplaceAll(b.Payload(), nil)
			out <- pipe.NewItem(b.Index(), append(bytes.TrimSpace(data), '\n'))
		}
	}()

	return out
}

37
mdproc/headings.go Normal file
View file

@ -0,0 +1,37 @@
package mdproc
import (
"bytes"
"regexp"
"github.com/n0x1m/md2gmi/pipe"
)
// FormatHeadings returns a pipeline stage that normalises heading markers at
// the start of each item: a run of four or more '#' is reduced to "###", a
// space is inserted after the marker when the next character is neither '#'
// nor a space, and every item starting with '#' gets one extra trailing
// newline. The returned channel is closed once in has been drained.
func FormatHeadings(in chan pipe.StreamItem) chan pipe.StreamItem {
	out := make(chan pipe.StreamItem)

	go func() {
		defer close(out)

		tooDeep := regexp.MustCompile(`^[#]{4,}`)
		missingSpace := regexp.MustCompile(`^(#+)[^# ]`)
		hash := []byte("#")

		for item := range in {
			// cap the heading depth at three levels
			line := tooDeep.ReplaceAll(item.Payload(), []byte("###"))

			// the pattern is anchored, so the marker is always the prefix
			// of the line; rebuild the line with a space right after it
			if m := missingSpace.FindSubmatch(line); len(m) > 0 {
				marker := m[1]
				spaced := make([]byte, 0, len(line)+2)
				spaced = append(spaced, marker...)
				spaced = append(spaced, ' ')
				spaced = append(spaced, line[len(marker):]...)
				line = spaced
			}

			// headings are followed by an additional blank line
			if bytes.HasPrefix(line, hash) {
				line = append(line, '\n')
			}

			out <- pipe.NewItem(item.Index(), line)
		}
	}()

	return out
}

28
mdproc/hugo.go Normal file
View file

@ -0,0 +1,28 @@
package mdproc
import (
"bytes"
"regexp"
"github.com/n0x1m/md2gmi/pipe"
)
// RemoveFrontMatter returns a pipeline stage that deletes every span matching
// `---.*---` from the payload of each item, trims surrounding whitespace and
// terminates the result with a single newline. Items keep their original
// index; the returned channel is closed once in has been drained.
func RemoveFrontMatter(in chan pipe.StreamItem) chan pipe.StreamItem {
	out := make(chan pipe.StreamItem)

	go func() {
		defer close(out)

		frontMatter := regexp.MustCompile(`---.*---`)

		for item := range in {
			body := item.Payload()

			// matches are collected once on the untouched payload, then
			// removed one by one from the working copy
			for _, hit := range frontMatter.FindAll(body, -1) {
				body = bytes.Replace(body, hit, nil, 1)
			}

			body = bytes.TrimSpace(body)
			out <- pipe.NewItem(item.Index(), append(body, '\n'))
		}
	}()

	return out
}

57
mdproc/links.go Normal file
View file

@ -0,0 +1,57 @@
package mdproc
import (
"bytes"
"fmt"
"regexp"
"github.com/n0x1m/md2gmi/pipe"
)
// FormatLinks returns a pipeline stage that rewrites markdown links in every
// item via formatLinks, except while a code fence is open. Each item whose
// payload starts with a fence marker toggles the fence state; items seen
// while the fence is open are forwarded unchanged. The returned channel is
// closed once in has been drained.
func FormatLinks(in chan pipe.StreamItem) chan pipe.StreamItem {
	out := make(chan pipe.StreamItem)

	go func() {
		defer close(out)

		insideFence := false

		for item := range in {
			payload := item.Payload()

			if isFence(payload) {
				insideFence = !insideFence
			}

			// fenced content is passed through verbatim
			if !insideFence {
				payload = formatLinks(payload)
			}

			out <- pipe.NewItem(item.Index(), payload)
		}
	}()

	return out
}
// mdLinkRe matches an inline markdown link or image, `[name](url)` or
// `![name](url)`. Group 1 is the name, group 2 the url. It is compiled once
// at package scope instead of on every formatLinks call.
var mdLinkRe = regexp.MustCompile(`!?\[([^\]*]*)\]\(([^)]*)\)`)

// formatLinks replaces each inline markdown link in data with its name
// followed by a 1-based reference marker ("name[1]") and appends, after a
// blank separator line, one "=> url N: name" line per link in order of
// appearance. When data holds no link it is returned unchanged. The input
// slice is never modified.
func formatLinks(data []byte) []byte {
	matches := mdLinkRe.FindAllSubmatch(data, -1)
	if len(matches) == 0 {
		return data
	}

	var links []byte

	for i, m := range matches {
		whole, name, url := m[0], m[1], m[2]
		ref := i + 1

		// Only the first remaining occurrence is replaced, so repeated
		// identical links each receive their own reference number.
		inline := []byte(fmt.Sprintf("%s[%d]", name, ref))
		data = bytes.Replace(data, whole, inline, 1)

		// collect the link line; it is appended below the paragraph
		links = append(links, fmt.Sprintf("=> %s %d: %s\n", url, ref, name)...)
	}

	data = append(data, '\n')
	data = append(data, links...)

	return data
}

176
mdproc/preproc.go Normal file
View file

@ -0,0 +1,176 @@
package mdproc
import (
"bytes"
"regexp"
"github.com/n0x1m/md2gmi/pipe"
)
// stateFn is one state of the preprocessing state machine. It is called with
// the machine and the payload of the next input item and returns the state
// that will handle the following item.
type stateFn func(*fsm, []byte) stateFn
// fsm is the line-oriented state machine behind Preproc. Input payloads are
// collected in blockBuffer by the state functions, moved to sendBuffer by
// blockFlush and emitted on out by sync.
type fsm struct {
// state is the handler applied to the next input payload.
state stateFn
// i is the index given to the next emitted item; sync increments it after
// every send.
i int
// out is the channel the machine emits items on.
out chan pipe.StreamItem
// combining multiple input lines
blockBuffer []byte
// sendBuffer holds flushed content until sync emits it.
sendBuffer []byte
// if we have a termination rule to abide, e.g. implied code fences
// (blockFlush appends pending, plus a newline, after the block content)
pending []byte
}
// Preproc returns a pipeline stage backed by a freshly allocated state
// machine; every call yields an independent machine.
func Preproc() pipe.Pipeline {
	machine := new(fsm)

	return machine.pipeline
}
// pipeline starts the state machine in a goroutine and returns the channel it
// emits on. Every payload read from in is handed to the current state, whose
// return value becomes the next state, and any flushed content is sent right
// away. When in is closed, buffered content is flushed and sent and the
// output channel is closed.
func (m *fsm) pipeline(in chan pipe.StreamItem) chan pipe.StreamItem {
	m.out = make(chan pipe.StreamItem)

	go func() {
		m.state = normal

		for item := range in {
			m.state = m.state(m, item.Payload())
			m.sync()

			// a nil state stops the machine without closing the output
			if m.state == nil {
				return
			}
		}

		// input exhausted: emit whatever is still buffered, then shut down
		m.blockFlush()
		m.sync()
		close(m.out)
		m.state = nil
	}()

	return m.out
}
// sync emits the send buffer, terminated with an extra newline, as the next
// item on the output channel and advances the item index. It does nothing
// when the send buffer is empty.
func (m *fsm) sync() {
	if len(m.sendBuffer) == 0 {
		return
	}

	m.sendBuffer = append(m.sendBuffer, '\n')
	m.out <- pipe.NewItem(m.i, m.sendBuffer)
	m.i++

	// NOTE(review): the backing array is reused after the send; this is only
	// safe if pipe.NewItem copies its payload — confirm.
	m.sendBuffer = m.sendBuffer[:0]
}
// blockFlush moves the block buffer to the end of the send buffer and empties
// it. If a pending terminator is set, it is appended as well, followed by a
// newline, and then cleared.
func (m *fsm) blockFlush() {
	m.sendBuffer = append(m.sendBuffer, m.blockBuffer...)
	m.blockBuffer = m.blockBuffer[:0]

	if len(m.pending) == 0 {
		return
	}

	m.sendBuffer = append(append(m.sendBuffer, m.pending...), '\n')
	m.pending = m.pending[:0]
}
// triggerBreak reports whether data ends a paragraph: it is either empty or
// its last byte is a period.
func triggerBreak(data []byte) bool {
	n := len(data)
	if n == 0 {
		return true
	}

	return data[n-1] == '.'
}
// isTerminated reports whether data is non-empty and its last byte is not a
// period. Despite the name, a line that ends with a period yields false.
func isTerminated(data []byte) bool {
	if n := len(data); n > 0 {
		return data[n-1] != '.'
	}

	return false
}
// listMarkerRe matches a list marker at the start of a line: optional leading
// spaces, one of '-', '*' or '^', followed by a character that is neither '*'
// nor '-'. Group 1 is the indentation plus the marker. It is compiled once at
// package scope instead of on every handleList call.
//
// NOTE(review): '^' in the marker class may have been meant to be '+' — confirm.
var listMarkerRe = regexp.MustCompile(`^([ ]*[-*^]{1,1})[^*-]`)

// handleList detects a list item and collapses it to a single level. When
// data starts with a list marker, it returns a copy in which the indentation
// and marker are replaced by "-" together with true; otherwise it returns
// data unchanged and false.
func handleList(data []byte) ([]byte, bool) {
	sub := listMarkerRe.FindSubmatch(data)
	if sub == nil {
		return data, false
	}

	// the pattern is anchored, so the first occurrence is the line prefix
	return bytes.Replace(data, sub[1], []byte("-"), 1), true
}
// isFence reports whether data starts with a code fence marker, i.e. three
// backticks.
func isFence(data []byte) bool {
	const marker = "```"

	if len(data) < len(marker) {
		return false
	}

	return string(data[:len(marker)]) == marker
}
// needsFence reports whether data is an indented code line, i.e. it starts
// with four spaces, and therefore has to be wrapped in an implied code fence.
// The width matches the four bytes callers strip from such lines.
func needsFence(data []byte) bool {
	const codeIndent = 4

	if len(data) < codeIndent {
		return false
	}

	// Compare byte by byte: a four-byte prefix can never equal a shorter
	// string literal, which would make the check always false.
	for _, c := range data[:codeIndent] {
		if c != ' ' {
			return false
		}
	}

	return true
}
// normal is the default state. It classifies the line in data and returns the
// state for the next line:
//   - a list item is collapsed to a single level and flushed on its own;
//   - a fence marker opens a fenced block (state fence);
//   - an indented code line opens an implied fence whose closing marker is
//     stored in pending (state toFence);
//   - a non-empty line not ending in a period starts a paragraph that is
//     joined with the following lines (state paragraph);
//   - anything else is flushed as a line of its own.
//
// Every branch appends to blockBuffer rather than replacing it, so content a
// previous state left in the buffer is kept and the input slice is never used
// as the buffer's backing store.
func normal(m *fsm, data []byte) stateFn {
	if item, isList := handleList(data); isList {
		m.blockBuffer = append(m.blockBuffer, item...)
		m.blockBuffer = append(m.blockBuffer, '\n')
		m.blockFlush()

		return normal
	}

	switch {
	case isFence(data):
		m.blockBuffer = append(m.blockBuffer, data...)
		m.blockBuffer = append(m.blockBuffer, '\n')

		return fence

	case needsFence(data):
		// open the implied fence, then add the line without its indent
		m.blockBuffer = append(m.blockBuffer, "```\n"...)
		m.blockBuffer = append(m.blockBuffer, data[4:]...)
		m.blockBuffer = append(m.blockBuffer, '\n')
		m.pending = []byte("```\n")

		return toFence

	case isTerminated(data):
		// the line continues on the next one; join them with a space
		m.blockBuffer = append(m.blockBuffer, data...)
		m.blockBuffer = append(m.blockBuffer, ' ')

		return paragraph
	}

	m.blockBuffer = append(m.blockBuffer, data...)
	m.blockBuffer = append(m.blockBuffer, '\n')
	m.blockFlush()

	return normal
}
// fence is the state inside an explicit code fence. Every line is appended to
// the block buffer followed by a newline; the line that starts with the
// closing fence marker also flushes the block and returns to normal.
func fence(m *fsm, data []byte) stateFn {
	closing := isFence(data)

	m.blockBuffer = append(m.blockBuffer, data...)
	m.blockBuffer = append(m.blockBuffer, '\n')

	if !closing {
		return fence
	}

	// second fence returns to normal
	m.blockFlush()

	return normal
}
// toFence is the state inside an implied code fence opened by an indented
// line. Further indented lines are appended to the block without their
// four-byte indent. The first line that is not indented ends the block: the
// block (including the pending closing fence) is flushed and emitted as an
// item of its own, and the line itself is handed to normal so that it is
// classified as list item, fence, paragraph or plain line like any other
// line instead of being buffered unprocessed.
func toFence(m *fsm, data []byte) stateFn {
	if needsFence(data) {
		m.blockBuffer = append(m.blockBuffer, data[4:]...)
		m.blockBuffer = append(m.blockBuffer, '\n')

		return toFence
	}

	// Emit the fenced block before the terminating line is processed, so the
	// two never end up merged in a single item.
	m.blockFlush()
	m.sync()

	return normal(m, data)
}
// paragraph is the state while the lines of a paragraph are being joined.
// Each line is appended to the block buffer. A line that is empty or ends
// with a period closes the paragraph: the buffer is trimmed, terminated with
// a newline and flushed, and the machine returns to normal. Any other line is
// followed by a joining space and the machine stays in paragraph.
func paragraph(m *fsm, data []byte) stateFn {
	m.blockBuffer = append(m.blockBuffer, data...)

	if !triggerBreak(data) {
		m.blockBuffer = append(m.blockBuffer, ' ')

		return paragraph
	}

	m.blockBuffer = append(bytes.TrimSpace(m.blockBuffer), '\n')
	m.blockFlush()

	return normal
}