From 02c2abeb51c9ae7fa895366c594e7b33cc9fa495 Mon Sep 17 00:00:00 2001
From: dre
Date: Mon, 5 Jul 2021 13:36:20 +0800
Subject: [PATCH] split out pipelines

---
 mdproc/comments.go |  34 +++++
 mdproc/headings.go |  42 ++++++
 mdproc/hugo.go     |  33 +++++
 mdproc/links.go    |  68 ++++++++++
 mdproc/preproc.go  | 217 +++++++++++++++++++++++++++++++
 5 files changed, 394 insertions(+)
 create mode 100644 mdproc/comments.go
 create mode 100644 mdproc/headings.go
 create mode 100644 mdproc/hugo.go
 create mode 100644 mdproc/links.go
 create mode 100644 mdproc/preproc.go

diff --git a/mdproc/comments.go b/mdproc/comments.go
new file mode 100644
index 0000000..b88af5a
--- /dev/null
+++ b/mdproc/comments.go
@@ -0,0 +1,34 @@
+package mdproc
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// RemoveComments returns a channel of stream items whose payloads have had
+// HTML comments (<!-- ... -->) removed, provided the comment does not span a
+// line break. Each payload is then whitespace-trimmed and terminated with a
+// single newline. The output channel is closed once in is closed and drained.
+func RemoveComments(in chan pipe.StreamItem) chan pipe.StreamItem {
+	out := make(chan pipe.StreamItem)
+
+	go func() {
+		// Non-greedy, so two comments in one payload do not swallow the
+		// text between them.
+		re := regexp.MustCompile(`<!--.*?-->`)
+
+		for b := range in {
+			data := b.Payload()
+			for _, match := range re.FindAllSubmatch(data, -1) {
+				data = bytes.Replace(data, match[0], []byte(""), 1)
+			}
+			out <- pipe.NewItem(b.Index(), append(bytes.TrimSpace(data), '\n'))
+		}
+
+		close(out)
+	}()
+
+	return out
+}
diff --git a/mdproc/headings.go b/mdproc/headings.go
new file mode 100644
index 0000000..244f10e
--- /dev/null
+++ b/mdproc/headings.go
@@ -0,0 +1,42 @@
+package mdproc
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// FormatHeadings returns a channel of stream items in which payloads that
+// start with '#' are normalised: a run of four or more leading '#' becomes
+// "###", a space is inserted after the run when missing, and an extra
+// newline is appended. Other payloads are forwarded unchanged. The output
+// channel is closed once in is closed and drained.
+func FormatHeadings(in chan pipe.StreamItem) chan pipe.StreamItem {
+	out := make(chan pipe.StreamItem)
+
+	go func() {
+		re := regexp.MustCompile(`^[#]{4,}`)
+		re2 := regexp.MustCompile(`^(#+)[^# ]`)
+
+		for b := range in {
+			// collapse four or more heading levels to three
+			data := re.ReplaceAll(b.Payload(), []byte("###"))
+			// ensure we have a space
+			sub := re2.FindSubmatch(data)
+			if len(sub) > 0 {
+				data = bytes.Replace(data, sub[1], append(sub[1], []byte(" ")...), 1)
+			}
+			// generally if we deal with a heading, add an extra blank line
+			if bytes.HasPrefix(data, []byte("#")) {
+				data = append(data, '\n')
+			}
+			// writeback
+			out <- pipe.NewItem(b.Index(), data)
+		}
+
+		close(out)
+	}()
+
+	return out
+}
diff --git a/mdproc/hugo.go b/mdproc/hugo.go
new file mode 100644
index 0000000..520d427
--- /dev/null
+++ b/mdproc/hugo.go
@@ -0,0 +1,33 @@
+package mdproc
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// RemoveFrontMatter returns a channel of stream items whose payloads have had
+// any "---" ... "---" span that sits on a single line removed, markers
+// included. Each payload is then whitespace-trimmed and terminated with a
+// single newline. The output channel is closed once in is closed and drained.
+func RemoveFrontMatter(in chan pipe.StreamItem) chan pipe.StreamItem {
+	out := make(chan pipe.StreamItem)
+
+	go func() {
+		// Greedy: the span runs to the last "---" on the line.
+		re := regexp.MustCompile(`---.*---`)
+
+		for b := range in {
+			data := b.Payload()
+			for _, match := range re.FindAllSubmatch(data, -1) {
+				data = bytes.Replace(data, match[0], []byte(""), 1)
+			}
+			out <- pipe.NewItem(b.Index(), append(bytes.TrimSpace(data), '\n'))
+		}
+
+		close(out)
+	}()
+
+	return out
+}
diff --git a/mdproc/links.go b/mdproc/links.go
new file mode 100644
index 0000000..6759a6a
--- /dev/null
+++ b/mdproc/links.go
@@ -0,0 +1,68 @@
+package mdproc
+
+import (
+	"bytes"
+	"fmt"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// linkRe matches inline markdown links and images, "[name](url)" with an
+// optional leading '!', capturing name and url. It is compiled once at
+// package scope rather than on every formatLinks call.
+var linkRe = regexp.MustCompile(`!?\[([^\]*]*)\]\(([^)]*)\)`)
+
+// FormatLinks returns a channel of stream items with inline links rewritten
+// by formatLinks. Every payload that starts with a code fence marker toggles
+// a fence flag; while the flag is set, payloads are forwarded unchanged. The
+// output channel is closed once in is closed and drained.
+func FormatLinks(in chan pipe.StreamItem) chan pipe.StreamItem {
+	out := make(chan pipe.StreamItem)
+
+	go func() {
+		fenceOn := false
+
+		for b := range in {
+			data := b.Payload()
+			if isFence(data) {
+				fenceOn = !fenceOn
+			}
+
+			if fenceOn {
+				out <- pipe.NewItem(b.Index(), data)
+
+				continue
+			}
+			out <- pipe.NewItem(b.Index(), formatLinks(data))
+		}
+
+		close(out)
+	}()
+
+	return out
+}
+
+// formatLinks replaces each inline link in data with "name[N]" and appends,
+// after a separating newline, one "=> url N: name" line per link, where N is
+// the 1-based position of the link within data. Data without links is
+// returned unchanged.
+func formatLinks(data []byte) []byte {
+	var buffer []byte
+
+	// find link name and url
+	for i, match := range linkRe.FindAllSubmatch(data, -1) {
+		// Build a fresh slice instead of appending to a view into data.
+		replaceWithIndex := []byte(fmt.Sprintf("%s[%d]", match[1], i+1))
+		data = bytes.Replace(data, match[0], replaceWithIndex, 1)
+		link := fmt.Sprintf("=> %s %d: %s\n", match[2], i+1, match[1])
+		buffer = append(buffer, link...)
+	}
+	// append links to that paragraph
+	if len(buffer) > 0 {
+		data = append(data, '\n')
+		data = append(data, buffer...)
+	}
+
+	return data
+}
diff --git a/mdproc/preproc.go b/mdproc/preproc.go
new file mode 100644
index 0000000..4f0616c
--- /dev/null
+++ b/mdproc/preproc.go
@@ -0,0 +1,217 @@
+package mdproc
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/n0x1m/md2gmi/pipe"
+)
+
+// listRe matches a list marker ('-', '*' or '^'), optionally preceded by
+// spaces and followed by a byte that is neither '*' nor '-'. It is compiled
+// once at package scope rather than on every handleList call.
+var listRe = regexp.MustCompile(`^([ ]*[-*^]{1,1})[^*-]`)
+
+// stateFn handles one input payload and returns the state for the next one.
+type stateFn func(*fsm, []byte) stateFn
+
+// fsm is the preprocessing state machine.
+type fsm struct {
+	state stateFn
+
+	i   int
+	out chan pipe.StreamItem
+
+	// combining multiple input lines
+	blockBuffer []byte
+	sendBuffer  []byte
+	// if we have a termination rule to abide, e.g. implied code fences
+	pending []byte
+}
+
+// Preproc returns a pipeline stage that merges consecutive input payloads
+// into blocks (paragraphs, list items, code blocks) before they are emitted.
+func Preproc() pipe.Pipeline {
+	return (&fsm{}).pipeline
+}
+
+// pipeline runs the state machine in a goroutine over every payload received
+// on in and returns the output channel. Once in is closed, whatever is still
+// buffered is flushed and the output channel is closed.
+func (m *fsm) pipeline(in chan pipe.StreamItem) chan pipe.StreamItem {
+	m.out = make(chan pipe.StreamItem)
+
+	go func() {
+		for m.state = normal; m.state != nil; {
+			b, ok := <-in
+			if !ok {
+				m.blockFlush()
+				m.sync()
+				close(m.out)
+				m.state = nil
+
+				continue
+			}
+
+			m.state = m.state(m, b.Payload())
+			m.sync()
+		}
+	}()
+
+	return m.out
+}
+
+// sync sends the send buffer, if non-empty, as one item with a newline
+// appended, then resets the buffer and advances the output index.
+func (m *fsm) sync() {
+	if len(m.sendBuffer) > 0 {
+		m.sendBuffer = append(m.sendBuffer, '\n')
+		m.out <- pipe.NewItem(m.i, m.sendBuffer)
+		// NOTE(review): the backing array is reused for the next block;
+		// this presumes pipe.NewItem copies the payload - confirm.
+		m.sendBuffer = m.sendBuffer[:0]
+		m.i++
+	}
+}
+
+// blockFlush moves the block buffer, followed by any pending terminator plus
+// a newline, into the send buffer and empties both.
+func (m *fsm) blockFlush() {
+	// blockBuffer to sendbuffer
+	m.sendBuffer = append(m.sendBuffer, m.blockBuffer...)
+	m.blockBuffer = m.blockBuffer[:0]
+
+	if len(m.pending) > 0 {
+		m.sendBuffer = append(m.sendBuffer, m.pending...)
+		m.sendBuffer = append(m.sendBuffer, '\n')
+		m.pending = m.pending[:0]
+	}
+}
+
+// triggerBreak reports whether data ends the current paragraph, i.e. is
+// empty or ends in '.'.
+func triggerBreak(data []byte) bool {
+	return len(data) == 0 || data[len(data)-1] == '.'
+}
+
+// isTerminated reports whether data is non-empty and does not end in '.'.
+// normal uses a true result to open a multi-line paragraph, so despite the
+// name, true means the line is not yet terminated.
+func isTerminated(data []byte) bool {
+	return len(data) > 0 && data[len(data)-1] != '.'
+}
+
+// handleList rewrites a list item so that its marker, including any leading
+// indentation, becomes a single "-". It returns the rewritten line and true,
+// or data unchanged and false when data is not a list item.
+func handleList(data []byte) ([]byte, bool) {
+	sub := listRe.FindSubmatch(data)
+	// if lists, collapse to single level
+	if len(sub) > 1 {
+		return bytes.Replace(data, sub[1], []byte("-"), 1), true
+	}
+
+	return data, false
+}
+
+// isFence reports whether data starts with a code fence marker.
+func isFence(data []byte) bool {
+	return bytes.HasPrefix(data, []byte("```"))
+}
+
+// needsFence reports whether data starts with four spaces, which is treated
+// as an implied code block.
+func needsFence(data []byte) bool {
+	return bytes.HasPrefix(data, []byte("    "))
+}
+
+// normal is the state outside of any paragraph or code block. It emits list
+// items and self-contained lines immediately and otherwise hands over to the
+// fence, toFence or paragraph state.
+func normal(m *fsm, data []byte) stateFn {
+	if data, isList := handleList(data); isList {
+		// NOTE(review): this assignment, like the one in the fence branch
+		// below, replaces blockBuffer, so a line left there by toFence is
+		// dropped - confirm this is intended.
+		m.blockBuffer = append(data, '\n')
+		m.blockFlush()
+
+		return normal
+	}
+
+	if isFence(data) {
+		m.blockBuffer = append(data, '\n')
+
+		return fence
+	}
+
+	if needsFence(data) {
+		m.blockBuffer = append(m.blockBuffer, []byte("```\n")...)
+		m.blockBuffer = append(m.blockBuffer, append(data[4:], '\n')...)
+		m.pending = []byte("```\n")
+
+		return toFence
+	}
+
+	if isTerminated(data) {
+		m.blockBuffer = append(m.blockBuffer, data...)
+		m.blockBuffer = append(m.blockBuffer, ' ')
+
+		return paragraph
+	}
+
+	m.blockBuffer = append(m.blockBuffer, append(data, '\n')...)
+	m.blockFlush()
+
+	return normal
+}
+
+// fence is the state inside an explicit code fence. Lines are buffered
+// verbatim; the closing fence line flushes the block and returns to normal.
+func fence(m *fsm, data []byte) stateFn {
+	m.blockBuffer = append(m.blockBuffer, append(data, '\n')...)
+	// second fence returns to normal
+	if isFence(data) {
+		m.blockFlush()
+
+		return normal
+	}
+
+	return fence
+}
+
+// toFence is the state inside an implied (indented) code block. Indented
+// lines are buffered with their four-space prefix removed. The first other
+// line flushes the block together with the pending closing fence, is itself
+// left in the block buffer, and switches back to normal.
+func toFence(m *fsm, data []byte) stateFn {
+	if needsFence(data) {
+		m.blockBuffer = append(m.blockBuffer, append(data[4:], '\n')...)
+
+		return toFence
+	}
+
+	m.blockFlush()
+	m.blockBuffer = append(m.blockBuffer, append(data, '\n')...)
+
+	return normal
+}
+
+// paragraph is the state inside a multi-line paragraph. Lines are joined
+// with single spaces until triggerBreak reports the end, at which point the
+// trimmed paragraph is flushed with a trailing newline.
+func paragraph(m *fsm, data []byte) stateFn {
+	if triggerBreak(data) {
+		m.blockBuffer = append(m.blockBuffer, data...)
+		m.blockBuffer = bytes.TrimSpace(m.blockBuffer)
+		m.blockBuffer = append(m.blockBuffer, '\n')
+		m.blockFlush()
+
+		return normal
+	}
+
+	m.blockBuffer = append(m.blockBuffer, data...)
+	m.blockBuffer = append(m.blockBuffer, []byte(" ")...)
+
+	return paragraph
+}