2021-07-05 08:36:20 +03:00
|
|
|
package mdproc
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"regexp"
|
|
|
|
|
|
|
|
"github.com/n0x1m/md2gmi/pipe"
|
|
|
|
)
|
|
|
|
|
|
|
|
// state function.
|
|
|
|
type stateFn func(*fsm, []byte) stateFn
|
|
|
|
|
|
|
|
// state machine.
|
|
|
|
type fsm struct {
|
|
|
|
state stateFn
|
|
|
|
|
|
|
|
i int
|
|
|
|
out chan pipe.StreamItem
|
|
|
|
|
|
|
|
// combining multiple input lines
|
2021-07-11 09:52:51 +03:00
|
|
|
multiLineBlockMode int
|
2021-07-11 09:12:56 +03:00
|
|
|
blockBuffer []byte
|
|
|
|
sendBuffer []byte
|
2021-07-05 08:36:20 +03:00
|
|
|
// if we have a termination rule to abide, e.g. implied code fences
|
|
|
|
pending []byte
|
|
|
|
}
|
|
|
|
|
2021-07-11 08:48:46 +03:00
|
|
|
func Preprocessor() pipe.Pipeline {
|
2021-07-05 08:36:20 +03:00
|
|
|
return (&fsm{}).pipeline
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *fsm) pipeline(in chan pipe.StreamItem) chan pipe.StreamItem {
|
|
|
|
m.out = make(chan pipe.StreamItem)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
for m.state = normal; m.state != nil; {
|
|
|
|
b, ok := <-in
|
|
|
|
if !ok {
|
|
|
|
m.blockFlush()
|
|
|
|
m.sync()
|
|
|
|
close(m.out)
|
|
|
|
m.state = nil
|
|
|
|
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2021-07-11 09:12:56 +03:00
|
|
|
m.state = m.state(wrap(m, b.Payload()))
|
2021-07-05 08:36:20 +03:00
|
|
|
m.sync()
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
return m.out
|
|
|
|
}
|
|
|
|
|
2021-07-11 09:12:56 +03:00
|
|
|
func wrap(m *fsm, data []byte) (*fsm, []byte) {
|
2021-07-11 09:52:51 +03:00
|
|
|
var scount, ecount int
|
|
|
|
if scount = countStart(data, "<!--"); scount > 0 {
|
|
|
|
m.multiLineBlockMode += scount
|
2021-07-11 09:12:56 +03:00
|
|
|
}
|
2021-07-11 12:12:57 +03:00
|
|
|
|
2021-07-11 09:52:51 +03:00
|
|
|
if ecount = countEnd(data, "-->"); ecount > 0 {
|
|
|
|
m.multiLineBlockMode -= ecount
|
|
|
|
}
|
|
|
|
|
2021-07-11 10:58:46 +03:00
|
|
|
// clip entire line if no control sequences present
|
|
|
|
if (m.multiLineBlockMode > 0 && scount == 0 && ecount == 0) || m.multiLineBlockMode > 1 {
|
2021-07-11 09:52:51 +03:00
|
|
|
data = data[:0]
|
|
|
|
return m, data
|
|
|
|
}
|
|
|
|
|
|
|
|
// clip data past first occurrence
|
|
|
|
if scount > 0 {
|
|
|
|
data = data[:bytes.Index(data, []byte("<!--"))]
|
|
|
|
}
|
|
|
|
|
|
|
|
// clip data past last occurrence
|
|
|
|
if ecount = countEnd(data, "-->"); ecount > 0 {
|
|
|
|
data = data[bytes.LastIndex(data, []byte("-->"))+3:]
|
2021-07-11 09:12:56 +03:00
|
|
|
}
|
2021-07-11 12:12:57 +03:00
|
|
|
|
2021-07-11 09:12:56 +03:00
|
|
|
return m, data
|
|
|
|
}
|
|
|
|
|
2021-07-11 09:52:51 +03:00
|
|
|
// countStart returns the number of non-overlapping occurrences of pattern in
// data.
func countStart(data []byte, pattern string) int {
	marker := []byte(pattern)

	return bytes.Count(data, marker)
}
|
|
|
|
|
|
|
|
// countEnd returns the number of non-overlapping occurrences of pattern in
// data.
func countEnd(data []byte, pattern string) int {
	marker := []byte(pattern)

	return bytes.Count(data, marker)
}
|
|
|
|
|
2021-07-05 08:36:20 +03:00
|
|
|
func (m *fsm) sync() {
|
|
|
|
if len(m.sendBuffer) > 0 {
|
|
|
|
m.sendBuffer = append(m.sendBuffer, '\n')
|
|
|
|
m.out <- pipe.NewItem(m.i, m.sendBuffer)
|
|
|
|
m.sendBuffer = m.sendBuffer[:0]
|
|
|
|
m.i++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-11 09:12:56 +03:00
|
|
|
func (m *fsm) softBlockFlush() {
|
2021-07-11 09:52:51 +03:00
|
|
|
if m.multiLineBlockMode > 0 {
|
2021-07-11 09:12:56 +03:00
|
|
|
return
|
|
|
|
}
|
2021-07-11 12:12:57 +03:00
|
|
|
|
2021-07-11 09:12:56 +03:00
|
|
|
m.blockFlush()
|
|
|
|
}
|
|
|
|
|
2021-07-05 08:36:20 +03:00
|
|
|
func (m *fsm) blockFlush() {
|
|
|
|
// blockBuffer to sendbuffer
|
|
|
|
m.sendBuffer = append(m.sendBuffer, m.blockBuffer...)
|
|
|
|
m.blockBuffer = m.blockBuffer[:0]
|
|
|
|
|
2021-07-05 19:42:21 +03:00
|
|
|
// pending to sendbuffer too
|
2021-07-05 08:36:20 +03:00
|
|
|
if len(m.pending) > 0 {
|
|
|
|
m.sendBuffer = append(m.sendBuffer, m.pending...)
|
|
|
|
m.pending = m.pending[:0]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// isTerminated reports whether data is non-empty and its last byte is
// something other than a period. Note that, despite the name, a true result
// means the line does NOT end in '.'.
func isTerminated(data []byte) bool {
	if len(data) == 0 {
		return false
	}

	last := data[len(data)-1]

	return last != '.'
}
|
|
|
|
|
2021-07-11 08:48:46 +03:00
|
|
|
// triggerBreak reports whether data ends the paragraph being collected: an
// empty line, a line consisting of a single newline, or a line whose last
// byte is '.', ';' or ':'.
func triggerBreak(data []byte) bool {
	switch {
	case len(data) == 0:
		return true
	case len(data) == 1 && data[0] == '\n':
		return true
	}

	switch data[len(data)-1] {
	case '.', ';', ':':
		return true
	default:
		return false
	}
}
|
|
|
|
|
2021-07-05 08:36:20 +03:00
|
|
|
// The patterns are compiled once at package initialisation instead of on
// every call.
var (
	// emphasisRe matches a line containing two emphasis markers ('*' or '_'),
	// as used for italic and bold text.
	emphasisRe = regexp.MustCompile(`[\*_](.*)[\*_]`)
	// listItemRe matches a list marker ('-', '*' or '^') at the start of the
	// line, optionally indented with spaces or tabs, that is followed by a
	// byte other than '*' or '-'. Group 1 is the indentation plus the marker.
	listItemRe = regexp.MustCompile(`^([ \t]*[-*^]{1,1})[^*-]`)
)

// handleList detects list items and collapses them to a single level.
//
// When data starts with a list marker and contains no emphasis pair, the
// leading indentation and marker are replaced by "-" and the rewritten copy
// is returned with true. Otherwise data is returned unchanged with false.
func handleList(data []byte) ([]byte, bool) {
	marker := listItemRe.FindSubmatch(data)
	if len(marker) <= 1 || emphasisRe.Match(data) {
		return data, false
	}

	return bytes.Replace(data, marker[1], []byte("-"), 1), true
}
|
|
|
|
|
2021-07-11 10:58:46 +03:00
|
|
|
// hasFence reports whether data contains a code fence marker (three
// backticks) anywhere in the line.
func hasFence(data []byte) bool {
	fenceMarker := []byte("```")

	return bytes.Contains(data, fenceMarker)
}
|
|
|
|
|
|
|
|
// needsFence reports whether data is an indented code line, i.e. whether it
// starts with four spaces. Callers strip exactly those four bytes
// (data[4:]) and wrap the remainder in an implied code fence.
//
// The prefix has to be four spaces: comparing the four leading bytes with a
// single-space string can never succeed.
func needsFence(data []byte) bool {
	return bytes.HasPrefix(data, []byte("    "))
}
|
|
|
|
|
2021-07-06 13:39:27 +03:00
|
|
|
func normalText(m *fsm, data []byte) stateFn {
|
2021-07-05 20:01:29 +03:00
|
|
|
if len(bytes.TrimSpace(data)) == 0 {
|
2021-07-05 17:43:46 +03:00
|
|
|
return normal
|
|
|
|
}
|
2021-07-05 20:06:41 +03:00
|
|
|
|
2021-07-05 08:36:20 +03:00
|
|
|
if data, isList := handleList(data); isList {
|
2021-07-05 20:01:29 +03:00
|
|
|
m.blockBuffer = append(m.blockBuffer, data...)
|
2021-07-11 09:12:56 +03:00
|
|
|
m.softBlockFlush()
|
2021-07-05 08:36:20 +03:00
|
|
|
|
2021-07-05 20:01:29 +03:00
|
|
|
return list
|
2021-07-05 08:36:20 +03:00
|
|
|
}
|
|
|
|
|
2021-07-11 10:58:46 +03:00
|
|
|
if hasFence(data) {
|
2021-07-05 08:36:20 +03:00
|
|
|
m.blockBuffer = append(data, '\n')
|
|
|
|
|
|
|
|
return fence
|
|
|
|
}
|
|
|
|
|
|
|
|
if needsFence(data) {
|
|
|
|
m.blockBuffer = append(m.blockBuffer, []byte("```\n")...)
|
|
|
|
m.blockBuffer = append(m.blockBuffer, append(data[4:], '\n')...)
|
|
|
|
m.pending = []byte("```\n")
|
|
|
|
|
|
|
|
return toFence
|
|
|
|
}
|
|
|
|
|
|
|
|
if isTerminated(data) {
|
|
|
|
m.blockBuffer = append(m.blockBuffer, data...)
|
|
|
|
m.blockBuffer = append(m.blockBuffer, ' ')
|
|
|
|
|
|
|
|
return paragraph
|
|
|
|
}
|
|
|
|
|
|
|
|
m.blockBuffer = append(m.blockBuffer, append(data, '\n')...)
|
2021-07-11 09:12:56 +03:00
|
|
|
m.softBlockFlush()
|
2021-07-11 12:12:57 +03:00
|
|
|
|
2021-07-05 08:36:20 +03:00
|
|
|
return normal
|
|
|
|
}
|
|
|
|
|
2021-07-06 13:39:27 +03:00
|
|
|
func normal(m *fsm, data []byte) stateFn {
|
|
|
|
return normalText(m, data)
|
|
|
|
}
|
|
|
|
|
2021-07-05 20:01:29 +03:00
|
|
|
func list(m *fsm, data []byte) stateFn {
|
|
|
|
if data, isList := handleList(data); isList {
|
2021-07-06 13:39:27 +03:00
|
|
|
data = append(data, '\n')
|
2021-07-05 20:01:29 +03:00
|
|
|
m.blockBuffer = append(m.blockBuffer, data...)
|
|
|
|
|
|
|
|
return list
|
|
|
|
}
|
|
|
|
|
2021-07-11 09:12:56 +03:00
|
|
|
m.softBlockFlush()
|
2021-07-05 20:01:29 +03:00
|
|
|
|
2021-07-06 13:39:27 +03:00
|
|
|
return normalText(m, data)
|
2021-07-05 20:01:29 +03:00
|
|
|
}
|
|
|
|
|
2021-07-05 08:36:20 +03:00
|
|
|
func fence(m *fsm, data []byte) stateFn {
|
|
|
|
m.blockBuffer = append(m.blockBuffer, append(data, '\n')...)
|
|
|
|
// second fence returns to normal
|
2021-07-11 10:58:46 +03:00
|
|
|
if hasFence(data) {
|
2021-07-11 09:12:56 +03:00
|
|
|
m.softBlockFlush()
|
2021-07-05 08:36:20 +03:00
|
|
|
|
|
|
|
return normal
|
|
|
|
}
|
|
|
|
|
|
|
|
return fence
|
|
|
|
}
|
|
|
|
|
|
|
|
func toFence(m *fsm, data []byte) stateFn {
|
|
|
|
if needsFence(data) {
|
2021-07-06 13:45:15 +03:00
|
|
|
data = append(data, '\n')
|
|
|
|
m.blockBuffer = append(m.blockBuffer, data[4:]...)
|
2021-07-05 08:36:20 +03:00
|
|
|
|
|
|
|
return toFence
|
|
|
|
}
|
|
|
|
|
2021-07-11 09:12:56 +03:00
|
|
|
m.softBlockFlush()
|
2021-07-05 08:36:20 +03:00
|
|
|
|
2021-07-06 13:39:27 +03:00
|
|
|
return normalText(m, data)
|
2021-07-05 08:36:20 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func paragraph(m *fsm, data []byte) stateFn {
|
|
|
|
if triggerBreak(data) {
|
|
|
|
m.blockBuffer = append(m.blockBuffer, data...)
|
|
|
|
m.blockBuffer = bytes.TrimSpace(m.blockBuffer)
|
2021-07-05 19:42:21 +03:00
|
|
|
// TODO, remove double spaces inside paragraphs
|
2021-07-05 08:36:20 +03:00
|
|
|
m.blockBuffer = append(m.blockBuffer, '\n')
|
2021-07-11 09:12:56 +03:00
|
|
|
m.softBlockFlush()
|
2021-07-05 08:36:20 +03:00
|
|
|
|
|
|
|
return normal
|
|
|
|
}
|
|
|
|
|
|
|
|
m.blockBuffer = append(m.blockBuffer, data...)
|
|
|
|
m.blockBuffer = append(m.blockBuffer, []byte(" ")...)
|
|
|
|
|
|
|
|
return paragraph
|
|
|
|
}
|