lexpr/lex.go

182 lines
3.5 KiB
Go
Raw Normal View History

2022-06-13 04:31:31 +03:00
package lexpr
import (
"context"
"strings"
"unicode/utf8"
)
// EOF const.
const EOF rune = -1
// lex holds current scanner state.
type lex struct {
input string // Input string.
start int // Start position of current lexem.
pos int // Pos at input string.
output chan lexem // Lexems channel.
width int // Width of last rune.
}
// newLex returns new scanner for input string.
func newLex() *lex {
return &lex{
input: "",
start: 0,
pos: 0,
output: nil,
width: 0,
}
}
// parse input to lexems.
func (l *lex) parse(ctx context.Context, input string) <-chan lexem {
l.input = input
l.output = make(chan lexem)
go func() {
defer close(l.output)
for {
if ctx.Err() != nil {
return
}
switch {
case l.acceptWhile(" \n\t", false):
l.ignore()
case l.accept("("):
l.emit(lp)
case l.accept(")"):
l.emit(rp)
case l.accept(","):
l.emit(sep)
case scanNumber(l):
l.emit(number)
case scanOps(l):
l.emit(op)
case scanWord(l):
l.emit(word)
case scanQuotedString(l, `"`):
l.emit(str)
case l.peek() == EOF:
return
default:
l.emit(tokError)
return
}
}
}()
return l.output
}
// emit current lexem to output.
func (l *lex) emit(typ lexType) {
l.output <- lexem{
Type: typ,
Value: l.input[l.start:l.pos],
Start: l.start,
End: l.pos,
}
l.start = l.pos
}
// next rune from input.
func (l *lex) next() (r rune) {
if int(l.pos) >= len(l.input) {
l.width = 0
return EOF
}
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
return r
}
// back move position to previos rune.
func (l *lex) back() {
l.pos -= l.width
}
// ignore previosly buffered text.
func (l *lex) ignore() {
l.start = l.pos
l.width = 0
}
// peek rune at current position without moving position.
func (l *lex) peek() (r rune) {
r = l.next()
l.back()
return r
}
// accept any rune from valid string. Returns true if next rune was in valid string.
func (l *lex) accept(valid string) bool {
if strings.ContainsRune(valid, l.next()) {
return true
}
l.back()
return false
}
// acceptString returns true if given string was at position.
func (l *lex) acceptString(s string, caseInsentive bool) bool {
input := l.input
if caseInsentive {
input = strings.ToLower(input)
s = strings.ToLower(s)
}
if strings.HasPrefix(input, s) {
l.width = 0
l.pos += len(s)
return true
}
return false
}
// acceptAnyOf substrings. Retuns true if any of substrings was found.
func (l *lex) acceptAnyOf(s []string, caseInsentive bool) bool {
for _, substring := range s {
if l.acceptString(substring, caseInsentive) {
return true
}
}
return false
}
// acceptWhile passing symbols from input while they at `valid` string.
func (l *lex) acceptWhile(valid string, ignoreEscaped bool) bool {
start := l.pos
for {
ch := l.next()
switch {
case ch == EOF:
return false
case ch == '\\' && ignoreEscaped:
l.next()
case !strings.ContainsRune(valid, ch):
l.back()
return l.pos > start
}
}
}
// acceptWhileNot passing symbols from input while they NOT in `invalid` string.
func (l *lex) acceptWhileNot(invalid string, ignoreEscaped bool) bool {
start := l.pos
for {
ch := l.next()
switch {
case ch == EOF:
return false
case ch == '\\' && ignoreEscaped:
l.next()
case strings.ContainsRune(invalid, ch):
l.back()
return l.pos > start
}
}
}
// atStart returns true if current lexem not empty
func (l *lex) atStart() bool {
return l.pos == l.start
}