Initial
This commit is contained in:
commit
93740d2d15
9 changed files with 410 additions and 0 deletions
54
example/math_expression/main.go
Normal file
54
example/math_expression/main.go
Normal file
|
@ -0,0 +1,54 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/neonxp/unilex"
|
||||
)
|
||||
|
||||
// output is a package-level lexem slice, initialized empty.
// NOTE(review): nothing in the visible example code reads it (infixToRPNotation
// builds its own local slice of the same name) — confirm it is unused before removing.
var output []unilex.Lexem = []unilex.Lexem{}
|
||||
// opPriority maps an operator symbol to its precedence; a larger value binds tighter.
// infixToRPNotation compares these values to decide when a stacked operator moves to the output.
var opPriority = map[string]int{
|
||||
"^": 3,
|
||||
"!": 3,
|
||||
"*": 2,
|
||||
"/": 2,
|
||||
"+": 1,
|
||||
"-": 1,
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
||||
l := unilex.New("10 * (20.0 + 30.0)")
|
||||
|
||||
go l.Run(lexExpression) // Start lexer
|
||||
|
||||
// Read infix expression lexems from lexer and convert them to RPN (reverse polish notation)
|
||||
rpn := infixToRPNotation(l)
|
||||
fmt.Println("RPN:", rpn)
|
||||
|
||||
// Calculate RPN
|
||||
result := calculateRPN(rpn)
|
||||
fmt.Println("Result:", result)
|
||||
}
|
||||
|
||||
func lexExpression(l *unilex.Lexer) unilex.StateFunc {
|
||||
l.AcceptWhile(" \t")
|
||||
l.Ignore() // Ignore whitespaces
|
||||
|
||||
switch {
|
||||
case l.Accept("("):
|
||||
l.Emit("LP")
|
||||
case l.Accept(")"):
|
||||
l.Emit("RP")
|
||||
case unilex.ScanNumber(l):
|
||||
l.Emit("NUMBER")
|
||||
case l.Accept("+-*/^!"):
|
||||
l.Emit("OPERATOR")
|
||||
case l.Peek() == unilex.EOF:
|
||||
return nil
|
||||
default:
|
||||
return l.Errorf("Unexpected symbol")
|
||||
}
|
||||
|
||||
return lexExpression
|
||||
}
|
84
example/math_expression/rpn.go
Normal file
84
example/math_expression/rpn.go
Normal file
|
@ -0,0 +1,84 @@
|
|||
package main
|
||||
|
||||
// Helper functions that convert infix notation to RPN and calculate the expression result.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
|
||||
"github.com/neonxp/unilex"
|
||||
)
|
||||
|
||||
func infixToRPNotation(l *unilex.Lexer) []unilex.Lexem {
|
||||
output := []unilex.Lexem{}
|
||||
stack := lexemStack{}
|
||||
parseLoop:
|
||||
for ll := range l.Output { // Read lexems from Lexer output channel, convert starts as soon as first lexems scanned!
|
||||
fmt.Printf("Lexem: %+v\n", ll)
|
||||
|
||||
switch {
|
||||
case ll.Type == "NUMBER", ll.Type == "OPERATOR" && ll.Value == "!":
|
||||
output = append(output, ll)
|
||||
case ll.Type == "LP":
|
||||
stack.Push(ll)
|
||||
case ll.Type == "RP":
|
||||
for {
|
||||
cl := stack.Pop()
|
||||
if cl.Type == "LP" {
|
||||
break
|
||||
}
|
||||
if cl.Type == unilex.LEOF {
|
||||
log.Fatalf("No pair for parenthesis at %d", ll.Start)
|
||||
}
|
||||
output = append(output, cl)
|
||||
}
|
||||
case ll.Type == "OPERATOR":
|
||||
for {
|
||||
if stack.Head().Type == "OPERATOR" && (opPriority[stack.Head().Value] > opPriority[ll.Value]) {
|
||||
output = append(output, stack.Pop())
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
stack.Push(ll)
|
||||
case ll.Type == unilex.LEOF:
|
||||
break parseLoop
|
||||
}
|
||||
}
|
||||
|
||||
for stack.Head().Type != unilex.LEOF {
|
||||
output = append(output, stack.Pop())
|
||||
}
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
func calculateRPN(rpnLexems []unilex.Lexem) string {
|
||||
stack := lexemStack{}
|
||||
for _, op := range rpnLexems {
|
||||
if op.Type == "NUMBER" {
|
||||
stack.Push(op)
|
||||
} else {
|
||||
switch op.Value {
|
||||
case "+":
|
||||
a1, _ := strconv.ParseFloat(stack.Pop().Value, 64)
|
||||
a2, _ := strconv.ParseFloat(stack.Pop().Value, 64)
|
||||
stack.Push(unilex.Lexem{Type: "NUMBER", Value: strconv.FormatFloat(a2+a1, 'f', -1, 64)})
|
||||
case "-":
|
||||
a1, _ := strconv.ParseFloat(stack.Pop().Value, 64)
|
||||
a2, _ := strconv.ParseFloat(stack.Pop().Value, 64)
|
||||
stack.Push(unilex.Lexem{Type: "NUMBER", Value: strconv.FormatFloat(a2-a1, 'f', -1, 64)})
|
||||
case "*":
|
||||
a1, _ := strconv.ParseFloat(stack.Pop().Value, 64)
|
||||
a2, _ := strconv.ParseFloat(stack.Pop().Value, 64)
|
||||
stack.Push(unilex.Lexem{Type: "NUMBER", Value: strconv.FormatFloat(a2*a1, 'f', -1, 64)})
|
||||
case "/":
|
||||
a1, _ := strconv.ParseFloat(stack.Pop().Value, 64)
|
||||
a2, _ := strconv.ParseFloat(stack.Pop().Value, 64)
|
||||
stack.Push(unilex.Lexem{Type: "NUMBER", Value: strconv.FormatFloat(a2/a1, 'f', -1, 64)})
|
||||
}
|
||||
}
|
||||
}
|
||||
return stack.Head().Value
|
||||
}
|
26
example/math_expression/stack.go
Normal file
26
example/math_expression/stack.go
Normal file
|
@ -0,0 +1,26 @@
|
|||
package main
|
||||
|
||||
// Simple lexem stack implementation.
|
||||
|
||||
import "github.com/neonxp/unilex"
|
||||
|
||||
// lexemStack is a LIFO stack of lexems backed by a slice; the last element is the top.
type lexemStack []unilex.Lexem
|
||||
|
||||
func (ls *lexemStack) Head() (l unilex.Lexem) {
|
||||
if len(*ls) == 0 {
|
||||
return unilex.Lexem{Type: unilex.LEOF}
|
||||
}
|
||||
return (*ls)[len(*ls)-1]
|
||||
}
|
||||
|
||||
func (ls *lexemStack) Push(l unilex.Lexem) {
|
||||
*ls = append(*ls, l)
|
||||
}
|
||||
|
||||
func (ls *lexemStack) Pop() (l unilex.Lexem) {
|
||||
if len(*ls) == 0 {
|
||||
return unilex.Lexem{Type: unilex.LEOF}
|
||||
}
|
||||
*ls, l = (*ls)[:len(*ls)-1], (*ls)[len(*ls)-1]
|
||||
return l
|
||||
}
|
3
go.mod
Normal file
3
go.mod
Normal file
|
@ -0,0 +1,3 @@
|
|||
module github.com/neonxp/unilex
|
||||
|
||||
go 1.16
|
20
lexem.go
Normal file
20
lexem.go
Normal file
|
@ -0,0 +1,20 @@
|
|||
package unilex
|
||||
|
||||
// Lexem is one token produced by the lexer: a typed piece of the input string
// together with its position.
|
||||
type Lexem struct {
|
||||
Type LexType // Kind of the lexem, as passed to Emit (LError for lexems sent by Errorf).
|
||||
Value string // Text of the lexem; for lexems sent by Errorf, the formatted error message.
|
||||
Start int // Byte offset in the input where the lexem starts.
|
||||
End int // Byte offset in the input just past the end of the lexem.
|
||||
}
|
||||
|
||||
// LexType names the kind of a lexem. It is a plain string type; LError and
// LEOF are the values predefined by this package.
|
||||
type LexType string
|
||||
|
||||
// Standard lexem types.
|
||||
const (
|
||||
// LError marks a lexem that carries a lexing error; Lexer.Errorf emits it.
|
||||
LError LexType = "ERROR"
|
||||
// LEOF marks the end of input.
|
||||
LEOF LexType = "EOF"
|
||||
)
|
141
lexer.go
Normal file
141
lexer.go
Normal file
|
@ -0,0 +1,141 @@
|
|||
package unilex
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// EOF is the sentinel rune that Next (and therefore Peek) returns once the input is exhausted.
|
||||
const EOF rune = -1
|
||||
|
||||
// Lexer holds the scanner state: the input, the bounds of the lexem currently
// being built, and the channel on which finished lexems are sent.
|
||||
type Lexer struct {
|
||||
Input string // The string being scanned.
|
||||
Start int // Byte offset where the current (not yet emitted) lexem begins.
|
||||
Pos int // Byte offset of the next rune to read.
|
||||
Output chan Lexem // Channel on which Emit and Errorf send lexems; Run closes it when scanning ends.
|
||||
width int // Byte width of the rune most recently read by Next; Back rewinds Pos by this amount.
|
||||
}
|
||||
|
||||
// New returns new scanner for input string.
|
||||
func New(input string) *Lexer {
|
||||
return &Lexer{
|
||||
Input: input,
|
||||
Start: 0,
|
||||
Pos: 0,
|
||||
Output: make(chan Lexem, 2),
|
||||
width: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// Run lexing.
|
||||
func (l *Lexer) Run(init StateFunc) {
|
||||
for state := init; state != nil; {
|
||||
state = state(l)
|
||||
}
|
||||
close(l.Output)
|
||||
}
|
||||
|
||||
// Emit current lexem to output.
|
||||
func (l *Lexer) Emit(typ LexType) {
|
||||
l.Output <- Lexem{
|
||||
Type: typ,
|
||||
Value: l.Input[l.Start:l.Pos],
|
||||
Start: l.Start,
|
||||
End: l.Pos,
|
||||
}
|
||||
l.Start = l.Pos
|
||||
}
|
||||
|
||||
// Errorf produces error lexem and stops scanning.
|
||||
func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
|
||||
l.Output <- Lexem{
|
||||
Type: LError,
|
||||
Value: fmt.Sprintf(format, args...),
|
||||
Start: l.Start,
|
||||
End: l.Pos,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Next rune from input.
|
||||
func (l *Lexer) Next() (r rune) {
|
||||
if int(l.Pos) >= len(l.Input) {
|
||||
l.width = 0
|
||||
return EOF
|
||||
}
|
||||
r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:])
|
||||
l.Pos += l.width
|
||||
return r
|
||||
}
|
||||
|
||||
// Back move position to previos rune.
|
||||
func (l *Lexer) Back() {
|
||||
l.Pos -= l.width
|
||||
}
|
||||
|
||||
// Ignore previosly buffered text.
|
||||
func (l *Lexer) Ignore() {
|
||||
l.Start = l.Pos
|
||||
l.width = 0
|
||||
}
|
||||
|
||||
// Peek rune at current position without moving position.
|
||||
func (l *Lexer) Peek() (r rune) {
|
||||
r = l.Next()
|
||||
l.Back()
|
||||
return r
|
||||
}
|
||||
|
||||
// Accept any rune from valid string. Returns true if next rune was in valid string.
|
||||
func (l *Lexer) Accept(valid string) bool {
|
||||
if strings.ContainsRune(valid, l.Next()) {
|
||||
return true
|
||||
}
|
||||
l.Back()
|
||||
return false
|
||||
}
|
||||
|
||||
// AcceptString returns true if given string was at position.
|
||||
func (l *Lexer) AcceptString(s string, caseInsentive bool) bool {
|
||||
input := l.Input
|
||||
if caseInsentive {
|
||||
input = strings.ToLower(input)
|
||||
s = strings.ToLower(s)
|
||||
}
|
||||
if strings.HasPrefix(input, s) {
|
||||
l.width = 0
|
||||
l.Pos += len(s)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// AcceptAnyOf substrings. Retuns true if any of substrings was found.
|
||||
func (l *Lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
|
||||
for _, substring := range s {
|
||||
if l.AcceptString(substring, caseInsentive) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// AcceptWhile passing symbols from input while they at `valid` string.
|
||||
func (l *Lexer) AcceptWhile(valid string) {
|
||||
for l.Accept(valid) {
|
||||
}
|
||||
}
|
||||
|
||||
// AcceptWhileNot passing symbols from input while they NOT in `invalid` string.
|
||||
func (l *Lexer) AcceptWhileNot(invalid string) {
|
||||
for !strings.ContainsRune(invalid, l.Next()) {
|
||||
}
|
||||
l.Back()
|
||||
}
|
||||
|
||||
// AtStart returns true if current lexem not empty
|
||||
func (l *Lexer) AtStart() bool {
|
||||
return l.Pos == l.Start
|
||||
}
|
25
scanners.go
Normal file
25
scanners.go
Normal file
|
@ -0,0 +1,25 @@
|
|||
package unilex
|
||||
|
||||
// ScanNumber simplest scanner that accepts decimal int and float.
|
||||
func ScanNumber(l *Lexer) bool {
|
||||
l.AcceptWhile("0123456789")
|
||||
if l.AtStart() {
|
||||
// not found any digit
|
||||
return false
|
||||
}
|
||||
l.Accept(".")
|
||||
l.AcceptWhile("0123456789")
|
||||
return !l.AtStart()
|
||||
}
|
||||
|
||||
// ScanAlphaNum returns true if next input token contains alphanum sequence that not starts from digit and not contains.
|
||||
// spaces or special characters.
|
||||
func ScanAlphaNum(l *Lexer) bool {
|
||||
digits := "0123456789"
|
||||
alpha := "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM"
|
||||
if !l.Accept(alpha) {
|
||||
return false
|
||||
}
|
||||
l.AcceptWhile(alpha + digits)
|
||||
return true
|
||||
}
|
53
scanners_test.go
Normal file
53
scanners_test.go
Normal file
|
@ -0,0 +1,53 @@
|
|||
package unilex
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestScanNumber(t *testing.T) {
|
||||
testCases := []struct {
|
||||
Input string
|
||||
Expected bool
|
||||
Pos int
|
||||
}{
|
||||
{"asd", false, 0},
|
||||
{"asd123", false, 0},
|
||||
{"123", true, 3},
|
||||
{"123asd", true, 3},
|
||||
{"123.321", true, 7},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
l := New(tc.Input)
|
||||
actual := ScanNumber(l)
|
||||
if actual != tc.Expected {
|
||||
t.Errorf("Input: %s expected scan result: %v actual: %v", tc.Input, tc.Expected, actual)
|
||||
}
|
||||
if l.Pos != tc.Pos {
|
||||
t.Errorf("Input: %s expected scan position: %d actual: %d", tc.Input, tc.Pos, l.Pos)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScanAlphaNum(t *testing.T) {
|
||||
testCases := []struct {
|
||||
Input string
|
||||
Expected bool
|
||||
Pos int
|
||||
}{
|
||||
{"asd", true, 3},
|
||||
{"asd123", true, 6},
|
||||
{"123", false, 0},
|
||||
{"123asd", false, 0},
|
||||
{"123.321", false, 0},
|
||||
{"asd!dsa", true, 3},
|
||||
{"asd dsa", true, 3},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
l := New(tc.Input)
|
||||
actual := ScanAlphaNum(l)
|
||||
if actual != tc.Expected {
|
||||
t.Errorf("Input: %s expected scan result: %v actual: %v", tc.Input, tc.Expected, actual)
|
||||
}
|
||||
if l.Pos != tc.Pos {
|
||||
t.Errorf("Input: %s expected scan position: %d actual: %d", tc.Input, tc.Pos, l.Pos)
|
||||
}
|
||||
}
|
||||
}
|
4
statefunc.go
Normal file
4
statefunc.go
Normal file
|
@ -0,0 +1,4 @@
|
|||
package unilex
|
||||
|
||||
// StateFunc is one lexer state: a function that scans from the lexer's current
// position and returns the state to run next, or nil when lexing is complete.
// Lexer.Run keeps calling the returned state until it receives nil.
|
||||
type StateFunc func(*Lexer) StateFunc
|
Loading…
Reference in a new issue