This commit is contained in:
Alexander Kiryukhin 2021-03-06 22:30:32 +03:00
commit 93740d2d15
No known key found for this signature in database
GPG key ID: 8CDA417C9098753B
9 changed files with 410 additions and 0 deletions

View file

@ -0,0 +1,54 @@
package main
import (
"fmt"
"github.com/neonxp/unilex"
)
// opPriority maps each operator to its precedence; a higher value binds
// tighter. "!" (factorial) and "^" (power) bind tightest.
// NOTE: the original file also declared a package-level `var output`, which
// was never used (infixToRPNotation builds its own local slice) — removed.
var opPriority = map[string]int{
	"^": 3,
	"!": 3,
	"*": 2,
	"/": 2,
	"+": 1,
	"-": 1,
}
// main lexes a fixed arithmetic expression, converts the lexem stream to
// reverse polish notation and evaluates it.
func main() {
	lexer := unilex.New("10 * (20.0 + 30.0)")
	go lexer.Run(lexExpression) // scan concurrently; lexems arrive on lexer.Output

	// Convert the infix lexem stream to RPN as soon as lexems are produced.
	rpnLexems := infixToRPNotation(lexer)
	fmt.Println("RPN:", rpnLexems)

	// Evaluate the RPN form and print the result.
	fmt.Println("Result:", calculateRPN(rpnLexems))
}
// lexExpression is the single lexer state for arithmetic expressions: it
// skips whitespace and emits parentheses, numbers and operators until the
// input is exhausted or an unexpected symbol is found.
func lexExpression(l *unilex.Lexer) unilex.StateFunc {
	// Consume and drop leading whitespace.
	l.AcceptWhile(" \t")
	l.Ignore()

	if l.Accept("(") {
		l.Emit("LP")
		return lexExpression
	}
	if l.Accept(")") {
		l.Emit("RP")
		return lexExpression
	}
	if unilex.ScanNumber(l) {
		l.Emit("NUMBER")
		return lexExpression
	}
	if l.Accept("+-*/^!") {
		l.Emit("OPERATOR")
		return lexExpression
	}
	if l.Peek() == unilex.EOF {
		return nil // input exhausted: stop lexing
	}
	return l.Errorf("Unexpected symbol")
}

View file

@ -0,0 +1,84 @@
package main
// Helper functions to convert infix notation to RPN and calculates expression result.
import (
	"fmt"
	"log"
	"math"
	"strconv"

	"github.com/neonxp/unilex"
)
// infixToRPNotation reads infix-ordered lexems from the lexer's output
// channel and reorders them into reverse polish notation using the
// shunting-yard algorithm. Conversion starts as soon as the first lexems
// are scanned.
func infixToRPNotation(l *unilex.Lexer) []unilex.Lexem {
	output := []unilex.Lexem{}
	stack := lexemStack{}
parseLoop:
	for ll := range l.Output {
		fmt.Printf("Lexem: %+v\n", ll)
		switch {
		case ll.Type == "NUMBER", ll.Type == "OPERATOR" && ll.Value == "!":
			// Numbers and the postfix "!" go straight to the output.
			output = append(output, ll)
		case ll.Type == "LP":
			stack.Push(ll)
		case ll.Type == "RP":
			// Pop until the matching "(" is found.
			for {
				cl := stack.Pop()
				if cl.Type == "LP" {
					break
				}
				if cl.Type == unilex.LEOF {
					log.Fatalf("No pair for parenthesis at %d", ll.Start)
				}
				output = append(output, cl)
			}
		case ll.Type == "OPERATOR":
			// Pop operators of higher precedence, and of equal precedence
			// when the incoming operator is left-associative ("^" is the
			// only right-associative operator here). BUG FIX: the original
			// popped only on strictly higher precedence, which mis-ordered
			// chains of equal-precedence operators such as "10 - 5 - 3".
			for stack.Head().Type == "OPERATOR" &&
				(opPriority[stack.Head().Value] > opPriority[ll.Value] ||
					(opPriority[stack.Head().Value] == opPriority[ll.Value] && ll.Value != "^")) {
				output = append(output, stack.Pop())
			}
			stack.Push(ll)
		case ll.Type == unilex.LEOF:
			break parseLoop
		}
	}
	// Drain the remaining operators.
	for stack.Head().Type != unilex.LEOF {
		output = append(output, stack.Pop())
	}
	return output
}
// calculateRPN evaluates an expression in reverse polish notation and
// returns the result formatted as a decimal string.
// BUG FIX: the lexer and precedence table accept "^" and "!", but the
// original evaluator silently skipped them (and any unknown operator),
// leaving operands on the stack and corrupting the result. All six
// operators are now handled and unknown ones are reported.
func calculateRPN(rpnLexems []unilex.Lexem) string {
	stack := lexemStack{}
	// popNumber pops the top lexem and parses it as a float64.
	popNumber := func() float64 {
		v, err := strconv.ParseFloat(stack.Pop().Value, 64)
		if err != nil {
			log.Fatalf("invalid number on stack: %v", err)
		}
		return v
	}
	// pushNumber pushes f back as a NUMBER lexem.
	pushNumber := func(f float64) {
		stack.Push(unilex.Lexem{Type: "NUMBER", Value: strconv.FormatFloat(f, 'f', -1, 64)})
	}
	for _, op := range rpnLexems {
		if op.Type == "NUMBER" {
			stack.Push(op)
			continue
		}
		switch op.Value {
		case "!":
			// Unary postfix factorial: consumes one operand.
			pushNumber(factorial(popNumber()))
		case "+", "-", "*", "/", "^":
			// Binary operators: the right operand is on top of the stack.
			b := popNumber()
			a := popNumber()
			var r float64
			switch op.Value {
			case "+":
				r = a + b
			case "-":
				r = a - b
			case "*":
				r = a * b
			case "/":
				r = a / b
			case "^":
				r = math.Pow(a, b)
			}
			pushNumber(r)
		default:
			log.Fatalf("unknown operator %q", op.Value)
		}
	}
	return stack.Head().Value
}

// factorial computes n! by repeated multiplication; non-integral or
// negative inputs yield 1 (the loop body never runs).
func factorial(n float64) float64 {
	r := 1.0
	for i := 2.0; i <= n; i++ {
		r *= i
	}
	return r
}

View file

@ -0,0 +1,26 @@
package main
// Simple lexem stack implementation.
import "github.com/neonxp/unilex"
// lexemStack is a LIFO stack of lexems backed by a slice; the last element
// is the top of the stack.
type lexemStack []unilex.Lexem

// Head returns the top lexem without removing it, or a lexem of type LEOF
// when the stack is empty.
func (s *lexemStack) Head() unilex.Lexem {
	n := len(*s)
	if n == 0 {
		return unilex.Lexem{Type: unilex.LEOF}
	}
	return (*s)[n-1]
}

// Push places l on top of the stack.
func (s *lexemStack) Push(l unilex.Lexem) {
	*s = append(*s, l)
}

// Pop removes and returns the top lexem, or a lexem of type LEOF when the
// stack is empty.
func (s *lexemStack) Pop() unilex.Lexem {
	n := len(*s)
	if n == 0 {
		return unilex.Lexem{Type: unilex.LEOF}
	}
	top := (*s)[n-1]
	*s = (*s)[:n-1]
	return top
}

3
go.mod Normal file
View file

@ -0,0 +1,3 @@
module github.com/neonxp/unilex
go 1.16

20
lexem.go Normal file
View file

@ -0,0 +1,20 @@
package unilex
// Lexem represents part of parsed string: one token with its type, raw text
// and byte offsets into the input.
type Lexem struct {
	Type  LexType // Type of Lexem.
	Value string  // Value of Lexem (the matched input substring).
	Start int     // Start position at input string (byte offset, inclusive).
	End   int     // End position at input string (byte offset, exclusive).
}

// LexType represents type of current lexem. User-defined states may emit
// any string value as a type (e.g. "NUMBER", "OPERATOR").
type LexType string

// Some std lexem types used by the library itself.
const (
	// LError represents lexing error (produced by Lexer.Errorf).
	LError LexType = "ERROR"
	// LEOF represents end of input.
	LEOF LexType = "EOF"
)

141
lexer.go Normal file
View file

@ -0,0 +1,141 @@
package unilex
import (
"fmt"
"strings"
"unicode/utf8"
)
// EOF is returned by Next/Peek when the input is exhausted.
const EOF rune = -1

// Lexer holds current scanner state: the input, the window of the lexem
// being built (Start..Pos) and the channel lexems are emitted on.
type Lexer struct {
	Input  string     // Input string.
	Start  int        // Start position of current lexem.
	Pos    int        // Pos at input string (next rune to read).
	Output chan Lexem // Lexems channel.
	width  int        // Width of last rune, so Back can rewind one step.
}
// New returns a new scanner for the input string, positioned at the start.
func New(input string) *Lexer {
	l := &Lexer{Input: input}
	// A small buffer lets the lexer run slightly ahead of the consumer.
	l.Output = make(chan Lexem, 2)
	return l
}
// Run drives the lexing state machine: each state function returns the next
// state, and a nil state terminates lexing. The output channel is closed on
// return so consumers ranging over it finish cleanly.
func (l *Lexer) Run(init StateFunc) {
	defer close(l.Output)
	state := init
	for state != nil {
		state = state(l)
	}
}
// Emit sends the text accepted since the previous Emit/Ignore as a lexem of
// the given type, then starts a new lexem at the current position.
func (l *Lexer) Emit(typ LexType) {
	lexem := Lexem{
		Type:  typ,
		Value: l.Input[l.Start:l.Pos],
		Start: l.Start,
		End:   l.Pos,
	}
	l.Output <- lexem
	l.Start = l.Pos
}
// Errorf produces an error lexem with the formatted message and stops
// scanning by returning a nil state.
func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
	msg := fmt.Sprintf(format, args...)
	l.Output <- Lexem{
		Type:  LError,
		Value: msg,
		Start: l.Start,
		End:   l.Pos,
	}
	// nil tells Run to stop.
	return nil
}
// Next consumes and returns the next rune from the input, or EOF when the
// input is exhausted. The consumed rune's width is remembered for Back.
func (l *Lexer) Next() rune {
	if l.Pos >= len(l.Input) {
		l.width = 0 // make a subsequent Back a no-op
		return EOF
	}
	r, w := utf8.DecodeRuneInString(l.Input[l.Pos:])
	l.width = w
	l.Pos += w
	return r
}
// Back moves the position back over the most recently read rune. Only valid
// once per call to Next: width tracks a single rune, so a second Back would
// rewind by the same width again.
func (l *Lexer) Back() {
	l.Pos -= l.width
}
// Ignore drops the previously buffered text: the next lexem starts at the
// current position. width is reset so a later Back is a no-op.
func (l *Lexer) Ignore() {
	l.Start = l.Pos
	l.width = 0
}
// Peek returns the rune at the current position without consuming it
// (reads it with Next, then steps back).
func (l *Lexer) Peek() rune {
	next := l.Next()
	l.Back()
	return next
}
// Accept consumes the next rune if it appears in valid and reports whether
// it did; on a miss the position is left unchanged.
func (l *Lexer) Accept(valid string) bool {
	r := l.Next()
	if !strings.ContainsRune(valid, r) {
		l.Back()
		return false
	}
	return true
}
// AcceptString consumes s if the input at the current position starts with
// it, and reports whether it did. When caseInsentive is true the comparison
// ignores case; note len(s) bytes are consumed regardless, which assumes
// case folding preserves byte length (true for ASCII).
func (l *Lexer) AcceptString(s string, caseInsentive bool) bool {
	// BUG FIX: compare against the input at the current position, not the
	// whole input — the original matched only when l.Pos happened to be 0.
	rest := l.Input[l.Pos:]
	if caseInsentive {
		rest = strings.ToLower(rest)
		s = strings.ToLower(s)
	}
	if !strings.HasPrefix(rest, s) {
		return false
	}
	l.width = 0 // multi-rune consumption cannot be undone by Back
	l.Pos += len(s)
	return true
}
// AcceptAnyOf tries each substring in order and consumes the first one that
// matches at the current position. Returns true if any substring was found.
func (l *Lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
	for _, candidate := range s {
		if !l.AcceptString(candidate, caseInsentive) {
			continue
		}
		return true
	}
	return false
}
// AcceptWhile consumes runes from the input for as long as they appear in
// the `valid` string.
func (l *Lexer) AcceptWhile(valid string) {
	for l.Accept(valid) {
	}
}
// AcceptWhileNot consumes runes until one from `invalid` — or the end of
// input — is reached; the terminating rune itself is not consumed.
func (l *Lexer) AcceptWhileNot(invalid string) {
	for {
		r := l.Next()
		// BUG FIX: the original looped forever at end of input, because Next
		// keeps returning EOF and EOF is never found in `invalid`.
		if r == EOF || strings.ContainsRune(invalid, r) {
			break
		}
	}
	l.Back() // width is 0 at EOF, so Back is then a no-op
}
// AtStart reports whether the current lexem is still empty, i.e. no runes
// have been consumed since the last Emit/Ignore. (The previous comment said
// "not empty", which contradicted the code: Pos == Start means empty.)
func (l *Lexer) AtStart() bool {
	return l.Pos == l.Start
}

25
scanners.go Normal file
View file

@ -0,0 +1,25 @@
package unilex
// ScanNumber is the simplest scanner: it accepts a decimal integer or float
// (digits, optionally followed by "." and more digits).
func ScanNumber(l *Lexer) bool {
	const digits = "0123456789"
	l.AcceptWhile(digits)
	if l.AtStart() {
		// No leading digit — not a number.
		return false
	}
	// Optional fractional part.
	l.Accept(".")
	l.AcceptWhile(digits)
	return !l.AtStart()
}
// ScanAlphaNum returns true if the next input token is an alphanumeric
// sequence that does not start with a digit and contains no spaces or
// special characters.
func ScanAlphaNum(l *Lexer) bool {
	const (
		digits = "0123456789"
		alpha  = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM"
	)
	// The first rune must be a letter.
	if !l.Accept(alpha) {
		return false
	}
	// Subsequent runes may be letters or digits.
	l.AcceptWhile(alpha + digits)
	return true
}

53
scanners_test.go Normal file
View file

@ -0,0 +1,53 @@
package unilex
import "testing"
// TestScanNumber checks that ScanNumber accepts ints and floats and leaves
// the scan position just past the accepted number.
func TestScanNumber(t *testing.T) {
	cases := []struct {
		input    string
		expected bool
		pos      int
	}{
		{"asd", false, 0},
		{"asd123", false, 0},
		{"123", true, 3},
		{"123asd", true, 3},
		{"123.321", true, 7},
	}
	for _, c := range cases {
		lexer := New(c.input)
		got := ScanNumber(lexer)
		if got != c.expected {
			t.Errorf("Input: %s expected scan result: %v actual: %v", c.input, c.expected, got)
		}
		if lexer.Pos != c.pos {
			t.Errorf("Input: %s expected scan position: %d actual: %d", c.input, c.pos, lexer.Pos)
		}
	}
}
// TestScanAlphaNum checks that ScanAlphaNum accepts identifiers that start
// with a letter and stops at the first non-alphanumeric rune.
func TestScanAlphaNum(t *testing.T) {
	cases := []struct {
		input    string
		expected bool
		pos      int
	}{
		{"asd", true, 3},
		{"asd123", true, 6},
		{"123", false, 0},
		{"123asd", false, 0},
		{"123.321", false, 0},
		{"asd!dsa", true, 3},
		{"asd dsa", true, 3},
	}
	for _, c := range cases {
		lexer := New(c.input)
		got := ScanAlphaNum(lexer)
		if got != c.expected {
			t.Errorf("Input: %s expected scan result: %v actual: %v", c.input, c.expected, got)
		}
		if lexer.Pos != c.pos {
			t.Errorf("Input: %s expected scan position: %d actual: %d", c.input, c.pos, lexer.Pos)
		}
	}
}

4
statefunc.go Normal file
View file

@ -0,0 +1,4 @@
package unilex
// StateFunc represents a function that scans lexems from the Lexer and
// returns the next state function, or nil when lexing is complete
// (see Lexer.Run).
type StateFunc func(*Lexer) StateFunc