This commit is contained in:
Alexander Kiryukhin 2021-03-06 22:30:32 +03:00
commit 93740d2d15
No known key found for this signature in database
GPG key ID: 8CDA417C9098753B
9 changed files with 410 additions and 0 deletions

View file

@ -0,0 +1,54 @@
package main
import (
"fmt"
"github.com/neonxp/unilex"
)
// opPriority maps each operator to its precedence; a higher value binds
// tighter. "!" (factorial) and "^" (power) bind tightest.
// NOTE: the original file also declared a package-level `var output`, which
// was never used (infixToRPNotation builds its own local slice) — removed.
var opPriority = map[string]int{
	"^": 3,
	"!": 3,
	"*": 2,
	"/": 2,
	"+": 1,
	"-": 1,
}
// main lexes a fixed arithmetic expression, converts the lexem stream to
// reverse polish notation and evaluates it.
func main() {
	lexer := unilex.New("10 * (20.0 + 30.0)")
	go lexer.Run(lexExpression) // scan concurrently; lexems arrive on lexer.Output

	// Convert the infix lexem stream to RPN as soon as lexems are produced.
	rpnLexems := infixToRPNotation(lexer)
	fmt.Println("RPN:", rpnLexems)

	// Evaluate the RPN form and print the result.
	fmt.Println("Result:", calculateRPN(rpnLexems))
}
// lexExpression is the single lexer state for arithmetic expressions: it
// skips whitespace and emits parentheses, numbers and operators until the
// input is exhausted or an unexpected symbol is found.
func lexExpression(l *unilex.Lexer) unilex.StateFunc {
	// Consume and drop leading whitespace.
	l.AcceptWhile(" \t")
	l.Ignore()

	if l.Accept("(") {
		l.Emit("LP")
		return lexExpression
	}
	if l.Accept(")") {
		l.Emit("RP")
		return lexExpression
	}
	if unilex.ScanNumber(l) {
		l.Emit("NUMBER")
		return lexExpression
	}
	if l.Accept("+-*/^!") {
		l.Emit("OPERATOR")
		return lexExpression
	}
	if l.Peek() == unilex.EOF {
		return nil // input exhausted: stop lexing
	}
	return l.Errorf("Unexpected symbol")
}

View file

@ -0,0 +1,84 @@
package main
// Helper functions to convert infix notation to RPN and calculates expression result.
import (
	"fmt"
	"log"
	"math"
	"strconv"

	"github.com/neonxp/unilex"
)
// infixToRPNotation reads infix-ordered lexems from the lexer's output
// channel and reorders them into reverse polish notation using the
// shunting-yard algorithm. Conversion starts as soon as the first lexems
// are scanned.
func infixToRPNotation(l *unilex.Lexer) []unilex.Lexem {
	output := []unilex.Lexem{}
	stack := lexemStack{}
parseLoop:
	for ll := range l.Output {
		fmt.Printf("Lexem: %+v\n", ll)
		switch {
		case ll.Type == "NUMBER", ll.Type == "OPERATOR" && ll.Value == "!":
			// Numbers and the postfix "!" go straight to the output.
			output = append(output, ll)
		case ll.Type == "LP":
			stack.Push(ll)
		case ll.Type == "RP":
			// Pop until the matching "(" is found.
			for {
				cl := stack.Pop()
				if cl.Type == "LP" {
					break
				}
				if cl.Type == unilex.LEOF {
					log.Fatalf("No pair for parenthesis at %d", ll.Start)
				}
				output = append(output, cl)
			}
		case ll.Type == "OPERATOR":
			// Pop operators of higher precedence, and of equal precedence
			// when the incoming operator is left-associative ("^" is the
			// only right-associative operator here). BUG FIX: the original
			// popped only on strictly higher precedence, which mis-ordered
			// chains of equal-precedence operators such as "10 - 5 - 3".
			for stack.Head().Type == "OPERATOR" &&
				(opPriority[stack.Head().Value] > opPriority[ll.Value] ||
					(opPriority[stack.Head().Value] == opPriority[ll.Value] && ll.Value != "^")) {
				output = append(output, stack.Pop())
			}
			stack.Push(ll)
		case ll.Type == unilex.LEOF:
			break parseLoop
		}
	}
	// Drain the remaining operators.
	for stack.Head().Type != unilex.LEOF {
		output = append(output, stack.Pop())
	}
	return output
}
// calculateRPN evaluates an expression in reverse polish notation and
// returns the result formatted as a decimal string.
// BUG FIX: the lexer and precedence table accept "^" and "!", but the
// original evaluator silently skipped them (and any unknown operator),
// leaving operands on the stack and corrupting the result. All six
// operators are now handled and unknown ones are reported.
func calculateRPN(rpnLexems []unilex.Lexem) string {
	stack := lexemStack{}
	// popNumber pops the top lexem and parses it as a float64.
	popNumber := func() float64 {
		v, err := strconv.ParseFloat(stack.Pop().Value, 64)
		if err != nil {
			log.Fatalf("invalid number on stack: %v", err)
		}
		return v
	}
	// pushNumber pushes f back as a NUMBER lexem.
	pushNumber := func(f float64) {
		stack.Push(unilex.Lexem{Type: "NUMBER", Value: strconv.FormatFloat(f, 'f', -1, 64)})
	}
	for _, op := range rpnLexems {
		if op.Type == "NUMBER" {
			stack.Push(op)
			continue
		}
		switch op.Value {
		case "!":
			// Unary postfix factorial: consumes one operand.
			pushNumber(factorial(popNumber()))
		case "+", "-", "*", "/", "^":
			// Binary operators: the right operand is on top of the stack.
			b := popNumber()
			a := popNumber()
			var r float64
			switch op.Value {
			case "+":
				r = a + b
			case "-":
				r = a - b
			case "*":
				r = a * b
			case "/":
				r = a / b
			case "^":
				r = math.Pow(a, b)
			}
			pushNumber(r)
		default:
			log.Fatalf("unknown operator %q", op.Value)
		}
	}
	return stack.Head().Value
}

// factorial computes n! by repeated multiplication; non-integral or
// negative inputs yield 1 (the loop body never runs).
func factorial(n float64) float64 {
	r := 1.0
	for i := 2.0; i <= n; i++ {
		r *= i
	}
	return r
}

View file

@ -0,0 +1,26 @@
package main
// Simple lexem stack implementation.
import "github.com/neonxp/unilex"
// lexemStack is a LIFO stack of lexems backed by a slice; the last element
// is the top of the stack.
type lexemStack []unilex.Lexem

// Head returns the top lexem without removing it, or a lexem of type LEOF
// when the stack is empty.
func (s *lexemStack) Head() unilex.Lexem {
	n := len(*s)
	if n == 0 {
		return unilex.Lexem{Type: unilex.LEOF}
	}
	return (*s)[n-1]
}

// Push places l on top of the stack.
func (s *lexemStack) Push(l unilex.Lexem) {
	*s = append(*s, l)
}

// Pop removes and returns the top lexem, or a lexem of type LEOF when the
// stack is empty.
func (s *lexemStack) Pop() unilex.Lexem {
	n := len(*s)
	if n == 0 {
		return unilex.Lexem{Type: unilex.LEOF}
	}
	top := (*s)[n-1]
	*s = (*s)[:n-1]
	return top
}

3
go.mod Normal file
View file

@ -0,0 +1,3 @@
module github.com/neonxp/unilex
go 1.16

20
lexem.go Normal file
View file

@ -0,0 +1,20 @@
package unilex
// Lexem represents part of parsed string: one token with its type, raw text
// and byte offsets into the input.
type Lexem struct {
	Type  LexType // Type of Lexem.
	Value string  // Value of Lexem (the matched input substring).
	Start int     // Start position at input string (byte offset, inclusive).
	End   int     // End position at input string (byte offset, exclusive).
}

// LexType represents type of current lexem. User-defined states may emit
// any string value as a type (e.g. "NUMBER", "OPERATOR").
type LexType string

// Some std lexem types used by the library itself.
const (
	// LError represents lexing error (produced by Lexer.Errorf).
	LError LexType = "ERROR"
	// LEOF represents end of input.
	LEOF LexType = "EOF"
)

141
lexer.go Normal file
View file

@ -0,0 +1,141 @@
package unilex
import (
"fmt"
"strings"
"unicode/utf8"
)
// EOF is returned by Next/Peek when the input is exhausted.
const EOF rune = -1

// Lexer holds current scanner state: the input, the window of the lexem
// being built (Start..Pos) and the channel lexems are emitted on.
type Lexer struct {
	Input  string     // Input string.
	Start  int        // Start position of current lexem.
	Pos    int        // Pos at input string (next rune to read).
	Output chan Lexem // Lexems channel.
	width  int        // Width of last rune, so Back can rewind one step.
}
// New returns a new scanner for the input string, positioned at the start.
func New(input string) *Lexer {
	l := &Lexer{Input: input}
	// A small buffer lets the lexer run slightly ahead of the consumer.
	l.Output = make(chan Lexem, 2)
	return l
}
// Run drives the lexing state machine: each state function returns the next
// state, and a nil state terminates lexing. The output channel is closed on
// return so consumers ranging over it finish cleanly.
func (l *Lexer) Run(init StateFunc) {
	defer close(l.Output)
	state := init
	for state != nil {
		state = state(l)
	}
}
// Emit sends the text accepted since the previous Emit/Ignore as a lexem of
// the given type, then starts a new lexem at the current position.
func (l *Lexer) Emit(typ LexType) {
	lexem := Lexem{
		Type:  typ,
		Value: l.Input[l.Start:l.Pos],
		Start: l.Start,
		End:   l.Pos,
	}
	l.Output <- lexem
	l.Start = l.Pos
}
// Errorf produces an error lexem with the formatted message and stops
// scanning by returning a nil state.
func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
	msg := fmt.Sprintf(format, args...)
	l.Output <- Lexem{
		Type:  LError,
		Value: msg,
		Start: l.Start,
		End:   l.Pos,
	}
	// nil tells Run to stop.
	return nil
}
// Next consumes and returns the next rune from the input, or EOF when the
// input is exhausted. The consumed rune's width is remembered for Back.
func (l *Lexer) Next() rune {
	if l.Pos >= len(l.Input) {
		l.width = 0 // make a subsequent Back a no-op
		return EOF
	}
	r, w := utf8.DecodeRuneInString(l.Input[l.Pos:])
	l.width = w
	l.Pos += w
	return r
}
// Back moves the position back over the most recently read rune. Only valid
// once per call to Next: width tracks a single rune, so a second Back would
// rewind by the same width again.
func (l *Lexer) Back() {
	l.Pos -= l.width
}
// Ignore drops the previously buffered text: the next lexem starts at the
// current position. width is reset so a later Back is a no-op.
func (l *Lexer) Ignore() {
	l.Start = l.Pos
	l.width = 0
}
// Peek returns the rune at the current position without consuming it
// (reads it with Next, then steps back).
func (l *Lexer) Peek() rune {
	next := l.Next()
	l.Back()
	return next
}
// Accept consumes the next rune if it appears in valid and reports whether
// it did; on a miss the position is left unchanged.
func (l *Lexer) Accept(valid string) bool {
	r := l.Next()
	if !strings.ContainsRune(valid, r) {
		l.Back()
		return false
	}
	return true
}
// AcceptString consumes s if the input at the current position starts with
// it, and reports whether it did. When caseInsentive is true the comparison
// ignores case; note len(s) bytes are consumed regardless, which assumes
// case folding preserves byte length (true for ASCII).
func (l *Lexer) AcceptString(s string, caseInsentive bool) bool {
	// BUG FIX: compare against the input at the current position, not the
	// whole input — the original matched only when l.Pos happened to be 0.
	rest := l.Input[l.Pos:]
	if caseInsentive {
		rest = strings.ToLower(rest)
		s = strings.ToLower(s)
	}
	if !strings.HasPrefix(rest, s) {
		return false
	}
	l.width = 0 // multi-rune consumption cannot be undone by Back
	l.Pos += len(s)
	return true
}
// AcceptAnyOf tries each substring in order and consumes the first one that
// matches at the current position. Returns true if any substring was found.
func (l *Lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
	for _, candidate := range s {
		if !l.AcceptString(candidate, caseInsentive) {
			continue
		}
		return true
	}
	return false
}
// AcceptWhile consumes runes from the input for as long as they appear in
// the `valid` string.
func (l *Lexer) AcceptWhile(valid string) {
	for l.Accept(valid) {
	}
}
// AcceptWhileNot consumes runes until one from `invalid` — or the end of
// input — is reached; the terminating rune itself is not consumed.
func (l *Lexer) AcceptWhileNot(invalid string) {
	for {
		r := l.Next()
		// BUG FIX: the original looped forever at end of input, because Next
		// keeps returning EOF and EOF is never found in `invalid`.
		if r == EOF || strings.ContainsRune(invalid, r) {
			break
		}
	}
	l.Back() // width is 0 at EOF, so Back is then a no-op
}
// AtStart reports whether the current lexem is still empty, i.e. no runes
// have been consumed since the last Emit/Ignore. (The previous comment said
// "not empty", which contradicted the code: Pos == Start means empty.)
func (l *Lexer) AtStart() bool {
	return l.Pos == l.Start
}

25
scanners.go Normal file
View file

@ -0,0 +1,25 @@
package unilex
// ScanNumber is the simplest scanner: it accepts a decimal integer or float
// (digits, optionally followed by "." and more digits).
func ScanNumber(l *Lexer) bool {
	const digits = "0123456789"
	l.AcceptWhile(digits)
	if l.AtStart() {
		// No leading digit — not a number.
		return false
	}
	// Optional fractional part.
	l.Accept(".")
	l.AcceptWhile(digits)
	return !l.AtStart()
}
// ScanAlphaNum returns true if the next input token is an alphanumeric
// sequence that does not start with a digit and contains no spaces or
// special characters.
func ScanAlphaNum(l *Lexer) bool {
	const (
		digits = "0123456789"
		alpha  = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM"
	)
	// The first rune must be a letter.
	if !l.Accept(alpha) {
		return false
	}
	// Subsequent runes may be letters or digits.
	l.AcceptWhile(alpha + digits)
	return true
}

53
scanners_test.go Normal file
View file

@ -0,0 +1,53 @@
package unilex
import "testing"
// TestScanNumber checks that ScanNumber accepts ints and floats and leaves
// the scan position just past the accepted number.
func TestScanNumber(t *testing.T) {
	cases := []struct {
		input    string
		expected bool
		pos      int
	}{
		{"asd", false, 0},
		{"asd123", false, 0},
		{"123", true, 3},
		{"123asd", true, 3},
		{"123.321", true, 7},
	}
	for _, c := range cases {
		lexer := New(c.input)
		got := ScanNumber(lexer)
		if got != c.expected {
			t.Errorf("Input: %s expected scan result: %v actual: %v", c.input, c.expected, got)
		}
		if lexer.Pos != c.pos {
			t.Errorf("Input: %s expected scan position: %d actual: %d", c.input, c.pos, lexer.Pos)
		}
	}
}
// TestScanAlphaNum checks that ScanAlphaNum accepts identifiers that start
// with a letter and stops at the first non-alphanumeric rune.
func TestScanAlphaNum(t *testing.T) {
	cases := []struct {
		input    string
		expected bool
		pos      int
	}{
		{"asd", true, 3},
		{"asd123", true, 6},
		{"123", false, 0},
		{"123asd", false, 0},
		{"123.321", false, 0},
		{"asd!dsa", true, 3},
		{"asd dsa", true, 3},
	}
	for _, c := range cases {
		lexer := New(c.input)
		got := ScanAlphaNum(lexer)
		if got != c.expected {
			t.Errorf("Input: %s expected scan result: %v actual: %v", c.input, c.expected, got)
		}
		if lexer.Pos != c.pos {
			t.Errorf("Input: %s expected scan position: %d actual: %d", c.input, c.pos, lexer.Pos)
		}
	}
}

4
statefunc.go Normal file
View file

@ -0,0 +1,4 @@
package unilex
// StateFunc represents a function that scans lexems from the Lexer and
// returns the next state function, or nil when lexing is complete
// (see Lexer.Run).
type StateFunc func(*Lexer) StateFunc