Push/pop state, full json example

This commit is contained in:
Александр Кирюхин 2021-03-10 00:47:58 +03:00
parent 521e6da1f6
commit ff198abd8f
No known key found for this signature in database
GPG key ID: 8CDA417C9098753B
5 changed files with 125 additions and 60 deletions

View file

@ -3,4 +3,6 @@ Universal lexer for Golang
Based on Rob Pike's awesome video [Lexical Scanning in Go](https://www.youtube.com/watch?v=HxaD_trXwRE)
TODO: Write brief documentation. For now - watch video and [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.
Examples: [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.

View file

@ -15,7 +15,19 @@ func main() {
"key2": {
"key3" : "value 3"
},
"key4": 123.321
"key4": 123.321,
"key5": [
1,
2,
[
3,
4,
5,
{
"key6": "value6"
}
]
]
}`
l := unilex.New(testJson)
go l.Run(initJson)
@ -25,11 +37,14 @@ func main() {
}
const (
lObjectStart unilex.LexType = "lObjectStart"
lObjectEnd unilex.LexType = "lObjectEnd"
lObjectKey unilex.LexType = "lObjectKey"
lObjectValueString unilex.LexType = "lObjectValueString"
lObjectValueNumber unilex.LexType = "lObjectValueNumber"
lObjectStart unilex.LexType = iota
lObjectEnd
lObjectKey
lObjectValue
lArrayStart
lArrayEnd
lString
lNumber
)
func initJson(l *unilex.Lexer) unilex.StateFunc {
@ -37,32 +52,26 @@ func initJson(l *unilex.Lexer) unilex.StateFunc {
switch {
case l.Accept("{"):
l.Emit(lObjectStart)
return stateInObject(true)
return stateInObject
case l.Peek() == unilex.EOF:
return nil
}
return l.Errorf("Unknown token: %s", l.Peek())
return l.Errorf("Unknown token: %s", string(l.Peek()))
}
func stateInObject(initial bool) unilex.StateFunc {
return func(l *unilex.Lexer) unilex.StateFunc {
func stateInObject(l *unilex.Lexer) unilex.StateFunc {
// We are inside an object, so we expect field keys and values.
ignoreWhiteSpace(l)
if l.Accept("}") {
l.Emit(lObjectEnd)
if initial {
return initJson
// On a closing brace, return to the previous state (including the initial one).
return l.PopState()
}
ignoreWhiteSpace(l)
l.Accept(",")
ignoreWhiteSpace(l)
return stateInObject(initial)
}
if l.Peek() == unilex.EOF {
return nil
}
if !unilex.ScanQuotedString(l, '"') {
return l.Errorf("Unknown token: %s", l.Peek())
return l.Errorf("Unknown token: %s", string(l.Peek()))
}
l.Emit(lObjectKey)
ignoreWhiteSpace(l)
@ -70,27 +79,56 @@ func stateInObject(initial bool) unilex.StateFunc {
return l.Errorf("Expected ':'")
}
ignoreWhiteSpace(l)
l.Emit(lObjectValue)
switch {
case unilex.ScanQuotedString(l, '"'):
l.Emit(lObjectValueString)
l.Emit(lString)
ignoreWhiteSpace(l)
l.Accept(",")
l.Ignore()
ignoreWhiteSpace(l)
return stateInObject(initial)
return stateInObject
case unilex.ScanNumber(l):
l.Emit(lObjectValueNumber)
l.Emit(lNumber)
ignoreWhiteSpace(l)
l.Accept(",")
l.Ignore()
ignoreWhiteSpace(l)
return stateInObject(initial)
return stateInObject
case l.Accept("{"):
l.Emit(lObjectStart)
return stateInObject(false)
l.PushState(stateInObject)
return stateInObject
case l.Accept("["):
l.Emit(lArrayStart)
l.PushState(stateInObject)
return stateInArray
}
return l.Errorf("Unknown token")
return l.Errorf("Unknown token: %s", string(l.Peek()))
}
// stateInArray lexes the contents of a JSON array, handling at most one
// token per invocation and returning the state that should run next.
//
// An optional separating comma is consumed first. Then:
//   - a quoted string emits lString and a number emits lNumber, after which
//     the lexer stays in stateInArray;
//   - "{" or "[" emits the matching start lexem, pushes stateInArray so the
//     nested container can return here via PopState, and descends into
//     stateInObject or stateInArray respectively;
//   - "]" emits lArrayEnd and resumes whatever state was pushed before
//     entering this array;
//   - end of input stops lexing (nil), the same way stateInObject does;
//   - anything else is reported through Errorf instead of returning
//     stateInArray again without consuming input, which would never finish.
func stateInArray(l *unilex.Lexer) unilex.StateFunc {
	ignoreWhiteSpace(l)
	l.Accept(",")
	// Drop the separator so it cannot leak into the next lexem's value;
	// stateInObject does the same after its optional comma.
	l.Ignore()
	ignoreWhiteSpace(l)

	switch {
	case unilex.ScanQuotedString(l, '"'):
		l.Emit(lString)
	case unilex.ScanNumber(l):
		l.Emit(lNumber)
	case l.Accept("{"):
		l.Emit(lObjectStart)
		l.PushState(stateInArray)
		return stateInObject
	case l.Accept("["):
		l.Emit(lArrayStart)
		l.PushState(stateInArray)
		return stateInArray
	case l.Accept("]"):
		l.Emit(lArrayEnd)
		return l.PopState()
	case l.Peek() == unilex.EOF:
		// Input ended inside the array: stop instead of looping forever.
		return nil
	default:
		// Nothing matched and nothing was consumed; report it rather than
		// re-entering this state on the same rune indefinitely.
		return l.Errorf("Unknown token: %s", string(l.Peek()))
	}
	return stateInArray
}
func ignoreWhiteSpace(l *unilex.Lexer) {

View file

@ -9,12 +9,12 @@ type Lexem struct {
}
// LexType represents type of current lexem.
type LexType string
type LexType int
// Some std lexem types
const (
// LError represents lexing error.
LError LexType = "ERROR"
// LEOF represents end of input.
LEOF LexType = "EOF"
LexEOF LexType = -1
// LexError represents lexing error.
LexError LexType = -2
)

View file

@ -16,6 +16,7 @@ type Lexer struct {
Pos int // Pos at input string.
Output chan Lexem // Lexems channel.
width int // Width of last rune.
states stateStack // Stack of states to realize PrevState.
}
// New returns new scanner for input string.
@ -37,6 +38,16 @@ func (l *Lexer) Run(init StateFunc) {
close(l.Output)
}
// PopState removes the most recently pushed state function from the lexer's
// state stack and returns it, so a nested state can hand control back to the
// state that was saved before it was entered.
func (l *Lexer) PopState() StateFunc {
	prev := l.states.Pop()
	return prev
}
// PushState saves s on top of the lexer's state stack. Call it before
// descending into a nested state so that a later PopState can resume s.
func (l *Lexer) PushState(s StateFunc) {
	stack := &l.states
	stack.Push(s)
}
// Emit current lexem to output.
func (l *Lexer) Emit(typ LexType) {
l.Output <- Lexem{
@ -51,7 +62,7 @@ func (l *Lexer) Emit(typ LexType) {
// Errorf produces error lexem and stops scanning.
func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
l.Output <- Lexem{
Type: LError,
Type: LexError,
Value: fmt.Sprintf(format, args...),
Start: l.Start,
End: l.Pos,

View file

@ -2,3 +2,17 @@ package unilex
// StateFunc represents function that scans lexems and returns new state function or nil if lexing completed.
type StateFunc func(*Lexer) StateFunc
// stateStack is a last-in, first-out stack of state functions backed by a
// slice; the top of the stack is the last element.
type stateStack []StateFunc

// Push appends s to the top of the stack.
func (ss *stateStack) Push(s StateFunc) {
	grown := append(*ss, s)
	*ss = grown
}

// Pop removes the top state function from the stack and returns it.
// It returns nil when the stack is empty.
func (ss *stateStack) Pop() (s StateFunc) {
	stack := *ss
	top := len(stack) - 1
	if top < 0 {
		return nil
	}
	s = stack[top]
	*ss = stack[:top]
	return s
}