Push/pop state, full json example

This commit is contained in:
Alexander Kiryukhin 2021-03-10 00:47:58 +03:00
parent 521e6da1f6
commit ff198abd8f
No known key found for this signature in database
GPG key ID: 8CDA417C9098753B
5 changed files with 125 additions and 60 deletions

View file

@ -3,4 +3,6 @@ Universal lexer for Golang
Based on Rob Pike's awesome video [Lexical Scanning in Go](https://www.youtube.com/watch?v=HxaD_trXwRE) Based on Rob Pike's awesome video [Lexical Scanning in Go](https://www.youtube.com/watch?v=HxaD_trXwRE)
TODO: Write brief documentation. For now - watch video and [/examples](https://github.com/neonxp/unilex/tree/master/example) directory. Examples: [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.

View file

@ -15,7 +15,19 @@ func main() {
"key2": { "key2": {
"key3" : "value 3" "key3" : "value 3"
}, },
"key4": 123.321 "key4": 123.321,
"key5": [
1,
2,
[
3,
4,
5,
{
"key6": "value6"
}
]
]
}` }`
l := unilex.New(testJson) l := unilex.New(testJson)
go l.Run(initJson) go l.Run(initJson)
@ -25,11 +37,14 @@ func main() {
} }
const ( const (
lObjectStart unilex.LexType = "lObjectStart" lObjectStart unilex.LexType = iota
lObjectEnd unilex.LexType = "lObjectEnd" lObjectEnd
lObjectKey unilex.LexType = "lObjectKey" lObjectKey
lObjectValueString unilex.LexType = "lObjectValueString" lObjectValue
lObjectValueNumber unilex.LexType = "lObjectValueNumber" lArrayStart
lArrayEnd
lString
lNumber
) )
func initJson(l *unilex.Lexer) unilex.StateFunc { func initJson(l *unilex.Lexer) unilex.StateFunc {
@ -37,60 +52,83 @@ func initJson(l *unilex.Lexer) unilex.StateFunc {
switch { switch {
case l.Accept("{"): case l.Accept("{"):
l.Emit(lObjectStart) l.Emit(lObjectStart)
return stateInObject(true) return stateInObject
case l.Peek() == unilex.EOF: case l.Peek() == unilex.EOF:
return nil return nil
} }
return l.Errorf("Unknown token: %s", l.Peek()) return l.Errorf("Unknown token: %s", string(l.Peek()))
} }
func stateInObject(initial bool) unilex.StateFunc { func stateInObject(l *unilex.Lexer) unilex.StateFunc {
return func(l *unilex.Lexer) unilex.StateFunc { // we in object, so we expect field keys and values
// we in object, so we expect field keys and values ignoreWhiteSpace(l)
ignoreWhiteSpace(l) if l.Accept("}") {
if l.Accept("}") { l.Emit(lObjectEnd)
l.Emit(lObjectEnd) // If meet close object return to previous state (including initial)
if initial { return l.PopState()
return initJson
}
ignoreWhiteSpace(l)
l.Accept(",")
ignoreWhiteSpace(l)
return stateInObject(initial)
}
if l.Peek() == unilex.EOF {
return nil
}
if !unilex.ScanQuotedString(l, '"') {
return l.Errorf("Unknown token: %s", l.Peek())
}
l.Emit(lObjectKey)
ignoreWhiteSpace(l)
if !l.Accept(":") {
return l.Errorf("Expected ':'")
}
ignoreWhiteSpace(l)
switch {
case unilex.ScanQuotedString(l, '"'):
l.Emit(lObjectValueString)
ignoreWhiteSpace(l)
l.Accept(",")
l.Ignore()
ignoreWhiteSpace(l)
return stateInObject(initial)
case unilex.ScanNumber(l):
l.Emit(lObjectValueNumber)
ignoreWhiteSpace(l)
l.Accept(",")
l.Ignore()
ignoreWhiteSpace(l)
return stateInObject(initial)
case l.Accept("{"):
l.Emit(lObjectStart)
return stateInObject(false)
}
return l.Errorf("Unknown token")
} }
ignoreWhiteSpace(l)
l.Accept(",")
ignoreWhiteSpace(l)
if !unilex.ScanQuotedString(l, '"') {
return l.Errorf("Unknown token: %s", string(l.Peek()))
}
l.Emit(lObjectKey)
ignoreWhiteSpace(l)
if !l.Accept(":") {
return l.Errorf("Expected ':'")
}
ignoreWhiteSpace(l)
l.Emit(lObjectValue)
switch {
case unilex.ScanQuotedString(l, '"'):
l.Emit(lString)
ignoreWhiteSpace(l)
l.Accept(",")
l.Ignore()
ignoreWhiteSpace(l)
return stateInObject
case unilex.ScanNumber(l):
l.Emit(lNumber)
ignoreWhiteSpace(l)
l.Accept(",")
l.Ignore()
ignoreWhiteSpace(l)
return stateInObject
case l.Accept("{"):
l.Emit(lObjectStart)
l.PushState(stateInObject)
return stateInObject
case l.Accept("["):
l.Emit(lArrayStart)
l.PushState(stateInObject)
return stateInArray
}
return l.Errorf("Unknown token: %s", string(l.Peek()))
}
func stateInArray(l *unilex.Lexer) unilex.StateFunc {
ignoreWhiteSpace(l)
l.Accept(",")
ignoreWhiteSpace(l)
switch {
case unilex.ScanQuotedString(l, '"'):
l.Emit(lString)
case unilex.ScanNumber(l):
l.Emit(lNumber)
case l.Accept("{"):
l.Emit(lObjectStart)
l.PushState(stateInArray)
return stateInObject
case l.Accept("["):
l.Emit(lArrayStart)
l.PushState(stateInArray)
return stateInArray
case l.Accept("]"):
l.Emit(lArrayEnd)
return l.PopState()
}
return stateInArray
} }
func ignoreWhiteSpace(l *unilex.Lexer) { func ignoreWhiteSpace(l *unilex.Lexer) {

View file

@ -9,12 +9,12 @@ type Lexem struct {
} }
// LexType represents type of current lexem. // LexType represents type of current lexem.
type LexType string type LexType int
// Some std lexem types // Some std lexem types
const ( const (
// LError represents lexing error.
LError LexType = "ERROR"
// LEOF represents end of input. // LexEOF represents end of input.
LEOF LexType = "EOF" LexEOF LexType = -1
// LexError represents lexing error.
LexError LexType = -2
) )

View file

@ -16,6 +16,7 @@ type Lexer struct {
Pos int // Pos at input string. Pos int // Pos at input string.
Output chan Lexem // Lexems channel. Output chan Lexem // Lexems channel.
width int // Width of last rune. width int // Width of last rune.
states stateStack // Stack of saved states used by PushState and PopState.
} }
// New returns new scanner for input string. // New returns new scanner for input string.
@ -37,6 +38,16 @@ func (l *Lexer) Run(init StateFunc) {
close(l.Output) close(l.Output)
} }
// PopState removes the most recently pushed state function from the lexer's
// state stack and returns it, letting a nested state hand control back to
// the state that entered it. The result is nil when nothing has been pushed.
func (l *Lexer) PopState() StateFunc {
	prev := l.states.Pop()
	return prev
}
// PushState saves s on the lexer's state stack so that a later PopState call
// can return to it after a nested state has finished.
func (l *Lexer) PushState(s StateFunc) {
	stack := &l.states
	stack.Push(s)
}
// Emit current lexem to output. // Emit current lexem to output.
func (l *Lexer) Emit(typ LexType) { func (l *Lexer) Emit(typ LexType) {
l.Output <- Lexem{ l.Output <- Lexem{
@ -51,7 +62,7 @@ func (l *Lexer) Emit(typ LexType) {
// Errorf produces error lexem and stops scanning. // Errorf produces error lexem and stops scanning.
func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc { func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
l.Output <- Lexem{ l.Output <- Lexem{
Type: LError, Type: LexError,
Value: fmt.Sprintf(format, args...), Value: fmt.Sprintf(format, args...),
Start: l.Start, Start: l.Start,
End: l.Pos, End: l.Pos,

View file

@ -2,3 +2,17 @@ package unilex
// StateFunc represents function that scans lexems and returns new state function or nil if lexing completed. // StateFunc represents function that scans lexems and returns new state function or nil if lexing completed.
type StateFunc func(*Lexer) StateFunc type StateFunc func(*Lexer) StateFunc
// stateStack is a LIFO stack of state functions. The last element of the
// slice is the top of the stack; the zero value is an empty, usable stack.
type stateStack []StateFunc

// Push places s on top of the stack.
func (ss *stateStack) Push(s StateFunc) {
	*ss = append(*ss, s)
}

// Pop removes and returns the state function on top of the stack.
// It returns nil when the stack is empty.
func (ss *stateStack) Pop() (s StateFunc) {
	top := len(*ss) - 1
	if top < 0 {
		return nil
	}
	s = (*ss)[top]
	// Clear the vacated slot so the backing array stops referencing the
	// popped function (it may be a closure keeping other values alive).
	(*ss)[top] = nil
	*ss = (*ss)[:top]
	return s
}