Push/pop state, full json example

parent 521e6da1f6
commit ff198abd8f

5 changed files with 125 additions and 60 deletions

README:

```diff
@@ -3,4 +3,6 @@ Universal lexer for Golang
 
 Based on Rob Pike's awesome video [Lexical Scanning in Go](https://www.youtube.com/watch?v=HxaD_trXwRE)
 
-TODO: Write brief documentation. For now - watch video and [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.
+Examples: [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.
+
+
```

The JSON example under /example:

```diff
@@ -15,7 +15,19 @@ func main() {
 	"key2": {
 		"key3" : "value 3"
 	},
-	"key4": 123.321
+	"key4": 123.321,
+	"key5": [
+		1,
+		2,
+		[
+			3,
+			4,
+			5,
+			{
+				"key6": "value6"
+			}
+		]
+	]
 }`
 	l := unilex.New(testJson)
 	go l.Run(initJson)
```
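
The unchanged lines between this hunk and the next (old lines 22-24) are where main presumably consumes the lexems; they are not shown in the diff. A minimal consumer would just range over the Output channel, which Run closes when lexing finishes. The helper below is only a sketch (its name and print format are made up, and it assumes the example already imports fmt), built from the Lexem fields visible in lexem.go and lexer.go further down.

```go
// Sketch only, not part of the commit: drain the lexer's output and print
// each lexem. The range loop ends once Run closes l.Output.
func printLexems(l *unilex.Lexer) {
	for lx := range l.Output {
		fmt.Printf("%3d..%3d  %v  %q\n", lx.Start, lx.End, lx.Type, lx.Value)
	}
}
```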

```diff
@@ -25,11 +37,14 @@ func main() {
 }
 
 const (
-	lObjectStart unilex.LexType = "lObjectStart"
-	lObjectEnd unilex.LexType = "lObjectEnd"
-	lObjectKey unilex.LexType = "lObjectKey"
-	lObjectValueString unilex.LexType = "lObjectValueString"
-	lObjectValueNumber unilex.LexType = "lObjectValueNumber"
+	lObjectStart unilex.LexType = iota
+	lObjectEnd
+	lObjectKey
+	lObjectValue
+	lArrayStart
+	lArrayEnd
+	lString
+	lNumber
 )
 
 func initJson(l *unilex.Lexer) unilex.StateFunc {
```

```diff
@@ -37,60 +52,83 @@ func initJson(l *unilex.Lexer) unilex.StateFunc {
 	switch {
 	case l.Accept("{"):
 		l.Emit(lObjectStart)
-		return stateInObject(true)
+		return stateInObject
 	case l.Peek() == unilex.EOF:
 		return nil
 	}
-	return l.Errorf("Unknown token: %s", l.Peek())
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
 }
 
-func stateInObject(initial bool) unilex.StateFunc {
-	return func(l *unilex.Lexer) unilex.StateFunc {
-		// we in object, so we expect field keys and values
-		ignoreWhiteSpace(l)
-		if l.Accept("}") {
-			l.Emit(lObjectEnd)
-			if initial {
-				return initJson
-			}
-			ignoreWhiteSpace(l)
-			l.Accept(",")
-			ignoreWhiteSpace(l)
-			return stateInObject(initial)
-		}
-		if l.Peek() == unilex.EOF {
-			return nil
-		}
-		if !unilex.ScanQuotedString(l, '"') {
-			return l.Errorf("Unknown token: %s", l.Peek())
-		}
-		l.Emit(lObjectKey)
-		ignoreWhiteSpace(l)
-		if !l.Accept(":") {
-			return l.Errorf("Expected ':'")
-		}
-		ignoreWhiteSpace(l)
-		switch {
-		case unilex.ScanQuotedString(l, '"'):
-			l.Emit(lObjectValueString)
-			ignoreWhiteSpace(l)
-			l.Accept(",")
-			l.Ignore()
-			ignoreWhiteSpace(l)
-			return stateInObject(initial)
-		case unilex.ScanNumber(l):
-			l.Emit(lObjectValueNumber)
-			ignoreWhiteSpace(l)
-			l.Accept(",")
-			l.Ignore()
-			ignoreWhiteSpace(l)
-			return stateInObject(initial)
-		case l.Accept("{"):
-			l.Emit(lObjectStart)
-			return stateInObject(false)
-		}
-		return l.Errorf("Unknown token")
+func stateInObject(l *unilex.Lexer) unilex.StateFunc {
+	// we in object, so we expect field keys and values
+	ignoreWhiteSpace(l)
+	if l.Accept("}") {
+		l.Emit(lObjectEnd)
+		// If meet close object return to previous state (including initial)
+		return l.PopState()
+	}
+	ignoreWhiteSpace(l)
+	l.Accept(",")
+	ignoreWhiteSpace(l)
+	if !unilex.ScanQuotedString(l, '"') {
+		return l.Errorf("Unknown token: %s", string(l.Peek()))
+	}
+	l.Emit(lObjectKey)
+	ignoreWhiteSpace(l)
+	if !l.Accept(":") {
+		return l.Errorf("Expected ':'")
+	}
+	ignoreWhiteSpace(l)
+	l.Emit(lObjectValue)
+	switch {
+	case unilex.ScanQuotedString(l, '"'):
+		l.Emit(lString)
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case unilex.ScanNumber(l):
+		l.Emit(lNumber)
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		l.PushState(stateInObject)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		l.PushState(stateInObject)
+		return stateInArray
+	}
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+func stateInArray(l *unilex.Lexer) unilex.StateFunc {
+	ignoreWhiteSpace(l)
+	l.Accept(",")
+	ignoreWhiteSpace(l)
+	switch {
+	case unilex.ScanQuotedString(l, '"'):
+		l.Emit(lString)
+	case unilex.ScanNumber(l):
+		l.Emit(lNumber)
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		l.PushState(stateInArray)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		l.PushState(stateInArray)
+		return stateInArray
+	case l.Accept("]"):
+		l.Emit(lArrayEnd)
+		return l.PopState()
 	}
+	return stateInArray
 }
 
 func ignoreWhiteSpace(l *unilex.Lexer) {
```
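
With the array support added above, object and array boundaries now arrive as separate lexem types, so a consumer can recover nesting from the output stream alone. The helper below is a hypothetical illustration (not part of the commit), written against the lexem types declared in the example's const block.

```go
// Hypothetical helper, not part of the commit: track nesting depth from the
// boundary lexems emitted by stateInObject and stateInArray above.
func depthAfter(depth int, t unilex.LexType) int {
	switch t {
	case lObjectStart, lArrayStart:
		return depth + 1
	case lObjectEnd, lArrayEnd:
		return depth - 1
	}
	return depth
}
```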

lexem.go (8 changed lines):

```diff
@@ -9,12 +9,12 @@ type Lexem struct {
 }
 
 // LexType represents type of current lexem.
-type LexType string
+type LexType int
 
 // Some std lexem types
 const (
-	// LError represents lexing error.
-	LError LexType = "ERROR"
 	// LEOF represents end of input.
-	LEOF LexType = "EOF"
+	LexEOF LexType = -1
+	// LError represents lexing error.
+	LexError LexType = -2
 )
```
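
Since LexType is now an int, lexems print as bare numbers by default. A consumer that wants readable output for the built-in types could map them back to names; the helper below is a sketch, not part of the library, and assumes fmt is imported.

```go
// Hypothetical sketch, not part of the commit: name the built-in lexem types
// (LexEOF = -1, LexError = -2) when printing; anything else falls back to the
// raw numeric value.
func typeName(t unilex.LexType) string {
	switch t {
	case unilex.LexEOF:
		return "EOF"
	case unilex.LexError:
		return "ERROR"
	}
	return fmt.Sprintf("LexType(%d)", int(t))
}
```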

lexer.go (13 changed lines):

```diff
@@ -16,6 +16,7 @@ type Lexer struct {
 	Pos int // Pos at input string.
 	Output chan Lexem // Lexems channel.
 	width int // Width of last rune.
+	states stateStack // Stack of states to realize PrevState.
 }
 
 // New returns new scanner for input string.
@@ -37,6 +38,16 @@ func (l *Lexer) Run(init StateFunc) {
 	close(l.Output)
 }
 
+// PopState returns previous state function.
+func (l *Lexer) PopState() StateFunc {
+	return l.states.Pop()
+}
+
+// PushState pushes state before going deeper states.
+func (l *Lexer) PushState(s StateFunc) {
+	l.states.Push(s)
+}
+
 // Emit current lexem to output.
 func (l *Lexer) Emit(typ LexType) {
 	l.Output <- Lexem{
@@ -51,7 +62,7 @@ func (l *Lexer) Emit(typ LexType) {
 // Errorf produces error lexem and stops scanning.
 func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
 	l.Output <- Lexem{
-		Type: LError,
+		Type: LexError,
 		Value: fmt.Sprintf(format, args...),
 		Start: l.Start,
 		End: l.Pos,
```
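
The body of Run sits outside these hunks; only its trailing close(l.Output) appears as context. Judging from the StateFunc contract in statefunc.go below (a state function returns the next state, or nil when lexing is complete), the driving loop presumably looks something like this sketch, which is an assumption rather than the actual source:

```go
// Sketch of the assumed shape of Run (the real body is outside the hunk):
// keep calling the current state function until one returns nil, then close
// the output channel so consumers ranging over it stop.
func run(l *unilex.Lexer, init unilex.StateFunc) {
	for state := init; state != nil; {
		state = state(l)
	}
	close(l.Output)
}
```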

statefunc.go (14 changed lines):

```diff
@@ -2,3 +2,17 @@ package unilex
 
 // StateFunc represents function that scans lexems and returns new state function or nil if lexing completed.
 type StateFunc func(*Lexer) StateFunc
+
+type stateStack []StateFunc
+
+func (ss *stateStack) Push(s StateFunc) {
+	*ss = append(*ss, s)
+}
+
+func (ss *stateStack) Pop() (s StateFunc) {
+	if len(*ss) == 0 {
+		return nil
+	}
+	*ss, s = (*ss)[:len(*ss)-1], (*ss)[len(*ss)-1]
+	return s
+}
```
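
This stack is what makes PopState safe to call at the top level as well: Pop returns nil when the stack is empty, and per the StateFunc contract a nil state means lexing is complete, so a closing brace with nothing pushed simply ends the scan instead of panicking. A quick in-package sketch of the behaviour (hypothetical, not part of the commit):

```go
// Hypothetical in-package sketch, not part of the commit: Pop returns states
// in LIFO order and nil once the stack is empty.
func exampleStackOrder() {
	outer := StateFunc(func(*Lexer) StateFunc { return nil })
	inner := StateFunc(func(*Lexer) StateFunc { return nil })

	var ss stateStack
	ss.Push(outer)
	ss.Push(inner)
	_ = ss.Pop() // inner: the state pushed last comes back first
	_ = ss.Pop() // outer
	_ = ss.Pop() // nil: nothing left to resume
}
```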