Push/pop state, full json example
This commit is contained in:
parent
521e6da1f6
commit
ff198abd8f
5 changed files with 125 additions and 60 deletions
|
@ -3,4 +3,6 @@ Universal lexer for Golang
|
||||||
|
|
||||||
Based on Rob Pike's awesome video [Lexical Scanning in Go](https://www.youtube.com/watch?v=HxaD_trXwRE)
|
Based on Rob Pike's awesome video [Lexical Scanning in Go](https://www.youtube.com/watch?v=HxaD_trXwRE)
|
||||||
|
|
||||||
TODO: Write brief documentation. For now, watch the video and see the [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.
|
Examples: [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,19 @@ func main() {
|
||||||
"key2": {
|
"key2": {
|
||||||
"key3" : "value 3"
|
"key3" : "value 3"
|
||||||
},
|
},
|
||||||
"key4": 123.321
|
"key4": 123.321,
|
||||||
|
"key5": [
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
[
|
||||||
|
3,
|
||||||
|
4,
|
||||||
|
5,
|
||||||
|
{
|
||||||
|
"key6": "value6"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
}`
|
}`
|
||||||
l := unilex.New(testJson)
|
l := unilex.New(testJson)
|
||||||
go l.Run(initJson)
|
go l.Run(initJson)
|
||||||
|
@ -25,11 +37,14 @@ func main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
lObjectStart unilex.LexType = "lObjectStart"
|
lObjectStart unilex.LexType = iota
|
||||||
lObjectEnd unilex.LexType = "lObjectEnd"
|
lObjectEnd
|
||||||
lObjectKey unilex.LexType = "lObjectKey"
|
lObjectKey
|
||||||
lObjectValueString unilex.LexType = "lObjectValueString"
|
lObjectValue
|
||||||
lObjectValueNumber unilex.LexType = "lObjectValueNumber"
|
lArrayStart
|
||||||
|
lArrayEnd
|
||||||
|
lString
|
||||||
|
lNumber
|
||||||
)
|
)
|
||||||
|
|
||||||
func initJson(l *unilex.Lexer) unilex.StateFunc {
|
func initJson(l *unilex.Lexer) unilex.StateFunc {
|
||||||
|
@ -37,32 +52,26 @@ func initJson(l *unilex.Lexer) unilex.StateFunc {
|
||||||
switch {
|
switch {
|
||||||
case l.Accept("{"):
|
case l.Accept("{"):
|
||||||
l.Emit(lObjectStart)
|
l.Emit(lObjectStart)
|
||||||
return stateInObject(true)
|
return stateInObject
|
||||||
case l.Peek() == unilex.EOF:
|
case l.Peek() == unilex.EOF:
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return l.Errorf("Unknown token: %s", l.Peek())
|
return l.Errorf("Unknown token: %s", string(l.Peek()))
|
||||||
}
|
}
|
||||||
|
|
||||||
func stateInObject(initial bool) unilex.StateFunc {
|
func stateInObject(l *unilex.Lexer) unilex.StateFunc {
|
||||||
return func(l *unilex.Lexer) unilex.StateFunc {
|
|
||||||
// We are inside an object, so we expect field keys and values.
|
// We are inside an object, so we expect field keys and values.
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
if l.Accept("}") {
|
if l.Accept("}") {
|
||||||
l.Emit(lObjectEnd)
|
l.Emit(lObjectEnd)
|
||||||
if initial {
|
// On a closing brace, return to the state pushed by the enclosing container; PopState yields nil if nothing was pushed.
|
||||||
return initJson
|
return l.PopState()
|
||||||
}
|
}
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
l.Accept(",")
|
l.Accept(",")
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
return stateInObject(initial)
|
|
||||||
}
|
|
||||||
if l.Peek() == unilex.EOF {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if !unilex.ScanQuotedString(l, '"') {
|
if !unilex.ScanQuotedString(l, '"') {
|
||||||
return l.Errorf("Unknown token: %s", l.Peek())
|
return l.Errorf("Unknown token: %s", string(l.Peek()))
|
||||||
}
|
}
|
||||||
l.Emit(lObjectKey)
|
l.Emit(lObjectKey)
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
|
@ -70,27 +79,56 @@ func stateInObject(initial bool) unilex.StateFunc {
|
||||||
return l.Errorf("Expected ':'")
|
return l.Errorf("Expected ':'")
|
||||||
}
|
}
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
|
l.Emit(lObjectValue)
|
||||||
switch {
|
switch {
|
||||||
case unilex.ScanQuotedString(l, '"'):
|
case unilex.ScanQuotedString(l, '"'):
|
||||||
l.Emit(lObjectValueString)
|
l.Emit(lString)
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
l.Accept(",")
|
l.Accept(",")
|
||||||
l.Ignore()
|
l.Ignore()
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
return stateInObject(initial)
|
return stateInObject
|
||||||
case unilex.ScanNumber(l):
|
case unilex.ScanNumber(l):
|
||||||
l.Emit(lObjectValueNumber)
|
l.Emit(lNumber)
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
l.Accept(",")
|
l.Accept(",")
|
||||||
l.Ignore()
|
l.Ignore()
|
||||||
ignoreWhiteSpace(l)
|
ignoreWhiteSpace(l)
|
||||||
return stateInObject(initial)
|
return stateInObject
|
||||||
case l.Accept("{"):
|
case l.Accept("{"):
|
||||||
l.Emit(lObjectStart)
|
l.Emit(lObjectStart)
|
||||||
return stateInObject(false)
|
l.PushState(stateInObject)
|
||||||
|
return stateInObject
|
||||||
|
case l.Accept("["):
|
||||||
|
l.Emit(lArrayStart)
|
||||||
|
l.PushState(stateInObject)
|
||||||
|
return stateInArray
|
||||||
}
|
}
|
||||||
return l.Errorf("Unknown token")
|
return l.Errorf("Unknown token: %s", string(l.Peek()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func stateInArray(l *unilex.Lexer) unilex.StateFunc {
|
||||||
|
ignoreWhiteSpace(l)
|
||||||
|
l.Accept(",")
|
||||||
|
ignoreWhiteSpace(l)
|
||||||
|
switch {
|
||||||
|
case unilex.ScanQuotedString(l, '"'):
|
||||||
|
l.Emit(lString)
|
||||||
|
case unilex.ScanNumber(l):
|
||||||
|
l.Emit(lNumber)
|
||||||
|
case l.Accept("{"):
|
||||||
|
l.Emit(lObjectStart)
|
||||||
|
l.PushState(stateInArray)
|
||||||
|
return stateInObject
|
||||||
|
case l.Accept("["):
|
||||||
|
l.Emit(lArrayStart)
|
||||||
|
l.PushState(stateInArray)
|
||||||
|
return stateInArray
|
||||||
|
case l.Accept("]"):
|
||||||
|
l.Emit(lArrayEnd)
|
||||||
|
return l.PopState()
|
||||||
|
}
|
||||||
|
return stateInArray
|
||||||
}
|
}
|
||||||
|
|
||||||
func ignoreWhiteSpace(l *unilex.Lexer) {
|
func ignoreWhiteSpace(l *unilex.Lexer) {
|
||||||
|
|
8
lexem.go
8
lexem.go
|
@ -9,12 +9,12 @@ type Lexem struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// LexType represents type of current lexem.
|
// LexType represents type of current lexem.
|
||||||
type LexType string
|
type LexType int
|
||||||
|
|
||||||
// Some std lexem types
|
// Some std lexem types
|
||||||
const (
|
const (
|
||||||
// LError represents lexing error.
|
|
||||||
LError LexType = "ERROR"
|
|
||||||
// LEOF represents end of input.
|
// LexEOF represents end of input.
|
||||||
LEOF LexType = "EOF"
|
LexEOF LexType = -1
|
||||||
|
// LexError represents lexing error.
|
||||||
|
LexError LexType = -2
|
||||||
)
|
)
|
||||||
|
|
13
lexer.go
13
lexer.go
|
@ -16,6 +16,7 @@ type Lexer struct {
|
||||||
Pos int // Pos at input string.
|
Pos int // Pos at input string.
|
||||||
Output chan Lexem // Lexems channel.
|
Output chan Lexem // Lexems channel.
|
||||||
width int // Width of last rune.
|
width int // Width of last rune.
|
||||||
|
states stateStack // Stack of saved states used by PushState/PopState.
|
||||||
}
|
}
|
||||||
|
|
||||||
// New returns new scanner for input string.
|
// New returns new scanner for input string.
|
||||||
|
@ -37,6 +38,16 @@ func (l *Lexer) Run(init StateFunc) {
|
||||||
close(l.Output)
|
close(l.Output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PopState returns previous state function.
|
||||||
|
func (l *Lexer) PopState() StateFunc {
|
||||||
|
return l.states.Pop()
|
||||||
|
}
|
||||||
|
|
||||||
|
// PushState saves a state to return to before descending into a nested state.
|
||||||
|
func (l *Lexer) PushState(s StateFunc) {
|
||||||
|
l.states.Push(s)
|
||||||
|
}
|
||||||
|
|
||||||
// Emit current lexem to output.
|
// Emit current lexem to output.
|
||||||
func (l *Lexer) Emit(typ LexType) {
|
func (l *Lexer) Emit(typ LexType) {
|
||||||
l.Output <- Lexem{
|
l.Output <- Lexem{
|
||||||
|
@ -51,7 +62,7 @@ func (l *Lexer) Emit(typ LexType) {
|
||||||
// Errorf produces error lexem and stops scanning.
|
// Errorf produces error lexem and stops scanning.
|
||||||
func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
|
func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
|
||||||
l.Output <- Lexem{
|
l.Output <- Lexem{
|
||||||
Type: LError,
|
Type: LexError,
|
||||||
Value: fmt.Sprintf(format, args...),
|
Value: fmt.Sprintf(format, args...),
|
||||||
Start: l.Start,
|
Start: l.Start,
|
||||||
End: l.Pos,
|
End: l.Pos,
|
||||||
|
|
14
statefunc.go
14
statefunc.go
|
@ -2,3 +2,17 @@ package unilex
|
||||||
|
|
||||||
// StateFunc represents function that scans lexems and returns new state function or nil if lexing completed.
|
// StateFunc represents function that scans lexems and returns new state function or nil if lexing completed.
|
||||||
type StateFunc func(*Lexer) StateFunc
|
type StateFunc func(*Lexer) StateFunc
|
||||||
|
|
||||||
|
type stateStack []StateFunc
|
||||||
|
|
||||||
|
func (ss *stateStack) Push(s StateFunc) {
|
||||||
|
*ss = append(*ss, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ss *stateStack) Pop() (s StateFunc) {
|
||||||
|
if len(*ss) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
*ss, s = (*ss)[:len(*ss)-1], (*ss)[len(*ss)-1]
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue