From ff198abd8fc9e2019c2f3ef9b7e74206ecdb99b7 Mon Sep 17 00:00:00 2001
From: Alexander Kiryukhin <a.kiryukhin@mail.ru>
Date: Wed, 10 Mar 2021 00:47:58 +0300
Subject: [PATCH] Push/pop state, full json example

---
 README.md            |   4 +-
 example/json/main.go | 146 +++++++++++++++++++++++++++----------------
 lexem.go             |   8 +--
 lexer.go             |  13 +++-
 statefunc.go         |  14 +++++
 5 files changed, 125 insertions(+), 60 deletions(-)

diff --git a/README.md b/README.md
index 172ce9b..352cbe0 100644
--- a/README.md
+++ b/README.md
@@ -3,4 +3,6 @@ Universal lexer for Golang
 
 Based on Rob Pike's awesome video [Lexical Scanning in Go](https://www.youtube.com/watch?v=HxaD_trXwRE)
 
-TODO: Write brief documentation. For now - watch video and [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.
+Examples: see the [/example](https://github.com/neonxp/unilex/tree/master/example) directory.
+
+
diff --git a/example/json/main.go b/example/json/main.go
index 842ab82..745e5aa 100644
--- a/example/json/main.go
+++ b/example/json/main.go
@@ -15,7 +15,19 @@ func main() {
 		"key2": {
 			"key3" : "value 3"
 		},
-		"key4": 123.321
+		"key4": 123.321,
+		"key5": [
+			1,
+			2,
+			[
+				3,
+				4,
+				5,
+				{
+					"key6": "value6"
+				}
+			]
+		]
 	}`
 	l := unilex.New(testJson)
 	go l.Run(initJson)
@@ -25,11 +37,14 @@ func main() {
 }
 
 const (
-	lObjectStart       unilex.LexType = "lObjectStart"
-	lObjectEnd         unilex.LexType = "lObjectEnd"
-	lObjectKey         unilex.LexType = "lObjectKey"
-	lObjectValueString unilex.LexType = "lObjectValueString"
-	lObjectValueNumber unilex.LexType = "lObjectValueNumber"
+	lObjectStart unilex.LexType = iota
+	lObjectEnd
+	lObjectKey
+	lObjectValue
+	lArrayStart
+	lArrayEnd
+	lString
+	lNumber
 )
 
 func initJson(l *unilex.Lexer) unilex.StateFunc {
@@ -37,60 +52,83 @@ func initJson(l *unilex.Lexer) unilex.StateFunc {
 	switch {
 	case l.Accept("{"):
 		l.Emit(lObjectStart)
-		return stateInObject(true)
+		return stateInObject
 	case l.Peek() == unilex.EOF:
 		return nil
 	}
-	return l.Errorf("Unknown token: %s", l.Peek())
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
 }
 
-func stateInObject(initial bool) unilex.StateFunc {
-	return func(l *unilex.Lexer) unilex.StateFunc {
-		// we in object, so we expect field keys and values
-		ignoreWhiteSpace(l)
-		if l.Accept("}") {
-			l.Emit(lObjectEnd)
-			if initial {
-				return initJson
-			}
-			ignoreWhiteSpace(l)
-			l.Accept(",")
-			ignoreWhiteSpace(l)
-			return stateInObject(initial)
-		}
-		if l.Peek() == unilex.EOF {
-			return nil
-		}
-		if !unilex.ScanQuotedString(l, '"') {
-			return l.Errorf("Unknown token: %s", l.Peek())
-		}
-		l.Emit(lObjectKey)
-		ignoreWhiteSpace(l)
-		if !l.Accept(":") {
-			return l.Errorf("Expected ':'")
-		}
-		ignoreWhiteSpace(l)
-		switch {
-		case unilex.ScanQuotedString(l, '"'):
-			l.Emit(lObjectValueString)
-			ignoreWhiteSpace(l)
-			l.Accept(",")
-			l.Ignore()
-			ignoreWhiteSpace(l)
-			return stateInObject(initial)
-		case unilex.ScanNumber(l):
-			l.Emit(lObjectValueNumber)
-			ignoreWhiteSpace(l)
-			l.Accept(",")
-			l.Ignore()
-			ignoreWhiteSpace(l)
-			return stateInObject(initial)
-		case l.Accept("{"):
-			l.Emit(lObjectStart)
-			return stateInObject(false)
-		}
-		return l.Errorf("Unknown token")
+func stateInObject(l *unilex.Lexer) unilex.StateFunc {
+	// Inside an object: expect either the closing "}" or a quoted key, a ":" and a value.
+	ignoreWhiteSpace(l)
+	if l.Accept("}") {
+		l.Emit(lObjectEnd)
+		// Closing brace: resume the state saved by the matching PushState; initJson saves none, so PopState returns nil for the top-level object.
+		return l.PopState()
 	}
+	ignoreWhiteSpace(l)
+	l.Accept(",") // the comma between members is optional here: the result of Accept is ignored
+	ignoreWhiteSpace(l)
+	if !unilex.ScanQuotedString(l, '"') {
+		return l.Errorf("Unknown token: %s", string(l.Peek()))
+	}
+	l.Emit(lObjectKey)
+	ignoreWhiteSpace(l)
+	if !l.Accept(":") {
+		return l.Errorf("Expected ':'")
+	}
+	ignoreWhiteSpace(l)
+	l.Emit(lObjectValue) // marker emitted before the value is scanned; the value's own lexem is emitted by the switch below
+	switch {
+	case unilex.ScanQuotedString(l, '"'):
+		l.Emit(lString)
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case unilex.ScanNumber(l):
+		l.Emit(lNumber)
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		l.PushState(stateInObject) // come back to this object once the nested object pops on its "}"
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		l.PushState(stateInObject) // come back to this object once the nested array pops on its "]"
+		return stateInArray
+	}
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+func stateInArray(l *unilex.Lexer) unilex.StateFunc {
+	ignoreWhiteSpace(l) // each call handles one array element, a nested "{" or "[", or the closing "]"
+	l.Accept(",") // the comma between elements is optional here: the result of Accept is ignored
+	ignoreWhiteSpace(l)
+	switch {
+	case unilex.ScanQuotedString(l, '"'):
+		l.Emit(lString)
+	case unilex.ScanNumber(l):
+		l.Emit(lNumber)
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		l.PushState(stateInArray) // come back to this array once the nested object pops on its "}"
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		l.PushState(stateInArray) // come back to this array once the nested array pops on its "]"
+		return stateInArray
+	case l.Accept("]"):
+		l.Emit(lArrayEnd)
+		return l.PopState()
+	}
+	return stateInArray // NOTE(review): also reached when no case matched; if nothing was consumed (unknown token, EOF) this presumably loops forever — confirm
 }
 
 func ignoreWhiteSpace(l *unilex.Lexer) {
diff --git a/lexem.go b/lexem.go
index bd24ea9..c594ee9 100644
--- a/lexem.go
+++ b/lexem.go
@@ -9,12 +9,12 @@ type Lexem struct {
 }
 
 // LexType represents type of current lexem.
-type LexType string
+type LexType int
 
 // Some std lexem types
 const (
-	// LError represents lexing error.
-	LError LexType = "ERROR"
 	// LEOF represents end of input.
-	LEOF LexType = "EOF"
+	LexEOF LexType = -1
+	// LexError represents lexing error.
+	LexError LexType = -2
 )
diff --git a/lexer.go b/lexer.go
index 10317bd..b556045 100644
--- a/lexer.go
+++ b/lexer.go
@@ -16,6 +16,7 @@ type Lexer struct {
 	Pos    int        // Pos at input string.
 	Output chan Lexem // Lexems channel.
 	width  int        // Width of last rune.
+	states stateStack // Stack of saved states returned by PopState.
 }
 
 // New returns new scanner for input string.
@@ -37,6 +38,16 @@ func (l *Lexer) Run(init StateFunc) {
 	close(l.Output)
 }
 
+// PopState removes and returns the most recently pushed state function, or nil when no state is saved.
+func (l *Lexer) PopState() StateFunc {
+	return l.states.Pop()
+}
+
+// PushState saves s so that a later PopState can return it once a nested state has finished.
+func (l *Lexer) PushState(s StateFunc) {
+	l.states.Push(s)
+}
+
 // Emit current lexem to output.
 func (l *Lexer) Emit(typ LexType) {
 	l.Output <- Lexem{
@@ -51,7 +62,7 @@ func (l *Lexer) Emit(typ LexType) {
 // Errorf produces error lexem and stops scanning.
 func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
 	l.Output <- Lexem{
-		Type:  LError,
+		Type:  LexError,
 		Value: fmt.Sprintf(format, args...),
 		Start: l.Start,
 		End:   l.Pos,
diff --git a/statefunc.go b/statefunc.go
index 5980ecc..734fe57 100644
--- a/statefunc.go
+++ b/statefunc.go
@@ -2,3 +2,17 @@ package unilex
 
 // StateFunc represents function that scans lexems and returns new state function or nil if lexing completed.
 type StateFunc func(*Lexer) StateFunc
+
+type stateStack []StateFunc // LIFO stack of state functions; the last element is the top
+
+func (ss *stateStack) Push(s StateFunc) {
+	*ss = append(*ss, s) // s becomes the new top
+}
+
+func (ss *stateStack) Pop() (s StateFunc) {
+	if len(*ss) == 0 {
+		return nil // empty stack: there is no saved state to resume
+	}
+	*ss, s = (*ss)[:len(*ss)-1], (*ss)[len(*ss)-1] // shrink by one and hand back the removed top element
+	return s
+}