Redid lex process

2019-11-29 19:02:30 -08:00 · 2019-11-29 19:02:30 -08:00 · 640dbb183e
commit 640dbb183e
parent c52391da07
3 changed files with 123 additions and 34 deletions
--- a/debug.go
+++ b/debug.go
@ -30,7 +30,7 @@ func FmtToken(arg *Token) string {
    switch arg.tag {
    case LIST:
-        return fmt.Sprintf("(%s, <List>)%s", "LIST", arg._inner, suffix)
+        return fmt.Sprintf("(%s, [List])%s", "LIST", suffix)
    default:
        return fmt.Sprintf("(%s, %s)%s", GetTagAsStr(arg.tag), arg._inner, suffix)
--- a/38
+++ b/38
@ -0,0 +1,38 @@
 !_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
 !_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
 !_TAG_PROGRAM_AUTHOR	Darren Hiebert	/dhiebert@users.sourceforge.net/
 !_TAG_PROGRAM_NAME	Exuberant Ctags	//
 !_TAG_PROGRAM_URL	http://ctags.sourceforge.net	/official site/
 !_TAG_PROGRAM_VERSION	Development	//
 CallFunction	func_table.go	/^func CallFunction(target *Function, args *Token) *Token {$/;"	f
 FmtToken	debug.go	/^func FmtToken(arg *Token) string {$/;"	f
 FuncTable	func_table.go	/^type FuncTable map[string]*Function$/;"	t
 Function	func_table.go	/^type Function struct {$/;"	t
 GetFunction	func_table.go	/^func GetFunction(arg string) *Function {$/;"	f
 GetTagAsStr	debug.go	/^func GetTagAsStr(tag token_t) string {$/;"	f
 GetVar	var_table.go	/^func GetVar(arg string, library []VarTable) *Token {$/;"	f
 GlobalFuncTable	func_table.go	/^    GlobalFuncTable *FuncTable$/;"	v
 GlobalVarTable	var_table.go	/^    GlobalVarTable *VarTable$/;"	v
 LIST	token.go	/^    LIST      token_t = iota$/;"	c
 Lex	token.go	/^func Lex(input string) *Token {$/;"	f
 NUMBER	token.go	/^    NUMBER    token_t = iota$/;"	c
 Operation	func_table.go	/^type Operation func(*Token) *Token$/;"	t
 ParseFunction	func_table.go	/^func ParseFunction(target *Function, args *Token) bool {$/;"	f
 Pop	stack.go	/^func (s *TokenStack) Pop() *Token {$/;"	f
 PrintSExpression	debug.go	/^func PrintSExpression(arg *Token) {$/;"	f
 Push	stack.go	/^func (s *TokenStack) Push(v *Token) {$/;"	f
 STRING	token.go	/^    STRING    token_t = iota$/;"	c
 SYMBOL	token.go	/^    SYMBOL    token_t = iota$/;"	c
 Token	token.go	/^type Token struct {$/;"	t
 TokenStack	stack.go	/^type TokenStack struct {$/;"	t
 VarTable	var_table.go	/^type VarTable map[string]*Token$/;"	t
 main	cmd/print_ast.go	/^func main() {$/;"	f
 main	cmd/print_ast.go	/^package main$/;"	p
 shs	debug.go	/^package shs$/;"	p
 shs	func_table.go	/^package shs$/;"	p
 shs	stack.go	/^package shs$/;"	p
 shs	token.go	/^package shs;$/;"	p
 shs	var_table.go	/^package shs$/;"	p
 string_delims	token.go	/^const string_delims string = "\\"'`"$/;"	c
 tokenIsNumber	token.go	/^func tokenIsNumber(arg string) bool {$/;"	f
 token_t	token.go	/^type token_t int$/;"	t
--- a/token.go
+++ b/token.go
@ -18,7 +18,6 @@
 package shs;
 import (
    "strings"
    "unicode"
 )
@ -37,17 +36,17 @@ type Token struct {
    _inner interface{}
 }
 const string_delims string = "\"'`"
 func Lex(input string) *Token {
    if len(input) == 0 {
        return nil
    }
    var ret *Token
    var tok strings.Builder
    iter := &ret
    delim := ' '
    is_list := false
    is_str  := false
    is_list := false
    tokenBuilder := func (pos int, tok string) {
        if len(tok) == 0 && !is_list && !is_str {
@ -79,48 +78,100 @@ func Lex(input string) *Token {
        iter = &(*iter).next
    }
-    for pos, char := range input {
+    // returns -1 on unmatched string delim
-        if char == delim {
+    matchStrEnd := func(start int, delim byte) int {
-            if is_str && is_list {
+        for i := start; i < len(input); i++ {
-                // String just ended inside list
+            if input[i] == delim {
-                is_str = false
+                return i
-                delim = ')'
+            }
                tok.WriteRune(char)
                continue
        }
-            delim = ' '
+        return -1
-            tokenBuilder(pos, tok.String())
+    }
            tok.Reset()
    // returns -1 on unmatched string delim
    // returns -2 on unmatched list delim
    matchListEnd := func(start int) int {
        depth := 0
        for i := start; i < len(input); i++ {
            switch input[i] {
            case '"','\'','`':
                i = matchStrEnd(i + 1, input[i])
                if i == -1 {
                    return -1
                }
            case '(':
                depth++
            case ')':
                if depth == 0 {
                    return i
                } else {
-            if strings.ContainsRune("\"'`", char) {
+                    depth -= 1
-                is_str = true
+                }
-                delim = char
+            }
                if !is_list {
                    continue
        }
-            } else if char == '(' && !is_str {
+        return -2
    }
    needs_alloc := false
    start_pos := 0
    for i := 0; i < len(input); i++ {
        switch input[i] {
        case '(':
            start_pos = i + 1
            i = matchListEnd(start_pos)
            is_list = true
-                delim = ')'
+            needs_alloc = true
        case '"','\'','`':
            start_pos = i + 1
            i = matchStrEnd(start_pos, input[i])
            is_str = true
            needs_alloc = true
        case ' ':
            if i == start_pos {
                start_pos += 1
                continue
            }
-            tok.WriteRune(char)
+            needs_alloc = true
        }
        if needs_alloc {
            needs_alloc = false
            if (i < 0) {
                // TODO: Maybe don't overload start_pos to carry the negative error code (-1/-2) into the error handler.
                start_pos = i
                goto error
            }
            tokenBuilder(start_pos, input[start_pos:i])
            start_pos = i+1
        }
    }
-    if tok.Len() > 0 {
+    if start_pos < len(input) {
-        if is_list || is_str {
+        tokenBuilder(start_pos, input[start_pos:])
-            // TODO: Throw hella lex error here
+    }
    return ret
 error:
    // TODO: Hook into error module
    // TODO: Finalize and GC alloced tokens
    if start_pos == -1 {
        println("[-] Unmatched string delimiter in input. discarding.")
    } else if start_pos == -2 {
        println("[-] Unmatched list delimiter in input. discarding.")
    } else {
        println("[-] Unknown error in input. discarding.")
    }
-        tokenBuilder(len(input), tok.String())
+    return nil
    }
    return ret
 }
 func tokenIsNumber(arg string) bool {