diff --git a/debug.go b/debug.go index 3d7e762..8c2ea9e 100644 --- a/debug.go +++ b/debug.go @@ -30,7 +30,7 @@ func FmtToken(arg *Token) string { switch arg.tag { case LIST: - return fmt.Sprintf("(%s, )%s", "LIST", arg._inner, suffix) + return fmt.Sprintf("(%s, [List])%s", "LIST", suffix) default: return fmt.Sprintf("(%s, %s)%s", GetTagAsStr(arg.tag), arg._inner, suffix) diff --git a/tags b/tags new file mode 100644 index 0000000..814afde --- /dev/null +++ b/tags @@ -0,0 +1,38 @@ +!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ +!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ +!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/ +!_TAG_PROGRAM_NAME Exuberant Ctags // +!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/ +!_TAG_PROGRAM_VERSION Development // +CallFunction func_table.go /^func CallFunction(target *Function, args *Token) *Token {$/;" f +FmtToken debug.go /^func FmtToken(arg *Token) string {$/;" f +FuncTable func_table.go /^type FuncTable map[string]*Function$/;" t +Function func_table.go /^type Function struct {$/;" t +GetFunction func_table.go /^func GetFunction(arg string) *Function {$/;" f +GetTagAsStr debug.go /^func GetTagAsStr(tag token_t) string {$/;" f +GetVar var_table.go /^func GetVar(arg string, library []VarTable) *Token {$/;" f +GlobalFuncTable func_table.go /^ GlobalFuncTable *FuncTable$/;" v +GlobalVarTable var_table.go /^ GlobalVarTable *VarTable$/;" v +LIST token.go /^ LIST token_t = iota$/;" c +Lex token.go /^func Lex(input string) *Token {$/;" f +NUMBER token.go /^ NUMBER token_t = iota$/;" c +Operation func_table.go /^type Operation func(*Token) *Token$/;" t +ParseFunction func_table.go /^func ParseFunction(target *Function, args *Token) bool {$/;" f +Pop stack.go /^func (s *TokenStack) Pop() *Token {$/;" f +PrintSExpression debug.go /^func PrintSExpression(arg *Token) {$/;" f +Push stack.go /^func (s *TokenStack) Push(v *Token) {$/;" f +STRING token.go /^ STRING token_t = iota$/;" c +SYMBOL token.go /^ SYMBOL token_t = iota$/;" c +Token token.go /^type Token struct {$/;" t +TokenStack stack.go /^type TokenStack struct {$/;" t +VarTable var_table.go /^type VarTable map[string]*Token$/;" t +main cmd/print_ast.go /^func main() {$/;" f +main cmd/print_ast.go /^package main$/;" p +shs debug.go /^package shs$/;" p +shs func_table.go /^package shs$/;" p +shs stack.go /^package shs$/;" p +shs token.go /^package shs;$/;" p +shs var_table.go /^package shs$/;" p +string_delims token.go /^const string_delims string = "\\"'`"$/;" c +tokenIsNumber token.go /^func tokenIsNumber(arg string) bool {$/;" f +token_t token.go /^type token_t int$/;" t diff --git a/token.go b/token.go index 19655da..42f42fa 100644 --- a/token.go +++ b/token.go @@ -18,7 +18,6 @@ package shs; import ( - "strings" "unicode" ) @@ -37,17 +36,17 @@ type Token struct { _inner interface{} } +const string_delims string = "\"'`" + func Lex(input string) *Token { if len(input) == 0 { return nil } var ret *Token - var tok strings.Builder iter := &ret - delim := ' ' - is_list := false is_str := false + is_list := false tokenBuilder := func (pos int, tok string) { if len(tok) == 0 && !is_list && !is_str { @@ -79,48 +78,100 @@ func Lex(input string) *Token { iter = &(*iter).next } - for pos, char := range input { - if char == delim { - if is_str && is_list { - // String just ended inside list - is_str = false - delim = ')' - tok.WriteRune(char) - continue + // returns -1 on unmatched string delim + matchStrEnd := func(start int, delim byte) int { + for i := start; i < len(input); i++ { + if input[i] == delim { + return i } + } - delim = ' ' - tokenBuilder(pos, tok.String()) - tok.Reset() + return -1 + } - } else { - if strings.ContainsRune("\"'`", char) { - is_str = true - delim = char - if !is_list { - continue + // returns -1 on unmatched string delim + // returns -2 on unmatched list delim + matchListEnd := func(start int) int { + depth := 0 + + for i := start; i < len(input); i++ { + switch input[i] { + case '"','\'','`': + i = matchStrEnd(i + 1, input[i]) + if i == -1 { + return -1 } - } else if char == '(' && !is_str { - is_list = true - delim = ')' + case '(': + depth++ + + case ')': + if depth == 0 { + return i + } else { + depth -= 1 + } + } + } + + return -2 + } + + needs_alloc := false + start_pos := 0 + for i := 0; i < len(input); i++ { + switch input[i] { + case '(': + start_pos = i + 1 + i = matchListEnd(start_pos) + is_list = true + needs_alloc = true + + case '"','\'','`': + start_pos = i + 1 + i = matchStrEnd(start_pos, input[i]) + is_str = true + needs_alloc = true + + case ' ': + if i == start_pos { + start_pos += 1 continue } - tok.WriteRune(char) + needs_alloc = true + } + + if needs_alloc { + needs_alloc = false + if (i < 0) { + // TODO: Maybe not overload this. + start_pos = i + goto error + } + + tokenBuilder(start_pos, input[start_pos:i]) + start_pos = i+1 } } - if tok.Len() > 0 { - if is_list || is_str { - // TODO: Throw hella lex error here - return ret - } - - tokenBuilder(len(input), tok.String()) + if start_pos < len(input) { + tokenBuilder(start_pos, input[start_pos:]) } - return ret + +error: + // TODO: Hook into error module + // TODO: Finalize and GC alloced tokens + if start_pos == -1 { + println("[-] Unmatched string delimiter in input. discarding.") + } else if start_pos == -2 { + println("[-] Unmatched list delimiter in input. discarding.") + } else { + println("[-] Unknown error in input. discarding.") + } + + return nil } func tokenIsNumber(arg string) bool {