/* SHS: Syntactically Homogeneous Shell * Copyright (C) 2019 Aidan Hahn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ package ast import ( "unicode" ) type Token_t int const ( LIST Token_t = iota STRING Token_t = iota NUMBER Token_t = iota SYMBOL Token_t = iota ) type Token struct { Next *Token Tag Token_t Position int Inner interface{} } const string_delims string = "\"'`" func Lex(input string) *Token { if len(input) == 0 { return nil } var ret *Token iter := &ret is_str := false is_list := false tokenBuilder := func (pos int, tok string) { if len(tok) == 0 && !is_list && !is_str { return } *iter = new(Token) (*iter).Position = pos if is_list { (*iter).Inner = Lex(tok) (*iter).Tag = LIST is_list = false } else { (*iter).Inner = tok if is_str { (*iter).Tag = STRING is_str = false } else if tokenIsNumber(tok) { (*iter).Tag = NUMBER } else { (*iter).Tag = SYMBOL } } iter = &(*iter).Next } // returns -1 on unmatched string delim matchStrEnd := func(start int, delim byte) int { for i := start; i < len(input); i++ { if input[i] == delim { return i } } return -1 } // returns -1 on unmatched string delim // returns -2 on unmatched list delim matchListEnd := func(start int) int { depth := 0 for i := start; i < len(input); i++ { switch input[i] { case '"','\'','`': i = matchStrEnd(i + 1, input[i]) if i == -1 { return -1 } case '(': depth++ case ')': if depth == 0 { return i } else { depth -= 1 } } } return -2 } needs_alloc := false start_pos := 0 for i := 0; i < len(input); i++ { switch input[i] { case '(': start_pos = i + 1 i = matchListEnd(start_pos) is_list = true needs_alloc = true case '"','\'','`': start_pos = i + 1 i = matchStrEnd(start_pos, input[i]) is_str = true needs_alloc = true case ' ': if i == start_pos { start_pos += 1 continue } needs_alloc = true } if needs_alloc { needs_alloc = false if (i < 0) { // TODO: Maybe not overload this. start_pos = i goto error } tokenBuilder(start_pos, input[start_pos:i]) start_pos = i+1 } } if start_pos < len(input) { tokenBuilder(start_pos, input[start_pos:]) } return ret error: // TODO: Hook into error module // TODO: Finalize and GC alloced tokens if start_pos == -1 { println("[-] Unmatched string delimiter in input. discarding.") } else if start_pos == -2 { println("[-] Unmatched list delimiter in input. discarding.") } else { println("[-] Unknown error in input. discarding.") } return nil } func tokenIsNumber(arg string) bool { dotCount := 0 for _, char := range arg { if !unicode.IsDigit(char) { if char == '.' && dotCount == 0 { dotCount++ } else { return false } } } return true }