/* SHS: Syntactically Homogeneous Shell * Copyright (C) 2019 Aidan Hahn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ package ast import ( "gitlab.com/whom/shs/log" "unicode" ) // all delimiters that work on strings const string_delims string = "\"'`" /* takes a line of user input * returns an unsimplified tree of tokens */ func Lex(input string) *Token { ret := lex(input) if ret == nil { return nil } if ret.Tag != LIST { temp := &Token{Tag: LIST} temp.Direct(ret) ret = temp } return ret } func lex(input string) *Token { if len(input) == 0 { return nil } var ret *Token iter := &ret is_str := false is_list := false tokenBuilder := func (pos int, tok string) { if len(tok) == 0 && !is_list && !is_str { return } *iter = new(Token) (*iter).Position = pos if is_list { (*iter).inner = lex(tok) (*iter).Tag = LIST is_list = false } else { (*iter).inner = tok if is_str { (*iter).Tag = STRING is_str = false } else if StrIsNumber(tok) { (*iter).Tag = NUMBER } else if tok == "T" || tok == "F" { (*iter).Tag = BOOL } else { (*iter).Tag = SYMBOL } } iter = &(*iter).Next } // returns -1 on unmatched string delim matchStrEnd := func(start int, delim byte) int { for i := start; i < len(input); i++ { if input[i] == delim { return i } } return -1 } // returns -1 on unmatched string delim // returns -2 on unmatched list delim matchListEnd := func(start int) int { depth := 0 for i := start; i < len(input); i++ { switch input[i] { case '"','\'','`': i = matchStrEnd(i + 1, input[i]) if i == -1 { return -1 } case '(': depth++ case ')': if depth == 0 { return i } else { depth -= 1 } } } return -2 } // returns the end of the string OR the end of the line matchLineEnd := func(start int) int { for i := start; i < len(input); i++ { if input[i] == '\n' { return i } } return len(input) } needs_alloc := false start_pos := 0 for i := 0; i < len(input); i++ { switch input[i] { case '(': start_pos = i + 1 i = matchListEnd(start_pos) is_list = true needs_alloc = true case '"','\'','`': start_pos = i + 1 i = matchStrEnd(start_pos, input[i]) is_str = true needs_alloc = true case ' ', '\n', '\t', '\v', '\f', '\r': if i == start_pos { start_pos += 1 continue } needs_alloc = true // comment case case ';': i = matchLineEnd(i) start_pos = i + 1 // this isnt to handle string escaping // its only to make sure that escaped spaces stay in // the same token. case '\\': if i != len(input) - 1 && input[i+1] == ' '{ // eat the backslash input = input[:i] + input[i+1:] } } if needs_alloc { needs_alloc = false if (i < 0) { start_pos = i goto error } tokenBuilder(start_pos, input[start_pos:i]) start_pos = i+1 } } if start_pos < len(input) { tokenBuilder(start_pos, input[start_pos:]) } return ret error: if start_pos == -1 { log.Log(log.ERR, "Unmatched string delimiter in input. discarding.", "lex") } else if start_pos == -2 { log.Log(log.ERR, "Unmatched list delimiter in input. discarding.", "lex") } else { log.Log(log.ERR, "Unknown error in input. discarding.", "lex") } return nil } // returns true if a string could contain an int or float func StrIsNumber(arg string) bool { dotCount := 0 // negative nums if len(arg) > 0 && arg[0] == '-' { arg = arg[1:] } for _, char := range arg { if !unicode.IsDigit(char) { if char == '.' && dotCount == 0 { dotCount++ } else { return false } } } return true }