WIP commit:

* Fix up project structures * Combine vars and funcs table * Make a place for old code that may be useful to reference * Singleton pattern for sym table. Commentary: When this change is finally finished, I promise to use feature branches from here on out.
2023-02-15 23:27:00 -08:00 · 2023-02-15 23:27:00 -08:00 · ca4c557d95
commit ca4c557d95
parent b680e3ca9a
32 changed files with 1092 additions and 616 deletions
--- a/legacy_snippets/initial_rough_spaghetti_implementation/lex.rs
+++ b/legacy_snippets/initial_rough_spaghetti_implementation/lex.rs
@ -0,0 +1,217 @@
+/* relish: highly versatile lisp interpreter
+ * Copyright (C) 2021 Aidan Hahn
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+use crate::segment::{list_append, Ctr, Seg};
+
+const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input"; // returned by process() when the input runs out while is_str is still set
+const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input"; // returned by process() when the input runs out otherwise, without a list having been completed
+
+/* Entry point of the lexer.
+ *
+ * Takes a line of user input and returns an unsimplified tree of tokens.
+ * A document containing any non-ascii character is rejected before any
+ * lexing happens. Otherwise the document is handed to process(), and an
+ * error coming back from it is wrapped in a "Problem lexing document"
+ * message.
+ *
+ * NOTE(review): document is handed to process() by value here, while
+ * process() is declared to take a &String; confirm which is intended.
+ */
+pub fn lex<'a>(document: String) -> Result<Box<Seg<'a>>, String> {
+    if !document.is_ascii() {
+        return Err("document may only contain ascii characters".to_string());
+    }
+
+    // TODO: Make multiple forms of Ok()
+    // To represent the multiple passable outcomes
+    process(document).map_err(|cause| format!("Problem lexing document: {:?}", cause))
+}
+
+/* The logic used in lex: a single pass over the characters of document
+ * that builds one Seg per parenthesised list.
+ *
+ * Returns Ok(Box<Seg>) as soon as the outermost list is closed; nothing
+ * after that closing paren is looked at.
+ * Returns Err(String) if an error occurs:
+ *   - "Empty document" for zero length input
+ *   - "too many end parens", "list started in middle of another token",
+ *     "Empty token" or "Unparsable token: ..." for malformed input
+ *   - UNMATCHED_STR_DELIM or UNMATCHED_LIST_DELIM when the input runs out
+ *     before an outermost list has been closed (this includes input that
+ *     contains no list at all)
+ */
+fn process<'a>(document: &'a String) -> Result<Box<Seg<'a>>, String> {
+    let doc_len = document.len();
+
+    if doc_len == 0 {
+        return Err("Empty document".to_string());
+    }
+
+    /* State variables
+     *   is_str:      set when an opening quote is seen, cleared when the
+     *                string token is turned into a Ctr
+     *   ign:         set by '#', cleared by the newline that ends the comment
+     *   token:       characters collected so far for the token being read
+     *   delim_stack: delimiters currently being sought, innermost last;
+     *                '*' is a marker meaning "take the next char literally"
+     *   ref_stack:   lists that have been opened and not yet closed,
+     *                innermost last
+     */
+    let mut is_str = false;
+    let mut ign = false;
+    let mut token = String::new();
+    let mut delim_stack = Vec::new();
+    let mut ref_stack = vec![];
+
+    /* Iterate over document
+     * Manage currently sought delimiter
+     *
+     * needs_alloc and alloc_list start out false for every character:
+     *   needs_alloc: the current char ended a token (or closed a delimiter)
+     *   alloc_list:  the current char was the ')' closing the innermost list
+     */
+    for c in document.chars() {
+        let mut needs_alloc = false;
+        let mut alloc_list = false;
+        let delim: char;
+        if let Some(d) = delim_stack.last() {
+            delim = *d;
+
+            if delim == '*' { // a backslash came just before: keep c verbatim
+                token.push(c);
+                delim_stack.pop();
+                continue;
+
+            // normal delimiter cases
+            } else if c == delim {
+                needs_alloc = true;
+                // reset comment line status (the newline itself produces no token)
+                if delim == '\n' {
+                    delim_stack.pop();
+                    ign = false;
+                    continue;
+                }
+
+                // catch too many list end
+                // set alloc_list
+                if delim == ')' {
+                    alloc_list = true;
+                    if ref_stack.len() < 1 {
+                        return Err("too many end parens".to_string());
+                    }
+                }
+                delim_stack.pop();
+
+            // if we are in a commented out space, skip this char
+            } else if ign {
+                continue;
+            }
+        }
+        // whitespace outside of a string ends the current token
+        if !needs_alloc && char::is_whitespace(c) && !is_str {
+            // dont make empty tokens just because the document has consecutive whitespace
+            if token.len() == 0 {
+                continue;
+            }
+            needs_alloc = true;
+        }
+        // match a delimiter
+        if !needs_alloc {
+            match c {
+                // add a new Seg reference to the stack
+                '(' => {
+                    if is_str { // inside a string a paren is ordinary text
+                        token.push(c);
+                        continue;
+                    }
+
+                    if token != "" {
+                        return Err("list started in middle of another token".to_string());
+                    }
+
+                    ref_stack.push(Box::new(Seg::new()));
+
+                    delim_stack.push(')');
+                }
+                // begin parsing a string; the same quote char is sought as the closing delimiter
+                '"' | '\'' | '`' => {
+                    is_str = true;
+                    delim_stack.push(c);
+                }
+                /* eat the whole line
+                 * NOTE(review): is_str is not consulted in this arm, so a '#'
+                 * inside a string also starts a comment
+                 */
+                '#' => {
+                    ign = true;
+                    delim_stack.push('\n');
+                }
+                // escape next char (handled by the '*' check at the top of the loop)
+                '\\' => {
+                    delim_stack.push('*');
+                }
+                // add to token
+                _ => {
+                    token.push(c);
+                }
+            }
+
+        /* 1. Handle allocation of new Ctr
+         * 2. Handle expansion of current list ref
+         *
+         * Reached when c matched the sought delimiter or was whitespace
+         * ending a non-empty token.
+         */
+        } else {
+            if token.len() == 0 && !is_str && !alloc_list {
+                return Err("Empty token".to_string());
+            }
+
+            let mut current_seg = ref_stack.pop();
+            let mut obj; // NOTE(review): never assigned when token is empty and is_str is false (possible when alloc_list is set)
+            if is_str {
+                obj = Ctr::String(token);
+                is_str = false;
+                token = String::new();
+            } else if token.len() > 0 {
+                if token == "true" {
+                    obj = Ctr::Bool(true);
+                } else if token == "false" {
+                    obj = Ctr::Bool(false);
+                } else if let Ok(i) = token.parse::<i128>() { // integer is tried before float
+                    obj = Ctr::Integer(i);
+                } else if let Ok(f) = token.parse::<f64>() {
+                    obj = Ctr::Float(f);
+                } else if let Some(s) = tok_is_symbol(&token) {
+                    obj = Ctr::Symbol(s);
+                } else {
+                    return Err(format!("Unparsable token: {}", token));
+                }
+
+                token = String::new();
+            }
+
+            list_append(current_seg, obj);
+
+            if alloc_list {
+                // return if we have finished the document (no enclosing list is left on ref_stack)
+                if ref_stack.len() == 0 {
+                    return Ok(current_seg);
+                }
+
+                // shortening this will lead to naught but pain
+                obj = Ctr::Seg(current_seg.into_raw());
+                current_seg = ref_stack.pop();
+                list_append(current_seg, obj);
+            }
+
+            ref_stack.push(current_seg);
+        }
+    }
+
+    if is_str { // input ran out before the closing quote
+        return Err(UNMATCHED_STR_DELIM.to_string());
+    }
+    return Err(UNMATCHED_LIST_DELIM.to_string());
+}
+
+/* Checks whether token is a valid symbol name.
+ *
+ * A token qualifies when every one of its characters is alphabetic,
+ * a decimal digit, a dash ('-') or an underscore ('_').
+ *
+ * Returns Some(owned copy of token) if it qualifies, else returns None.
+ * An empty token contains no disqualifying character, so it yields
+ * Some of an empty String.
+ *
+ * The parameter is a &str; callers holding a String can still pass
+ * &their_string unchanged.
+ */
+fn tok_is_symbol(token: &str) -> Option<String> {
+    let is_symbol_char = |t: char| t.is_alphabetic() || t.is_digit(10) || t == '-' || t == '_';
+
+    if token.chars().all(is_symbol_char) {
+        Some(String::from(token))
+    } else {
+        None
+    }
+}