- added more unit tests for lexer

- corrected defects revealed by added tests
2021-01-24 22:04:26 -08:00 · 2021-01-24 22:04:26 -08:00 · 172aa4ea4b
commit 172aa4ea4b
parent e4f2fbaa70
3 changed files with 113 additions and 77 deletions
--- a/src/cell.rs
+++ b/src/cell.rs
@ -20,6 +20,7 @@ use std::boxed::Box;
 // Container
 #[derive(Debug)]
 pub enum Ctr {
    SYMBOL(String),
    STRING(String),
@ -34,6 +35,7 @@ pub enum Ctr {
 * Holds two Containers.
 * Basic building block for more complex data structures.
 */
 #[derive(Debug)]
 pub struct Cell {
    /* "Cell Address Register"
     *  Historical way of referring to the first value in a cell.
--- a/src/lex.rs
+++ b/src/lex.rs
@ -62,51 +62,61 @@ fn process(document: String) -> Result<Box<Cell>, String> {
    let mut is_str = false;
    let mut ign = false;
    let mut token = String::new();
-    let mut delim_stack = vec![')', ' '];
+    let mut delim_stack = Vec::new();
-    let mut ref_stack = vec![Box::new(Cell{
+    let mut ref_stack = vec![];
        car: Ctr::None,
        cdr: Ctr::None
    })];
    /* Iterate over document
     * Manage currently sought delimiter
     */
    for c in document.chars() {
-        let mut needs_alloc = true;
+        let mut needs_alloc = false;
        let mut alloc_list = false;
-        let delim = delim_stack.last().unwrap();
+        let delim: char;
-        // case only happens when escaping a char
+        if let Some(d) = delim_stack.last() {
-        if *delim == '*' {
+            delim = *d;
            token.push(c);
-        // normal delimiter cases
+            if delim == '*' {
-        } else if c == *delim {
+                token.push(c);
-            // reset comment line status
+                delim_stack.pop();
-            if *delim == '\n' {
+                continue;
                ign = false
            }
-            // catch too many list end
+            // normal delimiter cases
-            // set alloc_list
+            } else if c == delim {
-            if *delim == ')' {
+                needs_alloc = true;
-                alloc_list = true;
+                // reset comment line status
-                if ref_stack.len() < 1 {
+                if delim == '\n' {
-                    return Err("too many end parens".to_string());
+                    ign = false
                }
                // catch too many list end
                // set alloc_list
                if delim == ')' {
                    alloc_list = true;
                    if ref_stack.len() < 1 {
                        return Err("too many end parens".to_string());
                    }
                }
                delim_stack.pop();
            // if we are in a commented out space, skip this char
            } else if ign {
                continue;
            }
-            delim_stack.pop();
+        }
        // try to generalize all whitespace
-        } else if *delim == ' ' && char::is_whitespace(c) {
+        if !needs_alloc && char::is_whitespace(c) && !is_str {
-            delim_stack.pop();
+            // dont make empty tokens just because the document has consecutive whitespace
-
+            if token.len() == 0 {
-            // match a delimiter
+                continue;
-            } else {
+            }
-            needs_alloc = false;
+            needs_alloc = true;
        }
        // match a delimiter
        if !needs_alloc {
            match c {
                // add a new Cell reference to the stack
                '(' => {
-                    if token != "" || *(delim_stack.last().unwrap()) != ' ' {
+                    if token != ""  {
                        return Err("list started in middle of another token".to_string());
                    }
@ -136,62 +146,52 @@ fn process(document: String) -> Result<Box<Cell>, String> {
                    token.push(c)
                }
            }
        }
        if ign {
            continue;
        }
        /* 1. Handle allocation of new Ctr
         * 2. Handle expansion of current list ref
         */
-        if needs_alloc {
+        } else {
            if delim_stack.len() == 0 {
                delim_stack.push(' ');
            }
            if token.len() == 0 && !is_str && !alloc_list {
                return Err("Empty token".to_string());
            }
            let mut current_cell_ref = ref_stack.pop().unwrap();
-            // throws warning (overwritten before read) not sure how to handle
+            let mut obj;
-            let mut obj = Ctr::None;
+            if token.len() > 0 {
-            if alloc_list {
+                if is_str {
-                // we should never hit this but if we do I want to know
+                    obj = Ctr::STRING(token);
-                if token.len() > 0 {
+                    is_str = false;
-                    return Err("list/token conflict".to_string());
+                } else if token == "true" {
                    obj = Ctr::BOOL(true);
                } else if token == "false" {
                    obj = Ctr::BOOL(false);
                } else if let Ok(i) = token.parse::<i128>() {
                    obj = Ctr::INTEGER(i);
                } else if let Ok(f) = token.parse::<f64>() {
                    obj = Ctr::FLOAT(f);
                } else if let Some(s) = tok_is_symbol(&token) {
                    obj = Ctr::SYMBOL(s);
                } else {
                    return Err(format!("Unparsable token:{}", token));
                }
-                // return if we have finished the document
+                token = String::new();
-                if ref_stack.len() == 0 {
+                append(&mut current_cell_ref, obj);
                    return Ok(current_cell_ref);
                }
                obj = Ctr::CELL(Box::new(*current_cell_ref));
                current_cell_ref = ref_stack.pop().unwrap();
            } else if is_str {
                obj = Ctr::STRING(token);
                is_str = false;
            } else if token == "true" {
                obj = Ctr::BOOL(true);
            } else if token == "false" {
                obj = Ctr::BOOL(false);
            } else if let Ok(i) = token.parse::<i128>() {
                obj = Ctr::INTEGER(i);
            } else if let Ok(f) = token.parse::<f64>() {
                obj = Ctr::FLOAT(f);
            } else if let Some(s) = tok_is_symbol(&token) {
                obj = Ctr::SYMBOL(s);
            } else {
                return Err(format!("Unparsable token: {}", token));
            }
-            append(&mut current_cell_ref, obj);
+            if alloc_list {
                // return if we have finished the document
                if ref_stack.len() == 0 {
                    return Ok(Box::new(*current_cell_ref));
                }
-            // reset token
+                // shortening this will lead to naught but pain
-            token = String::new();
+                obj = Ctr::CELL(Box::new(*current_cell_ref));
                current_cell_ref = ref_stack.pop().unwrap();
                append(&mut current_cell_ref, obj);
            }
            ref_stack.push(current_cell_ref);
        }
    }
--- a/tests/test_lex.rs
+++ b/tests/test_lex.rs
@ -2,13 +2,47 @@ mod lex_tests {
    use relish::ast::{lex};
    #[test]
-    fn test_lex_basic_list() {
+    fn test_lex_basic_pair() {
-        let document: &str = "(hello \"world\")";
+        let document: &str = "(hello 'world')";
        let output: &str = "(hello 'world' nil)";
        match lex(document.to_string()) {
            Ok(box_cell) => {
-                assert_eq!(format!("{}", *box_cell), document.to_string());
+                assert_eq!(format!("{}", *box_cell), output.to_string());
            },
-            Err(_s) => assert!(false)
+            Err(s) => {
                print!("{}\n", s);
                assert!(false);
            }
        }
    }
    /// Lexes `(hello 'world' 1 2 3)` and checks that the resulting cell
    /// renders as `(hello 'world' 1 2 3 nil)`.
    #[test]
    fn test_lex_basic_list() {
        let document = "(hello 'world' 1 2 3)";
        let expected = "(hello 'world' 1 2 3 nil)";
        match lex(document.to_string()) {
            Ok(box_cell) => assert_eq!(format!("{}", *box_cell), expected),
            // Fail with the lexer's own error text so the cause shows up in
            // the test report instead of a bare `assert!(false)`.
            Err(s) => panic!("lex failed on {:?}: {}", document, s),
        }
    }
    /// Lexes a document containing nested lists and checks that every list,
    /// including the inner ones, renders with a trailing `nil`.
    #[test]
    fn test_lex_complex_list() {
        let document = "(hello 'world' (1 2 (1 2 3)) 1 2 3)";
        let expected = "(hello 'world' (1 2 (1 2 3 nil) nil) 1 2 3 nil)";
        match lex(document.to_string()) {
            Ok(box_cell) => assert_eq!(format!("{}", *box_cell), expected),
            // Fail with the lexer's own error text so the cause shows up in
            // the test report instead of a bare `assert!(false)`.
            Err(s) => panic!("lex failed on {:?}: {}", document, s),
        }
    }
 }