diff --git a/src/cell.rs b/src/cell.rs index 68d2ef6..28858ac 100644 --- a/src/cell.rs +++ b/src/cell.rs @@ -20,6 +20,7 @@ use std::boxed::Box; // Container +#[derive(Debug)] pub enum Ctr { SYMBOL(String), STRING(String), @@ -34,6 +35,7 @@ pub enum Ctr { * Holds two Containers. * Basic building block for more complex data structures. */ +#[derive(Debug)] pub struct Cell { /* "Cell Address Register" * Historical way of referring to the first value in a cell. diff --git a/src/lex.rs b/src/lex.rs index 4de9219..4f3e1b2 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -62,51 +62,61 @@ fn process(document: String) -> Result, String> { let mut is_str = false; let mut ign = false; let mut token = String::new(); - let mut delim_stack = vec![')', ' ']; - let mut ref_stack = vec![Box::new(Cell{ - car: Ctr::None, - cdr: Ctr::None - })]; + let mut delim_stack = Vec::new(); + let mut ref_stack = vec![]; /* Iterate over document * Manage currently sought delimiter */ for c in document.chars() { - let mut needs_alloc = true; + let mut needs_alloc = false; let mut alloc_list = false; - let delim = delim_stack.last().unwrap(); - // case only happens when escaping a char - if *delim == '*' { - token.push(c); + let delim: char; + if let Some(d) = delim_stack.last() { + delim = *d; - // normal delimiter cases - } else if c == *delim { - // reset comment line status - if *delim == '\n' { - ign = false - } + if delim == '*' { + token.push(c); + delim_stack.pop(); + continue; - // catch too many list end - // set alloc_list - if *delim == ')' { - alloc_list = true; - if ref_stack.len() < 1 { - return Err("too many end parens".to_string()); + // normal delimiter cases + } else if c == delim { + needs_alloc = true; + // reset comment line status + if delim == '\n' { + ign = false } + + // catch too many list end + // set alloc_list + if delim == ')' { + alloc_list = true; + if ref_stack.len() < 1 { + return Err("too many end parens".to_string()); + } + } + delim_stack.pop(); + + // if we are in a commented out space, skip this char + } else if ign { + continue; } - delim_stack.pop(); - + } // try to generalize all whitespace - } else if *delim == ' ' && char::is_whitespace(c) { - delim_stack.pop(); - - // match a delimiter - } else { - needs_alloc = false; + if !needs_alloc && char::is_whitespace(c) && !is_str { + // dont make empty tokens just because the document has consecutive whitespace + if token.len() == 0 { + continue; + } + needs_alloc = true; + } + // match a delimiter + if !needs_alloc { match c { // add a new Cell reference to the stack '(' => { - if token != "" || *(delim_stack.last().unwrap()) != ' ' { + if token != "" { return Err("list started in middle of another token".to_string()); } @@ -136,62 +146,52 @@ fn process(document: String) -> Result, String> { token.push(c) } } - } - - if ign { - continue; - } /* 1. Handle allocation of new Ctr * 2. Handle expansion of current list ref */ - if needs_alloc { - if delim_stack.len() == 0 { - delim_stack.push(' '); - } - + } else { if token.len() == 0 && !is_str && !alloc_list { return Err("Empty token".to_string()); } let mut current_cell_ref = ref_stack.pop().unwrap(); - // throws warning (overwritten before read) not sure how to handle - let mut obj = Ctr::None; - if alloc_list { - // we should never hit this but if we do I want to know - if token.len() > 0 { - return Err("list/token conflict".to_string()); + let mut obj; + if token.len() > 0 { + if is_str { + obj = Ctr::STRING(token); + is_str = false; + } else if token == "true" { + obj = Ctr::BOOL(true); + } else if token == "false" { + obj = Ctr::BOOL(false); + } else if let Ok(i) = token.parse::() { + obj = Ctr::INTEGER(i); + } else if let Ok(f) = token.parse::() { + obj = Ctr::FLOAT(f); + } else if let Some(s) = tok_is_symbol(&token) { + obj = Ctr::SYMBOL(s); + } else { + return Err(format!("Unparsable token:{}", token)); } - // return if we have finished the document - if ref_stack.len() == 0 { - return Ok(current_cell_ref); - } - - obj = Ctr::CELL(Box::new(*current_cell_ref)); - current_cell_ref = ref_stack.pop().unwrap(); - - } else if is_str { - obj = Ctr::STRING(token); - is_str = false; - } else if token == "true" { - obj = Ctr::BOOL(true); - } else if token == "false" { - obj = Ctr::BOOL(false); - } else if let Ok(i) = token.parse::() { - obj = Ctr::INTEGER(i); - } else if let Ok(f) = token.parse::() { - obj = Ctr::FLOAT(f); - } else if let Some(s) = tok_is_symbol(&token) { - obj = Ctr::SYMBOL(s); - } else { - return Err(format!("Unparsable token: {}", token)); + token = String::new(); + append(&mut current_cell_ref, obj); } - append(&mut current_cell_ref, obj); + if alloc_list { + // return if we have finished the document + if ref_stack.len() == 0 { + return Ok(Box::new(*current_cell_ref)); + } - // reset token - token = String::new(); + // shortening this will lead to naught but pain + obj = Ctr::CELL(Box::new(*current_cell_ref)); + current_cell_ref = ref_stack.pop().unwrap(); + append(&mut current_cell_ref, obj); + } + + ref_stack.push(current_cell_ref); } } diff --git a/tests/test_lex.rs b/tests/test_lex.rs index 1194fc0..e240fda 100644 --- a/tests/test_lex.rs +++ b/tests/test_lex.rs @@ -2,13 +2,47 @@ mod lex_tests { use relish::ast::{lex}; #[test] - fn test_lex_basic_list() { - let document: &str = "(hello \"world\")"; + fn test_lex_basic_pair() { + let document: &str = "(hello 'world')"; + let output: &str = "(hello 'world' nil)"; match lex(document.to_string()) { Ok(box_cell) => { - assert_eq!(format!("{}", *box_cell), document.to_string()); + assert_eq!(format!("{}", *box_cell), output.to_string()); }, - Err(_s) => assert!(false) + Err(s) => { + print!("{}\n", s); + assert!(false); + } + } + } + + #[test] + fn test_lex_basic_list() { + let document: &str = "(hello 'world' 1 2 3)"; + let output: &str = "(hello 'world' 1 2 3 nil)"; + match lex(document.to_string()) { + Ok(box_cell) => { + assert_eq!(format!("{}", *box_cell), output.to_string()); + }, + Err(s) => { + print!("{}\n", s); + assert!(false); + } + } + } + + #[test] + fn test_lex_complex_list() { + let document: &str = "(hello 'world' (1 2 (1 2 3)) 1 2 3)"; + let output: &str = "(hello 'world' (1 2 (1 2 3 nil) nil) 1 2 3 nil)"; + match lex(document.to_string()) { + Ok(box_cell) => { + assert_eq!(format!("{}", *box_cell), output.to_string()); + }, + Err(s) => { + print!("{}\n", s); + assert!(false); + } } } }