- added more unit tests for lexer

- corrected defects revealed by added tests
This commit is contained in:
Aidan 2021-01-24 22:04:26 -08:00
parent e4f2fbaa70
commit 172aa4ea4b
No known key found for this signature in database
GPG key ID: 327711E983899316
3 changed files with 113 additions and 77 deletions

View file

@ -20,6 +20,7 @@ use std::boxed::Box;
// Container
#[derive(Debug)]
pub enum Ctr {
SYMBOL(String),
STRING(String),
@ -34,6 +35,7 @@ pub enum Ctr {
* Holds two Containers.
* Basic building block for more complex data structures.
*/
#[derive(Debug)]
pub struct Cell {
/* "Cell Address Register"
* Historical way of referring to the first value in a cell.

View file

@ -62,51 +62,61 @@ fn process(document: String) -> Result<Box<Cell>, String> {
let mut is_str = false;
let mut ign = false;
let mut token = String::new();
let mut delim_stack = vec![')', ' '];
let mut ref_stack = vec![Box::new(Cell{
car: Ctr::None,
cdr: Ctr::None
})];
let mut delim_stack = Vec::new();
let mut ref_stack = vec![];
/* Iterate over document
* Manage currently sought delimiter
*/
for c in document.chars() {
let mut needs_alloc = true;
let mut needs_alloc = false;
let mut alloc_list = false;
let delim = delim_stack.last().unwrap();
// case only happens when escaping a char
if *delim == '*' {
token.push(c);
let delim: char;
if let Some(d) = delim_stack.last() {
delim = *d;
// normal delimiter cases
} else if c == *delim {
// reset comment line status
if *delim == '\n' {
ign = false
}
if delim == '*' {
token.push(c);
delim_stack.pop();
continue;
// catch too many list end
// set alloc_list
if *delim == ')' {
alloc_list = true;
if ref_stack.len() < 1 {
return Err("too many end parens".to_string());
// normal delimiter cases
} else if c == delim {
needs_alloc = true;
// reset comment line status
if delim == '\n' {
ign = false
}
// catch too many list end
// set alloc_list
if delim == ')' {
alloc_list = true;
if ref_stack.len() < 1 {
return Err("too many end parens".to_string());
}
}
delim_stack.pop();
// if we are in a commented out space, skip this char
} else if ign {
continue;
}
delim_stack.pop();
}
// try to generalize all whitespace
} else if *delim == ' ' && char::is_whitespace(c) {
delim_stack.pop();
// match a delimiter
} else {
needs_alloc = false;
if !needs_alloc && char::is_whitespace(c) && !is_str {
// don't make empty tokens just because the document has consecutive whitespace
if token.len() == 0 {
continue;
}
needs_alloc = true;
}
// match a delimiter
if !needs_alloc {
match c {
// add a new Cell reference to the stack
'(' => {
if token != "" || *(delim_stack.last().unwrap()) != ' ' {
if token != "" {
return Err("list started in middle of another token".to_string());
}
@ -136,62 +146,52 @@ fn process(document: String) -> Result<Box<Cell>, String> {
token.push(c)
}
}
}
if ign {
continue;
}
/* 1. Handle allocation of new Ctr
* 2. Handle expansion of current list ref
*/
if needs_alloc {
if delim_stack.len() == 0 {
delim_stack.push(' ');
}
} else {
if token.len() == 0 && !is_str && !alloc_list {
return Err("Empty token".to_string());
}
let mut current_cell_ref = ref_stack.pop().unwrap();
// throws warning (overwritten before read) not sure how to handle
let mut obj = Ctr::None;
if alloc_list {
// we should never hit this but if we do I want to know
if token.len() > 0 {
return Err("list/token conflict".to_string());
let mut obj;
if token.len() > 0 {
if is_str {
obj = Ctr::STRING(token);
is_str = false;
} else if token == "true" {
obj = Ctr::BOOL(true);
} else if token == "false" {
obj = Ctr::BOOL(false);
} else if let Ok(i) = token.parse::<i128>() {
obj = Ctr::INTEGER(i);
} else if let Ok(f) = token.parse::<f64>() {
obj = Ctr::FLOAT(f);
} else if let Some(s) = tok_is_symbol(&token) {
obj = Ctr::SYMBOL(s);
} else {
return Err(format!("Unparsable token:{}", token));
}
// return if we have finished the document
if ref_stack.len() == 0 {
return Ok(current_cell_ref);
}
obj = Ctr::CELL(Box::new(*current_cell_ref));
current_cell_ref = ref_stack.pop().unwrap();
} else if is_str {
obj = Ctr::STRING(token);
is_str = false;
} else if token == "true" {
obj = Ctr::BOOL(true);
} else if token == "false" {
obj = Ctr::BOOL(false);
} else if let Ok(i) = token.parse::<i128>() {
obj = Ctr::INTEGER(i);
} else if let Ok(f) = token.parse::<f64>() {
obj = Ctr::FLOAT(f);
} else if let Some(s) = tok_is_symbol(&token) {
obj = Ctr::SYMBOL(s);
} else {
return Err(format!("Unparsable token: {}", token));
token = String::new();
append(&mut current_cell_ref, obj);
}
append(&mut current_cell_ref, obj);
if alloc_list {
// return if we have finished the document
if ref_stack.len() == 0 {
return Ok(Box::new(*current_cell_ref));
}
// reset token
token = String::new();
// shortening this will lead to naught but pain
obj = Ctr::CELL(Box::new(*current_cell_ref));
current_cell_ref = ref_stack.pop().unwrap();
append(&mut current_cell_ref, obj);
}
ref_stack.push(current_cell_ref);
}
}