- added more unit tests for lexer

- corrected defects revealed by added tests
This commit is contained in:
Aidan 2021-01-24 22:04:26 -08:00
parent e4f2fbaa70
commit 172aa4ea4b
No known key found for this signature in database
GPG key ID: 327711E983899316
3 changed files with 113 additions and 77 deletions

View file

@ -20,6 +20,7 @@ use std::boxed::Box;
// Container // Container
#[derive(Debug)]
pub enum Ctr { pub enum Ctr {
SYMBOL(String), SYMBOL(String),
STRING(String), STRING(String),
@ -34,6 +35,7 @@ pub enum Ctr {
* Holds two Containers. * Holds two Containers.
* Basic building block for more complex data structures. * Basic building block for more complex data structures.
*/ */
#[derive(Debug)]
pub struct Cell { pub struct Cell {
/* "Cell Address Register" /* "Cell Address Register"
* Historical way of referring to the first value in a cell. * Historical way of referring to the first value in a cell.

View file

@ -62,51 +62,61 @@ fn process(document: String) -> Result<Box<Cell>, String> {
let mut is_str = false; let mut is_str = false;
let mut ign = false; let mut ign = false;
let mut token = String::new(); let mut token = String::new();
let mut delim_stack = vec![')', ' ']; let mut delim_stack = Vec::new();
let mut ref_stack = vec![Box::new(Cell{ let mut ref_stack = vec![];
car: Ctr::None,
cdr: Ctr::None
})];
/* Iterate over document /* Iterate over document
* Manage currently sought delimiter * Manage currently sought delimiter
*/ */
for c in document.chars() { for c in document.chars() {
let mut needs_alloc = true; let mut needs_alloc = false;
let mut alloc_list = false; let mut alloc_list = false;
let delim = delim_stack.last().unwrap(); let delim: char;
// case only happens when escaping a char if let Some(d) = delim_stack.last() {
if *delim == '*' { delim = *d;
token.push(c);
// normal delimiter cases if delim == '*' {
} else if c == *delim { token.push(c);
// reset comment line status delim_stack.pop();
if *delim == '\n' { continue;
ign = false
}
// catch too many list end // normal delimiter cases
// set alloc_list } else if c == delim {
if *delim == ')' { needs_alloc = true;
alloc_list = true; // reset comment line status
if ref_stack.len() < 1 { if delim == '\n' {
return Err("too many end parens".to_string()); ign = false
} }
// catch too many list end
// set alloc_list
if delim == ')' {
alloc_list = true;
if ref_stack.len() < 1 {
return Err("too many end parens".to_string());
}
}
delim_stack.pop();
// if we are in a commented out space, skip this char
} else if ign {
continue;
} }
delim_stack.pop(); }
// try to generalize all whitespace // try to generalize all whitespace
} else if *delim == ' ' && char::is_whitespace(c) { if !needs_alloc && char::is_whitespace(c) && !is_str {
delim_stack.pop(); // dont make empty tokens just because the document has consecutive whitespace
if token.len() == 0 {
// match a delimiter continue;
} else { }
needs_alloc = false; needs_alloc = true;
}
// match a delimiter
if !needs_alloc {
match c { match c {
// add a new Cell reference to the stack // add a new Cell reference to the stack
'(' => { '(' => {
if token != "" || *(delim_stack.last().unwrap()) != ' ' { if token != "" {
return Err("list started in middle of another token".to_string()); return Err("list started in middle of another token".to_string());
} }
@ -136,62 +146,52 @@ fn process(document: String) -> Result<Box<Cell>, String> {
token.push(c) token.push(c)
} }
} }
}
if ign {
continue;
}
/* 1. Handle allocation of new Ctr /* 1. Handle allocation of new Ctr
* 2. Handle expansion of current list ref * 2. Handle expansion of current list ref
*/ */
if needs_alloc { } else {
if delim_stack.len() == 0 {
delim_stack.push(' ');
}
if token.len() == 0 && !is_str && !alloc_list { if token.len() == 0 && !is_str && !alloc_list {
return Err("Empty token".to_string()); return Err("Empty token".to_string());
} }
let mut current_cell_ref = ref_stack.pop().unwrap(); let mut current_cell_ref = ref_stack.pop().unwrap();
// throws warning (overwritten before read) not sure how to handle let mut obj;
let mut obj = Ctr::None; if token.len() > 0 {
if alloc_list { if is_str {
// we should never hit this but if we do I want to know obj = Ctr::STRING(token);
if token.len() > 0 { is_str = false;
return Err("list/token conflict".to_string()); } else if token == "true" {
obj = Ctr::BOOL(true);
} else if token == "false" {
obj = Ctr::BOOL(false);
} else if let Ok(i) = token.parse::<i128>() {
obj = Ctr::INTEGER(i);
} else if let Ok(f) = token.parse::<f64>() {
obj = Ctr::FLOAT(f);
} else if let Some(s) = tok_is_symbol(&token) {
obj = Ctr::SYMBOL(s);
} else {
return Err(format!("Unparsable token:{}", token));
} }
// return if we have finished the document token = String::new();
if ref_stack.len() == 0 { append(&mut current_cell_ref, obj);
return Ok(current_cell_ref);
}
obj = Ctr::CELL(Box::new(*current_cell_ref));
current_cell_ref = ref_stack.pop().unwrap();
} else if is_str {
obj = Ctr::STRING(token);
is_str = false;
} else if token == "true" {
obj = Ctr::BOOL(true);
} else if token == "false" {
obj = Ctr::BOOL(false);
} else if let Ok(i) = token.parse::<i128>() {
obj = Ctr::INTEGER(i);
} else if let Ok(f) = token.parse::<f64>() {
obj = Ctr::FLOAT(f);
} else if let Some(s) = tok_is_symbol(&token) {
obj = Ctr::SYMBOL(s);
} else {
return Err(format!("Unparsable token: {}", token));
} }
append(&mut current_cell_ref, obj); if alloc_list {
// return if we have finished the document
if ref_stack.len() == 0 {
return Ok(Box::new(*current_cell_ref));
}
// reset token // shortening this will lead to naught but pain
token = String::new(); obj = Ctr::CELL(Box::new(*current_cell_ref));
current_cell_ref = ref_stack.pop().unwrap();
append(&mut current_cell_ref, obj);
}
ref_stack.push(current_cell_ref);
} }
} }

View file

@ -2,13 +2,47 @@ mod lex_tests {
use relish::ast::{lex}; use relish::ast::{lex};
#[test] #[test]
fn test_lex_basic_list() { fn test_lex_basic_pair() {
let document: &str = "(hello \"world\")"; let document: &str = "(hello 'world')";
let output: &str = "(hello 'world' nil)";
match lex(document.to_string()) { match lex(document.to_string()) {
Ok(box_cell) => { Ok(box_cell) => {
assert_eq!(format!("{}", *box_cell), document.to_string()); assert_eq!(format!("{}", *box_cell), output.to_string());
}, },
Err(_s) => assert!(false) Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
/// Lexes a flat list mixing a symbol, a quoted string and integers, then
/// compares the `Display` rendering of the result against the expected text
/// (which ends with a trailing `nil`).
#[test]
fn test_lex_basic_list() {
    let source = "(hello 'world' 1 2 3)";
    let expected = "(hello 'world' 1 2 3 nil)";

    // Render the lexed tree up front; a lexer error is printed and fails the test.
    let rendered = match lex(source.to_string()) {
        Ok(root) => format!("{}", *root),
        Err(reason) => {
            print!("{}\n", reason);
            assert!(false);
            return;
        }
    };

    assert_eq!(rendered, expected.to_string());
}
#[test]
fn test_lex_complex_list() {
let document: &str = "(hello 'world' (1 2 (1 2 3)) 1 2 3)";
let output: &str = "(hello 'world' (1 2 (1 2 3 nil) nil) 1 2 3 nil)";
match lex(document.to_string()) {
Ok(box_cell) => {
assert_eq!(format!("{}", *box_cell), output.to_string());
},
Err(s) => {
print!("{}\n", s);
assert!(false);
}
} }
} }
} }