- added more unit tests for lexer

- corrected defects revealed by added tests
This commit is contained in:
Aidan 2021-01-24 22:04:26 -08:00
parent e4f2fbaa70
commit 172aa4ea4b
No known key found for this signature in database
GPG key ID: 327711E983899316
3 changed files with 113 additions and 77 deletions

View file

@ -20,6 +20,7 @@ use std::boxed::Box;
// Container // Container
#[derive(Debug)]
pub enum Ctr { pub enum Ctr {
SYMBOL(String), SYMBOL(String),
STRING(String), STRING(String),
@ -34,6 +35,7 @@ pub enum Ctr {
* Holds two Containers. * Holds two Containers.
* Basic building block for more complex data structures. * Basic building block for more complex data structures.
*/ */
#[derive(Debug)]
pub struct Cell { pub struct Cell {
/* "Cell Address Register" /* "Cell Address Register"
* Historical way of referring to the first value in a cell. * Historical way of referring to the first value in a cell.

View file

@ -62,33 +62,35 @@ fn process(document: String) -> Result<Box<Cell>, String> {
let mut is_str = false; let mut is_str = false;
let mut ign = false; let mut ign = false;
let mut token = String::new(); let mut token = String::new();
let mut delim_stack = vec![')', ' ']; let mut delim_stack = Vec::new();
let mut ref_stack = vec![Box::new(Cell{ let mut ref_stack = vec![];
car: Ctr::None,
cdr: Ctr::None
})];
/* Iterate over document /* Iterate over document
* Manage currently sought delimiter * Manage currently sought delimiter
*/ */
for c in document.chars() { for c in document.chars() {
let mut needs_alloc = true; let mut needs_alloc = false;
let mut alloc_list = false; let mut alloc_list = false;
let delim = delim_stack.last().unwrap(); let delim: char;
// case only happens when escaping a char if let Some(d) = delim_stack.last() {
if *delim == '*' { delim = *d;
if delim == '*' {
token.push(c); token.push(c);
delim_stack.pop();
continue;
// normal delimiter cases // normal delimiter cases
} else if c == *delim { } else if c == delim {
needs_alloc = true;
// reset comment line status // reset comment line status
if *delim == '\n' { if delim == '\n' {
ign = false ign = false
} }
// catch too many list end // catch too many list end
// set alloc_list // set alloc_list
if *delim == ')' { if delim == ')' {
alloc_list = true; alloc_list = true;
if ref_stack.len() < 1 { if ref_stack.len() < 1 {
return Err("too many end parens".to_string()); return Err("too many end parens".to_string());
@ -96,17 +98,25 @@ fn process(document: String) -> Result<Box<Cell>, String> {
} }
delim_stack.pop(); delim_stack.pop();
// if we are in a commented out space, skip this char
} else if ign {
continue;
}
}
// try to generalize all whitespace // try to generalize all whitespace
} else if *delim == ' ' && char::is_whitespace(c) { if !needs_alloc && char::is_whitespace(c) && !is_str {
delim_stack.pop(); // don't make empty tokens just because the document has consecutive whitespace
if token.len() == 0 {
continue;
}
needs_alloc = true;
}
// match a delimiter // match a delimiter
} else { if !needs_alloc {
needs_alloc = false;
match c { match c {
// add a new Cell reference to the stack // add a new Cell reference to the stack
'(' => { '(' => {
if token != "" || *(delim_stack.last().unwrap()) != ' ' { if token != "" {
return Err("list started in middle of another token".to_string()); return Err("list started in middle of another token".to_string());
} }
@ -136,42 +146,19 @@ fn process(document: String) -> Result<Box<Cell>, String> {
token.push(c) token.push(c)
} }
} }
}
if ign {
continue;
}
/* 1. Handle allocation of new Ctr /* 1. Handle allocation of new Ctr
* 2. Handle expansion of current list ref * 2. Handle expansion of current list ref
*/ */
if needs_alloc { } else {
if delim_stack.len() == 0 {
delim_stack.push(' ');
}
if token.len() == 0 && !is_str && !alloc_list { if token.len() == 0 && !is_str && !alloc_list {
return Err("Empty token".to_string()); return Err("Empty token".to_string());
} }
let mut current_cell_ref = ref_stack.pop().unwrap(); let mut current_cell_ref = ref_stack.pop().unwrap();
// throws warning (overwritten before read) not sure how to handle let mut obj;
let mut obj = Ctr::None;
if alloc_list {
// we should never hit this but if we do I want to know
if token.len() > 0 { if token.len() > 0 {
return Err("list/token conflict".to_string()); if is_str {
}
// return if we have finished the document
if ref_stack.len() == 0 {
return Ok(current_cell_ref);
}
obj = Ctr::CELL(Box::new(*current_cell_ref));
current_cell_ref = ref_stack.pop().unwrap();
} else if is_str {
obj = Ctr::STRING(token); obj = Ctr::STRING(token);
is_str = false; is_str = false;
} else if token == "true" { } else if token == "true" {
@ -185,13 +172,26 @@ fn process(document: String) -> Result<Box<Cell>, String> {
} else if let Some(s) = tok_is_symbol(&token) { } else if let Some(s) = tok_is_symbol(&token) {
obj = Ctr::SYMBOL(s); obj = Ctr::SYMBOL(s);
} else { } else {
return Err(format!("Unparsable token: {}", token)); return Err(format!("Unparsable token:{}", token));
} }
append(&mut current_cell_ref, obj);
// reset token
token = String::new(); token = String::new();
append(&mut current_cell_ref, obj);
}
if alloc_list {
// return if we have finished the document
if ref_stack.len() == 0 {
return Ok(Box::new(*current_cell_ref));
}
// shortening this will lead to naught but pain
obj = Ctr::CELL(Box::new(*current_cell_ref));
current_cell_ref = ref_stack.pop().unwrap();
append(&mut current_cell_ref, obj);
}
ref_stack.push(current_cell_ref);
} }
} }

View file

@ -2,13 +2,47 @@ mod lex_tests {
use relish::ast::{lex}; use relish::ast::{lex};
#[test] #[test]
fn test_lex_basic_list() { fn test_lex_basic_pair() {
let document: &str = "(hello \"world\")"; let document: &str = "(hello 'world')";
let output: &str = "(hello 'world' nil)";
match lex(document.to_string()) { match lex(document.to_string()) {
Ok(box_cell) => { Ok(box_cell) => {
assert_eq!(format!("{}", *box_cell), document.to_string()); assert_eq!(format!("{}", *box_cell), output.to_string());
}, },
Err(_s) => assert!(false) Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
/// Lexes a flat list holding a bare word, a single-quoted string and three
/// numbers, then compares the `Display` rendering of the result against the
/// expected text.
///
/// The expected rendering carries an explicit trailing `nil` after the last
/// element, which is why it differs from the input document.
#[test]
fn test_lex_basic_list() {
    let document = "(hello 'world' 1 2 3)";
    let output = "(hello 'world' 1 2 3 nil)";
    match lex(document.to_string()) {
        Ok(box_cell) => assert_eq!(box_cell.to_string(), output),
        // Fail with the lexer's own message so it appears in the test report,
        // rather than printing it separately and asserting on a constant.
        Err(s) => panic!("lex returned an error: {}", s),
    }
}
#[test]
fn test_lex_complex_list() {
let document: &str = "(hello 'world' (1 2 (1 2 3)) 1 2 3)";
let output: &str = "(hello 'world' (1 2 (1 2 3 nil) nil) 1 2 3 nil)";
match lex(document.to_string()) {
Ok(box_cell) => {
assert_eq!(format!("{}", *box_cell), output.to_string());
},
Err(s) => {
print!("{}\n", s);
assert!(false);
}
} }
} }
} }