- added more unit tests for lexer
- corrected defects revealed by added tests
This commit is contained in:
parent
e4f2fbaa70
commit
172aa4ea4b
3 changed files with 113 additions and 77 deletions
|
|
@ -20,6 +20,7 @@ use std::boxed::Box;
|
|||
|
||||
|
||||
// Container
|
||||
#[derive(Debug)]
|
||||
pub enum Ctr {
|
||||
SYMBOL(String),
|
||||
STRING(String),
|
||||
|
|
@ -34,6 +35,7 @@ pub enum Ctr {
|
|||
* Holds two Containers.
|
||||
* Basic building block for more complex data structures.
|
||||
*/
|
||||
#[derive(Debug)]
|
||||
pub struct Cell {
|
||||
/* "Cell Address Register"
|
||||
* Historical way of referring to the first value in a cell.
|
||||
|
|
|
|||
94
src/lex.rs
94
src/lex.rs
|
|
@ -62,33 +62,35 @@ fn process(document: String) -> Result<Box<Cell>, String> {
|
|||
let mut is_str = false;
|
||||
let mut ign = false;
|
||||
let mut token = String::new();
|
||||
let mut delim_stack = vec![')', ' '];
|
||||
let mut ref_stack = vec![Box::new(Cell{
|
||||
car: Ctr::None,
|
||||
cdr: Ctr::None
|
||||
})];
|
||||
let mut delim_stack = Vec::new();
|
||||
let mut ref_stack = vec![];
|
||||
|
||||
/* Iterate over document
|
||||
* Manage currently sought delimiter
|
||||
*/
|
||||
for c in document.chars() {
|
||||
let mut needs_alloc = true;
|
||||
let mut needs_alloc = false;
|
||||
let mut alloc_list = false;
|
||||
let delim = delim_stack.last().unwrap();
|
||||
// case only happens when escaping a char
|
||||
if *delim == '*' {
|
||||
let delim: char;
|
||||
if let Some(d) = delim_stack.last() {
|
||||
delim = *d;
|
||||
|
||||
if delim == '*' {
|
||||
token.push(c);
|
||||
delim_stack.pop();
|
||||
continue;
|
||||
|
||||
// normal delimiter cases
|
||||
} else if c == *delim {
|
||||
} else if c == delim {
|
||||
needs_alloc = true;
|
||||
// reset comment line status
|
||||
if *delim == '\n' {
|
||||
if delim == '\n' {
|
||||
ign = false
|
||||
}
|
||||
|
||||
// catch too many list ends
|
||||
// set alloc_list
|
||||
if *delim == ')' {
|
||||
if delim == ')' {
|
||||
alloc_list = true;
|
||||
if ref_stack.len() < 1 {
|
||||
return Err("too many end parens".to_string());
|
||||
|
|
@ -96,17 +98,25 @@ fn process(document: String) -> Result<Box<Cell>, String> {
|
|||
}
|
||||
delim_stack.pop();
|
||||
|
||||
// if we are in a commented out space, skip this char
|
||||
} else if ign {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// try to generalize all whitespace
|
||||
} else if *delim == ' ' && char::is_whitespace(c) {
|
||||
delim_stack.pop();
|
||||
|
||||
if !needs_alloc && char::is_whitespace(c) && !is_str {
|
||||
// don't make empty tokens just because the document has consecutive whitespace
|
||||
if token.len() == 0 {
|
||||
continue;
|
||||
}
|
||||
needs_alloc = true;
|
||||
}
|
||||
// match a delimiter
|
||||
} else {
|
||||
needs_alloc = false;
|
||||
if !needs_alloc {
|
||||
match c {
|
||||
// add a new Cell reference to the stack
|
||||
'(' => {
|
||||
if token != "" || *(delim_stack.last().unwrap()) != ' ' {
|
||||
if token != "" {
|
||||
return Err("list started in middle of another token".to_string());
|
||||
}
|
||||
|
||||
|
|
@ -136,42 +146,19 @@ fn process(document: String) -> Result<Box<Cell>, String> {
|
|||
token.push(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ign {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* 1. Handle allocation of new Ctr
|
||||
* 2. Handle expansion of current list ref
|
||||
*/
|
||||
if needs_alloc {
|
||||
if delim_stack.len() == 0 {
|
||||
delim_stack.push(' ');
|
||||
}
|
||||
|
||||
} else {
|
||||
if token.len() == 0 && !is_str && !alloc_list {
|
||||
return Err("Empty token".to_string());
|
||||
}
|
||||
|
||||
let mut current_cell_ref = ref_stack.pop().unwrap();
|
||||
// throws warning (overwritten before read) not sure how to handle
|
||||
let mut obj = Ctr::None;
|
||||
if alloc_list {
|
||||
// we should never hit this but if we do I want to know
|
||||
let mut obj;
|
||||
if token.len() > 0 {
|
||||
return Err("list/token conflict".to_string());
|
||||
}
|
||||
|
||||
// return if we have finished the document
|
||||
if ref_stack.len() == 0 {
|
||||
return Ok(current_cell_ref);
|
||||
}
|
||||
|
||||
obj = Ctr::CELL(Box::new(*current_cell_ref));
|
||||
current_cell_ref = ref_stack.pop().unwrap();
|
||||
|
||||
} else if is_str {
|
||||
if is_str {
|
||||
obj = Ctr::STRING(token);
|
||||
is_str = false;
|
||||
} else if token == "true" {
|
||||
|
|
@ -188,10 +175,23 @@ fn process(document: String) -> Result<Box<Cell>, String> {
|
|||
return Err(format!("Unparsable token:{}", token));
|
||||
}
|
||||
|
||||
append(&mut current_cell_ref, obj);
|
||||
|
||||
// reset token
|
||||
token = String::new();
|
||||
append(&mut current_cell_ref, obj);
|
||||
}
|
||||
|
||||
if alloc_list {
|
||||
// return if we have finished the document
|
||||
if ref_stack.len() == 0 {
|
||||
return Ok(Box::new(*current_cell_ref));
|
||||
}
|
||||
|
||||
// shortening this will lead to naught but pain
|
||||
obj = Ctr::CELL(Box::new(*current_cell_ref));
|
||||
current_cell_ref = ref_stack.pop().unwrap();
|
||||
append(&mut current_cell_ref, obj);
|
||||
}
|
||||
|
||||
ref_stack.push(current_cell_ref);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,47 @@ mod lex_tests {
|
|||
use relish::ast::{lex};
|
||||
|
||||
#[test]
|
||||
fn test_lex_basic_list() {
|
||||
let document: &str = "(hello \"world\")";
|
||||
fn test_lex_basic_pair() {
|
||||
let document: &str = "(hello 'world')";
|
||||
let output: &str = "(hello 'world' nil)";
|
||||
match lex(document.to_string()) {
|
||||
Ok(box_cell) => {
|
||||
assert_eq!(format!("{}", *box_cell), document.to_string());
|
||||
assert_eq!(format!("{}", *box_cell), output.to_string());
|
||||
},
|
||||
Err(_s) => assert!(false)
|
||||
Err(s) => {
|
||||
print!("{}\n", s);
|
||||
assert!(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lex_basic_list() {
|
||||
let document: &str = "(hello 'world' 1 2 3)";
|
||||
let output: &str = "(hello 'world' 1 2 3 nil)";
|
||||
match lex(document.to_string()) {
|
||||
Ok(box_cell) => {
|
||||
assert_eq!(format!("{}", *box_cell), output.to_string());
|
||||
},
|
||||
Err(s) => {
|
||||
print!("{}\n", s);
|
||||
assert!(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lex_complex_list() {
|
||||
let document: &str = "(hello 'world' (1 2 (1 2 3)) 1 2 3)";
|
||||
let output: &str = "(hello 'world' (1 2 (1 2 3 nil) nil) 1 2 3 nil)";
|
||||
match lex(document.to_string()) {
|
||||
Ok(box_cell) => {
|
||||
assert_eq!(format!("{}", *box_cell), output.to_string());
|
||||
},
|
||||
Err(s) => {
|
||||
print!("{}\n", s);
|
||||
assert!(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue