Decomposer: fixes from found code
This commit includes a new utility, the decomposer, which has primarily been used to test the AST against Scheme code found in the wild (on the internet). The decomposer times and tests the lexing and parsing of any document containing Scheme. This commit also includes additional test cases and fixes for logic issues uncovered during that testing. Signed-off-by: Ava Affine <ava@sunnypup.io>
This commit is contained in:
parent
86f905ba1d
commit
e4c6e0924a
7 changed files with 417 additions and 40 deletions
|
|
@ -62,7 +62,7 @@ pub struct ParseError(pub &'static str, pub Option<Result<LexToken, LexError>>);
|
|||
impl Display for ParseError {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
let err_snippet_start = |t: &LexToken| -> usize {
|
||||
/* backtrack from current index until we either hit
|
||||
/* backtrack from current index until we either hit
|
||||
* - beginning of line
|
||||
* - 25 characters ago
|
||||
* - the doc Start
|
||||
|
|
@ -187,7 +187,7 @@ fn read_number(token: LexToken) -> Result<Number, ParseError> {
|
|||
}
|
||||
|
||||
fn read_char(token: LexToken) -> Result<u8, ParseError> {
|
||||
if token.end_idx - token.start_idx < 2 {
|
||||
if token.end_idx - token.start_idx < 3 {
|
||||
return Err(ParseError(E_CHAR_TRUNCATED, Some(Ok(token))))
|
||||
}
|
||||
|
||||
|
|
@ -202,7 +202,7 @@ fn read_char(token: LexToken) -> Result<u8, ParseError> {
|
|||
"space" => Ok(32),
|
||||
"tab" => Ok(11),
|
||||
_ if token.source_doc[token.start_idx + 2..].starts_with('x') &&
|
||||
token.end_idx - token.start_idx > 2 => {
|
||||
token.end_idx - token.start_idx > 3 => {
|
||||
if token.end_idx - token.start_idx > 5 {
|
||||
return Err(ParseError(E_CHAR_TOO_LONG, Some(Ok(token))))
|
||||
}
|
||||
|
|
@ -327,7 +327,10 @@ impl Parser {
|
|||
loop {
|
||||
let next_tok = self.lexer.next();
|
||||
if let None = next_tok {
|
||||
return Err(ParseError(E_COLLECTION_TRUNC, None))
|
||||
if let Some(e) = &self.lexer.has_error_state {
|
||||
return Err(ParseError(E_LEX_ERROR, Some(Err(e.clone()))))
|
||||
}
|
||||
return Err(ParseError(E_COLLECTION_TRUNC, Some(Ok(token))))
|
||||
}
|
||||
|
||||
let tok = next_tok.unwrap();
|
||||
|
|
@ -476,7 +479,7 @@ impl Parser {
|
|||
}
|
||||
|
||||
// Lexer error
|
||||
} else if self.lexer.has_error_state.is_some() {
|
||||
} else if let Some(e) = &self.lexer.has_error_state && e.0 != E_END_OF_DOCUMENT {
|
||||
Err(ParseError(E_LEX_ERROR,
|
||||
Some(Err(self.lexer.has_error_state.clone().unwrap()))))
|
||||
|
||||
|
|
@ -511,7 +514,10 @@ mod tests {
|
|||
("(hello \"world\")", "(hello \"world\")"),
|
||||
("; big doc string\n(one two)", "(one two)"),
|
||||
("(list #(vect 2 3 #u8(0 0)))", "(list #(vect 2 3 #u8(0 0)))"),
|
||||
("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)")
|
||||
("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)"),
|
||||
("(- q 1)", "(- q 1)"),
|
||||
("(+ q 1)", "(+ q 1)"),
|
||||
("(#\\x)", "(#\\x)"),
|
||||
];
|
||||
|
||||
let sad_cases = vec![
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue