Decomposer: fixes from found code

This commit adds a new utility, the decomposer, which has primarily
been used to exercise the AST against Scheme code found in the wild
(on the internet). The decomposer times and tests the lexing and
parsing of any document full of Scheme.

It also includes additional test cases and fixes for logic issues
found during that testing.
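
Roughly, a decomposer run boils down to timing the lexer and the
parser over a whole document. The sketch below is illustrative only:
the helper, the file name, and the stand-in workload are placeholders,
not the decomposer's actual code.

    use std::time::{Duration, Instant};

    // Illustrative helper: run a closure and report how long it took.
    fn timed<T>(f: impl FnOnce() -> T) -> (T, Duration) {
        let start = Instant::now();
        let out = f();
        (out, start.elapsed())
    }

    fn main() {
        // "found.scm" is a placeholder for whatever document is under test.
        let source = std::fs::read_to_string("found.scm").unwrap_or_default();
        // The real run would lex and then parse `source` at this point;
        // counting characters stands in so the sketch compiles on its own.
        let (count, took) = timed(|| source.chars().count());
        println!("scanned {count} chars in {took:?}");
    }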

Signed-off-by: Ava Affine <ava@sunnypup.io>
Ava Apples Affine 2025-05-21 14:48:36 -07:00
parent 86f905ba1d
commit e4c6e0924a
7 changed files with 417 additions and 40 deletions

@@ -62,7 +62,7 @@ pub struct ParseError(pub &'static str, pub Option<Result<LexToken, LexError>>);
 impl Display for ParseError {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         let err_snippet_start = |t: &LexToken| -> usize {
-            /* backtrack from current index until we either hit
+            /* backtrack from current index until we either hit
             * - beginning of line
             * - 25 characters ago
             * - the doc Start
@@ -187,7 +187,7 @@ fn read_number(token: LexToken) -> Result<Number, ParseError> {
 }
 fn read_char(token: LexToken) -> Result<u8, ParseError> {
-    if token.end_idx - token.start_idx < 2 {
+    if token.end_idx - token.start_idx < 3 {
         return Err(ParseError(E_CHAR_TRUNCATED, Some(Ok(token))))
     }
@@ -202,7 +202,7 @@ fn read_char(token: LexToken) -> Result<u8, ParseError> {
         "space" => Ok(32),
         "tab" => Ok(11),
         _ if token.source_doc[token.start_idx + 2..].starts_with('x') &&
-            token.end_idx - token.start_idx > 2 => {
+            token.end_idx - token.start_idx > 3 => {
             if token.end_idx - token.start_idx > 5 {
                 return Err(ParseError(E_CHAR_TOO_LONG, Some(Ok(token))))
             }
@@ -327,7 +327,10 @@ impl Parser {
         loop {
             let next_tok = self.lexer.next();
             if let None = next_tok {
-                return Err(ParseError(E_COLLECTION_TRUNC, None))
+                if let Some(e) = &self.lexer.has_error_state {
+                    return Err(ParseError(E_LEX_ERROR, Some(Err(e.clone()))))
+                }
+                return Err(ParseError(E_COLLECTION_TRUNC, Some(Ok(token))))
             }
             let tok = next_tok.unwrap();
@@ -476,7 +479,7 @@ impl Parser {
             }
         // Lexer error
-        } else if self.lexer.has_error_state.is_some() {
+        } else if let Some(e) = &self.lexer.has_error_state && e.0 != E_END_OF_DOCUMENT {
             Err(ParseError(E_LEX_ERROR,
                 Some(Err(self.lexer.has_error_state.clone().unwrap()))))
@@ -511,7 +514,10 @@ mod tests {
         ("(hello \"world\")", "(hello \"world\")"),
         ("; big doc string\n(one two)", "(one two)"),
         ("(list #(vect 2 3 #u8(0 0)))", "(list #(vect 2 3 #u8(0 0)))"),
-        ("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)")
+        ("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)"),
+        ("(- q 1)", "(- q 1)"),
+        ("(+ q 1)", "(+ q 1)"),
+        ("(#\\x)", "(#\\x)"),
     ];
     let sad_cases = vec![