diff --git a/mycelium/src/lexer.rs b/mycelium/src/lexer.rs index 6ccdbe3..e497f94 100644 --- a/mycelium/src/lexer.rs +++ b/mycelium/src/lexer.rs @@ -67,7 +67,7 @@ impl fmt::Display for LexError { } else { let mut idx = self.1; - while self.1 - idx > 25 { + while self.1 - idx < 25 { idx -= 1; if self.2[idx..] .char_indices() @@ -107,8 +107,11 @@ impl fmt::Display for LexError { } }; - write!(f, "Error when lexing document here:\n\n")?; - write!(f, " {}\n", &self.2[err_snippet_start()..err_snippet_end()])?; + write!(f, "Error when lexing document here: (idx: {})\n", self.1)?; + let s = err_snippet_start(); + let st = self.1 - err_snippet_start(); + write!(f, " {}\n", &self.2[s..err_snippet_end()])?; + write!(f, " {}^\n", " ".repeat(st))?; write!(f, "Error: {}\n", self.0) } } @@ -132,7 +135,7 @@ pub enum LexTokenType { Quote, QuasiQuote, Unquote, - UnquoteSpliceTemplate, + UnquoteSplice, NumTypes, } @@ -506,10 +509,11 @@ impl Lexer { return Ok(()) } + // make sure next character is a proper delimiter adv!().and_then(|_| if !delim(self.current_char()) { return Err(LexError(E_UNDELIMITED_ESC, self.current_index, self.document.clone())) - } else { if in_string {self.current_index = saved_idx }; Ok(()) }) + } else { self.current_index = saved_idx; Ok(()) }) } /* Called to output a token by the iterator implementation @@ -565,7 +569,8 @@ impl Lexer { if self.current_char() == ',' { if let Some(x) = self.peek_next_char() && x == '@'{ - output = Some(self.cut_new_token(LexTokenType::UnquoteSpliceTemplate)); + self.advance_char(); + output = Some(self.cut_new_token(LexTokenType::UnquoteSplice)); } else { output = Some(self.cut_new_token(LexTokenType::Unquote)); } @@ -578,7 +583,7 @@ impl Lexer { if output.is_none() { loop { let c = self.current_char(); - if !c.is_alphanumeric() && !LEX_SPECIAL.contains(&c) && c != ' ' { + if !c.is_alphanumeric() && !LEX_SPECIAL.contains(&c) && c != ' ' { output = Some(Err(LexError(E_INCOMPREHENSIBLE, self.current_index, self.document.clone()))); break; @@ -638,7 +643,8 @@ mod tests { /* Char Cases */ ( // HAPPY CASES - vec!["#\\a", "#\\t", "#\\\"", "#\\t", "#\\space", "#\\x20"], + vec!["#\\a", "#\\t", "#\\\"", "#\\t", "#\\space", + "#\\alarm", "#\\s", "#\\x20"], // SAD CASES vec!["\\c", "\\x20"] @@ -743,9 +749,9 @@ mod tests { vec![] ), - /* UnquoteSpliceTemplate cases */ ( + /* UnquoteSplice cases */ ( // HAPPY CASES - vec![",@x", ",@(", ",@"], + vec![",@x", ",@(", ",@", ",@(two)"], // SAD CASES vec![] @@ -755,7 +761,7 @@ mod tests { let no_subtoken_check_cases = [ LexTokenType::Dot as u8, LexTokenType::Unquote as u8, - LexTokenType::UnquoteSpliceTemplate as u8 + LexTokenType::UnquoteSplice as u8 ]; cases.iter().enumerate().for_each(|(idx, case)| { @@ -834,4 +840,22 @@ mod tests { assert!(l.next().is_none()); assert!(l.has_error_state.is_some()); } + + #[test] + fn char_lex_with_close() { + let mut res = vec![]; + Lexer::from(Rc::from("(#\\a)")) + .into_iter() + .collect_into(&mut res); + assert_eq!(res.len(), 3); + + assert_eq!(res[0].token_type, LexTokenType::ListStart); + assert_eq!(&res[0].source_doc[res[0].start_idx..res[0].end_idx], "("); + + assert_eq!(res[1].token_type, LexTokenType::Char); + assert_eq!(&res[1].source_doc[res[1].start_idx..res[1].end_idx], "#\\a"); + + assert_eq!(res[2].token_type, LexTokenType::CollectionEnd); + assert_eq!(&res[2].source_doc[res[2].start_idx..res[2].end_idx], ")"); + } } diff --git a/mycelium/src/lib.rs b/mycelium/src/lib.rs index 42130cc..83820cc 100644 --- a/mycelium/src/lib.rs +++ b/mycelium/src/lib.rs @@ -18,7 +18,7 @@ 
 #![cfg_attr(not(test), no_std)]
 #![feature(let_chains)]
 #![feature(iter_collect_into)]
-#![feature(impl_trait_in_assoc_type)]
+#![feature(if_let_guard)]
 
 pub mod sexpr;
 pub mod lexer;
diff --git a/mycelium/src/number.rs b/mycelium/src/number.rs
index 21d2b39..5ee65be 100644
--- a/mycelium/src/number.rs
+++ b/mycelium/src/number.rs
@@ -38,7 +38,7 @@ pub const E_SCIENTIFIC_MULTI_E: &str = "scientific notation implies only a s
 pub const E_SCIENTIFIC_OPERAND: &str = "couldnt parse 32 bit float operand";
 pub const E_SCIENTIFIC_POWER: &str = "couldnt parse integer power";
 
-trait Numeric: Copy + Clone + Debug + FromStr + Into<String> {
+pub trait Numeric: Copy + Clone + Debug + FromStr + Into<String> {
     fn is_exact(&self) -> bool;
     fn make_inexact(&self) -> Float;
     fn make_exact(&self) -> Fraction;
@@ -46,7 +46,7 @@
 
 #[derive(Copy, Clone, Debug, PartialEq)]
-pub struct ScientificNotation (f32, isize);
+pub struct ScientificNotation (pub f32, pub isize);
 
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum SymbolicNumber {
@@ -57,10 +57,10 @@
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
-pub struct Fraction (isize, isize);
+pub struct Fraction (pub isize, pub isize);
 
 #[derive(Copy, Clone, Debug, PartialEq)]
-pub struct Float (f64);
+pub struct Float (pub f64);
 
 #[derive(Copy, Clone, Debug)]
 pub enum Number {
@@ -481,7 +481,7 @@ impl FromStr for Fraction {
 
 impl Into<String> for Fraction {
     fn into(self) -> String {
-        format!("#e{}/{}", self.0, self.1)
+        format!("{}/{}", self.0, self.1)
     }
 }
 
@@ -508,7 +508,11 @@ impl FromStr for Float {
 
 impl Into<String> for Float {
     fn into(self) -> String {
-        format!("#i{}", self.0)
+        if self.is_exact() {
+            format!("{}", self.0)
+        } else {
+            format!("#i{}", self.0)
+        }
     }
 }
 
@@ -560,7 +564,7 @@ impl FromStr for ScientificNotation {
 
 impl Into<String> for ScientificNotation {
     fn into(self) -> String {
-        format!("#{}e{}", self.0, self.1)
+        format!("{}e{}", self.0, self.1)
    }
 }
diff --git a/mycelium/src/parser.rs b/mycelium/src/parser.rs
new file mode 100644
index 0000000..4b383a0
--- /dev/null
+++ b/mycelium/src/parser.rs
@@ -0,0 +1,556 @@
+/* Mycelium Scheme
+ * Copyright (C) 2025 Ava Affine
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+use core::fmt::Display;
+
+use crate::lexer::{
+    LexError,
+    LexToken,
+    LexTokenType,
+    Lexer,
+    E_CHAR_TOO_LONG,
+    E_END_OF_DOCUMENT
+};
+use crate::number::{Number, Numeric};
+use crate::sexpr::{Datum, Ast};
+
+use alloc::vec::Vec;
+use alloc::vec;
+use alloc::rc::Rc;
+use alloc::string::String;
+
+
+pub const E_LEX_ERROR: &str = "error in lexing document";
+pub const E_EXTRA_CLOSE: &str = "closing parenthesis closes nothing";
+pub const E_TERRIBLE: &str = "something has gone terribly wrong....";
+pub const E_VECTOR_DOT: &str = "dotted notation not valid in vectors";
+pub const E_DOT_NO_LIST: &str = "dotted notation used outside of list";
+pub const E_CHAR_TRUNCATED: &str = "character literal is truncated";
+pub const E_CHAR_HEX_PARSE: &str = "hexadecimal character literal failed to parse";
+pub const E_COLLECTION_TRUNC: &str = "collection is truncated";
+pub const E_BV_BADBYTE: &str = "number provided is not a real byte";
+pub const E_BV_NONBYTE: &str = "bytevector elements must all be bytes";
+pub const E_TOO_MANY_DOT: &str = "valid dot notation only includes one dot";
+pub const E_DOT_IDX: &str = "dot should precede only last element in list";
+pub const E_DOT_EMPTY: &str = "cannot apply dotted notation to otherwise empty list";
+pub const E_UNQUOTE_NONQQ: &str = "unquote must be within a quasiquoted form";
+pub const E_UNQUOTE_SPL_NONQQ: &str = "unquote-splicing must be within a quasiquoted form";
+pub const E_UNQUOTE_SPL_COLL: &str = "expected list, vector, or symbol after unquote-splicing";
+
+
+/* ParseError
+ * 0: error string
+ * 1: either the problematic lexing token, or a lexing error
+ */
+#[derive(Clone)]
+pub struct ParseError(pub &'static str, pub Option<Result<LexToken, LexError>>);
+
+impl Display for ParseError {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let err_snippet_start = |t: &LexToken| -> usize {
+            /* backtrack from the token start until we hit
+             * - beginning of line
+             * - 25 characters ago
+             * - the doc start
+             */
+            if t.start_idx < 25 {
+                0
+
+            } else {
+                let mut idx = t.start_idx;
+                while t.start_idx - idx < 25 {
+                    idx -= 1;
+                    if t.source_doc[idx..]
+                        .char_indices()
+                        .next()
+                        .is_some_and(|(_, x)| x == '\n') {
+                        idx += 1;
+                        break;
+                    }
+                }
+
+                idx
+            }
+        };
+
+        let err_snippet_end = |t: &LexToken| -> usize {
+            /* read through the document until we hit
+             * - end of line
+             * - 25 characters forward
+             * - the doc end
+             */
+            if t.source_doc.len() - t.end_idx < 25 {
+                t.source_doc.len()
+
+            } else {
+                let mut idx = t.end_idx;
+                while idx - t.end_idx < 25 {
+                    idx += 1;
+                    if t.source_doc[idx..]
+                        .char_indices()
+                        .next()
+                        .is_some_and(|(_, x)| x == '\n') {
+                        break;
+                    }
+                }
+
+                idx
+            }
+        };
+
+        if let Some(frag) = &self.1 {
+            match frag {
+                Ok(token) => {
+                    write!(f, "Error parsing syntax: {}\n", self.0)?;
+                    write!(f, "  problematic token: {}\n",
+                        &token.source_doc[token.start_idx..token.end_idx])?;
+                    write!(f, "  {}\n",
+                        &token.source_doc[err_snippet_start(token)..err_snippet_end(token)])?;
+                    // the error is fully reported; dont repeat the header below
+                    return Ok(());
+                },
+
+                Err(e) => {
+                    return e.fmt(f);
+                }
+            }
+        }
+
+        write!(f, "Error parsing syntax: {}\n", self.0)
+    }
+}
+
+pub struct Parser {
+    lexer: Lexer,
+    pub has_error_state: Option<ParseError>,
+    delayed: Vec<Rc<Datum>>,
+    quasiquoted: bool,
+}
+
+/* The From and Iterator traits serve as the primary
+ * interface for working with the parser. The expected flow
+ * is to make a Lexer first, then use type conversion to
+ * turn it into a Parser and then a final AST, which
+ * we can then convert into a VM image once the compile
+ * step is finished.
+ */
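+
+/* A minimal usage sketch (illustrative only, mirroring the tests at the
+ * bottom of this file; Parser implements Iterator with Item = Rc<Datum>):
+ *
+ *     let lexer = Lexer::from(Rc::from("(+ 1 2)"));
+ *     let data: Vec<Rc<Datum>> = Parser::from(lexer).collect();
+ */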
+
+impl From<Lexer> for Parser {
+    fn from(l: Lexer) -> Parser {
+        Parser {
+            lexer: l,
+            has_error_state: None,
+            delayed: vec![],
+            quasiquoted: false
+        }
+    }
+}
+
+impl Iterator for Parser {
+    type Item = Rc<Datum>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.has_error_state.is_some() {
+            return None;
+        }
+
+        if self.delayed.len() > 0 {
+            return self.delayed.pop()
+        }
+
+        let res = self.get_next_datum();
+        if let Err(ref e) = res {
+            self.has_error_state = Some(e.clone());
+        }
+
+        return res.ok()
+    }
+}
+
+fn read_number(token: LexToken) -> Result<Number, ParseError> {
+    return match (&token.source_doc[token.start_idx..token.end_idx]).parse::<Number>() {
+        Ok(num) => Ok(num),
+        Err(e) => Err(ParseError(e, Some(Ok(token)))),
+    }
+}
+
+fn read_char(token: LexToken) -> Result<u8, ParseError> {
+    // a char literal needs at least "#\" plus one character
+    if token.end_idx - token.start_idx < 3 {
+        return Err(ParseError(E_CHAR_TRUNCATED, Some(Ok(token))))
+    }
+
+    match &token.source_doc[token.start_idx + 2..token.end_idx] {
+        "alarm"     => Ok(7),
+        "backspace" => Ok(8),
+        "delete"    => Ok(127),
+        "escape"    => Ok(27),
+        "newline"   => Ok('\n' as u8),
+        "null"      => Ok(0),
+        "return"    => Ok(13),
+        "space"     => Ok(32),
+        "tab"       => Ok(9),
+        // "#\x" alone is just the character x; hex escapes need digits
+        _ if token.source_doc[token.start_idx + 2..].starts_with('x') &&
+            token.end_idx - token.start_idx > 3 => {
+            if token.end_idx - token.start_idx > 5 {
+                return Err(ParseError(E_CHAR_TOO_LONG, Some(Ok(token))))
+            }
+
+            match u8::from_str_radix(
+                &token.source_doc[token.start_idx + 3..token.end_idx],
+                16) {
+                Ok(u) => Ok(u),
+                Err(_) => Err(ParseError(E_CHAR_HEX_PARSE, Some(Ok(token))))
+            }
+        },
+        _ => Ok(token.source_doc.as_bytes()[token.start_idx + 2])
+    }
+}
+
+fn read_bool(token: LexToken) -> bool {
+    match &token.source_doc[token.start_idx..token.end_idx] {
+        "#t" => true,
+        "#f" => false,
+        _ => panic!("impossible boolean")
+    }
+}
+
+fn read_string(token: LexToken) -> Vec<u8> {
+    if token.end_idx - token.start_idx < 3 {
+        // empty string other than delimiters
+        Vec::default()
+    } else {
+        token.source_doc[token.start_idx + 1..token.end_idx - 1]
+            .as_bytes()
+            .to_vec()
+    }
+}
+
+impl Parser {
+    /* Rules we must mind:
+     * 0. at this stage, drop and ignore comments, directives
+     * 1. quote, quasiquote, unquote, and unquote splicing
+     *    all require another input after them (excluding
+     *    collection end)
+     * 2. unquote-splicing explicitly requires a form I think?
+     *    (verify)
+     * 3. vectors, lists, may have nested collections in them
+     *    so track collection state in the parser's stack.
+     * 4. list dotted notation needs next datum put in cdr.
+     * 5. bytevectors can only have numbers from 0-255 in them.
+     */
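+
+    /* Illustrative sketch of the shapes these rules produce
+     * (informal notation; see the tests at the bottom of this file):
+     *   "'x"         -> (quote x)
+     *   "`(a ,b)"    -> (quasiquote (a (unquote b)))
+     *   "(1 . 2)"    -> one Ast cell: car 1, cdr 2
+     *   "#u8(0 255)" -> ByteVector, each element checked to be a byte
+     */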
+
+    fn complete_quote(&mut self) -> Result<Rc<Datum>, ParseError> {
+        let next = self.get_next_datum()?;
+        Ok(Rc::from(Datum::List(Rc::from(Ast(
+            Rc::from(Datum::Symbol(String::from("quote"))),
+
+            Rc::from(Datum::List(Rc::from(Ast(
+                next,
+                Rc::from(Datum::None)
+            ))))
+        )))))
+    }
+
+    fn complete_unquote_splicing(&mut self, tok: LexToken) -> Result<Rc<Datum>, ParseError> {
+        let next = self.get_next_datum()?;
+        match *next {
+            Datum::List(_) | Datum::Vector(_) | Datum::Symbol(_) => (),
+            _ => return Err(ParseError(E_UNQUOTE_SPL_COLL, Some(Ok(tok))))
+        }
+
+        Ok(Rc::from(Datum::List(Rc::from(Ast(
+            Rc::from(Datum::Symbol(String::from("unquote-splicing"))),
+
+            Rc::from(Datum::List(Rc::from(Ast(
+                next,
+                Rc::from(Datum::None)
+            ))))
+        )))))
+    }
+
+    fn complete_unquote(&mut self) -> Result<Rc<Datum>, ParseError> {
+        let next = self.get_next_datum()?;
+        Ok(Rc::from(Datum::List(Rc::from(Ast(
+            Rc::from(Datum::Symbol(String::from("unquote"))),
+
+            Rc::from(Datum::List(Rc::from(Ast(
+                next,
+                Rc::from(Datum::None)
+            ))))
+        )))))
+    }
+
+    fn complete_quasiquote(&mut self) -> Result<Rc<Datum>, ParseError> {
+        let prev = self.quasiquoted; // handle nesting appropriately
+        self.quasiquoted = true;
+        let next = self.get_next_datum()?;
+        self.quasiquoted = prev;
+
+        Ok(Rc::from(Datum::List(Rc::from(Ast(
+            Rc::from(Datum::Symbol(String::from("quasiquote"))),
+
+            Rc::from(Datum::List(Rc::from(Ast(
+                next,
+                Rc::from(Datum::None)
+            ))))
+        )))))
+    }
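+
+    /* Nesting sketch: parsing "(list `(a ,b) ,c)" accepts ",b" because
+     * the flag is set while the quasiquoted form is read, but rejects
+     * ",c" because complete_quasiquote restores the previous flag on
+     * the way out. */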
+
+    fn complete_collection(&mut self, token: LexToken) -> Result<Rc<Datum>, ParseError> {
+        let is_bv = match token.token_type {
+            LexTokenType::ByteVectorStart => true,
+            _ => false,
+        };
+
+        let mut lex_stack = vec![];
+        let mut bv_stack = vec![];
+
+        /* counting indexes helps greatly with calculating position dependent
+         * syntax rules like dot notation in lists
+         */
+        let mut iter_count = 0;
+        let mut dot_idx = (None, None, None);
+
+        loop {
+            let next_tok = self.lexer.next();
+            if let None = next_tok {
+                return Err(ParseError(E_COLLECTION_TRUNC, None))
+            }
+
+            let tok = next_tok.unwrap();
+
+            match tok.token_type {
+                // Universal cases
+                LexTokenType::Comment | LexTokenType::Directive => continue,
+                LexTokenType::NumTypes =>
+                    return Err(ParseError(E_TERRIBLE, Some(Ok(tok)))),
+                LexTokenType::Unquote if !self.quasiquoted =>
+                    return Err(ParseError(E_UNQUOTE_NONQQ, Some(Ok(tok)))),
+                LexTokenType::UnquoteSplice if !self.quasiquoted =>
+                    return Err(ParseError(E_UNQUOTE_SPL_NONQQ, Some(Ok(tok)))),
+
+
+                // CollectionEnd must take precedence over the dot notation case
+                LexTokenType::CollectionEnd => break,
+                // anything more than one datum past the dot is an error
+                _ if let Some(idx) = dot_idx.0 && iter_count - idx > 1 =>
+                    return Err(ParseError(E_DOT_IDX, Some(Ok(dot_idx.1.unwrap())))),
+
+                LexTokenType::Dot if token.token_type != LexTokenType::ListStart =>
+                    return Err(ParseError(E_VECTOR_DOT, Some(Ok(tok)))),
+
+
+                // List, Vector cases
+                LexTokenType::ListStart | LexTokenType::VectorStart |
+                LexTokenType::ByteVectorStart if !is_bv =>
+                    lex_stack.push(self.complete_collection(tok)?),
+                LexTokenType::String if !is_bv =>
+                    lex_stack.push(Rc::from(Datum::String(read_string(tok)))),
+                LexTokenType::Number if !is_bv =>
+                    lex_stack.push(Rc::from(Datum::Number(read_number(tok)?))),
+                LexTokenType::Char if !is_bv =>
+                    lex_stack.push(Rc::from(Datum::Char(read_char(tok)?))),
+                LexTokenType::Boolean if !is_bv =>
+                    lex_stack.push(Rc::from(Datum::Bool(read_bool(tok)))),
+                LexTokenType::Symbol if !is_bv =>
+                    lex_stack.push(Rc::from(Datum::Symbol(
+                        String::from(&tok.source_doc[tok.start_idx..tok.end_idx])))),
+                LexTokenType::Quote if !is_bv =>
+                    lex_stack.push(self.complete_quote()?),
+                LexTokenType::QuasiQuote if !is_bv =>
+                    lex_stack.push(self.complete_quasiquote()?),
+                LexTokenType::Unquote if !is_bv && self.quasiquoted =>
+                    lex_stack.push(self.complete_unquote()?),
+                LexTokenType::UnquoteSplice if !is_bv && self.quasiquoted =>
+                    lex_stack.push(self.complete_unquote_splicing(tok)?),
+
+
+                // List only cases
+                LexTokenType::Dot => if let Some(_) = dot_idx.0 {
+                    return Err(ParseError(E_TOO_MANY_DOT, Some(Ok(tok))))
+                } else {
+                    dot_idx = (Some(iter_count), Some(tok), None)
+                },
+
+
+                // ByteVector cases
+                LexTokenType::Number if is_bv => {
+                    let n = read_number(tok.clone())?
+                        .make_inexact();
+
+                    if n.0 < 0.0 || n.0 > 255.0 || n.0.fract() != 0.0 {
+                        return Err(ParseError(E_BV_BADBYTE, Some(Ok(tok))))
+                    }
+
+                    bv_stack.push(n.0 as u8);
+                },
+
+                _ if is_bv => return Err(ParseError(E_BV_NONBYTE, Some(Ok(tok)))),
+
+                // This should never get touched
+                _ => todo!("theoretically impossible case in parser::complete_collection"),
+            }
+
+            // stash the datum that immediately follows the dot as the cdr
+            if let Some(idx) = dot_idx.0 && iter_count == idx + 1 {
+                dot_idx.2 = Some(lex_stack.pop());
+            }
+
+            iter_count += 1;
+        }
+
+        if is_bv {
+            return Ok(Rc::from(Datum::ByteVector(bv_stack)))
+        }
+
+        if token.token_type == LexTokenType::VectorStart {
+            return Ok(Rc::from(Datum::Vector(lex_stack)))
+        }
+
+        // handle an empty list
+        if lex_stack.len() < 1 {
+            // dont try to do something like "( . 'thing)"
+            if let (_, Some(node), _) = dot_idx {
+                return Err(ParseError(E_DOT_EMPTY, Some(Ok(node))))
+            }
+            return Ok(Rc::from(Datum::List(Rc::from(Ast(Rc::from(Datum::None),
+                                                        Rc::from(Datum::None))))))
+        }
+
+        let mut from_rear: Rc<Ast>;
+        if let (_, _, Some(node)) = dot_idx {
+            from_rear = Rc::from(Ast(lex_stack.pop().unwrap(), node.unwrap()));
+        } else {
+            from_rear = Rc::from(Ast(lex_stack.pop().unwrap(), Rc::from(Datum::None)));
+        }
+
+        lex_stack.iter()
+            .rev()
+            .for_each(|x| {
+                from_rear = Rc::from(Ast(x.clone(), Rc::from(Datum::List(from_rear.clone()))));
+            });
+
+        Ok(Rc::from(Datum::List(from_rear)))
+    }
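+
+    /* Sketch of the rear-first list construction for "(1 2 3)":
+     *   pop 3      -> Ast(3, None)
+     *   prepend 2  -> Ast(2, List(Ast(3, None)))
+     *   prepend 1  -> Ast(1, List(Ast(2, List(Ast(3, None)))))
+     * which displays as (1 2 3); a stashed dot cdr replaces the
+     * final None.
+     */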
+
+    fn get_next_datum(&mut self) -> Result<Rc<Datum>, ParseError> {
+        if let Some(token) = self.lexer.next() {
+            match token.token_type {
+                // normal paths:
+                LexTokenType::String => Ok(Rc::from(Datum::String(read_string(token)))),
+                LexTokenType::Number => Ok(Rc::from(Datum::Number(read_number(token)?))),
+                LexTokenType::Char => Ok(Rc::from(Datum::Char(read_char(token)?))),
+                LexTokenType::Symbol => Ok(Rc::from(Datum::Symbol(String::from(
+                    &token.source_doc[token.start_idx..token.end_idx])))),
+                LexTokenType::Boolean => Ok(Rc::from(Datum::Bool(read_bool(token)))),
+                LexTokenType::VectorStart | LexTokenType::ListStart |
+                LexTokenType::ByteVectorStart => self.complete_collection(token),
+                LexTokenType::Quote => self.complete_quote(),
+                LexTokenType::QuasiQuote => self.complete_quasiquote(),
+                LexTokenType::Unquote if self.quasiquoted => self.complete_unquote(),
+                LexTokenType::UnquoteSplice if self.quasiquoted =>
+                    self.complete_unquote_splicing(token),
+
+                // immediate errors:
+                LexTokenType::CollectionEnd => Err(ParseError(E_EXTRA_CLOSE, Some(Ok(token)))),
+                LexTokenType::NumTypes => Err(ParseError(E_TERRIBLE, Some(Ok(token)))),
+                LexTokenType::Dot => Err(ParseError(E_DOT_NO_LIST, Some(Ok(token)))),
+                LexTokenType::Unquote if !self.quasiquoted =>
+                    Err(ParseError(E_UNQUOTE_NONQQ, Some(Ok(token)))),
+                LexTokenType::UnquoteSplice if !self.quasiquoted =>
+                    Err(ParseError(E_UNQUOTE_SPL_NONQQ, Some(Ok(token)))),
+
+                // ignore comment, directive:
+                _ => self.get_next_datum(),
+            }
+
+        // Lexer error
+        } else if self.lexer.has_error_state.is_some() {
+            Err(ParseError(E_LEX_ERROR,
+                Some(Err(self.lexer.has_error_state.clone().unwrap()))))
+
+        // End of document
+        } else {
+            Err(ParseError(E_END_OF_DOCUMENT, None))
+        }
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_cases() {
+        let happy_cases = vec![
+            // case, result
+            ("\"test\"", "\"test\""),
+            ("test", "test"),
+            ("(1 2 3)", "(1 2 3)"),
+            ("'test", "(quote test)"),
+            ("`test", "(quasiquote test)"),
+            ("`(,one)", "(quasiquote ((unquote one)))"),
+            ("`(test ,@(two))", "(quasiquote (test (unquote-splicing (two))))"),
+            ("#u8(0 14 249)", "#u8(0 14 249)"),
+            ("(nested lists (are pretty cool))", "(nested lists (are pretty cool))"),
+            ("((nested) lists (are (pretty) cool))", "((nested) lists (are (pretty) cool))"),
+            ("(dotted . notation)", "(dotted . notation)"),
+            ("(longer dotted . notation)", "(longer dotted . notation)"),
+            ("(hello \"world\")", "(hello \"world\")"),
+            ("; big doc string\n(one two)", "(one two)"),
+            ("(list #(vect 2 3 #u8(0 0)))", "(list #(vect 2 3 #u8(0 0)))"),
+            ("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)")
+        ];
+
+        let sad_cases = vec![
+            "(",
+            "( one two ",
+            "( one two three ( four )",
+            ")",
+            "#(st",
+            "#u8(0 ",
+            "#u8(256)",
+            "#u8(two)",
+            "(one two ,three)",
+            "(one two ,@three)",
+            "`(one two ,@4.0)",
+            "(. two)",
+            "(one . two . three)",
+        ];
+
+        println!("+ Testing Happy Cases...");
+        happy_cases.iter()
+            .for_each(|(case, result)| {
+                println!("  - case: {}", *case);
+                let mut p = Parser::from(Lexer::from(Rc::from(*case)));
+                let res = p.next();
+                if let None = res {
+                    println!("{}", p.has_error_state.unwrap());
+                }
+                assert_eq!(
+                    format!("{}", res.unwrap()),
+                    format!("{}", result)
+                );
+            });
+
+        println!("+ Testing Sad Cases...");
+        sad_cases.iter()
+            .for_each(|case| {
+                println!("  - case: {}", *case);
+                let mut p = Parser::from(Lexer::from(Rc::from(*case)));
+                assert!(p.next().is_none() && p.has_error_state.is_some())
+            });
+    }
+}
diff --git a/mycelium/src/sexpr.rs b/mycelium/src/sexpr.rs
index 2219f4e..b478712 100644
--- a/mycelium/src/sexpr.rs
+++ b/mycelium/src/sexpr.rs
@@ -16,6 +16,7 @@
  */
 
 use core::fmt::{self, Formatter};
+use alloc::format;
 use alloc::rc::Rc;
 use alloc::vec::Vec;
 use alloc::string::String;
@@ -26,33 +27,51 @@ pub enum Datum {
     Number(Number),
     Bool(bool),
-    List(Ast),
+    List(Rc<Ast>),
     Symbol(String),
     Char(u8),
     String(Vec<u8>),
-    Vector(Vec<Datum>),
+    Vector(Vec<Rc<Datum>>),
     ByteVector(Vec<u8>),
     #[default]
     None,
 }
 
 fn byte_to_escaped_char(b: u8) -> String {
-    unimplemented!()
+    // printable ASCII passes through; control bytes (alarm,
+    // backspace, delete, ...) render as hex escapes
+    match b {
+        _ if b > 31 && b < 127 => String::from(b as char),
+        _ => format!("x{:x}", b),
+    }
+}
+
+fn fmt_vec<T: fmt::Display>(v: &Vec<T>) -> String {
+    if v.len() == 0 {
+        return String::new()
+    }
+    let mut s = format!("{}", v[0]);
+    let mut i = v.iter();
+    i.next(); // discard the element already formatted
+    i.for_each(|e| {
+        s = format!("{} {}", s, e);
+    });
+
+    s
+}
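+
+/* Rendering sketch (matches the parser tests):
+ *   Datum::ByteVector(vec![0, 14, 249]) displays as "#u8(0 14 249)"
+ *   Datum::Char(7) displays as "#\x7"
+ */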
"#({})", fmt_vec(n)), + Datum::ByteVector(n) => write!(f, "#u8({})", fmt_vec(n)), Datum::None => Ok(()) } } @@ -68,7 +87,7 @@ impl fmt::Debug for Datum { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { Datum::Number(n) => write!(f, "{}", Into::::into(*n)), - Datum::Bool(n) => write!(f, "{n}"), + Datum::Bool(n) => write!(f, "{}", if *n {"#t"} else {"#f"}), Datum::List(n) => write!(f, "{n}"), Datum::Char(n) => write!(f, "{}", byte_to_escaped_char(*n)), @@ -84,7 +103,7 @@ impl fmt::Debug for Datum { #[derive(Default, Clone)] -pub struct Ast(Rc, Rc); +pub struct Ast(pub Rc, pub Rc); impl Iterator for Ast { type Item = Rc; @@ -120,7 +139,7 @@ impl fmt::Display for Ast { if let Datum::None = &*cur.1 { write!(f, ")") } else { - write!(f, " {})", cur.1) + write!(f, " . {})", cur.1) } } }