/* Mycelium Scheme
 * Copyright (C) 2025 Ava Affine
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

use core::fmt::Display;
use core::cell::RefCell;

use crate::lexer::{
    LexError,
    LexToken,
    LexTokenType,
    Lexer,
    E_CHAR_TOO_LONG,
    E_END_OF_DOCUMENT
};

use crate::number::{Number, Numeric};
use crate::sexpr::{Datum, Ast};

use alloc::vec::Vec;
use alloc::vec;
use alloc::rc::Rc;
use alloc::string::String;

pub const E_LEX_ERROR: &str = "error in lexing document";
pub const E_EXTRA_CLOSE: &str = "closing parenthesis closes nothing";
pub const E_TERRIBLE: &str = "something has gone terribly wrong....";
pub const E_VECTOR_DOT: &str = "dotted notation not valid in vectors";
pub const E_DOT_NO_LIST: &str = "dotted notation used outside of list";
pub const E_CHAR_TRUNCATED: &str = "character literal is truncated";
pub const E_CHAR_HEX_PARSE: &str = "hexadecimal character literal failed to parse";
pub const E_COLLECTION_TRUNC: &str = "collection is truncated";
pub const E_BV_BADBYTE: &str = "number provided is not a real byte";
pub const E_BV_NONBYTE: &str = "bytevector elements must all be bytes";
pub const E_TOO_MANY_DOT: &str = "valid dot notation only includes one dot";
pub const E_DOT_IDX: &str = "dot should precede only the last element in a list";
pub const E_DOT_EMPTY: &str = "cannot apply dotted notation to otherwise empty list";
pub const E_UNQUOTE_NONQQ: &str = "unquote must be within a quasiquoted form";
pub const E_UNQUOTE_SPL_NONQQ: &str = "unquote-splicing must be within a quasiquoted form";
pub const E_UNQUOTE_SPL_COLL: &str = "expected list or vector after unquote-splicing";

/* ParseError
 * 0: error string
 * 1: either problematic lexing token, or a lexing error
 */
#[derive(Clone)]
pub struct ParseError(pub &'static str, pub Option<Result<LexToken, LexError>>);
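
/* A sketch of the rendered message for a token-carrying error
 * (shape follows the write! calls in the Display impl below):
 *
 *     Error parsing syntax: <error string>
 *      problematic token: <token text>
 *      <up to 25 chars of surrounding source context>
 */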

impl Display for ParseError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let err_snippet_start = |t: &LexToken| -> usize {
            /* backtrack from current index until we either hit
             * - beginning of line
             * - 25 characters ago
             * - the doc start
             */
            if t.source_doc.len() < 25 {
                0
            } else {
                let mut idx = t.start_idx;
                // the idx > 0 check guards against underflow when the
                // token starts within the first 25 chars of the doc
                while idx > 0 && t.start_idx - idx < 25 {
                    idx -= 1;
                    // char_indices() on the subslice yields indices
                    // relative to idx, so the first char is always at 0
                    if t.source_doc[idx..]
                        .char_indices()
                        .next()
                        .is_some_and(|(_, x)| x == '\n') {
                        idx += 1;
                        break;
                    }
                }

                idx
            }
        };

        let err_snippet_end = |t: &LexToken| -> usize {
            /* read through document until we either hit
             * - end of line
             * - 25 characters forward
             * - the doc end
             */
            if t.source_doc.len() - t.end_idx < 25 {
                t.source_doc.len()
            } else {
                let mut idx = t.end_idx;
                while idx - t.end_idx < 25 {
                    idx += 1;
                    if t.source_doc[idx..]
                        .char_indices()
                        .next()
                        .is_some_and(|(_, x)| x == '\n') {
                        break;
                    }
                }

                idx
            }
        };

        match &self.1 {
            Some(Ok(token)) => {
                write!(f, "Error parsing syntax: {}\n", self.0)?;
                write!(f, " problematic token: {}\n",
                    &token.source_doc[token.start_idx..token.end_idx])?;
                write!(f, " {}\n",
                    &token.source_doc[err_snippet_start(token)..err_snippet_end(token)])
            },

            Some(Err(e)) => e.fmt(f),

            None => write!(f, "Error parsing syntax: {}\n", self.0),
        }
    }
}

pub struct Parser {
    lexer: Lexer,
    pub has_error_state: Option<ParseError>,
    delayed: Vec<Rc<Datum>>,
    quasiquoted: bool,
}

/* The From and Iterator traits serve as the primary
 * interface for working with the parser. The expected flow
 * is to construct a Lexer first, convert it into a Parser,
 * and then iterate to produce the final AST, which we can
 * convert into a VM image once the compile step is
 * finished.
 */
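
/* A minimal usage sketch (mirrors the tests at the bottom of this
 * file; `src` stands in for any source document):
 *
 *     let mut p = Parser::from(Lexer::from(Rc::from(src)));
 *     while let Some(datum) = p.next() {
 *         // ... compile or print each top-level datum ...
 *     }
 *     if let Some(e) = p.has_error_state {
 *         // iteration stops here on any error; note that ordinary
 *         // end of document also lands here as E_END_OF_DOCUMENT
 *     }
 */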

impl From<Lexer> for Parser {
    fn from(l: Lexer) -> Parser {
        Parser {
            lexer: l,
            has_error_state: None,
            delayed: vec![],
            quasiquoted: false
        }
    }
}

impl Iterator for Parser {
    type Item = Rc<Datum>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.has_error_state.is_some() {
            return None;
        }

        if !self.delayed.is_empty() {
            return self.delayed.pop();
        }

        let res = self.get_next_datum();
        if let Err(ref e) = res {
            self.has_error_state = Some(e.clone());
        }

        res.ok()
    }
}

fn read_number(token: LexToken) -> Result<Number, ParseError> {
    match (&token.source_doc[token.start_idx..token.end_idx]).parse::<Number>() {
        Ok(num) => Ok(num),
        Err(e) => Err(ParseError(e, Some(Ok(token)))),
    }
}
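
/* Character tokens arrive with their leading "#\" included, so the
 * payload starts at start_idx + 2. A few accepted forms, per the
 * match arms below:
 *
 *     #\a        -> 97    (single character)
 *     #\newline  -> 10    (named character)
 *     #\x41      -> 65    (hex escape, at most two digits)
 */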
fn read_char(token: LexToken) -> Result<u8, ParseError> {
    if token.end_idx - token.start_idx < 3 {
        return Err(ParseError(E_CHAR_TRUNCATED, Some(Ok(token))))
    }

    match &token.source_doc[token.start_idx + 2..token.end_idx] {
        "alarm" => Ok(7),
        "backspace" => Ok(8),
        "delete" => Ok(127),
        "escape" => Ok(27),    // ESC is 0x1b
        "newline" => Ok(b'\n'),
        "null" => Ok(0),
        "return" => Ok(13),
        "space" => Ok(32),
        "tab" => Ok(9),        // horizontal tab is 0x09
        _ if token.source_doc[token.start_idx + 2..].starts_with('x') &&
                token.end_idx - token.start_idx > 3 => {
            if token.end_idx - token.start_idx > 5 {
                return Err(ParseError(E_CHAR_TOO_LONG, Some(Ok(token))))
            }

            match u8::from_str_radix(
                &token.source_doc[token.start_idx + 3..token.end_idx],
                16) {
                Ok(u) => Ok(u),
                Err(_) => Err(ParseError(E_CHAR_HEX_PARSE, Some(Ok(token))))
            }
        },
        _ => Ok(token.source_doc.as_bytes()[token.start_idx + 2])
    }
}

fn read_bool(token: LexToken) -> bool {
    match &token.source_doc[token.start_idx..token.end_idx] {
        "#t" => true,
        "#f" => false,
        _ => panic!("impossible boolean")
    }
}

fn read_string(token: LexToken) -> Vec<u8> {
    if token.end_idx - token.start_idx < 3 {
        // empty string other than delimiters
        Vec::default()
    } else {
        token.source_doc[token.start_idx + 1..token.end_idx - 1]
            .as_bytes()
            .to_vec()
    }
}

impl Parser {
    /* Rules we must mind:
     * 0. at this stage, drop and ignore comments and directives
     * 1. quote, quasiquote, unquote, and unquote-splicing
     *    all require another input after them (excluding
     *    collection end)
     * 2. unquote-splicing explicitly requires a spliceable
     *    form (verify against R7RS)
     * 3. vectors and lists may have nested collections in them,
     *    so collection state is tracked by recursing into
     *    complete_collection
     * 4. list dotted notation needs the next datum put in the
     *    cdr (see the layout sketch below)
     * 5. bytevectors can only have numbers from 0-255 in them
     */
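
    /* A sketch of the cons-cell layout these rules produce, using the
     * Ast(car, cdr) pairs from crate::sexpr (shapes inferred from
     * complete_collection's assembly step below):
     *
     *     (a . b)   -> List(Ast(a, b))
     *     (a b)     -> List(Ast(a, List(Ast(b, None))))
     *     (a b . c) -> List(Ast(a, List(Ast(b, c))))
     */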

    fn complete_quote(&mut self) -> Result<Rc<Datum>, ParseError> {
        let next = self.get_next_datum()?;
        Ok(Rc::from(Datum::List(Rc::from(Ast(
            Rc::from(Datum::Symbol(String::from("quote"))),
            Rc::from(Datum::List(Rc::from(Ast(
                next,
                Rc::from(Datum::None)
            ))))
        )))))
    }

    fn complete_unquote_splicing(&mut self, tok: LexToken) -> Result<Rc<Datum>, ParseError> {
        let next = self.get_next_datum()?;
        match *next {
            Datum::List(_) | Datum::Vector(_) | Datum::Symbol(_) => (),
            _ => return Err(ParseError(E_UNQUOTE_SPL_COLL, Some(Ok(tok))))
        }

        Ok(Rc::from(Datum::List(Rc::from(Ast(
            Rc::from(Datum::Symbol(String::from("unquote-splicing"))),
            Rc::from(Datum::List(Rc::from(Ast(
                next,
                Rc::from(Datum::None)
            ))))
        )))))
    }

    fn complete_unquote(&mut self) -> Result<Rc<Datum>, ParseError> {
        let next = self.get_next_datum()?;
        Ok(Rc::from(Datum::List(Rc::from(Ast(
            Rc::from(Datum::Symbol(String::from("unquote"))),
            Rc::from(Datum::List(Rc::from(Ast(
                next,
                Rc::from(Datum::None)
            ))))
        )))))
    }

    fn complete_quasiquote(&mut self) -> Result<Rc<Datum>, ParseError> {
        let prev = self.quasiquoted; // handle nesting appropriately
        self.quasiquoted = true;
        let next = self.get_next_datum()?;
        self.quasiquoted = prev;

        Ok(Rc::from(Datum::List(Rc::from(Ast(
            Rc::from(Datum::Symbol(String::from("quasiquote"))),
            Rc::from(Datum::List(Rc::from(Ast(
                next,
                Rc::from(Datum::None)
            ))))
        )))))
    }

    fn complete_collection(&mut self, token: LexToken) -> Result<Rc<Datum>, ParseError> {
        let is_bv = matches!(token.token_type, LexTokenType::ByteVectorStart);

        let mut lex_stack = vec![];
        let mut bv_stack = vec![];

        /* counting indexes helps greatly with calculating position-dependent
         * syntax rules like dot notation in lists
         *
         * dot_idx holds: (iteration index of the dot, the dot token itself
         * for error reporting, the datum captured for the cdr)
         */
        let mut iter_count = 0;
        let mut dot_idx = (None, None, None);
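
        /* Worked example for "(a . b)", to make the bookkeeping below
         * concrete (indices are iteration counts, not byte offsets):
         *
         *     iter 0: `a`  pushed to lex_stack
         *     iter 1: `.`  dot_idx = (Some(1), Some(tok), None)
         *     iter 2: `b`  pushed, then popped into dot_idx.2
         *     iter 3: `)`  breaks the loop
         */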

        loop {
            let next_tok = self.lexer.next();
            if next_tok.is_none() {
                if let Some(e) = &self.lexer.has_error_state {
                    return Err(ParseError(E_LEX_ERROR, Some(Err(e.clone()))))
                }
                return Err(ParseError(E_COLLECTION_TRUNC, Some(Ok(token))))
            }

            let tok = next_tok.unwrap();

            match tok.token_type {
                // Universal cases
                LexTokenType::Comment | LexTokenType::Directive => continue,
                LexTokenType::NumTypes =>
                    return Err(ParseError(E_TERRIBLE, Some(Ok(tok)))),
                LexTokenType::Unquote if !self.quasiquoted =>
                    return Err(ParseError(E_UNQUOTE_NONQQ, Some(Ok(tok)))),
                LexTokenType::UnquoteSplice if !self.quasiquoted =>
                    return Err(ParseError(E_UNQUOTE_SPL_NONQQ, Some(Ok(tok)))),

                // CollectionEnd must take precedence over the dot notation case
                LexTokenType::CollectionEnd => break,
                // only one datum may follow the dot (it lands at idx + 1);
                // any token after that is a syntax error
                _ if let Some(idx) = dot_idx.0 && iter_count - idx > 1 =>
                    return Err(ParseError(E_DOT_IDX, Some(Ok(dot_idx.1.unwrap())))),

                LexTokenType::Dot if token.token_type != LexTokenType::ListStart =>
                    return Err(ParseError(E_VECTOR_DOT, Some(Ok(tok)))),

                // List, Vector cases
                LexTokenType::ListStart | LexTokenType::VectorStart |
                LexTokenType::ByteVectorStart if !is_bv =>
                    lex_stack.push(self.complete_collection(tok)?),
                LexTokenType::String if !is_bv =>
                    lex_stack.push(Rc::from(Datum::String(read_string(tok)))),
                LexTokenType::Number if !is_bv =>
                    lex_stack.push(Rc::from(Datum::Number(read_number(tok)?))),
                LexTokenType::Char if !is_bv =>
                    lex_stack.push(Rc::from(Datum::Char(read_char(tok)?))),
                LexTokenType::Boolean if !is_bv =>
                    lex_stack.push(Rc::from(Datum::Bool(read_bool(tok)))),
                LexTokenType::Symbol if !is_bv =>
                    lex_stack.push(Rc::from(Datum::Symbol(
                        String::from(&tok.source_doc[tok.start_idx..tok.end_idx])))),
                LexTokenType::Quote if !is_bv =>
                    lex_stack.push(self.complete_quote()?),
                LexTokenType::QuasiQuote if !is_bv =>
                    lex_stack.push(self.complete_quasiquote()?),
                LexTokenType::Unquote if !is_bv && self.quasiquoted =>
                    lex_stack.push(self.complete_unquote()?),
                LexTokenType::UnquoteSplice if !is_bv && self.quasiquoted =>
                    lex_stack.push(self.complete_unquote_splicing(tok)?),

                // List only cases
                LexTokenType::Dot => if dot_idx.0.is_some() {
                    return Err(ParseError(E_TOO_MANY_DOT, Some(Ok(tok))))
                } else {
                    dot_idx = (Some(iter_count), Some(tok), None)
                },

                // ByteVector cases
                LexTokenType::Number if is_bv => {
                    let n = read_number(tok.clone())?
                        .make_inexact();

                    // reject anything outside 0-255 or with a
                    // fractional part, e.g. #u8(256) or #u8(3.5)
                    if n.0 < 0.0 || n.0 > 255.0 || n.0.fract() != 0.0 {
                        return Err(ParseError(E_BV_BADBYTE, Some(Ok(tok))))
                    }

                    bv_stack.push(n.0 as u8);
                },

                _ if is_bv => return Err(ParseError(E_BV_NONBYTE, Some(Ok(tok)))),

                // This should never get touched: every token type is handled above
                _ => unreachable!("impossible case in parser::complete_collection"),
            }

            // the datum right after the dot becomes the cdr
            if let Some(idx) = dot_idx.0 && iter_count == idx + 1 {
                dot_idx.2 = Some(lex_stack.pop());
            }

            iter_count += 1;
        }

        if is_bv {
            return Ok(Rc::from(Datum::ByteVector(RefCell::from(bv_stack))))
        }

        if token.token_type == LexTokenType::VectorStart {
            return Ok(Rc::from(Datum::Vector(RefCell::from(lex_stack))))
        }

        // handle an empty list
        if lex_stack.is_empty() {
            // don't try to do something like "( . 'thing)"
            if let (_, Some(node), _) = dot_idx {
                return Err(ParseError(E_DOT_EMPTY, Some(Ok(node))))
            }
            return Ok(Rc::from(Datum::List(Rc::from(Ast(Rc::from(Datum::None),
                Rc::from(Datum::None))))))
        }

        // build the list back-to-front: the last cell's cdr is either
        // the dotted datum or Datum::None
        let mut from_rear: Rc<Ast>;
        if let (_, _, Some(node)) = dot_idx {
            from_rear = Rc::from(Ast(lex_stack.pop().unwrap(), node.unwrap()));
        } else {
            from_rear = Rc::from(Ast(lex_stack.pop().unwrap(), Rc::from(Datum::None)));
        }

        lex_stack.iter()
            .rev()
            .for_each(|x| {
                from_rear = Rc::from(Ast(x.clone(), Rc::from(Datum::List(from_rear.clone()))));
            });

        Ok(Rc::from(Datum::List(from_rear)))
    }

    fn get_next_datum(&mut self) -> Result<Rc<Datum>, ParseError> {
        if let Some(token) = self.lexer.next() {
            match token.token_type {
                // normal paths:
                LexTokenType::String => Ok(Rc::from(Datum::String(read_string(token)))),
                LexTokenType::Number => Ok(Rc::from(Datum::Number(read_number(token)?))),
                LexTokenType::Char => Ok(Rc::from(Datum::Char(read_char(token)?))),
                LexTokenType::Symbol => Ok(Rc::from(Datum::Symbol(String::from(
                    &token.source_doc[token.start_idx..token.end_idx])))),
                LexTokenType::Boolean => Ok(Rc::from(Datum::Bool(read_bool(token)))),
                LexTokenType::VectorStart | LexTokenType::ListStart |
                LexTokenType::ByteVectorStart => self.complete_collection(token),
                LexTokenType::Quote => self.complete_quote(),
                LexTokenType::QuasiQuote => self.complete_quasiquote(),
                LexTokenType::Unquote if self.quasiquoted => self.complete_unquote(),
                LexTokenType::UnquoteSplice if self.quasiquoted =>
                    self.complete_unquote_splicing(token),

                // immediate errors:
                LexTokenType::CollectionEnd => Err(ParseError(E_EXTRA_CLOSE, Some(Ok(token)))),
                LexTokenType::NumTypes => Err(ParseError(E_TERRIBLE, Some(Ok(token)))),
                LexTokenType::Dot => Err(ParseError(E_DOT_NO_LIST, Some(Ok(token)))),
                LexTokenType::Unquote if !self.quasiquoted =>
                    Err(ParseError(E_UNQUOTE_NONQQ, Some(Ok(token)))),
                LexTokenType::UnquoteSplice if !self.quasiquoted =>
                    Err(ParseError(E_UNQUOTE_SPL_NONQQ, Some(Ok(token)))),

                // ignore comment, directive:
                _ => self.get_next_datum(),
            }

        // Lexer error
        } else if let Some(e) = &self.lexer.has_error_state && e.0 != E_END_OF_DOCUMENT {
            Err(ParseError(E_LEX_ERROR, Some(Err(e.clone()))))

        // End of document
        } else {
            Err(ParseError(E_END_OF_DOCUMENT, None))
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_cases() {
        let happy_cases = vec![
            // case, result
            ("\"test\"", "\"test\""),
            ("test", "test"),
            ("(1 2 3)", "(1 2 3)"),
            ("'test", "(quote test)"),
            ("`test", "(quasiquote test)"),
            ("`(,one)", "(quasiquote ((unquote one)))"),
            ("`(test ,@(two))", "(quasiquote (test (unquote-splicing (two))))"),
            ("#u8(0 14 249)", "#u8(0 14 249)"),
            ("(nested lists (are pretty cool))", "(nested lists (are pretty cool))"),
            ("((nested) lists (are (pretty) cool))", "((nested) lists (are (pretty) cool))"),
            ("(dotted . notation)", "(dotted . notation)"),
            ("(longer dotted . notation)", "(longer dotted . notation)"),
            ("(hello \"world\")", "(hello \"world\")"),
            ("; big doc string\n(one two)", "(one two)"),
            ("(list #(vect 2 3 #u8(0 0)))", "(list #(vect 2 3 #u8(0 0)))"),
            ("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)"),
            ("(- q 1)", "(- q 1)"),
            ("(+ q 1)", "(+ q 1)"),
            ("(#\\x)", "(#\\x)"),
        ];

        let sad_cases = vec![
            "(",
            "( one two ",
            "( one two three ( four )",
            ")",
            "#(st",
            "#u8(0 ",
            "#u8(256)",
            "#u8(two)",
            "(one two ,three)",
            "(one two ,@three)",
            "`(one two ,@4.0)",
            "(. two)",
            "(one . two . three)",
        ];

        println!("+ Testing Happy Cases...");
        happy_cases.iter()
            .for_each(|(case, result)| {
                println!(" - case: {}", *case);
                let mut p = Parser::from(Lexer::from(Rc::from(*case)));
                let res = p.next();
                if res.is_none() {
                    println!("{}", p.has_error_state.unwrap());
                }
                assert_eq!(
                    format!("{}", res.unwrap()),
                    format!("{}", result)
                );
            });

        println!("+ Testing Sad Cases...");
        sad_cases.iter()
            .for_each(|case| {
                println!(" - case: {}", *case);
                let mut p = Parser::from(Lexer::from(Rc::from(*case)));
                assert!(p.next().is_none() && p.has_error_state.is_some())
            });
    }
}