Lexer and S-Expression data types
The lexer is complete with tests. It fully encapsulates the logic of splitting an input document into a stream of tokens. It can be instantiated from an Rc<str>, meaning no lifetimes need be managed, and references to the original document (like a string view) can be passed around freely. The Lexer implements the iterator method, which should help in elegantly designing REPLs, compilers, etc. The S-Expression data type represents the parsed AST. The actual parsing logic is yet to be added. It is intended that the AST be the last step before compiling to bytecode. The data representation here is cons cells of datum. Formatting is implemented. Signed-off-by: Ava Affine <ava@sunnypup.io>
This commit is contained in:
commit
6554a0639a
10 changed files with 1533 additions and 0 deletions
128
mycelium/src/sexpr.rs
Normal file
128
mycelium/src/sexpr.rs
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
/* Mycelium Scheme
|
||||
* Copyright (C) 2025 Ava Affine
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use core::fmt::{self, Formatter};
|
||||
use alloc::rc::Rc;
|
||||
use alloc::vec::Vec;
|
||||
use alloc::string::String;
|
||||
|
||||
#[derive(Default, Clone)]
|
||||
pub enum Datum {
|
||||
Number(f64),
|
||||
Bool(bool),
|
||||
List(Ast),
|
||||
Symbol(String),
|
||||
String(Vec<u8>),
|
||||
Vector(Vec<Datum>),
|
||||
ByteVector(Vec<u8>),
|
||||
#[default]
|
||||
None,
|
||||
}
|
||||
|
||||
impl fmt::Display for Datum {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Datum::Number(n) => write!(f, "{n}"),
|
||||
Datum::Bool(n) => write!(f, "{n}"),
|
||||
Datum::List(n) => write!(f, "{n}"),
|
||||
Datum::Symbol(n) => write!(f, "{n}"),
|
||||
Datum::String(n) =>
|
||||
write!(f, "\"{}\"", String::from_utf8_lossy(&*n)),
|
||||
Datum::Vector(n) => write!(f, "#({n:?})"),
|
||||
Datum::ByteVector(n) => write!(f, "#u8({n:?})"),
|
||||
Datum::None => Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* WARNING
|
||||
* This is in a sense overloaded.
|
||||
* Instead of using this to print debugging information for the
|
||||
* Rust code, I have instead overloaded it to print the most
|
||||
* maximal expanded valid syntax for this Datum
|
||||
*/
|
||||
impl fmt::Debug for Datum {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Datum::Number(n) => write!(f, "{n}"),
|
||||
Datum::Bool(n) => write!(f, "{n}"),
|
||||
Datum::List(n) => write!(f, "{n}"),
|
||||
Datum::Symbol(n) => write!(f, "{n}"),
|
||||
Datum::String(n) =>
|
||||
write!(f, "\"{}\"", String::from_utf8_lossy(&*n)),
|
||||
Datum::Vector(n) => write!(f, "#({n:?})"),
|
||||
Datum::ByteVector(n) => write!(f, "#u8({n:?})"),
|
||||
Datum::None => Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Clone)]
|
||||
pub struct Ast(Rc<Datum>, Rc<Datum>);
|
||||
|
||||
impl Iterator for Ast {
|
||||
type Item = Rc<Datum>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Datum::List(n) = &*self.1 {
|
||||
let tmp_pair = n;
|
||||
self.0 = tmp_pair.0.clone();
|
||||
self.1 = tmp_pair.1.clone();
|
||||
return Some(self.0.clone());
|
||||
}
|
||||
|
||||
if let Datum::None = *self.1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let tmp = self.1.clone();
|
||||
self.0 = Rc::from(Datum::None);
|
||||
self.1 = Rc::from(Datum::None);
|
||||
return Some(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Ast {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "({}", self.0)?;
|
||||
let mut cur = self;
|
||||
while let Datum::List(next) = &*cur.1 {
|
||||
cur = &next;
|
||||
write!(f, " {}", cur.0)?;
|
||||
}
|
||||
|
||||
if let Datum::None = &*cur.1 {
|
||||
write!(f, ")")
|
||||
} else {
|
||||
write!(f, " {})", cur.1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Ast {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "({}", self.0)?;
|
||||
let mut cur = self;
|
||||
let mut end = 1;
|
||||
while let Datum::List(next) = &*cur.1 {
|
||||
cur = &next;
|
||||
end += 1;
|
||||
write!(f, "({} . ", cur.0)?
|
||||
}
|
||||
write!(f, "{}{}", cur.1, ")".repeat(end))
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue