diff --git a/Cargo.lock b/Cargo.lock index 7e5eb99..ecdcdf9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,6 +166,7 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" name = "mycelium" version = "0.1.0" dependencies = [ + "hyphae", "organelle", ] diff --git a/hyphae/src/heap.rs b/hyphae/src/heap.rs index b1b49ae..d303ce3 100644 --- a/hyphae/src/heap.rs +++ b/hyphae/src/heap.rs @@ -15,123 +15,168 @@ * along with this program. If not, see . */ -use core::fmt::{self, Formatter}; -use core::ops::Index; +use core::ops::{Index, Deref, DerefMut}; +use core::ptr::NonNull; use core::cell::RefCell; -use alloc::format; use alloc::rc::Rc; use alloc::vec::Vec; +use alloc::boxed::Box; use alloc::string::String; use organelle::Number; -#[derive(Default, Clone, PartialEq)] +/* NOTE + * decided not to implement a cache or a singleton heap manager + * because I did not want to involve a datatype that would add + * unneeded logic to where and how the Rcs get allocated or that + * would require relocation if more Rcs were allocated. Any + * ADT containing the source data referenced by Gc would add + * overhead without value. + * + * Meanwhile, just using allocated-at-site Rcs provides accurate + * reference counting garbage collection. We hack the Box::into_raw + * function to pass around heap allocated Rcs. + */ + +#[derive(Clone, PartialEq)] pub enum Datum { Number(Number), Bool(bool), - List(Rc), + Cons(Cons), Symbol(String), Char(u8), String(Vec), - Vector(RefCell>>), + Vector(RefCell>>), ByteVector(RefCell>), - #[default] - None, } -fn byte_to_escaped_char(b: u8) -> String { - // alarm, backspace, delete - match b { - _ if b > 31 && b < 127 => String::from(b as char), - _ => format!("x{:x}", b), - } -} - -fn fmt_vec(ve: &RefCell>) -> String { - let v = ve.borrow(); - if v.len() == 0 { - return String::new() - } - let mut s = format!("{}", v[0]); - let mut i = v.iter(); - i.next(); // discard - i.for_each(|e| { - s = format!("{} {}", s, e); - }); - - s -} - -impl fmt::Display for Datum { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Datum::Number(n) => write!(f, "{}", Into::::into(*n)), - Datum::Bool(n) => write!(f, "{}", if *n {"#t"} else {"#f"}), - Datum::List(n) => write!(f, "{n}"), - Datum::Symbol(n) => write!(f, "{n}"), - Datum::Char(n) => write!(f, "#\\{}", - byte_to_escaped_char(*n)), - Datum::String(n) => - write!(f, "\"{}\"", String::from_utf8_lossy(&*n)), - Datum::Vector(n) => write!(f, "#({})", fmt_vec(n)), - Datum::ByteVector(n) => write!(f, "#u8({})", fmt_vec(n)), - Datum::None => Ok(()) - } - } -} - -/* WARNING - * This is in a sense overloaded. - * Instead of using this to print debugging information for the - * Rust code, I have instead overloaded it to print the most - * maximal expanded valid syntax for this Datum +/* Gc + * This is a heap allocated Rc passed around such that it fits into + * a physical register. The pointer is to a Box>, but custom + * deref implementation will ensure that deref always points to the + * encapsulated T */ -impl fmt::Debug for Datum { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Datum::Number(n) => write!(f, "{}", Into::::into(*n)), - Datum::Bool(n) => write!(f, "{}", if *n {"#t"} else {"#f"}), - Datum::List(n) => write!(f, "{n}"), - Datum::Char(n) => write!(f, "{}", - byte_to_escaped_char(*n)), - Datum::Symbol(n) => write!(f, "{n}"), - Datum::String(n) => - write!(f, "\"{}\"", String::from_utf8_lossy(&*n)), - Datum::Vector(n) => write!(f, "#({n:?})"), - Datum::ByteVector(n) => write!(f, "#u8({n:?})"), - Datum::None => Ok(()) +#[repr(transparent)] +pub struct Gc(NonNull>); + +impl From> for Gc { + fn from(src: Rc) -> Self { + Gc(NonNull::new(Box::into_raw(Box::new(src.clone()))) + .expect("GC obj from rc nonnull ptr check")) + } +} + +impl From for Gc { + fn from(value: Datum) -> Self { + Gc(NonNull::new(Box::into_raw(Box::new(Rc::from(value)))) + .expect("GC obj from datum nonnull ptr check")) + } +} + +impl PartialEq for Gc { + fn eq(&self, other: &Self) -> bool { + self.deref().eq(other.deref()) + } + fn ne(&self, other: &Self) -> bool { + self.deref().ne(other.deref()) + } +} + +impl Deref for Gc { + type Target = T; + fn deref(&self) -> &Self::Target { + unsafe { + Rc::::as_ptr(self.0.as_ref()) + .as_ref() + .expect("GC obj deref inconsistent rc ptr") } } } +impl DerefMut for Gc { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + (Rc::::as_ptr(self.0.as_mut()) as *mut T) + .as_mut() + .expect("GC obj inconsistent rc ptr") + } + } +} -#[derive(Default, Clone, PartialEq)] -pub struct Ast(pub Rc, pub Rc); +// takes a pointer to target Rc +macro_rules! shallow_copy_rc { + ( $src:expr ) => { + unsafe { + NonNull::new(Box::into_raw(Box::new((*$src).clone()))) + .expect("GC obj shallow copy nonnull ptr check") + } + } +} -impl Ast { - pub fn subsl(&self, start: isize, end: isize) -> Ast { +impl Clone for Gc { + fn clone(&self) -> Self { + Gc(shallow_copy_rc!(self.0.as_ptr())) + } + + fn clone_from(&mut self, source: &Self) { + self.0 = shallow_copy_rc!(source.0.as_ptr()); + } +} + +impl Drop for Gc { + fn drop(&mut self) { + unsafe { + drop(Box::from_raw(self.0.as_ptr() as *mut Box>)) + } + } +} + +impl Gc { + #[inline] + pub fn deep_copy(&self) -> Gc { + Gc(unsafe { + NonNull::new(Box::into_raw(Box::new(Rc::from( + (*(self.0.as_ptr())).clone())))) + .expect("GC obj deep copy nonnull ptr check") + }) + } +} + +#[derive(Clone, PartialEq)] +pub struct Cons(pub Option>, pub Option>); + +impl Cons { + pub fn deep_copy(&self) -> Cons { + // TODO: recursive deep copy through the whole list + Cons(self.0.as_ref().map(|x| x.deep_copy()), + self.1.as_ref().map(|x| x.deep_copy())) + } + + pub fn subsl(&self, start: isize, end: isize) -> Cons { if end - start == 1 { - return Ast(Rc::from(self[start as usize].clone()), Rc::from(Datum::None)) + return Cons(Some(self[start as usize].clone()), None) } if end == 0 { - return Ast( - Rc::from((*(self.0)).clone()), - Rc::from(Datum::None) + return Cons( + self.0.clone(), + None ) } - let Datum::List(ref next) = *self.1 else { - panic!("index into improper list form") + let Some(ref next) = self.1 else { + panic!("out of bounds subsl of cons list") + }; + + let Datum::Cons(ref next) = **next else { + panic!("subsl of cons list not in standard form") }; if start <= 0 { - Ast( - Rc::from((*(self.0)).clone()), - Rc::from(Datum::List( - Rc::from(next.subsl(start - 1, end - 1)))) - ) + Cons(self.0.clone(), + Some(Datum::Cons(next.subsl(start - 1, end - 1)) + .into())) } else { next.subsl(start - 1, end - 1) @@ -139,81 +184,43 @@ impl Ast { } pub fn len(&self) -> usize { - let Datum::List(ref next) = *self.1 else { + let Some(_) = self.0 else { + return 0 + }; + + let Some(ref next) = self.1 else { return 1 }; + + let Datum::Cons(ref next) = **next else { + // weird list but okay + return 2 + }; + 1 + next.len() } } -impl Iterator for Ast { - type Item = Rc; - fn next(&mut self) -> Option { - if let Datum::List(n) = &*self.1 { - let tmp_pair = n; - self.0 = tmp_pair.0.clone(); - self.1 = tmp_pair.1.clone(); - return Some(self.0.clone()); - } - - if let Datum::None = *self.1 { - return None; - } - - let tmp = self.1.clone(); - self.0 = Rc::from(Datum::None); - self.1 = Rc::from(Datum::None); - return Some(tmp); - } -} - -impl Index for Ast { - type Output = Datum; +impl Index for Cons { + type Output = Gc; fn index(&self, index: usize) -> &Self::Output { if index == 0 { - if let Datum::None = *self.0 { - panic!("out of bounds indexing into AST") + if let Some(data) = &self.0 { + data } else { - self.0.as_ref() + panic!("out of bounds indexing into cons list") } } else { - let Datum::List(ref next) = *self.1 else { - panic!("out of bounds indexing into AST") + let Some(ref next) = self.1 else { + panic!("out of bounds indexing into cons list") }; - next.index(index - 1) + let Datum::Cons(ref next) = **next else { + panic!("cons list not in standard form") + }; + + &next[index - 1] } } } - -impl fmt::Display for Ast { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "({}", self.0)?; - let mut cur = self; - while let Datum::List(next) = &*cur.1 { - cur = &next; - write!(f, " {}", cur.0)?; - } - - if let Datum::None = &*cur.1 { - write!(f, ")") - } else { - write!(f, " . {})", cur.1) - } - } -} - -impl fmt::Debug for Ast { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "({}", self.0)?; - let mut cur = self; - let mut end = 1; - while let Datum::List(next) = &*cur.1 { - cur = &next; - end += 1; - write!(f, "({} . ", cur.0)? - } - write!(f, "{}{}", cur.1, ")".repeat(end)) - } -} diff --git a/hyphae/src/vm.rs b/hyphae/src/vm.rs index 8a932fa..7e244de 100644 --- a/hyphae/src/vm.rs +++ b/hyphae/src/vm.rs @@ -22,7 +22,7 @@ use crate::hmap::QuickMap; use crate::stackstack::StackStack; use crate::instr as i; use crate::util::{Operand, Program, Address}; -use crate::heap::Datum; +use crate::heap::{Gc, Datum}; use core::cell::RefCell; @@ -46,8 +46,8 @@ pub struct VM { pub traps: Vec>, // data registers - pub expr: Datum, - pub oper: [Datum; NUM_OPERAND_REGISTERS], + pub expr: Gc, + pub oper: [Gc; NUM_OPERAND_REGISTERS], // control flow registers pub retn: usize, @@ -144,7 +144,7 @@ impl VM { } $oper *match deref!(&instr.1[1]){ Datum::$in_type(l) => l, _ => e!("illegal argument to instruction"), - }) + }).into() } } @@ -210,24 +210,25 @@ impl VM { }, i::JMPIF => { - if let Datum::Bool(true) = self.expr { + if let Datum::Bool(true) = *self.expr { do_jmp!(0); } }, // boolean ops - i::EQ => self.expr = Datum::Bool(*deref!(&instr.1[0]) == *deref!(&instr.1[1])), + i::EQ => self.expr = + Datum::Bool(*deref!(&instr.1[0]) == *deref!(&instr.1[1])).into(), i::LT => lr_oper!(Number, <, Bool), i::GT => lr_oper!(Number, >, Bool), i::LTE => lr_oper!(Number, <=, Bool), i::GTE => lr_oper!(Number, >=, Bool), i::BOOL_NOT => { self.expr = Datum::Bool(!{ - let Datum::Bool(a) = self.expr else { + let Datum::Bool(a) = *self.expr else { e!("illegal argument to BOOL_NOT instruction"); }; a - }); + }).into(); }, i::BOOL_AND => lr_oper!(Bool, &&, Bool), @@ -239,11 +240,11 @@ impl VM { i::XOR => lr_oper!(Char, ^, Char), i::BYTE_NOT => { self.expr = Datum::Char(!{ - let Datum::Char(a) = self.expr else { + let Datum::Char(a) = *self.expr else { e!("illegal argument to BYTE_NOT instruction"); }; a - }); + }).into(); }, // numeric ops @@ -268,7 +269,7 @@ impl VM { e!("integer division on non integer value"); }; - self.expr = Datum::Number(Number::Fra(Fraction(l / r, 1))); + self.expr = Datum::Number(Number::Fra(Fraction(l / r, 1))).into(); }, i::POW => { @@ -280,7 +281,7 @@ impl VM { e!("illgal argument to POW instruction"); }; - self.expr = Datum::Number((*l).pow(*r)); + self.expr = Datum::Number((*l).pow(*r)).into(); }, i::INC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) { @@ -319,8 +320,8 @@ impl VM { } }, - i::MKVEC => self.expr = Datum::Vector(RefCell::from(vec![])), - i::MKBVEC => self.expr = Datum::ByteVector(RefCell::from(vec![])), + i::MKVEC => self.expr = Datum::Vector(RefCell::from(vec![])).into(), + i::MKBVEC => self.expr = Datum::ByteVector(RefCell::from(vec![])).into(), i::INDEX => { let Datum::Number(idx) = deref!(&instr.1[1]) else { e!("illegal argument to INDEX instruction"); @@ -334,13 +335,13 @@ impl VM { match deref!(&instr.1[0]) { Datum::Vector(v) => { let a = (*v.borrow()[idx].clone()).clone(); - self.expr = a; + self.expr = a.into(); }, Datum::ByteVector(bv) => { let a = Datum::Char(bv.borrow()[idx]); - self.expr = a; + self.expr = a.into(); }, - Datum::List(l) => self.expr = l[idx].clone(), + Datum::Cons(l) => self.expr = l[idx].clone(), _ => e!("illegal argument to INDEX instruction") }; }, @@ -348,14 +349,14 @@ impl VM { i::LENGTH => match deref!(&instr.1[0]) { Datum::Vector(v) => { let a = Datum::Number(Number::Fra(Fraction(v.borrow().len() as isize, 1))); - self.expr = a; + self.expr = a.into(); }, Datum::ByteVector(bv) => { let a = Datum::Number(Number::Fra(Fraction(bv.borrow().len() as isize, 1))); - self.expr = a; + self.expr = a.into(); }, - Datum::List(l) => - self.expr = Datum::Number(Number::Fra(Fraction(l.len() as isize, 1))), + Datum::Cons(l) => self.expr = + Datum::Number(Number::Fra(Fraction(l.len() as isize, 1))).into(), _ => e!("illegal argument to LENGTH instruction"), }, @@ -385,15 +386,16 @@ impl VM { match deref!(&instr.1[0]) { Datum::Vector(v) => { let a = Datum::Vector(RefCell::from(v.borrow()[st..ed].to_vec())); - self.expr = a; + self.expr = a.into(); }, Datum::ByteVector(bv) => { let a = Datum::ByteVector(RefCell::from(bv.borrow()[st..ed].to_vec())); - self.expr = a; + self.expr = a.into(); }, - Datum::List(a) => - self.expr = Datum::List(Rc::new( - (**a).subsl(st as isize, ed as isize))), + + // TODO: do I deep copy the subslice? + Datum::Cons(a) => self.expr = + Datum::Cons(a.subsl(st as isize, ed as isize)).into(), _ => e!("illegal argument to SUBSL instruction") }; } @@ -425,15 +427,16 @@ impl VM { }, i::CAR => { - let Datum::List(arg) = deref!(&instr.1[0]) else { + let Datum::Cons(arg) = deref!(&instr.1[0]) else { e!("illegal argument to CAR instruction"); }; + // TODO: need a none type dont we now self.expr = (*arg.0).clone(); }, i::CDR => { - let Datum::List(arg) = deref!(&instr.1[0]) else { + let Datum::Cons(arg) = deref!(&instr.1[0]) else { e!("illegal argument to CAR instruction"); }; @@ -447,10 +450,6 @@ impl VM { */ }, - // in order to maintain a language agnostic VM these must be traps - //i::PARSE => todo!("implement AST API"), - //i::EVAL => todo!("implement AST API"), - _ => { e!("illegal instruction"); }, diff --git a/mycelium/Cargo.toml b/mycelium/Cargo.toml index 206b304..1ee891e 100644 --- a/mycelium/Cargo.toml +++ b/mycelium/Cargo.toml @@ -5,4 +5,4 @@ edition = "2024" [dependencies] organelle = { path = "../organelle" } - +hyphae = { path = "../hyphae" } diff --git a/mycelium/src/sexpr.rs b/mycelium/src/sexpr.rs index b1b49ae..fbff86f 100644 --- a/mycelium/src/sexpr.rs +++ b/mycelium/src/sexpr.rs @@ -110,34 +110,6 @@ impl fmt::Debug for Datum { pub struct Ast(pub Rc, pub Rc); impl Ast { - pub fn subsl(&self, start: isize, end: isize) -> Ast { - if end - start == 1 { - return Ast(Rc::from(self[start as usize].clone()), Rc::from(Datum::None)) - } - - if end == 0 { - return Ast( - Rc::from((*(self.0)).clone()), - Rc::from(Datum::None) - ) - } - - let Datum::List(ref next) = *self.1 else { - panic!("index into improper list form") - }; - - if start <= 0 { - Ast( - Rc::from((*(self.0)).clone()), - Rc::from(Datum::List( - Rc::from(next.subsl(start - 1, end - 1)))) - ) - - } else { - next.subsl(start - 1, end - 1) - } - } - pub fn len(&self) -> usize { let Datum::List(ref next) = *self.1 else { return 1 diff --git a/organelle/src/lib.rs b/organelle/src/lib.rs index 3703d35..cec5dc5 100644 --- a/organelle/src/lib.rs +++ b/organelle/src/lib.rs @@ -73,6 +73,12 @@ pub enum Number { Sym(SymbolicNumber) } +impl Default for Number { + fn default() -> Self { + Number::Fra(Fraction(0, 1)) + } +} + impl From for Number { fn from(value: SymbolicNumber) -> Self { Number::Sym(value)