Mycelium/hyphae/src/heap.rs

615 lines
20 KiB
Rust
Raw Normal View History

/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::serializer::DeserializerControlCode;
use core::ops::{Index, Deref, DerefMut};
use core::ptr::NonNull;
use alloc::{vec, vec::Vec};
use alloc::rc::Rc;
use alloc::boxed::Box;
use alloc::fmt::Debug;
use organelle::{Number, Fraction, SymbolicNumber, Float, ScientificNotation};
// Width of a `usize` in bytes. The (de)serializer in this file uses it as the
// size of the big-endian length prefix written by `len().to_be_bytes()`.
const US: usize = (usize::BITS / 8) as usize;
// Width of an `isize` in bytes. The deserializer in this file uses it to slice
// out the `isize` fields of serialized numbers.
const IS: usize = (isize::BITS / 8) as usize;
/* NOTE
* decided not to implement a cache or a singleton heap manager
* because I did not want to involve a datatype that would add
* unneeded logic to where and how the Rcs get allocated or that
* would require relocation if more Rcs were allocated. Any
* ADT containing the source data referenced by Gc would add
* overhead without value.
*
* Meanwhile, just using allocated-at-site Rcs provides accurate
* reference counting garbage collection. We hack the Box::into_raw
* function to pass around heap allocated Rcs.
*/
/* Gc
* A reference-counted handle that is a single pointer wide
* (`repr(transparent)` over `NonNull`). The pointer targets a
* heap-allocated `Rc<T>` that was leaked with `Box::into_raw`; the
* `Deref`/`DerefMut` impls in this file look through the `Rc` so
* that dereferencing a `Gc<T>` always yields the encapsulated `T`.
* Cloning a `Gc` boxes a fresh clone of the `Rc` (the `T` is shared),
* and dropping a `Gc` rebuilds the box and releases one reference.
*/
#[repr(transparent)]
pub struct Gc<T>(NonNull<Rc<T>>);
/// Wraps an existing `Rc<T>` in a `Gc<T>` handle.
///
/// The `Rc` is moved into a fresh `Box`, so the strong count of the shared
/// value is left exactly as the caller handed it over (the handle takes over
/// the reference that `src` represented).
impl<T> From<Rc<T>> for Gc<T> {
    fn from(src: Rc<T>) -> Self {
        // `src` is already owned: moving it avoids a pointless
        // clone-then-drop pair on the reference count.
        let raw = Box::into_raw(Box::new(src));
        Gc(NonNull::new(raw).expect("GC obj from rc nonnull ptr check"))
    }
}
/// Moves a plain value onto the heap behind a new `Rc` and returns the
/// `Gc<T>` handle that owns the only reference to it.
impl<T> From<T> for Gc<T> {
    fn from(value: T) -> Self {
        let boxed: Box<Rc<T>> = Box::new(Rc::new(value));
        let handle = NonNull::new(Box::into_raw(boxed))
            .expect("GC obj from datum nonnull ptr check");
        Gc(handle)
    }
}
impl<T: PartialEq> PartialEq for Gc<T> {
fn eq(&self, other: &Self) -> bool {
self.deref().eq(other.deref())
}
fn ne(&self, other: &Self) -> bool {
self.deref().ne(other.deref())
}
}
impl<T> Deref for Gc<T> {
type Target = T;
fn deref(&self) -> &Self::Target {
unsafe {
Rc::<T>::as_ptr(self.0.as_ref())
.as_ref()
.expect("GC obj deref inconsistent rc ptr")
}
}
}
/// Mutable access to the `T` behind the boxed `Rc`.
///
/// NOTE(review): the `*const T` from `Rc::as_ptr` is cast to `*mut T`, so the
/// returned `&mut T` is not checked against other `Gc` clones that share the
/// same `Rc`. Presumably callers guarantee exclusive access — confirm.
impl<T> DerefMut for Gc<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        // SAFETY: relies on `self.0` pointing at a live `Rc<T>`; every
        // constructor in this file obtains the pointer from `Box::into_raw`.
        let rc: &mut Rc<T> = unsafe { self.0.as_mut() };
        let raw = Rc::as_ptr(rc) as *mut T;
        // SAFETY: see the review note above; exclusivity is not enforced here.
        let target = unsafe { raw.as_mut() };
        target.expect("GC obj inconsistent rc ptr")
    }
}
// Takes a raw pointer to an `Rc`, clones that `Rc` (sharing the value it
// manages) and evaluates to a `NonNull` pointing at a newly boxed clone.
macro_rules! shallow_copy_rc {
    ( $src:expr ) => {{
        // The caller is responsible for `$src` pointing at a live `Rc`.
        let shared = unsafe { Rc::clone(&*$src) };
        NonNull::new(Box::into_raw(Box::new(shared)))
            .expect("GC obj shallow copy nonnull ptr check")
    }};
}
/// Cloning a `Gc` is shallow: the new handle owns its own boxed `Rc`, but that
/// `Rc` shares the underlying `T` with the original.
impl<T> Clone for Gc<T> {
    fn clone(&self) -> Self {
        Gc(shallow_copy_rc!(self.0.as_ptr()))
    }
    /// Replaces `self` with a shallow copy of `source`.
    ///
    /// The assignment goes through `*self` so that the handle being replaced
    /// is dropped; overwriting only the raw pointer field would leak the old
    /// box together with the strong reference it holds.
    fn clone_from(&mut self, source: &Self) {
        *self = source.clone();
    }
}
/// Prints the current strong reference count followed by the `Debug`
/// rendering of the pointed-to value, e.g. `<refs=1>: ...`.
impl<T: Debug> Debug for Gc<T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // SAFETY: relies on `self.0` pointing at a live `Rc<T>`; every
        // constructor in this file obtains the pointer from `Box::into_raw`.
        let rc: &Rc<T> = unsafe { self.0.as_ref() };
        let refs = Rc::strong_count(rc);
        write!(f, "<refs={refs}>: ")?;
        let inner: &T = &**rc;
        write!(f, "{:?}", inner)
    }
}
/// Dropping a handle rebuilds the `Box<Rc<T>>` from the stored raw pointer and
/// drops it, which frees the box and releases this handle's reference.
impl<T> Drop for Gc<T> {
    fn drop(&mut self) {
        let raw: *mut Rc<T> = self.0.as_ptr();
        // SAFETY: relies on `raw` having come from `Box::into_raw`, as it does
        // in every constructor in this file.
        let owned: Box<Rc<T>> = unsafe { Box::from_raw(raw) };
        drop(owned);
    }
}
impl<T: Clone> Gc<T> {
    /// Returns a handle to an independent copy of the pointed-to value.
    ///
    /// The inner `T` itself is cloned and placed behind a brand new `Rc`, so
    /// the result does not share its allocation with `self`. Cloning the `Rc`
    /// instead (what `self.0.as_ref().clone()` resolves to) would only bump
    /// the reference count and yield a shallow copy.
    #[inline]
    pub fn deep_copy(&self) -> Gc<T> {
        let duplicate: T = T::clone(&**self);
        Gc(NonNull::new(Box::into_raw(Box::new(Rc::new(duplicate))))
            .expect("GC obj deep copy nonnull ptr check"))
    }
}
#[derive(Clone, PartialEq, Debug)]
pub struct Cons(pub Option<Gc<Datum>>, pub Option<Gc<Datum>>);
impl Cons {
pub fn deep_copy(&self) -> Cons {
macro_rules! car_cpy {
() => {
if let Some(ref car) = self.0 {
if let Datum::Cons(ref car) = **car {
Some(Datum::Cons(car.deep_copy()).into())
} else {
self.0.as_ref().map(|x| x.deep_copy())
}
} else {
None
}
}
}
if let Some(ref next) = self.1 {
if let Datum::Cons(ref next) = **next {
Cons(car_cpy!(), Some(Datum::Cons(next.deep_copy()).into()))
} else {
Cons(car_cpy!(), self.1.as_ref().map(|x| x.deep_copy()))
}
} else {
Cons(car_cpy!(), None)
}
}
pub fn subsl(&self, start: isize, end: isize) -> Cons {
if end - start == 1 {
return Cons(Some(self[start as usize].clone()), None)
}
if end == 0 {
return Cons(
self.0.clone(),
None
)
}
let Some(ref next) = self.1 else {
panic!("out of bounds subsl of cons list")
};
let Datum::Cons(ref next) = **next else {
panic!("subsl of cons list not in standard form")
};
if start <= 0 {
Cons(self.0.clone(),
Some(Datum::Cons(next.subsl(start - 1, end - 1))
.into()))
} else {
next.subsl(start - 1, end - 1)
}
}
pub fn len(&self) -> usize {
let Some(_) = self.0 else {
return 0
};
let Some(ref next) = self.1 else {
return 1
};
let Datum::Cons(ref next) = **next else {
// weird list but okay
return 2
};
1 + next.len()
}
pub fn append(&mut self, arg: Gc<Datum>) {
let Some(_) = self.0 else {
self.0 = Some(arg);
return
};
if let Some(next) = &mut self.1 {
if let Datum::Cons(next) = next.deref_mut() {
next.append(arg);
return;
}
}
self.1 = Some(Datum::Cons(Cons(Some(arg), None)).into());
}
}
/// Zero-based indexing into a cons list.
///
/// Follows `index` cdr links and returns the car found there. Panics when the
/// list is too short, when a cdr along the way is not a `Datum::Cons`, or when
/// the addressed cell has no car.
impl Index<usize> for Cons {
    type Output = Gc<Datum>;
    fn index(&self, index: usize) -> &Self::Output {
        let mut cell = self;
        for _ in 0..index {
            let Some(ref cdr) = cell.1 else {
                panic!("out of bounds indexing into cons list")
            };
            let Datum::Cons(ref tail) = **cdr else {
                panic!("cons list not in standard form")
            };
            cell = tail;
        }
        match cell.0 {
            Some(ref data) => data,
            None => panic!("out of bounds indexing into cons list"),
        }
    }
}
/// A single value as stored on the heap. `Clone` is implemented by hand
/// elsewhere in this file rather than derived.
#[derive(PartialEq, Debug)]
pub enum Datum {
/// A numeric value (`organelle::Number`).
Number(Number),
/// A boolean.
Bool(bool),
/// A cons cell (pair / list node).
Cons(Cons),
/// A character stored as a single byte.
Char(u8),
/// A string stored as raw bytes.
String(Vec<u8>),
/// A vector of heap handles to other datums.
Vector(Vec<Gc<Datum>>),
/// A vector of raw bytes.
ByteVector(Vec<u8>),
/// The absence of a value.
None
}
// Written out by hand (instead of derived) so that a `Cons` datum is cloned
// through `Cons::deep_copy`. A `Vector` clones its `Vec` of handles, i.e. each
// element goes through `Gc::clone`.
impl Clone for Datum {
    fn clone(&self) -> Datum {
        match *self {
            Datum::Number(ref number) => Datum::Number(number.clone()),
            Datum::Bool(flag) => Datum::Bool(flag),
            Datum::Cons(ref cell) => Datum::Cons(cell.deep_copy()),
            Datum::Char(byte) => Datum::Char(byte),
            Datum::String(ref bytes) => Datum::String(bytes.clone()),
            Datum::Vector(ref items) => Datum::Vector(items.clone()),
            Datum::ByteVector(ref bytes) => Datum::ByteVector(bytes.clone()),
            Datum::None => Datum::None,
        }
    }
    fn clone_from(&mut self, source: &Self) {
        let replacement = Datum::clone(source);
        *self = replacement;
    }
}
/// Serializes a datum into its byte representation.
///
/// Every datum starts with a `DeserializerControlCode` byte, followed by:
/// * numbers: the big-endian bytes of their field(s) (symbolic numbers are
///   the control code alone);
/// * booleans and the empty cons: nothing;
/// * cons cells: the serialized car and/or cdr, whichever are present;
/// * chars: the single byte;
/// * strings / byte vectors: big-endian `usize` length, then the bytes;
/// * vectors: big-endian `usize` element count, then each element.
///
/// `Datum::None` serializes to no bytes at all.
///
/// Implemented as `From` (which also provides `Into<Vec<u8>> for Datum`), and
/// nested datums are written by reference into one buffer instead of being
/// cloned and serialized into temporary vectors.
impl From<Datum> for Vec<u8> {
    fn from(datum: Datum) -> Vec<u8> {
        // Appends the encoding of `datum` to `out`, recursing into children.
        fn write_datum(datum: &Datum, out: &mut Vec<u8>) {
            match datum {
                Datum::Number(n) => match n {
                    Number::Sci(num) => {
                        out.push(DeserializerControlCode::SciNumber as u8);
                        out.extend_from_slice(&num.0.to_be_bytes());
                        out.extend_from_slice(&num.1.to_be_bytes());
                    }
                    Number::Flt(num) => {
                        out.push(DeserializerControlCode::FltNumber as u8);
                        out.extend_from_slice(&num.0.to_be_bytes());
                    }
                    Number::Fra(num) => {
                        out.push(DeserializerControlCode::FraNumber as u8);
                        out.extend_from_slice(&num.0.to_be_bytes());
                        out.extend_from_slice(&num.1.to_be_bytes());
                    }
                    Number::Sym(num) => match num {
                        SymbolicNumber::Inf => out.push(DeserializerControlCode::SymInf as u8),
                        SymbolicNumber::NaN => out.push(DeserializerControlCode::SymNan as u8),
                        SymbolicNumber::NegInf => {
                            out.push(DeserializerControlCode::SymNegInf as u8)
                        }
                        SymbolicNumber::NegNan => {
                            out.push(DeserializerControlCode::SymNegNan as u8)
                        }
                    },
                },
                Datum::Bool(false) => out.push(DeserializerControlCode::BoolFalse as u8),
                Datum::Bool(true) => out.push(DeserializerControlCode::BoolTrue as u8),
                Datum::Cons(c) => {
                    // The control code records which of car/cdr follow.
                    match (c.0.is_some(), c.1.is_some()) {
                        (true, true) => out.push(DeserializerControlCode::FullCons as u8),
                        (true, false) => out.push(DeserializerControlCode::LeftCons as u8),
                        (false, true) => out.push(DeserializerControlCode::RightCons as u8),
                        (false, false) => out.push(DeserializerControlCode::EmptyCons as u8),
                    }
                    if let Some(lop) = &c.0 {
                        write_datum(lop, out);
                    }
                    if let Some(rop) = &c.1 {
                        write_datum(rop, out);
                    }
                }
                Datum::Char(c) => {
                    out.push(DeserializerControlCode::Char as u8);
                    out.push(*c);
                }
                Datum::String(c) => {
                    out.push(DeserializerControlCode::String as u8);
                    out.extend_from_slice(&c.len().to_be_bytes());
                    out.extend_from_slice(c);
                }
                Datum::ByteVector(c) => {
                    out.push(DeserializerControlCode::ByteVec as u8);
                    out.extend_from_slice(&c.len().to_be_bytes());
                    out.extend_from_slice(c);
                }
                Datum::Vector(c) => {
                    out.push(DeserializerControlCode::Vector as u8);
                    out.extend_from_slice(&c.len().to_be_bytes());
                    for item in c.iter() {
                        write_datum(item, out);
                    }
                }
                Datum::None => {}
            }
        }

        let mut out: Vec<u8> = Vec::new();
        write_datum(&datum, &mut out);
        out
    }
}
/// Deserializes one datum from the front of `value`.
///
/// The first byte is interpreted as a `DeserializerControlCode`; the rest of
/// the layout mirrors the serializer in this file. Trailing bytes after the
/// datum are ignored.
///
/// # Errors
/// Returns an error string when `value` is empty, when the control code is
/// unknown, when the payload is shorter than the control code requires, or
/// when a nested datum fails to deserialize. An empty slice is reported as an
/// error rather than indexed, so truncated `Vector` / `FullCons` payloads
/// (which recurse on a possibly empty remainder) no longer panic.
impl TryFrom<&[u8]> for Datum {
    type Error = &'static str;
    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        let Some(&control) = value.first() else {
            return Err("Cannot deserialize a Datum from empty input");
        };
        match DeserializerControlCode::try_from(control)? {
            // this entire block goes away when we finish redoing organelle
            DeserializerControlCode::SymInf =>
                Ok(Datum::Number(Number::Sym(SymbolicNumber::Inf))),
            DeserializerControlCode::SymNan =>
                Ok(Datum::Number(Number::Sym(SymbolicNumber::NaN))),
            DeserializerControlCode::SymNegInf =>
                Ok(Datum::Number(Number::Sym(SymbolicNumber::NegInf))),
            DeserializerControlCode::SymNegNan =>
                Ok(Datum::Number(Number::Sym(SymbolicNumber::NegNan))),
            DeserializerControlCode::SciNumber if value.len() >= 1 + 4 + IS => {
                let i = f32::from_be_bytes(value[1..5].try_into().unwrap());
                let j = isize::from_be_bytes(value[5..(5 + IS)].try_into().unwrap());
                Ok(Datum::Number(Number::Sci(ScientificNotation(i, j))))
            },
            DeserializerControlCode::FltNumber if value.len() >= 9 => {
                let i = f64::from_be_bytes(value[1..9].try_into().unwrap());
                Ok(Datum::Number(Number::Flt(Float(i))))
            },
            DeserializerControlCode::FraNumber if value.len() >= 1 + (IS * 2) => {
                let i = isize::from_be_bytes(value[1..(1 + IS)].try_into().unwrap());
                let j = isize::from_be_bytes(value[(1 + IS)..(1 + IS + IS)].try_into().unwrap());
                Ok(Datum::Number(Number::Fra(Fraction(i, j))))
            },
            DeserializerControlCode::BoolFalse => Ok(Datum::Bool(false)),
            DeserializerControlCode::BoolTrue => Ok(Datum::Bool(true)),
            DeserializerControlCode::EmptyCons =>
                Ok(Datum::Cons(Cons(None, None))),
            DeserializerControlCode::Char if value.len() >= 2 =>
                Ok(Datum::Char(value[1])),
            DeserializerControlCode::String if value.len() >= 1 + US => {
                let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
                // Compare against the remaining bytes so `1 + US + len` cannot overflow.
                if value.len() - (1 + US) < len {
                    Err("String vector backing is corrupted or truncated!")
                } else {
                    Ok(Datum::String(value[(1 + US)..(1 + US + len)].to_vec()))
                }
            },
            DeserializerControlCode::ByteVec if value.len() >= 1 + US => {
                let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
                if value.len() - (1 + US) < len {
                    Err("ByteVector vector backing is corrupted or truncated!")
                } else {
                    Ok(Datum::ByteVector(value[(1 + US)..(1 + US + len)].to_vec()))
                }
            },
            DeserializerControlCode::Vector if value.len() >= 1 + US => {
                let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
                let mut cursor: usize = 1 + US;
                let mut ovec: Vec<Gc<Datum>> = Vec::new();
                for _ in 0..len {
                    // Each element reports how many bytes it occupied, which
                    // never exceeds the slice it was parsed from, so `cursor`
                    // stays within `value`.
                    let element = Datum::try_from(&value[cursor..])?;
                    cursor += element.byte_length();
                    ovec.push(element.into());
                }
                Ok(Datum::Vector(ovec))
            },
            DeserializerControlCode::LeftCons if value.len() >= 2 =>
                Ok(Datum::Cons(Cons(Some(Datum::try_from(&value[1..])?.into()), None))),
            DeserializerControlCode::RightCons if value.len() >= 2 =>
                Ok(Datum::Cons(Cons(None, Some(Datum::try_from(&value[1..])?.into())))),
            DeserializerControlCode::FullCons if value.len() >= 3 => {
                let lop = Datum::try_from(&value[1..])?;
                let next = 1 + lop.byte_length();
                let rop = Datum::try_from(&value[next..])?;
                Ok(Datum::Cons(Cons(Some(lop.into()), Some(rop.into()))))
            }
            _ => Err("Deserializer Control Code not valid in this context")
        }
    }
}
impl Datum {
    /// Number of bytes this datum occupies in the serialized form produced by
    /// this file's serializer: one control-code byte plus the payload, except
    /// `Datum::None`, which takes no bytes.
    pub fn byte_length(&self) -> usize {
        match self {
            Datum::None => 0,
            Datum::Bool(_) => 1,
            Datum::Char(_) => 2,
            // This will need to change with organelle
            Datum::Number(n) => match n {
                Number::Sym(_) => 1 as usize,
                Number::Flt(_) => 1 + 8 as usize,
                Number::Sci(_) => 1 + 4 + (isize::BITS / 8) as usize,
                Number::Fra(_) => 1 + ((usize::BITS / 8) * 2) as usize,
            },
            Datum::String(bytes) => 1 + US + bytes.len(),
            Datum::ByteVector(bytes) => 1 + US + bytes.len(),
            // Control code and length prefix, then every element in turn.
            Datum::Vector(items) => items
                .iter()
                .fold(1 + US, |total, item| total + item.byte_length()),
            Datum::Cons(cell) => {
                // Absent car/cdr contribute nothing beyond the control code.
                let car = cell.0.as_ref().map_or(0, |x| x.byte_length());
                let cdr = cell.1.as_ref().map_or(0, |x| x.byte_length());
                1 + car + cdr
            }
        }
    }
}
// Unit tests for the `Gc` handle and for the `Datum` byte round trip.
#[cfg(test)]
mod tests {
use super::*;
// Test payload wrapping a mutable flag; its `Drop` impl sets the flag to
// `true`, which lets a test observe when the payload is destroyed.
#[derive(Debug)]
struct GcTester<'a>(pub &'a mut bool);
impl Clone for GcTester<'_> {
// A clone refers to a newly boxed copy of the flag's current value. The box
// is turned into a raw pointer and is not freed by this code.
fn clone(&self) -> Self {
unsafe {
GcTester(Box::into_raw(Box::new(self.0.clone()))
.as_mut()
.expect("Gc Test obj mut ref fail"))
}
}
fn clone_from(&mut self, _: &Self) {
unimplemented!("test impl")
}
}
impl Drop for GcTester<'_> {
// Record the drop in the referenced flag.
fn drop(&mut self) {
*self.0 = true;
}
}
impl PartialEq for GcTester<'_> {
// Testers compare by the value of the flag they point at.
fn eq(&self, other: &Self) -> bool {
*(self.0) == *(other.0)
}
fn ne(&self, other: &Self) -> bool {
*(self.0) != *(other.0)
}
}
// Dropping the only handle must drop the payload (flag becomes true).
#[test]
fn test_gc_basic_behavior() {
let mut flag = false;
let a = Into::<Gc<GcTester>>::into(GcTester(&mut flag));
assert!(!*(*a).0);
drop(a);
assert!(flag);
}
// The original handle is a temporary dropped at the end of the `let`
// statement; the payload must survive through the clone and only be dropped
// together with it.
#[test]
fn test_gc_shallow_copy() {
let mut flag = false;
let a =
Into::<Gc<GcTester>>::into(GcTester(&mut flag)).clone();
assert!(!*(*a).0);
drop(a);
assert!(flag);
}
// Dropping the source handle must leave the copy's flag untouched.
// NOTE(review): every assertion here would also hold if `deep_copy` shared
// the allocation with its source, so this test alone does not distinguish a
// deep copy from a shallow one.
#[test]
fn test_gc_deep_copy() {
let mut flag = false;
let reference_holder =
Into::<Gc<GcTester>>::into(GcTester(&mut flag)).clone();
assert!(!*(*reference_holder).0);
let copied_data = reference_holder.deep_copy();
assert!(!*(*copied_data).0);
assert_eq!(*reference_holder, *copied_data);
drop(reference_holder);
assert!(!*(*copied_data).0);
}
// Serializes each case to bytes and checks that deserializing those bytes
// yields an equal datum.
#[test]
fn serialize_deserialize_datum_tests() {
let cases = vec![
Datum::Number("2/3".parse::<Number>().unwrap()),
Datum::Number("-4/5".parse::<Number>().unwrap()),
Datum::Number("2e45".parse::<Number>().unwrap()),
Datum::Number("1.2432566".parse::<Number>().unwrap()),
Datum::Number("+inf.0".parse::<Number>().unwrap()),
Datum::Cons(Cons(Some(Datum::Bool(true).into()), Some(Datum::Bool(false).into()))),
Datum::Cons(Cons(None, Some(Datum::Bool(true).into()))),
Datum::Cons(Cons(Some(Datum::Bool(true).into()), None)),
Datum::Cons(Cons(None, None)),
Datum::Cons(Cons(Some(Datum::Cons(Cons(None, Some(Datum::Bool(false).into()))).into()), None)),
Datum::Vector(vec![Datum::Bool(true).into(), Datum::Bool(true).into(), Datum::Bool(false).into()]),
Datum::Vector(vec![]),
Datum::Vector(vec![Datum::Vector(vec![Datum::Bool(true).into()]).into(), Datum::Bool(false).into()]),
];
for i in cases.iter() {
let j: Vec<u8> = i.clone().into();
assert_eq!(*i, Datum::try_from(j.as_slice()).unwrap());
}
}
}