Mycelium/hyphae/src/heap.rs
Ava Affine 389bf6e9a0
All checks were successful
per-push tests / build (push) Successful in 1m34s
per-push tests / test-frontend (push) Successful in 41s
per-push tests / test-utility (push) Successful in 45s
per-push tests / test-backend (push) Successful in 44s
per-push tests / timed-decomposer-parse (push) Successful in 50s
serialization/deserialization of datum in VM
This commit adds logic to serialize and deserialize datum, as well
as the start of some total binary format. It implements serialize
and deserialize routines per datum type. Tests are included for
complex cases. Similar code existed in the organelle package which was
then centralized here.

Additionally: this commit makes release target binaries smaller and
faster

Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-11-25 21:53:21 +00:00

614 lines
20 KiB
Rust

/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::serializer::DeserializerControlCode;
use core::ops::{Index, Deref, DerefMut};
use core::ptr::NonNull;
use alloc::{vec, vec::Vec};
use alloc::rc::Rc;
use alloc::boxed::Box;
use alloc::fmt::Debug;
use organelle::{Number, Fraction, SymbolicNumber, Float, ScientificNotation};
/// Size of a `usize` in bytes on the target platform.
const US: usize = (usize::BITS as usize) / 8;
/// Size of an `isize` in bytes on the target platform.
const IS: usize = (isize::BITS as usize) / 8;
/* NOTE
* decided not to implement a cache or a singleton heap manager
* because I did not want to involve a datatype that would add
* unneeded logic to where and how the Rcs get allocated or that
* would require relocation if more Rcs were allocated. Any
* ADT containing the source data referenced by Gc would add
* overhead without value.
*
* Meanwhile, just using allocated-at-site Rcs provides accurate
* reference counting garbage collection. We hack the Box::into_raw
* function to pass around heap allocated Rcs.
*/
/// A reference-counted heap handle that is exactly one pointer wide.
///
/// This is a heap allocated `Rc` passed around such that it fits into a
/// physical register. The pointer is to a `Box<Rc<T>>` that has been turned
/// into a raw pointer, but the custom `Deref` implementation ensures that
/// dereferencing always points to the encapsulated `T`.
#[repr(transparent)]
pub struct Gc<T>(NonNull<Rc<T>>);
/// Wraps an already existing `Rc<T>` in a `Gc<T>` handle.
///
/// The `Rc` is moved into a fresh heap box whose raw pointer becomes the
/// handle, so the strong count of the underlying allocation is unchanged by
/// the conversion itself.
impl<T> From<Rc<T>> for Gc<T> {
    fn from(src: Rc<T>) -> Self {
        // `src` is owned here, so it is moved straight into the box; cloning
        // it first would only bump the refcount and immediately drop it again.
        Gc(NonNull::new(Box::into_raw(Box::new(src)))
            .expect("GC obj from rc nonnull ptr check"))
    }
}
/// Moves a plain value onto the heap behind a new `Rc` and returns the
/// `Gc<T>` handle that owns it.
impl<T> From<T> for Gc<T> {
    fn from(value: T) -> Self {
        let boxed_rc: Box<Rc<T>> = Box::new(Rc::new(value));
        let handle = NonNull::new(Box::into_raw(boxed_rc))
            .expect("GC obj from datum nonnull ptr check");
        Gc(handle)
    }
}
/// Handles compare by the values they point at, not by pointer identity.
impl<T: PartialEq> PartialEq for Gc<T> {
    fn eq(&self, other: &Self) -> bool {
        **self == **other
    }

    // Forwarded explicitly so a payload with a hand-written `ne` keeps it.
    fn ne(&self, other: &Self) -> bool {
        **self != **other
    }
}
/// Dereferencing a handle skips the intermediate `Rc` and yields the `T`.
impl<T> Deref for Gc<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        // Step 1: borrow the boxed `Rc` the handle points at.
        let shared: &Rc<T> = unsafe { self.0.as_ref() };
        // Step 2: take the address of the payload stored inside that `Rc`.
        let payload: *const T = Rc::as_ptr(shared);
        // Step 3: turn the address back into a reference, panicking if null.
        let borrowed: Option<&T> = unsafe { payload.as_ref() };
        borrowed.expect("GC obj deref inconsistent rc ptr")
    }
}
// Mutable access to the `T` stored behind the handle's `Rc`.
//
// NOTE(review): the strong count is not consulted here, so a `&mut T` is
// produced even when other handles refer to the same `Rc`. Presumably callers
// are expected to avoid overlapping borrows of a shared value; confirm.
impl<T> DerefMut for Gc<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
unsafe {
// `Rc::as_ptr` yields a `*const T`; it is cast to `*mut T` so the payload
// can be written through the returned reference.
(Rc::<T>::as_ptr(self.0.as_mut()) as *mut T)
.as_mut()
.expect("GC obj inconsistent rc ptr")
}
}
}
// Takes a raw pointer to the target `Rc`, clones that `Rc` (bumping its
// strong count) and boxes the clone, yielding the `NonNull` for a new handle.
macro_rules! shallow_copy_rc {
    ( $src:expr ) => {
        unsafe {
            NonNull::new(Box::into_raw(Box::new((*$src).clone())))
                .expect("GC obj shallow copy nonnull ptr check")
        }
    };
}

/// Cloning a handle is a shallow copy: the new handle refers to the same
/// reference-counted value and only the strong count changes.
impl<T> Clone for Gc<T> {
    fn clone(&self) -> Self {
        Gc(shallow_copy_rc!(self.0.as_ptr()))
    }

    fn clone_from(&mut self, source: &Self) {
        // Assign a whole new handle instead of overwriting the raw pointer:
        // the assignment drops the previous handle, which frees its box and
        // releases its strong count. Writing `self.0` directly leaked both.
        *self = source.clone();
    }
}
/// Debug output is the current strong count followed by the payload's own
/// debug representation, e.g. `<refs=1>: 5`.
impl<T: Debug> Debug for Gc<T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let shared: &Rc<T> = unsafe { self.0.as_ref() };
        let payload: &T = shared;
        write!(f, "<refs={}>: {:?}", Rc::strong_count(shared), payload)
    }
}
/// Dropping a handle rebuilds the `Box<Rc<T>>` from the raw pointer and lets
/// it fall out of scope, which frees the box and releases one strong count.
impl<T> Drop for Gc<T> {
    fn drop(&mut self) {
        let raw: *mut Rc<T> = self.0.as_ptr();
        let reclaimed: Box<Rc<T>> = unsafe { Box::from_raw(raw) };
        drop(reclaimed);
    }
}
impl<T: Clone> Gc<T> {
    /// Returns a handle to an independent copy of the pointed-at value.
    ///
    /// Unlike `Clone::clone` on the handle, which shares the same `Rc`, this
    /// clones the `T` itself and places the copy behind a brand new `Rc`
    /// whose strong count starts at one.
    #[inline]
    pub fn deep_copy(&self) -> Gc<T> {
        let shared: &Rc<T> = unsafe { self.0.as_ref() };
        // Name `T::clone` explicitly. A plain `.clone()` on `&Rc<T>` resolves
        // to `Rc::clone`, which only bumps the refcount and would make this a
        // shallow copy.
        let duplicate: T = T::clone(&**shared);
        Gc(NonNull::new(Box::into_raw(Box::new(Rc::new(duplicate))))
            .expect("GC obj deep copy nonnull ptr check"))
    }
}
/// A cons cell: a pair of optional `Gc<Datum>` slots.
///
/// The first slot holds the element of this cell. The list helpers in this
/// file (`len`, `append`, indexing) follow the second slot to the next cell
/// whenever it holds a `Datum::Cons`.
#[derive(Clone, PartialEq, Debug)]
pub struct Cons(pub Option<Gc<Datum>>, pub Option<Gc<Datum>>);
impl Cons {
pub fn deep_copy(&self) -> Cons {
macro_rules! car_cpy {
() => {
if let Some(ref car) = self.0 {
if let Datum::Cons(ref car) = **car {
Some(Datum::Cons(car.deep_copy()).into())
} else {
self.0.as_ref().map(|x| x.deep_copy())
}
} else {
None
}
}
}
if let Some(ref next) = self.1 {
if let Datum::Cons(ref next) = **next {
Cons(car_cpy!(), Some(Datum::Cons(next.deep_copy()).into()))
} else {
Cons(car_cpy!(), self.1.as_ref().map(|x| x.deep_copy()))
}
} else {
Cons(car_cpy!(), None)
}
}
pub fn subsl(&self, start: isize, end: isize) -> Cons {
if end - start == 1 {
return Cons(Some(self[start as usize].clone()), None)
}
if end == 0 {
return Cons(
self.0.clone(),
None
)
}
let Some(ref next) = self.1 else {
panic!("out of bounds subsl of cons list")
};
let Datum::Cons(ref next) = **next else {
panic!("subsl of cons list not in standard form")
};
if start <= 0 {
Cons(self.0.clone(),
Some(Datum::Cons(next.subsl(start - 1, end - 1))
.into()))
} else {
next.subsl(start - 1, end - 1)
}
}
pub fn len(&self) -> usize {
let Some(_) = self.0 else {
return 0
};
let Some(ref next) = self.1 else {
return 1
};
let Datum::Cons(ref next) = **next else {
// weird list but okay
return 2
};
1 + next.len()
}
pub fn append(&mut self, arg: Gc<Datum>) {
let Some(_) = self.0 else {
self.0 = Some(arg);
return
};
if let Some(next) = &mut self.1 {
if let Datum::Cons(next) = next.deref_mut() {
next.append(arg);
return;
}
}
self.1 = Some(Datum::Cons(Cons(Some(arg), None)).into());
}
}
/// Zero-based element access for cons lists.
///
/// # Panics
/// Panics when the list is shorter than `index + 1`, when the selected
/// cell's first slot is empty, or when a tail slot on the way holds
/// something other than a `Datum::Cons`.
impl Index<usize> for Cons {
    type Output = Gc<Datum>;

    fn index(&self, index: usize) -> &Self::Output {
        // Hop forward `index` cells, validating each tail link on the way.
        let mut node = self;
        for _ in 0..index {
            let Some(ref link) = node.1 else {
                panic!("out of bounds indexing into cons list")
            };
            let Datum::Cons(ref rest) = **link else {
                panic!("cons list not in standard form")
            };
            node = rest;
        }
        match node.0 {
            Some(ref data) => data,
            None => panic!("out of bounds indexing into cons list"),
        }
    }
}
/// A value held on the VM heap.
///
/// `Clone` is implemented by hand elsewhere in this file rather than derived.
#[derive(PartialEq, Debug)]
pub enum Datum {
/// A numeric value (`organelle::Number`).
Number(Number),
/// A boolean.
Bool(bool),
/// A cons cell (pair / list node).
Cons(Cons),
/// A character stored as a single byte.
Char(u8),
/// A string stored as raw bytes.
String(Vec<u8>),
/// A vector of handles to other datums.
Vector(Vec<Gc<Datum>>),
/// A vector of raw bytes.
ByteVector(Vec<u8>),
/// The absence of a value; it serializes to zero bytes.
None
}
// Written out by hand (instead of derived) so that cloning a `Cons` datum
// goes through `Cons::deep_copy`. Every other variant clones its payload in
// the ordinary way; note that a `Vector` therefore clones its element
// handles, not the elements behind them.
impl Clone for Datum {
    fn clone(&self) -> Datum {
        match *self {
            Datum::Cons(ref pair) => Datum::Cons(pair.deep_copy()),
            Datum::Number(ref number) => Datum::Number(number.clone()),
            Datum::Bool(flag) => Datum::Bool(flag),
            Datum::Char(byte) => Datum::Char(byte),
            Datum::String(ref bytes) => Datum::String(bytes.clone()),
            Datum::Vector(ref items) => Datum::Vector(items.clone()),
            Datum::ByteVector(ref bytes) => Datum::ByteVector(bytes.clone()),
            Datum::None => Datum::None,
        }
    }

    fn clone_from(&mut self, source: &Self) {
        let replacement = source.clone();
        *self = replacement;
    }
}
impl Into<Vec<u8>> for Datum {
fn into(self) -> Vec<u8> {
match self {
Datum::Number(n) => {
let mut out: Vec<u8> = vec![];
match n {
Number::Sci(num) => {
out.push(DeserializerControlCode::SciNumber as u8);
for ele in num.0.to_be_bytes().iter() {
out.push(*ele);
}
for ele in num.1.to_be_bytes().iter() {
out.push(*ele);
}
out
},
Number::Flt(num) => {
out.push(DeserializerControlCode::FltNumber as u8);
for ele in num.0.to_be_bytes().iter() {
out.push(*ele);
}
out
},
Number::Fra(num) => {
out.push(DeserializerControlCode::FraNumber as u8);
for ele in num.0.to_be_bytes().iter() {
out.push(*ele);
}
for ele in num.1.to_be_bytes().iter() {
out.push(*ele);
}
out
},
Number::Sym(num) => {
match num {
SymbolicNumber::Inf => out.push(DeserializerControlCode::SymInf as u8),
SymbolicNumber::NaN => out.push(DeserializerControlCode::SymNan as u8),
SymbolicNumber::NegInf => out.push(DeserializerControlCode::SymNegInf as u8),
SymbolicNumber::NegNan => out.push(DeserializerControlCode::SymNegNan as u8),
}
out
}
}
},
Datum::Bool(b) if !b => vec![DeserializerControlCode::BoolFalse as u8],
Datum::Bool(b) if b => vec![DeserializerControlCode::BoolTrue as u8],
Datum::Bool(_) => panic!("rustc somehow has a third bool!"),
Datum::Cons(c) => {
if let Some(lop) = &c.0 {
if let Some(rop) = &c.1 {
let mut out = vec![DeserializerControlCode::FullCons as u8];
out.append(&mut (*lop.deref()).clone().into());
out.append(&mut (*rop.deref()).clone().into());
out
} else {
let mut out = vec![DeserializerControlCode::LeftCons as u8];
out.append(&mut (*lop.deref()).clone().into());
out
}
} else {
if let Some(rop) = &c.1 {
let mut out = vec![DeserializerControlCode::RightCons as u8];
out.append(&mut (*rop.deref()).clone().into());
out
} else {
vec![DeserializerControlCode::EmptyCons as u8]
}
}
},
Datum::Char(c) => vec![DeserializerControlCode::Char as u8, c],
Datum::String(c) => {
let mut v = vec![DeserializerControlCode::String as u8];
v.append(&mut c.len().to_be_bytes().to_vec());
v.append(&mut c.clone());
v
},
Datum::ByteVector(c) => {
let mut v = vec![DeserializerControlCode::ByteVec as u8];
v.append(&mut c.len().to_be_bytes().to_vec());
v.append(&mut c.clone());
v
},
Datum::Vector(c) => {
let mut v = vec![DeserializerControlCode::Vector as u8];
v.append(&mut c.len().to_be_bytes().to_vec());
c.iter().for_each(|i| v.append(&mut (*i.deref()).clone().into()));
v
},
Datum::None => vec![],
}
}
}
/// Decodes one datum from the front of a byte buffer.
///
/// The first byte is interpreted as a `DeserializerControlCode`; the bytes
/// after it are consumed according to that code. Bytes beyond the decoded
/// datum are ignored, which is what lets `Vector` and `FullCons` decode
/// their children back to back.
///
/// # Errors
/// Returns an error string when the buffer is empty, when the control code
/// is not recognised, when the buffer is too short for the code's payload,
/// or when a nested datum fails to decode.
impl TryFrom<&[u8]> for Datum {
    type Error = &'static str;

    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        // An empty buffer has no control code. It is reachable from truncated
        // `Vector` / `FullCons` input through the recursive calls below, so
        // report it as an error instead of panicking on `value[0]`.
        let Some(&code) = value.first() else {
            return Err("Cannot deserialize a datum from an empty buffer");
        };
        match DeserializerControlCode::try_from(code)? {
            // this entire block goes away when we finish redoing organelle
            DeserializerControlCode::SymInf =>
                Ok(Datum::Number(Number::Sym(SymbolicNumber::Inf))),
            DeserializerControlCode::SymNan =>
                Ok(Datum::Number(Number::Sym(SymbolicNumber::NaN))),
            DeserializerControlCode::SymNegInf =>
                Ok(Datum::Number(Number::Sym(SymbolicNumber::NegInf))),
            DeserializerControlCode::SymNegNan =>
                Ok(Datum::Number(Number::Sym(SymbolicNumber::NegNan))),
            DeserializerControlCode::SciNumber if value.len() >= 1 + 4 + IS => {
                let i = f32::from_be_bytes(value[1..5].try_into().unwrap());
                let j = isize::from_be_bytes(value[5..(5 + IS)].try_into().unwrap());
                Ok(Datum::Number(Number::Sci(ScientificNotation(i, j))))
            },
            DeserializerControlCode::FltNumber if value.len() >= 9 => {
                let i = f64::from_be_bytes(value[1..9].try_into().unwrap());
                Ok(Datum::Number(Number::Flt(Float(i))))
            },
            DeserializerControlCode::FraNumber if value.len() >= 1 + (IS * 2) => {
                let i = isize::from_be_bytes(value[1..(1 + IS)].try_into().unwrap());
                let j = isize::from_be_bytes(value[(1 + IS)..(1 + IS + IS)].try_into().unwrap());
                Ok(Datum::Number(Number::Fra(Fraction(i, j))))
            },
            DeserializerControlCode::BoolFalse => Ok(Datum::Bool(false)),
            DeserializerControlCode::BoolTrue => Ok(Datum::Bool(true)),
            DeserializerControlCode::EmptyCons =>
                Ok(Datum::Cons(Cons(None, None))),
            DeserializerControlCode::Char if value.len() >= 2 =>
                Ok(Datum::Char(value[1])),
            DeserializerControlCode::String if value.len() >= 1 + US => {
                let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
                if len < 1 {
                    Ok(Datum::String(vec![]))
                } else if value.len() - (1 + US) < len {
                    Err("String vector backing is corrupted or truncated!")
                } else {
                    Ok(Datum::String(value[(1 + US)..(1 + US + len)].to_vec()))
                }
            },
            DeserializerControlCode::ByteVec if value.len() >= 1 + US => {
                let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
                if len < 1 {
                    Ok(Datum::ByteVector(vec![]))
                } else if value.len() - (1 + US) < len {
                    Err("ByteVector vector backing is corrupted or truncated!")
                } else {
                    Ok(Datum::ByteVector(value[(1 + US)..(1 + US + len)].to_vec()))
                }
            },
            DeserializerControlCode::Vector if value.len() >= 1 + US => {
                let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
                // Elements are stored back to back; `cursor` tracks where the
                // next one begins.
                let mut cursor: usize = 1 + US;
                let mut ovec: Vec<Gc<Datum>> = vec![];
                for _ in 0..len {
                    // A cursor at or past the end yields an empty slice, which
                    // the recursive call rejects as an error.
                    let remaining = value.get(cursor..).unwrap_or(&[]);
                    let element = Datum::try_from(remaining)?;
                    cursor += element.byte_length();
                    ovec.push(element.into());
                }
                Ok(Datum::Vector(ovec))
            },
            DeserializerControlCode::LeftCons if value.len() >= 2 =>
                Ok(Datum::Cons(Cons(Some(Datum::try_from(&value[1..])?.into()), None))),
            DeserializerControlCode::RightCons if value.len() >= 2 =>
                Ok(Datum::Cons(Cons(None, Some(Datum::try_from(&value[1..])?.into())))),
            DeserializerControlCode::FullCons if value.len() >= 3 => {
                let lop = Datum::try_from(&value[1..])?;
                // The second slot starts right after the first slot's bytes.
                let next = 1 + lop.byte_length();
                let rop = Datum::try_from(value.get(next..).unwrap_or(&[]))?;
                Ok(Datum::Cons(Cons(Some(lop.into()), Some(rop.into()))))
            },
            // Reached for codes that are not datum codes and for known codes
            // whose length guard above failed (truncated input).
            _ => Err("Deserializer Control Code not valid in this context")
        }
    }
}
impl Datum {
pub fn byte_length(&self) -> usize {
match self {
Datum::None => 0,
Datum::Bool(_) => 1,
Datum::Char(_) => 2,
// This will need to change with organelle
Datum::Number(n) => match n {
Number::Sym(_) => 1 as usize,
Number::Flt(_) => 1 + 8 as usize,
Number::Sci(_) => 1 + 4 + (isize::BITS / 8) as usize,
Number::Fra(_) => 1 + ((usize::BITS / 8) * 2) as usize,
},
Datum::String(s) => 1 + US + s.len(),
Datum::ByteVector(s) => 1 + US + s.len(),
Datum::Vector(s) => {
let mut c = 1 + US;
for i in s.iter() {
c += i.byte_length();
}
c
},
Datum::Cons(c) => {
let mut size = 1;
c.0.as_ref().and_then(|x| {
size += x.byte_length();
Some(())
});
c.1.as_ref().and_then(|x| {
size += x.byte_length();
Some(())
});
size
},
}
}
}
// Unit tests for the `Gc` handle and for datum (de)serialization.
#[cfg(test)]
mod tests {
use super::*;
// Test payload that holds a mutable reference to a flag and sets the flag to
// `true` when it is dropped, so a test can observe when the payload dies.
#[derive(Debug)]
struct GcTester<'a>(pub &'a mut bool);
impl Clone for GcTester<'_> {
// Copies the current flag value into a new boxed bool that is turned into a
// raw pointer and never freed; the clone refers to that copy, not to the
// original flag.
fn clone(&self) -> Self {
unsafe {
GcTester(Box::into_raw(Box::new(self.0.clone()))
.as_mut()
.expect("Gc Test obj mut ref fail"))
}
}
fn clone_from(&mut self, _: &Self) {
unimplemented!("test impl")
}
}
impl Drop for GcTester<'_> {
// Records destruction by flipping the referenced flag.
fn drop(&mut self) {
*self.0 = true;
}
}
impl PartialEq for GcTester<'_> {
// Two testers are equal when their flags currently hold the same value.
fn eq(&self, other: &Self) -> bool {
*(self.0) == *(other.0)
}
fn ne(&self, other: &Self) -> bool {
*(self.0) != *(other.0)
}
}
// Dropping the only handle must drop the payload (flag becomes true).
#[test]
fn test_gc_basic_behavior() {
let mut flag = false;
let a = Into::<Gc<GcTester>>::into(GcTester(&mut flag));
assert!(!*(*a).0);
drop(a);
assert!(flag);
}
// The original handle is a temporary that is dropped at the end of the `let`
// statement; the payload must survive in the clone until the clone is dropped.
#[test]
fn test_gc_shallow_copy() {
let mut flag = false;
let a =
Into::<Gc<GcTester>>::into(GcTester(&mut flag)).clone();
assert!(!*(*a).0);
drop(a);
assert!(flag);
}
// After dropping the handle that was copied from, the copy must still read an
// unset flag.
#[test]
fn test_gc_deep_copy() {
let mut flag = false;
let reference_holder =
Into::<Gc<GcTester>>::into(GcTester(&mut flag)).clone();
assert!(!*(*reference_holder).0);
let copied_data = reference_holder.deep_copy();
assert!(!*(*copied_data).0);
assert_eq!(*reference_holder, *copied_data);
drop(reference_holder);
assert!(!*(*copied_data).0);
}
// Round trip: every case is serialized to bytes and decoded again, and the
// decoded datum must equal the original.
#[test]
fn serialize_deserialize_datum_tests() {
let cases = vec![
Datum::Number("2/3".parse::<Number>().unwrap()),
Datum::Number("-4/5".parse::<Number>().unwrap()),
Datum::Number("2e45".parse::<Number>().unwrap()),
Datum::Number("1.2432566".parse::<Number>().unwrap()),
Datum::Number("+inf.0".parse::<Number>().unwrap()),
Datum::Cons(Cons(Some(Datum::Bool(true).into()), Some(Datum::Bool(false).into()))),
Datum::Cons(Cons(None, Some(Datum::Bool(true).into()))),
Datum::Cons(Cons(Some(Datum::Bool(true).into()), None)),
Datum::Cons(Cons(None, None)),
Datum::Cons(Cons(Some(Datum::Cons(Cons(None, Some(Datum::Bool(false).into()))).into()), None)),
Datum::Vector(vec![Datum::Bool(true).into(), Datum::Bool(true).into(), Datum::Bool(false).into()]),
Datum::Vector(vec![]),
Datum::Vector(vec![Datum::Vector(vec![Datum::Bool(true).into()]).into(), Datum::Bool(false).into()]),
];
for i in cases.iter() {
let j: Vec<u8> = i.clone().into();
assert_eq!(*i, Datum::try_from(j.as_slice()).unwrap());
}
}
}