From fea950ea17bb38d95a2a696e776443153a5e6e6b Mon Sep 17 00:00:00 2001
From: Ava Affine
Date: Tue, 26 Aug 2025 17:11:37 +0000
Subject: [PATCH] WIP: serialization/deserialization of datum in VM

Additionally: make release target binaries smaller and faster

Signed-off-by: Ava Affine
---
 Cargo.toml           |  10 ++++
 hyphae/src/heap.rs   | 110 +++++++++++++++++++++++++++++++++++++++
 organelle/src/lib.rs | 120 +++++++++++++++++++++++++++++++++++++++++++
 readme.md            |  25 ++++++++--
 4 files changed, 262 insertions(+), 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index e69f2e1..df6ee70 100755
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,3 +1,13 @@
+cargo-features = ["profile-rustflags"]
+
 [workspace]
 resolver = "2"
 members = ["mycelium", "decomposer", "hyphae", "organelle"]
+
+[profile.release]
+opt-level = 3
+strip = true
+lto = true
+codegen-units = 1
+panic = "abort"
+rustflags = [ "-Zlocation-detail=none", "-Zfmt-debug=none" ]
diff --git a/hyphae/src/heap.rs b/hyphae/src/heap.rs
index 46f4aab..57674ad 100644
--- a/hyphae/src/heap.rs
+++ b/hyphae/src/heap.rs
@@ -18,6 +18,7 @@
 use core::ops::{Index, Deref, DerefMut};
 use core::ptr::NonNull;
 
+use alloc::vec;
 use alloc::rc::Rc;
 use alloc::vec::Vec;
 use alloc::boxed::Box;
@@ -25,6 +26,16 @@ use alloc::fmt::Debug;
 
 use organelle::Number;
 
+// Leading tag bytes for serialized datums. 0x00 - 0x06 are the number tags
+// written by organelle's Number serialization.
+pub const DATUM_BOOL_FALSE_TAG: u8 = 0x07;
+pub const DATUM_BOOL_TRUE_TAG: u8 = 0x08;
+pub const DATUM_CONS_TAG: u8 = 0x09;
+pub const DATUM_CHAR_TAG: u8 = 0x0A;
+pub const DATUM_STRING_TAG: u8 = 0x0B;
+pub const DATUM_BYTEVEC_TAG: u8 = 0x0C;
+pub const DATUM_VECTOR_TAG: u8 = 0x0D;
+
 /* NOTE
  * decided not to implement a cache or a singleton heap manager
  * because I did not want to involve a datatype that would add
@@ -175,6 +186,91 @@ impl Clone for Datum {
     }
 }
 
+/// Serializes a datum into its tagged byte representation.
+///
+/// Strings, byte vectors and vectors are written as tag, big-endian length,
+/// then contents. `Datum::None` serializes to no bytes at all.
+impl Into<Vec<u8>> for Datum {
+    fn into(self) -> Vec<u8> {
+        match self {
+            // 0x00 - 0x06
+            Datum::Number(n) => n.into(),
+            Datum::Bool(b) if !b => vec![DATUM_BOOL_FALSE_TAG],
+            Datum::Bool(b) if b => vec![DATUM_BOOL_TRUE_TAG],
+            Datum::Bool(_) => panic!("rustc somehow has a third bool!"),
+            Datum::Cons(c) => {
+                let mut v = vec![DATUM_CONS_TAG];
+                for i in Into::<Vec<u8>>::into(c).iter() {
+                    v.push(*i)
+                }
+                v
+            },
+            Datum::Char(c) => vec![DATUM_CHAR_TAG, c],
+            Datum::String(c) => {
+                let mut v = vec![DATUM_STRING_TAG];
+                for i in c.len().to_be_bytes().iter() {
+                    v.push(*i);
+                }
+                for i in c.iter() {
+                    v.push(*i);
+                }
+                v
+            },
+            Datum::ByteVector(c) => {
+                let mut v = vec![DATUM_BYTEVEC_TAG];
+                for i in c.len().to_be_bytes().iter() {
+                    v.push(*i);
+                }
+                for i in c.iter() {
+                    v.push(*i);
+                }
+                v
+            },
+            Datum::Vector(c) => {
+                let mut v = vec![DATUM_VECTOR_TAG];
+                for i in c.len().to_be_bytes().iter() {
+                    v.push(*i);
+                }
+                for i in c.iter() {
+                    let b = Into::<Vec<u8>>::into((**i).clone());
+                    for j in b.iter() {
+                        v.push(*j);
+                    }
+                }
+                v
+            },
+            Datum::None => vec![],
+        }
+    }
+}
+
+/// Deserializes a datum from its tagged byte representation.
+///
+/// Decoding of strings, byte vectors and vectors is not implemented yet and
+/// currently panics via `unimplemented!()`.
+impl TryFrom<&[u8]> for Datum {
+    type Error = &'static str;
+    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
+        // An empty slice has no tag byte; reject it instead of panicking on `value[0]`.
+        let tag = *value.first().ok_or("Could not unmarshal empty datum")?;
+        match tag {
+            0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 =>
+                Number::try_from(value).and_then(|a| Ok(Datum::Number(a))),
+            DATUM_BOOL_FALSE_TAG => Ok(Datum::Bool(false)),
+            DATUM_BOOL_TRUE_TAG => Ok(Datum::Bool(true)),
+            DATUM_CONS_TAG if value.len() > 2 =>
+                Cons::try_from(&value[1..])
+                    .and_then(|a| Ok(Datum::Cons(a))),
+            DATUM_CHAR_TAG if value.len() == 2 =>
+                Ok(Datum::Char(value[1])),
+            DATUM_STRING_TAG if value.len() > 2 => unimplemented!(),
+            DATUM_BYTEVEC_TAG if value.len() > 2 => unimplemented!(),
+            DATUM_VECTOR_TAG if value.len() > 2 => unimplemented!(),
+            _ => Err("Could not unmarshal unknown datum tag or bad format")
+        }
+    }
+}
+
 #[derive(Clone, PartialEq, Debug)]
 pub struct Cons(pub Option>, pub Option>);
 
@@ -293,6 +389,20 @@ impl Index for Cons {
     }
 }
 
+/// Serialization of cons cells is not implemented yet.
+impl Into<Vec<u8>> for Cons {
+    fn into(self) -> Vec<u8> {
+        unimplemented!("")
+    }
+}
+
+/// Deserialization of cons cells is not implemented yet.
+impl TryFrom<&[u8]> for Cons {
+    type Error = &'static str;
+    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
+        unimplemented!("")
+    }
+}
 
 #[cfg(test)]
 mod tests {
diff --git a/organelle/src/lib.rs b/organelle/src/lib.rs
index cec5dc5..5dc340d 100644
--- a/organelle/src/lib.rs
+++ b/organelle/src/lib.rs
@@ -21,6 +21,8 @@ extern crate alloc;
 use alloc::string::String;
 use alloc::format;
 use alloc::fmt::Debug;
+use alloc::vec;
+use alloc::vec::Vec;
 
 use core::{cmp::Ordering, f64, ops::{Add, Div, Mul, Sub}, str::FromStr};
 use num::{integer::{gcd}, pow::Pow};
@@ -250,6 +252,105 @@ impl FromStr for Number {
     }
 }
 
+// this looks rushed and it is
+// would rather work on organelles replacement than improve this
+/// Serializes a number as a one-byte tag followed by its big-endian fields:
+/// 0x00 scientific notation, 0x01 float, 0x02 fraction, and 0x03 - 0x06 for
+/// the symbolic values, which carry no payload.
+impl Into<Vec<u8>> for Number {
+    fn into(self) -> Vec<u8> {
+        let mut out = vec![];
+        match self {
+            Number::Sci(num) => {
+                out.push(0x00);
+                for ele in num.0.to_be_bytes().iter() {
+                    out.push(*ele);
+                }
+                for ele in num.1.to_be_bytes().iter() {
+                    out.push(*ele);
+                }
+                out
+            },
+            Number::Flt(num) => {
+                out.push(0x01 as u8);
+                for ele in num.0.to_be_bytes().iter() {
+                    out.push(*ele);
+                }
+                out
+            },
+            Number::Fra(num) => {
+                out.push(0x02);
+                for ele in num.0.to_be_bytes().iter() {
+                    out.push(*ele);
+                }
+                for ele in num.1.to_be_bytes().iter() {
+                    out.push(*ele);
+                }
+                out
+            },
+            Number::Sym(num) => {
+                match num {
+                    SymbolicNumber::Inf => out.push(0x03),
+                    SymbolicNumber::NaN => out.push(0x04),
+                    SymbolicNumber::NegInf => out.push(0x05),
+                    SymbolicNumber::NegNan => out.push(0x06),
+                }
+                out
+            }
+        }
+    }
+}
+
+// same as the Into impl
+/// Deserializes a number from the byte layout written by the `Into` impl.
+/// Bytes past the end of the encoded number are ignored.
+impl TryFrom<&[u8]> for Number {
+    type Error = &'static str;
+    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
+        // An empty slice has no tag byte; reject it instead of panicking on `value[0]`.
+        let tag = *value.first().ok_or("attempted to deserialize empty number")?;
+        match tag {
+            0x03 => Ok(Number::Sym(SymbolicNumber::Inf)),
+            0x04 => Ok(Number::Sym(SymbolicNumber::NaN)),
+            0x05 => Ok(Number::Sym(SymbolicNumber::NegInf)),
+            0x06 => Ok(Number::Sym(SymbolicNumber::NegNan)),
+            // NOTE(review): the 8-byte buffers below assume a 64-bit `isize`;
+            // `isize::from_be_bytes` takes exactly `size_of::<isize>()` bytes.
+            0x00 if value.len() >= (1 + 4 + (isize::BITS / 8)) as usize => {
+                let mut i: [u8; 4] = [0, 0, 0, 0];
+                value[1..5].iter().zip(i.iter_mut())
+                    .for_each(|(a, b)| { *b = *a });
+                let i = f32::from_be_bytes(i);
+
+                let mut j: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
+                value[5..13].iter().zip(j.iter_mut())
+                    .for_each(|(a, b)| { *b = *a });
+                let j = isize::from_be_bytes(j);
+                Ok(Number::Sci(ScientificNotation(i, j)))
+            },
+            0x01 if value.len() >= 9 as usize => {
+                let mut i: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
+                value[1..9].iter().zip(i.iter_mut())
+                    .for_each(|(a, b)| { *b = *a });
+                let i = f64::from_be_bytes(i);
+                Ok(Number::Flt(Float(i)))
+            },
+            0x02 if value.len() >= 1 + ((isize::BITS / 8) * 2) as usize => {
+                let mut i: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
+                value[1..9].iter().zip(i.iter_mut())
+                    .for_each(|(a, b)| { *b = *a });
+                let i = isize::from_be_bytes(i);
+                let mut j: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
+                value[9..17].iter().zip(j.iter_mut())
+                    .for_each(|(a, b)| { *b = *a });
+                let j = isize::from_be_bytes(j);
+                Ok(Number::Fra(Fraction(i, j)))
+            },
+            _ => Err("attempted to deserialize invalid number format")
+        }
+    }
+}
+
 impl Add for Number {
     type Output = Number;
     fn add(self, rhs: Self) -> Self::Output {
@@ -596,6 +697,25 @@ impl Numeric for ScientificNotation {
 mod tests {
     use super::*;
 
+    #[test]
+    fn serialize_deserialize_tests() {
+        let cases = vec![
+            "2/3".parse::<Number>().unwrap(),
+            "-4/5".parse::<Number>().unwrap(),
+            "2e45".parse::<Number>().unwrap(),
+            "1.2432566".parse::<Number>().unwrap(),
+            "+inf.0".parse::<Number>().unwrap(),
+        ];
+
+        for i in cases.iter() {
+            let j = Into::<Vec<u8>>::into(*i);
+            assert_eq!(*i, Number::try_from(j.as_slice()).unwrap());
+        }
+
+        let empty: &[u8] = &[];
+        assert!(Number::try_from(empty).is_err());
+    }
+
     #[test]
     fn parse_fraction_tests() {
         assert_eq!("2/3".parse::<Number>(),
diff --git a/readme.md b/readme.md
index 1b7ddb4..576233d 100644
--- a/readme.md
+++ b/readme.md
@@ -8,9 +8,28 @@ project: a POSIX shell interpreter as well as a compiled to bytecode language fo
 running on ESP32 devices.
 
 ## Current Status
-Currently the lexer and parser are implemented. On an X86 machine equipped with 64GB
-RAM and an AMD Ryzen 7900 CPU this lexer and parser are capable of creating a fully
+The lexer and parser are implemented. On an X86 machine equipped with 64GB RAM
+and an AMD Ryzen 7900 CPU this lexer and parser are capable of creating a fully
 validated abstract syntax tree from approximately 11200 lines of handwritten scheme
 in about 55 milliseconds on average.
 
-Currently the bytecode VM and its instruction set are next to implement.
+
+HyphaeVM is mostly implemented. The instruction set is defined and implemented,
+including extensibility interfaces and the VM layout. Additionally, instruction
+encoding and decoding are implemented. Garbage collection is implemented (via
+reference counting). Currently being implemented are datum encoding/decoding and
+full program encoding/decoding. Yet to be approached are debugging routines, CLI
+utilities, and concurrency features. However, documentation has been written on
+programming with HyphaeVM.
+
+
+The R7RS-Small Scheme to HyphaeVM compiler is not implemented.
+
+
+R7RS-Large is not implemented.
+
+
+The Linux/Mac/Windows runtime and extended compiler are not implemented.
+
+
+Documentation is not implemented.