diff --git a/Cargo.toml b/Cargo.toml index e69f2e1..df6ee70 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,13 @@ +cargo-features = ["profile-rustflags"] + [workspace] resolver = "2" members = ["mycelium", "decomposer", "hyphae", "organelle"] + +[profile.release] +opt-level = 3 +strip = true +lto = true +codegen-units = 1 +panic = "abort" +rustflags = [ "-Zlocation-detail=none", "-Zfmt-debug=none" ] diff --git a/hyphae/src/heap.rs b/hyphae/src/heap.rs index 46f4aab..835e536 100644 --- a/hyphae/src/heap.rs +++ b/hyphae/src/heap.rs @@ -18,8 +18,8 @@ use core::ops::{Index, Deref, DerefMut}; use core::ptr::NonNull; -use alloc::rc::Rc; use alloc::vec::Vec; +use alloc::rc::Rc; use alloc::boxed::Box; use alloc::fmt::Debug; @@ -293,7 +293,6 @@ impl Index for Cons { } } - #[cfg(test)] mod tests { use super::*; diff --git a/hyphae/src/lib.rs b/hyphae/src/lib.rs index fe4ed14..6fda5d2 100644 --- a/hyphae/src/lib.rs +++ b/hyphae/src/lib.rs @@ -21,7 +21,7 @@ pub mod hmap; pub mod stackstack; pub mod instr; pub mod vm; -pub mod util; +pub mod serializer; pub mod heap; extern crate alloc; diff --git a/hyphae/src/serializer.rs b/hyphae/src/serializer.rs new file mode 100644 index 0000000..15f5311 --- /dev/null +++ b/hyphae/src/serializer.rs @@ -0,0 +1,734 @@ +/* Mycelium Scheme + * Copyright (C) 2025 Ava Affine + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +use crate::heap::{Datum, Cons, Gc}; +use crate::instr::Operation; +use crate::vm::VM; +use crate::stackstack::StackStack; + +use organelle::{Number, Fraction, SymbolicNumber, Float, ScientificNotation}; + +use alloc::vec::Vec; +use alloc::vec; + +use core::ops::{Deref, Index}; +use core::mem::transmute; + +const US: usize = (usize::BITS / 8) as usize; + +#[repr(u8)] +#[derive(Debug, Clone, PartialEq)] +pub enum DeserializerControlCode { + SciNumber = 0x00, + FltNumber = 0x01, + FraNumber = 0x02, + SymInf = 0x03, + SymNan = 0x04, + SymNegInf = 0x05, + SymNegNan = 0x06, + BoolFalse = 0x07, + BoolTrue = 0x08, + Char = 0x09, + String = 0x0A, + ByteVec = 0x0B, + Vector = 0x0C, + EmptyCons = 0x0D, + LeftCons = 0x0E, + RightCons = 0x0F, + FullCons = 0x10, + DataChunk = 0x11, + CodeChunk = 0x12, +} + +#[repr(u8)] +#[derive(Debug, Clone, PartialEq)] +pub enum Address { + Stack = 0xf0, // immutable access only + Instr = 0xf1, // immutable access only + Expr = 0xf2, // mutable access allowed + Oper1 = 0xf3, // mutable access allowed + Oper2 = 0xf4, // mutable access allowed + Oper3 = 0xf5, // mutable access allowed + Oper4 = 0xf6, // mutable access allowed + Numer = 0xf8, // immutable access only + Bool = 0xf9, // immutable access only + Char = 0xfa, // immutable access only +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Deserializer<'a> { + pub input: &'a [u8], + // TODO: Debug levels for errors +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Operand(pub Address, pub usize); + +#[derive(Debug, Clone, PartialEq)] +pub struct Instruction(pub Operation, pub Vec); + +#[derive(Debug, Clone, PartialEq)] +pub struct Program(pub Vec); + +impl Into for Address { + fn into(self) -> u8 { + unsafe { transmute::(self) } + } +} + +impl TryFrom for Address { + type Error = &'static str; + fn try_from(val: u8) -> Result { + match val { + _ if val == Address::Stack as u8 => Ok(Address::Stack), + _ if val == Address::Instr as u8 => Ok(Address::Instr), + _ if val == 
Address::Expr as u8 => Ok(Address::Expr), + _ if val == Address::Oper1 as u8 => Ok(Address::Oper1), + _ if val == Address::Oper2 as u8 => Ok(Address::Oper2), + _ if val == Address::Oper3 as u8 => Ok(Address::Oper3), + _ if val == Address::Oper4 as u8 => Ok(Address::Oper4), + _ if val == Address::Numer as u8 => Ok(Address::Numer), + _ if val == Address::Bool as u8 => Ok(Address::Bool), + _ if val == Address::Char as u8 => Ok(Address::Char), + _ => Err("illegal addressing mode") + } + } +} + +impl Address { + fn operand_size(&self) -> u8 { + match self { + Address::Stack => (usize::BITS / 8) as u8, + Address::Instr => (usize::BITS / 8) as u8, + Address::Numer => (usize::BITS / 8) as u8, + Address::Bool => 1, + Address::Char => 1, + _ => 0, + } + } +} + +impl TryFrom<&[u8]> for Operand { + type Error = &'static str; + fn try_from(value: &[u8]) -> Result { + let addr_mode: Address = value[0].try_into()?; + let operand_size = addr_mode.operand_size(); + if value.len() < (operand_size + 1).into() { + return Err("truncated address data") + } + + let mut operand_bytes: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; + for (&src, dest) in value[1..(1+operand_size) as usize] + .iter() + .zip(operand_bytes.iter_mut()) { + *dest = src; + } + + Ok(Operand(addr_mode, usize::from_ne_bytes(operand_bytes))) + } +} + +impl Into> for Operand { + fn into(self) -> Vec { + let mut res = vec![]; + res.push(self.0.clone() as u8); + res.append(&mut self.1.to_ne_bytes()[..self.0.operand_size() as usize].to_vec()); + res + } +} + +impl Operand { + fn byte_length(&self) -> u8 { + 1 + self.0.operand_size() + } +} + +impl TryFrom<&[u8]> for Instruction { + type Error = &'static str; + fn try_from(value: &[u8]) -> Result { + let operation: Operation = value[0].try_into()?; + let mut operands: Vec = vec![]; + + let mut cur = 1; + for _ in 0..operation.num_args()? 
{ + if cur >= value.len() { + return Err("operand data truncated") + } + let operand: Operand = value[cur..].try_into()?; + cur += operand.byte_length() as usize; + operands.push(operand); + } + + Ok(Instruction(operation, operands)) + } +} + +impl Into> for Instruction { + fn into(self) -> Vec { + let mut res = vec![]; + res.push(self.0.0); + for op in self.1 { + res.append(&mut op.into()) + } + res + } +} + +impl Instruction { + fn byte_length(&self) -> u8 { + self.1.iter() + .fold(0, |total, oper| + total + oper.byte_length()) + 1 + } +} + +impl TryFrom<&[u8]> for Program { + type Error = &'static str; + fn try_from(value: &[u8]) -> Result { + let mut prog: Vec = vec![]; + let mut cur = 0; + + while cur < value.len() { + let instruction: Instruction = value[cur..].try_into()?; + cur += instruction.byte_length() as usize; + prog.push(instruction); + } + + Ok(Program(prog)) + } +} + +impl Into> for Program { + fn into(self) -> Vec { + let mut res: Vec = vec![]; + for instr in self.0 { + res.append(&mut instr.into()) + } + res + } +} + +impl<'a> Index for Program { + type Output = Instruction; + fn index(&self, index: usize) -> &Instruction { + self.0.get(index).expect("access to out of bounds instruction in vm") + } +} + +impl TryFrom for DeserializerControlCode { + type Error = &'static str; + fn try_from(value: u8) -> Result { + match value { + 0x00 => Ok(DeserializerControlCode::SciNumber), + 0x01 => Ok(DeserializerControlCode::FltNumber), + 0x02 => Ok(DeserializerControlCode::FraNumber), + 0x03 => Ok(DeserializerControlCode::SymInf), + 0x04 => Ok(DeserializerControlCode::SymNan), + 0x05 => Ok(DeserializerControlCode::SymNegInf), + 0x06 => Ok(DeserializerControlCode::SymNegNan), + 0x07 => Ok(DeserializerControlCode::BoolFalse), + 0x08 => Ok(DeserializerControlCode::BoolTrue), + 0x09 => Ok(DeserializerControlCode::Char), + 0x0A => Ok(DeserializerControlCode::String), + 0x0B => Ok(DeserializerControlCode::ByteVec), + 0x0C => Ok(DeserializerControlCode::Vector), + 
0x0D => Ok(DeserializerControlCode::EmptyCons), + 0x0E => Ok(DeserializerControlCode::LeftCons), + 0x0F => Ok(DeserializerControlCode::RightCons), + 0x10 => Ok(DeserializerControlCode::FullCons), + 0x11 => Ok(DeserializerControlCode::DataChunk), + 0x12 => Ok(DeserializerControlCode::CodeChunk), + _ => Err("invalid control code") + } + } +} + +impl<'a> From<&'a [u8]> for Deserializer<'a> { + fn from(value: &'a [u8]) -> Self { + Deserializer{input: value} + } +} + +impl TryInto for Deserializer<'_> { + type Error = &'static str; + + fn try_into(self) -> Result { + let mut cur: usize = 0; + let mut stack: Option>> = None; + let mut program: Option = None; + + loop { + if cur >= self.input.len() { + break + } + + let b = self.input[cur]; + cur += 1; + + match DeserializerControlCode::try_from(b) { + Err(_) => return Err("executable contains data without a control code"), + Ok(ctrl) => match ctrl { + DeserializerControlCode::DataChunk => stack = Some(self.parse_data_chunk(cur)?), + DeserializerControlCode::CodeChunk => program = Some(self.parse_code_chunk(cur)?), + _ => return Err("executable contains stray data not in demarcated chunk"), + } + } + } + + if program.is_none() { + Err("executable does not contain a data chunk") + } else { + Ok(VM::new_with_opts(program.unwrap(), None, stack, None)) + } + } +} + +impl Deserializer<'_> { + fn parse_data_chunk(&self, start: usize) -> Result>, &'static str> { + let mut stack: StackStack> = StackStack::new(); + let mut cur = start; + + while cur <= self.input.len() { + let item = self.deserialize_datum(cur)?; + stack.push_current_stack(item.clone().into()); + cur += datum_byte_length(&item); + } + + Ok(stack) + } + + fn parse_code_chunk(&self, _start: usize) -> Result { + todo!() + } + + fn deserialize_number(&self, start: usize) -> Result { + let here = &self.input[start..]; + match DeserializerControlCode::try_from(here[0])? 
{ + DeserializerControlCode::SymInf => Ok(Number::Sym(SymbolicNumber::Inf)), + DeserializerControlCode::SymNan => Ok(Number::Sym(SymbolicNumber::NaN)), + DeserializerControlCode::SymNegInf => Ok(Number::Sym(SymbolicNumber::NegInf)), + DeserializerControlCode::SymNegNan => Ok(Number::Sym(SymbolicNumber::NegNan)), + DeserializerControlCode::SciNumber if here.len() >= (1 + 4 + (isize::BITS / 8)) as usize => { + let i = f32::from_be_bytes(here[1..5].try_into().unwrap()); + let j = isize::from_be_bytes(here[5..13].try_into().unwrap()); + Ok(Number::Sci(ScientificNotation(i, j))) + }, + DeserializerControlCode::FltNumber if here.len() >= 9 as usize => { + let i = f64::from_be_bytes(here[1..9].try_into().unwrap()); + Ok(Number::Flt(Float(i))) + }, + DeserializerControlCode::FraNumber if here.len() >= 1 + ((isize::BITS / 8) * 2) as usize => { + let i = isize::from_be_bytes(here[1..9].try_into().unwrap()); + let j = isize::from_be_bytes(here[9..17].try_into().unwrap()); + Ok(Number::Fra(Fraction(i, j))) + }, + _ => Err("attempted to deserialize invalid number format") + } + } + + fn deserialize_datum(&self, start: usize) -> Result { + match DeserializerControlCode::try_from(self.input[start])? 
{ + DeserializerControlCode::SciNumber | + DeserializerControlCode::FltNumber | + DeserializerControlCode::FraNumber | + DeserializerControlCode::SymInf | + DeserializerControlCode::SymNan | + DeserializerControlCode::SymNegInf | + DeserializerControlCode::SymNegNan => + self.deserialize_number(start).and_then(|num| Ok(Datum::Number(num))), + + DeserializerControlCode::BoolFalse => Ok(Datum::Bool(false)), + DeserializerControlCode::BoolTrue => Ok(Datum::Bool(true)), + + DeserializerControlCode::EmptyCons if self.input.len() - start >= 1 => + Ok(Datum::Cons(Cons(None, None))), + + DeserializerControlCode::Char if self.input.len() - start >= 2 => + Ok(Datum::Char(self.input[start + 1])), + + DeserializerControlCode::String if self.input.len() - start >= (1 + US) => { + let len = usize::from_be_bytes(self.input[start + 1..(start + 1 + US)].try_into().unwrap()); + if len < 1 { + Ok(Datum::String(vec![])) + } else if self.input.len() - (start + 1 + US) < len { + Err("String vector backing is corrupted or truncated!") + } else { + Ok(Datum::String(self.input[(start + 1 + US).. (start + 1 + US + len)].to_vec())) + } + }, + + DeserializerControlCode::ByteVec if self.input.len() - start >= (1 + US) => { + let len = usize::from_be_bytes(self.input[start + 1..(start + 1 + US)].try_into().unwrap()); + if len < 1 { + Ok(Datum::ByteVector(vec![])) + } else if self.input.len() - (start + 1 + US) < len { + Err("ByteVector vector backing is corrupted or truncated!") + } else { + Ok(Datum::ByteVector(self.input[(start + 1 + US).. 
(start + 1 + US + len)].to_vec())) + } + }, + + DeserializerControlCode::Vector if self.input.len() >= (start + 1 + US) => { + let len = usize::from_be_bytes(self.input[start + 1..(start + 1 + US)].try_into().unwrap()); + if len < 1 { + Ok(Datum::Vector(vec![])) + } else { + let mut cursor: usize = 1 + US; + let mut ovec: Vec> = vec![]; + for _ in 0..len { + ovec.push(self.deserialize_datum(start + cursor)?.into()); + cursor += datum_byte_length(ovec.last().unwrap()); + } + Ok(Datum::Vector(ovec)) + } + }, + + DeserializerControlCode::LeftCons if self.input.len() - start >= 2 => + Ok(Datum::Cons(Cons(Some(self.deserialize_datum(start + 1)?.into()), None))), + DeserializerControlCode::RightCons if self.input.len() - start >= 2 => + Ok(Datum::Cons(Cons(None, Some(self.deserialize_datum(start + 1)?.into())))), + DeserializerControlCode::FullCons if self.input.len() - start >= 3 => { + let lop = self.deserialize_datum(start + 1)?; + let next = start + 1 + datum_byte_length(&lop); + let rop = self.deserialize_datum(next)?; + Ok(Datum::Cons(Cons(Some(lop.into()), Some(rop.into())))) + } + + _ => Err("Deserializer Control Code not valid in this context") + } + } +} + +fn datum_byte_length(input: &Datum) -> usize { + match input { + Datum::None => 0, + Datum::Bool(_) => 1, + Datum::Char(_) => 2, + // This will need to change with organelle + Datum::Number(n) => match n { + Number::Sym(_) => 1 as usize, + Number::Flt(_) => 1 + 8 as usize, + Number::Sci(_) => 1 + 4 + (isize::BITS / 8) as usize, + Number::Fra(_) => 1 + ((usize::BITS / 8) * 2) as usize, + }, + Datum::String(s) => 1 + US + s.len(), + Datum::ByteVector(s) => 1 + US + s.len(), + Datum::Vector(s) => { + let mut c = 1 + US; + for i in s.iter() { + c += datum_byte_length(i); + } + c + }, + Datum::Cons(c) => { + let mut size = 1; + c.0.as_ref().and_then(|x| { + size += datum_byte_length(x.deref()); + Some(()) + }); + + c.1.as_ref().and_then(|x| { + size += datum_byte_length(x.deref()); + Some(()) + }); + + size + }, 
+ } +} + +pub fn serialize_datum(input: &Datum) -> Vec { + match input { + Datum::Number(n) => { + let mut out: Vec = vec![]; + match n { + Number::Sci(num) => { + out.push(DeserializerControlCode::SciNumber as u8); + for ele in num.0.to_be_bytes().iter() { + out.push(*ele); + } + for ele in num.1.to_be_bytes().iter() { + out.push(*ele); + } + out + }, + Number::Flt(num) => { + out.push(DeserializerControlCode::FltNumber as u8); + for ele in num.0.to_be_bytes().iter() { + out.push(*ele); + } + out + }, + Number::Fra(num) => { + out.push(DeserializerControlCode::FraNumber as u8); + for ele in num.0.to_be_bytes().iter() { + out.push(*ele); + } + for ele in num.1.to_be_bytes().iter() { + out.push(*ele); + } + out + }, + Number::Sym(num) => { + match num { + SymbolicNumber::Inf => out.push(DeserializerControlCode::SymInf as u8), + SymbolicNumber::NaN => out.push(DeserializerControlCode::SymNan as u8), + SymbolicNumber::NegInf => out.push(DeserializerControlCode::SymNegInf as u8), + SymbolicNumber::NegNan => out.push(DeserializerControlCode::SymNegNan as u8), + } + out + } + } + }, + Datum::Bool(b) if !*b => vec![DeserializerControlCode::BoolFalse as u8], + Datum::Bool(b) if *b => vec![DeserializerControlCode::BoolTrue as u8], + Datum::Bool(_) => panic!("rustc somehow has a third bool!"), + Datum::Cons(c) => { + if let Some(lop) = &c.0 { + if let Some(rop) = &c.1 { + let mut out = vec![DeserializerControlCode::FullCons as u8]; + out.append(&mut serialize_datum(&lop)); + out.append(&mut serialize_datum(&rop)); + out + } else { + let mut out = vec![DeserializerControlCode::LeftCons as u8]; + out.append(&mut serialize_datum(&lop)); + out + } + } else { + if let Some(rop) = &c.1 { + let mut out = vec![DeserializerControlCode::RightCons as u8]; + out.append(&mut serialize_datum(&rop)); + out + } else { + vec![DeserializerControlCode::EmptyCons as u8] + } + } + }, + Datum::Char(c) => vec![DeserializerControlCode::Char as u8, *c], + Datum::String(c) => { + let mut v = 
vec![DeserializerControlCode::String as u8]; + for i in c.len().to_be_bytes().iter() { + v.push(*i); + } + for i in c.iter() { + v.push(*i); + } + v + }, + Datum::ByteVector(c) => { + let mut v = vec![DeserializerControlCode::ByteVec as u8]; + for i in c.len().to_be_bytes().iter() { + v.push(*i); + } + for i in c.iter() { + v.push(*i); + } + v + }, + Datum::Vector(c) => { + let mut v = vec![DeserializerControlCode::Vector as u8]; + for i in c.len().to_be_bytes().iter() { + v.push(*i); + } + for i in c.iter() { + let b = serialize_datum(i); + for j in b.iter() { + v.push(*j); + } + } + v + }, + Datum::None => vec![], + } +} + + +#[cfg(test)] +mod tests { + use crate::instr; + use super::*; + + #[test] + fn test_operand_parse() { + let bad_addressing = + TryInto::::try_into(&[0x13, 0x39][..]); + assert_eq!(bad_addressing, Err("illegal addressing mode")); + + let truncated_address = + TryInto::::try_into(&[0xf1][..]); + assert_eq!(truncated_address, Err("truncated address data")); + + let usize_case = + TryInto::::try_into(&[Address::Stack.into(), + 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23][..]); + assert!(usize_case.is_ok()); + assert_eq!(usize_case.unwrap().0, Address::Stack); + + let register_operand = Operand(Address::Expr, 0); + let operand_byte_arr = + TryInto::>::try_into(register_operand.clone()); + assert!(operand_byte_arr.is_ok()); + let br = operand_byte_arr.unwrap(); + let operand_bytes = br.as_slice(); + assert_eq!(operand_bytes, &[0xf2][..]); + let operand_conv = + TryInto::::try_into(operand_bytes); + assert!(operand_conv.is_ok()); + assert_eq!(register_operand, operand_conv.unwrap()); + } + + #[test] + fn test_instruction_parse() { + let illegal_instruction = + TryInto::::try_into(&[0x88][..]); + assert_eq!(illegal_instruction, Err("illegal instruction")); + + let bad_operand = + TryInto::::try_into(&[instr::TRAP.0, 0xf1][..]); + assert_eq!(bad_operand, Err("truncated address data")); + + let need_more_opers = + 
TryInto::::try_into(&[instr::TRAP.0][..]); + assert_eq!(need_more_opers, Err("operand data truncated")); + + let no_operands = + TryInto::::try_into(&[instr::POP.0][..]); + assert!(no_operands.is_ok()); + let nop = no_operands.unwrap(); + assert_eq!(nop.0, instr::POP); + let nop_bytes = + TryInto::>::try_into(nop); + assert!(nop_bytes.is_ok()); + assert_eq!(nop_bytes.unwrap(), vec![instr::POP.0]); + + let one_operand = + TryInto::::try_into(&[instr::TRAP.0, 0xf3][..]); + assert!(one_operand.is_ok()); + let oe_oper = one_operand.unwrap(); + assert_eq!(oe_oper.0, instr::TRAP); + assert_eq!(oe_oper.1.len(), 1); + assert_eq!(oe_oper.1[0], Operand(Address::Oper1, 0)); + let oe_bytes = + TryInto::>::try_into(oe_oper); + assert!(oe_bytes.is_ok()); + assert_eq!(oe_bytes.unwrap(), vec![instr::TRAP.0, 0xf3]); + + let two_operands = + TryInto::::try_into(&[instr::LINK.0, 0xf3, 0xf4][..]); + assert!(two_operands.is_ok()); + let two_oper = two_operands.unwrap(); + assert_eq!(two_oper.0, instr::LINK); + assert_eq!(two_oper.1.len(), 2); + let two_bytes = + TryInto::>::try_into(two_oper.clone()); + assert!(two_bytes.is_ok()); + assert_eq!(two_bytes.unwrap(), vec![instr::LINK.0, 0xf3, 0xf4]); + assert_eq!(two_oper.1[0], Operand(Address::Oper1, 0)); + assert_eq!(two_oper.1[1], Operand(Address::Oper2, 0)); + } + + #[test] + fn test_program_parse() { + let bytes1 = [instr::LINK.0, 0xf3, 0xf4]; + let out1 = vec![Instruction(instr::LINK, + vec![Operand(Address::Oper1, 0), Operand(Address::Oper2, 0)])]; + let res1 = + TryInto::::try_into(&bytes1[..]); + assert!(res1.is_ok()); + assert_eq!(res1.unwrap().0, out1); + + let bytes2 = [ + instr::LINK.0, 0xf3, 0xf4, + instr::CLEAR.0, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + ]; + let out2 = vec![ + Instruction(instr::LINK, vec![ + Operand(Address::Oper1, 0), + Operand(Address::Oper2, 0) + ]), + + Instruction(instr::CLEAR, vec![ + Operand(Address::Stack, 1) + ]) + ]; + let res2 = + TryInto::::try_into(&bytes2[..]); + 
assert!(res2.is_ok()); + assert_eq!(res2.unwrap().0, out2); + } + + #[test] + fn test_serializer_control_code_consistency() { + let mut input: u8 = 0x00; + loop { + if DeserializerControlCode::try_from(input) + .and_then(|x| Ok(assert!(x as u8 == input))) + .is_err() { + break; + } + + input += 1; + } + } + + #[test] + fn serialize_deserialize_number_tests() { + let cases = vec![ + "2/3".parse::().unwrap(), + "-4/5".parse::().unwrap(), + "2e45".parse::().unwrap(), + "1.2432566".parse::().unwrap(), + "+inf.0".parse::().unwrap(), + ]; + + for i in cases.iter() { + let j = Deserializer{input: &serialize_datum(&Datum::Number(*i))}; + assert_eq!(*i, j.deserialize_number(0).unwrap()); + } + } + + #[test] + fn serialize_deserialize_cons_tests() { + let cases = vec![ + Datum::Cons(Cons(Some(Datum::Bool(true).into()), Some(Datum::Bool(false).into()))), + Datum::Cons(Cons(None, Some(Datum::Bool(true).into()))), + Datum::Cons(Cons(Some(Datum::Bool(true).into()), None)), + Datum::Cons(Cons(None, None)), + Datum::Cons(Cons(Some(Datum::Cons(Cons(None, Some(Datum::Bool(false).into()))).into()), None)), + ]; + + for i in cases.iter() { + let j = Deserializer{input: &serialize_datum(i)}; + assert_eq!(*i, j.deserialize_datum(0).unwrap()); + } + } + + #[test] + fn serialize_deserialize_vector_tests() { + let cases = vec![ + Datum::Vector(vec![Datum::Bool(true).into(), Datum::Bool(true).into(), Datum::Bool(false).into()]), + Datum::Vector(vec![]), + Datum::Vector(vec![Datum::Vector(vec![Datum::Bool(true).into()]).into(), Datum::Bool(false).into()]), + ]; + + for i in cases.iter() { + let j = Deserializer{input: &serialize_datum(i)}; + assert_eq!(*i, j.deserialize_datum(0).unwrap()); + } + } +} diff --git a/hyphae/src/util.rs b/hyphae/src/util.rs deleted file mode 100644 index e699424..0000000 --- a/hyphae/src/util.rs +++ /dev/null @@ -1,309 +0,0 @@ -/* Mycelium Scheme - * Copyright (C) 2025 Ava Affine - * - * This program is free software: you can redistribute it and/or modify - * it 
under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -use crate::instr::Operation; - -use alloc::vec::Vec; -use alloc::vec; - -use core::ops::Index; -use core::mem::transmute; - -#[repr(u8)] -#[derive(Debug, Clone, PartialEq)] -pub enum Address { - Stack = 0xf0, // immutable access only - Instr = 0xf1, // immutable access only - Expr = 0xf2, // mutable access allowed - Oper1 = 0xf3, // mutable access allowed - Oper2 = 0xf4, // mutable access allowed - Oper3 = 0xf5, // mutable access allowed - Oper4 = 0xf6, // mutable access allowed - Numer = 0xf8, // immutable access only - Bool = 0xf9, // immutable access only - Char = 0xfa, // immutable access only -} - -#[derive(Debug, Clone, PartialEq)] -pub struct Operand(pub Address, pub usize); - -#[derive(Debug, Clone, PartialEq)] -pub struct Instruction(pub Operation, pub Vec); - -#[derive(Debug, Clone, PartialEq)] -pub struct Program(pub Vec); - -impl Into for Address { - fn into(self) -> u8 { - unsafe { transmute::(self) } - } -} - -impl TryFrom for Address { - type Error = &'static str; - fn try_from(val: u8) -> Result { - match val { - _ if val == Address::Stack as u8 => Ok(Address::Stack), - _ if val == Address::Instr as u8 => Ok(Address::Instr), - _ if val == Address::Expr as u8 => Ok(Address::Expr), - _ if val == Address::Oper1 as u8 => Ok(Address::Oper1), - _ if val == Address::Oper2 as u8 => Ok(Address::Oper2), - _ if val == Address::Oper3 as u8 => Ok(Address::Oper3), - _ if val == Address::Oper4 as u8 => 
Ok(Address::Oper4), - _ if val == Address::Numer as u8 => Ok(Address::Numer), - _ if val == Address::Bool as u8 => Ok(Address::Bool), - _ if val == Address::Char as u8 => Ok(Address::Char), - _ => Err("illegal addressing mode") - } - } -} - -impl Address { - fn operand_size(&self) -> u8 { - match self { - Address::Stack => (usize::BITS / 8) as u8, - Address::Instr => (usize::BITS / 8) as u8, - Address::Numer => (usize::BITS / 8) as u8, - Address::Bool => 1, - Address::Char => 1, - _ => 0, - } - } -} - -impl TryFrom<&[u8]> for Operand { - type Error = &'static str; - fn try_from(value: &[u8]) -> Result { - let addr_mode: Address = value[0].try_into()?; - let operand_size = addr_mode.operand_size(); - if value.len() < (operand_size + 1).into() { - return Err("truncated address data") - } - - let mut operand_bytes: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; - for (&src, dest) in value[1..(1+operand_size) as usize] - .iter() - .zip(operand_bytes.iter_mut()) { - *dest = src; - } - - Ok(Operand(addr_mode, usize::from_ne_bytes(operand_bytes))) - } -} - -impl Into> for Operand { - fn into(self) -> Vec { - let mut res = vec![]; - res.push(self.0.clone() as u8); - res.append(&mut self.1.to_ne_bytes()[..self.0.operand_size() as usize].to_vec()); - res - } -} - -impl Operand { - fn byte_length(&self) -> u8 { - 1 + self.0.operand_size() - } -} - -impl TryFrom<&[u8]> for Instruction { - type Error = &'static str; - fn try_from(value: &[u8]) -> Result { - let operation: Operation = value[0].try_into()?; - let mut operands: Vec = vec![]; - - let mut cur = 1; - for _ in 0..operation.num_args()? 
{ - if cur >= value.len() { - return Err("operand data truncated") - } - let operand: Operand = value[cur..].try_into()?; - cur += operand.byte_length() as usize; - operands.push(operand); - } - - Ok(Instruction(operation, operands)) - } -} - -impl Into> for Instruction { - fn into(self) -> Vec { - let mut res = vec![]; - res.push(self.0.0); - for op in self.1 { - res.append(&mut op.into()) - } - res - } -} - -impl Instruction { - fn byte_length(&self) -> u8 { - self.1.iter() - .fold(0, |total, oper| - total + oper.byte_length()) + 1 - } -} - -impl TryFrom<&[u8]> for Program { - type Error = &'static str; - fn try_from(value: &[u8]) -> Result { - let mut prog: Vec = vec![]; - let mut cur = 0; - - while cur < value.len() { - let instruction: Instruction = value[cur..].try_into()?; - cur += instruction.byte_length() as usize; - prog.push(instruction); - } - - Ok(Program(prog)) - } -} - -impl Into> for Program { - fn into(self) -> Vec { - let mut res: Vec = vec![]; - for instr in self.0 { - res.append(&mut instr.into()) - } - res - } -} - -impl<'a> Index for Program { - type Output = Instruction; - fn index(&self, index: usize) -> &Instruction { - self.0.get(index).expect("access to out of bounds instruction in vm") - } -} - - -#[cfg(test)] -mod tests { - use crate::instr; - use super::*; - - #[test] - fn test_operand_parse() { - let bad_addressing = - TryInto::::try_into(&[0x13, 0x39][..]); - assert_eq!(bad_addressing, Err("illegal addressing mode")); - - let truncated_address = - TryInto::::try_into(&[0xf1][..]); - assert_eq!(truncated_address, Err("truncated address data")); - - let usize_case = - TryInto::::try_into(&[Address::Stack.into(), - 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23][..]); - assert!(usize_case.is_ok()); - assert_eq!(usize_case.unwrap().0, Address::Stack); - - let register_operand = Operand(Address::Expr, 0); - let operand_byte_arr = - TryInto::>::try_into(register_operand.clone()); - assert!(operand_byte_arr.is_ok()); - let br = 
operand_byte_arr.unwrap(); - let operand_bytes = br.as_slice(); - assert_eq!(operand_bytes, &[0xf2][..]); - let operand_conv = - TryInto::::try_into(operand_bytes); - assert!(operand_conv.is_ok()); - assert_eq!(register_operand, operand_conv.unwrap()); - } - - #[test] - fn test_instruction_parse() { - let illegal_instruction = - TryInto::::try_into(&[0x88][..]); - assert_eq!(illegal_instruction, Err("illegal instruction")); - - let bad_operand = - TryInto::::try_into(&[instr::TRAP.0, 0xf1][..]); - assert_eq!(bad_operand, Err("truncated address data")); - - let need_more_opers = - TryInto::::try_into(&[instr::TRAP.0][..]); - assert_eq!(need_more_opers, Err("operand data truncated")); - - let no_operands = - TryInto::::try_into(&[instr::POP.0][..]); - assert!(no_operands.is_ok()); - let nop = no_operands.unwrap(); - assert_eq!(nop.0, instr::POP); - let nop_bytes = - TryInto::>::try_into(nop); - assert!(nop_bytes.is_ok()); - assert_eq!(nop_bytes.unwrap(), vec![instr::POP.0]); - - let one_operand = - TryInto::::try_into(&[instr::TRAP.0, 0xf3][..]); - assert!(one_operand.is_ok()); - let oe_oper = one_operand.unwrap(); - assert_eq!(oe_oper.0, instr::TRAP); - assert_eq!(oe_oper.1.len(), 1); - assert_eq!(oe_oper.1[0], Operand(Address::Oper1, 0)); - let oe_bytes = - TryInto::>::try_into(oe_oper); - assert!(oe_bytes.is_ok()); - assert_eq!(oe_bytes.unwrap(), vec![instr::TRAP.0, 0xf3]); - - let two_operands = - TryInto::::try_into(&[instr::LINK.0, 0xf3, 0xf4][..]); - assert!(two_operands.is_ok()); - let two_oper = two_operands.unwrap(); - assert_eq!(two_oper.0, instr::LINK); - assert_eq!(two_oper.1.len(), 2); - let two_bytes = - TryInto::>::try_into(two_oper.clone()); - assert!(two_bytes.is_ok()); - assert_eq!(two_bytes.unwrap(), vec![instr::LINK.0, 0xf3, 0xf4]); - assert_eq!(two_oper.1[0], Operand(Address::Oper1, 0)); - assert_eq!(two_oper.1[1], Operand(Address::Oper2, 0)); - } - - #[test] - fn test_program_parse() { - let bytes1 = [instr::LINK.0, 0xf3, 0xf4]; - let out1 = 
vec![Instruction(instr::LINK, - vec![Operand(Address::Oper1, 0), Operand(Address::Oper2, 0)])]; - let res1 = - TryInto::::try_into(&bytes1[..]); - assert!(res1.is_ok()); - assert_eq!(res1.unwrap().0, out1); - - let bytes2 = [ - instr::LINK.0, 0xf3, 0xf4, - instr::CLEAR.0, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - ]; - let out2 = vec![ - Instruction(instr::LINK, vec![ - Operand(Address::Oper1, 0), - Operand(Address::Oper2, 0) - ]), - - Instruction(instr::CLEAR, vec![ - Operand(Address::Stack, 1) - ]) - ]; - let res2 = - TryInto::::try_into(&bytes2[..]); - assert!(res2.is_ok()); - assert_eq!(res2.unwrap().0, out2); - } -} diff --git a/hyphae/src/vm.rs b/hyphae/src/vm.rs index 7c247fe..9d6a9be 100644 --- a/hyphae/src/vm.rs +++ b/hyphae/src/vm.rs @@ -21,7 +21,7 @@ use organelle::{Fraction, Number, Numeric}; use crate::hmap::QuickMap; use crate::stackstack::StackStack; use crate::instr as i; -use crate::util::{Operand, Program, Address}; +use crate::serializer::{Operand, Program, Address}; use crate::heap::{Gc, Datum, Cons}; use core::ops::DerefMut; @@ -88,7 +88,7 @@ impl VM { .with_stack(stack) .with_symbols(syms) .with_traps(traps) - .to_owned() // not efficient, but we are not executing + .to_owned() // not efficient } pub fn with_stack( @@ -616,7 +616,7 @@ impl VM { mod tests { use super::*; use crate::instr as i; - use crate::util::{Program, Instruction, Operand}; + use crate::serializer::{Program, Instruction, Operand}; use core::ops::Deref; use organelle::Float; diff --git a/readme.md b/readme.md index 1b7ddb4..576233d 100644 --- a/readme.md +++ b/readme.md @@ -8,9 +8,28 @@ project: a POSIX shell interpreter as well as a compiled to bytecode language fo running on ESP32 devices. ## Current Status -Currently the lexer and parser are implemented. On an X86 machine equipped with 64GB -RAM and an AMD Ryzen 7900 CPU this lexer and parser are capable of creating a fully +The lexer and parser are implemented. 
On an X86 machine equipped with 64GB RAM +and an AMD Ryzen 7900 CPU this lexer and parser are capable of creating a fully validated abstract syntax tree from approximately 11200 lines of handwritten scheme in about 55 milliseconds on average. -Currently the bytecode VM and its instruction set are next to implement. + +HyphaeVM is mostly implemented. The instruction set is defined and implemented, +including extensibility interfaces and the VM layout. Additionally, instruction +encoding and decoding are implemented. Garbage collection is implemented (via +reference counting). Currently being implemented are datum encoding/decoding and +full program encoding/decoding. Yet to be approached are debugging routines, CLI +utilities, and concurrency features. However, documentation has been written on +programming with HyphaeVM. + + +The R7RS-Small Scheme to HyphaeVM compiler is not implemented. + + +R7RS-Large is not implemented. + + +The Linux/Mac/Windows runtime and extended compiler are not implemented. + + +Documentation is not implemented.