serialization/deserialization of datum in VM
All checks were successful
per-push tests / build (push) Successful in 1m34s
per-push tests / test-frontend (push) Successful in 41s
per-push tests / test-utility (push) Successful in 45s
per-push tests / test-backend (push) Successful in 44s
per-push tests / timed-decomposer-parse (push) Successful in 50s
All checks were successful
per-push tests / build (push) Successful in 1m34s
per-push tests / test-frontend (push) Successful in 41s
per-push tests / test-utility (push) Successful in 45s
per-push tests / test-backend (push) Successful in 44s
per-push tests / timed-decomposer-parse (push) Successful in 50s
This commit adds logic to serialize and deserialize datum, as well as the start of some total binary format. It implements serialize and deserialize routines per datum type. Tests are included for complex cases. Similar code existed in the organelle package, which was then centralized here. Additionally, this commit makes release target binaries smaller and faster. Signed-off-by: Ava Affine <ava@sunnypup.io>
This commit is contained in:
parent
0f85292e6f
commit
389bf6e9a0
6 changed files with 471 additions and 84 deletions
|
|
@ -15,15 +15,20 @@
|
|||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use crate::serializer::DeserializerControlCode;
|
||||
|
||||
use core::ops::{Index, Deref, DerefMut};
|
||||
use core::ptr::NonNull;
|
||||
|
||||
use alloc::{vec, vec::Vec};
|
||||
use alloc::rc::Rc;
|
||||
use alloc::vec::Vec;
|
||||
use alloc::boxed::Box;
|
||||
use alloc::fmt::Debug;
|
||||
|
||||
use organelle::Number;
|
||||
use organelle::{Number, Fraction, SymbolicNumber, Float, ScientificNotation};
|
||||
|
||||
const US: usize = (usize::BITS / 8) as usize;
|
||||
const IS: usize = (isize::BITS / 8) as usize;
|
||||
|
||||
/* NOTE
|
||||
* decided not to implement a cache or a singleton heap manager
|
||||
|
|
@ -141,40 +146,6 @@ impl<T: Clone> Gc<T> {
|
|||
}
|
||||
}
|
||||
|
||||
/// A single value handled by the VM.
///
/// `Clone` is not derived for this type; it is implemented by hand so that
/// cloning a `Cons` datum performs a deep copy.
#[derive(PartialEq, Debug)]
|
||||
pub enum Datum {
|
||||
/// Numeric value; `Number` comes from the `organelle` crate.
Number(Number),
|
||||
/// Boolean value.
Bool(bool),
|
||||
/// Pair cell; see `Cons`.
Cons(Cons),
|
||||
/// Single character stored as one byte.
Char(u8),
|
||||
/// String contents stored as raw bytes.
String(Vec<u8>),
|
||||
/// Sequence of data, each element held through a `Gc` handle.
Vector(Vec<Gc<Datum>>),
|
||||
/// Sequence of raw bytes.
ByteVector(Vec<u8>),
|
||||
/// Absence of a value.
None
|
||||
}
|
||||
|
||||
// implemented by hand to force deep copy on Cons datum
|
||||
impl Clone for Datum {
|
||||
fn clone(&self) -> Datum {
|
||||
match self {
|
||||
Datum::Number(n) => Datum::Number(n.clone()),
|
||||
Datum::Bool(n) => Datum::Bool(n.clone()),
|
||||
Datum::Cons(n) => Datum::Cons(n.deep_copy()),
|
||||
Datum::Char(n) => Datum::Char(n.clone()),
|
||||
Datum::String(n) => Datum::String(n.clone()),
|
||||
Datum::Vector(n) =>
|
||||
Datum::Vector(n.clone()),
|
||||
Datum::ByteVector(n) =>
|
||||
Datum::ByteVector(n.clone()),
|
||||
Datum::None => Datum::None,
|
||||
}
|
||||
}
|
||||
|
||||
fn clone_from(&mut self, source: &Self) {
|
||||
*self = source.clone();
|
||||
}
|
||||
}
|
||||
|
||||
/// A pair of optional `Gc<Datum>` handles.
///
/// Field `0` is the left slot and field `1` is the right slot; either slot
/// may be empty (`None`).
#[derive(Clone, PartialEq, Debug)]
|
||||
pub struct Cons(pub Option<Gc<Datum>>, pub Option<Gc<Datum>>);
|
||||
|
||||
|
|
@ -293,6 +264,260 @@ impl Index<usize> for Cons {
|
|||
}
|
||||
}
|
||||
|
||||
/// A single value handled by the VM.
///
/// `Clone` is not derived for this type; it is implemented by hand so that
/// cloning a `Cons` datum performs a deep copy.
#[derive(PartialEq, Debug)]
|
||||
pub enum Datum {
|
||||
/// Numeric value; `Number` comes from the `organelle` crate.
Number(Number),
|
||||
/// Boolean value.
Bool(bool),
|
||||
/// Pair cell; see `Cons`.
Cons(Cons),
|
||||
/// Single character stored as one byte.
Char(u8),
|
||||
/// String contents stored as raw bytes.
String(Vec<u8>),
|
||||
/// Sequence of data, each element held through a `Gc` handle.
Vector(Vec<Gc<Datum>>),
|
||||
/// Sequence of raw bytes.
ByteVector(Vec<u8>),
|
||||
/// Absence of a value.
None
|
||||
}
|
||||
|
||||
// implemented by hand to force deep copy on Cons datum
|
||||
impl Clone for Datum {
|
||||
fn clone(&self) -> Datum {
|
||||
match self {
|
||||
Datum::Number(n) => Datum::Number(n.clone()),
|
||||
Datum::Bool(n) => Datum::Bool(n.clone()),
|
||||
Datum::Cons(n) => Datum::Cons(n.deep_copy()),
|
||||
Datum::Char(n) => Datum::Char(n.clone()),
|
||||
Datum::String(n) => Datum::String(n.clone()),
|
||||
Datum::Vector(n) =>
|
||||
Datum::Vector(n.clone()),
|
||||
Datum::ByteVector(n) =>
|
||||
Datum::ByteVector(n.clone()),
|
||||
Datum::None => Datum::None,
|
||||
}
|
||||
}
|
||||
|
||||
fn clone_from(&mut self, source: &Self) {
|
||||
*self = source.clone();
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Vec<u8>> for Datum {
|
||||
fn into(self) -> Vec<u8> {
|
||||
match self {
|
||||
Datum::Number(n) => {
|
||||
let mut out: Vec<u8> = vec![];
|
||||
match n {
|
||||
Number::Sci(num) => {
|
||||
out.push(DeserializerControlCode::SciNumber as u8);
|
||||
for ele in num.0.to_be_bytes().iter() {
|
||||
out.push(*ele);
|
||||
}
|
||||
for ele in num.1.to_be_bytes().iter() {
|
||||
out.push(*ele);
|
||||
}
|
||||
out
|
||||
},
|
||||
Number::Flt(num) => {
|
||||
out.push(DeserializerControlCode::FltNumber as u8);
|
||||
for ele in num.0.to_be_bytes().iter() {
|
||||
out.push(*ele);
|
||||
}
|
||||
out
|
||||
},
|
||||
Number::Fra(num) => {
|
||||
out.push(DeserializerControlCode::FraNumber as u8);
|
||||
for ele in num.0.to_be_bytes().iter() {
|
||||
out.push(*ele);
|
||||
}
|
||||
for ele in num.1.to_be_bytes().iter() {
|
||||
out.push(*ele);
|
||||
}
|
||||
out
|
||||
},
|
||||
Number::Sym(num) => {
|
||||
match num {
|
||||
SymbolicNumber::Inf => out.push(DeserializerControlCode::SymInf as u8),
|
||||
SymbolicNumber::NaN => out.push(DeserializerControlCode::SymNan as u8),
|
||||
SymbolicNumber::NegInf => out.push(DeserializerControlCode::SymNegInf as u8),
|
||||
SymbolicNumber::NegNan => out.push(DeserializerControlCode::SymNegNan as u8),
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
},
|
||||
Datum::Bool(b) if !b => vec![DeserializerControlCode::BoolFalse as u8],
|
||||
Datum::Bool(b) if b => vec![DeserializerControlCode::BoolTrue as u8],
|
||||
Datum::Bool(_) => panic!("rustc somehow has a third bool!"),
|
||||
Datum::Cons(c) => {
|
||||
if let Some(lop) = &c.0 {
|
||||
if let Some(rop) = &c.1 {
|
||||
let mut out = vec![DeserializerControlCode::FullCons as u8];
|
||||
out.append(&mut (*lop.deref()).clone().into());
|
||||
out.append(&mut (*rop.deref()).clone().into());
|
||||
out
|
||||
} else {
|
||||
let mut out = vec![DeserializerControlCode::LeftCons as u8];
|
||||
out.append(&mut (*lop.deref()).clone().into());
|
||||
out
|
||||
}
|
||||
} else {
|
||||
if let Some(rop) = &c.1 {
|
||||
let mut out = vec![DeserializerControlCode::RightCons as u8];
|
||||
out.append(&mut (*rop.deref()).clone().into());
|
||||
out
|
||||
} else {
|
||||
vec![DeserializerControlCode::EmptyCons as u8]
|
||||
}
|
||||
}
|
||||
},
|
||||
Datum::Char(c) => vec![DeserializerControlCode::Char as u8, c],
|
||||
Datum::String(c) => {
|
||||
let mut v = vec![DeserializerControlCode::String as u8];
|
||||
v.append(&mut c.len().to_be_bytes().to_vec());
|
||||
v.append(&mut c.clone());
|
||||
v
|
||||
},
|
||||
Datum::ByteVector(c) => {
|
||||
let mut v = vec![DeserializerControlCode::ByteVec as u8];
|
||||
v.append(&mut c.len().to_be_bytes().to_vec());
|
||||
v.append(&mut c.clone());
|
||||
v
|
||||
},
|
||||
Datum::Vector(c) => {
|
||||
let mut v = vec![DeserializerControlCode::Vector as u8];
|
||||
v.append(&mut c.len().to_be_bytes().to_vec());
|
||||
c.iter().for_each(|i| v.append(&mut (*i.deref()).clone().into()));
|
||||
v
|
||||
},
|
||||
Datum::None => vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&[u8]> for Datum {
|
||||
type Error = &'static str;
|
||||
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
|
||||
match DeserializerControlCode::try_from(value[0])? {
|
||||
// this entire block goes away when we finish redoing organelle
|
||||
DeserializerControlCode::SymInf =>
|
||||
Ok(Datum::Number(Number::Sym(SymbolicNumber::Inf))),
|
||||
DeserializerControlCode::SymNan =>
|
||||
Ok(Datum::Number(Number::Sym(SymbolicNumber::NaN))),
|
||||
DeserializerControlCode::SymNegInf =>
|
||||
Ok(Datum::Number(Number::Sym(SymbolicNumber::NegInf))),
|
||||
DeserializerControlCode::SymNegNan =>
|
||||
Ok(Datum::Number(Number::Sym(SymbolicNumber::NegNan))),
|
||||
DeserializerControlCode::SciNumber if value.len() >= 1 + 4 + IS => {
|
||||
let i = f32::from_be_bytes(value[1..5].try_into().unwrap());
|
||||
let j = isize::from_be_bytes(value[5..(5 + IS)].try_into().unwrap());
|
||||
Ok(Datum::Number(Number::Sci(ScientificNotation(i, j))))
|
||||
},
|
||||
DeserializerControlCode::FltNumber if value.len() >= 9 => {
|
||||
let i = f64::from_be_bytes(value[1..9].try_into().unwrap());
|
||||
Ok(Datum::Number(Number::Flt(Float(i))))
|
||||
},
|
||||
DeserializerControlCode::FraNumber if value.len() >= 1 + (IS * 2) => {
|
||||
let i = isize::from_be_bytes(value[1..(1 + IS)].try_into().unwrap());
|
||||
let j = isize::from_be_bytes(value[(1 + IS)..(1 + IS + IS)].try_into().unwrap());
|
||||
Ok(Datum::Number(Number::Fra(Fraction(i, j))))
|
||||
},
|
||||
|
||||
DeserializerControlCode::BoolFalse => Ok(Datum::Bool(false)),
|
||||
DeserializerControlCode::BoolTrue => Ok(Datum::Bool(true)),
|
||||
|
||||
DeserializerControlCode::EmptyCons if value.len() >= 1 =>
|
||||
Ok(Datum::Cons(Cons(None, None))),
|
||||
|
||||
DeserializerControlCode::Char if value.len() >= 2 =>
|
||||
Ok(Datum::Char(value[1])),
|
||||
|
||||
DeserializerControlCode::String if value.len() >= 1 + US => {
|
||||
let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
|
||||
if len < 1 {
|
||||
Ok(Datum::String(vec![]))
|
||||
} else if value.len() - (1 + US) < len {
|
||||
Err("String vector backing is corrupted or truncated!")
|
||||
} else {
|
||||
Ok(Datum::String(value[(1 + US)..(1 + US + len)].to_vec()))
|
||||
}
|
||||
},
|
||||
|
||||
DeserializerControlCode::ByteVec if value.len() >= 1 + US => {
|
||||
let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
|
||||
if len < 1 {
|
||||
Ok(Datum::ByteVector(vec![]))
|
||||
} else if value.len() - (1 + US) < len {
|
||||
Err("ByteVector vector backing is corrupted or truncated!")
|
||||
} else {
|
||||
Ok(Datum::ByteVector(value[(1 + US)..(1 + US + len)].to_vec()))
|
||||
}
|
||||
},
|
||||
|
||||
DeserializerControlCode::Vector if value.len() >= 1 + US => {
|
||||
let len = usize::from_be_bytes(value[1..(1 + US)].try_into().unwrap());
|
||||
if len < 1 {
|
||||
Ok(Datum::Vector(vec![]))
|
||||
} else {
|
||||
let mut cursor: usize = 1 + US;
|
||||
let mut ovec: Vec<Gc<Datum>> = vec![];
|
||||
for _ in 0..len {
|
||||
ovec.push(Datum::try_from(&value[cursor..])?.into());
|
||||
cursor += ovec.last().unwrap().byte_length();
|
||||
}
|
||||
Ok(Datum::Vector(ovec))
|
||||
}
|
||||
},
|
||||
|
||||
DeserializerControlCode::LeftCons if value.len() >= 2 =>
|
||||
Ok(Datum::Cons(Cons(Some(Datum::try_from(&value[1..])?.into()), None))),
|
||||
DeserializerControlCode::RightCons if value.len() >= 2 =>
|
||||
Ok(Datum::Cons(Cons(None, Some(Datum::try_from(&value[1..])?.into())))),
|
||||
DeserializerControlCode::FullCons if value.len() >= 3 => {
|
||||
let lop = Datum::try_from(&value[1..])?;
|
||||
let next = 1 + lop.byte_length();
|
||||
let rop = Datum::try_from(&value[next..])?;
|
||||
Ok(Datum::Cons(Cons(Some(lop.into()), Some(rop.into()))))
|
||||
}
|
||||
|
||||
_ => Err("Deserializer Control Code not valid in this context")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Datum {
|
||||
pub fn byte_length(&self) -> usize {
|
||||
match self {
|
||||
Datum::None => 0,
|
||||
Datum::Bool(_) => 1,
|
||||
Datum::Char(_) => 2,
|
||||
// This will need to change with organelle
|
||||
Datum::Number(n) => match n {
|
||||
Number::Sym(_) => 1 as usize,
|
||||
Number::Flt(_) => 1 + 8 as usize,
|
||||
Number::Sci(_) => 1 + 4 + (isize::BITS / 8) as usize,
|
||||
Number::Fra(_) => 1 + ((usize::BITS / 8) * 2) as usize,
|
||||
},
|
||||
Datum::String(s) => 1 + US + s.len(),
|
||||
Datum::ByteVector(s) => 1 + US + s.len(),
|
||||
Datum::Vector(s) => {
|
||||
let mut c = 1 + US;
|
||||
for i in s.iter() {
|
||||
c += i.byte_length();
|
||||
}
|
||||
c
|
||||
},
|
||||
Datum::Cons(c) => {
|
||||
let mut size = 1;
|
||||
c.0.as_ref().and_then(|x| {
|
||||
size += x.byte_length();
|
||||
Some(())
|
||||
});
|
||||
c.1.as_ref().and_then(|x| {
|
||||
size += x.byte_length();
|
||||
Some(())
|
||||
});
|
||||
size
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
@ -362,4 +587,28 @@ mod tests {
|
|||
drop(reference_holder);
|
||||
assert!(!*(*copied_data).0);
|
||||
}
|
||||
|
||||
// Round-trips a representative datum of each serializable shape through
// the byte encoding, and checks that byte_length agrees with the number of
// bytes actually produced (the deserializer relies on that to walk nested
// data).
#[test]
fn serialize_deserialize_datum_tests() {
    let cases = vec![
        Datum::Number("2/3".parse::<Number>().unwrap()),
        Datum::Number("-4/5".parse::<Number>().unwrap()),
        Datum::Number("2e45".parse::<Number>().unwrap()),
        Datum::Number("1.2432566".parse::<Number>().unwrap()),
        Datum::Number("+inf.0".parse::<Number>().unwrap()),
        Datum::Cons(Cons(Some(Datum::Bool(true).into()), Some(Datum::Bool(false).into()))),
        Datum::Cons(Cons(None, Some(Datum::Bool(true).into()))),
        Datum::Cons(Cons(Some(Datum::Bool(true).into()), None)),
        Datum::Cons(Cons(None, None)),
        Datum::Cons(Cons(Some(Datum::Cons(Cons(None, Some(Datum::Bool(false).into()))).into()), None)),
        Datum::Vector(vec![Datum::Bool(true).into(), Datum::Bool(true).into(), Datum::Bool(false).into()]),
        Datum::Vector(vec![]),
        Datum::Vector(vec![Datum::Vector(vec![Datum::Bool(true).into()]).into(), Datum::Bool(false).into()]),
        // length-prefixed and single-byte payloads
        Datum::Char(b'a'),
        Datum::String(vec![]),
        Datum::String(b"hello".to_vec()),
        Datum::ByteVector(vec![]),
        Datum::ByteVector(vec![0, 1, 254, 255]),
        // variable-width elements inside a vector exercise the cursor stepping
        Datum::Vector(vec![Datum::String(b"ab".to_vec()).into(), Datum::Char(b'c').into()]),
    ];

    for case in cases.iter() {
        let bytes: Vec<u8> = case.clone().into();
        assert_eq!(bytes.len(), case.byte_length());
        assert_eq!(*case, Datum::try_from(bytes.as_slice()).unwrap());
    }
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue