diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d6d19ac..181d394 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -27,7 +27,7 @@ unit-test-parser: - cargo test parser unit-test-number-package: - stage: test-backend + stage: test-frontend script: - cargo test number @@ -40,3 +40,8 @@ unit-test-quickmap: stage: test-backend script: - cargo test hmap + +unit-test-instruction-decoding: + stage: test-backend + script: + - cargo test util diff --git a/Cargo.lock b/Cargo.lock index e800b30..97860ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -112,18 +112,56 @@ dependencies = [ "mycelium", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hyphae" +version = "0.1.0" +dependencies = [ + "mycelium", + "num", + "serde", + "toml", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + [[package]] name = "mycelium" version = "0.1.0" 
@@ -228,6 +266,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = "strsim" version = "0.11.1" @@ -245,6 +312,47 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "unicode-ident" version = "1.0.18" @@ -329,3 +437,12 @@ 
name = "windows_x86_64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "winnow"
+version = "0.7.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
+dependencies = [
+ "memchr",
+]
diff --git a/Cargo.toml b/Cargo.toml
index 3d4d48c..43867c5 100755
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,3 +1,3 @@
 [workspace]
 resolver = "2"
-members = ["mycelium", "decomposer"]
+members = ["mycelium", "decomposer", "hyphae"]
diff --git a/hyphae/Cargo.toml b/hyphae/Cargo.toml
new file mode 100644
index 0000000..cb867ab
--- /dev/null
+++ b/hyphae/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "hyphae"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+mycelium = { path = "../mycelium" }
+num = { version = "0.4.3", features = ["alloc"] }
+
+[build-dependencies]
+serde = { version = "1.0", features = ["alloc", "derive"] }
+toml = "0.8.23"
diff --git a/hyphae/build.rs b/hyphae/build.rs
new file mode 100644
index 0000000..0255141
--- /dev/null
+++ b/hyphae/build.rs
@@ -0,0 +1,124 @@
+//! Build script for hyphae: generates `hyphae_instr.rs` (the `Operation`
+//! opcode table and its conversions) from `instructions.toml`.
+
+use std::{env, fs};
+use std::fs::File;
+use std::io::{BufWriter, Write};
+use std::path::Path;
+use serde::Deserialize;
+
+/// Top-level shape of `instructions.toml`: a list of `[[instructions]]` tables.
+#[derive(Deserialize)]
+struct Document {
+    pub instructions: Vec<Instruction>,
+}
+
+/// One `[[instructions]]` entry; `main` only reads `name` and `args`.
+// dont warn about unused fields in toml instruction struct
+#[allow(dead_code)]
+#[derive(Deserialize)]
+struct Instruction {
+    pub name: String,
+    pub args: Vec<String>,
+    pub output: String,
+    pub description: String,
+}
+
+/// Reads `instructions.toml` and writes `$OUT_DIR/hyphae_instr.rs`.
+///
+/// An instruction's position in the TOML file becomes its opcode byte, so
+/// reordering entries renumbers the whole instruction set.
+fn main() {
+    let output_path = Path::new(&env::var("OUT_DIR").unwrap())
+        .join("hyphae_instr.rs");
+    let input = fs::read_to_string("instructions.toml")
+        .unwrap();
+    let mut output_file =
+        BufWriter::new(File::create(&output_path).unwrap());
+
+    let instruction_table: Document =
+        toml::from_str(&input)
+            .expect("hyphae: failed to parse instructions.toml");
+
+    let mut isa = "#[repr(transparent)]\n".to_owned();
+    isa += "#[derive(Clone, Debug, PartialEq)]\n";
+    isa += "pub struct Operation(pub u8);\n\n";
+
+    let mut isa_from_byte = "impl TryFrom<u8> for Operation {\n".to_owned();
+    isa_from_byte += " type Error = &'static str;\n";
+    isa_from_byte += " fn try_from(v: u8) -> Result<Self, Self::Error> {\n";
+    isa_from_byte += " match v {\n";
+
+
+    let mut isa_fromstr = "impl FromStr for Operation {\n".to_owned();
+    isa_fromstr += " type Err = &'static str;\n";
+    isa_fromstr += " fn from_str(v: &str) -> Result<Self, Self::Err> {\n";
+    isa_fromstr += " match v {\n";
+
+    let mut isa_from_str = "impl TryFrom<&str> for Operation {\n".to_owned();
+    isa_from_str += " type Error = &'static str;\n";
+    isa_from_str += " fn try_from(v: &str) -> Result<Self, Self::Error> {\n";
+    isa_from_str += " match v {\n";
+
+    // NOTE(review): the integer type in this generated signature is assumed
+    // to be `u8`; any integer type satisfies the `0..num_args()?` loop in
+    // util.rs -- confirm.
+    let mut isa_num_args = "impl Operation {\n".to_owned();
+    isa_num_args += " pub fn num_args(&self) -> Result<u8, &'static str> {\n";
+    isa_num_args += " match self.0 {\n";
+
+    instruction_table.instructions.iter()
+        .enumerate()
+        .for_each(|(idx, instr)| {
+            let const_name = instr.name.to_ascii_uppercase();
+
+            isa += format!("pub const {}: Operation = Operation({});\n",
+                const_name, idx).as_str();
+
+            isa_from_byte += format!(" {} => Ok({}),\n", idx, const_name)
+                .as_str();
+
+            isa_from_str += format!(" \"{}\" => Ok({}),\n",
+                const_name, const_name).as_str();
+
+            isa_fromstr += format!(" \"{}\" => Ok({}),\n",
+                const_name, const_name).as_str();
+
+            isa_num_args += format!(" {} => Ok({}),\n", idx, instr.args.len())
+                .as_str()
+        });
+
+    isa_from_byte += " _ => Err(\"illegal instruction\"),\n";
+    isa_from_byte += " }\n";
+    isa_from_byte += " }\n";
+    isa_from_byte += "}\n\n";
+
+    isa_from_str += " _ => Err(\"illegal instruction\"),\n";
+    isa_from_str += " }\n";
+    isa_from_str += " }\n";
+    isa_from_str += "}\n\n";
+
+    isa_fromstr += " _ => Err(\"illegal instruction\"),\n";
+    isa_fromstr += " }\n";
+    isa_fromstr += " }\n";
+    isa_fromstr += "}\n\n";
+
+    isa_num_args += " _ => Err(\"illegal instruction\"),\n";
+    isa_num_args += " }\n";
+    isa_num_args += " }\n";
+    isa_num_args += "}\n\n";
+
+    isa += "\n";
+    isa += isa_from_byte.as_str();
+    isa += isa_from_str.as_str();
+    isa += isa_fromstr.as_str();
+    isa += isa_num_args.as_str();
+
+    write!(&mut output_file, "use core::str::FromStr;\n\n\n").unwrap();
+    write!(&mut output_file, "{}", isa).unwrap();
+    // flush explicitly: BufWriter's Drop discards write errors
+    output_file.flush().unwrap();
+    println!("cargo::rerun-if-changed=build.rs");
+    // the table lives in instructions.toml (read above), not instructions.json
+    println!("cargo::rerun-if-changed=instructions.toml");
+}
diff --git a/hyphae/instructions.toml b/hyphae/instructions.toml
new file mode 100644
index 0000000..fa85567
--- /dev/null
+++ b/hyphae/instructions.toml
@@ -0,0 +1,303 @@
+# NOTE: keep libc out of this, that's what the trap vector is for
+# NOTE: to programmers: only registers allow mutable access
+
+[[instructions]]
+name = "trap"
+args = ["index"]
+output = "result of function"
+description = "triggers callback in trap vector at index"
+
+[[instructions]]
+name = "bind"
+args = ["name", "operand"]
+output = ""
+description = "map name to operand in sym table."
+
+[[instructions]]
+name = "unbind"
+args = ["name"]
+output = ""
+description = "remove name mapping from sym table."
+
+[[instructions]]
+name = "bound"
+args = ["name"]
+output = "expr = true if name is bound"
+description = "test if a name is already bound"
+
+[[instructions]]
+name = "push"
+args = ["operand"]
+output = ""
+description = "pushes operand onto stack."
+
+[[instructions]]
+name = "pop"
+args = []
+output = ""
+description = "removes element at top of stack."
+ +[[instructions]] +name = "enter" +args = [] +output = "" +description = "create new stack frame" + +[[instructions]] +name = "exit" +args = [] +output = "" +description = "delete current stack frame" + +[[instructions]] +name = "load" +args = ["src", "dest"] +output = "" +description = "copies src into dest" + +[[instructions]] +name = "clear" +args = ["dest"] +output = "" +description = "clears dest" + +[[instructions]] +name = "nop" +args = [] +output = "" +description = "no operation" + +[[instructions]] +name = "halt" +args = [] +output = "" +description = "halts the VM" + +[[instructions]] +name = "panic" +args = ["error"] +output = "" +description = "sets error state and halts VM" + +[[instructions]] +name = "jmp" +args = ["addr"] +output = "" +description = "sets ictr register to addr" + +[[instructions]] +name = "jmpif" +args = ["addr"] +output = "" +description = "if expr register holds true, sets ictr to addr" + +[[instructions]] +name = "eq" +args = ["a", "b"] +output = "a == b" +description = "equality test" + +[[instructions]] +name = "lt" +args = ["a", "b"] +output = "a < b" +description = "less than test" + +[[instructions]] +name = "gt" +args = ["a", "b"] +output = "a > b" +description = "greater than test" + +[[instructions]] +name = "lte" +args = ["a", "b"] +output = "a <= b" +description = "less than equals test" + +[[instructions]] +name = "gte" +args = ["a", "b"] +output = "a >= b" +description = "greater than equals test" + +[[instructions]] +name = "bool_not" +args = [] +output = "expr = !expr" +description = "boolean not" + +[[instructions]] +name = "bool_and" +args = ["a", "b"] +output = "a && b" +description = "boolean and" + +[[instructions]] +name = "bool_or" +args = ["a", "b"] +output = "a || b" +description = "boolean or" + +[[instructions]] +name = "byte_and" +args = ["a", "b"] +output = "a & b" +description = "bitwise and" + +[[instructions]] +name = "byte_or" +args = ["a", "b"] +output = "a | b" +description = "bitwise or" + 
+[[instructions]] +name = "xor" +args = ["a", "b"] +output = "a xor b" +description = "bitwise exclusive or" + +[[instructions]] +name = "byte_not" +args = [] +output = "expr = !expr" +description = "bitwise not" + +[[instructions]] +name = "add" +args = ["a", "b"] +output = "a + b" +description = "numeric addition" + +[[instructions]] +name = "sub" +args = ["a", "b"] +output = "a - b" +description = "numeric subtraction" + +[[instructions]] +name = "mul" +args = ["a", "b"] +output = "a * b" +description = "numeric multiplication" + +[[instructions]] +name = "fdiv" +args = ["a", "b"] +output = "a / b" +description = "numeric FLOAT division" + +[[instructions]] +name = "idiv" +args = ["a", "b"] +output = "a / b" +description = "numeric INTEGER division" + +[[instructions]] +name = "pow" +args = ["a", "b"] +output = "a ^ b" +description = "numeric operation to raise a to the power of b" + +[[instructions]] +name = "modulo" +args = ["a", "b"] +output = "a % b" +description = "numeric modulo operation" + +[[instructions]] +name = "rem" +args = ["a", "b"] +output = "remainder from a / b" +description = "remainder from integer division" + +[[instructions]] +name = "inc" +args = ["src"] +output = "" +description = "increments number at source" + +[[instructions]] +name = "dec" +args = ["src"] +output = "" +description = "decrements number at source" + +[[instructions]] +name = "cton" +args = ["src"] +output = "" +description = "mutates a char datum into a number datum" + +[[instructions]] +name = "ntoc" +args = ["src"] +output = "" +description = "mutates a number datum into a char datum" + +[[instructions]] +name = "ntoi" +args = ["src"] +output = "" +description = "mutates a number datum into its exact form" + +[[instructions]] +name = "ntoe" +args = ["src"] +output = "" +description = "mutates a number datum into its inexact form" + +[[instructions]] +name = "mkvec" +args = [] +output = "a blank vector" +description = "creates a new vector" + +[[instructions]] +name = 
"mkbvec" +args = [] +output = "a blank bytevector" +description = "creates a blank bytevector" + +[[instructions]] +name = "index" +args = ["collection", "index"] +output = "collection[index]" +description = "extracts element from collection at index" + +[[instructions]] +name = "length" +args = ["collection"] +output = "length of collection" +description = "calculates length of collection" + +[[instructions]] +name = "subsl" +args = ["collection", "start", "end"] +output = "collection[start:end]" +description = "returns a subset from collection denoted by start and end indexes" + +[[instructions]] +name = "inser" +args = ["collection", "elem", "idx"] +output = "" +description = "inserts an element at specified index into a collection" + +[[instructions]] +name = "cons" +args = ["left", "right"] +output = "resulting collection" +description = "either append right to left or make new list from both" + +[[instructions]] +name = "car" +args = ["list"] +output = "returns first element in cons cell" +description = "takes an AST and returns first element in top level cons cell" + +[[instructions]] +name = "cdr" +args = ["list"] +output = "returns last element in cons cell" +description = "takes an AST and returns last element in top level cons cell" + diff --git a/mycelium/src/hmap.rs b/hyphae/src/hmap.rs similarity index 97% rename from mycelium/src/hmap.rs rename to hyphae/src/hmap.rs index 6b417e2..2705cd1 100755 --- a/mycelium/src/hmap.rs +++ b/hyphae/src/hmap.rs @@ -44,7 +44,7 @@ const INDEXED_BUCKETS: u8 = 199; * or more likely rip and replace with a better nostd hashmap */ #[inline] -fn string_hash(input: &String) -> u8 { +fn string_hash(input: &str) -> u8 { input .chars() // each letter and number get a digit @@ -82,7 +82,7 @@ impl<'a, T: Clone> QuickMap { return None; } - pub fn remove(&mut self, arg: &String) -> Option { + pub fn remove(&mut self, arg: &str) -> Option { let idx = string_hash(&arg); let len = self.0[idx as usize].0.len(); for i in 0..len { @@ 
-97,7 +97,7 @@ impl<'a, T: Clone> QuickMap { return None; } - pub fn contains_key(&self, arg: &String) -> bool { + pub fn contains_key(&self, arg: &str) -> bool { let idx = string_hash(arg); for kv in self.0[idx as usize].0.iter() { if &kv.0 == arg { diff --git a/hyphae/src/instr.rs b/hyphae/src/instr.rs new file mode 100644 index 0000000..b5010a8 --- /dev/null +++ b/hyphae/src/instr.rs @@ -0,0 +1,19 @@ +/* Mycelium Scheme + * Copyright (C) 2025 Ava Affine + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +include!(concat!(env!("OUT_DIR"), "/hyphae_instr.rs")); + diff --git a/hyphae/src/lib.rs b/hyphae/src/lib.rs new file mode 100644 index 0000000..4de7909 --- /dev/null +++ b/hyphae/src/lib.rs @@ -0,0 +1,26 @@ +/* Mycelium Scheme + * Copyright (C) 2025 Ava Affine + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#![cfg_attr(not(test), no_std)]
+
+pub mod hmap;
+pub mod stackstack;
+pub mod instr;
+pub mod vm;
+pub mod util;
+
+extern crate alloc;
diff --git a/mycelium/src/stackstack.rs b/hyphae/src/stackstack.rs
similarity index 98%
rename from mycelium/src/stackstack.rs
rename to hyphae/src/stackstack.rs
index c772f99..db1ab3d 100644
--- a/mycelium/src/stackstack.rs
+++ b/hyphae/src/stackstack.rs
@@ -83,10 +83,10 @@ impl Debug for StackStack {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         let mut ss_idx = 1;
         let mut ss_cur = &*self.0;
-        while let Some(ref inner) = ss_cur {
+        while let Some(inner) = ss_cur {
             write!(f, "Frame {ss_idx}:")?;
             let mut s_cur = &*inner.stack.0;
-            while let Some(ref node) = s_cur {
+            while let Some(node) = s_cur {
                 write!(f, " {:#?}", node.data)?;
                 s_cur = &*node.next.0;
             }
diff --git a/hyphae/src/util.rs b/hyphae/src/util.rs
new file mode 100644
index 0000000..de902bb
--- /dev/null
+++ b/hyphae/src/util.rs
@@ -0,0 +1,329 @@
+/* Mycelium Scheme
+ * Copyright (C) 2025 Ava Affine
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+use crate::instr::Operation;
+
+use alloc::vec::Vec;
+use alloc::vec;
+
+use core::ops::Index;
+use core::mem::transmute;
+
+/// Addressing-mode byte that prefixes every encoded operand.
+#[repr(u8)]
+#[derive(Debug, Clone, PartialEq)]
+pub enum Address {
+    Stack = 0xf0, // immutable access only
+    Instr = 0xf1, // immutable access only
+    Expr = 0xf2, // mutable access allowed
+    Oper1 = 0xf3, // mutable access allowed
+    Oper2 = 0xf4, // mutable access allowed
+    Oper3 = 0xf5, // mutable access allowed
+    Oper4 = 0xf6, // mutable access allowed
+    Numer = 0xf8, // immutable access only
+}
+
+/// An addressing mode plus its payload (decoded as 0 for modes without payload bytes).
+#[derive(Debug, Clone, PartialEq)]
+pub struct Operand(pub Address, pub usize);
+
+/// One decoded instruction: an opcode followed by its operands in order.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Instruction(pub Operation, pub Vec<Operand>);
+
+/// A decoded program: its instructions in order.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Program(pub Vec<Instruction>);
+
+impl Into<u8> for Address {
+    fn into(self) -> u8 {
+        // SAFETY: Address is a fieldless #[repr(u8)] enum, so it has the
+        // same size and bit pattern as its u8 discriminant.
+        unsafe { transmute::<Address, u8>(self) }
+    }
+}
+
+impl TryFrom<u8> for Address {
+    type Error = &'static str;
+    fn try_from(val: u8) -> Result<Self, Self::Error> {
+        match val {
+            _ if val == Address::Stack as u8 => Ok(Address::Stack),
+            _ if val == Address::Instr as u8 => Ok(Address::Instr),
+            _ if val == Address::Expr as u8 => Ok(Address::Expr),
+            _ if val == Address::Oper1 as u8 => Ok(Address::Oper1),
+            _ if val == Address::Oper2 as u8 => Ok(Address::Oper2),
+            _ if val == Address::Oper3 as u8 => Ok(Address::Oper3),
+            _ if val == Address::Oper4 as u8 => Ok(Address::Oper4),
+            _ if val == Address::Numer as u8 => Ok(Address::Numer),
+            _ => Err("illegal addressing mode")
+        }
+    }
+}
+
+impl Address {
+    /// Number of payload bytes that follow this mode byte in the encoding.
+    fn operand_size(&self) -> u8 {
+        match self {
+            Address::Stack => (usize::BITS / 8) as u8,
+            Address::Instr => (usize::BITS / 8) as u8,
+            Address::Numer => (usize::BITS / 8) as u8,
+            _ => 0,
+        }
+    }
+}
+
+impl TryFrom<&[u8]> for Operand {
+    type Error = &'static str;
+    /// Decodes the operand at the start of `value`; trailing bytes are ignored.
+    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
+        // an empty slice is malformed input, not a reason to panic
+        let Some(&mode_byte) = value.first() else {
+            return Err("truncated address data")
+        };
+        let addr_mode: Address = mode_byte.try_into()?;
+        let operand_size = addr_mode.operand_size() as usize;
+        if value.len() < operand_size + 1 {
+            return Err("truncated address data")
+        }
+
+        // sized from usize so from_ne_bytes also type-checks off 64-bit targets
+        let mut operand_bytes = [0u8; (usize::BITS / 8) as usize];
+        operand_bytes[..operand_size].copy_from_slice(&value[1..1 + operand_size]);
+
+        Ok(Operand(addr_mode, usize::from_ne_bytes(operand_bytes)))
+    }
+}
+
+impl Into<Vec<u8>> for Operand {
+    /// Encodes as the mode byte followed by `operand_size()` native-endian payload bytes.
+    fn into(self) -> Vec<u8> {
+        let mut res = vec![];
+        res.push(self.0.clone() as u8);
+        res.append(&mut self.1.to_ne_bytes()[..self.0.operand_size() as usize].to_vec());
+        res
+    }
+}
+
+impl Operand {
+    /// Encoded length in bytes: the mode byte plus its payload.
+    fn byte_length(&self) -> u8 {
+        1 + self.0.operand_size()
+    }
+}
+
+impl TryFrom<&[u8]> for Instruction {
+    type Error = &'static str;
+    /// Decodes one instruction from the start of `value`; trailing bytes are ignored.
+    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
+        // an empty slice is malformed input, not a reason to panic
+        let Some(&opcode) = value.first() else {
+            return Err("instruction data truncated")
+        };
+        let operation: Operation = opcode.try_into()?;
+        let mut operands: Vec<Operand> = vec![];
+
+        let mut cur = 1;
+        for _ in 0..operation.num_args()? {
+            if cur >= value.len() {
+                return Err("operand data truncated")
+            }
+            let operand: Operand = value[cur..].try_into()?;
+            cur += operand.byte_length() as usize;
+            operands.push(operand);
+        }
+
+        Ok(Instruction(operation, operands))
+    }
+}
+
+impl Into<Vec<u8>> for Instruction {
+    fn into(self) -> Vec<u8> {
+        let mut res = vec![];
+        res.push(self.0.0);
+        for op in self.1 {
+            res.append(&mut op.into())
+        }
+        res
+    }
+}
+
+impl Instruction {
+    /// Encoded length in bytes: the opcode plus every operand.
+    fn byte_length(&self) -> u8 {
+        self.1.iter()
+            .fold(0, |total, oper|
+                total + oper.byte_length()) + 1
+    }
+}
+
+impl TryFrom<&[u8]> for Program {
+    type Error = &'static str;
+    /// Decodes back-to-back instructions until `value` is exhausted.
+    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
+        let mut prog: Vec<Instruction> = vec![];
+        let mut cur = 0;
+
+        while cur < value.len() {
+            let instruction: Instruction = value[cur..].try_into()?;
+            cur += instruction.byte_length() as usize;
+            prog.push(instruction);
+        }
+
+        Ok(Program(prog))
+    }
+}
+
+impl Into<Vec<u8>> for Program {
+    fn into(self) -> Vec<u8> {
+        let mut res: Vec<u8> = vec![];
+        for instr in self.0 {
+            res.append(&mut instr.into())
+        }
+        res
+    }
+}
+
+impl Index<usize> for Program {
+    type Output = Instruction;
+    fn index(&self, index: usize) -> &Instruction {
+        self.0.get(index).expect("access to out of bounds instruction in vm")
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use crate::instr;
+    use super::*;
+
+    #[test]
+    fn test_operand_parse() {
+        let bad_addressing =
+            TryInto::<Operand>::try_into(&[0x13, 0x39][..]);
+        assert_eq!(bad_addressing, Err("illegal addressing mode"));
+
+        let truncated_address =
+            TryInto::<Operand>::try_into(&[0xf1][..]);
+        assert_eq!(truncated_address, Err("truncated address data"));
+
+        let usize_case =
+            TryInto::<Operand>::try_into(&[Address::Stack.into(),
+                0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23][..]);
+        assert!(usize_case.is_ok());
+        assert_eq!(usize_case.unwrap().0, Address::Stack);
+
+        let register_operand = Operand(Address::Expr, 0);
+        let operand_byte_arr =
+            TryInto::<Vec<u8>>::try_into(register_operand.clone());
+        assert!(operand_byte_arr.is_ok());
+        let br = operand_byte_arr.unwrap();
+        let operand_bytes = br.as_slice();
+        assert_eq!(operand_bytes, &[0xf2][..]);
+        let operand_conv =
+            TryInto::<Operand>::try_into(operand_bytes);
+        assert!(operand_conv.is_ok());
+        assert_eq!(register_operand, operand_conv.unwrap());
+    }
+
+    #[test]
+    fn test_instruction_parse() {
+        let illegal_instruction =
+            TryInto::<Instruction>::try_into(&[0x88][..]);
+        assert_eq!(illegal_instruction, Err("illegal instruction"));
+
+        let bad_operand =
+            TryInto::<Instruction>::try_into(&[instr::TRAP.0, 0xf1][..]);
+        assert_eq!(bad_operand, Err("truncated address data"));
+
+        let need_more_opers =
+            TryInto::<Instruction>::try_into(&[instr::TRAP.0][..]);
+        assert_eq!(need_more_opers, Err("operand data truncated"));
+
+        let no_operands =
+            TryInto::<Instruction>::try_into(&[instr::POP.0][..]);
+        assert!(no_operands.is_ok());
+        let nop = no_operands.unwrap();
+        assert_eq!(nop.0, instr::POP);
+        let nop_bytes =
+            TryInto::<Vec<u8>>::try_into(nop);
+        assert!(nop_bytes.is_ok());
+        assert_eq!(nop_bytes.unwrap(), vec![instr::POP.0]);
+
+        let one_operand =
+            TryInto::<Instruction>::try_into(&[instr::TRAP.0, 0xf3][..]);
+        assert!(one_operand.is_ok());
+        let oe_oper = one_operand.unwrap();
+        assert_eq!(oe_oper.0, instr::TRAP);
+        assert_eq!(oe_oper.1.len(), 1);
+        assert_eq!(oe_oper.1[0], Operand(Address::Oper1, 0));
+        let oe_bytes =
+            TryInto::<Vec<u8>>::try_into(oe_oper);
+        assert!(oe_bytes.is_ok());
+        assert_eq!(oe_bytes.unwrap(), vec![instr::TRAP.0, 0xf3]);
+
+        let two_operands =
+            TryInto::<Instruction>::try_into(&[instr::LOAD.0, 0xf3, 0xf4][..]);
+        assert!(two_operands.is_ok());
+        let two_oper = two_operands.unwrap();
+        assert_eq!(two_oper.0, instr::LOAD);
+        assert_eq!(two_oper.1.len(), 2);
+        let two_bytes =
+            TryInto::<Vec<u8>>::try_into(two_oper.clone());
+        assert!(two_bytes.is_ok());
+        assert_eq!(two_bytes.unwrap(), vec![instr::LOAD.0, 0xf3, 0xf4]);
+        assert_eq!(two_oper.1[0], Operand(Address::Oper1, 0));
+        assert_eq!(two_oper.1[1], Operand(Address::Oper2, 0));
+    }
+
+    #[test]
+    fn test_program_parse() {
+        let bytes1 = [instr::LOAD.0, 0xf3, 0xf4];
+        let out1 = vec![Instruction(instr::LOAD,
+            vec![Operand(Address::Oper1, 0), Operand(Address::Oper2, 0)])];
+        let res1 =
+            TryInto::<Program>::try_into(&bytes1[..]);
+        assert!(res1.is_ok());
+        assert_eq!(res1.unwrap().0, out1);
+
+        let bytes2 = [
+            instr::LOAD.0, 0xf3, 0xf4,
+            instr::CLEAR.0, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+        ];
+        let out2 = vec![
+            Instruction(instr::LOAD, vec![
+                Operand(Address::Oper1, 0),
+                Operand(Address::Oper2, 0)
+            ]),
+
+            Instruction(instr::CLEAR, vec![
+                Operand(Address::Stack, 1)
+            ])
+        ];
+        let res2 =
+            TryInto::<Program>::try_into(&bytes2[..]);
+        assert!(res2.is_ok());
+        assert_eq!(res2.unwrap().0, out2);
+    }
+
+    #[test]
+    fn test_empty_input() {
+        let empty: &[u8] = &[];
+        assert_eq!(Operand::try_from(empty), Err("truncated address data"));
+        assert_eq!(Instruction::try_from(empty), Err("instruction data truncated"));
+        assert_eq!(Program::try_from(empty), Ok(Program(vec![])));
+    }
+}
diff --git a/hyphae/src/vm.rs b/hyphae/src/vm.rs
new file mode 100644
index 0000000..089e1fd
--- /dev/null
+++ b/hyphae/src/vm.rs
@@ -0,0 +1,460 @@
+/* Mycelium Scheme
+ * Copyright (C) 2025 Ava Affine
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +use mycelium::sexpr::Datum; +use mycelium::number::{Fraction, Number, Numeric}; + +use crate::hmap::QuickMap; +use crate::stackstack::StackStack; +use crate::instr as i; +use crate::util::{Operand, Program, Address}; + +use core::cell::RefCell; + +use alloc::vec; +use alloc::rc::Rc; +use alloc::vec::Vec; +use alloc::sync::Arc; +use alloc::borrow::ToOwned; + +use num::pow::Pow; + + +const NUM_OPERAND_REGISTERS: usize = 4; + +pub struct VM { + // execution environment + pub stack: StackStack, + pub symtab: QuickMap, + pub prog: Program, + pub fds: Vec, + pub traps: Vec>, + + // data registers + pub expr: Datum, + pub oper: [Datum; NUM_OPERAND_REGISTERS], + + // control flow registers + pub retn: usize, + pub ictr: usize, + pub errr: Datum, + + // state + pub running: bool, + pub err_state: bool, +} + +impl VM { + pub fn run_program(&mut self) { + if self.prog.0.len() < 1 { + self.running = false; + } + + while self.ictr < self.prog.0.len() { + if self.err_state || !self.running { + return; + } + + self.execute_instruction(); + self.ictr += 1; + } + + self.running = false; + } + + #[inline(always)] + fn execute_instruction(&mut self) { + let instr = &self.prog.0[self.ictr].clone(); + + macro_rules! e { + ( $err:expr ) => { + { + self.running = false; + self.err_state = true; + self.errr = Datum::String($err.as_bytes().to_vec()); + return; + } + } + } + + macro_rules! 
deref { + ( $oper:expr ) => { + match $oper.0 { + Address::Expr => &self.expr, + Address::Oper1 => &self.oper[0], + Address::Oper2 => &self.oper[1], + Address::Oper3 => &self.oper[2], + Address::Oper4 => &self.oper[3], + Address::Stack => &self.stack[$oper.1], + Address::Numer => e!("attempt to dereference constant numeric data"), + Address::Instr => e!("bad access to instruction data"), + } + } + } + + macro_rules! deref_mut { + ( $oper:expr ) => { + match $oper.0 { + Address::Expr => &mut self.expr, + Address::Oper1 => &mut self.oper[0], + Address::Oper2 => &mut self.oper[1], + Address::Oper3 => &mut self.oper[2], + Address::Oper4 => &mut self.oper[3], + Address::Instr => e!("bad mutable access to instruction data"), + // Stack, Numer + _ => e!("mutable access to immutable data"), + } + } + } + + macro_rules! do_jmp { + ( $idx:expr ) => { + let Operand(Address::Instr, target) = instr.1[$idx] else { + e!("illegal argument to jump"); + }; + + if target >= self.prog.0.len() { + e!("out of bounds jump caught"); + } + + self.ictr = target; + } + } + + macro_rules! 
lr_oper { + ( $in_type:ident, $oper:tt, $out_type:ident ) => { + self.expr = Datum::$out_type(*match deref!(&instr.1[0]){ + Datum::$in_type(l) => l, + _ => e!("illegal argument to instruction"), + } $oper *match deref!(&instr.1[1]){ + Datum::$in_type(l) => l, + _ => e!("illegal argument to instruction"), + }) + } + } + + match instr.0 { + i::TRAP => { + let Operand(Address::Numer, idx) = instr.1[0] else { + e!("illegal argument to TRAP instruction"); + }; + + if idx >= self.traps.len() { + e!("access to out of bounds trap!") + } + + self.traps[idx].clone()(self) + }, + + // symtable ops + i::BIND => { + let Datum::String(tag) = deref!(&instr.1[0]) else { + e!("illegal argument to BIND instruction"); + }; + let tag = unsafe { str::from_utf8_unchecked(&tag).to_owned() }; + self.symtab.insert(tag, instr.1[1].clone()); + }, + + i::UNBIND => { + let Datum::String(tag) = deref!(&instr.1[0]) else { + e!("illegal argument to UNBIND instruction"); + }; + let tag = unsafe { str::from_utf8_unchecked(&tag) }; + self.symtab.remove(&tag); + }, + + i::BOUND => { + let Datum::String(tag) = deref!(&instr.1[0]) else { + e!("illegal argument to BOUND instruction"); + }; + let tag = unsafe { str::from_utf8_unchecked(&tag) }; + self.symtab.contains_key(&tag); + }, + + // stack ops + i::PUSH => self.stack.push_current_stack(deref!(&instr.1[0]).clone()), + i::POP => _ = self.stack.pop_current_stack(), + i::ENTER => self.stack.add_stack(), + i::EXIT => self.stack.destroy_top_stack(), + + // movement ops + i::LOAD => *deref_mut!(&instr.1[1]) = deref!(&instr.1[0]).clone(), + i::CLEAR => *deref_mut!(&instr.1[0]) = Datum::None, + + // control flow ops + i::NOP => (), + i::HALT => self.running = false, + i::PANIC => { + self.running = false; + self.err_state = false; + self.errr = deref!(&instr.1[0]).clone() + }, + + i::JMP => { + do_jmp!(0); + }, + + i::JMPIF => { + if let Datum::Bool(true) = self.expr { + do_jmp!(0); + } + }, + + // boolean ops + i::EQ => self.expr = 
Datum::Bool(*deref!(&instr.1[0]) == *deref!(&instr.1[1])), + i::LT => lr_oper!(Number, <, Bool), + i::GT => lr_oper!(Number, >, Bool), + i::LTE => lr_oper!(Number, <=, Bool), + i::GTE => lr_oper!(Number, >=, Bool), + i::BOOL_NOT => { + self.expr = Datum::Bool(!{ + let Datum::Bool(a) = self.expr else { + e!("illegal argument to BOOL_NOT instruction"); + }; + a + }); + }, + + i::BOOL_AND => lr_oper!(Bool, &&, Bool), + i::BOOL_OR => lr_oper!(Bool, ||, Bool), + + // char / byte ops + i::BYTE_AND => lr_oper!(Char, &, Char), + i::BYTE_OR => lr_oper!(Char, |, Char), + i::XOR => lr_oper!(Char, ^, Char), + i::BYTE_NOT => { + self.expr = Datum::Char(!{ + let Datum::Char(a) = self.expr else { + e!("illegal argument to BYTE_NOT instruction"); + }; + a + }); + }, + + // numeric ops + i::ADD => lr_oper!(Number, +, Number), + i::SUB => lr_oper!(Number, -, Number), + i::MUL => lr_oper!(Number, *, Number), + i::FDIV => lr_oper!(Number, /, Number), + i::IDIV => { + let Datum::Number(l) = deref!(&instr.1[0]) else { + e!("illegal argument to IDIV instruction"); + }; + + let Datum::Number(r) = deref!(&instr.1[1]) else { + e!("illgal argument to IDIV instruction"); + }; + + let Fraction(l, 1) = l.make_exact() else { + e!("integer division on non integer value"); + }; + + let Fraction(r, 1) = r.make_exact() else { + e!("integer division on non integer value"); + }; + + self.expr = Datum::Number(Number::Fra(Fraction(l / r, 1))); + }, + + i::POW => { + let Datum::Number(l) = deref!(&instr.1[0]) else { + e!("illegal argument to POW instruction"); + }; + + let Datum::Number(r) = deref!(&instr.1[1]) else { + e!("illgal argument to POW instruction"); + }; + + self.expr = Datum::Number((*l).pow(*r)); + }, + + i::INC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) { + *src = *src + Number::Fra(Fraction(1, 1)); + } else { + e!("illegal argument to INC instruction"); + }, + + i::DEC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) { + *src = *src - Number::Fra(Fraction(1, 1)); + } 
else { + e!("illegal argument to INC instruction"); + }, + + // byte/char to and from number conversions + i::CTON => { + let src = deref_mut!(&instr.1[0]); + if let Datum::Char(schr) = src { + *src = Datum::Number(Number::Fra(Fraction(*schr as isize, 1))); + } else { + e!("illegal argument to CTON instruction"); + } + }, + + i::NTOC => { + let src = deref_mut!(&instr.1[0]); + if let Datum::Number(snum) = src { + let n = snum.make_inexact(); + if !snum.is_exact() || n.0.fract() != 0.0 || n.0 > u8::MAX.into() || n.0 < 0.0 { + e!("input to NTOC cannot cleanly convert"); + } + *src = Datum::Char(n.0.trunc() as u64 as u8); + + } else { + e!("illegal argument to NTOC instruction"); + } + }, + + i::MKVEC => self.expr = Datum::Vector(RefCell::from(vec![])), + i::MKBVEC => self.expr = Datum::ByteVector(RefCell::from(vec![])), + i::INDEX => { + let Datum::Number(idx) = deref!(&instr.1[1]) else { + e!("illegal argument to INDEX instruction"); + }; + let idx = idx.make_inexact(); + if !idx.is_exact() || idx.0.fract() != 0.0 { + e!("illegal argument to INDEX instruction"); + } + let idx = idx.0.trunc() as usize; + + match deref!(&instr.1[0]) { + Datum::Vector(v) => { + let a = (*v.borrow()[idx].clone()).clone(); + self.expr = a; + }, + Datum::ByteVector(bv) => { + let a = Datum::Char(bv.borrow()[idx]); + self.expr = a; + }, + Datum::List(l) => self.expr = l[idx].clone(), + _ => e!("illegal argument to INDEX instruction") + }; + }, + + i::LENGTH => match deref!(&instr.1[0]) { + Datum::Vector(v) => { + let a = Datum::Number(Number::Fra(Fraction(v.borrow().len() as isize, 1))); + self.expr = a; + }, + Datum::ByteVector(bv) => { + let a = Datum::Number(Number::Fra(Fraction(bv.borrow().len() as isize, 1))); + self.expr = a; + }, + Datum::List(l) => + self.expr = Datum::Number(Number::Fra(Fraction(l.len() as isize, 1))), + _ => e!("illegal argument to LENGTH instruction"), + }, + + i::SUBSL => { + let Datum::Number(st) = deref!(&instr.1[1]) else { + e!("illegal argument to SUBSL 
instruction"); + }; + + let Datum::Number(ed) = deref!(&instr.1[2]) else { + e!("illegal argument to SUBSL instruction"); + }; + + if !st.is_exact() || !ed.is_exact() { + e!("illegal argument to SUBSL instruction"); + } + + let st = st.make_inexact(); + let ed = ed.make_inexact(); + + if st.0.fract() != 0.0 || ed.0.fract() != 0.0 { + e!("SUBSL: FP precision error"); + } + + let st = st.0.trunc() as usize; + let ed = ed.0.trunc() as usize; + + match deref!(&instr.1[0]) { + Datum::Vector(v) => { + let a = Datum::Vector(RefCell::from(v.borrow()[st..ed].to_vec())); + self.expr = a; + }, + Datum::ByteVector(bv) => { + let a = Datum::ByteVector(RefCell::from(bv.borrow()[st..ed].to_vec())); + self.expr = a; + }, + Datum::List(a) => + self.expr = Datum::List(Rc::new( + (**a).subsl(st as isize, ed as isize))), + _ => e!("illegal argument to SUBSL instruction") + }; + } + + i::INSER => { + let Datum::Number(idx) = deref!(&instr.1[2]) else { + e!("illegal argument to INSER instruction"); + }; + + let idx = idx.make_inexact(); + if !idx.is_exact() || idx.0.fract() != 0.0 { + e!("illegal argument to INSER instruction"); + } + + let idx = idx.0.trunc() as usize; + + match deref!(&instr.1[0]) { + Datum::Vector(v) => { + v.borrow_mut().insert(idx, deref!(&instr.1[1]).clone().into()); + }, + Datum::ByteVector(bv) => { + let Datum::Char(b) = deref!(&instr.1[1]) else { + e!("INSER instruction can only insert a byte into a bytevector"); + }; + bv.borrow_mut().insert(idx, *b); + }, + _ => e!("illegal argument to INSER instruction") + } + }, + + i::CAR => { + let Datum::List(arg) = deref!(&instr.1[0]) else { + e!("illegal argument to CAR instruction"); + }; + + self.expr = (*arg.0).clone(); + }, + + i::CDR => { + let Datum::List(arg) = deref!(&instr.1[0]) else { + e!("illegal argument to CAR instruction"); + }; + + self.expr = (*arg.1).clone(); + }, + + i::CONS => { + /* CONS BEHAVIOR + * L Datum is not list means create a new standard form list + * L Datum is list then append the 
second element to the first + */ + }, + + // in order to maintain a language agnostic VM these must be traps + //i::PARSE => todo!("implement AST API"), + //i::EVAL => todo!("implement AST API"), + + _ => { + e!("illegal instruction"); + }, + }; + } +} + diff --git a/mycelium/src/lib.rs b/mycelium/src/lib.rs index eb3bc56..b68beca 100644 --- a/mycelium/src/lib.rs +++ b/mycelium/src/lib.rs @@ -25,7 +25,6 @@ pub mod sexpr; pub mod lexer; pub mod parser; pub mod number; -pub mod stackstack; -pub mod hmap; extern crate alloc; + diff --git a/mycelium/src/number.rs b/mycelium/src/number.rs index 845bce0..d9af991 100644 --- a/mycelium/src/number.rs +++ b/mycelium/src/number.rs @@ -529,7 +529,7 @@ impl Numeric for Float { if self.0.fract() == 0.0 { Fraction(self.0 as isize, 1) } else { - unimplemented!("insert rational approximation procedure here") + todo!("rational approximation implementation") } } } diff --git a/mycelium/src/parser.rs b/mycelium/src/parser.rs index 3d7959e..31dc0d5 100644 --- a/mycelium/src/parser.rs +++ b/mycelium/src/parser.rs @@ -16,6 +16,7 @@ */ use core::fmt::Display; +use core::cell::RefCell; use crate::lexer::{ LexError, @@ -414,11 +415,11 @@ impl Parser { } if is_bv { - return Ok(Rc::from(Datum::ByteVector(bv_stack))) + return Ok(Rc::from(Datum::ByteVector(RefCell::from(bv_stack)))) } if token.token_type == LexTokenType::VectorStart { - return Ok(Rc::from(Datum::Vector(lex_stack))) + return Ok(Rc::from(Datum::Vector(RefCell::from(lex_stack)))) } // handle an empty list diff --git a/mycelium/src/sexpr.rs b/mycelium/src/sexpr.rs index b478712..d9f9438 100644 --- a/mycelium/src/sexpr.rs +++ b/mycelium/src/sexpr.rs @@ -16,6 +16,9 @@ */ use core::fmt::{self, Formatter}; +use core::ops::Index; +use core::cell::RefCell; + use alloc::format; use alloc::rc::Rc; use alloc::vec::Vec; @@ -23,7 +26,7 @@ use alloc::string::String; use crate::number::Number; -#[derive(Default, Clone)] +#[derive(Default, Clone, PartialEq)] pub enum Datum { Number(Number), 
Bool(bool), @@ -31,8 +34,8 @@ pub enum Datum { Symbol(String), Char(u8), String(Vec), - Vector(Vec>), - ByteVector(Vec), + Vector(RefCell>>), + ByteVector(RefCell>), #[default] None, } @@ -45,7 +48,8 @@ fn byte_to_escaped_char(b: u8) -> String { } } -fn fmt_vec(v: &Vec) -> String { +fn fmt_vec(ve: &RefCell>) -> String { + let v = ve.borrow(); if v.len() == 0 { return String::new() } @@ -102,9 +106,46 @@ impl fmt::Debug for Datum { } -#[derive(Default, Clone)] +#[derive(Default, Clone, PartialEq)] pub struct Ast(pub Rc, pub Rc); +impl Ast { + pub fn subsl(&self, start: isize, end: isize) -> Ast { + if end - start == 1 { + return Ast(Rc::from(self[start as usize].clone()), Rc::from(Datum::None)) + } + + if end == 0 { + return Ast( + Rc::from((*(self.0)).clone()), + Rc::from(Datum::None) + ) + } + + let Datum::List(ref next) = *self.1 else { + panic!("index into improper list form") + }; + + if start <= 0 { + Ast( + Rc::from((*(self.0)).clone()), + Rc::from(Datum::List( + Rc::from(next.subsl(start - 1, end - 1)))) + ) + + } else { + next.subsl(start - 1, end - 1) + } + } + + pub fn len(&self) -> usize { + let Datum::List(ref next) = *self.1 else { + return 1 + }; + 1 + next.len() + } +} + impl Iterator for Ast { type Item = Rc; @@ -127,6 +168,25 @@ impl Iterator for Ast { } } +impl Index for Ast { + type Output = Datum; + fn index(&self, index: usize) -> &Self::Output { + if index == 0 { + if let Datum::None = *self.0 { + panic!("out of bounds indexing into AST") + } else { + self.0.as_ref() + } + } else { + let Datum::List(ref next) = *self.1 else { + panic!("out of bounds indexing into AST") + }; + + next.index(index - 1) + } + } +} + impl fmt::Display for Ast { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "({}", self.0)?; diff --git a/snippets/in_progress_numbers.rs b/snippets/in_progress_numbers.rs new file mode 100644 index 0000000..39333dc --- /dev/null +++ b/snippets/in_progress_numbers.rs @@ -0,0 +1,642 @@ +use alloc::boxed::Box; +use 
alloc::{vec, vec::Vec}; +use alloc::fmt::Debug; +use lexer::{E_TOO_MANY_DECIMALS, E_TOO_MANY_SLASH}; + +use core::cmp::Ordering; +use core::{fmt, u8}; +use core::ops::{Add, Div, Mul, Sub}; + + +pub const E_INCOMPREHENSIBLE: &str = "could not parse number literal"; +pub const E_POUND_TRUNCATED: &str = "pound sign implies additional input"; +pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal"; +pub const E_UNKNOWN_CONTROL: &str = "unknown character in number literal"; +pub const E_EMPTY_INPUT: &str = "empty string cannot be a number"; + +const NUM_INF: &str = "+inf.0"; +const NUM_NEG_INF: &str = "-inf.0"; +const NUM_NAN: &str = "+nan.0"; +const NUM_NEG_NAN: &str = "-nan.0"; + +pub const NegativeFlag: u8 = 0b10000000; // positive value if off +pub const DecimalFlag: u8 = 0b01000000; // single integer if off +pub const FractionFlag: u8 = 0b00100000; // decimal if off +pub const ScientificFlag: u8 = 0b00010000; // requires a second flags byte +pub const InfiniteFlag: u8 = 0b00001000; // can be positive or negative +pub const NotANumberFlag: u8 = 0b00000100; // can be positive or negative because r7rs +pub const OverflownFlag: u8 = 0b00000010; // poisons exactness + + +/* NUMBER BYTES FORMAT + * Generally the format within the byte array operates like this + * (guaranteed header) 1. NumberFlags (u8) + * (for each integer) 2. Byte Length (u8) + * (for each integer) 3. N proceeding bytes of data + * + * If Scientific Notation is used the leading number may be a decimal. 
+ * In this case, there will be three total numbers + * + * All numbers are big endian + */ +#[repr(transparent)] +#[derive(Clone, Debug, PartialEq)] +pub struct Number<'src> (pub &'src [u8]); + + +/* WARNING + * member functions tend to assume that number encoding is consistent + * use Number::is_valid() to double check numbers from unknown sources + * + * TODO: maybe mark raw-indexing member functions as unsafe + */ +impl Number<'_> { + #[inline(always)] + pub fn byte_length(&self) -> u8 { + if self.0[0] & (InfiniteFlag | NotANumberFlag) != 0 { + return 1; + } + + let mut len = self.0[1] + 2; + if self.0[0] & (DecimalFlag | FractionFlag | ScientificFlag) != 0 { + len += self.0[len as usize] + 1; + } + + if self.0[0] & ScientificFlag != 0 && + self.0[0] & DecimalFlag != 0 { + len += self.0[len as usize]; + } + + len + } + + pub fn is_valid(&self) -> bool { + let len = self.0.len(); + if len < 1 { + return false; + } + + let decimal = self.0[0] & DecimalFlag != 0; + let fraction = self.0[0] & FractionFlag != 0; + let scientific = self.0[0] & ScientificFlag != 0; + let overflown = self.0[0] & OverflownFlag != 0; + let infinite = self.0[0] & InfiniteFlag != 0; + let notanumber = self.0[0] & NotANumberFlag != 0; + + // check flags + if overflown { + return false + } + + if (decimal && fraction) || (scientific && fraction) { + return false + } + + if (infinite || notanumber) && + (decimal || fraction || scientific || len != 1) { + return false + } + + // at least 3 bytes for a single u8 + if len < 3 { + return false + } + + let mut cur = self.0[1] + 2; + if len < cur as usize { + return false + } + + if decimal || fraction || scientific { + if len < (cur + 1) as usize { + return false; + } + + cur += self.0[cur as usize]; + if len < (cur + 1) as usize { + return false; + } + } + + if scientific && decimal { + cur += 1; + if len < (cur + 1) as usize { + return false + } + + cur += self.0[cur as usize]; + if len < (cur + 1) as usize { + return false + } + } + + true + } 
+ + #[inline(always)] + pub fn is_exact(&self) -> bool { + self.0[0] & ScientificFlag == 0 + } + + #[inline(always)] + pub fn make_exact_into(&self, dst:&mut Vec) { + // expand scientific notation else just direct copy + if self.0[0] & ScientificFlag != 0 { + self.normalize_scientific_into(dst); + return + } + + self.copy_into(dst); + } + + #[inline(always)] + pub fn make_inexact_into(&self, dst: &mut Vec) { + // basically just convert a fraction into an actual division + todo!() + } + + // use this so you dont have to worry about clone while casting + #[inline(always)] + pub fn copy_into(&self, dst: &mut Vec) { + for i in self.0 { + dst.push(*i) + } + } + + #[inline(always)] + pub fn normalize_scientific_into(&self, dst: &mut Vec) { + todo!() + } + + #[inline(always)] + pub fn simplify_fraction_in_place(&mut self) { + if self.0[0] & FractionFlag == 0 { + return + } + + // can technically do this in place + // each element of the fraction will only shrink + todo!() + } + + #[inline(always)] + pub fn from_str_into(src: &str, dst: &mut Vec) -> Result<(), &'static str> { + // handle symbolic values + match src { + NUM_INF => { + dst.push(0 as u8 | InfiniteFlag); + return Ok(()); + }, + + NUM_NEG_INF => { + dst.push(0 as u8 | NegativeFlag | InfiniteFlag); + return Ok(()); + }, + + NUM_NAN => { + dst.push(0 as u8 | NotANumberFlag); + return Ok(()); + }, + + NUM_NEG_NAN => { + dst.push(0 as u8 | NegativeFlag | NotANumberFlag); + return Ok(()); + }, + + _ => (), + } + + let mut ctrl_flags = 0 as u8; + let mut operands = vec![]; + let mut digits_per_byte = 3; // default to decimal encoding + let mut base = 0; + let mut iter = src.chars().peekable(); + + match iter.next() { + Some('+') => (), + Some('-') => { + ctrl_flags |= NegativeFlag; + }, + Some('#') => { + match iter.next() { + None => return Err(E_POUND_TRUNCATED), + Some('i') => /* force_inexact = true */ (), + Some('e') => /* force_exact = true */ (), + Some('x') => { digits_per_byte = 2; base = 16 }, + Some('d') 
=> { digits_per_byte = 3; base = 10 }, + Some('o') => { digits_per_byte = 4; base = 8 }, + Some('b') => { digits_per_byte = 8; base = 2 }, + _ => return Err(E_UNKNOWN_CONTROL), + } + }, + Some(a) if a.is_digit(10) => (), + Some(_) => return Err(E_INCOMPREHENSIBLE), + None => return Err(E_EMPTY_INPUT), + } + + let mut ops_needed = 1; + if base != 10 { + // cant mix non-decimal base and other number representations + let mut len = 0 as u8; + while let Some(chunk) = { + let mut chk = vec![]; + for _ in 0..digits_per_byte { + if let Some(c) = iter.next() { + chk.push(c as u8) + } + } + if chk.len() < 1 { None } else { Some(chk) } + } { + let Ok(val) = u8::from_str_radix( + unsafe {str::from_utf8_unchecked(chunk.as_slice())}, base) else { + return Err(E_BASE_PARSE_FAIL) + }; + operands.push(val); + len += 1; + } + // integer numbers prepended with their length + operands.insert(0, len); + ops_needed -= 1; + + } else { + // just a decimal number, but could have a weird format + loop { + macro_rules! 
pack_operand { + () => { + let s = unsafe { str::from_utf8_unchecked(operands.as_slice()) }; + let f = usize::from_str_radix(&s, 10).expect("str cast"); + let f = f.to_be_bytes(); + operands.clear(); + + dst.push(f.len() as u8); + dst.append(&mut f.to_vec()); + ops_needed -= 1; + } + } + + match iter.next() { + Some(c) if c.is_digit(10) => { + operands.push(c as u8); + }, + + Some('.') => { + ops_needed += 1; + if ctrl_flags & (FractionFlag | ScientificFlag) != 0 { + return Err(E_INCOMPREHENSIBLE) + } + + if ctrl_flags & DecimalFlag != 0 { + return Err(E_TOO_MANY_DECIMALS) + } + + ctrl_flags |= DecimalFlag; + pack_operand!(); + }, + + Some('/') => { + ops_needed += 1; + if ctrl_flags & (DecimalFlag | ScientificFlag) != 0 { + return Err(E_INCOMPREHENSIBLE) + } + + if ctrl_flags & FractionFlag != 0 { + return Err(E_TOO_MANY_SLASH) + } + + ctrl_flags |= DecimalFlag; + pack_operand!(); + }, + + Some('e') => { + ops_needed += 1; + if ctrl_flags & FractionFlag != 0 { + return Err(E_INCOMPREHENSIBLE) + } + + ctrl_flags |= ScientificFlag; + let mut newctrl = 0 as u8; + + if let Some('-') = iter.peek() { + newctrl |= NegativeFlag; + } + + pack_operand!(); + dst.push(newctrl); + }, + + Some(_) => return Err(E_INCOMPREHENSIBLE), + + None => { + pack_operand!(); + break; + } + } + } + } + + if ops_needed != 0 { + return Err(E_INCOMPREHENSIBLE); + } + + dst.insert(0, ctrl_flags); + Number(dst.as_slice()).simplify_fraction_in_place(); + Ok(()) + } + + pub fn from_u8_into(src: u8, dst: &mut Vec) -> Number { + dst.push(0 as u8); + dst.push(src); + Number(dst.as_slice()) + } +} + +impl fmt::Display for Number<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // can implement after I finish division + todo!() + } +} + +impl<'a> From<&'a Box<[u8]>> for Number<'a> { + fn from(value: &'a Box<[u8]>) -> Self { + Number(value.as_ref()) + } +} + +impl<'a> From<&'a Vec> for Number<'a> { + fn from(value: &'a Vec) -> Self { + Number(value.as_slice()) + } +} + +impl<'a> 
From<&'a [u8]> for Number<'a> { + fn from(value: &'a [u8]) -> Self { + Number(value) + } +} + +impl<'a> Into<&'a [u8]> for Number<'a> { + fn into(self) -> &'a [u8] { + self.0 + } +} + +impl Add for Number<'_> { + type Output = Box<[u8]>; + fn add(self, rhs: Self) -> Self::Output { + todo!() + } +} + +impl Sub for Number<'_> { + type Output = Box<[u8]>; + fn sub(self, rhs: Self) -> Self::Output { + todo!() + } +} + +impl Mul for Number<'_> { + type Output = Box<[u8]>; + fn mul(self, rhs: Self) -> Self::Output { + todo!() + } +} + +impl Div for Number<'_> { + type Output = Box<[u8]>; + fn div(self, rhs: Self) -> Self::Output { + // divide unsigned integer by unsigned integer + // the inputs (lh and rh) start with length byte + // returns a decimal index + fn div_ints(lh: &[u8], rh: &[u8], dest: &mut Vec) -> u8 { + todo!() + } + + /* Options + * divide a single int by a single int + * - (make fraction) + * divide a fraction by a single int + * - (multiply denominator) + * divide a decimal by a single int + * - (divide straight through) + * divide a scientific note by a single int + * - divide the first num + * - multiply by however much is needed for ones place (like 3.5) + * - add or subtract from the second number accordingly + * + * divide a single int by a fraction + * - output denom * lh / numer + * divide a single int by a decimal + */ + todo!() + } +} + +impl PartialEq for Number<'_> { + fn eq(&self, other: &Number) -> bool { + todo!() + } +} + +impl PartialOrd for Number<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + todo!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_number_tests() { + assert_eq!("1.3".parse::(), + Ok(Number::Flt(Float(1.3)))); + + assert_eq!("1".parse::(), + Ok(Number::Flt(Float(1 as f64)))); + + assert_eq!("1.3e3".parse::(), + Ok(Number::Sci(ScientificNotation(1.3, 3)))); + + assert_eq!("+1.3".parse::(), + Ok(Number::Flt(Float(1.3)))); + + assert_eq!("-1.3".parse::(), + 
Ok(Number::Flt(Float(-1.3)))); + + assert_eq!("#d234".parse::(), + Ok(Number::Flt(Float(234.0)))); + + assert_eq!("#o17".parse::(), + Ok(Number::Fra(Fraction(15, 1)))); + + assert_eq!("#xAA".parse::(), + Ok(Number::Fra(Fraction(170, 1)))); + + assert_eq!("#b101".parse::(), + Ok(Number::Flt(Float(5.0)))); + + assert_eq!("2/4".parse::(), + Ok(Number::Fra(Fraction(2, 4)))); + + assert_eq!("#e1/5".parse::(), + Ok(Number::Fra(Fraction(1, 5)))); + + assert_eq!("#i1/5".parse::(), + Ok(Number::Flt(Float(0.2)))); + + assert_eq!("#e1e1".parse::(), + Ok(Number::Sci(ScientificNotation(1.0, 1)))); + + assert_eq!("+inf.0".parse::(), + Ok(Number::Sym(SymbolicNumber::Inf))); + + assert_eq!("2e3".parse::(), + Ok(ScientificNotation(2.0, 3))); + + assert_eq!("0e1".parse::(), + Ok(ScientificNotation(0.0, 1))); + + assert_eq!("-1e34".parse::(), + Ok(ScientificNotation(-1.0, 34))); + + assert_eq!("3.3e3".parse::(), + Ok(ScientificNotation(3.3, 3))); + + assert_eq!("2".parse::(), + Err(E_SCIENTIFIC_E)); + + assert_eq!("2e2e2".parse::(), + Err(E_SCIENTIFIC_MULTI_E)); + + assert_eq!("2/3".parse::(), + Ok(Fraction(2, 3))); + + assert_eq!("0/1".parse::(), + Ok(Fraction(0, 1))); + + assert_eq!("-1/34".parse::(), + Ok(Fraction(-1, 34))); + + assert_eq!("2".parse::(), + Err(E_NO_DENOMINATOR)); + + assert_eq!("2/2/2".parse::(), + Err(E_MULTI_DENOMINATOR)); + + assert_eq!("2/0".parse::(), + Err(E_ZERO_DENOMINATOR)); + + assert_eq!("3.3/3".parse::(), + Err(E_NUMERATOR_PARSE_FAIL)); + } + + #[test] + fn test_number_addition_subtraction_cases() { + let cases = vec![ + vec!["1/5", "4/5", "1/1"], + vec!["1/5", "0.8", "1/1"], + vec!["1e1", "2.0", "12/1"], + vec!["1e1", "2/1", "12/1"], + vec!["1e1", "1/2", "10.5"], + ]; + + cases.iter().for_each(|case| { + println!("+ {:#?}", case); + let x = case[0].parse::().unwrap(); + let y = case[1].parse::().unwrap(); + let z = case[2].parse::().unwrap(); + + // test some mathematical properties + assert_eq!(x + y, z); + assert_eq!(x + y, y + x); + assert_eq!(z - 
x, y); + assert_eq!(x + y - x, y); + }); + + // theres no reason this should adhere to all the other rules + let x = "+inf.0".parse::().unwrap(); + let y = "1e1".parse::().unwrap(); + let z = "+inf.0".parse::().unwrap(); + assert_eq!(x + y, z); + } + + #[test] + fn test_number_multiplication_division_cases() { + let cases = vec![ + vec!["1/5", "5e0", "1/1"], + vec!["1/5", "5", "1/1"], + vec!["1/5", "2/1", "2/5"], + vec!["4.4", "1/2", "2.2"], + vec!["12.0", "1/2", "6/1"], + vec!["1e1", "2.0", "20/1"], + vec!["1e1", "2/1", "20/1"], + vec!["1e1", "1/2", "5/1"], + ]; + + cases.iter().for_each(|case| { + println!("+ {:#?}", case); + let x = case[0].parse::().unwrap(); + let y = case[1].parse::().unwrap(); + let z = case[2].parse::().unwrap(); + + // test some mathematical properties + assert_eq!(x * y, z); + assert_eq!(x * y, y * x); + assert_eq!(z / x, y); + assert_eq!(x * y / x, y); + }); + } + + #[test] + fn test_number_pow_cases() { + // TODO: add scientific notation cases + let cases = vec![ + vec!["2", "2", "4"], + vec!["2/1", "2/1", "4/1"], + vec!["2/1", "2/-1", "1/4"], + vec!["2/1", "2/2", "2/1"], + vec!["2/1", "2.0", "4/1"], + vec!["27/8", "2/-3", "4/9"] + ]; + + cases.iter().for_each(|case| { + println!("+ {:#?}", case); + let x = case[0].parse::().unwrap(); + let y = case[1].parse::().unwrap(); + let z = case[2].parse::().unwrap(); + assert_eq!(x.pow(y), z); + }); + } + + #[test] + fn test_number_ord_cases() { + // TODO: add more cases + let cases = vec![ + vec!["1/2", "1.0", "1e1"], + ]; + + cases.iter().for_each(|case| { + println!("+ {:#?}", case); + let x = case[0].parse::().unwrap(); + let y = case[1].parse::().unwrap(); + let z = case[2].parse::().unwrap(); + assert!(x < y); + assert!(y < z); + assert!(x < z); + }); + } + + #[test] + fn float_negative_exponent_case() { + if let Float(0.1) = "1e-1" + .parse::() + .unwrap() + .make_inexact() { + return + } + + assert!(false) + } +}