From 4ad319213d9199db63f2df07b58d436cf58d6c4c Mon Sep 17 00:00:00 2001 From: Ava Affine Date: Thu, 26 Jun 2025 10:52:54 -0700 Subject: [PATCH] HyphaeVM - WIP This commit is a WORK IN PROGRESS for the base implementation of the HyphaeVM. This will be squashed into a larger commit eventually when the work of implementing the HyphaeVM is finished. Of note, the ISA is mostly finished and much of the VM design is in place. Yet to be done are a few traps in mycelium, migrating pieces like the number package and the sexpr package into the VM package, and of course much testing. Signed-off-by: Ava Affine --- .gitlab-ci.yml | 7 +- Cargo.lock | 117 +++++ Cargo.toml | 2 +- hyphae/Cargo.toml | 12 + hyphae/build.rs | 109 +++++ hyphae/instructions.toml | 303 ++++++++++++ {mycelium => hyphae}/src/hmap.rs | 6 +- hyphae/src/instr.rs | 19 + hyphae/src/lib.rs | 26 + {mycelium => hyphae}/src/stackstack.rs | 4 +- hyphae/src/util.rs | 303 ++++++++++++ hyphae/src/vm.rs | 460 ++++++++++++++++++ mycelium/src/lib.rs | 3 +- mycelium/src/number.rs | 2 +- mycelium/src/parser.rs | 5 +- mycelium/src/sexpr.rs | 70 ++- snippets/in_progress_numbers.rs | 642 +++++++++++++++++++++++++ 17 files changed, 2073 insertions(+), 17 deletions(-) create mode 100644 hyphae/Cargo.toml create mode 100644 hyphae/build.rs create mode 100644 hyphae/instructions.toml rename {mycelium => hyphae}/src/hmap.rs (97%) create mode 100644 hyphae/src/instr.rs create mode 100644 hyphae/src/lib.rs rename {mycelium => hyphae}/src/stackstack.rs (98%) create mode 100644 hyphae/src/util.rs create mode 100644 hyphae/src/vm.rs create mode 100644 snippets/in_progress_numbers.rs diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d6d19ac..181d394 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -27,7 +27,7 @@ unit-test-parser: - cargo test parser unit-test-number-package: - stage: test-backend + stage: test-frontend script: - cargo test number @@ -40,3 +40,8 @@ unit-test-quickmap: stage: test-backend script: - cargo test hmap + 
+unit-test-instruction-decoding: + stage: test-backend + script: + - cargo test util diff --git a/Cargo.lock b/Cargo.lock index e800b30..97860ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -112,18 +112,56 @@ dependencies = [ "mycelium", ] +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hyphae" +version = "0.1.0" +dependencies = [ + "mycelium", + "num", + "serde", + "toml", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + [[package]] name = "mycelium" version = "0.1.0" @@ -228,6 +266,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = "strsim" version = "0.11.1" @@ -245,6 +312,47 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "unicode-ident" version = "1.0.18" @@ -329,3 +437,12 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" +dependencies = [ + "memchr", +] diff --git a/Cargo.toml b/Cargo.toml index 3d4d48c..43867c5 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,3 @@ [workspace] resolver = "2" -members = ["mycelium", "decomposer"] +members = ["mycelium", "decomposer", "hyphae"] diff --git a/hyphae/Cargo.toml b/hyphae/Cargo.toml new file mode 100644 index 0000000..cb867ab --- /dev/null +++ b/hyphae/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "hyphae" +version = "0.1.0" +edition = "2024" + +[dependencies] +mycelium = { path = "../mycelium" } +num = { version = "0.4.3", features = ["alloc"] } + +[build-dependencies] +serde = { version = "1.0", features = ["alloc", "derive"] } +toml = "0.8.23" diff --git a/hyphae/build.rs b/hyphae/build.rs new file mode 100644 index 0000000..0255141 --- /dev/null +++ b/hyphae/build.rs @@ -0,0 +1,109 @@ +use std::{env, fs}; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; +use serde::Deserialize; + +#[derive(Deserialize)] +struct Document { + pub instructions: Vec, +} + +// dont warn about unused fields in json instruction struct +#[allow(dead_code)] +#[derive(Deserialize)] +struct Instruction { + pub name: String, + pub args: Vec, + pub output: String, + pub description: String, +} + +fn main() { + let output_path = Path::new(&env::var("OUT_DIR").unwrap()) + .join("hyphae_instr.rs"); + let input = fs::read_to_string("instructions.toml") + .unwrap(); + let mut output_file = + BufWriter::new(File::create(&output_path).unwrap()); + + let instruction_table: Document = + toml::from_str(&input) + .expect("hyphae: failed to parse instructions.toml"); + + let mut isa = "#[repr(transparent)]\n".to_owned(); + isa += "#[derive(Clone, Debug, PartialEq)]\n"; + isa += "pub struct Operation(pub u8);\n\n"; + + let mut isa_from_byte = "impl TryFrom for Operation {\n".to_owned(); + isa_from_byte += " type Error = &'static str;\n"; + isa_from_byte += " fn try_from(v: u8) -> 
Result<Self, Self::Error> {\n"; + isa_from_byte += " match v {\n"; + + + let mut isa_fromstr = "impl FromStr for Operation {\n".to_owned(); + isa_fromstr += " type Err = &'static str;\n"; + isa_fromstr += " fn from_str(v: &str) -> Result<Self, Self::Err> {\n"; + isa_fromstr += " match v {\n"; + + let mut isa_from_str = "impl TryFrom<&str> for Operation {\n".to_owned(); + isa_from_str += " type Error = &'static str;\n"; + isa_from_str += " fn try_from(v: &str) -> Result<Self, Self::Error> {\n"; + isa_from_str += " match v {\n"; + + let mut isa_num_args = "impl Operation {\n".to_owned(); + isa_num_args += " pub fn num_args(&self) -> Result<u8, &'static str> {\n"; + isa_num_args += " match self.0 {\n"; + + instruction_table.instructions.iter() + .enumerate() + .for_each(|(idx, instr)| { + let const_name = instr.name.to_ascii_uppercase(); + + isa += format!("pub const {}: Operation = Operation({});\n", + const_name, idx).as_str(); + + isa_from_byte += format!(" {} => Ok({}),\n", idx, const_name) + .as_str(); + + isa_from_str += format!(" \"{}\" => Ok({}),\n", + const_name, const_name).as_str(); + + isa_fromstr += format!(" \"{}\" => Ok({}),\n", + const_name, const_name).as_str(); + + isa_num_args += format!(" {} => Ok({}),\n", idx, instr.args.len()) + .as_str() + }); + + isa_from_byte += " _ => Err(\"illegal instruction\"),\n"; + isa_from_byte += " }\n"; + isa_from_byte += " }\n"; + isa_from_byte += "}\n\n"; + + isa_from_str += " _ => Err(\"illegal instruction\"),\n"; + isa_from_str += " }\n"; + isa_from_str += " }\n"; + isa_from_str += "}\n\n"; + + isa_fromstr += " _ => Err(\"illegal instruction\"),\n"; + isa_fromstr += " }\n"; + isa_fromstr += " }\n"; + isa_fromstr += "}\n\n"; + + isa_num_args += " _ => Err(\"illegal instruction\"),\n"; + isa_num_args += " }\n"; + isa_num_args += " }\n"; + isa_num_args += "}\n\n"; + + isa += "\n"; + isa += isa_from_byte.as_str(); + isa += isa_from_str.as_str(); + isa += isa_fromstr.as_str(); + isa += isa_num_args.as_str(); + + write!(&mut output_file, "use core::str::FromStr;\n\n\n").unwrap(); +
write!(&mut output_file, "{}", isa).unwrap(); + println!("cargo::rerun-if-changed=build.rs"); + println!("cargo::rerun-if-changed=instructions.toml"); // must name the file read by fs::read_to_string above +} diff --git a/hyphae/instructions.toml b/hyphae/instructions.toml new file mode 100644 index 0000000..fa85567 --- /dev/null +++ b/hyphae/instructions.toml @@ -0,0 +1,303 @@ +# NOTE: keep libc out of this, that's what the trap vector is for +# NOTE: to programmers: only registers allow mutable access + +[[instructions]] +name = "trap" +args = ["index"] +output = "result of function" +description = "triggers callback in trap vector at index" + +[[instructions]] +name = "bind" +args = ["name", "operand"] +output = "" +description = "map name to operand in sym table." + +[[instructions]] +name = "unbind" +args = ["name"] +output = "" +description = "remove name mapping from sym table." + +[[instructions]] +name = "bound" +args = ["name"] +output = "expr = true if name is bound" +description = "test if a name is already bound" + +[[instructions]] +name = "push" +args = ["operand"] +output = "" +description = "pushes operand onto stack." + +[[instructions]] +name = "pop" +args = [] +output = "" +description = "removes element at top of stack."
+ +[[instructions]] +name = "enter" +args = [] +output = "" +description = "create new stack frame" + +[[instructions]] +name = "exit" +args = [] +output = "" +description = "delete current stack frame" + +[[instructions]] +name = "load" +args = ["src", "dest"] +output = "" +description = "copies src into dest" + +[[instructions]] +name = "clear" +args = ["dest"] +output = "" +description = "clears dest" + +[[instructions]] +name = "nop" +args = [] +output = "" +description = "no operation" + +[[instructions]] +name = "halt" +args = [] +output = "" +description = "halts the VM" + +[[instructions]] +name = "panic" +args = ["error"] +output = "" +description = "sets error state and halts VM" + +[[instructions]] +name = "jmp" +args = ["addr"] +output = "" +description = "sets ictr register to addr" + +[[instructions]] +name = "jmpif" +args = ["addr"] +output = "" +description = "if expr register holds true, sets ictr to addr" + +[[instructions]] +name = "eq" +args = ["a", "b"] +output = "a == b" +description = "equality test" + +[[instructions]] +name = "lt" +args = ["a", "b"] +output = "a < b" +description = "less than test" + +[[instructions]] +name = "gt" +args = ["a", "b"] +output = "a > b" +description = "greater than test" + +[[instructions]] +name = "lte" +args = ["a", "b"] +output = "a <= b" +description = "less than equals test" + +[[instructions]] +name = "gte" +args = ["a", "b"] +output = "a >= b" +description = "greater than equals test" + +[[instructions]] +name = "bool_not" +args = [] +output = "expr = !expr" +description = "boolean not" + +[[instructions]] +name = "bool_and" +args = ["a", "b"] +output = "a && b" +description = "boolean and" + +[[instructions]] +name = "bool_or" +args = ["a", "b"] +output = "a || b" +description = "boolean or" + +[[instructions]] +name = "byte_and" +args = ["a", "b"] +output = "a & b" +description = "bitwise and" + +[[instructions]] +name = "byte_or" +args = ["a", "b"] +output = "a | b" +description = "bitwise or" + 
+[[instructions]] +name = "xor" +args = ["a", "b"] +output = "a xor b" +description = "bitwise exclusive or" + +[[instructions]] +name = "byte_not" +args = [] +output = "expr = !expr" +description = "bitwise not" + +[[instructions]] +name = "add" +args = ["a", "b"] +output = "a + b" +description = "numeric addition" + +[[instructions]] +name = "sub" +args = ["a", "b"] +output = "a - b" +description = "numeric subtraction" + +[[instructions]] +name = "mul" +args = ["a", "b"] +output = "a * b" +description = "numeric multiplication" + +[[instructions]] +name = "fdiv" +args = ["a", "b"] +output = "a / b" +description = "numeric FLOAT division" + +[[instructions]] +name = "idiv" +args = ["a", "b"] +output = "a / b" +description = "numeric INTEGER division" + +[[instructions]] +name = "pow" +args = ["a", "b"] +output = "a ^ b" +description = "numeric operation to raise a to the power of b" + +[[instructions]] +name = "modulo" +args = ["a", "b"] +output = "a % b" +description = "numeric modulo operation" + +[[instructions]] +name = "rem" +args = ["a", "b"] +output = "remainder from a / b" +description = "remainder from integer division" + +[[instructions]] +name = "inc" +args = ["src"] +output = "" +description = "increments number at source" + +[[instructions]] +name = "dec" +args = ["src"] +output = "" +description = "decrements number at source" + +[[instructions]] +name = "cton" +args = ["src"] +output = "" +description = "mutates a char datum into a number datum" + +[[instructions]] +name = "ntoc" +args = ["src"] +output = "" +description = "mutates a number datum into a char datum" + +[[instructions]] +name = "ntoi" +args = ["src"] +output = "" +description = "mutates a number datum into its exact form" + +[[instructions]] +name = "ntoe" +args = ["src"] +output = "" +description = "mutates a number datum into its inexact form" + +[[instructions]] +name = "mkvec" +args = [] +output = "a blank vector" +description = "creates a new vector" + +[[instructions]] +name = 
"mkbvec" +args = [] +output = "a blank bytevector" +description = "creates a blank bytevector" + +[[instructions]] +name = "index" +args = ["collection", "index"] +output = "collection[index]" +description = "extracts element from collection at index" + +[[instructions]] +name = "length" +args = ["collection"] +output = "length of collection" +description = "calculates length of collection" + +[[instructions]] +name = "subsl" +args = ["collection", "start", "end"] +output = "collection[start:end]" +description = "returns a subset from collection denoted by start and end indexes" + +[[instructions]] +name = "inser" +args = ["collection", "elem", "idx"] +output = "" +description = "inserts an element at specified index into a collection" + +[[instructions]] +name = "cons" +args = ["left", "right"] +output = "resulting collection" +description = "either append right to left or make new list from both" + +[[instructions]] +name = "car" +args = ["list"] +output = "returns first element in cons cell" +description = "takes an AST and returns first element in top level cons cell" + +[[instructions]] +name = "cdr" +args = ["list"] +output = "returns last element in cons cell" +description = "takes an AST and returns last element in top level cons cell" + diff --git a/mycelium/src/hmap.rs b/hyphae/src/hmap.rs similarity index 97% rename from mycelium/src/hmap.rs rename to hyphae/src/hmap.rs index 6b417e2..2705cd1 100755 --- a/mycelium/src/hmap.rs +++ b/hyphae/src/hmap.rs @@ -44,7 +44,7 @@ const INDEXED_BUCKETS: u8 = 199; * or more likely rip and replace with a better nostd hashmap */ #[inline] -fn string_hash(input: &String) -> u8 { +fn string_hash(input: &str) -> u8 { input .chars() // each letter and number get a digit @@ -82,7 +82,7 @@ impl<'a, T: Clone> QuickMap { return None; } - pub fn remove(&mut self, arg: &String) -> Option { + pub fn remove(&mut self, arg: &str) -> Option { let idx = string_hash(&arg); let len = self.0[idx as usize].0.len(); for i in 0..len { @@ 
-97,7 +97,7 @@ impl<'a, T: Clone> QuickMap { return None; } - pub fn contains_key(&self, arg: &String) -> bool { + pub fn contains_key(&self, arg: &str) -> bool { let idx = string_hash(arg); for kv in self.0[idx as usize].0.iter() { if &kv.0 == arg { diff --git a/hyphae/src/instr.rs b/hyphae/src/instr.rs new file mode 100644 index 0000000..b5010a8 --- /dev/null +++ b/hyphae/src/instr.rs @@ -0,0 +1,19 @@ +/* Mycelium Scheme + * Copyright (C) 2025 Ava Affine + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +include!(concat!(env!("OUT_DIR"), "/hyphae_instr.rs")); + diff --git a/hyphae/src/lib.rs b/hyphae/src/lib.rs new file mode 100644 index 0000000..4de7909 --- /dev/null +++ b/hyphae/src/lib.rs @@ -0,0 +1,26 @@ +/* Mycelium Scheme + * Copyright (C) 2025 Ava Affine + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#![cfg_attr(not(test), no_std)] + +pub mod hmap; +pub mod stackstack; +pub mod instr; +pub mod vm; +pub mod util; + +extern crate alloc; diff --git a/mycelium/src/stackstack.rs b/hyphae/src/stackstack.rs similarity index 98% rename from mycelium/src/stackstack.rs rename to hyphae/src/stackstack.rs index c772f99..db1ab3d 100644 --- a/mycelium/src/stackstack.rs +++ b/hyphae/src/stackstack.rs @@ -83,10 +83,10 @@ impl Debug for StackStack { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let mut ss_idx = 1; let mut ss_cur = &*self.0; - while let Some(ref inner) = ss_cur { + while let Some(inner) = ss_cur { write!(f, "Frame {ss_idx}:")?; let mut s_cur = &*inner.stack.0; - while let Some(ref node) = s_cur { + while let Some(node) = s_cur { write!(f, " {:#?}", node.data)?; s_cur = &*node.next.0; } diff --git a/hyphae/src/util.rs b/hyphae/src/util.rs new file mode 100644 index 0000000..de902bb --- /dev/null +++ b/hyphae/src/util.rs @@ -0,0 +1,303 @@ +/* Mycelium Scheme + * Copyright (C) 2025 Ava Affine + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +use crate::instr::Operation; + +use alloc::vec::Vec; +use alloc::vec; + +use core::ops::Index; +use core::mem::size_of; + +#[repr(u8)] +#[derive(Debug, Clone, PartialEq)] +pub enum Address { + Stack = 0xf0, // immutable access only + Instr = 0xf1, // immutable access only + Expr = 0xf2, // mutable access allowed + Oper1 = 0xf3, // mutable access allowed + Oper2 = 0xf4, // mutable access allowed + Oper3 = 0xf5, // mutable access allowed + Oper4 = 0xf6, // mutable access allowed + Numer = 0xf8, // immutable access only +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Operand(pub Address, pub usize); + +#[derive(Debug, Clone, PartialEq)] +pub struct Instruction(pub Operation, pub Vec<Operand>); + +#[derive(Debug, Clone, PartialEq)] +pub struct Program(pub Vec<Instruction>); + +impl From<Address> for u8 { + fn from(addr: Address) -> u8 { + addr as u8 // plain cast; Address is a fieldless #[repr(u8)] enum, no unsafe needed + } +} + +impl TryFrom<u8> for Address { + type Error = &'static str; + fn try_from(val: u8) -> Result<Self, Self::Error> { + match val { + _ if val == Address::Stack as u8 => Ok(Address::Stack), + _ if val == Address::Instr as u8 => Ok(Address::Instr), + _ if val == Address::Expr as u8 => Ok(Address::Expr), + _ if val == Address::Oper1 as u8 => Ok(Address::Oper1), + _ if val == Address::Oper2 as u8 => Ok(Address::Oper2), + _ if val == Address::Oper3 as u8 => Ok(Address::Oper3), + _ if val == Address::Oper4 as u8 => Ok(Address::Oper4), + _ if val == Address::Numer as u8 => Ok(Address::Numer), + _ => Err("illegal addressing mode") + } + } +} + +impl Address { + fn operand_size(&self) -> u8 { + match self { + Address::Stack => (usize::BITS / 8) as u8, + Address::Instr => (usize::BITS / 8) as u8, + Address::Numer => (usize::BITS / 8) as u8, + _ => 0, + } + } +} + +impl TryFrom<&[u8]> for Operand { + type Error = &'static str; + fn try_from(value: &[u8]) -> Result<Self, Self::Error> { + let addr_mode: Address = value.first().copied().ok_or("truncated address data")?.try_into()?; // empty input is an error, not a panic + let operand_size = addr_mode.operand_size(); + if value.len() < (operand_size + 1).into() { + return Err("truncated address data") + } + +
let mut operand_bytes = [0u8; size_of::<usize>()]; // usize-wide so non-64-bit targets compile too + for (&src, dest) in value[1..(1+operand_size) as usize] + .iter() + .zip(operand_bytes.iter_mut()) { + *dest = src; + } + + Ok(Operand(addr_mode, usize::from_ne_bytes(operand_bytes))) + } +} + +impl From<Operand> for Vec<u8> { + fn from(operand: Operand) -> Vec<u8> { + let mut res = vec![]; + res.push(operand.0.clone() as u8); + res.append(&mut operand.1.to_ne_bytes()[..operand.0.operand_size() as usize].to_vec()); + res + } +} + +impl Operand { + fn byte_length(&self) -> u8 { + 1 + self.0.operand_size() + } +} + +impl TryFrom<&[u8]> for Instruction { + type Error = &'static str; + fn try_from(value: &[u8]) -> Result<Self, Self::Error> { + let operation: Operation = value.first().copied().ok_or("instruction data truncated")?.try_into()?; // empty input is an error, not a panic + let mut operands: Vec<Operand> = vec![]; + + let mut cur = 1; + for _ in 0..operation.num_args()? { + if cur >= value.len() { + return Err("operand data truncated") + } + let operand: Operand = value[cur..].try_into()?; + cur += operand.byte_length() as usize; + operands.push(operand); + } + + Ok(Instruction(operation, operands)) + } +} + +impl From<Instruction> for Vec<u8> { + fn from(instruction: Instruction) -> Vec<u8> { + let mut res = vec![]; + res.push(instruction.0.0); + for op in instruction.1 { + res.append(&mut op.into()) + } + res + } +} + +impl Instruction { + fn byte_length(&self) -> u8 { + self.1.iter() + .fold(0, |total, oper| + total + oper.byte_length()) + 1 + } +} + +impl TryFrom<&[u8]> for Program { + type Error = &'static str; + fn try_from(value: &[u8]) -> Result<Self, Self::Error> { + let mut prog: Vec<Instruction> = vec![]; + let mut cur = 0; + + while cur < value.len() { + let instruction: Instruction = value[cur..].try_into()?; + cur += instruction.byte_length() as usize; + prog.push(instruction); + } + + Ok(Program(prog)) + } +} + +impl From<Program> for Vec<u8> { + fn from(program: Program) -> Vec<u8> { + let mut res: Vec<u8> = vec![]; + for instr in program.0 { + res.append(&mut instr.into()) + } + res + } +} + +impl Index<usize> for Program { + type Output = Instruction; + fn index(&self, index: usize) -> &Instruction { + self.0.get(index).expect("access to out
of bounds instruction in vm") + } +} + + +#[cfg(test)] +mod tests { + use crate::instr; + use super::*; + + #[test] + fn test_operand_parse() { + let bad_addressing = + TryInto::<Operand>::try_into(&[0x13, 0x39][..]); + assert_eq!(bad_addressing, Err("illegal addressing mode")); + + let truncated_address = + TryInto::<Operand>::try_into(&[0xf1][..]); + assert_eq!(truncated_address, Err("truncated address data")); + + let usize_case = + TryInto::<Operand>::try_into(&[Address::Stack.into(), + 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23][..]); + assert!(usize_case.is_ok()); + assert_eq!(usize_case.unwrap().0, Address::Stack); + + let register_operand = Operand(Address::Expr, 0); + let operand_byte_arr = + TryInto::<Vec<u8>>::try_into(register_operand.clone()); + assert!(operand_byte_arr.is_ok()); + let br = operand_byte_arr.unwrap(); + let operand_bytes = br.as_slice(); + assert_eq!(operand_bytes, &[0xf2][..]); + let operand_conv = + TryInto::<Operand>::try_into(operand_bytes); + assert!(operand_conv.is_ok()); + assert_eq!(register_operand, operand_conv.unwrap()); + } + + #[test] + fn test_instruction_parse() { + let illegal_instruction = + TryInto::<Instruction>::try_into(&[0x88][..]); + assert_eq!(illegal_instruction, Err("illegal instruction")); + + let bad_operand = + TryInto::<Instruction>::try_into(&[instr::TRAP.0, 0xf1][..]); + assert_eq!(bad_operand, Err("truncated address data")); + + let need_more_opers = + TryInto::<Instruction>::try_into(&[instr::TRAP.0][..]); + assert_eq!(need_more_opers, Err("operand data truncated")); + + let no_operands = + TryInto::<Instruction>::try_into(&[instr::POP.0][..]); + assert!(no_operands.is_ok()); + let nop = no_operands.unwrap(); + assert_eq!(nop.0, instr::POP); + let nop_bytes = + TryInto::<Vec<u8>>::try_into(nop); + assert!(nop_bytes.is_ok()); + assert_eq!(nop_bytes.unwrap(), vec![instr::POP.0]); + + let one_operand = + TryInto::<Instruction>::try_into(&[instr::TRAP.0, 0xf3][..]); + assert!(one_operand.is_ok()); + let oe_oper = one_operand.unwrap(); + assert_eq!(oe_oper.0, instr::TRAP); + assert_eq!(oe_oper.1.len(), 1); +
assert_eq!(oe_oper.1[0], Operand(Address::Oper1, 0)); + let oe_bytes = + TryInto::<Vec<u8>>::try_into(oe_oper); + assert!(oe_bytes.is_ok()); + assert_eq!(oe_bytes.unwrap(), vec![instr::TRAP.0, 0xf3]); + + let two_operands = + TryInto::<Instruction>::try_into(&[instr::LOAD.0, 0xf3, 0xf4][..]); + assert!(two_operands.is_ok()); + let two_oper = two_operands.unwrap(); + assert_eq!(two_oper.0, instr::LOAD); + assert_eq!(two_oper.1.len(), 2); + let two_bytes = + TryInto::<Vec<u8>>::try_into(two_oper.clone()); + assert!(two_bytes.is_ok()); + assert_eq!(two_bytes.unwrap(), vec![instr::LOAD.0, 0xf3, 0xf4]); + assert_eq!(two_oper.1[0], Operand(Address::Oper1, 0)); + assert_eq!(two_oper.1[1], Operand(Address::Oper2, 0)); + } + + #[test] + fn test_program_parse() { + let bytes1 = [instr::LOAD.0, 0xf3, 0xf4]; + let out1 = vec![Instruction(instr::LOAD, + vec![Operand(Address::Oper1, 0), Operand(Address::Oper2, 0)])]; + let res1 = + TryInto::<Program>::try_into(&bytes1[..]); + assert!(res1.is_ok()); + assert_eq!(res1.unwrap().0, out1); + + let bytes2 = [ + instr::LOAD.0, 0xf3, 0xf4, + instr::CLEAR.0, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + ]; + let out2 = vec![ + Instruction(instr::LOAD, vec![ + Operand(Address::Oper1, 0), + Operand(Address::Oper2, 0) + ]), + + Instruction(instr::CLEAR, vec![ + Operand(Address::Stack, 1) + ]) + ]; + let res2 = + TryInto::<Program>::try_into(&bytes2[..]); + assert!(res2.is_ok()); + assert_eq!(res2.unwrap().0, out2); + } +} diff --git a/hyphae/src/vm.rs b/hyphae/src/vm.rs new file mode 100644 index 0000000..089e1fd --- /dev/null +++ b/hyphae/src/vm.rs @@ -0,0 +1,460 @@ +/* Mycelium Scheme + * Copyright (C) 2025 Ava Affine + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +use mycelium::sexpr::Datum; +use mycelium::number::{Fraction, Number, Numeric}; + +use crate::hmap::QuickMap; +use crate::stackstack::StackStack; +use crate::instr as i; +use crate::util::{Operand, Program, Address}; + +use core::cell::RefCell; + +use alloc::vec; +use alloc::rc::Rc; +use alloc::vec::Vec; +use alloc::sync::Arc; +use alloc::borrow::ToOwned; + +use num::pow::Pow; + + +const NUM_OPERAND_REGISTERS: usize = 4; + +pub struct VM { + // execution environment + pub stack: StackStack, + pub symtab: QuickMap, + pub prog: Program, + pub fds: Vec, + pub traps: Vec>, + + // data registers + pub expr: Datum, + pub oper: [Datum; NUM_OPERAND_REGISTERS], + + // control flow registers + pub retn: usize, + pub ictr: usize, + pub errr: Datum, + + // state + pub running: bool, + pub err_state: bool, +} + +impl VM { + pub fn run_program(&mut self) { + if self.prog.0.len() < 1 { + self.running = false; + } + + while self.ictr < self.prog.0.len() { + if self.err_state || !self.running { + return; + } + + self.execute_instruction(); + self.ictr += 1; + } + + self.running = false; + } + + #[inline(always)] + fn execute_instruction(&mut self) { + let instr = &self.prog.0[self.ictr].clone(); + + macro_rules! e { + ( $err:expr ) => { + { + self.running = false; + self.err_state = true; + self.errr = Datum::String($err.as_bytes().to_vec()); + return; + } + } + } + + macro_rules! 
deref { + ( $oper:expr ) => { + match $oper.0 { + Address::Expr => &self.expr, + Address::Oper1 => &self.oper[0], + Address::Oper2 => &self.oper[1], + Address::Oper3 => &self.oper[2], + Address::Oper4 => &self.oper[3], + Address::Stack => &self.stack[$oper.1], + Address::Numer => e!("attempt to dereference constant numeric data"), + Address::Instr => e!("bad access to instruction data"), + } + } + } + + macro_rules! deref_mut { + ( $oper:expr ) => { + match $oper.0 { + Address::Expr => &mut self.expr, + Address::Oper1 => &mut self.oper[0], + Address::Oper2 => &mut self.oper[1], + Address::Oper3 => &mut self.oper[2], + Address::Oper4 => &mut self.oper[3], + Address::Instr => e!("bad mutable access to instruction data"), + // Stack, Numer + _ => e!("mutable access to immutable data"), + } + } + } + + macro_rules! do_jmp { + ( $idx:expr ) => { + let Operand(Address::Instr, target) = instr.1[$idx] else { + e!("illegal argument to jump"); + }; + + if target >= self.prog.0.len() { + e!("out of bounds jump caught"); + } + + self.ictr = target; + } + } + + macro_rules! 
lr_oper { + ( $in_type:ident, $oper:tt, $out_type:ident ) => { + self.expr = Datum::$out_type(*match deref!(&instr.1[0]){ + Datum::$in_type(l) => l, + _ => e!("illegal argument to instruction"), + } $oper *match deref!(&instr.1[1]){ + Datum::$in_type(l) => l, + _ => e!("illegal argument to instruction"), + }) + } + } + + match instr.0 { + i::TRAP => { + let Operand(Address::Numer, idx) = instr.1[0] else { + e!("illegal argument to TRAP instruction"); + }; + + if idx >= self.traps.len() { + e!("access to out of bounds trap!") + } + + self.traps[idx].clone()(self) + }, + + // symtable ops + i::BIND => { + let Datum::String(tag) = deref!(&instr.1[0]) else { + e!("illegal argument to BIND instruction"); + }; + let tag = unsafe { str::from_utf8_unchecked(&tag).to_owned() }; + self.symtab.insert(tag, instr.1[1].clone()); + }, + + i::UNBIND => { + let Datum::String(tag) = deref!(&instr.1[0]) else { + e!("illegal argument to UNBIND instruction"); + }; + let tag = unsafe { str::from_utf8_unchecked(&tag) }; + self.symtab.remove(&tag); + }, + + i::BOUND => { + let Datum::String(tag) = deref!(&instr.1[0]) else { + e!("illegal argument to BOUND instruction"); + }; + let tag = unsafe { str::from_utf8_unchecked(&tag) }; + self.symtab.contains_key(&tag); + }, + + // stack ops + i::PUSH => self.stack.push_current_stack(deref!(&instr.1[0]).clone()), + i::POP => _ = self.stack.pop_current_stack(), + i::ENTER => self.stack.add_stack(), + i::EXIT => self.stack.destroy_top_stack(), + + // movement ops + i::LOAD => *deref_mut!(&instr.1[1]) = deref!(&instr.1[0]).clone(), + i::CLEAR => *deref_mut!(&instr.1[0]) = Datum::None, + + // control flow ops + i::NOP => (), + i::HALT => self.running = false, + i::PANIC => { + self.running = false; + self.err_state = false; + self.errr = deref!(&instr.1[0]).clone() + }, + + i::JMP => { + do_jmp!(0); + }, + + i::JMPIF => { + if let Datum::Bool(true) = self.expr { + do_jmp!(0); + } + }, + + // boolean ops + i::EQ => self.expr = 
Datum::Bool(*deref!(&instr.1[0]) == *deref!(&instr.1[1])), + i::LT => lr_oper!(Number, <, Bool), + i::GT => lr_oper!(Number, >, Bool), + i::LTE => lr_oper!(Number, <=, Bool), + i::GTE => lr_oper!(Number, >=, Bool), + i::BOOL_NOT => { + self.expr = Datum::Bool(!{ + let Datum::Bool(a) = self.expr else { + e!("illegal argument to BOOL_NOT instruction"); + }; + a + }); + }, + + i::BOOL_AND => lr_oper!(Bool, &&, Bool), + i::BOOL_OR => lr_oper!(Bool, ||, Bool), + + // char / byte ops + i::BYTE_AND => lr_oper!(Char, &, Char), + i::BYTE_OR => lr_oper!(Char, |, Char), + i::XOR => lr_oper!(Char, ^, Char), + i::BYTE_NOT => { + self.expr = Datum::Char(!{ + let Datum::Char(a) = self.expr else { + e!("illegal argument to BYTE_NOT instruction"); + }; + a + }); + }, + + // numeric ops + i::ADD => lr_oper!(Number, +, Number), + i::SUB => lr_oper!(Number, -, Number), + i::MUL => lr_oper!(Number, *, Number), + i::FDIV => lr_oper!(Number, /, Number), + i::IDIV => { + let Datum::Number(l) = deref!(&instr.1[0]) else { + e!("illegal argument to IDIV instruction"); + }; + + let Datum::Number(r) = deref!(&instr.1[1]) else { + e!("illgal argument to IDIV instruction"); + }; + + let Fraction(l, 1) = l.make_exact() else { + e!("integer division on non integer value"); + }; + + let Fraction(r, 1) = r.make_exact() else { + e!("integer division on non integer value"); + }; + + self.expr = Datum::Number(Number::Fra(Fraction(l / r, 1))); + }, + + i::POW => { + let Datum::Number(l) = deref!(&instr.1[0]) else { + e!("illegal argument to POW instruction"); + }; + + let Datum::Number(r) = deref!(&instr.1[1]) else { + e!("illgal argument to POW instruction"); + }; + + self.expr = Datum::Number((*l).pow(*r)); + }, + + i::INC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) { + *src = *src + Number::Fra(Fraction(1, 1)); + } else { + e!("illegal argument to INC instruction"); + }, + + i::DEC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) { + *src = *src - Number::Fra(Fraction(1, 1)); + } 
else { + e!("illegal argument to INC instruction"); + }, + + // byte/char to and from number conversions + i::CTON => { + let src = deref_mut!(&instr.1[0]); + if let Datum::Char(schr) = src { + *src = Datum::Number(Number::Fra(Fraction(*schr as isize, 1))); + } else { + e!("illegal argument to CTON instruction"); + } + }, + + i::NTOC => { + let src = deref_mut!(&instr.1[0]); + if let Datum::Number(snum) = src { + let n = snum.make_inexact(); + if !snum.is_exact() || n.0.fract() != 0.0 || n.0 > u8::MAX.into() || n.0 < 0.0 { + e!("input to NTOC cannot cleanly convert"); + } + *src = Datum::Char(n.0.trunc() as u64 as u8); + + } else { + e!("illegal argument to NTOC instruction"); + } + }, + + i::MKVEC => self.expr = Datum::Vector(RefCell::from(vec![])), + i::MKBVEC => self.expr = Datum::ByteVector(RefCell::from(vec![])), + i::INDEX => { + let Datum::Number(idx) = deref!(&instr.1[1]) else { + e!("illegal argument to INDEX instruction"); + }; + let idx = idx.make_inexact(); + if !idx.is_exact() || idx.0.fract() != 0.0 { + e!("illegal argument to INDEX instruction"); + } + let idx = idx.0.trunc() as usize; + + match deref!(&instr.1[0]) { + Datum::Vector(v) => { + let a = (*v.borrow()[idx].clone()).clone(); + self.expr = a; + }, + Datum::ByteVector(bv) => { + let a = Datum::Char(bv.borrow()[idx]); + self.expr = a; + }, + Datum::List(l) => self.expr = l[idx].clone(), + _ => e!("illegal argument to INDEX instruction") + }; + }, + + i::LENGTH => match deref!(&instr.1[0]) { + Datum::Vector(v) => { + let a = Datum::Number(Number::Fra(Fraction(v.borrow().len() as isize, 1))); + self.expr = a; + }, + Datum::ByteVector(bv) => { + let a = Datum::Number(Number::Fra(Fraction(bv.borrow().len() as isize, 1))); + self.expr = a; + }, + Datum::List(l) => + self.expr = Datum::Number(Number::Fra(Fraction(l.len() as isize, 1))), + _ => e!("illegal argument to LENGTH instruction"), + }, + + i::SUBSL => { + let Datum::Number(st) = deref!(&instr.1[1]) else { + e!("illegal argument to SUBSL 
instruction"); + }; + + let Datum::Number(ed) = deref!(&instr.1[2]) else { + e!("illegal argument to SUBSL instruction"); + }; + + if !st.is_exact() || !ed.is_exact() { + e!("illegal argument to SUBSL instruction"); + } + + let st = st.make_inexact(); + let ed = ed.make_inexact(); + + if st.0.fract() != 0.0 || ed.0.fract() != 0.0 { + e!("SUBSL: FP precision error"); + } + + let st = st.0.trunc() as usize; + let ed = ed.0.trunc() as usize; + + match deref!(&instr.1[0]) { + Datum::Vector(v) => { + let a = Datum::Vector(RefCell::from(v.borrow()[st..ed].to_vec())); + self.expr = a; + }, + Datum::ByteVector(bv) => { + let a = Datum::ByteVector(RefCell::from(bv.borrow()[st..ed].to_vec())); + self.expr = a; + }, + Datum::List(a) => + self.expr = Datum::List(Rc::new( + (**a).subsl(st as isize, ed as isize))), + _ => e!("illegal argument to SUBSL instruction") + }; + } + + i::INSER => { + let Datum::Number(idx) = deref!(&instr.1[2]) else { + e!("illegal argument to INSER instruction"); + }; + + let idx = idx.make_inexact(); + if !idx.is_exact() || idx.0.fract() != 0.0 { + e!("illegal argument to INSER instruction"); + } + + let idx = idx.0.trunc() as usize; + + match deref!(&instr.1[0]) { + Datum::Vector(v) => { + v.borrow_mut().insert(idx, deref!(&instr.1[1]).clone().into()); + }, + Datum::ByteVector(bv) => { + let Datum::Char(b) = deref!(&instr.1[1]) else { + e!("INSER instruction can only insert a byte into a bytevector"); + }; + bv.borrow_mut().insert(idx, *b); + }, + _ => e!("illegal argument to INSER instruction") + } + }, + + i::CAR => { + let Datum::List(arg) = deref!(&instr.1[0]) else { + e!("illegal argument to CAR instruction"); + }; + + self.expr = (*arg.0).clone(); + }, + + i::CDR => { + let Datum::List(arg) = deref!(&instr.1[0]) else { + e!("illegal argument to CAR instruction"); + }; + + self.expr = (*arg.1).clone(); + }, + + i::CONS => { + /* CONS BEHAVIOR + * L Datum is not list means create a new standard form list + * L Datum is list then append the 
second element to the first + */ + }, + + // in order to maintain a language agnostic VM these must be traps + //i::PARSE => todo!("implement AST API"), + //i::EVAL => todo!("implement AST API"), + + _ => { + e!("illegal instruction"); + }, + }; + } +} + diff --git a/mycelium/src/lib.rs b/mycelium/src/lib.rs index eb3bc56..b68beca 100644 --- a/mycelium/src/lib.rs +++ b/mycelium/src/lib.rs @@ -25,7 +25,6 @@ pub mod sexpr; pub mod lexer; pub mod parser; pub mod number; -pub mod stackstack; -pub mod hmap; extern crate alloc; + diff --git a/mycelium/src/number.rs b/mycelium/src/number.rs index 845bce0..d9af991 100644 --- a/mycelium/src/number.rs +++ b/mycelium/src/number.rs @@ -529,7 +529,7 @@ impl Numeric for Float { if self.0.fract() == 0.0 { Fraction(self.0 as isize, 1) } else { - unimplemented!("insert rational approximation procedure here") + todo!("rational approximation implementation") } } } diff --git a/mycelium/src/parser.rs b/mycelium/src/parser.rs index 3d7959e..31dc0d5 100644 --- a/mycelium/src/parser.rs +++ b/mycelium/src/parser.rs @@ -16,6 +16,7 @@ */ use core::fmt::Display; +use core::cell::RefCell; use crate::lexer::{ LexError, @@ -414,11 +415,11 @@ impl Parser { } if is_bv { - return Ok(Rc::from(Datum::ByteVector(bv_stack))) + return Ok(Rc::from(Datum::ByteVector(RefCell::from(bv_stack)))) } if token.token_type == LexTokenType::VectorStart { - return Ok(Rc::from(Datum::Vector(lex_stack))) + return Ok(Rc::from(Datum::Vector(RefCell::from(lex_stack)))) } // handle an empty list diff --git a/mycelium/src/sexpr.rs b/mycelium/src/sexpr.rs index b478712..d9f9438 100644 --- a/mycelium/src/sexpr.rs +++ b/mycelium/src/sexpr.rs @@ -16,6 +16,9 @@ */ use core::fmt::{self, Formatter}; +use core::ops::Index; +use core::cell::RefCell; + use alloc::format; use alloc::rc::Rc; use alloc::vec::Vec; @@ -23,7 +26,7 @@ use alloc::string::String; use crate::number::Number; -#[derive(Default, Clone)] +#[derive(Default, Clone, PartialEq)] pub enum Datum { Number(Number), 
Bool(bool), @@ -31,8 +34,8 @@ pub enum Datum { Symbol(String), Char(u8), String(Vec), - Vector(Vec>), - ByteVector(Vec), + Vector(RefCell>>), + ByteVector(RefCell>), #[default] None, } @@ -45,7 +48,8 @@ fn byte_to_escaped_char(b: u8) -> String { } } -fn fmt_vec(v: &Vec) -> String { +fn fmt_vec(ve: &RefCell>) -> String { + let v = ve.borrow(); if v.len() == 0 { return String::new() } @@ -102,9 +106,46 @@ impl fmt::Debug for Datum { } -#[derive(Default, Clone)] +#[derive(Default, Clone, PartialEq)] pub struct Ast(pub Rc, pub Rc); +impl Ast { + pub fn subsl(&self, start: isize, end: isize) -> Ast { + if end - start == 1 { + return Ast(Rc::from(self[start as usize].clone()), Rc::from(Datum::None)) + } + + if end == 0 { + return Ast( + Rc::from((*(self.0)).clone()), + Rc::from(Datum::None) + ) + } + + let Datum::List(ref next) = *self.1 else { + panic!("index into improper list form") + }; + + if start <= 0 { + Ast( + Rc::from((*(self.0)).clone()), + Rc::from(Datum::List( + Rc::from(next.subsl(start - 1, end - 1)))) + ) + + } else { + next.subsl(start - 1, end - 1) + } + } + + pub fn len(&self) -> usize { + let Datum::List(ref next) = *self.1 else { + return 1 + }; + 1 + next.len() + } +} + impl Iterator for Ast { type Item = Rc; @@ -127,6 +168,25 @@ impl Iterator for Ast { } } +impl Index for Ast { + type Output = Datum; + fn index(&self, index: usize) -> &Self::Output { + if index == 0 { + if let Datum::None = *self.0 { + panic!("out of bounds indexing into AST") + } else { + self.0.as_ref() + } + } else { + let Datum::List(ref next) = *self.1 else { + panic!("out of bounds indexing into AST") + }; + + next.index(index - 1) + } + } +} + impl fmt::Display for Ast { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "({}", self.0)?; diff --git a/snippets/in_progress_numbers.rs b/snippets/in_progress_numbers.rs new file mode 100644 index 0000000..39333dc --- /dev/null +++ b/snippets/in_progress_numbers.rs @@ -0,0 +1,642 @@ +use alloc::boxed::Box; +use 
alloc::{vec, vec::Vec}; +use alloc::fmt::Debug; +use lexer::{E_TOO_MANY_DECIMALS, E_TOO_MANY_SLASH}; + +use core::cmp::Ordering; +use core::{fmt, u8}; +use core::ops::{Add, Div, Mul, Sub}; + + +pub const E_INCOMPREHENSIBLE: &str = "could not parse number literal"; +pub const E_POUND_TRUNCATED: &str = "pound sign implies additional input"; +pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal"; +pub const E_UNKNOWN_CONTROL: &str = "unknown character in number literal"; +pub const E_EMPTY_INPUT: &str = "empty string cannot be a number"; + +const NUM_INF: &str = "+inf.0"; +const NUM_NEG_INF: &str = "-inf.0"; +const NUM_NAN: &str = "+nan.0"; +const NUM_NEG_NAN: &str = "-nan.0"; + +pub const NegativeFlag: u8 = 0b10000000; // positive value if off +pub const DecimalFlag: u8 = 0b01000000; // single integer if off +pub const FractionFlag: u8 = 0b00100000; // decimal if off +pub const ScientificFlag: u8 = 0b00010000; // requires a second flags byte +pub const InfiniteFlag: u8 = 0b00001000; // can be positive or negative +pub const NotANumberFlag: u8 = 0b00000100; // can be positive or negative because r7rs +pub const OverflownFlag: u8 = 0b00000010; // poisons exactness + + +/* NUMBER BYTES FORMAT + * Generally the format within the byte array operates like this + * (guaranteed header) 1. NumberFlags (u8) + * (for each integer) 2. Byte Length (u8) + * (for each integer) 3. N proceeding bytes of data + * + * If Scientific Notation is used the leading number may be a decimal. 
+ * In this case, there will be three total numbers + * + * All numbers are big endian + */ +#[repr(transparent)] +#[derive(Clone, Debug, PartialEq)] +pub struct Number<'src> (pub &'src [u8]); + + +/* WARNING + * member functions tend to assume that number encoding is consistent + * use Number::is_valid() to double check numbers from unknown sources + * + * TODO: maybe mark raw-indexing member functions as unsafe + */ +impl Number<'_> { + #[inline(always)] + pub fn byte_length(&self) -> u8 { + if self.0[0] & (InfiniteFlag | NotANumberFlag) != 0 { + return 1; + } + + let mut len = self.0[1] + 2; + if self.0[0] & (DecimalFlag | FractionFlag | ScientificFlag) != 0 { + len += self.0[len as usize] + 1; + } + + if self.0[0] & ScientificFlag != 0 && + self.0[0] & DecimalFlag != 0 { + len += self.0[len as usize]; + } + + len + } + + pub fn is_valid(&self) -> bool { + let len = self.0.len(); + if len < 1 { + return false; + } + + let decimal = self.0[0] & DecimalFlag != 0; + let fraction = self.0[0] & FractionFlag != 0; + let scientific = self.0[0] & ScientificFlag != 0; + let overflown = self.0[0] & OverflownFlag != 0; + let infinite = self.0[0] & InfiniteFlag != 0; + let notanumber = self.0[0] & NotANumberFlag != 0; + + // check flags + if overflown { + return false + } + + if (decimal && fraction) || (scientific && fraction) { + return false + } + + if (infinite || notanumber) && + (decimal || fraction || scientific || len != 1) { + return false + } + + // at least 3 bytes for a single u8 + if len < 3 { + return false + } + + let mut cur = self.0[1] + 2; + if len < cur as usize { + return false + } + + if decimal || fraction || scientific { + if len < (cur + 1) as usize { + return false; + } + + cur += self.0[cur as usize]; + if len < (cur + 1) as usize { + return false; + } + } + + if scientific && decimal { + cur += 1; + if len < (cur + 1) as usize { + return false + } + + cur += self.0[cur as usize]; + if len < (cur + 1) as usize { + return false + } + } + + true + } 
+ + #[inline(always)] + pub fn is_exact(&self) -> bool { + self.0[0] & ScientificFlag == 0 + } + + #[inline(always)] + pub fn make_exact_into(&self, dst:&mut Vec) { + // expand scientific notation else just direct copy + if self.0[0] & ScientificFlag != 0 { + self.normalize_scientific_into(dst); + return + } + + self.copy_into(dst); + } + + #[inline(always)] + pub fn make_inexact_into(&self, dst: &mut Vec) { + // basically just convert a fraction into an actual division + todo!() + } + + // use this so you dont have to worry about clone while casting + #[inline(always)] + pub fn copy_into(&self, dst: &mut Vec) { + for i in self.0 { + dst.push(*i) + } + } + + #[inline(always)] + pub fn normalize_scientific_into(&self, dst: &mut Vec) { + todo!() + } + + #[inline(always)] + pub fn simplify_fraction_in_place(&mut self) { + if self.0[0] & FractionFlag == 0 { + return + } + + // can technically do this in place + // each element of the fraction will only shrink + todo!() + } + + #[inline(always)] + pub fn from_str_into(src: &str, dst: &mut Vec) -> Result<(), &'static str> { + // handle symbolic values + match src { + NUM_INF => { + dst.push(0 as u8 | InfiniteFlag); + return Ok(()); + }, + + NUM_NEG_INF => { + dst.push(0 as u8 | NegativeFlag | InfiniteFlag); + return Ok(()); + }, + + NUM_NAN => { + dst.push(0 as u8 | NotANumberFlag); + return Ok(()); + }, + + NUM_NEG_NAN => { + dst.push(0 as u8 | NegativeFlag | NotANumberFlag); + return Ok(()); + }, + + _ => (), + } + + let mut ctrl_flags = 0 as u8; + let mut operands = vec![]; + let mut digits_per_byte = 3; // default to decimal encoding + let mut base = 0; + let mut iter = src.chars().peekable(); + + match iter.next() { + Some('+') => (), + Some('-') => { + ctrl_flags |= NegativeFlag; + }, + Some('#') => { + match iter.next() { + None => return Err(E_POUND_TRUNCATED), + Some('i') => /* force_inexact = true */ (), + Some('e') => /* force_exact = true */ (), + Some('x') => { digits_per_byte = 2; base = 16 }, + Some('d') 
=> { digits_per_byte = 3; base = 10 }, + Some('o') => { digits_per_byte = 4; base = 8 }, + Some('b') => { digits_per_byte = 8; base = 2 }, + _ => return Err(E_UNKNOWN_CONTROL), + } + }, + Some(a) if a.is_digit(10) => (), + Some(_) => return Err(E_INCOMPREHENSIBLE), + None => return Err(E_EMPTY_INPUT), + } + + let mut ops_needed = 1; + if base != 10 { + // cant mix non-decimal base and other number representations + let mut len = 0 as u8; + while let Some(chunk) = { + let mut chk = vec![]; + for _ in 0..digits_per_byte { + if let Some(c) = iter.next() { + chk.push(c as u8) + } + } + if chk.len() < 1 { None } else { Some(chk) } + } { + let Ok(val) = u8::from_str_radix( + unsafe {str::from_utf8_unchecked(chunk.as_slice())}, base) else { + return Err(E_BASE_PARSE_FAIL) + }; + operands.push(val); + len += 1; + } + // integer numbers prepended with their length + operands.insert(0, len); + ops_needed -= 1; + + } else { + // just a decimal number, but could have a weird format + loop { + macro_rules! 
pack_operand { + () => { + let s = unsafe { str::from_utf8_unchecked(operands.as_slice()) }; + let f = usize::from_str_radix(&s, 10).expect("str cast"); + let f = f.to_be_bytes(); + operands.clear(); + + dst.push(f.len() as u8); + dst.append(&mut f.to_vec()); + ops_needed -= 1; + } + } + + match iter.next() { + Some(c) if c.is_digit(10) => { + operands.push(c as u8); + }, + + Some('.') => { + ops_needed += 1; + if ctrl_flags & (FractionFlag | ScientificFlag) != 0 { + return Err(E_INCOMPREHENSIBLE) + } + + if ctrl_flags & DecimalFlag != 0 { + return Err(E_TOO_MANY_DECIMALS) + } + + ctrl_flags |= DecimalFlag; + pack_operand!(); + }, + + Some('/') => { + ops_needed += 1; + if ctrl_flags & (DecimalFlag | ScientificFlag) != 0 { + return Err(E_INCOMPREHENSIBLE) + } + + if ctrl_flags & FractionFlag != 0 { + return Err(E_TOO_MANY_SLASH) + } + + ctrl_flags |= DecimalFlag; + pack_operand!(); + }, + + Some('e') => { + ops_needed += 1; + if ctrl_flags & FractionFlag != 0 { + return Err(E_INCOMPREHENSIBLE) + } + + ctrl_flags |= ScientificFlag; + let mut newctrl = 0 as u8; + + if let Some('-') = iter.peek() { + newctrl |= NegativeFlag; + } + + pack_operand!(); + dst.push(newctrl); + }, + + Some(_) => return Err(E_INCOMPREHENSIBLE), + + None => { + pack_operand!(); + break; + } + } + } + } + + if ops_needed != 0 { + return Err(E_INCOMPREHENSIBLE); + } + + dst.insert(0, ctrl_flags); + Number(dst.as_slice()).simplify_fraction_in_place(); + Ok(()) + } + + pub fn from_u8_into(src: u8, dst: &mut Vec) -> Number { + dst.push(0 as u8); + dst.push(src); + Number(dst.as_slice()) + } +} + +impl fmt::Display for Number<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // can implement after I finish division + todo!() + } +} + +impl<'a> From<&'a Box<[u8]>> for Number<'a> { + fn from(value: &'a Box<[u8]>) -> Self { + Number(value.as_ref()) + } +} + +impl<'a> From<&'a Vec> for Number<'a> { + fn from(value: &'a Vec) -> Self { + Number(value.as_slice()) + } +} + +impl<'a> 
From<&'a [u8]> for Number<'a> { + fn from(value: &'a [u8]) -> Self { + Number(value) + } +} + +impl<'a> Into<&'a [u8]> for Number<'a> { + fn into(self) -> &'a [u8] { + self.0 + } +} + +impl Add for Number<'_> { + type Output = Box<[u8]>; + fn add(self, rhs: Self) -> Self::Output { + todo!() + } +} + +impl Sub for Number<'_> { + type Output = Box<[u8]>; + fn sub(self, rhs: Self) -> Self::Output { + todo!() + } +} + +impl Mul for Number<'_> { + type Output = Box<[u8]>; + fn mul(self, rhs: Self) -> Self::Output { + todo!() + } +} + +impl Div for Number<'_> { + type Output = Box<[u8]>; + fn div(self, rhs: Self) -> Self::Output { + // divide unsigned integer by unsigned integer + // the inputs (lh and rh) start with length byte + // returns a decimal index + fn div_ints(lh: &[u8], rh: &[u8], dest: &mut Vec) -> u8 { + todo!() + } + + /* Options + * divide a single int by a single int + * - (make fraction) + * divide a fraction by a single int + * - (multiply denominator) + * divide a decimal by a single int + * - (divide straight through) + * divide a scientific note by a single int + * - divide the first num + * - multiply by however much is needed for ones place (like 3.5) + * - add or subtract from the second number accordingly + * + * divide a single int by a fraction + * - output denom * lh / numer + * divide a single int by a decimal + */ + todo!() + } +} + +impl PartialEq for Number<'_> { + fn eq(&self, other: &Number) -> bool { + todo!() + } +} + +impl PartialOrd for Number<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + todo!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_number_tests() { + assert_eq!("1.3".parse::(), + Ok(Number::Flt(Float(1.3)))); + + assert_eq!("1".parse::(), + Ok(Number::Flt(Float(1 as f64)))); + + assert_eq!("1.3e3".parse::(), + Ok(Number::Sci(ScientificNotation(1.3, 3)))); + + assert_eq!("+1.3".parse::(), + Ok(Number::Flt(Float(1.3)))); + + assert_eq!("-1.3".parse::(), + 
Ok(Number::Flt(Float(-1.3)))); + + assert_eq!("#d234".parse::(), + Ok(Number::Flt(Float(234.0)))); + + assert_eq!("#o17".parse::(), + Ok(Number::Fra(Fraction(15, 1)))); + + assert_eq!("#xAA".parse::(), + Ok(Number::Fra(Fraction(170, 1)))); + + assert_eq!("#b101".parse::(), + Ok(Number::Flt(Float(5.0)))); + + assert_eq!("2/4".parse::(), + Ok(Number::Fra(Fraction(2, 4)))); + + assert_eq!("#e1/5".parse::(), + Ok(Number::Fra(Fraction(1, 5)))); + + assert_eq!("#i1/5".parse::(), + Ok(Number::Flt(Float(0.2)))); + + assert_eq!("#e1e1".parse::(), + Ok(Number::Sci(ScientificNotation(1.0, 1)))); + + assert_eq!("+inf.0".parse::(), + Ok(Number::Sym(SymbolicNumber::Inf))); + + assert_eq!("2e3".parse::(), + Ok(ScientificNotation(2.0, 3))); + + assert_eq!("0e1".parse::(), + Ok(ScientificNotation(0.0, 1))); + + assert_eq!("-1e34".parse::(), + Ok(ScientificNotation(-1.0, 34))); + + assert_eq!("3.3e3".parse::(), + Ok(ScientificNotation(3.3, 3))); + + assert_eq!("2".parse::(), + Err(E_SCIENTIFIC_E)); + + assert_eq!("2e2e2".parse::(), + Err(E_SCIENTIFIC_MULTI_E)); + + assert_eq!("2/3".parse::(), + Ok(Fraction(2, 3))); + + assert_eq!("0/1".parse::(), + Ok(Fraction(0, 1))); + + assert_eq!("-1/34".parse::(), + Ok(Fraction(-1, 34))); + + assert_eq!("2".parse::(), + Err(E_NO_DENOMINATOR)); + + assert_eq!("2/2/2".parse::(), + Err(E_MULTI_DENOMINATOR)); + + assert_eq!("2/0".parse::(), + Err(E_ZERO_DENOMINATOR)); + + assert_eq!("3.3/3".parse::(), + Err(E_NUMERATOR_PARSE_FAIL)); + } + + #[test] + fn test_number_addition_subtraction_cases() { + let cases = vec![ + vec!["1/5", "4/5", "1/1"], + vec!["1/5", "0.8", "1/1"], + vec!["1e1", "2.0", "12/1"], + vec!["1e1", "2/1", "12/1"], + vec!["1e1", "1/2", "10.5"], + ]; + + cases.iter().for_each(|case| { + println!("+ {:#?}", case); + let x = case[0].parse::().unwrap(); + let y = case[1].parse::().unwrap(); + let z = case[2].parse::().unwrap(); + + // test some mathematical properties + assert_eq!(x + y, z); + assert_eq!(x + y, y + x); + assert_eq!(z - 
x, y); + assert_eq!(x + y - x, y); + }); + + // theres no reason this should adhere to all the other rules + let x = "+inf.0".parse::().unwrap(); + let y = "1e1".parse::().unwrap(); + let z = "+inf.0".parse::().unwrap(); + assert_eq!(x + y, z); + } + + #[test] + fn test_number_multiplication_division_cases() { + let cases = vec![ + vec!["1/5", "5e0", "1/1"], + vec!["1/5", "5", "1/1"], + vec!["1/5", "2/1", "2/5"], + vec!["4.4", "1/2", "2.2"], + vec!["12.0", "1/2", "6/1"], + vec!["1e1", "2.0", "20/1"], + vec!["1e1", "2/1", "20/1"], + vec!["1e1", "1/2", "5/1"], + ]; + + cases.iter().for_each(|case| { + println!("+ {:#?}", case); + let x = case[0].parse::().unwrap(); + let y = case[1].parse::().unwrap(); + let z = case[2].parse::().unwrap(); + + // test some mathematical properties + assert_eq!(x * y, z); + assert_eq!(x * y, y * x); + assert_eq!(z / x, y); + assert_eq!(x * y / x, y); + }); + } + + #[test] + fn test_number_pow_cases() { + // TODO: add scientific notation cases + let cases = vec![ + vec!["2", "2", "4"], + vec!["2/1", "2/1", "4/1"], + vec!["2/1", "2/-1", "1/4"], + vec!["2/1", "2/2", "2/1"], + vec!["2/1", "2.0", "4/1"], + vec!["27/8", "2/-3", "4/9"] + ]; + + cases.iter().for_each(|case| { + println!("+ {:#?}", case); + let x = case[0].parse::().unwrap(); + let y = case[1].parse::().unwrap(); + let z = case[2].parse::().unwrap(); + assert_eq!(x.pow(y), z); + }); + } + + #[test] + fn test_number_ord_cases() { + // TODO: add more cases + let cases = vec![ + vec!["1/2", "1.0", "1e1"], + ]; + + cases.iter().for_each(|case| { + println!("+ {:#?}", case); + let x = case[0].parse::().unwrap(); + let y = case[1].parse::().unwrap(); + let z = case[2].parse::().unwrap(); + assert!(x < y); + assert!(y < z); + assert!(x < z); + }); + } + + #[test] + fn float_negative_exponent_case() { + if let Float(0.1) = "1e-1" + .parse::() + .unwrap() + .make_inexact() { + return + } + + assert!(false) + } +}