WIP HyphaeVM

This commit is a WORK IN PROGRESS for the base implementation of the
HyphaeVM. This will be squashed into a larger commit eventually when
the work of implementing the HyphaeVM is finished.

Do note the in-progress number package implementation in snippets.

Signed-off-by: Ava Affine <ava@sunnypup.io>
This commit is contained in:
Ava Apples Affine 2025-06-26 10:52:54 -07:00
parent 3a0a141738
commit 0476160ae3
17 changed files with 2065 additions and 17 deletions

View file

@ -27,7 +27,7 @@ unit-test-parser:
- cargo test parser - cargo test parser
unit-test-number-package: unit-test-number-package:
stage: test-backend stage: test-frontend
script: script:
- cargo test number - cargo test number
@ -40,3 +40,8 @@ unit-test-quickmap:
stage: test-backend stage: test-backend
script: script:
- cargo test hmap - cargo test hmap
unit-test-instruction-decoding:
stage: test-backend
script:
- cargo test util

117
Cargo.lock generated
View file

@ -112,18 +112,56 @@ dependencies = [
"mycelium", "mycelium",
] ]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "hashbrown"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.5.0" version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hyphae"
version = "0.1.0"
dependencies = [
"mycelium",
"num",
"serde",
"toml",
]
[[package]]
name = "indexmap"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "memchr"
version = "2.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]] [[package]]
name = "mycelium" name = "mycelium"
version = "0.1.0" version = "0.1.0"
@ -228,6 +266,35 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "serde"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_spanned"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.11.1" version = "0.11.1"
@ -245,6 +312,47 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "toml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"toml_write",
"winnow",
]
[[package]]
name = "toml_write"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.18" version = "1.0.18"
@ -329,3 +437,12 @@ name = "windows_x86_64_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
dependencies = [
"memchr",
]

View file

@ -1,3 +1,3 @@
[workspace] [workspace]
resolver = "2" resolver = "2"
members = ["mycelium", "decomposer"] members = ["mycelium", "decomposer", "hyphae"]

12
hyphae/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "hyphae"
version = "0.1.0"
edition = "2024"
[dependencies]
mycelium = { path = "../mycelium" }
num = { version = "0.4.3", features = ["alloc"] }
[build-dependencies]
serde = { version = "1.0", features = ["alloc", "derive"] }
toml = "0.8.23"

109
hyphae/build.rs Normal file
View file

@ -0,0 +1,109 @@
use std::{env, fs};
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
use serde::Deserialize;
/// Top-level shape of `instructions.toml`: the list of `[[instructions]]`
/// tables, in file order.
#[derive(Deserialize)]
struct Document {
    pub instructions: Vec<Instruction>,
}
/// One `[[instructions]]` entry of `instructions.toml`.
// Don't warn about unused fields in the TOML instruction struct: the
// generator only reads `name` and `args`.
#[allow(dead_code)]
#[derive(Deserialize)]
struct Instruction {
    /// Instruction mnemonic; upper-cased to form the generated constant name.
    pub name: String,
    /// Argument names; only their count is used (for the generated `num_args`).
    pub args: Vec<String>,
    /// Free-form description of the instruction's result (not read by the generator).
    pub output: String,
    /// Free-form summary of the instruction (not read by the generator).
    pub description: String,
}
/// Build script entry point: generates `$OUT_DIR/hyphae_instr.rs` from
/// `instructions.toml`.
///
/// The generated file contains the `Operation(u8)` newtype, one
/// `pub const NAME: Operation` per instruction (the opcode is the entry's
/// position in the table), and `TryFrom<u8>`, `TryFrom<&str>`, `FromStr`
/// and `num_args` implementations for `Operation`.
///
/// # Panics
/// Panics (failing the build) when `OUT_DIR` is unset, when
/// `instructions.toml` cannot be read or parsed, when the table holds more
/// instructions than a `u8` opcode can number, or when the output file
/// cannot be written.
fn main() {
    let out_dir = env::var("OUT_DIR").expect("hyphae: OUT_DIR is not set");
    let output_path = Path::new(&out_dir).join("hyphae_instr.rs");

    let input = fs::read_to_string("instructions.toml")
        .expect("hyphae: failed to read instructions.toml");
    let instruction_table: Document =
        toml::from_str(&input)
            .expect("hyphae: failed to parse instructions.toml");

    // Opcodes are emitted as `Operation(<index>)` with a `u8` payload, so the
    // table index has to fit in a byte.
    assert!(
        instruction_table.instructions.len() <= usize::from(u8::MAX) + 1,
        "hyphae: instructions.toml defines more instructions than fit in a u8 opcode"
    );

    let mut isa = "#[repr(transparent)]\n".to_owned();
    isa += "#[derive(Clone, Debug, PartialEq)]\n";
    isa += "pub struct Operation(pub u8);\n\n";

    let mut isa_from_byte = "impl TryFrom<u8> for Operation {\n".to_owned();
    isa_from_byte += " type Error = &'static str;\n";
    isa_from_byte += " fn try_from(v: u8) -> Result<Self, Self::Error> {\n";
    isa_from_byte += " match v {\n";

    let mut isa_fromstr = "impl FromStr for Operation {\n".to_owned();
    isa_fromstr += " type Err = &'static str;\n";
    isa_fromstr += " fn from_str(v: &str) -> Result<Self, Self::Err> {\n";
    isa_fromstr += " match v {\n";

    let mut isa_from_str = "impl TryFrom<&str> for Operation {\n".to_owned();
    isa_from_str += " type Error = &'static str;\n";
    isa_from_str += " fn try_from(v: &str) -> Result<Self, Self::Error> {\n";
    isa_from_str += " match v {\n";

    let mut isa_num_args = "impl Operation {\n".to_owned();
    isa_num_args += " pub fn num_args(&self) -> Result<u8, &'static str> {\n";
    isa_num_args += " match self.0 {\n";

    // One constant plus one match arm per generated impl for every instruction.
    for (idx, instr) in instruction_table.instructions.iter().enumerate() {
        let const_name = instr.name.to_ascii_uppercase();
        isa += &format!("pub const {}: Operation = Operation({});\n",
            const_name, idx);
        isa_from_byte += &format!(" {} => Ok({}),\n", idx, const_name);
        isa_from_str += &format!(" \"{}\" => Ok({}),\n",
            const_name, const_name);
        isa_fromstr += &format!(" \"{}\" => Ok({}),\n",
            const_name, const_name);
        isa_num_args += &format!(" {} => Ok({}),\n", idx, instr.args.len());
    }

    isa_from_byte += " _ => Err(\"illegal instruction\"),\n";
    isa_from_byte += " }\n";
    isa_from_byte += " }\n";
    isa_from_byte += "}\n\n";

    isa_from_str += " _ => Err(\"illegal instruction\"),\n";
    isa_from_str += " }\n";
    isa_from_str += " }\n";
    isa_from_str += "}\n\n";

    isa_fromstr += " _ => Err(\"illegal instruction\"),\n";
    isa_fromstr += " }\n";
    isa_fromstr += " }\n";
    isa_fromstr += "}\n\n";

    isa_num_args += " _ => Err(\"illegal instruction\"),\n";
    isa_num_args += " }\n";
    isa_num_args += " }\n";
    isa_num_args += "}\n\n";

    isa += "\n";
    isa += isa_from_byte.as_str();
    isa += isa_from_str.as_str();
    isa += isa_fromstr.as_str();
    isa += isa_num_args.as_str();

    let mut output_file = BufWriter::new(
        File::create(&output_path)
            .expect("hyphae: failed to create hyphae_instr.rs"));
    write!(&mut output_file, "use core::str::FromStr;\n\n\n")
        .expect("hyphae: failed to write hyphae_instr.rs");
    write!(&mut output_file, "{}", isa)
        .expect("hyphae: failed to write hyphae_instr.rs");
    // Dropping a BufWriter swallows flush errors; flush explicitly so a
    // short write fails the build instead of producing a truncated file.
    output_file.flush()
        .expect("hyphae: failed to flush hyphae_instr.rs");

    println!("cargo::rerun-if-changed=build.rs");
    // The instruction table lives in instructions.toml (not .json).
    println!("cargo::rerun-if-changed=instructions.toml");
}

302
hyphae/instructions.toml Normal file
View file

@ -0,0 +1,302 @@
# NOTE: keep libc out of this; that's what the trap vector is for
# NOTE: to programmers: only registers allow mutable access
[[instructions]]
name = "trap"
args = ["index"]
output = "result of function"
description = "triggers callback in trap vector at index"
[[instructions]]
name = "bind"
args = ["name", "operand"]
output = ""
description = "map name to operand in sym table."
[[instructions]]
name = "unbind"
args = ["name"]
output = ""
description = "remove name mapping from sym table."
[[instructions]]
name = "bound"
args = ["name"]
output = "expr = true if name is bound"
description = "test if a name is already bound"
[[instructions]]
name = "push"
args = ["operand"]
output = ""
description = "pushes operand onto stack."
[[instructions]]
name = "pop"
args = []
output = ""
description = "removes element at top of stack."
[[instructions]]
name = "enter"
args = []
output = ""
description = "create new stack frame"
[[instructions]]
name = "exit"
args = []
output = ""
description = "delete current stack frame"
[[instructions]]
name = "load"
args = ["src", "dest"]
output = ""
description = "copies src into dest"
[[instructions]]
name = "clear"
args = ["dest"]
output = ""
description = "clears dest"
[[instructions]]
name = "nop"
args = []
output = ""
description = "no operation"
[[instructions]]
name = "halt"
args = []
output = ""
description = "halts the VM"
[[instructions]]
name = "panic"
args = ["error"]
output = ""
description = "sets error state and halts VM"
[[instructions]]
name = "jmp"
args = ["addr"]
output = ""
description = "sets ictr register to addr"
[[instructions]]
name = "jmpif"
args = ["addr"]
output = ""
description = "if expr register holds true, sets ictr to addr"
[[instructions]]
name = "eq"
args = ["a", "b"]
output = "a == b"
description = "equality test"
[[instructions]]
name = "lt"
args = ["a", "b"]
output = "a < b"
description = "less than test"
[[instructions]]
name = "gt"
args = ["a", "b"]
output = "a > b"
description = "greater than test"
[[instructions]]
name = "lte"
args = ["a", "b"]
output = "a <= b"
description = "less than equals test"
[[instructions]]
name = "gte"
args = ["a", "b"]
output = "a >= b"
description = "greater than equals test"
[[instructions]]
name = "bool_not"
args = []
output = "expr = !expr"
description = "boolean not"
[[instructions]]
name = "bool_and"
args = ["a", "b"]
output = "a && b"
description = "boolean and"
[[instructions]]
name = "bool_or"
args = ["a", "b"]
output = "a || b"
description = "boolean or"
[[instructions]]
name = "byte_and"
args = ["a", "b"]
output = "a & b"
description = "bitwise and"
[[instructions]]
name = "byte_or"
args = ["a", "b"]
output = "a | b"
description = "bitwise or"
[[instructions]]
name = "xor"
args = ["a", "b"]
output = "a xor b"
description = "bitwise exclusive or"
[[instructions]]
name = "byte_not"
args = []
output = "expr = !expr"
description = "bitwise not"
[[instructions]]
name = "add"
args = ["a", "b"]
output = "a + b"
description = "numeric addition"
[[instructions]]
name = "sub"
args = ["a", "b"]
output = "a - b"
description = "numeric subtraction"
[[instructions]]
name = "mul"
args = ["a", "b"]
output = "a * b"
description = "numeric multiplication"
[[instructions]]
name = "fdiv"
args = ["a", "b"]
output = "a / b"
description = "numeric FLOAT division"
[[instructions]]
name = "idiv"
args = ["a", "b"]
output = "a / b"
description = "numeric INTEGER division"
[[instructions]]
name = "pow"
args = ["a", "b"]
output = "a ^ b"
description = "numeric operation to raise a to the power of b"
[[instructions]]
name = "modulo"
args = ["a", "b"]
output = "a % b"
description = "numeric modulo operation"
[[instructions]]
name = "rem"
args = ["a", "b"]
output = "remainder from a / b"
description = "remainder from integer division"
[[instructions]]
name = "inc"
args = ["src"]
output = ""
description = "increments number at source"
[[instructions]]
name = "dec"
args = ["src"]
output = ""
description = "decrements number at source"
[[instructions]]
name = "cton"
args = ["src"]
output = ""
description = "mutates a char datum into a number datum"
[[instructions]]
name = "ntoc"
args = ["src"]
output = ""
description = "mutates a number datum into a char datum"
[[instructions]]
name = "mkvec"
args = []
output = "a blank vector"
description = "creates a new vector"
[[instructions]]
name = "mkbvec"
args = []
output = "a blank bytevector"
description = "creates a blank bytevector"
[[instructions]]
name = "index"
args = ["collection", "index"]
output = "collection[index]"
description = "extracts element from collection at index"
[[instructions]]
name = "length"
args = ["collection"]
output = "length of collection"
description = "calculates length of collection"
[[instructions]]
name = "subsl"
args = ["collection", "start", "end"]
output = "collection[start:end]"
description = "returns a subset from collection denoted by start and end indexes"
[[instructions]]
name = "inser"
args = ["collection", "elem", "idx"]
output = ""
description = "inserts an element at specified index into a collection"
[[instructions]]
name = "cons"
args = ["left", "right"]
output = "resulting collection"
description = "either append right to left or make new list from both"
[[instructions]]
name = "car"
args = ["list"]
output = "returns first element in cons cell"
description = "takes an AST and returns first element in top level cons cell"
[[instructions]]
name = "cdr"
args = ["list"]
output = "returns last element in cons cell"
description = "takes an AST and returns last element in top level cons cell"
[[instructions]]
name = "parse"
args = ["input"]
output = "parses an AST from input string"
description = "calls parse and lex routine on an input string"
[[instructions]]
name = "eval"
args = []
output = "result of compiled instructions"
description = "expects AST in expr. compiles it to instructions and runs them"

View file

@ -44,7 +44,7 @@ const INDEXED_BUCKETS: u8 = 199;
* or more likely rip and replace with a better nostd hashmap * or more likely rip and replace with a better nostd hashmap
*/ */
#[inline] #[inline]
fn string_hash(input: &String) -> u8 { fn string_hash(input: &str) -> u8 {
input input
.chars() .chars()
// each letter and number get a digit // each letter and number get a digit
@ -82,7 +82,7 @@ impl<'a, T: Clone> QuickMap<T> {
return None; return None;
} }
pub fn remove(&mut self, arg: &String) -> Option<T> { pub fn remove(&mut self, arg: &str) -> Option<T> {
let idx = string_hash(&arg); let idx = string_hash(&arg);
let len = self.0[idx as usize].0.len(); let len = self.0[idx as usize].0.len();
for i in 0..len { for i in 0..len {
@ -97,7 +97,7 @@ impl<'a, T: Clone> QuickMap<T> {
return None; return None;
} }
pub fn contains_key(&self, arg: &String) -> bool { pub fn contains_key(&self, arg: &str) -> bool {
let idx = string_hash(arg); let idx = string_hash(arg);
for kv in self.0[idx as usize].0.iter() { for kv in self.0[idx as usize].0.iter() {
if &kv.0 == arg { if &kv.0 == arg {

19
hyphae/src/instr.rs Normal file
View file

@ -0,0 +1,19 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Pulls in the code that build.rs generates from instructions.toml: the
// `Operation` newtype, one `Operation` constant per instruction, and the
// `TryFrom<u8>`, `TryFrom<&str>`, `FromStr` and `num_args` implementations.
include!(concat!(env!("OUT_DIR"), "/hyphae_instr.rs"));

26
hyphae/src/lib.rs Normal file
View file

@ -0,0 +1,26 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// The crate is `no_std` except when compiled for tests.
#![cfg_attr(not(test), no_std)]
pub mod hmap;
pub mod stackstack;
pub mod instr;
pub mod vm;
pub mod util;
// Heap-backed types used by the modules above (`Vec`, `Rc`, `Arc`, ...) are
// taken from `alloc`.
extern crate alloc;

View file

@ -83,10 +83,10 @@ impl<T: Debug> Debug for StackStack<T> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
let mut ss_idx = 1; let mut ss_idx = 1;
let mut ss_cur = &*self.0; let mut ss_cur = &*self.0;
while let Some(ref inner) = ss_cur { while let Some(inner) = ss_cur {
write!(f, "Frame {ss_idx}:")?; write!(f, "Frame {ss_idx}:")?;
let mut s_cur = &*inner.stack.0; let mut s_cur = &*inner.stack.0;
while let Some(ref node) = s_cur { while let Some(node) = s_cur {
write!(f, " {:#?}", node.data)?; write!(f, " {:#?}", node.data)?;
s_cur = &*node.next.0; s_cur = &*node.next.0;
} }

303
hyphae/src/util.rs Normal file
View file

@ -0,0 +1,303 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use crate::instr::Operation;
use alloc::vec::Vec;
use alloc::vec;
use core::ops::Index;
use core::mem::transmute;
/// Addressing mode of an operand; its `u8` value is the first byte of every
/// encoded operand.
///
/// `Stack`, `Instr` and `Numer` operands are followed by a native-endian
/// `usize` payload; the remaining modes carry no payload (see
/// `Address::operand_size`).
// NOTE(review): 0xf7 is not assigned to any mode — presumably reserved; confirm.
#[repr(u8)]
#[derive(Debug, Clone, PartialEq)]
pub enum Address {
    Stack = 0xf0, // immutable access only
    Instr = 0xf1, // immutable access only
    Expr = 0xf2, // mutable access allowed
    Oper1 = 0xf3, // mutable access allowed
    Oper2 = 0xf4, // mutable access allowed
    Oper3 = 0xf5, // mutable access allowed
    Oper4 = 0xf6, // mutable access allowed
    Numer = 0xf8, // immutable access only
}
/// A decoded operand: the addressing mode plus its `usize` payload.
/// The payload is 0 for modes that encode no payload bytes.
#[derive(Debug, Clone, PartialEq)]
pub struct Operand(pub Address, pub usize);
/// A decoded instruction: the operation followed by its operands, in
/// encoding order.
#[derive(Debug, Clone, PartialEq)]
pub struct Instruction(pub Operation, pub Vec<Operand>);
/// A decoded program: the instructions in the order they were encoded.
#[derive(Debug, Clone, PartialEq)]
pub struct Program(pub Vec<Instruction>);
impl Into<u8> for Address {
fn into(self) -> u8 {
unsafe { transmute::<Address, u8>(self) }
}
}
/// Decodes an addressing mode byte.
///
/// Returns `Err("illegal addressing mode")` when the byte is not the
/// discriminant of any `Address` variant.
impl TryFrom<u8> for Address {
    type Error = &'static str;

    fn try_from(val: u8) -> Result<Self, Self::Error> {
        // Compare against the discriminants themselves so the table cannot
        // drift from the enum definition.
        [
            Address::Stack,
            Address::Instr,
            Address::Expr,
            Address::Oper1,
            Address::Oper2,
            Address::Oper3,
            Address::Oper4,
            Address::Numer,
        ]
        .into_iter()
        .find(|mode| mode.clone() as u8 == val)
        .ok_or("illegal addressing mode")
    }
}
impl Address {
    /// Number of payload bytes that follow this addressing mode byte in the
    /// encoding: one machine word for `Stack`, `Instr` and `Numer`, zero for
    /// every other mode.
    fn operand_size(&self) -> u8 {
        const WORD_BYTES: u8 = (usize::BITS / 8) as u8;
        match self {
            Address::Stack | Address::Instr | Address::Numer => WORD_BYTES,
            _ => 0,
        }
    }
}
/// Decodes one operand from the front of `value`.
///
/// The first byte selects the addressing mode; it is followed by
/// `Address::operand_size()` payload bytes holding a native-endian `usize`.
/// Bytes after the operand are ignored.
///
/// # Errors
/// * `"truncated address data"` when `value` is empty or shorter than the
///   mode byte plus its payload.
/// * `"illegal addressing mode"` when the first byte is not a known mode.
impl TryFrom<&[u8]> for Operand {
    type Error = &'static str;

    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        // An empty slice is a decoding error, not a reason to panic.
        let Some((&mode_byte, rest)) = value.split_first() else {
            return Err("truncated address data");
        };
        let addr_mode: Address = mode_byte.try_into()?;
        let operand_size = usize::from(addr_mode.operand_size());
        let Some(payload) = rest.get(..operand_size) else {
            return Err("truncated address data");
        };

        // Size the buffer from the target's word width rather than assuming
        // 8 bytes, so this also builds on non-64-bit targets. The payload is
        // either empty or exactly one word, so it always fits.
        let mut operand_bytes = [0u8; (usize::BITS / 8) as usize];
        operand_bytes[..operand_size].copy_from_slice(payload);
        Ok(Operand(addr_mode, usize::from_ne_bytes(operand_bytes)))
    }
}
impl Into<Vec<u8>> for Operand {
fn into(self) -> Vec<u8> {
let mut res = vec![];
res.push(self.0.clone() as u8);
res.append(&mut self.1.to_ne_bytes()[..self.0.operand_size() as usize].to_vec());
res
}
}
impl Operand {
fn byte_length(&self) -> u8 {
1 + self.0.operand_size()
}
}
/// Decodes one instruction from the front of `value`.
///
/// The first byte is the opcode; it is followed by as many encoded operands
/// as `Operation::num_args` reports for that opcode. Bytes after the last
/// operand are ignored.
///
/// # Errors
/// * `"truncated instruction data"` when `value` is empty.
/// * whatever decoding the opcode or `num_args` reports for an unknown opcode.
/// * `"operand data truncated"` when the input ends before an expected operand.
/// * any error produced while decoding an operand.
impl TryFrom<&[u8]> for Instruction {
    type Error = &'static str;

    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        // An empty slice is a decoding error, not a reason to panic.
        let Some(&opcode) = value.first() else {
            return Err("truncated instruction data");
        };
        let operation: Operation = opcode.try_into()?;
        let num_args = operation.num_args()?;

        let mut operands: Vec<Operand> = Vec::with_capacity(usize::from(num_args));
        // Offset of the next operand; starts just past the opcode byte.
        let mut cur = 1;
        for _ in 0..num_args {
            if cur >= value.len() {
                return Err("operand data truncated")
            }
            let operand: Operand = value[cur..].try_into()?;
            cur += operand.byte_length() as usize;
            operands.push(operand);
        }

        Ok(Instruction(operation, operands))
    }
}
impl Into<Vec<u8>> for Instruction {
fn into(self) -> Vec<u8> {
let mut res = vec![];
res.push(self.0.0);
for op in self.1 {
res.append(&mut op.into())
}
res
}
}
impl Instruction {
    /// Encoded size of this instruction in bytes: one opcode byte plus the
    /// encoded size of every operand.
    fn byte_length(&self) -> u8 {
        let mut operand_bytes = 0;
        for oper in self.1.iter() {
            operand_bytes = operand_bytes + oper.byte_length();
        }
        operand_bytes + 1
    }
}
/// Decodes a whole byte buffer into a program by decoding instructions back
/// to back until the input is used up. An empty buffer yields an empty
/// program; the first instruction that fails to decode aborts with its error.
impl TryFrom<&[u8]> for Program {
    type Error = &'static str;

    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        let mut decoded: Vec<Instruction> = vec![];
        let mut offset = 0;
        // Keep going while there are undecoded bytes at or after `offset`.
        while let Some(remaining) = value.get(offset..).filter(|rest| !rest.is_empty()) {
            let next: Instruction = remaining.try_into()?;
            offset += next.byte_length() as usize;
            decoded.push(next);
        }
        Ok(Program(decoded))
    }
}
impl Into<Vec<u8>> for Program {
fn into(self) -> Vec<u8> {
let mut res: Vec<u8> = vec![];
for instr in self.0 {
res.append(&mut instr.into())
}
res
}
}
/// Indexes a program by instruction position.
///
/// # Panics
/// Panics with "access to out of bounds instruction in vm" when `index` is
/// past the last instruction.
impl Index<usize> for Program {
    type Output = Instruction;

    fn index(&self, index: usize) -> &Instruction {
        self.0.get(index).expect("access to out of bounds instruction in vm")
    }
}
#[cfg(test)]
mod tests {
    use crate::instr;
    use super::*;

    /// Encodes a payload-carrying operand the way the decoder expects it:
    /// the addressing mode byte followed by `value` in native byte order.
    /// Building fixtures this way keeps the tests independent of the host's
    /// word size and endianness.
    fn encode_operand(mode: Address, value: usize) -> Vec<u8> {
        let mut bytes: Vec<u8> = vec![mode.into()];
        bytes.extend_from_slice(&value.to_ne_bytes());
        bytes
    }

    /// Operand decoding: error cases, a payload-carrying operand, and a
    /// register operand round trip.
    #[test]
    fn test_operand_parse() {
        let bad_addressing =
            TryInto::<Operand>::try_into(&[0x13, 0x39][..]);
        assert_eq!(bad_addressing, Err("illegal addressing mode"));

        let truncated_address =
            TryInto::<Operand>::try_into(&[0xf1][..]);
        assert_eq!(truncated_address, Err("truncated address data"));

        let stack_bytes = encode_operand(Address::Stack, 0x2323);
        let usize_case =
            TryInto::<Operand>::try_into(stack_bytes.as_slice());
        assert_eq!(usize_case, Ok(Operand(Address::Stack, 0x2323)));

        let register_operand = Operand(Address::Expr, 0);
        let operand_bytes: Vec<u8> = register_operand.clone().into();
        assert_eq!(operand_bytes, vec![0xf2]);

        let operand_conv =
            TryInto::<Operand>::try_into(operand_bytes.as_slice());
        assert_eq!(operand_conv, Ok(register_operand));
    }

    /// Instruction decoding: error cases, then zero-, one- and two-operand
    /// instructions with their encoded round trips.
    #[test]
    fn test_instruction_parse() {
        let illegal_instruction =
            TryInto::<Instruction>::try_into(&[0x88][..]);
        assert_eq!(illegal_instruction, Err("illegal instruction"));

        let bad_operand =
            TryInto::<Instruction>::try_into(&[instr::TRAP.0, 0xf1][..]);
        assert_eq!(bad_operand, Err("truncated address data"));

        let need_more_opers =
            TryInto::<Instruction>::try_into(&[instr::TRAP.0][..]);
        assert_eq!(need_more_opers, Err("operand data truncated"));

        let no_operands =
            TryInto::<Instruction>::try_into(&[instr::POP.0][..]);
        assert!(no_operands.is_ok());
        let pop = no_operands.unwrap();
        assert_eq!(pop.0, instr::POP);
        assert!(pop.1.is_empty());
        let pop_bytes: Vec<u8> = pop.into();
        assert_eq!(pop_bytes, vec![instr::POP.0]);

        let one_operand =
            TryInto::<Instruction>::try_into(&[instr::TRAP.0, 0xf3][..]);
        assert!(one_operand.is_ok());
        let one_oper = one_operand.unwrap();
        assert_eq!(one_oper.0, instr::TRAP);
        assert_eq!(one_oper.1.len(), 1);
        assert_eq!(one_oper.1[0], Operand(Address::Oper1, 0));
        let one_bytes: Vec<u8> = one_oper.into();
        assert_eq!(one_bytes, vec![instr::TRAP.0, 0xf3]);

        let two_operands =
            TryInto::<Instruction>::try_into(&[instr::LOAD.0, 0xf3, 0xf4][..]);
        assert!(two_operands.is_ok());
        let two_oper = two_operands.unwrap();
        assert_eq!(two_oper.0, instr::LOAD);
        assert_eq!(two_oper.1.len(), 2);
        let two_bytes: Vec<u8> = two_oper.clone().into();
        assert_eq!(two_bytes, vec![instr::LOAD.0, 0xf3, 0xf4]);
        assert_eq!(two_oper.1[0], Operand(Address::Oper1, 0));
        assert_eq!(two_oper.1[1], Operand(Address::Oper2, 0));
    }

    /// Program decoding: a single instruction, then two instructions where
    /// the second carries a word-sized payload.
    #[test]
    fn test_program_parse() {
        let bytes1 = [instr::LOAD.0, 0xf3, 0xf4];
        let out1 = vec![Instruction(instr::LOAD,
            vec![Operand(Address::Oper1, 0), Operand(Address::Oper2, 0)])];
        let res1 =
            TryInto::<Program>::try_into(&bytes1[..]);
        assert!(res1.is_ok());
        assert_eq!(res1.unwrap().0, out1);

        let mut bytes2: Vec<u8> = vec![
            instr::LOAD.0, 0xf3, 0xf4,
            instr::CLEAR.0,
        ];
        bytes2.extend(encode_operand(Address::Stack, 1));
        let out2 = vec![
            Instruction(instr::LOAD, vec![
                Operand(Address::Oper1, 0),
                Operand(Address::Oper2, 0)
            ]),
            Instruction(instr::CLEAR, vec![
                Operand(Address::Stack, 1)
            ])
        ];
        let res2 =
            TryInto::<Program>::try_into(bytes2.as_slice());
        assert!(res2.is_ok());
        assert_eq!(res2.unwrap().0, out2);
    }
}

453
hyphae/src/vm.rs Normal file
View file

@ -0,0 +1,453 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use mycelium::sexpr::Datum;
use mycelium::number::{Fraction, Number, Numeric};
use crate::hmap::QuickMap;
use crate::stackstack::StackStack;
use crate::instr as i;
use crate::util::{Operand, Program, Address};
use core::cell::RefCell;
use alloc::vec;
use alloc::rc::Rc;
use alloc::vec::Vec;
use alloc::sync::Arc;
use alloc::borrow::ToOwned;
use num::pow::Pow;
/// Number of general operand registers (`Oper1` through `Oper4`).
const NUM_OPERAND_REGISTERS: usize = 4;
/// State of one HyphaeVM instance: the loaded program, its execution
/// environment, the data and control registers, and the run/error flags.
pub struct VM {
    // execution environment
    /// Stack of stack frames; the target of PUSH/POP/ENTER/EXIT.
    pub stack: StackStack<Datum>,
    /// Symbol table mapping names to operands (written by BIND, removed by UNBIND).
    pub symtab: QuickMap<Operand>,
    /// Program being executed; indexed by `ictr`.
    pub prog: Program,
    // NOTE(review): not referenced in the visible code — presumably open file descriptors; confirm.
    pub fds: Vec<u64>,
    /// Trap vector: callbacks invoked by the TRAP instruction, selected by index.
    pub traps: Vec<Arc<dyn Fn(&mut VM)>>,
    // data registers
    /// Expression register; comparison, boolean, byte and numeric instructions store their result here.
    pub expr: Datum,
    /// Operand registers addressed as `Oper1`..`Oper4`.
    pub oper: [Datum; NUM_OPERAND_REGISTERS],
    // control flow registers
    // NOTE(review): not referenced in the visible code — presumably a return address; confirm.
    pub retn: usize,
    /// Instruction counter: index into `prog` of the instruction being executed.
    pub ictr: usize,
    /// Error datum, set when an instruction fails or by PANIC.
    pub errr: Datum,
    // state
    /// True while the VM should keep executing; cleared by HALT, PANIC, errors and at end of program.
    pub running: bool,
    /// Set when an instruction fails; stops `run_program`.
    pub err_state: bool,
}
impl VM {
/// Executes instructions starting at `ictr` until the counter runs past the
/// end of the program, or until `err_state` is set or `running` is cleared.
///
/// `running` is cleared when the program is empty and when the counter
/// runs off the end; an early stop leaves the flags as the instruction set
/// them.
// NOTE(review): the counter is advanced after every instruction, including
// jumps that have just assigned `ictr`, so execution resumes at target + 1,
// and it is also advanced after a failing instruction. Confirm both are
// intended.
pub fn run_program(&mut self) {
    if self.prog.0.is_empty() {
        self.running = false;
    }

    loop {
        // Re-read the length each round; an instruction may invoke a trap
        // that has mutable access to the whole VM.
        if self.ictr >= self.prog.0.len() {
            break;
        }
        if self.err_state || !self.running {
            return;
        }
        self.execute_instruction();
        self.ictr += 1;
    }

    self.running = false;
}
#[inline(always)]
fn execute_instruction(&mut self) {
let instr = &self.prog.0[self.ictr].clone();
macro_rules! e {
( $err:expr ) => {
{
self.running = false;
self.err_state = true;
self.errr = Datum::String($err.as_bytes().to_vec());
return;
}
}
}
macro_rules! deref {
( $oper:expr ) => {
match $oper.0 {
Address::Expr => &self.expr,
Address::Oper1 => &self.oper[0],
Address::Oper2 => &self.oper[1],
Address::Oper3 => &self.oper[2],
Address::Oper4 => &self.oper[3],
Address::Stack => &self.stack[$oper.1],
Address::Numer => e!("attempt to dereference constant numeric data"),
Address::Instr => e!("bad access to instruction data"),
}
}
}
macro_rules! deref_mut {
( $oper:expr ) => {
match $oper.0 {
Address::Expr => &mut self.expr,
Address::Oper1 => &mut self.oper[0],
Address::Oper2 => &mut self.oper[1],
Address::Oper3 => &mut self.oper[2],
Address::Oper4 => &mut self.oper[3],
Address::Instr => e!("bad mutable access to instruction data"),
// Stack, Numer
_ => e!("mutable access to immutable data"),
}
}
}
macro_rules! do_jmp {
( $idx:expr ) => {
let Operand(Address::Instr, target) = instr.1[$idx] else {
e!("illegal argument to jump");
};
if target >= self.prog.0.len() {
e!("out of bounds jump caught");
}
self.ictr = target;
}
}
macro_rules! lr_oper {
( $in_type:ident, $oper:tt, $out_type:ident ) => {
self.expr = Datum::$out_type(*match deref!(&instr.1[0]){
Datum::$in_type(l) => l,
_ => e!("illegal argument to instruction"),
} $oper *match deref!(&instr.1[1]){
Datum::$in_type(l) => l,
_ => e!("illegal argument to instruction"),
})
}
}
match instr.0 {
i::TRAP => {
let Operand(Address::Numer, idx) = instr.1[0] else {
e!("illegal argument to TRAP instruction");
};
if idx >= self.traps.len() {
e!("access to out of bounds trap!")
}
self.traps[idx].clone()(self)
},
// symtable ops
i::BIND => {
let Datum::String(tag) = deref!(&instr.1[0]) else {
e!("illegal argument to BIND instruction");
};
let tag = unsafe { str::from_utf8_unchecked(&tag).to_owned() };
self.symtab.insert(tag, instr.1[1].clone());
},
i::UNBIND => {
let Datum::String(tag) = deref!(&instr.1[0]) else {
e!("illegal argument to UNBIND instruction");
};
let tag = unsafe { str::from_utf8_unchecked(&tag) };
self.symtab.remove(&tag);
},
i::BOUND => {
let Datum::String(tag) = deref!(&instr.1[0]) else {
e!("illegal argument to BOUND instruction");
};
let tag = unsafe { str::from_utf8_unchecked(&tag) };
self.symtab.contains_key(&tag);
},
// stack ops
i::PUSH => self.stack.push_current_stack(deref!(&instr.1[0]).clone()),
i::POP => _ = self.stack.pop_current_stack(),
i::ENTER => self.stack.add_stack(),
i::EXIT => self.stack.destroy_top_stack(),
// movement ops
i::LOAD => *deref_mut!(&instr.1[1]) = deref!(&instr.1[0]).clone(),
i::CLEAR => *deref_mut!(&instr.1[0]) = Datum::None,
// control flow ops
i::NOP => (),
i::HALT => self.running = false,
i::PANIC => {
self.running = false;
self.err_state = false;
self.errr = deref!(&instr.1[0]).clone()
},
i::JMP => {
do_jmp!(0);
},
i::JMPIF => {
if let Datum::Bool(true) = self.expr {
do_jmp!(0);
}
},
// boolean ops
i::EQ => self.expr = Datum::Bool(*deref!(&instr.1[0]) == *deref!(&instr.1[1])),
i::LT => lr_oper!(Number, <, Bool),
i::GT => lr_oper!(Number, >, Bool),
i::LTE => lr_oper!(Number, <=, Bool),
i::GTE => lr_oper!(Number, >=, Bool),
i::BOOL_NOT => {
self.expr = Datum::Bool(!{
let Datum::Bool(a) = self.expr else {
e!("illegal argument to BOOL_NOT instruction");
};
a
});
},
i::BOOL_AND => lr_oper!(Bool, &&, Bool),
i::BOOL_OR => lr_oper!(Bool, ||, Bool),
// char / byte ops
i::BYTE_AND => lr_oper!(Char, &, Char),
i::BYTE_OR => lr_oper!(Char, |, Char),
i::XOR => lr_oper!(Char, ^, Char),
i::BYTE_NOT => {
self.expr = Datum::Char(!{
let Datum::Char(a) = self.expr else {
e!("illegal argument to BYTE_NOT instruction");
};
a
});
},
// numeric ops
i::ADD => lr_oper!(Number, +, Number),
i::SUB => lr_oper!(Number, -, Number),
i::MUL => lr_oper!(Number, *, Number),
i::FDIV => lr_oper!(Number, /, Number),
i::IDIV => {
let Datum::Number(l) = deref!(&instr.1[0]) else {
e!("illegal argument to IDIV instruction");
};
let Datum::Number(r) = deref!(&instr.1[1]) else {
e!("illgal argument to IDIV instruction");
};
let Fraction(l, 1) = l.make_exact() else {
e!("integer division on non integer value");
};
let Fraction(r, 1) = r.make_exact() else {
e!("integer division on non integer value");
};
self.expr = Datum::Number(Number::Fra(Fraction(l / r, 1)));
},
i::POW => {
let Datum::Number(l) = deref!(&instr.1[0]) else {
e!("illegal argument to POW instruction");
};
let Datum::Number(r) = deref!(&instr.1[1]) else {
e!("illgal argument to POW instruction");
};
self.expr = Datum::Number((*l).pow(*r));
},
i::INC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) {
*src = *src + Number::Fra(Fraction(1, 1));
} else {
e!("illegal argument to INC instruction");
},
i::DEC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) {
*src = *src - Number::Fra(Fraction(1, 1));
} else {
e!("illegal argument to INC instruction");
},
// byte/char to and from number conversions
i::CTON => {
let src = deref_mut!(&instr.1[0]);
if let Datum::Char(schr) = src {
*src = Datum::Number(Number::Fra(Fraction(*schr as isize, 1)));
} else {
e!("illegal argument to CTON instruction");
}
},
i::NTOC => {
let src = deref_mut!(&instr.1[0]);
if let Datum::Number(snum) = src {
let n = snum.make_inexact();
if !snum.is_exact() || n.0.fract() != 0.0 || n.0 > u8::MAX.into() || n.0 < 0.0 {
e!("input to NTOC cannot cleanly convert");
}
*src = Datum::Char(n.0.trunc() as u64 as u8);
} else {
e!("illegal argument to NTOC instruction");
}
},
i::MKVEC => self.expr = Datum::Vector(RefCell::from(vec![])),
i::MKBVEC => self.expr = Datum::ByteVector(RefCell::from(vec![])),
i::INDEX => {
let Datum::Number(idx) = deref!(&instr.1[1]) else {
e!("illegal argument to INDEX instruction");
};
let idx = idx.make_inexact();
if !idx.is_exact() || idx.0.fract() != 0.0 {
e!("illegal argument to INDEX instruction");
}
let idx = idx.0.trunc() as usize;
match deref!(&instr.1[0]) {
Datum::Vector(v) => {
let a = (*v.borrow()[idx].clone()).clone();
self.expr = a;
},
Datum::ByteVector(bv) => {
let a = Datum::Char(bv.borrow()[idx]);
self.expr = a;
},
Datum::List(l) => self.expr = l[idx].clone(),
_ => e!("illegal argument to INDEX instruction")
};
},
i::LENGTH => match deref!(&instr.1[0]) {
Datum::Vector(v) => {
let a = Datum::Number(Number::Fra(Fraction(v.borrow().len() as isize, 1)));
self.expr = a;
},
Datum::ByteVector(bv) => {
let a = Datum::Number(Number::Fra(Fraction(bv.borrow().len() as isize, 1)));
self.expr = a;
},
Datum::List(l) =>
self.expr = Datum::Number(Number::Fra(Fraction(l.len() as isize, 1))),
_ => e!("illegal argument to LENGTH instruction"),
},
i::SUBSL => {
let Datum::Number(st) = deref!(&instr.1[1]) else {
e!("illegal argument to SUBSL instruction");
};
let Datum::Number(ed) = deref!(&instr.1[2]) else {
e!("illegal argument to SUBSL instruction");
};
if !st.is_exact() || !ed.is_exact() {
e!("illegal argument to SUBSL instruction");
}
let st = st.make_inexact();
let ed = ed.make_inexact();
if st.0.fract() != 0.0 || ed.0.fract() != 0.0 {
e!("SUBSL: FP precision error");
}
let st = st.0.trunc() as usize;
let ed = ed.0.trunc() as usize;
match deref!(&instr.1[0]) {
Datum::Vector(v) => {
let a = Datum::Vector(RefCell::from(v.borrow()[st..ed].to_vec()));
self.expr = a;
},
Datum::ByteVector(bv) => {
let a = Datum::ByteVector(RefCell::from(bv.borrow()[st..ed].to_vec()));
self.expr = a;
},
Datum::List(a) =>
self.expr = Datum::List(Rc::new(
(**a).subsl(st as isize, ed as isize))),
_ => e!("illegal argument to SUBSL instruction")
};
}
i::INSER => {
let Datum::Number(idx) = deref!(&instr.1[2]) else {
e!("illegal argument to INSER instruction");
};
let idx = idx.make_inexact();
if !idx.is_exact() || idx.0.fract() != 0.0 {
e!("illegal argument to INSER instruction");
}
let idx = idx.0.trunc() as usize;
match deref!(&instr.1[0]) {
Datum::Vector(v) => {
v.borrow_mut().insert(idx, deref!(&instr.1[1]).clone().into());
},
Datum::ByteVector(bv) => {
let Datum::Char(b) = deref!(&instr.1[1]) else {
e!("INSER instruction can only insert a byte into a bytevector");
};
bv.borrow_mut().insert(idx, *b);
},
_ => e!("illegal argument to INSER instruction")
}
},
i::CAR => {
let Datum::List(arg) = deref!(&instr.1[0]) else {
e!("illegal argument to CAR instruction");
};
self.expr = (*arg.0).clone();
},
i::CDR => {
let Datum::List(arg) = deref!(&instr.1[0]) else {
e!("illegal argument to CAR instruction");
};
self.expr = (*arg.0).clone();
},
i::CONS => todo!("implement AST API"),
i::PARSE => todo!("implement AST API"),
i::EVAL => todo!("implement AST API"),
_ => {
e!("illegal instruction");
},
};
}
}

View file

@ -25,7 +25,6 @@ pub mod sexpr;
pub mod lexer; pub mod lexer;
pub mod parser; pub mod parser;
pub mod number; pub mod number;
pub mod stackstack;
pub mod hmap;
extern crate alloc; extern crate alloc;

View file

@ -529,7 +529,7 @@ impl Numeric for Float {
if self.0.fract() == 0.0 { if self.0.fract() == 0.0 {
Fraction(self.0 as isize, 1) Fraction(self.0 as isize, 1)
} else { } else {
unimplemented!("insert rational approximation procedure here") todo!("rational approximation implementation")
} }
} }
} }

View file

@ -16,6 +16,7 @@
*/ */
use core::fmt::Display; use core::fmt::Display;
use core::cell::RefCell;
use crate::lexer::{ use crate::lexer::{
LexError, LexError,
@ -414,11 +415,11 @@ impl Parser {
} }
if is_bv { if is_bv {
return Ok(Rc::from(Datum::ByteVector(bv_stack))) return Ok(Rc::from(Datum::ByteVector(RefCell::from(bv_stack))))
} }
if token.token_type == LexTokenType::VectorStart { if token.token_type == LexTokenType::VectorStart {
return Ok(Rc::from(Datum::Vector(lex_stack))) return Ok(Rc::from(Datum::Vector(RefCell::from(lex_stack))))
} }
// handle an empty list // handle an empty list

View file

@ -16,6 +16,9 @@
*/ */
use core::fmt::{self, Formatter}; use core::fmt::{self, Formatter};
use core::ops::Index;
use core::cell::RefCell;
use alloc::format; use alloc::format;
use alloc::rc::Rc; use alloc::rc::Rc;
use alloc::vec::Vec; use alloc::vec::Vec;
@ -23,7 +26,7 @@ use alloc::string::String;
use crate::number::Number; use crate::number::Number;
#[derive(Default, Clone)] #[derive(Default, Clone, PartialEq)]
pub enum Datum { pub enum Datum {
Number(Number), Number(Number),
Bool(bool), Bool(bool),
@ -31,8 +34,8 @@ pub enum Datum {
Symbol(String), Symbol(String),
Char(u8), Char(u8),
String(Vec<u8>), String(Vec<u8>),
Vector(Vec<Rc<Datum>>), Vector(RefCell<Vec<Rc<Datum>>>),
ByteVector(Vec<u8>), ByteVector(RefCell<Vec<u8>>),
#[default] #[default]
None, None,
} }
@ -45,7 +48,8 @@ fn byte_to_escaped_char(b: u8) -> String {
} }
} }
fn fmt_vec<T: fmt::Display>(v: &Vec<T>) -> String { fn fmt_vec<T: fmt::Display>(ve: &RefCell<Vec<T>>) -> String {
let v = ve.borrow();
if v.len() == 0 { if v.len() == 0 {
return String::new() return String::new()
} }
@ -102,9 +106,46 @@ impl fmt::Debug for Datum {
} }
#[derive(Default, Clone)] #[derive(Default, Clone, PartialEq)]
pub struct Ast(pub Rc<Datum>, pub Rc<Datum>); pub struct Ast(pub Rc<Datum>, pub Rc<Datum>);
impl Ast {
pub fn subsl(&self, start: isize, end: isize) -> Ast {
if end - start == 1 {
return Ast(Rc::from(self[start as usize].clone()), Rc::from(Datum::None))
}
if end == 0 {
return Ast(
Rc::from((*(self.0)).clone()),
Rc::from(Datum::None)
)
}
let Datum::List(ref next) = *self.1 else {
panic!("index into improper list form")
};
if start <= 0 {
Ast(
Rc::from((*(self.0)).clone()),
Rc::from(Datum::List(
Rc::from(next.subsl(start - 1, end - 1))))
)
} else {
next.subsl(start - 1, end - 1)
}
}
pub fn len(&self) -> usize {
let Datum::List(ref next) = *self.1 else {
return 1
};
1 + next.len()
}
}
impl Iterator for Ast { impl Iterator for Ast {
type Item = Rc<Datum>; type Item = Rc<Datum>;
@ -127,6 +168,25 @@ impl Iterator for Ast {
} }
} }
impl Index<usize> for Ast {
    type Output = Datum;

    /// Returns the element stored `index` cells into the list.
    ///
    /// # Panics
    ///
    /// Panics with "out of bounds indexing into AST" when the list has
    /// fewer than `index + 1` cells, or when the addressed cell holds
    /// `Datum::None`.
    fn index(&self, index: usize) -> &Self::Output {
        // hop along the tail pointers until the requested cell is reached
        let mut cell = self;
        for _ in 0..index {
            cell = match *cell.1 {
                Datum::List(ref tail) => &**tail,
                _ => panic!("out of bounds indexing into AST"),
            };
        }

        match *cell.0 {
            Datum::None => panic!("out of bounds indexing into AST"),
            _ => cell.0.as_ref(),
        }
    }
}
impl fmt::Display for Ast { impl fmt::Display for Ast {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "({}", self.0)?; write!(f, "({}", self.0)?;

View file

@ -0,0 +1,642 @@
use alloc::boxed::Box;
use alloc::{vec, vec::Vec};
use alloc::fmt::Debug;
use lexer::{E_TOO_MANY_DECIMALS, E_TOO_MANY_SLASH};
use core::cmp::Ordering;
use core::{fmt, u8};
use core::ops::{Add, Div, Mul, Sub};
// Error messages returned by `Number::from_str_into`.
/// Generic failure: the literal does not have a recognised shape.
pub const E_INCOMPREHENSIBLE: &str = "could not parse number literal";
/// A `#` prefix was the last character of the input.
pub const E_POUND_TRUNCATED: &str = "pound sign implies additional input";
/// The digits after an explicit radix prefix could not be parsed.
pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal";
/// The character following `#` is not a known prefix.
pub const E_UNKNOWN_CONTROL: &str = "unknown character in number literal";
/// The input string was empty.
pub const E_EMPTY_INPUT: &str = "empty string cannot be a number";
// Spellings of the symbolic values; each encodes to a lone flags byte.
const NUM_INF: &str = "+inf.0";
const NUM_NEG_INF: &str = "-inf.0";
const NUM_NAN: &str = "+nan.0";
const NUM_NEG_NAN: &str = "-nan.0";
// Bit flags stored in the first (header) byte of an encoded number.
// NOTE(review): these names are not SCREAMING_SNAKE_CASE and will trip the
// `non_upper_case_globals` lint; they are public, so a rename is an API change.
pub const NegativeFlag: u8 = 0b10000000; // positive value if off
pub const DecimalFlag: u8 = 0b01000000; // single integer if off
pub const FractionFlag: u8 = 0b00100000; // decimal if off
pub const ScientificFlag: u8 = 0b00010000; // requires a second flags byte
pub const InfiniteFlag: u8 = 0b00001000; // can be positive or negative
pub const NotANumberFlag: u8 = 0b00000100; // can be positive or negative because r7rs
pub const OverflownFlag: u8 = 0b00000010; // poisons exactness
/* NUMBER BYTES FORMAT
* Generally the format within the byte array operates like this
* (guaranteed header) 1. NumberFlags (u8)
* (for each integer) 2. Byte Length (u8)
* (for each integer) 3. N following bytes of data
*
* If Scientific Notation is used the leading number may be a decimal.
* In this case, there will be three total numbers
*
* All numbers are big endian
*/
/// Borrowed view over one encoded number (see the byte-format notes above).
///
/// The wrapper performs no validation on construction; `Number::is_valid`
/// checks an encoding obtained from an untrusted source.
///
/// `PartialEq` is deliberately not derived: this file provides a
/// hand-written `impl PartialEq for Number<'_>`, and deriving it as well is
/// a conflicting-implementation error (E0119).
#[repr(transparent)]
#[derive(Clone, Debug)]
pub struct Number<'src> (pub &'src [u8]);
/* WARNING
* member functions tend to assume that number encoding is consistent
* use Number::is_valid() to double check numbers from unknown sources
*
* TODO: maybe mark raw-indexing member functions as unsafe
*/
/// Encoding helpers, validation and parsing for [`Number`].
///
/// Layout used consistently by every method in this block:
///
/// * byte 0: flags;
/// * infinite / not-a-number values consist of the flags byte alone;
/// * otherwise one, two or three integer operands follow, each stored as a
///   length byte plus that many big-endian bytes;
/// * when `ScientificFlag` is set, the last operand (the exponent) is
///   preceded by its own flags byte, which carries the exponent's sign.
impl Number<'_> {
    /// Number of integer operands implied by a flags byte.
    fn operand_count(flags: u8) -> usize {
        let decimal = flags & DecimalFlag != 0;
        let scientific = flags & ScientificFlag != 0;
        let multi_part = flags & (DecimalFlag | FractionFlag | ScientificFlag) != 0;
        1 + usize::from(multi_part) + usize::from(scientific && decimal)
    }

    /// Walks the encoding with bounds checks and returns how many bytes it
    /// occupies, or `None` when the slice ends before the encoding does.
    fn encoded_len(&self) -> Option<usize> {
        let bytes = self.0;
        let flags = *bytes.first()?;
        if flags & (InfiniteFlag | NotANumberFlag) != 0 {
            return Some(1);
        }

        let scientific = flags & ScientificFlag != 0;
        let operands = Self::operand_count(flags);
        let mut cur = 1usize;
        for i in 0..operands {
            // the exponent is always the last operand and carries a flags byte
            if scientific && i + 1 == operands {
                bytes.get(cur)?;
                cur += 1;
            }
            cur += 1 + usize::from(*bytes.get(cur)?);
            if cur > bytes.len() {
                return None;
            }
        }
        Some(cur)
    }

    /// Length in bytes of the encoded number at the start of the slice.
    ///
    /// # Panics
    ///
    /// Panics when the encoding is truncated or longer than 255 bytes; use
    /// [`Number::is_valid`] first on data from unknown sources.
    #[inline(always)]
    pub fn byte_length(&self) -> u8 {
        let len = self.encoded_len().expect("malformed number encoding");
        assert!(len <= usize::from(u8::MAX), "number encoding exceeds 255 bytes");
        len as u8
    }

    /// Checks that the flags are coherent and that every operand announced
    /// by them fits inside the slice. Never panics.
    pub fn is_valid(&self) -> bool {
        let flags = match self.0.first() {
            Some(&f) => f,
            None => return false,
        };

        let decimal = flags & DecimalFlag != 0;
        let fraction = flags & FractionFlag != 0;
        let scientific = flags & ScientificFlag != 0;
        let overflown = flags & OverflownFlag != 0;
        let infinite = flags & InfiniteFlag != 0;
        let notanumber = flags & NotANumberFlag != 0;

        // check flags
        if overflown {
            return false
        }
        if fraction && (decimal || scientific) {
            return false
        }
        if infinite || notanumber {
            // symbolic values are exactly one flags byte and nothing else
            return !(decimal || fraction || scientific) && self.0.len() == 1
        }

        // at least 3 bytes for a single u8
        if self.0.len() < 3 {
            return false
        }

        self.encoded_len().is_some()
    }

    /// A number is treated as exact unless it uses scientific notation.
    #[inline(always)]
    pub fn is_exact(&self) -> bool {
        self.0[0] & ScientificFlag == 0
    }

    /// Appends an exact representation of `self` to `dst`.
    ///
    /// Scientific notation is expanded (not implemented yet); every other
    /// form is copied verbatim.
    #[inline(always)]
    pub fn make_exact_into(&self, dst:&mut Vec<u8>) {
        // expand scientific notation else just direct copy
        if self.0[0] & ScientificFlag != 0 {
            self.normalize_scientific_into(dst);
            return
        }

        self.copy_into(dst);
    }

    /// Not implemented yet.
    #[inline(always)]
    pub fn make_inexact_into(&self, dst: &mut Vec<u8>) {
        // basically just convert a fraction into an actual division
        todo!()
    }

    /// Appends the raw bytes of `self` to `dst`.
    // use this so you dont have to worry about clone while casting
    #[inline(always)]
    pub fn copy_into(&self, dst: &mut Vec<u8>) {
        dst.extend_from_slice(self.0);
    }

    /// Not implemented yet.
    #[inline(always)]
    pub fn normalize_scientific_into(&self, dst: &mut Vec<u8>) {
        todo!()
    }

    /// Reduces a fraction to lowest terms. Does nothing for non-fractions;
    /// the fraction case is not implemented yet.
    #[inline(always)]
    pub fn simplify_fraction_in_place(&mut self) {
        if self.0[0] & FractionFlag == 0 {
            return
        }

        // can technically do this in place
        // each element of the fraction will only shrink
        todo!()
    }

    /// Parses the accumulated ASCII `digits` in `radix` and appends them to
    /// `out` as one operand (length byte + big-endian bytes), then clears
    /// `digits`. Empty or overflowing input yields `err`.
    fn push_operand(
        digits: &mut Vec<u8>,
        radix: u32,
        out: &mut Vec<u8>,
        err: &'static str,
    ) -> Result<(), &'static str> {
        let text = core::str::from_utf8(digits.as_slice()).map_err(|_| err)?;
        let value = usize::from_str_radix(text, radix).map_err(|_| err)?;
        let bytes = value.to_be_bytes();
        out.push(bytes.len() as u8);
        out.extend_from_slice(&bytes);
        digits.clear();
        Ok(())
    }

    /// Parses the number literal `src` and appends its encoding to `dst`.
    ///
    /// Accepted forms: the four symbolic values, an optional `#x`/`#d`/`#o`/
    /// `#b`/`#i`/`#e` prefix, an optional sign, and then either a plain
    /// integer (any radix) or, in base 10, `a.b`, `a/b`, `aeN` or `a.beN`.
    /// The exactness prefixes `#i`/`#e` are accepted but not acted on yet.
    ///
    /// # Errors
    ///
    /// Returns one of the `E_*` messages when the literal is malformed.
    /// Nothing is appended to `dst` on error.
    #[inline(always)]
    pub fn from_str_into(src: &str, dst: &mut Vec<u8>) -> Result<(), &'static str> {
        // handle symbolic values
        let symbolic = match src {
            NUM_INF => Some(InfiniteFlag),
            NUM_NEG_INF => Some(NegativeFlag | InfiniteFlag),
            NUM_NAN => Some(NotANumberFlag),
            NUM_NEG_NAN => Some(NegativeFlag | NotANumberFlag),
            _ => None,
        };
        if let Some(flags) = symbolic {
            dst.push(flags);
            return Ok(());
        }

        let mut ctrl_flags = 0u8;
        let mut base = 10u32; // literals without a prefix are decimal
        let mut body: Vec<u8> = Vec::new();
        let mut digits: Vec<u8> = Vec::new();
        let mut iter = src.chars().peekable();

        if iter.next_if_eq(&'#').is_some() {
            match iter.next() {
                None => return Err(E_POUND_TRUNCATED),
                Some('i') | Some('e') => (), // exactness is not forced yet
                Some('x') => base = 16,
                Some('d') => base = 10,
                Some('o') => base = 8,
                Some('b') => base = 2,
                Some(_) => return Err(E_UNKNOWN_CONTROL),
            }
        }

        if iter.peek().is_none() {
            return Err(E_EMPTY_INPUT);
        }
        if iter.next_if_eq(&'-').is_some() {
            ctrl_flags |= NegativeFlag;
        } else {
            let _ = iter.next_if_eq(&'+');
        }

        if base != 10 {
            // cant mix non-decimal base and other number representations
            for c in iter {
                if !c.is_digit(base) {
                    return Err(E_BASE_PARSE_FAIL);
                }
                digits.push(c as u8);
            }
            Self::push_operand(&mut digits, base, &mut body, E_BASE_PARSE_FAIL)?;
        } else {
            // just a decimal number, but could have a weird format
            loop {
                match iter.next() {
                    Some(c) if c.is_digit(10) => digits.push(c as u8),

                    Some('.') => {
                        if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE)
                        }
                        if ctrl_flags & DecimalFlag != 0 {
                            return Err(E_TOO_MANY_DECIMALS)
                        }
                        ctrl_flags |= DecimalFlag;
                        Self::push_operand(&mut digits, 10, &mut body, E_INCOMPREHENSIBLE)?;
                    },

                    Some('/') => {
                        if ctrl_flags & (DecimalFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE)
                        }
                        if ctrl_flags & FractionFlag != 0 {
                            return Err(E_TOO_MANY_SLASH)
                        }
                        ctrl_flags |= FractionFlag;
                        Self::push_operand(&mut digits, 10, &mut body, E_INCOMPREHENSIBLE)?;
                    },

                    Some('e') => {
                        if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE)
                        }
                        ctrl_flags |= ScientificFlag;
                        Self::push_operand(&mut digits, 10, &mut body, E_INCOMPREHENSIBLE)?;

                        // the exponent gets its own flags byte holding its sign
                        let mut newctrl = 0u8;
                        if iter.next_if_eq(&'-').is_some() {
                            newctrl |= NegativeFlag;
                        } else {
                            let _ = iter.next_if_eq(&'+');
                        }
                        body.push(newctrl);
                    },

                    Some(_) => return Err(E_INCOMPREHENSIBLE),

                    None => {
                        Self::push_operand(&mut digits, 10, &mut body, E_INCOMPREHENSIBLE)?;
                        break;
                    }
                }
            }
        }

        // TODO: fractions are stored unsimplified. `simplify_fraction_in_place`
        // is still `todo!()`, so calling it here would panic on every
        // fraction literal.
        dst.push(ctrl_flags);
        dst.append(&mut body);
        Ok(())
    }

    /// Appends the encoding of the small non-negative integer `src` to `dst`
    /// and returns a view over exactly the bytes that were appended.
    pub fn from_u8_into(src: u8, dst: &mut Vec<u8>) -> Number {
        let start = dst.len();
        // flags (plain positive integer), operand length, operand value
        dst.extend_from_slice(&[0, 1, src]);
        Number(&dst[start..])
    }
}
/// Textual rendering of an encoded number.
///
/// Not implemented yet: formatting any `Number` currently panics via
/// `todo!()`.
impl fmt::Display for Number<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// can implement after I finish division
todo!()
}
}
/// Borrows the bytes of a boxed slice as a `Number`; no validation is done.
impl<'a> From<&'a Box<[u8]>> for Number<'a> {
    fn from(value: &'a Box<[u8]>) -> Self {
        let bytes: &'a [u8] = &value[..];
        Number(bytes)
    }
}
/// Borrows the contents of a byte vector as a `Number`; no validation is done.
impl<'a> From<&'a Vec<u8>> for Number<'a> {
    fn from(value: &'a Vec<u8>) -> Self {
        let bytes: &'a [u8] = &value[..];
        Number(bytes)
    }
}
impl<'a> From<&'a [u8]> for Number<'a> {
fn from(value: &'a [u8]) -> Self {
Number(value)
}
}
impl<'a> Into<&'a [u8]> for Number<'a> {
fn into(self) -> &'a [u8] {
self.0
}
}
/// Addition of two encoded numbers, producing a newly allocated encoding.
///
/// Not implemented yet: calling `+` panics via `todo!()`.
impl Add for Number<'_> {
type Output = Box<[u8]>;
fn add(self, rhs: Self) -> Self::Output {
todo!()
}
}
/// Subtraction of two encoded numbers, producing a newly allocated encoding.
///
/// Not implemented yet: calling `-` panics via `todo!()`.
impl Sub for Number<'_> {
type Output = Box<[u8]>;
fn sub(self, rhs: Self) -> Self::Output {
todo!()
}
}
/// Multiplication of two encoded numbers, producing a newly allocated encoding.
///
/// Not implemented yet: calling `*` panics via `todo!()`.
impl Mul for Number<'_> {
type Output = Box<[u8]>;
fn mul(self, rhs: Self) -> Self::Output {
todo!()
}
}
/// Division of two encoded numbers, producing a newly allocated encoding.
///
/// Not implemented yet: both the inner integer-division helper and the
/// operator itself panic via `todo!()`. The comment inside `div` sketches
/// the planned case analysis.
impl Div for Number<'_> {
type Output = Box<[u8]>;
fn div(self, rhs: Self) -> Self::Output {
// divide unsigned integer by unsigned integer
// the inputs (lh and rh) start with length byte
// returns a decimal index
fn div_ints(lh: &[u8], rh: &[u8], dest: &mut Vec<u8>) -> u8 {
todo!()
}
/* Options
* divide a single int by a single int
* - (make fraction)
* divide a fraction by a single int
* - (multiply denominator)
* divide a decimal by a single int
* - (divide straight through)
* divide a scientific note by a single int
* - divide the first num
* - multiply by however much is needed for ones place (like 3.5)
* - add or subtract from the second number accordingly
*
* divide a single int by a fraction
* - output denom * lh / numer
* divide a single int by a decimal
*/
todo!()
}
}
/// Equality between two encoded numbers.
///
/// Not implemented yet: `==` panics via `todo!()`.
// NOTE(review): `Number` also carries `#[derive(PartialEq)]` on its
// definition; a derived and a hand-written impl of the same trait cannot
// coexist, so one of the two has to go.
impl PartialEq for Number<'_> {
fn eq(&self, other: &Number) -> bool {
todo!()
}
}
/// Ordering between two encoded numbers.
///
/// Not implemented yet: every comparison panics via `todo!()`.
impl PartialOrd for Number<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
todo!()
}
}
// Unit tests for number parsing, arithmetic, exponentiation and ordering.
//
// NOTE(review): every test goes through `str::parse::<Number>()` and names
// `Number::Flt`, `Number::Fra`, `Number::Sci`, `Number::Sym`, `Float`,
// `Fraction`, `ScientificNotation`, `SymbolicNumber`, `pow`, `make_inexact`
// and several `E_*` constants (`E_SCIENTIFIC_E`, `E_NO_DENOMINATOR`, ...).
// None of these are defined in the visible part of this file, which exposes
// `Number<'src>(&[u8])` and `Number::from_str_into` instead. Presumably the
// tests were carried over from an earlier enum-based implementation and
// still need porting -- confirm before enabling them.
#[cfg(test)]
mod tests {
use super::*;
// Literal parsing: plain, signed, radix-prefixed, fractional, scientific
// and symbolic inputs, followed by the expected error cases.
#[test]
fn parse_number_tests() {
assert_eq!("1.3".parse::<Number>(),
Ok(Number::Flt(Float(1.3))));
assert_eq!("1".parse::<Number>(),
Ok(Number::Flt(Float(1 as f64))));
assert_eq!("1.3e3".parse::<Number>(),
Ok(Number::Sci(ScientificNotation(1.3, 3))));
assert_eq!("+1.3".parse::<Number>(),
Ok(Number::Flt(Float(1.3))));
assert_eq!("-1.3".parse::<Number>(),
Ok(Number::Flt(Float(-1.3))));
assert_eq!("#d234".parse::<Number>(),
Ok(Number::Flt(Float(234.0))));
assert_eq!("#o17".parse::<Number>(),
Ok(Number::Fra(Fraction(15, 1))));
assert_eq!("#xAA".parse::<Number>(),
Ok(Number::Fra(Fraction(170, 1))));
assert_eq!("#b101".parse::<Number>(),
Ok(Number::Flt(Float(5.0))));
assert_eq!("2/4".parse::<Number>(),
Ok(Number::Fra(Fraction(2, 4))));
assert_eq!("#e1/5".parse::<Number>(),
Ok(Number::Fra(Fraction(1, 5))));
assert_eq!("#i1/5".parse::<Number>(),
Ok(Number::Flt(Float(0.2))));
assert_eq!("#e1e1".parse::<Number>(),
Ok(Number::Sci(ScientificNotation(1.0, 1))));
assert_eq!("+inf.0".parse::<Number>(),
Ok(Number::Sym(SymbolicNumber::Inf)));
assert_eq!("2e3".parse::<Number>(),
Ok(ScientificNotation(2.0, 3)));
assert_eq!("0e1".parse::<Number>(),
Ok(ScientificNotation(0.0, 1)));
assert_eq!("-1e34".parse::<Number>(),
Ok(ScientificNotation(-1.0, 34)));
assert_eq!("3.3e3".parse::<Number>(),
Ok(ScientificNotation(3.3, 3)));
assert_eq!("2".parse::<Number>(),
Err(E_SCIENTIFIC_E));
assert_eq!("2e2e2".parse::<Number>(),
Err(E_SCIENTIFIC_MULTI_E));
assert_eq!("2/3".parse::<Number>(),
Ok(Fraction(2, 3)));
assert_eq!("0/1".parse::<Number>(),
Ok(Fraction(0, 1)));
assert_eq!("-1/34".parse::<Number>(),
Ok(Fraction(-1, 34)));
assert_eq!("2".parse::<Number>(),
Err(E_NO_DENOMINATOR));
assert_eq!("2/2/2".parse::<Number>(),
Err(E_MULTI_DENOMINATOR));
assert_eq!("2/0".parse::<Number>(),
Err(E_ZERO_DENOMINATOR));
assert_eq!("3.3/3".parse::<Number>(),
Err(E_NUMERATOR_PARSE_FAIL));
}
// Each case is [x, y, z] with x + y == z; commutativity and the inverse
// (subtraction) are checked on the same triple.
#[test]
fn test_number_addition_subtraction_cases() {
let cases = vec![
vec!["1/5", "4/5", "1/1"],
vec!["1/5", "0.8", "1/1"],
vec!["1e1", "2.0", "12/1"],
vec!["1e1", "2/1", "12/1"],
vec!["1e1", "1/2", "10.5"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
// test some mathematical properties
assert_eq!(x + y, z);
assert_eq!(x + y, y + x);
assert_eq!(z - x, y);
assert_eq!(x + y - x, y);
});
// theres no reason this should adhere to all the other rules
let x = "+inf.0".parse::<Number>().unwrap();
let y = "1e1".parse::<Number>().unwrap();
let z = "+inf.0".parse::<Number>().unwrap();
assert_eq!(x + y, z);
}
// Each case is [x, y, z] with x * y == z; commutativity and the inverse
// (division) are checked on the same triple.
#[test]
fn test_number_multiplication_division_cases() {
let cases = vec![
vec!["1/5", "5e0", "1/1"],
vec!["1/5", "5", "1/1"],
vec!["1/5", "2/1", "2/5"],
vec!["4.4", "1/2", "2.2"],
vec!["12.0", "1/2", "6/1"],
vec!["1e1", "2.0", "20/1"],
vec!["1e1", "2/1", "20/1"],
vec!["1e1", "1/2", "5/1"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
// test some mathematical properties
assert_eq!(x * y, z);
assert_eq!(x * y, y * x);
assert_eq!(z / x, y);
assert_eq!(x * y / x, y);
});
}
// Each case is [base, exponent, expected] for `pow`.
#[test]
fn test_number_pow_cases() {
// TODO: add scientific notation cases
let cases = vec![
vec!["2", "2", "4"],
vec!["2/1", "2/1", "4/1"],
vec!["2/1", "2/-1", "1/4"],
vec!["2/1", "2/2", "2/1"],
vec!["2/1", "2.0", "4/1"],
vec!["27/8", "2/-3", "4/9"]
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
assert_eq!(x.pow(y), z);
});
}
// Each case lists three values in strictly ascending order.
#[test]
fn test_number_ord_cases() {
// TODO: add more cases
let cases = vec![
vec!["1/2", "1.0", "1e1"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
assert!(x < y);
assert!(y < z);
assert!(x < z);
});
}
// "1e-1" converted to an inexact value must match the float 0.1; any other
// result falls through to the failing assertion.
#[test]
fn float_negative_exponent_case() {
if let Float(0.1) = "1e-1"
.parse::<Number>()
.unwrap()
.make_inexact() {
return
}
assert!(false)
}
}