HyphaeVM - WIP

This commit is a WORK IN PROGRESS for the base implementation of the
HyphaeVM. This will be squashed into a larger commit eventually when
the work of implementing the HyphaeVM is finished.

Of note, the ISA is mostly finished and much of the VM design is in
place. Yet to be done are a few traps in mycelium, migrating pieces
like the number package and the sexpr package into the VM package,
and of course much testing.

Signed-off-by: Ava Affine <ava@sunnypup.io>
This commit is contained in:
Ava Apples Affine 2025-06-26 10:52:54 -07:00
parent 3a0a141738
commit 4ad319213d
17 changed files with 2073 additions and 17 deletions

View file

@ -27,7 +27,7 @@ unit-test-parser:
- cargo test parser
unit-test-number-package:
stage: test-backend
stage: test-frontend
script:
- cargo test number
@ -40,3 +40,8 @@ unit-test-quickmap:
stage: test-backend
script:
- cargo test hmap
unit-test-instruction-decoding:
stage: test-backend
script:
- cargo test util

117
Cargo.lock generated
View file

@ -112,18 +112,56 @@ dependencies = [
"mycelium",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "hashbrown"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hyphae"
version = "0.1.0"
dependencies = [
"mycelium",
"num",
"serde",
"toml",
]
[[package]]
name = "indexmap"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "memchr"
version = "2.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "mycelium"
version = "0.1.0"
@ -228,6 +266,35 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "serde"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_spanned"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde",
]
[[package]]
name = "strsim"
version = "0.11.1"
@ -245,6 +312,47 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "toml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"toml_write",
"winnow",
]
[[package]]
name = "toml_write"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]]
name = "unicode-ident"
version = "1.0.18"
@ -329,3 +437,12 @@ name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
dependencies = [
"memchr",
]

View file

@ -1,3 +1,3 @@
[workspace]
resolver = "2"
members = ["mycelium", "decomposer"]
members = ["mycelium", "decomposer", "hyphae"]

12
hyphae/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "hyphae"
version = "0.1.0"
edition = "2024"
[dependencies]
mycelium = { path = "../mycelium" }
num = { version = "0.4.3", features = ["alloc"] }
[build-dependencies]
serde = { version = "1.0", features = ["alloc", "derive"] }
toml = "0.8.23"

109
hyphae/build.rs Normal file
View file

@ -0,0 +1,109 @@
use std::{env, fs};
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
use serde::Deserialize;
/// Top-level shape of `instructions.toml`: the list of `[[instructions]]`
/// tables, kept in file order.
#[derive(Deserialize)]
struct Document {
// one entry per `[[instructions]]` table; the position in this list is
// what `main` uses as the opcode number
pub instructions: Vec<Instruction>,
}
/// One `[[instructions]]` entry from `instructions.toml`.
// don't warn about unused fields in the toml instruction struct:
// `output` and `description` are deserialized but never read by `main`
#[allow(dead_code)]
#[derive(Deserialize)]
struct Instruction {
// mnemonic; `main` upper-cases it to name the generated constant
pub name: String,
// operand names; `main` only uses how many there are
pub args: Vec<String>,
// human readable description of the result (not read by `main`)
pub output: String,
// human readable description of the instruction (not read by `main`)
pub description: String,
}
/// Build-script entry point.
///
/// Reads `instructions.toml` from the crate root and writes
/// `$OUT_DIR/hyphae_instr.rs`. The generated file contains:
///
/// * `pub struct Operation(pub u8)` and one `pub const` per instruction,
///   numbered by the instruction's position in the TOML file;
/// * `TryFrom<u8>`, `TryFrom<&str>` and `FromStr` impls for `Operation`;
/// * `Operation::num_args`, the operand count of each opcode.
///
/// # Panics
///
/// Panics (failing the build) when `OUT_DIR` is unset, when the table cannot
/// be read or parsed, when it holds more opcodes than a `u8` can number, or
/// when the output file cannot be written.
fn main() {
    let output_path = Path::new(&env::var("OUT_DIR").unwrap())
        .join("hyphae_instr.rs");
    let input = fs::read_to_string("instructions.toml")
        .expect("hyphae: failed to read instructions.toml");
    let instruction_table: Document =
        toml::from_str(&input)
            .expect("hyphae: failed to parse instructions.toml");

    // opcodes are emitted as `u8` literals; fail here with a readable
    // message instead of inside the generated code
    assert!(
        instruction_table.instructions.len() <= usize::from(u8::MAX) + 1,
        "hyphae: instructions.toml defines more than 256 instructions"
    );

    let mut isa = "#[repr(transparent)]\n".to_owned();
    isa += "#[derive(Clone, Debug, PartialEq)]\n";
    isa += "pub struct Operation(pub u8);\n\n";

    let mut isa_from_byte = "impl TryFrom<u8> for Operation {\n".to_owned();
    isa_from_byte += " type Error = &'static str;\n";
    isa_from_byte += " fn try_from(v: u8) -> Result<Self, Self::Error> {\n";
    isa_from_byte += " match v {\n";

    let mut isa_fromstr = "impl FromStr for Operation {\n".to_owned();
    isa_fromstr += " type Err = &'static str;\n";
    isa_fromstr += " fn from_str(v: &str) -> Result<Self, Self::Err> {\n";
    isa_fromstr += " match v {\n";

    let mut isa_from_str = "impl TryFrom<&str> for Operation {\n".to_owned();
    isa_from_str += " type Error = &'static str;\n";
    isa_from_str += " fn try_from(v: &str) -> Result<Self, Self::Error> {\n";
    isa_from_str += " match v {\n";

    let mut isa_num_args = "impl Operation {\n".to_owned();
    isa_num_args += " pub fn num_args(&self) -> Result<u8, &'static str> {\n";
    isa_num_args += " match self.0 {\n";

    // one constant and one match arm per instruction; the opcode is the
    // instruction's index in the table
    for (idx, instr) in instruction_table.instructions.iter().enumerate() {
        let const_name = instr.name.to_ascii_uppercase();

        isa += &format!("pub const {}: Operation = Operation({});\n",
            const_name, idx);
        isa_from_byte += &format!(" {} => Ok({}),\n", idx, const_name);
        isa_from_str += &format!(" \"{}\" => Ok({}),\n",
            const_name, const_name);
        isa_fromstr += &format!(" \"{}\" => Ok({}),\n",
            const_name, const_name);
        isa_num_args += &format!(" {} => Ok({}),\n", idx, instr.args.len());
    }

    // every generated `match` ends with the same catch-all arm and closers
    let match_footer = concat!(
        " _ => Err(\"illegal instruction\"),\n",
        " }\n",
        " }\n",
        "}\n\n",
    );
    isa_from_byte += match_footer;
    isa_from_str += match_footer;
    isa_fromstr += match_footer;
    isa_num_args += match_footer;

    isa += "\n";
    isa += isa_from_byte.as_str();
    isa += isa_from_str.as_str();
    isa += isa_fromstr.as_str();
    isa += isa_num_args.as_str();

    let mut output_file =
        BufWriter::new(File::create(&output_path).unwrap());
    write!(&mut output_file, "use core::str::FromStr;\n\n\n").unwrap();
    write!(&mut output_file, "{}", isa).unwrap();
    // flush explicitly: dropping a BufWriter discards write errors
    output_file.flush()
        .expect("hyphae: failed to write hyphae_instr.rs");

    println!("cargo::rerun-if-changed=build.rs");
    // the instruction table is TOML; watching a file that does not exist
    // would not track edits to the real table
    println!("cargo::rerun-if-changed=instructions.toml");
}

303
hyphae/instructions.toml Normal file
View file

@ -0,0 +1,303 @@
# NOTE: keep libc out of this; that's what the trap vector is for
# NOTE to programmers: only registers allow mutable access
[[instructions]]
name = "trap"
args = ["index"]
output = "result of function"
description = "triggers callback in trap vector at index"
[[instructions]]
name = "bind"
args = ["name", "operand"]
output = ""
description = "map name to operand in sym table."
[[instructions]]
name = "unbind"
args = ["name"]
output = ""
description = "remove name mapping from sym table."
[[instructions]]
name = "bound"
args = ["name"]
output = "expr = true if name is bound"
description = "test if a name is already bound"
[[instructions]]
name = "push"
args = ["operand"]
output = ""
description = "pushes operand onto stack."
[[instructions]]
name = "pop"
args = []
output = ""
description = "removes element at top of stack."
[[instructions]]
name = "enter"
args = []
output = ""
description = "create new stack frame"
[[instructions]]
name = "exit"
args = []
output = ""
description = "delete current stack frame"
[[instructions]]
name = "load"
args = ["src", "dest"]
output = ""
description = "copies src into dest"
[[instructions]]
name = "clear"
args = ["dest"]
output = ""
description = "clears dest"
[[instructions]]
name = "nop"
args = []
output = ""
description = "no operation"
[[instructions]]
name = "halt"
args = []
output = ""
description = "halts the VM"
[[instructions]]
name = "panic"
args = ["error"]
output = ""
description = "sets error state and halts VM"
[[instructions]]
name = "jmp"
args = ["addr"]
output = ""
description = "sets ictr register to addr"
[[instructions]]
name = "jmpif"
args = ["addr"]
output = ""
description = "if expr register holds true, sets ictr to addr"
[[instructions]]
name = "eq"
args = ["a", "b"]
output = "a == b"
description = "equality test"
[[instructions]]
name = "lt"
args = ["a", "b"]
output = "a < b"
description = "less than test"
[[instructions]]
name = "gt"
args = ["a", "b"]
output = "a > b"
description = "greater than test"
[[instructions]]
name = "lte"
args = ["a", "b"]
output = "a <= b"
description = "less than equals test"
[[instructions]]
name = "gte"
args = ["a", "b"]
output = "a >= b"
description = "greater than equals test"
[[instructions]]
name = "bool_not"
args = []
output = "expr = !expr"
description = "boolean not"
[[instructions]]
name = "bool_and"
args = ["a", "b"]
output = "a && b"
description = "boolean and"
[[instructions]]
name = "bool_or"
args = ["a", "b"]
output = "a || b"
description = "boolean or"
[[instructions]]
name = "byte_and"
args = ["a", "b"]
output = "a & b"
description = "bitwise and"
[[instructions]]
name = "byte_or"
args = ["a", "b"]
output = "a | b"
description = "bitwise or"
[[instructions]]
name = "xor"
args = ["a", "b"]
output = "a xor b"
description = "bitwise exclusive or"
[[instructions]]
name = "byte_not"
args = []
output = "expr = !expr"
description = "bitwise not"
[[instructions]]
name = "add"
args = ["a", "b"]
output = "a + b"
description = "numeric addition"
[[instructions]]
name = "sub"
args = ["a", "b"]
output = "a - b"
description = "numeric subtraction"
[[instructions]]
name = "mul"
args = ["a", "b"]
output = "a * b"
description = "numeric multiplication"
[[instructions]]
name = "fdiv"
args = ["a", "b"]
output = "a / b"
description = "numeric FLOAT division"
[[instructions]]
name = "idiv"
args = ["a", "b"]
output = "a / b"
description = "numeric INTEGER division"
[[instructions]]
name = "pow"
args = ["a", "b"]
output = "a ^ b"
description = "numeric operation to raise a to the power of b"
[[instructions]]
name = "modulo"
args = ["a", "b"]
output = "a % b"
description = "numeric modulo operation"
[[instructions]]
name = "rem"
args = ["a", "b"]
output = "remainder from a / b"
description = "remainder from integer division"
[[instructions]]
name = "inc"
args = ["src"]
output = ""
description = "increments number at source"
[[instructions]]
name = "dec"
args = ["src"]
output = ""
description = "decrements number at source"
[[instructions]]
name = "cton"
args = ["src"]
output = ""
description = "mutates a char datum into a number datum"
[[instructions]]
name = "ntoc"
args = ["src"]
output = ""
description = "mutates a number datum into a char datum"
[[instructions]]
name = "ntoi"
args = ["src"]
output = ""
description = "mutates a number datum into its exact form"
[[instructions]]
name = "ntoe"
args = ["src"]
output = ""
description = "mutates a number datum into its inexact form"
[[instructions]]
name = "mkvec"
args = []
output = "a blank vector"
description = "creates a new vector"
[[instructions]]
name = "mkbvec"
args = []
output = "a blank bytevector"
description = "creates a blank bytevector"
[[instructions]]
name = "index"
args = ["collection", "index"]
output = "collection[index]"
description = "extracts element from collection at index"
[[instructions]]
name = "length"
args = ["collection"]
output = "length of collection"
description = "calculates length of collection"
[[instructions]]
name = "subsl"
args = ["collection", "start", "end"]
output = "collection[start:end]"
description = "returns a subset from collection denoted by start and end indexes"
[[instructions]]
name = "inser"
args = ["collection", "elem", "idx"]
output = ""
description = "inserts an element at specified index into a collection"
[[instructions]]
name = "cons"
args = ["left", "right"]
output = "resulting collection"
description = "either append right to left or make new list from both"
[[instructions]]
name = "car"
args = ["list"]
output = "returns first element in cons cell"
description = "takes an AST and returns first element in top level cons cell"
[[instructions]]
name = "cdr"
args = ["list"]
output = "returns last element in cons cell"
description = "takes an AST and returns last element in top level cons cell"

View file

@ -44,7 +44,7 @@ const INDEXED_BUCKETS: u8 = 199;
* or more likely rip and replace with a better nostd hashmap
*/
#[inline]
fn string_hash(input: &String) -> u8 {
fn string_hash(input: &str) -> u8 {
input
.chars()
// each letter and number get a digit
@ -82,7 +82,7 @@ impl<'a, T: Clone> QuickMap<T> {
return None;
}
pub fn remove(&mut self, arg: &String) -> Option<T> {
pub fn remove(&mut self, arg: &str) -> Option<T> {
let idx = string_hash(&arg);
let len = self.0[idx as usize].0.len();
for i in 0..len {
@ -97,7 +97,7 @@ impl<'a, T: Clone> QuickMap<T> {
return None;
}
pub fn contains_key(&self, arg: &String) -> bool {
pub fn contains_key(&self, arg: &str) -> bool {
let idx = string_hash(arg);
for kv in self.0[idx as usize].0.iter() {
if &kv.0 == arg {

19
hyphae/src/instr.rs Normal file
View file

@ -0,0 +1,19 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
include!(concat!(env!("OUT_DIR"), "/hyphae_instr.rs"));

26
hyphae/src/lib.rs Normal file
View file

@ -0,0 +1,26 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#![cfg_attr(not(test), no_std)]
pub mod hmap;
pub mod stackstack;
pub mod instr;
pub mod vm;
pub mod util;
extern crate alloc;

View file

@ -83,10 +83,10 @@ impl<T: Debug> Debug for StackStack<T> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
let mut ss_idx = 1;
let mut ss_cur = &*self.0;
while let Some(ref inner) = ss_cur {
while let Some(inner) = ss_cur {
write!(f, "Frame {ss_idx}:")?;
let mut s_cur = &*inner.stack.0;
while let Some(ref node) = s_cur {
while let Some(node) = s_cur {
write!(f, " {:#?}", node.data)?;
s_cur = &*node.next.0;
}

303
hyphae/src/util.rs Normal file
View file

@ -0,0 +1,303 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use crate::instr::Operation;
use alloc::vec::Vec;
use alloc::vec;
use core::ops::Index;
use core::mem::transmute;
/// Addressing mode of an operand, stored as the first byte of every encoded
/// operand.
///
/// `Stack`, `Instr` and `Numer` are followed by a `usize`-wide payload in the
/// byte encoding (see `Address::operand_size`); the register modes carry no
/// payload.
#[repr(u8)]
#[derive(Debug, Clone, PartialEq)]
pub enum Address {
Stack = 0xf0, // immutable access only
Instr = 0xf1, // immutable access only
Expr = 0xf2, // mutable access allowed
Oper1 = 0xf3, // mutable access allowed
Oper2 = 0xf4, // mutable access allowed
Oper3 = 0xf5, // mutable access allowed
Oper4 = 0xf6, // mutable access allowed
// NOTE(review): 0xf7 is unassigned; confirm whether the gap is intentional
Numer = 0xf8, // immutable access only
}
/// A decoded operand: its addressing mode and the `usize` payload that
/// followed it in the byte stream (0 for modes that carry no payload).
#[derive(Debug, Clone, PartialEq)]
pub struct Operand(pub Address, pub usize);
/// A decoded instruction: the operation and its operands, in encoding order.
#[derive(Debug, Clone, PartialEq)]
pub struct Instruction(pub Operation, pub Vec<Operand>);
/// A decoded program: a sequence of instructions, indexable by position.
#[derive(Debug, Clone, PartialEq)]
pub struct Program(pub Vec<Instruction>);
impl Into<u8> for Address {
fn into(self) -> u8 {
unsafe { transmute::<Address, u8>(self) }
}
}
impl TryFrom<u8> for Address {
    type Error = &'static str;

    /// Decodes an addressing-mode byte.
    ///
    /// Returns `Err("illegal addressing mode")` when `val` is not the
    /// discriminant of any `Address` variant.
    fn try_from(val: u8) -> Result<Self, Self::Error> {
        // every variant, compared against `val` by discriminant
        const MODES: [Address; 8] = [
            Address::Stack,
            Address::Instr,
            Address::Expr,
            Address::Oper1,
            Address::Oper2,
            Address::Oper3,
            Address::Oper4,
            Address::Numer,
        ];

        IntoIterator::into_iter(MODES)
            .find(|mode| mode.clone() as u8 == val)
            .ok_or("illegal addressing mode")
    }
}
impl Address {
    /// Number of payload bytes that follow this addressing-mode byte in the
    /// encoded form: one `usize` for `Stack`, `Instr` and `Numer`, nothing
    /// for the register modes.
    fn operand_size(&self) -> u8 {
        let word_bytes = (usize::BITS / 8) as u8;
        match self {
            Address::Stack | Address::Instr | Address::Numer => word_bytes,
            Address::Expr
            | Address::Oper1
            | Address::Oper2
            | Address::Oper3
            | Address::Oper4 => 0,
        }
    }
}
impl TryFrom<&[u8]> for Operand {
type Error = &'static str;
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
let addr_mode: Address = value[0].try_into()?;
let operand_size = addr_mode.operand_size();
if value.len() < (operand_size + 1).into() {
return Err("truncated address data")
}
let mut operand_bytes: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
for (&src, dest) in value[1..(1+operand_size) as usize]
.iter()
.zip(operand_bytes.iter_mut()) {
*dest = src;
}
Ok(Operand(addr_mode, usize::from_ne_bytes(operand_bytes)))
}
}
impl Into<Vec<u8>> for Operand {
fn into(self) -> Vec<u8> {
let mut res = vec![];
res.push(self.0.clone() as u8);
res.append(&mut self.1.to_ne_bytes()[..self.0.operand_size() as usize].to_vec());
res
}
}
impl Operand {
fn byte_length(&self) -> u8 {
1 + self.0.operand_size()
}
}
impl TryFrom<&[u8]> for Instruction {
type Error = &'static str;
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
let operation: Operation = value[0].try_into()?;
let mut operands: Vec<Operand> = vec![];
let mut cur = 1;
for _ in 0..operation.num_args()? {
if cur >= value.len() {
return Err("operand data truncated")
}
let operand: Operand = value[cur..].try_into()?;
cur += operand.byte_length() as usize;
operands.push(operand);
}
Ok(Instruction(operation, operands))
}
}
impl Into<Vec<u8>> for Instruction {
fn into(self) -> Vec<u8> {
let mut res = vec![];
res.push(self.0.0);
for op in self.1 {
res.append(&mut op.into())
}
res
}
}
impl Instruction {
    /// Length of this instruction's encoding in bytes: every operand's
    /// encoded length plus one byte for the opcode.
    fn byte_length(&self) -> u8 {
        let operand_bytes: u8 = self.1.iter()
            .map(|oper| oper.byte_length())
            .sum();
        operand_bytes + 1
    }
}
impl TryFrom<&[u8]> for Program {
type Error = &'static str;
fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
let mut prog: Vec<Instruction> = vec![];
let mut cur = 0;
while cur < value.len() {
let instruction: Instruction = value[cur..].try_into()?;
cur += instruction.byte_length() as usize;
prog.push(instruction);
}
Ok(Program(prog))
}
}
impl Into<Vec<u8>> for Program {
    /// Encodes the program as the concatenation of every instruction's
    /// encoding, in program order.
    fn into(self) -> Vec<u8> {
        self.0
            .into_iter()
            .flat_map(|instruction| -> Vec<u8> { instruction.into() })
            .collect()
    }
}
impl Index<usize> for Program {
    type Output = Instruction;

    /// Returns the instruction at position `index`.
    ///
    /// # Panics
    ///
    /// Panics when `index` is past the end of the program.
    fn index(&self, index: usize) -> &Instruction {
        self.0
            .get(index)
            .expect("access to out of bounds instruction in vm")
    }
}
// Round-trip and error-path tests for the operand, instruction and program
// byte encodings.
#[cfg(test)]
mod tests {
use crate::instr;
use super::*;
// Operand decoding: bad mode byte, missing payload, a usize payload, and a
// register operand round-tripped through bytes.
#[test]
fn test_operand_parse() {
// 0x13 is not a valid addressing-mode byte
let bad_addressing =
TryInto::<Operand>::try_into(&[0x13, 0x39][..]);
assert_eq!(bad_addressing, Err("illegal addressing mode"));
// 0xf1 (Instr) needs a usize payload that is not there
let truncated_address =
TryInto::<Operand>::try_into(&[0xf1][..]);
assert_eq!(truncated_address, Err("truncated address data"));
let usize_case =
TryInto::<Operand>::try_into(&[Address::Stack.into(),
0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23][..]);
assert!(usize_case.is_ok());
assert_eq!(usize_case.unwrap().0, Address::Stack);
// register operands encode to the mode byte alone
let register_operand = Operand(Address::Expr, 0);
let operand_byte_arr =
TryInto::<Vec<u8>>::try_into(register_operand.clone());
assert!(operand_byte_arr.is_ok());
let br = operand_byte_arr.unwrap();
let operand_bytes = br.as_slice();
assert_eq!(operand_bytes, &[0xf2][..]);
let operand_conv =
TryInto::<Operand>::try_into(operand_bytes);
assert!(operand_conv.is_ok());
assert_eq!(register_operand, operand_conv.unwrap());
}
// Instruction decoding: unknown opcode, truncated and missing operands,
// then zero, one and two operand instructions round-tripped through bytes.
#[test]
fn test_instruction_parse() {
let illegal_instruction =
TryInto::<Instruction>::try_into(&[0x88][..]);
assert_eq!(illegal_instruction, Err("illegal instruction"));
// operand present but its payload is missing
let bad_operand =
TryInto::<Instruction>::try_into(&[instr::TRAP.0, 0xf1][..]);
assert_eq!(bad_operand, Err("truncated address data"));
// opcode present but no operand bytes at all
let need_more_opers =
TryInto::<Instruction>::try_into(&[instr::TRAP.0][..]);
assert_eq!(need_more_opers, Err("operand data truncated"));
let no_operands =
TryInto::<Instruction>::try_into(&[instr::POP.0][..]);
assert!(no_operands.is_ok());
let nop = no_operands.unwrap();
assert_eq!(nop.0, instr::POP);
let nop_bytes =
TryInto::<Vec<u8>>::try_into(nop);
assert!(nop_bytes.is_ok());
assert_eq!(nop_bytes.unwrap(), vec![instr::POP.0]);
let one_operand =
TryInto::<Instruction>::try_into(&[instr::TRAP.0, 0xf3][..]);
assert!(one_operand.is_ok());
let oe_oper = one_operand.unwrap();
assert_eq!(oe_oper.0, instr::TRAP);
assert_eq!(oe_oper.1.len(), 1);
assert_eq!(oe_oper.1[0], Operand(Address::Oper1, 0));
let oe_bytes =
TryInto::<Vec<u8>>::try_into(oe_oper);
assert!(oe_bytes.is_ok());
assert_eq!(oe_bytes.unwrap(), vec![instr::TRAP.0, 0xf3]);
let two_operands =
TryInto::<Instruction>::try_into(&[instr::LOAD.0, 0xf3, 0xf4][..]);
assert!(two_operands.is_ok());
let two_oper = two_operands.unwrap();
assert_eq!(two_oper.0, instr::LOAD);
assert_eq!(two_oper.1.len(), 2);
let two_bytes =
TryInto::<Vec<u8>>::try_into(two_oper.clone());
assert!(two_bytes.is_ok());
assert_eq!(two_bytes.unwrap(), vec![instr::LOAD.0, 0xf3, 0xf4]);
assert_eq!(two_oper.1[0], Operand(Address::Oper1, 0));
assert_eq!(two_oper.1[1], Operand(Address::Oper2, 0));
}
// Program decoding: a single instruction, then two instructions back to back.
#[test]
fn test_program_parse() {
let bytes1 = [instr::LOAD.0, 0xf3, 0xf4];
let out1 = vec![Instruction(instr::LOAD,
vec![Operand(Address::Oper1, 0), Operand(Address::Oper2, 0)])];
let res1 =
TryInto::<Program>::try_into(&bytes1[..]);
assert!(res1.is_ok());
assert_eq!(res1.unwrap().0, out1);
// NOTE(review): the Stack payload below is written as 8 little-endian
// bytes, while decoding uses native-endian usize; this looks like it
// assumes a 64-bit little-endian host, confirm before running elsewhere
let bytes2 = [
instr::LOAD.0, 0xf3, 0xf4,
instr::CLEAR.0, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
];
let out2 = vec![
Instruction(instr::LOAD, vec![
Operand(Address::Oper1, 0),
Operand(Address::Oper2, 0)
]),
Instruction(instr::CLEAR, vec![
Operand(Address::Stack, 1)
])
];
let res2 =
TryInto::<Program>::try_into(&bytes2[..]);
assert!(res2.is_ok());
assert_eq!(res2.unwrap().0, out2);
}
}

460
hyphae/src/vm.rs Normal file
View file

@ -0,0 +1,460 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use mycelium::sexpr::Datum;
use mycelium::number::{Fraction, Number, Numeric};
use crate::hmap::QuickMap;
use crate::stackstack::StackStack;
use crate::instr as i;
use crate::util::{Operand, Program, Address};
use core::cell::RefCell;
use alloc::vec;
use alloc::rc::Rc;
use alloc::vec::Vec;
use alloc::sync::Arc;
use alloc::borrow::ToOwned;
use num::pow::Pow;
/// Number of operand registers in the VM (`Oper1` through `Oper4`).
const NUM_OPERAND_REGISTERS: usize = 4;
/// State of one HyphaeVM instance: the loaded program, its execution
/// environment, the registers and the run/error flags.
pub struct VM {
// execution environment
// stack of stack frames; PUSH/POP/ENTER/EXIT act on it and `Stack`
// operands index into it
pub stack: StackStack<Datum>,
// symbol table written by BIND and queried by UNBIND/BOUND
pub symtab: QuickMap<Operand>,
// the program being run; `ictr` indexes into it
pub prog: Program,
// NOTE(review): not touched by the code visible here; presumably file
// descriptors for use by traps, confirm
pub fds: Vec<u64>,
// callbacks invoked by the TRAP instruction, selected by index
pub traps: Vec<Arc<dyn Fn(&mut VM)>>,
// data registers
// result register: most operations store their output here
pub expr: Datum,
// operand registers, addressed as Oper1..Oper4
pub oper: [Datum; NUM_OPERAND_REGISTERS],
// control flow registers
// NOTE(review): not touched by the code visible here; presumably a return
// address, confirm
pub retn: usize,
// index in `prog` of the instruction being executed
pub ictr: usize,
// error datum, set when an instruction fails or by PANIC
pub errr: Datum,
// state
// cleared when the VM halts, fails, or runs off the end of the program
pub running: bool,
// set when an instruction fails
pub err_state: bool,
}
impl VM {
/// Executes instructions starting at `ictr` until the instruction counter
/// runs past the end of the program, the VM stops running, or an error
/// state is raised.
///
/// Returns immediately, leaving the flags as they are, when `err_state`
/// is set or `running` is cleared before an instruction is fetched.
/// Reaching the end of the program clears `running`.
///
/// NOTE(review): `ictr` is advanced after every instruction, including
/// one that assigned `ictr` itself; confirm that this is the intended
/// landing point for jumps.
pub fn run_program(&mut self) {
    if self.prog.0.is_empty() {
        self.running = false;
    }

    loop {
        // the length is re-read every pass rather than cached
        let past_end = self.ictr >= self.prog.0.len();
        if past_end {
            break;
        }

        let stopped = self.err_state || !self.running;
        if stopped {
            return;
        }

        self.execute_instruction();
        self.ictr += 1;
    }

    self.running = false;
}
#[inline(always)]
fn execute_instruction(&mut self) {
let instr = &self.prog.0[self.ictr].clone();
macro_rules! e {
( $err:expr ) => {
{
self.running = false;
self.err_state = true;
self.errr = Datum::String($err.as_bytes().to_vec());
return;
}
}
}
macro_rules! deref {
( $oper:expr ) => {
match $oper.0 {
Address::Expr => &self.expr,
Address::Oper1 => &self.oper[0],
Address::Oper2 => &self.oper[1],
Address::Oper3 => &self.oper[2],
Address::Oper4 => &self.oper[3],
Address::Stack => &self.stack[$oper.1],
Address::Numer => e!("attempt to dereference constant numeric data"),
Address::Instr => e!("bad access to instruction data"),
}
}
}
macro_rules! deref_mut {
( $oper:expr ) => {
match $oper.0 {
Address::Expr => &mut self.expr,
Address::Oper1 => &mut self.oper[0],
Address::Oper2 => &mut self.oper[1],
Address::Oper3 => &mut self.oper[2],
Address::Oper4 => &mut self.oper[3],
Address::Instr => e!("bad mutable access to instruction data"),
// Stack, Numer
_ => e!("mutable access to immutable data"),
}
}
}
macro_rules! do_jmp {
( $idx:expr ) => {
let Operand(Address::Instr, target) = instr.1[$idx] else {
e!("illegal argument to jump");
};
if target >= self.prog.0.len() {
e!("out of bounds jump caught");
}
self.ictr = target;
}
}
macro_rules! lr_oper {
( $in_type:ident, $oper:tt, $out_type:ident ) => {
self.expr = Datum::$out_type(*match deref!(&instr.1[0]){
Datum::$in_type(l) => l,
_ => e!("illegal argument to instruction"),
} $oper *match deref!(&instr.1[1]){
Datum::$in_type(l) => l,
_ => e!("illegal argument to instruction"),
})
}
}
match instr.0 {
i::TRAP => {
let Operand(Address::Numer, idx) = instr.1[0] else {
e!("illegal argument to TRAP instruction");
};
if idx >= self.traps.len() {
e!("access to out of bounds trap!")
}
self.traps[idx].clone()(self)
},
// symtable ops
i::BIND => {
let Datum::String(tag) = deref!(&instr.1[0]) else {
e!("illegal argument to BIND instruction");
};
let tag = unsafe { str::from_utf8_unchecked(&tag).to_owned() };
self.symtab.insert(tag, instr.1[1].clone());
},
i::UNBIND => {
let Datum::String(tag) = deref!(&instr.1[0]) else {
e!("illegal argument to UNBIND instruction");
};
let tag = unsafe { str::from_utf8_unchecked(&tag) };
self.symtab.remove(&tag);
},
i::BOUND => {
let Datum::String(tag) = deref!(&instr.1[0]) else {
e!("illegal argument to BOUND instruction");
};
let tag = unsafe { str::from_utf8_unchecked(&tag) };
self.symtab.contains_key(&tag);
},
// stack ops
i::PUSH => self.stack.push_current_stack(deref!(&instr.1[0]).clone()),
i::POP => _ = self.stack.pop_current_stack(),
i::ENTER => self.stack.add_stack(),
i::EXIT => self.stack.destroy_top_stack(),
// movement ops
i::LOAD => *deref_mut!(&instr.1[1]) = deref!(&instr.1[0]).clone(),
i::CLEAR => *deref_mut!(&instr.1[0]) = Datum::None,
// control flow ops
i::NOP => (),
i::HALT => self.running = false,
i::PANIC => {
self.running = false;
self.err_state = false;
self.errr = deref!(&instr.1[0]).clone()
},
i::JMP => {
do_jmp!(0);
},
i::JMPIF => {
if let Datum::Bool(true) = self.expr {
do_jmp!(0);
}
},
// boolean ops
i::EQ => self.expr = Datum::Bool(*deref!(&instr.1[0]) == *deref!(&instr.1[1])),
i::LT => lr_oper!(Number, <, Bool),
i::GT => lr_oper!(Number, >, Bool),
i::LTE => lr_oper!(Number, <=, Bool),
i::GTE => lr_oper!(Number, >=, Bool),
i::BOOL_NOT => {
self.expr = Datum::Bool(!{
let Datum::Bool(a) = self.expr else {
e!("illegal argument to BOOL_NOT instruction");
};
a
});
},
i::BOOL_AND => lr_oper!(Bool, &&, Bool),
i::BOOL_OR => lr_oper!(Bool, ||, Bool),
// char / byte ops
i::BYTE_AND => lr_oper!(Char, &, Char),
i::BYTE_OR => lr_oper!(Char, |, Char),
i::XOR => lr_oper!(Char, ^, Char),
i::BYTE_NOT => {
self.expr = Datum::Char(!{
let Datum::Char(a) = self.expr else {
e!("illegal argument to BYTE_NOT instruction");
};
a
});
},
// numeric ops
i::ADD => lr_oper!(Number, +, Number),
i::SUB => lr_oper!(Number, -, Number),
i::MUL => lr_oper!(Number, *, Number),
i::FDIV => lr_oper!(Number, /, Number),
i::IDIV => {
let Datum::Number(l) = deref!(&instr.1[0]) else {
e!("illegal argument to IDIV instruction");
};
let Datum::Number(r) = deref!(&instr.1[1]) else {
e!("illgal argument to IDIV instruction");
};
let Fraction(l, 1) = l.make_exact() else {
e!("integer division on non integer value");
};
let Fraction(r, 1) = r.make_exact() else {
e!("integer division on non integer value");
};
self.expr = Datum::Number(Number::Fra(Fraction(l / r, 1)));
},
i::POW => {
let Datum::Number(l) = deref!(&instr.1[0]) else {
e!("illegal argument to POW instruction");
};
let Datum::Number(r) = deref!(&instr.1[1]) else {
e!("illgal argument to POW instruction");
};
self.expr = Datum::Number((*l).pow(*r));
},
i::INC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) {
*src = *src + Number::Fra(Fraction(1, 1));
} else {
e!("illegal argument to INC instruction");
},
i::DEC => if let Datum::Number(src) = deref_mut!(&instr.1[0]) {
*src = *src - Number::Fra(Fraction(1, 1));
} else {
e!("illegal argument to INC instruction");
},
// byte/char to and from number conversions
i::CTON => {
let src = deref_mut!(&instr.1[0]);
if let Datum::Char(schr) = src {
*src = Datum::Number(Number::Fra(Fraction(*schr as isize, 1)));
} else {
e!("illegal argument to CTON instruction");
}
},
i::NTOC => {
let src = deref_mut!(&instr.1[0]);
if let Datum::Number(snum) = src {
let n = snum.make_inexact();
if !snum.is_exact() || n.0.fract() != 0.0 || n.0 > u8::MAX.into() || n.0 < 0.0 {
e!("input to NTOC cannot cleanly convert");
}
*src = Datum::Char(n.0.trunc() as u64 as u8);
} else {
e!("illegal argument to NTOC instruction");
}
},
i::MKVEC => self.expr = Datum::Vector(RefCell::from(vec![])),
i::MKBVEC => self.expr = Datum::ByteVector(RefCell::from(vec![])),
i::INDEX => {
let Datum::Number(idx) = deref!(&instr.1[1]) else {
e!("illegal argument to INDEX instruction");
};
let idx = idx.make_inexact();
if !idx.is_exact() || idx.0.fract() != 0.0 {
e!("illegal argument to INDEX instruction");
}
let idx = idx.0.trunc() as usize;
match deref!(&instr.1[0]) {
Datum::Vector(v) => {
let a = (*v.borrow()[idx].clone()).clone();
self.expr = a;
},
Datum::ByteVector(bv) => {
let a = Datum::Char(bv.borrow()[idx]);
self.expr = a;
},
Datum::List(l) => self.expr = l[idx].clone(),
_ => e!("illegal argument to INDEX instruction")
};
},
i::LENGTH => match deref!(&instr.1[0]) {
Datum::Vector(v) => {
let a = Datum::Number(Number::Fra(Fraction(v.borrow().len() as isize, 1)));
self.expr = a;
},
Datum::ByteVector(bv) => {
let a = Datum::Number(Number::Fra(Fraction(bv.borrow().len() as isize, 1)));
self.expr = a;
},
Datum::List(l) =>
self.expr = Datum::Number(Number::Fra(Fraction(l.len() as isize, 1))),
_ => e!("illegal argument to LENGTH instruction"),
},
i::SUBSL => {
let Datum::Number(st) = deref!(&instr.1[1]) else {
e!("illegal argument to SUBSL instruction");
};
let Datum::Number(ed) = deref!(&instr.1[2]) else {
e!("illegal argument to SUBSL instruction");
};
if !st.is_exact() || !ed.is_exact() {
e!("illegal argument to SUBSL instruction");
}
let st = st.make_inexact();
let ed = ed.make_inexact();
if st.0.fract() != 0.0 || ed.0.fract() != 0.0 {
e!("SUBSL: FP precision error");
}
let st = st.0.trunc() as usize;
let ed = ed.0.trunc() as usize;
match deref!(&instr.1[0]) {
Datum::Vector(v) => {
let a = Datum::Vector(RefCell::from(v.borrow()[st..ed].to_vec()));
self.expr = a;
},
Datum::ByteVector(bv) => {
let a = Datum::ByteVector(RefCell::from(bv.borrow()[st..ed].to_vec()));
self.expr = a;
},
Datum::List(a) =>
self.expr = Datum::List(Rc::new(
(**a).subsl(st as isize, ed as isize))),
_ => e!("illegal argument to SUBSL instruction")
};
}
i::INSER => {
let Datum::Number(idx) = deref!(&instr.1[2]) else {
e!("illegal argument to INSER instruction");
};
let idx = idx.make_inexact();
if !idx.is_exact() || idx.0.fract() != 0.0 {
e!("illegal argument to INSER instruction");
}
let idx = idx.0.trunc() as usize;
match deref!(&instr.1[0]) {
Datum::Vector(v) => {
v.borrow_mut().insert(idx, deref!(&instr.1[1]).clone().into());
},
Datum::ByteVector(bv) => {
let Datum::Char(b) = deref!(&instr.1[1]) else {
e!("INSER instruction can only insert a byte into a bytevector");
};
bv.borrow_mut().insert(idx, *b);
},
_ => e!("illegal argument to INSER instruction")
}
},
i::CAR => {
let Datum::List(arg) = deref!(&instr.1[0]) else {
e!("illegal argument to CAR instruction");
};
self.expr = (*arg.0).clone();
},
i::CDR => {
let Datum::List(arg) = deref!(&instr.1[0]) else {
e!("illegal argument to CAR instruction");
};
self.expr = (*arg.1).clone();
},
i::CONS => {
/* CONS BEHAVIOR
* L Datum is not list means create a new standard form list
* L Datum is list then append the second element to the first
*/
},
// in order to maintain a language agnostic VM these must be traps
//i::PARSE => todo!("implement AST API"),
//i::EVAL => todo!("implement AST API"),
_ => {
e!("illegal instruction");
},
};
}
}

View file

@ -25,7 +25,6 @@ pub mod sexpr;
pub mod lexer;
pub mod parser;
pub mod number;
pub mod stackstack;
pub mod hmap;
extern crate alloc;

View file

@ -529,7 +529,7 @@ impl Numeric for Float {
if self.0.fract() == 0.0 {
Fraction(self.0 as isize, 1)
} else {
unimplemented!("insert rational approximation procedure here")
todo!("rational approximation implementation")
}
}
}

View file

@ -16,6 +16,7 @@
*/
use core::fmt::Display;
use core::cell::RefCell;
use crate::lexer::{
LexError,
@ -414,11 +415,11 @@ impl Parser {
}
if is_bv {
return Ok(Rc::from(Datum::ByteVector(bv_stack)))
return Ok(Rc::from(Datum::ByteVector(RefCell::from(bv_stack))))
}
if token.token_type == LexTokenType::VectorStart {
return Ok(Rc::from(Datum::Vector(lex_stack)))
return Ok(Rc::from(Datum::Vector(RefCell::from(lex_stack))))
}
// handle an empty list

View file

@ -16,6 +16,9 @@
*/
use core::fmt::{self, Formatter};
use core::ops::Index;
use core::cell::RefCell;
use alloc::format;
use alloc::rc::Rc;
use alloc::vec::Vec;
@ -23,7 +26,7 @@ use alloc::string::String;
use crate::number::Number;
#[derive(Default, Clone)]
#[derive(Default, Clone, PartialEq)]
pub enum Datum {
Number(Number),
Bool(bool),
@ -31,8 +34,8 @@ pub enum Datum {
Symbol(String),
Char(u8),
String(Vec<u8>),
Vector(Vec<Rc<Datum>>),
ByteVector(Vec<u8>),
Vector(RefCell<Vec<Rc<Datum>>>),
ByteVector(RefCell<Vec<u8>>),
#[default]
None,
}
@ -45,7 +48,8 @@ fn byte_to_escaped_char(b: u8) -> String {
}
}
fn fmt_vec<T: fmt::Display>(v: &Vec<T>) -> String {
fn fmt_vec<T: fmt::Display>(ve: &RefCell<Vec<T>>) -> String {
let v = ve.borrow();
if v.len() == 0 {
return String::new()
}
@ -102,9 +106,46 @@ impl fmt::Debug for Datum {
}
#[derive(Default, Clone)]
#[derive(Default, Clone, PartialEq)]
/// A list cell: `.0` holds the element at this position and `.1` holds the
/// remainder of the list. The accessors in this file (`Index`, `len`, `subsl`)
/// only keep walking while `.1` is a `Datum::List`.
pub struct Ast(pub Rc<Datum>, pub Rc<Datum>);
impl Ast {
    /// Copies the elements at positions `start..end` (zero-based, half-open,
    /// matching the `[st..ed]` slicing used for vectors) into a new list.
    ///
    /// A negative `start` is treated as `0`. An empty range (`end <= start`)
    /// yields a cell whose head and tail are both `Datum::None`.
    ///
    /// # Panics
    /// Panics when the range reaches past the last cell of the list, or when
    /// the first selected cell has a `Datum::None` head (see `Index`).
    pub fn subsl(&self, start: isize, end: isize) -> Ast {
        let start = start.max(0);
        if end <= start {
            return Ast(Rc::from(Datum::None), Rc::from(Datum::None));
        }

        if start > 0 {
            // The range begins further down the list: skip this cell.
            let Datum::List(ref next) = *self.1 else {
                panic!("index into improper list form")
            };
            return next.subsl(start - 1, end - 1);
        }

        // start == 0 < end: this cell is the first element of the result.
        // Going through `Index` keeps the out-of-bounds check on the head.
        let head = Rc::from(self[0].clone());
        if end == 1 {
            return Ast(head, Rc::from(Datum::None));
        }

        let Datum::List(ref next) = *self.1 else {
            panic!("index into improper list form")
        };
        Ast(head, Rc::from(Datum::List(Rc::from(next.subsl(0, end - 1)))))
    }

    /// Counts the cells of the list: this cell plus every cell reachable by
    /// following `.1` while it is a `Datum::List`.
    pub fn len(&self) -> usize {
        let mut count = 1;
        let mut cell = self;
        while let Datum::List(ref next) = *cell.1 {
            count += 1;
            cell = &**next;
        }
        count
    }
}
impl Iterator for Ast {
type Item = Rc<Datum>;
@ -127,6 +168,25 @@ impl Iterator for Ast {
}
}
impl Index<usize> for Ast {
    type Output = Datum;

    /// Returns the element stored `index` cells into the list.
    ///
    /// Panics when the list has fewer than `index + 1` cells, or when the
    /// selected cell's head is `Datum::None`.
    fn index(&self, index: usize) -> &Self::Output {
        // Step over `index` cells, following the tail each time.
        let mut cell = self;
        for _ in 0..index {
            match *cell.1 {
                Datum::List(ref rest) => cell = &**rest,
                _ => panic!("out of bounds indexing into AST"),
            }
        }

        match *cell.0 {
            Datum::None => panic!("out of bounds indexing into AST"),
            _ => &*cell.0,
        }
    }
}
impl fmt::Display for Ast {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "({}", self.0)?;

View file

@ -0,0 +1,642 @@
use alloc::boxed::Box;
use alloc::{vec, vec::Vec};
use alloc::fmt::Debug;
use lexer::{E_TOO_MANY_DECIMALS, E_TOO_MANY_SLASH};
use core::cmp::Ordering;
use core::{fmt, u8};
use core::ops::{Add, Div, Mul, Sub};
// Error messages returned by `Number::from_str_into`.
pub const E_INCOMPREHENSIBLE: &str = "could not parse number literal";
pub const E_POUND_TRUNCATED: &str = "pound sign implies additional input";
pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal";
pub const E_UNKNOWN_CONTROL: &str = "unknown character in number literal";
pub const E_EMPTY_INPUT: &str = "empty string cannot be a number";
// Literal spellings of the symbolic values matched by `Number::from_str_into`.
const NUM_INF: &str = "+inf.0";
const NUM_NEG_INF: &str = "-inf.0";
const NUM_NAN: &str = "+nan.0";
const NUM_NEG_NAN: &str = "-nan.0";
// Bit flags tested against the first byte (`self.0[0]`) of an encoded number.
pub const NegativeFlag: u8 = 0b10000000; // positive value if off
pub const DecimalFlag: u8 = 0b01000000; // single integer if off
pub const FractionFlag: u8 = 0b00100000; // decimal if off
pub const ScientificFlag: u8 = 0b00010000; // requires a second flags byte
pub const InfiniteFlag: u8 = 0b00001000; // can be positive or negative
pub const NotANumberFlag: u8 = 0b00000100; // can be positive or negative because r7rs
pub const OverflownFlag: u8 = 0b00000010; // poisons exactness
/* NUMBER BYTES FORMAT
* Generally the format within the byte array operates like this
* (guaranteed header) 1. NumberFlags (u8)
* (for each integer) 2. Byte Length (u8)
* (for each integer) 3. N following bytes of data
*
* If Scientific Notation is used the leading number may be a decimal.
* In this case, there will be three total numbers
*
* All numbers are big endian
*/
/// Borrowed view over a byte-encoded number: the first byte carries the
/// flag bits and the remaining bytes hold the length-prefixed integers.
///
/// `PartialEq` is deliberately not derived: this file provides a hand-written
/// `impl PartialEq for Number<'_>`, and deriving it as well is a conflicting
/// implementation (E0119).
#[repr(transparent)]
#[derive(Clone, Debug)]
pub struct Number<'src>(pub &'src [u8]);
/* WARNING
* member functions tend to assume that number encoding is consistent
* use Number::is_valid() to double check numbers from unknown sources
*
* TODO: maybe mark raw-indexing member functions as unsafe
*/
impl Number<'_> {
/// Computes how many bytes this encoded number occupies, reading the flags
/// byte and the length prefix of each integer.
///
/// Assumes the encoding is well formed (see `is_valid`); indexing panics
/// on a truncated buffer.
#[inline(always)]
pub fn byte_length(&self) -> u8 {
    // infinities and NaNs consist of the flags byte alone
    if self.0[0] & (InfiniteFlag | NotANumberFlag) != 0 {
        return 1;
    }

    // flags byte + length byte + data of the first integer
    let mut len = self.0[1] + 2;

    // second integer: its length byte plus its data
    if self.0[0] & (DecimalFlag | FractionFlag | ScientificFlag) != 0 {
        len += self.0[len as usize] + 1;
    }

    // third integer (scientific notation over a decimal): it carries a
    // length byte of its own as well, just as `is_valid` accounts for
    if self.0[0] & ScientificFlag != 0 && self.0[0] & DecimalFlag != 0 {
        len += self.0[len as usize] + 1;
    }

    len
}
/// Checks that the flag combination is legal and that the buffer is long
/// enough for every length-prefixed integer the flags announce.
///
/// Infinities and NaNs are valid exactly when they are a lone flags byte
/// with none of the decimal / fraction / scientific flags set. All cursor
/// arithmetic is done in `usize` so a hostile length byte cannot overflow.
pub fn is_valid(&self) -> bool {
    let len = self.0.len();
    if len < 1 {
        return false;
    }

    let decimal = self.0[0] & DecimalFlag != 0;
    let fraction = self.0[0] & FractionFlag != 0;
    let scientific = self.0[0] & ScientificFlag != 0;
    let overflown = self.0[0] & OverflownFlag != 0;
    let infinite = self.0[0] & InfiniteFlag != 0;
    let notanumber = self.0[0] & NotANumberFlag != 0;

    // check flags
    if overflown {
        return false;
    }
    if (decimal && fraction) || (scientific && fraction) {
        return false;
    }
    if infinite || notanumber {
        // symbolic values carry no operands at all
        return !(decimal || fraction || scientific) && len == 1;
    }

    // at least 3 bytes for a single u8
    if len < 3 {
        return false;
    }

    // index of the byte following the first integer
    let mut cur = usize::from(self.0[1]) + 2;
    if len < cur {
        return false;
    }

    if decimal || fraction || scientific {
        // `cur` is the length byte of the second integer
        if len < cur + 1 {
            return false;
        }
        cur += usize::from(self.0[cur]);
        if len < cur + 1 {
            return false;
        }
    }

    if scientific && decimal {
        // step onto the length byte of the third integer
        cur += 1;
        if len < cur + 1 {
            return false;
        }
        cur += usize::from(self.0[cur]);
        if len < cur + 1 {
            return false;
        }
    }

    true
}
/// Reports whether the number is exact, i.e. the scientific-notation flag
/// is clear in the flags byte.
#[inline(always)]
pub fn is_exact(&self) -> bool {
    let flags = self.0[0];
    (flags & ScientificFlag) == 0
}
/// Writes an exact form of this number into `dst`: scientific notation is
/// expanded, every other encoding is copied through unchanged.
#[inline(always)]
pub fn make_exact_into(&self, dst: &mut Vec<u8>) {
    let is_scientific = self.0[0] & ScientificFlag != 0;
    if is_scientific {
        self.normalize_scientific_into(dst);
    } else {
        self.copy_into(dst);
    }
}
// Intended to write an inexact form of this number into `dst`.
// Not implemented yet: calling it panics via `todo!()`.
#[inline(always)]
pub fn make_inexact_into(&self, dst: &mut Vec<u8>) {
// basically just convert a fraction into an actual division
todo!()
}
// Appends the raw encoded bytes to `dst`; use this so you don't have to
// worry about clone while casting.
#[inline(always)]
pub fn copy_into(&self, dst: &mut Vec<u8>) {
    dst.extend_from_slice(self.0);
}
// Called by `make_exact_into` to expand scientific notation into `dst`.
// Not implemented yet: calling it panics via `todo!()`.
#[inline(always)]
pub fn normalize_scientific_into(&self, dst: &mut Vec<u8>) {
todo!()
}
/// Reduces a fraction to lowest terms. Numbers without the fraction flag
/// are left untouched; the reduction itself is not implemented yet and
/// panics via `todo!()`.
#[inline(always)]
pub fn simplify_fraction_in_place(&mut self) {
    let is_fraction = self.0[0] & FractionFlag != 0;
    if is_fraction {
        // can technically do this in place
        // each element of the fraction will only shrink
        todo!()
    }
}
#[inline(always)]
pub fn from_str_into(src: &str, dst: &mut Vec<u8>) -> Result<(), &'static str> {
// handle symbolic values
match src {
NUM_INF => {
dst.push(0 as u8 | InfiniteFlag);
return Ok(());
},
NUM_NEG_INF => {
dst.push(0 as u8 | NegativeFlag | InfiniteFlag);
return Ok(());
},
NUM_NAN => {
dst.push(0 as u8 | NotANumberFlag);
return Ok(());
},
NUM_NEG_NAN => {
dst.push(0 as u8 | NegativeFlag | NotANumberFlag);
return Ok(());
},
_ => (),
}
let mut ctrl_flags = 0 as u8;
let mut operands = vec![];
let mut digits_per_byte = 3; // default to decimal encoding
let mut base = 0;
let mut iter = src.chars().peekable();
match iter.next() {
Some('+') => (),
Some('-') => {
ctrl_flags |= NegativeFlag;
},
Some('#') => {
match iter.next() {
None => return Err(E_POUND_TRUNCATED),
Some('i') => /* force_inexact = true */ (),
Some('e') => /* force_exact = true */ (),
Some('x') => { digits_per_byte = 2; base = 16 },
Some('d') => { digits_per_byte = 3; base = 10 },
Some('o') => { digits_per_byte = 4; base = 8 },
Some('b') => { digits_per_byte = 8; base = 2 },
_ => return Err(E_UNKNOWN_CONTROL),
}
},
Some(a) if a.is_digit(10) => (),
Some(_) => return Err(E_INCOMPREHENSIBLE),
None => return Err(E_EMPTY_INPUT),
}
let mut ops_needed = 1;
if base != 10 {
// cant mix non-decimal base and other number representations
let mut len = 0 as u8;
while let Some(chunk) = {
let mut chk = vec![];
for _ in 0..digits_per_byte {
if let Some(c) = iter.next() {
chk.push(c as u8)
}
}
if chk.len() < 1 { None } else { Some(chk) }
} {
let Ok(val) = u8::from_str_radix(
unsafe {str::from_utf8_unchecked(chunk.as_slice())}, base) else {
return Err(E_BASE_PARSE_FAIL)
};
operands.push(val);
len += 1;
}
// integer numbers prepended with their length
operands.insert(0, len);
ops_needed -= 1;
} else {
// just a decimal number, but could have a weird format
loop {
macro_rules! pack_operand {
() => {
let s = unsafe { str::from_utf8_unchecked(operands.as_slice()) };
let f = usize::from_str_radix(&s, 10).expect("str cast");
let f = f.to_be_bytes();
operands.clear();
dst.push(f.len() as u8);
dst.append(&mut f.to_vec());
ops_needed -= 1;
}
}
match iter.next() {
Some(c) if c.is_digit(10) => {
operands.push(c as u8);
},
Some('.') => {
ops_needed += 1;
if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
return Err(E_INCOMPREHENSIBLE)
}
if ctrl_flags & DecimalFlag != 0 {
return Err(E_TOO_MANY_DECIMALS)
}
ctrl_flags |= DecimalFlag;
pack_operand!();
},
Some('/') => {
ops_needed += 1;
if ctrl_flags & (DecimalFlag | ScientificFlag) != 0 {
return Err(E_INCOMPREHENSIBLE)
}
if ctrl_flags & FractionFlag != 0 {
return Err(E_TOO_MANY_SLASH)
}
ctrl_flags |= DecimalFlag;
pack_operand!();
},
Some('e') => {
ops_needed += 1;
if ctrl_flags & FractionFlag != 0 {
return Err(E_INCOMPREHENSIBLE)
}
ctrl_flags |= ScientificFlag;
let mut newctrl = 0 as u8;
if let Some('-') = iter.peek() {
newctrl |= NegativeFlag;
}
pack_operand!();
dst.push(newctrl);
},
Some(_) => return Err(E_INCOMPREHENSIBLE),
None => {
pack_operand!();
break;
}
}
}
}
if ops_needed != 0 {
return Err(E_INCOMPREHENSIBLE);
}
dst.insert(0, ctrl_flags);
Number(dst.as_slice()).simplify_fraction_in_place();
Ok(())
}
/// Encodes the single byte `src` as a positive integer into `dst` and
/// returns a view over `dst`.
///
/// The encoding is flags byte, length byte, then the data byte, so the
/// result satisfies `is_valid` (which needs at least three bytes for a
/// single `u8`). `dst` is expected to be empty on entry, because the
/// returned view covers all of it.
pub fn from_u8_into(src: u8, dst: &mut Vec<u8>) -> Number {
    dst.push(0 as u8); // flags: plain positive integer
    dst.push(1 as u8); // byte length of the integer that follows
    dst.push(src);
    Number(dst.as_slice())
}
}
// Textual rendering of an encoded number.
// Not implemented yet: formatting a `Number` panics via `todo!()`.
impl fmt::Display for Number<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// can implement after I finish division
todo!()
}
}
/// Views the bytes of a boxed slice as a `Number` without copying.
impl<'a> From<&'a Box<[u8]>> for Number<'a> {
    fn from(value: &'a Box<[u8]>) -> Self {
        let bytes: &'a [u8] = &value[..];
        Number(bytes)
    }
}
/// Views the contents of a byte vector as a `Number` without copying.
impl<'a> From<&'a Vec<u8>> for Number<'a> {
    fn from(value: &'a Vec<u8>) -> Self {
        let bytes: &'a [u8] = &value[..];
        Number(bytes)
    }
}
impl<'a> From<&'a [u8]> for Number<'a> {
fn from(value: &'a [u8]) -> Self {
Number(value)
}
}
impl<'a> Into<&'a [u8]> for Number<'a> {
fn into(self) -> &'a [u8] {
self.0
}
}
// Addition of two encoded numbers, declared to produce an owned byte buffer.
// Not implemented yet: calling it panics via `todo!()`.
impl Add for Number<'_> {
type Output = Box<[u8]>;
fn add(self, rhs: Self) -> Self::Output {
todo!()
}
}
// Subtraction of two encoded numbers, declared to produce an owned byte buffer.
// Not implemented yet: calling it panics via `todo!()`.
impl Sub for Number<'_> {
type Output = Box<[u8]>;
fn sub(self, rhs: Self) -> Self::Output {
todo!()
}
}
// Multiplication of two encoded numbers, declared to produce an owned byte buffer.
// Not implemented yet: calling it panics via `todo!()`.
impl Mul for Number<'_> {
type Output = Box<[u8]>;
fn mul(self, rhs: Self) -> Self::Output {
todo!()
}
}
// Division of two encoded numbers, declared to produce an owned byte buffer.
// Not implemented yet: both the operator and its `div_ints` helper panic
// via `todo!()`; the comment block below sketches the planned case analysis.
impl Div for Number<'_> {
type Output = Box<[u8]>;
fn div(self, rhs: Self) -> Self::Output {
// divide unsigned integer by unsigned integer
// the inputs (lh and rh) start with length byte
// returns a decimal index
fn div_ints(lh: &[u8], rh: &[u8], dest: &mut Vec<u8>) -> u8 {
todo!()
}
/* Options
* divide a single int by a single int
* - (make fraction)
* divide a fraction by a single int
* - (multiply denominator)
* divide a decimal by a single int
* - (divide straight through)
* divide a scientific note by a single int
* - divide the first num
* - multiply by however much is needed for ones place (like 3.5)
* - add or subtract from the second number accordingly
*
* divide a single int by a fraction
* - output denom * lh / numer
* divide a single int by a decimal
*/
todo!()
}
}
// Hand-written equality between two encoded numbers.
// Not implemented yet: comparing panics via `todo!()`.
impl PartialEq for Number<'_> {
fn eq(&self, other: &Number) -> bool {
todo!()
}
}
// Ordering between two encoded numbers.
// Not implemented yet: comparing panics via `todo!()`.
impl PartialOrd for Number<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
todo!()
}
}
// Unit tests for number parsing, arithmetic, exponentiation and ordering.
// NOTE(review): these cases go through `str::parse::<Number>()`, enum-style
// constructors (`Number::Flt`, `Number::Fra`, `Number::Sci`, `Number::Sym`,
// `Float`, `Fraction`, `ScientificNotation`), `pow`, `make_inexact` and error
// constants such as `E_SCIENTIFIC_E`, none of which are defined in this
// file -- TODO confirm they are meant to be ported to the byte-encoded `Number`.
#[cfg(test)]
mod tests {
use super::*;
// Literal parsing across plain, prefixed, fractional and scientific forms.
// NOTE(review): "2" is expected to fail with two different errors below,
// while "1" is expected to parse -- TODO confirm the intended cases.
#[test]
fn parse_number_tests() {
assert_eq!("1.3".parse::<Number>(),
Ok(Number::Flt(Float(1.3))));
assert_eq!("1".parse::<Number>(),
Ok(Number::Flt(Float(1 as f64))));
assert_eq!("1.3e3".parse::<Number>(),
Ok(Number::Sci(ScientificNotation(1.3, 3))));
assert_eq!("+1.3".parse::<Number>(),
Ok(Number::Flt(Float(1.3))));
assert_eq!("-1.3".parse::<Number>(),
Ok(Number::Flt(Float(-1.3))));
assert_eq!("#d234".parse::<Number>(),
Ok(Number::Flt(Float(234.0))));
assert_eq!("#o17".parse::<Number>(),
Ok(Number::Fra(Fraction(15, 1))));
assert_eq!("#xAA".parse::<Number>(),
Ok(Number::Fra(Fraction(170, 1))));
assert_eq!("#b101".parse::<Number>(),
Ok(Number::Flt(Float(5.0))));
assert_eq!("2/4".parse::<Number>(),
Ok(Number::Fra(Fraction(2, 4))));
assert_eq!("#e1/5".parse::<Number>(),
Ok(Number::Fra(Fraction(1, 5))));
assert_eq!("#i1/5".parse::<Number>(),
Ok(Number::Flt(Float(0.2))));
assert_eq!("#e1e1".parse::<Number>(),
Ok(Number::Sci(ScientificNotation(1.0, 1))));
assert_eq!("+inf.0".parse::<Number>(),
Ok(Number::Sym(SymbolicNumber::Inf)));
assert_eq!("2e3".parse::<Number>(),
Ok(ScientificNotation(2.0, 3)));
assert_eq!("0e1".parse::<Number>(),
Ok(ScientificNotation(0.0, 1)));
assert_eq!("-1e34".parse::<Number>(),
Ok(ScientificNotation(-1.0, 34)));
assert_eq!("3.3e3".parse::<Number>(),
Ok(ScientificNotation(3.3, 3)));
assert_eq!("2".parse::<Number>(),
Err(E_SCIENTIFIC_E));
assert_eq!("2e2e2".parse::<Number>(),
Err(E_SCIENTIFIC_MULTI_E));
assert_eq!("2/3".parse::<Number>(),
Ok(Fraction(2, 3)));
assert_eq!("0/1".parse::<Number>(),
Ok(Fraction(0, 1)));
assert_eq!("-1/34".parse::<Number>(),
Ok(Fraction(-1, 34)));
assert_eq!("2".parse::<Number>(),
Err(E_NO_DENOMINATOR));
assert_eq!("2/2/2".parse::<Number>(),
Err(E_MULTI_DENOMINATOR));
assert_eq!("2/0".parse::<Number>(),
Err(E_ZERO_DENOMINATOR));
assert_eq!("3.3/3".parse::<Number>(),
Err(E_NUMERATOR_PARSE_FAIL));
}
// Each case is [x, y, z] with x + y == z; commutativity and the inverse
// subtraction are asserted as well.
#[test]
fn test_number_addition_subtraction_cases() {
let cases = vec![
vec!["1/5", "4/5", "1/1"],
vec!["1/5", "0.8", "1/1"],
vec!["1e1", "2.0", "12/1"],
vec!["1e1", "2/1", "12/1"],
vec!["1e1", "1/2", "10.5"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
// test some mathematical properties
assert_eq!(x + y, z);
assert_eq!(x + y, y + x);
assert_eq!(z - x, y);
assert_eq!(x + y - x, y);
});
// theres no reason this should adhere to all the other rules
let x = "+inf.0".parse::<Number>().unwrap();
let y = "1e1".parse::<Number>().unwrap();
let z = "+inf.0".parse::<Number>().unwrap();
assert_eq!(x + y, z);
}
// Each case is [x, y, z] with x * y == z; commutativity and the inverse
// division are asserted as well.
#[test]
fn test_number_multiplication_division_cases() {
let cases = vec![
vec!["1/5", "5e0", "1/1"],
vec!["1/5", "5", "1/1"],
vec!["1/5", "2/1", "2/5"],
vec!["4.4", "1/2", "2.2"],
vec!["12.0", "1/2", "6/1"],
vec!["1e1", "2.0", "20/1"],
vec!["1e1", "2/1", "20/1"],
vec!["1e1", "1/2", "5/1"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
// test some mathematical properties
assert_eq!(x * y, z);
assert_eq!(x * y, y * x);
assert_eq!(z / x, y);
assert_eq!(x * y / x, y);
});
}
// Each case is [x, y, z] with x.pow(y) == z.
#[test]
fn test_number_pow_cases() {
// TODO: add scientific notation cases
let cases = vec![
vec!["2", "2", "4"],
vec!["2/1", "2/1", "4/1"],
vec!["2/1", "2/-1", "1/4"],
vec!["2/1", "2/2", "2/1"],
vec!["2/1", "2.0", "4/1"],
vec!["27/8", "2/-3", "4/9"]
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
assert_eq!(x.pow(y), z);
});
}
// Each case is [x, y, z] listed in strictly increasing order.
#[test]
fn test_number_ord_cases() {
// TODO: add more cases
let cases = vec![
vec!["1/2", "1.0", "1e1"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
assert!(x < y);
assert!(y < z);
assert!(x < z);
});
}
// "1e-1" must convert to the inexact value 0.1.
#[test]
fn float_negative_exponent_case() {
if let Float(0.1) = "1e-1"
.parse::<Number>()
.unwrap()
.make_inexact() {
return
}
assert!(false)
}
}