Mycelium/snippets/in_progress_numbers.rs
Ava Affine 4ad319213d HyphaeVM - WIP
This commit is a WORK IN PROGRESS for the base implementation of the
HyphaeVM. This will be squashed into a larger commit eventually when
the work of implementing the HyphaeVM is finished.

Of note, the ISA is mostly finished and much of the VM design is in
place. Yet to be done are a few traps in mycelium, migrating pieces
like the number package and the sexpr package into the VM package,
and of course much testing.

Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-07-24 19:26:31 +00:00

642 lines
18 KiB
Rust

use alloc::boxed::Box;
use alloc::{vec, vec::Vec};
use alloc::fmt::Debug;
use lexer::{E_TOO_MANY_DECIMALS, E_TOO_MANY_SLASH};
use core::cmp::Ordering;
use core::{fmt, u8};
use core::ops::{Add, Div, Mul, Sub};
// Error messages returned by `Number::from_str_into`.

/// The literal has a shape the parser does not accept (for example an
/// unexpected character or a conflicting mix of `.`, `/` and `e`).
pub const E_INCOMPREHENSIBLE: &str = "could not parse number literal";
/// The literal ends right after a leading `#`.
pub const E_POUND_TRUNCATED: &str = "pound sign implies additional input";
/// The digits of an explicit-base literal could not be parsed in that base.
pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal";
/// The character following `#` is not one of the recognised prefix letters.
pub const E_UNKNOWN_CONTROL: &str = "unknown character in number literal";
/// The input string is empty.
pub const E_EMPTY_INPUT: &str = "empty string cannot be a number";
// Textual spellings of the symbolic values. `Number::from_str_into` compares
// the whole input against these before attempting any numeric parsing.
const NUM_INF: &str = "+inf.0";
const NUM_NEG_INF: &str = "-inf.0";
const NUM_NAN: &str = "+nan.0";
const NUM_NEG_NAN: &str = "-nan.0";
// Bit masks for the leading flags byte of an encoded number.
// Bit 0 is not assigned to any flag here.

/// Sign bit; the value is positive when this bit is off.
pub const NegativeFlag: u8 = 1 << 7;
/// Decimal marker; the number is a single integer when this bit is off.
pub const DecimalFlag: u8 = 1 << 6;
/// Fraction marker; decimal if off.
pub const FractionFlag: u8 = 1 << 5;
/// Scientific notation marker; requires a second flags byte.
pub const ScientificFlag: u8 = 1 << 4;
/// Infinity marker; can be positive or negative.
pub const InfiniteFlag: u8 = 1 << 3;
/// Not-a-number marker; can be positive or negative because r7rs.
pub const NotANumberFlag: u8 = 1 << 2;
/// Overflow marker; poisons exactness.
pub const OverflownFlag: u8 = 1 << 1;
/* NUMBER BYTES FORMAT
* Generally the format within the byte array operates like this
* (guaranteed header) 1. NumberFlags (u8)
* (for each integer) 2. Byte Length (u8)
* (for each integer) 3. N following bytes of data
*
* If Scientific Notation is used the leading number may be a decimal.
* In this case, there will be three total numbers
*
* All numbers are big endian
*/
/// Borrowed view over the byte encoding of one number.
///
/// The wrapped slice starts with a flags byte followed by length-prefixed
/// big-endian integers, as described in the format notes above.
///
/// `PartialEq` is deliberately not derived: this file provides a manual
/// `impl PartialEq for Number<'_>`, and deriving it as well would produce
/// conflicting implementations (E0119).
///
/// The type is only a shared slice reference, so it is `Copy`; the by-value
/// operator impls can therefore be used without giving up the operands.
#[repr(transparent)]
#[derive(Clone, Copy, Debug)]
pub struct Number<'src>(pub &'src [u8]);
/* WARNING
* member functions tend to assume that the number encoding is consistent;
* use Number::is_valid() to double-check numbers from unknown sources
*
* TODO: maybe mark raw-indexing member functions as unsafe
*/
impl Number<'_> {
/// Returns how many bytes of the wrapped slice this number's encoding spans.
///
/// Infinite and not-a-number values are a lone flags byte. Otherwise the
/// encoding is the flags byte followed by one length-prefixed integer, a
/// second one when any of the Decimal / Fraction / Scientific flags is set,
/// and a third one when both Scientific and Decimal are set.
///
/// # Panics
/// Indexes the slice without bounds checks of its own, so a truncated
/// encoding panics; validate untrusted data with `is_valid()` first. The sum
/// is computed in `u8`, so an encoding longer than 255 bytes overflows.
///
/// NOTE(review): `from_str_into` writes an extra flags byte in front of a
/// scientific exponent, which neither this function nor `is_valid()` accounts
/// for. Confirm the intended layout.
#[inline(always)]
pub fn byte_length(&self) -> u8 {
    let flags = self.0[0];
    if flags & (InfiniteFlag | NotANumberFlag) != 0 {
        return 1;
    }

    // flags byte + length byte + payload of the first integer
    let mut len = self.0[1] + 2;
    if flags & (DecimalFlag | FractionFlag | ScientificFlag) != 0 {
        // second integer: its length byte plus its payload
        len += self.0[len as usize] + 1;
    }
    if flags & ScientificFlag != 0 && flags & DecimalFlag != 0 {
        // third integer: count the length byte as well, exactly like the
        // second integer (previously only the payload was added)
        len += self.0[len as usize] + 1;
    }
    len
}
/// Checks that the wrapped bytes form a structurally consistent encoding.
///
/// Returns `false` when:
/// - the slice is empty,
/// - the Overflown flag is set,
/// - Fraction is combined with Decimal or Scientific,
/// - an infinite / not-a-number value carries other representation flags or
///   is not exactly one byte long,
/// - the slice is too short for the length-prefixed integers its flags imply.
///
/// Bytes after the encoded number are not rejected. This function never
/// panics, so it is safe to call on data from unknown sources.
pub fn is_valid(&self) -> bool {
    let bytes = self.0;
    let len = bytes.len();
    let Some(&flags) = bytes.first() else {
        return false;
    };

    let decimal = flags & DecimalFlag != 0;
    let fraction = flags & FractionFlag != 0;
    let scientific = flags & ScientificFlag != 0;
    let overflown = flags & OverflownFlag != 0;
    let infinite = flags & InfiniteFlag != 0;
    let notanumber = flags & NotANumberFlag != 0;

    // check flags
    if overflown {
        return false;
    }
    if (decimal && fraction) || (scientific && fraction) {
        return false;
    }
    if infinite || notanumber {
        // A symbolic value is the flags byte alone. Decide here instead of
        // falling through to the integer checks, which need three or more
        // bytes and would reject every symbolic value.
        return !(decimal || fraction || scientific) && len == 1;
    }

    // at least 3 bytes for a single u8
    if len < 3 {
        return false;
    }

    // The cursor is a usize so hostile length bytes cannot overflow it.
    let mut cur = usize::from(bytes[1]) + 2;
    if len < cur {
        return false;
    }
    if decimal || fraction || scientific {
        // `cur` is the index of the second integer's length byte
        if len < cur + 1 {
            return false;
        }
        cur += usize::from(bytes[cur]);
        if len < cur + 1 {
            return false;
        }
    }
    if scientific && decimal {
        // step onto the third integer's length byte
        cur += 1;
        if len < cur + 1 {
            return false;
        }
        cur += usize::from(bytes[cur]);
        if len < cur + 1 {
            return false;
        }
    }
    true
}
/// Reports whether the number counts as exact, i.e. whether the Scientific
/// flag in its flags byte is clear.
#[inline(always)]
pub fn is_exact(&self) -> bool {
    let flags = self.0[0];
    (flags & ScientificFlag) == 0
}
/// Appends an exact form of this number to `dst`.
///
/// Scientific notation is expanded through `normalize_scientific_into`;
/// every other representation is copied through unchanged.
#[inline(always)]
pub fn make_exact_into(&self, dst: &mut Vec<u8>) {
    let is_scientific = (self.0[0] & ScientificFlag) != 0;
    if is_scientific {
        self.normalize_scientific_into(dst);
    } else {
        self.copy_into(dst);
    }
}
/// Intended to write an inexact form of this number into `dst`.
///
/// Not implemented yet: calling this always panics via `todo!()`.
#[inline(always)]
pub fn make_inexact_into(&self, dst: &mut Vec<u8>) {
// basically just convert a fraction into an actual division
todo!()
}
/// Appends the raw encoded bytes of this number to `dst`.
///
/// Use this so you don't have to worry about `clone` while casting.
#[inline(always)]
pub fn copy_into(&self, dst: &mut Vec<u8>) {
    dst.extend_from_slice(self.0);
}
/// Intended to write this number into `dst` with its scientific notation
/// expanded (it is what `make_exact_into` calls for scientific numbers).
///
/// Not implemented yet: calling this always panics via `todo!()`.
#[inline(always)]
pub fn normalize_scientific_into(&self, dst: &mut Vec<u8>) {
todo!()
}
/// Intended to reduce a fraction to its simplest form.
///
/// Returns immediately when the Fraction flag is not set. For fractions the
/// reduction is not implemented yet and the call panics via `todo!()`.
///
/// NOTE(review): `Number` wraps a shared `&[u8]`, so the encoded bytes cannot
/// be rewritten through `&mut self` as the type is declared today. Confirm
/// how "in place" is meant to work before implementing.
#[inline(always)]
pub fn simplify_fraction_in_place(&mut self) {
// nothing to simplify unless this is a fraction
if self.0[0] & FractionFlag == 0 {
return
}
// can technically do this in place
// each element of the fraction will only shrink
todo!()
}
/// Parses the number literal `src` and appends its byte encoding to `dst`.
///
/// Accepted input:
/// - the symbolic values `+inf.0`, `-inf.0`, `+nan.0`, `-nan.0` (a lone flags byte);
/// - an optional leading `+` / `-`, or a `#` prefix (`#x`, `#d`, `#o`, `#b`
///   select the base; `#i` / `#e` are accepted but not acted upon yet);
/// - for base 10: digits optionally combined with one `.`, one `/` or one
///   `e` exponent (which may start with `-`);
/// - for other bases: a plain run of digits of that base.
///
/// Emitted layout: the flags byte, then each integer as a length byte
/// followed by that many big-endian bytes. A scientific exponent is preceded
/// by its own flags byte, which carries the exponent's sign.
///
/// # Errors
/// - `E_EMPTY_INPUT` for an empty string,
/// - `E_POUND_TRUNCATED` / `E_UNKNOWN_CONTROL` for a bad `#` prefix,
/// - `E_BASE_PARSE_FAIL` for missing, invalid or oversized explicit-base digits,
/// - `E_TOO_MANY_DECIMALS` / `E_TOO_MANY_SLASH` for a repeated `.` / `/`,
/// - `E_INCOMPREHENSIBLE` for anything else, including a missing operand or
///   an operand that does not fit a `usize`.
///
/// On error `dst` is restored to the length it had on entry. The parser
/// never panics on malformed input.
///
/// NOTE(review): a decimal is stored as two integers, so leading zeros of the
/// fractional part ("1.05" vs "1.5") are not representable. Confirm whether
/// the format should carry a scale.
#[inline(always)]
pub fn from_str_into(src: &str, dst: &mut Vec<u8>) -> Result<(), &'static str> {
    // acc = acc * base + digit, with `acc` a little-endian magnitude.
    // base <= 16 and digit < base, so the carry always fits in one byte.
    fn fold_digit(acc: &mut Vec<u8>, base: u32, digit: u32) {
        let mut carry = digit;
        for byte in acc.iter_mut() {
            let wide = u32::from(*byte) * base + carry;
            *byte = (wide & 0xFF) as u8;
            carry = wide >> 8;
        }
        if carry != 0 {
            acc.push(carry as u8);
        }
    }

    // Appends the pending base-10 operand as a length-prefixed big-endian
    // integer; a missing operand ("1.", "/3", "+") is a parse error.
    fn emit_operand(pending: &mut Option<usize>, dst: &mut Vec<u8>) -> Result<(), &'static str> {
        let Some(value) = pending.take() else {
            return Err(E_INCOMPREHENSIBLE);
        };
        let bytes = value.to_be_bytes();
        dst.push(bytes.len() as u8);
        dst.extend_from_slice(&bytes);
        Ok(())
    }

    fn parse(src: &str, dst: &mut Vec<u8>) -> Result<(), &'static str> {
        // handle symbolic values
        let symbolic = match src {
            NUM_INF => Some(InfiniteFlag),
            NUM_NEG_INF => Some(NegativeFlag | InfiniteFlag),
            NUM_NAN => Some(NotANumberFlag),
            NUM_NEG_NAN => Some(NegativeFlag | NotANumberFlag),
            _ => None,
        };
        if let Some(flags) = symbolic {
            dst.push(flags);
            return Ok(());
        }

        // Reserve the flags byte at the start of *this* number; it is patched
        // once the flags are known, so earlier content of `dst` is untouched.
        let flags_at = dst.len();
        dst.push(0);

        let mut ctrl_flags = 0u8;
        let mut base = 10u32; // literals without a prefix are decimal
        let mut iter = src.chars().peekable();

        // Peek rather than consume so a leading digit stays in the stream.
        match iter.peek().copied() {
            None => return Err(E_EMPTY_INPUT),
            Some('+') => {
                iter.next();
            },
            Some('-') => {
                iter.next();
                ctrl_flags |= NegativeFlag;
            },
            Some('#') => {
                iter.next();
                match iter.next() {
                    None => return Err(E_POUND_TRUNCATED),
                    // exactness prefixes are recognised but not applied yet
                    Some('i') | Some('e') => (),
                    Some('x') => base = 16,
                    Some('d') => base = 10,
                    Some('o') => base = 8,
                    Some('b') => base = 2,
                    Some(_) => return Err(E_UNKNOWN_CONTROL),
                }
            },
            Some(c) if c.is_digit(10) => (),
            Some(_) => return Err(E_INCOMPREHENSIBLE),
        }

        if base != 10 {
            // cant mix non-decimal base and other number representations
            let mut magnitude: Vec<u8> = Vec::new();
            let mut seen_digit = false;
            for c in iter {
                let Some(digit) = c.to_digit(base) else {
                    return Err(E_BASE_PARSE_FAIL);
                };
                fold_digit(&mut magnitude, base, digit);
                seen_digit = true;
            }
            if !seen_digit {
                return Err(E_BASE_PARSE_FAIL);
            }
            if magnitude.is_empty() {
                // every digit was zero
                magnitude.push(0);
            }
            if magnitude.len() > 0xFF {
                // the length prefix is a single byte
                return Err(E_BASE_PARSE_FAIL);
            }
            // integer numbers prepended with their length, big endian
            magnitude.reverse();
            dst.push(magnitude.len() as u8);
            dst.extend_from_slice(&magnitude);
        } else {
            // just a decimal number, but could have a weird format
            let mut pending: Option<usize> = None;
            loop {
                let Some(c) = iter.next() else {
                    emit_operand(&mut pending, dst)?;
                    break;
                };

                if let Some(digit) = c.to_digit(10) {
                    let grown = pending
                        .unwrap_or(0)
                        .checked_mul(10)
                        .and_then(|v| v.checked_add(digit as usize));
                    let Some(value) = grown else {
                        // operand does not fit the fixed-width encoding
                        return Err(E_INCOMPREHENSIBLE);
                    };
                    pending = Some(value);
                    continue;
                }

                match c {
                    '.' => {
                        if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE);
                        }
                        if ctrl_flags & DecimalFlag != 0 {
                            return Err(E_TOO_MANY_DECIMALS);
                        }
                        ctrl_flags |= DecimalFlag;
                        emit_operand(&mut pending, dst)?;
                    },
                    '/' => {
                        if ctrl_flags & (DecimalFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE);
                        }
                        if ctrl_flags & FractionFlag != 0 {
                            return Err(E_TOO_MANY_SLASH);
                        }
                        ctrl_flags |= FractionFlag;
                        emit_operand(&mut pending, dst)?;
                    },
                    'e' => {
                        // an exponent cannot follow a fraction or another exponent
                        if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE);
                        }
                        ctrl_flags |= ScientificFlag;
                        // consume the exponent sign so it is not seen as a stray character
                        let exponent_flags = if iter.next_if_eq(&'-').is_some() {
                            NegativeFlag
                        } else {
                            0
                        };
                        emit_operand(&mut pending, dst)?;
                        dst.push(exponent_flags);
                    },
                    _ => return Err(E_INCOMPREHENSIBLE),
                }
            }
        }

        dst[flags_at] = ctrl_flags;
        // TODO: reduce fractions once `simplify_fraction_in_place` exists. It
        // is currently a `todo!()` stub, and calling it on a temporary view
        // could not alter `dst` anyway, so it is not invoked here.
        Ok(())
    }

    let start = dst.len();
    let outcome = parse(src, dst);
    if outcome.is_err() {
        // do not leave a half-written number behind
        dst.truncate(start);
    }
    outcome
}
/// Appends the encoding of the non-negative integer `src` to `dst` and
/// returns a view over the bytes just written.
///
/// The encoding is three bytes: an empty flags byte, a length byte of 1, and
/// the value itself. Bytes already in `dst` are left in place and are not
/// part of the returned view.
pub fn from_u8_into(src: u8, dst: &mut Vec<u8>) -> Number<'_> {
    let start = dst.len();
    // flags (plain positive integer), byte length, payload
    dst.extend_from_slice(&[0, 1, src]);
    Number(&dst[start..])
}
}
// Textual rendering of a number.
// Not implemented yet: formatting a `Number` panics via `todo!()`.
impl fmt::Display for Number<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// can implement after I finish division
todo!()
}
}
impl<'a> From<&'a Box<[u8]>> for Number<'a> {
fn from(value: &'a Box<[u8]>) -> Self {
Number(value.as_ref())
}
}
impl<'a> From<&'a Vec<u8>> for Number<'a> {
fn from(value: &'a Vec<u8>) -> Self {
Number(value.as_slice())
}
}
impl<'a> From<&'a [u8]> for Number<'a> {
fn from(value: &'a [u8]) -> Self {
Number(value)
}
}
impl<'a> Into<&'a [u8]> for Number<'a> {
fn into(self) -> &'a [u8] {
self.0
}
}
// Addition of two encoded numbers; the result is a freshly allocated encoding.
// Not implemented yet: `a + b` panics via `todo!()`.
impl Add for Number<'_> {
type Output = Box<[u8]>;
fn add(self, rhs: Self) -> Self::Output {
todo!()
}
}
// Subtraction of two encoded numbers; the result is a freshly allocated encoding.
// Not implemented yet: `a - b` panics via `todo!()`.
impl Sub for Number<'_> {
type Output = Box<[u8]>;
fn sub(self, rhs: Self) -> Self::Output {
todo!()
}
}
// Multiplication of two encoded numbers; the result is a freshly allocated encoding.
// Not implemented yet: `a * b` panics via `todo!()`.
impl Mul for Number<'_> {
type Output = Box<[u8]>;
fn mul(self, rhs: Self) -> Self::Output {
todo!()
}
}
// Division of two encoded numbers; the result is a freshly allocated encoding.
// Not implemented yet: both `div` and its helper `div_ints` panic via `todo!()`.
// The "Options" notes below sketch the planned case analysis.
impl Div for Number<'_> {
type Output = Box<[u8]>;
fn div(self, rhs: Self) -> Self::Output {
// divide unsigned integer by unsigned integer
// the inputs (lh and rh) start with length byte
// returns a decimal index
fn div_ints(lh: &[u8], rh: &[u8], dest: &mut Vec<u8>) -> u8 {
todo!()
}
/* Options
* divide a single int by a single int
* - (make fraction)
* divide a fraction by a single int
* - (multiply denominator)
* divide a decimal by a single int
* - (divide straight through)
* divide a scientific note by a single int
* - divide the first num
* - multiply by however much is needed for ones place (like 3.5)
* - add or subtract from the second number accordingly
*
* divide a single int by a fraction
* - output denom * lh / numer
* divide a single int by a decimal
*/
todo!()
}
}
// Equality between two encoded numbers.
// Not implemented yet: comparing with `==` panics via `todo!()`.
// NOTE(review): because this impl is hand-written, `Number` must not also
// derive `PartialEq`, otherwise the two implementations conflict (E0119).
impl PartialEq for Number<'_> {
fn eq(&self, other: &Number) -> bool {
todo!()
}
}
// Ordering between two encoded numbers.
// Not implemented yet: `<`, `>` and friends panic via `todo!()`.
impl PartialOrd for Number<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
todo!()
}
}
#[cfg(test)]
mod tests {
use super::*;
// Expected parse results for a range of number literals.
// NOTE(review): these assertions use `str::parse::<Number>()` and compare
// against `Number::Flt` / `Number::Sci` / `Number::Fra` / `Number::Sym`,
// `Float`, `Fraction`, `ScientificNotation` and several `E_*` constants, none
// of which are defined in the visible part of this file. Presumably they
// target an earlier API; confirm and port to `Number::from_str_into`.
// NOTE(review): the literal "2" is asserted to fail with two different
// errors (E_SCIENTIFIC_E and E_NO_DENOMINATOR), and some expectations omit the
// `Number::` wrapper. This looks like separate per-representation tests that
// were merged; verify.
#[test]
fn parse_number_tests() {
assert_eq!("1.3".parse::<Number>(),
Ok(Number::Flt(Float(1.3))));
assert_eq!("1".parse::<Number>(),
Ok(Number::Flt(Float(1 as f64))));
assert_eq!("1.3e3".parse::<Number>(),
Ok(Number::Sci(ScientificNotation(1.3, 3))));
assert_eq!("+1.3".parse::<Number>(),
Ok(Number::Flt(Float(1.3))));
assert_eq!("-1.3".parse::<Number>(),
Ok(Number::Flt(Float(-1.3))));
assert_eq!("#d234".parse::<Number>(),
Ok(Number::Flt(Float(234.0))));
assert_eq!("#o17".parse::<Number>(),
Ok(Number::Fra(Fraction(15, 1))));
assert_eq!("#xAA".parse::<Number>(),
Ok(Number::Fra(Fraction(170, 1))));
assert_eq!("#b101".parse::<Number>(),
Ok(Number::Flt(Float(5.0))));
assert_eq!("2/4".parse::<Number>(),
Ok(Number::Fra(Fraction(2, 4))));
assert_eq!("#e1/5".parse::<Number>(),
Ok(Number::Fra(Fraction(1, 5))));
assert_eq!("#i1/5".parse::<Number>(),
Ok(Number::Flt(Float(0.2))));
assert_eq!("#e1e1".parse::<Number>(),
Ok(Number::Sci(ScientificNotation(1.0, 1))));
assert_eq!("+inf.0".parse::<Number>(),
Ok(Number::Sym(SymbolicNumber::Inf)));
assert_eq!("2e3".parse::<Number>(),
Ok(ScientificNotation(2.0, 3)));
assert_eq!("0e1".parse::<Number>(),
Ok(ScientificNotation(0.0, 1)));
assert_eq!("-1e34".parse::<Number>(),
Ok(ScientificNotation(-1.0, 34)));
assert_eq!("3.3e3".parse::<Number>(),
Ok(ScientificNotation(3.3, 3)));
assert_eq!("2".parse::<Number>(),
Err(E_SCIENTIFIC_E));
assert_eq!("2e2e2".parse::<Number>(),
Err(E_SCIENTIFIC_MULTI_E));
assert_eq!("2/3".parse::<Number>(),
Ok(Fraction(2, 3)));
assert_eq!("0/1".parse::<Number>(),
Ok(Fraction(0, 1)));
assert_eq!("-1/34".parse::<Number>(),
Ok(Fraction(-1, 34)));
assert_eq!("2".parse::<Number>(),
Err(E_NO_DENOMINATOR));
assert_eq!("2/2/2".parse::<Number>(),
Err(E_MULTI_DENOMINATOR));
assert_eq!("2/0".parse::<Number>(),
Err(E_ZERO_DENOMINATOR));
assert_eq!("3.3/3".parse::<Number>(),
Err(E_NUMERATOR_PARSE_FAIL));
}
// Table-driven checks that addition and subtraction agree with each other.
// Each case is [x, y, z] with the expectation x + y == z.
// NOTE(review): relies on `str::parse::<Number>()` and on the operators
// yielding values comparable with a parsed `Number`; the operator impls
// visible in this file return `Box<[u8]>` and are still `todo!()`. Confirm
// the intended API before reviving this test.
#[test]
fn test_number_addition_subtraction_cases() {
let cases = vec![
vec!["1/5", "4/5", "1/1"],
vec!["1/5", "0.8", "1/1"],
vec!["1e1", "2.0", "12/1"],
vec!["1e1", "2/1", "12/1"],
vec!["1e1", "1/2", "10.5"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
// test some mathematical properties
assert_eq!(x + y, z);
assert_eq!(x + y, y + x);
assert_eq!(z - x, y);
assert_eq!(x + y - x, y);
});
// theres no reason this should adhere to all the other rules
let x = "+inf.0".parse::<Number>().unwrap();
let y = "1e1".parse::<Number>().unwrap();
let z = "+inf.0".parse::<Number>().unwrap();
assert_eq!(x + y, z);
}
// Table-driven checks that multiplication and division agree with each other.
// Each case is [x, y, z] with the expectation x * y == z.
// NOTE(review): relies on `str::parse::<Number>()` and on the operators
// yielding values comparable with a parsed `Number`; the operator impls
// visible in this file return `Box<[u8]>` and are still `todo!()`. Confirm
// the intended API before reviving this test.
#[test]
fn test_number_multiplication_division_cases() {
let cases = vec![
vec!["1/5", "5e0", "1/1"],
vec!["1/5", "5", "1/1"],
vec!["1/5", "2/1", "2/5"],
vec!["4.4", "1/2", "2.2"],
vec!["12.0", "1/2", "6/1"],
vec!["1e1", "2.0", "20/1"],
vec!["1e1", "2/1", "20/1"],
vec!["1e1", "1/2", "5/1"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
// test some mathematical properties
assert_eq!(x * y, z);
assert_eq!(x * y, y * x);
assert_eq!(z / x, y);
assert_eq!(x * y / x, y);
});
}
// Table-driven checks for exponentiation.
// Each case is [x, y, z] with the expectation x.pow(y) == z.
// NOTE(review): no `pow` method and no `FromStr` impl for `Number` are
// defined in the visible part of this file; confirm where they are meant to
// come from.
#[test]
fn test_number_pow_cases() {
// TODO: add scientific notation cases
let cases = vec![
vec!["2", "2", "4"],
vec!["2/1", "2/1", "4/1"],
vec!["2/1", "2/-1", "1/4"],
vec!["2/1", "2/2", "2/1"],
vec!["2/1", "2.0", "4/1"],
vec!["27/8", "2/-3", "4/9"]
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
assert_eq!(x.pow(y), z);
});
}
// Table-driven checks for ordering.
// Each case is [x, y, z] with the expectation x < y < z.
// NOTE(review): relies on `str::parse::<Number>()`, which is not defined in
// the visible part of this file, and on `PartialOrd`, which is still `todo!()`.
#[test]
fn test_number_ord_cases() {
// TODO: add more cases
let cases = vec![
vec!["1/2", "1.0", "1e1"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
assert!(x < y);
assert!(y < z);
assert!(x < z);
});
}
// Expects "1e-1" to become `Float(0.1)` once made inexact; the test fails
// when the pattern does not match.
// NOTE(review): `make_inexact()`, `Float` and `str::parse::<Number>()` are
// not defined in the visible part of this file (only `make_inexact_into`
// is). Confirm the intended API.
#[test]
fn float_negative_exponent_case() {
if let Float(0.1) = "1e-1"
.parse::<Number>()
.unwrap()
.make_inexact() {
return
}
assert!(false)
}
}