Mycelium/snippets/in_progress_numbers.rs
Ava Affine 0476160ae3 WIP HyphaeVM
This commit is a WORK IN PROGRESS for the base implementation of the
HyphaeVM. This will be squashed into a larger commit eventually when
the work of implementing the HyphaeVM is finished.

Do note the in progress number package implementation in snippets

Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-07-10 16:56:41 -07:00

642 lines
18 KiB
Rust

use alloc::boxed::Box;
use alloc::{vec, vec::Vec};
use alloc::fmt::Debug;
use lexer::{E_TOO_MANY_DECIMALS, E_TOO_MANY_SLASH};
use core::cmp::Ordering;
use core::{fmt, u8};
use core::ops::{Add, Div, Mul, Sub};
/// Generic parse failure: the literal does not fit any supported number form.
pub const E_INCOMPREHENSIBLE: &str = "could not parse number literal";
/// A `#` prefix was the last character of the input.
pub const E_POUND_TRUNCATED: &str = "pound sign implies additional input";
/// The digits of an explicit-radix literal could not be parsed.
pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal";
/// The character following `#` is not a recognised prefix letter.
pub const E_UNKNOWN_CONTROL: &str = "unknown character in number literal";
/// The input string was empty.
pub const E_EMPTY_INPUT: &str = "empty string cannot be a number";

// Spellings of the symbolic (non-finite) literals.
const NUM_INF: &str = "+inf.0";
const NUM_NEG_INF: &str = "-inf.0";
const NUM_NAN: &str = "+nan.0";
const NUM_NEG_NAN: &str = "-nan.0";

// Bits of the leading flags byte of an encoded number, highest bit first.
pub const NegativeFlag: u8 = 1 << 7; // positive value if off
pub const DecimalFlag: u8 = 1 << 6; // single integer if off
pub const FractionFlag: u8 = 1 << 5; // decimal if off
pub const ScientificFlag: u8 = 1 << 4; // requires a second flags byte
pub const InfiniteFlag: u8 = 1 << 3; // can be positive or negative
pub const NotANumberFlag: u8 = 1 << 2; // can be positive or negative because r7rs
pub const OverflownFlag: u8 = 1 << 1; // poisons exactness
/* NUMBER BYTES FORMAT
* Generally the format within the byte array operates like this
* (guaranteed header) 1. NumberFlags (u8)
* (for each integer) 2. Byte Length (u8)
* (for each integer) 3. The N data bytes that follow the length byte
*
* If Scientific Notation is used the leading number may be a decimal.
* In this case, there will be three total numbers
*
* All numbers are big endian
*/
/// Borrowed, read-only view over the bytes of one encoded number.
///
/// The wrapped slice is expected to start with a flags byte followed by
/// length-prefixed big-endian integers (see the format notes above).
///
/// `PartialEq` is deliberately not derived here: this file provides a
/// hand-written `impl PartialEq for Number<'_>`, and deriving it as well is a
/// conflicting implementation (E0119).
#[repr(transparent)]
#[derive(Clone, Debug)]
pub struct Number<'src>(pub &'src [u8]);
/* WARNING
* member functions tend to assume that number encoding is consistent
* use Number::is_valid() to double check numbers from unknown sources
*
* TODO: maybe mark raw-indexing member functions as unsafe
*/
impl Number<'_> {
/// Number of bytes occupied by this number's encoding.
///
/// Infinite and not-a-number values are a lone flags byte. Otherwise the
/// flags byte is followed by one length-prefixed integer, a second one when
/// any of the decimal/fraction/scientific flags is set, and a third one when
/// both the scientific and decimal flags are set.
///
/// The slice is indexed directly and the sum is computed in `u8`, so this
/// panics on truncated input and overflows for encodings longer than 255
/// bytes; check untrusted data with `is_valid` first.
///
/// NOTE(review): `from_str_into` also emits a second flags byte in front of
/// the exponent of scientific literals; that byte is not accounted for here.
/// Confirm the intended layout.
#[inline(always)]
pub fn byte_length(&self) -> u8 {
    let flags = self.0[0];
    if flags & (InfiniteFlag | NotANumberFlag) != 0 {
        return 1;
    }
    // flags byte + first length byte + first integer's data
    let mut len = self.0[1] + 2;
    if flags & (DecimalFlag | FractionFlag | ScientificFlag) != 0 {
        // `len` indexes the second length byte: count it and its data
        len += self.0[len as usize] + 1;
    }
    if flags & ScientificFlag != 0 && flags & DecimalFlag != 0 {
        // Same for the third integer. The `+ 1` for its length byte was
        // missing, which made the result one short of what `is_valid` expects.
        len += self.0[len as usize] + 1;
    }
    len
}
/// Checks that the wrapped bytes form a structurally consistent number.
///
/// Returns `false` when:
/// * the slice is empty;
/// * the overflown flag is set;
/// * the fraction flag is combined with the decimal or scientific flag;
/// * an infinite / not-a-number value carries any payload or shape flag, or
///   both of those flags are set at once;
/// * any length-prefixed integer announced by the flags is truncated.
///
/// All cursor arithmetic is done in `usize`, so hostile length bytes cannot
/// overflow, and every index is bounds-checked before use.
///
/// NOTE(review): `from_str_into` also emits a second flags byte in front of
/// the exponent of scientific literals; that byte is not accounted for here.
/// Confirm the intended layout.
pub fn is_valid(&self) -> bool {
    let bytes = self.0;
    let len = bytes.len();
    let Some(&flags) = bytes.first() else {
        return false;
    };

    let decimal = flags & DecimalFlag != 0;
    let fraction = flags & FractionFlag != 0;
    let scientific = flags & ScientificFlag != 0;
    let overflown = flags & OverflownFlag != 0;
    let infinite = flags & InfiniteFlag != 0;
    let notanumber = flags & NotANumberFlag != 0;

    // check flags
    if overflown {
        return false;
    }
    if fraction && (decimal || scientific) {
        return false;
    }
    if infinite && notanumber {
        return false;
    }
    if infinite || notanumber {
        // Symbolic values are exactly one byte. Previously they fell through
        // to the `len < 3` check below and could never be reported valid.
        return len == 1 && !(decimal || fraction || scientific);
    }

    // at least 3 bytes for a single u8
    if len < 3 {
        return false;
    }

    // index of the byte right after the first integer
    let mut cur = usize::from(bytes[1]) + 2;
    if len < cur {
        return false;
    }

    if decimal || fraction || scientific {
        // second length byte must exist ...
        if len < cur + 1 {
            return false;
        }
        // ... and so must the last data byte it announces
        cur += usize::from(bytes[cur]);
        if len < cur + 1 {
            return false;
        }
    }

    if scientific && decimal {
        // step onto the third length byte, then onto its last data byte
        cur += 1;
        if len < cur + 1 {
            return false;
        }
        cur += usize::from(bytes[cur]);
        if len < cur + 1 {
            return false;
        }
    }

    true
}
/// Returns `true` when the scientific-notation flag is clear in the flags byte.
#[inline(always)]
pub fn is_exact(&self) -> bool {
    let flags = self.0[0];
    (flags & ScientificFlag) == 0
}
/// Writes an exact form of this number into `dst`.
///
/// Scientific-notation values are handed to `normalize_scientific_into`;
/// everything else is copied verbatim with `copy_into`.
#[inline(always)]
pub fn make_exact_into(&self, dst: &mut Vec<u8>) {
    let is_scientific = self.0[0] & ScientificFlag != 0;
    if is_scientific {
        // scientific notation has to be expanded
        self.normalize_scientific_into(dst)
    } else {
        // already exact: direct copy
        self.copy_into(dst)
    }
}
/// Intended to write an inexact form of this number into `dst`.
///
/// Not implemented yet: the body is `todo!()`, so every call panics and
/// `dst` is never touched.
#[inline(always)]
pub fn make_inexact_into(&self, dst: &mut Vec<u8>) {
// basically just convert a fraction into an actual division
todo!()
}
/// Appends this number's bytes, unchanged, to the end of `dst`.
///
/// Use this so you don't have to worry about cloning while casting.
#[inline(always)]
pub fn copy_into(&self, dst: &mut Vec<u8>) {
    dst.extend_from_slice(self.0);
}
/// Intended to expand a scientific-notation number into `dst`
/// (`make_exact_into` calls it for numbers with the scientific flag set).
///
/// Not implemented yet: the body is `todo!()`, so every call panics.
#[inline(always)]
pub fn normalize_scientific_into(&self, dst: &mut Vec<u8>) {
todo!()
}
/// Reduces a fraction to lowest terms.
///
/// Does nothing unless the fraction flag is set. The reduction itself is not
/// written yet, so calling this on a fraction panics via `todo!()`.
#[inline(always)]
pub fn simplify_fraction_in_place(&mut self) {
    let is_fraction = self.0[0] & FractionFlag != 0;
    if is_fraction {
        // can technically do this in place
        // each element of the fraction will only shrink
        todo!()
    }
}
/// Parses the number literal `src` and appends its byte encoding to `dst`.
///
/// Accepted input:
/// * the symbolic literals `+inf.0`, `-inf.0`, `+nan.0` and `-nan.0`, each
///   encoded as a lone flags byte;
/// * otherwise an optional leading `+` / `-`, or a `#` prefix (`#x`, `#d`,
///   `#o`, `#b` select radix 16, 10, 8, 2; `#i` and `#e` are accepted but
///   currently have no effect), followed by:
///   * radix 16/8/2: a plain run of digits, stored as one big-endian integer
///     of minimal length;
///   * radix 10: `int`, `int.int` or `int/int`; the first two may be followed
///     by `e`, an optional sign and an exponent. Each operand is stored as a
///     `usize` in big-endian order, and a flags byte holding the exponent's
///     sign is written in front of the exponent.
///
/// The number is built in a scratch buffer and appended only on success, so
/// `dst` is left untouched when an error is returned and may already hold
/// other data.
///
/// # Errors
/// * `E_EMPTY_INPUT` for an empty string;
/// * `E_POUND_TRUNCATED` / `E_UNKNOWN_CONTROL` for a bad `#` prefix;
/// * `E_BASE_PARSE_FAIL` for missing, invalid or too many digits in a
///   radix 16/8/2 literal;
/// * `E_TOO_MANY_DECIMALS` / `E_TOO_MANY_SLASH` for a repeated `.` or `/`;
/// * `E_INCOMPREHENSIBLE` for anything else, including an operand with no
///   digits, an operand that does not fit in `usize`, or a repeated `e`.
///
/// # Panics
/// Fraction literals are passed to `simplify_fraction_in_place`, which is
/// still `todo!()` for fractions.
pub fn from_str_into(src: &str, dst: &mut Vec<u8>) -> Result<(), &'static str> {
    // Appends the pending decimal operand as a length-prefixed big-endian
    // integer. A separator or end of input with no digits before it is an
    // error instead of a panic.
    fn flush_operand(out: &mut Vec<u8>, pending: &mut Option<usize>) -> Result<(), &'static str> {
        let value = pending.take().ok_or(E_INCOMPREHENSIBLE)?;
        let bytes = value.to_be_bytes();
        out.push(bytes.len() as u8);
        out.extend_from_slice(&bytes);
        Ok(())
    }

    // magnitude = magnitude * base + digit, on a big-endian byte string.
    // Works for any digit count and for radices (such as 8) whose digits do
    // not line up with byte boundaries.
    fn accumulate_digit(magnitude: &mut Vec<u8>, base: u32, digit: u32) {
        let mut carry = digit;
        for byte in magnitude.iter_mut().rev() {
            let wide = *byte as u32 * base + carry;
            *byte = (wide & 0xff) as u8;
            carry = wide >> 8;
        }
        if carry != 0 {
            // base <= 16 keeps the carry well below 256
            magnitude.insert(0, carry as u8);
        }
    }

    // handle symbolic values
    let symbolic_flags = match src {
        NUM_INF => Some(InfiniteFlag),
        NUM_NEG_INF => Some(NegativeFlag | InfiniteFlag),
        NUM_NAN => Some(NotANumberFlag),
        NUM_NEG_NAN => Some(NegativeFlag | NotANumberFlag),
        _ => None,
    };
    if let Some(flags) = symbolic_flags {
        dst.push(flags);
        return Ok(());
    }

    let mut ctrl_flags = 0u8;
    // Literals without a radix prefix are decimal. (This used to start at 0,
    // which sent every unprefixed literal down the radix path.)
    let mut base: u32 = 10;
    // Slot 0 is reserved for the flags byte, patched once parsing is done.
    let mut encoded: Vec<u8> = vec![0];
    let mut iter = src.chars().peekable();

    // Only signs and prefixes are consumed here; a leading digit is left in
    // the iterator so that it becomes part of the first operand.
    match iter.peek().copied() {
        None => return Err(E_EMPTY_INPUT),
        Some('+') => {
            iter.next();
        },
        Some('-') => {
            iter.next();
            ctrl_flags |= NegativeFlag;
        },
        Some('#') => {
            iter.next();
            match iter.next() {
                None => return Err(E_POUND_TRUNCATED),
                // exactness prefixes: recognised, not acted upon yet
                Some('i') | Some('e') => (),
                Some('x') => base = 16,
                Some('d') => base = 10,
                Some('o') => base = 8,
                Some('b') => base = 2,
                Some(_) => return Err(E_UNKNOWN_CONTROL),
            }
        },
        Some(c) if c.is_ascii_digit() => (),
        Some(_) => return Err(E_INCOMPREHENSIBLE),
    }

    if base != 10 {
        // cant mix non-decimal base and other number representations
        let mut magnitude: Vec<u8> = Vec::new();
        let mut saw_digit = false;
        for c in iter.by_ref() {
            let digit = c.to_digit(base).ok_or(E_BASE_PARSE_FAIL)?;
            accumulate_digit(&mut magnitude, base, digit);
            saw_digit = true;
        }
        if !saw_digit {
            return Err(E_BASE_PARSE_FAIL);
        }
        if magnitude.is_empty() {
            // the literal was all zeros
            magnitude.push(0);
        }
        if magnitude.len() > 0xff {
            // the length prefix is a single byte
            return Err(E_BASE_PARSE_FAIL);
        }
        // integer numbers prepended with their length
        encoded.push(magnitude.len() as u8);
        encoded.append(&mut magnitude);
    } else {
        // just a decimal number, but could have a weird format
        //
        // NOTE(review): operands are stored as plain integers, so leading
        // zeros after a `.` are lost ("1.05" encodes like "1.5").
        let mut operand: Option<usize> = None;
        loop {
            match iter.next() {
                Some(c) if c.is_ascii_digit() => {
                    let digit = (c as u8 - b'0') as usize;
                    let grown = operand
                        .unwrap_or(0)
                        .checked_mul(10)
                        .and_then(|v| v.checked_add(digit))
                        .ok_or(E_INCOMPREHENSIBLE)?;
                    operand = Some(grown);
                },
                Some('.') => {
                    if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
                        return Err(E_INCOMPREHENSIBLE);
                    }
                    if ctrl_flags & DecimalFlag != 0 {
                        return Err(E_TOO_MANY_DECIMALS);
                    }
                    ctrl_flags |= DecimalFlag;
                    flush_operand(&mut encoded, &mut operand)?;
                },
                Some('/') => {
                    if ctrl_flags & (DecimalFlag | ScientificFlag) != 0 {
                        return Err(E_INCOMPREHENSIBLE);
                    }
                    if ctrl_flags & FractionFlag != 0 {
                        return Err(E_TOO_MANY_SLASH);
                    }
                    // was DecimalFlag, which mis-tagged every fraction
                    ctrl_flags |= FractionFlag;
                    flush_operand(&mut encoded, &mut operand)?;
                },
                Some('e') => {
                    // no exponent on fractions, and only one exponent
                    if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
                        return Err(E_INCOMPREHENSIBLE);
                    }
                    ctrl_flags |= ScientificFlag;
                    flush_operand(&mut encoded, &mut operand)?;
                    // Consume the exponent's sign here; it used to be only
                    // peeked, so "1e-1" was rejected on the next iteration.
                    let mut exponent_flags = 0u8;
                    if iter.next_if_eq(&'-').is_some() {
                        exponent_flags |= NegativeFlag;
                    } else {
                        let _ = iter.next_if_eq(&'+');
                    }
                    encoded.push(exponent_flags);
                },
                Some(_) => return Err(E_INCOMPREHENSIBLE),
                None => {
                    flush_operand(&mut encoded, &mut operand)?;
                    break;
                },
            }
        }
    }

    encoded[0] = ctrl_flags;
    Number(encoded.as_slice()).simplify_fraction_in_place();
    dst.append(&mut encoded);
    Ok(())
}
/// Appends the encoding of the single unsigned byte `src` to `dst` and
/// returns a view over the bytes just written.
///
/// The encoding is three bytes: a flags byte with every flag clear, a length
/// prefix of 1, and `src` itself. The length prefix used to be missing, so
/// the result was rejected by `is_valid` and misread by `byte_length`.
///
/// Anything already in `dst` is left in place and is not part of the
/// returned `Number`.
pub fn from_u8_into(src: u8, dst: &mut Vec<u8>) -> Number {
    let start = dst.len();
    dst.extend_from_slice(&[0, 1, src]);
    Number(&dst[start..])
}
}
// Textual rendering of a `Number`.
// Not implemented yet: `fmt` is `todo!()`, so formatting a `Number` with
// `{}` panics.
impl fmt::Display for Number<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// can implement after I finish division
todo!()
}
}
impl<'a> From<&'a Box<[u8]>> for Number<'a> {
fn from(value: &'a Box<[u8]>) -> Self {
Number(value.as_ref())
}
}
impl<'a> From<&'a Vec<u8>> for Number<'a> {
fn from(value: &'a Vec<u8>) -> Self {
Number(value.as_slice())
}
}
impl<'a> From<&'a [u8]> for Number<'a> {
fn from(value: &'a [u8]) -> Self {
Number(value)
}
}
impl<'a> Into<&'a [u8]> for Number<'a> {
fn into(self) -> &'a [u8] {
self.0
}
}
// `+` for two `Number` views. The result type is an owned `Box<[u8]>`
// (presumably the encoded sum; confirm once implemented).
// Not implemented yet: `add` is `todo!()` and panics when called.
impl Add for Number<'_> {
type Output = Box<[u8]>;
fn add(self, rhs: Self) -> Self::Output {
todo!()
}
}
// `-` for two `Number` views. The result type is an owned `Box<[u8]>`
// (presumably the encoded difference; confirm once implemented).
// Not implemented yet: `sub` is `todo!()` and panics when called.
impl Sub for Number<'_> {
type Output = Box<[u8]>;
fn sub(self, rhs: Self) -> Self::Output {
todo!()
}
}
// `*` for two `Number` views. The result type is an owned `Box<[u8]>`
// (presumably the encoded product; confirm once implemented).
// Not implemented yet: `mul` is `todo!()` and panics when called.
impl Mul for Number<'_> {
type Output = Box<[u8]>;
fn mul(self, rhs: Self) -> Self::Output {
todo!()
}
}
// `/` for two `Number` views. The result type is an owned `Box<[u8]>`.
// Not implemented yet: both `div` and its inner helper `div_ints` are
// `todo!()`, so any division panics. The comments inside record the planned
// case analysis.
impl Div for Number<'_> {
type Output = Box<[u8]>;
fn div(self, rhs: Self) -> Self::Output {
// divide unsigned integer by unsigned integer
// the inputs (lh and rh) start with length byte
// returns a decimal index
// (stub: not implemented and not called anywhere yet)
fn div_ints(lh: &[u8], rh: &[u8], dest: &mut Vec<u8>) -> u8 {
todo!()
}
/* Options
* divide a single int by a single int
* - (make fraction)
* divide a fraction by a single int
* - (multiply denominator)
* divide a decimal by a single int
* - (divide straight through)
* divide a scientific note by a single int
* - divide the first num
* - multiply by however much is needed for ones place (like 3.5)
* - add or subtract from the second number accordingly
*
* divide a single int by a fraction
* - output denom * lh / numer
* divide a single int by a decimal
*/
todo!()
}
}
// Hand-written equality between two `Number` views.
// Not implemented yet: `eq` is `todo!()`, so `==` / `!=` on `Number` panic.
impl PartialEq for Number<'_> {
fn eq(&self, other: &Number) -> bool {
todo!()
}
}
// Ordering between two `Number` views.
// Not implemented yet: `partial_cmp` is `todo!()`, so `<`, `<=`, `>` and
// `>=` on `Number` panic.
impl PartialOrd for Number<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
todo!()
}
}
// NOTE(review): these tests look like they were written against a different
// `Number` API than the byte-slice view defined in this file. They rely on
// `str::parse::<Number>()`, on `Number::Flt` / `Number::Fra` / `Number::Sci` /
// `Number::Sym`, on `Float`, `Fraction`, `ScientificNotation` and
// `SymbolicNumber`, on `pow` / `make_inexact`, and on error constants such as
// `E_SCIENTIFIC_E` and `E_ZERO_DENOMINATOR`, none of which are defined in the
// code visible here. Presumably they are kept as a specification for the
// finished implementation; confirm before relying on them.
#[cfg(test)]
mod tests {
use super::*;
// Literal parsing: one expected value or error per input string.
// NOTE(review): some expectations contradict each other (the input "2" is
// expected to fail with two different errors, and the first block expects
// `Number::`-qualified variants while the later ones use bare constructors);
// this looks like several per-type test lists merged into one.
#[test]
fn parse_number_tests() {
assert_eq!("1.3".parse::<Number>(),
Ok(Number::Flt(Float(1.3))));
assert_eq!("1".parse::<Number>(),
Ok(Number::Flt(Float(1 as f64))));
assert_eq!("1.3e3".parse::<Number>(),
Ok(Number::Sci(ScientificNotation(1.3, 3))));
assert_eq!("+1.3".parse::<Number>(),
Ok(Number::Flt(Float(1.3))));
assert_eq!("-1.3".parse::<Number>(),
Ok(Number::Flt(Float(-1.3))));
assert_eq!("#d234".parse::<Number>(),
Ok(Number::Flt(Float(234.0))));
assert_eq!("#o17".parse::<Number>(),
Ok(Number::Fra(Fraction(15, 1))));
assert_eq!("#xAA".parse::<Number>(),
Ok(Number::Fra(Fraction(170, 1))));
assert_eq!("#b101".parse::<Number>(),
Ok(Number::Flt(Float(5.0))));
assert_eq!("2/4".parse::<Number>(),
Ok(Number::Fra(Fraction(2, 4))));
assert_eq!("#e1/5".parse::<Number>(),
Ok(Number::Fra(Fraction(1, 5))));
assert_eq!("#i1/5".parse::<Number>(),
Ok(Number::Flt(Float(0.2))));
assert_eq!("#e1e1".parse::<Number>(),
Ok(Number::Sci(ScientificNotation(1.0, 1))));
assert_eq!("+inf.0".parse::<Number>(),
Ok(Number::Sym(SymbolicNumber::Inf)));
assert_eq!("2e3".parse::<Number>(),
Ok(ScientificNotation(2.0, 3)));
assert_eq!("0e1".parse::<Number>(),
Ok(ScientificNotation(0.0, 1)));
assert_eq!("-1e34".parse::<Number>(),
Ok(ScientificNotation(-1.0, 34)));
assert_eq!("3.3e3".parse::<Number>(),
Ok(ScientificNotation(3.3, 3)));
assert_eq!("2".parse::<Number>(),
Err(E_SCIENTIFIC_E));
assert_eq!("2e2e2".parse::<Number>(),
Err(E_SCIENTIFIC_MULTI_E));
assert_eq!("2/3".parse::<Number>(),
Ok(Fraction(2, 3)));
assert_eq!("0/1".parse::<Number>(),
Ok(Fraction(0, 1)));
assert_eq!("-1/34".parse::<Number>(),
Ok(Fraction(-1, 34)));
assert_eq!("2".parse::<Number>(),
Err(E_NO_DENOMINATOR));
assert_eq!("2/2/2".parse::<Number>(),
Err(E_MULTI_DENOMINATOR));
assert_eq!("2/0".parse::<Number>(),
Err(E_ZERO_DENOMINATOR));
assert_eq!("3.3/3".parse::<Number>(),
Err(E_NUMERATOR_PARSE_FAIL));
}
// Each case is [x, y, z] with x + y == z; commutativity and the matching
// subtractions are checked as well.
#[test]
fn test_number_addition_subtraction_cases() {
let cases = vec![
vec!["1/5", "4/5", "1/1"],
vec!["1/5", "0.8", "1/1"],
vec!["1e1", "2.0", "12/1"],
vec!["1e1", "2/1", "12/1"],
vec!["1e1", "1/2", "10.5"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
// test some mathematical properties
assert_eq!(x + y, z);
assert_eq!(x + y, y + x);
assert_eq!(z - x, y);
assert_eq!(x + y - x, y);
});
// theres no reason this should adhere to all the other rules
let x = "+inf.0".parse::<Number>().unwrap();
let y = "1e1".parse::<Number>().unwrap();
let z = "+inf.0".parse::<Number>().unwrap();
assert_eq!(x + y, z);
}
// Each case is [x, y, z] with x * y == z; commutativity and the matching
// divisions are checked as well.
#[test]
fn test_number_multiplication_division_cases() {
let cases = vec![
vec!["1/5", "5e0", "1/1"],
vec!["1/5", "5", "1/1"],
vec!["1/5", "2/1", "2/5"],
vec!["4.4", "1/2", "2.2"],
vec!["12.0", "1/2", "6/1"],
vec!["1e1", "2.0", "20/1"],
vec!["1e1", "2/1", "20/1"],
vec!["1e1", "1/2", "5/1"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
// test some mathematical properties
assert_eq!(x * y, z);
assert_eq!(x * y, y * x);
assert_eq!(z / x, y);
assert_eq!(x * y / x, y);
});
}
// Each case is [x, y, z] with x.pow(y) == z.
#[test]
fn test_number_pow_cases() {
// TODO: add scientific notation cases
let cases = vec![
vec!["2", "2", "4"],
vec!["2/1", "2/1", "4/1"],
vec!["2/1", "2/-1", "1/4"],
vec!["2/1", "2/2", "2/1"],
vec!["2/1", "2.0", "4/1"],
vec!["27/8", "2/-3", "4/9"]
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
assert_eq!(x.pow(y), z);
});
}
// Each case is [x, y, z] listed in strictly increasing order.
#[test]
fn test_number_ord_cases() {
// TODO: add more cases
let cases = vec![
vec!["1/2", "1.0", "1e1"],
];
cases.iter().for_each(|case| {
println!("+ {:#?}", case);
let x = case[0].parse::<Number>().unwrap();
let y = case[1].parse::<Number>().unwrap();
let z = case[2].parse::<Number>().unwrap();
assert!(x < y);
assert!(y < z);
assert!(x < z);
});
}
// "1e-1" converted to its inexact form is expected to be the float 0.1.
#[test]
fn float_negative_exponent_case() {
if let Float(0.1) = "1e-1"
.parse::<Number>()
.unwrap()
.make_inexact() {
return
}
assert!(false)
}
}