// Mycelium/snippets/in_progress_numbers.rs

use alloc::boxed::Box;
use alloc::{vec, vec::Vec};
use alloc::fmt::Debug;
use lexer::{E_TOO_MANY_DECIMALS, E_TOO_MANY_SLASH};

use core::cmp::Ordering;
use core::{fmt, u8};
use core::ops::{Add, Div, Mul, Sub};


// Error messages returned by the number-literal parser.

/// The literal does not match any supported numeric syntax.
pub const E_INCOMPREHENSIBLE: &str = "could not parse number literal";
/// A `#` prefix was the last character of the input.
pub const E_POUND_TRUNCATED: &str = "pound sign implies additional input";
/// The digits following an explicit base prefix could not be parsed.
pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal";
/// The character following `#` is not a recognised prefix.
pub const E_UNKNOWN_CONTROL: &str = "unknown character in number literal";
/// The input string was empty.
pub const E_EMPTY_INPUT: &str = "empty string cannot be a number";

// Literal spellings of the symbolic (non-finite) values.

/// Positive infinity.
const NUM_INF: &str = "+inf.0";
/// Negative infinity.
const NUM_NEG_INF: &str = "-inf.0";
/// Not-a-number with a positive sign.
const NUM_NAN: &str = "+nan.0";
/// Not-a-number with a negative sign.
const NUM_NEG_NAN: &str = "-nan.0";

// Bit flags stored in the first byte of an encoded number.

/// Set for negative values; the value is positive when clear.
pub const NegativeFlag: u8 = 1 << 7;
/// Set for decimals; a single integer when clear.
pub const DecimalFlag: u8 = 1 << 6;
/// Set for fractions; a decimal when clear.
pub const FractionFlag: u8 = 1 << 5;
/// Set for scientific notation; requires a second flags byte.
pub const ScientificFlag: u8 = 1 << 4;
/// Set for infinity, which can be positive or negative.
pub const InfiniteFlag: u8 = 1 << 3;
/// Set for not-a-number, which can be positive or negative because r7rs.
pub const NotANumberFlag: u8 = 1 << 2;
/// Set when the value overflowed; poisons exactness.
pub const OverflownFlag: u8 = 1 << 1;


/* NUMBER BYTES FORMAT
 * Generally, the byte array is laid out like this:
 * (guaranteed header) 1. NumberFlags (u8)
 * (for each integer)  2. Byte Length (u8)
 * (for each integer)  3. The N bytes of data that follow (N = Byte Length)
 *
 * If Scientific Notation is used, the leading number may be a decimal.
 * In this case, there will be three numbers in total.
 *
 * All numbers are big endian.
 */
/// A borrowed view over the byte encoding of a number.
///
/// The wrapper does not own or validate the bytes; it only gives the slice a
/// numeric interpretation. `PartialEq` is deliberately NOT derived: this file
/// provides a hand-written `impl PartialEq for Number`, and deriving it as
/// well would be a conflicting implementation (E0119).
///
/// The type is `Copy` because it is nothing more than a shared slice
/// reference, which lets by-value operators reuse their operands.
#[repr(transparent)]
#[derive(Clone, Copy, Debug)]
pub struct Number<'src> (pub &'src [u8]);


/* WARNING
 * member functions tend to assume that number encoding is consistent
 * use Number::is_valid() to double check numbers from unknown sources
 *
 * TODO: maybe mark raw-indexing member functions as unsafe
 */
impl Number<'_> {
    /// Returns the number of bytes this number's encoding occupies.
    ///
    /// Infinite and not-a-number values are a single flags byte. Otherwise the
    /// result is the flags byte plus, for each encoded integer, its length
    /// byte and its data bytes: one integer for plain values, two when any of
    /// the decimal/fraction/scientific flags is set, and three when both the
    /// scientific and decimal flags are set.
    ///
    /// # Panics
    /// Indexes the slice directly, so it panics on truncated input, and the
    /// `u8` arithmetic can overflow for encodings longer than 255 bytes.
    /// Check unknown input with `is_valid` first.
    ///
    /// NOTE(review): like `is_valid`, this does not model the extra flags
    /// byte that `from_str_into` writes before an exponent — confirm which
    /// layout is authoritative.
    #[inline(always)]
    pub fn byte_length(&self) -> u8 {
        let flags = self.0[0];
        if flags & (InfiniteFlag | NotANumberFlag) != 0 {
            return 1;
        }

        // flags byte + first length byte + first integer's data
        let mut len = self.0[1] + 2;
        if flags & (DecimalFlag | FractionFlag | ScientificFlag) != 0 {
            // `len` is the index of the second integer's length byte
            len += self.0[len as usize] + 1;
        }

        if flags & ScientificFlag != 0 && flags & DecimalFlag != 0 {
            // `len` is the index of the third integer's length byte; count the
            // length byte itself as well as the data it announces
            len += self.0[len as usize] + 1;
        }

        len
    }

    /// Checks that the flags are coherent and that every length byte stays
    /// inside the slice, without ever panicking.
    ///
    /// Rejected encodings:
    /// * an empty slice;
    /// * the overflown flag set;
    /// * the fraction flag combined with the decimal or scientific flag;
    /// * infinite / not-a-number values that are not exactly one byte long,
    ///   that carry a decimal/fraction/scientific flag, or that set both the
    ///   infinite and not-a-number flags;
    /// * any integer whose announced length runs past the end of the slice.
    ///
    /// Trailing bytes after the last integer are tolerated.
    ///
    /// NOTE(review): the layout checked here has no room for the extra flags
    /// byte that `from_str_into` writes before an exponent — confirm which
    /// layout is authoritative.
    pub fn is_valid(&self) -> bool {
        let len = self.0.len();
        if len < 1 {
            return false;
        }

        let flags = self.0[0];
        let decimal    = flags & DecimalFlag != 0;
        let fraction   = flags & FractionFlag != 0;
        let scientific = flags & ScientificFlag != 0;
        let overflown  = flags & OverflownFlag != 0;
        let infinite   = flags & InfiniteFlag != 0;
        let notanumber = flags & NotANumberFlag != 0;

        // check flags
        if overflown {
            return false;
        }

        if fraction && (decimal || scientific) {
            return false;
        }

        // Symbolic values are fully described by the flags byte, so they must
        // be decided here; the integer layout checks below do not apply.
        if infinite || notanumber {
            return !(infinite && notanumber)
                && !(decimal || fraction || scientific)
                && len == 1;
        }

        // at least 3 bytes for a single u8
        if len < 3 {
            return false;
        }

        // The cursor is a usize so a hostile length byte cannot overflow it.
        let mut cur = self.0[1] as usize + 2;
        if len < cur {
            return false;
        }

        if decimal || fraction || scientific {
            // `cur` indexes the second integer's length byte
            if len < cur + 1 {
                return false;
            }

            // advance to the last data byte of the second integer
            cur += self.0[cur] as usize;
            if len < cur + 1 {
                return false;
            }
        }

        if scientific && decimal {
            // step onto the third integer's length byte
            cur += 1;
            if len < cur + 1 {
                return false;
            }

            // advance to the last data byte of the third integer
            cur += self.0[cur] as usize;
            if len < cur + 1 {
                return false;
            }
        }

        true
    }

    /// Reports whether the scientific-notation flag is clear in the flags byte.
    ///
    /// Indexes byte 0 directly, so it panics on an empty slice.
    #[inline(always)]
    pub fn is_exact(&self) -> bool {
        let flags = self.0[0];
        (flags & ScientificFlag) == 0
    }

    /// Appends an exact form of this number to `dst`.
    ///
    /// Scientific-notation values are routed through
    /// `normalize_scientific_into`; everything else is copied unchanged.
    #[inline(always)]
    pub fn make_exact_into(&self, dst:&mut Vec<u8>) {
        let is_scientific = (self.0[0] & ScientificFlag) != 0;

        if is_scientific {
            // expand scientific notation
            self.normalize_scientific_into(dst);
        } else {
            // already exact: direct copy
            self.copy_into(dst);
        }
    }

    /// Intended to append an inexact form of this number to `dst`.
    ///
    /// Not implemented yet: calling this always panics via `todo!()`, and
    /// `dst` is not touched.
    #[inline(always)]
    pub fn make_inexact_into(&self, dst: &mut Vec<u8>) {
        // basically just convert a fraction into an actual division
        todo!()
    }

    /// Appends this number's raw bytes to the end of `dst`.
    ///
    /// Lets callers duplicate the encoding into their own buffer without
    /// cloning the view while casting.
    #[inline(always)]
    pub fn copy_into(&self, dst: &mut Vec<u8>) {
        dst.extend_from_slice(self.0);
    }

    /// Intended to write the expanded (non-scientific) form of this number
    /// into `dst`; `make_exact_into` calls it for scientific values.
    ///
    /// Not implemented yet: calling this always panics via `todo!()`.
    #[inline(always)]
    pub fn normalize_scientific_into(&self, dst: &mut Vec<u8>) {
        todo!()
    }

    /// Intended to reduce a fraction to lowest terms.
    ///
    /// Returns immediately when the fraction flag is not set. For fractions it
    /// is not implemented yet and panics via `todo!()`.
    ///
    /// NOTE(review): `Number` wraps a shared `&[u8]`, so the bytes cannot be
    /// rewritten through `self.0` as the signature stands; an in-place
    /// version presumably needs a mutable buffer — confirm the intended API.
    #[inline(always)]
    pub fn simplify_fraction_in_place(&mut self) {
        // nothing to simplify unless this is a fraction
        if self.0[0] & FractionFlag == 0 {
            return
        }

        // can technically do this in place
        // each element of the fraction will only shrink
        todo!()
    }

    /// Parses a numeric literal and appends its byte encoding to `dst`.
    ///
    /// Accepted input:
    /// * the symbolic literals `+inf.0`, `-inf.0`, `+nan.0`, `-nan.0`
    ///   (encoded as a single flags byte);
    /// * an optional leading `+` / `-`, or a `#` prefix: `#x`, `#d`, `#o`,
    ///   `#b` select the base, `#i` / `#e` are accepted but not acted on yet;
    /// * base 10 only: one `.` (decimal), one `/` (fraction), and one `e`
    ///   exponent with an optional sign. A fraction cannot be combined with
    ///   either of the other two, and `.` may not follow `e`.
    ///
    /// Output layout: one flags byte, then every operand as
    /// `[byte length, big-endian bytes...]` using the fewest bytes that hold
    /// the value (zero is one `0` byte). For scientific notation a second
    /// flags byte, carrying `NegativeFlag` for a negative exponent, is written
    /// immediately before the exponent operand.
    ///
    /// The encoding is appended after whatever `dst` already holds.
    ///
    /// # Errors
    /// Returns one of the `E_*` messages when the literal is malformed, an
    /// operand has no digits, or an operand needs more than 255 bytes. On
    /// error `dst` is restored to the length it had on entry.
    ///
    /// NOTE(review): the digits after `.` are stored as a plain integer, so
    /// leading zeros there ("1.05" vs "1.5") are not preserved; this needs a
    /// format decision.
    /// TODO: reduce fractions once `simplify_fraction_in_place` is
    /// implemented; a `Number` view cannot modify `dst`, so that step has to
    /// work on the buffer directly.
    pub fn from_str_into(src: &str, dst: &mut Vec<u8>) -> Result<(), &'static str> {
        // Folds one digit into a big-endian magnitude: `mag = mag * base + digit`.
        fn push_digit(mag: &mut Vec<u8>, base: u32, digit: u32) {
            let mut carry = digit;
            for byte in mag.iter_mut().rev() {
                let wide = u32::from(*byte) * base + carry;
                *byte = (wide & 0xFF) as u8;
                carry = wide >> 8;
            }
            // carry < base <= 16 here, so at most one new leading byte
            if carry != 0 {
                mag.insert(0, carry as u8);
            }
        }

        // Appends `mag` to `dst` as `[byte length, bytes...]` and resets the
        // accumulator for the next operand.
        fn flush_operand(
            mag: &mut Vec<u8>,
            seen_digit: &mut bool,
            dst: &mut Vec<u8>,
            failure: &'static str,
        ) -> Result<(), &'static str> {
            // an operand needs at least one digit and must fit a u8 length
            if !*seen_digit || mag.len() > 0xFF {
                return Err(failure);
            }
            if mag.is_empty() {
                // zero still occupies one data byte
                mag.push(0);
            }
            dst.push(mag.len() as u8);
            dst.append(mag); // leaves `mag` empty
            *seen_digit = false;
            Ok(())
        }

        // Writes every operand to `dst` and returns the leading flags byte.
        fn parse_operands(src: &str, dst: &mut Vec<u8>) -> Result<u8, &'static str> {
            let mut ctrl_flags = 0u8;
            let mut base = 10u32; // default to decimal
            let mut iter = src.chars().peekable();

            // Peek so that a leading digit stays in the stream for the
            // operand loop instead of being dropped.
            match iter.peek().copied() {
                None => return Err(E_EMPTY_INPUT),
                Some('+') => {
                    iter.next();
                }
                Some('-') => {
                    iter.next();
                    ctrl_flags |= NegativeFlag;
                }
                Some('#') => {
                    iter.next();
                    match iter.next() {
                        None => return Err(E_POUND_TRUNCATED),
                        // exactness prefixes are accepted but not acted on yet
                        Some('i') | Some('e') => (),
                        Some('x') => base = 16,
                        Some('d') => base = 10,
                        Some('o') => base = 8,
                        Some('b') => base = 2,
                        Some(_) => return Err(E_UNKNOWN_CONTROL),
                    }
                }
                Some(c) if c.is_digit(10) => (),
                Some(_) => return Err(E_INCOMPREHENSIBLE),
            }

            let mut mag: Vec<u8> = Vec::new();
            let mut seen_digit = false;

            if base != 10 {
                // cant mix non-decimal base and other number representations
                for c in iter.by_ref() {
                    match c.to_digit(base) {
                        Some(digit) => {
                            push_digit(&mut mag, base, digit);
                            seen_digit = true;
                        }
                        None => return Err(E_BASE_PARSE_FAIL),
                    }
                }
                flush_operand(&mut mag, &mut seen_digit, dst, E_BASE_PARSE_FAIL)?;
                return Ok(ctrl_flags);
            }

            // just a decimal number, but could have a weird format
            loop {
                match iter.next() {
                    Some('.') => {
                        if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE);
                        }
                        if ctrl_flags & DecimalFlag != 0 {
                            return Err(E_TOO_MANY_DECIMALS);
                        }
                        ctrl_flags |= DecimalFlag;
                        flush_operand(&mut mag, &mut seen_digit, dst, E_INCOMPREHENSIBLE)?;
                    }

                    Some('/') => {
                        if ctrl_flags & (DecimalFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE);
                        }
                        if ctrl_flags & FractionFlag != 0 {
                            return Err(E_TOO_MANY_SLASH);
                        }
                        ctrl_flags |= FractionFlag;
                        flush_operand(&mut mag, &mut seen_digit, dst, E_INCOMPREHENSIBLE)?;
                    }

                    Some('e') => {
                        // only one exponent, and never on a fraction
                        if ctrl_flags & (FractionFlag | ScientificFlag) != 0 {
                            return Err(E_INCOMPREHENSIBLE);
                        }
                        ctrl_flags |= ScientificFlag;
                        flush_operand(&mut mag, &mut seen_digit, dst, E_INCOMPREHENSIBLE)?;

                        // The exponent sign is consumed here so the digit
                        // loop only ever sees digits.
                        let mut exponent_flags = 0u8;
                        match iter.peek().copied() {
                            Some('-') => {
                                exponent_flags |= NegativeFlag;
                                iter.next();
                            }
                            Some('+') => {
                                iter.next();
                            }
                            _ => (),
                        }
                        dst.push(exponent_flags);
                    }

                    Some(c) => match c.to_digit(10) {
                        Some(digit) => {
                            push_digit(&mut mag, 10, digit);
                            seen_digit = true;
                        }
                        None => return Err(E_INCOMPREHENSIBLE),
                    },

                    None => {
                        flush_operand(&mut mag, &mut seen_digit, dst, E_INCOMPREHENSIBLE)?;
                        break;
                    }
                }
            }

            Ok(ctrl_flags)
        }

        // handle symbolic values
        let symbolic = match src {
            NUM_INF => Some(InfiniteFlag),
            NUM_NEG_INF => Some(NegativeFlag | InfiniteFlag),
            NUM_NAN => Some(NotANumberFlag),
            NUM_NEG_NAN => Some(NegativeFlag | NotANumberFlag),
            _ => None,
        };
        if let Some(flags) = symbolic {
            dst.push(flags);
            return Ok(());
        }

        // The flags byte goes in front of this number's operands, which is
        // not necessarily index 0 when `dst` already holds data.
        let start = dst.len();
        match parse_operands(src, dst) {
            Ok(flags) => {
                dst.insert(start, flags);
                Ok(())
            }
            Err(reason) => {
                // do not leave half an encoding in the caller's buffer
                dst.truncate(start);
                Err(reason)
            }
        }
    }

    /// Appends the encoding of a single unsigned byte to `dst` and returns a
    /// view of the bytes just written.
    ///
    /// The encoding is `[flags = 0, byte length = 1, src]`, following the
    /// "flags, then length-prefixed integer" layout used by the rest of this
    /// file. Anything already in `dst` is left in front of the new number and
    /// is not part of the returned view.
    pub fn from_u8_into(src: u8, dst: &mut Vec<u8>) -> Number {
        let start = dst.len();
        dst.push(0 as u8); // flags: positive, single integer
        dst.push(1);       // byte length of the integer that follows
        dst.push(src);
        Number(&dst[start..])
    }
}

// Text formatting for numbers.
//
// Not implemented yet: formatting a `Number` with `{}` panics via `todo!()`.
impl fmt::Display for Number<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // can implement after I finish division
        todo!()
    }
}

impl<'a> From<&'a Box<[u8]>> for Number<'a> {
    fn from(value: &'a Box<[u8]>) -> Self {
        Number(value.as_ref())
    }
}

impl<'a> From<&'a Vec<u8>> for Number<'a> {
    fn from(value: &'a Vec<u8>) -> Self {
        Number(value.as_slice())
    }
}

impl<'a> From<&'a [u8]> for Number<'a> {
    fn from(value: &'a [u8]) -> Self {
        Number(value)
    }
}

impl<'a> Into<&'a [u8]> for Number<'a> {
    fn into(self) -> &'a [u8] {
        self.0
    }
}

// Addition of two numbers, producing a freshly allocated encoding.
// Not implemented yet: `a + b` panics via `todo!()`.
impl Add for Number<'_> {
    type Output = Box<[u8]>;
    fn add(self, rhs: Self) -> Self::Output {
        todo!()
    }
}

// Subtraction of two numbers, producing a freshly allocated encoding.
// Not implemented yet: `a - b` panics via `todo!()`.
impl Sub for Number<'_> {
    type Output = Box<[u8]>;
    fn sub(self, rhs: Self) -> Self::Output {
        todo!()
    }
}

// Multiplication of two numbers, producing a freshly allocated encoding.
// Not implemented yet: `a * b` panics via `todo!()`.
impl Mul for Number<'_> {
    type Output = Box<[u8]>;
    fn mul(self, rhs: Self) -> Self::Output {
        todo!()
    }
}

// Division of two numbers, producing a freshly allocated encoding.
// Not implemented yet: both the operator and its `div_ints` helper panic via
// `todo!()`. The comments below record the planned case analysis.
impl Div for Number<'_> {
    type Output = Box<[u8]>;
    fn div(self, rhs: Self) -> Self::Output {
        // divide unsigned integer by unsigned integer
        // the inputs (lh and rh) start with length byte
        // returns a decimal index
        fn div_ints(lh: &[u8], rh: &[u8], dest: &mut Vec<u8>) -> u8 {
            todo!()
        }

        /* Options
         * divide a single int by a single int
         *    - (make fraction)
         * divide a fraction by a single int
         *    - (multiply denominator)
         * divide a decimal by a single int
         *    - (divide straight through)
         * divide a scientific note by a single int
         *    - divide the first num
         *    - multiply by however much is needed for ones place (like 3.5)
         *    - add or subtract from the second number accordingly
         *
         * divide a single int by a fraction
         *    - output denom * lh / numer
         * divide a single int by a decimal
         */
        todo!()
    }
}

// Hand-written equality for numbers.
// Not implemented yet: `a == b` panics via `todo!()`.
// NOTE(review): presumably meant to compare numeric value across
// representations rather than raw bytes — confirm before implementing.
impl PartialEq for Number<'_> {
    fn eq(&self, other: &Number) -> bool {
        todo!()
    }
}

// Ordering between numbers.
// Not implemented yet: any comparison (`<`, `>`, ...) panics via `todo!()`.
impl PartialOrd for Number<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        todo!()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Expected parse results for a range of literals: plain, signed,
    // base-prefixed, exactness-prefixed, scientific, fractional, symbolic,
    // plus several error cases.
    //
    // NOTE(review): `Number::Flt`, `Float`, `Fraction`, `ScientificNotation`,
    // `SymbolicNumber`, a `FromStr` impl for `Number` and the `E_SCIENTIFIC_*`
    // / `E_*_DENOMINATOR` / `E_NUMERATOR_PARSE_FAIL` constants are not defined
    // in the visible source; this test appears to target an earlier API and
    // needs porting to `Number::from_str_into` — TODO confirm.
    // NOTE(review): the literal "2" is expected to fail with two different
    // errors below while "1" is expected to succeed; these look like
    // assertions merged from per-type parsers — TODO confirm.
    #[test]
    fn parse_number_tests() {
        assert_eq!("1.3".parse::<Number>(),
            Ok(Number::Flt(Float(1.3))));

        assert_eq!("1".parse::<Number>(),
            Ok(Number::Flt(Float(1 as f64))));

        assert_eq!("1.3e3".parse::<Number>(),
            Ok(Number::Sci(ScientificNotation(1.3, 3))));

        assert_eq!("+1.3".parse::<Number>(),
            Ok(Number::Flt(Float(1.3))));

        assert_eq!("-1.3".parse::<Number>(),
            Ok(Number::Flt(Float(-1.3))));

        assert_eq!("#d234".parse::<Number>(),
            Ok(Number::Flt(Float(234.0))));

        assert_eq!("#o17".parse::<Number>(),
            Ok(Number::Fra(Fraction(15, 1))));

        assert_eq!("#xAA".parse::<Number>(),
            Ok(Number::Fra(Fraction(170, 1))));

        assert_eq!("#b101".parse::<Number>(),
            Ok(Number::Flt(Float(5.0))));

        assert_eq!("2/4".parse::<Number>(),
            Ok(Number::Fra(Fraction(2, 4))));

        assert_eq!("#e1/5".parse::<Number>(),
            Ok(Number::Fra(Fraction(1, 5))));

        assert_eq!("#i1/5".parse::<Number>(),
            Ok(Number::Flt(Float(0.2))));

        assert_eq!("#e1e1".parse::<Number>(),
            Ok(Number::Sci(ScientificNotation(1.0, 1))));

        assert_eq!("+inf.0".parse::<Number>(),
            Ok(Number::Sym(SymbolicNumber::Inf)));

        assert_eq!("2e3".parse::<Number>(),
            Ok(ScientificNotation(2.0, 3)));

        assert_eq!("0e1".parse::<Number>(),
            Ok(ScientificNotation(0.0, 1)));

        assert_eq!("-1e34".parse::<Number>(),
            Ok(ScientificNotation(-1.0, 34)));

        assert_eq!("3.3e3".parse::<Number>(),
            Ok(ScientificNotation(3.3, 3)));

        assert_eq!("2".parse::<Number>(),
            Err(E_SCIENTIFIC_E));

        assert_eq!("2e2e2".parse::<Number>(),
            Err(E_SCIENTIFIC_MULTI_E));

        assert_eq!("2/3".parse::<Number>(),
            Ok(Fraction(2, 3)));

        assert_eq!("0/1".parse::<Number>(),
            Ok(Fraction(0, 1)));

        assert_eq!("-1/34".parse::<Number>(),
            Ok(Fraction(-1, 34)));

        assert_eq!("2".parse::<Number>(),
            Err(E_NO_DENOMINATOR));

        assert_eq!("2/2/2".parse::<Number>(),
            Err(E_MULTI_DENOMINATOR));

        assert_eq!("2/0".parse::<Number>(),
            Err(E_ZERO_DENOMINATOR));

        assert_eq!("3.3/3".parse::<Number>(),
            Err(E_NUMERATOR_PARSE_FAIL));
    }

    // Table-driven checks for `+` and `-`: each row is `[x, y, z]` with the
    // expectation `x + y == z`, plus commutativity and round-tripping through
    // subtraction. A final case checks that adding to `+inf.0` stays `+inf.0`.
    //
    // NOTE(review): relies on `.parse::<Number>()` and on reusing operands
    // after they were passed by value; neither a `FromStr` impl nor the
    // operator bodies exist in the visible source — TODO confirm/port.
    #[test]
    fn test_number_addition_subtraction_cases() {
        let cases = vec![
            vec!["1/5", "4/5", "1/1"],
            vec!["1/5", "0.8", "1/1"],
            vec!["1e1", "2.0", "12/1"],
            vec!["1e1", "2/1", "12/1"],
            vec!["1e1", "1/2", "10.5"],
        ];

        cases.iter().for_each(|case| {
            println!("+ {:#?}", case);
            let x = case[0].parse::<Number>().unwrap();
            let y = case[1].parse::<Number>().unwrap();
            let z = case[2].parse::<Number>().unwrap();

            // test some mathematical properties
            assert_eq!(x + y, z);
            assert_eq!(x + y, y + x);
            assert_eq!(z - x, y);
            assert_eq!(x + y - x, y);
        });

        // theres no reason this should adhere to all the other rules
        let x = "+inf.0".parse::<Number>().unwrap();
        let y = "1e1".parse::<Number>().unwrap();
        let z = "+inf.0".parse::<Number>().unwrap();
        assert_eq!(x + y, z);
    }

    // Table-driven checks for `*` and `/`: each row is `[x, y, z]` with the
    // expectation `x * y == z`, plus commutativity and round-tripping through
    // division.
    //
    // NOTE(review): relies on `.parse::<Number>()` and on reusing operands
    // after they were passed by value; neither a `FromStr` impl nor the
    // operator bodies exist in the visible source — TODO confirm/port.
    #[test]
    fn test_number_multiplication_division_cases() {
        let cases = vec![
            vec!["1/5", "5e0", "1/1"],
            vec!["1/5", "5", "1/1"],
            vec!["1/5", "2/1", "2/5"],
            vec!["4.4", "1/2", "2.2"],
            vec!["12.0", "1/2", "6/1"],
            vec!["1e1", "2.0", "20/1"],
            vec!["1e1", "2/1", "20/1"],
            vec!["1e1", "1/2", "5/1"],
        ];

        cases.iter().for_each(|case| {
            println!("+ {:#?}", case);
            let x = case[0].parse::<Number>().unwrap();
            let y = case[1].parse::<Number>().unwrap();
            let z = case[2].parse::<Number>().unwrap();

            // test some mathematical properties
            assert_eq!(x * y, z);
            assert_eq!(x * y, y * x);
            assert_eq!(z / x, y);
            assert_eq!(x * y / x, y);
        });
    }

    // Table-driven checks for exponentiation: each row is `[x, y, z]` with
    // the expectation `x.pow(y) == z`.
    //
    // NOTE(review): `Number::pow` and `.parse::<Number>()` are not defined in
    // the visible source, and rows such as "2/-1" use a negative denominator
    // that `from_str_into` does not accept — TODO confirm/port.
    #[test]
    fn test_number_pow_cases() {
        // TODO: add scientific notation cases
        let cases = vec![
            vec!["2", "2", "4"],
            vec!["2/1", "2/1", "4/1"],
            vec!["2/1", "2/-1", "1/4"],
            vec!["2/1", "2/2", "2/1"],
            vec!["2/1", "2.0", "4/1"],
            vec!["27/8", "2/-3", "4/9"]
        ];

        cases.iter().for_each(|case| {
            println!("+ {:#?}", case);
            let x = case[0].parse::<Number>().unwrap();
            let y = case[1].parse::<Number>().unwrap();
            let z = case[2].parse::<Number>().unwrap();
            assert_eq!(x.pow(y), z);
        });
    }

    // Table-driven ordering checks: each row is `[x, y, z]` with the
    // expectation `x < y < z`, including the transitive `x < z`.
    //
    // NOTE(review): relies on `.parse::<Number>()`, which is not defined in
    // the visible source, and on `partial_cmp`, which is still a stub —
    // TODO confirm/port.
    #[test]
    fn test_number_ord_cases() {
        // TODO: add more cases
        let cases = vec![
            vec!["1/2", "1.0", "1e1"],
        ];

        cases.iter().for_each(|case| {
            println!("+ {:#?}", case);
            let x = case[0].parse::<Number>().unwrap();
            let y = case[1].parse::<Number>().unwrap();
            let z = case[2].parse::<Number>().unwrap();
            assert!(x < y);
            assert!(y < z);
            assert!(x < z);
        });
    }

    // Expects "1e-1" to parse and convert to the inexact value 0.1; the test
    // passes by returning early and otherwise fails on `assert!(false)`.
    //
    // NOTE(review): `Float`, `make_inexact` and `.parse::<Number>()` are not
    // defined in the visible source — TODO confirm/port.
    #[test]
    fn float_negative_exponent_case() {
        if let Float(0.1) = "1e-1"
            .parse::<Number>()
            .unwrap()
            .make_inexact() {
                return
        }

        assert!(false)
    }
}