Number library and integrations
This commit adds a number library that handles fractions, floats, whole numbers, scientific notation, and special symbolic numbers, all according to the R7RS-small specification. A Numeric trait abstracts operations across all number types, and a Number enum offers a non-opaque type that can store any kind of number. The following traits are implemented on the Number enum:

- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd

These provide the following operators on Number instances: + - / * == != < > <= >= and, of course, x.pow(y).

Additionally, the number package contains parsing logic for each type of number. FromStr is implemented as part of the Numeric trait, and in turn on Number. Likewise, Into<String> is implemented for the Numeric trait and then on the Number enum as well. Test cases have been added for basic cases but could be expanded.

Additional modifications:

- LexError has a custom Display implementation that properly outputs formatted errors.
- The sexpr package has been updated to use the new number package.

Signed-off-by: Ava Affine <ava@sunnypup.io>
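As a quick illustration (a sketch only: the number package itself is not shown in the diff below, so the import path, the accepted literal forms, and the demo function are assumptions based on the description above), the described traits compose like this:

    use core::str::FromStr;
    // use mycrate::number::Number; // hypothetical import path

    fn demo() {
        // FromStr, via the Numeric trait, parses each supported notation.
        let a = Number::from_str("1/3").unwrap();   // fraction (assumed literal form)
        let b = Number::from_str("2.5e1").unwrap(); // scientific notation

        // PartialOrd gives the comparison operators directly on Number.
        assert!(a < b);

        // Add consumes both operands and yields another Number.
        let sum = a + b;

        // Into<String> renders the result back to text.
        let _text: String = sum.into();
    }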
parent 6554a0639a
commit 41216d3526
6 changed files with 992 additions and 31 deletions
@@ -15,6 +15,7 @@
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  */
 
+use core::fmt;
 use alloc::rc::Rc;
 
 pub const LEX_SPECIAL: [char; 18] = ['!', '$', '%', '&', '*', '+', '-', '/',
@@ -37,19 +38,79 @@ pub const E_UNIMPLEMENTED_HEX: &str = "hexadecimal literals not supported";
 pub const E_NUMER_BASE_ERR: &str = "digit in number exceeds specified base";
 pub const E_UNSUPPORTED_ESC: &str = "unsupported escape";
 pub const E_BAD_DOT: &str = "expected space after dot in dotted notation";
 pub const E_NO_SPLICE_TEMPL: &str = "expected more input after unquote splicing";
 pub const E_INCOMPREHENSIBLE: &str = "token does not lex";
 pub const E_END_OF_DOCUMENT: &str = "no additional input left in document";
 
 /* LexError
  * 0: error string
  * 1: index into document
+ * 2: document in question
  */
-pub struct LexError(pub &'static str, pub usize);
+#[derive(Clone)]
+pub struct LexError(pub &'static str, pub usize, pub Rc<str>);
+
+impl fmt::Display for LexError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let err_snippet_start = || -> usize {
+            /* backtrack from the error index until we hit
+             * - the beginning of the line,
+             * - 25 characters back, or
+             * - the start of the document
+             */
+            if self.2.len() < 25 {
+                0
+            } else {
+                let mut idx = self.1;
+                while idx > 0 && self.1 - idx < 25 {
+                    if self.2.as_bytes()[idx - 1] == b'\n' {
+                        break;
+                    }
+                    idx -= 1;
+                }
+                idx
+            }
+        };
+
+        let err_snippet_end = || -> usize {
+            /* read through the document until we hit
+             * - the end of the line,
+             * - 25 characters forward, or
+             * - the end of the document
+             */
+            if self.2.len() - self.1 < 25 {
+                self.2.len()
+            } else {
+                let mut idx = self.1;
+                while idx - self.1 < 25 {
+                    if self.2.as_bytes()[idx] == b'\n' {
+                        break;
+                    }
+                    idx += 1;
+                }
+                idx
+            }
+        };
+
+        write!(f, "Error when lexing document here:\n\n")?;
+        write!(f, " {}\n", &self.2[err_snippet_start()..err_snippet_end()])?;
+        write!(f, "Error: {}\n", self.0)
+    }
+}
 
 
 #[repr(u8)]
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Clone)]
 pub enum LexTokenType {
     String = 0,
     Number,
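For reference (a sketch, not part of the diff): given the 25-character windowing above, a lex error over a hypothetical input like (define x 12b4) might render as:

    Error when lexing document here:

     (define x 12b4)

    Error: digit in number exceeds specified base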
@@ -82,11 +143,12 @@ impl TryFrom<u8> for LexTokenType {
 }
 
 
+#[derive(Clone)]
 pub struct LexToken {
-    token_type: LexTokenType,
-    start_idx: usize,
-    end_idx: usize,
-    source_doc: Rc<str>,
+    pub token_type: LexTokenType,
+    pub start_idx: usize,
+    pub end_idx: usize,
+    pub source_doc: Rc<str>,
 }
 
 
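A sketch (not in the commit) of what the newly public fields enable, assuming start_idx and end_idx are byte offsets into source_doc:

    // Recover the lexeme text directly from the shared source document.
    fn lexeme(tok: &LexToken) -> &str {
        &tok.source_doc[tok.start_idx..tok.end_idx]
    }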
@@ -94,7 +156,7 @@ pub struct Lexer {
     document: Rc<str>,
     current_index: usize,
     current_token_start: usize,
-    has_error_state: Option<LexError>,
+    pub has_error_state: Option<LexError>,
 }
 
 impl From<Rc<str>> for Lexer {
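Likewise a sketch (not in the commit, and assuming a std consumer of the crate): the now-public error state pairs with the new Display impl for direct reporting.

    fn report(lexer: &Lexer) {
        if let Some(err) = &lexer.has_error_state {
            // LexError now carries its own copy of the document, so the
            // message includes the offending snippet.
            println!("{}", err);
        }
    }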
@@ -214,7 +276,8 @@ impl Lexer {
         // TODO: support escaped quotes
         loop {
             if let None = self.advance_char() {
-                return Err(LexError(E_NO_MATCHING_QUOTE, self.current_token_start))
+                return Err(LexError(E_NO_MATCHING_QUOTE,
+                    self.current_token_start, self.document.clone()))
             } else if self.current_char() == '"' {
                 return self.cut_new_token(LexTokenType::String)
             }
@@ -227,7 +290,8 @@ impl Lexer {
         let a = self.current_char();
         if NUMERICAL_BASE.contains(&a) {
             if let None = self.advance_char() {
-                return Err(LexError(E_NUMBER_TRUNCATED, self.current_token_start))
+                return Err(LexError(E_NUMBER_TRUNCATED,
+                    self.current_token_start, self.document.clone()))
             }
             match a {
                 'd' => base = 10,
@@ -242,7 +306,8 @@ impl Lexer {
             let a = self.current_char();
             if NUMERICAL_EXTRA.contains(&a) {
                 if hasdot || base < 10 {
-                    return Err(LexError(E_TOO_MANY_DECIMALS, self.current_token_start))
+                    return Err(LexError(E_TOO_MANY_DECIMALS,
+                        self.current_token_start, self.document.clone()))
                 }
                 hasdot = true;
 
@@ -252,10 +317,12 @@ impl Lexer {
                 return self.cut_new_token(LexTokenType::Number)
 
             } else if !a.is_numeric() {
-                return Err(LexError(E_INCOMPREHENSIBLE, self.current_token_start))
+                return Err(LexError(E_INCOMPREHENSIBLE,
+                    self.current_token_start, self.document.clone()))
 
             } else if a.to_digit(10).unwrap() >= base {
-                return Err(LexError(E_NUMER_BASE_ERR, self.current_token_start))
+                return Err(LexError(E_NUMER_BASE_ERR,
+                    self.current_token_start, self.document.clone()))
             }
 
             if let None = self.advance_char() {
@@ -269,7 +336,8 @@ impl Lexer {
     fn seek_end_of_block_comment(&mut self) -> Result<LexToken, LexError> {
         loop {
             if let None = self.advance_char() {
-                return Err(LexError(E_UNCLOSED_COMMENT, self.current_token_start))
+                return Err(LexError(E_UNCLOSED_COMMENT,
+                    self.current_token_start, self.document.clone()))
             }
 
             match self.current_char() {
@@ -287,7 +355,8 @@ impl Lexer {
     fn seek_end_of_line_comment(&mut self, directive: bool) -> Result<LexToken, LexError> {
         loop {
             if let None = self.advance_char() {
-                return Err(LexError(E_UNCLOSED_COMMENT, self.current_token_start))
+                return Err(LexError(E_UNCLOSED_COMMENT,
+                    self.current_token_start, self.document.clone()))
             }
 
             match self.current_char() {
@@ -302,7 +371,8 @@ impl Lexer {
     fn seek_closing_pipe(&mut self) -> Result<LexToken, LexError> {
         loop {
             if let None = self.advance_char() {
-                return Err(LexError(E_NO_CLOSING_PIPE, self.current_token_start));
+                return Err(LexError(E_NO_CLOSING_PIPE,
+                    self.current_token_start, self.document.clone()));
             }
 
             let c = self.current_char();
@@ -313,7 +383,8 @@ impl Lexer {
                 _ if LEX_SPECIAL.contains(&c) => continue,
                 _ if c == ' ' || c == '\n' => continue,
                 // quote case caught here
-                _ => return Err(LexError(E_INCOMPREHENSIBLE, self.current_token_start)),
+                _ => return Err(LexError(E_INCOMPREHENSIBLE,
+                    self.current_token_start, self.document.clone())),
             };
         }
     }
@@ -331,12 +402,14 @@ impl Lexer {
                 '(' => return self.cut_new_token(LexTokenType::VectorStart),
                 '\\' => self.seek_end_of_escape(false)
                     .and_then(|_| self.cut_new_token(LexTokenType::Char)),
-                'x' => return Err(LexError(E_UNIMPLEMENTED_HEX, self.current_index)),
+                'x' => return Err(LexError(E_UNIMPLEMENTED_HEX,
+                    self.current_index, self.document.clone())),
                 _ if NUMERICAL_BASE.contains(&ch) => return self.seek_end_of_number(),
-                _ => return Err(LexError(E_INCOMPREHENSIBLE, self.current_token_start)),
+                _ => return Err(LexError(E_INCOMPREHENSIBLE,
+                    self.current_token_start, self.document.clone())),
             }
         } else {
-            Err(LexError(E_NO_END_TO_HASH, self.current_token_start))
+            Err(LexError(E_NO_END_TO_HASH, self.current_token_start, self.document.clone()))
         }
     }
 
@@ -349,15 +422,17 @@ impl Lexer {
         if let None = self.advance_char() {
             let mut error_msg = E_CHAR_TRUNCATED;
             if in_string { error_msg = E_STRING_TRUNCATED; }
-            return Err(LexError(error_msg, self.current_token_start))
+            return Err(LexError(error_msg, self.current_token_start, self.document.clone()))
         }
 
         match self.current_char() {
             // eat an escaped whitespace or delim
             ' ' | 'n' | 'r' | 't' | '|' | '\\' | '"' => { () },
-            'x' => return Err(LexError(E_UNIMPLEMENTED_HEX, self.current_token_start)),
+            'x' => return Err(LexError(E_UNIMPLEMENTED_HEX,
+                self.current_token_start, self.document.clone())),
             _ if self.current_char().is_alphabetic() => { () },
-            _ => return Err(LexError(E_UNSUPPORTED_ESC, self.current_index)),
+            _ => return Err(LexError(E_UNSUPPORTED_ESC,
+                self.current_index, self.document.clone())),
         }
 
         return Ok(())
@@ -372,12 +447,14 @@ impl Lexer {
         let mut output: Option<Result<LexToken, LexError>> = None;
 
         if self.current_index >= self.document.len() {
-            return Err(LexError(E_END_OF_DOCUMENT, self.document.len()));
+            return Err(LexError(E_END_OF_DOCUMENT,
+                self.document.len(), self.document.clone()));
         }
 
         while LEX_WHITESPACE.contains(&self.current_char()) {
             if let None = self.advance_char() {
-                return Err(LexError(E_END_OF_DOCUMENT, self.document.len()));
+                return Err(LexError(E_END_OF_DOCUMENT,
+                    self.document.len(), self.document.clone()));
             }
         }
 
@@ -426,7 +503,8 @@ impl Lexer {
         loop {
             let c = self.current_char();
             if !c.is_alphanumeric() && !LEX_SPECIAL.contains(&c) && c != ' ' {
-                output = Some(Err(LexError(E_INCOMPREHENSIBLE, self.current_index)));
+                output = Some(Err(LexError(E_INCOMPREHENSIBLE,
+                    self.current_index, self.document.clone())));
                 break;
             }
 
@@ -466,7 +544,8 @@ mod tests {
         /* String Cases */ (
             // HAPPY CASES
             vec!["\"asdf\"", "\"as sdf\"", "\"asdflkj\\n\"",
-                 "\"LKsldkf;l\"", "\" sdlkfj \"", "\"#;sdf\""],
+                 "\"LKsldkf;l\"", "\" sdlkfj \"", "\"#;sdf\"",
+                 "\"\""],
 
             // SAD CASES
             vec!["\"sdf"]
@@ -592,7 +671,7 @@ mod tests {
             vec![",@x", ",@(", ",@"],
 
             // SAD CASES
-            vec![","]
+            vec![]
         ),
     ];
 