2025-05-07 09:19:33 -07:00
|
|
|
/* Mycelium Scheme
|
|
|
|
|
* Copyright (C) 2025 Ava Affine
|
|
|
|
|
*
|
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
use core::fmt;
|
2025-05-07 09:19:33 -07:00
|
|
|
use alloc::rc::Rc;
|
|
|
|
|
|
|
|
|
|
/* Punctuation that the lexer accepts as ordinary symbol characters
 * (seek_closing_pipe lets these through inside |...| symbols).
 */
pub const LEX_SPECIAL: [char; 18] = [
    '!', '$', '%', '&', '*', '+', '-', '/', ':',
    '<', '=', '>', '?', '@', '^', '_', '~', '.',
];

/* Characters classed as whitespace: space, newline, tab, carriage return. */
pub const LEX_WHITESPACE: [char; 4] = [' ', '\n', '\t', '\r'];

/* Non digit characters permitted inside a number literal.
 * seek_end_of_number allows at most one of them per literal and
 * only when the literal is base 10.
 */
pub const NUMERICAL_EXTRA: [char; 3] = ['.', 'i', 'e'];

/* Base selector characters understood by seek_end_of_number:
 * 'd' is base 10, 'o' is base 8 and 'b' is base 2.
 */
pub const NUMERICAL_BASE: [char; 3] = ['d', 'o', 'b'];
|
|
|
|
|
|
|
|
|
|
/* Error strings carried in field 0 of LexError.
 * They are grouped here by the kind of token they relate to.
 */

// unterminated delimiters
pub const E_NO_MATCHING_QUOTE: &str = "couldn't find matching quote";
pub const E_NO_MATCHING_PAREN: &str = "couldn't find matching paren";
pub const E_NO_CLOSING_PIPE: &str = "expected a closing pipe";
pub const E_EXTRA_CLOSE: &str = "extra closing parenthesis";

// comments (used by both the block and the line comment seekers)
pub const E_UNCLOSED_COMMENT: &str = "block comment has no end";

// number literals
pub const E_TOO_MANY_DECIMALS: &str = "number can only have one of {i e .}";
pub const E_NUMBER_TRUNCATED: &str = "number literal is truncated";
pub const E_UNIMPLEMENTED_HEX: &str = "hexadecimal literals not supported";
pub const E_NUMER_BASE_ERR: &str = "digit in number exceeds specified base";

// hash forms, characters and strings
pub const E_NO_END_TO_HASH: &str = "expected more input after hash";
pub const E_CHAR_TRUNCATED: &str = "character literal is truncated";
pub const E_STRING_TRUNCATED: &str = "string literal is truncated";
pub const E_UNSUPPORTED_ESC: &str = "unsupported escape";

// everything else
pub const E_BAD_DOT: &str = "expected space after dot in dotted notation";
pub const E_INCOMPREHENSIBLE: &str = "token does not lex";
pub const E_END_OF_DOCUMENT: &str = "no additional input left in document";
|
|
|
|
|
|
|
|
|
|
/* LexError
 * 0: error string
 * 1: index into document
 * 2: document in question
 *
 * The Display implementation prints a short excerpt of the document
 * around the index followed by the error string.
 */
#[derive(Clone)]
pub struct LexError(pub &'static str, pub usize, pub Rc<str>);

impl fmt::Display for LexError {
    /* Writes a header line, an excerpt of the offending line and the
     * error string. The excerpt covers at most 25 characters on either
     * side of the error index and never crosses a line break.
     *
     * An index beyond the end of the document, or one that falls inside
     * a multi byte character, is moved back to the nearest valid
     * position rather than causing a panic while formatting.
     */
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // how many characters of context to show on each side of the error
        const CONTEXT_CHARS: usize = 25;

        let doc: &str = &self.2;

        // clamp into the document and snap back onto a char boundary.
        // index 0 is always a boundary so this loop terminates.
        let mut at = self.1.min(doc.len());
        while !doc.is_char_boundary(at) {
            at -= 1;
        }

        /* backtrack from the error index until we either hit
         *  - beginning of line
         *  - 25 characters ago
         *  - the doc start
         */
        let start = doc[..at]
            .char_indices()
            .rev()
            .take(CONTEXT_CHARS)
            .take_while(|&(_, c)| c != '\n')
            .last()
            .map_or(at, |(i, _)| i);

        /* read through the document until we either hit
         *  - end of line
         *  - 25 characters forward
         *  - the doc end
         */
        let end = doc[at..]
            .char_indices()
            .take(CONTEXT_CHARS)
            .take_while(|&(_, c)| c != '\n')
            .last()
            .map_or(at, |(i, c)| at + i + c.len_utf8());

        write!(f, "Error when lexing document here:\n\n")?;
        write!(f, " {}\n", &doc[start..end])?;
        write!(f, "Error: {}\n", self.0)
    }
}
|
|
|
|
|
|
2025-05-07 09:19:33 -07:00
|
|
|
|
|
|
|
|
/* Every kind of token the lexer can produce.
 * Discriminants are consecutive u8 values starting at 0. NumTypes is
 * not a real token kind: it is one past the last kind and serves as
 * the exclusive upper bound when converting from u8.
 */
#[repr(u8)]
#[derive(Debug, PartialEq, Clone)]
pub enum LexTokenType {
    String = 0,
    Number,
    Char,
    Symbol,
    VectorStart,
    ByteVectorStart,
    ListStart,
    CollectionEnd,
    Boolean,
    Dot,
    Comment,
    Directive,
    Quote,
    QuasiQuote,
    Unquote,
    UnquoteSpliceTemplate,
    NumTypes,
}

impl TryFrom<u8> for LexTokenType {
    type Error = &'static str;

    /* Converts a raw discriminant back into a token type.
     * Values at or above NumTypes are rejected with an error string.
     */
    fn try_from(u: u8) -> Result<LexTokenType, Self::Error> {
        let in_range = u < LexTokenType::NumTypes as u8;
        if !in_range {
            return Err("out of token type range");
        }

        // SAFETY: the enum is repr(u8) with consecutive discriminants
        // 0..=NumTypes and u was just checked to be below NumTypes, so
        // it is the discriminant of an existing variant.
        Ok(unsafe { core::mem::transmute::<u8, LexTokenType>(u) })
    }
}
|
|
|
|
|
|
|
|
|
|
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
#[derive(Clone)]
|
2025-05-07 09:19:33 -07:00
|
|
|
pub struct LexToken {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
pub token_type: LexTokenType,
|
|
|
|
|
pub start_idx: usize,
|
|
|
|
|
pub end_idx: usize,
|
|
|
|
|
pub source_doc: Rc<str>,
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pub struct Lexer {
|
|
|
|
|
document: Rc<str>,
|
|
|
|
|
current_index: usize,
|
|
|
|
|
current_token_start: usize,
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
pub has_error_state: Option<LexError>,
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl From<Rc<str>> for Lexer {
|
|
|
|
|
fn from(s: Rc<str>) -> Lexer {
|
|
|
|
|
Lexer {
|
|
|
|
|
document: Rc::from(s),
|
|
|
|
|
current_index: 0,
|
|
|
|
|
current_token_start: 0,
|
|
|
|
|
has_error_state: None,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Iterator for Lexer {
|
|
|
|
|
type Item = LexToken;
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
|
if self.has_error_state.is_some() {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let res = self.seek_next_token();
|
|
|
|
|
if let Err(ref e) = res {
|
|
|
|
|
self.has_error_state = Some(e.clone());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return res.ok()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Lexer {
|
|
|
|
|
// I just didnt want to write and rewrite this...
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn current_char(&mut self) -> char {
|
|
|
|
|
self.document.as_bytes()[self.current_index] as char
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn peek_next_char(&mut self) -> Option<char> {
|
|
|
|
|
if let Some((_, ch)) = self.document[self.current_index+1..]
|
|
|
|
|
.char_indices()
|
|
|
|
|
.next() {
|
|
|
|
|
Some(ch)
|
|
|
|
|
} else {
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn advance_char(&mut self) -> Option<()> {
|
|
|
|
|
self.current_index += 1;
|
|
|
|
|
if let Some((idx, _)) = self.document[self.current_index..]
|
|
|
|
|
.char_indices()
|
|
|
|
|
.next() {
|
|
|
|
|
|
|
|
|
|
self.current_index = idx + self.current_index;
|
|
|
|
|
Some(())
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
self.current_index = self.document.len();
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn match_chunk_next(&mut self, chunk: &str) -> Option<bool> {
|
|
|
|
|
for i in chunk.chars() {
|
|
|
|
|
self.advance_char()?;
|
|
|
|
|
if i != self.current_char() {
|
|
|
|
|
return Some(false)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Some(true)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* TODO
|
|
|
|
|
* I figured this function would be useful for supporting hexadec encoding
|
|
|
|
|
* later down the line. We can use this instead of the base check in the
|
|
|
|
|
* number function.
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn next_chars_allowed(&mut self, len: usize, allowed: &str) -> Option<bool> {
|
|
|
|
|
let mut i = len;
|
|
|
|
|
while i < 0 {
|
|
|
|
|
if !allowed.contains(self.current_char()) {
|
|
|
|
|
return Some(false)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
i -= 1;
|
|
|
|
|
self.advance_char()?;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Some(true)
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn cut_new_token(&mut self, t: LexTokenType) -> Result<LexToken, LexError> {
|
|
|
|
|
let next_idx = self.advance_char()
|
|
|
|
|
.and_then(|_| Some(self.current_index))
|
|
|
|
|
.or(Some(self.document.len()))
|
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
|
|
let l = LexToken{
|
|
|
|
|
token_type: t,
|
|
|
|
|
start_idx: self.current_token_start,
|
|
|
|
|
end_idx: next_idx,
|
|
|
|
|
source_doc: self.document.clone(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
self.current_token_start = 0;
|
|
|
|
|
return Ok(l);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_string(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
// TODO: support escaped quotes
|
|
|
|
|
loop {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_NO_MATCHING_QUOTE,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
} else if self.current_char() == '"' {
|
|
|
|
|
return self.cut_new_token(LexTokenType::String)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_number(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
let mut base = 10;
|
|
|
|
|
let a = self.current_char();
|
|
|
|
|
if NUMERICAL_BASE.contains(&a) {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_NUMBER_TRUNCATED,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
match a {
|
|
|
|
|
'd' => base = 10,
|
|
|
|
|
'o' => base = 8,
|
|
|
|
|
'b' => base = 2,
|
|
|
|
|
_ => (),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut hasdot = false;
|
|
|
|
|
loop {
|
|
|
|
|
let a = self.current_char();
|
|
|
|
|
if NUMERICAL_EXTRA.contains(&a) {
|
|
|
|
|
if hasdot || base < 10 {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_TOO_MANY_DECIMALS,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
hasdot = true;
|
|
|
|
|
|
|
|
|
|
} else if a == ' ' || a == ')' {
|
|
|
|
|
// back up one
|
|
|
|
|
self.current_index -= 1;
|
|
|
|
|
return self.cut_new_token(LexTokenType::Number)
|
|
|
|
|
|
|
|
|
|
} else if !a.is_numeric() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_INCOMPREHENSIBLE,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
|
|
|
|
|
} else if a.to_digit(10).unwrap() >= base {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_NUMER_BASE_ERR,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let None = self.advance_char() {
|
|
|
|
|
self.current_index = self.document.len() - 1;
|
|
|
|
|
return self.cut_new_token(LexTokenType::Number)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_block_comment(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
loop {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_UNCLOSED_COMMENT,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
'|' if self.advance_char().and_then(|_|
|
|
|
|
|
if self.current_char() == '#' {
|
|
|
|
|
return Some(())
|
|
|
|
|
} else { return None }).is_some() =>
|
|
|
|
|
return self.cut_new_token(LexTokenType::Comment),
|
|
|
|
|
_ => continue,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_line_comment(&mut self, directive: bool) -> Result<LexToken, LexError> {
|
|
|
|
|
loop {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_UNCLOSED_COMMENT,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
'\n' if !directive => return self.cut_new_token(LexTokenType::Comment),
|
|
|
|
|
'\n' if directive => return self.cut_new_token(LexTokenType::Directive),
|
|
|
|
|
_ => continue,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_closing_pipe(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
loop {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_NO_CLOSING_PIPE,
|
|
|
|
|
self.current_token_start, self.document.clone()));
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let c = self.current_char();
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
'\\' => self.seek_end_of_escape(false)?,
|
|
|
|
|
'|' => return self.cut_new_token(LexTokenType::Symbol),
|
|
|
|
|
_ if c.is_alphanumeric() => continue,
|
|
|
|
|
_ if LEX_SPECIAL.contains(&c) => continue,
|
|
|
|
|
_ if c == ' ' || c == '\n' => continue,
|
|
|
|
|
// quote case caught here
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
_ => return Err(LexError(E_INCOMPREHENSIBLE,
|
|
|
|
|
self.current_token_start, self.document.clone())),
|
2025-05-07 09:19:33 -07:00
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_from_hash(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
let c = self.advance_char().and_then(|_| Some(self.current_char()));
|
|
|
|
|
if let Some(ch) = c {
|
|
|
|
|
match ch {
|
|
|
|
|
't' | 'f' => return self.cut_new_token(LexTokenType::Boolean),
|
|
|
|
|
'|' => return self.seek_end_of_block_comment(),
|
|
|
|
|
'!' => return self.seek_end_of_line_comment(true),
|
|
|
|
|
'u' if self.match_chunk_next("8(").is_some_and(|x| x) =>
|
|
|
|
|
return self.cut_new_token(LexTokenType::ByteVectorStart),
|
|
|
|
|
'(' => return self.cut_new_token(LexTokenType::VectorStart),
|
|
|
|
|
'\\' => self.seek_end_of_escape(false)
|
|
|
|
|
.and_then(|_| self.cut_new_token(LexTokenType::Char)),
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
'x' => return Err(LexError(E_UNIMPLEMENTED_HEX,
|
|
|
|
|
self.current_index, self.document.clone())),
|
2025-05-07 09:19:33 -07:00
|
|
|
_ if NUMERICAL_BASE.contains(&ch) => return self.seek_end_of_number(),
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
_ => return Err(LexError(E_INCOMPREHENSIBLE,
|
|
|
|
|
self.current_token_start, self.document.clone())),
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
Err(LexError(E_NO_END_TO_HASH, self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// DOES NOT RETURN A TOKEN.......
|
|
|
|
|
// only the caller knows what actually needs to be returned
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_escape(&mut self, in_string: bool) -> Result<(), LexError> {
|
|
|
|
|
//let delim = if in_string { ';' } else { ' ' };
|
|
|
|
|
// Delim and the arg to this function will be useful once we support hexadecimal encoding
|
|
|
|
|
if let None = self.advance_char() {
|
|
|
|
|
let mut error_msg = E_CHAR_TRUNCATED;
|
|
|
|
|
if in_string { error_msg = E_STRING_TRUNCATED; }
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(error_msg, self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
// eat an escaped whitespace or delim
|
|
|
|
|
' ' | 'n' | 'r' | 't' | '|' | '\\' | '"' => { () },
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
'x' => return Err(LexError(E_UNIMPLEMENTED_HEX,
|
|
|
|
|
self.current_token_start, self.document.clone())),
|
2025-05-07 09:19:33 -07:00
|
|
|
_ if self.current_char().is_alphabetic() => { () },
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
_ => return Err(LexError(E_UNSUPPORTED_ESC,
|
|
|
|
|
self.current_index, self.document.clone())),
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Called to output a token by the iterator implementation
|
|
|
|
|
* I dont think this has to be inlined. The other ones are inlined to
|
|
|
|
|
* prevent the process of parsing a token from being slowed down by
|
|
|
|
|
* so many stack frames. This one is called once per token.
|
|
|
|
|
*/
|
|
|
|
|
fn seek_next_token(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
let mut output: Option<Result<LexToken, LexError>> = None;
|
|
|
|
|
|
|
|
|
|
if self.current_index >= self.document.len() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_END_OF_DOCUMENT,
|
|
|
|
|
self.document.len(), self.document.clone()));
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while LEX_WHITESPACE.contains(&self.current_char()) {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_END_OF_DOCUMENT,
|
|
|
|
|
self.document.len(), self.document.clone()));
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.current_token_start = self.current_index;
|
|
|
|
|
|
|
|
|
|
// handle syntactic sugar cases
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
';' => output = Some(self.seek_end_of_line_comment(false)),
|
|
|
|
|
'\'' => output = Some(self.cut_new_token(LexTokenType::Quote)),
|
|
|
|
|
'`' => output = Some(self.cut_new_token(LexTokenType::QuasiQuote)),
|
|
|
|
|
'(' => output = Some(self.cut_new_token(LexTokenType::ListStart)),
|
|
|
|
|
')' => output = Some(self.cut_new_token(LexTokenType::CollectionEnd)),
|
|
|
|
|
'#' => output = Some(self.seek_end_from_hash()),
|
|
|
|
|
'"' => output = Some(self.seek_end_of_string()),
|
|
|
|
|
'\\' => output = Some(self.seek_end_of_escape(false)
|
|
|
|
|
.and_then(|_|
|
|
|
|
|
self.cut_new_token(LexTokenType::Char))),
|
|
|
|
|
'|' => output = Some(self.seek_closing_pipe()),
|
|
|
|
|
_ if self.current_char().is_numeric() => output =
|
|
|
|
|
Some(self.seek_end_of_number()),
|
|
|
|
|
_ => (),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if output.is_none() {
|
|
|
|
|
if self.current_char() == '.' {
|
|
|
|
|
if let Some(x) = self.peek_next_char() && x == ' ' {
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::Dot));
|
|
|
|
|
} /* else {
|
|
|
|
|
output = Some(Err(LexError(E_BAD_DOT, self.current_index)));
|
|
|
|
|
} SYKE! It could be a symbol... */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if self.current_char() == ',' {
|
|
|
|
|
if let Some(x) = self.peek_next_char() && x == '@'{
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::UnquoteSpliceTemplate));
|
|
|
|
|
} else {
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::Unquote));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Broken out into a separate case to maintain precedence of the
|
|
|
|
|
* unquote syntax and dotted notation.
|
|
|
|
|
*/
|
|
|
|
|
if output.is_none() {
|
|
|
|
|
loop {
|
|
|
|
|
let c = self.current_char();
|
|
|
|
|
if !c.is_alphanumeric() && !LEX_SPECIAL.contains(&c) && c != ' ' {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
output = Some(Err(LexError(E_INCOMPREHENSIBLE,
|
|
|
|
|
self.current_index, self.document.clone())));
|
2025-05-07 09:19:33 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(c) = self.peek_next_char() {
|
|
|
|
|
if c == ' ' || c == ')' {
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::Symbol));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.advance_char().unwrap();
|
|
|
|
|
} else {
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::Symbol));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(ref res) = output {
|
|
|
|
|
if let Err(ref e) = res {
|
|
|
|
|
self.has_error_state = Some(e.clone());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return output.unwrap()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /* Table-driven check of single-token lexing.
     *
     * The table holds one (happy, sad) pair per token type and is indexed
     * by LexTokenType. Every happy input must yield a first token of that
     * type; every sad input must yield no token at all.
     */
    #[test]
    fn test_token_evaluations() {
        // indexed by LexTokenType
        let cases: [(Vec<&str>, Vec<&str>); LexTokenType::NumTypes as usize] = [
            /* String Cases */
            (
                // HAPPY CASES
                vec![
                    "\"asdf\"",
                    "\"as sdf\"",
                    "\"asdflkj\\n\"",
                    "\"LKsldkf;l\"",
                    "\" sdlkfj \"",
                    "\"#;sdf\"",
                    "\"\"",
                ],
                // SAD CASES
                vec!["\"sdf"],
            ),
            /* Number Cases */
            (
                // HAPPY CASES
                vec!["1", "1.0", "#d1.1", "#o1423", "#b11"],
                // SAD CASES
                vec!["1.1.1", "#o9", "#b1.01", "#xADADAD"],
            ),
            /* Char Cases */
            (
                // HAPPY CASES
                vec!["\\a", "\\t", "\\\"", "#\\t"],
                // SAD CASES
                vec!["\\x20"],
            ),
            /* Identifier Cases */
            (
                // HAPPY CASES
                vec![
                    "...",
                    "+",
                    "+soup+",
                    "<=?",
                    "V17a",
                    "->string",
                    "a34kTMNs",
                    "lambda",
                    "q",
                    "list->vector",
                    "|two words|",
                    "|two\nwords|",
                    "the-word-recursion-has-many-meanings",
                ],
                // SAD CASES
                vec!["|\"\"|", "|(|", "|valid"],
            ),
            /* Vector Start Cases */
            (
                // HAPPY CASES
                vec!["#("],
                // SAD CASES
                vec![],
            ),
            /* Byte Vector Cases */
            (
                // HAPPY CASES
                vec!["#u8("],
                // SAD CASES
                vec!["#u8", "#u9", "#u("],
            ),
            /* List Start Cases */
            (
                // HAPPY CASES
                vec!["("],
                // SAD CASES
                vec![],
            ),
            /* Collection End Cases */
            (
                // HAPPY CASES
                vec![")"],
                // SAD CASES
                vec![],
            ),
            /* Boolean Cases */
            (
                // HAPPY CASES
                vec!["#t", "#f"],
                // SAD CASES
                vec![],
            ),
            /* Dot Cases */
            (
                // HAPPY CASES
                vec![" . "],
                // SAD CASES
                vec![],
            ),
            /* Comment cases */
            (
                // HAPPY CASES
                vec!["; (\n", "; #\n", ";\"\n", "#| ; ( \" |#"],
                // SAD CASES
                vec!["#|", "; "],
            ),
            /* Directive cases */
            (
                // HAPPY CASES
                vec!["#!test-directive\n"],
                // SAD CASES
                vec!["#!test-directive"],
            ),
            /* Quote cases */
            (
                // HAPPY CASES
                vec!["'"],
                // SAD CASES
                vec![],
            ),
            /* QuasiQuote cases */
            (
                // HAPPY CASES
                vec!["`"],
                // SAD CASES
                vec![],
            ),
            /* Unquote cases */
            (
                // HAPPY CASES
                vec![",x", ","],
                // SAD CASES
                vec![],
            ),
            /* UnquoteSpliceTemplate cases */
            (
                // HAPPY CASES
                vec![",@x", ",@(", ",@"],
                // SAD CASES
                vec![],
            ),
        ];

        /* Token types whose happy inputs are a superset of the actual
         * token substring, so the source slice is not compared for them.
         */
        let no_subtoken_check_cases = [
            LexTokenType::Dot as u8,
            LexTokenType::Unquote as u8,
            LexTokenType::UnquoteSpliceTemplate as u8,
        ];

        for (idx, (happy, sad)) in cases.iter().enumerate() {
            let expected_type = LexTokenType::try_from(idx as u8).unwrap();
            println!("+ Testing {:#?} Cases...", expected_type);

            let compare_source = !no_subtoken_check_cases.contains(&(idx as u8));

            for subcase in happy {
                println!(" - happy case: {}", subcase);
                let token = Lexer::from(Rc::from(*subcase)).next().unwrap();
                assert_eq!(token.token_type, expected_type);
                if compare_source {
                    assert_eq!(
                        &token.source_doc[token.start_idx..token.end_idx],
                        *subcase
                    );
                }
            }

            for subcase in sad {
                println!(" - sad case: {}", subcase);
                assert!(Lexer::from(Rc::from(*subcase)).next().is_none());
            }
        }
    }

    /* Lexes a small list and checks the type, index range and source
     * slice of every token produced.
     */
    #[test]
    fn test_multi_token_iter() {
        let res: Vec<_> = Lexer::from(Rc::from("( one two three )"))
            .into_iter()
            .collect();
        assert_eq!(res.len(), 5);

        // (token type, start_idx, end_idx, source slice)
        let expected = [
            (LexTokenType::ListStart, 0, 1, "("),
            (LexTokenType::Symbol, 2, 5, "one"),
            (LexTokenType::Symbol, 6, 9, "two"),
            (LexTokenType::Symbol, 10, 15, "three"),
            (LexTokenType::CollectionEnd, 16, 17, ")"),
        ];

        for (token, (token_type, start_idx, end_idx, text)) in res.iter().zip(expected) {
            assert_eq!(token.token_type, token_type);
            assert_eq!(token.start_idx, start_idx);
            assert_eq!(token.end_idx, end_idx);
            assert_eq!(&token.source_doc[token.start_idx..token.end_idx], text);
        }
    }

    /* Once a token fails to lex, the iterator must keep yielding None and
     * keep its error state, even though more input follows.
     */
    #[test]
    fn test_error_state_blocking() {
        let mut lexer = Lexer::from(Rc::from("( 1 2.2.2 valid_token")).into_iter();

        // The first two tokens lex cleanly.
        assert!(lexer.next().is_some());
        assert!(lexer.next().is_some());

        // The third token fails; the failure is sticky on the next call too.
        for _ in 0..2 {
            assert!(lexer.next().is_none());
            assert!(lexer.has_error_state.is_some());
        }
    }
}
|