2025-05-07 09:19:33 -07:00
|
|
|
/* Mycelium Scheme
 * Copyright (C) 2025 Ava Affine
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
|
|
|
|
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
use core::fmt;
|
2025-05-07 09:19:33 -07:00
|
|
|
use alloc::rc::Rc;
|
|
|
|
|
|
|
|
|
|
/* Character tables consulted by the lexer. */

// NOTE(review): presumably the punctuation accepted inside symbols;
// no use of this table is visible in this part of the file -- confirm.
pub const LEX_SPECIAL: [char; 18] = ['!', '$', '%', '&', '*', '+', '-', '/',
                                     ':', '<', '=', '>', '?', '@', '^', '_', '~', '.'];

// NOTE(review): presumably the characters skipped between tokens -- confirm.
pub const LEX_WHITESPACE: [char; 4] = [' ', '\n', '\t', '\r'];

// NOTE(review): looks like the non-digit characters that may appear in a
// number literal; not referenced in the visible code -- confirm.
pub const NUMERICAL_EXTRA: [char; 4] = ['.', 'i', 'e', '/'];

// radix letters recognised by seek_end_of_number:
// d = 10, o = 8, b = 2, x = 16
pub const NUMERICAL_BASE: [char; 4] = ['d', 'o', 'b', 'x'];

// characters that end a number token in seek_end_of_number
pub const TOK_DELIMITERS: [char; 5] = [')', ' ', '\t', '\n', '\r'];
|
2025-05-07 09:19:33 -07:00
|
|
|
|
|
|
|
|
/* Error messages
 * Each of these is carried as field 0 of a LexError and printed
 * after "Error: " by its Display implementation.
 */
pub const E_NO_MATCHING_QUOTE: &str = "couldn't find matching quote";
pub const E_TOO_MANY_DECIMALS: &str = "number can only have one dot";
pub const E_TOO_MANY_SLASH: &str = "number can only have one slash";
pub const E_TOO_MANY_E: &str = "number can only have one e";
pub const E_NO_MATCHING_PAREN: &str = "couldn't find matching paren";
// also returned when a line comment or directive reaches the end of the
// document without a newline (see seek_end_of_line_comment)
pub const E_UNCLOSED_COMMENT: &str = "block comment has no end";
pub const E_NO_CLOSING_PIPE: &str = "expected a closing pipe";
pub const E_NO_END_TO_HASH: &str = "expected more input after hash";
pub const E_NUMBER_TRUNCATED: &str = "number literal is truncated";
pub const E_CHAR_TRUNCATED: &str = "character literal is truncated";
pub const E_STRING_TRUNCATED: &str = "string literal is truncated";
pub const E_UNDELIMITED_ESC: &str = "char escape is not delimited";
pub const E_EXTRA_CLOSE: &str = "extra closing parenthesis";
pub const E_CHAR_TOO_LONG: &str = "character literal is too long";
// NOTE(review): "NUMER" looks like a typo for "NUMBER"; the name is public,
// so it is left as is.
pub const E_NUMER_BASE_ERR: &str = "digit in number exceeds specified base";
pub const E_UNSUPPORTED_ESC: &str = "unsupported escape";
pub const E_BAD_DOT: &str = "expected space after dot in dotted notation";
pub const E_BAD_HEX: &str = "character is not valid hexadecimal notation";
pub const E_INCOMPREHENSIBLE: &str = "token does not lex";
pub const E_END_OF_DOCUMENT: &str = "no additional input left in document";
|
|
|
|
|
|
|
|
|
|
/* LexError
 * 0: error string
 * 1: index into document (byte offset)
 * 2: document in question
 */
#[derive(Clone)]
pub struct LexError(pub &'static str, pub usize, pub Rc<str>);

/* Display for LexError
 * Prints four lines:
 *   - a header carrying the raw index stored in the error
 *   - a snippet of the offending line: up to 25 characters before the
 *     index and up to 25 characters from the index on, never crossing
 *     a newline or the ends of the document
 *   - a caret placed under the character at the index
 *   - the error string
 */
impl fmt::Display for LexError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // characters of context shown on each side of the error
        const CONTEXT_CHARS: usize = 25;

        let doc: &str = &self.2;

        // Clamp the index into the document and snap it back onto a
        // character boundary so that none of the slicing below can panic.
        let mut at = self.1.min(doc.len());
        while !doc.is_char_boundary(at) {
            at -= 1;
        }

        /* backtrack from the index until we either hit
         * - beginning of line
         * - 25 characters ago
         * - the doc start
         */
        let mut start = at;
        for (i, c) in doc[..at].char_indices().rev().take(CONTEXT_CHARS) {
            if c == '\n' {
                break;
            }
            start = i;
        }

        /* read through document until we either hit
         * - end of line
         * - 25 characters forward
         * - the doc end
         */
        let mut end = at;
        for (i, c) in doc[at..].char_indices().take(CONTEXT_CHARS) {
            if c == '\n' {
                break;
            }
            end = at + i + c.len_utf8();
        }

        // the caret is aligned by characters, not bytes
        let caret_pad = doc[start..at].chars().count();

        writeln!(f, "Error when lexing document here: (idx: {})", self.1)?;
        writeln!(f, " {}", &doc[start..end])?;
        writeln!(f, " {}^", " ".repeat(caret_pad))?;
        writeln!(f, "Error: {}", self.0)
    }
}
|
|
|
|
|
|
2025-05-07 09:19:33 -07:00
|
|
|
|
|
|
|
|
/* LexTokenType
 * Kind tag attached to every LexToken. The discriminants are
 * contiguous u8 values starting at 0. NumTypes is not a real token
 * kind: it is the count of kinds and the exclusive upper bound
 * accepted by TryFrom<u8>.
 */
#[repr(u8)]
#[derive(Debug, PartialEq, Clone)]
pub enum LexTokenType {
    String = 0,
    Number,
    Char,
    Symbol,
    VectorStart,
    ByteVectorStart,
    ListStart,
    CollectionEnd,
    Boolean,
    Dot,
    Comment,
    Directive,
    Quote,
    QuasiQuote,
    Unquote,
    UnquoteSplice,
    NumTypes,
}

/* Converts a raw discriminant back into a LexTokenType.
 * Any value at or above NumTypes is rejected with
 * "out of token type range".
 */
impl TryFrom<u8> for LexTokenType {
    type Error = &'static str;

    fn try_from(u: u8) -> Result<LexTokenType, Self::Error> {
        if u < LexTokenType::NumTypes as u8 {
            // SAFETY: the enum is #[repr(u8)] with contiguous discriminants
            // starting at 0, and u was just checked to be below NumTypes,
            // so u is the bit pattern of a declared variant.
            Ok(unsafe { core::mem::transmute::<u8, LexTokenType>(u) })
        } else {
            Err("out of token type range")
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
/* LexToken
 * One token cut out of a document by the Lexer.
 */
#[derive(Clone)]
pub struct LexToken {
    // what kind of token this is
    pub token_type: LexTokenType,
    // value of the lexer's current_token_start when the token was cut
    pub start_idx: usize,
    // lexer index after advancing past the token's last character,
    // or the document length when the token ends the document
    pub end_idx: usize,
    // shared handle on the document the indices refer to
    pub source_doc: Rc<str>,
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Lexer
 * Iterator that walks a document and yields LexTokens.
 */
pub struct Lexer {
    // the text being lexed
    document: Rc<str>,
    // byte offset of the character currently under inspection
    current_index: usize,
    // offset recorded as start_idx of the token being built and as the
    // index of any LexError raised; reset to 0 after each token is cut
    current_token_start: usize,
    // first error hit while iterating; once set, next() only returns None
    pub has_error_state: Option<LexError>,
}
|
|
|
|
|
|
|
|
|
|
impl From<Rc<str>> for Lexer {
|
|
|
|
|
fn from(s: Rc<str>) -> Lexer {
|
|
|
|
|
Lexer {
|
|
|
|
|
document: Rc::from(s),
|
|
|
|
|
current_index: 0,
|
|
|
|
|
current_token_start: 0,
|
|
|
|
|
has_error_state: None,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Iterator for Lexer {
|
|
|
|
|
type Item = LexToken;
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
|
if self.has_error_state.is_some() {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let res = self.seek_next_token();
|
|
|
|
|
if let Err(ref e) = res {
|
|
|
|
|
self.has_error_state = Some(e.clone());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return res.ok()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Lexer {
|
|
|
|
|
// I just didnt want to write and rewrite this...
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn current_char(&mut self) -> char {
|
|
|
|
|
self.document.as_bytes()[self.current_index] as char
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn peek_next_char(&mut self) -> Option<char> {
|
|
|
|
|
if let Some((_, ch)) = self.document[self.current_index+1..]
|
|
|
|
|
.char_indices()
|
|
|
|
|
.next() {
|
|
|
|
|
Some(ch)
|
|
|
|
|
} else {
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn advance_char(&mut self) -> Option<()> {
|
|
|
|
|
self.current_index += 1;
|
2025-05-15 15:55:05 -07:00
|
|
|
if self.current_index >= self.document.len() {
|
|
|
|
|
return None
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-07 09:19:33 -07:00
|
|
|
if let Some((idx, _)) = self.document[self.current_index..]
|
|
|
|
|
.char_indices()
|
|
|
|
|
.next() {
|
|
|
|
|
|
|
|
|
|
self.current_index = idx + self.current_index;
|
|
|
|
|
Some(())
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
self.current_index = self.document.len();
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
2025-05-21 14:48:36 -07:00
|
|
|
fn match_chunk_next(&mut self, chunk: &str, peek: bool) -> Option<bool> {
|
2025-05-15 15:55:05 -07:00
|
|
|
let saved = self.current_index;
|
2025-05-07 09:19:33 -07:00
|
|
|
for i in chunk.chars() {
|
2025-05-15 15:55:05 -07:00
|
|
|
if let None = self.advance_char() {
|
|
|
|
|
self.current_index = saved;
|
|
|
|
|
return None
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
2025-05-15 15:55:05 -07:00
|
|
|
if i != self.current_char() {
|
|
|
|
|
self.current_index = saved;
|
2025-05-07 09:19:33 -07:00
|
|
|
return Some(false)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-21 14:48:36 -07:00
|
|
|
if peek { self.current_index = saved; }
|
2025-05-07 09:19:33 -07:00
|
|
|
Some(true)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn cut_new_token(&mut self, t: LexTokenType) -> Result<LexToken, LexError> {
|
|
|
|
|
let next_idx = self.advance_char()
|
|
|
|
|
.and_then(|_| Some(self.current_index))
|
|
|
|
|
.or(Some(self.document.len()))
|
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
|
|
let l = LexToken{
|
|
|
|
|
token_type: t,
|
|
|
|
|
start_idx: self.current_token_start,
|
|
|
|
|
end_idx: next_idx,
|
|
|
|
|
source_doc: self.document.clone(),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
self.current_token_start = 0;
|
|
|
|
|
return Ok(l);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_string(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
loop {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_NO_MATCHING_QUOTE,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-15 15:55:05 -07:00
|
|
|
|
|
|
|
|
} else if self.current_char() == '\\' {
|
|
|
|
|
self.seek_end_of_escape(true)?;
|
|
|
|
|
|
2025-05-07 09:19:33 -07:00
|
|
|
} else if self.current_char() == '"' {
|
|
|
|
|
return self.cut_new_token(LexTokenType::String)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_number(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
let mut base = 10;
|
|
|
|
|
let a = self.current_char();
|
2025-05-15 15:55:05 -07:00
|
|
|
|
2025-05-21 14:48:36 -07:00
|
|
|
if let Some(true) = self.match_chunk_next("inf.0", false) {
|
|
|
|
|
return self.cut_new_token(LexTokenType::Number)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(true) = self.match_chunk_next("nan.0", false) {
|
|
|
|
|
return self.cut_new_token(LexTokenType::Number)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if NUMERICAL_BASE.contains(&a) || a == 'i' || a == 'e' || a == '+' || a == '-' {
|
2025-05-07 09:19:33 -07:00
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_NUMBER_TRUNCATED,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-15 15:55:05 -07:00
|
|
|
|
|
|
|
|
// someday rust will get its shit together and if let chaining will be adequate
|
|
|
|
|
} else if TOK_DELIMITERS.contains(&a) {
|
|
|
|
|
return Err(LexError(E_NUMBER_TRUNCATED,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
2025-05-15 15:55:05 -07:00
|
|
|
|
2025-05-07 09:19:33 -07:00
|
|
|
match a {
|
2025-05-15 15:55:05 -07:00
|
|
|
'x' => base = 16,
|
2025-05-07 09:19:33 -07:00
|
|
|
'd' => base = 10,
|
|
|
|
|
'o' => base = 8,
|
|
|
|
|
'b' => base = 2,
|
2025-05-15 15:55:05 -07:00
|
|
|
// ignore i or e, number parsers will handle that
|
2025-05-07 09:19:33 -07:00
|
|
|
_ => (),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-15 15:55:05 -07:00
|
|
|
let mut hasdot = false;
|
|
|
|
|
let mut hasslash = false;
|
|
|
|
|
let mut hase = false;
|
2025-05-07 09:19:33 -07:00
|
|
|
loop {
|
|
|
|
|
let a = self.current_char();
|
2025-05-15 15:55:05 -07:00
|
|
|
if a == '.' {
|
2025-05-07 09:19:33 -07:00
|
|
|
if hasdot || base < 10 {
|
2025-05-15 15:55:05 -07:00
|
|
|
return Err(LexError(E_TOO_MANY_DECIMALS,
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
hasdot = true;
|
|
|
|
|
|
2025-05-15 15:55:05 -07:00
|
|
|
} else if a == '/' {
|
|
|
|
|
if hasslash || base < 10 {
|
|
|
|
|
return Err(LexError(E_TOO_MANY_SLASH,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
|
|
|
|
}
|
|
|
|
|
hasslash = true;
|
|
|
|
|
|
|
|
|
|
} else if a == 'e' {
|
|
|
|
|
if hase || base < 10 {
|
|
|
|
|
return Err(LexError(E_TOO_MANY_E,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
|
|
|
|
}
|
|
|
|
|
hase = true
|
|
|
|
|
|
|
|
|
|
} else if TOK_DELIMITERS.contains(&a) {
|
2025-05-07 09:19:33 -07:00
|
|
|
// back up one
|
|
|
|
|
self.current_index -= 1;
|
|
|
|
|
return self.cut_new_token(LexTokenType::Number)
|
|
|
|
|
|
2025-05-15 15:55:05 -07:00
|
|
|
} else if let None = a.to_digit(base) {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_NUMER_BASE_ERR,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let None = self.advance_char() {
|
|
|
|
|
self.current_index = self.document.len() - 1;
|
|
|
|
|
return self.cut_new_token(LexTokenType::Number)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_block_comment(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
loop {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_UNCLOSED_COMMENT,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
'|' if self.advance_char().and_then(|_|
|
|
|
|
|
if self.current_char() == '#' {
|
|
|
|
|
return Some(())
|
2025-05-21 14:48:36 -07:00
|
|
|
} else { return None }).is_some() =>
|
2025-05-07 09:19:33 -07:00
|
|
|
return self.cut_new_token(LexTokenType::Comment),
|
|
|
|
|
_ => continue,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_line_comment(&mut self, directive: bool) -> Result<LexToken, LexError> {
|
|
|
|
|
loop {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_UNCLOSED_COMMENT,
|
|
|
|
|
self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
'\n' if !directive => return self.cut_new_token(LexTokenType::Comment),
|
|
|
|
|
'\n' if directive => return self.cut_new_token(LexTokenType::Directive),
|
|
|
|
|
_ => continue,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_closing_pipe(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
loop {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_NO_CLOSING_PIPE,
|
|
|
|
|
self.current_token_start, self.document.clone()));
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let c = self.current_char();
|
|
|
|
|
match self.current_char() {
|
|
|
|
|
'\\' => self.seek_end_of_escape(false)?,
|
|
|
|
|
'|' => return self.cut_new_token(LexTokenType::Symbol),
|
|
|
|
|
_ if c.is_alphanumeric() => continue,
|
|
|
|
|
_ if LEX_SPECIAL.contains(&c) => continue,
|
|
|
|
|
_ if c == ' ' || c == '\n' => continue,
|
|
|
|
|
// quote case caught here
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
_ => return Err(LexError(E_INCOMPREHENSIBLE,
|
|
|
|
|
self.current_token_start, self.document.clone())),
|
2025-05-07 09:19:33 -07:00
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_from_hash(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
let c = self.advance_char().and_then(|_| Some(self.current_char()));
|
|
|
|
|
if let Some(ch) = c {
|
|
|
|
|
match ch {
|
|
|
|
|
't' | 'f' => return self.cut_new_token(LexTokenType::Boolean),
|
|
|
|
|
'|' => return self.seek_end_of_block_comment(),
|
|
|
|
|
'!' => return self.seek_end_of_line_comment(true),
|
2025-05-21 14:48:36 -07:00
|
|
|
'u' if self.match_chunk_next("8(", false).is_some_and(|x| x) =>
|
2025-05-07 09:19:33 -07:00
|
|
|
return self.cut_new_token(LexTokenType::ByteVectorStart),
|
|
|
|
|
'(' => return self.cut_new_token(LexTokenType::VectorStart),
|
2025-05-15 15:55:05 -07:00
|
|
|
'\\' => self.seek_end_of_escape(false, )
|
2025-05-07 09:19:33 -07:00
|
|
|
.and_then(|_| self.cut_new_token(LexTokenType::Char)),
|
|
|
|
|
_ if NUMERICAL_BASE.contains(&ch) => return self.seek_end_of_number(),
|
2025-05-15 15:55:05 -07:00
|
|
|
'i' | 'e' => return self.seek_end_of_number(),
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
_ => return Err(LexError(E_INCOMPREHENSIBLE,
|
|
|
|
|
self.current_token_start, self.document.clone())),
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
} else {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
Err(LexError(E_NO_END_TO_HASH, self.current_token_start, self.document.clone()))
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// DOES NOT RETURN A TOKEN.......
|
|
|
|
|
// only the caller knows what actually needs to be returned
|
|
|
|
|
#[inline(always)]
|
|
|
|
|
fn seek_end_of_escape(&mut self, in_string: bool) -> Result<(), LexError> {
|
2025-05-15 15:55:05 -07:00
|
|
|
// little helper to deduplicate logic for advancing characters
|
|
|
|
|
macro_rules! adv {
|
|
|
|
|
() => {
|
|
|
|
|
if let None = self.advance_char() {
|
|
|
|
|
let mut error_msg = E_CHAR_TRUNCATED;
|
|
|
|
|
if in_string { error_msg = E_STRING_TRUNCATED; }
|
|
|
|
|
Err(LexError(error_msg, self.current_token_start,
|
|
|
|
|
self.document.clone()))
|
|
|
|
|
} else { Ok(()) }
|
|
|
|
|
};
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
2025-05-15 15:55:05 -07:00
|
|
|
let delim = |x| -> bool {
|
|
|
|
|
in_string || TOK_DELIMITERS.contains(&x)
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// advance char once
|
|
|
|
|
adv!()?;
|
|
|
|
|
|
|
|
|
|
/* if match_chunk_next fails then the index is unmoved
|
|
|
|
|
* allowing us to treat this like a single char escape
|
|
|
|
|
*/
|
2025-05-07 09:19:33 -07:00
|
|
|
match self.current_char() {
|
2025-05-15 15:55:05 -07:00
|
|
|
// char escapes
|
2025-05-21 14:48:36 -07:00
|
|
|
'a' if !in_string => self.match_chunk_next("larm", false),
|
|
|
|
|
'b' if !in_string => self.match_chunk_next("ackspace", false),
|
|
|
|
|
'd' if !in_string => self.match_chunk_next("elete", false),
|
|
|
|
|
'e' if !in_string => self.match_chunk_next("scape", false),
|
|
|
|
|
'n' if !in_string => self.match_chunk_next("ewline", false)
|
|
|
|
|
.or(self.match_chunk_next("ull", false)),
|
|
|
|
|
'r' if !in_string => self.match_chunk_next("eturn", false),
|
|
|
|
|
's' if !in_string => self.match_chunk_next("pace", false),
|
|
|
|
|
't' if !in_string => self.match_chunk_next("ab", false),
|
|
|
|
|
// specifically catch a non hex 'x' character escape
|
|
|
|
|
'x' if self.peek_next_char()
|
|
|
|
|
.is_none_or(|c| TOK_DELIMITERS.contains(&c)) && !in_string
|
|
|
|
|
=> None,
|
2025-05-15 15:55:05 -07:00
|
|
|
|
|
|
|
|
// string escapes
|
|
|
|
|
'a' | 'b' | 't' | 'n' | 'r' | '"' | '\\' if in_string => None,
|
|
|
|
|
|
|
|
|
|
// both
|
|
|
|
|
'x' => {
|
|
|
|
|
// we look for TWO hex digits
|
|
|
|
|
adv!()?;
|
|
|
|
|
self.current_char().to_digit(16)
|
|
|
|
|
.ok_or(LexError(E_BAD_HEX, self.current_index,
|
|
|
|
|
self.document.clone()))?;
|
|
|
|
|
adv!()?;
|
|
|
|
|
self.current_char().to_digit(16)
|
|
|
|
|
.ok_or(LexError(E_BAD_HEX, self.current_index,
|
|
|
|
|
self.document.clone()))?;
|
|
|
|
|
None
|
|
|
|
|
},
|
|
|
|
|
|
|
|
|
|
// catchalls
|
|
|
|
|
_ if !in_string => None,
|
|
|
|
|
_ => return Err(LexError(E_UNSUPPORTED_ESC, self.current_index,
|
|
|
|
|
self.document.clone())),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let saved_idx = self.current_index;
|
|
|
|
|
if saved_idx == self.document.len() - 1 {
|
|
|
|
|
return Ok(())
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
2025-05-19 14:38:11 -07:00
|
|
|
// make sure next character is a proper delimiter
|
2025-05-15 15:55:05 -07:00
|
|
|
adv!().and_then(|_| if !delim(self.current_char()) {
|
|
|
|
|
return Err(LexError(E_UNDELIMITED_ESC, self.current_index,
|
|
|
|
|
self.document.clone()))
|
2025-05-19 14:38:11 -07:00
|
|
|
} else { self.current_index = saved_idx; Ok(()) })
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Called to output a token by the iterator implementation
|
|
|
|
|
* I dont think this has to be inlined. The other ones are inlined to
|
|
|
|
|
* prevent the process of parsing a token from being slowed down by
|
|
|
|
|
* so many stack frames. This one is called once per token.
|
|
|
|
|
*/
|
|
|
|
|
fn seek_next_token(&mut self) -> Result<LexToken, LexError> {
|
|
|
|
|
let mut output: Option<Result<LexToken, LexError>> = None;
|
|
|
|
|
|
|
|
|
|
if self.current_index >= self.document.len() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_END_OF_DOCUMENT,
|
|
|
|
|
self.document.len(), self.document.clone()));
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while LEX_WHITESPACE.contains(&self.current_char()) {
|
|
|
|
|
if let None = self.advance_char() {
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
return Err(LexError(E_END_OF_DOCUMENT,
|
|
|
|
|
self.document.len(), self.document.clone()));
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.current_token_start = self.current_index;
|
|
|
|
|
|
2025-05-21 14:48:36 -07:00
|
|
|
macro_rules! numeric {
|
|
|
|
|
( $x:expr ) => {
|
|
|
|
|
$x.is_numeric() || self.match_chunk_next("inf.0", true)
|
|
|
|
|
.or(self.match_chunk_next("nan.0", true))
|
|
|
|
|
.or(Some(false))
|
|
|
|
|
.unwrap()
|
|
|
|
|
};
|
|
|
|
|
}
|
2025-05-07 09:19:33 -07:00
|
|
|
match self.current_char() {
|
|
|
|
|
';' => output = Some(self.seek_end_of_line_comment(false)),
|
|
|
|
|
'\'' => output = Some(self.cut_new_token(LexTokenType::Quote)),
|
|
|
|
|
'`' => output = Some(self.cut_new_token(LexTokenType::QuasiQuote)),
|
|
|
|
|
'(' => output = Some(self.cut_new_token(LexTokenType::ListStart)),
|
|
|
|
|
')' => output = Some(self.cut_new_token(LexTokenType::CollectionEnd)),
|
|
|
|
|
'#' => output = Some(self.seek_end_from_hash()),
|
|
|
|
|
'"' => output = Some(self.seek_end_of_string()),
|
|
|
|
|
'|' => output = Some(self.seek_closing_pipe()),
|
2025-05-21 14:48:36 -07:00
|
|
|
'+' | '-' if self.peek_next_char()
|
|
|
|
|
.and_then(|x| Some(numeric!(x)))
|
|
|
|
|
.or(Some(false))
|
|
|
|
|
.unwrap() => output = Some(self.seek_end_of_number()),
|
2025-05-07 09:19:33 -07:00
|
|
|
_ if self.current_char().is_numeric() => output =
|
|
|
|
|
Some(self.seek_end_of_number()),
|
|
|
|
|
_ => (),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if output.is_none() {
|
|
|
|
|
if self.current_char() == '.' {
|
|
|
|
|
if let Some(x) = self.peek_next_char() && x == ' ' {
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::Dot));
|
|
|
|
|
} /* else {
|
|
|
|
|
output = Some(Err(LexError(E_BAD_DOT, self.current_index)));
|
|
|
|
|
} SYKE! It could be a symbol... */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if self.current_char() == ',' {
|
|
|
|
|
if let Some(x) = self.peek_next_char() && x == '@'{
|
2025-05-19 14:38:11 -07:00
|
|
|
self.advance_char();
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::UnquoteSplice));
|
2025-05-07 09:19:33 -07:00
|
|
|
} else {
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::Unquote));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Broken out into a separate case to maintain precedence of the
|
|
|
|
|
* unquote syntax and dotted notation.
|
|
|
|
|
*/
|
|
|
|
|
if output.is_none() {
|
|
|
|
|
loop {
|
|
|
|
|
let c = self.current_char();
|
2025-05-21 14:48:36 -07:00
|
|
|
if !c.is_alphanumeric() &&
|
|
|
|
|
!LEX_SPECIAL.contains(&c) &&
|
|
|
|
|
!TOK_DELIMITERS.contains(&c) {
|
|
|
|
|
|
Number library and integrations
This commit adds a number library which handles fractions, floats,
whole numbers, scientific notation, and special symbolic numbers
all according to the R7RS small specification.
Numeric trait is used to abstract operations across all number types
and a Number enum is used to offer a non-opaque type that stores any
kind of number.
Upon the Number enum is implemented the following traits:
- Add, Div, Sub, Mul
- Pow
- PartialEq
- PartialOrd
Which then offer the following operators to use on the Number enum
instances themselves: + - / * == != < > <= >= and of course x.pow(y).
Additionally, the number package contains parsing logic for each type
of number. FromStr is implemented as part of the Numeric trait, and
then in turn implemented on Number. Additionally Into<String> is
implemented for the Numeric trait and then on the Number enum type
as well.
Test cases have been added for basic cases, but could be expanded.
Additional modifications:
- LexError has a custom display implementation that properly outputs
formatted errors.
- Sexpr package updated to use new number package
Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-05-15 12:49:08 -07:00
|
|
|
output = Some(Err(LexError(E_INCOMPREHENSIBLE,
|
|
|
|
|
self.current_index, self.document.clone())));
|
2025-05-07 09:19:33 -07:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(c) = self.peek_next_char() {
|
|
|
|
|
if c == ' ' || c == ')' {
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::Symbol));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.advance_char().unwrap();
|
|
|
|
|
} else {
|
|
|
|
|
output = Some(self.cut_new_token(LexTokenType::Symbol));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(ref res) = output {
|
|
|
|
|
if let Err(ref e) = res {
|
|
|
|
|
self.has_error_state = Some(e.clone());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return output.unwrap()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
|
|
|
|
use super::*;
|
|
|
|
|
|
|
|
|
|
#[test]
fn test_token_evaluations() {
    // One (happy, sad) pair of input lists per token type.
    // indexed by LexTokenType
    let cases: [(Vec<&str>, Vec<&str>); LexTokenType::NumTypes as usize] = [
        /* String Cases */ (
            // HAPPY CASES
            vec!["\"asdf\"", "\"as sdf\"", "\"asdflkj\\n\"",
                 "\"LKsldkf;l\"", "\" sdlkfj \"", "\"#;sdf\"",
                 "\"\"", "\"\\\" \\\"\""],

            // SAD CASES
            vec!["\"sdf"]
        ),

        /* Number Cases */ (
            // HAPPY CASES
            vec!["1", "1.0", "#d1.1", "#o1423", "#b11", "#xDF",
                 "#e1e1", "#i1/4", "+inf.0", "1e1", "-1"],

            // SAD CASES
            vec!["1.1.1", "#o9", "#b1.01", "#i1/3/3"]
        ),

        /* Char Cases */ (
            // HAPPY CASES
            vec!["#\\a", "#\\t", "#\\\"", "#\\t", "#\\space",
                 "#\\alarm", "#\\s", "#\\x20", "#\\x", "#\\\\"],

            // SAD CASES
            vec!["\\c", "\\x20"]
        ),

        /* Identifier Cases */ (
            // HAPPY CASES
            vec!["...", "<=?", "V17a", "a34kTMNs", "lambda", "q",
                 "list->vector", "|two words|", "|two\nwords|",
                 "the-word-recursion-has-many-meanings", "+", "-",
                 "slatex.*slatex*"],

            // SAD CASES
            vec!["|\"\"|", "|(|", "|valid"]
        ),

        /* Vector Start Cases */ (
            // HAPPY CASES
            vec!["#("],

            // SAD CASES
            vec![]
        ),

        /* Byte Vector Cases */ (
            // HAPPY CASES
            vec!["#u8("],

            // SAD CASES
            vec!["#u8", "#u9", "#u("]
        ),

        /* List Start Cases */ (
            // HAPPY CASES
            vec!["("],

            // SAD CASES
            vec![]
        ),

        /* Collection End Cases */ (
            // HAPPY CASES
            vec![")"],

            // SAD CASES
            vec![]
        ),

        /* Boolean Cases */ (
            // HAPPY CASES
            vec!["#t", "#f"],

            // SAD CASES
            vec![]
        ),

        /* Dot Cases */ (
            // HAPPY CASES
            vec![" . "],

            // SAD CASES
            vec![]
        ),

        /* Comment cases */ (
            // HAPPY CASES
            vec!["; (\n", "; #\n", ";\"\n", "#| ; ( \" |#"],

            // SAD CASES
            vec!["#|", "; "]
        ),

        /* Directive cases */ (
            // HAPPY CASES
            vec!["#!test-directive\n"],

            // SAD CASES
            vec!["#!test-directive"]
        ),

        /* Quote cases */ (
            // HAPPY CASES
            vec!["'"],

            // SAD CASES
            vec![]
        ),

        /* QuasiQuote cases */ (
            // HAPPY CASES
            vec!["`"],

            // SAD CASES
            vec![]
        ),

        /* Unquote cases */ (
            // HAPPY CASES
            vec![",x", ","],

            // SAD CASES
            vec![]
        ),

        /* UnquoteSplice cases */ (
            // HAPPY CASES
            vec![",@x", ",@(", ",@", ",@(two)"],

            // SAD CASES
            vec![]
        ),
    ];

    /* Token types whose happy inputs are a superset of the token text
     * itself, so the cut substring cannot be compared with the input.
     */
    let no_subtoken_check_cases = [
        LexTokenType::Dot as u8,
        LexTokenType::Unquote as u8,
        LexTokenType::UnquoteSplice as u8
    ];

    for (idx, (happy, sad)) in cases.iter().enumerate() {
        let type_id = idx as u8;
        println!("+ Testing {:#?} Cases...", LexTokenType::try_from(type_id).unwrap());

        // every happy input must lex to a first token of this type
        for subcase in happy {
            println!(" - happy case: {}", subcase);
            let token = Lexer::from(Rc::from(*subcase))
                .next()
                .unwrap();
            assert_eq!(token.token_type,
                       LexTokenType::try_from(type_id).unwrap());

            if !no_subtoken_check_cases.contains(&type_id) {
                assert_eq!(&token.source_doc[token.start_idx..token.end_idx],
                           *subcase)
            }
        }

        // every sad input must fail to produce a first token at all
        for subcase in sad {
            println!(" - sad case: {}", subcase);
            assert!(Lexer::from(Rc::from(*subcase)).next().is_none())
        }
    }
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn test_multi_token_iter() {
    // lex a small list and check type, span and text of every token
    let res: Vec<_> = Lexer::from(Rc::from("( one two three )"))
        .into_iter()
        .collect();
    assert_eq!(res.len(), 5);

    // (token type, start index, end index, token text)
    let expected = [
        (LexTokenType::ListStart, 0, 1, "("),
        (LexTokenType::Symbol, 2, 5, "one"),
        (LexTokenType::Symbol, 6, 9, "two"),
        (LexTokenType::Symbol, 10, 15, "three"),
        (LexTokenType::CollectionEnd, 16, 17, ")"),
    ];

    for (tok, (kind, start, end, text)) in res.iter().zip(expected.iter()) {
        assert_eq!(&tok.token_type, kind);
        assert_eq!(tok.start_idx, *start);
        assert_eq!(tok.end_idx, *end);
        assert_eq!(&tok.source_doc[tok.start_idx..tok.end_idx], *text);
    }
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn test_error_state_blocking() {
    // "2.2.2" is a malformed number: once it has been hit, the lexer has
    // to keep refusing to yield tokens, even though a valid one follows
    let mut lexer = Lexer::from(Rc::from("( 1 2.2.2 valid_token"))
        .into_iter();

    // "(" and "1" come out fine
    for _ in 0..2 {
        assert!(lexer.next().is_some());
    }

    // the bad number stops iteration and the error state sticks
    for _ in 0..2 {
        assert!(lexer.next().is_none());
        assert!(lexer.has_error_state.is_some());
    }
}
|
2025-05-19 14:38:11 -07:00
|
|
|
|
|
|
|
|
#[test]
fn char_lex_with_close() {
    // a char literal directly followed by ')' must not swallow the paren
    let res: Vec<_> = Lexer::from(Rc::from("(#\\a)"))
        .into_iter()
        .collect();
    assert_eq!(res.len(), 3);

    let expected = [
        (LexTokenType::ListStart, "("),
        (LexTokenType::Char, "#\\a"),
        (LexTokenType::CollectionEnd, ")"),
    ];

    for (tok, (kind, text)) in res.iter().zip(expected.iter()) {
        assert_eq!(&tok.token_type, kind);
        assert_eq!(&tok.source_doc[tok.start_idx..tok.end_idx], *text);
    }
}
|
2025-05-21 14:48:36 -07:00
|
|
|
|
|
|
|
|
#[test]
fn num_lex_plusnum_case() {
    // a sign followed by a digit is one Number token, not a symbol
    let tokens: Vec<_> = Lexer::from(Rc::from("+1"))
        .into_iter()
        .collect();

    assert_eq!(tokens.len(), 1);
    assert_eq!(tokens[0].token_type, LexTokenType::Number);
}
|
|
|
|
|
|
|
|
|
|
#[test]
fn char_lex_xchar_case() {
    // "#\x" followed by a delimiter is the character 'x', not the start
    // of a hex escape
    let res: Vec<_> = Lexer::from(Rc::from("#\\x)"))
        .into_iter()
        .collect();
    assert_eq!(res.len(), 2);

    let expected = [
        (LexTokenType::Char, "#\\x"),
        (LexTokenType::CollectionEnd, ")"),
    ];

    for (tok, (kind, text)) in res.iter().zip(expected.iter()) {
        assert_eq!(&tok.token_type, kind);
        assert_eq!(&tok.source_doc[tok.start_idx..tok.end_idx], *text);
    }
}
|
2025-05-07 09:19:33 -07:00
|
|
|
}
|