Decomposer: fixes from found code
This commit adds a new utility, the decomposer, which has primarily been used to test the AST against Scheme code found in the wild (on the internet). The decomposer times and tests the lexing and parsing of any document containing Scheme. This commit also includes additional test cases and logic fixes for issues found during that testing. Signed-off-by: Ava Affine <ava@sunnypup.io>
This commit is contained in:
parent
86f905ba1d
commit
e4c6e0924a
7 changed files with 417 additions and 40 deletions
242
Cargo.lock
generated
242
Cargo.lock
generated
|
|
@ -6,16 +6,132 @@ version = 4
|
||||||
name = "amanita"
|
name = "amanita"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstream"
|
||||||
|
version = "0.6.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"anstyle-parse",
|
||||||
|
"anstyle-query",
|
||||||
|
"anstyle-wincon",
|
||||||
|
"colorchoice",
|
||||||
|
"is_terminal_polyfill",
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle"
|
||||||
|
version = "1.0.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-parse"
|
||||||
|
version = "0.2.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
|
||||||
|
dependencies = [
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-query"
|
||||||
|
version = "1.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-wincon"
|
||||||
|
version = "3.0.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"once_cell",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "4.5.38"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000"
|
||||||
|
dependencies = [
|
||||||
|
"clap_builder",
|
||||||
|
"clap_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_builder"
|
||||||
|
version = "4.5.38"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120"
|
||||||
|
dependencies = [
|
||||||
|
"anstream",
|
||||||
|
"anstyle",
|
||||||
|
"clap_lex",
|
||||||
|
"strsim",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_derive"
|
||||||
|
version = "4.5.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "0.7.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorchoice"
|
||||||
|
version = "1.0.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "decomposer"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"clap",
|
||||||
|
"mycelium",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "enoki"
|
name = "enoki"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "is_terminal_polyfill"
|
||||||
|
version = "1.70.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mycelium"
|
name = "mycelium"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
@ -95,3 +211,129 @@ checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.21.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.95"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.40"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strsim"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.101"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8parse"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,3 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
resolver = "2"
|
resolver = "2"
|
||||||
members = [ "amanita", "enoki","mycelium"]
|
members = ["mycelium", "decomposer"]
|
||||||
|
|
|
||||||
9
decomposer/Cargo.toml
Normal file
9
decomposer/Cargo.toml
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
[package]
|
||||||
|
name = "decomposer"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
authors = ["Ava Affine <ava@sunnypup.io>"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
clap = { version = "4.5.38", features = [ "derive" ] }
|
||||||
|
mycelium = { path = "../mycelium" }
|
||||||
81
decomposer/src/main.rs
Normal file
81
decomposer/src/main.rs
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
/* Mycelium Scheme
|
||||||
|
* Copyright (C) 2025 Ava Affine
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#![feature(iter_collect_into)]
|
||||||
|
|
||||||
|
use mycelium::{lexer as l, parser as p};
|
||||||
|
|
||||||
|
use std::rc::Rc;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::fs;
|
||||||
|
use std::error::Error;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(version, about, long_about = None)]
|
||||||
|
struct Args {
|
||||||
|
/// display time to parse scripts
|
||||||
|
#[arg(short, long)]
|
||||||
|
time: bool,
|
||||||
|
|
||||||
|
/// output script AST once parsed
|
||||||
|
#[arg(short, long)]
|
||||||
|
debug: bool,
|
||||||
|
|
||||||
|
scripts: Vec<PathBuf>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<(), Box<dyn Error>> {
|
||||||
|
let args = Args::parse();
|
||||||
|
for script in args.scripts {
|
||||||
|
println!("+ processing {:#?}", script);
|
||||||
|
let message = fs::read_to_string(script)?;
|
||||||
|
|
||||||
|
let start: Option<Instant>;
|
||||||
|
if args.time {
|
||||||
|
start = Some(Instant::now());
|
||||||
|
} else {
|
||||||
|
start = None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut p = p::Parser::from(l::Lexer::from(Rc::from(message.as_str())));
|
||||||
|
let mut i = p.next();
|
||||||
|
while let Some(ref r) = i {
|
||||||
|
if args.debug {
|
||||||
|
println!(" > res: {}", r);
|
||||||
|
}
|
||||||
|
|
||||||
|
i = p.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(i) = start {
|
||||||
|
println!(" > time: {:#?}", i.elapsed());
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.has_error_state.is_some() {
|
||||||
|
let e = p.has_error_state.unwrap();
|
||||||
|
if e.0 != l::E_END_OF_DOCUMENT {
|
||||||
|
println!(" > error!\n{}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
@ -234,7 +234,7 @@ impl Lexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn match_chunk_next(&mut self, chunk: &str) -> Option<bool> {
|
fn match_chunk_next(&mut self, chunk: &str, peek: bool) -> Option<bool> {
|
||||||
let saved = self.current_index;
|
let saved = self.current_index;
|
||||||
for i in chunk.chars() {
|
for i in chunk.chars() {
|
||||||
if let None = self.advance_char() {
|
if let None = self.advance_char() {
|
||||||
|
|
@ -248,6 +248,7 @@ impl Lexer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if peek { self.current_index = saved; }
|
||||||
Some(true)
|
Some(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -290,7 +291,15 @@ impl Lexer {
|
||||||
let mut base = 10;
|
let mut base = 10;
|
||||||
let a = self.current_char();
|
let a = self.current_char();
|
||||||
|
|
||||||
if NUMERICAL_BASE.contains(&a) || a == 'i' || a == 'e' {
|
if let Some(true) = self.match_chunk_next("inf.0", false) {
|
||||||
|
return self.cut_new_token(LexTokenType::Number)
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(true) = self.match_chunk_next("nan.0", false) {
|
||||||
|
return self.cut_new_token(LexTokenType::Number)
|
||||||
|
}
|
||||||
|
|
||||||
|
if NUMERICAL_BASE.contains(&a) || a == 'i' || a == 'e' || a == '+' || a == '-' {
|
||||||
if let None = self.advance_char() {
|
if let None = self.advance_char() {
|
||||||
return Err(LexError(E_NUMBER_TRUNCATED,
|
return Err(LexError(E_NUMBER_TRUNCATED,
|
||||||
self.current_token_start, self.document.clone()))
|
self.current_token_start, self.document.clone()))
|
||||||
|
|
@ -311,14 +320,6 @@ impl Lexer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(true) = self.match_chunk_next("inf.0") {
|
|
||||||
return self.cut_new_token(LexTokenType::Number)
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(true) = self.match_chunk_next("nan.0") {
|
|
||||||
return self.cut_new_token(LexTokenType::Number)
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut hasdot = false;
|
let mut hasdot = false;
|
||||||
let mut hasslash = false;
|
let mut hasslash = false;
|
||||||
let mut hase = false;
|
let mut hase = false;
|
||||||
|
|
@ -427,7 +428,7 @@ impl Lexer {
|
||||||
't' | 'f' => return self.cut_new_token(LexTokenType::Boolean),
|
't' | 'f' => return self.cut_new_token(LexTokenType::Boolean),
|
||||||
'|' => return self.seek_end_of_block_comment(),
|
'|' => return self.seek_end_of_block_comment(),
|
||||||
'!' => return self.seek_end_of_line_comment(true),
|
'!' => return self.seek_end_of_line_comment(true),
|
||||||
'u' if self.match_chunk_next("8(").is_some_and(|x| x) =>
|
'u' if self.match_chunk_next("8(", false).is_some_and(|x| x) =>
|
||||||
return self.cut_new_token(LexTokenType::ByteVectorStart),
|
return self.cut_new_token(LexTokenType::ByteVectorStart),
|
||||||
'(' => return self.cut_new_token(LexTokenType::VectorStart),
|
'(' => return self.cut_new_token(LexTokenType::VectorStart),
|
||||||
'\\' => self.seek_end_of_escape(false, )
|
'\\' => self.seek_end_of_escape(false, )
|
||||||
|
|
@ -470,16 +471,19 @@ impl Lexer {
|
||||||
*/
|
*/
|
||||||
match self.current_char() {
|
match self.current_char() {
|
||||||
// char escapes
|
// char escapes
|
||||||
'a' if !in_string => self.match_chunk_next("larm"),
|
'a' if !in_string => self.match_chunk_next("larm", false),
|
||||||
'b' if !in_string => self.match_chunk_next("ackspace"),
|
'b' if !in_string => self.match_chunk_next("ackspace", false),
|
||||||
'd' if !in_string => self.match_chunk_next("elete"),
|
'd' if !in_string => self.match_chunk_next("elete", false),
|
||||||
'e' if !in_string => self.match_chunk_next("scape"),
|
'e' if !in_string => self.match_chunk_next("scape", false),
|
||||||
'n' if !in_string => self.match_chunk_next("ewline").or(
|
'n' if !in_string => self.match_chunk_next("ewline", false)
|
||||||
self.match_chunk_next("ull")
|
.or(self.match_chunk_next("ull", false)),
|
||||||
),
|
'r' if !in_string => self.match_chunk_next("eturn", false),
|
||||||
'r' if !in_string => self.match_chunk_next("eturn"),
|
's' if !in_string => self.match_chunk_next("pace", false),
|
||||||
's' if !in_string => self.match_chunk_next("pace"),
|
't' if !in_string => self.match_chunk_next("ab", false),
|
||||||
't' if !in_string => self.match_chunk_next("ab"),
|
// specifically catch a non hex 'x' character escape
|
||||||
|
'x' if self.peek_next_char()
|
||||||
|
.is_none_or(|c| TOK_DELIMITERS.contains(&c)) && !in_string
|
||||||
|
=> None,
|
||||||
|
|
||||||
// string escapes
|
// string escapes
|
||||||
'a' | 'b' | 't' | 'n' | 'r' | '"' | '\\' if in_string => None,
|
'a' | 'b' | 't' | 'n' | 'r' | '"' | '\\' if in_string => None,
|
||||||
|
|
@ -538,7 +542,14 @@ impl Lexer {
|
||||||
|
|
||||||
self.current_token_start = self.current_index;
|
self.current_token_start = self.current_index;
|
||||||
|
|
||||||
// handle syntactic sugar cases
|
macro_rules! numeric {
|
||||||
|
( $x:expr ) => {
|
||||||
|
$x.is_numeric() || self.match_chunk_next("inf.0", true)
|
||||||
|
.or(self.match_chunk_next("nan.0", true))
|
||||||
|
.or(Some(false))
|
||||||
|
.unwrap()
|
||||||
|
};
|
||||||
|
}
|
||||||
match self.current_char() {
|
match self.current_char() {
|
||||||
';' => output = Some(self.seek_end_of_line_comment(false)),
|
';' => output = Some(self.seek_end_of_line_comment(false)),
|
||||||
'\'' => output = Some(self.cut_new_token(LexTokenType::Quote)),
|
'\'' => output = Some(self.cut_new_token(LexTokenType::Quote)),
|
||||||
|
|
@ -547,12 +558,11 @@ impl Lexer {
|
||||||
')' => output = Some(self.cut_new_token(LexTokenType::CollectionEnd)),
|
')' => output = Some(self.cut_new_token(LexTokenType::CollectionEnd)),
|
||||||
'#' => output = Some(self.seek_end_from_hash()),
|
'#' => output = Some(self.seek_end_from_hash()),
|
||||||
'"' => output = Some(self.seek_end_of_string()),
|
'"' => output = Some(self.seek_end_of_string()),
|
||||||
/* This code commented out. I dont think you can open a char without '#'
|
|
||||||
* '\\' => output = Some(self.seek_end_of_escape(false)
|
|
||||||
.and_then(|_|
|
|
||||||
self.cut_new_token(LexTokenType::Char))),*/
|
|
||||||
'|' => output = Some(self.seek_closing_pipe()),
|
'|' => output = Some(self.seek_closing_pipe()),
|
||||||
'+' | '-' => output = Some(self.seek_end_of_number()),
|
'+' | '-' if self.peek_next_char()
|
||||||
|
.and_then(|x| Some(numeric!(x)))
|
||||||
|
.or(Some(false))
|
||||||
|
.unwrap() => output = Some(self.seek_end_of_number()),
|
||||||
_ if self.current_char().is_numeric() => output =
|
_ if self.current_char().is_numeric() => output =
|
||||||
Some(self.seek_end_of_number()),
|
Some(self.seek_end_of_number()),
|
||||||
_ => (),
|
_ => (),
|
||||||
|
|
@ -583,7 +593,10 @@ impl Lexer {
|
||||||
if output.is_none() {
|
if output.is_none() {
|
||||||
loop {
|
loop {
|
||||||
let c = self.current_char();
|
let c = self.current_char();
|
||||||
if !c.is_alphanumeric() && !LEX_SPECIAL.contains(&c) && c != ' ' {
|
if !c.is_alphanumeric() &&
|
||||||
|
!LEX_SPECIAL.contains(&c) &&
|
||||||
|
!TOK_DELIMITERS.contains(&c) {
|
||||||
|
|
||||||
output = Some(Err(LexError(E_INCOMPREHENSIBLE,
|
output = Some(Err(LexError(E_INCOMPREHENSIBLE,
|
||||||
self.current_index, self.document.clone())));
|
self.current_index, self.document.clone())));
|
||||||
break;
|
break;
|
||||||
|
|
@ -635,7 +648,7 @@ mod tests {
|
||||||
/* Number Cases */ (
|
/* Number Cases */ (
|
||||||
// HAPPY CASES
|
// HAPPY CASES
|
||||||
vec!["1", "1.0", "#d1.1", "#o1423", "#b11", "#xDF",
|
vec!["1", "1.0", "#d1.1", "#o1423", "#b11", "#xDF",
|
||||||
"#e1e1", "#i1/4", "+inf.0", "1e1"],
|
"#e1e1", "#i1/4", "+inf.0", "1e1", "-1"],
|
||||||
|
|
||||||
// SAD CASES
|
// SAD CASES
|
||||||
vec!["1.1.1", "#o9", "#b1.01", "#i1/3/3"]
|
vec!["1.1.1", "#o9", "#b1.01", "#i1/3/3"]
|
||||||
|
|
@ -644,7 +657,7 @@ mod tests {
|
||||||
/* Char Cases */ (
|
/* Char Cases */ (
|
||||||
// HAPPY CASES
|
// HAPPY CASES
|
||||||
vec!["#\\a", "#\\t", "#\\\"", "#\\t", "#\\space",
|
vec!["#\\a", "#\\t", "#\\\"", "#\\t", "#\\space",
|
||||||
"#\\alarm", "#\\s", "#\\x20"],
|
"#\\alarm", "#\\s", "#\\x20", "#\\x", "#\\\\"],
|
||||||
|
|
||||||
// SAD CASES
|
// SAD CASES
|
||||||
vec!["\\c", "\\x20"]
|
vec!["\\c", "\\x20"]
|
||||||
|
|
@ -654,7 +667,8 @@ mod tests {
|
||||||
// HAPPY CASES
|
// HAPPY CASES
|
||||||
vec!["...", "<=?", "V17a", "a34kTMNs", "lambda", "q",
|
vec!["...", "<=?", "V17a", "a34kTMNs", "lambda", "q",
|
||||||
"list->vector", "|two words|", "|two\nwords|",
|
"list->vector", "|two words|", "|two\nwords|",
|
||||||
"the-word-recursion-has-many-meanings"],
|
"the-word-recursion-has-many-meanings", "+", "-",
|
||||||
|
"slatex.*slatex*"],
|
||||||
|
|
||||||
// SAD CASES
|
// SAD CASES
|
||||||
vec!["|\"\"|", "|(|", "|valid"]
|
vec!["|\"\"|", "|(|", "|valid"]
|
||||||
|
|
@ -858,4 +872,29 @@ mod tests {
|
||||||
assert_eq!(res[2].token_type, LexTokenType::CollectionEnd);
|
assert_eq!(res[2].token_type, LexTokenType::CollectionEnd);
|
||||||
assert_eq!(&res[2].source_doc[res[2].start_idx..res[2].end_idx], ")");
|
assert_eq!(&res[2].source_doc[res[2].start_idx..res[2].end_idx], ")");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn num_lex_plusnum_case() {
    // A sign immediately followed by a digit must lex as one Number token.
    let mut tokens = vec![];
    let lexer = Lexer::from(Rc::from("+1"));
    lexer.into_iter().collect_into(&mut tokens);

    assert_eq!(tokens.len(), 1);
    assert_eq!(tokens[0].token_type, LexTokenType::Number);
}
|
||||||
|
|
||||||
|
#[test]
fn char_lex_xchar_case() {
    // `#\x` directly followed by a delimiter is the plain character 'x',
    // not the start of a hex escape, and the delimiter is its own token.
    let mut tokens = vec![];
    let lexer = Lexer::from(Rc::from("#\\x)"));
    lexer.into_iter().collect_into(&mut tokens);
    assert_eq!(tokens.len(), 2);

    let first = &tokens[0];
    assert_eq!(first.token_type, LexTokenType::Char);
    assert_eq!(&first.source_doc[first.start_idx..first.end_idx], "#\\x");

    let second = &tokens[1];
    assert_eq!(second.token_type, LexTokenType::CollectionEnd);
    assert_eq!(&second.source_doc[second.start_idx..second.end_idx], ")");
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ use alloc::string::String;
|
||||||
use alloc::format;
|
use alloc::format;
|
||||||
use alloc::fmt::Debug;
|
use alloc::fmt::Debug;
|
||||||
use core::{cmp::Ordering, f64, ops::{Add, Div, Mul, Sub}, str::FromStr};
|
use core::{cmp::Ordering, f64, ops::{Add, Div, Mul, Sub}, str::FromStr};
|
||||||
use num::{integer::{gcd}, pow::{self, Pow}};
|
use num::{integer::{gcd}, pow::Pow};
|
||||||
|
|
||||||
pub const E_INCOMPREHENSIBLE: &str = "could not comprehend number literal";
|
pub const E_INCOMPREHENSIBLE: &str = "could not comprehend number literal";
|
||||||
pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal";
|
pub const E_BASE_PARSE_FAIL: &str = "failed to parse explicit base literal";
|
||||||
|
|
|
||||||
|
|
@ -187,7 +187,7 @@ fn read_number(token: LexToken) -> Result<Number, ParseError> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_char(token: LexToken) -> Result<u8, ParseError> {
|
fn read_char(token: LexToken) -> Result<u8, ParseError> {
|
||||||
if token.end_idx - token.start_idx < 2 {
|
if token.end_idx - token.start_idx < 3 {
|
||||||
return Err(ParseError(E_CHAR_TRUNCATED, Some(Ok(token))))
|
return Err(ParseError(E_CHAR_TRUNCATED, Some(Ok(token))))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -202,7 +202,7 @@ fn read_char(token: LexToken) -> Result<u8, ParseError> {
|
||||||
"space" => Ok(32),
|
"space" => Ok(32),
|
||||||
"tab" => Ok(11),
|
"tab" => Ok(11),
|
||||||
_ if token.source_doc[token.start_idx + 2..].starts_with('x') &&
|
_ if token.source_doc[token.start_idx + 2..].starts_with('x') &&
|
||||||
token.end_idx - token.start_idx > 2 => {
|
token.end_idx - token.start_idx > 3 => {
|
||||||
if token.end_idx - token.start_idx > 5 {
|
if token.end_idx - token.start_idx > 5 {
|
||||||
return Err(ParseError(E_CHAR_TOO_LONG, Some(Ok(token))))
|
return Err(ParseError(E_CHAR_TOO_LONG, Some(Ok(token))))
|
||||||
}
|
}
|
||||||
|
|
@ -327,7 +327,10 @@ impl Parser {
|
||||||
loop {
|
loop {
|
||||||
let next_tok = self.lexer.next();
|
let next_tok = self.lexer.next();
|
||||||
if let None = next_tok {
|
if let None = next_tok {
|
||||||
return Err(ParseError(E_COLLECTION_TRUNC, None))
|
if let Some(e) = &self.lexer.has_error_state {
|
||||||
|
return Err(ParseError(E_LEX_ERROR, Some(Err(e.clone()))))
|
||||||
|
}
|
||||||
|
return Err(ParseError(E_COLLECTION_TRUNC, Some(Ok(token))))
|
||||||
}
|
}
|
||||||
|
|
||||||
let tok = next_tok.unwrap();
|
let tok = next_tok.unwrap();
|
||||||
|
|
@ -476,7 +479,7 @@ impl Parser {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lexer error
|
// Lexer error
|
||||||
} else if self.lexer.has_error_state.is_some() {
|
} else if let Some(e) = &self.lexer.has_error_state && e.0 != E_END_OF_DOCUMENT {
|
||||||
Err(ParseError(E_LEX_ERROR,
|
Err(ParseError(E_LEX_ERROR,
|
||||||
Some(Err(self.lexer.has_error_state.clone().unwrap()))))
|
Some(Err(self.lexer.has_error_state.clone().unwrap()))))
|
||||||
|
|
||||||
|
|
@ -511,7 +514,10 @@ mod tests {
|
||||||
("(hello \"world\")", "(hello \"world\")"),
|
("(hello \"world\")", "(hello \"world\")"),
|
||||||
("; big doc string\n(one two)", "(one two)"),
|
("; big doc string\n(one two)", "(one two)"),
|
||||||
("(list #(vect 2 3 #u8(0 0)))", "(list #(vect 2 3 #u8(0 0)))"),
|
("(list #(vect 2 3 #u8(0 0)))", "(list #(vect 2 3 #u8(0 0)))"),
|
||||||
("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)")
|
("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)"),
|
||||||
|
("(- q 1)", "(- q 1)"),
|
||||||
|
("(+ q 1)", "(+ q 1)"),
|
||||||
|
("(#\\x)", "(#\\x)"),
|
||||||
];
|
];
|
||||||
|
|
||||||
let sad_cases = vec![
|
let sad_cases = vec![
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue