/* relish: highly versatile lisp interpreter
 * Copyright (C) 2021 Aidan Hahn
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
|
|
2023-02-15 23:27:00 -08:00
|
|
|
use crate::segment::{Ctr, Seg};
|
2021-01-24 12:34:58 -08:00
|
|
|
|
|
|
|
|
// Error text reported when input ends while a string literal is still open.
const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input";
// Error text reported when input ends while a list is still open.
const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input";
|
|
|
|
|
|
|
|
|
|
/* takes a line of user input
|
|
|
|
|
* returns an unsimplified tree of tokens.
|
|
|
|
|
*/
|
2023-02-17 21:00:07 -08:00
|
|
|
pub fn lex(document: &String) -> Result<Box<Seg>, String> {
|
2021-01-24 12:34:58 -08:00
|
|
|
if !document.is_ascii() {
|
|
|
|
|
return Err("document may only contain ascii characters".to_string());
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-01 12:20:43 -08:00
|
|
|
let mut document_normal = document.clone();
|
|
|
|
|
if !document_normal.ends_with(')') {
|
|
|
|
|
document_normal = document_normal + ")";
|
|
|
|
|
}
|
|
|
|
|
if !document_normal.starts_with('(') {
|
|
|
|
|
document_normal = "(".to_string() + &document_normal;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let tree = process(&document_normal);
|
2021-01-24 12:34:58 -08:00
|
|
|
|
|
|
|
|
// TODO: Make multiple forms of Ok()
|
|
|
|
|
// To represent the multiple passable outcomes
|
|
|
|
|
return match tree {
|
|
|
|
|
Err(e) => Err(format!("Problem lexing document: {:?}", e)),
|
2022-01-16 22:02:40 -08:00
|
|
|
Ok(t) => Ok(t),
|
|
|
|
|
};
|
2021-01-24 12:34:58 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* The logic used in lex
|
2021-03-14 16:14:57 -07:00
|
|
|
* Returns Ok(Rc<Seg>) if lexing passes
|
2021-01-24 12:34:58 -08:00
|
|
|
* Returns Err(String) if an error occurs
|
|
|
|
|
*/
|
2023-02-17 21:00:07 -08:00
|
|
|
fn process(document: &String) -> Result<Box<Seg>, String> {
|
2021-01-24 12:34:58 -08:00
|
|
|
let doc_len = document.len();
|
|
|
|
|
|
|
|
|
|
if doc_len == 0 {
|
|
|
|
|
return Err("Empty document".to_string());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* State variables
|
|
|
|
|
* TODO: describe all of them
|
|
|
|
|
*/
|
|
|
|
|
let mut is_str = false;
|
|
|
|
|
let mut ign = false;
|
|
|
|
|
let mut token = String::new();
|
2021-01-24 22:04:26 -08:00
|
|
|
let mut delim_stack = Vec::new();
|
|
|
|
|
let mut ref_stack = vec![];
|
2021-01-24 12:34:58 -08:00
|
|
|
|
|
|
|
|
/* Iterate over document
|
|
|
|
|
* Manage currently sought delimiter
|
|
|
|
|
*/
|
|
|
|
|
for c in document.chars() {
|
2021-01-24 22:04:26 -08:00
|
|
|
let mut needs_alloc = false;
|
2021-01-24 12:34:58 -08:00
|
|
|
let mut alloc_list = false;
|
2021-01-24 22:04:26 -08:00
|
|
|
let delim: char;
|
|
|
|
|
if let Some(d) = delim_stack.last() {
|
|
|
|
|
delim = *d;
|
|
|
|
|
|
|
|
|
|
if delim == '*' {
|
|
|
|
|
token.push(c);
|
|
|
|
|
delim_stack.pop();
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// normal delimiter cases
|
|
|
|
|
} else if c == delim {
|
|
|
|
|
needs_alloc = true;
|
|
|
|
|
// reset comment line status
|
|
|
|
|
if delim == '\n' {
|
2021-01-25 20:55:16 -08:00
|
|
|
delim_stack.pop();
|
2021-01-24 22:32:09 -08:00
|
|
|
ign = false;
|
|
|
|
|
continue;
|
2021-01-24 22:04:26 -08:00
|
|
|
}
|
2021-01-24 12:34:58 -08:00
|
|
|
|
2021-01-24 22:04:26 -08:00
|
|
|
// catch too many list end
|
|
|
|
|
// set alloc_list
|
|
|
|
|
if delim == ')' {
|
|
|
|
|
alloc_list = true;
|
2023-02-17 21:00:07 -08:00
|
|
|
if ref_stack.is_empty() {
|
2021-01-24 22:04:26 -08:00
|
|
|
return Err("too many end parens".to_string());
|
|
|
|
|
}
|
2021-01-24 12:34:58 -08:00
|
|
|
}
|
2021-01-24 22:04:26 -08:00
|
|
|
delim_stack.pop();
|
2021-01-24 12:34:58 -08:00
|
|
|
|
2021-01-24 22:04:26 -08:00
|
|
|
// if we are in a commented out space, skip this char
|
|
|
|
|
} else if ign {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-01-24 12:34:58 -08:00
|
|
|
// try to generalize all whitespace
|
2021-01-24 22:04:26 -08:00
|
|
|
if !needs_alloc && char::is_whitespace(c) && !is_str {
|
|
|
|
|
// dont make empty tokens just because the document has consecutive whitespace
|
2023-02-17 21:00:07 -08:00
|
|
|
if token.is_empty() {
|
2021-01-24 22:04:26 -08:00
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
needs_alloc = true;
|
|
|
|
|
}
|
|
|
|
|
// match a delimiter
|
|
|
|
|
if !needs_alloc {
|
2021-01-24 12:34:58 -08:00
|
|
|
match c {
|
2021-03-14 16:14:57 -07:00
|
|
|
// add a new Seg reference to the stack
|
2021-01-24 12:34:58 -08:00
|
|
|
'(' => {
|
2021-09-18 16:48:24 -07:00
|
|
|
if is_str {
|
|
|
|
|
token.push(c);
|
2022-01-16 22:02:40 -08:00
|
|
|
continue;
|
2021-09-18 16:48:24 -07:00
|
|
|
}
|
|
|
|
|
|
2023-02-17 21:00:07 -08:00
|
|
|
if !token.is_empty() {
|
2021-01-24 12:34:58 -08:00
|
|
|
return Err("list started in middle of another token".to_string());
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-15 23:27:00 -08:00
|
|
|
ref_stack.push(Seg::new());
|
2021-01-24 12:34:58 -08:00
|
|
|
delim_stack.push(')');
|
2022-01-16 22:02:40 -08:00
|
|
|
}
|
2021-01-24 12:34:58 -08:00
|
|
|
// begin parsing a string
|
|
|
|
|
'"' | '\'' | '`' => {
|
|
|
|
|
is_str = true;
|
|
|
|
|
delim_stack.push(c);
|
2022-01-16 22:02:40 -08:00
|
|
|
}
|
2021-01-24 12:34:58 -08:00
|
|
|
// eat the whole line
|
|
|
|
|
'#' => {
|
|
|
|
|
ign = true;
|
|
|
|
|
delim_stack.push('\n');
|
2022-01-16 22:02:40 -08:00
|
|
|
}
|
2021-01-24 12:34:58 -08:00
|
|
|
// escape next char
|
|
|
|
|
'\\' => {
|
|
|
|
|
delim_stack.push('*');
|
|
|
|
|
}
|
|
|
|
|
// add to token
|
|
|
|
|
_ => {
|
2021-09-18 16:48:24 -07:00
|
|
|
token.push(c);
|
2021-01-24 12:34:58 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* 1. Handle allocation of new Ctr
|
|
|
|
|
* 2. Handle expansion of current list ref
|
|
|
|
|
*/
|
2021-01-24 22:04:26 -08:00
|
|
|
} else {
|
2023-02-17 21:00:07 -08:00
|
|
|
if token.is_empty() && !is_str && !alloc_list {
|
2021-01-24 12:34:58 -08:00
|
|
|
return Err("Empty token".to_string());
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-01 12:20:43 -08:00
|
|
|
let mut current_seg = ref_stack.pop().unwrap();
|
2023-02-15 23:27:00 -08:00
|
|
|
let obj;
|
2021-09-18 16:48:24 -07:00
|
|
|
if is_str {
|
2023-02-15 23:27:00 -08:00
|
|
|
obj = Box::from(Ctr::String(token));
|
2021-09-18 16:48:24 -07:00
|
|
|
is_str = false;
|
|
|
|
|
token = String::new();
|
2023-02-15 23:27:00 -08:00
|
|
|
current_seg.append(obj);
|
2023-02-17 21:00:07 -08:00
|
|
|
} else if !token.is_empty() {
|
2021-09-18 16:48:24 -07:00
|
|
|
if token == "true" {
|
2023-02-15 23:27:00 -08:00
|
|
|
obj = Box::from(Ctr::Bool(true));
|
2021-01-24 22:04:26 -08:00
|
|
|
} else if token == "false" {
|
2023-02-15 23:27:00 -08:00
|
|
|
obj = Box::from(Ctr::Bool(false));
|
2021-01-24 22:04:26 -08:00
|
|
|
} else if let Ok(i) = token.parse::<i128>() {
|
2023-02-15 23:27:00 -08:00
|
|
|
obj = Box::from(Ctr::Integer(i));
|
2021-01-24 22:04:26 -08:00
|
|
|
} else if let Ok(f) = token.parse::<f64>() {
|
2023-02-15 23:27:00 -08:00
|
|
|
obj = Box::from(Ctr::Float(f));
|
2021-01-24 22:04:26 -08:00
|
|
|
} else if let Some(s) = tok_is_symbol(&token) {
|
2023-02-15 23:27:00 -08:00
|
|
|
obj = Box::from(Ctr::Symbol(s));
|
2021-01-24 22:04:26 -08:00
|
|
|
} else {
|
2021-07-19 23:59:03 -07:00
|
|
|
return Err(format!("Unparsable token: {}", token));
|
2021-01-24 12:34:58 -08:00
|
|
|
}
|
|
|
|
|
|
2021-01-24 22:04:26 -08:00
|
|
|
token = String::new();
|
2023-02-15 23:27:00 -08:00
|
|
|
current_seg.append(obj.clone());
|
2021-01-24 22:04:26 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if alloc_list {
|
2021-01-24 12:34:58 -08:00
|
|
|
// return if we have finished the document
|
2023-02-17 21:00:07 -08:00
|
|
|
if ref_stack.is_empty() {
|
2023-02-15 23:27:00 -08:00
|
|
|
return Ok(Box::new(current_seg));
|
2021-01-24 12:34:58 -08:00
|
|
|
}
|
|
|
|
|
|
2023-02-15 23:27:00 -08:00
|
|
|
let t = current_seg;
|
|
|
|
|
current_seg = ref_stack.pop().unwrap();
|
|
|
|
|
current_seg.append(Box::from(Ctr::Seg(t)));
|
2021-01-24 12:34:58 -08:00
|
|
|
}
|
|
|
|
|
|
2023-01-27 17:45:19 -08:00
|
|
|
ref_stack.push(current_seg);
|
2021-01-24 12:34:58 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if is_str {
|
2023-02-17 21:00:07 -08:00
|
|
|
Err(UNMATCHED_STR_DELIM.to_string())
|
2023-03-01 11:14:42 -08:00
|
|
|
} else {
|
|
|
|
|
Err(UNMATCHED_LIST_DELIM.to_string())
|
2021-01-24 12:34:58 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Checks whether token can be used as a symbol.
 * Returns Some(copy of token) if every character
 * - is alphanumeric, or a dash, or an underscore
 *
 * else returns None
 */
fn tok_is_symbol(token: &str) -> Option<String> {
    let allowed = |ch: char| ch.is_alphanumeric() || ch == '-' || ch == '_';

    if token.chars().all(allowed) {
        Some(token.to_owned())
    } else {
        None
    }
}
|