/* relish: highly versatile lisp interpreter
* Copyright (C) 2021 Aidan Hahn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::segment::{Ctr, Seg};
use crate::error::{Traceback, start_trace};
use phf::{Map, phf_map};
// User-facing error messages returned by process() when the input ends
// while a string literal or a list is still open.
const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input";
const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input";
/* Escape sequences recognized inside string literals: the character
 * following a backslash is looked up here to produce the real char.
 * (The generic parameters were missing, which does not compile.)
 */
static ESCAPES: Map<char, char> = phf_map! {
    'n' => '\n',
    't' => '\t',
    '\\' => '\\',
};
/* takes a line of user input
* returns an unsimplified tree of tokens.
*/
pub fn lex(document: &String) -> Result, Traceback> {
if !document.is_ascii() {
return Err(start_trace(
("", "document may only contain ascii characters".to_string())
.into()))
}
// finish a singlet token, or do nothing
let document_normal = document.clone() + " ";
let tree = process(&document_normal);
// TODO: Make multiple forms of Ok()
// To represent the multiple passable outcomes
return match tree {
Err(e) => Err(start_trace(
("", format!("Problem lexing document: {:?}", e))
.into())),
Ok(t) => Ok(t),
};
}
/* The logic used in lex
* Returns Ok(Rc) if lexing passes
* Returns Err(String) if an error occurs
*/
fn process(document: &String) -> Result, String> {
let doc_len = document.len();
if doc_len == 0 {
return Err("Empty document".to_string());
}
/* State variables
* TODO: describe all of them
*/
let mut is_str = false;
let mut ign = false;
let mut token = String::new();
let mut delim_stack = Vec::new();
let mut ref_stack = vec![];
/* Iterate over document
* Manage currently sought delimiter
*/
for c in document.chars() {
let mut needs_alloc = false;
let mut alloc_list = false;
let delim: char;
if let Some(d) = delim_stack.last() {
delim = *d;
if delim == '*' {
token.push(ESCAPES[&c]);
delim_stack.pop();
continue;
// normal delimiter cases
} else if c == delim {
needs_alloc = true;
// reset comment line status
if delim == '\n' {
delim_stack.pop();
ign = false;
continue;
}
// catch too many list end
// set alloc_list
if delim == ')' {
alloc_list = true;
if ref_stack.is_empty() {
return Err("too many end parens".to_string());
}
}
delim_stack.pop();
// if we are in a commented out space, skip this char
} else if ign {
continue;
}
}
// try to generalize all whitespace
if !needs_alloc && char::is_whitespace(c) && !is_str {
// dont make empty tokens just because the document has consecutive whitespace
if token.is_empty() {
continue;
}
needs_alloc = true;
}
// match a delimiter
if !needs_alloc {
match c {
// add a new Seg reference to the stack
'(' => {
if is_str {
token.push(c);
continue;
}
if !token.is_empty() {
return Err("list started in middle of another token".to_string());
}
ref_stack.push(Seg::new());
delim_stack.push(')');
}
// begin parsing a string
'"' | '\'' | '`' if !is_str => {
is_str = true;
delim_stack.push(c);
}
// eat the whole line
'#' | ';' => {
ign = true;
delim_stack.push('\n');
}
// escape next char
'\\' => if is_str {
delim_stack.push('*');
}
// add to token
_ => {
token.push(c);
}
}
/* 1. Handle allocation of new Ctr
* 2. Handle expansion of current list ref
*/
} else {
if token.is_empty() && !is_str && !alloc_list {
return Err("Empty token".to_string());
}
let mut return_singlet = false;
let mut current_seg = ref_stack.pop().unwrap_or_else(|| {
return_singlet = true;
Seg::new()
});
let obj;
if is_str {
obj = Box::from(Ctr::String(token));
is_str = false;
token = String::new();
current_seg.append(obj);
} else if !token.is_empty() {
if token == "true" {
obj = Box::from(Ctr::Bool(true));
} else if token == "false" {
obj = Box::from(Ctr::Bool(false));
} else if let Ok(i) = token.parse::() {
obj = Box::from(Ctr::Integer(i));
} else if let Ok(f) = token.parse::() {
obj = Box::from(Ctr::Float(f));
} else if let Some(s) = tok_is_symbol(&token) {
obj = Box::from(Ctr::Symbol(s));
} else {
return Err(format!("Unparsable token: {}", token));
}
token = String::new();
current_seg.append(obj.clone());
}
if alloc_list || return_singlet {
// return if we have finished the document
if ref_stack.is_empty() {
return Ok(Box::new(current_seg));
}
let t = current_seg;
current_seg = ref_stack.pop().unwrap();
current_seg.append(Box::from(Ctr::Seg(t)));
}
ref_stack.push(current_seg);
}
}
if is_str {
Err(UNMATCHED_STR_DELIM.to_string())
} else {
Err(UNMATCHED_LIST_DELIM.to_string())
}
}
/* Returns Some(owned copy of token) when every char is alphanumeric
 * or one of: '-', '_', '?', '.', '/', '='
 * ('=' is only allowed for shell command compatibility).
 * Returns None otherwise (an empty token is vacuously a symbol).
 *
 * (Previously the return type lost its generic parameter and the '.'
 * check was duplicated.)
 */
fn tok_is_symbol(token: &str) -> Option<String> {
    let is_symbol_char =
        |t: char| t.is_alphanumeric() || matches!(t, '-' | '_' | '?' | '=' | '.' | '/');
    if token.chars().all(is_symbol_char) {
        Some(String::from(token))
    } else {
        None
    }
}