/* relish: highly versatile lisp interpreter
 * Copyright (C) 2021 Aidan Hahn
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

use crate::segment::{Ctr, Seg};

const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input";
const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input";

/* takes a line of user input
 * returns an unsimplified tree of tokens.
 */
pub fn lex(document: &String) -> Result<Box<Seg>, String> {
    if !document.is_ascii() {
        return Err("document may only contain ascii characters".to_string());
    }

    // a trailing space finishes a final singlet token, or does nothing
    let document_normal = document.clone() + " ";
    let tree = process(&document_normal);

    // TODO: Make multiple forms of Ok()
    // To represent the multiple passable outcomes
    return match tree {
        Err(e) => Err(format!("Problem lexing document: {:?}", e)),
        Ok(t) => Ok(t),
    };
}

/* The logic used in lex
 * Returns Ok(Box<Seg>) if lexing passes
 * Returns Err(String) if an error occurs
 */
fn process(document: &String) -> Result<Box<Seg>, String> {
    let doc_len = document.len();
    if doc_len == 0 {
        return Err("Empty document".to_string());
    }

    /* State variables */
    let mut is_str = false;           // currently inside a string literal
    let mut ign = false;              // currently inside a line comment
    let mut token = String::new();    // the token being accumulated
    let mut delim_stack = Vec::new(); // closing delimiters we are waiting for
    let mut ref_stack = vec![];       // partially built Seg lists, innermost last

    /* Iterate over document
     * Manage currently sought delimiter
     */
    for c in document.chars() {
        let mut needs_alloc = false;
        let mut alloc_list = false;
        let delim: char;
        if let Some(d) = delim_stack.last() {
            delim = *d;
            if delim == '*' {
                token.push(c);
                delim_stack.pop();
                continue;

            // normal delimiter cases
            } else if c == delim {
                needs_alloc = true;

                // reset comment line status
                if delim == '\n' {
                    delim_stack.pop();
                    ign = false;
                    continue;
                }

                // catch too many list ends
                // set alloc_list
                if delim == ')' {
                    alloc_list = true;
                    if ref_stack.is_empty() {
                        return Err("too many end parens".to_string());
                    }
                }

                delim_stack.pop();

            // if we are in a commented out space, skip this char
            } else if ign {
                continue;
            }
        }

        // try to generalize all whitespace
        if !needs_alloc && char::is_whitespace(c) && !is_str {
            // dont make empty tokens just because the document has consecutive whitespace
            if token.is_empty() {
                continue;
            }

            needs_alloc = true;
        }

        // match a delimiter
        if !needs_alloc {
            match c {
                // add a new Seg reference to the stack
                '(' => {
                    if is_str {
                        token.push(c);
                        continue;
                    }

                    if !token.is_empty() {
                        return Err("list started in middle of another token".to_string());
                    }

                    ref_stack.push(Seg::new());
                    delim_stack.push(')');
                }

                // begin parsing a string
                '"' | '\'' | '`' if !is_str => {
                    is_str = true;
                    delim_stack.push(c);
                }

                // eat the whole line
                '#' | ';' => {
                    ign = true;
                    delim_stack.push('\n');
                }

                // escape next char
                '\\' => {
                    delim_stack.push('*');
                }

                // add to token
                _ => {
                    token.push(c);
                }
            }
        /* 1. Handle allocation of new Ctr
         * 2. Handle expansion of current list ref
         */
        } else {
            if token.is_empty() && !is_str && !alloc_list {
                return Err("Empty token".to_string());
            }

            let mut return_singlet = false;
            let mut current_seg = ref_stack.pop().unwrap_or_else(|| {
                return_singlet = true;
                Seg::new()
            });

            let obj;
            if is_str {
                obj = Box::from(Ctr::String(token));
                is_str = false;
                token = String::new();
                current_seg.append(obj);
            } else if !token.is_empty() {
                if token == "true" {
                    obj = Box::from(Ctr::Bool(true));
                } else if token == "false" {
                    obj = Box::from(Ctr::Bool(false));
                } else if let Ok(i) = token.parse::<i64>() {
                    obj = Box::from(Ctr::Integer(i));
                } else if let Ok(f) = token.parse::<f64>() {
                    obj = Box::from(Ctr::Float(f));
                } else if let Some(s) = tok_is_symbol(&token) {
                    obj = Box::from(Ctr::Symbol(s));
                } else {
                    return Err(format!("Unparsable token: {}", token));
                }

                token = String::new();
                current_seg.append(obj.clone());
            }

            if alloc_list || return_singlet {
                // return if we have finished the document
                if ref_stack.is_empty() {
                    return Ok(Box::new(current_seg));
                }

                let t = current_seg;
                current_seg = ref_stack.pop().unwrap();
                current_seg.append(Box::from(Ctr::Seg(t)));
            }

            ref_stack.push(current_seg);
        }
    }

    if is_str {
        Err(UNMATCHED_STR_DELIM.to_string())
    } else {
        Err(UNMATCHED_LIST_DELIM.to_string())
    }
}

/* Returns Some(token) if token
 * - is all alphanumeric except dash, question, underscore, dot, and slash
 * - equals is also allowed but only for shell command compatibility
 * else returns None
 */
fn tok_is_symbol(token: &str) -> Option<String> {
    for t in token.chars() {
        if !t.is_alphanumeric()
            && t != '-'
            && t != '_'
            && t != '?'
            && t != '='
            && t != '.'
            && t != '/'
        {
            return None;
        }
    }

    Some(String::from(token))
}
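
/* A minimal usage sketch for lex, written as a test module. It stays at the
 * Ok()/Err() level because the exact shape of the returned Seg tree depends
 * on the segment module; the inputs and test names below are illustrative
 * assumptions, not an exhaustive suite.
 */
#[cfg(test)]
mod lex_usage_sketch {
    use super::*;

    // a well formed list holding a symbol, a string, and an integer should lex
    #[test]
    fn lexes_a_simple_list() {
        let input = String::from("(print \"hello\" 42)");
        assert!(lex(&input).is_ok());
    }

    // non-ascii input is rejected before any scanning happens
    #[test]
    fn rejects_non_ascii_input() {
        let input = String::from("(print \"héllo\")");
        assert!(lex(&input).is_err());
    }

    // an unterminated string literal surfaces as a lexing error
    #[test]
    fn reports_unterminated_strings() {
        let input = String::from("(print \"unterminated");
        assert!(lex(&input).is_err());
    }
}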