/* relish: highly versatile lisp interpreter * Copyright (C) 2021 Aidan Hahn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ use std::boxed::Box; use crate::cell::{Ctr, append, Cell}; const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input"; const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input"; /* takes a line of user input * returns an unsimplified tree of tokens. * * WARNING: lex and process ONLY SUPPORT ASCII CHARACTERS. * Unicode and other technology where one rune can take multiple indexes * can cause havoc if part of a rune matches a whitespace or other operator */ pub fn lex(document: String) -> Result, String> { if !document.is_ascii() { return Err("document may only contain ascii characters".to_string()); } let tree = process(document); // TODO: Make multiple forms of Ok() // To represent the multiple passable outcomes return match tree { Err(e) => Err(format!("Problem lexing document: {:?}", e)), Ok(t) => Ok(t) } } /* The logic used in lex * Returns Ok(Box) if lexing passes * Returns Err(String) if an error occurs * * WARNING: read docs for lex */ fn process(document: String) -> Result, String> { let doc_len = document.len(); if doc_len == 0 { return Err("Empty document".to_string()); } /* State variables * TODO: describe all of them */ let mut is_str = false; let mut ign = false; let mut token = String::new(); let mut delim_stack = vec![')', ' ']; let mut ref_stack = vec![Box::new(Cell{ car: Ctr::None, cdr: Ctr::None })]; /* Iterate over document * Manage currently sought delimiter */ for c in document.chars() { let mut needs_alloc = true; let mut alloc_list = false; let delim = delim_stack.last().unwrap(); // case only happens when escaping a char if *delim == '*' { token.push(c); // normal delimiter cases } else if c == *delim { // reset comment line status if *delim == '\n' { ign = false } // catch too many list end // set alloc_list if *delim == ')' { alloc_list = true; if ref_stack.len() < 1 { return Err("too many end parens".to_string()); } } delim_stack.pop(); // try to generalize all whitespace } else if *delim == ' ' && char::is_whitespace(c) { delim_stack.pop(); // match a delimiter } else { needs_alloc = false; match c { // add a new Cell reference to the stack '(' => { if token != "" || *(delim_stack.last().unwrap()) != ' ' { return Err("list started in middle of another token".to_string()); } ref_stack.push(Box::new(Cell{ car: Ctr::None, cdr: Ctr::None })); delim_stack.push(')'); }, // begin parsing a string '"' | '\'' | '`' => { is_str = true; delim_stack.push(c); }, // eat the whole line '#' => { ign = true; delim_stack.push('\n'); }, // escape next char '\\' => { delim_stack.push('*'); } // add to token _ => { token.push(c) } } } if ign { continue; } /* 1. Handle allocation of new Ctr * 2. Handle expansion of current list ref */ if needs_alloc { if delim_stack.len() == 0 { delim_stack.push(' '); } if token.len() == 0 && !is_str && !alloc_list { return Err("Empty token".to_string()); } let mut current_cell_ref = ref_stack.pop().unwrap(); // throws warning (overwritten before read) not sure how to handle let mut obj = Ctr::None; if alloc_list { // we should never hit this but if we do I want to know if token.len() > 0 { return Err("list/token conflict".to_string()); } // return if we have finished the document if ref_stack.len() == 0 { return Ok(current_cell_ref); } obj = Ctr::CELL(Box::new(*current_cell_ref)); current_cell_ref = ref_stack.pop().unwrap(); } else if is_str { obj = Ctr::STRING(token); is_str = false; } else if token == "true" { obj = Ctr::BOOL(true); } else if token == "false" { obj = Ctr::BOOL(false); } else if let Ok(i) = token.parse::() { obj = Ctr::INTEGER(i); } else if let Ok(f) = token.parse::() { obj = Ctr::FLOAT(f); } else if let Some(s) = tok_is_symbol(&token) { obj = Ctr::SYMBOL(s); } else { return Err(format!("Unparsable token: {}", token)); } append(&mut current_cell_ref, obj); // reset token token = String::new(); } } if is_str { return Err(UNMATCHED_STR_DELIM.to_string()); } return Err(UNMATCHED_LIST_DELIM.to_string()); } /* Returns true if token * - is all alphanumeric * * else returns false */ fn tok_is_symbol(token: &String) -> Option { let tok = token.as_str(); for t in tok.chars() { if !t.is_alphabetic() && !t.is_digit(10) { return None } } return Some(String::from(tok)) }