- syntax tree datatypes
- prototype lex function - a lex unit test - gitignore - library structure - license
This commit is contained in:
commit
e4f2fbaa70
8 changed files with 995 additions and 0 deletions
132
src/cell.rs
Normal file
132
src/cell.rs
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
/* relish: highly versatile lisp interpreter
|
||||
* Copyright (C) 2021 Aidan Hahn
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use std::fmt;
|
||||
use std::boxed::Box;
|
||||
|
||||
|
||||
/* Container
 * A single value slot of a Cell.
 *
 * Debug, Clone and PartialEq are derived so values can be inspected,
 * copied and compared, and so that Result<Box<Cell>, String> supports
 * unwrap_err / expect_err (which require Debug on the Ok type).
 */
#[derive(Debug, Clone, PartialEq)]
pub enum Ctr {
    // a bare identifier
    SYMBOL(String),
    // a string literal (printed between single quotes)
    STRING(String),
    INTEGER(i128),
    FLOAT(f64),
    BOOL(bool),
    // a nested cell; boxed because Ctr and Cell are mutually recursive
    CELL(Box<Cell>),
    // an empty slot (printed as "nil")
    None
}

/* Cell
 * Holds two Containers.
 * Basic building block for more complex data structures.
 */
#[derive(Debug, Clone, PartialEq)]
pub struct Cell {
    /* "Contents of the Address part of the Register"
     * Historical way of referring to the first value in a cell.
     */
    pub car: Ctr,

    /* "Contents of the Decrement part of the Register"
     * Historical way of referring to the second value in a cell.
     */
    pub cdr: Ctr
}
|
||||
|
||||
// creates a cell containing two boxes
|
||||
pub fn cons (l_ctr: Ctr, r_ctr: Ctr) -> Cell {
|
||||
Cell {
|
||||
car: l_ctr,
|
||||
cdr: r_ctr
|
||||
}
|
||||
}
|
||||
|
||||
/* Prints any cell as a string
|
||||
* recurs on CELL type Containers
|
||||
*/
|
||||
pub fn cell_as_string(c: &Cell, with_parens: bool) -> String {
|
||||
let mut string = String::new();
|
||||
match &c.car {
|
||||
Ctr::SYMBOL(s) => string.push_str(&s),
|
||||
Ctr::STRING(s) => {
|
||||
string.push('\'');
|
||||
string.push_str(&s);
|
||||
string.push('\'');
|
||||
},
|
||||
Ctr::INTEGER(i) => string = string + &i.to_string(),
|
||||
Ctr::FLOAT(f) => string = string + &f.to_string(),
|
||||
Ctr::BOOL(b) => string = string + &b.to_string(),
|
||||
Ctr::CELL(c) => string.push_str(cell_as_string(&c, true).as_str()),
|
||||
Ctr::None => string.push_str("nil")
|
||||
}
|
||||
|
||||
string.push(' ');
|
||||
match &c.cdr {
|
||||
Ctr::SYMBOL(s) => string.push_str(&s),
|
||||
Ctr::STRING(s) => {
|
||||
string.push('\'');
|
||||
string.push_str(&s);
|
||||
string.push('\'');
|
||||
},
|
||||
Ctr::INTEGER(i) => string = string + &i.to_string(),
|
||||
Ctr::FLOAT(f) => string = string + &f.to_string(),
|
||||
Ctr::BOOL(b) => string = string + &b.to_string(),
|
||||
Ctr::CELL(c) => string.push_str(cell_as_string(&c, false).as_str()),
|
||||
Ctr::None => string.push_str("nil")
|
||||
}
|
||||
|
||||
// TODO: maybe a better way to do this
|
||||
if with_parens {
|
||||
let mut extra = String::from("(");
|
||||
extra.push_str(&string);
|
||||
extra.push(')');
|
||||
string = extra
|
||||
}
|
||||
return string
|
||||
}
|
||||
|
||||
impl fmt::Display for Cell {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", cell_as_string(self, true).as_str())
|
||||
}
|
||||
}
|
||||
|
||||
/* recurs over a chain of cells
|
||||
* adds obj to chain
|
||||
* not public, only meant for internal use... yet
|
||||
* steals ownership of obj
|
||||
*/
|
||||
pub fn append(c: &mut Cell, obj: Ctr) {
|
||||
match &mut c.car {
|
||||
Ctr::None => {
|
||||
c.car = obj;
|
||||
},
|
||||
_ => {
|
||||
match &mut c.cdr {
|
||||
Ctr::None => {
|
||||
c.cdr = Ctr::CELL(Box::new(Cell{
|
||||
car: obj,
|
||||
cdr: Ctr::None
|
||||
}));
|
||||
},
|
||||
Ctr::CELL(cell) => {
|
||||
append(cell, obj);
|
||||
},
|
||||
_ => ()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
219
src/lex.rs
Normal file
219
src/lex.rs
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
/* relish: highly versatile lisp interpreter
|
||||
* Copyright (C) 2021 Aidan Hahn
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use std::boxed::Box;
|
||||
use crate::cell::{Ctr, append, Cell};
|
||||
|
||||
// Error returned by process when the input ends while is_str is still set.
const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input";
|
||||
// Error returned by process when the input ends without a tree having been returned.
const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input";
|
||||
|
||||
/* takes a line of user input
|
||||
* returns an unsimplified tree of tokens.
|
||||
*
|
||||
* WARNING: lex and process ONLY SUPPORT ASCII CHARACTERS.
|
||||
* Unicode and other technology where one rune can take multiple indexes
|
||||
* can cause havoc if part of a rune matches a whitespace or other operator
|
||||
*/
|
||||
pub fn lex(document: String) -> Result<Box<Cell>, String> {
|
||||
if !document.is_ascii() {
|
||||
return Err("document may only contain ascii characters".to_string());
|
||||
}
|
||||
|
||||
let tree = process(document);
|
||||
|
||||
// TODO: Make multiple forms of Ok()
|
||||
// To represent the multiple passable outcomes
|
||||
return match tree {
|
||||
Err(e) => Err(format!("Problem lexing document: {:?}", e)),
|
||||
Ok(t) => Ok(t)
|
||||
}
|
||||
}
|
||||
|
||||
/* The logic used in lex
 * Walks the document one character at a time, driven by a stack of
 * sought delimiters (delim_stack) and a stack of cells under
 * construction (ref_stack).
 *
 * Returns Ok(Box<Cell>) when a ')' delimiter is matched and ref_stack
 * is empty after popping the current cell
 * Returns Err(String) if an error occurs: empty document, malformed
 * token or list, or input exhausted before a tree was returned
 *
 * WARNING: read docs for lex
 *
 * NOTE(review): prototype code. Comments marked NOTE(review) in the body
 * point at behaviour that looks unintended and should be confirmed.
 */
|
||||
fn process(document: String) -> Result<Box<Cell>, String> {
|
||||
let doc_len = document.len();
|
||||
|
||||
if doc_len == 0 {
|
||||
return Err("Empty document".to_string());
|
||||
}
|
||||
|
||||
/* State variables
 * is_str:      set when a quote character opens a string, cleared when
 *              the STRING container is allocated; still set at end of
 *              input means an unmatched string delimiter
 * ign:         set when '#' starts a comment, cleared when the '\n'
 *              delimiter is matched; while set, allocation is skipped
 * token:       characters collected for the token being read
 * delim_stack: delimiters being sought, innermost last; '*' is pushed
 *              by a backslash as an "escape next char" marker
 * ref_stack:   cells under construction; starts with one empty cell and
 *              gains one for every '(' read
 */
|
||||
let mut is_str = false;
|
||||
let mut ign = false;
|
||||
let mut token = String::new();
|
||||
let mut delim_stack = vec![')', ' '];
|
||||
let mut ref_stack = vec![Box::new(Cell{
|
||||
car: Ctr::None,
|
||||
cdr: Ctr::None
|
||||
})];
|
||||
|
||||
/* Iterate over document
|
||||
* Manage currently sought delimiter
|
||||
*/
|
||||
for c in document.chars() {
|
||||
// needs_alloc: this char finished something that must be stored
|
||||
let mut needs_alloc = true;
|
||||
// alloc_list: the finished thing is a list (a ')' was matched)
|
||||
let mut alloc_list = false;
|
||||
// the delimiter currently sought is the top of the stack
// NOTE(review): unwrap panics if delim_stack is ever empty here - confirm it cannot be
let delim = delim_stack.last().unwrap();
|
||||
// case only happens when escaping a char
|
||||
if *delim == '*' {
|
||||
// NOTE(review): the '*' marker is not popped in this branch and
// needs_alloc is still true when it ends - confirm this is intended
token.push(c);
|
||||
|
||||
// normal delimiter cases
|
||||
} else if c == *delim {
|
||||
// reset comment line status
|
||||
if *delim == '\n' {
|
||||
ign = false
|
||||
}
|
||||
|
||||
// catch too many list end
|
||||
// set alloc_list
|
||||
if *delim == ')' {
|
||||
alloc_list = true;
|
||||
if ref_stack.len() < 1 {
|
||||
return Err("too many end parens".to_string());
|
||||
}
|
||||
}
|
||||
delim_stack.pop();
|
||||
|
||||
// try to generalize all whitespace
|
||||
// NOTE(review): whitespace only separates tokens while ' ' is the sought
// delimiter; otherwise it reaches the `_` arm below and joins the token - confirm
} else if *delim == ' ' && char::is_whitespace(c) {
|
||||
delim_stack.pop();
|
||||
|
||||
// match a delimiter
|
||||
} else {
|
||||
needs_alloc = false;
|
||||
match c {
|
||||
// add a new Cell reference to the stack
|
||||
'(' => {
|
||||
if token != "" || *(delim_stack.last().unwrap()) != ' ' {
|
||||
return Err("list started in middle of another token".to_string());
|
||||
}
|
||||
|
||||
ref_stack.push(Box::new(Cell{
|
||||
car: Ctr::None,
|
||||
cdr: Ctr::None
|
||||
}));
|
||||
|
||||
delim_stack.push(')');
|
||||
},
|
||||
// begin parsing a string
|
||||
'"' | '\'' | '`' => {
|
||||
is_str = true;
|
||||
delim_stack.push(c);
|
||||
},
|
||||
// eat the whole line
|
||||
'#' => {
|
||||
ign = true;
|
||||
delim_stack.push('\n');
|
||||
},
|
||||
// escape next char
|
||||
'\\' => {
|
||||
delim_stack.push('*');
|
||||
}
|
||||
// add to token
|
||||
_ => {
|
||||
token.push(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// inside a comment nothing is allocated
// NOTE(review): the match above has already run for this char, so comment
// text is still pushed onto token before this skip - confirm
if ign {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* 1. Handle allocation of new Ctr
|
||||
* 2. Handle expansion of current list ref
|
||||
*/
|
||||
if needs_alloc {
|
||||
// keep a whitespace delimiter on the stack once everything else was matched
if delim_stack.len() == 0 {
|
||||
delim_stack.push(' ');
|
||||
}
|
||||
|
||||
if token.len() == 0 && !is_str && !alloc_list {
|
||||
return Err("Empty token".to_string());
|
||||
}
|
||||
|
||||
// NOTE(review): nothing in this function pushes current_cell_ref back onto
// ref_stack, so unless it is returned below the cell (and what is appended
// to it) appears to be dropped at the end of this iteration, and this unwrap
// panics once ref_stack is empty - confirm
let mut current_cell_ref = ref_stack.pop().unwrap();
|
||||
// throws warning (overwritten before read) not sure how to handle
|
||||
let mut obj = Ctr::None;
|
||||
// classify what was just finished, in order:
// list, string, bool, integer, float, symbol
if alloc_list {
|
||||
// we should never hit this but if we do I want to know
|
||||
if token.len() > 0 {
|
||||
return Err("list/token conflict".to_string());
|
||||
}
|
||||
|
||||
// return if we have finished the document
|
||||
if ref_stack.len() == 0 {
|
||||
return Ok(current_cell_ref);
|
||||
}
|
||||
|
||||
// the finished list becomes a value of the cell below it on the stack
obj = Ctr::CELL(Box::new(*current_cell_ref));
|
||||
current_cell_ref = ref_stack.pop().unwrap();
|
||||
|
||||
} else if is_str {
|
||||
obj = Ctr::STRING(token);
|
||||
is_str = false;
|
||||
} else if token == "true" {
|
||||
obj = Ctr::BOOL(true);
|
||||
} else if token == "false" {
|
||||
obj = Ctr::BOOL(false);
|
||||
} else if let Ok(i) = token.parse::<i128>() {
|
||||
obj = Ctr::INTEGER(i);
|
||||
} else if let Ok(f) = token.parse::<f64>() {
|
||||
obj = Ctr::FLOAT(f);
|
||||
} else if let Some(s) = tok_is_symbol(&token) {
|
||||
obj = Ctr::SYMBOL(s);
|
||||
} else {
|
||||
return Err(format!("Unparsable token: {}", token));
|
||||
}
|
||||
|
||||
append(&mut current_cell_ref, obj);
|
||||
|
||||
// reset token
|
||||
token = String::new();
|
||||
}
|
||||
}
|
||||
|
||||
// input exhausted without a tree having been returned
if is_str {
|
||||
return Err(UNMATCHED_STR_DELIM.to_string());
|
||||
}
|
||||
return Err(UNMATCHED_LIST_DELIM.to_string());
|
||||
}
|
||||
|
||||
|
||||
/* Checks whether a token may be used as a symbol.
 *
 * Returns Some(owned copy of token) if every character is alphabetic
 * or a decimal digit (an empty token passes trivially)
 *
 * else returns None
 */
fn tok_is_symbol(token: &str) -> Option<String> {
    let is_symbol_char = |ch: char| ch.is_alphabetic() || ch.is_ascii_digit();

    if token.chars().all(is_symbol_char) {
        Some(token.to_owned())
    } else {
        None
    }
}
|
||||
24
src/lib.rs
Normal file
24
src/lib.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
/* relish: highly versatile lisp interpreter
|
||||
* Copyright (C) 2021 Aidan Hahn
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
mod cell;
|
||||
mod lex;
|
||||
|
||||
pub mod ast {
|
||||
pub use crate::cell::{Cell, Ctr, cons, cell_as_string};
|
||||
pub use crate::lex::{lex};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue