big temp status

Signed-off-by: Ava Hahn <ava@aidanis.online>
This commit is contained in:
Ava Hahn 2023-01-27 17:45:19 -08:00
parent 45453f819f
commit 5261efbc65
Signed by untrusted user who does not match committer: affine
GPG key ID: 3A4645B8CF806069
12 changed files with 960 additions and 224 deletions

11
the_rewrite/Cargo.toml Normal file

@ -0,0 +1,11 @@
[package]
name = "relish"
version = "0.1.0"
authors = ["Aidan <aidan@aidanis.online>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
dirs = "3.0"
rustyline = "8.2.0"

221
the_rewrite/src/lex.rs Normal file

@ -0,0 +1,221 @@
/* relish: highly versatile lisp interpreter
* Copyright (C) 2021 Aidan Hahn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use crate::segment::{Ctr, Seg};
const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input";
const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input";
/* takes a document of user input
 * and returns an unsimplified tree of tokens.
*/
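/* For example, lexing the document "(add 1 2)" would be expected to
 * round-trip through Display, the same way the lex tests below check it.
 * A rough sketch of intended usage:
 *
 *     let doc = String::from("(add 1 2)");
 *     match lex(&doc) {
 *         Ok(tree) => assert_eq!(tree.to_string(), doc),
 *         Err(e) => panic!("{}", e),
 *     }
 */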
pub fn lex<'a>(document: &'a String) -> Result<Box<Seg>, String> {
if !document.is_ascii() {
return Err("document may only contain ascii characters".to_string());
}
let tree = process(document);
    // TODO: make multiple forms of Ok()
    // to represent the different possible successful outcomes
return match tree {
Err(e) => Err(format!("Problem lexing document: {:?}", e)),
Ok(t) => Ok(t),
};
}
/* The logic used in lex.
 * Returns Ok(Box<Seg>) if lexing succeeds.
 * Returns Err(String) if an error occurs.
 */
fn process<'a>(document: &'a String) -> Result<Box<Seg>, String> {
let doc_len = document.len();
if doc_len == 0 {
return Err("Empty document".to_string());
}
    /* State variables:
     * is_str      - true while a string literal is being lexed
     * ign         - true while the rest of a commented line is being skipped
     * token       - characters of the token currently being accumulated
     * delim_stack - closing delimiters still expected ('*' means "escape next char")
     * ref_stack   - partially built list Segs, innermost list last
     */
    let mut is_str = false;
    let mut ign = false;
    let mut token = String::new();
    let mut delim_stack = Vec::new();
    let mut ref_stack = vec![];
/* Iterate over document
* Manage currently sought delimiter
*/
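    /* Rough sketch of how the delimiter stack is expected to evolve
     * for an input like ("a(" b):
     *   '('  -> push ')'           stack: [')']
     *   '"'  -> push '"'           stack: [')', '"']   (is_str = true)
     *   'a', '(' are swallowed into the string token
     *   '"'  -> matches top, pop   stack: [')']        (string token done)
     *   'b'  -> grows a symbol token
     *   ')'  -> matches top, pop   stack: []           (list closed, Ok returned)
     * A '\\' pushes the sentinel '*' so the very next char is taken
     * literally, and '#' pushes '\n' so the rest of the line is skipped.
     */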
for c in document.chars() {
let mut needs_alloc = false;
let mut alloc_list = false;
let delim: char;
if let Some(d) = delim_stack.last() {
delim = *d;
if delim == '*' {
token.push(c);
delim_stack.pop();
continue;
// normal delimiter cases
} else if c == delim {
needs_alloc = true;
// reset comment line status
if delim == '\n' {
delim_stack.pop();
ign = false;
continue;
}
                // catch unbalanced closing parens
                // and set alloc_list so the finished list is attached below
if delim == ')' {
alloc_list = true;
                    if ref_stack.is_empty() {
return Err("too many end parens".to_string());
}
}
delim_stack.pop();
// if we are in a commented out space, skip this char
} else if ign {
continue;
}
}
        // treat any whitespace as a token boundary (outside of strings)
        if !needs_alloc && char::is_whitespace(c) && !is_str {
            // don't emit empty tokens for runs of consecutive whitespace
            if token.is_empty() {
continue;
}
needs_alloc = true;
}
        // no token boundary yet: handle opening delimiters or grow the current token
if !needs_alloc {
match c {
// add a new Seg reference to the stack
'(' => {
if is_str {
token.push(c);
continue;
}
if token != "" {
return Err("list started in middle of another token".to_string());
}
ref_stack.push(Seg::new());
delim_stack.push(')');
}
// begin parsing a string
'"' | '\'' | '`' => {
is_str = true;
delim_stack.push(c);
}
// eat the whole line
'#' => {
ign = true;
delim_stack.push('\n');
}
// escape next char
'\\' => {
delim_stack.push('*');
}
// add to token
_ => {
token.push(c);
}
}
/* 1. Handle allocation of new Ctr
* 2. Handle expansion of current list ref
*/
} else {
if token.len() == 0 && !is_str && !alloc_list {
return Err("Empty token".to_string());
}
            let mut current_seg = match ref_stack.pop() {
                Some(seg) => seg,
                // a bare token with no enclosing list would otherwise panic here
                None => return Err("token found outside of any list".to_string()),
            };
let obj;
if is_str {
obj = Box::from(Ctr::String(token));
is_str = false;
token = String::new();
current_seg.append(obj);
} else if token.len() > 0 {
if token == "true" {
obj = Box::from(Ctr::Bool(true));
} else if token == "false" {
obj = Box::from(Ctr::Bool(false));
} else if let Ok(i) = token.parse::<i128>() {
obj = Box::from(Ctr::Integer(i));
} else if let Ok(f) = token.parse::<f64>() {
obj = Box::from(Ctr::Float(f));
} else if let Some(s) = tok_is_symbol(&token) {
obj = Box::from(Ctr::Symbol(s));
} else {
return Err(format!("Unparsable token: {}", token));
}
token = String::new();
                current_seg.append(obj);
}
if alloc_list {
// return if we have finished the document
if ref_stack.len() == 0 {
return Ok(Box::new(current_seg));
}
let t = current_seg;
current_seg = ref_stack.pop().unwrap();
            /* TODO: is there a way to do this that doesn't
* involve needlessly copying heap data? I am
* not sure what optimizations rustc performs
* but I assume this should not end up copying
* contained segments around.
*/
current_seg.append(Box::from(Ctr::Seg(t)));
}
ref_stack.push(current_seg);
}
}
if is_str {
return Err(UNMATCHED_STR_DELIM.to_string());
}
return Err(UNMATCHED_LIST_DELIM.to_string());
}
/* Returns Some(token) if every character in the token
 * is alphanumeric, a dash, or an underscore;
 * otherwise returns None.
 */
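/* e.g. "foo-bar_2" would yield Some("foo-bar_2"),
 * while "as;dd" (as exercised in the tests) would yield None.
 */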
fn tok_is_symbol(token: &String) -> Option<String> {
let tok = token.as_str();
for t in tok.chars() {
if !t.is_alphabetic() && !t.is_digit(10) && !(t == '-') && !(t == '_') {
return None;
}
}
return Some(String::from(tok));
}

54
the_rewrite/src/lib.rs Normal file

@ -0,0 +1,54 @@
/* relish: highly versatile lisp interpreter
* Copyright (C) 2021 Aidan Hahn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#![feature(derive_default_enum)]
#![feature(box_into_inner)]
/*mod append;
mod config;
mod eval;
mod func;*/
mod lex;
mod segment;
/*mod stl;
mod str;
mod vars;*/
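// Public lexer/AST surface re-exported for consumers (see the lex tests for usage).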
pub mod ast {
// pub use crate::eval::eval;
// pub use crate::func::{
// func_call, func_declare, Args, ExternalOperation, FTable, Function, Operation,
// };
pub use crate::lex::lex;
pub use crate::segment::{Ctr, Seg, Type};
// pub use crate::vars::{define, VTable};
}
mod test {
}
/*pub mod stdlib {
pub use crate::append::get_append;
pub use crate::stl::get_stdlib;
pub use crate::str::{get_concat, get_echo};
pub use crate::vars::get_export;
}*/
/*pub mod aux {
pub use crate::config::configure;
}*/

236
the_rewrite/src/segment.rs Normal file

@ -0,0 +1,236 @@
/* relish: versatile lisp shell
* Copyright (C) 2021 Aidan Hahn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use std::fmt;
use std::marker::PhantomData;
// Container
#[derive(Debug, Default)]
pub enum Ctr<'a> {
Symbol(String),
String(String),
Integer(i128),
Float(f64),
Bool(bool),
Seg(Seg<'a>),
#[default]
None,
}
// Type of Container
#[derive(PartialEq, Clone)]
pub enum Type {
Symbol,
String,
Integer,
Float,
Bool,
Seg,
None,
}
/* Segment
* Holds two Containers.
* Basic building block for more complex data structures.
* I was going to call it Cell and then I learned about
* how important RefCells were in Rust
*/
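/* For instance, the list (1 2 3) would be represented as three
 * nested Segs (assuming the lexer's standard form):
 *   Seg { car: 1, cdr: Seg { car: 2, cdr: Seg { car: 3, cdr: None } } }
 */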
#[derive(Debug)]
pub struct Seg<'a> {
/* "Contents of Address Register"
* Historical way of referring to the first value in a cell.
*/
pub car: Box<Ctr<'a>>,
/* "Contents of Decrement Register"
* Historical way of referring to the second value in a cell.
*/
pub cdr: Box<Ctr<'a>>,
/* Stupid hack that makes rust look foolish.
* Needed to determine variance of lifetime.
* How this is an acceptable solution I have
* not a single clue.
*/
_lifetime_variance_determinant: PhantomData<&'a ()>
}
impl Ctr<'_> {
pub fn to_type(&self) -> Type {
match self {
Ctr::Symbol(_s) => Type::Symbol,
Ctr::String(_s) => Type::String,
Ctr::Integer(_s) => Type::Integer,
Ctr::Float(_s) => Type::Float,
Ctr::Bool(_s) => Type::Bool,
Ctr::Seg(_s) => Type::Seg,
Ctr::None => Type::None,
}
}
}
impl<'a> Seg<'a> {
pub fn new() -> Seg<'a> {
return Seg{
car: Box::new(Ctr::None),
cdr: Box::new(Ctr::None),
_lifetime_variance_determinant: PhantomData,
}
}
pub fn from(arg: Box<Ctr<'a>>) -> Seg<'a> {
return Seg{
car: arg,
cdr: Box::new(Ctr::None),
_lifetime_variance_determinant: PhantomData,
}
}
/* applies a function across a list in standard form
* function must take a Ctr and return a bool
* short circuits on the first false returned.
* also returns false on a non standard form list
*/
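    /* For example, a sketch of checking that every element of a
     * standard-form list holds an integer:
     *
     *     let all_ints = seg.circuit(&mut |c: &Ctr| -> bool {
     *         c.to_type() == Type::Integer
     *     });
     */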
pub fn circuit<F: FnMut(&Ctr) -> bool>(&self, func: &mut F) -> bool {
if func(&self.car) {
match &*(self.cdr) {
Ctr::None => true,
Ctr::Seg(l) => l.circuit(func),
_ => false,
}
} else {
false
}
}
    /* recurses over an ast assumed to be a list in standard form
* returns length
*/
pub fn len(&self) -> u128 {
let mut len = 0;
self.circuit(&mut |_c: &Ctr| -> bool { len += 1; true });
len
}
    /* recurses over a tree assumed to be a list in standard form
* appends object to end of list
*
* TODO: figure out how not to call CLONE on a CTR via obj arg
* TODO: return result
*/
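    /* e.g. starting from Seg::new() (car and cdr both Ctr::None):
     *   appending Ctr::Integer(1) fills the empty car  -> displays as (1)
     *   appending Ctr::Integer(2) extends the cdr      -> displays as (1 2)
     */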
pub fn append<'b>(&mut self, obj: Box<Ctr<'a>>) {
if let Ctr::None = &*(self.car) {
self.car = obj;
return
}
if let Ctr::Seg(s) = &mut *(self.cdr) {
s.append(obj);
return
}
if let Ctr::None = &mut *(self.cdr) {
self.cdr = Box::new(Ctr::Seg(Seg::from(obj)));
// pray for memory lost to the void
}
}
}
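/* Renders a Seg as a space-separated list, e.g. "(1 2 3)".
 * An empty car prints as "<nil>"; recursive calls on the cdr chain
 * pass parens = false so only the outermost call adds parentheses.
 */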
fn seg_to_string(s: &Seg, parens: bool) -> String {
let mut string = String::new();
if parens { string.push('('); }
match *(s.car) {
Ctr::None => string.push_str("<nil>"),
_ => string.push_str(&s.car.to_string()),
}
string.push(' ');
match &*(s.cdr) {
Ctr::Seg(inner) => string.push_str(&seg_to_string(&inner, false)),
Ctr::None => {string.pop();},
_ => string.push_str(&s.cdr.to_string()),
}
if parens { string.push(')'); }
string
}
impl<'a> Clone for Seg<'a> {
fn clone(&self) -> Seg<'a> {
return Seg{
car: self.car.clone(),
cdr: self.cdr.clone(),
_lifetime_variance_determinant: PhantomData,
}
}
}
impl<'a> Clone for Ctr<'a> {
fn clone(&self) -> Ctr<'a> {
match self {
Ctr::Symbol(s) => Ctr::Symbol(s.clone()),
Ctr::String(s) => Ctr::String(s.clone()),
Ctr::Integer(s) => Ctr::Integer(s.clone()),
Ctr::Float(s) => Ctr::Float(s.clone()),
Ctr::Bool(s) => Ctr::Bool(s.clone()),
Ctr::Seg(s) => Ctr::Seg(s.clone()),
Ctr::None => Ctr::None,
}
}
}
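/* Display conventions: symbols and numbers print bare, strings are
 * wrapped in single quotes, bools print as T / F, None prints nothing.
 */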
impl fmt::Display for Ctr<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Ctr::Symbol(s) => write!(f, "{}", s),
Ctr::String(s) => write!(f, "\'{}\'", s),
Ctr::Integer(s) => write!(f, "{}", s),
Ctr::Float(s) => write!(f, "{}", s),
Ctr::Bool(s) => {
if *s {
write!(f, "T")
} else {
write!(f, "F")
}
},
Ctr::Seg(s) => write!(f, "{}", s),
Ctr::None => Ok(()),
}
}
}
impl fmt::Display for Seg<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", seg_to_string(self, true))
}
}
impl Type {
    pub fn to_string(&self) -> String {
        let ret = match self {
            Type::Symbol => "symbol",
            Type::String => "string",
            Type::Integer => "integer",
            Type::Float => "float",
            Type::Bool => "bool",
            Type::Seg => "segment",
            Type::None => "none",
        };
        ret.to_owned()
    }
}

View file

@ -0,0 +1,178 @@
mod lex_tests {
use relish::ast::lex;
#[test]
fn test_lex_basic_pair() {
let document = String::from("(hello 'world')");
match lex(&document) {
Ok(tree) => {
assert_eq!(tree.to_string(), document);
}
Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
#[test]
fn test_lex_basic_list() {
let document = String::from("(hello 'world' 1 2 3)");
match lex(&document) {
Ok(tree) => {
assert_eq!(tree.to_string(), document);
}
Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
#[test]
fn test_lex_complex_list() {
let document = String::from("(hello 'world' (1 2 (1 2 3)) 1 2 3)");
match lex(&document) {
Ok(tree) => {
assert_eq!(tree.to_string(), document);
}
Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
#[test]
fn test_bad_symbol() {
let document = String::from("(as;dd)");
let output: &str = "Problem lexing document: \"Unparsable token: as;dd\"";
match lex(&document) {
Ok(tree) => {
print!("Bad token yielded: {}\n", tree.to_string());
assert!(false);
}
Err(s) => {
assert_eq!(s, output);
}
}
}
#[test]
fn test_list_delim_in_str() {
let document = String::from("('(')");
match lex(&document) {
Ok(tree) => {
assert_eq!(tree.to_string(), document);
}
Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
#[test]
fn test_empty_string() {
let document = String::from("('')");
match lex(&document) {
Ok(tree) => {
assert_eq!(tree.to_string(), document);
}
Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
#[test]
fn test_unmatched_list_delim_flat() {
let document = String::from("(one two");
let output: &str = "Problem lexing document: \"Unmatched list delimiter in input\"";
match lex(&document) {
Ok(tree) => {
print!("Bad token yielded: {}\n", tree.to_string());
assert!(false);
}
Err(s) => {
assert_eq!(s, output);
}
}
}
#[test]
fn test_unmatched_list_delim_complex() {
let document = String::from("(one two (three)");
let output: &str = "Problem lexing document: \"Unmatched list delimiter in input\"";
match lex(&document) {
Ok(tree) => {
print!("Bad token yielded: {}\n", tree);
assert!(false);
}
Err(s) => {
assert_eq!(s, output);
}
}
}
#[test]
fn test_comment() {
let document = String::from("#!/bin/relish\n(one two)");
let output: &str = "(one two)";
match lex(&document) {
Ok(tree) => {
assert_eq!(tree.to_string(), output);
}
Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
#[test]
fn test_postline_comment() {
let document = String::from("#!/bin/relish\n((one two)# another doc comment\n(three four))");
let output: &str = "((one two) (three four))";
match lex(&document) {
Ok(tree) => {
assert_eq!(tree.to_string(), output.to_string());
}
Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
#[test]
fn test_inline_comment() {
let document = String::from("#!/bin/relish\n((one two)\n# another doc comment\nthree)");
let output: &str = "((one two) three)";
match lex(&document) {
Ok(tree) => {
assert_eq!(tree.to_string(), output.to_string());
}
Err(s) => {
print!("{}\n", s);
assert!(false);
}
}
}
#[test]
fn test_bad_token_list() {
let document = String::from("(one t(wo)");
let output: &str = "Problem lexing document: \"list started in middle of another token\"";
match lex(&document) {
Ok(tree) => {
print!("Bad token yielded: {}\n", tree);
assert!(false);
}
Err(s) => {
assert_eq!(s, output);
}
}
}
}