diff --git a/Readme.org b/Readme.org index 23fbfe4..8164e0f 100644 --- a/Readme.org +++ b/Readme.org @@ -15,64 +15,6 @@ https://gitlab.com/whom/shs - To be well tested code - No unsafe code without extreme consideration and rigorous containment -* Current Status / TODO list -*** DONE Core interpreter stuffs -**** DONE Lexing -**** DONE Parsing -**** DONE Evaluation -**** DONE Function table -**** DONE Variable table -*** DONE Echo function -In string lib -*** TODO Rudimentary Control Flow -**** TODO if clause -**** TODO loop clause -**** TODO while clause -**** TODO circuit clause -*** TODO Configuration -**** DONE Function to load configuration into Variable and Function tables -**** DONE Configure in main shell -**** DONE manual verification of config settings -**** DONE manual verification of config defaults -*** TODO Help function -*** TODO Env function -*** TODO User variable declaration -*** TODO User function declaration -*** TODO Load (load a script) function -Pull/Refactor the logic out of the configure functions. -Optionally return a list of new variables and/or functions? 
-Will need a concatenate function for func tables -*** TODO Main shell calls Load function on arg and exits -*** TODO Shell module -**** TODO Process launching with environment variables -**** TODO Foreground process TTY -**** TODO Background processes -*** TODO Custom error printing -*** TODO Custom ast pretty print -*** TODO Implement Ctr, Ast to_string / Display trait -*** TODO get_stdlibphase1 -> configuration -> get_stdlibphase2 -*** TODO STDLIB -**** DONE append -**** TODO string operations -***** DONE concatenate -***** TODO substr by index -***** TODO tokenize by delimiter -***** TODO sprintf / string build -**** TODO arithmetic operations -***** TODO -**** TODO Serialize/Deserialize basic data types -**** TODO file opterations -***** TODO -**** TODO Network library -***** TODO HTTP Client -***** TODO TCP Stream client -***** TODO UDP Client -***** TODO TCP Listener -***** TODO HTTP Listener -***** TODO UDP Listener -*** TODO Ensure full test coverage - - * Contact - Matrix chat: #vomitorium:matrix.sunnypup.io https://matrix.to/#/#vomitorium:matrix.sunnypup.io @@ -154,3 +96,77 @@ You may choose to override this function if you would like to include your own s *** bin: file:src/bin/ This contains any executable target of this project. Notably the main shell file:src/bin/main.rs. 
+ + +* Current Status / TODO list +Note: this section will not show the status of each item unless you are viewing it with a proper orgmode viewer +*** DONE Core interpreter stuffs +**** DONE Lexing +**** DONE Parsing +**** DONE Evaluation +**** DONE Function table +**** DONE Variable table +*** DONE Echo function +*** TODO Redo segment.rs +**** DONE Clone impl for Ctr +(derived) +**** DONE Derive Clone for Seg +**** DONE ToString impl for Ctr +**** DONE ToString impl for Seg +**** DONE Display, Debug impls for Ctr +**** DONE Derive Display, Debug for Seg +**** DONE Default impl for Ctr +**** DONE Derive Default for Seg +**** WONTDO From/Into for Ctr +**** DONE Iterator for Seg +**** TODO YEET AST EVERYWHERE. PASS AROUND A FUCKING SEG REF DAMNIT +In string lib +*** TODO Rudimentary Control Flow +**** TODO if clause +**** TODO loop clause +**** TODO while clause +**** TODO circuit clause +*** TODO Configuration +**** DONE Function to load configuration into Variable and Function tables +**** DONE Configure in main shell +**** DONE manual verification of config settings +**** DONE manual verification of config defaults +*** TODO Help function +*** TODO Env function +*** TODO User variable declaration +*** TODO User function declaration +*** TODO Load (load a script) function +Pull/Refactor the logic out of the configure functions. +Optionally return a list of new variables and/or functions? 
+Will need a concatenate function for func tables +*** TODO Main shell calls Load function on arg and exits +*** TODO Shell module +**** TODO Process launching with environment variables +**** TODO Foreground process TTY +**** TODO Background processes +*** TODO Custom error printing +*** TODO Custom ast pretty print +*** TODO Implement Ctr, Ast to_string / Display trait +*** TODO get_stdlibphase1 -> configuration -> get_stdlibphase2 +*** TODO STDLIB +**** DONE append +**** TODO string operations +***** DONE concatenate +***** TODO substr by index +***** TODO tokenize by delimiter +***** TODO sprintf / string build +**** TODO arithmetic operations +***** TODO +**** TODO Serialize/Deserialize basic data types +**** TODO file operations +***** TODO +**** TODO Network library +***** TODO HTTP Client +***** TODO TCP Stream client +***** TODO UDP Client +***** TODO TCP Listener +***** TODO HTTP Listener +***** TODO UDP Listener +*** TODO Ensure full test coverage + +THE GOOD STUFF IS IN Repositories/temprl diff --git a/src/control.rs b/src/control.rs new file mode 100644 index 0000000..9032397 --- /dev/null +++ b/src/control.rs @@ -0,0 +1,40 @@ +/* relish: versatile lisp shell + * Copyright (C) 2021 Aidan Hahn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +use crate::append::get_append; +use crate::func::{func_declare, FTable, Ast}; +use crate::segment::Ctr; +use crate::str::{get_concat, get_echo}; +use crate::vars::{get_export, VTable}; +use std::cell::RefCell; +use std::rc::Rc; + +pub fn get_if() -> Function { + return Function { + name: String::from("if"), + loose_syms: false, + eval_lazy: true, + args: Args::Lazy(-1), + function: Operation::Internal( + Box::new(|args: Ast, vars: Rc>, funcs: Rc>| -> Ctr { + // Either 2 long or 3 long. + // arg 1 must eval to a bool + // then eval arg 2 or 3 + }) + ), + }; +} diff --git a/src/lex.rs b/src/lex.rs index e01f760..87700c8 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -15,7 +15,7 @@ * along with this program. If not, see . */ -use crate::segment::{list_append, new_ast, Ast, Ctr}; +use crate::segment::{list_append, Ctr, Seg}; const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input"; const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input"; @@ -23,7 +23,7 @@ const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input"; /* takes a line of user input * returns an unsimplified tree of tokens. 
*/ -pub fn lex(document: String) -> Result { +pub fn lex<'a>(document: String) -> Result>, String> { if !document.is_ascii() { return Err("document may only contain ascii characters".to_string()); } @@ -42,7 +42,7 @@ pub fn lex(document: String) -> Result { * Returns Ok(Rc) if lexing passes * Returns Err(String) if an error occurs */ -fn process(document: String) -> Result { +fn process<'a>(document: &'a String) -> Result>, String> { let doc_len = document.len(); if doc_len == 0 { @@ -120,7 +120,7 @@ fn process(document: String) -> Result { return Err("list started in middle of another token".to_string()); } - ref_stack.push(new_ast(Ctr::None, Ctr::None)); + ref_stack.push(Box::new(Seg::new())); delim_stack.push(')'); } @@ -152,13 +152,12 @@ fn process(document: String) -> Result { return Err("Empty token".to_string()); } - let mut current_seg_ref = ref_stack.pop().unwrap(); + let mut current_seg = ref_stack.pop(); let mut obj; if is_str { obj = Ctr::String(token); is_str = false; token = String::new(); - list_append(current_seg_ref.clone(), obj); } else if token.len() > 0 { if token == "true" { obj = Ctr::Bool(true); @@ -175,22 +174,23 @@ fn process(document: String) -> Result { } token = String::new(); - list_append(current_seg_ref.clone(), obj); } + list_append(current_seg, obj); + if alloc_list { // return if we have finished the document if ref_stack.len() == 0 { - return Ok(current_seg_ref); + return Ok(current_seg); } // shortening this will lead to naught but pain - obj = Ctr::Seg(current_seg_ref.clone()); - current_seg_ref = ref_stack.pop().unwrap(); - list_append(current_seg_ref.clone(), obj); + obj = Ctr::Seg(current_seg.into_raw()); + current_seg = ref_stack.pop(); + list_append(current_seg, obj); } - ref_stack.push(current_seg_ref); + ref_stack.push(current_seg); } } diff --git a/src/lib.rs b/src/lib.rs index ed3f5a3..6a48eef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,8 @@ * along with this program. If not, see . 
*/ +#![feature(derive_default_enum)] + mod append; mod config; mod eval; @@ -31,7 +33,7 @@ pub mod ast { func_call, func_declare, Args, ExternalOperation, FTable, Function, Operation, }; pub use crate::lex::lex; - pub use crate::segment::{ast_to_string, new_ast, Ast, Ctr, Seg, Type}; + pub use crate::segment::{Ctr, Seg, Type}; pub use crate::vars::{define, VTable}; } diff --git a/src/segment.rs b/src/segment.rs index 9486b56..3535aa0 100644 --- a/src/segment.rs +++ b/src/segment.rs @@ -14,22 +14,19 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ +use std::fmt; -use std::cell::RefCell; -use std::rc::Rc; - -// Recursive data type for a tree of Segments -pub type Ast = Rc>; // Container -#[derive(Clone, Debug)] -pub enum Ctr { +#[derive(Debug, Clone, Default)] +pub enum Ctr <'a> { Symbol(String), String(String), Integer(i128), Float(f64), Bool(bool), - Seg(Ast), + Seg(Seg<'a>), + #[default] None, } @@ -48,23 +45,21 @@ pub enum Type { /* Segment * Holds two Containers. * Basic building block for more complex data structures. - * I was going to call it Cell and then I learned about - * how important RefCells were in Rust */ -#[derive(Clone, Debug)] -pub struct Seg { +#[derive(Clone, Debug, Default)] +pub struct Seg <'a> { /* "Contents of Address Register" * Historical way of referring to the first value in a cell. */ - pub car: Ctr, + pub car: &mut Ctr<'a>, /* "Contents of Decrement Register" * Historical way of referring to the second value in a cell. 
*/ - pub cdr: Ctr, + pub cdr: &mut Ctr<'a>, } -impl Ctr { +impl Ctr<'_> { pub fn to_type(&self) -> Type { match self { Ctr::Symbol(_s) => Type::Symbol, @@ -76,10 +71,65 @@ impl Ctr { Ctr::None => Type::None, } } + +} + +fn seg_to_string(s: &Seg, parens: bool) -> String { + let mut string = String::new(); + match s.car { + Ctr::None => string.push_str(""), + _ => string.push_str(s.car), + } + string.push(' '); + match s.cdr { + Ctr::Seg(inner) => string.push_str(seg_to_string(inner, false)), + Ctr::None => {}, + _ => string.push_str(s.cdr), + } + + if parens { + String::from("(" + string + ")") + } +} + +impl fmt::Display for Ctr <'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Ctr::Symbol(s) => write!(f, "{}", s), + Ctr::String(s) => write!(f, "\'{}\'", s), + Ctr::Integer(s) => write!(f, "{}", s), + Ctr::Float(s) => write!(f, "{}", s), + Ctr::Bool(s) => { + if s { + write!(f, "T") + } else { + write!(f, "F") + } + }, + Ctr::Seg(s) => write!(f, "{}", s), + Ctr::None => Ok(), + } + } +} + +impl fmt::Display for Seg<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", seg_to_string(self, true)) + } +} + +impl Iterator for Seg<'_> { + fn next(&self) -> Option<&Seg> { + if let Ctr::Seg(s) = self.cdr { + Ok(s) + } else { + None() + } + } } impl Type { - pub fn to_str(&self) -> String { + pub fn to_string(&self) -> String { let ret: &str; match self { Type::Symbol => ret = "symbol", @@ -95,84 +145,16 @@ impl Type { } } -/* Prints a Syntax Tree as a string - */ -pub fn ast_as_string(c: Ast, with_parens: bool) -> String { - let mut string = String::new(); - let mut prn_space = true; - let seg = c.borrow(); - match &seg.car { - Ctr::Symbol(s) => string.push_str(&s), - Ctr::String(s) => { - string.push('\''); - string.push_str(&s); - string.push('\''); - } - Ctr::Integer(i) => string = string + &i.to_string(), - Ctr::Float(f) => string = string + &f.to_string(), - Ctr::Bool(b) => string = string + 
&b.to_string(), - Ctr::Seg(c) => string.push_str(ast_as_string(c.clone(), true).as_str()), - Ctr::None => prn_space = false, - } - - if prn_space { - string.push(' '); - } - - match &seg.cdr { - Ctr::Symbol(s) => string.push_str(&s), - Ctr::String(s) => { - string.push('\''); - string.push_str(&s); - string.push('\''); - } - Ctr::Integer(i) => string = string + &i.to_string(), - Ctr::Float(f) => string = string + &f.to_string(), - Ctr::Bool(b) => string = string + &b.to_string(), - Ctr::Seg(c) => string.push_str(ast_as_string(c.clone(), false).as_str()), - Ctr::None => { - if prn_space { - string.pop(); - } - } - } - - // TODO: maybe a better way to do this - if with_parens { - let mut extra = String::from("("); - extra.push_str(&string); - extra.push(')'); - string = extra - } - return string; -} - -pub fn ast_to_string(c: Ast) -> String { - ast_as_string(c.clone(), true) -} - -/* NOTE: "Standard form" is used here to refer to a list of segments - * that resembles a typical linked list. This means that Car may hold whatever, - * but Cdr must either be Seg or None. - */ - -/* Initializes a new ast node with segment car and cdr passed in - */ -pub fn new_ast(car: Ctr, cdr: Ctr) -> Ast { - Rc::new(RefCell::new(Seg { car: car, cdr: cdr })) -} - /* applies a function across a list in standard form * function must take a Ctr and return a bool * short circuits on the first false returned. 
* also returns false on a non standard form list */ -pub fn circuit bool>(tree: Ast, func: &mut F) -> bool { - let inner = tree.borrow(); - if func(&inner.car) { - match &inner.cdr { +pub fn circuit(list: &Seg, func: &mut F) -> bool { + if func(&list.car) { + match list.cdr { Ctr::None => true, - Ctr::Seg(c) => circuit(c.clone(), func), + Ctr::Seg(l) => circuit(l, func), _ => false, } } else { @@ -183,55 +165,46 @@ pub fn circuit bool>(tree: Ast, func: &mut F) -> bool { /* recurs over ast assumed to be list in standard form * returns length */ -pub fn list_len(tree: Ast) -> u128 { - match &tree.borrow().cdr { - Ctr::Seg(c) => list_len(c.clone()) + 1, - _ => 1, - } +pub fn list_len(list: &Seg) -> u128 { + let mut len = 0; + circuit(list, &circuit(&mut |c: &Ctr| -> bool { len += 1; true })) } /* recurs over tree assumed to be list in standard form * returns clone of ctr at index provided + * + * TODO: return result (or option?) */ -pub fn list_idx(tree: Ast, idx: u128) -> Ctr { - let inner = tree.borrow(); +pub fn list_idx<'a>(list: &Seg, idx: u128) -> Ctr<'a> { if idx > 0 { - match &inner.cdr { - Ctr::None => Ctr::None, - Ctr::Seg(c) => list_idx(c.clone(), idx - 1), - _ => { - if idx == 1 { - inner.cdr.clone() - } else { - Ctr::None - } - } + if let Ctr::Seg(s) = list.car { + list_idx(s, idx - 1) + } else if idx == 1 { + list.cdr + } else { + Ctr::None } } else { - match inner.car { - Ctr::None => Ctr::None, - _ => inner.car.clone(), - } + list.car } } /* recurs over tree assumed to be list in standard form * appends object to end of list + * + * TODO: return result */ -pub fn list_append(tree: Ast, obj: Ctr) { - let mut inner = tree.borrow_mut(); - match &inner.car { - Ctr::None => { - inner.car = obj; - } - _ => match &inner.cdr { - Ctr::None => { - inner.cdr = Ctr::Seg(new_ast(obj, Ctr::None)); - } - Ctr::Seg(tr) => { - list_append(tr.clone(), obj); - } - _ => (), - }, +pub fn list_append<'a>(list: &Seg, obj: Ctr) { + if let Ctr::None = list.car { + list.car = 
obj; + return + } + + if let Ctr::Seg(s) = list.cdr { + list_append(s, obj) + } + + if let Ctr::None = list.cdr { + list.cdr = Ctr::Seg(&Seg{car:obj, cdr:Ctr::None}) } } diff --git a/src/stl.rs b/src/stl.rs index 49dd159..b9b4188 100644 --- a/src/stl.rs +++ b/src/stl.rs @@ -19,6 +19,7 @@ use crate::append::get_append; use crate::func::{func_declare, FTable}; use crate::segment::Ctr; use crate::str::{get_concat, get_echo}; +use crate::control::{get_if}; use crate::vars::{get_export, VTable}; use std::cell::RefCell; use std::rc::Rc; @@ -53,5 +54,9 @@ pub fn get_stdlib(conf: Rc>) -> Result>, Stri return Err(s); } + if let Some(s) = func_declare(ft.clone(), Rc::new(RefCell::new(get_if()))) { + return Err(s); + } + return Ok(ft); } diff --git a/tests/test_lex.rs b/tests/test_lex.rs index 5e80a8f..ff3d946 100644 --- a/tests/test_lex.rs +++ b/tests/test_lex.rs @@ -1,12 +1,12 @@ mod lex_tests { - use relish::ast::{ast_to_string, lex}; + use relish::ast::lex; #[test] fn test_lex_basic_pair() { let document: &str = "(hello 'world')"; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - assert_eq!(ast_to_string(tree), document); + assert_eq!(tree, document); } Err(s) => { print!("{}\n", s); @@ -18,9 +18,9 @@ mod lex_tests { #[test] fn test_lex_basic_list() { let document: &str = "(hello 'world' 1 2 3)"; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - assert_eq!(ast_to_string(tree), document); + assert_eq!(tree, document); } Err(s) => { print!("{}\n", s); @@ -32,9 +32,9 @@ mod lex_tests { #[test] fn test_lex_complex_list() { let document: &str = "(hello 'world' (1 2 (1 2 3)) 1 2 3)"; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - assert_eq!(ast_to_string(tree), document); + assert_eq!(tree, document); } Err(s) => { print!("{}\n", s); @@ -47,13 +47,13 @@ mod lex_tests { fn test_bad_symbol() { let document: &str = "(as;dd)"; let output: &str = "Problem lexing document: \"Unparsable token: as;dd\""; - 
match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - print!("Bad token yielded: {}\n", ast_to_string(tree)); + print!("Bad token yielded: {}\n", tree); assert!(false); } Err(s) => { - assert_eq!(s, output.to_string()); + assert_eq!(s, output); } } } @@ -61,9 +61,9 @@ mod lex_tests { #[test] fn test_list_delim_in_str() { let document: &str = "('(')"; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - assert_eq!(ast_to_string(tree), document); + assert_eq!(tree, document); } Err(s) => { print!("{}\n", s); @@ -75,9 +75,9 @@ mod lex_tests { #[test] fn test_empty_string() { let document: &str = "('')"; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - assert_eq!(ast_to_string(tree), document); + assert_eq!(tree, document); } Err(s) => { print!("{}\n", s); @@ -90,13 +90,13 @@ mod lex_tests { fn test_unmatched_list_delim_flat() { let document: &str = "(one two"; let output: &str = "Problem lexing document: \"Unmatched list delimiter in input\""; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - print!("Bad token yielded: {}\n", ast_to_string(tree)); + print!("Bad token yielded: {}\n", tree); assert!(false); } Err(s) => { - assert_eq!(s, output.to_string()); + assert_eq!(s, output); } } } @@ -105,13 +105,13 @@ mod lex_tests { fn test_unmatched_list_delim_complex() { let document: &str = "(one two (three)"; let output: &str = "Problem lexing document: \"Unmatched list delimiter in input\""; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - print!("Bad token yielded: {}\n", ast_to_string(tree)); + print!("Bad token yielded: {}\n", tree); assert!(false); } Err(s) => { - assert_eq!(s, output.to_string()); + assert_eq!(s, output); } } } @@ -120,9 +120,9 @@ mod lex_tests { fn test_comment() { let document: &str = "#!/bin/relish\n(one two)"; let output: &str = "(one two)"; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - 
assert_eq!(ast_to_string(tree), output.to_string()); + assert_eq!(tree, output); } Err(s) => { print!("{}\n", s); @@ -135,9 +135,9 @@ mod lex_tests { fn test_postline_comment() { let document: &str = "#!/bin/relish\n((one two)# another doc comment\n(three four))"; let output: &str = "((one two) (three four))"; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - assert_eq!(ast_to_string(tree), output.to_string()); + assert_eq!(tree, output.to_string()); } Err(s) => { print!("{}\n", s); @@ -150,9 +150,9 @@ mod lex_tests { fn test_inline_comment() { let document: &str = "#!/bin/relish\n((one two)\n# another doc comment\nthree)"; let output: &str = "((one two) three)"; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - assert_eq!(ast_to_string(tree), output.to_string()); + assert_eq!(tree, output); } Err(s) => { print!("{}\n", s); @@ -165,13 +165,13 @@ mod lex_tests { fn test_bad_token_list() { let document: &str = "(one t(wo)"; let output: &str = "Problem lexing document: \"list started in middle of another token\""; - match lex(document.to_string()) { + match lex(document) { Ok(tree) => { - print!("Bad token yielded: {}\n", ast_to_string(tree)); + print!("Bad token yielded: {}\n", tree); assert!(false); } Err(s) => { - assert_eq!(s, output.to_string()); + assert_eq!(s, output); } } } diff --git a/the_rewrite/Cargo.toml b/the_rewrite/Cargo.toml new file mode 100644 index 0000000..7214a92 --- /dev/null +++ b/the_rewrite/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "relish" +version = "0.1.0" +authors = ["Aidan "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +dirs = "3.0" +rustyline = "8.2.0" \ No newline at end of file diff --git a/the_rewrite/src/lex.rs b/the_rewrite/src/lex.rs new file mode 100644 index 0000000..337849b --- /dev/null +++ b/the_rewrite/src/lex.rs @@ -0,0 +1,221 @@ +/* relish: highly versatile lisp interpreter + 
* Copyright (C) 2021 Aidan Hahn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +use crate::segment::{Ctr, Seg}; + +const UNMATCHED_STR_DELIM: &str = "Unmatched string delimiter in input"; +const UNMATCHED_LIST_DELIM: &str = "Unmatched list delimiter in input"; + +/* takes a line of user input + * returns an unsimplified tree of tokens. + */ +pub fn lex<'a>(document: &'a String) -> Result, String> { + if !document.is_ascii() { + return Err("document may only contain ascii characters".to_string()); + } + + let tree = process(document); + + // TODO: Make multiple forms of Ok() + // To represent the multiple passable outcomes + return match tree { + Err(e) => Err(format!("Problem lexing document: {:?}", e)), + Ok(t) => Ok(t), + }; +} + +/* The logic used in lex + * Returns Ok(Rc) if lexing passes + * Returns Err(String) if an error occurs + */ +fn process<'a>(document: &'a String) -> Result, String> { + let doc_len = document.len(); + + if doc_len == 0 { + return Err("Empty document".to_string()); + } + + /* State variables + * TODO: describe all of them + */ + let mut is_str = false; + let mut ign = false; + let mut token = String::new(); + let mut delim_stack = Vec::new(); + let mut ref_stack = vec![]; + + /* Iterate over document + * Manage currently sought delimiter + */ + for c in document.chars() { + let mut needs_alloc = false; + let mut alloc_list = false; + let delim: char; + if 
let Some(d) = delim_stack.last() { + delim = *d; + + if delim == '*' { + token.push(c); + delim_stack.pop(); + continue; + + // normal delimiter cases + } else if c == delim { + needs_alloc = true; + // reset comment line status + if delim == '\n' { + delim_stack.pop(); + ign = false; + continue; + } + + // catch too many list end + // set alloc_list + if delim == ')' { + alloc_list = true; + if ref_stack.len() < 1 { + return Err("too many end parens".to_string()); + } + } + delim_stack.pop(); + + // if we are in a commented out space, skip this char + } else if ign { + continue; + } + } + // try to generalize all whitespace + if !needs_alloc && char::is_whitespace(c) && !is_str { + // dont make empty tokens just because the document has consecutive whitespace + if token.len() == 0 { + continue; + } + needs_alloc = true; + } + // match a delimiter + if !needs_alloc { + match c { + // add a new Seg reference to the stack + '(' => { + if is_str { + token.push(c); + continue; + } + + if token != "" { + return Err("list started in middle of another token".to_string()); + } + + ref_stack.push(Seg::new()); + delim_stack.push(')'); + } + // begin parsing a string + '"' | '\'' | '`' => { + is_str = true; + delim_stack.push(c); + } + // eat the whole line + '#' => { + ign = true; + delim_stack.push('\n'); + } + // escape next char + '\\' => { + delim_stack.push('*'); + } + // add to token + _ => { + token.push(c); + } + } + + /* 1. Handle allocation of new Ctr + * 2. 
Handle expansion of current list ref + */ + } else { + if token.len() == 0 && !is_str && !alloc_list { + return Err("Empty token".to_string()); + } + + let mut current_seg = ref_stack.pop().unwrap(); + let obj; + if is_str { + obj = Box::from(Ctr::String(token)); + is_str = false; + token = String::new(); + current_seg.append(obj); + } else if token.len() > 0 { + if token == "true" { + obj = Box::from(Ctr::Bool(true)); + } else if token == "false" { + obj = Box::from(Ctr::Bool(false)); + } else if let Ok(i) = token.parse::() { + obj = Box::from(Ctr::Integer(i)); + } else if let Ok(f) = token.parse::() { + obj = Box::from(Ctr::Float(f)); + } else if let Some(s) = tok_is_symbol(&token) { + obj = Box::from(Ctr::Symbol(s)); + } else { + return Err(format!("Unparsable token: {}", token)); + } + + token = String::new(); + current_seg.append(obj.clone()); + } + + if alloc_list { + // return if we have finished the document + if ref_stack.len() == 0 { + return Ok(Box::new(current_seg)); + } + + let t = current_seg; + current_seg = ref_stack.pop().unwrap(); + /* TODO: is there a way to do this that doesnt + * involve needlessly copying heap data? I am + * not sure what optimizations rustc performs + * but I assume this should not end up copying + * contained segments around. 
+ */ + current_seg.append(Box::from(Ctr::Seg(t))); + } + + ref_stack.push(current_seg); + } + } + + if is_str { + return Err(UNMATCHED_STR_DELIM.to_string()); + } + return Err(UNMATCHED_LIST_DELIM.to_string()); +} + +/* Returns true if token + * - is all alphanumeric except dash and underscore + * + * else returns false + */ +fn tok_is_symbol(token: &String) -> Option { + let tok = token.as_str(); + for t in tok.chars() { + if !t.is_alphabetic() && !t.is_digit(10) && !(t == '-') && !(t == '_') { + return None; + } + } + + return Some(String::from(tok)); +} diff --git a/the_rewrite/src/lib.rs b/the_rewrite/src/lib.rs new file mode 100644 index 0000000..7347455 --- /dev/null +++ b/the_rewrite/src/lib.rs @@ -0,0 +1,54 @@ +/* relish: highly versatile lisp interpreter + * Copyright (C) 2021 Aidan Hahn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#![feature(derive_default_enum)] +#![feature(box_into_inner)] + +/*mod append; +mod config; +mod eval; +mod func;*/ +mod lex; +mod segment; +/*mod stl; +mod str; +mod vars;*/ + +pub mod ast { +// pub use crate::eval::eval; +// pub use crate::func::{ +// func_call, func_declare, Args, ExternalOperation, FTable, Function, Operation, +// }; + pub use crate::lex::lex; + pub use crate::segment::{Ctr, Seg, Type}; +// pub use crate::vars::{define, VTable}; +} + +mod test { + +} + +/*pub mod stdlib { + pub use crate::append::get_append; + pub use crate::stl::get_stdlib; + pub use crate::str::{get_concat, get_echo}; + pub use crate::vars::get_export; +}*/ + +/*pub mod aux { + pub use crate::config::configure; +}*/ diff --git a/the_rewrite/src/segment.rs b/the_rewrite/src/segment.rs new file mode 100644 index 0000000..5bbdc1f --- /dev/null +++ b/the_rewrite/src/segment.rs @@ -0,0 +1,236 @@ +/* relish: versatile lisp shell + * Copyright (C) 2021 Aidan Hahn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +use std::fmt; +use std::marker::PhantomData; + +// Container +#[derive(Debug, Default)] +pub enum Ctr<'a> { + Symbol(String), + String(String), + Integer(i128), + Float(f64), + Bool(bool), + Seg(Seg<'a>), + #[default] + None, +} + +// Type of Container +#[derive(PartialEq, Clone)] +pub enum Type { + Symbol, + String, + Integer, + Float, + Bool, + Seg, + None, +} + +/* Segment + * Holds two Containers. + * Basic building block for more complex data structures. + * I was going to call it Cell and then I learned about + * how important RefCells were in Rust + */ +#[derive(Debug)] +pub struct Seg<'a> { + /* "Contents of Address Register" + * Historical way of referring to the first value in a cell. + */ + pub car: Box>, + + /* "Contents of Decrement Register" + * Historical way of referring to the second value in a cell. + */ + pub cdr: Box>, + + /* Stupid hack that makes rust look foolish. + * Needed to determine variance of lifetime. + * How this is an acceptable solution I have + * not a single clue. + */ + _lifetime_variance_determinant: PhantomData<&'a ()> +} + +impl Ctr<'_> { + pub fn to_type(&self) -> Type { + match self { + Ctr::Symbol(_s) => Type::Symbol, + Ctr::String(_s) => Type::String, + Ctr::Integer(_s) => Type::Integer, + Ctr::Float(_s) => Type::Float, + Ctr::Bool(_s) => Type::Bool, + Ctr::Seg(_s) => Type::Seg, + Ctr::None => Type::None, + } + } + +} + +impl<'a> Seg<'a> { + pub fn new() -> Seg<'a> { + return Seg{ + car: Box::new(Ctr::None), + cdr: Box::new(Ctr::None), + _lifetime_variance_determinant: PhantomData, + } + } + + pub fn from(arg: Box>) -> Seg<'a> { + return Seg{ + car: arg, + cdr: Box::new(Ctr::None), + _lifetime_variance_determinant: PhantomData, + } + } + + /* applies a function across a list in standard form + * function must take a Ctr and return a bool + * short circuits on the first false returned. 
+     * also returns false on a non standard form list
+     */
+    pub fn circuit<F: FnMut(&Ctr) -> bool>(&self, func: &mut F) -> bool { // NOTE(review): <F: ...> bound inferred from len()'s closure; confirm
+        if func(&self.car) { // predicate accepted this element, keep walking
+            match &*(self.cdr) {
+                Ctr::None => true, // end of list reached
+                Ctr::Seg(l) => l.circuit(func), // recurse into the tail
+                _ => false, // cdr holds some other Ctr: non standard form
+            }
+        } else {
+            false // predicate rejected this element, stop here
+        }
+    }
+
+    /* Counts the elements of a list assumed to be in standard form by walking it with circuit().
+     * NOTE(review): car is counted even when it is Ctr::None, so the result is never 0 - intended?
+     */
+    pub fn len(&self) -> u128 {
+        let mut len = 0;
+        self.circuit(&mut |_c: &Ctr| -> bool { len += 1; true });
+        len
+    }
+
+    /* Appends obj to the end of a list assumed to be in standard form, recursing down cdr.
+     * An empty car (Ctr::None) is filled first; else a None cdr becomes Ctr::Seg(Seg::from(obj)).
+     * If cdr is neither Seg nor None nothing is stored and obj is dropped without any signal.
+     * TODO: figure out how not to call CLONE on a CTR via obj arg
+     * TODO: return result
+     */
+    pub fn append<'b>(&mut self, obj: Box<Ctr<'a>>) { // NOTE(review): Box<Ctr<..>> restored, lifetime assumed 'a; confirm
+        if let Ctr::None = &*(self.car) {
+            self.car = obj;
+            return
+        }
+
+        if let Ctr::Seg(s) = &mut *(self.cdr) {
+            s.append(obj);
+            return
+        }
+
+        if let Ctr::None = &mut *(self.cdr) {
+            self.cdr = Box::new(Ctr::Seg(Seg::from(obj)));
+            // pray for memory lost to the void
+        }
+    }
+}
+
+fn seg_to_string(s: &Seg, parens: bool) -> String { // renders car then cdr, space separated; parens wraps in ( )
+    let mut string = String::new();
+    if parens { string.push('('); }
+    match *(s.car) {
+        Ctr::None => {} // an empty car contributes no text
+        _ => string.push_str(&s.car.to_string()),
+    }
+    string.push(' ');
+    match &*(s.cdr) {
+        Ctr::Seg(inner) => string.push_str(&seg_to_string(inner, false)), // tail is rendered without its own parens
+        Ctr::None => { string.pop(); }, // end of list: remove the separator pushed above
+        _ => string.push_str(&s.cdr.to_string()), // any other cdr is printed after the separator
+    }
+    if parens { string.push(')'); }
+
+    string
+}
+
+impl<'a> Clone for Seg<'a> { // clones both boxed fields, car and cdr
+    fn clone(&self) -> Seg<'a> {
+        Seg {
+            car: self.car.clone(),
+            cdr: self.cdr.clone(),
+            _lifetime_variance_determinant: PhantomData,
+        }
+    }
+}
+
+impl<'a> Clone for Ctr<'a> { // variant-by-variant copy; Ctr::Seg defers to Seg::clone
+    fn clone(&self) -> Ctr<'a> {
+        match self {
+            Ctr::Symbol(s) => Ctr::Symbol(s.clone()),
+            Ctr::String(s) => Ctr::String(s.clone()),
+            Ctr::Integer(s) => Ctr::Integer(s.clone()),
+            Ctr::Float(s) => Ctr::Float(s.clone()),
+            Ctr::Bool(s) => Ctr::Bool(s.clone()),
+            Ctr::Seg(s) => Ctr::Seg(s.clone()),
+            Ctr::None => Ctr::None,
+        }
+    }
+}
+
+impl fmt::Display for Ctr<'_> { // textual form of a single value
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Ctr::Symbol(s) => write!(f, "{}", s),
+            Ctr::String(s) => write!(f, "\'{}\'", s), // strings are shown inside single quotes
+            Ctr::Integer(s) => write!(f, "{}", s),
+            Ctr::Float(s) => write!(f, "{}", s),
+            Ctr::Bool(s) => {
+                if *s {
+                    write!(f, "T")
+                } else {
+                    write!(f, "F")
+                }
+            },
+            Ctr::Seg(s) => write!(f, "{}", s),
+            Ctr::None => Ok(()), // None prints as nothing
+        }
+    }
+}
+
+impl fmt::Display for Seg<'_> { // a Seg prints as a parenthesised list via seg_to_string
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", seg_to_string(self, true))
+    }
+}
+
+impl Type {
+    pub fn to_string(&self) -> String {
+        // lower-case name of the type tag; NOTE(review): an fmt::Display impl would be the idiomatic home
+        let ret: &str = match self {
+            Type::Symbol => "symbol",
+            Type::String => "string",
+            Type::Integer => "integer",
+            Type::Float => "float",
+            Type::Bool => "bool",
+            Type::Seg => "segment",
+            Type::None => "none",
+        };
+
+        ret.to_owned()
+    }
+}
diff --git a/the_rewrite/tests/test_lex.rs b/the_rewrite/tests/test_lex.rs
new file mode 100644
index 0000000..3469b4a
--- /dev/null
+++ b/the_rewrite/tests/test_lex.rs
@@ -0,0 +1,178 @@
+mod lex_tests {
+    use relish::ast::lex;
+
+    #[test]
+    fn test_lex_basic_pair() { // a two element list must print back exactly as written
+        let document = String::from("(hello 'world')");
+        match lex(&document) {
+            Ok(tree) => {
+                assert_eq!(tree.to_string(), document);
+            }
+            Err(s) => {
+                println!("{}", s);
+                panic!("lex rejected a valid document");
+            }
+        }
+    }
+
+    #[test]
+    fn test_lex_basic_list() { // a flat five element list must print back exactly as written
+        let document = String::from("(hello 'world' 1 2 3)");
+        match lex(&document) {
+            Ok(tree) => {
+                assert_eq!(tree.to_string(), document);
+            }
+            Err(s) => {
+                println!("{}", s);
+                panic!("lex rejected a valid document");
+            }
+        }
+    }
+
+    #[test]
+    fn test_lex_complex_list() { // nested lists must print back exactly as written
+        let document = String::from("(hello 'world' (1 2 (1 2 3)) 1 2 3)");
+        match lex(&document) {
+            Ok(tree) => {
+                assert_eq!(tree.to_string(), document);
+            }
+            Err(s) => {
+                println!("{}", s);
+                panic!("lex rejected a valid document");
+            }
+        }
+    }
+
+    #[test]
+    fn test_bad_symbol() { // "as;dd" must be reported as an unparsable token
+        let document = String::from("(as;dd)");
+        let output: &str = "Problem lexing document: \"Unparsable token: as;dd\"";
+        match lex(&document) {
+            Ok(tree) => {
+                println!("Bad token yielded: {}", tree);
+                panic!("lex accepted an invalid document");
+            }
+            Err(s) => {
+                assert_eq!(s, output);
+            }
+        }
+    }
+
+    #[test]
+    fn test_list_delim_in_str() { // a '(' inside a quoted string must print back unchanged
+        let document = String::from("('(')");
+        match lex(&document) {
+            Ok(tree) => {
+                assert_eq!(tree.to_string(), document);
+            }
+            Err(s) => {
+                println!("{}", s);
+                panic!("lex rejected a valid document");
+            }
+        }
+    }
+
+    #[test]
+    fn test_empty_string() { // an empty quoted string must print back unchanged
+        let document = String::from("('')");
+        match lex(&document) {
+            Ok(tree) => {
+                assert_eq!(tree.to_string(), document);
+            }
+            Err(s) => {
+                println!("{}", s);
+                panic!("lex rejected a valid document");
+            }
+        }
+    }
+
+    #[test]
+    fn test_unmatched_list_delim_flat() { // a missing ')' must be reported as an unmatched delimiter
+        let document = String::from("(one two");
+        let output: &str = "Problem lexing document: \"Unmatched list delimiter in input\"";
+        match lex(&document) {
+            Ok(tree) => {
+                println!("Bad token yielded: {}", tree);
+                panic!("lex accepted an invalid document");
+            }
+            Err(s) => {
+                assert_eq!(s, output);
+            }
+        }
+    }
+
+    #[test]
+    fn test_unmatched_list_delim_complex() { // same error when only the outer list is left open
+        let document = String::from("(one two (three)");
+        let output: &str = "Problem lexing document: \"Unmatched list delimiter in input\"";
+        match lex(&document) {
+            Ok(tree) => {
+                println!("Bad token yielded: {}", tree);
+                panic!("lex accepted an invalid document");
+            }
+            Err(s) => {
+                assert_eq!(s, output);
+            }
+        }
+    }
+
+    #[test]
+    fn test_comment() { // a leading '#' line must not appear in the printed tree
+        let document = String::from("#!/bin/relish\n(one two)");
+        let output: &str = "(one two)";
+        match lex(&document) {
+            Ok(tree) => {
+                assert_eq!(tree.to_string(), output);
+            }
+            Err(s) => {
+                println!("{}", s);
+                panic!("lex rejected a valid document");
+            }
+        }
+    }
+
+    #[test]
+    fn test_postline_comment() { // a '#' comment trailing a form must not appear in the printed tree
+        let document = String::from("#!/bin/relish\n((one two)# another doc comment\n(three four))");
+        let output: &str = "((one two) (three four))";
+        match lex(&document) {
+            Ok(tree) => {
+                assert_eq!(tree.to_string(), output);
+            }
+            Err(s) => {
+                println!("{}", s);
+                panic!("lex rejected a valid document");
+            }
+        }
+    }
+
+    #[test]
+    fn test_inline_comment() { // a '#' comment on its own line inside a list must not appear either
+        let document = String::from("#!/bin/relish\n((one two)\n# another doc comment\nthree)");
+        let output: &str = "((one two) three)";
+        match lex(&document) {
+            Ok(tree) => {
+                assert_eq!(tree.to_string(), output);
+            }
+            Err(s) => {
+                println!("{}", s);
+                panic!("lex rejected a valid document");
+            }
+        }
+    }
+
+    #[test]
+    fn test_bad_token_list() { // a '(' in the middle of a token must be reported as an error
+        let document = String::from("(one t(wo)");
+        let output: &str = "Problem lexing document: \"list started in middle of another token\"";
+        match lex(&document) {
+            Ok(tree) => {
+                println!("Bad token yielded: {}", tree);
+                panic!("lex accepted an invalid document");
+            }
+            Err(s) => {
+                assert_eq!(s, output);
+            }
+        }
+    }
+}