// Mycelium/mycelium/src/parser.rs

/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use core::fmt::Display;
use crate::lexer::{
LexError,
LexToken,
LexTokenType,
Lexer,
E_CHAR_TOO_LONG,
E_END_OF_DOCUMENT
};
use crate::number::{Number, Numeric};
use crate::sexpr::{Datum, Ast};
use alloc::vec::Vec;
use alloc::vec;
use alloc::rc::Rc;
use alloc::string::String;
pub const E_LEX_ERROR: &str = "error in lexing document";
pub const E_EXTRA_CLOSE: &str = "closing parenthesis closes nothing";
pub const E_TERRIBLE: &str = "something has gone terribly wrong....";
pub const E_VECTOR_DOT: &str = "dotted notation not valid in vectors";
pub const E_DOT_NO_LIST: &str = "dotted notation used outside of list";
pub const E_CHAR_TRUNCATED: &str = "character literal is truncated";
pub const E_CHAR_HEX_PARSE: &str = "hexadecimal character literal failed to parse";
pub const E_COLLECTION_TRUNC: &str = "collection is truncated";
pub const E_BV_BADBYTE: &str = "number provided is not a real byte";
pub const E_BV_NONBYTE: &str = "bytevector elements must all be bytes";
pub const E_TOO_MANY_DOT: &str = "valid dot notation only includes one dot";
pub const E_DOT_IDX: &str = "dot should precede only the last element in a list";
pub const E_DOT_EMPTY: &str = "cannot apply dotted notation to otherwise empty list";
pub const E_UNQUOTE_NONQQ: &str = "unquote must be within a quasiquoted form";
pub const E_UNQUOTE_SPL_NONQQ: &str = "unquote-splicing must be within a quasiquoted form";
pub const E_UNQUOTE_SPL_COLL: &str = "expected list or vector after unquote-splicing";
/* ParseError
* 0: error string
* 1: either problematic lexing token, or a lexing error
*/
#[derive(Clone)]
pub struct ParseError(pub &'static str, pub Option<Result<LexToken, LexError>>);
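/* For example (a hypothetical sketch; the field layout follows the
 * comment above, and `offending_token`/`lex_error` are placeholder names):
 *
 *   ParseError(E_EXTRA_CLOSE, Some(Ok(offending_token)))  // bad token
 *   ParseError(E_LEX_ERROR, Some(Err(lex_error)))         // lexer failure
 *   ParseError(E_END_OF_DOCUMENT, None)                   // no token at all
 */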
impl Display for ParseError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let err_snippet_start = |t: &LexToken| -> usize {
            /* backtrack from the token start until we hit
             * - the beginning of the line
             * - 25 characters back
             * - the doc start
             */
            let mut idx = t.start_idx;
            while idx > 0 && t.start_idx - idx < 25 {
                if t.source_doc.as_bytes()[idx - 1] == b'\n' {
                    // stop just past the newline so the snippet
                    // begins at the start of the token's line
                    break;
                }
                idx -= 1;
            }
            idx
        };
        let err_snippet_end = |t: &LexToken| -> usize {
            /* read through the document until we hit
             * - the end of the line
             * - 25 characters forward
             * - the doc end
             */
            let mut idx = t.end_idx;
            while idx < t.source_doc.len() && idx - t.end_idx < 25 {
                if t.source_doc.as_bytes()[idx] == b'\n' {
                    break;
                }
                idx += 1;
            }
            idx
        };
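        /* e.g. for a token in the middle of a long line, the snippet is
         * the text from up to 25 characters before the token's start to
         * up to 25 characters past its end, clipped to the enclosing
         * line and the document bounds
         */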
        if let Some(frag) = &self.1 {
            match frag {
                Ok(token) => {
                    writeln!(f, "Error parsing syntax: {}", self.0)?;
                    writeln!(f, " problematic token: {}",
                        &token.source_doc[token.start_idx..token.end_idx])?;
                    // return here so the generic header below is not printed twice
                    return writeln!(f, " {}",
                        &token.source_doc[err_snippet_start(token)..err_snippet_end(token)]);
                },
                Err(e) => return e.fmt(f),
            }
        }
        writeln!(f, "Error parsing syntax: {}", self.0)
}
}
pub struct Parser {
lexer: Lexer,
pub has_error_state: Option<ParseError>,
delayed: Vec<Rc<Datum>>,
quasiquoted: bool,
}
/* The From and Iterator traits serve as the primary
* interface to work with the parser. It is expected to
* make a Lexer first, and then use casting or type conv
* to make it into a parser and then a final AST, which
* we can then convert into a VM image once the compile
* step is finished.
*/
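/* A minimal usage sketch (hypothetical driver code; building the Lexer
 * from an Rc<str> mirrors the tests at the bottom of this file):
 *
 *   let mut parser = Parser::from(Lexer::from(Rc::from("(+ 1 2)")));
 *   let data: Vec<Rc<Datum>> = parser.by_ref().collect();
 *   if let Some(err) = parser.has_error_state {
 *       // iteration stopped early; report err instead of compiling
 *   }
 */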
impl From<Lexer> for Parser {
fn from(l: Lexer) -> Parser {
Parser {
lexer: l,
has_error_state: None,
delayed: vec![],
quasiquoted: false
}
}
}
impl Iterator for Parser {
type Item = Rc<Datum>;
fn next(&mut self) -> Option<Self::Item> {
if self.has_error_state.is_some() {
return None;
}
        if !self.delayed.is_empty() {
            return self.delayed.pop()
        }
let res = self.get_next_datum();
if let Err(ref e) = res {
self.has_error_state = Some(e.clone());
}
        res.ok()
}
}
fn read_number(token: LexToken) -> Result<Number, ParseError> {
return match (&token.source_doc[token.start_idx..token.end_idx]).parse::<Number>() {
Ok(num) => Ok(num),
Err(e) => Err(ParseError(e, Some(Ok(token)))),
}
}
fn read_char(token: LexToken) -> Result<u8, ParseError> {
    if token.end_idx - token.start_idx < 3 {
        // need at least "#\" plus one more character
        return Err(ParseError(E_CHAR_TRUNCATED, Some(Ok(token))))
    }
match &token.source_doc[token.start_idx + 2..token.end_idx] {
"alarm" => Ok(7),
"backspace" => Ok(8),
"delete" => Ok(127),
"escape" => Ok(33),
"newline" => Ok('\n' as u8),
"null" => Ok(0),
"return" => Ok(13),
"space" => Ok(32),
"tab" => Ok(11),
        _ if token.source_doc[token.start_idx + 2..token.end_idx].starts_with('x') &&
            token.end_idx - token.start_idx > 3 => {
if token.end_idx - token.start_idx > 5 {
return Err(ParseError(E_CHAR_TOO_LONG, Some(Ok(token))))
}
match u8::from_str_radix(
&token.source_doc[token.start_idx + 3..token.end_idx],
16) {
Ok(u) => Ok(u),
Err(_) => Err(ParseError(E_CHAR_HEX_PARSE, Some(Ok(token))))
}
},
_ => Ok(token.source_doc.as_bytes()[token.start_idx + 2])
}
}
fn read_bool(token: LexToken) -> bool {
match &token.source_doc[token.start_idx..token.end_idx] {
"#t" => true,
"#f" => false,
_ => panic!("impossible boolean")
}
}
fn read_string(token: LexToken) -> Vec<u8> {
if token.end_idx - token.start_idx < 3 {
// empty string other than delimiters
Vec::default()
} else {
token.source_doc[token.start_idx + 1..token.end_idx - 1]
.as_bytes()
.to_vec()
}
}
impl Parser {
/* Rules we must mind:
* 0. at this stage, drop and ignore comments, directives
* 1. quote, quasiquote, unquote, and unquote splicing
* all require another input after them (excluding
* collection end)
     * 2. unquote-splicing requires a list, vector, or symbol
     *    after it (enforced in complete_unquote_splicing below).
* 3. vectors, lists, may have nested collections in them
* so track collection state in the parser's stack.
* 4. list dotted notation needs next datum put in cdr.
* 5. bytevectors can only have numbers from 0-255 in them.
*/
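    /* As a sketch of rule 4, in terms of the Datum/Ast constructors
     * from sexpr (a proper list terminates in Datum::None, while a
     * dotted pair puts the final datum straight into the cdr):
     *
     *   (a . b)   =>  List(Ast(a, b))
     *   (a b)     =>  List(Ast(a, List(Ast(b, None))))
     *   (a b . c) =>  List(Ast(a, List(Ast(b, c))))
     */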
fn complete_quote(&mut self) -> Result<Rc<Datum>, ParseError> {
let next = self.get_next_datum()?;
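        /* e.g. 'foo yields the same structure as (quote foo):
         * List(Ast(Symbol("quote"), List(Ast(Symbol("foo"), None))))
         */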
Ok(Rc::from(Datum::List(Rc::from(Ast(
Rc::from(Datum::Symbol(String::from("quote"))),
Rc::from(Datum::List(Rc::from(Ast(
next,
Rc::from(Datum::None)
))))
)))))
}
fn complete_unquote_splicing(&mut self, tok: LexToken) -> Result<Rc<Datum>, ParseError> {
let next = self.get_next_datum()?;
match *next {
Datum::List(_) | Datum::Vector(_) | Datum::Symbol(_) => (),
_ => return Err(ParseError(E_UNQUOTE_SPL_COLL, Some(Ok(tok))))
}
Ok(Rc::from(Datum::List(Rc::from(Ast(
Rc::from(Datum::Symbol(String::from("unquote-splicing"))),
Rc::from(Datum::List(Rc::from(Ast(
next,
Rc::from(Datum::None)
))))
)))))
}
fn complete_unquote(&mut self) -> Result<Rc<Datum>, ParseError> {
let next = self.get_next_datum()?;
Ok(Rc::from(Datum::List(Rc::from(Ast(
Rc::from(Datum::Symbol(String::from("unquote"))),
Rc::from(Datum::List(Rc::from(Ast(
next,
Rc::from(Datum::None)
))))
)))))
}
fn complete_quasiquote(&mut self) -> Result<Rc<Datum>, ParseError> {
let prev = self.quasiquoted; // handle nesting appropriately
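        /* e.g. in `(a `(b ,c)) the inner quasiquote re-enters the
         * quasiquoted state, and restoring `prev` afterwards keeps the
         * outer context intact for the elements that follow
         */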
self.quasiquoted = true;
let next = self.get_next_datum()?;
self.quasiquoted = prev;
Ok(Rc::from(Datum::List(Rc::from(Ast(
Rc::from(Datum::Symbol(String::from("quasiquote"))),
Rc::from(Datum::List(Rc::from(Ast(
next,
Rc::from(Datum::None)
))))
)))))
}
fn complete_collection(&mut self, token: LexToken) -> Result<Rc<Datum>, ParseError> {
        let is_bv = matches!(token.token_type, LexTokenType::ByteVectorStart);
let mut lex_stack = vec![];
let mut bv_stack = vec![];
/* counting indexes helps greatly with calculating position dependent
* syntax rules like dot notation in lists
*/
let mut iter_count = 0;
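        /* dot_idx fields: (iteration index of the dot itself,
         * the dot's lexing token kept for error reporting,
         * the datum captured right after the dot, destined for the cdr)
         */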
let mut dot_idx = (None, None, None);
loop {
            let Some(tok) = self.lexer.next() else {
                return Err(ParseError(E_COLLECTION_TRUNC, None))
            };
match tok.token_type {
// Universal cases
LexTokenType::Comment | LexTokenType::Directive => continue,
LexTokenType::NumTypes =>
return Err(ParseError(E_TERRIBLE, Some(Ok(tok)))),
                LexTokenType::Unquote if !self.quasiquoted =>
                    return Err(ParseError(E_UNQUOTE_NONQQ, Some(Ok(tok)))),
                LexTokenType::UnquoteSplice if !self.quasiquoted =>
                    return Err(ParseError(E_UNQUOTE_SPL_NONQQ, Some(Ok(tok)))),
// CollectionEnd must take precedence over the dot notation case
LexTokenType::CollectionEnd => break,
                /* any token after the single post-dot datum (other than
                 * the CollectionEnd handled above) is a syntax error
                 */
                _ if let Some(idx) = dot_idx.0 && iter_count - idx > 1 =>
                    return Err(ParseError(E_DOT_IDX, Some(Ok(dot_idx.1.unwrap())))),
LexTokenType::Dot if token.token_type != LexTokenType::ListStart =>
return Err(ParseError(E_VECTOR_DOT, Some(Ok(tok)))),
// List, Vector cases
LexTokenType::ListStart | LexTokenType::VectorStart |
LexTokenType::ByteVectorStart if !is_bv =>
lex_stack.push(self.complete_collection(tok)?),
LexTokenType::String if !is_bv =>
lex_stack.push(Rc::from(Datum::String(read_string(tok)))),
LexTokenType::Number if !is_bv =>
lex_stack.push(Rc::from(Datum::Number(read_number(tok)?))),
LexTokenType::Char if !is_bv =>
lex_stack.push(Rc::from(Datum::Char(read_char(tok)?))),
LexTokenType::Boolean if !is_bv =>
lex_stack.push(Rc::from(Datum::Bool(read_bool(tok)))),
LexTokenType::Symbol if !is_bv =>
lex_stack.push(Rc::from(Datum::Symbol(
String::from(&tok.source_doc[tok.start_idx..tok.end_idx])))),
LexTokenType::Quote if !is_bv =>
lex_stack.push(self.complete_quote()?),
LexTokenType::QuasiQuote if !is_bv =>
lex_stack.push(self.complete_quasiquote()?),
LexTokenType::Unquote if !is_bv && self.quasiquoted =>
lex_stack.push(self.complete_unquote()?),
LexTokenType::UnquoteSplice if !is_bv && self.quasiquoted =>
lex_stack.push(self.complete_unquote_splicing(tok)?),
// List only cases
                LexTokenType::Dot => if dot_idx.0.is_some() {
return Err(ParseError(E_TOO_MANY_DOT, Some(Ok(tok))))
} else {
dot_idx = (Some(iter_count), Some(tok), None)
},
// ByteVector cases
LexTokenType::Number if is_bv => {
let n = read_number(tok.clone())?
.make_inexact();
if n.0 < 0.0 || n.0 > 255.0 || n.0.fract() != 0.0 {
return Err(ParseError(E_BV_BADBYTE, Some(Ok(tok))))
}
bv_stack.push(n.0 as u8);
},
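                // e.g. #u8(0 14 249) passes, while #u8(256) and a
                // fractional value like 2.5 fail the check above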
_ if is_bv => return Err(ParseError(E_BV_NONBYTE, Some(Ok(tok)))),
// This should never get touched
                _ => unreachable!("theoretically impossible case in parser::complete_collection"),
}
            if let Some(idx) = dot_idx.0 && iter_count == idx + 1 {
                // capture the datum following the dot; it becomes the cdr
                dot_idx.2 = lex_stack.pop();
            }
iter_count += 1;
}
if is_bv {
return Ok(Rc::from(Datum::ByteVector(bv_stack)))
}
if token.token_type == LexTokenType::VectorStart {
return Ok(Rc::from(Datum::Vector(lex_stack)))
}
// handle an empty list
        if lex_stack.is_empty() {
// dont try to do something like "( . 'thing)"
if let (_, Some(node), _) = dot_idx {
return Err(ParseError(E_DOT_EMPTY, Some(Ok(node))))
}
return Ok(Rc::from(Datum::List(Rc::from(Ast(Rc::from(Datum::None),
Rc::from(Datum::None))))))
}
let mut from_rear: Rc<Ast>;
        if let (_, _, Some(node)) = dot_idx {
            from_rear = Rc::from(Ast(lex_stack.pop().unwrap(), node));
} else {
from_rear = Rc::from(Ast(lex_stack.pop().unwrap(), Rc::from(Datum::None)));
}
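        /* cons the remaining elements onto the tail from right to left,
         * e.g. stack [a b] with tail (c . d) folds up to (a b c . d)
         */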
lex_stack.iter()
.rev()
.for_each(|x| {
from_rear = Rc::from(Ast(x.clone(), Rc::from(Datum::List(from_rear.clone()))));
});
Ok(Rc::from(Datum::List(from_rear)))
}
fn get_next_datum(&mut self) -> Result<Rc<Datum>, ParseError> {
if let Some(token) = self.lexer.next() {
match token.token_type {
// normal paths:
LexTokenType::String => Ok(Rc::from(Datum::String(read_string(token)))),
LexTokenType::Number => Ok(Rc::from(Datum::Number(read_number(token)?))),
LexTokenType::Char => Ok(Rc::from(Datum::Char(read_char(token)?))),
LexTokenType::Symbol => Ok(Rc::from(Datum::Symbol(String::from(
&token.source_doc[token.start_idx..token.end_idx])))),
LexTokenType::Boolean => Ok(Rc::from(Datum::Bool(read_bool(token)))),
LexTokenType::VectorStart | LexTokenType::ListStart |
LexTokenType::ByteVectorStart => self.complete_collection(token),
LexTokenType::Quote => self.complete_quote(),
LexTokenType::QuasiQuote => self.complete_quasiquote(),
LexTokenType::Unquote if self.quasiquoted => self.complete_unquote(),
LexTokenType::UnquoteSplice if self.quasiquoted =>
self.complete_unquote_splicing(token),
// immediate errors:
LexTokenType::CollectionEnd => Err(ParseError(E_EXTRA_CLOSE, Some(Ok(token)))),
LexTokenType::NumTypes => Err(ParseError(E_TERRIBLE, Some(Ok(token)))),
LexTokenType::Dot => Err(ParseError(E_DOT_NO_LIST, Some(Ok(token)))),
LexTokenType::Unquote if !self.quasiquoted =>
Err(ParseError(E_UNQUOTE_NONQQ, Some(Ok(token)))),
LexTokenType::UnquoteSplice if !self.quasiquoted =>
Err(ParseError(E_UNQUOTE_SPL_NONQQ, Some(Ok(token)))),
// ignore comment, directive:
_ => self.get_next_datum(),
}
// Lexer error
} else if self.lexer.has_error_state.is_some() {
Err(ParseError(E_LEX_ERROR,
Some(Err(self.lexer.has_error_state.clone().unwrap()))))
// End of document
} else {
Err(ParseError(E_END_OF_DOCUMENT, None))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_cases() {
let happy_cases = vec![
// case, result
("\"test\"", "\"test\""),
("test", "test"),
("(1 2 3)", "(1 2 3)"),
("'test", "(quote test)"),
("`test", "(quasiquote test)"),
("`(,one)", "(quasiquote ((unquote one)))"),
("`(test ,@(two))", "(quasiquote (test (unquote-splicing (two))))"),
("#u8(0 14 249)", "#u8(0 14 249)"),
("(nested lists (are pretty cool))", "(nested lists (are pretty cool))"),
("((nested) lists (are (pretty) cool))", "((nested) lists (are (pretty) cool))"),
("(dotted . notation)", "(dotted . notation)"),
("(longer dotted . notation)", "(longer dotted . notation)"),
("(hello \"world\")", "(hello \"world\")"),
("; big doc string\n(one two)", "(one two)"),
("(list #(vect 2 3 #u8(0 0)))", "(list #(vect 2 3 #u8(0 0)))"),
("(#\\xf0 #\\alarm #\\a #\\z)", "(#\\xf0 #\\x7 #\\a #\\z)")
];
let sad_cases = vec![
"(",
"( one two ",
"( one two three ( four )",
")",
"#(st",
"#u8(0 ",
"#u8(256)",
"#u8(two)",
"(one two ,three)",
"(one two ,@three)",
"`(one two ,@4.0)",
"(. two)",
"(one . two . three)",
];
println!("+ Testing Happy Cases...");
happy_cases.iter()
.for_each(|(case, result)| {
println!(" - case: {}", *case);
let mut p = Parser::from(Lexer::from(Rc::from(*case)));
let res = p.next();
                if res.is_none() {
                    println!("{}", p.has_error_state.unwrap());
                }
assert_eq!(
format!("{}", res.unwrap()),
format!("{}", result)
);
});
println!("+ Testing Sad Cases...");
sad_cases.iter()
.for_each(|case| {
println!(" - case: {}", *case);
let mut p = Parser::from(Lexer::from(Rc::from(*case)));
assert!(p.next().is_none() && p.has_error_state.is_some())
});
}
}