WIP HyphaeVM

This commit is a WORK IN PROGRESS for the base implementation of the HyphaeVM. It will eventually be squashed into a larger commit once the work of implementing the HyphaeVM is finished. Note the in-progress number package implementation in snippets.

Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-06-26 10:52:54 -07:00 · 2025-06-26 10:52:54 -07:00 · 0476160ae3
commit 0476160ae3
parent 3a0a141738
17 changed files with 2065 additions and 17 deletions
--- a/mycelium/src/hmap.rs
+++ b/mycelium/src/hmap.rs
@ -1,211 +0,0 @@
-/* Mycelium Scheme
- * Copyright (C) 2025 Ava Affine
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-use alloc::slice;
-use alloc::vec::Vec;
-use alloc::vec;
-use alloc::boxed::Box;
-use alloc::string::String;
-
-/* Use a prime number so that the modulus operation
- * provides better avalanche effect
- */
-const INDEXED_BUCKETS: u8 = 199;
-
-/* This only has to work to make quasi unique indexes from
- * variable names. Any given program will not have so many
- * symbols that this becomes a bottleneck in runtime.
- *
- * Priorities:
- *   - SPEED in embedded code
- *   - avalanche effect
- *
- * Not a priority: minimal collisions
- *
- * Just to make sure this is not misused I keep it private.
- * And yes, I am sure a B-Tree would be better.
- *
- * TODO: Make sure that the obvious timing attacks
- *     dont create risk for scheme crypto libraries...
- *     or more likely rip and replace with a better nostd hashmap
- */
-#[inline]
-fn string_hash(input: &String) -> u8 {
-    input
-        .chars()
-        // each letter and number get a digit
-        .map(|c| c.to_digit(36)
-            // all else is 0
-            .or_else(|| Some(0))
-            .unwrap())
-        // modulo reduction
-        .reduce(|acc, i| (acc + i) % INDEXED_BUCKETS as u32)
-        // TODO: some analysis on which cases end up here
-        .or_else(|| Some(0))
-        .unwrap() as u8
-}
-
-#[derive(Clone)]
-pub struct Bucket<T: Clone>(Vec<(String, T)>);
-#[derive(Clone)]
-pub struct QuickMap<T: Clone>(Box<[Bucket<T>; INDEXED_BUCKETS as usize]>);
-
-impl<'a, T: Clone> QuickMap<T> {
-    const ARRAY_REPEAT_VALUE: Bucket<T> = Bucket(vec![]);
-
-    pub fn new() -> QuickMap<T> {
-        QuickMap(Box::new([QuickMap::ARRAY_REPEAT_VALUE; INDEXED_BUCKETS as usize]))
-    }
-
-    pub fn get(&self, arg: &String) -> Option<&T> {
-        let idx = string_hash(&arg);
-        for kv in self.0[idx as usize].0.iter() {
-            if &kv.0 == arg {
-                return Some(&kv.1);
-            }
-        }
-
-        return None;
-    }
-
-    pub fn remove(&mut self, arg: &String) -> Option<T> {
-        let idx = string_hash(&arg);
-        let len = self.0[idx as usize].0.len();
-        for i in 0..len {
-            if &self
-                .0[idx as usize]
-                .0[i as usize]
-                .0 == arg {
-                    return Some(self.0[idx as usize].0.swap_remove(i).1);
-            }
-        }
-
-        return None;
-    }
-
-    pub fn contains_key(&self, arg: &String) -> bool {
-        let idx = string_hash(arg);
-        for kv in self.0[idx as usize].0.iter() {
-            if &kv.0 == arg {
-                return true;
-            }
-        }
-
-        return false;
-    }
-
-    pub fn insert(&mut self, k: String, v: T) -> Option<T> {
-        let idx = string_hash(&k);
-        for kv in self.0[idx as usize].0.iter_mut() {
-            if kv.0 == k {
-                let tmp = kv.1.clone();
-                kv.1 = v;
-                return Some(tmp);
-            }
-        }
-
-        self.0[idx as usize].0.push((k, v));
-        return None
-    }
-
-    pub fn iter(&'a self) -> QuickMapIter<'a, T> {
-        QuickMapIter::<'a, T>{
-            buckets: &self.0,
-            bucket_cursor: 0,
-            vec_iter: self.0[0].0.iter(),
-        }
-    }
-}
-
-#[derive(Clone)]
-pub struct QuickMapIter<'a, T: Clone> {
-    buckets: &'a [Bucket<T>; INDEXED_BUCKETS as usize],
-    bucket_cursor: usize,
-    vec_iter: slice::Iter<'a, (String, T)>,
-}
-
-impl<'a, T: Clone> Iterator for QuickMapIter<'a, T> {
-    type Item = &'a (String, T);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.vec_iter
-            .next()
-            .or_else(|| {
-                self.bucket_cursor += 1;
-                if self.bucket_cursor == INDEXED_BUCKETS as usize{
-                    None
-                } else {
-                    self.vec_iter = self.buckets[self.bucket_cursor].0.iter();
-                    self.next()
-                }
-            })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn add_fetch_and_remove_simple() {
-        let mut q = QuickMap::<u8>::new();
-        let key = String::from("test");
-        q.insert(String::from("test"), 1);
-        assert_eq!(*q.get(&key).unwrap(), 1);
-        assert!(q.contains_key(&key));
-        assert_eq!(
-            q.remove(&key),
-            Some(1),
-        );
-        assert_eq!(q.contains_key(&key), false);
-        assert_eq!(q.get(&key), None);
-    }
-
-    #[test]
-    fn iter_test() {
-        let mut q = QuickMap::<u8>::new();
-        let k1 = String::from("test1");
-        let k2 = String::from("test1@"); // will be in same bucket
-        let k3 = String::from("test2");
-        let k4 = String::from("test2--"); // will be in same bucket
-        q.insert(k1.clone(), 1);
-        q.insert(k2.clone(), 2);
-        q.insert(k3.clone(), 3);
-        q.insert(k4.clone(), 4);
-
-        // test k1 and k2 in same bucket but that other keys are not
-        assert_eq!(q.0[string_hash(&k1) as usize].0.len(), 2);
-        // test k3 and k4 in same bucket but that other keys are not
-        assert_eq!(q.0[string_hash(&k3) as usize].0.len(), 2);
-
-        let mut i = q.iter();
-        let entry1 = i.next().unwrap();
-        let entry2 = i.next().unwrap();
-        let entry3 = i.next().unwrap();
-        let entry4 = i.next().unwrap();
-
-        assert_eq!(i.next(), None);
-        assert_eq!(entry1.0, k1);
-        assert_eq!(entry1.1, 1);
-        assert_eq!(entry2.0, k2);
-        assert_eq!(entry2.1, 2);
-        assert_eq!(entry3.0, k3);
-        assert_eq!(entry3.1, 3);
-        assert_eq!(entry4.0, k4);
-        assert_eq!(entry4.1, 4);
-    }
-}
--- a/mycelium/src/lib.rs
+++ b/mycelium/src/lib.rs
@ -25,7 +25,6 @@ pub mod sexpr;
 pub mod lexer;
 pub mod parser;
 pub mod number;
-pub mod stackstack;
-pub mod hmap;

 extern crate alloc;
+
--- a/mycelium/src/number.rs
+++ b/mycelium/src/number.rs
@ -529,7 +529,7 @@ impl Numeric for Float {
        if self.0.fract() == 0.0 {
            Fraction(self.0 as isize, 1)
        } else {
-            unimplemented!("insert rational approximation procedure here")
+            todo!("rational approximation implementation")
        }
    }
 }
--- a/mycelium/src/parser.rs
+++ b/mycelium/src/parser.rs
@ -16,6 +16,7 @@
 */

 use core::fmt::Display;
+use core::cell::RefCell;

 use crate::lexer::{
    LexError,
@ -414,11 +415,11 @@ impl Parser {
        }

        if is_bv {
-            return Ok(Rc::from(Datum::ByteVector(bv_stack)))
+            return Ok(Rc::from(Datum::ByteVector(RefCell::from(bv_stack))))
        }

        if token.token_type == LexTokenType::VectorStart {
-            return Ok(Rc::from(Datum::Vector(lex_stack)))
+            return Ok(Rc::from(Datum::Vector(RefCell::from(lex_stack))))
        }

        // handle an empty list
--- a/mycelium/src/sexpr.rs
+++ b/mycelium/src/sexpr.rs
@ -16,6 +16,9 @@
 */

 use core::fmt::{self, Formatter};
+use core::ops::Index;
+use core::cell::RefCell;
+
 use alloc::format;
 use alloc::rc::Rc;
 use alloc::vec::Vec;
@ -23,7 +26,7 @@ use alloc::string::String;

 use crate::number::Number;

-#[derive(Default, Clone)]
+#[derive(Default, Clone, PartialEq)]
 pub enum Datum {
    Number(Number),
    Bool(bool),
@ -31,8 +34,8 @@ pub enum Datum {
    Symbol(String),
    Char(u8),
    String(Vec<u8>),
-    Vector(Vec<Rc<Datum>>),
-    ByteVector(Vec<u8>),
+    Vector(RefCell<Vec<Rc<Datum>>>),
+    ByteVector(RefCell<Vec<u8>>),
    #[default]
    None,
 }
@ -45,7 +48,8 @@ fn byte_to_escaped_char(b: u8) -> String {
    }
 }

-fn fmt_vec<T: fmt::Display>(v: &Vec<T>) -> String {
+fn fmt_vec<T: fmt::Display>(ve: &RefCell<Vec<T>>) -> String {
+    let v = ve.borrow();
    if v.len() == 0 {
        return String::new()
    }
@ -102,9 +106,46 @@ impl fmt::Debug for Datum {
 }


-#[derive(Default, Clone)]
+#[derive(Default, Clone, PartialEq)]
 pub struct Ast(pub Rc<Datum>, pub Rc<Datum>);

+impl Ast {
+    pub fn subsl(&self, start: isize, end: isize) -> Ast {
+        if end - start == 1 {
+            return Ast(Rc::from(self[start as usize].clone()), Rc::from(Datum::None))
+        }
+
+        if end == 0 {
+            return Ast(
+                Rc::from((*(self.0)).clone()),
+                Rc::from(Datum::None)
+            )
+        }
+
+        let Datum::List(ref next) = *self.1 else {
+            panic!("index into improper list form")
+        };
+
+        if start <= 0 {
+            Ast(
+                Rc::from((*(self.0)).clone()),
+                Rc::from(Datum::List(
+                    Rc::from(next.subsl(start - 1, end - 1))))
+            )
+
+        } else {
+            next.subsl(start - 1, end - 1)
+        }
+    }
+
+    pub fn len(&self) -> usize {
+        let Datum::List(ref next) = *self.1 else {
+            return 1
+        };
+        1 + next.len()
+    }
+}
+
 impl Iterator for Ast {
    type Item = Rc<Datum>;

@ -127,6 +168,25 @@ impl Iterator for Ast {
    }
 }

+impl Index<usize> for Ast {
+    type Output = Datum;
+    fn index(&self, index: usize) -> &Self::Output {
+        if index == 0 {
+            if let Datum::None = *self.0 {
+                panic!("out of bounds indexing into AST")
+            } else {
+                self.0.as_ref()
+            }
+        } else {
+            let Datum::List(ref next) = *self.1 else {
+                panic!("out of bounds indexing into AST")
+            };
+
+            next.index(index - 1)
+        }
+    }
+}
+
 impl fmt::Display for Ast {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "({}", self.0)?;
--- a/mycelium/src/stackstack.rs
+++ b/mycelium/src/stackstack.rs
@ -1,234 +0,0 @@
-/*  Mycelium Scheme
- *  Copyright (C) 2025 Ava Affine
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
- */
-
-use core::fmt::{self, Debug, Formatter};
-use core::ops::Index;
-use alloc::rc::Rc;
-
-struct StackInner<T: Sized> {
-    pub next: Stack<T>,
-    pub data: T
-}
-
-struct Stack<T: Sized> (Rc<Option<StackInner<T>>>);
-
-struct StackStackInner<T: Sized> {
-    next: StackStack<T>,
-    count: usize,
-    stack: Stack<T>,
-}
-
-pub struct StackStack<T: Sized> (Rc<Option<StackStackInner<T>>>);
-
-impl<T> From<T> for StackInner<T> {
-    fn from(t: T) -> StackInner<T> {
-        StackInner {
-            next: Stack(Rc::from(None)),
-            data: t,
-        }
-    }
-}
-
-impl<T> From<StackInner<T>> for Stack<T> {
-    fn from(t: StackInner<T>) -> Stack<T> {
-        Stack(Rc::from(Some(t)))
-    }
-}
-
-impl<T> Index<usize> for StackStack<T> {
-    type Output = T;
-    fn index(&self, index: usize) -> &T {
-        if let Some(ref inner) = *self.0 {
-            // pass on to next
-            if inner.count <= index {
-                &inner.next[index - inner.count]
-
-            // fetch from our stack
-            } else {
-                let mut idx = index;
-                let mut cursor = &inner.stack;
-                while let Some(ref node) = *cursor.0 {
-                    if idx == 0 {
-                        return &node.data
-                    }
-                    idx -= 1;
-                    cursor = &node.next;
-                }
-                // should never hit this case
-                panic!("encountered inconsistent lengths in stackstack")
-            }
-
-        // guaranteed out of bounds
-        } else {
-            panic!("index out of bounds on stackstack access")
-        }
-    }
-}
-
-impl<T: Debug> Debug for StackStack<T> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        let mut ss_idx = 1;
-        let mut ss_cur = &*self.0;
-        while let Some(ref inner) = ss_cur {
-            write!(f, "Frame {ss_idx}:")?;
-            let mut s_cur = &*inner.stack.0;
-            while let Some(ref node) = s_cur {
-                write!(f, " {:#?}", node.data)?;
-                s_cur = &*node.next.0;
-            }
-            write!(f, "\n")?;
-            ss_cur = &*inner.next.0;
-            ss_idx += 1;
-        }
-
-        write!(f, "\n")
-    }
-}
-
-impl<T> Stack<T> {
-    fn push(&mut self, item: T) {
-        self.0 = Rc::from(Some(StackInner{
-            data: item,
-            next: Stack(self.0.clone()),
-        }))
-    }
-
-    fn pop(&mut self) -> T {
-        // clone self.0 and then drop first ref, decreasing strong count back to 1
-        let d = self.0.clone();
-        self.0 = Rc::new(None);
-
-        // deconstruct the rc that formerly held self.0
-        let b = Rc::into_inner(d).unwrap();
-        if let Some(inner) = b {
-            let data = inner.data;
-            self.0 = inner.next.0;
-            data
-        } else {
-            panic!("pop from 0 length stack")
-        }
-    }
-}
-
-impl<T> StackStack<T> {
-    pub fn push_current_stack(&mut self, item: T) {
-        if let Some(inner) = Rc::get_mut(&mut self.0).unwrap() {
-            inner.stack.push(item);
-            inner.count += 1;
-        } else {
-            panic!("push to uninitialized stackstack")
-        }
-    }
-
-    pub fn pop_current_stack(&mut self) -> T {
-        if let Some(inner) = Rc::get_mut(&mut self.0).unwrap() {
-            inner.count -= 1;
-            inner.stack.pop()
-        } else {
-            panic!("pop from uninitialized stackstack")
-        }
-    }
-
-    pub fn add_stack(&mut self) {
-        self.0 = Rc::from(Some(StackStackInner{
-            next: StackStack(self.0.clone()),
-            count: 0,
-            stack: Stack(Rc::from(None)),
-        }))
-    }
-
-    pub fn destroy_top_stack(&mut self) {
-        let s = Rc::get_mut(&mut self.0).unwrap();
-        if let Some(inner) = s {
-            self.0 = inner.next.0.clone()
-        } else {
-            panic!("del from empty stackstack")
-        }
-    }
-
-    pub fn new() -> StackStack<T> {
-        StackStack(Rc::from(Some(StackStackInner{
-            count: 0,
-            next: StackStack(Rc::from(None)),
-            stack: Stack(Rc::from(None)),
-        })))
-    }
-
-    pub fn len(&self) -> usize {
-        if let Some(ref inner) = *self.0 {
-            if let Some(_) = *inner.next.0 {
-                inner.next.len() + inner.count
-            } else {
-                inner.count
-            }
-        } else {
-            0
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_alloc_new_stack_and_push_many() {
-        let mut g = StackStack::<i8>::new();
-        g.add_stack();
-        g.push_current_stack(0);
-        g.push_current_stack(1);
-        g.push_current_stack(2);
-        assert_eq!(g.len(), 3);
-        g.add_stack();
-        g.push_current_stack(3);
-        g.push_current_stack(4);
-        assert_eq!(g.len(), 5);
-
-        assert_eq!(g.pop_current_stack(), 4);
-        assert_eq!(g.pop_current_stack(), 3);
-        g.destroy_top_stack();
-        assert_eq!(g.pop_current_stack(), 2);
-        assert_eq!(g.pop_current_stack(), 1);
-        assert_eq!(g.pop_current_stack(), 0);
-    }
-
-    #[test]
-    fn test_stack_index_bounds() {
-        let mut g = StackStack::<i8>::new();
-        g.add_stack();
-        g.push_current_stack(0);
-        g.push_current_stack(1);
-        g.push_current_stack(2);
-        assert_eq!(g.len(), 3);
-        g.add_stack();
-        g.push_current_stack(3);
-        g.push_current_stack(4);
-        assert_eq!(g.len(), 5);
-
-        assert_eq!(g[0], 4);
-        assert_eq!(g[1], 3);
-        assert_eq!(g[2], 2);
-        assert_eq!(g[3], 1);
-        assert_eq!(g[4], 0);
-
-        g.destroy_top_stack();
-        assert_eq!(g.len(), 3);
-        assert_eq!(g[0], 2);
-        assert_eq!(g[1], 1);
-        assert_eq!(g[2], 0);
-    }
-}