HyphaeVM - WIP

This commit is a WORK IN PROGRESS for the base implementation of the HyphaeVM. It will eventually be squashed into a larger commit once the HyphaeVM implementation is finished. Of note, the ISA is mostly finished and much of the VM design is in place. Still to be done: a few traps in mycelium; migrating pieces such as the number package and the sexpr package into the VM package; and, of course, much testing.

Signed-off-by: Ava Affine <ava@sunnypup.io>
2025-06-26 10:52:54 -07:00 · 2025-06-26 10:52:54 -07:00 · 4ad319213d
commit 4ad319213d
parent 3a0a141738
17 changed files with 2073 additions and 17 deletions
--- a/mycelium/src/hmap.rs
+++ b/mycelium/src/hmap.rs
@ -1,211 +0,0 @@
-/* Mycelium Scheme
- * Copyright (C) 2025 Ava Affine
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-use alloc::slice;
-use alloc::vec::Vec;
-use alloc::vec;
-use alloc::boxed::Box;
-use alloc::string::String;
-
-/* Use a prime number so that the modulus operation
- * provides better avalanche effect
- */
-const INDEXED_BUCKETS: u8 = 199;
-
-/* This only has to work to make quasi unique indexes from
- * variable names. Any given program will not have so many
- * symbols that this becomes a bottleneck in runtime.
- *
- * Priorities:
- *   - SPEED in embedded code
- *   - avalanche effect
- *
- * Not a priority: minimal collisions
- *
- * Just to make sure this is not misused I keep it private.
- * And yes, I am sure a B-Tree would be better.
- *
- * TODO: Make sure that the obvious timing attacks
- *     don't create risk for scheme crypto libraries...
- *     or, more likely, rip and replace with a better no_std hashmap
- */
-#[inline]
-fn string_hash(input: &String) -> u8 {
-    input
-        .chars()
-        // each letter and number get a digit
-        .map(|c| c.to_digit(36)
-            // all else is 0
-            .or_else(|| Some(0))
-            .unwrap())
-        // modulo reduction
-        .reduce(|acc, i| (acc + i) % INDEXED_BUCKETS as u32)
-        // TODO: some analysis on which cases end up here
-        .or_else(|| Some(0))
-        .unwrap() as u8
-}
-
-#[derive(Clone)]
-pub struct Bucket<T: Clone>(Vec<(String, T)>);
-#[derive(Clone)]
-pub struct QuickMap<T: Clone>(Box<[Bucket<T>; INDEXED_BUCKETS as usize]>);
-
-impl<'a, T: Clone> QuickMap<T> {
-    const ARRAY_REPEAT_VALUE: Bucket<T> = Bucket(vec![]);
-
-    pub fn new() -> QuickMap<T> {
-        QuickMap(Box::new([QuickMap::ARRAY_REPEAT_VALUE; INDEXED_BUCKETS as usize]))
-    }
-
-    pub fn get(&self, arg: &String) -> Option<&T> {
-        let idx = string_hash(&arg);
-        for kv in self.0[idx as usize].0.iter() {
-            if &kv.0 == arg {
-                return Some(&kv.1);
-            }
-        }
-
-        return None;
-    }
-
-    pub fn remove(&mut self, arg: &String) -> Option<T> {
-        let idx = string_hash(&arg);
-        let len = self.0[idx as usize].0.len();
-        for i in 0..len {
-            if &self
-                .0[idx as usize]
-                .0[i as usize]
-                .0 == arg {
-                    return Some(self.0[idx as usize].0.swap_remove(i).1);
-            }
-        }
-
-        return None;
-    }
-
-    pub fn contains_key(&self, arg: &String) -> bool {
-        let idx = string_hash(arg);
-        for kv in self.0[idx as usize].0.iter() {
-            if &kv.0 == arg {
-                return true;
-            }
-        }
-
-        return false;
-    }
-
-    pub fn insert(&mut self, k: String, v: T) -> Option<T> {
-        let idx = string_hash(&k);
-        for kv in self.0[idx as usize].0.iter_mut() {
-            if kv.0 == k {
-                let tmp = kv.1.clone();
-                kv.1 = v;
-                return Some(tmp);
-            }
-        }
-
-        self.0[idx as usize].0.push((k, v));
-        return None
-    }
-
-    pub fn iter(&'a self) -> QuickMapIter<'a, T> {
-        QuickMapIter::<'a, T>{
-            buckets: &self.0,
-            bucket_cursor: 0,
-            vec_iter: self.0[0].0.iter(),
-        }
-    }
-}
-
-#[derive(Clone)]
-pub struct QuickMapIter<'a, T: Clone> {
-    buckets: &'a [Bucket<T>; INDEXED_BUCKETS as usize],
-    bucket_cursor: usize,
-    vec_iter: slice::Iter<'a, (String, T)>,
-}
-
-impl<'a, T: Clone> Iterator for QuickMapIter<'a, T> {
-    type Item = &'a (String, T);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.vec_iter
-            .next()
-            .or_else(|| {
-                self.bucket_cursor += 1;
-                if self.bucket_cursor == INDEXED_BUCKETS as usize{
-                    None
-                } else {
-                    self.vec_iter = self.buckets[self.bucket_cursor].0.iter();
-                    self.next()
-                }
-            })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn add_fetch_and_remove_simple() {
-        let mut q = QuickMap::<u8>::new();
-        let key = String::from("test");
-        q.insert(String::from("test"), 1);
-        assert_eq!(*q.get(&key).unwrap(), 1);
-        assert!(q.contains_key(&key));
-        assert_eq!(
-            q.remove(&key),
-            Some(1),
-        );
-        assert_eq!(q.contains_key(&key), false);
-        assert_eq!(q.get(&key), None);
-    }
-
-    #[test]
-    fn iter_test() {
-        let mut q = QuickMap::<u8>::new();
-        let k1 = String::from("test1");
-        let k2 = String::from("test1@"); // will be in same bucket
-        let k3 = String::from("test2");
-        let k4 = String::from("test2--"); // will be in same bucket
-        q.insert(k1.clone(), 1);
-        q.insert(k2.clone(), 2);
-        q.insert(k3.clone(), 3);
-        q.insert(k4.clone(), 4);
-
-        // test k1 and k2 in same bucket but that other keys are not
-        assert_eq!(q.0[string_hash(&k1) as usize].0.len(), 2);
-        // test k3 and k4 in same bucket but that other keys are not
-        assert_eq!(q.0[string_hash(&k3) as usize].0.len(), 2);
-
-        let mut i = q.iter();
-        let entry1 = i.next().unwrap();
-        let entry2 = i.next().unwrap();
-        let entry3 = i.next().unwrap();
-        let entry4 = i.next().unwrap();
-
-        assert_eq!(i.next(), None);
-        assert_eq!(entry1.0, k1);
-        assert_eq!(entry1.1, 1);
-        assert_eq!(entry2.0, k2);
-        assert_eq!(entry2.1, 2);
-        assert_eq!(entry3.0, k3);
-        assert_eq!(entry3.1, 3);
-        assert_eq!(entry4.0, k4);
-        assert_eq!(entry4.1, 4);
-    }
-}
--- a/mycelium/src/lib.rs
+++ b/mycelium/src/lib.rs
@ -25,7 +25,6 @@ pub mod sexpr;
 pub mod lexer;
 pub mod parser;
 pub mod number;
-pub mod stackstack;
-pub mod hmap;

 extern crate alloc;
+
--- a/mycelium/src/number.rs
+++ b/mycelium/src/number.rs
@ -529,7 +529,7 @@ impl Numeric for Float {
        if self.0.fract() == 0.0 {
            Fraction(self.0 as isize, 1)
        } else {
-            unimplemented!("insert rational approximation procedure here")
+            todo!("rational approximation implementation")
        }
    }
 }
--- a/mycelium/src/parser.rs
+++ b/mycelium/src/parser.rs
@ -16,6 +16,7 @@
 */

 use core::fmt::Display;
+use core::cell::RefCell;

 use crate::lexer::{
    LexError,
@ -414,11 +415,11 @@ impl Parser {
        }

        if is_bv {
-            return Ok(Rc::from(Datum::ByteVector(bv_stack)))
+            return Ok(Rc::from(Datum::ByteVector(RefCell::from(bv_stack))))
        }

        if token.token_type == LexTokenType::VectorStart {
-            return Ok(Rc::from(Datum::Vector(lex_stack)))
+            return Ok(Rc::from(Datum::Vector(RefCell::from(lex_stack))))
        }

        // handle an empty list
--- a/mycelium/src/sexpr.rs
+++ b/mycelium/src/sexpr.rs
@ -16,6 +16,9 @@
 */

 use core::fmt::{self, Formatter};
+use core::ops::Index;
+use core::cell::RefCell;
+
 use alloc::format;
 use alloc::rc::Rc;
 use alloc::vec::Vec;
@ -23,7 +26,7 @@ use alloc::string::String;

 use crate::number::Number;

-#[derive(Default, Clone)]
+#[derive(Default, Clone, PartialEq)]
 pub enum Datum {
    Number(Number),
    Bool(bool),
@ -31,8 +34,8 @@ pub enum Datum {
    Symbol(String),
    Char(u8),
    String(Vec<u8>),
-    Vector(Vec<Rc<Datum>>),
-    ByteVector(Vec<u8>),
+    Vector(RefCell<Vec<Rc<Datum>>>),
+    ByteVector(RefCell<Vec<u8>>),
    #[default]
    None,
 }
@ -45,7 +48,8 @@ fn byte_to_escaped_char(b: u8) -> String {
    }
 }

-fn fmt_vec<T: fmt::Display>(v: &Vec<T>) -> String {
+fn fmt_vec<T: fmt::Display>(ve: &RefCell<Vec<T>>) -> String {
+    let v = ve.borrow();
    if v.len() == 0 {
        return String::new()
    }
@ -102,9 +106,46 @@ impl fmt::Debug for Datum {
 }


-#[derive(Default, Clone)]
+#[derive(Default, Clone, PartialEq)]
 pub struct Ast(pub Rc<Datum>, pub Rc<Datum>);

+impl Ast {
+    pub fn subsl(&self, start: isize, end: isize) -> Ast {
+        if end - start == 1 {
+            return Ast(Rc::from(self[start as usize].clone()), Rc::from(Datum::None))
+        }
+
+        if end == 0 {
+            return Ast(
+                Rc::from((*(self.0)).clone()),
+                Rc::from(Datum::None)
+            )
+        }
+
+        let Datum::List(ref next) = *self.1 else {
+            panic!("index into improper list form")
+        };
+
+        if start <= 0 {
+            Ast(
+                Rc::from((*(self.0)).clone()),
+                Rc::from(Datum::List(
+                    Rc::from(next.subsl(start - 1, end - 1))))
+            )
+
+        } else {
+            next.subsl(start - 1, end - 1)
+        }
+    }
+
+    pub fn len(&self) -> usize {
+        let Datum::List(ref next) = *self.1 else {
+            return 1
+        };
+        1 + next.len()
+    }
+}
+
 impl Iterator for Ast {
    type Item = Rc<Datum>;

@ -127,6 +168,25 @@ impl Iterator for Ast {
    }
 }

+impl Index<usize> for Ast {
+    type Output = Datum;
+    fn index(&self, index: usize) -> &Self::Output {
+        if index == 0 {
+            if let Datum::None = *self.0 {
+                panic!("out of bounds indexing into AST")
+            } else {
+                self.0.as_ref()
+            }
+        } else {
+            let Datum::List(ref next) = *self.1 else {
+                panic!("out of bounds indexing into AST")
+            };
+
+            next.index(index - 1)
+        }
+    }
+}
+
 impl fmt::Display for Ast {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "({}", self.0)?;
--- a/mycelium/src/stackstack.rs
+++ b/mycelium/src/stackstack.rs
@ -1,234 +0,0 @@
-/*  Mycelium Scheme
- *  Copyright (C) 2025 Ava Affine
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <https://www.gnu.org/licenses/>.
- */
-
-use core::fmt::{self, Debug, Formatter};
-use core::ops::Index;
-use alloc::rc::Rc;
-
-struct StackInner<T: Sized> {
-    pub next: Stack<T>,
-    pub data: T
-}
-
-struct Stack<T: Sized> (Rc<Option<StackInner<T>>>);
-
-struct StackStackInner<T: Sized> {
-    next: StackStack<T>,
-    count: usize,
-    stack: Stack<T>,
-}
-
-pub struct StackStack<T: Sized> (Rc<Option<StackStackInner<T>>>);
-
-impl<T> From<T> for StackInner<T> {
-    fn from(t: T) -> StackInner<T> {
-        StackInner {
-            next: Stack(Rc::from(None)),
-            data: t,
-        }
-    }
-}
-
-impl<T> From<StackInner<T>> for Stack<T> {
-    fn from(t: StackInner<T>) -> Stack<T> {
-        Stack(Rc::from(Some(t)))
-    }
-}
-
-impl<T> Index<usize> for StackStack<T> {
-    type Output = T;
-    fn index(&self, index: usize) -> &T {
-        if let Some(ref inner) = *self.0 {
-            // pass on to next
-            if inner.count <= index {
-                &inner.next[index - inner.count]
-
-            // fetch from our stack
-            } else {
-                let mut idx = index;
-                let mut cursor = &inner.stack;
-                while let Some(ref node) = *cursor.0 {
-                    if idx == 0 {
-                        return &node.data
-                    }
-                    idx -= 1;
-                    cursor = &node.next;
-                }
-                // should never hit this case
-                panic!("encountered inconsistent lengths in stackstack")
-            }
-
-        // guaranteed out of bounds
-        } else {
-            panic!("index out of bounds on stackstack access")
-        }
-    }
-}
-
-impl<T: Debug> Debug for StackStack<T> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        let mut ss_idx = 1;
-        let mut ss_cur = &*self.0;
-        while let Some(ref inner) = ss_cur {
-            write!(f, "Frame {ss_idx}:")?;
-            let mut s_cur = &*inner.stack.0;
-            while let Some(ref node) = s_cur {
-                write!(f, " {:#?}", node.data)?;
-                s_cur = &*node.next.0;
-            }
-            write!(f, "\n")?;
-            ss_cur = &*inner.next.0;
-            ss_idx += 1;
-        }
-
-        write!(f, "\n")
-    }
-}
-
-impl<T> Stack<T> {
-    fn push(&mut self, item: T) {
-        self.0 = Rc::from(Some(StackInner{
-            data: item,
-            next: Stack(self.0.clone()),
-        }))
-    }
-
-    fn pop(&mut self) -> T {
-        // clone self.0 and then drop first ref, decreasing strong count back to 1
-        let d = self.0.clone();
-        self.0 = Rc::new(None);
-
-        // deconstruct the rc that formerly held self.0
-        let b = Rc::into_inner(d).unwrap();
-        if let Some(inner) = b {
-            let data = inner.data;
-            self.0 = inner.next.0;
-            data
-        } else {
-            panic!("pop from 0 length stack")
-        }
-    }
-}
-
-impl<T> StackStack<T> {
-    pub fn push_current_stack(&mut self, item: T) {
-        if let Some(inner) = Rc::get_mut(&mut self.0).unwrap() {
-            inner.stack.push(item);
-            inner.count += 1;
-        } else {
-            panic!("push to uninitialized stackstack")
-        }
-    }
-
-    pub fn pop_current_stack(&mut self) -> T {
-        if let Some(inner) = Rc::get_mut(&mut self.0).unwrap() {
-            inner.count -= 1;
-            inner.stack.pop()
-        } else {
-            panic!("pop from uninitialized stackstack")
-        }
-    }
-
-    pub fn add_stack(&mut self) {
-        self.0 = Rc::from(Some(StackStackInner{
-            next: StackStack(self.0.clone()),
-            count: 0,
-            stack: Stack(Rc::from(None)),
-        }))
-    }
-
-    pub fn destroy_top_stack(&mut self) {
-        let s = Rc::get_mut(&mut self.0).unwrap();
-        if let Some(inner) = s {
-            self.0 = inner.next.0.clone()
-        } else {
-            panic!("del from empty stackstack")
-        }
-    }
-
-    pub fn new() -> StackStack<T> {
-        StackStack(Rc::from(Some(StackStackInner{
-            count: 0,
-            next: StackStack(Rc::from(None)),
-            stack: Stack(Rc::from(None)),
-        })))
-    }
-
-    pub fn len(&self) -> usize {
-        if let Some(ref inner) = *self.0 {
-            if let Some(_) = *inner.next.0 {
-                inner.next.len() + inner.count
-            } else {
-                inner.count
-            }
-        } else {
-            0
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_alloc_new_stack_and_push_many() {
-        let mut g = StackStack::<i8>::new();
-        g.add_stack();
-        g.push_current_stack(0);
-        g.push_current_stack(1);
-        g.push_current_stack(2);
-        assert_eq!(g.len(), 3);
-        g.add_stack();
-        g.push_current_stack(3);
-        g.push_current_stack(4);
-        assert_eq!(g.len(), 5);
-
-        assert_eq!(g.pop_current_stack(), 4);
-        assert_eq!(g.pop_current_stack(), 3);
-        g.destroy_top_stack();
-        assert_eq!(g.pop_current_stack(), 2);
-        assert_eq!(g.pop_current_stack(), 1);
-        assert_eq!(g.pop_current_stack(), 0);
-    }
-
-    #[test]
-    fn test_stack_index_bounds() {
-        let mut g = StackStack::<i8>::new();
-        g.add_stack();
-        g.push_current_stack(0);
-        g.push_current_stack(1);
-        g.push_current_stack(2);
-        assert_eq!(g.len(), 3);
-        g.add_stack();
-        g.push_current_stack(3);
-        g.push_current_stack(4);
-        assert_eq!(g.len(), 5);
-
-        assert_eq!(g[0], 4);
-        assert_eq!(g[1], 3);
-        assert_eq!(g[2], 2);
-        assert_eq!(g[3], 1);
-        assert_eq!(g[4], 0);
-
-        g.destroy_top_stack();
-        assert_eq!(g.len(), 3);
-        assert_eq!(g[0], 2);
-        assert_eq!(g[1], 1);
-        assert_eq!(g[2], 0);
-    }
-}