WIP HyphaeVM

This commit is a WORK IN PROGRESS for the base implementation of the
HyphaeVM. This will be squashed into a larger commit eventually when
the work of implementing the HyphaeVM is finished.

Do note the in-progress number package implementation in snippets.

Signed-off-by: Ava Affine <ava@sunnypup.io>
This commit is contained in:
Ava Apples Affine 2025-06-26 10:52:54 -07:00
parent 3a0a141738
commit 0476160ae3
17 changed files with 2065 additions and 17 deletions

View file

@ -1,211 +0,0 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use alloc::slice;
use alloc::vec::Vec;
use alloc::vec;
use alloc::boxed::Box;
use alloc::string::String;
/* Number of buckets in a QuickMap, and the modulus string_hash
* applies when reducing a key to a bucket index.
*
* Use a prime number so that the modulus operation
* provides better avalanche effect. The value has to fit in a u8
* because string_hash returns its result as a u8.
*/
const INDEXED_BUCKETS: u8 = 199;
/* This only has to work to make quasi unique indexes from
* variable names. Any given program will not have so many
* symbols that this becomes a bottleneck in runtime.
*
* Priorities:
* - SPEED in embedded code
* - avalanche effect
*
* Not a priority: minimal collisions
*
* Just to make sure this is not misused I keep it private.
* And yes, I am sure a B-Tree would be better.
*
* TODO: Make sure that the obvious timing attacks
* don't create risk for scheme crypto libraries...
* or more likely rip and replace with a better no_std hashmap
*/
/* Reduces a key to a bucket index below INDEXED_BUCKETS by summing the
 * base-36 digit value of every character modulo INDEXED_BUCKETS.
 */
#[inline]
fn string_hash(input: &String) -> u8 {
    let modulus = INDEXED_BUCKETS as u32;
    // each letter and number maps to its base-36 digit, all else is 0
    let mut digits = input.chars().map(|ch| ch.to_digit(36).unwrap_or(0));
    match digits.next() {
        // the first digit (at most 35) seeds the sum; every later step
        // is reduced by the modulus
        Some(seed) => digits.fold(seed, |sum, digit| (sum + digit) % modulus) as u8,
        // an empty key lands in bucket 0
        // TODO: some analysis on which cases end up here
        None => 0,
    }
}
/* One hash bucket: the (key, value) pairs whose keys reduce to the
* same string_hash index. New pairs are appended; QuickMap::remove
* uses swap_remove, so the order of the remaining pairs may change.
*/
#[derive(Clone)]
pub struct Bucket<T: Clone>(Vec<(String, T)>);
/* String-keyed map backed by a boxed, fixed-size array of
* INDEXED_BUCKETS buckets. string_hash selects the bucket for a key
* and the bucket is then searched linearly.
*/
#[derive(Clone)]
pub struct QuickMap<T: Clone>(Box<[Bucket<T>; INDEXED_BUCKETS as usize]>);
impl<'a, T: Clone> QuickMap<T> {
    /* Empty bucket used to fill the backing array in new(). */
    const ARRAY_REPEAT_VALUE: Bucket<T> = Bucket(vec![]);

    /* Creates an empty map with all INDEXED_BUCKETS buckets allocated. */
    pub fn new() -> QuickMap<T> {
        QuickMap(Box::new([QuickMap::ARRAY_REPEAT_VALUE; INDEXED_BUCKETS as usize]))
    }

    /* Returns a reference to the value stored under `arg`, or None
     * when the key is absent.
     */
    pub fn get(&self, arg: &String) -> Option<&T> {
        self.0[string_hash(arg) as usize]
            .0
            .iter()
            .find(|kv| &kv.0 == arg)
            .map(|kv| &kv.1)
    }

    /* Removes `arg` from the map and returns its value, or None when
     * the key is absent. Uses swap_remove, so the order of the other
     * entries in the same bucket may change.
     */
    pub fn remove(&mut self, arg: &String) -> Option<T> {
        let bucket = &mut self.0[string_hash(arg) as usize].0;
        let pos = bucket.iter().position(|kv| &kv.0 == arg)?;
        Some(bucket.swap_remove(pos).1)
    }

    /* Reports whether `arg` is present in the map. */
    pub fn contains_key(&self, arg: &String) -> bool {
        self.get(arg).is_some()
    }

    /* Stores `v` under `k`. Returns the value previously stored under
     * that key, or None when the key is new.
     */
    pub fn insert(&mut self, k: String, v: T) -> Option<T> {
        let bucket = &mut self.0[string_hash(&k) as usize].0;
        if let Some(kv) = bucket.iter_mut().find(|kv| kv.0 == k) {
            // swap the new value in and hand the old one back; the
            // displaced value is moved out, never cloned
            return Some(core::mem::replace(&mut kv.1, v));
        }
        bucket.push((k, v));
        None
    }

    /* Returns an iterator over every (key, value) pair, bucket by
     * bucket starting from bucket 0.
     */
    pub fn iter(&'a self) -> QuickMapIter<'a, T> {
        QuickMapIter {
            buckets: &self.0,
            bucket_cursor: 0,
            vec_iter: self.0[0].0.iter(),
        }
    }
}
/* Iterator state for walking every entry of a QuickMap. */
#[derive(Clone)]
pub struct QuickMapIter<'a, T: Clone> {
// the bucket array of the map being iterated
buckets: &'a [Bucket<T>; INDEXED_BUCKETS as usize],
// index of the bucket that vec_iter is currently walking
bucket_cursor: usize,
// iterator over the entries of the current bucket
vec_iter: slice::Iter<'a, (String, T)>,
}
/* Yields every (key, value) pair of the map, one bucket after another. */
impl<'a, T: Clone> Iterator for QuickMapIter<'a, T> {
    type Item = &'a (String, T);

    /* Returns the next entry, skipping over empty buckets. Once every
     * bucket is drained it returns None, and keeps returning None on
     * any further call.
     */
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if let Some(entry) = self.vec_iter.next() {
                return Some(entry);
            }
            // The current bucket is drained. Stop for good on the last
            // bucket: the cursor is only advanced while a next bucket
            // exists, so repeated calls can never index out of bounds.
            if self.bucket_cursor + 1 >= INDEXED_BUCKETS as usize {
                return None;
            }
            self.bucket_cursor += 1;
            self.vec_iter = self.buckets[self.bucket_cursor].0.iter();
        }
    }
}
// Unit tests for QuickMap and its iterator.
#[cfg(test)]
mod tests {
use super::*;
// Inserts one key, reads it back, removes it, and checks it is gone.
#[test]
fn add_fetch_and_remove_simple() {
let mut q = QuickMap::<u8>::new();
let key = String::from("test");
q.insert(String::from("test"), 1);
assert_eq!(*q.get(&key).unwrap(), 1);
assert!(q.contains_key(&key));
assert_eq!(
q.remove(&key),
Some(1),
);
assert_eq!(q.contains_key(&key), false);
assert_eq!(q.get(&key), None);
}
// Fills two buckets with two keys each and checks that iteration
// visits the entries bucket by bucket, in insertion order.
#[test]
fn iter_test() {
let mut q = QuickMap::<u8>::new();
let k1 = String::from("test1");
// '@' is not a base-36 digit, so it adds 0 to the hash
let k2 = String::from("test1@"); // will be in same bucket
let k3 = String::from("test2");
// '-' is not a base-36 digit either
let k4 = String::from("test2--"); // will be in same bucket
q.insert(k1.clone(), 1);
q.insert(k2.clone(), 2);
q.insert(k3.clone(), 3);
q.insert(k4.clone(), 4);
// test k1 and k2 in same bucket but that other keys are not
assert_eq!(q.0[string_hash(&k1) as usize].0.len(), 2);
// test k3 and k4 in same bucket but that other keys are not
assert_eq!(q.0[string_hash(&k3) as usize].0.len(), 2);
let mut i = q.iter();
let entry1 = i.next().unwrap();
let entry2 = i.next().unwrap();
let entry3 = i.next().unwrap();
let entry4 = i.next().unwrap();
// exactly four entries were inserted, so the iterator is now done
assert_eq!(i.next(), None);
assert_eq!(entry1.0, k1);
assert_eq!(entry1.1, 1);
assert_eq!(entry2.0, k2);
assert_eq!(entry2.1, 2);
assert_eq!(entry3.0, k3);
assert_eq!(entry3.1, 3);
assert_eq!(entry4.0, k4);
assert_eq!(entry4.1, 4);
}
}

View file

@ -25,7 +25,6 @@ pub mod sexpr;
pub mod lexer;
pub mod parser;
pub mod number;
pub mod stackstack;
pub mod hmap;
extern crate alloc;

View file

@ -529,7 +529,7 @@ impl Numeric for Float {
if self.0.fract() == 0.0 {
Fraction(self.0 as isize, 1)
} else {
unimplemented!("insert rational approximation procedure here")
todo!("rational approximation implementation")
}
}
}

View file

@ -16,6 +16,7 @@
*/
use core::fmt::Display;
use core::cell::RefCell;
use crate::lexer::{
LexError,
@ -414,11 +415,11 @@ impl Parser {
}
if is_bv {
return Ok(Rc::from(Datum::ByteVector(bv_stack)))
return Ok(Rc::from(Datum::ByteVector(RefCell::from(bv_stack))))
}
if token.token_type == LexTokenType::VectorStart {
return Ok(Rc::from(Datum::Vector(lex_stack)))
return Ok(Rc::from(Datum::Vector(RefCell::from(lex_stack))))
}
// handle an empty list

View file

@ -16,6 +16,9 @@
*/
use core::fmt::{self, Formatter};
use core::ops::Index;
use core::cell::RefCell;
use alloc::format;
use alloc::rc::Rc;
use alloc::vec::Vec;
@ -23,7 +26,7 @@ use alloc::string::String;
use crate::number::Number;
#[derive(Default, Clone)]
#[derive(Default, Clone, PartialEq)]
pub enum Datum {
Number(Number),
Bool(bool),
@ -31,8 +34,8 @@ pub enum Datum {
Symbol(String),
Char(u8),
String(Vec<u8>),
Vector(Vec<Rc<Datum>>),
ByteVector(Vec<u8>),
Vector(RefCell<Vec<Rc<Datum>>>),
ByteVector(RefCell<Vec<u8>>),
#[default]
None,
}
@ -45,7 +48,8 @@ fn byte_to_escaped_char(b: u8) -> String {
}
}
fn fmt_vec<T: fmt::Display>(v: &Vec<T>) -> String {
fn fmt_vec<T: fmt::Display>(ve: &RefCell<Vec<T>>) -> String {
let v = ve.borrow();
if v.len() == 0 {
return String::new()
}
@ -102,9 +106,46 @@ impl fmt::Debug for Datum {
}
#[derive(Default, Clone)]
#[derive(Default, Clone, PartialEq)]
pub struct Ast(pub Rc<Datum>, pub Rc<Datum>);
impl Ast {
// Builds a new Ast from a sub-range of this list. It recurses down the
// Datum::List chain held in self.1, decrementing both `start` and `end`
// by one per node, and clones each element it keeps.
//
// Panics with "index into improper list form" when another node is
// needed but self.1 is not a Datum::List. The `end - start == 1` case
// goes through the Index impl and panics wherever that does.
//
// NOTE(review): the bounds look inconsistent. A range with
// `end - start == 1` returns a single element, yet the `end == 0` base
// case also emits the current element, so for wider ranges `end`
// appears to be inclusive (subsl(0, 2) seems to yield three elements).
// Confirm the intended semantics before relying on this.
pub fn subsl(&self, start: isize, end: isize) -> Ast {
// one-element range: copy the element at `start` into a fresh node
// NOTE(review): `start as usize` wraps for a negative start - confirm
// callers never pass one
if end - start == 1 {
return Ast(Rc::from(self[start as usize].clone()), Rc::from(Datum::None))
}
// end of the range reached: emit the current element and terminate
if end == 0 {
return Ast(
Rc::from((*(self.0)).clone()),
Rc::from(Datum::None)
)
}
let Datum::List(ref next) = *self.1 else {
panic!("index into improper list form")
};
// inside the range: keep this element and append the rest of the range
if start <= 0 {
Ast(
Rc::from((*(self.0)).clone()),
Rc::from(Datum::List(
Rc::from(next.subsl(start - 1, end - 1))))
)
// before the range: skip this element
} else {
next.subsl(start - 1, end - 1)
}
}
// Counts the nodes in the list by following Datum::List links in self.1.
// self.0 is never inspected, so the result is always at least 1.
pub fn len(&self) -> usize {
let Datum::List(ref next) = *self.1 else {
return 1
};
1 + next.len()
}
}
impl Iterator for Ast {
type Item = Rc<Datum>;
@ -127,6 +168,25 @@ impl Iterator for Ast {
}
}
/* Positional access into the list: index 0 is this node's element and
 * each further index follows one Datum::List link in field 1.
 */
impl Index<usize> for Ast {
    type Output = Datum;

    /* Returns the element at `index`. Panics with "out of bounds
     * indexing into AST" when the chain ends early or when the node
     * reached holds Datum::None.
     */
    fn index(&self, index: usize) -> &Self::Output {
        // walk `index` links down the chain
        let mut node = self;
        for _ in 0..index {
            let Datum::List(ref next) = *node.1 else {
                panic!("out of bounds indexing into AST")
            };
            node = &**next;
        }
        // a node whose element is None counts as past the end
        match *node.0 {
            Datum::None => panic!("out of bounds indexing into AST"),
            _ => node.0.as_ref(),
        }
    }
}
impl fmt::Display for Ast {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "({}", self.0)?;

View file

@ -1,234 +0,0 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use core::fmt::{self, Debug, Formatter};
use core::ops::Index;
use alloc::rc::Rc;
// One node of a singly linked stack: the stored item plus the rest of
// the stack below it.
struct StackInner<T: Sized> {
pub next: Stack<T>,
pub data: T
}
// A linked stack. The Rc holds Some(node) for the top item, or None
// when the stack is empty.
struct Stack<T: Sized> (Rc<Option<StackInner<T>>>);
// One frame of a StackStack.
struct StackStackInner<T: Sized> {
// the frame below this one
next: StackStack<T>,
// number of items currently held in `stack`
count: usize,
// the items of this frame
stack: Stack<T>,
}
// A stack of stacks ("frames"). The Rc holds Some(frame) for the top
// frame, or None when there are no frames left.
pub struct StackStack<T: Sized> (Rc<Option<StackStackInner<T>>>);
/* Wraps a value in a stack node that has nothing below it. */
impl<T> From<T> for StackInner<T> {
    fn from(t: T) -> StackInner<T> {
        // an empty Stack marks the bottom
        let bottom = Stack(Rc::new(None));
        Self { data: t, next: bottom }
    }
}
impl<T> From<StackInner<T>> for Stack<T> {
fn from(t: StackInner<T>) -> Stack<T> {
Stack(Rc::from(Some(t)))
}
}
/* Positional access across all frames: index 0 is the top item of the
 * top frame, and indexes past a frame's count continue in the frame
 * below it.
 */
impl<T> Index<usize> for StackStack<T> {
    type Output = T;

    fn index(&self, index: usize) -> &T {
        let mut frame = self;
        let mut remaining = index;
        loop {
            let inner = match *frame.0 {
                Some(ref inner) => inner,
                // guaranteed out of bounds
                None => panic!("index out of bounds on stackstack access"),
            };
            if inner.count <= remaining {
                // pass on to next
                remaining -= inner.count;
                frame = &inner.next;
                continue;
            }
            // fetch from our stack
            let mut cursor = &inner.stack;
            while let Some(ref node) = *cursor.0 {
                if remaining == 0 {
                    return &node.data;
                }
                remaining -= 1;
                cursor = &node.next;
            }
            // should never hit this case
            panic!("encountered inconsistent lengths in stackstack")
        }
    }
}
impl<T: Debug> Debug for StackStack<T> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
let mut ss_idx = 1;
let mut ss_cur = &*self.0;
while let Some(ref inner) = ss_cur {
write!(f, "Frame {ss_idx}:")?;
let mut s_cur = &*inner.stack.0;
while let Some(ref node) = s_cur {
write!(f, " {:#?}", node.data)?;
s_cur = &*node.next.0;
}
write!(f, "\n")?;
ss_cur = &*inner.next.0;
ss_idx += 1;
}
write!(f, "\n")
}
}
impl<T> Stack<T> {
    /* Places `item` on top of the stack. */
    fn push(&mut self, item: T) {
        // the current top becomes the tail of the new node
        let below = Stack(Rc::clone(&self.0));
        self.0 = Rc::new(Some(StackInner { next: below, data: item }));
    }

    /* Removes and returns the top item. Panics with "pop from 0 length
     * stack" when the stack is empty, and panics on the unwrap when the
     * top node is still referenced from somewhere else.
     */
    fn pop(&mut self) -> T {
        // take the top node out, leaving an empty stack behind for now,
        // so that this function holds the only reference to it
        let head = core::mem::replace(&mut self.0, Rc::new(None));
        // deconstruct the rc that formerly held self.0
        match Rc::into_inner(head).unwrap() {
            Some(StackInner { next, data }) => {
                self.0 = next.0;
                data
            }
            None => panic!("pop from 0 length stack"),
        }
    }
}
impl<T> StackStack<T> {
pub fn push_current_stack(&mut self, item: T) {
if let Some(inner) = Rc::get_mut(&mut self.0).unwrap() {
inner.stack.push(item);
inner.count += 1;
} else {
panic!("push to uninitialized stackstack")
}
}
pub fn pop_current_stack(&mut self) -> T {
if let Some(inner) = Rc::get_mut(&mut self.0).unwrap() {
inner.count -= 1;
inner.stack.pop()
} else {
panic!("pop from uninitialized stackstack")
}
}
pub fn add_stack(&mut self) {
self.0 = Rc::from(Some(StackStackInner{
next: StackStack(self.0.clone()),
count: 0,
stack: Stack(Rc::from(None)),
}))
}
pub fn destroy_top_stack(&mut self) {
let s = Rc::get_mut(&mut self.0).unwrap();
if let Some(inner) = s {
self.0 = inner.next.0.clone()
} else {
panic!("del from empty stackstack")
}
}
pub fn new() -> StackStack<T> {
StackStack(Rc::from(Some(StackStackInner{
count: 0,
next: StackStack(Rc::from(None)),
stack: Stack(Rc::from(None)),
})))
}
pub fn len(&self) -> usize {
if let Some(ref inner) = *self.0 {
if let Some(_) = *inner.next.0 {
inner.next.len() + inner.count
} else {
inner.count
}
} else {
0
}
}
}
// Unit tests for StackStack.
#[cfg(test)]
mod tests {
use super::*;
// Pushes items onto two frames, then pops them back in reverse order,
// destroying the emptied top frame in between.
#[test]
fn test_alloc_new_stack_and_push_many() {
let mut g = StackStack::<i8>::new();
g.add_stack();
g.push_current_stack(0);
g.push_current_stack(1);
g.push_current_stack(2);
assert_eq!(g.len(), 3);
g.add_stack();
g.push_current_stack(3);
g.push_current_stack(4);
// len counts the items of every frame, not just the top one
assert_eq!(g.len(), 5);
assert_eq!(g.pop_current_stack(), 4);
assert_eq!(g.pop_current_stack(), 3);
g.destroy_top_stack();
assert_eq!(g.pop_current_stack(), 2);
assert_eq!(g.pop_current_stack(), 1);
assert_eq!(g.pop_current_stack(), 0);
}
// Checks that indexing counts from the top item of the top frame and
// continues into the frames below.
#[test]
fn test_stack_index_bounds() {
let mut g = StackStack::<i8>::new();
g.add_stack();
g.push_current_stack(0);
g.push_current_stack(1);
g.push_current_stack(2);
assert_eq!(g.len(), 3);
g.add_stack();
g.push_current_stack(3);
g.push_current_stack(4);
assert_eq!(g.len(), 5);
assert_eq!(g[0], 4);
assert_eq!(g[1], 3);
assert_eq!(g[2], 2);
assert_eq!(g[3], 1);
assert_eq!(g[4], 0);
// dropping the top frame discards items 3 and 4 along with it
g.destroy_top_stack();
assert_eq!(g.len(), 3);
assert_eq!(g[0], 2);
assert_eq!(g[1], 1);
assert_eq!(g[2], 0);
}
}