QuickMap and CI

This commit adds a new datatype: the QuickMap. It is a hashmap from string
keys to arbitrary values, built on an extremely fast but relatively
high-collision string hash.

This is optimized purely for variable names, and is intended to be used for
variable name lookup during (eval ...) or (string->symbol ...), if I must
implement that.

The hashmap is bucketed, with a set number of buckets (199 in this case).
Each bucket is preallocated at initialization containing an empty vector.
There will only ever be one of these initialized as it is only to contain
globally accessible names.

The interface provides new, get, remove, contains_key, insert, and an
iterator implementation.

Unit tests are included and CI is updated.

Signed-off-by: Ava Affine <ava@sunnypup.io>
This commit is contained in:
Ava Apples Affine 2025-06-10 14:16:38 -07:00
parent a12d15b2cd
commit 3a0a141738
3 changed files with 217 additions and 0 deletions

211
mycelium/src/hmap.rs Executable file
View file

@ -0,0 +1,211 @@
/* Mycelium Scheme
* Copyright (C) 2025 Ava Affine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use alloc::slice;
use alloc::vec::Vec;
use alloc::vec;
use alloc::boxed::Box;
use alloc::string::String;
/* Number of buckets in a QuickMap.
 *
 * Use a prime number so that the modulus operation in string_hash
 * is intended to spread keys more evenly across the buckets.
 * The value is a u8 because string_hash returns the bucket index
 * as a u8, so it must stay below 256.
 */
const INDEXED_BUCKETS: u8 = 199;
/* Maps a name to a bucket index in the range 0..INDEXED_BUCKETS.
 *
 * This only has to work to make quasi unique indexes from
 * variable names. Any given program will not have so many
 * symbols that this becomes a bottleneck in runtime.
 *
 * Priorities:
 * - SPEED in embedded code
 * - avalanche effect
 *
 * Not a priority: minimal collisions
 *
 * How it works: every character is read as a base 36 digit
 * (0-9 then a-z, upper and lower case alike), anything else
 * counts as 0, and the digits are summed modulo INDEXED_BUCKETS.
 * Consequences worth knowing:
 * - the empty string hashes to 0
 * - names that differ only in case, in character order, or in
 *   non alphanumeric characters always share a bucket
 *
 * Just to make sure this is not misused I keep it private.
 * And yes, I am sure a B-Tree would be better.
 *
 * TODO: Make sure that the obvious timing attacks
 * dont create risk for scheme crypto libraries...
 * or more likely rip and replace with a better nostd hashmap
 */
#[inline]
fn string_hash(input: &str) -> u8 {
    let buckets = u32::from(INDEXED_BUCKETS);
    let index = input
        .chars()
        // each letter and number get a digit, all else is 0
        .map(|c| c.to_digit(36).unwrap_or(0))
        // modulo reduction; the accumulator stays below `buckets`
        // and a digit is at most 35, so the sum cannot overflow
        .fold(0, |acc, digit| (acc + digit) % buckets);

    // index < INDEXED_BUCKETS <= u8::MAX, so this never truncates
    index as u8
}
/* One hash bucket of a QuickMap: the (key, value) pairs whose keys
 * hash to this bucket's index. The order of the pairs is not stable,
 * because QuickMap::remove uses swap_remove on this vector.
 */
#[derive(Clone)]
pub struct Bucket<T: Clone>(Vec<(String, T)>);
#[derive(Clone)]
pub struct QuickMap<T: Clone>(Box<[Bucket<T>; INDEXED_BUCKETS as usize]>);
impl<'a, T: Clone> QuickMap<T> {
const ARRAY_REPEAT_VALUE: Bucket<T> = Bucket(vec![]);
pub fn new() -> QuickMap<T> {
QuickMap(Box::new([QuickMap::ARRAY_REPEAT_VALUE; INDEXED_BUCKETS as usize]))
}
pub fn get(&self, arg: &String) -> Option<&T> {
let idx = string_hash(&arg);
for kv in self.0[idx as usize].0.iter() {
if &kv.0 == arg {
return Some(&kv.1);
}
}
return None;
}
pub fn remove(&mut self, arg: &String) -> Option<T> {
let idx = string_hash(&arg);
let len = self.0[idx as usize].0.len();
for i in 0..len {
if &self
.0[idx as usize]
.0[i as usize]
.0 == arg {
return Some(self.0[idx as usize].0.swap_remove(i).1);
}
}
return None;
}
pub fn contains_key(&self, arg: &String) -> bool {
let idx = string_hash(arg);
for kv in self.0[idx as usize].0.iter() {
if &kv.0 == arg {
return true;
}
}
return false;
}
pub fn insert(&mut self, k: String, v: T) -> Option<T> {
let idx = string_hash(&k);
for kv in self.0[idx as usize].0.iter_mut() {
if kv.0 == k {
let tmp = kv.1.clone();
kv.1 = v;
return Some(tmp);
}
}
self.0[idx as usize].0.push((k, v));
return None
}
pub fn iter(&'a self) -> QuickMapIter<'a, T> {
QuickMapIter::<'a, T>{
buckets: &self.0,
bucket_cursor: 0,
vec_iter: self.0[0].0.iter(),
}
}
}
/* Iterator over every (key, value) pair of a QuickMap.
 *
 * Fields:
 * - buckets: the bucket array being walked
 * - bucket_cursor: index of the bucket vec_iter is currently reading
 * - vec_iter: position inside the current bucket
 */
#[derive(Clone)]
pub struct QuickMapIter<'a, T: Clone> {
    buckets: &'a [Bucket<T>; INDEXED_BUCKETS as usize],
    bucket_cursor: usize,
    vec_iter: slice::Iter<'a, (String, T)>,
}

impl<'a, T: Clone> Iterator for QuickMapIter<'a, T> {
    type Item = &'a (String, T);

    /* Yields the next pair of the current bucket, moving on to the
     * following buckets when the current one is used up.
     *
     * Returns None once the last bucket is exhausted, and keeps
     * returning None on every later call. The cursor is only advanced
     * after a checked lookup, so it can never index past the bucket
     * array, and empty buckets are skipped in a loop rather than by
     * recursion so stack use stays constant.
     */
    fn next(&mut self) -> Option<Self::Item> {
        // copy the shared reference out so borrows from it live for 'a
        let buckets: &'a [Bucket<T>; INDEXED_BUCKETS as usize] = self.buckets;
        loop {
            if let Some(entry) = self.vec_iter.next() {
                return Some(entry);
            }
            // None here means there is no bucket left to visit
            let following = buckets.get(self.bucket_cursor + 1)?;
            self.bucket_cursor += 1;
            self.vec_iter = following.0.iter();
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /* A single key can be stored, found, and removed again. */
    #[test]
    fn add_fetch_and_remove_simple() {
        let name = String::from("test");
        let mut map = QuickMap::<u8>::new();
        map.insert(String::from("test"), 1);

        // present after insertion
        assert_eq!(*map.get(&name).unwrap(), 1);
        assert!(map.contains_key(&name));

        // removal hands the value back and leaves nothing behind
        assert_eq!(map.remove(&name), Some(1));
        assert_eq!(map.contains_key(&name), false);
        assert_eq!(map.get(&name), None);
    }

    /* Iteration visits every pair, bucket by bucket, including pairs
     * that share a bucket.
     */
    #[test]
    fn iter_test() {
        // "test1"/"test1@" share a bucket, as do "test2"/"test2--"
        let names: Vec<String> = ["test1", "test1@", "test2", "test2--"]
            .iter()
            .map(|name| String::from(*name))
            .collect();
        let values: [u8; 4] = [1, 2, 3, 4];

        let mut map = QuickMap::<u8>::new();
        for (name, value) in names.iter().zip(values.iter()) {
            map.insert(name.clone(), *value);
        }

        // each colliding pair landed in one bucket, and nothing else did
        assert_eq!(map.0[string_hash(&names[0]) as usize].0.len(), 2);
        assert_eq!(map.0[string_hash(&names[2]) as usize].0.len(), 2);

        let mut cursor = map.iter();
        let visited = [
            cursor.next().unwrap(),
            cursor.next().unwrap(),
            cursor.next().unwrap(),
            cursor.next().unwrap(),
        ];
        assert_eq!(cursor.next(), None);

        for (entry, (name, value)) in visited.iter().zip(names.iter().zip(values.iter())) {
            assert_eq!(entry.0, *name);
            assert_eq!(entry.1, *value);
        }
    }
}