diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4fa8b7a..d6d19ac 100755
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -35,3 +35,8 @@ unit-test-stackstack:
   stage: test-backend
   script:
     - cargo test stackstack
+
+unit-test-quickmap:
+  stage: test-backend
+  script:
+    - cargo test hmap
diff --git a/mycelium/src/hmap.rs b/mycelium/src/hmap.rs
new file mode 100755
index 0000000..6b417e2
--- /dev/null
+++ b/mycelium/src/hmap.rs
@@ -0,0 +1,229 @@
+/* Mycelium Scheme
+ * Copyright (C) 2025 Ava Affine
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+use alloc::slice;
+use alloc::vec::Vec;
+use alloc::vec;
+use alloc::boxed::Box;
+use alloc::string::String;
+
+/* Use a prime number so that the modulus operation
+ * provides better avalanche effect
+ */
+const INDEXED_BUCKETS: u8 = 199;
+
+/* This only has to work to make quasi unique indexes from
+ * variable names. Any given program will not have so many
+ * symbols that this becomes a bottleneck in runtime.
+ *
+ * Priorities:
+ * - SPEED in embedded code
+ * - avalanche effect
+ *
+ * Not a priority: minimal collisions
+ *
+ * Just to make sure this is not misused I keep it private.
+ * And yes, I am sure a B-Tree would be better.
+ *
+ * TODO: Make sure that the obvious timing attacks
+ * don't create risk for scheme crypto libraries...
+ * or more likely rip and replace with a better nostd hashmap
+ *
+ * Each char contributes its base 36 digit value, anything that is
+ * not a base 36 digit contributes 0. The result is always smaller
+ * than INDEXED_BUCKETS and the empty string hashes to 0.
+ */
+#[inline]
+fn string_hash(input: &str) -> u8 {
+    input
+        .chars()
+        // each letter and number get a digit, all else is 0
+        .map(|c| c.to_digit(36).unwrap_or(0))
+        // modulo reduction
+        .reduce(|acc, i| (acc + i) % INDEXED_BUCKETS as u32)
+        // reduce only yields None for the empty string
+        .unwrap_or(0) as u8
+}
+
+/// One hash bucket: the (key, value) pairs whose keys share a hash.
+#[derive(Clone)]
+pub struct Bucket<T>(Vec<(String, T)>);
+
+/// String keyed map backed by a fixed, heap allocated array of
+/// `INDEXED_BUCKETS` buckets, each of which is scanned linearly.
+#[derive(Clone)]
+pub struct QuickMap<T>(Box<[Bucket<T>; INDEXED_BUCKETS as usize]>);
+
+impl<'a, T: Clone> QuickMap<T> {
+    const ARRAY_REPEAT_VALUE: Bucket<T> = Bucket(vec![]);
+
+    /// Creates a map in which every bucket is empty.
+    pub fn new() -> QuickMap<T> {
+        QuickMap(Box::new([QuickMap::ARRAY_REPEAT_VALUE; INDEXED_BUCKETS as usize]))
+    }
+
+    /// Returns a reference to the value stored under `arg`, if any.
+    pub fn get(&self, arg: &String) -> Option<&T> {
+        self.0[string_hash(arg) as usize]
+            .0
+            .iter()
+            .find(|kv| &kv.0 == arg)
+            .map(|kv| &kv.1)
+    }
+
+    /// Removes `arg` and returns its value, or `None` if it was absent.
+    ///
+    /// Uses `swap_remove`, so the order of the remaining entries in the
+    /// bucket is not preserved.
+    pub fn remove(&mut self, arg: &String) -> Option<T> {
+        let bucket = &mut self.0[string_hash(arg) as usize].0;
+        let pos = bucket.iter().position(|kv| &kv.0 == arg)?;
+        Some(bucket.swap_remove(pos).1)
+    }
+
+    /// Returns `true` if a value is stored under `arg`.
+    pub fn contains_key(&self, arg: &String) -> bool {
+        self.get(arg).is_some()
+    }
+
+    /// Stores `v` under `k`.
+    ///
+    /// Returns the value previously stored under `k`, or `None` if the
+    /// key is new.
+    pub fn insert(&mut self, k: String, v: T) -> Option<T> {
+        let bucket = &mut self.0[string_hash(&k) as usize].0;
+        for kv in bucket.iter_mut() {
+            if kv.0 == k {
+                // move the old value out instead of cloning it
+                return Some(core::mem::replace(&mut kv.1, v));
+            }
+        }
+
+        bucket.push((k, v));
+        None
+    }
+
+    /// Iterates over all entries, bucket by bucket starting at bucket 0.
+    pub fn iter(&'a self) -> QuickMapIter<'a, T> {
+        QuickMapIter::<'a, T>{
+            buckets: &self.0,
+            bucket_cursor: 0,
+            vec_iter: self.0[0].0.iter(),
+        }
+    }
+}
+
+/// Iterator over the entries of a `QuickMap`, created by `QuickMap::iter`.
+#[derive(Clone)]
+pub struct QuickMapIter<'a, T: Clone> {
+    buckets: &'a [Bucket<T>; INDEXED_BUCKETS as usize],
+    bucket_cursor: usize,
+    vec_iter: slice::Iter<'a, (String, T)>,
+}
+
+impl<'a, T: Clone> Iterator for QuickMapIter<'a, T> {
+    type Item = &'a (String, T);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if let Some(entry) = self.vec_iter.next() {
+                return Some(entry);
+            }
+
+            // Never step the cursor past the last bucket: polling an
+            // exhausted iterator must keep returning None instead of
+            // indexing out of bounds.
+            if self.bucket_cursor + 1 >= INDEXED_BUCKETS as usize {
+                return None;
+            }
+
+            self.bucket_cursor += 1;
+            self.vec_iter = self.buckets[self.bucket_cursor].0.iter();
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn add_fetch_and_remove_simple() {
+        let mut q = QuickMap::<i32>::new();
+        let key = String::from("test");
+        q.insert(String::from("test"), 1);
+        assert_eq!(*q.get(&key).unwrap(), 1);
+        assert!(q.contains_key(&key));
+        assert_eq!(
+            q.remove(&key),
+            Some(1),
+        );
+        assert_eq!(q.contains_key(&key), false);
+        assert_eq!(q.get(&key), None);
+    }
+
+    #[test]
+    fn insert_returns_previous_value() {
+        let mut q = QuickMap::<i32>::new();
+        assert_eq!(q.insert(String::from("test"), 1), None);
+        assert_eq!(q.insert(String::from("test"), 2), Some(1));
+        assert_eq!(q.get(&String::from("test")), Some(&2));
+    }
+
+    #[test]
+    fn iter_test() {
+        let mut q = QuickMap::<i32>::new();
+        let k1 = String::from("test1");
+        let k2 = String::from("test1@"); // will be in same bucket
+        let k3 = String::from("test2");
+        let k4 = String::from("test2--"); // will be in same bucket
+        q.insert(k1.clone(), 1);
+        q.insert(k2.clone(), 2);
+        q.insert(k3.clone(), 3);
+        q.insert(k4.clone(), 4);
+
+        // test k1 and k2 in same bucket but that other keys are not
+        assert_eq!(q.0[string_hash(&k1) as usize].0.len(), 2);
+        // test k3 and k4 in same bucket but that other keys are not
+        assert_eq!(q.0[string_hash(&k3) as usize].0.len(), 2);
+
+        let mut i = q.iter();
+        let entry1 = i.next().unwrap();
+        let entry2 = i.next().unwrap();
+        let entry3 = i.next().unwrap();
+        let entry4 = i.next().unwrap();
+
+        assert_eq!(i.next(), None);
+        assert_eq!(entry1.0, k1);
+        assert_eq!(entry1.1, 1);
+        assert_eq!(entry2.0, k2);
+        assert_eq!(entry2.1, 2);
+        assert_eq!(entry3.0, k3);
+        assert_eq!(entry3.1, 3);
+        assert_eq!(entry4.0, k4);
+        assert_eq!(entry4.1, 4);
+    }
+
+    #[test]
+    fn iter_stays_exhausted() {
+        let q = QuickMap::<i32>::new();
+        let mut i = q.iter();
+        assert_eq!(i.next(), None);
+        // polling again must not walk off the end of the bucket array
+        assert_eq!(i.next(), None);
+    }
+}
diff --git a/mycelium/src/lib.rs b/mycelium/src/lib.rs
index bb8a157..eb3bc56 100644
--- a/mycelium/src/lib.rs
+++ b/mycelium/src/lib.rs
@@ -26,5 +26,6 @@ pub mod lexer;
 pub mod parser;
 pub mod number;
 pub mod stackstack;
+pub mod hmap;
 
 extern crate alloc;