From 0f85292e6f2d448c8db6a7f64ca57f163f9b8323 Mon Sep 17 00:00:00 2001 From: Ava Affine Date: Thu, 14 Aug 2025 07:20:03 +0000 Subject: [PATCH] Generate a user manual. This commit extends the documentation held in instructions.toml into a full description of the hyphaeVM design and capabilities. Additionally, instructions.toml is renamed to vm.toml. Finally, the build script outputs a text file (hyphae_manual.txt) that provides a comprehensive manual on the use and effects of HyphaeVM. fixes: #37 Signed-off-by: Ava Affine --- hyphae/build.rs | 178 ++++++++- hyphae/instructions.toml | 394 ------------------ hyphae/src/heap.rs | 3 - hyphae/src/vm.rs | 20 +- hyphae/vm.toml | 835 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 1016 insertions(+), 414 deletions(-) delete mode 100644 hyphae/instructions.toml create mode 100644 hyphae/vm.toml diff --git a/hyphae/build.rs b/hyphae/build.rs index 78115a1..7984d48 100644 --- a/hyphae/build.rs +++ b/hyphae/build.rs @@ -4,18 +4,74 @@ use std::io::{BufWriter, Write}; use std::path::Path; use serde::Deserialize; +const DOCUMENTATION_TITLE: &str = r" +▗▖ ▗▖▗▖ ▗▖▗▄▄▖ ▗▖ ▗▖ ▗▄▖ ▗▄▄▄▖ ▗▖ ▗▖▗▖ ▗▖ +▐▌ ▐▌ ▝▚▞▘ ▐▌ ▐▌▐▌ ▐▌▐▌ ▐▌▐▌ ▐▌ ▐▌▐▛▚▞▜▌ +▐▛▀▜▌ ▐▌ ▐▛▀▘ ▐▛▀▜▌▐▛▀▜▌▐▛▀▀▘ ▐▌ ▐▌▐▌ ▐▌ +▐▌ ▐▌ ▐▌ ▐▌ ▐▌ ▐▌▐▌ ▐▌▐▙▄▄▖ ▝▚▞▘ ▐▌ ▐▌ +"; + +const DOCUMENTATION_MODES: &str = r" + ▗▄▖ ▗▄▄▄ ▗▄▄▄ ▗▄▄▖ ▗▄▄▄▖ ▗▄▄▖ ▗▄▄▖▗▄▄▄▖▗▖ ▗▖ ▗▄▄▖ ▗▖ ▗▖ ▗▄▖ ▗▄▄▄ ▗▄▄▄▖ ▗▄▄▖ +▐▌ ▐▌▐▌ █▐▌ █▐▌ ▐▌▐▌ ▐▌ ▐▌ █ ▐▛▚▖▐▌▐▌ ▐▛▚▞▜▌▐▌ ▐▌▐▌ █▐▌ ▐▌ +▐▛▀▜▌▐▌ █▐▌ █▐▛▀▚▖▐▛▀▀▘ ▝▀▚▖ ▝▀▚▖ █ ▐▌ ▝▜▌▐▌▝▜▌ ▐▌ ▐▌▐▌ ▐▌▐▌ █▐▛▀▀▘ ▝▀▚▖ +▐▌ ▐▌▐▙▄▄▀▐▙▄▄▀▐▌ ▐▌▐▙▄▄▖▗▄▄▞▘▗▄▄▞▘▗▄█▄▖▐▌ ▐▌▝▚▄▞▘ ▐▌ ▐▌▝▚▄▞▘▐▙▄▄▀▐▙▄▄▖▗▄▄▞▘ +"; + +const DOCUMENTATION_REGS: &str = r" +▗▄▄▖ ▗▄▄▄▖ ▗▄▄▖▗▄▄▄▖ ▗▄▄▖▗▄▄▄▖▗▄▄▄▖▗▄▄▖ ▗▄▄▖ +▐▌ ▐▌▐▌ ▐▌ █ ▐▌ █ ▐▌ ▐▌ ▐▌▐▌ +▐▛▀▚▖▐▛▀▀▘▐▌▝▜▌ █ ▝▀▚▖ █ ▐▛▀▀▘▐▛▀▚▖ ▝▀▚▖ +▐▌ ▐▌▐▙▄▄▖▝▚▄▞▘▗▄█▄▖▗▄▄▞▘ █ ▐▙▄▄▖▐▌ ▐▌▗▄▄▞▘ +"; + +const DOCUMENTATION_DTS: &str = r" +▗▄▄▄ ▗▄▖▗▄▄▄▖▗▄▖ ▗▄▄▄▖▗▖ ▗▖▗▄▄▖ ▗▄▄▄▖ ▗▄▄▖ +▐▌ █▐▌ ▐▌ █ ▐▌ ▐▌ █ ▝▚▞▘ ▐▌ ▐▌▐▌ ▐▌ +▐▌ █▐▛▀▜▌ █ ▐▛▀▜▌ █ ▐▌ ▐▛▀▘ ▐▛▀▀▘ ▝▀▚▖ +▐▙▄▄▀▐▌ ▐▌ █ ▐▌ 
▐▌ █ ▐▌ ▐▌ ▐▙▄▄▖▗▄▄▞▘ +"; + +const DOCUMENTATION_INSTRS: &str = r" +▗▄▄▄▖▗▖ ▗▖ ▗▄▄▖▗▄▄▄▖▗▄▄▖ ▗▖ ▗▖ ▗▄▄▖▗▄▄▄▖▗▄▄▄▖ ▗▄▖ ▗▖ ▗▖ ▗▄▄▖ + █ ▐▛▚▖▐▌▐▌ █ ▐▌ ▐▌▐▌ ▐▌▐▌ █ █ ▐▌ ▐▌▐▛▚▖▐▌▐▌ + █ ▐▌ ▝▜▌ ▝▀▚▖ █ ▐▛▀▚▖▐▌ ▐▌▐▌ █ █ ▐▌ ▐▌▐▌ ▝▜▌ ▝▀▚▖ +▗▄█▄▖▐▌ ▐▌▗▄▄▞▘ █ ▐▌ ▐▌▝▚▄▞▘▝▚▄▄▖ █ ▗▄█▄▖▝▚▄▞▘▐▌ ▐▌▗▄▄▞▘ +"; + #[derive(Deserialize)] struct Document { - pub instructions: Vec, + pub description: String, + pub datum: String, + pub error_handling: String, + pub sym_table: String, + pub traps: String, + pub registers: Vec, + pub data_types: Vec, + pub instructions: Vec, + pub addressing_modes: Vec, +} + +#[derive(Deserialize)] +struct Description { + pub name: String, + pub description: String, +} + +#[derive(Deserialize)] +struct AddressingMode { + pub name: String, + pub mutable: bool, + pub symbol: String, + pub example: String, + pub description: String, } -// dont warn about unused fields in json instruction struct -#[allow(dead_code)] #[derive(Deserialize)] struct Instruction { - pub name: String, - pub args: Vec, - pub output: String, + pub name: String, + pub args: Vec, + pub output: String, pub description: String, } @@ -23,12 +79,16 @@ fn main() { let mut peak = 0; let output_path = Path::new(&env::var("OUT_DIR").unwrap()) .join("hyphae_instr.rs"); - let input = fs::read_to_string("instructions.toml") + let doc_path = Path::new(&env::var("OUT_DIR").unwrap()) + .join("hyphae_manual.txt"); + let input = fs::read_to_string("vm.toml") .unwrap(); let mut output_file = BufWriter::new(File::create(&output_path).unwrap()); + let mut manual_file = + BufWriter::new(File::create(&doc_path).unwrap()); - let instruction_table: Document = + let doc: Document = toml::from_str(&input) .expect("hyphae: failed to parse instructions.toml"); @@ -56,7 +116,7 @@ fn main() { isa_num_args += " pub fn num_args(&self) -> Result {\n"; isa_num_args += " match self.0 {\n"; - instruction_table.instructions.iter() + doc.instructions.iter() .enumerate() .for_each(|(idx, instr)| { let const_name = 
instr.name.to_ascii_uppercase(); @@ -109,6 +169,104 @@ fn main() { write!(&mut output_file, "{}", isa).unwrap(); write!(&mut output_file, "\n\npub const TOTAL_INSTRUCTIONS: usize = {};", peak) .unwrap(); + + let separator = "----------------------------\n"; + let section_end = "\n\n\n"; + let mut documentation = String::from(""); + + documentation += DOCUMENTATION_TITLE; + documentation += separator; + documentation += &doc.description; + documentation += section_end; + + documentation += "Datum\n"; + documentation += separator; + documentation += &doc.datum; + documentation += section_end; + + documentation += "Error Handling\n"; + documentation += separator; + documentation += &doc.error_handling; + documentation += section_end; + + documentation += "Symbol Table\n"; + documentation += separator; + documentation += &doc.sym_table; + documentation += section_end; + + documentation += "Traps\n"; + documentation += separator; + documentation += &doc.traps; + documentation += section_end; + + documentation += DOCUMENTATION_MODES; + documentation += "\n\n"; + doc.addressing_modes.iter() + .for_each(|i| { + documentation += &i.name; + documentation += "\n"; + documentation += separator; + if i.mutable { + documentation += "Provides mutable access.\n\n"; + } + documentation += "symbol: "; + documentation += &i.symbol; + documentation += "\nexample: "; + documentation += &i.example; + documentation += "\n\n"; + documentation += &i.description; + documentation += section_end; + }); + documentation += "\n"; + + documentation += DOCUMENTATION_DTS; + documentation += "\n\n"; + doc.data_types.iter() + .for_each(|i| { + documentation += "> "; + documentation += &i.name; + documentation += "\n"; + documentation += &i.description; + documentation += section_end; + }); + + documentation += DOCUMENTATION_REGS; + documentation += "\n\n"; + doc.registers.iter() + .for_each(|i| { + documentation += "> "; + documentation += &i.name; + documentation += "\n"; + documentation += 
&i.description; + documentation += section_end; + }); + + documentation += DOCUMENTATION_INSTRS; + documentation += "\n\n"; + doc.instructions.iter() + .for_each(|i| { + documentation += &i.name; + documentation += "\n"; + documentation += separator; + documentation += "Arguments:"; + i.args.iter().for_each(|j| { + documentation += " "; + documentation += j; + }); + documentation += "\n"; + documentation += "Sets expression register to: "; + documentation += if !i.output.is_empty() { + &i.output + } else { + "" + }; + documentation += "\n\n"; + documentation += &i.description; + documentation += section_end; + }); + + write!(manual_file, "{}", documentation).unwrap(); + println!("cargo::rerun-if-changed=build.rs"); - println!("cargo::rerun-if-changed=instructions.json"); + println!("cargo::rerun-if-changed=vm.toml"); } diff --git a/hyphae/instructions.toml b/hyphae/instructions.toml deleted file mode 100644 index 9fd0b38..0000000 --- a/hyphae/instructions.toml +++ /dev/null @@ -1,394 +0,0 @@ -# TODO: add the following info -# - introductory VM info (description, list of components) -# - info on the different data types -# - info on garbage collection -# - info on program execution -# - info on error handling -# - info on traps -# - info on numbers -# - info on symtable (and its uses) - -[[addressing_modes]] -name = "expr" -mutable = true -symbol = "$expr" -example = "inc $expr" -description = "The expression register is used as a default output, or input by many instructions." - -[[addressing_modes]] -name = "operand" -mutable = true -symbol = "$oper" -example = "add $oper1, $oper2" -description = "There are four operand registers N=(0, 1, 2, 3, and 4). They are for storing mutable data." - -[[addressing_modes]] -name = "stack" -mutable = false -symbol = "%N" -example = "dupl %0, $expr" -description = "Stack addressing mode takes an index in to the stack to read from." 
- -[[addressing_modes]] -name = "instruction" -mutable = false -symbol = "@N" -example = "jmp @100" -description = "Instruction addressing mode indexes by instruction into the program." - -[[addressing_modes]] -name = "numeric" -mutable = false -symbol = "N" -example = "const $expr, 100" -description = "Numeric addressing mode provides read only integer constants to instructions" - -[[addressing_modes]] -name = "char" -mutable = false -symbol = "'N'" -example = "const $expr, 'c'" -description = "Char addressing mode provides read only character constants to instructions" - -[[addressing_modes]] -name = "boolean" -mutable = false -symbol = "{true|false}" -example = "const $expr, true" -description = "Boolean addressing mode provides read only booleans to instructions" - -[[instructions]] -name = "trap" -args = ["index"] -output = "result of function" -description = "triggers callback in trap vector at index" - -[[instructions]] -name = "bind" -args = ["name", "operand"] -output = "" -description = "map name to operand in sym table." - -[[instructions]] -name = "unbind" -args = ["name"] -output = "" -description = "remove name mapping from sym table." - -[[instructions]] -name = "bound" -args = ["name"] -output = "expr = true if name is bound" -description = "test if a name is already bound" - -[[instructions]] -name = "push" -args = ["operand"] -output = "" -description = "pushes deep copy of operand onto stack." - -[[instructions]] -name = "pop" -args = [] -output = "" -description = "removes element at top of stack." 
- -[[instructions]] -name = "enter" -args = [] -output = "" -description = "create new stack frame" - -[[instructions]] -name = "exit" -args = [] -output = "" -description = "delete current stack frame" - -[[instructions]] -name = "link" -args = ["src", "dest"] -output = "" -description = "shallow copies src into dest" - -[[instructions]] -name = "dupl" -args = ["src", "dest"] -output = "" -description = "deep copies src into dest" - -[[instructions]] -name = "clear" -args = ["dest"] -output = "" -description = "clears dest" - -[[instructions]] -name = "nop" -args = [] -output = "" -description = "no operation" - -[[instructions]] -name = "halt" -args = [] -output = "" -description = "halts the VM" - -[[instructions]] -name = "panic" -args = ["error"] -output = "" -description = "sets error state and halts VM" - -[[instructions]] -name = "jmp" -args = ["addr"] -output = "" -description = "sets ictr register to addr" - -[[instructions]] -name = "jmpif" -args = ["addr"] -output = "" -description = "if expr register holds true, sets ictr to addr" - -[[instructions]] -name = "eq" -args = ["a", "b"] -output = "a == b" -description = "equality test" - -[[instructions]] -name = "lt" -args = ["a", "b"] -output = "a < b" -description = "less than test" - -[[instructions]] -name = "gt" -args = ["a", "b"] -output = "a > b" -description = "greater than test" - -[[instructions]] -name = "lte" -args = ["a", "b"] -output = "a <= b" -description = "less than equals test" - -[[instructions]] -name = "gte" -args = ["a", "b"] -output = "a >= b" -description = "greater than equals test" - -[[instructions]] -name = "bool_not" -args = [] -output = "expr = !expr" -description = "boolean not" - -[[instructions]] -name = "bool_and" -args = ["a", "b"] -output = "a && b" -description = "boolean and" - -[[instructions]] -name = "bool_or" -args = ["a", "b"] -output = "a || b" -description = "boolean or" - -[[instructions]] -name = "byte_and" -args = ["a", "b"] -output = "a & b" -description = 
"bitwise and" - -[[instructions]] -name = "byte_or" -args = ["a", "b"] -output = "a | b" -description = "bitwise or" - -[[instructions]] -name = "xor" -args = ["a", "b"] -output = "a xor b" -description = "bitwise exclusive or" - -[[instructions]] -name = "byte_not" -args = [] -output = "expr = !expr" -description = "bitwise not" - -[[instructions]] -name = "add" -args = ["a", "b"] -output = "a + b" -description = "numeric addition" - -[[instructions]] -name = "sub" -args = ["a", "b"] -output = "a - b" -description = "numeric subtraction" - -[[instructions]] -name = "mul" -args = ["a", "b"] -output = "a * b" -description = "numeric multiplication" - -[[instructions]] -name = "fdiv" -args = ["a", "b"] -output = "a / b" -description = "numeric FLOAT division" - -[[instructions]] -name = "idiv" -args = ["a", "b"] -output = "a / b" -description = "numeric INTEGER division" - -[[instructions]] -name = "pow" -args = ["a", "b"] -output = "a ^ b" -description = "numeric operation to raise a to the power of b" - -[[instructions]] -name = "modulo" -args = ["a", "b"] -output = "a % b" -description = "numeric modulo operation" - -[[instructions]] -name = "rem" -args = ["a", "b"] -output = "remainder from a / b" -description = "remainder from integer division" - -[[instructions]] -name = "inc" -args = ["src"] -output = "" -description = "increments number at source" - -[[instructions]] -name = "dec" -args = ["src"] -output = "" -description = "decrements number at source" - -[[instructions]] -name = "ctos" -args = ["src"] -output = "" -description = "mutates a char datum into a string datum" - -[[instructions]] -name = "cton" -args = ["src"] -output = "" -description = "mutates a char datum into a number datum" - -[[instructions]] -name = "ntoc" -args = ["src"] -output = "" -description = "mutates a number datum into a char datum" - -[[instructions]] -name = "ntoi" -args = ["src"] -output = "" -description = "mutates a number datum into its exact form" - -[[instructions]] -name 
= "ntoe" -args = ["src"] -output = "" -description = "mutates a number datum into its inexact form" - -[[instructions]] -name = "const" -args = ["dst", "data"] -output = "" -description = "sets dst location to constant integer data" - -[[instructions]] -name = "mkvec" -args = [] -output = "a blank vector" -description = "creates a new vector" - -[[instructions]] -name = "mkbvec" -args = [] -output = "a blank bytevector" -description = "creates a blank bytevector" - -[[instructions]] -name = "mkstr" -args = [] -output = "an empty string" -description = "creates a new empty string" - -[[instructions]] -name = "index" -args = ["collection", "index"] -output = "collection[index]" -description = "extracts element from collection at index" - -[[instructions]] -name = "length" -args = ["collection"] -output = "length of collection" -description = "calculates length of collection" - -[[instructions]] -name = "subsl" -args = ["collection", "start", "end"] -output = "collection[start:end]" -description = "returns a subset from collection denoted by start and end indexes" - -[[instructions]] -name = "inser" -args = ["collection", "elem", "idx"] -output = "" -description = "inserts an element at specified index into a collection" - -[[instructions]] -name = "cons" -args = ["left", "right"] -output = "resulting collection" -description = "either append right to left or make new list from both" - -[[instructions]] -name = "car" -args = ["list"] -output = "returns first element in cons cell" -description = "takes an AST and returns first element in top level cons cell" - -[[instructions]] -name = "cdr" -args = ["list"] -output = "returns last element in cons cell" -description = "takes an AST and returns last element in top level cons cell" - -[[instructions]] -name = "concat" -args = ["string_l", "string_r"] -output = "string_l+string_r" -description = "concatenates string r to string l and returns a new string" - -[[instructions]] -name = "s_append" -args = ["parent", "child"] 
-output = "" -description = "append in place child character into parent string" diff --git a/hyphae/src/heap.rs b/hyphae/src/heap.rs index b670f80..46f4aab 100644 --- a/hyphae/src/heap.rs +++ b/hyphae/src/heap.rs @@ -22,7 +22,6 @@ use alloc::rc::Rc; use alloc::vec::Vec; use alloc::boxed::Box; use alloc::fmt::Debug; -use alloc::string::String; use organelle::Number; @@ -147,7 +146,6 @@ pub enum Datum { Number(Number), Bool(bool), Cons(Cons), - Symbol(String), Char(u8), String(Vec), Vector(Vec>), @@ -162,7 +160,6 @@ impl Clone for Datum { Datum::Number(n) => Datum::Number(n.clone()), Datum::Bool(n) => Datum::Bool(n.clone()), Datum::Cons(n) => Datum::Cons(n.deep_copy()), - Datum::Symbol(n) => Datum::Symbol(n.clone()), Datum::Char(n) => Datum::Char(n.clone()), Datum::String(n) => Datum::String(n.clone()), Datum::Vector(n) => diff --git a/hyphae/src/vm.rs b/hyphae/src/vm.rs index 020fb91..7c247fe 100644 --- a/hyphae/src/vm.rs +++ b/hyphae/src/vm.rs @@ -255,7 +255,7 @@ impl VM { // stack ops i::PUSH => self.stack.push_current_stack( access!(&instr.1[0]).deep_copy()), - i::POP => _ = self.stack.pop_current_stack(), + i::POP => self.expr = self.stack.pop_current_stack(), i::ENTER => self.stack.add_stack(), i::EXIT => self.stack.destroy_top_stack(), @@ -326,7 +326,7 @@ impl VM { }; let Datum::Number(ref r) = **access!(&instr.1[1]) else { - e!("illgal argument to IDIV instruction"); + e!("illegal argument to IDIV instruction"); }; let Fraction(l, 1) = l.make_exact() else { @@ -562,12 +562,18 @@ impl VM { i::CONS => { let mut l = access!(&instr.1[0]).clone(); if let Datum::Cons(l) = l.deref_mut() { - l.append(access!(&instr.1[1]).clone()); + self.expr = Datum::Cons(l.deep_copy()).into(); } else { - access!(&instr.1[0], Datum::Cons(Cons( - Some(l), - Some(access!(&instr.1[1]).clone()) - )).into()); + self.expr = Datum::Cons(Cons( + Some(l.clone()), + None + )).into(); + } + + if let Datum::Cons(l) = l.deref_mut() { + l.append(access!(&instr.1[1]).deep_copy()); + } else { + 
e!("cons instruction expression register consistency"); } }, diff --git a/hyphae/vm.toml b/hyphae/vm.toml new file mode 100644 index 0000000..b445a25 --- /dev/null +++ b/hyphae/vm.toml @@ -0,0 +1,835 @@ +description = """ +HyphaeVM is a bytecode VM that aims to provide a simplified instruction set to +language implementors and other programmers who wish to use higher level +features without making too many compromises on overhead or performance. + +The simplified instruction set greatly reduces the work in language design and +allows for simpler compilers overall. Meanwhile, the VM still meets performance +needs for modern application development. + +HyphaeVM contains an instruction set, instruction set implementation, garbage +collection (reference counting), error handling, dynamic number package, vector +based data types, cons cell based dynamic data types, trap functions that +are programmatically extendable, as well as faux-registers for mutable access +to datum in an otherwise immutable stack based VM. +""" + +datum = """ +HyphaeVM instructions operate on Datum. A Datum can hold one of many data types +(see data types). The Datum type is implemented as a union type over each +data type's underlying form. Each Datum as stored in the VM is reference +counted. Each Datum will be automatically deallocated when it is no longer +referenced anywhere in the VM state. + +Given that datum are reference counted it is possible to make both shallow and +deep copies to a source datum (see instructions: link and dupl). Information on +whether a datum is a shallow or deep copy of another datum is not accessible at +runtime without custom trap functions. It is up to the programmer to track what +they themselves have created. + +Best of luck, friend. +""" + +error_handling = """ +The VM has fields for error_state and can store any given datum as an error. +Use the PANIC instruction to store an error, set the error state, and halt +HyphaeVM. 
+""" + +sym_table = """ +A symbol table is provided as part of HyphaeVM. It will map symbols to valid +addresses (see addressing modes). This is not provided for the implementation of +variables in languages. It is recommended that any {trans|com}piler implemented +for HyphaeVM reduce variables to Datum on the stack. However, the symbol table +is very useful for linking with library code or adding debug symbols to an +application. +""" + +traps = """ +HyphaeVM includes a trap vector. VM extenders can use this to store platform or +language specific functions that can then be called from bytecode. +""" + +[[registers]] +name = "expr" +description = """ +The expr register acts as a default return value store for instructions that +generate new data. Many instructions will set expr. Some instructions will even +use expr as an input. + +The expr register provides mutable access. +""" + +[[registers]] +name = "operand" +description = """ +There are four operand registers. These each can be used as a type of scratch +space for operating on Datum without pushing to or popping from the stack. + +The operand registers provide mutable access. +""" + +[[registers]] +name = "error" +description = """ +The error register is set by PANIC and is accessed by the VM to explain an +error state. + +The error register does not provide mutable access. +""" + +[[registers]] +name = "ictr" +description = """ +The ictr register acts as the well known "pc" register in many CPUs... With the +caveat that the program is indexed per instruction and not per byte. This is +because the VM has its own logic to deserialize instructions from bytecode so +there is no reason not to rule out a whole class of errors where a bad offset +causes the instruction loader to start loading with some operand. + +The ictr register does not hold a datum. Just an underlying native unsigned +integer (usize).
+""" + +[[data_types]] +name = "number" +description = """ +The dynamic number type is defined in the 'Organelle' package. It is a number +built to enable implementation of the Scheme R7RS "small" specification. The +number type may be stored with any variety of underlying implementation. + +NOTE: The number type is currently undergoing a redesign and will be +reimplemented as a more efficient and predictable type. +""" + +[[data_types]] +name = "string" +description = """ +The string type is implemented by a vector of bytes. It implements a superset +of the functionality that a bytevector implements. +""" + +[[data_types]] +name = "bool" +description = """ +The boolean type is implemented as whatever Rust chooses to represent it. +""" + +[[data_types]] +name = "cons" +description = """ +The cons cell is implemented as a pair of datum. This can contain any type in +either field. Data is referenced and not fully encapsulated within this type. +The cons cell can be used to create linkedlists, or any other dynamic data type +that relies on heap allocated units. +""" + +[[data_types]] +name = "char" +description = "a single byte" + +[[data_types]] +name = "vector" +description = """ +A vector is a list of Datum stored in a contiguous block of memory. It is +represented by the Rust Vector type. +""" + +[[data_types]] +name = "ByteVector" +description = "A bytevector is a vector that only contains individual bytes" + +[[data_types]] +name = "None" +description = """ +The none datum is a null type. It is not checkable or creatable by any +instruction except clear. + +It is requested that programmers refrain from implementing custom traps to use +this type. Doing so is in incredibly bad form. If one is finding themselves +attempting to use None datums it is advised that they rethink their program +logic. 
+""" + +[[addressing_modes]] +name = "expression" +mutable = true +symbol = "$expr" +example = "inc $expr" +description = """ +The expression register is used as a default output, or input by many +instructions (see registers). +""" + +[[addressing_modes]] +name = "operand" +mutable = true +symbol = "$oper" +example = "add $oper1, $oper2" +description = """ +There are four operand registers N=(0, 1, 2, 3, and 4) (see registers). +""" + +[[addressing_modes]] +name = "stack" +mutable = false +symbol = "%N" +example = "dupl %0, $expr" +description = """ +Stack addressing mode takes an index (N). This index is used to get the Nth +element from the top of the stack. + +Keep in mind that any push instruction will then shift the element that a given +stack index refers to. +""" + +[[addressing_modes]] +name = "instruction" +mutable = false +symbol = "@N" +example = "jmp @100" +description = """ +Instruction addressing takes an index (N). The index represents the Nth +instruction in the program. Given how deserialization works in HyphaeVM, this +index does not have to account for operands... just instructions. +""" + +[[addressing_modes]] +name = "numeric" +mutable = false +symbol = "N" +example = "const $expr, 100" +description = """ +Numeric addressing mode accepts a single unsigned 8 bit integer as an argument. + +Not many instructions will read constants. Most will require that you use the +CONST instruction to construct a real datum for use in the program. +""" + +[[addressing_modes]] +name = "character" +mutable = false +symbol = "'N'" +example = "const $expr, 'c'" +description = """ +Character addressing mode accepts a single character as an argument. + +Not many instructions will read constants. Most will require that you use the +CONST instruction to construct a real datum for use in the program. 
+""" + +[[addressing_modes]] +name = "boolean" +mutable = false +symbol = "{true|false}" +example = "const $expr, true" +description = """ +Boolean addressing mode accepts a single boolean value as an argument. + +Not many instructions will read constants. Most will require that you use the +CONST instruction to construct a real datum for use in the program. +""" + +[[instructions]] +name = "trap" +args = ["index"] +output = "result of function" +description = """ +The trap instruction will accept as its argument only a numeric constant. +This constant will be used as an index into the VM trap vector. Once accessed, +the VM triggers the corresponding callback, which may vastly mutate VM state. + +Will halt VM with error state if input is not a valid index into trap vector. +""" + +[[instructions]] +name = "bind" +args = ["name", "operand"] +output = "" +description = """ +The bind instruction will accept only a string datum as its name input. It +then maps the name to whatever address the operand input references in the VM's +symbol table. +""" + +[[instructions]] +name = "unbind" +args = ["name"] +output = "" +description = """ +The unbind instruction will accept only a string datum as its name operand. It +then removes the mapping that corresponds to name from the VM's symbol table. +""" + +[[instructions]] +name = "bound" +args = ["name"] +output = "expr = true if name is bound" +description = """ +The bound instruction will accept only a string datum as its name operand. It +will test if the name is already bound in the VM's symbol table. The expression +register will be set to a boolean datum representing whether or not the name is +bound. +""" + +[[instructions]] +name = "push" +args = ["operand"] +output = "" +description = """ +The push instruction accepts one operand of any type. It will push a deep copy +of the input onto the VM's stack.
+""" + +[[instructions]] +name = "pop" +args = [] +output = "first datum on top of stack" +description = """ +The pop instruction removes the first element at the top of the VMs stack. The +expression register is set to the element returned in this manner. +""" + +[[instructions]] +name = "enter" +args = [] +output = "" +description = """ +The enter instruction creates a new stack frame. Subsequent push instructions +apply new elements to a separate stack that corresponds to this frame. Stack +indexes will still access across all frames as if they were one unified stack. +""" + +[[instructions]] +name = "exit" +args = [] +output = "" +description = """ +The exit instruction deletes current stack frame. All information is simply +discarded. The stack fragment corresponding to the previous stack frame is then +subject to subsequent push or pop operations. + +Together, enter and exit are useful for making sure that a dynamic routine that +makes use of the stack is properly cleaned up after. +""" + +[[instructions]] +name = "link" +args = ["src", "dest"] +output = "" +description = """ +The link instruction shallow copies the src operand into the destination that +the dst operand specifies. Shallow copy of source operand increases its +reference count. + +Destination operand requires mutable access. + +For more information on shallow vs deep copy see datum. +""" + +[[instructions]] +name = "dupl" +args = ["src", "dest"] +output = "" +description = """ +The dupl instruction deep copies the src operand into the destination that the +dst operand specifies. + +Destination operand requires mutable access. + +For more information on shallow vs deep copy see datum. +""" + +[[instructions]] +name = "clear" +args = ["dest"] +output = "" +description = """ +The clear instruction sets whatever destination is specified by its operand to +a None datum. + +Destination operand requires mutable access. + +Please do not use the clear instruction to try to work with None datum. 
It is +provided for cleanup/cleanliness purposes. This can be used to destroy a +shallow copy, decreasing its reference count. +""" + +[[instructions]] +name = "nop" +args = [] +output = "" +description = "no operation" + +[[instructions]] +name = "halt" +args = [] +output = "" +description = """ +The halt instruction sets the VM running state to false. This halts the VM. +""" + +[[instructions]] +name = "panic" +args = ["error"] +output = "" +description = """ +The panic instruction accepts an error operand and shallow copies it into the +error register. Then, error_state flag in the VM is set and the VM is halted. +""" + +[[instructions]] +name = "jmp" +args = ["addr"] +output = "" +description = """ +The jump (jmp) instruction accepts only an instruction address (see addressing +modes). It sets the ictr register to the referenced instruction index. +""" + +[[instructions]] +name = "jmpif" +args = ["addr"] +output = "" +description = """ +The jmpif instruction accepts only an instruction address (see addressing +modes). It sets the ictr register to the referenced instruction index if and +only if the expression register holds a boolean true value... So make sure to +set the expression register. +""" + +[[instructions]] +name = "eq" +args = ["a", "b"] +output = "a == b" +description = """ +The eq instruction performs an equality test and sets the expression register +to the resulting boolean value. In this case "equality" is set by the Rust +PartialEq trait logic as derived across the datum type (hyphae/src/heap.rs). +""" + +[[instructions]] +name = "lt" +args = ["a", "b"] +output = "a < b" +description = """ +The lt instruction accepts two number datum and performs a numeric less than +test. The expression register is set to a boolean value based on whether the +first input is strictly less than the second input.
+""" + +[[instructions]] +name = "gt" +args = ["a", "b"] +output = "a > b" +description = """ +The gt instruction accepts two number datum and performs a numeric greater than +test. The expression register is set to a boolean value based on whether the +first input is strictly greater than the second input. +""" + +[[instructions]] +name = "lte" +args = ["a", "b"] +output = "a <= b" +description = """ +The lte instruction accepts two number datum and performs a numeric less than +equals test. The expression register is set to a boolean value based on whether +the first input is less than or equal to the second input. +""" + +[[instructions]] +name = "gte" +args = ["a", "b"] +output = "a >= b" +description = """ +The gte instruction accepts two number datum and performs a numeric greater +than equals test. The expression register is set to a boolean value based on if +the first input is greater than or equal to the second input. +""" + +[[instructions]] +name = "bool_not" +args = [] +output = "expr = !expr" +description = """ +The bool_not instruction reads the expression register, expecting a boolean +value. It then writes the opposite boolean value back into the expression +register. +""" + +[[instructions]] +name = "bool_and" +args = ["a", "b"] +output = "a && b" +description = """ +The bool_and instruction accepts two operands, both of which must be boolean +datum. Bool_and writes the result of a boolean and operation on both of these +inputs to the expression register. +""" + +[[instructions]] +name = "bool_or" +args = ["a", "b"] +output = "a || b" +description = """ +The bool_or instruction accepts two operands, both of which must be boolean +datum. Bool_or writes the result of a boolean or operation on both of these +inputs to the expression register. +""" + +[[instructions]] +name = "byte_and" +args = ["a", "b"] +output = "a & b" +description = """ +The byte_and instruction accepts two character operands. 
This operation writes +to the expression register the result of bitwise and on both operands. The +resulting type in the expression register is a character. +""" + +[[instructions]] +name = "byte_or" +args = ["a", "b"] +output = "a | b" +description = """ +The byte_or instruction accepts two character operands. This operation writes +to the expression register the result of bitwise or on both operands. The output +stored in the expression register is a character. +""" + +[[instructions]] +name = "xor" +args = ["a", "b"] +output = "a xor b" +description = """ +The xor instruction accepts two character operands. This operation writes to +the expression register the result of a bitwise exclusive or operation on both +inputs. The resulting datum in the expression register is of type character. +""" + +[[instructions]] +name = "byte_not" +args = [] +output = "expr = !expr" +description = """ +The byte_not instruction reads the contents of the expression register, which +is expected to contain a character value. It then writes the corresponding +bitwise not character back to the expression register. +""" + +[[instructions]] +name = "add" +args = ["a", "b"] +output = "a + b" +description = """ +The add instruction accepts two number inputs and writes the sum of both to the +expression register. +""" + +[[instructions]] +name = "sub" +args = ["a", "b"] +output = "a - b" +description = """ +The sub instruction accepts two number inputs and writes the difference of the +last from the first into the expression register. +""" + +[[instructions]] +name = "mul" +args = ["a", "b"] +output = "a * b" +description = """ +The mul instruction accepts two number inputs and writes their product to the +expression register. +""" + +[[instructions]] +name = "fdiv" +args = ["a", "b"] +output = "a / b" +description = """ +The fdiv instruction accepts two number inputs and writes the quotient of the +first divided by the second to the expression register. + +This is a float division operation.
+""" + +[[instructions]] +name = "idiv" +args = ["a", "b"] +output = "a / b" +description = """ +The idiv instruction accepts two number inputs and writes the quotient of the +first divided by the second to the expression register. + +This is an integer division operation. +Instruction will halt VM with error state if non integer inputs are provided. +""" + +[[instructions]] +name = "pow" +args = ["a", "b"] +output = "a ^ b" +description = """ +The pow instruction accepts two number inputs and writes the result of taking +the first to the power of the second to the expression register. +""" + +[[instructions]] +name = "modulo" +args = ["a", "b"] +output = "a % b" +description = """ +The modulo instruction accepts two number inputs and writes the result of the +first modulo the second to the expression register. +""" + +[[instructions]] +name = "rem" +args = ["a", "b"] +output = "remainder from a / b" +description = """ +The rem instruction accepts two number inputs, performs integer division on +them, determines the remainder of this operation, and writes it to the +expression register. +""" + +[[instructions]] +name = "inc" +args = ["src"] +output = "" +description = """ +The inc instruction accepts a single number input. The number input is directly +overwritten with itself incremented by one. + +Requires mutable access to input address. +""" + +[[instructions]] +name = "dec" +args = ["src"] +output = "" +description = """ +The dec instruction accepts a single number input. The number input is directly +overwritten with itself decremented by one. + +Requires mutable access to input address. +""" + +[[instructions]] +name = "ctos" +args = ["src"] +output = "" +description = """ +The ctos instruction accepts a single character input. This operand is +overwritten with a string datum that contains the operand. + +Requires mutable access to input address.
+""" + +[[instructions]] +name = "cton" +args = ["src"] +output = "" +description = """ +The cton instruction accepts a single character input. This operand is +overwritten with a number datum that represents the value formerly held in the +character byte. + +Requires mutable access to input address. +""" + +[[instructions]] +name = "ntoc" +args = ["src"] +output = "" +description = """ +The ntoc instruction accepts a single number input. This operand is overwritten +with a character datum that holds the byte representing the input number. + +Will halt VM with error state if the input number is not a positive number in +8 bit range, or if the input number is not an integer. + +Requires mutable access to input address. +""" + +[[instructions]] +name = "ntoi" +args = ["src"] +output = "" +description = """ +The ntoi instruction accepts a single number input. This operand is overwritten +by a new number datum that represents the inexact form of the source number. + +The inexact form is a normalization of fraction or scientific notation datum to +float datum. + +Requires mutable access to input address. +""" + +[[instructions]] +name = "ntoe" +args = ["src"] +output = "" +description = """ +The ntoe instruction accepts a single number input. This operand is overwritten +by a new number datum that represents the exact form of the source number. + +The exact form is a normalization of float or scientific notation datum into +fraction datum. + +Rational approximation is not yet implemented in the organelle number library. +Attempting to convert a float *with a decimal* will result in the VM crashing +due to an unimplemented!() macro in organelle. + +Requires mutable access to input address. +""" + +[[instructions]] +name = "const" +args = ["dst", "data"] +output = "" +description = """ +The const instruction will accept constant number, bool or char data as a data +operand. It will set the destination operand to a freshly allocated datum +corresponding to the data input.
+ +Requires mutable access to destination operand. +""" + +[[instructions]] +name = "mkvec" +args = [] +output = "a blank vector" +description = """ +The mkvec instruction sets the expression register to a new (blank) vector +datum. +""" + +[[instructions]] +name = "mkbvec" +args = [] +output = "a blank bytevector" +description = """ +The mkbvec instruction sets the expression register to a new (blank) bytevector +datum. +""" + +[[instructions]] +name = "mkstr" +args = [] +output = "an empty string" +description = """ +The mkstr instruction sets the expression register to a new (blank) string +datum. +""" + +[[instructions]] +name = "index" +args = ["collection", "index"] +output = "collection[index]" +description = """ +The index instruction accepts any collection datum (string, vector, bytevector, +cons cell) as well as an index (number datum). The instruction sets the +expression register to the corresponding element from the given collection at +the given index. +""" + +[[instructions]] +name = "length" +args = ["collection"] +output = "length of collection" +description = """ +The length instruction takes any collection datum (string, vector, bytevector, +cons cell) and sets the expression register to a number datum holding the +length of the collection. +""" + +[[instructions]] +name = "subsl" +args = ["collection", "start", "end"] +output = "collection[start:end]" +description = """ +The subsl instruction takes any collection datum (string, vector, bytevector, +cons cell), as well as two number index datum (start and end). The expression +register is set to the subset of the collection starting at index start and +ending at index end. + +This instruction panics if start or end are not positive whole numbers. +""" + +[[instructions]] +name = "inser" +args = ["collection", "elem", "idx"] +output = "" +description = """ +The inser instruction accepts any non-list collection datum (string, vector, +bytevector) as well as a number index and an element datum.
The collection is +modified in place by inserting the element into it at the provided index. + +The instruction panics if the index is not a valid whole positive number. The +instruction will also panic if a datum of any type other than character is +inserted into a bytevector or string. Vectors can contain any element. + +Requires mutable access to the collection operand. +""" + +[[instructions]] +name = "cons" +args = ["left", "right"] +output = "resulting collection" +description = """ +The cons instruction accepts two datum of any types. If the first (left) +element is of type cons cell it is deep copied into the expression register. + +Otherwise, a new cons list is generated in the expression register containing +the left element. + +Finally, the right element is appended to whatever list is in the expression +register. +""" + +[[instructions]] +name = "car" +args = ["list"] +output = "returns first element in cons cell" +description = """ +The car instruction takes a cons cell and returns a shallow copy (pointer) to +the first element in the cons cell. The expression register is set to the +shallow copy. +""" + +[[instructions]] +name = "cdr" +args = ["list"] +output = "returns last element in cons cell" +description = """ +The cdr instruction takes a cons cell and returns a shallow copy (pointer) to +the second element in the cons cell. The expression register is set to the +shallow copy. +""" + +[[instructions]] +name = "concat" +args = ["string_l", "string_r"] +output = "string_l+string_r" +description = """ +The concat instruction accepts two string datum. It sets the expression +register to the result of concatenating the second string to the end of the +first string. +""" + +[[instructions]] +name = "s_append" +args = ["parent", "child"] +output = "" +description = """ +The s_append instruction accepts two datum, a parent and a child. The parent +datum is expected to be of type string, and the child datum is expected to be +of type character.
The string is modified in place by appending the character +to the end of it. + +Requires mutable access to the parent operand. +"""