From 244d57d4ef7a16d7d5e88100c0423141c243e82a Mon Sep 17 00:00:00 2001 From: Ava Affine Date: Thu, 14 Aug 2025 07:20:03 +0000 Subject: [PATCH] WIP: elaborate on Hyphae in instructions.toml Signed-off-by: Ava Affine --- hyphae/instructions.toml | 530 ++++++++++++++++++++++++++++++++++----- hyphae/src/heap.rs | 3 - hyphae/src/vm.rs | 4 +- 3 files changed, 465 insertions(+), 72 deletions(-) diff --git a/hyphae/instructions.toml b/hyphae/instructions.toml index 9fd0b38..bfa34e9 100644 --- a/hyphae/instructions.toml +++ b/hyphae/instructions.toml @@ -1,127 +1,359 @@ -# TODO: add the following info -# - introductory VM info (description, list of components) -# - info on the different data types -# - info on garbage collection -# - info on program execution -# - info on error handling -# - info on traps -# - info on numbers -# - info on symtable (and its uses) +description = """ +HyphaeVM is a bytecode VM that aims to provide a simplified instruction set to +language implementors and other programmers who wish to use higher level +features without making too many compromises on overhead or performance. + +The simplified instruction set greatly reduces the work in language design and +allows for simpler compilers overall. Meanwhile, the VM still meets performance +needs for modern application development. + +HyphaeVM contains an instruction set, instruction set implementation, garbage +collection (reference counting), error handling, dynamic number package, vector +based data types, cons cell based dynamic data types, trap functions that +are programmatically extendable, as well as faux-registers for mutable access +to datum in an otherwise immutable stack based VM. +""" + +datum = """ +HyphaeVM instructions operate on Datum. A Datum can hold one of many data types +(see data types). The Datum type is implemented as a union type over each +data type's underlying form. Each Datum as stored in the VM is reference +counted. 
Each Datum will be automatically deallocated when it is no longer +referenced anywhere in the VM state. + +Given that datum are reference counted it is possible to make both shallow and +deep copies of a source datum (see instructions: link and dupl). Information on +whether a datum is a shallow or deep copy of another datum is not accessible at +runtime without custom trap functions. It is up to the programmer to track what +they themselves have created. + +Best of luck, friend. +""" + +error_handling = """ +The VM has fields for error_state and can store any given datum as an error. +Use the PANIC instruction to store an error, set the error state, and halt +HyphaeVM. +""" + +sym_table = """ +A symbol table is provided as part of HyphaeVM. It will map symbols to valid +addresses (see addressing modes). This is not provided for the implementation of +variables in languages. It is recommended that any {trans|com}piler implemented +for HyphaeVM reduce variables to Datum on the stack. However, the symbol table +is very useful for linking with library code or adding debug symbols to an +application. +""" + +traps = """ +HyphaeVM includes a trap vector. VM extenders can use this to store platform or +language specific functions that can then be called from bytecode. +""" + +[[registers]] +name = "expr" +description = """ +The expr register acts as a default return value store for instructions that +generate new data. Many instructions will set expr. Some instructions will even +use expr as an input. + +The expr register provides mutable access. +""" + +[[registers]] +name = "operand" +description = """ +There are four operand registers. These each can be used as a type of scratch +space for operating on Datum without pushing to or popping from the stack. + +The operand registers provide mutable access. +""" + +[[registers]] +name = "error" +description = """ +The error register is set by PANIC and is accessed by the VM to explain an +error state. 
+ +The error register does not provide mutable access. +""" + +[[registers]] +name = "ictr" +description = """ +The ictr register acts as the well known "pc" register in many CPUs... With the +caveat that the program is indexed per instruction and not per byte. This is +because the VM has its own logic to deserialize instructions from bytecode so +there is no reason not to rule out a whole class of errors where a bad offset +causes the instruction loader to start loading with some operand. + +The ictr register does not hold a datum. Just an underlying native unsigned +integer (usize). +""" + +[[data_types]] +name = "number" +description = """ +The dynamic number type is defined in the 'Organelle' package. It is a number +built to enable implementation of the Scheme R7RS "small" specification. The +number type may be stored with any variety of underlying implementation. + +NOTE: The number type is currently undergoing a redesign and will be +reimplemented as a more efficient and predictable type. +""" + +[[data_types]] +name = "string" +description = """ +The string type is implemented by a vector of bytes. It implements a superset +of the functionality that a bytevector implements. +""" + +[[data_types]] +name = "bool" +description = """ +The boolean type is implemented as whatever Rust chooses to represent it. +""" + +[[data_types]] +name = "cons" +description = """ +The cons cell is implemented as a pair of datum. This can contain any type in +either field. Data is referenced and not fully encapsulated within this type. +The cons cell can be used to create linkedlists, or any other dynamic data type +that relies on heap allocated units. +""" + +[[data_types]] +name = "char" +description = "a single byte" + +[[data_types]] +name = "vector" +description = """ +A vector is a list of Datum stored in a contiguous block of memory. It is +represented by the Rust Vector type. 
+""" + +[[data_types]] +name = "ByteVector" +description = "A bytevector is a vector that only contains individual bytes" + +[[data_types]] +name = "None" +description = """ +The none datum is a null type. It is not checkable or creatable by any +instruction except clear. + +It is requested that programmers refrain from implementing custom traps to use +this type. Doing so is in incredibly bad form. If one is finding themselves +attempting to use None datums it is advised that they rethink their program +logic. +""" [[addressing_modes]] -name = "expr" +name = "expression" mutable = true symbol = "$expr" example = "inc $expr" -description = "The expression register is used as a default output, or input by many instructions." +description = """ +The expression register is used as a default output, or input by many +instructions (see registers). +""" [[addressing_modes]] name = "operand" mutable = true symbol = "$oper" example = "add $oper1, $oper2" -description = "There are four operand registers N=(0, 1, 2, 3, and 4). They are for storing mutable data." +description = """ +There are four operand registers N=(0, 1, 2, 3, and 4) (see registers). +""" [[addressing_modes]] name = "stack" mutable = false symbol = "%N" example = "dupl %0, $expr" -description = "Stack addressing mode takes an index in to the stack to read from." +description = """ +Stack addressing mode takes an index (N). This index is used to get the Nth +element from the top of the stack. + +Keep in mind that any push instruction will then shift the element that a given +stack index refers to. +""" [[addressing_modes]] name = "instruction" mutable = false symbol = "@N" example = "jmp @100" -description = "Instruction addressing mode indexes by instruction into the program." +description = """ +Instruction addressing takes an index (N). The index represents the Nth +instruction in the program. Given how deserialization works in HyphaeVM, this +index does not have to account for operands... 
just instructions. +""" [[addressing_modes]] name = "numeric" mutable = false symbol = "N" example = "const $expr, 100" -description = "Numeric addressing mode provides read only integer constants to instructions" +description = """ +Numeric addressing mode accepts a single unsigned 8 bit integer as an argument. + +Not many instructions will read constants. Most will require that you use the +CONST instruction to construct a real datum for use in the program. +""" [[addressing_modes]] -name = "char" +name = "character" mutable = false symbol = "'N'" example = "const $expr, 'c'" -description = "Char addressing mode provides read only character constants to instructions" +description = """ +Character addressing mode accepts a single character as an argument. + +Not many instructions will read constants. Most will require that you use the +CONST instruction to construct a real datum for use in the program. +""" [[addressing_modes]] name = "boolean" mutable = false symbol = "{true|false}" example = "const $expr, true" -description = "Boolean addressing mode provides read only booleans to instructions" +description = """ +Boolean addressing mode accepts a single boolean (true or false) as an argument. + +Not many instructions will read constants. Most will require that you use the +CONST instruction to construct a real datum for use in the program. +""" [[instructions]] name = "trap" args = ["index"] output = "result of function" -description = "triggers callback in trap vector at index" +description = """ +The trap instruction will accept as its argument only a numeric constant. +This constant will be used as an index into the VM trap vector. Once accessed, +the VM triggers the corresponding callback, which may vastly mutate VM state. + +Will halt VM with error state if input is not a valid index into trap vector. +""" [[instructions]] name = "bind" args = ["name", "operand"] output = "" -description = "map name to operand in sym table." 
+description = """ +The bind instruction will accept only a string datum as its name input. It +then maps the name to whatever address the operand input references in the VMs +symbol table. +""" [[instructions]] name = "unbind" args = ["name"] output = "" -description = "remove name mapping from sym table." +description = """ +The unbind instruction will accept only a string datum as its name operand. It +then removes the mapping that corresponds to name from the VMs symbol table. +""" [[instructions]] name = "bound" args = ["name"] output = "expr = true if name is bound" -description = "test if a name is already bound" +description = """ +The bound instruction will accept only a string datum as its name operand. It +will test if the name is already bound in the VMs symbol table. The expression +register will be set to a boolean datum representing whether or not the name is +bound. +""" [[instructions]] name = "push" args = ["operand"] output = "" -description = "pushes deep copy of operand onto stack." +description = """ +The push instruction accepts one operand of any type. It will push a deep copy +of the input onto the VM's stack. +""" [[instructions]] name = "pop" args = [] -output = "" -description = "removes element at top of stack." +output = "first datum on top of stack" +description = """ +The pop instruction removes the first element at the top of the VMs stack. The +expression register is set to the element returned in this manner. +""" [[instructions]] name = "enter" args = [] output = "" -description = "create new stack frame" +description = """ +The enter instruction creates a new stack frame. Subsequent push instructions +apply new elements to a separate stack that corresponds to this frame. Stack +indexes will still access across all frames as if they were one unified stack. +""" [[instructions]] name = "exit" args = [] output = "" -description = "delete current stack frame" +description = """ +The exit instruction deletes current stack frame. 
All information is simply +discarded. The stack fragment corresponding to the previous stack frame is then +subject to subsequent push or pop operations. + +Together, enter and exit are useful for making sure that a dynamic routine that +makes use of the stack is properly cleaned up after. +""" [[instructions]] name = "link" args = ["src", "dest"] output = "" -description = "shallow copies src into dest" +description = """ +The link instruction shallow copies the src operand into the destination that +the dest operand specifies. Shallow copy of source operand increases its +reference count. + +Destination operand requires mutable access. + +For more information on shallow vs deep copy see datum. +""" [[instructions]] name = "dupl" args = ["src", "dest"] output = "" -description = "deep copies src into dest" +description = """ +The dupl instruction deep copies the src operand into the destination that the +dest operand specifies. + +Destination operand requires mutable access. + +For more information on shallow vs deep copy see datum. +""" [[instructions]] name = "clear" args = ["dest"] output = "" -description = "clears dest" +description = """ +The clear instruction sets whatever destination is specified by its operand to +a None datum. + +Destination operand requires mutable access. + +Please do not use the clear instruction to try to work with None datum. It is +provided for cleanup/cleanliness purposes. This can be used to destroy a +shallow copy, decreasing its reference count. +""" [[instructions]] name = "nop" @@ -133,223 +365,387 @@ description = "no operation" name = "halt" args = [] output = "" -description = "halts the VM" +description = """ +The halt instruction sets the VM running state to false. This halts the VM. +""" [[instructions]] name = "panic" args = ["error"] output = "" -description = "sets error state and halts VM" +description = """ +The panic instruction accepts an error operand and shallow copies it into the +error register. 
Then, the error_state flag in the VM is set and the VM is halted. +""" [[instructions]] name = "jmp" args = ["addr"] output = "" -description = "sets ictr register to addr" +description = """ +The jump (jmp) instruction accepts only an instruction address (see addressing +modes). It sets the ictr register to the referenced instruction index. +""" [[instructions]] name = "jmpif" args = ["addr"] output = "" -description = "if expr register holds true, sets ictr to addr" +description = """ +The jump-if (jmpif) instruction accepts only an instruction address (see addressing +modes). It sets the ictr register to the referenced instruction index if and +only if the expression register holds a boolean true value... So make sure to +set the expression register. +""" [[instructions]] name = "eq" args = ["a", "b"] output = "a == b" -description = "equality test" +description = """ +The eq instruction performs an equality test and sets the expression register +to the resulting boolean value. In this case "equality" is set by the Rust +PartialEq trait logic as derived across the datum type (hyphae/src/heap.rs). +""" [[instructions]] name = "lt" args = ["a", "b"] output = "a < b" -description = "less than test" +description = """ +The lt instruction accepts two number datum and performs a numeric less than +test. The expression register is set to a boolean value based on whether the +first input is strictly less than the second input. +""" [[instructions]] name = "gt" args = ["a", "b"] output = "a > b" -description = "greater than test" +description = """ +The gt instruction accepts two number datum and performs a numeric greater than +test. The expression register is set to a boolean value based on whether the +first input is strictly greater than the second input. +""" [[instructions]] name = "lte" args = ["a", "b"] output = "a <= b" -description = "less than equals test" +description = """ +The lte instruction accepts two number datum and performs a numeric less than +equals test. 
The expression register is set to a boolean value based on whether +the first input is less than or equal to the second input. +""" [[instructions]] name = "gte" args = ["a", "b"] output = "a >= b" -description = "greater than equals test" +description = """ +The gte instruction accepts two number datum and performs a numeric greater +than equals test. The expression register is set to a boolean value based on whether +the first input is greater than or equal to the second input. +""" [[instructions]] name = "bool_not" args = [] output = "expr = !expr" -description = "boolean not" +description = """ +The bool_not instruction reads the expression register, expecting a boolean +value. It then writes the opposite boolean value back into the expression +register. +""" [[instructions]] name = "bool_and" args = ["a", "b"] output = "a && b" -description = "boolean and" +description = """ +The bool_and instruction accepts two operands, both of which must be boolean +datum. Bool_and writes the result of a boolean and operation on both of these +inputs to the expression register. +""" [[instructions]] name = "bool_or" args = ["a", "b"] output = "a || b" -description = "boolean or" +description = """ +The bool_or instruction accepts two operands, both of which must be boolean +datum. Bool_or writes the result of a boolean or operation on both of these +inputs to the expression register. +""" [[instructions]] name = "byte_and" args = ["a", "b"] output = "a & b" -description = "bitwise and" +description = """ +The byte_and instruction accepts two character operands. This operation writes to +the expression register the result of bitwise and on both operands. The +resulting type in the expression register is a character. +""" [[instructions]] name = "byte_or" args = ["a", "b"] output = "a | b" -description = "bitwise or" +description = """ +The byte_or instruction accepts two character operands. This operation writes to +the expression register the result of bitwise or on both operands. 
The output +stored in the expression register is a character. +""" [[instructions]] name = "xor" args = ["a", "b"] output = "a xor b" -description = "bitwise exclusive or" +description = """ +The xor instruction accepts two character operands. This operation writes to +the expression register the result of a bitwise exclusive or operation on both +inputs. The resulting datum in the expression register is of type character. +""" [[instructions]] name = "byte_not" args = [] output = "expr = !expr" -description = "bitwise not" +description = """ +The byte_not instruction reads the contents of the expression register, which +is expected to contain a character value. It then writes the corresponding +bitwise not character back to the expression register. +""" [[instructions]] name = "add" args = ["a", "b"] output = "a + b" -description = "numeric addition" +description = """ +The add instruction accepts two number inputs and writes the sum of both to the +expression register. +""" [[instructions]] name = "sub" args = ["a", "b"] output = "a - b" -description = "numeric subtraction" +description = """ +The sub instruction accepts two number inputs and writes the difference of the +last from the first into the expression register. +""" [[instructions]] name = "mul" args = ["a", "b"] output = "a * b" -description = "numeric multiplication" +description = """ +The mul instruction accepts two number inputs and writes their product to the +expression register. +""" [[instructions]] name = "fdiv" args = ["a", "b"] output = "a / b" -description = "numeric FLOAT division" +description = """ +The fdiv instruction accepts two number inputs and writes the quotient of the +first divided by the second to the expression register. + +This is a float division operation. 
+""" [[instructions]] name = "idiv" args = ["a", "b"] output = "a / b" -description = "numeric INTEGER division" +description = """ +The idiv instruction accepts two number inputs and writes the quotient of the +first divided by the second to the expression register. + +This is an integer division operation. +Instruction will halt VM with error state if non integer inputs are provided. +""" [[instructions]] name = "pow" args = ["a", "b"] output = "a ^ b" -description = "numeric operation to raise a to the power of b" +description = """ +The pow instruction accepts two number inputs and writes the result of taking +the first to the power of the second to the expression register. +""" [[instructions]] name = "modulo" args = ["a", "b"] output = "a % b" -description = "numeric modulo operation" +description = """ +The modulo instruction accepts two number inputs and writes the result of the +first modulo the second to the expression register. +""" [[instructions]] name = "rem" args = ["a", "b"] output = "remainder from a / b" -description = "remainder from integer division" +description = """ +The rem instruction accepts two number inputs, performs integer division on +them, determines the remainder of this operation, and writes it to the +expression register. +""" [[instructions]] name = "inc" args = ["src"] output = "" -description = "increments number at source" +description = """ +The inc instruction accepts a single number input. The number input is directly +overwritten with itself incremented by one. + +Requires mutable access to input address. +""" [[instructions]] name = "dec" args = ["src"] output = "" -description = "decrements number at source" +description = """ +The dec instruction accepts a single number input. The number input is directly +overwritten with itself decremented by one. + +Requires mutable access to input address. 
+""" [[instructions]] name = "ctos" args = ["src"] output = "" -description = "mutates a char datum into a string datum" +description = """ +The ctos instruction accepts a single character input. This operand is +overwritten with a string datum that contains the operand. + +Requires mutable access to input address. +""" [[instructions]] name = "cton" args = ["src"] output = "" -description = "mutates a char datum into a number datum" +description = """ +The cton instruction accepts a single character input. This operand is +overwritten with a number datum that represents the value formerly held in the +character byte. + +Requires mutable access to input address. +""" [[instructions]] name = "ntoc" args = ["src"] output = "" -description = "mutates a number datum into a char datum" +description = """ +The ntoc instruction accepts a single number input. This operand is overwritten +with a character datum that holds the byte representing the input number. + +Will halt VM with error state if the input number is not a positive number in +8 bit range, or if the input number is not an integer. + +Requires mutable access to input address. +""" [[instructions]] name = "ntoi" args = ["src"] output = "" -description = "mutates a number datum into its exact form" +description = """ +The ntoi instruction accepts a single number input. This operand is overwritten +by a new number datum that represents the inexact form of the source number. + +The inexact form is a normalization of fraction or scientific notation datum to +float datum. + +Requires mutable access to input address. +""" [[instructions]] name = "ntoe" args = ["src"] output = "" -description = "mutates a number datum into its inexact form" +description = """ +The ntoe instruction accepts a single number input. This operand is overwritten +by a new number datum that represents the exact form of the source number. + +The exact form is a normalization of float or scientific notation datum into +fraction datum. 
+ +Rational approximation is not yet implemented in the organelle number library. +Attempting to convert a float *with a decimal* will result in the VM crashing +due to an unimplemented!() macro in organelle. + +Requires mutable access to input address. +""" [[instructions]] name = "const" args = ["dst", "data"] output = "" -description = "sets dst location to constant integer data" +description = """ +The const instruction will accept constant number, bool or char data as a data +operand. It will set the destination operand to a freshly allocated datum +corresponding to the data input. + +Requires mutable access to destination operand. +""" [[instructions]] name = "mkvec" args = [] output = "a blank vector" -description = "creates a new vector" +description = """ +The mkvec instruction sets the expression register to a new (blank) vector +datum. +""" [[instructions]] name = "mkbvec" args = [] output = "a blank bytevector" -description = "creates a blank bytevector" +description = """ +The mkbvec instruction sets the expression register to a new (blank) bytevector +datum. +""" [[instructions]] name = "mkstr" args = [] output = "an empty string" -description = "creates a new empty string" +description = """ +The mkstr instruction sets the expression register to a new (blank) string +datum. +""" [[instructions]] name = "index" args = ["collection", "index"] output = "collection[index]" -description = "extracts element from collection at index" +description = """ +The index instruction accepts any collection datum (string, vector, bytevector, +cons cell) as well as an index (number datum). The instruction sets the +expression register to the corresponding element from the given collection at +the given index. 
+""" [[instructions]] name = "length" args = ["collection"] output = "length of collection" -description = "calculates length of collection" +description = """ +The length instruction accepts any collection datum (string, vector, bytevector, +cons cell) and sets the expression register to a number datum holding the +length of the collection. +""" [[instructions]] name = "subsl" diff --git a/hyphae/src/heap.rs b/hyphae/src/heap.rs index b670f80..46f4aab 100644 --- a/hyphae/src/heap.rs +++ b/hyphae/src/heap.rs @@ -22,7 +22,6 @@ use alloc::rc::Rc; use alloc::vec::Vec; use alloc::boxed::Box; use alloc::fmt::Debug; -use alloc::string::String; use organelle::Number; @@ -147,7 +146,6 @@ pub enum Datum { Number(Number), Bool(bool), Cons(Cons), - Symbol(String), Char(u8), String(Vec), Vector(Vec>), @@ -162,7 +160,6 @@ impl Clone for Datum { Datum::Number(n) => Datum::Number(n.clone()), Datum::Bool(n) => Datum::Bool(n.clone()), Datum::Cons(n) => Datum::Cons(n.deep_copy()), - Datum::Symbol(n) => Datum::Symbol(n.clone()), Datum::Char(n) => Datum::Char(n.clone()), Datum::String(n) => Datum::String(n.clone()), Datum::Vector(n) => diff --git a/hyphae/src/vm.rs b/hyphae/src/vm.rs index 020fb91..494ed3e 100644 --- a/hyphae/src/vm.rs +++ b/hyphae/src/vm.rs @@ -255,7 +255,7 @@ impl VM { // stack ops i::PUSH => self.stack.push_current_stack( access!(&instr.1[0]).deep_copy()), - i::POP => _ = self.stack.pop_current_stack(), + i::POP => self.expr = self.stack.pop_current_stack(), i::ENTER => self.stack.add_stack(), i::EXIT => self.stack.destroy_top_stack(), @@ -326,7 +326,7 @@ impl VM { }; let Datum::Number(ref r) = **access!(&instr.1[1]) else { - e!("illgal argument to IDIV instruction"); + e!("illegal argument to IDIV instruction"); }; let Fraction(l, 1) = l.make_exact() else {