description = """ HyphaeVM is a bytecode VM that aims to provide a simplified instruction set to language implementors and other programmers who wish to use higher level features without making too many compromises on overhead or performance. The simplified instruction set greatly reduces the work in language design and allows for simpler compilers overall. Meanwhile, the VM still meets performance needs for modern application development. HyphaeVM contains an instruction set, instruction set implementation, garbage collection (reference counting), error handling, dynamic number package, vector based data types, cons cell based dynamic data types, trap functions that are programmatically extendable, as well as faux-registers for mutable access to datum in an otherwise immutable stack based VM. """
datum = """ HyphaeVM instructions operate on Datum. A Datum can hold one of many data types (see data types). The Datum type is implemented as a union type over each data type's underlying form. Each Datum as stored in the VM is reference counted. Each Datum will be automatically deallocated when it is no longer referenced anywhere in the VM state. Given that datum are reference counted, it is possible to make both shallow and deep copies of a source datum (see instructions: link and dupl). Information on whether a datum is a shallow or deep copy of another datum is not accessible at runtime without custom trap functions. It is up to the programmer to track what they themselves have created. Best of luck, friend. """
error_handling = """ The VM has fields for error_state and can store any given datum as an error. Use the PANIC instruction to store an error, set the error state, and halt HyphaeVM. """
sym_table = """ A symbol table is provided as part of HyphaeVM. It will map symbols to valid addresses (see addressing modes). This is not provided for the implementation of variables in languages.
It is recommended that any {trans|com}piler implemented for HyphaeVM reduce variables to Datum on the stack. However, the symbol table is very useful for linking with library code or adding debug symbols to an application. """
traps = """ HyphaeVM includes a trap vector. VM extenders can use this to store platform or language specific functions that can then be called from bytecode. """

[[registers]]
name = "expr"
description = """ The expr register acts as a default return value store for instructions that generate new data. Many instructions will set expr. Some instructions will even use expr as an input. The expr register provides mutable access. """

[[registers]]
name = "operand"
description = """ There are four operand registers. These each can be used as a type of scratch space for operating on Datum without pushing to or popping from the stack. The operand registers provide mutable access. """

[[registers]]
name = "error"
description = """ The error register is set by PANIC and is accessed by the VM to explain an error state. The error register does not provide mutable access. """

[[registers]]
name = "ictr"
description = """ The ictr register acts as the well known "pc" register in many CPUs... With the caveat that the program is indexed per instruction and not per byte. This is because the VM has its own logic to deserialize instructions from bytecode so there is no reason not to rule out a whole class of errors where a bad offset causes the instruction loader to start loading with some operand. The ictr register does not hold a datum, just an underlying native unsigned integer (usize). """

[[data_types]]
name = "number"
description = """ The dynamic number type is defined in the 'Organelle' package. It is a number built to enable implementation of the Scheme R7RS "small" specification. The number type may be stored with any variety of underlying implementation.
NOTE: The number type is currently undergoing a redesign and will be reimplemented as a more efficient and predictable type. """

[[data_types]]
name = "string"
description = """ The string type is implemented by a vector of bytes. It implements a superset of the functionality that a bytevector implements. """

[[data_types]]
name = "bool"
description = """ The boolean type is implemented as whatever Rust chooses to represent it. """

[[data_types]]
name = "cons"
description = """ The cons cell is implemented as a pair of datum. This can contain any type in either field. Data is referenced and not fully encapsulated within this type. The cons cell can be used to create linked lists, or any other dynamic data type that relies on heap allocated units. """

[[data_types]]
name = "char"
description = "a single byte"

[[data_types]]
name = "vector"
description = """ A vector is a list of Datum stored in a contiguous block of memory. It is represented by the Rust Vector type. """

[[data_types]]
name = "ByteVector"
description = "A bytevector is a vector that only contains individual bytes"

[[data_types]]
name = "None"
description = """ The none datum is a null type. It is not checkable or creatable by any instruction except clear. It is requested that programmers refrain from implementing custom traps to use this type. Doing so is incredibly bad form. If one is finding themselves attempting to use None datums it is advised that they rethink their program logic. """

[[addressing_modes]]
name = "expression"
mutable = true
symbol = "$expr"
example = "inc $expr"
description = """ The expression register is used as a default output, or input by many instructions (see registers). """

[[addressing_modes]]
name = "operand"
mutable = true
symbol = "$oper"
example = "add $oper1, $oper2"
description = """ There are four operand registers N=(1, 2, 3, and 4) (see registers).
"""

[[addressing_modes]]
name = "stack"
mutable = false
symbol = "%N"
example = "dupl %0, $expr"
description = """ Stack addressing mode takes an index (N). This index is used to get the Nth element from the top of the stack. Keep in mind that any push instruction will then shift the element that a given stack index refers to. """

[[addressing_modes]]
name = "instruction"
mutable = false
symbol = "@N"
example = "jmp @100"
description = """ Instruction addressing takes an index (N). The index represents the Nth instruction in the program. Given how deserialization works in HyphaeVM, this index does not have to account for operands... just instructions. """

[[addressing_modes]]
name = "numeric"
mutable = false
symbol = "N"
example = "const $expr, 100"
description = """ Numeric addressing mode accepts a single unsigned 8 bit integer as an argument. Not many instructions will read constants. Most will require that you use the CONST instruction to construct a real datum for use in the program. """

[[addressing_modes]]
name = "character"
mutable = false
symbol = "'N'"
example = "const $expr, 'c'"
description = """ Character addressing mode accepts a single character as an argument. Not many instructions will read constants. Most will require that you use the CONST instruction to construct a real datum for use in the program. """

[[addressing_modes]]
name = "boolean"
mutable = false
symbol = "{true|false}"
example = "const $expr, true"
description = """ Boolean addressing mode accepts a single boolean literal (true or false) as an argument. Not many instructions will read constants. Most will require that you use the CONST instruction to construct a real datum for use in the program. """

[[instructions]]
name = "trap"
args = ["index"]
output = "result of function"
description = """ The trap instruction will accept as its argument only a numeric constant. This constant will be used as an index into the VM trap vector.
Once accessed, the VM triggers the corresponding callback, which may vastly mutate VM state. The VM will halt with an error state if the input is not a valid index into the trap vector. """

[[instructions]]
name = "bind"
args = ["name", "operand"]
output = ""
description = """ The bind instruction will accept only a string datum as its name input. It then maps the name to whatever address the operand input references in the VM's symbol table. """

[[instructions]]
name = "unbind"
args = ["name"]
output = ""
description = """ The unbind instruction will accept only a string datum as its name operand. It then removes the mapping that corresponds to name from the VM's symbol table. """

[[instructions]]
name = "bound"
args = ["name"]
output = "expr = true if name is bound"
description = """ The bound instruction will accept only a string datum as its name operand. It will test if the name is already bound in the VM's symbol table. The expression register will be set to a boolean datum representing whether or not the name is bound. """

[[instructions]]
name = "push"
args = ["operand"]
output = ""
description = """ The push instruction accepts one operand of any type. It will push a deep copy of the input onto the VM's stack. """

[[instructions]]
name = "pop"
args = []
output = "first datum on top of stack"
description = """ The pop instruction removes the first element at the top of the VM's stack. The expression register is set to the element returned in this manner. """

[[instructions]]
name = "enter"
args = []
output = ""
description = """ The enter instruction creates a new stack frame. Subsequent push instructions apply new elements to a separate stack that corresponds to this frame. Stack indexes will still access across all frames as if they were one unified stack. """

[[instructions]]
name = "exit"
args = []
output = ""
description = """ The exit instruction deletes the current stack frame. All information is simply discarded.
The stack fragment corresponding to the previous stack frame is then subject to subsequent push or pop operations. Together, enter and exit are useful for making sure that a dynamic routine that makes use of the stack is properly cleaned up after. """

[[instructions]]
name = "link"
args = ["src", "dest"]
output = ""
description = """ The link instruction shallow copies the src operand into the destination that the dest operand specifies. Shallow copy of source operand increases its reference count. Destination operand requires mutable access. For more information on shallow vs deep copy see datum. """

[[instructions]]
name = "dupl"
args = ["src", "dest"]
output = ""
description = """ The dupl instruction deep copies the src operand into the destination that the dest operand specifies. Destination operand requires mutable access. For more information on shallow vs deep copy see datum. """

[[instructions]]
name = "clear"
args = ["dest"]
output = ""
description = """ The clear instruction sets whatever destination is specified by its operand to a None datum. Destination operand requires mutable access. Please do not use the clear instruction to try to work with None datum. It is provided for cleanup/cleanliness purposes. This can be used to destroy a shallow copy, decreasing its reference count. """

[[instructions]]
name = "nop"
args = []
output = ""
description = "no operation"

[[instructions]]
name = "halt"
args = []
output = ""
description = """ The halt instruction sets the VM running state to false. This halts the VM. """

[[instructions]]
name = "panic"
args = ["error"]
output = ""
description = """ The panic instruction accepts an error operand and shallow copies it into the error register. Then, the error_state flag in the VM is set and the VM is halted. """

[[instructions]]
name = "jmp"
args = ["addr"]
output = ""
description = """ The jump (jmp) instruction accepts only an instruction address (see addressing modes).
It sets the ictr register to the referenced instruction index. """

[[instructions]]
name = "jmpif"
args = ["addr"]
output = ""
description = """ The conditional jump (jmpif) instruction accepts only an instruction address (see addressing modes). It sets the ictr register to the referenced instruction index if and only if the expression register holds a boolean true value... So make sure to set the expression register. """

[[instructions]]
name = "eq"
args = ["a", "b"]
output = "a == b"
description = """ The eq instruction performs an equality test and sets the expression register to the resulting boolean value. In this case "equality" is set by the Rust PartialEq trait logic as derived across the datum type (hyphae/src/heap.rs). """

[[instructions]]
name = "lt"
args = ["a", "b"]
output = "a < b"
description = """ The lt instruction accepts two number datum and performs a numeric less than test. The expression register is set to a boolean value based on whether the first input is strictly less than the second input. """

[[instructions]]
name = "gt"
args = ["a", "b"]
output = "a > b"
description = """ The gt instruction accepts two number datum and performs a numeric greater than test. The expression register is set to a boolean value based on whether the first input is strictly greater than the second input. """

[[instructions]]
name = "lte"
args = ["a", "b"]
output = "a <= b"
description = """ The lte instruction accepts two number datum and performs a numeric less than equals test. The expression register is set to a boolean value based on whether the first input is less than or equal to the second input. """

[[instructions]]
name = "gte"
args = ["a", "b"]
output = "a >= b"
description = """ The gte instruction accepts two number datum and performs a numeric greater than equals test. The expression register is set to a boolean value based on if the first input is greater than or equal to the second input.
"""

[[instructions]]
name = "bool_not"
args = []
output = "expr = !expr"
description = """ The bool_not instruction reads the expression register, expecting a boolean value. It then writes the opposite boolean value back into the expression register. """

[[instructions]]
name = "bool_and"
args = ["a", "b"]
output = "a && b"
description = """ The bool_and instruction accepts two operands, both of which must be boolean datum. Bool_and writes the result of a boolean and operation on both of these inputs to the expression register. """

[[instructions]]
name = "bool_or"
args = ["a", "b"]
output = "a || b"
description = """ The bool_or instruction accepts two operands, both of which must be boolean datum. Bool_or writes the result of a boolean or operation on both of these inputs to the expression register. """

[[instructions]]
name = "byte_and"
args = ["a", "b"]
output = "a & b"
description = """ The byte_and instruction accepts two character operands. This operation writes to the expression register the result of a bitwise and on both operands. The resulting type in the expression register is a character. """

[[instructions]]
name = "byte_or"
args = ["a", "b"]
output = "a | b"
description = """ The byte_or instruction accepts two character operands. This operation writes to the expression register the result of a bitwise or on both operands. The output stored in the expression register is a character. """

[[instructions]]
name = "xor"
args = ["a", "b"]
output = "a xor b"
description = """ The xor instruction accepts two character operands. This operation writes to the expression register the result of a bitwise exclusive or operation on both inputs. The resulting datum in the expression register is of type character. """

[[instructions]]
name = "byte_not"
args = []
output = "expr = !expr"
description = """ The byte_not instruction reads the contents of the expression register, which is expected to contain a character value.
It then writes the corresponding bitwise not character back to the expression register. """

[[instructions]]
name = "add"
args = ["a", "b"]
output = "a + b"
description = """ The add instruction accepts two number inputs and writes the sum of both to the expression register. """

[[instructions]]
name = "sub"
args = ["a", "b"]
output = "a - b"
description = """ The sub instruction accepts two number inputs and writes the difference of the last from the first into the expression register. """

[[instructions]]
name = "mul"
args = ["a", "b"]
output = "a * b"
description = """ The mul instruction accepts two number inputs and writes their product to the expression register. """

[[instructions]]
name = "fdiv"
args = ["a", "b"]
output = "a / b"
description = """ The fdiv instruction accepts two number inputs and writes the quotient of the first divided by the second to the expression register. This is a float division operation. """

[[instructions]]
name = "idiv"
args = ["a", "b"]
output = "a / b"
description = """ The idiv instruction accepts two number inputs and writes the quotient of the first divided by the second to the expression register. This is an integer division operation. Instruction will halt VM with error state if non integer inputs are provided. """

[[instructions]]
name = "pow"
args = ["a", "b"]
output = "a ^ b"
description = """ The pow instruction accepts two number inputs and writes the result of taking the first to the power of the second to the expression register. """

[[instructions]]
name = "modulo"
args = ["a", "b"]
output = "a % b"
description = """ The modulo instruction accepts two number inputs and writes the result of the first modulo the second to the expression register. """

[[instructions]]
name = "rem"
args = ["a", "b"]
output = "remainder from a / b"
description = """ The rem instruction accepts two number inputs, performs integer division on them, determines the remainder of this operation, and writes it to the expression register.
"""

[[instructions]]
name = "inc"
args = ["src"]
output = ""
description = """ The inc instruction accepts a single number input. The number input is directly overwritten with itself incremented by one. Requires mutable access to input address. """

[[instructions]]
name = "dec"
args = ["src"]
output = ""
description = """ The dec instruction accepts a single number input. The number input is directly overwritten with itself decremented by one. Requires mutable access to input address. """

[[instructions]]
name = "ctos"
args = ["src"]
output = ""
description = """ The ctos instruction accepts a single character input. This operand is overwritten with a string datum that contains the operand. Requires mutable access to input address. """

[[instructions]]
name = "cton"
args = ["src"]
output = ""
description = """ The cton instruction accepts a single character input. This operand is overwritten with a number datum that represents the value formerly held in the character byte. Requires mutable access to input address. """

[[instructions]]
name = "ntoc"
args = ["src"]
output = ""
description = """ The ntoc instruction accepts a single number input. This operand is overwritten with a character datum that holds the byte representing the input number. Will halt VM with error state if the input number is not a positive number in 8 bit range, or if the input number is not an integer. Requires mutable access to input address. """

[[instructions]]
name = "ntoi"
args = ["src"]
output = ""
description = """ The ntoi instruction accepts a single number input. This operand is overwritten by a new number datum that represents the inexact form of the source number. The inexact form is a normalization of fraction or scientific notation datum to float datum. Requires mutable access to input address. """

[[instructions]]
name = "ntoe"
args = ["src"]
output = ""
description = """ The ntoe instruction accepts a single number input.
This operand is overwritten by a new number datum that represents the exact form of the source number. The exact form is a normalization of float or scientific notation datum into fraction datum. Rational approximation is not yet implemented in the organelle number library. Attempting to convert a float *with a decimal* will result in the VM crashing due to an unimplemented!() macro in organelle. Requires mutable access to input address. """

[[instructions]]
name = "const"
args = ["dst", "data"]
output = ""
description = """ The const instruction will accept constant number, bool or char data as a data operand. It will set the destination operand to a freshly allocated datum corresponding to the data input. Requires mutable access to destination operand. """

[[instructions]]
name = "mkvec"
args = []
output = "a blank vector"
description = """ The mkvec instruction sets the expression register to a new (blank) vector datum. """

[[instructions]]
name = "mkbvec"
args = []
output = "a blank bytevector"
description = """ The mkbvec instruction sets the expression register to a new (blank) bytevector datum. """

[[instructions]]
name = "mkstr"
args = []
output = "an empty string"
description = """ The mkstr instruction sets the expression register to a new (blank) string datum. """

[[instructions]]
name = "index"
args = ["collection", "index"]
output = "collection[index]"
description = """ The index instruction accepts any collection datum (string, vector, bytevector, cons cell) as well as an index (number datum). The instruction sets the expression register to the corresponding element from the given collection at the given index. """

[[instructions]]
name = "length"
args = ["collection"]
output = "length of collection"
description = """ The length instruction takes any collection datum (string, vector, bytevector, cons cell) and sets the expression register to a number datum holding the length of the collection.
"""

[[instructions]]
name = "subsl"
args = ["collection", "start", "end"]
output = "collection[start:end]"
description = """ The subsl instruction takes any collection datum (string, vector, bytevector, cons cell), as well as two number index datum (start and end). The expression register is set to the subset of the collection starting at index start and ending at index end. This instruction panics if start or end are not positive whole numbers. """

[[instructions]]
name = "inser"
args = ["collection", "elem", "idx"]
output = ""
description = """ The inser instruction accepts any non-list collection datum (string, vector, bytevector) as well as a number index and an element datum. The collection is modified in place by inserting the element into it at the provided index. The instruction panics if the index is not a valid whole positive number. The instruction will also panic if a datum of any type other than character is inserted into a bytevector or string. Vectors can contain any element. Requires mutable access to the collection operand. """

[[instructions]]
name = "cons"
args = ["left", "right"]
output = "resulting collection"
description = """ The cons instruction accepts two datum of any types. If the first (left) element is of type cons cell it is deep copied into the expression register. Otherwise, a new cons list is generated in the expression register containing the left element. Finally, the right element is appended to whatever list is in the expression register. """

[[instructions]]
name = "car"
args = ["list"]
output = "returns first element in cons cell"
description = """ The car instruction takes a cons cell and returns a shallow copy (pointer) to the first element in the cons cell. The expression register is set to the shallow copy.
"""

[[instructions]]
name = "cdr"
args = ["list"]
output = "returns last element in cons cell"
description = """ The cdr instruction takes a cons cell and returns a shallow copy (pointer) to the second element in the cons cell. The expression register is set to the shallow copy. """

[[instructions]]
name = "concat"
args = ["string_l", "string_r"]
output = "string_l+string_r"
description = """ The concat instruction accepts two string datum. It sets the expression register to the result of concatenating the second string to the end of the first string. """

[[instructions]]
name = "s_append"
args = ["parent", "child"]
output = ""
description = """ The s_append instruction accepts two datum, a parent and a child. The parent datum is expected to be of type string, and the child datum is expected to be of type character. The string is modified in place by appending the character to the end of it. Requires mutable access to the parent operand. """