Add basic AST functionality

Add a parser grammar
Currently this is a subset of the grammar, enough to get reasonable work going.
2025-03-31 18:43:50 +02:00 · 2025-03-31 18:43:34 +02:00
56 changed files with 132 additions and 6979 deletions
@@ -1,2 +1,2 @@
 CompileFlags:
-  Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]
+  Add: ["-std=c23", "-x", "c"]
@@ -16,10 +16,8 @@ jobs:
          echo "http://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories
          echo "http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories
          # determine correct clang version and then install it
          apk update
-          RT_VERSION=$(apk search -v compiler-rt | grep -o "compiler-rt-[0-9]*" | head -1 | grep -o "[0-9]*")
+          apk add --no-cache llvm19 clang19 clang19-analyzer compiler-rt valgrind
          apk add --no-cache llvm${RT_VERSION} clang${RT_VERSION} clang${RT_VERSION}-analyzer compiler-rt valgrind
          # Verify versions
          echo "---------------------"
@@ -36,7 +34,3 @@ jobs:
      - name: make validate
        run: |
          make validate
      - name: make test
        run: |
          make test
@@ -1,5 +1,7 @@
 *.o
 *.d
 /core
-/build
+/oas
 /oas-asan
 /oas-msan
 /reports
@@ -1,46 +1,54 @@
-.PHONY: all clean distclean release debug afl asan msan validate analyze fuzz
+.PHONY: all clean clean-objects clean-reports run sanitize validate fuzz
-debug: 
+CC=clang
-	make -rRf make/debug.mk all
+LD=clang
 CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
 LDFLAGS?=
-all: debug release afl asan msan
+SOURCES = $(shell find src/ -type f -name '*.c')
 OBJECTS = $(SOURCES:.c=.o)
 DEPENDENCIES = $(SOURCES:.c=.d)
 TARGET?=oas
 OUTPUTS=oas oas-asan oas-msan oas-afl
 RUNARGUMENTS?=-tokens tests/input/valid.asm
 all: $(TARGET)
-release: 
+run: $(TARGET)
-	make -rRf make/release.mk all
+	./$(TARGET) $(RUNARGUMENTS)
 afl:
 	make -rRf make/afl.mk all
 fuzz:
-	make -rRf make/afl.mk fuzz
+	make CC="afl-clang-fast" LD="afl-clang-fast" TARGET="oas-afl" clean-objects all
 	make clean-objects
 	mkdir -p reports/afl
 	afl-fuzz -i tests/input -o reports/afl -m none -- ./oas-afl -tokens @@
-asan:
+sanitize:
-	make -rRf make/asan.mk all
+	make CFLAGS="$(CFLAGS) -fsanitize=address,undefined" \
 		LDFLAGS="-fsanitize=address,undefined" \
 		TARGET="oas-asan" clean-objects all
 	make CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" \
 		LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" \
 		TARGET="oas-msan" clean-objects all 
 	make clean-objects
-msan:
+validate:
 	make -rRf make/msan.mk all
 validate: asan msan debug release
 	./validate.sh
-analyze:
+$(TARGET): $(OBJECTS)
-	make -rRf make/analyze.mk clean all
+	$(LD) $(LDFLAGS) -o $@ $^
-test:
+%.o: %.c
-	make -rRf make/test.mk test
+	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
-clean:
+-include $(DEPENDENCIES)
 	make -rRf make/release.mk clean
 	make -rRf make/debug.mk clean
 	make -rRf make/afl.mk clean
 	make -rRf make/msan.mk clean
 	make -rRf make/asan.mk clean
 	make -rRf make/analyze.mk clean
 	make -rRf make/test.mk clean
 	rm -rf build/
-distclean: clean
+clean-objects:
-	make -rRf make/afl.mk distclean
+	rm -f $(OBJECTS) $(DEPENDENCIES)
-	make -rRf make/analyze.mk distclean
+
 clean-reports:
 	rm -rf reports/
 clean: clean-objects
 	rm -f $(TARGET) $(OUTPUTS)
@@ -1,29 +0,0 @@
 # Building
 To build oas in the default configuration you only need GNU make and a
 sufficiently modern clang.
 ```
 make
 ```
 ## Make targets
 There are a number of make targets available to build various instrumented
 builds that are used in validation, analysis and sanitizing. Some of these may
 require extra dependencies.
 - `debug`: Creates the debug build in `build/debug`. This is the default target.
 - `all`: Builds all binary executable targets. These are
   `debug`, `release`, `msan`, `asan` and `afl`. All executables can be found
   in `build/` in a subdirectory matching their target names.
 - `release`: Creates the release build in `build/release`
 - `afl`: Creates a build with AFL++ instrumentation for fuzzing
 - `fuzz`: Starts the fuzzer with the instrumented afl executable
 - `asan`: builds with the address and undefined clang sanitizers
 - `msan`: builds with the memory clang sanitizer
 - `validate`: Builds `debug`, `msan`, and `asan` targets, then runs the
   validation script. This script executes the sanitizer targets and runs
   Valgrind on the debug target across multiple modes and test input files.
@@ -1,24 +1,24 @@
 /* string literals are lexer identifier tokens with that particular value */
 <program>   ::= <statement>*
-<statement> ::= <label> | <directive> | <instruction> | <newline>
+<statement> ::= ( <label> | <directive> | <instruction> ) <newline>
 <label> ::= <identifier> <colon>
-<directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> ) <newline>
+<directive> ::= <dot> <section>
-<section_directive> ::= "section" <identifier>
+<section>   ::= "section" <identifier>
-<export_directive> ::= "export" <identifier>
+<instruction> ::= <identifier> <operands>
-<import_directive> ::= "import" <identifier>
+<operands> ::= <operand> ( <comma> <operands> )*
 <instruction> ::= <identifier> <operands> <newline>
 <operands> ::= <operand> ( <comma> <operand> )*
 <operand>  ::= <register> | <immediate> | <memory>
-<immediate> ::= <number> | <label_reference>
+<register> ::= <register_base> | <register_extra>
 <register_base> ::= "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi" | "rbp" | "rsp"
 <register_extra> ::= "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15" 
 <immediate> ::= <number> | <label_reference>
 <number> ::= <octal> | <binary> | <decimal> | <hexadecimal>
 <label_reference> ::= <identifier>
@@ -34,10 +34,3 @@
 <register_offset> ::= <plus_or_minus> <number>
 <plus_or_minus> ::= <plus> | <minus>
 /* These are lexer identifiers with the correct string value */
 <section> ::= "section"
 <register> ::= "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi" | "rbp" | "rsp" |
 "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15"
@@ -1,14 +0,0 @@
 .PHONY: fuzz distclean
 CC=afl-clang-fast
 LD=afl-clang-fast
 BUILD_DIR=build/afl/
 -include make/base.mk
 fuzz: $(BUILD_DIR)$(TARGET)
 	mkdir -p reports/afl
 	afl-fuzz -i tests/input -o reports/afl -m none -- ./$< -tokens @@
 distclean: clean
 	rm -rf reports/afl
@@ -1,9 +0,0 @@
 BUILD_DIR=build/analyze/
 -include make/base.mk
 analyze:
 	mkdir -p reports/static-analysis
 	scan-build -o reports/static-analysis/ -plist-html --status-bugs make -rRf make/analyze.mk all
 distclean: clean
 	rm -rf reports/static-analysis
@@ -1,5 +0,0 @@
 CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=address,undefined
 LDFLAGS=-fsanitize=address,undefined
 BUILD_DIR=build/asan/
 -include make/base.mk
@@ -1,27 +0,0 @@
 .PHONY: all clean
 CC?=clang
 LD?=clang
 CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
 LDFLAGS?=
 BUILD_DIR?=build/debug/
 SOURCES?=$(shell find src/ -type f -name '*.c')
 OBJECTS=$(patsubst %.c,$(BUILD_DIR)%.o,$(SOURCES))
 DEPENDENCIES=$(OBJECTS:.o=.d)
 TARGET?=oas
 all: $(BUILD_DIR)$(TARGET)
 $(BUILD_DIR)$(TARGET): $(OBJECTS)
 	$(LD) $(LDFLAGS) -o $@ $^
 $(BUILD_DIR)%.o: %.c
 	mkdir -p $(dir $@)
 	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
 -include $(DEPENDENCIES)
 clean:
 	rm -rf $(BUILD_DIR)
@@ -1 +0,0 @@
 -include make/base.mk
@@ -1,5 +0,0 @@
 CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=memory
 LDFLAGS=-fsanitize=memory
 BUILD_DIR=build/msan/
 -include make/base.mk
@@ -1,5 +0,0 @@
 CFLAGS?=-Wall -Wextra -Wpedantic -Werror -O2 -std=c23 -flto -fomit-frame-pointer -DNDEBUG -D_POSIX_C_SOURCE=200809L
 LDFLAGS?=-flto -s -Wl,--gc-sections
 BUILD_DIR?=build/release/
 -include make/base.mk
@@ -1,21 +0,0 @@
 .PHONY: test
 CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fprofile-instr-generate -fcoverage-mapping
 LDFLAGS?=-fprofile-instr-generate
 BUILD_DIR=build/test/
 TARGET=oas-tests
 SOURCES = $(filter-out src/main.c, $(shell find src/ tests/ -type f -name '*.c'))
 -include make/base.mk
 test: $(BUILD_DIR)$(TARGET)
 	mkdir -p reports/coverage
 	LLVM_PROFILE_FILE="reports/coverage/tests.profraw" $(BUILD_DIR)$(TARGET)
 	llvm-profdata merge -sparse reports/coverage/tests.profraw -o reports/coverage/tests.profdata
 	llvm-cov show $(BUILD_DIR)$(TARGET) -instr-profile=reports/coverage/tests.profdata -format=html -output-dir=reports/coverage/html -ignore-filename-regex="tests/.*"
 	@echo "--"
 	@echo "Test coverage:"
 	@echo "file://$$(realpath reports/coverage/html/index.html)"
 	@echo "--"
 clean:
 	rm -rf reports/coverage
@@ -1,9 +1,8 @@
 #include "ast.h"
 #include "error.h"
 #include <assert.h>
 #include <string.h>
-error_t *const err_ast_children_cap = &(error_t){
+error_t *err_node_children_cap = &(error_t){
    .message = "Failed to increase ast node children, max capacity reached"};
 error_t *ast_node_alloc(ast_node_t **output) {
@@ -17,15 +16,20 @@ error_t *ast_node_alloc(ast_node_t **output) {
    return nullptr;
 }
 // Releases whatever the node's value owns. Currently a placeholder that does
 // nothing; it is called by ast_node_free() before the node itself is freed.
 void ast_node_free_value(ast_node_t *node) {
     // TODO: decide how value ownership will work and clean it up here
     // Explicitly mark the parameter as used so the placeholder does not trip
     // -Wunused-parameter (part of -Wextra) until the real cleanup lands.
     (void)node;
 }
 // Recursively releases a node: every child in index order, then the children
 // array, then the node's value, and finally the node itself. A null node is
 // accepted and ignored.
 void ast_node_free(ast_node_t *node) {
     if (!node)
         return;

     ast_node_t **kids = node->children;
     if (kids != nullptr) {
         size_t idx = 0;
         while (idx < node->len) {
             ast_node_free(kids[idx]);
             idx += 1;
         }
         free(kids);
     }

     ast_node_free_value(node);

     // Zero the node before handing it back to the allocator.
     memset(node, 0, sizeof(*node));
     free(node);
 }
@@ -44,7 +48,7 @@ error_t *ast_node_alloc_children(ast_node_t *node) {
 error_t *ast_node_grow_cap(ast_node_t *node) {
    if (node->cap >= node_max_children_cap) {
-        return err_ast_children_cap;
+        return err_node_children_cap;
    }
    size_t new_cap = node->cap * 2;
@@ -78,130 +82,3 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child) {
    return nullptr;
 }
 // Maps a node id to a string literal spelling the enumerator's name.
 //
 // The switch has no default case, so every id handled here returns its own
 // name; any other value falls out of the switch and hits the assert below.
 const char *ast_node_id_to_cstr(node_id_t id) {
     switch (id) {
     case NODE_INVALID:
         return "NODE_INVALID";
     case NODE_PROGRAM:
         return "NODE_PROGRAM";
     case NODE_STATEMENT:
         return "NODE_STATEMENT";
     case NODE_LABEL:
         return "NODE_LABEL";
     case NODE_DIRECTIVE:
         return "NODE_DIRECTIVE";
     case NODE_INSTRUCTION:
         return "NODE_INSTRUCTION";
     case NODE_OPERANDS:
         return "NODE_OPERANDS";
     case NODE_OPERAND:
         return "NODE_OPERAND";
     case NODE_IMMEDIATE:
         return "NODE_IMMEDIATE";
     case NODE_MEMORY:
         return "NODE_MEMORY";
     case NODE_NUMBER:
         return "NODE_NUMBER";
     case NODE_LABEL_REFERENCE:
         return "NODE_LABEL_REFERENCE";
     case NODE_MEMORY_EXPRESSION:
         return "NODE_MEMORY_EXPRESSION";
     case NODE_REGISTER_EXPRESSION:
         return "NODE_REGISTER_EXPRESSION";
     case NODE_REGISTER_INDEX:
         return "NODE_REGISTER_INDEX";
     case NODE_REGISTER_OFFSET:
         return "NODE_REGISTER_OFFSET";
     case NODE_PLUS_OR_MINUS:
         return "NODE_PLUS_OR_MINUS";
     case NODE_SECTION_DIRECTIVE:
         return "NODE_SECTION_DIRECTIVE";
     case NODE_IMPORT_DIRECTIVE:
         return "NODE_IMPORT_DIRECTIVE";
     case NODE_EXPORT_DIRECTIVE:
         return "NODE_EXPORT_DIRECTIVE";
     case NODE_REGISTER:
         return "NODE_REGISTER";
     case NODE_SECTION:
         return "NODE_SECTION";
     case NODE_IDENTIFIER:
         return "NODE_IDENTIFIER";
     case NODE_DECIMAL:
         return "NODE_DECIMAL";
     case NODE_HEXADECIMAL:
         return "NODE_HEXADECIMAL";
     case NODE_OCTAL:
         return "NODE_OCTAL";
     case NODE_BINARY:
         return "NODE_BINARY";
     case NODE_CHAR:
         return "NODE_CHAR";
     case NODE_STRING:
         return "NODE_STRING";
     case NODE_COLON:
         return "NODE_COLON";
     case NODE_COMMA:
         return "NODE_COMMA";
     case NODE_LBRACKET:
         return "NODE_LBRACKET";
     case NODE_RBRACKET:
         return "NODE_RBRACKET";
     case NODE_PLUS:
         return "NODE_PLUS";
     case NODE_MINUS:
         return "NODE_MINUS";
     case NODE_ASTERISK:
         return "NODE_ASTERISK";
     case NODE_DOT:
         return "NODE_DOT";
     case NODE_NEWLINE:
         return "NODE_NEWLINE";
     case NODE_IMPORT:
         return "NODE_IMPORT";
     case NODE_EXPORT:
         return "NODE_EXPORT";
     }
     // The negated string literal is always false, so this assert always
     // fires when reached; `&& id` only references the offending value.
     assert(!"Unreachable, weird node id" && id);
     // Reached only when asserts are compiled out (NDEBUG).
     __builtin_unreachable();
 }
 // Prints `node` and its descendants to stdout, one node per line, prefixing
 // each line with two spaces per level of `indent`. A null node prints
 // nothing.
 static void ast_node_print_internal(ast_node_t *node, int indent) {
     if (!node)
         return;

     int remaining = indent;
     while (remaining-- > 0)
         printf("  ");

     printf("%s", ast_node_id_to_cstr(node->id));

     // Newline nodes never show their token text; other nodes show it only
     // when a token entry with a non-null value is attached.
     if (node->id != NODE_NEWLINE && node->token_entry) {
         auto text = node->token_entry->token.value;
         if (text)
             printf(" \"%s\"", text);
     }
     printf("\n");

     size_t idx = 0;
     while (idx < node->len) {
         ast_node_print_internal(node->children[idx], indent + 1);
         idx += 1;
     }
 }
 // Public entry point for printing a tree: prints `node` and its descendants
 // starting at indentation level 0.
 void ast_node_print(ast_node_t *node) {
     ast_node_print_internal(node, 0);
 }
 // Removes every descendant of `node` whose id equals `id`, freeing the
 // removed subtrees. Surviving children are compacted to the front of the
 // children array in their original order; the id of `node` itself is never
 // examined.
 void ast_node_prune(ast_node_t *node, node_id_t id) {
     size_t kept = 0;
     for (size_t scan = 0; scan < node->len; ++scan) {
         ast_node_t *candidate = node->children[scan];
         if (candidate->id != id) {
             // Keep this child, but prune inside it first.
             ast_node_prune(candidate, id);
             node->children[kept++] = candidate;
         } else {
             ast_node_free(candidate);
         }
     }
     node->len = kept;
 }
@@ -1,62 +1,16 @@
 #ifndef INCLUDE_SRC_AST_H_
 #define INCLUDE_SRC_AST_H_
 #include "data/registers.h"
 #include "error.h"
 #include "lexer.h"
 #include "tokenlist.h"
 #include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
 extern error_t *const err_ast_children_cap;
 typedef enum node_id {
    NODE_INVALID,
    NODE_PROGRAM,
    NODE_STATEMENT,
    NODE_LABEL,
    NODE_DIRECTIVE,
-    NODE_INSTRUCTION,
+    NODE_LABEL,
-    NODE_OPERANDS,
+    NODE_INSTRUCTION
    NODE_OPERAND,
    NODE_IMMEDIATE,
    NODE_MEMORY,
    NODE_NUMBER,
    NODE_LABEL_REFERENCE,
    NODE_MEMORY_EXPRESSION,
    NODE_REGISTER_EXPRESSION,
    NODE_REGISTER_INDEX,
    NODE_REGISTER_OFFSET,
    NODE_PLUS_OR_MINUS,
    NODE_SECTION_DIRECTIVE,
    NODE_IMPORT_DIRECTIVE,
    NODE_EXPORT_DIRECTIVE,
    // Validated primitives
    NODE_REGISTER,
    NODE_SECTION,
    NODE_IMPORT,
    NODE_EXPORT,
    // Primitive nodes
    NODE_IDENTIFIER,
    NODE_DECIMAL,
    NODE_HEXADECIMAL,
    NODE_OCTAL,
    NODE_BINARY,
    NODE_CHAR,
    NODE_STRING,
    NODE_COLON,
    NODE_COMMA,
    NODE_LBRACKET,
    NODE_RBRACKET,
    NODE_PLUS,
    NODE_MINUS,
    NODE_ASTERISK,
    NODE_DOT,
    NODE_NEWLINE,
 } node_id_t;
 typedef struct ast_node ast_node_t;
@@ -65,78 +19,22 @@ constexpr size_t node_default_children_cap = 8;
 /* 65K ought to be enough for anybody */
 constexpr size_t node_max_children_cap = 1 << 16;
 /* Payload for number nodes: a 64-bit unsigned value plus an operand size. */
 typedef struct number {
     uint64_t value;
     operand_size_t size;
 } number_t;
 /* Payload for register nodes: the register id plus an operand size. */
 typedef struct register_ {
     register_id_t id;
     operand_size_t size;
 } register_t;
 /* Fixed 32-byte buffer with `len`; presumably `len` counts the bytes in use
  * -- TODO confirm against the encoder. */
 typedef struct opcode_encoding {
     uint8_t buffer[32];
     size_t len;
 } opcode_encoding_t;
 /* Payload for instruction nodes: a reference flag, an encoding buffer and an
  * address. */
 typedef struct instruction {
     bool has_reference;
     opcode_encoding_t encoding;
     int64_t address;
 } instruction_t;
 /* Payload for label reference nodes: an offset, an address and an operand
  * size. */
 typedef struct reference {
     int64_t offset;
     int64_t address;
     operand_size_t size;
 } reference_t;
 /* Payload for label nodes: an address. */
 typedef struct {
     int64_t address;
 } label_t;
 struct ast_node {
    node_id_t id;
-    tokenlist_entry_t *token_entry;
+    lexer_token_t *token;
    size_t len;
    size_t cap;
    ast_node_t **children;
    union {
-        register_t reg;
+        struct {
-        number_t number;
+            uint64_t value;
-        instruction_t instruction;
+            int size;
-        reference_t reference;
+        } integer;
-        label_t label;
+        char *name;
    } value;
 };
 /**
  * Returns a pointer to the `reg` member of the node's value union.
  * Asserts that the node id is NODE_REGISTER.
  */
 static inline register_t *ast_node_register_value(ast_node_t *node) {
     assert(node->id == NODE_REGISTER);
     return &node->value.reg;
 }
 /**
  * Returns a pointer to the `number` member of the node's value union.
  * Asserts that the node id is NODE_NUMBER.
  */
 static inline number_t *ast_node_number_value(ast_node_t *node) {
     assert(node->id == NODE_NUMBER);
     return &node->value.number;
 }
 /**
  * Returns a pointer to the `instruction` member of the node's value union.
  * Asserts that the node id is NODE_INSTRUCTION.
  */
 static inline instruction_t *ast_node_instruction_value(ast_node_t *node) {
     assert(node->id == NODE_INSTRUCTION);
     return &node->value.instruction;
 }
 /**
  * Returns a pointer to the `reference` member of the node's value union.
  * Asserts that the node id is NODE_LABEL_REFERENCE.
  */
 static inline reference_t *ast_node_reference_value(ast_node_t *node) {
     assert(node->id == NODE_LABEL_REFERENCE);
     return &node->value.reference;
 }
 /**
  * Returns a pointer to the `label` member of the node's value union.
  * Asserts that the node id is NODE_LABEL.
  */
 static inline label_t *ast_node_label_value(ast_node_t *node) {
     assert(node->id == NODE_LABEL);
     return &node->value.label;
 }
 /**
 * @brief Allocates a new AST node
 *
@@ -171,28 +69,4 @@ void ast_node_free(ast_node_t *node);
 */
 error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
 /**
 * @brief Prints an AST starting from the given node
 *
 * Prints a representation of the AST with indentation to show structure.
 * Each node's type is shown, and if a node has an associated token value,
 * that value is printed in quotes.
 *
 * @param node The root node of the AST to print
 */
 void ast_node_print(ast_node_t *node);
 /**
 * Prune the children with a given id
 *
 * The tree is recursively visited and all child nodes of a given ID are pruned
 * completely. If a node has the given id, it will get removed along with all
 * its children, even if some of those children have different ids. The root
 * node id is never checked, so the tree is guaranteed to remain allocated and
 * valid.
 *
 * @param node The root of the tree you want to prune
 * @param id The id of the nodes you want to prune
 */
 void ast_node_prune(ast_node_t *node, node_id_t id);
 #endif // INCLUDE_SRC_AST_H_
@@ -1,6 +0,0 @@
 #include "bytes.h"
 #include "error.h"
 // Shared error value returned by the bytes_append_* helpers when the
 // destination buffer cannot hold the data being appended.
 error_t *const err_bytes_no_capacity = &(error_t){
     .message = "Not enough capacity in bytes buffer",
 };
@@ -1,60 +0,0 @@
 #ifndef INCLUDE_SRC_BYTES_H_
 #define INCLUDE_SRC_BYTES_H_
 #include "error.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
 extern error_t *const err_bytes_no_capacity;
 /**
  * Byte buffer whose storage is a trailing flexible array member.
  *
  * The append helpers in this header write at `buffer[len]` and refuse to
  * write once `cap` would be exceeded.
  */
 typedef struct bytes {
     size_t len;
     size_t cap;
     uint8_t buffer[];
 } bytes_t;
 /*
  * LOCAL_BYTES_ANONYMOUS(N) yields the address of a compound literal of an
  * anonymous struct with the same leading members as bytes_t and an N-byte
  * buffer, initialised with len = 0 and cap = N.
  */
 #define LOCAL_BYTES_ANONYMOUS(N)                                               \
     &(struct {                                                                 \
         size_t len;                                                            \
         size_t cap;                                                            \
         uint8_t buffer[(N)];                                                   \
     }) {                                                                       \
         0, (N), {}                                                             \
     }
 /*
  * LOCAL_BYTES(N) casts that address to bytes_t *.
  * NOTE(review): the expansion ends in ';', so the macro only works as the
  * last thing in a statement and cannot be used as a function argument --
  * confirm against callers before removing the semicolon.
  */
 #define LOCAL_BYTES(N) (bytes_t *)LOCAL_BYTES_ANONYMOUS(N);
 /**
  * Appends a single byte to `bytes`.
  *
  * @return nullptr on success, err_bytes_no_capacity when len has already
  *         reached cap (the buffer is left untouched in that case)
  */
 static inline error_t *bytes_append_uint8(bytes_t *bytes, uint8_t value) {
     if (bytes->len < bytes->cap) {
         bytes->buffer[bytes->len] = value;
         bytes->len += 1;
         return nullptr;
     }
     return err_bytes_no_capacity;
 }
 /**
  * Appends `n` bytes from `buffer` to `dst`.
  *
  * The data is accepted whenever it fits in the remaining space, including
  * the case where it fills the buffer exactly to `cap`; this matches
  * bytes_append_uint8(), which also allows the last free byte to be used.
  *
  * @param dst    destination buffer
  * @param n      number of bytes to append
  * @param buffer source of at least `n` bytes
  * @return nullptr on success, err_bytes_no_capacity when the data does not
  *         fit (dst is left untouched in that case)
  */
 static inline error_t *bytes_append_array(bytes_t *dst, size_t n,
                                           uint8_t buffer[static n]) {
     // Compare against the remaining space instead of computing len + n, which
     // could wrap around for a huge n. The first test keeps the subtraction
     // from underflowing should len ever exceed cap.
     if (dst->len > dst->cap || n > dst->cap - dst->len)
         return err_bytes_no_capacity;
     memcpy(dst->buffer + dst->len, buffer, n);
     dst->len += n;
     return nullptr;
 }
 /**
  * Appends the first `src->len` bytes of `src` to `dst`.
  * Returns whatever bytes_append_array() returns.
  */
 static inline error_t *bytes_append_bytes(bytes_t *dst, bytes_t *src) {
     return bytes_append_array(dst, src->len, src->buffer);
 }
 /**
  * Appends the in-memory representation of `value` (sizeof(value) bytes, in
  * the host's byte order) to `dst`. Returns whatever bytes_append_array()
  * returns.
  */
 static inline error_t *bytes_append_uint16(bytes_t *dst, uint16_t value) {
     return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
 }
 /**
  * Appends the in-memory representation of `value` (sizeof(value) bytes, in
  * the host's byte order) to `dst`. Returns whatever bytes_append_array()
  * returns.
  */
 static inline error_t *bytes_append_uint32(bytes_t *dst, uint32_t value) {
     return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
 }
 /**
  * Appends the in-memory representation of `value` (sizeof(value) bytes, in
  * the host's byte order) to `dst`. Returns whatever bytes_append_array()
  * returns.
  */
 static inline error_t *bytes_append_uint64(bytes_t *dst, uint64_t value) {
     return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
 }
 #endif // INCLUDE_SRC_BYTES_H_
@@ -1,265 +0,0 @@
 #include "opcodes.h"
 // clang-format off
 // Table of supported instruction encodings, terminated by a nullptr entry.
 //
 // Each entry describes one mnemonic/operand-shape combination. Fields that
 // are not listed in an entry are zero-initialised, i.e. ENCODING_DEFAULT and
 // all prefix flags false. Every 16-bit operand form sets
 // .operand_size_prefix so it can be told apart from the wider form that
 // shares its opcode.
 opcode_data_t *const opcodes[] = {
     // RET
     &(opcode_data_t) {
         .mnemonic = "ret",
         .opcode = 0xC3,
         .opcode_extension = opcode_extension_none,
         .operand_count = 0,
     },
     // RET imm16
     &(opcode_data_t) {
         .mnemonic = "ret",
         .opcode = 0xC2,
         .opcode_extension = opcode_extension_none,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
         },
     },
     // PUSH imm8
     &(opcode_data_t) {
         .mnemonic = "push",
         .opcode = 0x6A,
         .opcode_extension = opcode_extension_none,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8},
         },
     },
     // PUSH imm16
     &(opcode_data_t) {
         .mnemonic = "push",
         .opcode = 0x68,
         .opcode_extension = opcode_extension_none,
         .operand_size_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16},
         },
     },
     // PUSH imm32
     &(opcode_data_t) {
         .mnemonic = "push",
         .opcode = 0x68,
         .opcode_extension = opcode_extension_none,
         .operand_size_prefix = false,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32},
         },
     },
     // PUSH reg16
     // Shares opcode 0x50 with PUSH reg64; the operand size prefix is what
     // selects the 16-bit form.
     &(opcode_data_t) {
         .mnemonic = "push",
         .opcode = 0x50,
         .opcode_extension = opcode_extension_none,
         .encoding_class = ENCODING_OPCODE_REGISTER,
         .operand_size_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
         },
     },
     // PUSH reg64
     &(opcode_data_t) {
         .mnemonic = "push",
         .opcode = 0x50,
         .opcode_extension = opcode_extension_none,
         .encoding_class = ENCODING_OPCODE_REGISTER,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
         },
     },
     // NOT reg16
     &(opcode_data_t) {
         .mnemonic = "not",
         .opcode = 0xF7,
         .opcode_extension = 2,
         .operand_size_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
         },
     },
     // NOT reg32
     &(opcode_data_t) {
         .mnemonic = "not",
         .opcode = 0xF7,
         .opcode_extension = 2,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
         },
     },
     // NOT reg64
     &(opcode_data_t) {
         .mnemonic = "not",
         .opcode = 0xF7,
         .opcode_extension = 2,
         .rex_w_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
         },
     },
     // NEG reg16
     &(opcode_data_t) {
         .mnemonic = "neg",
         .opcode = 0xF7,
         .opcode_extension = 3,
         .operand_size_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
         },
     },
     // NEG reg32
     &(opcode_data_t) {
         .mnemonic = "neg",
         .opcode = 0xF7,
         .opcode_extension = 3,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
         },
     },
     // NEG reg64
     &(opcode_data_t) {
         .mnemonic = "neg",
         .opcode = 0xF7,
         .opcode_extension = 3,
         .rex_w_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
         },
     },
     // CALL rel32
     &(opcode_data_t) {
         .mnemonic = "call",
         .opcode = 0xE8,
         .opcode_extension = opcode_extension_none,
         .encoding_class = ENCODING_DEFAULT,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
         },
     },
     // CALL reg64
     &(opcode_data_t) {
         .mnemonic = "call",
         .opcode = 0xFF,
         .opcode_extension = 2,
         .encoding_class = ENCODING_DEFAULT,
         .rex_w_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
         },
     },
     // CALL mem64
     &(opcode_data_t) {
         .mnemonic = "call",
         .opcode = 0xFF,
         .opcode_extension = 2,
         .encoding_class = ENCODING_DEFAULT,
         .rex_w_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 },
         },
     },
     // JMP rel8 (short jump)
     &(opcode_data_t) {
         .mnemonic = "jmp",
         .opcode = 0xEB,
         .opcode_extension = opcode_extension_none,
         .encoding_class = ENCODING_DEFAULT,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8 },
         },
     },
     // JMP rel16
     &(opcode_data_t) {
         .mnemonic = "jmp",
         .opcode = 0xE9,
         .opcode_extension = opcode_extension_none,
         .encoding_class = ENCODING_DEFAULT,
         .operand_size_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
         },
     },
     // JMP reg16
     &(opcode_data_t) {
         .mnemonic = "jmp",
         .opcode = 0xFF,
         .opcode_extension = 4,
         .encoding_class = ENCODING_DEFAULT,
         .operand_size_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
         },
     },
     // JMP rel32 (near jump)
     &(opcode_data_t) {
         .mnemonic = "jmp",
         .opcode = 0xE9,
         .opcode_extension = opcode_extension_none,
         .encoding_class = ENCODING_DEFAULT,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
         },
     },
     // JMP reg32
     &(opcode_data_t) {
         .mnemonic = "jmp",
         .opcode = 0xFF,
         .opcode_extension = 4,
         .encoding_class = ENCODING_DEFAULT,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
         },
     },
     // JMP reg64
     &(opcode_data_t) {
         .mnemonic = "jmp",
         .opcode = 0xFF,
         .opcode_extension = 4,
         .encoding_class = ENCODING_DEFAULT,
         .rex_w_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
         },
     },
     // JMP mem64
     &(opcode_data_t) {
         .mnemonic = "jmp",
         .opcode = 0xFF,
         .opcode_extension = 4,
         .encoding_class = ENCODING_DEFAULT,
         .rex_w_prefix = true,
         .operand_count = 1,
         .operands = {
             { .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 },
         },
     },
     nullptr,
 };
@@ -1,56 +0,0 @@
 #ifndef INCLUDE_DATA_OPCODES_H_
 #define INCLUDE_DATA_OPCODES_H_
 #include "../data/registers.h"
 #include <stddef.h>
 #include <stdint.h>
 /* REX prefix byte values: the bare prefix, then the prefix with one extra
  * low bit set for each of the w, r, x and b variants. */
 constexpr uint8_t rex_prefix = 0x40;
 constexpr uint8_t rex_prefix_w = 0x48;
 constexpr uint8_t rex_prefix_r = 0x44;
 constexpr uint8_t rex_prefix_x = 0x42;
 constexpr uint8_t rex_prefix_b = 0x41;
 /* Other single-byte instruction prefixes. */
 constexpr uint8_t operand_size_prefix = 0x66;
 constexpr uint8_t memory_size_prefix = 0x67;
 constexpr uint8_t lock_prefix = 0xF0;
 constexpr uint8_t repne_prefix = 0xF2;
 constexpr uint8_t rep_prefix = 0xF3;
 /* Selects how an opcode table entry encodes its operands. ENCODING_DEFAULT
  * is the first enumerator, so it is what a zero-initialised field holds. */
 typedef enum encoding_class {
     ENCODING_DEFAULT,         // use modrm+sib for registers and memory, append
                               // immediates
     ENCODING_OPCODE_REGISTER, // encode the register in the last 3 bits of the
                               // opcode
 } encoding_class_t;
 /* The kinds of operand an opcode table entry can name. */
 typedef enum operand_kind {
     OPERAND_REGISTER,
     OPERAND_MEMORY,
     OPERAND_IMMEDIATE,
 } operand_kind_t;
 /* One operand slot of an opcode table entry: its kind and its size. */
 typedef struct operand_info {
     operand_kind_t kind;
     operand_size_t size;
 } operand_info_t;
 /* Value stored in opcode_data.opcode_extension by entries that have no
  * opcode extension; 0xFF lies outside the 3-bit range of a real one. */
 constexpr uint8_t opcode_extension_none = 0xFF;
 /* One row of the opcode table: a mnemonic, its opcode and prefix flags, and
  * up to three operand descriptions (operand_count says how many are used). */
 typedef struct opcode_data {
     const char *mnemonic;
     uint16_t opcode;
     uint8_t opcode_extension; // 3 bits for the opcode extension in the reg
                               // field of a modr/m byte
     encoding_class_t encoding_class;
     bool operand_size_prefix;
     bool address_size_prefix;
     bool rex_w_prefix;
     size_t operand_count;
     operand_info_t operands[3];
 } opcode_data_t;
 extern opcode_data_t *const opcodes[];
 #endif // INCLUDE_DATA_OPCODES_H_
@@ -1,92 +0,0 @@
 #include "registers.h"
 // Table of known register names, terminated by a nullptr entry.
 // Each entry is {name, register id, operand size}; the different-width names
 // of one register share the same id and differ only in size.
 register_data_t *const registers[] = {
     // Instruction pointer
     &(register_data_t){"rip",  REG_RIP, OPERAND_SIZE_64},
     &(register_data_t){"eip",  REG_RIP, OPERAND_SIZE_32},
     &(register_data_t){"ip",   REG_RIP, OPERAND_SIZE_16},
     // 64-bit general purpose registers
     &(register_data_t){"rax",  REG_A,   OPERAND_SIZE_64},
     &(register_data_t){"rcx",  REG_C,   OPERAND_SIZE_64},
     &(register_data_t){"rdx",  REG_D,   OPERAND_SIZE_64},
     &(register_data_t){"rbx",  REG_B,   OPERAND_SIZE_64},
     &(register_data_t){"rsp",  REG_SP,  OPERAND_SIZE_64},
     &(register_data_t){"rbp",  REG_BP,  OPERAND_SIZE_64},
     &(register_data_t){"rsi",  REG_SI,  OPERAND_SIZE_64},
     &(register_data_t){"rdi",  REG_DI,  OPERAND_SIZE_64},
     &(register_data_t){"r8",   REG_8,   OPERAND_SIZE_64},
     &(register_data_t){"r9",   REG_9,   OPERAND_SIZE_64},
     &(register_data_t){"r10",  REG_10,  OPERAND_SIZE_64},
     &(register_data_t){"r11",  REG_11,  OPERAND_SIZE_64},
     &(register_data_t){"r12",  REG_12,  OPERAND_SIZE_64},
     &(register_data_t){"r13",  REG_13,  OPERAND_SIZE_64},
     &(register_data_t){"r14",  REG_14,  OPERAND_SIZE_64},
     &(register_data_t){"r15",  REG_15,  OPERAND_SIZE_64},
     // 32-bit general purpose registers
     &(register_data_t){"eax",  REG_A,   OPERAND_SIZE_32},
     &(register_data_t){"ecx",  REG_C,   OPERAND_SIZE_32},
     &(register_data_t){"edx",  REG_D,   OPERAND_SIZE_32},
     &(register_data_t){"ebx",  REG_B,   OPERAND_SIZE_32},
     &(register_data_t){"esp",  REG_SP,  OPERAND_SIZE_32},
     &(register_data_t){"ebp",  REG_BP,  OPERAND_SIZE_32},
     &(register_data_t){"esi",  REG_SI,  OPERAND_SIZE_32},
     &(register_data_t){"edi",  REG_DI,  OPERAND_SIZE_32},
     &(register_data_t){"r8d",  REG_8,   OPERAND_SIZE_32},
     &(register_data_t){"r9d",  REG_9,   OPERAND_SIZE_32},
     &(register_data_t){"r10d", REG_10,  OPERAND_SIZE_32},
     &(register_data_t){"r11d", REG_11,  OPERAND_SIZE_32},
     &(register_data_t){"r12d", REG_12,  OPERAND_SIZE_32},
     &(register_data_t){"r13d", REG_13,  OPERAND_SIZE_32},
     &(register_data_t){"r14d", REG_14,  OPERAND_SIZE_32},
     &(register_data_t){"r15d", REG_15,  OPERAND_SIZE_32},
     // 16-bit general purpose registers
     &(register_data_t){"ax",   REG_A,   OPERAND_SIZE_16},
     &(register_data_t){"cx",   REG_C,   OPERAND_SIZE_16},
     &(register_data_t){"dx",   REG_D,   OPERAND_SIZE_16},
     &(register_data_t){"bx",   REG_B,   OPERAND_SIZE_16},
     &(register_data_t){"sp",   REG_SP,  OPERAND_SIZE_16},
     &(register_data_t){"bp",   REG_BP,  OPERAND_SIZE_16},
     &(register_data_t){"si",   REG_SI,  OPERAND_SIZE_16},
     &(register_data_t){"di",   REG_DI,  OPERAND_SIZE_16},
     &(register_data_t){"r8w",  REG_8,   OPERAND_SIZE_16},
     &(register_data_t){"r9w",  REG_9,   OPERAND_SIZE_16},
     &(register_data_t){"r10w", REG_10,  OPERAND_SIZE_16},
     &(register_data_t){"r11w", REG_11,  OPERAND_SIZE_16},
     &(register_data_t){"r12w", REG_12,  OPERAND_SIZE_16},
     &(register_data_t){"r13w", REG_13,  OPERAND_SIZE_16},
     &(register_data_t){"r14w", REG_14,  OPERAND_SIZE_16},
     &(register_data_t){"r15w", REG_15,  OPERAND_SIZE_16},
     // 8-bit general purpose registers (low byte)
     &(register_data_t){"al",   REG_A,   OPERAND_SIZE_8 },
     &(register_data_t){"cl",   REG_C,   OPERAND_SIZE_8 },
     &(register_data_t){"dl",   REG_D,   OPERAND_SIZE_8 },
     &(register_data_t){"bl",   REG_B,   OPERAND_SIZE_8 },
     &(register_data_t){"spl",  REG_SP,  OPERAND_SIZE_8 },
     &(register_data_t){"bpl",  REG_BP,  OPERAND_SIZE_8 },
     &(register_data_t){"sil",  REG_SI,  OPERAND_SIZE_8 },
     &(register_data_t){"dil",  REG_DI,  OPERAND_SIZE_8 },
     &(register_data_t){"r8b",  REG_8,   OPERAND_SIZE_8 },
     &(register_data_t){"r9b",  REG_9,   OPERAND_SIZE_8 },
     &(register_data_t){"r10b", REG_10,  OPERAND_SIZE_8 },
     &(register_data_t){"r11b", REG_11,  OPERAND_SIZE_8 },
     &(register_data_t){"r12b", REG_12,  OPERAND_SIZE_8 },
     &(register_data_t){"r13b", REG_13,  OPERAND_SIZE_8 },
     &(register_data_t){"r14b", REG_14,  OPERAND_SIZE_8 },
     &(register_data_t){"r15b", REG_15,  OPERAND_SIZE_8 },
     // x87 floating point registers
     &(register_data_t){"st0",  REG_ST0, OPERAND_SIZE_80},
     &(register_data_t){"st1",  REG_ST1, OPERAND_SIZE_80},
     &(register_data_t){"st2",  REG_ST2, OPERAND_SIZE_80},
     &(register_data_t){"st3",  REG_ST3, OPERAND_SIZE_80},
     &(register_data_t){"st4",  REG_ST4, OPERAND_SIZE_80},
     &(register_data_t){"st5",  REG_ST5, OPERAND_SIZE_80},
     &(register_data_t){"st6",  REG_ST6, OPERAND_SIZE_80},
     &(register_data_t){"st7",  REG_ST7, OPERAND_SIZE_80},
     nullptr,
 };
@@ -1,82 +0,0 @@
 #ifndef INCLUDE_DATA_REGISTERS_H_
 #define INCLUDE_DATA_REGISTERS_H_
 /**
 * Width of an operand in bits, encoded as one bit flag per width.
 *
 * Every width has its own bit, so several widths can be OR-ed together into
 * a mask. OPERAND_SIZE_INVALID (0) is the empty mask.
 */
 typedef enum operand_size {
    OPERAND_SIZE_INVALID = 0,
    OPERAND_SIZE_8 = 1 << 0,
    OPERAND_SIZE_16 = 1 << 1,
    OPERAND_SIZE_32 = 1 << 2,
    OPERAND_SIZE_64 = 1 << 3,
    OPERAND_SIZE_80 = 1 << 4,
    OPERAND_SIZE_128 = 1 << 5,
    OPERAND_SIZE_256 = 1 << 6,
    OPERAND_SIZE_512 = 1 << 7,
 } operand_size_t;
 /**
 * Translate a width in bits into the matching operand_size_t flag.
 *
 * @param bits width in bits (8, 16, 32, 64, 80, 128, 256 or 512)
 * @return the flag for that width, OPERAND_SIZE_INVALID for any other value
 */
 static inline operand_size_t bits_to_operand_size(int bits) {
    if (bits == 8)
        return OPERAND_SIZE_8;
    if (bits == 16)
        return OPERAND_SIZE_16;
    if (bits == 32)
        return OPERAND_SIZE_32;
    if (bits == 64)
        return OPERAND_SIZE_64;
    if (bits == 80)
        return OPERAND_SIZE_80;
    if (bits == 128)
        return OPERAND_SIZE_128;
    if (bits == 256)
        return OPERAND_SIZE_256;
    if (bits == 512)
        return OPERAND_SIZE_512;
    return OPERAND_SIZE_INVALID;
 }
 /**
 * Identifier of a register, independent of the width it is accessed with
 * (the width is stored separately in register_data_t).
 */
 typedef enum register_id {
    // Special registers
    REG_RIP = -1,
    // General purpose registers, numbered consecutively from 0 (REG_A) to
    // 15 (REG_15)
    REG_A = 0x0000,
    REG_C,
    REG_D,
    REG_B,
    REG_SP,
    REG_BP,
    REG_SI,
    REG_DI,
    REG_8,
    REG_9,
    REG_10,
    REG_11,
    REG_12,
    REG_13,
    REG_14,
    REG_15,
    // x87 floating point registers, numbered consecutively from 0x1000
    REG_ST0 = 0x1000,
    REG_ST1,
    REG_ST2,
    REG_ST3,
    REG_ST4,
    REG_ST5,
    REG_ST6,
    REG_ST7,
 } register_id_t;
 /**
 * One entry of the register table: maps a register name as written in the
 * source to its identifier and access width.
 */
 typedef struct register_data {
    const char *name;    // textual register name, e.g. "al" or "r8w"
    register_id_t id;    // register identifier shared by all widths
    operand_size_t size; // width this name refers to
 } register_data_t;
 // Table of all known registers, terminated by a nullptr entry
 extern register_data_t *const registers[];
 #endif // INCLUDE_DATA_REGISTERS_H_
@@ -1,711 +0,0 @@
 #include "encoder.h"
 #include "../bytes.h"
 #include "../data/opcodes.h"
 #include "symbols.h"
 #include <assert.h>
 #include <errno.h>
 #include <string.h>
 /**
 * General encoder flow:
 *
 * There are 2 major passes the encoder does:
 *
 * First pass:
 *   - Run through the AST and collect information:
 *     - Set register values
 *     - Parse/set number values
 *     - Mark all instructions that use label references
 *   - Encode all instructions that don't use label references
 *   - Update addresses of all labels and instructions. Use an estimated
 *     instruction size for those instructions that use label references.
 *
 * Second pass:
 *   - Run through the AST for all instructions that use label references and
 *     collect size information using the estimated addresses from pass 1
 *   - Encode label references with the estimated addresses, this fixes their
 *     size.
 *   - Update all addresses
 *
 * Iteration:
 *   - Repeat the second pass until addresses converge
 */
 // Error values returned by the encoder. Each one is a statically allocated
 // error_t that is declared extern in encoder.h.
 error_t *const err_encoder_invalid_register =
    &(error_t){.message = "Invalid register"};
 error_t *const err_encoder_number_overflow =
    &(error_t){.message = "Number overflows the storage"};
 error_t *const err_encoder_invalid_number_format =
    &(error_t){.message = "Invalid number format"};
 error_t *const err_encoder_invalid_size_suffix =
    &(error_t){.message = "Invalid number size suffix"};
 error_t *const err_encoder_unknown_symbol_reference =
    &(error_t){.message = "Referenced an unknown symbol"};
 error_t *const err_encoder_no_encoding_found =
    &(error_t){.message = "No encoding found for instruction"};
 error_t *const err_encoder_not_implemented =
    &(error_t){.message = "Implementation for this opcode is missing"};
 error_t *const err_encoder_unexpected_length =
    &(error_t){.message = "Unexpectedly long encoding"};
 /**
 * Allocate an encoder for the given AST together with its symbol table.
 *
 * @param output receives the new encoder, or nullptr when allocation fails
 * @param ast    program to encode; stored in the encoder, not copied
 * @return nullptr on success, otherwise the allocation error
 */
 error_t *encoder_alloc(encoder_t **output, ast_node_t *ast) {
    *output = nullptr;
    encoder_t *result = calloc(1, sizeof(*result));
    if (!result)
        return err_allocation_failed;
    error_t *err = symbol_table_alloc(&result->symbols);
    if (err != nullptr) {
        // don't leak the half-constructed encoder
        free(result);
        return err;
    }
    result->ast = ast;
    *output = result;
    return nullptr;
 }
 /**
 * Release an encoder and its symbol table. Passing nullptr is a no-op. The
 * AST the encoder points at is not freed.
 */
 void encoder_free(encoder_t *encoder) {
    if (encoder != nullptr) {
        symbol_table_free(encoder->symbols);
        free(encoder);
    }
 }
 /**
 * Tell whether a node defines or uses a symbol, i.e. whether it has to be
 * recorded in the symbol table.
 */
 bool encoder_is_symbols_node(ast_node_t *node) {
    return node->id == NODE_LABEL || node->id == NODE_LABEL_REFERENCE ||
           node->id == NODE_EXPORT_DIRECTIVE ||
           node->id == NODE_IMPORT_DIRECTIVE;
 }
 /**
 * Determine the numeric base of a NODE_NUMBER from the kind of its first
 * child.
 *
 * @return 2, 8, 10 or 16; any other child kind is a programming error
 */
 int encoder_get_number_base(ast_node_t *number) {
    ast_node_t *literal = number->children[0];
    if (literal->id == NODE_BINARY)
        return 2;
    if (literal->id == NODE_OCTAL)
        return 8;
    if (literal->id == NODE_DECIMAL)
        return 10;
    if (literal->id == NODE_HEXADECIMAL)
        return 16;
    assert(false);
    __builtin_unreachable();
 }
 /**
 * Check whether a number's size suffix is acceptable. 0 means "no suffix";
 * otherwise only 8, 16, 32 and 64 bits are allowed.
 */
 bool is_valid_size_suffix(int bits) {
    return bits == 0 || bits == 8 || bits == 16 || bits == 32 || bits == 64;
 }
 /**
 * Check whether an unsigned value needs more than the given number of bits.
 *
 * A width of 0 (no explicit size) or of 64 and above can hold every
 * uint64_t, so those never overflow.
 */
 bool is_overflow(uint64_t value, int bits) {
    const bool unbounded = bits == 0 || bits >= 64;
    // any bit at position `bits` or higher means the value does not fit
    return !unbounded && (value >> bits) != 0;
 }
 /**
 * Compute the set of operand sizes a number may be encoded with.
 *
 * With an explicit size suffix the result is exactly that size. Without one
 * (bits == 0) the result is a mask of every size from 8 to 64 bits that can
 * hold the unsigned value; 64 bits is always included.
 */
 operand_size_t encoder_get_size_mask(uint64_t value, int bits) {
    if (bits != 0)
        return bits_to_operand_size(bits);
    operand_size_t fits = OPERAND_SIZE_64;
    if (value <= UINT32_MAX)
        fits |= OPERAND_SIZE_32;
    if (value <= UINT16_MAX)
        fits |= OPERAND_SIZE_16;
    if (value <= UINT8_MAX)
        fits |= OPERAND_SIZE_8;
    return fits;
 }
 /**
 * Parse the literal of a NODE_NUMBER and store its value and size mask in the
 * node.
 *
 * The literal is `<digits>` or `<digits>:<bits>`, where the digits use a
 * two-character prefix for every base except 10 and `<bits>` is an optional
 * decimal size suffix.
 *
 * @return nullptr on success, or
 *         - err_encoder_number_overflow when the value does not fit in 64
 *           bits or in the requested size
 *         - err_encoder_invalid_number_format when the literal is malformed
 *         - err_encoder_invalid_size_suffix when the suffix is not 8, 16, 32
 *           or 64
 */
 error_t *encoder_set_number_value(ast_node_t *node) {
    assert(node->id == NODE_NUMBER);
    assert(node->children[0]);
    const char *number = node->children[0]->token_entry->token.value;
    int base = encoder_get_number_base(node);
    if (base != 10)
        number += 2; // all except base 10 use a 0x, 0o or 0b prefix
    char *endptr;
    errno = 0;
    uint64_t value = strtoull(number, &endptr, base);
    if (errno == ERANGE)
        return err_encoder_number_overflow;
    if (endptr == number)
        return err_encoder_invalid_number_format;
    // Keep the suffix as a long until it has been range checked. Narrowing
    // to int first would let a value such as 4294967304 (2^32 + 8) wrap
    // around to 8 and be accepted as a valid suffix.
    long suffix_bits = 0;
    if (*endptr == ':') {
        const char *suffix = endptr + 1;
        suffix_bits = strtol(suffix, &endptr, 10);
        if (endptr == suffix)
            return err_encoder_invalid_number_format;
    }
    if (*endptr != '\0')
        return err_encoder_invalid_number_format;
    // strtol saturates to LONG_MIN/LONG_MAX on overflow, both of which are
    // rejected by this range check
    if (suffix_bits < 0 || suffix_bits > 64)
        return err_encoder_invalid_size_suffix;
    int bits = (int)suffix_bits;
    if (!is_valid_size_suffix(bits))
        return err_encoder_invalid_size_suffix;
    if (is_overflow(value, bits))
        return err_encoder_number_overflow;
    node->value.number.value = value;
    node->value.number.size = encoder_get_size_mask(value, bits);
    return nullptr;
 }
 /**
 * Look up the register named by a NODE_REGISTER token and store its id and
 * size in the node.
 *
 * @return nullptr on success, err_encoder_invalid_register when the name is
 *         not in the register table
 */
 error_t *encoder_set_register_value(ast_node_t *node) {
    assert(node->id == NODE_REGISTER);
    const char *name = node->token_entry->token.value;
    // the register table is terminated by a nullptr entry
    for (register_data_t *const *entry = registers; *entry != nullptr;
         ++entry) {
        if (strcmp(name, (*entry)->name) != 0)
            continue;
        node->value.reg.id = (*entry)->id;
        node->value.reg.size = (*entry)->size;
        return nullptr;
    }
    return err_encoder_invalid_register;
 }
 /**
 * Store an opcode extension in the reg field of a ModR/M byte and return the
 * updated byte. The extension must be a real 3-bit value.
 */
 static inline uint8_t modrm_extension(uint8_t modrm, uint8_t extension) {
    assert(extension != opcode_extension_none);
    assert((extension & 0b111) == extension);
    const uint8_t without_reg = modrm & ~modrm_reg_mask;
    const uint8_t reg_bits = extension << 3;
    return without_reg | reg_bits;
 }
 /**
 * Add the REX bit that extends the ModR/M reg field when the register id
 * needs a fourth bit. Returns the updated REX value.
 */
 static inline uint8_t modrm_reg_rex(uint8_t rex, register_id_t id) {
    const bool needs_extension = (id & 0b1000) != 0;
    return needs_extension ? (rex | rex_prefix_r) : rex;
 }
 /**
 * Store the low three bits of a register id in the reg field of a ModR/M
 * byte. Use together with modrm_reg_rex, which handles the fourth bit.
 */
 static inline uint8_t modrm_reg(uint8_t modrm, register_id_t id) {
    const uint8_t without_reg = modrm & ~modrm_reg_mask;
    const uint8_t reg_bits = (id & 0b111) << 3;
    return without_reg | reg_bits;
 }
 /**
 * Add the REX bit that extends the ModR/M rm field when the register id
 * needs a fourth bit. Returns the updated REX value.
 */
 static inline uint8_t modrm_rm_rex(uint8_t rex, register_id_t id) {
    const bool needs_extension = (id & 0b1000) != 0;
    return needs_extension ? (rex | rex_prefix_b) : rex;
 }
 /**
 * Store the low three bits of a register id in the rm field of a ModR/M
 * byte. Only valid in register mode; use together with modrm_rm_rex, which
 * handles the fourth bit.
 */
 static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) {
    assert((modrm & modrm_mod_mask) == modrm_mod_register);
    const uint8_t without_rm = modrm & ~modrm_rm_mask;
    const uint8_t rm_bits = id & 0b111;
    return without_rm | rm_bits;
 }
 /**
 * Recursively gather per-node information for one statement.
 *
 * - symbol nodes are recorded in the symbol table; an instruction that
 *   contains one is flagged with has_reference
 * - numbers are parsed and registers are resolved
 *
 * @param node      node currently being visited
 * @param statement top-level statement that contains the node
 * @return nullptr on success, otherwise the first error encountered
 */
 error_t *encoder_collect_info(encoder_t *encoder, ast_node_t *node,
                               ast_node_t *statement) {
    error_t *err = nullptr;
    if (encoder_is_symbols_node(node)) {
        err = symbol_table_update(encoder->symbols, node, statement);
        if (statement->id == NODE_INSTRUCTION)
            statement->value.instruction.has_reference = true;
    } else {
        switch (node->id) {
        case NODE_NUMBER:
            err = encoder_set_number_value(node);
            break;
        case NODE_REGISTER:
            err = encoder_set_register_value(node);
            break;
        default:
            break;
        }
    }
    // visit the children until one of them fails
    for (size_t i = 0; err == nullptr && i < node->len; ++i)
        err = encoder_collect_info(encoder, node->children[i], statement);
    return err;
 }
 /**
 * Check whether an operand in the AST satisfies one operand slot of an opcode.
 *
 * - registers must have exactly the slot's size
 * - memory operands match any memory slot
 * - immediates match when at least one of the sizes the value (or label
 *   reference) fits in is accepted by the slot
 *
 * The opcode description is only read, never modified.
 */
 bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
    switch (info->kind) {
    case OPERAND_REGISTER:
        return operand->id == NODE_REGISTER &&
               ast_node_register_value(operand)->size == info->size;
    case OPERAND_MEMORY:
        return operand->id == NODE_MEMORY;
    case OPERAND_IMMEDIATE: {
        if (operand->id != NODE_IMMEDIATE)
            return false;
        ast_node_t *child = operand->children[0];
        if (child->id == NODE_NUMBER)
            return (ast_node_number_value(child)->size & info->size) > 0;
        else if (child->id == NODE_LABEL_REFERENCE) {
            // Plain `&`: the original `&=` overwrote the size stored in the
            // shared opcode table, corrupting it for every later lookup.
            return (ast_node_reference_value(child)->size & info->size) > 0;
        }
        // any other child kind falls through to the assert below
    } // end OPERAND_IMMEDIATE case
    }
    assert(false && "unreachable");
    __builtin_unreachable();
 }
 /**
 * Check whether an opcode table entry can encode the given mnemonic with the
 * given operands: the mnemonic, the operand count and every operand must
 * match.
 */
 bool is_opcode_match(opcode_data_t *opcode, const char *mnemonic,
                      ast_node_t *operands) {
    if (strcmp(opcode->mnemonic, mnemonic) != 0 ||
        opcode->operand_count != operands->len)
        return false;
    // advance past every matching operand; stopping early means a mismatch
    size_t matched = 0;
    while (matched < operands->len &&
           is_operand_match(&opcode->operands[matched],
                            operands->children[matched]))
        ++matched;
    return matched == operands->len;
 }
 /**
 * Find the first opcode table entry that matches an instruction.
 *
 * @param opcode_out receives the matching entry; left untouched on failure
 * @return nullptr on success, err_encoder_no_encoding_found otherwise
 */
 error_t *encoder_get_opcode_data(ast_node_t *instruction, ast_node_t *operands,
                                  opcode_data_t **opcode_out) {
    const char *mnemonic = instruction->children[0]->token_entry->token.value;
    // the opcode table is terminated by a null entry
    size_t index = 0;
    while (opcodes[index] &&
           !is_opcode_match(opcodes[index], mnemonic, operands))
        ++index;
    if (!opcodes[index])
        return err_encoder_no_encoding_found;
    *opcode_out = opcodes[index];
    return nullptr;
 }
 /**
 * Encode the operands of a two-operand instruction.
 *
 * Not implemented yet: only checks that the opcode bytes are already in the
 * buffer and then reports err_encoder_not_implemented.
 */
 error_t *encode_two_operand(encoder_t *encoder, opcode_data_t *opcode,
                             ast_node_t *operands, bytes_t *encoding,
                             uint8_t *rex) {
    assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");
    // nothing is used yet; silence unused parameter warnings
    (void)rex;
    (void)encoding;
    (void)operands;
    (void)opcode;
    (void)encoder;
    return err_encoder_not_implemented;
 }
 /**
 * Encode a single register operand directly into the last opcode byte.
 *
 * The low three bits of the register id are OR-ed into the opcode byte that
 * is already in the buffer. Registers 8-15 additionally need REX.B, which is
 * the REX bit that extends a register encoded in the opcode byte (REX.R only
 * extends the ModR/M reg field).
 *
 * @param encoding buffer that must already hold the opcode byte(s)
 * @param rex      REX prefix accumulator, updated in place
 * @return always nullptr
 */
 error_t *encode_one_register_in_opcode(encoder_t *encoder,
                                        opcode_data_t *opcode,
                                        ast_node_t *operands, bytes_t *encoding,
                                        uint8_t *rex) {
    (void)encoder;
    (void)opcode;
    assert(operands->len == 1);
    assert(operands->children[0]->id == NODE_REGISTER);
    assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");
    register_id_t id = ast_node_register_value(operands->children[0])->id;
    encoding->buffer[encoding->len - 1] |= id & 0b111;
    if ((id & 0b1000) > 0) {
        *rex |= rex_prefix_b;
    }
    return nullptr;
 }
 /**
 * Encode a single register operand in a ModR/M byte and append that byte to
 * the encoding.
 *
 * @param rex REX prefix accumulator, updated in place for registers 8-15
 * @return the result of appending the ModR/M byte
 */
 error_t *encode_one_register(encoder_t *encoder, opcode_data_t *opcode,
                              ast_node_t *operands, bytes_t *encoding,
                              uint8_t *rex) {
    (void)encoder;
    assert(operands->len == 1);
    ast_node_t *operand = operands->children[0];
    assert(operand->id == NODE_REGISTER);
    const register_id_t id = ast_node_register_value(operand)->id;
    uint8_t modrm = modrm_mod_register;
    if (opcode->opcode_extension == opcode_extension_none) {
        // No opcode extension: the register goes in the reg field.
        // NOTE:
        // This case likely doesn't exist at all; opcodes that take one
        // register in ModR/M probably all have an opcode extension.
        modrm = modrm_reg(modrm, id);
        *rex = modrm_reg_rex(*rex, id);
        return bytes_append_uint8(encoding, modrm);
    }
    // The extension occupies the reg field, so the register goes in rm
    modrm = modrm_rm(modrm_extension(modrm, opcode->opcode_extension), id);
    *rex = modrm_rm_rex(*rex, id);
    return bytes_append_uint8(encoding, modrm);
 }
 /**
 * Encode a single immediate operand and append it to the encoding.
 *
 * The width written is the size of the opcode's first operand slot. A number
 * is written as its parsed value. A label reference is written as the label's
 * absolute address for 64 bit slots and as the offset to the label for 8, 16
 * and 32 bit slots.
 *
 * @return the result of the append call
 */
 error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
                               ast_node_t *operands, bytes_t *encoding,
                               uint8_t *rex) {
    (void)encoder;
    (void)opcode;
    (void)rex;
    assert(operands->len == 1);
    assert(operands->children[0]->id == NODE_IMMEDIATE);
    assert(operands->children[0]->len == 1);
    ast_node_t *immediate = operands->children[0]->children[0];
    assert(immediate->id == NODE_NUMBER ||
           immediate->id == NODE_LABEL_REFERENCE);
    // the opcode decides how many bytes the immediate occupies
    operand_size_t size = opcode->operands[0].size;
    if (immediate->id == NODE_NUMBER) {
        uint64_t value = ast_node_number_value(immediate)->value;
        error_t *err = nullptr;
        switch (size) {
        case OPERAND_SIZE_8:
            err = bytes_append_uint8(encoding, value);
            break;
        case OPERAND_SIZE_16:
            err = bytes_append_uint16(encoding, value);
            break;
        case OPERAND_SIZE_32:
            err = bytes_append_uint32(encoding, value);
            break;
        case OPERAND_SIZE_64:
            err = bytes_append_uint64(encoding, value);
            break;
        default:
            // NOTE(review): with assertions disabled this returns nullptr
            // without appending anything
            assert(false && "intentionally unhandled");
        }
        return err;
    } else {
        reference_t *reference = ast_node_reference_value(immediate);
        switch (size) {
        case OPERAND_SIZE_64:
            // 64 bit references use the absolute address
            return bytes_append_uint64(encoding, reference->address);
        case OPERAND_SIZE_32:
            // smaller references use the offset to the label
            return bytes_append_uint32(encoding, reference->offset);
        case OPERAND_SIZE_16:
            return bytes_append_uint16(encoding, reference->offset);
        case OPERAND_SIZE_8:
            return bytes_append_uint8(encoding, reference->offset);
        default:
            assert(false && "intentionally unhandled");
        }
    }
    __builtin_unreachable();
 }
 /**
 * Encode a single memory operand.
 *
 * Not implemented yet: always reports err_encoder_not_implemented.
 */
 error_t *encode_one_memory(encoder_t *encoder, opcode_data_t *opcode,
                            ast_node_t *operands, bytes_t *encoding,
                            uint8_t *rex) {
    // nothing is used yet; silence unused parameter warnings
    (void)rex;
    (void)encoding;
    (void)operands;
    (void)opcode;
    (void)encoder;
    return err_encoder_not_implemented;
 }
 /**
 * Encode the operand of a one-operand instruction by dispatching on the kind
 * of the opcode's first operand slot.
 *
 * Register operands are encoded either inside the opcode byte or in a ModR/M
 * byte, depending on the opcode's encoding class.
 *
 * @return the result of the selected encoder; err_encoder_not_implemented
 *         for an operand kind that is not handled
 */
 error_t *encode_one_operand(encoder_t *encoder, opcode_data_t *opcode,
                             ast_node_t *operands, bytes_t *encoding,
                             uint8_t *rex) {
    switch (opcode->operands[0].kind) {
    case OPERAND_REGISTER:
        if (opcode->encoding_class == ENCODING_OPCODE_REGISTER)
            return encode_one_register_in_opcode(encoder, opcode, operands,
                                                 encoding, rex);
        else
            return encode_one_register(encoder, opcode, operands, encoding,
                                       rex);
    case OPERAND_MEMORY:
        return encode_one_memory(encoder, opcode, operands, encoding, rex);
    case OPERAND_IMMEDIATE:
        return encode_one_immediate(encoder, opcode, operands, encoding, rex);
    }
    // An out-of-range kind used to run off the end of the function without
    // returning a value, which is undefined behaviour.
    assert(false && "unhandled operand kind");
    return err_encoder_not_implemented;
 }
 /**
 * Encode one instruction and store the resulting bytes in the value of its
 * NODE_INSTRUCTION node.
 *
 * The opcode bytes and operands are first assembled in a local buffer. The
 * final encoding is then written as: address size prefix, operand size
 * prefix, REX prefix (each only when needed), followed by the local buffer.
 *
 * @return nullptr on success, otherwise
 *         - err_encoder_no_encoding_found when no opcode matches
 *         - err_encoder_not_implemented for unsupported operand forms
 *         - err_encoder_unexpected_length when opcode plus operands exceed 20
 *           bytes
 *         - any error from appending to the local buffer
 *         On failure the instruction's stored encoding is left untouched.
 */
 error_t *encoder_encode_instruction(encoder_t *encoder,
                                     ast_node_t *instruction) {
    ast_node_t *operands = instruction->children[1];
    opcode_data_t *opcode = nullptr;
    error_t *err = encoder_get_opcode_data(instruction, operands, &opcode);
    if (err)
        return err;
    uint8_t rex = 0;
    bytes_t *encoding = LOCAL_BYTES(32);
    // two byte opcodes are stored with the first byte in the high bits
    if (opcode->opcode > 0xFF &&
        (err = bytes_append_uint8(encoding, opcode->opcode >> 8)))
        return err;
    if ((err = bytes_append_uint8(encoding, opcode->opcode & 0xFF)))
        return err;
    // NOTE:operand encoders all expect the opcode to be in the buffer already.
    // Some of them rely on this to encode the register value in the opcode
    // byte.
    switch (opcode->operand_count) {
    case 0:
        break;
    case 1:
        err = encode_one_operand(encoder, opcode, operands, encoding, &rex);
        break;
    case 2:
        err = encode_two_operand(encoder, opcode, operands, encoding, &rex);
        break;
    default:
        err = err_encoder_not_implemented;
    }
    if (err)
        return err;
    // reject overlong encodings before anything is written to the output
    if (encoding->len > 20)
        return err_encoder_unexpected_length;
    // produce the actual encoding output in the NODE_INSTRUCTION value
    instruction_t *instruction_value = ast_node_instruction_value(instruction);
    uint8_t *output = instruction_value->encoding.buffer;
    size_t output_len = 0;
    // Handle prefixes
    // REX.W is merged into the bits the operand encoders already set; plain
    // assignment would drop the register extension bits for r8-r15.
    if (opcode->rex_w_prefix)
        rex |= rex_prefix_w;
    if (opcode->address_size_prefix)
        output[output_len++] = memory_size_prefix;
    if (opcode->operand_size_prefix)
        output[output_len++] = operand_size_prefix;
    if (rex > 0)
        output[output_len++] = rex;
    // copy the encoded opcode and operands
    memcpy(output + output_len, encoding->buffer, encoding->len);
    output_len += encoding->len;
    instruction_value->encoding.len = output_len;
    return nullptr;
 }
 /**
 * Initial guess, in bytes, for the size of instructions that contain a label
 * reference. The first pass uses it as the encoding length of such
 * instructions so that addresses can be assigned before they are encoded.
 */
 constexpr size_t instruction_size_estimate = 10;
 /**
 * First pass over the program.
 *
 * For every top-level statement, in order:
 * - collect operand information (numbers, registers, symbols)
 * - encode instructions without label references right away; give the ones
 *   with label references the estimated size instead
 * - assign the running address to instructions and labels
 *
 * @return nullptr on success, otherwise the first error encountered
 */
 error_t *encoder_first_pass(encoder_t *encoder) {
    ast_node_t *root = encoder->ast;
    assert(root->id == NODE_PROGRAM);
    uintptr_t address = 0;
    for (size_t i = 0; i < root->len; ++i) {
        ast_node_t *statement = root->children[i];
        error_t *err = encoder_collect_info(encoder, statement, statement);
        if (err)
            return err;
        if (statement->id == NODE_LABEL) {
            ast_node_label_value(statement)->address = address;
            continue;
        }
        if (statement->id != NODE_INSTRUCTION)
            continue;
        instruction_t *instruction = ast_node_instruction_value(statement);
        if (instruction->has_reference) {
            // can't be encoded yet, reserve the estimated size
            instruction->encoding.len = instruction_size_estimate;
        } else {
            err = encoder_encode_instruction(encoder, statement);
            if (err)
                return err;
        }
        instruction->address = address;
        address += instruction->encoding.len;
    }
    return nullptr;
 }
 /**
 * Compute the mask of operand sizes that can hold a signed value. 64 bits is
 * always part of the mask; 8, 16 and 32 bits are added when the value is in
 * the range of the matching signed integer type.
 */
 operand_size_t signed_to_size_mask(int64_t value) {
    operand_size_t mask = OPERAND_SIZE_64;
    if (INT32_MIN <= value && value <= INT32_MAX)
        mask |= OPERAND_SIZE_32;
    if (INT16_MIN <= value && value <= INT16_MAX)
        mask |= OPERAND_SIZE_16;
    if (INT8_MIN <= value && value <= INT8_MAX)
        mask |= OPERAND_SIZE_8;
    return mask;
 }
 /**
 * Compute the distance from the end of an instruction to a label.
 *
 * @param from instruction the offset is relative to; measured from the first
 *             byte after its current encoding
 * @param to   label that is the target
 * @return label address minus end-of-instruction address
 */
 int64_t statement_offset(ast_node_t *from, ast_node_t *to) {
    assert(from->id == NODE_INSTRUCTION);
    assert(to->id == NODE_LABEL);
    instruction_t *source = ast_node_instruction_value(from);
    const int64_t end_of_source = source->address + source->encoding.len;
    const int64_t target = ast_node_label_value(to)->address;
    return target - end_of_source;
 }
 /**
 * Recursively resolve every label reference below a node of an instruction.
 *
 * For each NODE_LABEL_REFERENCE the absolute address of the label, the offset
 * from the end of the instruction to the label, and the mask of sizes that
 * offset fits in are stored in the node.
 *
 * @param node      node currently being visited
 * @param statement instruction that contains the node
 * @return nullptr on success, otherwise
 *         - err_encoder_unknown_symbol_reference when the name is not in the
 *           symbol table
 *         - err_encoder_not_implemented when the symbol has no local label,
 *           e.g. because it is only imported
 */
 error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node,
                                         ast_node_t *statement) {
    assert(statement->id == NODE_INSTRUCTION);
    if (node->id == NODE_LABEL_REFERENCE) {
        const char *name = node->token_entry->token.value;
        symbol_t *symbol = symbol_table_lookup(encoder->symbols, name);
        if (symbol == nullptr)
            return err_encoder_unknown_symbol_reference;
        // Only locally defined labels have a statement to take an address
        // from. Imported symbols keep a null statement, which used to trip an
        // assert (or a null dereference without assertions).
        if (symbol->statement == nullptr ||
            symbol->statement->id != NODE_LABEL)
            return err_encoder_not_implemented;
        int64_t offset = statement_offset(statement, symbol->statement);
        int64_t absolute = ast_node_label_value(symbol->statement)->address;
        operand_size_t size = signed_to_size_mask(offset);
        node->value.reference.address = absolute;
        node->value.reference.offset = offset;
        node->value.reference.size = size;
    }
    for (size_t i = 0; i < node->len; ++i) {
        error_t *err = encoder_collect_reference_info(
            encoder, node->children[i], statement);
        if (err)
            return err;
    }
    return nullptr;
 }
 /**
 * Tell whether a statement has to be encoded again in the second pass: only
 * instructions that contain a label reference do.
 */
 bool encoder_should_reencode(ast_node_t *statement) {
    return statement->id == NODE_INSTRUCTION &&
           ast_node_instruction_value(statement)->has_reference;
 }
 /**
 * Store an address in a statement. Only instructions and labels carry an
 * address; every other statement kind is left untouched.
 */
 void set_statement_address(ast_node_t *statement, int64_t address) {
    switch (statement->id) {
    case NODE_INSTRUCTION:
        ast_node_instruction_value(statement)->address = address;
        break;
    case NODE_LABEL:
        ast_node_label_value(statement)->address = address;
        break;
    default:
        break;
    }
 }
 /**
 * Return the number of encoded bytes of a statement. Instructions report
 * their current encoding length, everything else reports 0.
 */
 size_t get_statement_length(ast_node_t *statement) {
    const bool is_instruction = statement->id == NODE_INSTRUCTION;
    return is_instruction ? ast_node_instruction_value(statement)->encoding.len
                          : 0;
 }
 /**
 * Second pass over the program.
 *
 * Walks all statements in order, reassigns their addresses and re-encodes
 * every instruction that contains a label reference using the addresses
 * known so far.
 *
 * @param did_update set to true when at least one instruction changed its
 *                   encoded length, which means the addresses have not
 *                   converged yet and another pass is needed
 * @return nullptr on success, otherwise the first error encountered
 */
 error_t *encoder_second_pass(encoder_t *encoder, bool *did_update) {
    ast_node_t *root = encoder->ast;
    *did_update = false;
    int64_t address = 0;
    for (size_t i = 0; i < root->len; ++i) {
        ast_node_t *statement = root->children[i];
        set_statement_address(statement, address);
        const size_t old_len = get_statement_length(statement);
        if (encoder_should_reencode(statement)) {
            error_t *err =
                encoder_collect_reference_info(encoder, statement, statement);
            if (err == nullptr)
                err = encoder_encode_instruction(encoder, statement);
            if (err != nullptr)
                return err;
        }
        const size_t new_len = get_statement_length(statement);
        if (old_len != new_len)
            *did_update = true;
        address += new_len;
    }
    return nullptr;
 }
 /**
 * Find the first opcode table entry that matches an instruction's mnemonic
 * and operands.
 *
 * @return the matching entry, or nullptr when there is none
 */
 opcode_data_t *encoder_find_opcode(ast_node_t *instruction) {
    size_t index = 0;
    // the opcode table is terminated by a nullptr entry
    while (opcodes[index] != nullptr) {
        ast_node_t *mnemonic_node = instruction->children[0];
        ast_node_t *operands = instruction->children[1];
        const char *mnemonic = mnemonic_node->token_entry->token.value;
        if (is_opcode_match(opcodes[index], mnemonic, operands))
            return opcodes[index];
        ++index;
    }
    return nullptr;
 }
 /**
 * Verify that every referenced symbol has been resolved. A symbol that is
 * still of kind SYMBOL_REFERENCE was used but never defined, imported or
 * exported.
 *
 * @return nullptr when all symbols are resolved,
 *         err_encoder_unknown_symbol_reference otherwise
 */
 error_t *encoder_check_symbols(encoder_t *encoder) {
    const symbol_table_t *table = encoder->symbols;
    size_t index = 0;
    while (index < table->len) {
        if (table->symbols[index].kind == SYMBOL_REFERENCE)
            return err_encoder_unknown_symbol_reference;
        ++index;
    }
    return nullptr;
 }
 /**
 * Encode the whole program.
 *
 * Runs the first pass, checks that all symbols are resolved and then repeats
 * the second pass until no instruction changes size any more. At most 10
 * second passes are run; reaching that limit is not reported as an error.
 *
 * @return nullptr on success, otherwise the first error encountered
 */
 error_t *encoder_encode(encoder_t *encoder) {
    error_t *err = encoder_first_pass(encoder);
    if (err == nullptr)
        err = encoder_check_symbols(encoder);
    if (err != nullptr)
        return err;
    int passes_left = 10;
    bool did_update = true;
    while (passes_left > 0 && did_update) {
        err = encoder_second_pass(encoder, &did_update);
        if (err != nullptr)
            return err;
        --passes_left;
    }
    return nullptr;
 }
@@ -1,33 +0,0 @@
 #ifndef INCLUDE_ENCODER_ENCODER_H_
 #define INCLUDE_ENCODER_ENCODER_H_
 #include "symbols.h"
 /**
 * State of one encoding run: the program being encoded and the symbols
 * collected from it.
 */
 typedef struct encoder {
    symbol_table_t *symbols; // created by encoder_alloc, freed by encoder_free
    ast_node_t *ast;         // program to encode; not freed by encoder_free
 } encoder_t;
 // ModR/M byte layout as used by these constants: mod in bits 7-6, reg in
 // bits 5-3 and rm in bits 2-0.
 // Values of the mod field
 constexpr uint8_t modrm_mod_memory = 0b00'000'000;
 constexpr uint8_t modrm_mod_memory_displacement8 = 0b01'000'000;
 constexpr uint8_t modrm_mod_memory_displacement32 = 0b10'000'000;
 constexpr uint8_t modrm_mod_register = 0b11'000'000;
 // Masks selecting the individual fields
 constexpr uint8_t modrm_reg_mask = 0b00'111'000;
 constexpr uint8_t modrm_rm_mask = 0b00'000'111;
 constexpr uint8_t modrm_mod_mask = 0b11'000'000;
 error_t *encoder_alloc(encoder_t **encoder, ast_node_t *ast);
 error_t *encoder_encode(encoder_t *encoder);
 void encoder_free(encoder_t *encoder);
 extern error_t *const err_encoder_invalid_register;
 extern error_t *const err_encoder_number_overflow;
 extern error_t *const err_encoder_invalid_number_format;
 extern error_t *const err_encoder_invalid_size_suffix;
 extern error_t *const err_encoder_unknown_symbol_reference;
 extern error_t *const err_encoder_no_encoding_found;
 extern error_t *const err_encoder_not_implemented;
 extern error_t *const err_encoder_unexpected_length;
 #endif // INCLUDE_ENCODER_ENCODER_H_
@@ -1,165 +0,0 @@
 #include "symbols.h"
 #include "../error.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 // Number of symbols a freshly allocated table has room for
 constexpr size_t symbol_table_default_cap = 64;
 // Once the capacity has reached this value the table refuses to grow
 constexpr size_t symbol_table_max_cap = 1 << 16;
 // Error values returned by the symbol table, declared extern in symbols.h
 error_t *const err_symbol_table_invalid_node = &(error_t){
    .message = "Unexpected node id when adding symbol to symbol table"};
 error_t *const err_symbol_table_max_cap = &(error_t){
    .message = "Failed to increase symbol table length, max capacity reached"};
 error_t *const err_symbol_table_incompatible_symbols =
    &(error_t){.message = "Failed to update symbol with incompatible kind"};
 /**
 * Allocate an empty symbol table with the default capacity.
 *
 * @param output receives the new table, or nullptr when allocation fails
 * @return nullptr on success, err_allocation_failed otherwise
 */
 error_t *symbol_table_alloc(symbol_table_t **output) {
    *output = nullptr;
    symbol_table_t *result = calloc(1, sizeof(*result));
    if (!result)
        return err_allocation_failed;
    result->symbols = calloc(symbol_table_default_cap, sizeof(symbol_t));
    if (!result->symbols) {
        // don't leak the table header
        free(result);
        return err_allocation_failed;
    }
    result->len = 0;
    result->cap = symbol_table_default_cap;
    *output = result;
    return nullptr;
 }
 /**
 * Release a symbol table and its symbol array. Passing nullptr is a no-op,
 * matching free() and encoder_free(). The symbol names are not freed because
 * the table does not own them.
 */
 void symbol_table_free(symbol_table_t *table) {
    if (table == nullptr)
        return;
    free(table->symbols);
    free(table);
 }
 /**
 * Double the capacity of a symbol table.
 *
 * @return nullptr on success, err_symbol_table_max_cap when the table is
 *         already at its maximum capacity, err_allocation_failed when the
 *         reallocation fails (the table is left unchanged in both cases)
 */
 error_t *symbol_table_grow_cap(symbol_table_t *table) {
    if (table->cap >= symbol_table_max_cap)
        return err_symbol_table_max_cap;
    const size_t doubled = table->cap * 2;
    symbol_t *grown = realloc(table->symbols, doubled * sizeof(symbol_t));
    if (!grown)
        return err_allocation_failed;
    table->cap = doubled;
    table->symbols = grown;
    return nullptr;
 }
 /**
 * Derive the symbol kind and name from a symbol related node.
 *
 * Labels name themselves through their first child, import and export
 * directives through their second child, and label references through their
 * own token.
 *
 * @param kind receives the symbol kind; untouched on failure
 * @param name receives the symbol name; untouched on failure
 * @return nullptr on success, err_symbol_table_invalid_node for any other
 *         node kind
 */
 error_t *symbol_table_get_node_info(ast_node_t *node, symbol_kind_t *kind,
                                     char **name) {
    symbol_kind_t found_kind;
    ast_node_t *name_node;
    switch (node->id) {
    case NODE_LABEL:
        found_kind = SYMBOL_LOCAL;
        name_node = node->children[0];
        break;
    case NODE_LABEL_REFERENCE:
        found_kind = SYMBOL_REFERENCE;
        name_node = node;
        break;
    case NODE_IMPORT_DIRECTIVE:
        found_kind = SYMBOL_IMPORT;
        name_node = node->children[1];
        break;
    case NODE_EXPORT_DIRECTIVE:
        found_kind = SYMBOL_EXPORT;
        name_node = node->children[1];
        break;
    default:
        return err_symbol_table_invalid_node;
    }
    *kind = found_kind;
    *name = name_node->token_entry->token.value;
    return nullptr;
 }
 /*
 old  \  new  | REFERENCE | LOCAL    | IMPORT   | EXPORT   |
 -------------|-----------|----------|----------|----------|
 REFERENCE    |           | replace  | replace  | replace  |
 -------------|-----------|----------|----------|----------|
 LOCAL        |           |          |   ERR    | replace  |
 -------------|-----------|----------|----------|----------|
 IMPORT       |           |          |          |   ERR    |
 -------------|-----------|----------|----------|----------|
 EXPORT       |           |          |   ERR    |          |
 -------------|-----------|----------|----------|----------|
 */
 /**
 * Decide whether an existing symbol's kind is replaced by a new kind: a
 * reference is replaced by anything that is not a reference, and a local
 * symbol is replaced by an export.
 */
 bool symbol_table_should_upgrade(symbol_kind_t old, symbol_kind_t new) {
    switch (old) {
    case SYMBOL_REFERENCE:
        return new != SYMBOL_REFERENCE;
    case SYMBOL_LOCAL:
        return new == SYMBOL_EXPORT;
    default:
        return false;
    }
 }
 /**
 * Decide whether combining an existing symbol kind with a new kind is an
 * error: importing a name that is local or exported, or exporting a name
 * that is imported.
 */
 bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
    const bool old_is_defined_here =
        old == SYMBOL_LOCAL || old == SYMBOL_EXPORT;
    return (new == SYMBOL_IMPORT && old_is_defined_here) ||
           (new == SYMBOL_EXPORT && old == SYMBOL_IMPORT);
 }
 /**
 * Append a new symbol to the table, growing the table when it is full.
 *
 * @pre The symbol _must not_ already be in the table.
 * @return nullptr on success, otherwise the error from growing the table
 */
 error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
                           ast_node_t *statement) {
    if (table->len >= table->cap) {
        error_t *err = symbol_table_grow_cap(table);
        if (err != nullptr)
            return err;
    }
    symbol_t *slot = &table->symbols[table->len];
    *slot = (symbol_t){
        .name = name,
        .kind = kind,
        .statement = statement,
    };
    ++table->len;
    return nullptr;
 }
 /**
 * Record the symbol described by a node in the table.
 *
 * A name that is not in the table yet is added. For a known name the kinds
 * are combined: incompatible combinations are rejected, otherwise the kind is
 * upgraded when the new one takes precedence. Only label definitions carry a
 * statement; it is stored when the symbol has none yet.
 *
 * @param node      label, label reference, import or export node
 * @param statement top-level statement that contains the node
 * @return nullptr on success, err_symbol_table_invalid_node for other node
 *         kinds, err_symbol_table_incompatible_symbols for conflicting kinds,
 *         or the error from adding the symbol
 */
 error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
                              ast_node_t *statement) {
    char *name;
    symbol_kind_t kind;
    error_t *err = symbol_table_get_node_info(node, &kind, &name);
    if (err != nullptr)
        return err;
    const bool is_definition = kind == SYMBOL_LOCAL;
    ast_node_t *definition = is_definition ? statement : nullptr;
    symbol_t *existing = symbol_table_lookup(table, name);
    if (existing == nullptr)
        return symbol_table_add(table, name, kind, definition);
    if (symbol_table_should_error(existing->kind, kind))
        return err_symbol_table_incompatible_symbols;
    if (symbol_table_should_upgrade(existing->kind, kind))
        existing->kind = kind;
    if (is_definition && existing->statement == nullptr)
        existing->statement = definition;
    return nullptr;
 }
 /**
 * Find a symbol by name with a linear scan.
 *
 * @return pointer to the symbol inside the table, or nullptr when no symbol
 *         has that name
 */
 symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name) {
    size_t index = 0;
    while (index < table->len) {
        symbol_t *candidate = &table->symbols[index];
        if (strcmp(candidate->name, name) == 0)
            return candidate;
        ++index;
    }
    return nullptr;
 }
@@ -1,47 +0,0 @@
 #ifndef INCLUDE_ENCODER_SYMBOLS_H_
 #define INCLUDE_ENCODER_SYMBOLS_H_
 #include "../ast.h"
 extern error_t *const err_symbol_table_invalid_node;
 extern error_t *const err_symbol_table_max_cap;
 extern error_t *const err_symbol_table_incompatible_symbols;
 /**
 * How a symbol is known to the program.
 */
 typedef enum symbol_kind {
    SYMBOL_REFERENCE, // only used so far, through a label reference
    SYMBOL_LOCAL,     // defined by a label
    SYMBOL_EXPORT,    // named in an export directive
    SYMBOL_IMPORT,    // named in an import directive
 } symbol_kind_t;
 /**
 * Represent a symbol in the program
 *
 * Symbols with the same name can only be in the table once. IMPORT or EXPORT
 * symbols take precedence over REFERENCE symbols. If any reference symbols
 * remain after the first encoding pass this indicates an error. Trying to add
 * an IMPORT or EXPORT symbol if the same name already exists as the other kind
 * is an error.
 *
 * The symbol table never takes ownership of the name string; it is lifted
 * straight from the node's token value.
 */
 typedef struct symbol {
    char *name;            // borrowed from the token, not owned
    symbol_kind_t kind;    // current kind, may be upgraded later
    ast_node_t *statement; // label statement defining the symbol, or nullptr
 } symbol_t;
 /**
 * Growable array of symbols.
 */
 typedef struct symbol_table {
    size_t cap;        // number of symbols the array has room for
    size_t len;        // number of symbols currently stored
    symbol_t *symbols; // heap allocated array of cap entries
 } symbol_table_t;
 error_t *symbol_table_alloc(symbol_table_t **table);
 void symbol_table_free(symbol_table_t *table);
 error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
                             ast_node_t *statement);
 symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
 #endif // INCLUDE_ENCODER_SYMBOLS_H_
@@ -9,13 +9,8 @@ error_t *const err_errorf_alloc = &(error_t){
 error_t *const err_errorf_length = &(error_t){
    .message =
        "Formatting of another error failed to determine the error length"};
 error_t *const err_eof =
    &(error_t){.message = "Read failed because EOF is reached"};
-error_t *const err_unknown_read_failure =
+error_t *err_allocation_failed =
    &(error_t){.message = "Unknown read error"};
 error_t *const err_allocation_failed =
    &(error_t){.message = "Memory allocation failed"};
 error_t *errorf(const char *fmt, ...) {
@@ -19,8 +19,6 @@ static inline void error_free(error_t *err) {
 }
 /* Some global errors */
-extern error_t *const err_allocation_failed;
+extern error_t *err_allocation_failed;
 extern error_t *const err_eof;
 extern error_t *const err_unknown_read_failure;
 #endif // INCLUDE_SRC_ERROR_H_
@@ -5,16 +5,21 @@
 #include <errno.h>
 #include <string.h>
-error_t *const err_lexer_already_open = &(error_t){
+error_t *err_lexer_already_open = &(error_t){
    .message =
        "Can't open on a lexer object that is already opened. Close it first."};
-error_t *const err_lexer_prefix_too_large =
+error_t *err_prefix_too_large =
    &(error_t){.message = "Prefix too large for internal lexer buffer"};
-error_t *const err_lexer_buffer_underrun = &(error_t){
+error_t *err_buffer_underrun = &(error_t){
    .message = "Buffer does not contain enough characters for lexer_consume_n"};
-error_t *const err_lexer_consume_excessive_length =
+error_t *err_consume_excessive_length =
    &(error_t){.message = "Too many valid characters to consume"};
 error_t *err_eof =
    &(error_t){.message = "Can't read from file because EOF is reached"};
 error_t *err_unknown_read = &(error_t){.message = "Unknown read error"};
 typedef bool (*char_predicate_t)(char);
 const char *lexer_token_id_to_cstr(lexer_token_id_t id) {
@@ -107,7 +112,7 @@ error_t *lexer_fill_buffer(lexer_t *lex) {
        if (n == 0 && ferror(lex->fp))
            return errorf("Read error: %s", strerror(errno));
        if (n == 0)
-            return err_unknown_read_failure;
+            return err_unknown_read;
        remaining -= n;
        lex->buffer_count += n;
    }
@@ -136,7 +141,7 @@ error_t *lexer_open(lexer_t *lex, char *path) {
 *
 * @pre There must be at least n characters in the input buffer
 */
-void lexer_shift_buffer(lexer_t *lex, size_t n) {
+void lexer_shift_buffer(lexer_t *lex, int n) {
    assert(lex->buffer_count >= n);
    lex->buffer_count -= n;
    memmove(lex->buffer, lex->buffer + n, lex->buffer_count);
@@ -177,9 +182,9 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
 error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                         char buffer[static len], const size_t n) {
    if (lex->buffer_count < n)
-        return err_lexer_buffer_underrun;
+        return err_buffer_underrun;
-    if (n > len)
+    if (len > n)
-        return err_lexer_consume_excessive_length;
+        return err_consume_excessive_length;
    memcpy(buffer, lex->buffer, n);
    lexer_shift_buffer(lex, n);
@@ -224,7 +229,7 @@ error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
                (lex->buffer_count > 0 && is_valid(lex->buffer[0]));
        if (have_more_characters && *n_consumed == buffer_size) {
-            return err_lexer_consume_excessive_length;
+            return err_consume_excessive_length;
        }
    } while (have_more_characters);
    return nullptr;
@@ -294,12 +299,11 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_number_length - so_far,
                                 buffer + so_far, is_valid, &n);
-    if (err == err_lexer_consume_excessive_length) {
+    if (err == err_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Number length exceeds the maximum of 128 characters";
    }
    lex->character_number += n;
    so_far += n;
    if (n == 0) {
        token->id = TOKEN_ERROR;
@@ -325,15 +329,14 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    if (suffix_length > 0) {
        err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far,
                              suffix_length);
-        if (err == err_lexer_consume_excessive_length) {
+        if (err == err_consume_excessive_length) {
            token->id = TOKEN_ERROR;
            token->explanation =
                "Number length exceeds the maximum of 128 characters";
        } else {
            lex->character_number += suffix_length;
        }
    }
    lex->character_number += n;
    token->value = strdup(buffer);
    return nullptr;
 }
@@ -403,7 +406,7 @@ error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_identifier_length, buffer,
                                 is_identifier_character, &n);
-    if (err == err_lexer_consume_excessive_length) {
+    if (err == err_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Identifier length exceeds the maximum of 128 characters";
@@ -446,7 +449,7 @@ error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_whitespace_length, buffer,
                                 is_whitespace_character, &n);
-    if (err == err_lexer_consume_excessive_length) {
+    if (err == err_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Whitespace length exceeds the maximum of 1024 characters";
@@ -481,7 +484,7 @@ error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_comment_length, buffer,
                                 is_comment_character, &n);
-    if (err == err_lexer_consume_excessive_length) {
+    if (err == err_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Comment length exceeds the maximum of 1024 characters";
@@ -5,10 +5,7 @@
 #include <stddef.h>
 #include <stdio.h>
-extern error_t *const err_lexer_already_open;
+extern error_t *err_eof;
 extern error_t *const err_lexer_prefix_too_large;
 extern error_t *const err_lexer_buffer_underrun;
 extern error_t *const err_lexer_consume_excessive_length;
 typedef enum {
    TOKEN_ERROR,
@@ -1,8 +1,5 @@
 #include "ast.h"
 #include "encoder/encoder.h"
 #include "error.h"
 #include "lexer.h"
 #include "parser/parser.h"
 #include "tokenlist.h"
 #include <limits.h>
@@ -10,137 +7,38 @@
 #include <stdlib.h>
 #include <string.h>
-typedef enum mode {
+bool print_token(lexer_token_t *token) {
-    MODE_INVALID = -1,
+    lexer_token_print(token);
-    MODE_AST,
+    return true;
    MODE_TEXT,
    MODE_TOKENS,
    MODE_ENCODING,
 } mode_t;
 // Dump every token of the list, front to back, using lexer_token_print.
 void print_tokens(tokenlist_t *list) {
    tokenlist_entry_t *cursor = list->head;
    while (cursor) {
        lexer_token_print(&cursor->token);
        cursor = cursor->next;
    }
 }
-void print_text(tokenlist_t *list) {
+bool print_value(lexer_token_t *token) {
-    for (auto entry = list->head; entry; entry = entry->next) {
+    if (token->id == TOKEN_ERROR) {
-        auto token = &entry->token;
+        printf("%s\n", token->value);
-        if (token->id == TOKEN_ERROR) {
+        for (size_t i = 0; i < token->character_number; ++i)
            printf("%s\n", token->value);
            for (size_t i = 0; i < token->character_number; ++i)
                printf(" ");
            printf("^-- %s\n", token->explanation);
            return;
        } else {
            printf("%s", token->value);
        }
    }
 }
 // Parse the token list and print the resulting AST. When the parser stopped
 // before the end of the token stream, the first unparsed token is printed as
 // well and an error is returned after the AST has been released.
 error_t *print_ast(tokenlist_t *list) {
    parse_result_t parsed = parse(list->head);
    if (parsed.err)
        return parsed.err;
    // Leftover tokens mean the grammar did not cover the whole input
    const bool has_leftover = parsed.next != nullptr;
    ast_node_print(parsed.node);
    if (has_leftover) {
        puts("First unparsed token:");
        lexer_token_print(&parsed.next->token);
    }
    ast_node_free(parsed.node);
    return has_leftover ? errorf("did not parse entire input token stream")
                        : nullptr;
 }
 void print_hex(size_t len, uint8_t bytes[static len]) {
    for (size_t i = 0; i < len; i++) {
        printf("%02x", bytes[i]);
        if (i < len - 1) {
            printf(" ");
-        }
+        printf("^-- %s\n", token->explanation);
    } else {
        printf("%s", token->value);
    }
-    printf("\n");
+    return token->id != TOKEN_ERROR;
 }
 // Parse the token list, run the encoder over the resulting AST and print the
 // encoded bytes of every instruction node directly below the root as hex.
 //
 // Returns nullptr on success or the first parse/encoder error. All resources
 // acquired along the way (AST, encoder) are released on every path.
 error_t *print_encoding(tokenlist_t *list) {
    parse_result_t result = parse(list->head);
    if (result.err)
        return result.err;
    encoder_t *encoder;
    error_t *err = encoder_alloc(&encoder, result.node);
    if (err)
        goto cleanup_ast;
    err = encoder_encode(encoder);
    // The encoder exists at this point, so a failed encode must still free it
    if (err)
        goto cleanup_encoder;
    ast_node_t *root = result.node;
    for (size_t i = 0; i < root->len; ++i) {
        ast_node_t *node = root->children[i];
        if (node->id != NODE_INSTRUCTION)
            continue;
        print_hex(node->value.instruction.encoding.len,
                  node->value.instruction.encoding.buffer);
    }
    // Success falls through the cleanup chain with err == nullptr
 cleanup_encoder:
    encoder_free(encoder);
 cleanup_ast:
    ast_node_free(result.node);
    return err;
 }
 int get_execution_mode(int argc, char *argv[]) {
    if (argc != 3)
        return MODE_INVALID;
    if (strcmp(argv[1], "tokens") == 0)
        return MODE_TOKENS;
    if (strcmp(argv[1], "text") == 0)
        return MODE_TEXT;
    if (strcmp(argv[1], "ast") == 0)
        return MODE_AST;
    if (strcmp(argv[1], "encoding") == 0)
        return MODE_ENCODING;
    return MODE_INVALID;
 }
 // Run the action selected by `mode` on the token list and return its error,
 // if any. The caller must have rejected MODE_INVALID already; reaching the
 // default branch is undefined (marked unreachable).
 error_t *do_action(mode_t mode, tokenlist_t *list) {
    error_t *outcome = nullptr;
    switch (mode) {
    case MODE_AST:
        outcome = print_ast(list);
        break;
    case MODE_ENCODING:
        outcome = print_encoding(list);
        break;
    case MODE_TEXT:
        print_text(list);
        break;
    case MODE_TOKENS:
        print_tokens(list);
        break;
    default:
        /* MODE_INVALID or garbage: can't happen */
        __builtin_unreachable();
    }
    return outcome;
 }
 int main(int argc, char *argv[]) {
-    mode_t mode = get_execution_mode(argc, argv);
+    if (argc != 3 ||
-    if (mode == MODE_INVALID) {
+        (strcmp(argv[1], "-tokens") != 0 && strcmp(argv[1], "-text") != 0)) {
-        puts("Usage: oas [tokens|text|ast|encoding] <filename>");
+        puts("Usage: oas -tokens <filename>");
-        exit(1);
+        puts("Usage: oas -text <filename>");
        return 1;
    }
    bool (*print_fn)(lexer_token_t *);
    char *filename = argv[2];
    if (strcmp(argv[1], "-tokens") == 0) {
        print_fn = print_token;
    } else {
        print_fn = print_value;
    }
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, filename);
@@ -156,10 +54,9 @@ int main(int argc, char *argv[]) {
    if (err)
        goto cleanup_tokens;
-    err = do_action(mode, list);
+    for (auto entry = list->head; entry; entry = entry->next) {
-    if (err)
+        print_fn(&entry->token);
-        goto cleanup_tokens;
+    }
    tokenlist_free(list);
    error_free(err);
    return 0;
@@ -1,134 +0,0 @@
 #include "combinators.h"
 #include "util.h"
 // Parse a list of elements recognised by `parser`, separated by tokens with
 // id `delimiter_id`. The delimiters are not stored in the parent node.
 //
 // The elements are collected as children of a new node with the given id.
 // When no element matches, the result is a no-match unless `allow_none` is
 // set, in which case an empty parent node is returned.
 //
 // A delimiter is only consumed when an element follows it. If the element
 // after a delimiter does not parse (or the token stream ends), the returned
 // `next` still points at that delimiter so the caller sees it unconsumed.
 parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id,
                          bool allow_none, lexer_token_id_t delimiter_id,
                          parser_t parser) {
    ast_node_t *many;
    error_t *err = ast_node_alloc(&many);
    parse_result_t result;
    if (err)
        return parse_error(err);
    many->id = id;
    while (current) {
        // Tentative start of the next element; `current` is only advanced once
        // that element has actually been parsed.
        tokenlist_entry_t *element = current;
        // Look beyond the delimiter on all but the first iteration
        if (many->len > 0) {
            if (current->token.id != delimiter_id)
                break;
            element = tokenlist_next(current);
            if (element == nullptr)
                break;
        }
        result = parser(element);
        if (result.err == err_parse_no_match)
            break;
        if (result.err) {
            ast_node_free(many);
            return result;
        }
        err = ast_node_add_child(many, result.node);
        if (err) {
            ast_node_free(many);
            ast_node_free(result.node);
            return parse_error(err);
        }
        current = result.next;
    }
    if (!allow_none && many->len == 0) {
        ast_node_free(many);
        return parse_no_match();
    }
    return parse_success(many, current);
 }
 // Try each parser of the nullptr-terminated `parsers` array in order on the
 // same starting token and return the first successful result.
 //
 // Only err_parse_no_match makes the search continue with the next
 // alternative. Any other error (for example an allocation failure) is
 // returned to the caller immediately instead of being silently dropped and
 // reported as a no-match, matching how parse_many and parse_list treat
 // errors.
 parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]) {
    parser_t parser;
    while ((parser = *parsers++)) {
        parse_result_t result = parser(current);
        // Either a success or a hard failure; both end the search
        if (result.err != err_parse_no_match)
            return result;
    }
    return parse_no_match();
 }
 // Apply the given parser as many times in a row as possible and wrap the
 // parsed nodes in a new AST node with the given node id. With `allow_none`
 // set, zero matches still produce an (empty) parent node; otherwise zero
 // matches are reported as a no-match.
 parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id,
                          bool allow_none, parser_t parser) {
    ast_node_t *parent;
    error_t *err = ast_node_alloc(&parent);
    if (err != nullptr)
        return parse_error(err);
    parent->id = id;
    while (current != nullptr) {
        parse_result_t item = parser(current);
        // A no-match simply ends the repetition
        if (item.err == err_parse_no_match)
            break;
        // Any other error aborts the whole parse
        if (item.err != nullptr) {
            ast_node_free(parent);
            return item;
        }
        err = ast_node_add_child(parent, item.node);
        if (err != nullptr) {
            ast_node_free(parent);
            ast_node_free(item.node);
            return parse_error(err);
        }
        current = item.next;
    }
    if (parent->len > 0 || allow_none)
        return parse_success(parent, current);
    ast_node_free(parent);
    return parse_no_match();
 }
 // Run every parser of the nullptr-terminated `parsers` array one after the
 // other. When all of them succeed, the parsed nodes become the children of a
 // new parent node with the given id. The first failing parser aborts the
 // sequence and its result is returned as is.
 parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
                                 parser_t parsers[]) {
    ast_node_t *parent;
    error_t *err = ast_node_alloc(&parent);
    if (err != nullptr)
        return parse_error(err);
    parent->id = id;
    size_t index = 0;
    for (; parsers[index] != nullptr && current != nullptr; ++index) {
        parse_result_t step = parsers[index](current);
        if (step.err != nullptr) {
            ast_node_free(parent);
            return step;
        }
        err = ast_node_add_child(parent, step.node);
        if (err != nullptr) {
            ast_node_free(step.node);
            ast_node_free(parent);
            return parse_error(err);
        }
        current = step.next;
    }
    // Parsers left over: the token stream ended before all of them matched
    if (parsers[index] != nullptr) {
        ast_node_free(parent);
        return parse_no_match();
    }
    return parse_success(parent, current);
 }
@@ -1,25 +0,0 @@
 #ifndef INCLUDE_PARSER_COMBINATORS_H_
 #define INCLUDE_PARSER_COMBINATORS_H_
 #include "util.h"
 // A parser takes the token entry to start at and returns a parse result.
 typedef parse_result_t (*parser_t)(tokenlist_entry_t *);
 // Try each parser of the nullptr-terminated array in order, returning the
 // first successful result. Reports a no-match when the alternatives are
 // exhausted without success.
 parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]);
 // Parse as many of the given parser's objects in a row as possible,
 // potentially allowing none. Wraps the found objects in a new AST node with
 // the given node id.
 parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id,
                          bool allow_none, parser_t parser);
 // Parse a list of the given parser's objects separated by tokens with the
 // given delimiter id. The delimiters are not stored in the new parent node.
 parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id,
                          bool allow_none, lexer_token_id_t delimiter_id,
                          parser_t parser);
 // Tries to run all parsers of the nullptr-terminated array consecutively and,
 // if that succeeds, wraps the parsed nodes in a new parent node.
 parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
                                 parser_t parsers[]);
 #endif // INCLUDE_PARSER_COMBINATORS_H_
@@ -1,164 +0,0 @@
 #include "parser.h"
 #include "../ast.h"
 #include "../lexer.h"
 #include "../tokenlist.h"
 #include "combinators.h"
 #include "primitives.h"
 #include "util.h"
 // <number> ::= <octal> | <decimal> | <hexadecimal> | <binary>
 // The matched literal is wrapped in a NODE_NUMBER node.
 parse_result_t parse_number(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal,
                          parse_binary, nullptr};
    parse_result_t result = parse_any(current, parsers);
    return parse_result_wrap(NODE_NUMBER, result);
 }
 // <plus_or_minus> ::= <plus> | <minus>
 // Returns the matched token node directly, without a wrapper node.
 parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_plus, parse_minus, nullptr};
    return parse_any(current, parsers);
 }
 // <register_index> ::= <plus> <register> <asterisk> <number>
 parse_result_t parse_register_index(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_plus, parse_register, parse_asterisk,
                          parse_number, nullptr};
    return parse_consecutive(current, NODE_REGISTER_INDEX, parsers);
 }
 // <register_offset> ::= <plus_or_minus> <number>
 parse_result_t parse_register_offset(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr};
    return parse_consecutive(current, NODE_REGISTER_OFFSET, parsers);
 }
 // <register_expression> ::= <register> <register_index>? <register_offset>?
 //
 // Builds a NODE_REGISTER_EXPRESSION node whose children are the register and
 // whichever of the optional index/offset parts matched. A failure to parse an
 // optional part is not an error: its error is released and parsing continues
 // from the same token.
 parse_result_t parse_register_expression(tokenlist_entry_t *current) {
    parse_result_t result;
    ast_node_t *expr;
    error_t *err = ast_node_alloc(&expr);
    if (err)
        return parse_error(err);
    expr->id = NODE_REGISTER_EXPRESSION;
    // <register>
    result = parse_register(current);
    if (result.err) {
        // The mandatory register is missing, give up and report its result
        ast_node_free(expr);
        return result;
    }
    err = ast_node_add_child(expr, result.node);
    if (err) {
        // The child was not attached, so both nodes are released separately
        ast_node_free(result.node);
        ast_node_free(expr);
        return parse_error(err);
    }
    current = result.next;
    // <register_index>?
    result = parse_register_index(current);
    if (result.err) {
        // Optional part: discard the error and keep `current` where it is
        error_free(result.err);
    } else {
        err = ast_node_add_child(expr, result.node);
        if (err) {
            ast_node_free(result.node);
            ast_node_free(expr);
            return parse_error(err);
        }
        current = result.next;
    }
    // <register_offset>?
    result = parse_register_offset(current);
    if (result.err) {
        // Optional part: discard the error and keep `current` where it is
        error_free(result.err);
    } else {
        err = ast_node_add_child(expr, result.node);
        if (err) {
            ast_node_free(result.node);
            ast_node_free(expr);
            return parse_error(err);
        }
        current = result.next;
    }
    return parse_success(expr, current);
 }
 // <immediate> ::= <number> | <label_reference>
 // The matched node is wrapped in a NODE_IMMEDIATE node.
 parse_result_t parse_immediate(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_number, parse_label_reference, nullptr};
    parse_result_t result = parse_any(current, parsers);
    return parse_result_wrap(NODE_IMMEDIATE, result);
 }
 // <memory_expression> ::= <register_expression> | <label_reference>
 parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_register_expression, parse_label_reference,
                          nullptr};
    return parse_any(current, parsers);
 }
 // <memory> ::= <lbracket> <memory_expression> <rbracket>
 parse_result_t parse_memory(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_lbracket, parse_memory_expression,
                          parse_rbracket, nullptr};
    return parse_consecutive(current, NODE_MEMORY, parsers);
 }
 // <operand> ::= <register> | <memory> | <immediate>
 // Order matters: a register name is also an identifier, so it is tried before
 // the immediate alternative (which accepts any identifier as label reference).
 parse_result_t parse_operand(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_register, parse_memory, parse_immediate,
                          nullptr};
    return parse_any(current, parsers);
 }
 // <operands> ::= comma separated list of <operand>, possibly empty
 parse_result_t parse_operands(tokenlist_entry_t *current) {
    return parse_list(current, NODE_OPERANDS, true, TOKEN_COMMA, parse_operand);
 }
 // <label> ::= <identifier> <colon>
 parse_result_t parse_label(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_identifier, parse_colon, nullptr};
    return parse_consecutive(current, NODE_LABEL, parsers);
 }
 // <section_directive> ::= "section" <identifier>
 parse_result_t parse_section_directive(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_section, parse_identifier, nullptr};
    return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
 }
 // <import_directive> ::= "import" <identifier>
 parse_result_t parse_import_directive(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_import, parse_identifier, nullptr};
    return parse_consecutive(current, NODE_IMPORT_DIRECTIVE, parsers);
 }
 // <export_directive> ::= "export" <identifier>
 parse_result_t parse_export_directive(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_export, parse_identifier, nullptr};
    return parse_consecutive(current, NODE_EXPORT_DIRECTIVE, parsers);
 }
 // <directive_options> ::= <section_directive> | <import_directive>
 //                       | <export_directive>
 parse_result_t parse_directive_options(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_section_directive, parse_import_directive,
                          parse_export_directive, nullptr};
    return parse_any(current, parsers);
 }
 // <directive> ::= <dot> <directive_options> <newline>
 parse_result_t parse_directive(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_dot, parse_directive_options, parse_newline,
                          nullptr};
    return parse_consecutive(current, NODE_DIRECTIVE, parsers);
 }
 // <instruction> ::= <identifier> <operands> <newline>
 parse_result_t parse_instruction(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
                          nullptr};
    return parse_consecutive(current, NODE_INSTRUCTION, parsers);
 }
 // <statement> ::= <label> | <directive> | <instruction> | <newline>
 parse_result_t parse_statement(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
                          parse_newline, nullptr};
    return parse_any(current, parsers);
 }
 // Entry point of the parser: <program> ::= <statement>*
 // Leading trivia is skipped first. On a result that carries a node, the
 // NODE_NEWLINE nodes are pruned from the tree before it is handed back.
 parse_result_t parse(tokenlist_entry_t *current) {
    tokenlist_entry_t *first = tokenlist_skip_trivia(current);
    parse_result_t program =
        parse_many(first, NODE_PROGRAM, true, parse_statement);
    if (program.node == nullptr)
        return program;
    ast_node_prune(program.node, NODE_NEWLINE);
    return program;
 }
@@ -1,9 +0,0 @@
 #ifndef INCLUDE_PARSER_PARSER_H_
 #define INCLUDE_PARSER_PARSER_H_
 #include "../tokenlist.h"
 #include "util.h"
 parse_result_t parse(tokenlist_entry_t *current);
 #endif // INCLUDE_PARSER_PARSER_H_
@@ -1,110 +0,0 @@
 #include "primitives.h"
 #include "../ast.h"
 #include "../data/registers.h"
 #include <string.h>
 // Primitive parsers: each one matches exactly one token with a fixed token id
 // and produces a leaf node with the corresponding node id. None of them
 // applies an extra validator (the last parse_token argument is nullptr).
 parse_result_t parse_identifier(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_IDENTIFIER, nullptr);
 }
 parse_result_t parse_decimal(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_DECIMAL, NODE_DECIMAL, nullptr);
 }
 parse_result_t parse_hexadecimal(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_HEXADECIMAL, NODE_HEXADECIMAL, nullptr);
 }
 parse_result_t parse_binary(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_BINARY, NODE_BINARY, nullptr);
 }
 parse_result_t parse_octal(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_OCTAL, NODE_OCTAL, nullptr);
 }
 parse_result_t parse_string(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_STRING, NODE_STRING, nullptr);
 }
 parse_result_t parse_char(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_CHAR, NODE_CHAR, nullptr);
 }
 parse_result_t parse_colon(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_COLON, NODE_COLON, nullptr);
 }
 parse_result_t parse_comma(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_COMMA, NODE_COMMA, nullptr);
 }
 parse_result_t parse_lbracket(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_LBRACKET, NODE_LBRACKET, nullptr);
 }
 parse_result_t parse_rbracket(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_RBRACKET, NODE_RBRACKET, nullptr);
 }
 parse_result_t parse_plus(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_PLUS, NODE_PLUS, nullptr);
 }
 parse_result_t parse_minus(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_MINUS, NODE_MINUS, nullptr);
 }
 parse_result_t parse_asterisk(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_ASTERISK, NODE_ASTERISK, nullptr);
 }
 parse_result_t parse_dot(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
 }
 parse_result_t parse_newline(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
 }
 // Same token as parse_identifier, but tagged as NODE_LABEL_REFERENCE
 parse_result_t parse_label_reference(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
                       nullptr);
 }
 // Validator: true when the token value equals the name of one of the entries
 // in the nullptr-terminated `registers` table.
 bool is_register_token(lexer_token_t *token) {
    size_t index = 0;
    while (registers[index] != nullptr) {
        if (strcmp(token->value, registers[index]->name) == 0)
            return true;
        ++index;
    }
    return false;
 }
 // Identifier token that names a known register (see is_register_token)
 parse_result_t parse_register(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_REGISTER,
                       is_register_token);
 }
 // Validator: the token value is exactly "section" (case sensitive)
 bool is_section_token(lexer_token_t *token) {
    return strcmp(token->value, "section") == 0;
 }
 // Identifier token spelling the "section" keyword
 parse_result_t parse_section(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
                       is_section_token);
 }
 // Validator: the token value is exactly "import" (case sensitive)
 bool is_import_token(lexer_token_t *token) {
    return strcmp(token->value, "import") == 0;
 }
 // Identifier token spelling the "import" keyword
 parse_result_t parse_import(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_IMPORT, is_import_token);
 }
 // Validator: the token value is exactly "export" (case sensitive)
 bool is_export_token(lexer_token_t *token) {
    return strcmp(token->value, "export") == 0;
 }
 // Identifier token spelling the "export" keyword
 parse_result_t parse_export(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_EXPORT, is_export_token);
 }
@@ -1,33 +0,0 @@
 #ifndef INCLUDE_PARSER_PRIMITIVES_H_
 #define INCLUDE_PARSER_PRIMITIVES_H_
 #include "util.h"
 parse_result_t parse_identifier(tokenlist_entry_t *current);
 parse_result_t parse_decimal(tokenlist_entry_t *current);
 parse_result_t parse_hexadecimal(tokenlist_entry_t *current);
 parse_result_t parse_binary(tokenlist_entry_t *current);
 parse_result_t parse_octal(tokenlist_entry_t *current);
 parse_result_t parse_string(tokenlist_entry_t *current);
 parse_result_t parse_char(tokenlist_entry_t *current);
 parse_result_t parse_colon(tokenlist_entry_t *current);
 parse_result_t parse_comma(tokenlist_entry_t *current);
 parse_result_t parse_lbracket(tokenlist_entry_t *current);
 parse_result_t parse_rbracket(tokenlist_entry_t *current);
 parse_result_t parse_plus(tokenlist_entry_t *current);
 parse_result_t parse_minus(tokenlist_entry_t *current);
 parse_result_t parse_asterisk(tokenlist_entry_t *current);
 parse_result_t parse_dot(tokenlist_entry_t *current);
 parse_result_t parse_newline(tokenlist_entry_t *current);
 parse_result_t parse_label_reference(tokenlist_entry_t *current);
 /* These are "primitives" with a different name and some extra validation on top
 * for example, register is just an identifier but it only matches a limited set
 * of values
 */
 parse_result_t parse_register(tokenlist_entry_t *current);
 parse_result_t parse_section(tokenlist_entry_t *current);
 parse_result_t parse_import(tokenlist_entry_t *current);
 parse_result_t parse_export(tokenlist_entry_t *current);
 #endif // INCLUDE_PARSER_PRIMITIVES_H_
@@ -1,56 +0,0 @@
 #include "util.h"
 #include "../tokenlist.h"
 // Sentinel error used to signal "this parser does not match here". Callers
 // compare against this pointer to tell a no-match from other errors.
 error_t *const err_parse_no_match =
    &(error_t){.message = "parsing failed to find the correct token sequence"};
 // Build a failed parse result carrying `err`; node and next are left zeroed.
 parse_result_t parse_error(error_t *err) {
    return (parse_result_t){.err = err};
 }
 // Build a failed parse result carrying the err_parse_no_match sentinel.
 parse_result_t parse_no_match() {
    return parse_error(err_parse_no_match);
 }
 // Build a successful parse result for `ast`. Trivia following the parsed
 // tokens is skipped, so the stored `next` is the first non-trivia entry (or
 // nullptr at the end of the list).
 parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next) {
    next = tokenlist_skip_trivia(next);
    return (parse_result_t){.node = ast, .next = next};
 }
 // Match a single token and turn it into a leaf AST node.
 //
 // The parse succeeds when `current` has token id `token_id` and, if a
 // validator is given, `is_valid` accepts the token. The new node gets id
 // `ast_id` and refers back to the matched token entry. Any mismatch, including
 // an exhausted token stream (`current == nullptr`), is reported as a
 // no-match; a failed node allocation is reported as an error.
 parse_result_t parse_token(tokenlist_entry_t *current,
                           lexer_token_id_t token_id, node_id_t ast_id,
                           token_validator_t is_valid) {
    // Guard against the end of the token list before looking at the token
    if (current == nullptr || current->token.id != token_id ||
        (is_valid && !is_valid(&current->token)))
        return parse_no_match();
    ast_node_t *node;
    error_t *err = ast_node_alloc(&node);
    if (err)
        return parse_error(err);
    node->id = ast_id;
    node->token_entry = current;
    return parse_success(node, current->next);
 }
 // Wrap the node of a successful parse result in a new parent node with the
 // given id. A failed result is passed through untouched.
 //
 // On an allocation failure both the incoming node and (if already created)
 // the new wrapper node are released before the error is returned, so nothing
 // is leaked on any path.
 parse_result_t parse_result_wrap(node_id_t id, parse_result_t result) {
    if (result.err)
        return result;
    ast_node_t *node;
    error_t *err = ast_node_alloc(&node);
    if (err) {
        ast_node_free(result.node);
        return parse_error(err);
    }
    node->id = id;
    err = ast_node_add_child(node, result.node);
    if (err) {
        // The child was not attached: release wrapper and child separately
        ast_node_free(node);
        ast_node_free(result.node);
        return parse_error(err);
    }
    return parse_success(node, result.next);
 }
@@ -1,26 +0,0 @@
 #ifndef INCLUDE_PARSER_UTIL_H_
 #define INCLUDE_PARSER_UTIL_H_
 #include "../ast.h"
 #include "../error.h"
 #include "../tokenlist.h"
 // Outcome of a parser invocation.
 // A failed parse carries only `err` (see parse_error); a successful parse
 // carries the produced `node` and the token entry to continue at in `next`,
 // with `err` left as nullptr (see parse_success).
 typedef struct parse_result {
    error_t *err;
    tokenlist_entry_t *next;
    ast_node_t *node;
 } parse_result_t;
 typedef bool (*token_validator_t)(lexer_token_t *);
 parse_result_t parse_error(error_t *err);
 parse_result_t parse_no_match();
 parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next);
 parse_result_t parse_token(tokenlist_entry_t *current,
                           lexer_token_id_t token_id, node_id_t ast_id,
                           token_validator_t is_valid);
 parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);
 extern error_t *const err_parse_no_match;
 #endif // INCLUDE_PARSER_UTIL_H_
@@ -81,25 +81,3 @@ error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex) {
        return err;
    return nullptr;
 }
 // Trivia are the tokens the parser never looks at: whitespace and comments.
 bool is_trivia(tokenlist_entry_t *trivia) {
    return trivia->token.id == TOKEN_WHITESPACE ||
           trivia->token.id == TOKEN_COMMENT;
 }
 // Starting at `current` itself, walk forward to the first entry that is not
 // trivia. Returns nullptr when the list ends before such an entry is found.
 tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current) {
    for (; current != nullptr; current = current->next) {
        if (!is_trivia(current))
            break;
    }
    return current;
 }
 // Step past `current` and return the next entry that is not trivia, or
 // nullptr when `current` is nullptr or no such entry exists.
 tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current) {
    return current ? tokenlist_skip_trivia(current->next) : nullptr;
 }
@@ -27,14 +27,4 @@ error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex);
 void tokenlist_free(tokenlist_t *list);
 /**
 * Return the first token entry, starting at current itself, that isn't
 * whitespace or a comment. Newline tokens are not skipped. Returns nullptr if
 * the list ends first.
 */
 tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current);
 /**
 * Return the next token entry after current that isn't whitespace or a
 * comment. Newline tokens are not skipped. Returns nullptr if current is
 * nullptr or the list ends first.
 */
 tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current);
 #endif // INCLUDE_SRC_TOKENLIST_H_
@@ -1,6 +0,0 @@
 BasedOnStyle:    LLVM
 IndentWidth:     4
 Cpp11BracedListStyle: true
 AlignArrayOfStructures: Left
 AllowShortFunctionsOnASingleLine: Empty
 ColumnLimit: 120
@@ -1,22 +0,0 @@
 #include "../src/ast.h"
 #include "munit.h"
 // Smoke test for the AST allocator: ast_node_alloc must hand back a non-null
 // node together with a null error. The node is released again with
 // ast_node_free so the test itself does not leak.
 MunitResult test_ast_node_alloc(const MunitParameter params[], void *data) {
    // Neither parameters nor fixture data are used by this test
    (void)params;
    (void)data;
    ast_node_t *node = nullptr;
    error_t *err = ast_node_alloc(&node);
    munit_assert_ptr_not_null(node);
    munit_assert_ptr_null(err);
    ast_node_free(node);
    return MUNIT_OK;
 }
 // Test table for the AST module. The final all-null entry terminates the
 // table (NOTE(review): sentinel convention assumed from munit -- confirm).
 MunitTest ast_tests[] = {
    {"/node_alloc", test_ast_node_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr,       nullptr,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
@@ -1,164 +0,0 @@
 #include "../src/bytes.h"
 #include "munit.h"
 // A buffer created with LOCAL_BYTES(16) must start with len 0, cap 16 and
 // sixteen zeroed buffer bytes.
 MunitResult test_bytes_initializer(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);
    size_t i = 0;
    while (i < 16) {
        munit_assert_uint8(bytes->buffer[i], ==, 0);
        ++i;
    }
    return MUNIT_OK;
 }
 MunitResult test_bytes_append_uint8(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);
    for (size_t i = 0; i < 16; ++i) {
        error_t *err = bytes_append_uint8(bytes, (uint8_t)i);
        munit_assert_null(err);
        munit_assert_uint8(bytes->buffer[i], ==, (uint8_t)i);
    }
    error_t *err = bytes_append_uint8(bytes, 0xFF);
    munit_assert_ptr(err, ==, err_bytes_no_capacity);
    return MUNIT_OK;
 }
 // Append a 5 byte and a 3 byte array to a 16 byte buffer and check length and
 // contents after each step, then verify that a 10 byte array is rejected with
 // err_bytes_no_capacity and leaves the length untouched.
 MunitResult test_bytes_append_array(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);
    // first append: 5 bytes into the empty buffer
    uint8_t test_array[] = {0x01, 0x02, 0x03, 0x04, 0x05};
    size_t array_len = sizeof test_array / sizeof test_array[0];
    error_t *err = bytes_append_array(bytes, array_len, test_array);
    munit_assert_null(err);
    munit_assert_size(bytes->len, ==, array_len);
    size_t i = 0;
    while (i < array_len) {
        munit_assert_uint8(bytes->buffer[i], ==, test_array[i]);
        ++i;
    }
    // second append: 3 more bytes land directly behind the first 5
    uint8_t second_array[] = {0x06, 0x07, 0x08};
    size_t second_len = sizeof second_array / sizeof second_array[0];
    err = bytes_append_array(bytes, second_len, second_array);
    munit_assert_null(err);
    munit_assert_size(bytes->len, ==, array_len + second_len);
    i = 0;
    while (i < second_len) {
        munit_assert_uint8(bytes->buffer[array_len + i], ==, second_array[i]);
        ++i;
    }
    // third append: 8 + 10 bytes would exceed the capacity of 16
    uint8_t overflow_array[10] = {0};
    err = bytes_append_array(bytes, sizeof(overflow_array), overflow_array);
    munit_assert_ptr(err, ==, err_bytes_no_capacity);
    munit_assert_size(bytes->len, ==, array_len + second_len);
    return MUNIT_OK;
 }
 MunitResult test_bytes_append_bytes(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    bytes_t *src = LOCAL_BYTES(8);
    bytes_t *dst = LOCAL_BYTES(16);
    // Fill source bytes with test data
    for (uint8_t i = 0; i < 5; ++i) {
        error_t *err = bytes_append_uint8(src, i + 1);
        munit_assert_null(err);
    }
    munit_assert_size(src->len, ==, 5);
    // Append source to destination
    error_t *err = bytes_append_bytes(dst, src);
    munit_assert_null(err);
    munit_assert_size(dst->len, ==, src->len);
    // Verify destination contents match source
    for (size_t i = 0; i < src->len; ++i) {
        munit_assert_uint8(dst->buffer[i], ==, src->buffer[i]);
    }
    // Fill source with more data and append again
    for (uint8_t i = 0; i < 3; ++i) {
        err = bytes_append_uint8(src, i + 6);
        munit_assert_null(err);
    }
    munit_assert_size(src->len, ==, 8);
    // Append updated source
    err = bytes_append_bytes(dst, src);
    munit_assert_null(err);
    munit_assert_size(dst->len, ==, 13); // 5 + 8
    // Test capacity boundary
    src->len = 4; // manually set length to barely not fit
    err = bytes_append_bytes(dst, src);
    munit_assert_ptr(err, ==, err_bytes_no_capacity);
    munit_assert_size(dst->len, ==, 13); // Length unchanged after error
    return MUNIT_OK;
 }
 // Appending the uint16 value 0xFFAA must add two bytes to the buffer, least
 // significant byte first (0xAA, then 0xFF).
 MunitResult test_bytes_append_uint16(const MunitParameter params[], void *data) {
    // unused, but silenced explicitly like in the other tests of this file so
    // the build stays clean with -Wextra
    (void)params;
    (void)data;
    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);
    // NOTE(review): the return value is ignored here. If bytes_append_uint16
    // returns an error_t * like bytes_append_uint8 does, consider asserting
    // that it is null.
    bytes_append_uint16(bytes, 0xFFAA);
    munit_assert_size(bytes->len, ==, 2);
    munit_assert_uint8(bytes->buffer[0], ==, 0xAA);
    munit_assert_uint8(bytes->buffer[1], ==, 0xFF);
    return MUNIT_OK;
 }
 // Appending the uint32 value 0xAABBCCDD must add four bytes to the buffer,
 // least significant byte first (0xDD, 0xCC, 0xBB, 0xAA).
 MunitResult test_bytes_append_uint32(const MunitParameter params[], void *data) {
    // unused, but silenced explicitly like in the other tests of this file so
    // the build stays clean with -Wextra
    (void)params;
    (void)data;
    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);
    // NOTE(review): the return value is ignored here. If bytes_append_uint32
    // returns an error_t * like bytes_append_uint8 does, consider asserting
    // that it is null.
    bytes_append_uint32(bytes, 0xAABBCCDD);
    munit_assert_size(bytes->len, ==, 4);
    munit_assert_uint8(bytes->buffer[0], ==, 0xDD);
    munit_assert_uint8(bytes->buffer[1], ==, 0xCC);
    munit_assert_uint8(bytes->buffer[2], ==, 0xBB);
    munit_assert_uint8(bytes->buffer[3], ==, 0xAA);
    return MUNIT_OK;
 }
 // Appending the uint64 value 0xAABBCCDDEEFF9988 must add eight bytes to the
 // buffer, least significant byte first (0x88 ... 0xAA).
 MunitResult test_bytes_append_uint64(const MunitParameter params[], void *data) {
    // unused, but silenced explicitly like in the other tests of this file so
    // the build stays clean with -Wextra
    (void)params;
    (void)data;
    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);
    // NOTE(review): the return value is ignored here. If bytes_append_uint64
    // returns an error_t * like bytes_append_uint8 does, consider asserting
    // that it is null.
    bytes_append_uint64(bytes, 0xAABBCCDDEEFF9988);
    munit_assert_size(bytes->len, ==, 8);
    munit_assert_uint8(bytes->buffer[0], ==, 0x88);
    munit_assert_uint8(bytes->buffer[1], ==, 0x99);
    munit_assert_uint8(bytes->buffer[2], ==, 0xFF);
    munit_assert_uint8(bytes->buffer[3], ==, 0xEE);
    munit_assert_uint8(bytes->buffer[4], ==, 0xDD);
    munit_assert_uint8(bytes->buffer[5], ==, 0xCC);
    munit_assert_uint8(bytes->buffer[6], ==, 0xBB);
    munit_assert_uint8(bytes->buffer[7], ==, 0xAA);
    return MUNIT_OK;
 }
 // Test table for the bytes tests, one entry per test function above. The
 // last entry has a nullptr name and function; presumably this is the
 // terminator munit expects (confirm against the munit documentation).
 MunitTest bytes_tests[] = {
    {"/initializer",   test_bytes_initializer,   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/append_uint8",  test_bytes_append_uint8,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/append_array",  test_bytes_append_array,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/append_bytes",  test_bytes_append_bytes,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/append_uint16", test_bytes_append_uint16, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/append_uint32", test_bytes_append_uint32, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/append_uint64", test_bytes_append_uint64, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr,          nullptr,                  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
@@ -1,65 +0,0 @@
 lbl_0:  ; 65 symbols used for testing growing the symbols table
 lbl_1:
 lbl_2:
 lbl_3:
 lbl_4:
 lbl_5:
 lbl_6:
 lbl_7:
 lbl_8:
 lbl_9:
 lbl_10:
 lbl_11:
 lbl_12:
 lbl_13:
 lbl_14:
 lbl_15:
 lbl_16:
 lbl_17:
 lbl_18:
 lbl_19:
 lbl_20:
 lbl_21:
 lbl_22:
 lbl_23:
 lbl_24:
 lbl_25:
 lbl_26:
 lbl_27:
 lbl_28:
 lbl_29:
 lbl_30:
 lbl_31:
 lbl_32:
 lbl_33:
 lbl_34:
 lbl_35:
 lbl_36:
 lbl_37:
 lbl_38:
 lbl_39:
 lbl_40:
 lbl_41:
 lbl_42:
 lbl_43:
 lbl_44:
 lbl_45:
 lbl_46:
 lbl_47:
 lbl_48:
 lbl_49:
 lbl_50:
 lbl_51:
 lbl_52:
 lbl_53:
 lbl_54:
 lbl_55:
 lbl_56:
 lbl_57:
 lbl_58:
 lbl_59:
 lbl_60:
 lbl_61:
 lbl_62:
 lbl_63:
 lbl_64:
@@ -1,5 +0,0 @@
 ; regression test for two issues:
 ;  - parsing two zero operand instructions in a row
 ;  - a zero operand instruction just before eof
    syscall
    ret
@@ -1,5 +0,0 @@
 ; sample program with trivia on the head of the tokenlist
 _start:
    xor rax, rax
    call exit
@@ -1,12 +0,0 @@
 .import test
 .export test
 test:
    call test
 .import more
 .export more
 more:
    call more
 .import other
 .export other
 other:
    call other
@@ -1,20 +1,5 @@
 .section text
 ; Small valid code snippet that should contain all different AST nodes
 .export _start
 .import exit
 _start:
-    mov eax, ebx
+    mov eax, 555            ; move 555 into eax
    lea eax, [eax + ebx * 4 + 8]
    lea eax, [eax + 8]
    lea eax, [eax + ebx * 8]
    lea eax, [esp - 24]
    lea eax, [eax + ebx * 4 - 8]
    lea eax, [_start]
    mov eax, _start
    mov eax, 555
    push 0o777
    xor eax, 0xDEADBEEF
    and ecx, 0o770
@@ -22,5 +7,3 @@ _start:
    push 0xffff:64
    push 0o777:16
    push 0b0001:16
    mov rax, 0
    call exit
@@ -1,896 +0,0 @@
 #include "../src/lexer.h"
 #include "../src/error.h"
 #include "munit.h"
 #include <string.h>
 // Attach an in-memory stream over `input` to a lexer that has no file yet and
 // reset its line, character and buffer counters to zero.
 void lexer_setup_memory_test(lexer_t *lex, const char *input) {
    munit_assert_null(lex->fp);
    // fmemopen wants a non-const buffer; the stream itself is opened with "rb"
    FILE *stream = fmemopen((void *)input, strlen(input), "rb");
    munit_assert_not_null(stream);
    lex->buffer_count = 0;
    lex->character_number = 0;
    lex->line_number = 0;
    lex->fp = stream;
 }
 // Read the next token from `lex` and assert that reading succeeded and that
 // the token has the given id, value, line number and character number. The
 // token is cleaned up afterwards.
 void lexer_expect_one_token(lexer_t *lex, lexer_token_id_t id, const char *value, size_t line, size_t column) {
    lexer_token_t token = {};
    error_t *err = lexer_next(lex, &token);
    // reading itself must not fail
    munit_assert_null(err);
    // kind and text of the token
    munit_assert_int(token.id, ==, id);
    munit_assert_string_equal(token.value, value);
    // position of the token
    munit_assert_int(token.line_number, ==, line);
    munit_assert_int(token.character_number, ==, column);
    lexer_token_cleanup(&token);
 }
 // Assert that the next read from `lex` reports err_eof.
 void lexer_expect_eof(lexer_t *lex) {
    lexer_token_t token = {};
    error_t *err = lexer_next(lex, &token);
    // the returned error pointer must be the err_eof value itself
    munit_assert_ptr_equal(err, err_eof);
 }
 // Lex `value` with a fresh lexer and assert that it produces exactly one
 // token with the given id and `value` as its text at line 0, column 0,
 // followed by end of file.
 void lexer_test_one_token(lexer_token_id_t id, const char *value) {
    lexer_t lex = {};
    lexer_setup_memory_test(&lex, value);
    // exactly one token ...
    lexer_expect_one_token(&lex, id, value, 0, 0);
    // ... and nothing after it
    lexer_expect_eof(&lex);
    lexer_close(&lex);
 }
 // Each input must lex as a single TOKEN_IDENTIFIER.
 MunitResult test_lexer_identifier(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"identifier", "_identifier", "_identifier123_55"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_IDENTIFIER, inputs[i]);
    return MUNIT_OK;
 }
 // Expected properties of one lexed token: its id, its text and the line and
 // column it is expected at.
 typedef struct token_data {
    lexer_token_id_t id;
    // const: only ever initialised from string literals and read afterwards
    const char *value;
    size_t line;
    size_t column;
 } token_data_t;
 // One boundary test case: the text to lex and the two tokens it is expected
 // to produce. test_lexer_boundary stops at the first entry whose input is
 // nullptr.
 typedef struct boundary {
    const char *input;
    token_data_t first;
    token_data_t second;
 } boundary_t;
 // Run every case of a table terminated by an entry with a nullptr input: lex
 // the input with a fresh lexer and expect exactly the first token, the second
 // token and then end of file.
 void test_lexer_boundary(boundary_t boundaries[]) {
    for (boundary_t *entry = boundaries; entry->input != nullptr; ++entry) {
        lexer_t lex = {};
        lexer_setup_memory_test(&lex, entry->input);
        lexer_expect_one_token(&lex, entry->first.id, entry->first.value, entry->first.line, entry->first.column);
        lexer_expect_one_token(&lex, entry->second.id, entry->second.value, entry->second.line, entry->second.column);
        lexer_expect_eof(&lex);
        lexer_close(&lex);
    }
 }
 // The identifier "id" must end right before each of the listed follow-up
 // tokens, which is then expected at column 2.
 MunitResult test_lexer_identifier_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"id:",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COLON, ":", 0, 2}         },
        {"id[",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_LBRACKET, "[", 0, 2}      },
        {"id]",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_RBRACKET, "]", 0, 2}      },
        {"id+",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_PLUS, "+", 0, 2}          },
        {"id-",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_MINUS, "-", 0, 2}         },
        {"id*",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_ASTERISK, "*", 0, 2}      },
        {"id.",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_DOT, ".", 0, 2}           },
        {"id;comment", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COMMENT, ";comment", 0, 2}},
        {"id\n",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 2}      },
        {"id\r\n",     {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 2}    },
        {"id ",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 2}    },
        {"id\t",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 2}   },
        {nullptr,      {},                             {}                               },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Each input must lex as a single TOKEN_DECIMAL.
 MunitResult test_lexer_decimal(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"123", "0", "42"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
    return MUNIT_OK;
 }
 // A decimal number followed by a ":8", ":16", ":32" or ":64" suffix must lex
 // as one single TOKEN_DECIMAL.
 MunitResult test_lexer_decimal_with_suffix(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"123:8", "0:16", "42:32", "69:64"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
    return MUNIT_OK;
 }
 // Each input, with lower or upper case digits, must lex as a single
 // TOKEN_HEXADECIMAL.
 MunitResult test_lexer_hexadecimal(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"0x123", "0xDEAD", "0x0", "0xabcdef", "0xABCDEF"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
    return MUNIT_OK;
 }
 // A hexadecimal number followed by a ":8", ":16", ":32" or ":64" suffix must
 // lex as one single TOKEN_HEXADECIMAL.
 MunitResult test_lexer_hexadecimal_with_suffix(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"0x123:8", "0xDEAD:16", "0xABC:32", "0xffff:64"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
    return MUNIT_OK;
 }
 // Each input must lex as a single TOKEN_OCTAL.
 MunitResult test_lexer_octal(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"0o777", "0o0", "0o123"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
    return MUNIT_OK;
 }
 // An octal number followed by a ":8", ":16", ":32" or ":64" suffix must lex
 // as one single TOKEN_OCTAL.
 MunitResult test_lexer_octal_with_suffix(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"0o777:8", "0o123:16", "0o777:32", "0o123:64"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
    return MUNIT_OK;
 }
 // Each input must lex as a single TOKEN_BINARY.
 MunitResult test_lexer_binary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"0b101", "0b0", "0b1", "0b01010101"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
    return MUNIT_OK;
 }
 // A binary number followed by a ":8", ":16", ":32" or ":64" suffix must lex
 // as one single TOKEN_BINARY.
 MunitResult test_lexer_binary_with_suffix(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {"0b101:8", "0b0:16", "0b1:32", "0b01010101:64"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
    return MUNIT_OK;
 }
 // ":" on its own must lex as a single TOKEN_COLON.
 MunitResult test_lexer_colon(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    lexer_test_one_token(TOKEN_COLON, ":");
    return MUNIT_OK;
 }
 // "," on its own must lex as a single TOKEN_COMMA.
 MunitResult test_lexer_comma(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    lexer_test_one_token(TOKEN_COMMA, ",");
    return MUNIT_OK;
 }
 // "[" on its own must lex as a single TOKEN_LBRACKET.
 MunitResult test_lexer_lbracket(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    lexer_test_one_token(TOKEN_LBRACKET, "[");
    return MUNIT_OK;
 }
 // "]" on its own must lex as a single TOKEN_RBRACKET.
 MunitResult test_lexer_rbracket(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    lexer_test_one_token(TOKEN_RBRACKET, "]");
    return MUNIT_OK;
 }
 // "+" on its own must lex as a single TOKEN_PLUS.
 MunitResult test_lexer_plus(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    lexer_test_one_token(TOKEN_PLUS, "+");
    return MUNIT_OK;
 }
 // "-" on its own must lex as a single TOKEN_MINUS.
 MunitResult test_lexer_minus(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    lexer_test_one_token(TOKEN_MINUS, "-");
    return MUNIT_OK;
 }
 // "*" on its own must lex as a single TOKEN_ASTERISK.
 MunitResult test_lexer_asterisk(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    lexer_test_one_token(TOKEN_ASTERISK, "*");
    return MUNIT_OK;
 }
 // "." on its own must lex as a single TOKEN_DOT.
 MunitResult test_lexer_dot(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    lexer_test_one_token(TOKEN_DOT, ".");
    return MUNIT_OK;
 }
 // Text starting with ";" must lex as a single TOKEN_COMMENT, including a
 // lone ";".
 MunitResult test_lexer_comment(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {";This is a comment", "; Another comment", ";"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_COMMENT, inputs[i]);
    return MUNIT_OK;
 }
 // Runs of spaces and tabs must lex as one single TOKEN_WHITESPACE.
 MunitResult test_lexer_whitespace(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const inputs[] = {" ", "  ", "\t", " \t "};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i)
        lexer_test_one_token(TOKEN_WHITESPACE, inputs[i]);
    return MUNIT_OK;
 }
 // Both "\n" and "\r\n" must lex as a single TOKEN_NEWLINE at line 0,
 // column 0, followed by end of file.
 MunitResult test_lexer_newlines(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    // lexer_test_one_token performs the same fresh lexer / one token / eof /
    // close sequence that was previously spelled out here
    // Simple newline
    lexer_test_one_token(TOKEN_NEWLINE, "\n");
    // Windows-style newline
    lexer_test_one_token(TOKEN_NEWLINE, "\r\n");
    return MUNIT_OK;
 }
 MunitResult test_lexer_line_numbers(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    lexer_t lex = {};
    lexer_setup_memory_test(&lex, "a\nb\nc");
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "a", 0, 0);
    lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 0, 1);
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "b", 1, 0);
    lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 1, 1);
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "c", 2, 0);
    lexer_expect_eof(&lex);
    lexer_close(&lex);
    return MUNIT_OK;
 }
 // The decimal "123" must end right before each of the listed follow-up
 // tokens, which is then expected at column 3.
 MunitResult test_lexer_decimal_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"123,",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMA, ",", 0, 3}      },
        {"123:",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COLON, ":", 0, 3}      },
        {"123[",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 3}   },
        {"123]",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 3}   },
        {"123+",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_PLUS, "+", 0, 3}       },
        {"123-",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_MINUS, "-", 0, 3}      },
        {"123*",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 3}   },
        {"123.",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_DOT, ".", 0, 3}        },
        {"123;",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMENT, ";", 0, 3}    },
        {"123\n",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 3}   },
        {"123\r\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 3} },
        {"123 ",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 3} },
        {"123\t",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 3}},
        {nullptr,   {},                           {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // The hexadecimal "0x123" must end right before each of the listed follow-up
 // tokens, which is then expected at column 5.
 MunitResult test_lexer_hexadecimal_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"0x123,",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0x123:",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0x123[",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0x123]",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0x123+",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0x123-",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0x123*",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0x123.",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0x123;",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0x123\n",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0x123\r\n", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0x123 ",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0x123\t",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                                 {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // The octal "0o123" must end right before each of the listed follow-up
 // tokens, which is then expected at column 5.
 MunitResult test_lexer_octal_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"0o123,",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0o123:",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0o123[",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0o123]",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0o123+",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0o123-",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0o123*",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0o123.",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0o123;",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0o123\n",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0o123\r\n", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0o123 ",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0o123\t",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                           {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // The binary "0b101" must end right before each of the listed follow-up
 // tokens, which is then expected at column 5.
 MunitResult test_lexer_binary_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"0b101,",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0b101:",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0b101[",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0b101]",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0b101+",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0b101-",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0b101*",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0b101.",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0b101;",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0b101\n",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0b101\r\n", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0b101 ",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0b101\t",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                            {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // A ":" must be lexed as its own token in front of each of the listed
 // follow-up tokens, which is then expected at column 1.
 MunitResult test_lexer_colon_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {":,",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"::",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {":[",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {":]",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {":+",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {":-",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {":*",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {":.",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {":;",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {":\n",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {":\r\n", {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {": ",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {":\t",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                       {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // A "," must be lexed as its own token in front of each of the listed
 // follow-up tokens, which is then expected at column 1.
 MunitResult test_lexer_comma_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {",,",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {",:",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {",[",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {",]",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {",+",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {",-",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {",*",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {",.",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {",;",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {",\n",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {",\r\n", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {", ",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {",\t",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                       {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // A "[" must be lexed as its own token in front of each of the listed
 // follow-up tokens, which is then expected at column 1.
 MunitResult test_lexer_lbracket_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"[,",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"[:",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"[[",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"[]",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"[+",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"[-",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"[*",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"[.",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"[;",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"[\n",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"[\r\n", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"[ ",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"[\t",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                          {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // A "]" must be lexed as its own token in front of each of the listed
 // follow-up tokens, which is then expected at column 1.
 MunitResult test_lexer_rbracket_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"],",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"]:",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"][",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"]]",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"]+",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"]-",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"]*",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"].",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"];",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"]\n",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"]\r\n", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"] ",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"]\t",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                          {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // A "+" must be lexed as its own token in front of each of the listed
 // follow-up tokens, which is then expected at column 1.
 MunitResult test_lexer_plus_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"+,",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"+:",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"+[",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"+]",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"++",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"+-",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"+*",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"+.",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"+;",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"+\n",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"+\r\n", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"+ ",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"+\t",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                      {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // A "-" must be lexed as its own token in front of each of the listed
 // follow-up tokens, which is then expected at column 1.
 MunitResult test_lexer_minus_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"-,",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"-:",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"-[",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"-]",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"-+",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"--",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"-*",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"-.",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"-;",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"-\n",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"-\r\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"- ",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"-\t",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                       {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // A "*" must be lexed as its own token in front of each of the listed
 // follow-up tokens, which is then expected at column 1.
 MunitResult test_lexer_asterisk_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"*,",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"*:",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"*[",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"*]",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"*+",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"*-",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"**",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"*.",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"*;",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"*\n",   {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"*\r\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"* ",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"*\t",   {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                          {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Token-boundary check for '.': each row pairs an input that starts with a
 // dot with the two tokens expected out of it (the trailing integers of each
 // token look like zero-based line and column). The nullptr-input row closes
 // the table, which test_lexer_boundary() then verifies.
 MunitResult test_lexer_dot_boundary(const MunitParameter params[], void *data) {
    boundary_t dot_cases[] = {
        {".,",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {".:",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {".[",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {".]",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {".+",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {".-",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {".*",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"..",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {".;",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {".\n",   {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {".\r\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {". ",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {".\t",   {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                     {}                            },
    };

    // The munit callback arguments are not needed by this test.
    (void)data;
    (void)params;

    test_lexer_boundary(dot_cases);
    return MUNIT_OK;
 }
 // Token-boundary check for comments: a ";comment" is expected to stop at the
 // line ending, leaving "\n" or "\r\n" as a separate newline token at column 8.
 // Only line endings are listed as followers here.
 MunitResult test_lexer_comment_boundary(const MunitParameter params[], void *data) {
    boundary_t comment_cases[] = {
        {";comment\n",   {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}  },
        {";comment\r\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}},
        {nullptr,        {},                                {}                           },
    };

    // The munit callback arguments are not needed by this test.
    (void)data;
    (void)params;

    test_lexer_boundary(comment_cases);
    return MUNIT_OK;
 }
 // Token-boundary check for whitespace: a single space followed by each
 // non-whitespace token kind. There is no space-then-tab row in this table.
 // The nullptr-input row closes the table, which test_lexer_boundary() verifies.
 MunitResult test_lexer_whitespace_boundary(const MunitParameter params[], void *data) {
    boundary_t whitespace_cases[] = {
        {" ,",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMA, ",", 0, 1}     },
        {" :",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COLON, ":", 0, 1}     },
        {" [",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}  },
        {" ]",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}  },
        {" +",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_PLUS, "+", 0, 1}      },
        {" -",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_MINUS, "-", 0, 1}     },
        {" *",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}  },
        {" .",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_DOT, ".", 0, 1}       },
        {" ;",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}   },
        {" \n",   {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}  },
        {" \r\n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {nullptr, {},                            {}                           },
    };

    // The munit callback arguments are not needed by this test.
    (void)data;
    (void)params;

    test_lexer_boundary(whitespace_cases);
    return MUNIT_OK;
 }
 // Token-boundary check for "\n": whatever follows a line feed is expected on
 // the next line at column 0 (second token position is {1, 0} in every row).
 // The nullptr-input row closes the table, which test_lexer_boundary() verifies.
 MunitResult test_lexer_newline_boundary(const MunitParameter params[], void *data) {
    boundary_t newline_cases[] = {
        {"\n,",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}      },
        {"\n:",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}      },
        {"\n[",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}   },
        {"\n]",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}   },
        {"\n+",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}       },
        {"\n-",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}      },
        {"\n*",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}   },
        {"\n.",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}        },
        {"\n;",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}    },
        {"\n\n",   {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}   },
        {"\n\r\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0} },
        {"\n ",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0} },
        {"\n\t",   {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
        {nullptr,  {},                          {}                            },
    };

    // The munit callback arguments are not needed by this test.
    (void)data;
    (void)params;

    test_lexer_boundary(newline_cases);
    return MUNIT_OK;
 }
 // Token-boundary check for "\r\n": the pair is expected as one newline token,
 // and whatever follows it on the next line at column 0 ({1, 0} in every row).
 // The nullptr-input row closes the table, which test_lexer_boundary() verifies.
 MunitResult test_lexer_crlf_boundary(const MunitParameter params[], void *data) {
    boundary_t crlf_cases[] = {
        {"\r\n,",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}      },
        {"\r\n:",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}      },
        {"\r\n[",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}   },
        {"\r\n]",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}   },
        {"\r\n+",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}       },
        {"\r\n-",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}      },
        {"\r\n*",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}   },
        {"\r\n.",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}        },
        {"\r\n;",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}    },
        {"\r\n\n",   {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}   },
        {"\r\n\r\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0} },
        {"\r\n ",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0} },
        {"\r\n\t",   {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
        {nullptr,    {},                            {}                            },
    };

    // The munit callback arguments are not needed by this test.
    (void)data;
    (void)params;

    test_lexer_boundary(crlf_cases);
    return MUNIT_OK;
 }
 // Token-boundary check for numbers carrying a ":<width>" suffix: the suffix is
 // expected to stay part of the number token, and the character after it to
 // start a new token at the column where the number ends (7 or 8 here).
 // The nullptr-input row closes the table, which test_lexer_boundary() verifies.
 MunitResult test_lexer_number_boundary(const MunitParameter params[], void *data) {
    boundary_t number_cases[] = {
        {"0x123:8,",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_COMMA, ",", 0, 7}      },
        {"0x123:16:",    {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_COLON, ":", 0, 8}      },
        {"0o777:32[",    {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_LBRACKET, "[", 0, 8}   },
        {"0b101:64]",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_RBRACKET, "]", 0, 8}   },
        {"0x123:8+",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_PLUS, "+", 0, 7}       },
        {"0x123:16-",    {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_MINUS, "-", 0, 8}      },
        {"0o777:32*",    {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_ASTERISK, "*", 0, 8}   },
        {"0b101:64.",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_DOT, ".", 0, 8}        },
        {"0x123:8;",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_COMMENT, ";", 0, 7}    },
        {"0x123:16\n",   {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}   },
        {"0o777:32\r\n", {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_NEWLINE, "\r\n", 0, 8} },
        {"0b101:64 ",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_WHITESPACE, " ", 0, 8} },
        {"0x123:8\t",    {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_WHITESPACE, "\t", 0, 7}},
        {nullptr,        {},                                    {}                            },
    };

    // The munit callback arguments are not needed by this test.
    (void)data;
    (void)params;

    test_lexer_boundary(number_cases);
    return MUNIT_OK;
 }
 // Checks that numbers of exactly 128 characters (decimal and hexadecimal, with
 // and without a ":64" suffix) are lexed as a single number token whose value
 // is also 128 characters long.
 MunitResult test_lexer_maximum_length_numbers(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    // nullptr-terminated so the loop below needs no element count.
    char *numbers[] = {
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999999988",
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999998:64",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999999988",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999998:64",
        nullptr,
    };

    for (char **entry = numbers; *entry; ++entry) {
        char *number = *entry;
        // Guard against a miscounted literal before blaming the lexer.
        munit_assert_size(128, ==, strlen(number));

        lexer_token_t token = {};
        lexer_t lex = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);

        munit_assert_true(token.id == TOKEN_DECIMAL || token.id == TOKEN_HEXADECIMAL);
        munit_assert_size(128, ==, strlen(token.value));

        lexer_token_cleanup(&token);
        lexer_close(&lex);
    }

    return MUNIT_OK;
 }
 MunitResult test_lexer_too_long_numbers(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    char *numbers[] = {
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999999988",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999999988",
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999998:64",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999998:64",
    };
    // Without suffix we expect 128 characters and then failure
    for (size_t i = 0; i < 2; ++i) {
        auto number = numbers[i];
        munit_assert_size(129, ==, strlen(number));
        lexer_t lex = {};
        lexer_token_t token = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);
        munit_assert_int(TOKEN_ERROR, ==, token.id);
        munit_assert_size(128, ==, strlen(token.value));
        lexer_token_cleanup(&token);
        lexer_close(&lex);
    }
    // With suffix we fail at the suffix boundary
    for (size_t i = 2; i < 4; ++i) {
        auto number = numbers[i];
        munit_assert_size(129, ==, strlen(number));
        lexer_t lex = {};
        lexer_token_t token = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);
        munit_assert_int(TOKEN_ERROR, ==, token.id);
        munit_assert_size(128, >=, strlen(token.value));
        lexer_token_cleanup(&token);
        lexer_expect_one_token(&lex, TOKEN_COLON, ":", 0, 126);
        lexer_expect_one_token(&lex, TOKEN_DECIMAL, "64", 0, 127);
        lexer_close(&lex);
    }
    return MUNIT_OK;
 }
 // Checks that a run of exactly 1024 spaces is lexed as one whitespace token
 // whose value is 1024 characters long.
 MunitResult test_lexer_max_whitespace_length(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    // 1024 spaces plus the terminating NUL.
    char whitespace[1025];
    const size_t fill = sizeof(whitespace) - 1;
    memset(whitespace, ' ', fill);
    whitespace[fill] = '\0';
    munit_assert_size(1024, ==, strlen(whitespace));

    lexer_token_t token = {};
    lexer_t lex = {};
    lexer_setup_memory_test(&lex, whitespace);
    lexer_next(&lex, &token);

    munit_assert_int(TOKEN_WHITESPACE, ==, token.id);
    munit_assert_size(1024, ==, strlen(token.value));

    lexer_token_cleanup(&token);
    lexer_close(&lex);
    return MUNIT_OK;
 }
 // Checks that a run of 1025 spaces is rejected: the first token must be a
 // TOKEN_ERROR holding 1024 characters, and the one remaining space must then
 // come out as a whitespace token at column 1024.
 MunitResult test_lexer_too_long_whitespace(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    // 1025 spaces plus the terminating NUL.
    char whitespace[1026];
    const size_t fill = sizeof(whitespace) - 1;
    memset(whitespace, ' ', fill);
    whitespace[fill] = '\0';
    munit_assert_size(1025, ==, strlen(whitespace));

    lexer_token_t token = {};
    lexer_t lex = {};
    lexer_setup_memory_test(&lex, whitespace);
    lexer_next(&lex, &token);

    munit_assert_int(TOKEN_ERROR, ==, token.id);
    munit_assert_size(1024, ==, strlen(token.value));
    lexer_token_cleanup(&token);

    lexer_expect_one_token(&lex, TOKEN_WHITESPACE, " ", 0, 1024);
    lexer_close(&lex);
    return MUNIT_OK;
 }
 // Registration table for the lexer tests. Each entry is a MunitTest in field
 // order: name, test function, setup, tear_down, options, parameters. No test
 // here uses setup/tear_down or parameters (all nullptr) and every entry uses
 // MUNIT_TEST_OPTION_NONE. The final all-null entry closes the table. A new
 // test function only takes effect once it is listed here.
 MunitTest lexer_tests[] = {
    {"/identifier",              test_lexer_identifier,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/identifier_boundary",     test_lexer_identifier_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/decimal",                 test_lexer_decimal,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/decimal_boundary",        test_lexer_decimal_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal",             test_lexer_hexadecimal,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal_with_suffix", test_lexer_hexadecimal_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal_boundary",    test_lexer_hexadecimal_boundary,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal",                   test_lexer_octal,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal_with_suffix",       test_lexer_octal_with_suffix,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal_boundary",          test_lexer_octal_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary",                  test_lexer_binary,                  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary_with_suffix",      test_lexer_binary_with_suffix,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary_boundary",         test_lexer_binary_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/number_boundary",         test_lexer_number_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/colon",                   test_lexer_colon,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/colon_boundary",          test_lexer_colon_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comma",                   test_lexer_comma,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comma_boundary",          test_lexer_comma_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/lbracket",                test_lexer_lbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/lbracket_boundary",       test_lexer_lbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/rbracket",                test_lexer_rbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/rbracket_boundary",       test_lexer_rbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/plus",                    test_lexer_plus,                    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/plus_boundary",           test_lexer_plus_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/minus",                   test_lexer_minus,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/minus_boundary",          test_lexer_minus_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/asterisk",                test_lexer_asterisk,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/asterisk_boundary",       test_lexer_asterisk_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/dot",                     test_lexer_dot,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/dot_boundary",            test_lexer_dot_boundary,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comment",                 test_lexer_comment,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comment_boundary",        test_lexer_comment_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/whitespace",              test_lexer_whitespace,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/whitespace_boundary",     test_lexer_whitespace_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/newlines",                test_lexer_newlines,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/newline_boundary",        test_lexer_newline_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/crlf_boundary",           test_lexer_crlf_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/line_numbers",            test_lexer_line_numbers,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/maximum_length_numbers",  test_lexer_maximum_length_numbers,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/too_long_numbers",        test_lexer_too_long_numbers,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/max_whitespace_length",   test_lexer_max_whitespace_length,   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/too_long_whitespace",     test_lexer_too_long_whitespace,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr,                    nullptr,                            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
@@ -1,22 +0,0 @@
 #include "munit.h"
 extern MunitTest ast_tests[];
 extern MunitTest lexer_tests[];
 extern MunitTest regression_tests[];
 extern MunitTest symbols_tests[];
 extern MunitTest bytes_tests[];
 int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
    MunitSuite suites[] = {
        {"/regression", regression_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/ast",        ast_tests,        nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/lexer",      lexer_tests,      nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/symbols",    symbols_tests,    nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/bytes",      bytes_tests,      nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {nullptr,       nullptr,          nullptr, 0, MUNIT_SUITE_OPTION_NONE},
    };
    MunitSuite master_suite = {"/oas", nullptr, suites, 1, MUNIT_SUITE_OPTION_NONE};
    return munit_suite_main(&master_suite, nullptr, argc, argv);
 }
@@ -1,535 +0,0 @@
 /* µnit Testing Framework
 * Copyright (c) 2013-2017 Evan Nemerson <evan@nemerson.com>
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #if !defined(MUNIT_H)
 #define MUNIT_H
 #include <stdarg.h>
 #include <stdlib.h>
 /* Packs a (major, minor, revision) triple into one integer: major is
 * shifted left by 16 bits, minor by 8, revision is OR-ed into the low
 * bits. The results order like the versions themselves as long as minor
 * and revision each fit in 8 bits. */
 #define MUNIT_VERSION(major, minor, revision) \
  (((major) << 16) | ((minor) << 8) | (revision))
 /* Version of this copy of the header (0.4.1), encoded with MUNIT_VERSION. */
 #define MUNIT_CURRENT_VERSION MUNIT_VERSION(0, 4, 1)
 /* Fixed-width integer aliases used by the rest of the header.
 * MSVC with _MSC_VER below 1600 gets the compiler's __intN keywords;
 * every other compiler includes <stdint.h> and maps the aliases onto the
 * standard intN_t / uintN_t types. */
 #if defined(_MSC_VER) && (_MSC_VER < 1600)
 #  define munit_int8_t   __int8
 #  define munit_uint8_t  unsigned __int8
 #  define munit_int16_t  __int16
 #  define munit_uint16_t unsigned __int16
 #  define munit_int32_t  __int32
 #  define munit_uint32_t unsigned __int32
 #  define munit_int64_t  __int64
 #  define munit_uint64_t unsigned __int64
 #else
 #  include <stdint.h>
 #  define munit_int8_t   int8_t
 #  define munit_uint8_t  uint8_t
 #  define munit_int16_t  int16_t
 #  define munit_uint16_t uint16_t
 #  define munit_int32_t  int32_t
 #  define munit_uint32_t uint32_t
 #  define munit_int64_t  int64_t
 #  define munit_uint64_t uint64_t
 #endif
 /* printf format macros (PRIi*, PRId*, PRIx*, PRIu*) for the fixed-width
 * integer types. MSVC with _MSC_VER below 1800 gets the fallbacks below,
 * each defined only if the name is not already defined; the 64-bit
 * variants use the "I64" length prefix. Every other compiler takes the
 * macros from <inttypes.h>. */
 #if defined(_MSC_VER) && (_MSC_VER < 1800)
 #  if !defined(PRIi8)
 #    define PRIi8 "i"
 #  endif
 #  if !defined(PRIi16)
 #    define PRIi16 "i"
 #  endif
 #  if !defined(PRIi32)
 #    define PRIi32 "i"
 #  endif
 #  if !defined(PRIi64)
 #    define PRIi64 "I64i"
 #  endif
 #  if !defined(PRId8)
 #    define PRId8 "d"
 #  endif
 #  if !defined(PRId16)
 #    define PRId16 "d"
 #  endif
 #  if !defined(PRId32)
 #    define PRId32 "d"
 #  endif
 #  if !defined(PRId64)
 #    define PRId64 "I64d"
 #  endif
 #  if !defined(PRIx8)
 #    define PRIx8 "x"
 #  endif
 #  if !defined(PRIx16)
 #    define PRIx16 "x"
 #  endif
 #  if !defined(PRIx32)
 #    define PRIx32 "x"
 #  endif
 #  if !defined(PRIx64)
 #    define PRIx64 "I64x"
 #  endif
 #  if !defined(PRIu8)
 #    define PRIu8 "u"
 #  endif
 #  if !defined(PRIu16)
 #    define PRIu16 "u"
 #  endif
 #  if !defined(PRIu32)
 #    define PRIu32 "u"
 #  endif
 #  if !defined(PRIu64)
 #    define PRIu64 "I64u"
 #  endif
 #else
 #  include <inttypes.h>
 #endif
 /* Boolean type used in munit's own declarations. A definition supplied
 * before this header is included wins. Otherwise the choice is: `bool`
 * when a `bool` macro is defined, `_Bool` for C99 and later, and plain
 * `int` as the last resort. */
 #if !defined(munit_bool)
 #  if defined(bool)
 #    define munit_bool bool
 #  elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
 #    define munit_bool _Bool
 #  else
 #    define munit_bool int
 #  endif
 #endif
 #if defined(__cplusplus)
 extern "C" {
 #endif
 /* Compiler hints. With __GNUC__ defined, MUNIT_LIKELY / MUNIT_UNLIKELY
 * wrap __builtin_expect (expected value 1 / 0) and MUNIT_UNUSED expands
 * to the `unused` attribute. Elsewhere the first two pass the expression
 * through unchanged and MUNIT_UNUSED expands to nothing. */
 #if defined(__GNUC__)
 #  define MUNIT_LIKELY(expr) (__builtin_expect ((expr), 1))
 #  define MUNIT_UNLIKELY(expr) (__builtin_expect ((expr), 0))
 #  define MUNIT_UNUSED __attribute__((__unused__))
 #else
 #  define MUNIT_LIKELY(expr) (expr)
 #  define MUNIT_UNLIKELY(expr) (expr)
 #  define MUNIT_UNUSED
 #endif
 /* Used inside array-parameter brackets, e.g. argv[MUNIT_ARRAY_PARAM(argc + 1)].
 * For C99 and later (except when __PGI is defined) it expands to its
 * argument, so the parameter carries a size expression; otherwise it
 * expands to nothing and the parameter is declared as a plain `[]`. */
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__PGI)
 #  define MUNIT_ARRAY_PARAM(name) name
 #else
 #  define MUNIT_ARRAY_PARAM(name)
 #endif
 /* printf length modifiers for size_t, char and short arguments, spliced
 * into the format strings of the typed assertion macros.
 * Without _WIN32 these are the standard "z", "hh" and "h". With _WIN32
 * the size modifier is "I64" when _M_X64 or __amd64__ is defined and
 * empty otherwise; the char and short modifiers are always empty. */
 #if !defined(_WIN32)
 #  define MUNIT_SIZE_MODIFIER "z"
 #  define MUNIT_CHAR_MODIFIER "hh"
 #  define MUNIT_SHORT_MODIFIER "h"
 #else
 #  if defined(_M_X64) || defined(__amd64__)
 #    define MUNIT_SIZE_MODIFIER "I64"
 #  else
 #    define MUNIT_SIZE_MODIFIER ""
 #  endif
 #  define MUNIT_CHAR_MODIFIER ""
 #  define MUNIT_SHORT_MODIFIER ""
 #endif
 /* Marks a function that never returns. Picks, in order: C11 `_Noreturn`,
 * the GNU `noreturn` attribute, MSVC `__declspec(noreturn)`, or nothing
 * when none of those is available. */
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
 #  define MUNIT_NO_RETURN _Noreturn
 #elif defined(__GNUC__)
 #  define MUNIT_NO_RETURN __attribute__((__noreturn__))
 #elif defined(_MSC_VER)
 #  define MUNIT_NO_RETURN __declspec(noreturn)
 #else
 #  define MUNIT_NO_RETURN
 #endif
 /* Push/pop pair that disables MSVC warning 4127 (available from _MSC_VER
 * 1500). The assertion macros place the pair around their closing
 * `while (0)`. Both expand to nothing on other compilers. */
 #if defined(_MSC_VER) &&  (_MSC_VER >= 1500)
 #  define MUNIT_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127))
 #  define MUNIT_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))
 #else
 #  define MUNIT_PUSH_DISABLE_MSVC_C4127_
 #  define MUNIT_POP_DISABLE_MSVC_C4127_
 #endif
 /* Severity levels accepted by the logging functions, listed from least
 * to most severe. */
 typedef enum {
  MUNIT_LOG_DEBUG,
  MUNIT_LOG_INFO,
  MUNIT_LOG_WARNING,
  MUNIT_LOG_ERROR
 } MunitLogLevel;
 /* Asks GCC-compatible compilers (MinGW excluded) to type-check
 * printf-style arguments. string_index is the 1-based position of the
 * format parameter, first_to_check that of the first variadic argument.
 * Expands to nothing elsewhere. */
 #if defined(__GNUC__) && !defined(__MINGW32__)
 #  define MUNIT_PRINTF(string_index, first_to_check) __attribute__((format (printf, string_index, first_to_check)))
 #else
 #  define MUNIT_PRINTF(string_index, first_to_check)
 #endif
 /* Logs a printf-style message at `level`, attributed to filename:line.
 * Implemented outside this header. */
 MUNIT_PRINTF(4, 5)
 void munit_logf_ex(MunitLogLevel level, const char* filename, int line, const char* format, ...);
 /* Convenience wrapper that fills in __FILE__ and __LINE__. As written,
 * __VA_ARGS__ follows a comma, so pass at least one argument after
 * `format`; use munit_log for a plain message. */
 #define munit_logf(level, format, ...) \
  munit_logf_ex(level, __FILE__, __LINE__, format, __VA_ARGS__)
 /* Logs a fixed string; routed through "%s" so `msg` is never interpreted
 * as a format string. */
 #define munit_log(level, msg) \
  munit_logf(level, "%s", msg)
 /* Reports a printf-style error attributed to filename:line. Declared with
 * MUNIT_NO_RETURN, i.e. control does not come back to the caller.
 * Implemented outside this header. */
 MUNIT_NO_RETURN
 MUNIT_PRINTF(3, 4)
 void munit_errorf_ex(const char* filename, int line, const char* format, ...);
 /* Convenience wrapper that fills in __FILE__ and __LINE__. As written,
 * __VA_ARGS__ follows a comma, so pass at least one argument after
 * `format`; use munit_error for a plain message. */
 #define munit_errorf(format, ...) \
  munit_errorf_ex(__FILE__, __LINE__, format, __VA_ARGS__)
 /* Reports a fixed string; routed through "%s" so `msg` is never
 * interpreted as a format string. */
 #define munit_error(msg) \
  munit_errorf("%s", msg)
 #define munit_assert(expr) \
  do { \
    if (!MUNIT_LIKELY(expr)) { \
      munit_error("assertion failed: " #expr); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #define munit_assert_true(expr) \
  do { \
    if (!MUNIT_LIKELY(expr)) { \
      munit_error("assertion failed: " #expr " is not true"); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #define munit_assert_false(expr) \
  do { \
    if (!MUNIT_LIKELY(!(expr))) { \
      munit_error("assertion failed: " #expr " is not false"); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 /* Generic typed comparison behind all munit_assert_<type> macros.
 *   prefix, suffix  string literals placed around each printed value
 *   T               type both operands are converted to
 *   fmt             printf conversion (without the '%') for a value of T
 *   a, op, b        operands and the comparison operator token
 * `a` and `b` are each evaluated once into temporaries of type T. If
 * `a op b` does not hold, the failure message shows the source text of
 * the comparison followed by the two actual values. Because both sides
 * are converted to T, mixed-type arguments are compared after conversion. */
 #define munit_assert_type_full(prefix, suffix, T, fmt, a, op, b)   \
  do { \
    T munit_tmp_a_ = (a); \
    T munit_tmp_b_ = (b); \
    if (!(munit_tmp_a_ op munit_tmp_b_)) {                               \
      munit_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")", \
                   #a, #op, #b, munit_tmp_a_, #op, munit_tmp_b_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 /* munit_assert_type_full without any decoration around the printed values. */
 #define munit_assert_type(T, fmt, a, op, b) \
  munit_assert_type_full("", "", T, fmt, a, op, b)
 /* Typed comparison assertions: munit_assert_<type>(a, op, b) converts both
 * operands to <type>, checks `a op b`, and on failure prints both values
 * with the matching printf conversion. All of them forward to
 * munit_assert_type / munit_assert_type_full. */

 /* char and unsigned char values are printed as '\xNN' hex escapes. */
 #define munit_assert_char(a, op, b) \
  munit_assert_type_full("'\\x", "'", char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
 #define munit_assert_uchar(a, op, b) \
  munit_assert_type_full("'\\x", "'", unsigned char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
 #define munit_assert_short(a, op, b) \
  munit_assert_type(short, MUNIT_SHORT_MODIFIER "d", a, op, b)
 #define munit_assert_ushort(a, op, b) \
  munit_assert_type(unsigned short, MUNIT_SHORT_MODIFIER "u", a, op, b)
 #define munit_assert_int(a, op, b) \
  munit_assert_type(int, "d", a, op, b)
 #define munit_assert_uint(a, op, b) \
  munit_assert_type(unsigned int, "u", a, op, b)
 #define munit_assert_long(a, op, b) \
  munit_assert_type(long int, "ld", a, op, b)
 #define munit_assert_ulong(a, op, b) \
  munit_assert_type(unsigned long int, "lu", a, op, b)
 #define munit_assert_llong(a, op, b) \
  munit_assert_type(long long int, "lld", a, op, b)
 #define munit_assert_ullong(a, op, b) \
  munit_assert_type(unsigned long long int, "llu", a, op, b)
 #define munit_assert_size(a, op, b) \
  munit_assert_type(size_t, MUNIT_SIZE_MODIFIER "u", a, op, b)
 /* Floating-point operands are compared exactly with `op`; see
 * munit_assert_double_equal for a tolerance-based check. */
 #define munit_assert_float(a, op, b) \
  munit_assert_type(float, "f", a, op, b)
 #define munit_assert_double(a, op, b) \
  munit_assert_type(double, "g", a, op, b)
 /* Compares the pointer values themselves, not what they point to. */
 #define munit_assert_ptr(a, op, b) \
  munit_assert_type(const void*, "p", a, op, b)
 /* Fixed-width integer variants; formats come from the PRI* macros. */
 #define munit_assert_int8(a, op, b)             \
  munit_assert_type(munit_int8_t, PRIi8, a, op, b)
 #define munit_assert_uint8(a, op, b) \
  munit_assert_type(munit_uint8_t, PRIu8, a, op, b)
 #define munit_assert_int16(a, op, b) \
  munit_assert_type(munit_int16_t, PRIi16, a, op, b)
 #define munit_assert_uint16(a, op, b) \
  munit_assert_type(munit_uint16_t, PRIu16, a, op, b)
 #define munit_assert_int32(a, op, b) \
  munit_assert_type(munit_int32_t, PRIi32, a, op, b)
 #define munit_assert_uint32(a, op, b) \
  munit_assert_type(munit_uint32_t, PRIu32, a, op, b)
 #define munit_assert_int64(a, op, b) \
  munit_assert_type(munit_int64_t, PRIi64, a, op, b)
 #define munit_assert_uint64(a, op, b) \
  munit_assert_type(munit_uint64_t, PRIu64, a, op, b)
 /* Asserts that two doubles are equal to within 10^-precision.
 * `a` and `b` are each evaluated once; the check fails when |a - b| is
 * greater than 1e-<precision>. `precision` is token-pasted onto "1e-" and
 * stringified into the "%0.<precision>g" format, so it must be a literal
 * non-negative integer, not a variable or expression. */
 #define munit_assert_double_equal(a, b, precision) \
  do { \
    const double munit_tmp_a_ = (a); \
    const double munit_tmp_b_ = (b); \
    const double munit_tmp_diff_ = ((munit_tmp_a_ - munit_tmp_b_) < 0) ? \
      -(munit_tmp_a_ - munit_tmp_b_) : \
      (munit_tmp_a_ - munit_tmp_b_); \
    if (MUNIT_UNLIKELY(munit_tmp_diff_ > 1e-##precision)) { \
      munit_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)", \
 		   #a, #b, munit_tmp_a_, munit_tmp_b_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #include <string.h>
 /* Asserts that two NUL-terminated strings have equal contents (strcmp).
 * Each argument is evaluated once. Both are handed straight to strcmp,
 * so neither may be NULL. On failure both strings are printed. */
 #define munit_assert_string_equal(a, b) \
  do { \
    const char* munit_tmp_a_ = a; \
    const char* munit_tmp_b_ = b; \
    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) != 0)) { \
      munit_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")", \
                   #a, #b, munit_tmp_a_, munit_tmp_b_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 /* Asserts that two NUL-terminated strings differ in contents (strcmp).
 * Same evaluation and non-NULL requirements as munit_assert_string_equal. */
 #define munit_assert_string_not_equal(a, b) \
  do { \
    const char* munit_tmp_a_ = a; \
    const char* munit_tmp_b_ = b; \
    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) == 0)) { \
      munit_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")", \
                   #a, #b, munit_tmp_a_, munit_tmp_b_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 /* Asserts that the first `size` bytes at `a` and `b` are identical.
 * All three arguments are evaluated once. When memcmp reports a
 * difference, the buffers are scanned byte by byte and the offset of the
 * first mismatch is reported. munit_errorf does not return, so the
 * `break` after it is never reached in practice. */
 #define munit_assert_memory_equal(size, a, b) \
  do { \
    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
    const size_t munit_tmp_size_ = (size); \
    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_)) != 0) { \
      size_t munit_tmp_pos_; \
      for (munit_tmp_pos_ = 0 ; munit_tmp_pos_ < munit_tmp_size_ ; munit_tmp_pos_++) { \
        if (munit_tmp_a_[munit_tmp_pos_] != munit_tmp_b_[munit_tmp_pos_]) { \
          munit_errorf("assertion failed: memory %s == %s, at offset %" MUNIT_SIZE_MODIFIER "u", \
                       #a, #b, munit_tmp_pos_); \
          break; \
        } \
      } \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 /* Asserts that the first `size` bytes at `a` and `b` differ somewhere.
 * All three arguments are evaluated once; the check fails when memcmp
 * reports the two ranges as identical, and the message includes the
 * number of bytes compared.
 * The whole `memcmp(...) == 0` condition sits inside MUNIT_UNLIKELY so the
 * failure branch is the one marked unlikely. The size is printed with
 * MUNIT_SIZE_MODIFIER, like munit_assert_memory_equal, instead of a
 * hard-coded "%zu". */
 #define munit_assert_memory_not_equal(size, a, b) \
  do { \
    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
    const size_t munit_tmp_size_ = (size); \
    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) == 0)) { \
      munit_errorf("assertion failed: memory %s != %s (%" MUNIT_SIZE_MODIFIER "u bytes)", \
                   #a, #b, munit_tmp_size_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 /* Pointer shorthands built on munit_assert_ptr; they compare addresses
 * only. munit_assert_null / munit_assert_ptr_null expand identically, as
 * do munit_assert_not_null / munit_assert_ptr_not_null. */
 #define munit_assert_ptr_equal(a, b) \
  munit_assert_ptr(a, ==, b)
 #define munit_assert_ptr_not_equal(a, b) \
  munit_assert_ptr(a, !=, b)
 #define munit_assert_null(ptr) \
  munit_assert_ptr(ptr, ==, NULL)
 #define munit_assert_not_null(ptr) \
  munit_assert_ptr(ptr, !=, NULL)
 #define munit_assert_ptr_null(ptr) \
  munit_assert_ptr(ptr, ==, NULL)
 #define munit_assert_ptr_not_null(ptr) \
  munit_assert_ptr(ptr, !=, NULL)
 /*** Memory allocation ***/
 /* Allocates `size` bytes on behalf of filename:line. Implemented outside
 * this header. NOTE(review): failure handling and whether the memory is
 * zeroed are not visible here; confirm in the implementation. */
 void* munit_malloc_ex(const char* filename, int line, size_t size);
 /* munit_malloc_ex with the call site's __FILE__ and __LINE__ filled in. */
 #define munit_malloc(size) \
  munit_malloc_ex(__FILE__, __LINE__, (size))
 /* Allocates storage for one object of `type` and casts the result. */
 #define munit_new(type) \
  ((type*) munit_malloc(sizeof(type)))
 /* Allocates nmemb * size bytes through munit_malloc.
 * NOTE(review): the product is not checked for overflow. */
 #define munit_calloc(nmemb, size) \
  munit_malloc((nmemb) * (size))
 /* Allocates an array of `nmemb` objects of `type` and casts the result. */
 #define munit_newa(type, nmemb) \
  ((type*) munit_calloc((nmemb), sizeof(type)))
 /*** Random number generation ***/
 /* Declarations only; the generator is implemented outside this header.
 * NOTE(review): whether the bounds of munit_rand_int_range are inclusive
 * and what range munit_rand_double returns are not visible here; confirm
 * in the implementation before relying on them. */
 /* Seeds the generator with `seed`. */
 void munit_rand_seed(munit_uint32_t seed);
 /* Returns a 32-bit unsigned value from the generator. */
 munit_uint32_t munit_rand_uint32(void);
 /* Returns an int chosen using the bounds `min` and `max`. */
 int munit_rand_int_range(int min, int max);
 /* Returns a double from the generator. */
 double munit_rand_double(void);
 /* Takes a buffer and its size in bytes; presumably fills it with random
 * bytes. */
 void munit_rand_memory(size_t size, munit_uint8_t buffer[MUNIT_ARRAY_PARAM(size)]);
 /*** Tests and Suites ***/
 /* Outcome a test function reports back to the runner through its return
 * value (see MunitTestFunc). */
 typedef enum {
  /* Test successful */
  MUNIT_OK,
  /* Test failed */
  MUNIT_FAIL,
  /* Test was skipped */
  MUNIT_SKIP,
  /* Test failed due to circumstances not intended to be tested
   * (things like network errors, invalid parameter value, failure to
   * allocate memory in the test harness, etc.). */
  MUNIT_ERROR
 } MunitResult;
 /* Describes one test parameter: its name plus a list of candidate values.
 * NOTE(review): `values` is presumably NULL-terminated; confirm in the
 * implementation. */
 typedef struct {
  char*  name;
  char** values;
 } MunitParameterEnum;
 /* One concrete name/value pair, as passed to a test function in its
 * `params` array. */
 typedef struct {
  char* name;
  char* value;
 } MunitParameter;
 /* Takes a `params` array and a key; presumably returns the value stored
 * under that name. NOTE(review): behaviour for an unknown key is not
 * visible here. */
 const char* munit_parameters_get(const MunitParameter params[], const char* key);
 /* Per-test option bit flags (the values are distinct bits, so they can be
 * OR-ed together). NOTE(review): the names suggest "run only once" and
 * "known-unfinished test", but the exact semantics live in the
 * implementation, not in this header. */
 typedef enum {
  MUNIT_TEST_OPTION_NONE             = 0,
  MUNIT_TEST_OPTION_SINGLE_ITERATION = 1 << 0,
  MUNIT_TEST_OPTION_TODO             = 1 << 1
 } MunitTestOptions;
 /* Callback signatures for a test.
 *   MunitTestFunc      the test body; receives the parameter array and
 *                      either the user data or the fixture, and returns a
 *                      MunitResult
 *   MunitTestSetup     receives the parameters and user data and returns a
 *                      pointer (the fixture, going by the other two
 *                      signatures)
 *   MunitTestTearDown  receives the fixture */
 typedef MunitResult (* MunitTestFunc)(const MunitParameter params[], void* user_data_or_fixture);
 typedef void*       (* MunitTestSetup)(const MunitParameter params[], void* user_data);
 typedef void        (* MunitTestTearDown)(void* fixture);
 /* One entry of a test table: name, test function, optional setup and
 * tear-down callbacks, option flags, and optional parameter
 * descriptions. Positional initializers must follow this field order. */
 typedef struct {
  char*               name;
  MunitTestFunc       test;
  MunitTestSetup      setup;
  MunitTestTearDown   tear_down;
  MunitTestOptions    options;
  MunitParameterEnum* parameters;
 } MunitTest;
 /* Suite-level options; only the "none" value is defined. */
 typedef enum {
  MUNIT_SUITE_OPTION_NONE = 0
 } MunitSuiteOptions;
 /* Forward declaration so that a suite can point at further suites. */
 typedef struct MunitSuite_ MunitSuite;
 /* A suite: a prefix string, a pointer to tests, a pointer to nested suites,
  * an iteration count and suite options. */
 struct MunitSuite_ {
  char*             prefix;
  MunitTest*        tests;
  MunitSuite*       suites;
  unsigned int      iterations;
  MunitSuiteOptions options;
 };
 /* Entry point taking a suite, opaque user data and the program's argc/argv;
  * returns an int (presumably a process exit status -- confirm). */
 int munit_suite_main(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
 /* Note: I'm not very happy with this API; it's likely to change if I
 * figure out something better.  Suggestions welcome. */
 typedef struct MunitArgument_ MunitArgument;
 /* Description of a custom command-line argument: its name, a callback that
  * parses it (given the suite, user data, a pointer to the current argument
  * index and argc/argv) and a callback that writes its help text. */
 struct MunitArgument_ {
  char* name;
  munit_bool (* parse_argument)(const MunitSuite* suite, void* user_data, int* arg, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
  void (* write_help)(const MunitArgument* argument, void* user_data);
 };
 /* Same parameters as munit_suite_main plus an array of custom arguments.
  * NOTE(review): how the `arguments` array is terminated is not visible here. */
 int munit_suite_main_custom(const MunitSuite* suite,
                            void* user_data,
                            int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)],
                            const MunitArgument arguments[]);
 /* Optional short aliases: when MUNIT_ENABLE_ASSERT_ALIASES is defined, each
  * assert_* name forwards to the munit_assert_* macro of the same suffix. */
 #if defined(MUNIT_ENABLE_ASSERT_ALIASES)
 #define assert_true(expr) munit_assert_true(expr)
 #define assert_false(expr) munit_assert_false(expr)
 #define assert_char(a, op, b) munit_assert_char(a, op, b)
 #define assert_uchar(a, op, b) munit_assert_uchar(a, op, b)
 #define assert_short(a, op, b) munit_assert_short(a, op, b)
 #define assert_ushort(a, op, b) munit_assert_ushort(a, op, b)
 #define assert_int(a, op, b) munit_assert_int(a, op, b)
 #define assert_uint(a, op, b) munit_assert_uint(a, op, b)
 #define assert_long(a, op, b) munit_assert_long(a, op, b)
 #define assert_ulong(a, op, b) munit_assert_ulong(a, op, b)
 #define assert_llong(a, op, b) munit_assert_llong(a, op, b)
 #define assert_ullong(a, op, b) munit_assert_ullong(a, op, b)
 #define assert_size(a, op, b) munit_assert_size(a, op, b)
 #define assert_float(a, op, b) munit_assert_float(a, op, b)
 #define assert_double(a, op, b) munit_assert_double(a, op, b)
 #define assert_ptr(a, op, b) munit_assert_ptr(a, op, b)
 #define assert_int8(a, op, b) munit_assert_int8(a, op, b)
 #define assert_uint8(a, op, b) munit_assert_uint8(a, op, b)
 #define assert_int16(a, op, b) munit_assert_int16(a, op, b)
 #define assert_uint16(a, op, b) munit_assert_uint16(a, op, b)
 #define assert_int32(a, op, b) munit_assert_int32(a, op, b)
 #define assert_uint32(a, op, b) munit_assert_uint32(a, op, b)
 #define assert_int64(a, op, b) munit_assert_int64(a, op, b)
 #define assert_uint64(a, op, b) munit_assert_uint64(a, op, b)
 #define assert_double_equal(a, b, precision) munit_assert_double_equal(a, b, precision)
 #define assert_string_equal(a, b) munit_assert_string_equal(a, b)
 #define assert_string_not_equal(a, b) munit_assert_string_not_equal(a, b)
 #define assert_memory_equal(size, a, b) munit_assert_memory_equal(size, a, b)
 #define assert_memory_not_equal(size, a, b) munit_assert_memory_not_equal(size, a, b)
 #define assert_ptr_equal(a, b) munit_assert_ptr_equal(a, b)
 #define assert_ptr_not_equal(a, b) munit_assert_ptr_not_equal(a, b)
 /* Previously expanded to munit_assert_null_equal, which is not defined
  * anywhere in this header, so any use of assert_ptr_null failed to build. */
 #define assert_ptr_null(ptr) munit_assert_ptr_null(ptr)
 #define assert_ptr_not_null(ptr) munit_assert_ptr_not_null(ptr)
 #define assert_null(ptr) munit_assert_null(ptr)
 #define assert_not_null(ptr) munit_assert_not_null(ptr)
 #endif /* defined(MUNIT_ENABLE_ASSERT_ALIASES) */
 #if defined(__cplusplus)
 }
 #endif
 #endif /* !defined(MUNIT_H) */
 /* This block sits after the MUNIT_H include guard has been closed, so it is
  * evaluated on every inclusion of the header: when aliases are enabled, any
  * existing `assert` macro is dropped and replaced by munit_assert. */
 #if defined(MUNIT_ENABLE_ASSERT_ALIASES)
 #  if defined(assert)
 #    undef assert
 #  endif
 #  define assert(expr) munit_assert(expr)
 #endif
@@ -1,68 +0,0 @@
 #include "../src/ast.h"
 #include "../src/parser/parser.h"
 #include "munit.h"
 /* Regression test: lexing and parsing
  * tests/input/regression/test_trivia_head.asm must succeed, with no error and
  * no unconsumed token (result.next) left behind. */
 MunitResult test_regression_trivia_head(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, "tests/input/regression/test_trivia_head.asm");
    munit_assert_null(err);
    tokenlist_t *list;
    err = tokenlist_alloc(&list);
    munit_assert_null(err);
    err = tokenlist_fill(list, lex);
    munit_assert_null(err);
    parse_result_t result = parse(list->head);
    // Close the lexer opened above so the test does not leak it
    lexer_close(lex);
    munit_assert_null(result.err);
    munit_assert_null(result.next);
    ast_node_free(result.node);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 /* Regression test: tests/input/regression/test_no_operands_eof.asm must parse
  * into a root with exactly two instruction children, each of which has two
  * children of its own, the second one (the operands) being empty. */
 MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
    munit_assert_null(err);
    tokenlist_t *list;
    err = tokenlist_alloc(&list);
    munit_assert_null(err);
    err = tokenlist_fill(list, lex);
    munit_assert_null(err);
    parse_result_t result = parse(list->head);
    // Close the lexer opened above so the test does not leak it
    lexer_close(lex);
    munit_assert_null(result.err);
    munit_assert_null(result.next);
    // Fail with a clear message instead of dereferencing a missing root
    munit_assert_not_null(result.node);
    // Both children should be instructions
    munit_assert_size(result.node->len, ==, 2);
    munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
    munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);
    // And they should have empty operands
    munit_assert_size(result.node->children[0]->len, ==, 2);
    munit_assert_size(result.node->children[1]->len, ==, 2);
    munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
    munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);
    ast_node_free(result.node);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 // Table of regression tests. Each row is {name, test, setup, tear_down,
 // options, parameters}; the final row with a nullptr name and test ends the table.
 MunitTest regression_tests[] = {
    {"/trivia_head",     test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/no_operands_eof", test_no_operands_eof,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr,            nullptr,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
@@ -1,393 +0,0 @@
 #include "../src/encoder/symbols.h"
 #include "../src/ast.h"
 #include "../src/error.h"
 #include "../src/lexer.h"
 #include "../src/parser/parser.h"
 #include "munit.h"
 #include <string.h>
 /* Shared fixture helper: lex and parse the file at `path`.
  *
  * On return *list holds the allocated token list and *node the root of the
  * parsed AST; callers release them with tokenlist_free() and ast_node_free().
  * Any failure while opening, tokenizing or parsing fails the calling test
  * through a munit assertion instead of handing back unusable pointers. */
 void symbols_setup_test(ast_node_t **node, tokenlist_t **list, char *path) {
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, path);
    munit_assert_null(err);
    err = tokenlist_alloc(list);
    munit_assert_null(err);
    err = tokenlist_fill(*list, lex);
    munit_assert_null(err);
    parse_result_t result = parse((*list)->head);
    lexer_close(lex);
    munit_assert_null(result.err);
    munit_assert_not_null(result.node);
    *node = result.node;
 }
 /* A freshly allocated symbol table must come back without error, non-null,
  * empty (len 0), with capacity 64 and an allocated symbols array. */
 MunitResult test_symbol_table_alloc(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    symbol_table_t *table = nullptr;
    error_t *err = symbol_table_alloc(&table);
    munit_assert_ptr_not_null(table);
    munit_assert_ptr_null(err);
    munit_assert_size(table->cap, ==, 64); // Default capacity
    munit_assert_size(table->len, ==, 0);
    munit_assert_ptr_not_null(table->symbols);
    symbol_table_free(table);
    return MUNIT_OK;
 }
 MunitResult test_symbol_table_lookup_empty(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    symbol_table_t *table = nullptr;
    symbol_table_alloc(&table);
    symbol_t *symbol = symbol_table_lookup(table, "nonexistent");
    munit_assert_ptr_null(symbol);
    symbol_table_free(table);
    return MUNIT_OK;
 }
 /* Adding a label reference node to an empty table must create one symbol
  * named "test" of kind SYMBOL_REFERENCE with no statement attached. */
 MunitResult test_symbol_add_reference(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    // Do not read table->len below if allocation failed
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);
    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    ast_node_t *statement = root->children[3]; // The containing statement
    munit_assert_int(reference->id, ==, NODE_LABEL_REFERENCE);
    munit_assert_size(table->len, ==, 0);
    err = symbol_table_update(table, reference, statement);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);
    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_REFERENCE, ==, symbol->kind);
    // For references, the statement should be nullptr
    munit_assert_ptr_null(symbol->statement);
    munit_assert_string_equal(symbol->name, "test");
    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 /* Adding a label node to an empty table must create one symbol named "test"
  * of kind SYMBOL_LOCAL whose statement is the label node itself. */
 MunitResult test_symbol_add_label(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    // Do not read table->len below if allocation failed
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);
    ast_node_t *label = root->children[2];
    munit_assert_int(label->id, ==, NODE_LABEL);
    munit_assert_size(table->len, ==, 0);
    err = symbol_table_update(table, label, label);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);
    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
    munit_assert_ptr_equal(label, symbol->statement);
    munit_assert_string_equal(symbol->name, "test");
    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 /* Adding an import directive node to an empty table must create one symbol
  * named "test" of kind SYMBOL_IMPORT with no statement attached. */
 MunitResult test_symbol_add_import(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    // Do not read table->len below if allocation failed
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);
    ast_node_t *import_directive = root->children[0]->children[1];
    ast_node_t *statement = root->children[0]; // The containing statement
    munit_assert_int(import_directive->id, ==, NODE_IMPORT_DIRECTIVE);
    munit_assert_size(table->len, ==, 0);
    err = symbol_table_update(table, import_directive, statement);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);
    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_IMPORT, ==, symbol->kind);
    // For import directives, the statement should be nullptr
    munit_assert_ptr_null(symbol->statement);
    munit_assert_string_equal(symbol->name, "test");
    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 /* Helper that applies two consecutive updates for the same symbol name to a
  * fresh table and checks the outcome.
  *
  * name               - symbol name both nodes are expected to resolve to
  * first / second     - AST nodes passed to symbol_table_update, in order
  * first_kind / second_kind - symbol kind each node is expected to produce
  * first_statement / second_statement - statements passed with each node
  * should_succeed     - false when the second update must fail with
  *                      err_symbol_table_incompatible_symbols
  * should_update      - true when the symbol kind must become second_kind,
  *                      false when it must stay first_kind
  * expected_statement - statement the symbol must reference afterwards */
 void test_symbol_update(const char *name, ast_node_t *first, symbol_kind_t first_kind, ast_node_t *first_statement,
                        ast_node_t *second, symbol_kind_t second_kind, ast_node_t *second_statement,
                        bool should_succeed, bool should_update, ast_node_t *expected_statement) {
    symbol_table_t *table = nullptr;
    // Do not continue with an unusable table if allocation failed
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);
    // Add the first symbol
    err = symbol_table_update(table, first, first_statement);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);
    // Verify first symbol state
    symbol_t *symbol = symbol_table_lookup(table, name);
    munit_assert_not_null(symbol);
    munit_assert_int(first_kind, ==, symbol->kind);
    munit_assert_string_equal(symbol->name, name);
    // Check statement based on symbol kind
    if (first_kind == SYMBOL_LOCAL) {
        munit_assert_ptr_equal(first_statement, symbol->statement);
    } else {
        munit_assert_ptr_null(symbol->statement);
    }
    // Attempt the second update
    err = symbol_table_update(table, second, second_statement);
    // Check if update succeeded as expected
    if (should_succeed) {
        munit_assert_null(err);
    } else {
        munit_assert_ptr_equal(err, err_symbol_table_incompatible_symbols);
        symbol_table_free(table);
        return;
    }
    // Verify symbol after second update
    symbol = symbol_table_lookup(table, name);
    munit_assert_not_null(symbol);
    // Check if kind updated as expected
    if (should_update) {
        munit_assert_int(second_kind, ==, symbol->kind);
    } else {
        munit_assert_int(first_kind, ==, symbol->kind);
    }
    // Simply check against the expected statement value
    munit_assert_ptr_equal(expected_statement, symbol->statement);
    symbol_table_free(table);
 }
 /* Exercises every update sequence that must succeed: real upgrades, identity
  * updates, and downgrades that are accepted but leave the kind unchanged. */
 MunitResult test_symbol_upgrade_valid(const MunitParameter params[], void *data) {
    // Unused, silenced the same way as in the other tests of this file
    (void)params;
    (void)data;
    ast_node_t *root;
    tokenlist_t *list;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    ast_node_t *reference_statement = root->children[3];
    ast_node_t *label = root->children[2];
    ast_node_t *import_directive = root->children[0]->children[1];
    ast_node_t *import_statement = root->children[0];
    ast_node_t *export_directive = root->children[1]->children[1];
    ast_node_t *export_statement = root->children[1];
    // real upgrades
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, label, SYMBOL_LOCAL, label, true, true,
                       label);
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, import_directive, SYMBOL_IMPORT,
                       import_statement, true, true, nullptr);
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, export_directive, SYMBOL_EXPORT,
                       export_statement, true, true, nullptr);
    test_symbol_update("test", label, SYMBOL_LOCAL, label, export_directive, SYMBOL_EXPORT, export_statement, true,
                       true, label);
    // identity upgrades
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, reference, SYMBOL_REFERENCE,
                       reference_statement, true, false, nullptr);
    test_symbol_update("test", label, SYMBOL_LOCAL, label, label, SYMBOL_LOCAL, label, true, false, label);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, import_directive, SYMBOL_IMPORT,
                       import_statement, true, false, nullptr);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, export_directive, SYMBOL_EXPORT,
                       export_statement, true, false, nullptr);
    // downgrades that are allowed and ignored
    test_symbol_update("test", label, SYMBOL_LOCAL, label, reference, SYMBOL_REFERENCE, reference_statement, true,
                       false, label);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, reference, SYMBOL_REFERENCE,
                       reference_statement, true, false, nullptr);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, reference, SYMBOL_REFERENCE,
                       reference_statement, true, false, nullptr);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, label, SYMBOL_LOCAL, label, true,
                       false, label);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, label, SYMBOL_LOCAL, label, true,
                       false, label);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 /* Exercises the update sequences that must be rejected: local -> import,
  * export -> import and import -> export. */
 MunitResult test_symbol_upgrade_invalid(const MunitParameter params[], void *data) {
    // Unused, silenced the same way as in the other tests of this file
    (void)params;
    (void)data;
    ast_node_t *root;
    tokenlist_t *list;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    ast_node_t *reference_statement = root->children[3];
    ast_node_t *label = root->children[2];
    ast_node_t *import_directive = root->children[0]->children[1];
    ast_node_t *import_statement = root->children[0];
    ast_node_t *export_directive = root->children[1]->children[1];
    ast_node_t *export_statement = root->children[1];
    // invalid upgrades
    test_symbol_update("test", label, SYMBOL_LOCAL, label, import_directive, SYMBOL_IMPORT, import_statement, false,
                       false, nullptr);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, import_directive, SYMBOL_IMPORT,
                       import_statement, false, false, nullptr);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, export_directive, SYMBOL_EXPORT,
                       export_statement, false, false, nullptr);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 /* Adding an export directive node to an empty table must create one symbol
  * named "test" of kind SYMBOL_EXPORT with no statement attached. */
 MunitResult test_symbol_add_export(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    // Do not read table->len below if allocation failed
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);
    ast_node_t *export_directive = root->children[1]->children[1];
    ast_node_t *statement = root->children[1]; // The containing statement
    munit_assert_int(export_directive->id, ==, NODE_EXPORT_DIRECTIVE);
    munit_assert_size(table->len, ==, 0);
    err = symbol_table_update(table, export_directive, statement);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);
    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_EXPORT, ==, symbol->kind);
    // For export directives, the statement should be nullptr
    munit_assert_ptr_null(symbol->statement);
    munit_assert_string_equal(symbol->name, "test");
    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 MunitResult test_symbol_table_growth(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    // Set up with our manysymbols.asm file
    symbols_setup_test(&root, &list, "tests/input/manysymbols.asm");
    symbol_table_alloc(&table);
    // Initial capacity should be the default (64)
    munit_assert_size(table->cap, ==, 64);
    munit_assert_size(table->len, ==, 0);
    // Add the first 64 labels (indices 0-63)
    size_t initial_cap = table->cap;
    for (size_t i = 0; i < 64; i++) {
        ast_node_t *label = root->children[i];
        munit_assert_int(label->id, ==, NODE_LABEL);
        error_t *err = symbol_table_update(table, label, label);
        munit_assert_null(err);
        munit_assert_size(table->len, ==, i + 1);
        // Capacity should remain the same for the first 64 labels
        munit_assert_size(table->cap, ==, initial_cap);
    }
    // Now add the 65th label (index 64), which should trigger growth
    ast_node_t *final_label = root->children[64];
    munit_assert_int(final_label->id, ==, NODE_LABEL);
    error_t *err = symbol_table_update(table, final_label, final_label);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 65);
    // Capacity should have doubled
    munit_assert_size(table->cap, ==, initial_cap * 2);
    // Validate we can look up all the symbols
    for (size_t i = 0; i <= 64; i++) {
        char name[10];
        sprintf(name, "lbl_%zu", i);
        symbol_t *symbol = symbol_table_lookup(table, name);
        munit_assert_not_null(symbol);
        munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
        munit_assert_string_equal(symbol->name, name);
        munit_assert_ptr_equal(symbol->statement, root->children[i]);
    }
    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 /* Passing the root node (which is neither a label, a reference nor a
  * directive in this test) to symbol_table_update must fail with
  * err_symbol_table_invalid_node and leave the table empty. */
 MunitResult test_symbol_invalid_node(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    // Do not read table->len below if allocation failed
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 0);
    err = symbol_table_update(table, root, root);
    munit_assert_ptr_equal(err, err_symbol_table_invalid_node);
    munit_assert_size(table->len, ==, 0);
    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 // Table of symbol table tests. Each row is {name, test, setup, tear_down,
 // options, parameters}; the final row with a nullptr name and test ends the table.
 MunitTest symbols_tests[] = {
    {"/table_alloc",        test_symbol_table_alloc,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/table_lookup_empty", test_symbol_table_lookup_empty, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/add_reference",      test_symbol_add_reference,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/add_label",          test_symbol_add_label,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/add_import",         test_symbol_add_import,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/add_export",         test_symbol_add_export,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/upgrade_valid",      test_symbol_upgrade_valid,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/upgrade_invalid",    test_symbol_upgrade_invalid,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/table_growth",       test_symbol_table_growth,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/invalid_node",       test_symbol_invalid_node,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr,               nullptr,                        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
@@ -2,17 +2,19 @@
 set -euo pipefail
-make analyze debug asan msan
+# Start with static analysis
 make clean all
 mkdir -p reports/static-analysis
 scan-build -o reports/static-analysis/ -plist-html --status-bugs make all
-ASAN=build/asan/oas
+# Run the sanitizer builds and valgrind
-MSAN=build/msan/oas
+make clean sanitize all
 DEBUG=build/debug/oas
-ARGUMENTS=("tokens" "text" "ast")
+ARGUMENTS=("-tokens" "-text")
 while IFS= read -r INPUT_FILE; do
    for ARGS in ${ARGUMENTS[@]}; do
-        $ASAN $ARGS $INPUT_FILE > /dev/null
+        ./oas-asan $ARGS $INPUT_FILE > /dev/null
-        $MSAN $ARGS $INPUT_FILE > /dev/null
+        ./oas-msan $ARGS $INPUT_FILE > /dev/null
-        valgrind --leak-check=full --error-exitcode=1 $DEBUG $ARGS $INPUT_FILE >/dev/null
+        valgrind --leak-check=full --error-exitcode=1 ./oas $ARGS $INPUT_FILE >/dev/null
    done
 done < <(find tests/input/ -type f -name '*.asm')
Author	SHA1	Message	Date
omicron	0f6efa8050	Add basic AST functionality Validate the build / validate-build (push) Successful in 24s Details	2025-03-31 18:43:50 +02:00
omicron	36af377ba0	Add a parser grammar Currently this is a subset of the grammar, enough to get reasonable work going.	2025-03-31 18:43:34 +02:00
`@@ -1,2 +1,2 @@`
	`CompileFlags:`	`CompileFlags:`
	`Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]`	`Add: ["-std=c23", "-x", "c"]`