Implement one immediate label reference operand

Also adds opcode data for jmp and call
Implement two pass encoding
2025-04-24 14:45:57 +02:00 · 2025-04-24 14:45:46 +02:00 · 2025-04-23 15:57:04 +02:00 · 2025-04-23 15:56:46 +02:00 · 2025-04-18 14:00:08 +02:00 · 2025-04-17 23:28:44 +02:00
53 changed files with 6240 additions and 143 deletions
@@ -1,2 +1,2 @@
 CompileFlags:
-  Add: ["-std=c23", "-x", "c"]
+  Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]
@@ -16,8 +16,10 @@ jobs:
          echo "http://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories
          echo "http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories

+          # determine correct clang version and then install it
          apk update
-          apk add --no-cache llvm19 clang19 clang19-analyzer compiler-rt valgrind
+          RT_VERSION=$(apk search -v compiler-rt | grep -o "compiler-rt-[0-9]*" | head -1 | grep -o "[0-9]*")
+          apk add --no-cache llvm${RT_VERSION} clang${RT_VERSION} clang${RT_VERSION}-analyzer compiler-rt valgrind

          # Verify versions
          echo "---------------------"
@@ -34,3 +36,7 @@ jobs:
      - name: make validate
        run: |
          make validate
+
+      - name: make test
+        run: |
+          make test
@@ -1,7 +1,5 @@
 *.o
 *.d
 /core
-/oas
-/oas-asan
-/oas-msan
+/build
 /reports
@@ -1,54 +1,46 @@
-.PHONY: all clean clean-objects clean-reports run sanitize validate fuzz
+# Every target in this file names a command rather than a file it produces.
+.PHONY: all clean distclean release debug afl asan msan validate analyze fuzz test

-CC=clang
-LD=clang
-CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
-LDFLAGS?=
+debug: 
+	make -rRf make/debug.mk all

-SOURCES = $(shell find src/ -type f -name '*.c')
-OBJECTS = $(SOURCES:.c=.o)
-DEPENDENCIES = $(SOURCES:.c=.d)
-TARGET?=oas
-OUTPUTS=oas oas-asan oas-msan oas-afl
-RUNARGUMENTS?=ast tests/input/valid.asm
-
-all: $(TARGET)
+all: debug release afl asan msan
 	

-run: $(TARGET)
-	./$(TARGET) $(RUNARGUMENTS)
+release: 
+	make -rRf make/release.mk all
+
+afl:
+	make -rRf make/afl.mk all

 fuzz:
-	make CC="afl-clang-fast" LD="afl-clang-fast" TARGET="oas-afl" clean-objects all
-	make clean-objects
-	mkdir -p reports/afl
-	afl-fuzz -i tests/input -o reports/afl -m none -- ./oas-afl -tokens @@
+	make -rRf make/afl.mk fuzz

-sanitize:
-	make CFLAGS="$(CFLAGS) -fsanitize=address,undefined" \
-		LDFLAGS="-fsanitize=address,undefined" \
-		TARGET="oas-asan" clean-objects all
-	make CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" \
-		LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" \
-		TARGET="oas-msan" clean-objects all 
-	make clean-objects
+asan:
+	make -rRf make/asan.mk all

-validate:
+msan:
+	make -rRf make/msan.mk all
+
+validate: asan msan debug release
 	./validate.sh

-$(TARGET): $(OBJECTS)
-	$(LD) $(LDFLAGS) -o $@ $^
+# Rebuild from scratch under the static analyzer. The analyze target in
+# make/analyze.mk is the one that wraps the build in scan-build; invoking only
+# "all" there would compile the sources without running the analyzer.
+analyze:
+	make -rRf make/analyze.mk clean analyze

-%.o: %.c
-	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
+test:
+	make -rRf make/test.mk test

-include $(DEPENDENCIES)
+clean:
+	make -rRf make/release.mk clean
+	make -rRf make/debug.mk clean
+	make -rRf make/afl.mk clean
+	make -rRf make/msan.mk clean
+	make -rRf make/asan.mk clean
+	make -rRf make/analyze.mk clean
+	make -rRf make/test.mk clean
+	rm -rf build/

-clean-objects:
-	rm -f $(OBJECTS) $(DEPENDENCIES)
-
-clean-reports:
+distclean: clean
+	make -rRf make/afl.mk distclean
+	make -rRf make/analyze.mk distclean
 	rm -rf reports/
-
-clean: clean-objects
-	rm -f $(TARGET) $(OUTPUTS)
@@ -0,0 +1,29 @@
+# Building
+
+To build oas in the default configuration you just need (gnu) make and a
+sufficiently modern clang.
+
+```
+make
+```
+
+## Make targets
+
+There are a number of make targets available to build various instrumented
+builds that are used in validation, analysis and sanitizing. Some of these may
+require extra dependencies.
+
+
+ - `debug`: Creates the debug build in `build/debug`. This is the default target.
+ - `all`: Builds all binary executable targets. These are
+   `debug`, `release`, `msan`, `asan` and `afl`. All executables can be found
+   in `build/` in a subdirectory matching their target names.
+ - `release`: Creates the release build in `build/release`
+ - `afl`: Creates a build with AFL++ instrumentation for fuzzing
+ - `fuzz`: Starts the fuzzer with the instrumented afl executable
+ - `asan`: builds with the address and undefined clang sanitizers
+ - `msan`: builds with the memory clang sanitizer
+ - `validate`: Builds the `asan`, `msan`, `debug` and `release` targets, then
+   runs the validation script. This script executes the sanitizer targets and
+   runs Valgrind on the debug target across multiple modes and test input files.
+
@@ -1,13 +1,17 @@
 <program>   ::= <statement>*
-<statement> ::= <label> | <directive> | <instruction>
+<statement> ::= <label> | <directive> | <instruction> | <newline>

 <label> ::= <identifier> <colon>

-<directive> ::= <dot> <section_directive>
+<directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> ) <newline>

 <section_directive> ::= "section" <identifier>

-<instruction> ::= <identifier> <operands>
+<export_directive> ::= "export" <identifier>
+
+<import_directive> ::= "import" <identifier>
+
+<instruction> ::= <identifier> <operands> <newline>

 <operands> ::= <operand> ( <comma> <operand> )*

@@ -0,0 +1,14 @@
+.PHONY: fuzz distclean
+
+CC=afl-clang-fast
+LD=afl-clang-fast
+BUILD_DIR=build/afl/
+
+-include make/base.mk
+
+fuzz: $(BUILD_DIR)$(TARGET)
+	mkdir -p reports/afl
+	afl-fuzz -i tests/input -o reports/afl -m none -- ./$< -tokens @@
+
+distclean: clean
+	rm -rf reports/afl
@@ -0,0 +1,9 @@
+BUILD_DIR=build/analyze/
+-include make/base.mk
+
+.PHONY: analyze distclean
+
+# Build everything under clang's scan-build and store the HTML/plist reports in
+# reports/static-analysis. --status-bugs makes scan-build exit non-zero when it
+# finds a bug, so this target fails if the analyzer reports anything.
+analyze:
+	mkdir -p reports/static-analysis
+	scan-build -o reports/static-analysis/ -plist-html --status-bugs make -rRf make/analyze.mk all
+
+distclean: clean
+	rm -rf reports/static-analysis
@@ -0,0 +1,5 @@
+CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=address,undefined
+LDFLAGS=-fsanitize=address,undefined
+BUILD_DIR=build/asan/
+
+-include make/base.mk
@@ -0,0 +1,27 @@
+.PHONY: all clean
+
+CC?=clang
+LD?=clang
+CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
+LDFLAGS?=
+BUILD_DIR?=build/debug/
+
+SOURCES?=$(shell find src/ -type f -name '*.c')
+OBJECTS=$(patsubst %.c,$(BUILD_DIR)%.o,$(SOURCES))
+DEPENDENCIES=$(OBJECTS:.o=.d)
+TARGET?=oas
+
+all: $(BUILD_DIR)$(TARGET)
+	
+
+$(BUILD_DIR)$(TARGET): $(OBJECTS)
+	$(LD) $(LDFLAGS) -o $@ $^
+
+$(BUILD_DIR)%.o: %.c
+	mkdir -p $(dir $@)
+	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
+
+-include $(DEPENDENCIES)
+
+clean:
+	rm -rf $(BUILD_DIR)
@@ -0,0 +1 @@
+-include make/base.mk
@@ -0,0 +1,5 @@
+CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=memory
+LDFLAGS=-fsanitize=memory
+BUILD_DIR=build/msan/
+
+-include make/base.mk
@@ -0,0 +1,5 @@
+CFLAGS?=-Wall -Wextra -Wpedantic -Werror -O2 -std=c23 -flto -fomit-frame-pointer -DNDEBUG -D_POSIX_C_SOURCE=200809L
+LDFLAGS?=-flto -s -Wl,--gc-sections
+BUILD_DIR?=build/release/
+
+-include make/base.mk
@@ -0,0 +1,21 @@
+.PHONY: test
+
+CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fprofile-instr-generate -fcoverage-mapping
+LDFLAGS?=-fprofile-instr-generate
+BUILD_DIR=build/test/
+TARGET=oas-tests
+SOURCES = $(filter-out src/main.c, $(shell find src/ tests/ -type f -name '*.c'))
+-include make/base.mk
+
+test: $(BUILD_DIR)$(TARGET)
+	mkdir -p reports/coverage
+	LLVM_PROFILE_FILE="reports/coverage/tests.profraw" $(BUILD_DIR)$(TARGET)
+	llvm-profdata merge -sparse reports/coverage/tests.profraw -o reports/coverage/tests.profdata
+	llvm-cov show $(BUILD_DIR)$(TARGET) -instr-profile=reports/coverage/tests.profdata -format=html -output-dir=reports/coverage/html -ignore-filename-regex="tests/.*"
+	@echo "--"
+	@echo "Test coverage:"
+	@echo "file://$$(realpath reports/coverage/html/index.html)"
+	@echo "--"
+
+# This recipe replaces the clean recipe from make/base.mk (included above), so
+# it has to remove the test build directory itself in addition to the coverage
+# reports.
+clean:
+	rm -rf $(BUILD_DIR) reports/coverage
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <string.h>

-error_t *err_node_children_cap = &(error_t){
+error_t *const err_ast_children_cap = &(error_t){
    .message = "Failed to increase ast node children, max capacity reached"};

 error_t *ast_node_alloc(ast_node_t **output) {
@@ -17,10 +17,6 @@ error_t *ast_node_alloc(ast_node_t **output) {
    return nullptr;
 }

-void ast_node_free_value(ast_node_t *node) {
-    // TODO: decide how value ownership will work and clean it up here
-}
-
 void ast_node_free(ast_node_t *node) {
    if (node == nullptr)
        return;
@@ -30,8 +26,6 @@ void ast_node_free(ast_node_t *node) {
        free(node->children);
    }

-    ast_node_free_value(node);
-
    memset(node, 0, sizeof(ast_node_t));
    free(node);
 }
@@ -50,7 +44,7 @@ error_t *ast_node_alloc_children(ast_node_t *node) {

 error_t *ast_node_grow_cap(ast_node_t *node) {
    if (node->cap >= node_max_children_cap) {
-        return err_node_children_cap;
+        return err_ast_children_cap;
    }

    size_t new_cap = node->cap * 2;
@@ -123,6 +117,10 @@ const char *ast_node_id_to_cstr(node_id_t id) {
        return "NODE_PLUS_OR_MINUS";
    case NODE_SECTION_DIRECTIVE:
        return "NODE_SECTION_DIRECTIVE";
+    case NODE_IMPORT_DIRECTIVE:
+        return "NODE_IMPORT_DIRECTIVE";
+    case NODE_EXPORT_DIRECTIVE:
+        return "NODE_EXPORT_DIRECTIVE";
    case NODE_REGISTER:
        return "NODE_REGISTER";
    case NODE_SECTION:
@@ -157,6 +155,12 @@ const char *ast_node_id_to_cstr(node_id_t id) {
        return "NODE_ASTERISK";
    case NODE_DOT:
        return "NODE_DOT";
+    case NODE_NEWLINE:
+        return "NODE_NEWLINE";
+    case NODE_IMPORT:
+        return "NODE_IMPORT";
+    case NODE_EXPORT:
+        return "NODE_EXPORT";
    }
    assert(!"Unreachable, weird node id" && id);
    __builtin_unreachable();
@@ -172,7 +176,8 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
    }
    printf("%s", ast_node_id_to_cstr(node->id));

-    if (node->token_entry && node->token_entry->token.value) {
+    if (node->token_entry && node->token_entry->token.value &&
+        node->id != NODE_NEWLINE) {
        printf(" \"%s\"", node->token_entry->token.value);
    }
    printf("\n");
@@ -185,3 +190,18 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
 void ast_node_print(ast_node_t *node) {
    ast_node_print_internal(node, 0);
 }
+
+/**
+ * Removes every descendant of node whose id equals id.
+ *
+ * Children that match are released with ast_node_free. Children that do not
+ * match are pruned recursively and then compacted towards the front of the
+ * children array, preserving their relative order. The id of node itself is
+ * not inspected.
+ */
+void ast_node_prune(ast_node_t *node, node_id_t id) {
+    size_t kept = 0;
+    for (size_t idx = 0; idx < node->len; ++idx) {
+        ast_node_t *current = node->children[idx];
+        if (current->id != id) {
+            ast_node_prune(current, id);
+            node->children[kept++] = current;
+        } else {
+            ast_node_free(current);
+        }
+    }
+    node->len = kept;
+}
@@ -1,12 +1,16 @@
 #ifndef INCLUDE_SRC_AST_H_
 #define INCLUDE_SRC_AST_H_

+#include "data/registers.h"
 #include "error.h"
 #include "lexer.h"
 #include "tokenlist.h"
+#include <assert.h>
 #include <stddef.h>
 #include <stdint.h>

+extern error_t *const err_ast_children_cap;
+
 typedef enum node_id {
    NODE_INVALID,

@@ -27,10 +31,14 @@ typedef enum node_id {
    NODE_REGISTER_OFFSET,
    NODE_PLUS_OR_MINUS,
    NODE_SECTION_DIRECTIVE,
+    NODE_IMPORT_DIRECTIVE,
+    NODE_EXPORT_DIRECTIVE,

    // Validated primitives
    NODE_REGISTER,
    NODE_SECTION,
+    NODE_IMPORT,
+    NODE_EXPORT,

    // Primitive nodes
    NODE_IDENTIFIER,
@@ -48,6 +56,7 @@ typedef enum node_id {
    NODE_MINUS,
    NODE_ASTERISK,
    NODE_DOT,
+    NODE_NEWLINE,
 } node_id_t;

 typedef struct ast_node ast_node_t;
@@ -56,6 +65,37 @@ constexpr size_t node_default_children_cap = 8;
 /* 65K ought to be enough for anybody */
 constexpr size_t node_max_children_cap = 1 << 16;

+typedef struct number {
+    uint64_t value;
+    operand_size_t size;
+} number_t;
+
+typedef struct register_ {
+    register_id_t id;
+    operand_size_t size;
+} register_t;
+
+typedef struct opcode_encoding {
+    uint8_t buffer[32];
+    size_t len;
+} opcode_encoding_t;
+
+typedef struct instruction {
+    bool has_reference;
+    opcode_encoding_t encoding;
+    int64_t address;
+} instruction_t;
+
+typedef struct reference {
+    int64_t offset;
+    int64_t address;
+    operand_size_t size;
+} reference_t;
+
+typedef struct {
+    int64_t address;
+} label_t;
+
 struct ast_node {
    node_id_t id;
    tokenlist_entry_t *token_entry;
@@ -64,14 +104,39 @@ struct ast_node {
    ast_node_t **children;

    union {
-        struct {
-            uint64_t value;
-            int size;
-        } integer;
-        char *name;
+        register_t reg;
+        number_t number;
+        instruction_t instruction;
+        reference_t reference;
+        label_t label;
    } value;
 };

+/**
+ * Typed accessors for the value union of an ast node. Each accessor asserts
+ * that the node id matches the union member it exposes and returns a pointer
+ * to that member inside the node.
+ */
+
+/** Returns the register value of a NODE_REGISTER node. */
+static inline register_t *ast_node_register_value(ast_node_t *node) {
+    assert(node->id == NODE_REGISTER);
+    return &node->value.reg;
+}
+
+/** Returns the number value of a NODE_NUMBER node. */
+static inline number_t *ast_node_number_value(ast_node_t *node) {
+    assert(node->id == NODE_NUMBER);
+    return &node->value.number;
+}
+
+/** Returns the instruction value of a NODE_INSTRUCTION node. */
+static inline instruction_t *ast_node_instruction_value(ast_node_t *node) {
+    assert(node->id == NODE_INSTRUCTION);
+    return &node->value.instruction;
+}
+
+/** Returns the reference value of a NODE_LABEL_REFERENCE node. */
+static inline reference_t *ast_node_reference_value(ast_node_t *node) {
+    assert(node->id == NODE_LABEL_REFERENCE);
+    return &node->value.reference;
+}
+
+/** Returns the label value of a NODE_LABEL node. */
+static inline label_t *ast_node_label_value(ast_node_t *node) {
+    assert(node->id == NODE_LABEL);
+    return &node->value.label;
+}
+
 /**
 * @brief Allocates a new AST node
 *
@@ -117,4 +182,17 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
 */
 void ast_node_print(ast_node_t *node);

+/**
+ * Prune the children with a given id
+ *
+ * The tree is recursively visited and all child nodes of a given ID are pruned
+ * completely. If a node has the given id, it will get removed along with all
+ * its children, even if some of those children have different ids. The root
+ * node id is never checked so the tree is guaranteed to remain allocated and
+ * valid.
+ *
+ * @param node The root of the tree you want to prune
+ * @param id The id of the nodes you want to prune
+ */
+void ast_node_prune(ast_node_t *node, node_id_t id);
+
 #endif // INCLUDE_SRC_AST_H_
@@ -0,0 +1,6 @@
+#include "bytes.h"
+#include "error.h"
+
+error_t *const err_bytes_no_capacity = &(error_t){
+    .message = "Not enough capacity in bytes buffer",
+};
@@ -0,0 +1,60 @@
+#ifndef INCLUDE_SRC_BYTES_H_
+#define INCLUDE_SRC_BYTES_H_
+
+#include "error.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+extern error_t *const err_bytes_no_capacity;
+
+typedef struct bytes {
+    size_t len;
+    size_t cap;
+    uint8_t buffer[];
+} bytes_t;
+
+#define LOCAL_BYTES_ANONYMOUS(N)                                               \
+    &(struct {                                                                 \
+        size_t len;                                                            \
+        size_t cap;                                                            \
+        uint8_t buffer[(N)];                                                   \
+    }) {                                                                       \
+        0, (N), {}                                                             \
+    }
+
+// Yields a bytes_t pointer to a compound literal with len 0 and cap N.
+// The expansion is fully parenthesized and carries no trailing semicolon, so
+// it can be used as an initializer, as a function argument, or with ->.
+#define LOCAL_BYTES(N) ((bytes_t *)LOCAL_BYTES_ANONYMOUS(N))
+
+static inline error_t *bytes_append_uint8(bytes_t *bytes, uint8_t value) {
+    if (bytes->len >= bytes->cap)
+        return err_bytes_no_capacity;
+    bytes->buffer[bytes->len++] = value;
+    return nullptr;
+}
+
+/**
+ * Appends n bytes from buffer to the end of dst.
+ *
+ * The append succeeds whenever the n bytes fit in the remaining capacity,
+ * including the case where they fill the buffer exactly (matching
+ * bytes_append_uint8). The check is written as a subtraction so that a very
+ * large n cannot wrap the addition; it relies on len <= cap, which every
+ * append function in this header maintains.
+ *
+ * @param dst The destination buffer
+ * @param n The number of bytes to append
+ * @param buffer The bytes to append, at least n of them
+ * @return nullptr on success, err_bytes_no_capacity if the bytes do not fit
+ */
+static inline error_t *bytes_append_array(bytes_t *dst, size_t n,
+                                          uint8_t buffer[static n]) {
+    if (n > dst->cap - dst->len)
+        return err_bytes_no_capacity;
+    memcpy(dst->buffer + dst->len, buffer, n);
+    dst->len += n;
+    return nullptr;
+}
+
+static inline error_t *bytes_append_bytes(bytes_t *dst, bytes_t *src) {
+    return bytes_append_array(dst, src->len, src->buffer);
+}
+
+/**
+ * Appends the object representation of a 16 bit value to dst. The bytes are
+ * copied straight from memory, so they are written in host byte order.
+ */
+static inline error_t *bytes_append_uint16(bytes_t *dst, uint16_t value) {
+    return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
+}
+
+/**
+ * Appends the object representation of a 32 bit value to dst. The bytes are
+ * copied straight from memory, so they are written in host byte order.
+ */
+static inline error_t *bytes_append_uint32(bytes_t *dst, uint32_t value) {
+    return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
+}
+
+/**
+ * Appends the object representation of a 64 bit value to dst. The bytes are
+ * copied straight from memory, so they are written in host byte order.
+ */
+static inline error_t *bytes_append_uint64(bytes_t *dst, uint64_t value) {
+    return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
+}
+
+#endif // INCLUDE_SRC_BYTES_H_
@@ -0,0 +1,265 @@
+#include "opcodes.h"
+
+// clang-format off
+opcode_data_t *const opcodes[] = {
+    // RET
+    &(opcode_data_t) {
+        .mnemonic = "ret",
+        .opcode = 0xC3,
+        .opcode_extension = opcode_extension_none,
+        .operand_count = 0,
+    },
+    // RET imm16
+    &(opcode_data_t) {
+        .mnemonic = "ret",
+        .opcode = 0xC2,
+        .opcode_extension = opcode_extension_none,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
+        },
+    },
+    // PUSH imm8
+    &(opcode_data_t) {
+        .mnemonic = "push",
+        .opcode = 0x6A,
+        .opcode_extension = opcode_extension_none,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8},
+        },
+    },
+    // PUSH imm16
+    &(opcode_data_t) {
+        .mnemonic = "push",
+        .opcode = 0x68,
+        .opcode_extension = opcode_extension_none,
+        .operand_size_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16},
+        },
+    },
+    // PUSH imm32
+    &(opcode_data_t) {
+        .mnemonic = "push",
+        .opcode = 0x68,
+        .opcode_extension = opcode_extension_none,
+        .operand_size_prefix = false,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32},
+        },
+    },
+    // PUSH reg16
+    // The 16 bit form shares opcode 0x50+r with PUSH reg64 and is selected by
+    // the 0x66 operand size prefix; without it this entry would describe the
+    // same bytes as the reg64 entry below.
+    &(opcode_data_t) {
+        .mnemonic = "push",
+        .opcode = 0x50,
+        .opcode_extension = opcode_extension_none,
+        .encoding_class = ENCODING_OPCODE_REGISTER,
+        .operand_size_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
+        },
+    },
+    // PUSH reg64
+    &(opcode_data_t) {
+        .mnemonic = "push",
+        .opcode = 0x50,
+        .opcode_extension = opcode_extension_none,
+        .encoding_class = ENCODING_OPCODE_REGISTER,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
+        },
+    },
+    // NOT reg16
+    &(opcode_data_t) {
+        .mnemonic = "not",
+        .opcode = 0xF7,
+        .opcode_extension = 2,
+        .operand_size_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
+        },
+    },
+    // NOT reg32
+    &(opcode_data_t) {
+        .mnemonic = "not",
+        .opcode = 0xF7,
+        .opcode_extension = 2,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
+        },
+    },
+    // NOT reg64
+    &(opcode_data_t) {
+        .mnemonic = "not",
+        .opcode = 0xF7,
+        .opcode_extension = 2,
+        .rex_w_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
+        },
+    },
+
+    // NEG reg16
+    &(opcode_data_t) {
+        .mnemonic = "neg",
+        .opcode = 0xF7,
+        .opcode_extension = 3,
+        .operand_size_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
+        },
+    },
+    // NEG reg32
+    &(opcode_data_t) {
+        .mnemonic = "neg",
+        .opcode = 0xF7,
+        .opcode_extension = 3,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
+        },
+    },
+    // NEG reg64
+    &(opcode_data_t) {
+        .mnemonic = "neg",
+        .opcode = 0xF7,
+        .opcode_extension = 3,
+        .rex_w_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
+        },
+    },
+    // CALL rel32
+    &(opcode_data_t) {
+        .mnemonic = "call",
+        .opcode = 0xE8,
+        .opcode_extension = opcode_extension_none,
+        .encoding_class = ENCODING_DEFAULT,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
+        },
+    },
+    // CALL reg64
+    &(opcode_data_t) {
+        .mnemonic = "call",
+        .opcode = 0xFF,
+        .opcode_extension = 2,
+        .encoding_class = ENCODING_DEFAULT,
+        .rex_w_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
+        },
+    },
+    // CALL mem64
+    &(opcode_data_t) {
+        .mnemonic = "call",
+        .opcode = 0xFF,
+        .opcode_extension = 2,
+        .encoding_class = ENCODING_DEFAULT,
+        .rex_w_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 },
+        },
+    },
+    // JMP rel8 (short jump)
+    &(opcode_data_t) {
+        .mnemonic = "jmp",
+        .opcode = 0xEB,
+        .opcode_extension = opcode_extension_none,
+        .encoding_class = ENCODING_DEFAULT,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8 },
+        },
+    },
+
+    // JMP rel16
+    &(opcode_data_t) {
+        .mnemonic = "jmp",
+        .opcode = 0xE9,
+        .opcode_extension = opcode_extension_none,
+        .encoding_class = ENCODING_DEFAULT,
+        .operand_size_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
+        },
+    },
+
+    // JMP reg16
+    &(opcode_data_t) {
+        .mnemonic = "jmp",
+        .opcode = 0xFF,
+        .opcode_extension = 4,
+        .encoding_class = ENCODING_DEFAULT,
+        .operand_size_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
+        },
+    },
+
+    // JMP rel32 (near jump)
+    &(opcode_data_t) {
+        .mnemonic = "jmp",
+        .opcode = 0xE9,
+        .opcode_extension = opcode_extension_none,
+        .encoding_class = ENCODING_DEFAULT,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
+        },
+    },
+
+    // JMP reg32
+    &(opcode_data_t) {
+        .mnemonic = "jmp",
+        .opcode = 0xFF,
+        .opcode_extension = 4,
+        .encoding_class = ENCODING_DEFAULT,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
+        },
+    },
+
+    // JMP reg64
+    &(opcode_data_t) {
+        .mnemonic = "jmp",
+        .opcode = 0xFF,
+        .opcode_extension = 4,
+        .encoding_class = ENCODING_DEFAULT,
+        .rex_w_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
+        },
+    },
+
+    // JMP mem64
+    &(opcode_data_t) {
+        .mnemonic = "jmp",
+        .opcode = 0xFF,
+        .opcode_extension = 4,
+        .encoding_class = ENCODING_DEFAULT,
+        .rex_w_prefix = true,
+        .operand_count = 1,
+        .operands = {
+            { .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 },
+        },
+    },
+    nullptr,
+};
+
@@ -0,0 +1,56 @@
+#ifndef INCLUDE_DATA_OPCODES_H_
+#define INCLUDE_DATA_OPCODES_H_
+
+#include "../data/registers.h"
+#include <stddef.h>
+#include <stdint.h>
+
+constexpr uint8_t rex_prefix = 0x40;
+constexpr uint8_t rex_prefix_w = 0x48;
+constexpr uint8_t rex_prefix_r = 0x44;
+constexpr uint8_t rex_prefix_x = 0x42;
+constexpr uint8_t rex_prefix_b = 0x41;
+
+constexpr uint8_t operand_size_prefix = 0x66;
+constexpr uint8_t memory_size_prefix = 0x67;
+constexpr uint8_t lock_prefix = 0xF0;
+constexpr uint8_t repne_prefix = 0xF2;
+constexpr uint8_t rep_prefix = 0xF3;
+
+typedef enum encoding_class {
+    ENCODING_DEFAULT,         // use modrm+sib for registers and memory, append
+                              // immediates
+    ENCODING_OPCODE_REGISTER, // encode the register in the last 3 bits of the
+                              // opcode
+} encoding_class_t;
+
+typedef enum operand_kind {
+    OPERAND_REGISTER,
+    OPERAND_MEMORY,
+    OPERAND_IMMEDIATE,
+} operand_kind_t;
+
+typedef struct operand_info {
+    operand_kind_t kind;
+    operand_size_t size;
+} operand_info_t;
+
+constexpr uint8_t opcode_extension_none = 0xFF;
+
+typedef struct opcode_data {
+    const char *mnemonic;
+
+    uint16_t opcode;
+    uint8_t opcode_extension; // 3 bits for the opcode extension in the reg
+                              // field of a modr/m byte
+    encoding_class_t encoding_class;
+    bool operand_size_prefix;
+    bool address_size_prefix;
+    bool rex_w_prefix;
+    size_t operand_count;
+    operand_info_t operands[3];
+} opcode_data_t;
+
+extern opcode_data_t *const opcodes[];
+
+#endif // INCLUDE_DATA_OPCODES_H_
@@ -0,0 +1,92 @@
+#include "registers.h"
+
+register_data_t *const registers[] = {
+    // Instruction pointer
+    &(register_data_t){"rip",  REG_RIP, OPERAND_SIZE_64},
+    &(register_data_t){"eip",  REG_RIP, OPERAND_SIZE_32},
+    &(register_data_t){"ip",   REG_RIP, OPERAND_SIZE_16},
+
+    // 64-bit general purpose registers
+    &(register_data_t){"rax",  REG_A,   OPERAND_SIZE_64},
+    &(register_data_t){"rcx",  REG_C,   OPERAND_SIZE_64},
+    &(register_data_t){"rdx",  REG_D,   OPERAND_SIZE_64},
+    &(register_data_t){"rbx",  REG_B,   OPERAND_SIZE_64},
+    &(register_data_t){"rsp",  REG_SP,  OPERAND_SIZE_64},
+    &(register_data_t){"rbp",  REG_BP,  OPERAND_SIZE_64},
+    &(register_data_t){"rsi",  REG_SI,  OPERAND_SIZE_64},
+    &(register_data_t){"rdi",  REG_DI,  OPERAND_SIZE_64},
+    &(register_data_t){"r8",   REG_8,   OPERAND_SIZE_64},
+    &(register_data_t){"r9",   REG_9,   OPERAND_SIZE_64},
+    &(register_data_t){"r10",  REG_10,  OPERAND_SIZE_64},
+    &(register_data_t){"r11",  REG_11,  OPERAND_SIZE_64},
+    &(register_data_t){"r12",  REG_12,  OPERAND_SIZE_64},
+    &(register_data_t){"r13",  REG_13,  OPERAND_SIZE_64},
+    &(register_data_t){"r14",  REG_14,  OPERAND_SIZE_64},
+    &(register_data_t){"r15",  REG_15,  OPERAND_SIZE_64},
+
+    // 32-bit general purpose registers
+    &(register_data_t){"eax",  REG_A,   OPERAND_SIZE_32},
+    &(register_data_t){"ecx",  REG_C,   OPERAND_SIZE_32},
+    &(register_data_t){"edx",  REG_D,   OPERAND_SIZE_32},
+    &(register_data_t){"ebx",  REG_B,   OPERAND_SIZE_32},
+    &(register_data_t){"esp",  REG_SP,  OPERAND_SIZE_32},
+    &(register_data_t){"ebp",  REG_BP,  OPERAND_SIZE_32},
+    &(register_data_t){"esi",  REG_SI,  OPERAND_SIZE_32},
+    &(register_data_t){"edi",  REG_DI,  OPERAND_SIZE_32},
+    &(register_data_t){"r8d",  REG_8,   OPERAND_SIZE_32},
+    &(register_data_t){"r9d",  REG_9,   OPERAND_SIZE_32},
+    &(register_data_t){"r10d", REG_10,  OPERAND_SIZE_32},
+    &(register_data_t){"r11d", REG_11,  OPERAND_SIZE_32},
+    &(register_data_t){"r12d", REG_12,  OPERAND_SIZE_32},
+    &(register_data_t){"r13d", REG_13,  OPERAND_SIZE_32},
+    &(register_data_t){"r14d", REG_14,  OPERAND_SIZE_32},
+    &(register_data_t){"r15d", REG_15,  OPERAND_SIZE_32},
+
+    // 16-bit general purpose registers
+    &(register_data_t){"ax",   REG_A,   OPERAND_SIZE_16},
+    &(register_data_t){"cx",   REG_C,   OPERAND_SIZE_16},
+    &(register_data_t){"dx",   REG_D,   OPERAND_SIZE_16},
+    &(register_data_t){"bx",   REG_B,   OPERAND_SIZE_16},
+    &(register_data_t){"sp",   REG_SP,  OPERAND_SIZE_16},
+    &(register_data_t){"bp",   REG_BP,  OPERAND_SIZE_16},
+    &(register_data_t){"si",   REG_SI,  OPERAND_SIZE_16},
+    &(register_data_t){"di",   REG_DI,  OPERAND_SIZE_16},
+    &(register_data_t){"r8w",  REG_8,   OPERAND_SIZE_16},
+    &(register_data_t){"r9w",  REG_9,   OPERAND_SIZE_16},
+    &(register_data_t){"r10w", REG_10,  OPERAND_SIZE_16},
+    &(register_data_t){"r11w", REG_11,  OPERAND_SIZE_16},
+    &(register_data_t){"r12w", REG_12,  OPERAND_SIZE_16},
+    &(register_data_t){"r13w", REG_13,  OPERAND_SIZE_16},
+    &(register_data_t){"r14w", REG_14,  OPERAND_SIZE_16},
+    &(register_data_t){"r15w", REG_15,  OPERAND_SIZE_16},
+
+    // 8-bit general purpose registers (low byte)
+    &(register_data_t){"al",   REG_A,   OPERAND_SIZE_8 },
+    &(register_data_t){"cl",   REG_C,   OPERAND_SIZE_8 },
+    &(register_data_t){"dl",   REG_D,   OPERAND_SIZE_8 },
+    &(register_data_t){"bl",   REG_B,   OPERAND_SIZE_8 },
+    &(register_data_t){"spl",  REG_SP,  OPERAND_SIZE_8 },
+    &(register_data_t){"bpl",  REG_BP,  OPERAND_SIZE_8 },
+    &(register_data_t){"sil",  REG_SI,  OPERAND_SIZE_8 },
+    &(register_data_t){"dil",  REG_DI,  OPERAND_SIZE_8 },
+    &(register_data_t){"r8b",  REG_8,   OPERAND_SIZE_8 },
+    &(register_data_t){"r9b",  REG_9,   OPERAND_SIZE_8 },
+    &(register_data_t){"r10b", REG_10,  OPERAND_SIZE_8 },
+    &(register_data_t){"r11b", REG_11,  OPERAND_SIZE_8 },
+    &(register_data_t){"r12b", REG_12,  OPERAND_SIZE_8 },
+    &(register_data_t){"r13b", REG_13,  OPERAND_SIZE_8 },
+    &(register_data_t){"r14b", REG_14,  OPERAND_SIZE_8 },
+    &(register_data_t){"r15b", REG_15,  OPERAND_SIZE_8 },
+
+    // x87 floating point registers
+    &(register_data_t){"st0",  REG_ST0, OPERAND_SIZE_80},
+    &(register_data_t){"st1",  REG_ST1, OPERAND_SIZE_80},
+    &(register_data_t){"st2",  REG_ST2, OPERAND_SIZE_80},
+    &(register_data_t){"st3",  REG_ST3, OPERAND_SIZE_80},
+    &(register_data_t){"st4",  REG_ST4, OPERAND_SIZE_80},
+    &(register_data_t){"st5",  REG_ST5, OPERAND_SIZE_80},
+    &(register_data_t){"st6",  REG_ST6, OPERAND_SIZE_80},
+    &(register_data_t){"st7",  REG_ST7, OPERAND_SIZE_80},
+
+    nullptr,
+};
@@ -0,0 +1,82 @@
+#ifndef INCLUDE_DATA_REGISTERS_H_
+#define INCLUDE_DATA_REGISTERS_H_
+
+typedef enum operand_size {
+    OPERAND_SIZE_INVALID = 0,
+
+    OPERAND_SIZE_8 = 1 << 0,
+    OPERAND_SIZE_16 = 1 << 1,
+    OPERAND_SIZE_32 = 1 << 2,
+    OPERAND_SIZE_64 = 1 << 3,
+
+    OPERAND_SIZE_80 = 1 << 4,
+    OPERAND_SIZE_128 = 1 << 5,
+    OPERAND_SIZE_256 = 1 << 6,
+    OPERAND_SIZE_512 = 1 << 7,
+} operand_size_t;
+
+/**
+ * Maps a width in bits to the matching operand_size_t flag.
+ *
+ * @param bits The operand width in bits
+ * @return The matching OPERAND_SIZE_* value for 8, 16, 32, 64, 80, 128, 256
+ *         and 512, or OPERAND_SIZE_INVALID for any other width
+ */
+static inline operand_size_t bits_to_operand_size(int bits) {
+    if (bits == 8)
+        return OPERAND_SIZE_8;
+    if (bits == 16)
+        return OPERAND_SIZE_16;
+    if (bits == 32)
+        return OPERAND_SIZE_32;
+    if (bits == 64)
+        return OPERAND_SIZE_64;
+    if (bits == 80)
+        return OPERAND_SIZE_80;
+    if (bits == 128)
+        return OPERAND_SIZE_128;
+    if (bits == 256)
+        return OPERAND_SIZE_256;
+    if (bits == 512)
+        return OPERAND_SIZE_512;
+    return OPERAND_SIZE_INVALID;
+}
+
+typedef enum register_id {
+    // Special registers
+    REG_RIP = -1,
+
+    // General purpose registers
+    REG_A = 0x0000,
+    REG_C,
+    REG_D,
+    REG_B,
+    REG_SP,
+    REG_BP,
+    REG_SI,
+    REG_DI,
+
+    REG_8,
+    REG_9,
+    REG_10,
+    REG_11,
+    REG_12,
+    REG_13,
+    REG_14,
+    REG_15,
+
+    REG_ST0 = 0x1000,
+    REG_ST1,
+    REG_ST2,
+    REG_ST3,
+    REG_ST4,
+    REG_ST5,
+    REG_ST6,
+    REG_ST7,
+} register_id_t;
+
+typedef struct register_data {
+    const char *name;
+    register_id_t id;
+    operand_size_t size;
+} register_data_t;
+
+extern register_data_t *const registers[];
+
+#endif // INCLUDE_DATA_REGISTERS_H_
@@ -0,0 +1,711 @@
+#include "encoder.h"
+#include "../bytes.h"
+#include "../data/opcodes.h"
+#include "symbols.h"
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+/**
+ * General encoder flow:
+ *
+ * There are 2 major passes the encoder does:
+ *
+ * First pass:
+ *   - Run through the AST and collect information:
+ *     - Set register values
+ *     - Parse/set number values
+ *     - Mark all instructions that use label references
+ *   - Encode all instructions that don't use label references
+ *   - Update addresses of all labels and instructions. Use an estimated
+ *     instruction size for those instructions that use label references.
+ *
+ * Second pass:
+ *   - Run through the AST for all instructions that use label references and
+ *     collect size information using the estimated addresses from pass 1
+ *   - Encode label references with the estimated addresses, this fixes their
+ *     size.
+ *   - Update all addresses
+ *
+ * Iteration:
+ *   - Repeat the second pass until addresses converge
+ */
+
+// Errors returned by the encoder, declared extern in encoder.h
+error_t *const err_encoder_invalid_register =
+    &(error_t){.message = "Invalid register"};
+error_t *const err_encoder_number_overflow =
+    &(error_t){.message = "Number overflows the storage"};
+error_t *const err_encoder_invalid_number_format =
+    &(error_t){.message = "Invalid number format"};
+error_t *const err_encoder_invalid_size_suffix =
+    &(error_t){.message = "Invalid number size suffix"};
+error_t *const err_encoder_unknown_symbol_reference =
+    &(error_t){.message = "Referenced an unknown symbol"};
+error_t *const err_encoder_no_encoding_found =
+    &(error_t){.message = "No encoding found for instruction"};
+error_t *const err_encoder_not_implemented =
+    &(error_t){.message = "Implementation for this opcode is missing"};
+error_t *const err_encoder_unexpected_length =
+    &(error_t){.message = "Unexpectedly long encoding"};
+
+/**
+ * Allocate an encoder for the given AST together with its symbol table.
+ *
+ * @param output receives the new encoder, or nullptr on failure
+ * @param ast    root node the encoder will work on, stored as-is
+ * @return nullptr on success, otherwise the allocation error
+ */
+error_t *encoder_alloc(encoder_t **output, ast_node_t *ast) {
+    *output = nullptr;
+
+    encoder_t *result = calloc(1, sizeof(*result));
+    if (!result)
+        return err_allocation_failed;
+
+    error_t *err = symbol_table_alloc(&result->symbols);
+    if (err != nullptr) {
+        free(result);
+        return err;
+    }
+
+    result->ast = ast;
+    *output = result;
+    return nullptr;
+}
+
+/**
+ * Release an encoder and its symbol table. The AST is left untouched.
+ * Passing nullptr is a no-op.
+ */
+void encoder_free(encoder_t *encoder) {
+    if (encoder != nullptr) {
+        symbol_table_free(encoder->symbols);
+        free(encoder);
+    }
+}
+
+/**
+ * Tell whether a node must be recorded in the symbol table: labels, label
+ * references and import/export directives.
+ */
+bool encoder_is_symbols_node(ast_node_t *node) {
+    const bool is_label =
+        node->id == NODE_LABEL || node->id == NODE_LABEL_REFERENCE;
+    const bool is_directive = node->id == NODE_EXPORT_DIRECTIVE ||
+                              node->id == NODE_IMPORT_DIRECTIVE;
+    return is_label || is_directive;
+}
+
+/**
+ * Determine the numeric base of a NODE_NUMBER from the id of its first child.
+ *
+ * @return 2, 8, 10 or 16; any other child id is a programming error
+ */
+int encoder_get_number_base(ast_node_t *number) {
+    const ast_node_t *digits = number->children[0];
+
+    if (digits->id == NODE_BINARY)
+        return 2;
+    if (digits->id == NODE_OCTAL)
+        return 8;
+    if (digits->id == NODE_DECIMAL)
+        return 10;
+    if (digits->id == NODE_HEXADECIMAL)
+        return 16;
+
+    assert(false);
+    __builtin_unreachable();
+}
+
+/**
+ * Check a number size suffix. Zero means "no suffix given"; otherwise only
+ * 8, 16, 32 and 64 bits are accepted.
+ */
+bool is_valid_size_suffix(int bits) {
+    return bits == 0 || bits == 8 || bits == 16 || bits == 32 || bits == 64;
+}
+
+/**
+ * Check whether an unsigned value needs more than the given number of bits.
+ *
+ * @param value the value to check
+ * @param bits  storage width in bits; 0 means "unspecified"
+ * @return true if value does not fit. Widths of 64 and up, and non-positive
+ *         widths, never overflow; this also keeps the shift below defined.
+ */
+bool is_overflow(uint64_t value, int bits) {
+    if (bits <= 0 || bits >= 64)
+        return false;
+
+    uint64_t max_value = (1ULL << bits) - 1;
+    return value > max_value;
+}
+
+/**
+ * Compute the set of operand sizes a number may be encoded with.
+ *
+ * @param value the parsed number
+ * @param bits  explicit size suffix, or 0 when none was given
+ * @return exactly the suffix size when one was given, otherwise a mask of
+ *         every size from 8 to 64 bits that can hold the value
+ */
+operand_size_t encoder_get_size_mask(uint64_t value, int bits) {
+    // an explicit suffix pins the number to a single size
+    if (bits != 0)
+        return bits_to_operand_size(bits);
+
+    operand_size_t fits = OPERAND_SIZE_64;
+    if (value <= UINT32_MAX)
+        fits |= OPERAND_SIZE_32;
+    if (value <= UINT16_MAX)
+        fits |= OPERAND_SIZE_16;
+    if (value <= UINT8_MAX)
+        fits |= OPERAND_SIZE_8;
+    return fits;
+}
+
+/**
+ * Parse the text of a NODE_NUMBER and store its value and size mask in the
+ * node.
+ *
+ * The text is a number in base 2, 8, 10 or 16 (all but base 10 carry a two
+ * character prefix) optionally followed by ":<bits>" to force a size.
+ *
+ * @return nullptr on success, otherwise one of
+ *         err_encoder_number_overflow, err_encoder_invalid_number_format or
+ *         err_encoder_invalid_size_suffix
+ */
+error_t *encoder_set_number_value(ast_node_t *node) {
+    assert(node->id == NODE_NUMBER);
+    assert(node->children[0]);
+    const char *number = node->children[0]->token_entry->token.value;
+    int base = encoder_get_number_base(node);
+
+    if (base != 10)
+        number += 2; // all except base 10 use a 0x, 0o or 0b prefix
+
+    char *endptr;
+    errno = 0;
+    uint64_t value = strtoull(number, &endptr, base);
+
+    if (errno == ERANGE)
+        return err_encoder_number_overflow;
+
+    if (endptr == number)
+        return err_encoder_invalid_number_format;
+
+    int bits = 0;
+    if (*endptr == ':') {
+        const char *suffix = endptr + 1;
+
+        long parsed = strtol(suffix, &endptr, 10);
+
+        if (endptr == suffix)
+            return err_encoder_invalid_number_format;
+
+        // Narrowing an out of range long to int could wrap around to one of
+        // the accepted sizes. Map everything outside 0..64 to a value that
+        // is_valid_size_suffix rejects instead.
+        bits = (parsed < 0 || parsed > 64) ? -1 : (int)parsed;
+    }
+
+    if (*endptr != '\0')
+        return err_encoder_invalid_number_format;
+
+    if (!is_valid_size_suffix(bits))
+        return err_encoder_invalid_size_suffix;
+
+    if (is_overflow(value, bits))
+        return err_encoder_number_overflow;
+
+    node->value.number.value = value;
+    node->value.number.size = encoder_get_size_mask(value, bits);
+
+    return nullptr;
+}
+
+/**
+ * Look up the token text of a NODE_REGISTER in the register table and store
+ * the register id and size in the node.
+ *
+ * @return nullptr on success, err_encoder_invalid_register if the name is not
+ *         in the table
+ */
+error_t *encoder_set_register_value(ast_node_t *node) {
+    assert(node->id == NODE_REGISTER);
+
+    const char *name = node->token_entry->token.value;
+
+    // the table is terminated by a nullptr entry
+    register_data_t *const *entry = registers;
+    while (*entry != nullptr && strcmp(name, (*entry)->name) != 0)
+        ++entry;
+
+    if (*entry == nullptr)
+        return err_encoder_invalid_register;
+
+    node->value.reg.id = (*entry)->id;
+    node->value.reg.size = (*entry)->size;
+    return nullptr;
+}
+
+/**
+ * Store a three bit opcode extension in the reg field of a modrm byte,
+ * leaving the other fields as they are.
+ */
+static inline uint8_t modrm_extension(uint8_t modrm, uint8_t extension) {
+    assert(extension != opcode_extension_none);
+    assert((extension & 0b111) == extension);
+
+    const uint8_t others = modrm & ~modrm_reg_mask;
+    return others | extension << 3;
+}
+
+/**
+ * Add the rex bit belonging to the modrm reg field when the register id has
+ * bit 3 set; otherwise return rex unchanged.
+ */
+static inline uint8_t modrm_reg_rex(uint8_t rex, register_id_t id) {
+    return (id & 0b1000) ? (rex | rex_prefix_r) : rex;
+}
+
+/**
+ * Place the low three bits of a register id in the modrm reg field. Bit 3 of
+ * the id is not stored here, pair every call with modrm_reg_rex.
+ */
+static inline uint8_t modrm_reg(uint8_t modrm, register_id_t id) {
+    const uint8_t field = (id & 0b111) << 3;
+    return (modrm & ~modrm_reg_mask) | field;
+}
+
+/**
+ * Add the rex bit belonging to the modrm rm field when the register id has
+ * bit 3 set; otherwise return rex unchanged.
+ */
+static inline uint8_t modrm_rm_rex(uint8_t rex, register_id_t id) {
+    return (id & 0b1000) ? (rex | rex_prefix_b) : rex;
+}
+
+/**
+ * Place the low three bits of a register id in the modrm rm field. Only valid
+ * when the mod field selects register mode. Bit 3 of the id is not stored
+ * here, pair every call with modrm_rm_rex.
+ */
+static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) {
+    assert((modrm & modrm_mod_mask) == modrm_mod_register);
+
+    const uint8_t field = id & 0b111;
+    return (modrm & ~modrm_rm_mask) | field;
+}
+
+/**
+ * Recursively gather first pass information for a node and its children.
+ *
+ * - symbol nodes are recorded in the symbol table and, when they sit inside
+ *   an instruction, flag that instruction as having a reference
+ * - number nodes get their value and size parsed
+ * - register nodes get their id and size resolved
+ *
+ * @param node      the node currently visited
+ * @param statement the top level statement that contains node
+ * @return nullptr on success, otherwise the first error encountered
+ */
+error_t *encoder_collect_info(encoder_t *encoder, ast_node_t *node,
+                              ast_node_t *statement) {
+    error_t *err = nullptr;
+
+    if (node->id == NODE_NUMBER) {
+        err = encoder_set_number_value(node);
+    } else if (node->id == NODE_REGISTER) {
+        err = encoder_set_register_value(node);
+    } else if (encoder_is_symbols_node(node)) {
+        err = symbol_table_update(encoder->symbols, node, statement);
+        // the flag is set even if the table update reported an error
+        if (statement->id == NODE_INSTRUCTION)
+            statement->value.instruction.has_reference = true;
+    }
+
+    // stop descending at the first child that reports an error
+    for (size_t i = 0; err == nullptr && i < node->len; ++i)
+        err = encoder_collect_info(encoder, node->children[i], statement);
+
+    return err;
+}
+
+/**
+ * Check whether an AST operand satisfies the operand description of an
+ * opcode.
+ *
+ * - registers must have exactly the described size
+ * - memory operands match on node kind alone
+ * - immediates match when the size mask of the number or label reference
+ *   shares at least one size with the description
+ *
+ * The description is only read. It belongs to the shared opcode table and
+ * must not be narrowed by a match attempt.
+ */
+bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
+    switch (info->kind) {
+    case OPERAND_REGISTER:
+        return operand->id == NODE_REGISTER &&
+               ast_node_register_value(operand)->size == info->size;
+    case OPERAND_MEMORY:
+        return operand->id == NODE_MEMORY;
+    case OPERAND_IMMEDIATE: {
+        if (operand->id != NODE_IMMEDIATE)
+            return false;
+        ast_node_t *child = operand->children[0];
+
+        if (child->id == NODE_NUMBER)
+            return (ast_node_number_value(child)->size & info->size) != 0;
+        if (child->id == NODE_LABEL_REFERENCE)
+            return (ast_node_reference_value(child)->size & info->size) != 0;
+
+        // any other kind of immediate child can't match
+        return false;
+    } // end OPERAND_IMMEDIATE case
+    }
+    assert(false && "unreachable");
+    __builtin_unreachable();
+}
+
+/**
+ * Check whether an opcode table entry fits a mnemonic and its operands: the
+ * mnemonic must be equal, the operand count must be equal and every operand
+ * must match its description.
+ */
+bool is_opcode_match(opcode_data_t *opcode, const char *mnemonic,
+                     ast_node_t *operands) {
+    const bool same_mnemonic = strcmp(opcode->mnemonic, mnemonic) == 0;
+    if (!same_mnemonic || opcode->operand_count != operands->len)
+        return false;
+
+    // advance past every matching operand, stopping at the first mismatch
+    size_t matched = 0;
+    while (matched < operands->len &&
+           is_operand_match(&opcode->operands[matched],
+                            operands->children[matched]))
+        ++matched;
+
+    return matched == operands->len;
+}
+
+/**
+ * Find the first opcode table entry that matches an instruction.
+ *
+ * @param instruction the instruction node, its first child holds the mnemonic
+ * @param operands    the operands node of the instruction
+ * @param opcode_out  receives the matching entry, untouched on failure
+ * @return nullptr on success, err_encoder_no_encoding_found otherwise
+ */
+error_t *encoder_get_opcode_data(ast_node_t *instruction, ast_node_t *operands,
+                                 opcode_data_t **opcode_out) {
+    const char *mnemonic = instruction->children[0]->token_entry->token.value;
+
+    for (size_t i = 0; opcodes[i]; ++i) {
+        if (!is_opcode_match(opcodes[i], mnemonic, operands))
+            continue;
+        *opcode_out = opcodes[i];
+        return nullptr;
+    }
+    return err_encoder_no_encoding_found;
+}
+
+/**
+ * Placeholder for encoding instructions with two operands.
+ *
+ * @return always err_encoder_not_implemented
+ */
+error_t *encode_two_operand(encoder_t *encoder, opcode_data_t *opcode,
+                            ast_node_t *operands, bytes_t *encoding,
+                            uint8_t *rex) {
+    // nothing is used yet, silence the unused parameter warnings
+    (void)rex;
+    (void)encoding;
+    (void)operands;
+    (void)opcode;
+    (void)encoder;
+
+    assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");
+    return err_encoder_not_implemented;
+}
+
+/**
+ * Encode a single register operand directly in the opcode: the low three bits
+ * of the register id are or-ed into the last opcode byte in the buffer.
+ *
+ * @param operands operands node, its first child must be the register
+ * @param encoding buffer that already holds the opcode byte(s)
+ * @param rex      rex prefix under construction, updated for registers that
+ *                 have bit 3 of their id set
+ * @return always nullptr
+ */
+error_t *encode_one_register_in_opcode(encoder_t *encoder,
+                                       opcode_data_t *opcode,
+                                       ast_node_t *operands, bytes_t *encoding,
+                                       uint8_t *rex) {
+    (void)encoder;
+    (void)opcode;
+    assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");
+
+    register_id_t id = ast_node_register_value(operands->children[0])->id;
+    encoding->buffer[encoding->len - 1] |= id & 0b111;
+
+    // A register stored in the opcode byte is extended by REX.B, the same bit
+    // that extends the modrm rm field. REX.R only extends the modrm reg field.
+    if ((id & 0b1000) > 0) {
+        *rex |= rex_prefix_b;
+    }
+    return nullptr;
+}
+
+/**
+ * Encode a single register operand in a modrm byte (register mode) and append
+ * that byte to the encoding.
+ *
+ * @param opcode   selects where the register goes, see the comments below
+ * @param operands operands node holding exactly one register
+ * @param encoding buffer the modrm byte is appended to
+ * @param rex      rex prefix under construction
+ * @return the result of appending the modrm byte
+ */
+error_t *encode_one_register(encoder_t *encoder, opcode_data_t *opcode,
+                             ast_node_t *operands, bytes_t *encoding,
+                             uint8_t *rex) {
+    (void)encoder;
+    assert(operands->len == 1);
+    assert(operands->children[0]->id == NODE_REGISTER);
+
+    const register_id_t reg = ast_node_register_value(operands->children[0])->id;
+    const bool has_extension =
+        opcode->opcode_extension != opcode_extension_none;
+
+    uint8_t modrm = modrm_mod_register;
+
+    if (!has_extension) {
+        // The register goes in the reg field.
+        // NOTE:
+        // this case likely doesn't exist at all; opcodes that take a single
+        // register in modr/m probably all have an opcode extension
+        *rex = modrm_reg_rex(*rex, reg);
+        modrm = modrm_reg(modrm, reg);
+    } else {
+        // The opcode extension occupies the reg field, the register goes in
+        // the rm field.
+        *rex = modrm_rm_rex(*rex, reg);
+        modrm = modrm_extension(modrm, opcode->opcode_extension);
+        modrm = modrm_rm(modrm, reg);
+    }
+
+    return bytes_append_uint8(encoding, modrm);
+}
+
+/**
+ * Append a single immediate operand to the encoding, using the size of the
+ * opcode's first operand description.
+ *
+ * Numbers are appended by value. Label references are appended as absolute
+ * address for 64 bit operands and as offset for 8, 16 and 32 bit operands.
+ *
+ * @return the result of the append, any other operand size is a programming
+ *         error
+ */
+error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
+                              ast_node_t *operands, bytes_t *encoding,
+                              uint8_t *rex) {
+    (void)encoder;
+    (void)rex;
+
+    assert(operands->len == 1);
+    ast_node_t *operand = operands->children[0];
+    assert(operand->id == NODE_IMMEDIATE);
+    assert(operand->len == 1);
+    ast_node_t *immediate = operand->children[0];
+    assert(immediate->id == NODE_NUMBER ||
+           immediate->id == NODE_LABEL_REFERENCE);
+
+    const operand_size_t size = opcode->operands[0].size;
+
+    if (immediate->id != NODE_NUMBER) {
+        reference_t *reference = ast_node_reference_value(immediate);
+        if (size == OPERAND_SIZE_64)
+            return bytes_append_uint64(encoding, reference->address);
+        if (size == OPERAND_SIZE_32)
+            return bytes_append_uint32(encoding, reference->offset);
+        if (size == OPERAND_SIZE_16)
+            return bytes_append_uint16(encoding, reference->offset);
+        if (size == OPERAND_SIZE_8)
+            return bytes_append_uint8(encoding, reference->offset);
+        assert(false && "intentionally unhandled");
+        __builtin_unreachable();
+    }
+
+    uint64_t value = ast_node_number_value(immediate)->value;
+    if (size == OPERAND_SIZE_8)
+        return bytes_append_uint8(encoding, value);
+    if (size == OPERAND_SIZE_16)
+        return bytes_append_uint16(encoding, value);
+    if (size == OPERAND_SIZE_32)
+        return bytes_append_uint32(encoding, value);
+    if (size == OPERAND_SIZE_64)
+        return bytes_append_uint64(encoding, value);
+
+    assert(false && "intentionally unhandled");
+    return nullptr;
+}
+
+/**
+ * Placeholder for encoding a single memory operand.
+ *
+ * @return always err_encoder_not_implemented
+ */
+error_t *encode_one_memory(encoder_t *encoder, opcode_data_t *opcode,
+                           ast_node_t *operands, bytes_t *encoding,
+                           uint8_t *rex) {
+    // nothing is used yet, silence the unused parameter warnings
+    (void)rex;
+    (void)encoding;
+    (void)operands;
+    (void)opcode;
+    (void)encoder;
+
+    return err_encoder_not_implemented;
+}
+
+/**
+ * Encode the operand of a one operand instruction by dispatching on the kind
+ * of the opcode's first operand description.
+ *
+ * @return the result of the selected encoder, or err_encoder_not_implemented
+ *         for an operand kind that has no encoder
+ */
+error_t *encode_one_operand(encoder_t *encoder, opcode_data_t *opcode,
+                            ast_node_t *operands, bytes_t *encoding,
+                            uint8_t *rex) {
+    switch (opcode->operands[0].kind) {
+    case OPERAND_REGISTER:
+        if (opcode->encoding_class == ENCODING_OPCODE_REGISTER)
+            return encode_one_register_in_opcode(encoder, opcode, operands,
+                                                 encoding, rex);
+        else
+            return encode_one_register(encoder, opcode, operands, encoding,
+                                       rex);
+    case OPERAND_MEMORY:
+        return encode_one_memory(encoder, opcode, operands, encoding, rex);
+    case OPERAND_IMMEDIATE:
+        return encode_one_immediate(encoder, opcode, operands, encoding, rex);
+    }
+
+    // Never fall off the end of a non-void function when the kind holds a
+    // value outside the cases above.
+    return err_encoder_not_implemented;
+}
+
+/**
+ * Encode one instruction and store the bytes in its NODE_INSTRUCTION value.
+ *
+ * The opcode and operands are first encoded into a local buffer, this also
+ * collects the rex bits the operands need. The final output is then built as
+ * address size prefix, operand size prefix, rex prefix and the local buffer,
+ * in that order and each only when required.
+ *
+ * @param instruction instruction node, child 0 is the mnemonic and child 1
+ *                    holds the operands
+ * @return nullptr on success, otherwise the error from opcode lookup,
+ *         operand encoding or the length check
+ */
+error_t *encoder_encode_instruction(encoder_t *encoder,
+                                    ast_node_t *instruction) {
+    ast_node_t *operands = instruction->children[1];
+
+    opcode_data_t *opcode = nullptr;
+    error_t *err = encoder_get_opcode_data(instruction, operands, &opcode);
+    if (err)
+        return err;
+
+    uint8_t rex = 0;
+    bytes_t *encoding = LOCAL_BYTES(32);
+
+    // two byte opcodes are emitted high byte first
+    if (opcode->opcode > 0xFF &&
+        (err = bytes_append_uint8(encoding, opcode->opcode >> 8)))
+        return err;
+    if ((err = bytes_append_uint8(encoding, opcode->opcode & 0xFF)))
+        return err;
+
+    // NOTE:operand encoders all expect the opcode to be in the buffer already.
+    // Some of them rely on this to encode the register value in the opcode
+    // byte.
+    switch (opcode->operand_count) {
+    case 0:
+        break;
+    case 1:
+        err = encode_one_operand(encoder, opcode, operands, encoding, &rex);
+        break;
+    case 2:
+        err = encode_two_operand(encoder, opcode, operands, encoding, &rex);
+        break;
+    default:
+        err = err_encoder_not_implemented;
+    }
+    if (err)
+        return err;
+
+    // produce the actual encoding output in the NODE_INSTRUCTION value
+    instruction_t *instruction_value = ast_node_instruction_value(instruction);
+    uint8_t *output = instruction_value->encoding.buffer;
+    size_t output_len = 0;
+
+    // Handle prefixes. The W bit is merged into rex, assigning it would throw
+    // away the register extension bits the operand encoders just set.
+    if (opcode->rex_w_prefix)
+        rex |= rex_prefix_w;
+    if (opcode->address_size_prefix)
+        output[output_len++] = memory_size_prefix;
+    if (opcode->operand_size_prefix)
+        output[output_len++] = operand_size_prefix;
+    if (rex > 0)
+        output[output_len++] = rex;
+
+    // copy the encoded opcode and operands
+    if (encoding->len > 20)
+        return err_encoder_unexpected_length;
+    memcpy(output + output_len, encoding->buffer, encoding->len);
+    output_len += encoding->len;
+
+    instruction_value->encoding.len = output_len;
+
+    return nullptr;
+}
+
+/**
+ * Size assumed during the first pass for every instruction that contains a
+ * label reference and therefore can't be encoded yet.
+ */
+constexpr size_t instruction_size_estimate = 10;
+
+/**
+ * Run the first pass over all statements of the program.
+ *
+ * For every statement:
+ * - collect operand information (number values, register values, symbols and
+ *   whether the statement uses a label reference)
+ * - labels receive the current address
+ * - instructions without a label reference are encoded right away
+ * - instructions with a label reference get the estimated size
+ * - instructions receive the current address, which then advances by the
+ *   instruction length
+ *
+ * @return nullptr on success, otherwise the first error encountered
+ */
+error_t *encoder_first_pass(encoder_t *encoder) {
+    ast_node_t *root = encoder->ast;
+    assert(root->id == NODE_PROGRAM);
+
+    uintptr_t address = 0;
+
+    for (size_t i = 0; i < root->len; ++i) {
+        ast_node_t *statement = root->children[i];
+        error_t *err = encoder_collect_info(encoder, statement, statement);
+        if (err)
+            return err;
+
+        if (statement->id == NODE_LABEL) {
+            ast_node_label_value(statement)->address = address;
+            continue;
+        }
+        if (statement->id != NODE_INSTRUCTION)
+            continue;
+
+        instruction_t *instruction = ast_node_instruction_value(statement);
+        if (instruction->has_reference) {
+            // can't be encoded before label addresses are known
+            instruction->encoding.len = instruction_size_estimate;
+        } else {
+            err = encoder_encode_instruction(encoder, statement);
+            if (err)
+                return err;
+        }
+
+        instruction->address = address;
+        address += instruction->encoding.len;
+    }
+
+    return nullptr;
+}
+
+/**
+ * Compute the mask of operand sizes that can hold a signed value. 64 bit is
+ * always part of the mask; 32, 16 and 8 bit are added while the value stays
+ * within the range of the matching signed integer type.
+ */
+operand_size_t signed_to_size_mask(int64_t value) {
+    operand_size_t mask = OPERAND_SIZE_64;
+
+    // the ranges are nested, so stop at the first one that is too small
+    if (value < INT32_MIN || value > INT32_MAX)
+        return mask;
+    mask |= OPERAND_SIZE_32;
+
+    if (value < INT16_MIN || value > INT16_MAX)
+        return mask;
+    mask |= OPERAND_SIZE_16;
+
+    if (value < INT8_MIN || value > INT8_MAX)
+        return mask;
+    mask |= OPERAND_SIZE_8;
+
+    return mask;
+}
+
+/**
+ * Compute the distance from the end of an instruction to a label.
+ *
+ * @param from the instruction, the offset is counted from the address right
+ *             after its current encoding
+ * @param to   the label
+ * @return label address minus end address of the instruction
+ */
+int64_t statement_offset(ast_node_t *from, ast_node_t *to) {
+    assert(from->id == NODE_INSTRUCTION);
+    assert(to->id == NODE_LABEL);
+
+    instruction_t *source = ast_node_instruction_value(from);
+    int64_t source_end = source->address + source->encoding.len;
+    int64_t target = ast_node_label_value(to)->address;
+
+    return target - source_end;
+}
+
+/**
+ * Recursively resolve every label reference below node against the symbol
+ * table and store the absolute address, the offset relative to the end of
+ * the instruction and the size mask of that offset in the reference node.
+ *
+ * @param node      the node currently visited
+ * @param statement the instruction that contains node
+ * @return nullptr on success,
+ *         err_encoder_unknown_symbol_reference if the name is not in the
+ *         symbol table,
+ *         err_encoder_not_implemented if the symbol has no label in this
+ *         program to take an address from
+ */
+error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node,
+                                        ast_node_t *statement) {
+    assert(statement->id == NODE_INSTRUCTION);
+    if (node->id == NODE_LABEL_REFERENCE) {
+        const char *name = node->token_entry->token.value;
+        symbol_t *symbol = symbol_table_lookup(encoder->symbols, name);
+        if (symbol == nullptr)
+            return err_encoder_unknown_symbol_reference;
+
+        // The symbol table only attaches a statement to symbols that were
+        // seen as a label. Other symbols, e.g. imports, pass the symbol check
+        // but have nothing to compute an address from.
+        if (symbol->statement == nullptr ||
+            symbol->statement->id != NODE_LABEL)
+            return err_encoder_not_implemented;
+
+        int64_t offset = statement_offset(statement, symbol->statement);
+        int64_t absolute = ast_node_label_value(symbol->statement)->address;
+        operand_size_t size = signed_to_size_mask(offset);
+
+        node->value.reference.address = absolute;
+        node->value.reference.offset = offset;
+        node->value.reference.size = size;
+    }
+
+    for (size_t i = 0; i < node->len; ++i) {
+        error_t *err = encoder_collect_reference_info(
+            encoder, node->children[i], statement);
+        if (err)
+            return err;
+    }
+
+    return nullptr;
+}
+
+/**
+ * Tell whether a statement must be encoded again in the second pass: only
+ * instructions that contain a label reference.
+ */
+bool encoder_should_reencode(ast_node_t *statement) {
+    return statement->id == NODE_INSTRUCTION &&
+           ast_node_instruction_value(statement)->has_reference;
+}
+
+/**
+ * Store an address in an instruction or label statement. Any other kind of
+ * statement is left alone.
+ */
+void set_statement_address(ast_node_t *statement, int64_t address) {
+    switch (statement->id) {
+    case NODE_INSTRUCTION:
+        ast_node_instruction_value(statement)->address = address;
+        break;
+    case NODE_LABEL:
+        ast_node_label_value(statement)->address = address;
+        break;
+    default:
+        break;
+    }
+}
+
+/**
+ * Return the encoded length of a statement: the encoding length for
+ * instructions, zero for everything else.
+ */
+size_t get_statement_length(ast_node_t *statement) {
+    return statement->id == NODE_INSTRUCTION
+               ? ast_node_instruction_value(statement)->encoding.len
+               : 0;
+}
+
+/**
+ * Perform one second pass over all statements. Every statement gets its
+ * address updated and every instruction that has a label reference is encoded
+ * again with the label information known at that moment.
+ *
+ * @param did_update set to true when the length of at least one statement
+ *                   changed during this pass, false otherwise
+ * @return nullptr on success, otherwise the first error encountered
+ */
+error_t *encoder_second_pass(encoder_t *encoder, bool *did_update) {
+    ast_node_t *root = encoder->ast;
+    int64_t address = 0;
+
+    *did_update = false;
+    for (size_t i = 0; i < root->len; ++i) {
+        ast_node_t *statement = root->children[i];
+
+        set_statement_address(statement, address);
+        const size_t old_length = get_statement_length(statement);
+
+        if (encoder_should_reencode(statement)) {
+            error_t *err =
+                encoder_collect_reference_info(encoder, statement, statement);
+            if (!err)
+                err = encoder_encode_instruction(encoder, statement);
+            if (err)
+                return err;
+        }
+
+        const size_t new_length = get_statement_length(statement);
+        if (old_length != new_length)
+            *did_update = true;
+        address += new_length;
+    }
+    return nullptr;
+}
+
+/**
+ * Find the first opcode table entry that matches an instruction.
+ *
+ * @return the matching entry, or nullptr when there is none
+ */
+opcode_data_t *encoder_find_opcode(ast_node_t *instruction) {
+    const char *mnemonic = instruction->children[0]->token_entry->token.value;
+    ast_node_t *operands = instruction->children[1];
+
+    size_t i = 0;
+    while (opcodes[i] != nullptr &&
+           !is_opcode_match(opcodes[i], mnemonic, operands))
+        ++i;
+
+    // either the match or the terminating nullptr
+    return opcodes[i];
+}
+
+/**
+ * Verify that no symbol in the table is still a bare reference.
+ *
+ * @return nullptr when all symbols are resolved,
+ *         err_encoder_unknown_symbol_reference otherwise
+ */
+error_t *encoder_check_symbols(encoder_t *encoder) {
+    const symbol_table_t *table = encoder->symbols;
+
+    size_t i = 0;
+    while (i < table->len && table->symbols[i].kind != SYMBOL_REFERENCE)
+        ++i;
+
+    if (i < table->len)
+        return err_encoder_unknown_symbol_reference;
+    return nullptr;
+}
+
+/**
+ * Encode the whole program: run the first pass, verify all symbols are
+ * resolved, then repeat the second pass until it reports no more changes or
+ * ten passes have been done.
+ *
+ * @return nullptr on success, otherwise the first error encountered
+ */
+error_t *encoder_encode(encoder_t *encoder) {
+    error_t *err = encoder_first_pass(encoder);
+    if (!err)
+        err = encoder_check_symbols(encoder);
+    if (err)
+        return err;
+
+    bool did_update = true;
+    int pass = 0;
+    while (pass < 10 && did_update) {
+        err = encoder_second_pass(encoder, &did_update);
+        if (err)
+            return err;
+        ++pass;
+    }
+    return nullptr;
+}
@@ -0,0 +1,33 @@
+#ifndef INCLUDE_ENCODER_ENCODER_H_
+#define INCLUDE_ENCODER_ENCODER_H_
+
+#include "symbols.h"
+
+/**
+ * State of one encoder run.
+ */
+typedef struct encoder {
+    symbol_table_t *symbols; // allocated by encoder_alloc, freed by encoder_free
+    ast_node_t *ast; // root passed to encoder_alloc, not freed by encoder_free
+} encoder_t;
+
+// Values for the mod field, the top two bits of the modrm byte
+constexpr uint8_t modrm_mod_memory = 0b00'000'000;
+constexpr uint8_t modrm_mod_memory_displacement8 = 0b01'000'000;
+constexpr uint8_t modrm_mod_memory_displacement32 = 0b10'000'000;
+constexpr uint8_t modrm_mod_register = 0b11'000'000;
+
+// Bit masks selecting the reg, rm and mod field of the modrm byte
+constexpr uint8_t modrm_reg_mask = 0b00'111'000;
+constexpr uint8_t modrm_rm_mask = 0b00'000'111;
+constexpr uint8_t modrm_mod_mask = 0b11'000'000;
+
+error_t *encoder_alloc(encoder_t **encoder, ast_node_t *ast);
+error_t *encoder_encode(encoder_t *encoder);
+void encoder_free(encoder_t *encoder);
+
+extern error_t *const err_encoder_invalid_register;
+extern error_t *const err_encoder_number_overflow;
+extern error_t *const err_encoder_invalid_number_format;
+extern error_t *const err_encoder_invalid_size_suffix;
+extern error_t *const err_encoder_unknown_symbol_reference;
+extern error_t *const err_encoder_no_encoding_found;
+extern error_t *const err_encoder_not_implemented;
+extern error_t *const err_encoder_unexpected_length;
+
+#endif // INCLUDE_ENCODER_ENCODER_H_
@@ -0,0 +1,165 @@
+#include "symbols.h"
+#include "../error.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Capacity of a freshly allocated table, in symbols
+constexpr size_t symbol_table_default_cap = 64;
+// Once the capacity reaches this value the table refuses to grow
+constexpr size_t symbol_table_max_cap = 1 << 16;
+
+// Errors returned by the symbol table, declared extern in symbols.h
+error_t *const err_symbol_table_invalid_node = &(error_t){
+    .message = "Unexpected node id when adding symbol to symbol table"};
+error_t *const err_symbol_table_max_cap = &(error_t){
+    .message = "Failed to increase symbol table length, max capacity reached"};
+error_t *const err_symbol_table_incompatible_symbols =
+    &(error_t){.message = "Failed to update symbol with incompatible kind"};
+
+/**
+ * Allocate an empty symbol table with the default capacity.
+ *
+ * @param output receives the new table, or nullptr on failure
+ * @return nullptr on success, err_allocation_failed otherwise
+ */
+error_t *symbol_table_alloc(symbol_table_t **output) {
+    *output = nullptr;
+
+    symbol_table_t *result = calloc(1, sizeof(*result));
+    if (!result)
+        return err_allocation_failed;
+
+    result->symbols =
+        calloc(symbol_table_default_cap, sizeof(*result->symbols));
+    if (!result->symbols) {
+        free(result);
+        return err_allocation_failed;
+    }
+
+    result->len = 0;
+    result->cap = symbol_table_default_cap;
+
+    *output = result;
+    return nullptr;
+}
+
+/**
+ * Release a symbol table and its symbol storage. The symbol names are not
+ * freed, the table does not own them. Passing nullptr is a no-op, matching
+ * encoder_free.
+ */
+void symbol_table_free(symbol_table_t *table) {
+    if (table == nullptr)
+        return;
+    free(table->symbols);
+    free(table);
+}
+
+/**
+ * Double the capacity of the table.
+ *
+ * @return nullptr on success, err_symbol_table_max_cap when the capacity
+ *         already reached the maximum, err_allocation_failed when the
+ *         reallocation failed (the table is unchanged in both error cases)
+ */
+error_t *symbol_table_grow_cap(symbol_table_t *table) {
+    if (table->cap >= symbol_table_max_cap)
+        return err_symbol_table_max_cap;
+
+    const size_t doubled = 2 * table->cap;
+    symbol_t *resized = realloc(table->symbols, doubled * sizeof(symbol_t));
+    if (!resized)
+        return err_allocation_failed;
+
+    table->cap = doubled;
+    table->symbols = resized;
+
+    return nullptr;
+}
+
+/**
+ * Derive the symbol kind and symbol name from an AST node.
+ *
+ * @param node the label, label reference, import or export node
+ * @param kind receives the symbol kind
+ * @param name receives the name, taken straight from a token value
+ * @return nullptr on success, err_symbol_table_invalid_node for any other
+ *         node (kind and name are then left untouched)
+ */
+error_t *symbol_table_get_node_info(ast_node_t *node, symbol_kind_t *kind,
+                                    char **name) {
+    // the node whose token carries the symbol name
+    ast_node_t *name_node = nullptr;
+
+    switch (node->id) {
+    case NODE_LABEL:
+        *kind = SYMBOL_LOCAL;
+        name_node = node->children[0];
+        break;
+    case NODE_LABEL_REFERENCE:
+        *kind = SYMBOL_REFERENCE;
+        name_node = node;
+        break;
+    case NODE_IMPORT_DIRECTIVE:
+        *kind = SYMBOL_IMPORT;
+        name_node = node->children[1];
+        break;
+    case NODE_EXPORT_DIRECTIVE:
+        *kind = SYMBOL_EXPORT;
+        name_node = node->children[1];
+        break;
+    default:
+        return err_symbol_table_invalid_node;
+    }
+
+    *name = name_node->token_entry->token.value;
+    return nullptr;
+}
+
+/*
+old  \  new  | REFERENCE | LOCAL    | IMPORT   | EXPORT   |
+-------------|-----------|----------|----------|----------|
+REFERENCE    |           | replace  | replace  | replace  |
+-------------|-----------|----------|----------|----------|
+LOCAL        |           |          |   ERR    | replace  |
+-------------|-----------|----------|----------|----------|
+IMPORT       |           |          |          |   ERR    |
+-------------|-----------|----------|----------|----------|
+EXPORT       |           |          |   ERR    |          |
+-------------|-----------|----------|----------|----------|
+*/
+
+/**
+ * Decide whether an existing symbol kind must be replaced by the new kind:
+ * a reference is replaced by anything but a reference, a local symbol only
+ * by an export.
+ */
+bool symbol_table_should_upgrade(symbol_kind_t old, symbol_kind_t new) {
+    switch (old) {
+    case SYMBOL_REFERENCE:
+        return new != SYMBOL_REFERENCE;
+    case SYMBOL_LOCAL:
+        return new == SYMBOL_EXPORT;
+    default:
+        return false;
+    }
+}
+
+/**
+ * Decide whether combining an existing symbol kind with a new kind is an
+ * error: importing a name that is local or exported, or exporting a name
+ * that is imported.
+ */
+bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
+    const bool import_of_definition =
+        new == SYMBOL_IMPORT && (old == SYMBOL_LOCAL || old == SYMBOL_EXPORT);
+    const bool export_of_import =
+        new == SYMBOL_EXPORT && old == SYMBOL_IMPORT;
+
+    return import_of_definition || export_of_import;
+}
+
+/**
+ * Append a symbol to the table, growing the table first when it is full.
+ *
+ * @pre The symbol _must not_ already be in the table.
+ * @return nullptr on success, otherwise the error from growing the table
+ */
+error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
+                          ast_node_t *statement) {
+    if (table->len >= table->cap) {
+        error_t *err = symbol_table_grow_cap(table);
+        if (err)
+            return err;
+    }
+
+    const symbol_t added = {
+        .name = name,
+        .kind = kind,
+        .statement = statement,
+    };
+    table->symbols[table->len] = added;
+    table->len += 1;
+
+    return nullptr;
+}
+
+/**
+ * Record the symbol described by node in the table.
+ *
+ * A name that is not in the table yet is added. For a known name the kind is
+ * upgraded where the upgrade rules say so, and a label fills in the statement
+ * of a symbol that doesn't have one yet.
+ *
+ * @param node      label, label reference, import or export node
+ * @param statement the statement containing node, only stored for labels
+ * @return nullptr on success, err_symbol_table_incompatible_symbols when the
+ *         new kind conflicts with the existing one, or the error from node
+ *         inspection or from adding the symbol
+ */
+error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
+                             ast_node_t *statement) {
+    char *name;
+    symbol_kind_t kind;
+    error_t *err = symbol_table_get_node_info(node, &kind, &name);
+    if (err)
+        return err;
+
+    // only labels define where a symbol lives
+    ast_node_t *definition = (kind == SYMBOL_LOCAL) ? statement : nullptr;
+
+    symbol_t *existing = symbol_table_lookup(table, name);
+    if (existing == nullptr)
+        return symbol_table_add(table, name, kind, definition);
+
+    if (symbol_table_should_error(existing->kind, kind))
+        return err_symbol_table_incompatible_symbols;
+
+    if (symbol_table_should_upgrade(existing->kind, kind))
+        existing->kind = kind;
+
+    if (kind == SYMBOL_LOCAL && existing->statement == nullptr)
+        existing->statement = definition;
+
+    return nullptr;
+}
+
+/**
+ * Find a symbol by name with a linear search.
+ *
+ * @return pointer to the symbol inside the table, or nullptr if the name is
+ *         not in the table
+ */
+symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name) {
+    size_t i = 0;
+    while (i < table->len && strcmp(table->symbols[i].name, name) != 0)
+        ++i;
+
+    if (i == table->len)
+        return nullptr;
+    return &table->symbols[i];
+}
@@ -0,0 +1,47 @@
+#ifndef INCLUDE_ENCODER_SYMBOLS_H_
+#define INCLUDE_ENCODER_SYMBOLS_H_
+
+#include "../ast.h"
+
+extern error_t *const err_symbol_table_invalid_node;
+extern error_t *const err_symbol_table_max_cap;
+extern error_t *const err_symbol_table_incompatible_symbols;
+
+/**
+ * The kind of a symbol in the symbol table.
+ */
+typedef enum symbol_kind {
+    SYMBOL_REFERENCE = 0, // only seen as a label reference so far
+    SYMBOL_LOCAL = 1,     // defined by a label
+    SYMBOL_EXPORT = 2,    // named by an export directive
+    SYMBOL_IMPORT = 3,    // named by an import directive
+} symbol_kind_t;
+
+/**
+ * Represent a symbol in the program
+ *
+ * Symbols with the same name can only be in the table once. IMPORT or EXPORT
+ * symbols take precedence over REFERENCE symbols. If any reference symbols
+ * remain after the first encoding pass this indicates an error. Trying to add
+ * an IMPORT or EXPORT symbol if the same name already exists as the other kind
+ * is an error.
+ *
+ * The symbol table never takes ownership of the name string, it is lifted
+ * straight from the token value of the node.
+ */
+typedef struct symbol {
+    char *name; // borrowed from the token, never freed by the table
+    symbol_kind_t kind;
+    ast_node_t *statement; // the label statement, nullptr until a label is seen
+} symbol_t;
+
+/**
+ * Growable array of symbols.
+ */
+typedef struct symbol_table {
+    size_t cap; // number of symbols the storage can hold
+    size_t len; // number of symbols currently stored
+    symbol_t *symbols; // heap allocated storage of cap entries
+} symbol_table_t;
+
+error_t *symbol_table_alloc(symbol_table_t **table);
+void symbol_table_free(symbol_table_t *table);
+error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
+                             ast_node_t *statement);
+symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
+
+#endif // INCLUDE_ENCODER_SYMBOLS_H_
@@ -9,8 +9,13 @@ error_t *const err_errorf_alloc = &(error_t){
 error_t *const err_errorf_length = &(error_t){
    .message =
        "Formatting of another error failed to determine the error length"};
+error_t *const err_eof =
+    &(error_t){.message = "Read failed because EOF is reached"};

-error_t *err_allocation_failed =
+error_t *const err_unknown_read_failure =
+    &(error_t){.message = "Unknown read error"};
+
+error_t *const err_allocation_failed =
    &(error_t){.message = "Memory allocation failed"};

 error_t *errorf(const char *fmt, ...) {
@@ -19,6 +19,8 @@ static inline void error_free(error_t *err) {
 }

 /* Some global errors */
-extern error_t *err_allocation_failed;
+extern error_t *const err_allocation_failed;
+extern error_t *const err_eof;
+extern error_t *const err_unknown_read_failure;

 #endif // INCLUDE_SRC_ERROR_H_
@@ -5,21 +5,16 @@
 #include <errno.h>
 #include <string.h>

-error_t *err_lexer_already_open = &(error_t){
+error_t *const err_lexer_already_open = &(error_t){
    .message =
        "Can't open on a lexer object that is already opened. Close it first."};
-error_t *err_prefix_too_large =
+error_t *const err_lexer_prefix_too_large =
    &(error_t){.message = "Prefix too large for internal lexer buffer"};
-error_t *err_buffer_underrun = &(error_t){
+error_t *const err_lexer_buffer_underrun = &(error_t){
    .message = "Buffer does not contain enough characters for lexer_consume_n"};
-error_t *err_consume_excessive_length =
+error_t *const err_lexer_consume_excessive_length =
    &(error_t){.message = "Too many valid characters to consume"};

-error_t *err_eof =
-    &(error_t){.message = "Can't read from file because EOF is reached"};
-
-error_t *err_unknown_read = &(error_t){.message = "Unknown read error"};
-
 typedef bool (*char_predicate_t)(char);

 const char *lexer_token_id_to_cstr(lexer_token_id_t id) {
@@ -112,7 +107,7 @@ error_t *lexer_fill_buffer(lexer_t *lex) {
        if (n == 0 && ferror(lex->fp))
            return errorf("Read error: %s", strerror(errno));
        if (n == 0)
-            return err_unknown_read;
+            return err_unknown_read_failure;
        remaining -= n;
        lex->buffer_count += n;
    }
@@ -141,7 +136,7 @@ error_t *lexer_open(lexer_t *lex, char *path) {
 *
 * @pre There must be at least n characters in the input buffer
 */
-void lexer_shift_buffer(lexer_t *lex, int n) {
+void lexer_shift_buffer(lexer_t *lex, size_t n) {
    assert(lex->buffer_count >= n);
    lex->buffer_count -= n;
    memmove(lex->buffer, lex->buffer + n, lex->buffer_count);
@@ -182,9 +177,9 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
 error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                         char buffer[static len], const size_t n) {
    if (lex->buffer_count < n)
-        return err_buffer_underrun;
+        return err_lexer_buffer_underrun;
    if (n > len)
-        return err_consume_excessive_length;
+        return err_lexer_consume_excessive_length;

    memcpy(buffer, lex->buffer, n);
    lexer_shift_buffer(lex, n);
@@ -229,7 +224,7 @@ error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
                (lex->buffer_count > 0 && is_valid(lex->buffer[0]));

        if (have_more_characters && *n_consumed == buffer_size) {
-            return err_consume_excessive_length;
+            return err_lexer_consume_excessive_length;
        }
    } while (have_more_characters);
    return nullptr;
@@ -299,11 +294,12 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {

    error_t *err = lexer_consume(lex, max_number_length - so_far,
                                 buffer + so_far, is_valid, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Number length exceeds the maximum of 128 characters";
    }
+    lex->character_number += n;
    so_far += n;
    if (n == 0) {
        token->id = TOKEN_ERROR;
@@ -329,14 +325,15 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    if (suffix_length > 0) {
        err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far,
                              suffix_length);
-        if (err == err_consume_excessive_length) {
+        if (err == err_lexer_consume_excessive_length) {
            token->id = TOKEN_ERROR;
            token->explanation =
                "Number length exceeds the maximum of 128 characters";
+        } else {
+            lex->character_number += suffix_length;
        }
    }

-    lex->character_number += n;
    token->value = strdup(buffer);
    return nullptr;
 }
@@ -406,7 +403,7 @@ error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {

    error_t *err = lexer_consume(lex, max_identifier_length, buffer,
                                 is_identifier_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Identifier length exceeds the maximum of 128 characters";
@@ -449,7 +446,7 @@ error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {

    error_t *err = lexer_consume(lex, max_whitespace_length, buffer,
                                 is_whitespace_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Whitespace length exceeds the maximum of 1024 characters";
@@ -484,7 +481,7 @@ error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {

    error_t *err = lexer_consume(lex, max_comment_length, buffer,
                                 is_comment_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Comment length exceeds the maximum of 1024 characters";
@@ -5,7 +5,10 @@
 #include <stddef.h>
 #include <stdio.h>

-extern error_t *err_eof;
+extern error_t *const err_lexer_already_open;
+extern error_t *const err_lexer_prefix_too_large;
+extern error_t *const err_lexer_buffer_underrun;
+extern error_t *const err_lexer_consume_excessive_length;

 typedef enum {
    TOKEN_ERROR,
@@ -1,3 +1,5 @@
+#include "ast.h"
+#include "encoder/encoder.h"
 #include "error.h"
 #include "lexer.h"
 #include "parser/parser.h"
@@ -8,7 +10,13 @@
 #include <stdlib.h>
 #include <string.h>

-typedef enum mode { MODE_AST, MODE_TEXT, MODE_TOKENS } mode_t;
+typedef enum mode {
+    MODE_INVALID = -1,
+    MODE_AST,
+    MODE_TEXT,
+    MODE_TOKENS,
+    MODE_ENCODING,
+} mode_t;

 void print_tokens(tokenlist_t *list) {
    for (auto entry = list->head; entry; entry = entry->next) {
@@ -32,39 +40,106 @@ void print_text(tokenlist_t *list) {
    }
 }

-void print_ast(tokenlist_t *list) {
+error_t *print_ast(tokenlist_t *list) {
    parse_result_t result = parse(list->head);
-    if (result.err) {
-        puts(result.err->message);
-        error_free(result.err);
-        return;
-    }
+    if (result.err)
+        return result.err;
+
    ast_node_print(result.node);

    if (result.next != nullptr) {
        puts("First unparsed token:");
        lexer_token_print(&result.next->token);
    }
-
    ast_node_free(result.node);
+    if (result.next != nullptr) {
+        return errorf("did not parse entire input token stream");
+    }
+    return nullptr;
+}
+
+/**
+ * Write a byte buffer to stdout as two-digit lowercase hex values separated by
+ * single spaces and terminated by a newline.
+ *
+ * @param len   number of bytes to print
+ * @param bytes buffer holding at least len bytes
+ */
+void print_hex(size_t len, uint8_t bytes[static len]) {
+    for (size_t idx = 0; idx < len; ++idx) {
+        // separator goes in front of every byte except the first
+        if (idx > 0)
+            printf(" ");
+        printf("%02x", bytes[idx]);
+    }
+    printf("\n");
+}
+
+/**
+ * Parse the token list, encode the resulting program and print the encoding of
+ * every instruction statement as one line of hex bytes.
+ *
+ * @param list tokens to parse; only list->head is read
+ * @return nullptr on success, otherwise the parse or encoder error
+ */
+error_t *print_encoding(tokenlist_t *list) {
+    parse_result_t result = parse(list->head);
+    if (result.err)
+        return result.err;
+
+    encoder_t *encoder;
+    error_t *err = encoder_alloc(&encoder, result.node);
+    if (err)
+        goto cleanup_ast;
+
+    err = encoder_encode(encoder);
+    if (err)
+        goto cleanup_encoder;
+
+    ast_node_t *root = result.node;
+    for (size_t i = 0; i < root->len; ++i) {
+        ast_node_t *node = root->children[i];
+        if (node->id != NODE_INSTRUCTION)
+            continue;
+
+        print_hex(node->value.instruction.encoding.len,
+                  node->value.instruction.encoding.buffer);
+    }
+
+    // err is nullptr here, so success shares the cleanup path with failure
+cleanup_encoder:
+    // the encoder was allocated successfully and must be released even when
+    // encoding failed
+    encoder_free(encoder);
+cleanup_ast:
+    ast_node_free(result.node);
+    return err;
 }

 int get_execution_mode(int argc, char *argv[]) {
-    if (argc != 3 || (strcmp(argv[1], "tokens") != 0 &&
-                      strcmp(argv[1], "text") != 0 && strcmp(argv[1], "ast"))) {
-        puts("Usage: oas [tokens|text|ast] <filename>");
-        exit(1);
-    }
+    if (argc != 3)
+        return MODE_INVALID;

    if (strcmp(argv[1], "tokens") == 0)
        return MODE_TOKENS;
    if (strcmp(argv[1], "text") == 0)
        return MODE_TEXT;
-    return MODE_AST;
+    if (strcmp(argv[1], "ast") == 0)
+        return MODE_AST;
+    if (strcmp(argv[1], "encoding") == 0)
+        return MODE_ENCODING;
+    return MODE_INVALID;
+}
+
+/**
+ * Run the action selected on the command line against the token list.
+ *
+ * @param mode execution mode; MODE_INVALID is not expected here
+ * @param list tokens to act on
+ * @return nullptr for the token and text printers, otherwise whatever
+ *         print_ast or print_encoding returns
+ */
+error_t *do_action(mode_t mode, tokenlist_t *list) {
+    error_t *outcome = nullptr;
+    switch (mode) {
+    case MODE_TOKENS:
+        print_tokens(list);
+        break;
+    case MODE_TEXT:
+        print_text(list);
+        break;
+    case MODE_AST:
+        outcome = print_ast(list);
+        break;
+    case MODE_ENCODING:
+        outcome = print_encoding(list);
+        break;
+    case MODE_INVALID:
+        /* can't happen */
+        __builtin_unreachable();
+    }
+    return outcome;
 }

 int main(int argc, char *argv[]) {
    mode_t mode = get_execution_mode(argc, argv);
+    if (mode == MODE_INVALID) {
+        puts("Usage: oas [tokens|text|ast|encoding] <filename>");
+        exit(1);
+    }
    char *filename = argv[2];

    lexer_t *lex = &(lexer_t){};
@@ -81,17 +156,9 @@ int main(int argc, char *argv[]) {
    if (err)
        goto cleanup_tokens;

-    switch (mode) {
-    case MODE_TOKENS:
-        print_tokens(list);
-        break;
-    case MODE_TEXT:
-        print_text(list);
-        break;
-    case MODE_AST:
-        print_ast(list);
-        break;
-    }
+    err = do_action(mode, list);
+    if (err)
+        goto cleanup_tokens;

    tokenlist_free(list);
    error_free(err);
@@ -1,4 +1,5 @@
 #include "combinators.h"
+#include "util.h"

 // Parse a list of the given parser delimited by the given token id. Does not
 // store the delimiters in the parent node
@@ -122,5 +123,12 @@ parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
        }
        current = result.next;
    }
+
+    // token stream ended before we matched all parsers
+    if (parser != nullptr) {
+        ast_node_free(all);
+        return parse_no_match();
+    }
+
    return parse_success(all, current);
 }
@@ -83,13 +83,14 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
 }

 parse_result_t parse_immediate(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_number, parse_identifier, nullptr};
+    parser_t parsers[] = {parse_number, parse_label_reference, nullptr};
    parse_result_t result = parse_any(current, parsers);
    return parse_result_wrap(NODE_IMMEDIATE, result);
 }

 parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_register_expression, parse_identifier, nullptr};
+    parser_t parsers[] = {parse_register_expression, parse_label_reference,
+                          nullptr};
    return parse_any(current, parsers);
 }

@@ -119,22 +120,45 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
    return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
 }

+// Parse the body of an import directive: parse_import followed by
+// parse_identifier, collected under a NODE_IMPORT_DIRECTIVE node.
+parse_result_t parse_import_directive(tokenlist_entry_t *current) {
+    parser_t sequence[] = {parse_import, parse_identifier, nullptr};
+    return parse_consecutive(current, NODE_IMPORT_DIRECTIVE, sequence);
+}
+
+// Parse the body of an export directive: parse_export followed by
+// parse_identifier, collected under a NODE_EXPORT_DIRECTIVE node.
+parse_result_t parse_export_directive(tokenlist_entry_t *current) {
+    parser_t sequence[] = {parse_export, parse_identifier, nullptr};
+    return parse_consecutive(current, NODE_EXPORT_DIRECTIVE, sequence);
+}
+
+// Hand the known directive bodies (section, import, export) to parse_any and
+// return its result.
+parse_result_t parse_directive_options(tokenlist_entry_t *current) {
+    parser_t alternatives[] = {
+        parse_section_directive,
+        parse_import_directive,
+        parse_export_directive,
+        nullptr,
+    };
+    return parse_any(current, alternatives);
+}
+
 parse_result_t parse_directive(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
+    parser_t parsers[] = {parse_dot, parse_directive_options, parse_newline,
+                          nullptr};
    return parse_consecutive(current, NODE_DIRECTIVE, parsers);
 }

 parse_result_t parse_instruction(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
+    parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
+                          nullptr};
    return parse_consecutive(current, NODE_INSTRUCTION, parsers);
 }

 parse_result_t parse_statement(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
-                          nullptr};
+                          parse_newline, nullptr};
    return parse_any(current, parsers);
 }

 parse_result_t parse(tokenlist_entry_t *current) {
-    return parse_many(current, NODE_PROGRAM, true, parse_statement);
+    current = tokenlist_skip_trivia(current);
+    parse_result_t result =
+        parse_many(current, NODE_PROGRAM, true, parse_statement);
+    if (result.node != nullptr)
+        ast_node_prune(result.node, NODE_NEWLINE);
+    return result;
 }
@@ -1,5 +1,6 @@
 #include "primitives.h"
 #include "../ast.h"
+#include "../data/registers.h"
 #include <string.h>

 parse_result_t parse_identifier(tokenlist_entry_t *current) {
@@ -62,28 +63,18 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
 }

+// Match a TOKEN_NEWLINE through parse_token, tagging the result NODE_NEWLINE;
+// no extra validator is applied.
+parse_result_t parse_newline(tokenlist_entry_t *current) {
+    const token_validator_t no_validator = nullptr;
+    return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, no_validator);
+}
+
 parse_result_t parse_label_reference(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
                       nullptr);
 }

-const char *registers[] = {
-    // 64-bit registers
-    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",
-    "r11", "r12", "r13", "r14", "r15",
-    // 32-bit registers
-    "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d",
-    "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
-    // 16-bit registers
-    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w",
-    "r11w", "r12w", "r13w", "r14w", "r15w",
-    // 8-bit low registers
-    "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b",
-    "r11b", "r12b", "r13b", "r14b", "r15b", nullptr};
-
 bool is_register_token(lexer_token_t *token) {
    for (size_t i = 0; registers[i] != nullptr; ++i)
-        if (strcmp(token->value, registers[i]) == 0)
+        if (strcmp(token->value, registers[i]->name) == 0)
            return true;
    return false;
 }
@@ -101,3 +92,19 @@ parse_result_t parse_section(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
                       is_section_token);
 }
+
+// True when the token text is exactly "import".
+bool is_import_token(lexer_token_t *token) {
+    const int difference = strcmp(token->value, "import");
+    return difference == 0;
+}
+
+// Match a TOKEN_IDENTIFIER that is_import_token accepts; the result is tagged
+// NODE_IMPORT.
+parse_result_t parse_import(tokenlist_entry_t *current) {
+    token_validator_t validator = is_import_token;
+    return parse_token(current, TOKEN_IDENTIFIER, NODE_IMPORT, validator);
+}
+
+// True when the token text is exactly "export".
+bool is_export_token(lexer_token_t *token) {
+    const int difference = strcmp(token->value, "export");
+    return difference == 0;
+}
+
+// Match a TOKEN_IDENTIFIER that is_export_token accepts; the result is tagged
+// NODE_EXPORT.
+parse_result_t parse_export(tokenlist_entry_t *current) {
+    token_validator_t validator = is_export_token;
+    return parse_token(current, TOKEN_IDENTIFIER, NODE_EXPORT, validator);
+}
@@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
 parse_result_t parse_minus(tokenlist_entry_t *current);
 parse_result_t parse_asterisk(tokenlist_entry_t *current);
 parse_result_t parse_dot(tokenlist_entry_t *current);
+parse_result_t parse_newline(tokenlist_entry_t *current);
 parse_result_t parse_label_reference(tokenlist_entry_t *current);

 /* These are "primitives" with a different name and some extra validation on top
@@ -26,5 +27,7 @@ parse_result_t parse_label_reference(tokenlist_entry_t *current);
 */
 parse_result_t parse_register(tokenlist_entry_t *current);
 parse_result_t parse_section(tokenlist_entry_t *current);
+parse_result_t parse_import(tokenlist_entry_t *current);
+parse_result_t parse_export(tokenlist_entry_t *current);

 #endif // INCLUDE_PARSER_PRIMITIVES_H_
@@ -1,7 +1,7 @@
 #include "util.h"
 #include "../tokenlist.h"

-error_t *err_parse_no_match =
+error_t *const err_parse_no_match =
    &(error_t){.message = "parsing failed to find the correct token sequence"};

 parse_result_t parse_error(error_t *err) {
@@ -21,6 +21,6 @@ parse_result_t parse_token(tokenlist_entry_t *current,
                           token_validator_t is_valid);
 parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);

-extern error_t *err_parse_no_match;
+extern error_t *const err_parse_no_match;

 #endif // INCLUDE_PARSER_UTIL_H_
@@ -86,7 +86,6 @@ bool is_trivia(tokenlist_entry_t *trivia) {
    switch (trivia->token.id) {
    case TOKEN_WHITESPACE:
    case TOKEN_COMMENT:
-    case TOKEN_NEWLINE:
        return true;
    default:
        return false;
@@ -0,0 +1,6 @@
+BasedOnStyle:    LLVM
+IndentWidth:     4
+Cpp11BracedListStyle: true
+AlignArrayOfStructures: Left
+AllowShortFunctionsOnASingleLine: Empty
+ColumnLimit: 120
@@ -0,0 +1,22 @@
+#include "../src/ast.h"
+#include "munit.h"
+
+// ast_node_alloc must hand back a node and no error; the node is freed again
+// before the test returns.
+MunitResult test_ast_node_alloc(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    ast_node_t *allocated = nullptr;
+    error_t *alloc_err = ast_node_alloc(&allocated);
+
+    munit_assert_ptr_not_null(allocated);
+    munit_assert_ptr_null(alloc_err);
+
+    ast_node_free(allocated);
+    return MUNIT_OK;
+}
+
+// Test table for the AST tests; the trailing all-null entry closes the table.
+MunitTest ast_tests[] = {
+    {"/node_alloc", test_ast_node_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {nullptr,       nullptr,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
+};
@@ -0,0 +1,164 @@
+#include "../src/bytes.h"
+#include "munit.h"
+
+// A fresh LOCAL_BYTES(16) buffer has length 0, capacity 16 and only zero bytes.
+MunitResult test_bytes_initializer(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    bytes_t *fresh = LOCAL_BYTES(16);
+    munit_assert_size(fresh->len, ==, 0);
+    munit_assert_size(fresh->cap, ==, 16);
+    size_t idx = 0;
+    while (idx < 16) {
+        munit_assert_uint8(fresh->buffer[idx], ==, 0);
+        ++idx;
+    }
+    return MUNIT_OK;
+}
+
+// Appending single bytes fills the buffer in order; one append more than the
+// capacity of 16 must fail with err_bytes_no_capacity.
+MunitResult test_bytes_append_uint8(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    bytes_t *buf = LOCAL_BYTES(16);
+    munit_assert_size(buf->len, ==, 0);
+    munit_assert_size(buf->cap, ==, 16);
+
+    for (uint8_t value = 0; value < 16; ++value) {
+        error_t *append_err = bytes_append_uint8(buf, value);
+        munit_assert_null(append_err);
+        munit_assert_uint8(buf->buffer[value], ==, value);
+    }
+
+    // the buffer is full now
+    error_t *overflow_err = bytes_append_uint8(buf, 0xFF);
+    munit_assert_ptr(overflow_err, ==, err_bytes_no_capacity);
+
+    return MUNIT_OK;
+}
+
+// bytes_append_array places arrays back to back and rejects, without changing
+// len, an array that no longer fits.
+MunitResult test_bytes_append_array(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    bytes_t *buf = LOCAL_BYTES(16);
+    munit_assert_size(buf->len, ==, 0);
+    munit_assert_size(buf->cap, ==, 16);
+
+    // first chunk lands at offset 0
+    uint8_t head[] = {0x01, 0x02, 0x03, 0x04, 0x05};
+    const size_t head_len = sizeof(head);
+    error_t *err = bytes_append_array(buf, head_len, head);
+    munit_assert_null(err);
+    munit_assert_size(buf->len, ==, head_len);
+    for (size_t k = 0; k < head_len; ++k)
+        munit_assert_uint8(buf->buffer[k], ==, head[k]);
+
+    // second chunk follows directly after the first
+    uint8_t tail[] = {0x06, 0x07, 0x08};
+    const size_t tail_len = sizeof(tail);
+    err = bytes_append_array(buf, tail_len, tail);
+    munit_assert_null(err);
+    munit_assert_size(buf->len, ==, head_len + tail_len);
+    for (size_t k = 0; k < tail_len; ++k)
+        munit_assert_uint8(buf->buffer[head_len + k], ==, tail[k]);
+
+    // 8 bytes are in use, 10 more would exceed the capacity of 16
+    uint8_t too_big[10] = {0};
+    err = bytes_append_array(buf, sizeof(too_big), too_big);
+    munit_assert_ptr(err, ==, err_bytes_no_capacity);
+    munit_assert_size(buf->len, ==, head_len + tail_len);
+
+    return MUNIT_OK;
+}
+
+// bytes_append_bytes appends the used part of one buffer to another and leaves
+// the destination length unchanged when the source does not fit.
+MunitResult test_bytes_append_bytes(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    bytes_t *source = LOCAL_BYTES(8);
+    bytes_t *target = LOCAL_BYTES(16);
+    error_t *err = nullptr;
+
+    // source = 01 02 03 04 05
+    for (uint8_t value = 1; value <= 5; ++value) {
+        err = bytes_append_uint8(source, value);
+        munit_assert_null(err);
+    }
+    munit_assert_size(source->len, ==, 5);
+
+    // the first append copies those five bytes
+    err = bytes_append_bytes(target, source);
+    munit_assert_null(err);
+    munit_assert_size(target->len, ==, source->len);
+    for (size_t k = 0; k < source->len; ++k)
+        munit_assert_uint8(target->buffer[k], ==, source->buffer[k]);
+
+    // top the source up to its capacity with 06 07 08
+    for (uint8_t value = 6; value <= 8; ++value) {
+        err = bytes_append_uint8(source, value);
+        munit_assert_null(err);
+    }
+    munit_assert_size(source->len, ==, 8);
+
+    // the second append adds all eight source bytes
+    err = bytes_append_bytes(target, source);
+    munit_assert_null(err);
+    munit_assert_size(target->len, ==, 13); // 5 + 8
+
+    // 13 + 4 > 16: set the length by hand so the source barely does not fit
+    source->len = 4;
+    err = bytes_append_bytes(target, source);
+    munit_assert_ptr(err, ==, err_bytes_no_capacity);
+    munit_assert_size(target->len, ==, 13); // length unchanged after the error
+
+    return MUNIT_OK;
+}
+// bytes_append_uint16 stores the value low byte first: 0xFFAA becomes AA FF.
+MunitResult test_bytes_append_uint16(const MunitParameter params[], void *data) {
+    // unused munit arguments, silenced the same way as in the other tests
+    (void)params;
+    (void)data;
+
+    bytes_t *bytes = LOCAL_BYTES(16);
+    munit_assert_size(bytes->len, ==, 0);
+    munit_assert_size(bytes->cap, ==, 16);
+
+    bytes_append_uint16(bytes, 0xFFAA);
+    munit_assert_size(bytes->len, ==, 2);
+    munit_assert_uint8(bytes->buffer[0], ==, 0xAA);
+    munit_assert_uint8(bytes->buffer[1], ==, 0xFF);
+
+    return MUNIT_OK;
+}
+// bytes_append_uint32 stores the value low byte first: 0xAABBCCDD becomes
+// DD CC BB AA.
+MunitResult test_bytes_append_uint32(const MunitParameter params[], void *data) {
+    // unused munit arguments, silenced the same way as in the other tests
+    (void)params;
+    (void)data;
+
+    bytes_t *bytes = LOCAL_BYTES(16);
+    munit_assert_size(bytes->len, ==, 0);
+    munit_assert_size(bytes->cap, ==, 16);
+
+    bytes_append_uint32(bytes, 0xAABBCCDD);
+    munit_assert_size(bytes->len, ==, 4);
+    munit_assert_uint8(bytes->buffer[0], ==, 0xDD);
+    munit_assert_uint8(bytes->buffer[1], ==, 0xCC);
+    munit_assert_uint8(bytes->buffer[2], ==, 0xBB);
+    munit_assert_uint8(bytes->buffer[3], ==, 0xAA);
+    return MUNIT_OK;
+}
+// bytes_append_uint64 stores the value low byte first: 0xAABBCCDDEEFF9988
+// becomes 88 99 FF EE DD CC BB AA.
+MunitResult test_bytes_append_uint64(const MunitParameter params[], void *data) {
+    // unused munit arguments, silenced the same way as in the other tests
+    (void)params;
+    (void)data;
+
+    bytes_t *bytes = LOCAL_BYTES(16);
+    munit_assert_size(bytes->len, ==, 0);
+    munit_assert_size(bytes->cap, ==, 16);
+
+    bytes_append_uint64(bytes, 0xAABBCCDDEEFF9988);
+    munit_assert_size(bytes->len, ==, 8);
+    munit_assert_uint8(bytes->buffer[0], ==, 0x88);
+    munit_assert_uint8(bytes->buffer[1], ==, 0x99);
+    munit_assert_uint8(bytes->buffer[2], ==, 0xFF);
+    munit_assert_uint8(bytes->buffer[3], ==, 0xEE);
+    munit_assert_uint8(bytes->buffer[4], ==, 0xDD);
+    munit_assert_uint8(bytes->buffer[5], ==, 0xCC);
+    munit_assert_uint8(bytes->buffer[6], ==, 0xBB);
+    munit_assert_uint8(bytes->buffer[7], ==, 0xAA);
+    return MUNIT_OK;
+}
+
+// Test table for the bytes tests; the trailing all-null entry closes the table.
+MunitTest bytes_tests[] = {
+    {"/initializer",   test_bytes_initializer,   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/append_uint8",  test_bytes_append_uint8,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/append_array",  test_bytes_append_array,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/append_bytes",  test_bytes_append_bytes,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/append_uint16", test_bytes_append_uint16, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/append_uint32", test_bytes_append_uint32, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/append_uint64", test_bytes_append_uint64, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {nullptr,          nullptr,                  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
+};
@@ -0,0 +1,65 @@
+lbl_0:  ; 65 symbols used for testing growing the symbols table
+lbl_1:
+lbl_2:
+lbl_3:
+lbl_4:
+lbl_5:
+lbl_6:
+lbl_7:
+lbl_8:
+lbl_9:
+lbl_10:
+lbl_11:
+lbl_12:
+lbl_13:
+lbl_14:
+lbl_15:
+lbl_16:
+lbl_17:
+lbl_18:
+lbl_19:
+lbl_20:
+lbl_21:
+lbl_22:
+lbl_23:
+lbl_24:
+lbl_25:
+lbl_26:
+lbl_27:
+lbl_28:
+lbl_29:
+lbl_30:
+lbl_31:
+lbl_32:
+lbl_33:
+lbl_34:
+lbl_35:
+lbl_36:
+lbl_37:
+lbl_38:
+lbl_39:
+lbl_40:
+lbl_41:
+lbl_42:
+lbl_43:
+lbl_44:
+lbl_45:
+lbl_46:
+lbl_47:
+lbl_48:
+lbl_49:
+lbl_50:
+lbl_51:
+lbl_52:
+lbl_53:
+lbl_54:
+lbl_55:
+lbl_56:
+lbl_57:
+lbl_58:
+lbl_59:
+lbl_60:
+lbl_61:
+lbl_62:
+lbl_63:
+lbl_64:
@@ -0,0 +1,5 @@
+; regression test for two issues:
+;  - parsing two zero operand instructions in a row
+;  - a zero operand instruction just before eof
+    syscall
+    ret
@@ -0,0 +1,5 @@
+; sample program with trivia on the head of the tokenlist
+
+_start:
+    xor rax, rax
+    call exit
@@ -0,0 +1,12 @@
+.import test
+.export test
+test:
+    call test
+.import more
+.export more
+more:
+    call more
+.import other
+.export other
+other:
+    call other
@@ -2,6 +2,9 @@

 ; Small valid code snippet that should contain all different AST nodes

+.export _start
+.import exit
+
 _start:
    mov eax, ebx
    lea eax, [eax + ebx * 4 + 8]
@@ -19,3 +22,5 @@ _start:
    push 0xffff:64
    push 0o777:16
    push 0b0001:16
+    mov rax, 0
+    call exit
@@ -0,0 +1,896 @@
+#include "../src/lexer.h"
+#include "../src/error.h"
+#include "munit.h"
+#include <string.h>
+
+// Attach an fmemopen stream over `input` to a lexer that has no file open yet
+// and reset its line, character and buffer counters to zero.
+void lexer_setup_memory_test(lexer_t *lex, const char *input) {
+    munit_assert_null(lex->fp);
+
+    const size_t input_len = strlen(input);
+    FILE *memory_file = fmemopen((void *)input, input_len, "rb");
+    munit_assert_not_null(memory_file);
+
+    lex->fp = memory_file;
+    lex->buffer_count = 0;
+    lex->character_number = 0;
+    lex->line_number = 0;
+}
+
+// Read one token from the lexer and require the given id, text, line and
+// column; the token is cleaned up afterwards.
+void lexer_expect_one_token(lexer_t *lex, lexer_token_id_t id, const char *value, size_t line, size_t column) {
+    lexer_token_t actual = {};
+    error_t *lex_err = lexer_next(lex, &actual);
+    munit_assert_null(lex_err);
+
+    // kind and text first, then the reported position
+    munit_assert_int(actual.id, ==, id);
+    munit_assert_string_equal(actual.value, value);
+    munit_assert_int(actual.line_number, ==, line);
+    munit_assert_int(actual.character_number, ==, column);
+
+    lexer_token_cleanup(&actual);
+}
+
+// The next lexer_next call must return err_eof.
+void lexer_expect_eof(lexer_t *lex) {
+    lexer_token_t ignored = {};
+    error_t *read_err = lexer_next(lex, &ignored);
+    munit_assert_ptr_equal(read_err, err_eof);
+}
+
+// Lex `value` from memory and require exactly one token of kind `id` with that
+// text at line 0, column 0, followed by EOF. The lexer is closed afterwards.
+void lexer_test_one_token(lexer_token_id_t id, const char *value) {
+    lexer_t state = {};
+    lexer_t *lex = &state;
+
+    lexer_setup_memory_test(lex, value);
+    lexer_expect_one_token(lex, id, value, 0, 0);
+    lexer_expect_eof(lex);
+    lexer_close(lex);
+}
+
+// Each sample identifier lexes to a single TOKEN_IDENTIFIER.
+MunitResult test_lexer_identifier(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"identifier", "_identifier", "_identifier123_55", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_IDENTIFIER, inputs[i]);
+    return MUNIT_OK;
+}
+
+// Expected properties of one lexed token: the id, text, line and column that
+// test_lexer_boundary passes on to lexer_expect_one_token.
+typedef struct token_data {
+    lexer_token_id_t id;
+    char *value;
+    size_t line;
+    size_t column;
+} token_data_t;
+
+// One boundary test case: an input string and the two tokens it is expected to
+// lex into. test_lexer_boundary stops at the first entry whose input is null.
+typedef struct boundary {
+    const char *input;
+    token_data_t first;
+    token_data_t second;
+} boundary_t;
+
+// Run every case of a table that ends with a null input: lex the input from
+// memory and require the two expected tokens followed by EOF.
+void test_lexer_boundary(boundary_t boundaries[]) {
+    for (boundary_t *entry = boundaries; entry->input != nullptr; ++entry) {
+        const token_data_t *first = &entry->first;
+        const token_data_t *second = &entry->second;
+
+        lexer_t lex = {};
+        lexer_setup_memory_test(&lex, entry->input);
+        lexer_expect_one_token(&lex, first->id, first->value, first->line, first->column);
+        lexer_expect_one_token(&lex, second->id, second->value, second->line, second->column);
+        lexer_expect_eof(&lex);
+        lexer_close(&lex);
+    }
+}
+
+// An identifier ends at the first character that starts another token; every
+// case expects "id" at column 0 and the following token at column 2.
+MunitResult test_lexer_identifier_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"id:",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COLON, ":", 0, 2}         },
+        {"id[",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_LBRACKET, "[", 0, 2}      },
+        {"id]",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_RBRACKET, "]", 0, 2}      },
+        {"id+",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_PLUS, "+", 0, 2}          },
+        {"id-",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_MINUS, "-", 0, 2}         },
+        {"id*",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_ASTERISK, "*", 0, 2}      },
+        {"id.",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_DOT, ".", 0, 2}           },
+        {"id;comment", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COMMENT, ";comment", 0, 2}},
+        {"id\n",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 2}      },
+        {"id\r\n",     {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 2}    },
+        {"id ",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 2}    },
+        {"id\t",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 2}   },
+        {nullptr,      {},                             {}                               },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+// Plain decimal literals each lex to a single TOKEN_DECIMAL.
+MunitResult test_lexer_decimal(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"123", "0", "42", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
+    return MUNIT_OK;
+}
+
+// Decimal literals with a :8, :16, :32 or :64 suffix stay one TOKEN_DECIMAL.
+MunitResult test_lexer_decimal_with_suffix(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"123:8", "0:16", "42:32", "69:64", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
+    return MUNIT_OK;
+}
+
+// 0x literals, with lower and upper case digits, each lex to a single
+// TOKEN_HEXADECIMAL.
+MunitResult test_lexer_hexadecimal(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"0x123", "0xDEAD", "0x0", "0xabcdef", "0xABCDEF", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
+    return MUNIT_OK;
+}
+
+// 0x literals with a :8, :16, :32 or :64 suffix stay one TOKEN_HEXADECIMAL.
+MunitResult test_lexer_hexadecimal_with_suffix(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"0x123:8", "0xDEAD:16", "0xABC:32", "0xffff:64", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
+    return MUNIT_OK;
+}
+
+// 0o literals each lex to a single TOKEN_OCTAL.
+MunitResult test_lexer_octal(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"0o777", "0o0", "0o123", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
+    return MUNIT_OK;
+}
+
+// 0o literals with a :8, :16, :32 or :64 suffix stay one TOKEN_OCTAL.
+MunitResult test_lexer_octal_with_suffix(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"0o777:8", "0o123:16", "0o777:32", "0o123:64", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
+    return MUNIT_OK;
+}
+
+// 0b literals each lex to a single TOKEN_BINARY.
+MunitResult test_lexer_binary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"0b101", "0b0", "0b1", "0b01010101", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
+    return MUNIT_OK;
+}
+
+// 0b literals with a :8, :16, :32 or :64 suffix stay one TOKEN_BINARY.
+MunitResult test_lexer_binary_with_suffix(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+    const char *inputs[] = {"0b101:8", "0b0:16", "0b1:32", "0b01010101:64", nullptr};
+    for (size_t i = 0; inputs[i] != nullptr; ++i)
+        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
+    return MUNIT_OK;
+}
+
+// A lone ":" is passed to lexer_test_one_token expecting TOKEN_COLON.
+MunitResult test_lexer_colon(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *input = ":";
+    lexer_test_one_token(TOKEN_COLON, input);
+
+    return MUNIT_OK;
+}
+
+// A lone "," is passed to lexer_test_one_token expecting TOKEN_COMMA.
+MunitResult test_lexer_comma(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *input = ",";
+    lexer_test_one_token(TOKEN_COMMA, input);
+
+    return MUNIT_OK;
+}
+
+// A lone "[" is passed to lexer_test_one_token expecting TOKEN_LBRACKET.
+MunitResult test_lexer_lbracket(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *input = "[";
+    lexer_test_one_token(TOKEN_LBRACKET, input);
+
+    return MUNIT_OK;
+}
+
+// A lone "]" is passed to lexer_test_one_token expecting TOKEN_RBRACKET.
+MunitResult test_lexer_rbracket(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *input = "]";
+    lexer_test_one_token(TOKEN_RBRACKET, input);
+
+    return MUNIT_OK;
+}
+
+// A lone "+" is passed to lexer_test_one_token expecting TOKEN_PLUS.
+MunitResult test_lexer_plus(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *input = "+";
+    lexer_test_one_token(TOKEN_PLUS, input);
+
+    return MUNIT_OK;
+}
+
+// A lone "-" is passed to lexer_test_one_token expecting TOKEN_MINUS.
+MunitResult test_lexer_minus(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *input = "-";
+    lexer_test_one_token(TOKEN_MINUS, input);
+
+    return MUNIT_OK;
+}
+
+// A lone "*" is passed to lexer_test_one_token expecting TOKEN_ASTERISK.
+MunitResult test_lexer_asterisk(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *input = "*";
+    lexer_test_one_token(TOKEN_ASTERISK, input);
+
+    return MUNIT_OK;
+}
+
+// A lone "." is passed to lexer_test_one_token expecting TOKEN_DOT.
+MunitResult test_lexer_dot(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *input = ".";
+    lexer_test_one_token(TOKEN_DOT, input);
+
+    return MUNIT_OK;
+}
+
+// ";"-introduced comments, including the degenerate case of a bare ";".
+// Each input is passed to lexer_test_one_token expecting TOKEN_COMMENT.
+MunitResult test_lexer_comment(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *inputs[] = {";This is a comment", "; Another comment", ";"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_COMMENT, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+// Runs of spaces and tabs (single, repeated and mixed). Each input is passed
+// to lexer_test_one_token expecting TOKEN_WHITESPACE.
+MunitResult test_lexer_whitespace(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *inputs[] = {" ", "  ", "\t", " \t "};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_WHITESPACE, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+// Checks that "\n" and "\r\n" each produce one TOKEN_NEWLINE whose value is
+// the full line ending, positioned at (0, 0), followed by end of input.
+MunitResult test_lexer_newlines(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    // Unix-style newline first, then the Windows-style pair.
+    char *line_endings[] = {"\n", "\r\n"};
+    const size_t count = sizeof(line_endings) / sizeof(line_endings[0]);
+
+    for (size_t i = 0; i < count; ++i) {
+        // A fresh lexer per input so no state carries over between cases.
+        lexer_t lexer = {};
+        lexer_setup_memory_test(&lexer, line_endings[i]);
+        lexer_expect_one_token(&lexer, TOKEN_NEWLINE, line_endings[i], 0, 0);
+        lexer_expect_eof(&lexer);
+        lexer_close(&lexer);
+    }
+
+    return MUNIT_OK;
+}
+
+// Lexes three one-letter identifiers separated by "\n" and checks the two
+// trailing numeric arguments of every expected token. Judging by the values
+// they are (line, column): the first advances after each newline token and
+// the second restarts at 0.
+MunitResult test_lexer_line_numbers(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *source = "a\nb\nc";
+    lexer_t lexer = {};
+    lexer_setup_memory_test(&lexer, source);
+
+    // First line: identifier, then the newline that terminates it.
+    lexer_expect_one_token(&lexer, TOKEN_IDENTIFIER, "a", 0, 0);
+    lexer_expect_one_token(&lexer, TOKEN_NEWLINE, "\n", 0, 1);
+
+    // Second line.
+    lexer_expect_one_token(&lexer, TOKEN_IDENTIFIER, "b", 1, 0);
+    lexer_expect_one_token(&lexer, TOKEN_NEWLINE, "\n", 1, 1);
+
+    // Third line has no trailing newline, so end of input follows directly.
+    lexer_expect_one_token(&lexer, TOKEN_IDENTIFIER, "c", 2, 0);
+    lexer_expect_eof(&lexer);
+
+    lexer_close(&lexer);
+    return MUNIT_OK;
+}
+
+// Boundary table for decimal literals: "123" followed directly by each kind of
+// delimiter. Every row lists the input, the expected first token and the
+// expected second token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_decimal_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"123,",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMA, ",", 0, 3}      },
+        {"123:",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COLON, ":", 0, 3}      },
+        {"123[",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 3}   },
+        {"123]",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 3}   },
+        {"123+",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_PLUS, "+", 0, 3}       },
+        {"123-",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_MINUS, "-", 0, 3}      },
+        {"123*",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 3}   },
+        {"123.",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_DOT, ".", 0, 3}        },
+        {"123;",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMENT, ";", 0, 3}    },
+        // followed by a line ending
+        {"123\n",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 3}   },
+        {"123\r\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 3} },
+        // followed by horizontal whitespace
+        {"123 ",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 3} },
+        {"123\t",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 3}},
+        // terminating row (null input)
+        {nullptr,   {},                           {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for hexadecimal literals: "0x123" followed directly by each
+// kind of delimiter. Every row lists the input, the expected first token and
+// the expected second token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_hexadecimal_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"0x123,",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
+        {"0x123:",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
+        {"0x123[",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
+        {"0x123]",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
+        {"0x123+",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
+        {"0x123-",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
+        {"0x123*",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
+        {"0x123.",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
+        {"0x123;",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
+        // followed by a line ending
+        {"0x123\n",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
+        {"0x123\r\n", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
+        // followed by horizontal whitespace
+        {"0x123 ",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
+        {"0x123\t",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
+        // terminating row (null input)
+        {nullptr,     {},                                 {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for octal literals: "0o123" followed directly by each kind of
+// delimiter. Every row lists the input, the expected first token and the
+// expected second token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_octal_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"0o123,",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
+        {"0o123:",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
+        {"0o123[",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
+        {"0o123]",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
+        {"0o123+",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
+        {"0o123-",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
+        {"0o123*",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
+        {"0o123.",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
+        {"0o123;",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
+        // followed by a line ending
+        {"0o123\n",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
+        {"0o123\r\n", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
+        // followed by horizontal whitespace
+        {"0o123 ",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
+        {"0o123\t",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
+        // terminating row (null input)
+        {nullptr,     {},                           {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for binary literals: "0b101" followed directly by each kind
+// of delimiter. Every row lists the input, the expected first token and the
+// expected second token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_binary_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"0b101,",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
+        {"0b101:",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
+        {"0b101[",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
+        {"0b101]",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
+        {"0b101+",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
+        {"0b101-",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
+        {"0b101*",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
+        {"0b101.",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
+        {"0b101;",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
+        // followed by a line ending
+        {"0b101\n",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
+        {"0b101\r\n", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
+        // followed by horizontal whitespace
+        {"0b101 ",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
+        {"0b101\t",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
+        // terminating row (null input)
+        {nullptr,     {},                            {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for ":" followed directly by each kind of delimiter. Every
+// row lists the input, the expected first token and the expected second
+// token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_colon_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {":,",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"::",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {":[",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {":]",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {":+",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {":-",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {":*",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {":.",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {":;",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        // followed by a line ending
+        {":\n",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {":\r\n", {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        // followed by horizontal whitespace
+        {": ",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {":\t",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                       {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "," followed directly by each kind of delimiter. Every
+// row lists the input, the expected first token and the expected second
+// token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_comma_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {",,",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {",:",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {",[",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {",]",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {",+",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {",-",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {",*",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {",.",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {",;",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        // followed by a line ending
+        {",\n",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {",\r\n", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        // followed by horizontal whitespace
+        {", ",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {",\t",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                       {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "[" followed directly by each kind of delimiter. Every
+// row lists the input, the expected first token and the expected second
+// token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_lbracket_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"[,",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"[:",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"[[",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"[]",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"[+",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"[-",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"[*",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"[.",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"[;",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        // followed by a line ending
+        {"[\n",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"[\r\n", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        // followed by horizontal whitespace
+        {"[ ",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"[\t",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                          {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "]" followed directly by each kind of delimiter. Every
+// row lists the input, the expected first token and the expected second
+// token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_rbracket_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"],",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"]:",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"][",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"]]",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"]+",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"]-",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"]*",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"].",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"];",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        // followed by a line ending
+        {"]\n",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"]\r\n", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        // followed by horizontal whitespace
+        {"] ",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"]\t",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                          {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "+" followed directly by each kind of delimiter. Every
+// row lists the input, the expected first token and the expected second
+// token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_plus_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"+,",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"+:",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"+[",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"+]",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"++",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"+-",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"+*",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"+.",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"+;",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        // followed by a line ending
+        {"+\n",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"+\r\n", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        // followed by horizontal whitespace
+        {"+ ",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"+\t",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                      {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "-" followed directly by each kind of delimiter. Every
+// row lists the input, the expected first token and the expected second
+// token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_minus_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"-,",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"-:",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"-[",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"-]",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"-+",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"--",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"-*",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"-.",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"-;",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        // followed by a line ending
+        {"-\n",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"-\r\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        // followed by horizontal whitespace
+        {"- ",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"-\t",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                       {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "*" followed directly by each kind of delimiter. Every
+// row lists the input, the expected first token and the expected second
+// token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_asterisk_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"*,",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"*:",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"*[",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"*]",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"*+",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"*-",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"**",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"*.",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"*;",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        // followed by a line ending
+        {"*\n",   {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"*\r\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        // followed by horizontal whitespace
+        {"* ",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"*\t",   {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                          {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "." followed directly by each kind of delimiter. Every
+// row lists the input, the expected first token and the expected second
+// token; the table is checked by test_lexer_boundary.
+MunitResult test_lexer_dot_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {".,",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {".:",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {".[",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {".]",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {".+",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {".-",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {".*",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"..",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {".;",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        // followed by a line ending
+        {".\n",   {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {".\r\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        // followed by horizontal whitespace
+        {". ",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {".\t",   {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                     {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for comments: only a line ending appears as the second token
+// here, and the expected comment value excludes it. The table is checked by
+// test_lexer_boundary.
+MunitResult test_lexer_comment_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by a line ending
+        {";comment\n",   {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}  },
+        {";comment\r\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}},
+        // terminating row (null input)
+        {nullptr,        {},                                {}                           },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for a single space followed directly by punctuation or a
+// line ending. There are no whitespace-after-whitespace rows in this table.
+// The table is checked by test_lexer_boundary.
+MunitResult test_lexer_whitespace_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {" ,",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMA, ",", 0, 1}     },
+        {" :",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COLON, ":", 0, 1}     },
+        {" [",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}  },
+        {" ]",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}  },
+        {" +",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_PLUS, "+", 0, 1}      },
+        {" -",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_MINUS, "-", 0, 1}     },
+        {" *",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}  },
+        {" .",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_DOT, ".", 0, 1}       },
+        {" ;",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}   },
+        // followed by a line ending
+        {" \n",   {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}  },
+        {" \r\n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
+        // terminating row (null input)
+        {nullptr, {},                            {}                           },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "\n" followed directly by each kind of token. In every
+// row the second token is expected at (1, 0), i.e. after the position has
+// moved past the newline. The table is checked by test_lexer_boundary.
+MunitResult test_lexer_newline_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"\n,",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}      },
+        {"\n:",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}      },
+        {"\n[",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}   },
+        {"\n]",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}   },
+        {"\n+",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}       },
+        {"\n-",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}      },
+        {"\n*",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}   },
+        {"\n.",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}        },
+        {"\n;",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}    },
+        // followed by another line ending
+        {"\n\n",   {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}   },
+        {"\n\r\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0} },
+        // followed by horizontal whitespace
+        {"\n ",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0} },
+        {"\n\t",   {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
+        // terminating row (null input)
+        {nullptr,  {},                          {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for "\r\n" followed directly by each kind of token. The pair
+// is expected as a single TOKEN_NEWLINE, and the second token is expected at
+// (1, 0) in every row. The table is checked by test_lexer_boundary.
+MunitResult test_lexer_crlf_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"\r\n,",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}      },
+        {"\r\n:",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}      },
+        {"\r\n[",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}   },
+        {"\r\n]",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}   },
+        {"\r\n+",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}       },
+        {"\r\n-",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}      },
+        {"\r\n*",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}   },
+        {"\r\n.",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}        },
+        {"\r\n;",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}    },
+        // followed by another line ending
+        {"\r\n\n",   {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}   },
+        {"\r\n\r\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0} },
+        // followed by horizontal whitespace
+        {"\r\n ",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0} },
+        {"\r\n\t",   {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
+        // terminating row (null input)
+        {nullptr,    {},                            {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Boundary table for suffixed numeric literals in hexadecimal, octal and
+// binary: the ":<n>" suffix is expected to be part of the number token's
+// value, and the following delimiter is expected as a separate token. The
+// table is checked by test_lexer_boundary.
+MunitResult test_lexer_number_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        // followed by punctuation
+        {"0x123:8,",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_COMMA, ",", 0, 7}      },
+        {"0x123:16:",    {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_COLON, ":", 0, 8}      },
+        {"0o777:32[",    {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_LBRACKET, "[", 0, 8}   },
+        {"0b101:64]",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_RBRACKET, "]", 0, 8}   },
+        {"0x123:8+",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_PLUS, "+", 0, 7}       },
+        {"0x123:16-",    {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_MINUS, "-", 0, 8}      },
+        {"0o777:32*",    {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_ASTERISK, "*", 0, 8}   },
+        {"0b101:64.",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_DOT, ".", 0, 8}        },
+        {"0x123:8;",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_COMMENT, ";", 0, 7}    },
+        // followed by a line ending
+        {"0x123:16\n",   {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}   },
+        {"0o777:32\r\n", {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_NEWLINE, "\r\n", 0, 8} },
+        // followed by horizontal whitespace
+        {"0b101:64 ",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_WHITESPACE, " ", 0, 8} },
+        {"0x123:8\t",    {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_WHITESPACE, "\t", 0, 7}},
+        // terminating row (null input)
+        {nullptr,        {},                                    {}                            },
+    };
+
+    test_lexer_boundary(cases);
+    return MUNIT_OK;
+}
+
+// Numbers that are exactly 128 characters long (with and without a ":64"
+// suffix) must lex as one complete token.
+//
+// Each input is paired with the token id it must produce. Accepting
+// "decimal or hexadecimal" for every input would let a misclassified literal
+// pass, so the id is checked exactly. The token text is compared against the
+// input as well, since a length check alone would accept corrupted content.
+MunitResult test_lexer_maximum_length_numbers(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    struct {
+        char *text;        // 128-character input
+        int   expected_id; // token id the lexer must report for it
+    } cases[] = {
+        {"9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+         "9999999999999999999988", TOKEN_DECIMAL    },
+        {"9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+         "9999999999999999998:64", TOKEN_DECIMAL    },
+        {"0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+         "9999999999999999999988", TOKEN_HEXADECIMAL},
+        {"0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+         "9999999999999999998:64", TOKEN_HEXADECIMAL},
+    };
+
+    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i) {
+        auto number = cases[i].text;
+        // Guard against a typo in the fixture itself.
+        munit_assert_size(128, ==, strlen(number));
+        lexer_t lex = {};
+        lexer_token_t token = {};
+        lexer_setup_memory_test(&lex, number);
+        lexer_next(&lex, &token);
+        munit_assert_int(cases[i].expected_id, ==, token.id);
+        munit_assert_size(128, ==, strlen(token.value));
+        munit_assert_string_equal(number, token.value);
+        lexer_token_cleanup(&token);
+        lexer_close(&lex);
+    }
+
+    return MUNIT_OK;
+}
+
+// Numbers that are 129 characters long must produce TOKEN_ERROR. The inputs
+// are split into two groups because the expectations after the error differ:
+// unsuffixed inputs yield an error token of exactly 128 characters, while
+// ":64"-suffixed inputs yield an error token of at most 128 characters
+// followed by a ":" token and a decimal "64" token.
+MunitResult test_lexer_too_long_numbers(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *unsuffixed[] = {
+        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "99999999999999999999988",
+        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "99999999999999999999988",
+    };
+    char *suffixed[] = {
+        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "99999999999999999998:64",
+        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "99999999999999999998:64",
+    };
+
+    // Without suffix we expect 128 characters and then failure
+    for (size_t i = 0; i < sizeof(unsuffixed) / sizeof(unsuffixed[0]); ++i) {
+        auto number = unsuffixed[i];
+        munit_assert_size(129, ==, strlen(number));
+
+        lexer_t lexer = {};
+        lexer_token_t token = {};
+        lexer_setup_memory_test(&lexer, number);
+        lexer_next(&lexer, &token);
+
+        munit_assert_int(TOKEN_ERROR, ==, token.id);
+        munit_assert_size(128, ==, strlen(token.value));
+
+        lexer_token_cleanup(&token);
+        lexer_close(&lexer);
+    }
+
+    // With suffix we fail at the suffix boundary
+    for (size_t i = 0; i < sizeof(suffixed) / sizeof(suffixed[0]); ++i) {
+        auto number = suffixed[i];
+        munit_assert_size(129, ==, strlen(number));
+
+        lexer_t lexer = {};
+        lexer_token_t token = {};
+        lexer_setup_memory_test(&lexer, number);
+        lexer_next(&lexer, &token);
+
+        munit_assert_int(TOKEN_ERROR, ==, token.id);
+        munit_assert_size(128, >=, strlen(token.value));
+        lexer_token_cleanup(&token);
+
+        // The suffix is expected as two separate tokens after the error.
+        lexer_expect_one_token(&lexer, TOKEN_COLON, ":", 0, 126);
+        lexer_expect_one_token(&lexer, TOKEN_DECIMAL, "64", 0, 127);
+        lexer_close(&lexer);
+    }
+
+    return MUNIT_OK;
+}
+
+// A run of exactly 1024 spaces must lex as a single TOKEN_WHITESPACE whose
+// value holds all 1024 characters.
+MunitResult test_lexer_max_whitespace_length(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    enum { run_length = 1024 };
+
+    // One extra byte for the terminating NUL.
+    char whitespace[run_length + 1];
+    memset(whitespace, ' ', run_length);
+    whitespace[run_length] = '\0';
+    munit_assert_size(1024, ==, strlen(whitespace));
+
+    lexer_t lexer = {};
+    lexer_token_t token = {};
+    lexer_setup_memory_test(&lexer, whitespace);
+    lexer_next(&lexer, &token);
+
+    munit_assert_int(TOKEN_WHITESPACE, ==, token.id);
+    munit_assert_size(1024, ==, strlen(token.value));
+
+    lexer_token_cleanup(&token);
+    lexer_close(&lexer);
+    return MUNIT_OK;
+}
+
+// A run of 1025 spaces must first produce a TOKEN_ERROR carrying 1024
+// characters; the one remaining space is then expected as a TOKEN_WHITESPACE
+// at position (0, 1024).
+MunitResult test_lexer_too_long_whitespace(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    enum { run_length = 1025 };
+
+    // One extra byte for the terminating NUL.
+    char whitespace[run_length + 1];
+    memset(whitespace, ' ', run_length);
+    whitespace[run_length] = '\0';
+    munit_assert_size(1025, ==, strlen(whitespace));
+
+    lexer_t lexer = {};
+    lexer_token_t token = {};
+    lexer_setup_memory_test(&lexer, whitespace);
+    lexer_next(&lexer, &token);
+
+    munit_assert_int(TOKEN_ERROR, ==, token.id);
+    munit_assert_size(1024, ==, strlen(token.value));
+    lexer_token_cleanup(&token);
+
+    // The character past the limit is still lexed afterwards.
+    lexer_expect_one_token(&lexer, TOKEN_WHITESPACE, " ", 0, 1024);
+
+    lexer_close(&lexer);
+    return MUNIT_OK;
+}
+
+// Registration table for the lexer tests. Each row gives a test path followed
+// by the function that implements it; the remaining positional fields are
+// left at nullptr / MUNIT_TEST_OPTION_NONE for every test. The final row, with
+// a null path and null function, terminates the table.
+MunitTest lexer_tests[] = {
+    {"/identifier",              test_lexer_identifier,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/identifier_boundary",     test_lexer_identifier_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/decimal",                 test_lexer_decimal,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/decimal_boundary",        test_lexer_decimal_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/hexadecimal",             test_lexer_hexadecimal,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/hexadecimal_with_suffix", test_lexer_hexadecimal_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/hexadecimal_boundary",    test_lexer_hexadecimal_boundary,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/octal",                   test_lexer_octal,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/octal_with_suffix",       test_lexer_octal_with_suffix,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/octal_boundary",          test_lexer_octal_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/binary",                  test_lexer_binary,                  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/binary_with_suffix",      test_lexer_binary_with_suffix,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/binary_boundary",         test_lexer_binary_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/number_boundary",         test_lexer_number_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/colon",                   test_lexer_colon,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/colon_boundary",          test_lexer_colon_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/comma",                   test_lexer_comma,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/comma_boundary",          test_lexer_comma_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/lbracket",                test_lexer_lbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/lbracket_boundary",       test_lexer_lbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/rbracket",                test_lexer_rbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/rbracket_boundary",       test_lexer_rbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/plus",                    test_lexer_plus,                    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/plus_boundary",           test_lexer_plus_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/minus",                   test_lexer_minus,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/minus_boundary",          test_lexer_minus_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/asterisk",                test_lexer_asterisk,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/asterisk_boundary",       test_lexer_asterisk_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/dot",                     test_lexer_dot,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/dot_boundary",            test_lexer_dot_boundary,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/comment",                 test_lexer_comment,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/comment_boundary",        test_lexer_comment_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/whitespace",              test_lexer_whitespace,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/whitespace_boundary",     test_lexer_whitespace_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/newlines",                test_lexer_newlines,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/newline_boundary",        test_lexer_newline_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/crlf_boundary",           test_lexer_crlf_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/line_numbers",            test_lexer_line_numbers,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/maximum_length_numbers",  test_lexer_maximum_length_numbers,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/too_long_numbers",        test_lexer_too_long_numbers,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/max_whitespace_length",   test_lexer_max_whitespace_length,   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/too_long_whitespace",     test_lexer_too_long_whitespace,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {nullptr,                    nullptr,                            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
+};
@@ -0,0 +1,22 @@
+#include "munit.h"
+
+extern MunitTest ast_tests[];
+extern MunitTest lexer_tests[];
+extern MunitTest regression_tests[];
+extern MunitTest symbols_tests[];
+extern MunitTest bytes_tests[];
+
+// Test runner entry point: groups every per-module test list under the "/oas"
+// root suite and hands the command line over to munit.
+int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
+    // One child suite per module; the last, all-null entry closes the list.
+    MunitSuite children[] = {
+        {.prefix = "/regression", .tests = regression_tests, .suites = nullptr, .iterations = 1, .options = MUNIT_SUITE_OPTION_NONE},
+        {.prefix = "/ast",        .tests = ast_tests,        .suites = nullptr, .iterations = 1, .options = MUNIT_SUITE_OPTION_NONE},
+        {.prefix = "/lexer",      .tests = lexer_tests,      .suites = nullptr, .iterations = 1, .options = MUNIT_SUITE_OPTION_NONE},
+        {.prefix = "/symbols",    .tests = symbols_tests,    .suites = nullptr, .iterations = 1, .options = MUNIT_SUITE_OPTION_NONE},
+        {.prefix = "/bytes",      .tests = bytes_tests,      .suites = nullptr, .iterations = 1, .options = MUNIT_SUITE_OPTION_NONE},
+        {.prefix = nullptr,       .tests = nullptr,          .suites = nullptr, .iterations = 0, .options = MUNIT_SUITE_OPTION_NONE},
+    };
+
+    // The root suite has no tests of its own, only the children above.
+    MunitSuite root = {
+        .prefix     = "/oas",
+        .tests      = nullptr,
+        .suites     = children,
+        .iterations = 1,
+        .options    = MUNIT_SUITE_OPTION_NONE,
+    };
+
+    const int exit_code = munit_suite_main(&root, nullptr, argc, argv);
+    return exit_code;
+}
@@ -0,0 +1,535 @@
+/* µnit Testing Framework
+ * Copyright (c) 2013-2017 Evan Nemerson <evan@nemerson.com>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(MUNIT_H)
+#define MUNIT_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+
+/* Pack a (major, minor, revision) triple into one integer: major is shifted
+ * left by 16 bits, minor by 8 bits, and revision occupies the low bits. */
+#define MUNIT_VERSION(major, minor, revision) \
+  (((major) << 16) | ((minor) << 8) | (revision))
+
+/* Version of this copy of the header, encoded with MUNIT_VERSION. */
+#define MUNIT_CURRENT_VERSION MUNIT_VERSION(0, 4, 1)
+
+/* Fixed-width integer aliases used throughout the header.  MSVC builds with
+ * _MSC_VER below 1600 map them onto the compiler's __intN types; every other
+ * compiler gets the <stdint.h> types. */
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+#  define munit_int8_t   __int8
+#  define munit_uint8_t  unsigned __int8
+#  define munit_int16_t  __int16
+#  define munit_uint16_t unsigned __int16
+#  define munit_int32_t  __int32
+#  define munit_uint32_t unsigned __int32
+#  define munit_int64_t  __int64
+#  define munit_uint64_t unsigned __int64
+#else
+#  include <stdint.h>
+#  define munit_int8_t   int8_t
+#  define munit_uint8_t  uint8_t
+#  define munit_int16_t  int16_t
+#  define munit_uint16_t uint16_t
+#  define munit_int32_t  int32_t
+#  define munit_uint32_t uint32_t
+#  define munit_int64_t  int64_t
+#  define munit_uint64_t uint64_t
+#endif
+
+/* printf format macros for the fixed-width types.  MSVC builds with _MSC_VER
+ * below 1800 get a fallback definition for each PRI* macro that is not
+ * already defined; every other compiler simply includes <inttypes.h>. */
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#  if !defined(PRIi8)
+#    define PRIi8 "i"
+#  endif
+#  if !defined(PRIi16)
+#    define PRIi16 "i"
+#  endif
+#  if !defined(PRIi32)
+#    define PRIi32 "i"
+#  endif
+#  if !defined(PRIi64)
+#    define PRIi64 "I64i"
+#  endif
+#  if !defined(PRId8)
+#    define PRId8 "d"
+#  endif
+#  if !defined(PRId16)
+#    define PRId16 "d"
+#  endif
+#  if !defined(PRId32)
+#    define PRId32 "d"
+#  endif
+#  if !defined(PRId64)
+#    define PRId64 "I64d"
+#  endif
+#  if !defined(PRIx8)
+#    define PRIx8 "x"
+#  endif
+#  if !defined(PRIx16)
+#    define PRIx16 "x"
+#  endif
+#  if !defined(PRIx32)
+#    define PRIx32 "x"
+#  endif
+#  if !defined(PRIx64)
+#    define PRIx64 "I64x"
+#  endif
+#  if !defined(PRIu8)
+#    define PRIu8 "u"
+#  endif
+#  if !defined(PRIu16)
+#    define PRIu16 "u"
+#  endif
+#  if !defined(PRIu32)
+#    define PRIu32 "u"
+#  endif
+#  if !defined(PRIu64)
+#    define PRIu64 "I64u"
+#  endif
+#else
+#  include <inttypes.h>
+#endif
+
+/* Boolean type used by the API.  A definition supplied before including this
+ * header wins; otherwise the choice is: `bool` when it exists as a macro,
+ * `_Bool` on C99 or newer, and plain `int` as the last resort. */
+#if !defined(munit_bool)
+#  if defined(bool)
+#    define munit_bool bool
+#  elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+#    define munit_bool _Bool
+#  else
+#    define munit_bool int
+#  endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Compiler hints.  With __GNUC__ defined, MUNIT_LIKELY / MUNIT_UNLIKELY wrap
+ * __builtin_expect (expecting 1 and 0 respectively) and MUNIT_UNUSED marks a
+ * declaration as intentionally unused.  Elsewhere the first two evaluate
+ * their argument unchanged and MUNIT_UNUSED expands to nothing. */
+#if defined(__GNUC__)
+#  define MUNIT_LIKELY(expr) (__builtin_expect ((expr), 1))
+#  define MUNIT_UNLIKELY(expr) (__builtin_expect ((expr), 0))
+#  define MUNIT_UNUSED __attribute__((__unused__))
+#else
+#  define MUNIT_LIKELY(expr) (expr)
+#  define MUNIT_UNLIKELY(expr) (expr)
+#  define MUNIT_UNUSED
+#endif
+
+/* Size expression for array parameters, e.g. argv[MUNIT_ARRAY_PARAM(argc + 1)].
+ * On C99 or newer (except when __PGI is defined) it expands to its argument;
+ * otherwise it expands to nothing, leaving a plain `[]`. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__PGI)
+#  define MUNIT_ARRAY_PARAM(name) name
+#else
+#  define MUNIT_ARRAY_PARAM(name)
+#endif
+
+/* printf length modifiers for size_t, char and short values.  Outside _WIN32
+ * the standard "z", "hh" and "h" are used.  On _WIN32 the size modifier is
+ * "I64" for x64 targets and empty otherwise, and the char/short modifiers
+ * are empty. */
+#if !defined(_WIN32)
+#  define MUNIT_SIZE_MODIFIER "z"
+#  define MUNIT_CHAR_MODIFIER "hh"
+#  define MUNIT_SHORT_MODIFIER "h"
+#else
+#  if defined(_M_X64) || defined(__amd64__)
+#    define MUNIT_SIZE_MODIFIER "I64"
+#  else
+#    define MUNIT_SIZE_MODIFIER ""
+#  endif
+#  define MUNIT_CHAR_MODIFIER ""
+#  define MUNIT_SHORT_MODIFIER ""
+#endif
+
+/* Marks a function that never returns to its caller.  Chosen in order:
+ * C11 `_Noreturn`, the GNU `noreturn` attribute, MSVC `__declspec(noreturn)`,
+ * or nothing when none of those is available. */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#  define MUNIT_NO_RETURN _Noreturn
+#elif defined(__GNUC__)
+#  define MUNIT_NO_RETURN __attribute__((__noreturn__))
+#elif defined(_MSC_VER)
+#  define MUNIT_NO_RETURN __declspec(noreturn)
+#else
+#  define MUNIT_NO_RETURN
+#endif
+
+/* Pragma pair that disables MSVC warning 4127 and then restores the previous
+ * warning state.  The assertion macros in this header place the pair around
+ * their closing `while (0)`.  Both expand to nothing unless _MSC_VER is
+ * defined and at least 1500. */
+#if defined(_MSC_VER) &&  (_MSC_VER >= 1500)
+#  define MUNIT_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127))
+#  define MUNIT_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))
+#else
+#  define MUNIT_PUSH_DISABLE_MSVC_C4127_
+#  define MUNIT_POP_DISABLE_MSVC_C4127_
+#endif
+
+/* Severity passed to munit_logf_ex / munit_logf / munit_log.  The enumerators
+ * are declared in ascending order: DEBUG < INFO < WARNING < ERROR. */
+typedef enum {
+  MUNIT_LOG_DEBUG,
+  MUNIT_LOG_INFO,
+  MUNIT_LOG_WARNING,
+  MUNIT_LOG_ERROR
+} MunitLogLevel;
+
+/* Attaches the GNU printf `format` attribute to a declaration so format
+ * strings are checked against their arguments.  string_index is the position
+ * of the format parameter and first_to_check the position of the first
+ * variadic argument.  Expands to nothing without __GNUC__ or on MinGW. */
+#if defined(__GNUC__) && !defined(__MINGW32__)
+#  define MUNIT_PRINTF(string_index, first_to_check) __attribute__((format (printf, string_index, first_to_check)))
+#else
+#  define MUNIT_PRINTF(string_index, first_to_check)
+#endif
+
+/* Logging entry point: takes a level, the source location and a printf-style
+ * format with its arguments.  (Implemented outside this header.) */
+MUNIT_PRINTF(4, 5)
+void munit_logf_ex(MunitLogLevel level, const char* filename, int line, const char* format, ...);
+
+/* Convenience wrapper that fills in __FILE__ and __LINE__.  As written it
+ * needs at least one argument after `format`; use munit_log for a plain
+ * message. */
+#define munit_logf(level, format, ...) \
+  munit_logf_ex(level, __FILE__, __LINE__, format, __VA_ARGS__)
+
+/* Logs a plain string; the message goes through "%s", so it is never
+ * interpreted as a format string. */
+#define munit_log(level, msg) \
+  munit_logf(level, "%s", msg)
+
+/* Error-reporting entry point used by every assertion macro: takes the source
+ * location and a printf-style message.  Declared as not returning.
+ * (Implemented outside this header.) */
+MUNIT_NO_RETURN
+MUNIT_PRINTF(3, 4)
+void munit_errorf_ex(const char* filename, int line, const char* format, ...);
+
+/* Convenience wrapper that fills in __FILE__ and __LINE__.  As written it
+ * needs at least one argument after `format`; use munit_error for a plain
+ * message. */
+#define munit_errorf(format, ...) \
+  munit_errorf_ex(__FILE__, __LINE__, format, __VA_ARGS__)
+
+/* Reports a plain string; the message goes through "%s", so it is never
+ * interpreted as a format string. */
+#define munit_error(msg) \
+  munit_errorf("%s", msg)
+
+/* Boolean assertions.  Each evaluates `expr` once and calls munit_error with
+ * a message containing the stringified expression when the check fails.  The
+ * do { } while (0) wrapper makes every macro behave as a single statement. */
+
+/* Fails when `expr` is false. */
+#define munit_assert(expr) \
+  do { \
+    if (!MUNIT_LIKELY(expr)) { \
+      munit_error("assertion failed: " #expr); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Fails when `expr` is false; the message adds "is not true". */
+#define munit_assert_true(expr) \
+  do { \
+    if (!MUNIT_LIKELY(expr)) { \
+      munit_error("assertion failed: " #expr " is not true"); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Fails when `expr` is true; the message adds "is not false". */
+#define munit_assert_false(expr) \
+  do { \
+    if (!MUNIT_LIKELY(!(expr))) { \
+      munit_error("assertion failed: " #expr " is not false"); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Generic comparison assertion on which the typed asserts are built.
+ *   prefix, suffix  string literals printed around each value in the message
+ *   T               type both operands are converted to before comparing
+ *   fmt             printf conversion (without the leading '%') for type T
+ *   a, op, b        operands and the comparison operator, e.g. (x, ==, 3)
+ * Each operand is evaluated exactly once into a temporary of type T.  On
+ * failure the message shows the stringified expression and both values. */
+#define munit_assert_type_full(prefix, suffix, T, fmt, a, op, b)   \
+  do { \
+    T munit_tmp_a_ = (a); \
+    T munit_tmp_b_ = (b); \
+    if (!(munit_tmp_a_ op munit_tmp_b_)) {                               \
+      munit_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")", \
+                   #a, #op, #b, munit_tmp_a_, #op, munit_tmp_b_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Same as munit_assert_type_full with no decoration around the values. */
+#define munit_assert_type(T, fmt, a, op, b) \
+  munit_assert_type_full("", "", T, fmt, a, op, b)
+
+/* Typed comparison assertions.  Each one converts both operands to the type
+ * named in the macro and compares them with `op`, delegating to
+ * munit_assert_type / munit_assert_type_full with the matching printf
+ * conversion.  The char variants print values as two-digit hex inside
+ * '\x..' quotes. */
+#define munit_assert_char(a, op, b) \
+  munit_assert_type_full("'\\x", "'", char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
+#define munit_assert_uchar(a, op, b) \
+  munit_assert_type_full("'\\x", "'", unsigned char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
+#define munit_assert_short(a, op, b) \
+  munit_assert_type(short, MUNIT_SHORT_MODIFIER "d", a, op, b)
+#define munit_assert_ushort(a, op, b) \
+  munit_assert_type(unsigned short, MUNIT_SHORT_MODIFIER "u", a, op, b)
+#define munit_assert_int(a, op, b) \
+  munit_assert_type(int, "d", a, op, b)
+#define munit_assert_uint(a, op, b) \
+  munit_assert_type(unsigned int, "u", a, op, b)
+#define munit_assert_long(a, op, b) \
+  munit_assert_type(long int, "ld", a, op, b)
+#define munit_assert_ulong(a, op, b) \
+  munit_assert_type(unsigned long int, "lu", a, op, b)
+#define munit_assert_llong(a, op, b) \
+  munit_assert_type(long long int, "lld", a, op, b)
+#define munit_assert_ullong(a, op, b) \
+  munit_assert_type(unsigned long long int, "llu", a, op, b)
+
+/* size_t comparison; the length modifier is platform dependent. */
+#define munit_assert_size(a, op, b) \
+  munit_assert_type(size_t, MUNIT_SIZE_MODIFIER "u", a, op, b)
+
+/* Floating-point and pointer comparisons using `op` directly; see
+ * munit_assert_double_equal for a tolerance-based equality check. */
+#define munit_assert_float(a, op, b) \
+  munit_assert_type(float, "f", a, op, b)
+#define munit_assert_double(a, op, b) \
+  munit_assert_type(double, "g", a, op, b)
+#define munit_assert_ptr(a, op, b) \
+  munit_assert_type(const void*, "p", a, op, b)
+
+/* Fixed-width integer comparisons, printed with the <inttypes.h> macros. */
+#define munit_assert_int8(a, op, b)             \
+  munit_assert_type(munit_int8_t, PRIi8, a, op, b)
+#define munit_assert_uint8(a, op, b) \
+  munit_assert_type(munit_uint8_t, PRIu8, a, op, b)
+#define munit_assert_int16(a, op, b) \
+  munit_assert_type(munit_int16_t, PRIi16, a, op, b)
+#define munit_assert_uint16(a, op, b) \
+  munit_assert_type(munit_uint16_t, PRIu16, a, op, b)
+#define munit_assert_int32(a, op, b) \
+  munit_assert_type(munit_int32_t, PRIi32, a, op, b)
+#define munit_assert_uint32(a, op, b) \
+  munit_assert_type(munit_uint32_t, PRIu32, a, op, b)
+#define munit_assert_int64(a, op, b) \
+  munit_assert_type(munit_int64_t, PRIi64, a, op, b)
+#define munit_assert_uint64(a, op, b) \
+  munit_assert_type(munit_uint64_t, PRIu64, a, op, b)
+
+/* Asserts that two doubles differ by at most 10^-precision.  `precision`
+ * must be a literal integer token: it is pasted into the constant
+ * `1e-<precision>` and stringified into the "%0.<precision>g" format. */
+#define munit_assert_double_equal(a, b, precision) \
+  do { \
+    const double munit_tmp_a_ = (a); \
+    const double munit_tmp_b_ = (b); \
+    const double munit_tmp_diff_ = ((munit_tmp_a_ - munit_tmp_b_) < 0) ? \
+      -(munit_tmp_a_ - munit_tmp_b_) : \
+      (munit_tmp_a_ - munit_tmp_b_); \
+    if (MUNIT_UNLIKELY(munit_tmp_diff_ > 1e-##precision)) { \
+      munit_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)", \
+		   #a, #b, munit_tmp_a_, munit_tmp_b_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+#include <string.h>
+/* Asserts that two C strings compare equal with strcmp.  Each argument is
+ * evaluated once.  Both are passed straight to strcmp, so neither may be a
+ * null pointer. */
+#define munit_assert_string_equal(a, b) \
+  do { \
+    const char* munit_tmp_a_ = a; \
+    const char* munit_tmp_b_ = b; \
+    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) != 0)) { \
+      munit_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")", \
+                   #a, #b, munit_tmp_a_, munit_tmp_b_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Asserts that two C strings differ according to strcmp.  Each argument is
+ * evaluated once.  Both are passed straight to strcmp, so neither may be a
+ * null pointer.  The failure message prints the two (equal) values. */
+#define munit_assert_string_not_equal(a, b) \
+  do { \
+    const char* munit_tmp_a_ = a; \
+    const char* munit_tmp_b_ = b; \
+    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) == 0)) { \
+      munit_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")", \
+                   #a, #b, munit_tmp_a_, munit_tmp_b_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Asserts that the first `size` bytes at `a` and `b` are identical.  Each
+ * argument is evaluated once.  On a mismatch the buffers are scanned to
+ * report the offset of the first differing byte.
+ *
+ * The whole comparison `memcmp(...) != 0` sits inside MUNIT_UNLIKELY so the
+ * branch hint describes the failure condition itself, matching the string
+ * assertions above. */
+#define munit_assert_memory_equal(size, a, b) \
+  do { \
+    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
+    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
+    const size_t munit_tmp_size_ = (size); \
+    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) != 0)) { \
+      size_t munit_tmp_pos_; \
+      for (munit_tmp_pos_ = 0 ; munit_tmp_pos_ < munit_tmp_size_ ; munit_tmp_pos_++) { \
+        if (munit_tmp_a_[munit_tmp_pos_] != munit_tmp_b_[munit_tmp_pos_]) { \
+          munit_errorf("assertion failed: memory %s == %s, at offset %" MUNIT_SIZE_MODIFIER "u", \
+                       #a, #b, munit_tmp_pos_); \
+          break; \
+        } \
+      } \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Asserts that the first `size` bytes at `a` and `b` are NOT identical.
+ * Each argument is evaluated once.
+ *
+ * The whole comparison `memcmp(...) == 0` sits inside MUNIT_UNLIKELY; with
+ * the comparison outside, the hint marked the failing case as the expected
+ * one.  The size is printed with MUNIT_SIZE_MODIFIER, like
+ * munit_assert_memory_equal, instead of a hard-coded "%zu". */
+#define munit_assert_memory_not_equal(size, a, b) \
+  do { \
+    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
+    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
+    const size_t munit_tmp_size_ = (size); \
+    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) == 0)) { \
+      munit_errorf("assertion failed: memory %s != %s (%" MUNIT_SIZE_MODIFIER "u bytes)", \
+                   #a, #b, munit_tmp_size_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Pointer convenience assertions, all built on munit_assert_ptr.
+ * munit_assert_null / munit_assert_ptr_null and munit_assert_not_null /
+ * munit_assert_ptr_not_null are pairs of names for the same check. */
+#define munit_assert_ptr_equal(a, b) \
+  munit_assert_ptr(a, ==, b)
+#define munit_assert_ptr_not_equal(a, b) \
+  munit_assert_ptr(a, !=, b)
+#define munit_assert_null(ptr) \
+  munit_assert_ptr(ptr, ==, NULL)
+#define munit_assert_not_null(ptr) \
+  munit_assert_ptr(ptr, !=, NULL)
+#define munit_assert_ptr_null(ptr) \
+  munit_assert_ptr(ptr, ==, NULL)
+#define munit_assert_ptr_not_null(ptr) \
+  munit_assert_ptr(ptr, !=, NULL)
+
+/*** Memory allocation ***/
+
+/* Allocation entry point; receives the caller's source location along with
+ * the requested size.  (Implemented outside this header; its behavior on
+ * failure is not visible here.) */
+void* munit_malloc_ex(const char* filename, int line, size_t size);
+
+/* Allocates `size` bytes, recording the call site via __FILE__ / __LINE__. */
+#define munit_malloc(size) \
+  munit_malloc_ex(__FILE__, __LINE__, (size))
+
+/* Allocates storage for one object of `type` and casts the result. */
+#define munit_new(type) \
+  ((type*) munit_malloc(sizeof(type)))
+
+/* Allocates nmemb * size bytes.  NOTE(review): the product is computed
+ * without an overflow check. */
+#define munit_calloc(nmemb, size) \
+  munit_malloc((nmemb) * (size))
+
+/* Allocates storage for `nmemb` objects of `type` and casts the result. */
+#define munit_newa(type, nmemb) \
+  ((type*) munit_calloc((nmemb), sizeof(type)))
+
+/*** Random number generation ***/
+
+/* Pseudo-random helpers, implemented outside this header.  The notes below
+ * are read from the signatures only. */
+
+/* Seeds the generator with a 32-bit value. */
+void munit_rand_seed(munit_uint32_t seed);
+/* Returns a 32-bit unsigned value. */
+munit_uint32_t munit_rand_uint32(void);
+/* Returns an int based on `min` and `max` -- TODO confirm whether the bounds
+ * are inclusive. */
+int munit_rand_int_range(int min, int max);
+/* Returns a double -- TODO confirm the output range. */
+double munit_rand_double(void);
+/* Takes a buffer of `size` bytes; presumably fills it with random data. */
+void munit_rand_memory(size_t size, munit_uint8_t buffer[MUNIT_ARRAY_PARAM(size)]);
+
+/*** Tests and Suites ***/
+
+/* Outcome a test function reports back to the runner (see MunitTestFunc). */
+typedef enum {
+  /* Test successful */
+  MUNIT_OK,
+  /* Test failed */
+  MUNIT_FAIL,
+  /* Test was skipped */
+  MUNIT_SKIP,
+  /* Test failed due to circumstances not intended to be tested
+   * (things like network errors, invalid parameter value, failure to
+   * allocate memory in the test harness, etc.). */
+  MUNIT_ERROR
+} MunitResult;
+
+/* Declares a test parameter: its name and a list of candidate values.
+ * NOTE(review): how the `values` list is terminated is not visible in this
+ * header -- confirm against the munit documentation. */
+typedef struct {
+  char*  name;
+  char** values;
+} MunitParameterEnum;
+
+/* One concrete name/value pair, as handed to a test function. */
+typedef struct {
+  char* name;
+  char* value;
+} MunitParameter;
+
+/* Looks up `key` in `params` and returns a string for it.  (Implemented
+ * outside this header; presumably returns NULL when the key is absent --
+ * TODO confirm.) */
+const char* munit_parameters_get(const MunitParameter params[], const char* key);
+
+/* Per-test option flags; the values are distinct bits, so they can be
+ * combined with bitwise OR. */
+typedef enum {
+  MUNIT_TEST_OPTION_NONE             = 0,
+  MUNIT_TEST_OPTION_SINGLE_ITERATION = 1 << 0,
+  MUNIT_TEST_OPTION_TODO             = 1 << 1
+} MunitTestOptions;
+
+/* Callback signatures: the test body, an optional setup that produces a
+ * fixture pointer, and an optional tear-down that receives that fixture. */
+typedef MunitResult (* MunitTestFunc)(const MunitParameter params[], void* user_data_or_fixture);
+typedef void*       (* MunitTestSetup)(const MunitParameter params[], void* user_data);
+typedef void        (* MunitTestTearDown)(void* fixture);
+
+/* One entry of a test list: name, callbacks, option flags and parameter
+ * declarations.  The test lists in this project end with an entry whose
+ * fields are all null. */
+typedef struct {
+  char*               name;
+  MunitTestFunc       test;
+  MunitTestSetup      setup;
+  MunitTestTearDown   tear_down;
+  MunitTestOptions    options;
+  MunitParameterEnum* parameters;
+} MunitTest;
+
+/* Suite option flags; only the empty set is defined. */
+typedef enum {
+  MUNIT_SUITE_OPTION_NONE = 0
+} MunitSuiteOptions;
+
+typedef struct MunitSuite_ MunitSuite;
+
+/* A suite: a name prefix, an optional list of tests, an optional list of
+ * child suites (the type is recursive), an iteration count and option
+ * flags. */
+struct MunitSuite_ {
+  char*             prefix;
+  MunitTest*        tests;
+  MunitSuite*       suites;
+  unsigned int      iterations;
+  MunitSuiteOptions options;
+};
+
+/* Runner entry point: takes the root suite, an opaque user_data pointer and
+ * the program's command-line arguments, and returns an int that callers use
+ * as the process exit status.  (Implemented outside this header.) */
+int munit_suite_main(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
+
+/* Note: I'm not very happy with this API; it's likely to change if I
+ * figure out something better.  Suggestions welcome. */
+
+typedef struct MunitArgument_ MunitArgument;
+
+/* Describes one custom command-line argument: its name, a callback that
+ * parses it (receiving the current argv index through `arg`) and a callback
+ * that prints its help text. */
+struct MunitArgument_ {
+  char* name;
+  munit_bool (* parse_argument)(const MunitSuite* suite, void* user_data, int* arg, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
+  void (* write_help)(const MunitArgument* argument, void* user_data);
+};
+
+/* Variant of munit_suite_main that additionally takes a list of custom
+ * arguments.  (Implemented outside this header.) */
+int munit_suite_main_custom(const MunitSuite* suite,
+                            void* user_data,
+                            int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)],
+                            const MunitArgument arguments[]);
+
+/* Optional short aliases (assert_* instead of munit_assert_*), enabled by
+ * defining MUNIT_ENABLE_ASSERT_ALIASES before including this header.  Every
+ * alias forwards to the macro of the same name with the munit_ prefix. */
+#if defined(MUNIT_ENABLE_ASSERT_ALIASES)
+
+#define assert_true(expr) munit_assert_true(expr)
+#define assert_false(expr) munit_assert_false(expr)
+#define assert_char(a, op, b) munit_assert_char(a, op, b)
+#define assert_uchar(a, op, b) munit_assert_uchar(a, op, b)
+#define assert_short(a, op, b) munit_assert_short(a, op, b)
+#define assert_ushort(a, op, b) munit_assert_ushort(a, op, b)
+#define assert_int(a, op, b) munit_assert_int(a, op, b)
+#define assert_uint(a, op, b) munit_assert_uint(a, op, b)
+#define assert_long(a, op, b) munit_assert_long(a, op, b)
+#define assert_ulong(a, op, b) munit_assert_ulong(a, op, b)
+#define assert_llong(a, op, b) munit_assert_llong(a, op, b)
+#define assert_ullong(a, op, b) munit_assert_ullong(a, op, b)
+#define assert_size(a, op, b) munit_assert_size(a, op, b)
+#define assert_float(a, op, b) munit_assert_float(a, op, b)
+#define assert_double(a, op, b) munit_assert_double(a, op, b)
+#define assert_ptr(a, op, b) munit_assert_ptr(a, op, b)
+
+#define assert_int8(a, op, b) munit_assert_int8(a, op, b)
+#define assert_uint8(a, op, b) munit_assert_uint8(a, op, b)
+#define assert_int16(a, op, b) munit_assert_int16(a, op, b)
+#define assert_uint16(a, op, b) munit_assert_uint16(a, op, b)
+#define assert_int32(a, op, b) munit_assert_int32(a, op, b)
+#define assert_uint32(a, op, b) munit_assert_uint32(a, op, b)
+#define assert_int64(a, op, b) munit_assert_int64(a, op, b)
+#define assert_uint64(a, op, b) munit_assert_uint64(a, op, b)
+
+#define assert_double_equal(a, b, precision) munit_assert_double_equal(a, b, precision)
+#define assert_string_equal(a, b) munit_assert_string_equal(a, b)
+#define assert_string_not_equal(a, b) munit_assert_string_not_equal(a, b)
+#define assert_memory_equal(size, a, b) munit_assert_memory_equal(size, a, b)
+#define assert_memory_not_equal(size, a, b) munit_assert_memory_not_equal(size, a, b)
+#define assert_ptr_equal(a, b) munit_assert_ptr_equal(a, b)
+#define assert_ptr_not_equal(a, b) munit_assert_ptr_not_equal(a, b)
+/* Forwards to munit_assert_ptr_null; the previous target,
+ * munit_assert_null_equal, is not defined anywhere in this header. */
+#define assert_ptr_null(ptr) munit_assert_ptr_null(ptr)
+#define assert_ptr_not_null(ptr) munit_assert_ptr_not_null(ptr)
+
+#define assert_null(ptr) munit_assert_null(ptr)
+#define assert_not_null(ptr) munit_assert_not_null(ptr)
+
+#endif /* defined(MUNIT_ENABLE_ASSERT_ALIASES) */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* !defined(MUNIT_H) */
+
+/* With aliases enabled, replace any existing `assert` macro with
+ * munit_assert.  This block sits after the include guard's #endif, so it is
+ * processed on every inclusion of the header -- presumably so that `assert`
+ * can be overridden again after a later <assert.h> include redefines it. */
+#if defined(MUNIT_ENABLE_ASSERT_ALIASES)
+#  if defined(assert)
+#    undef assert
+#  endif
+#  define assert(expr) munit_assert(expr)
+#endif
@@ -0,0 +1,68 @@
+#include "../src/ast.h"
+#include "../src/parser/parser.h"
+#include "munit.h"
+
+// Regression test: tests/input/regression/test_trivia_head.asm must lex and
+// parse without error, and the parser must consume every token (result.next
+// is null).
+MunitResult test_regression_trivia_head(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    lexer_t *lex = &(lexer_t){};
+    error_t *err = lexer_open(lex, "tests/input/regression/test_trivia_head.asm");
+    munit_assert_null(err);
+
+    tokenlist_t *list;
+    err = tokenlist_alloc(&list);
+    munit_assert_null(err);
+
+    err = tokenlist_fill(list, lex);
+    munit_assert_null(err);
+
+    parse_result_t result = parse(list->head);
+    munit_assert_null(result.err);
+    munit_assert_null(result.next);
+
+    ast_node_free(result.node);
+    tokenlist_free(list);
+    // Release the lexer opened above; it was previously left open.
+    lexer_close(lex);
+    return MUNIT_OK;
+}
+
+// Regression test: tests/input/regression/test_no_operands_eof.asm must parse
+// into exactly two instruction nodes, each with two children, the second of
+// which (the operands node) is empty.
+MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    lexer_t *lex = &(lexer_t){};
+    error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
+    munit_assert_null(err);
+
+    tokenlist_t *list;
+    err = tokenlist_alloc(&list);
+    munit_assert_null(err);
+
+    err = tokenlist_fill(list, lex);
+    munit_assert_null(err);
+
+    parse_result_t result = parse(list->head);
+    munit_assert_null(result.err);
+    munit_assert_null(result.next);
+
+    // Both children should be instructions
+    munit_assert_size(result.node->len, ==, 2);
+    munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
+    munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);
+
+    // And they should have empty operands
+    munit_assert_size(result.node->children[0]->len, ==, 2);
+    munit_assert_size(result.node->children[1]->len, ==, 2);
+    munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
+    munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);
+
+    ast_node_free(result.node);
+    tokenlist_free(list);
+    // Release the lexer opened above; it was previously left open.
+    lexer_close(lex);
+    return MUNIT_OK;
+}
+
+// Regression test registry consumed by the test runner. No test here uses a
+// setup/tear-down fixture or parameters; the final all-null entry ends the list.
+MunitTest regression_tests[] = {
+    {
+        .name       = "/trivia_head",
+        .test       = test_regression_trivia_head,
+        .setup      = nullptr,
+        .tear_down  = nullptr,
+        .options    = MUNIT_TEST_OPTION_NONE,
+        .parameters = nullptr,
+    },
+    {
+        .name       = "/no_operands_eof",
+        .test       = test_no_operands_eof,
+        .setup      = nullptr,
+        .tear_down  = nullptr,
+        .options    = MUNIT_TEST_OPTION_NONE,
+        .parameters = nullptr,
+    },
+    {
+        .name       = nullptr,
+        .test       = nullptr,
+        .setup      = nullptr,
+        .tear_down  = nullptr,
+        .options    = MUNIT_TEST_OPTION_NONE,
+        .parameters = nullptr,
+    },
+};
@@ -0,0 +1,393 @@
+#include "../src/encoder/symbols.h"
+#include "../src/ast.h"
+#include "../src/error.h"
+#include "../src/lexer.h"
+#include "../src/parser/parser.h"
+#include "munit.h"
+#include <string.h>
+
+// Lexes and parses the file at `path`, returning the token list through `list`
+// and the AST root through `node`. Every step is checked with a munit
+// assertion, so a missing or malformed input file fails the calling test at
+// the step that went wrong instead of crashing later on a null pointer.
+// The caller owns both outputs and frees them with tokenlist_free and
+// ast_node_free.
+void symbols_setup_test(ast_node_t **node, tokenlist_t **list, char *path) {
+    lexer_t *lex = &(lexer_t){};
+    error_t *err = lexer_open(lex, path);
+    munit_assert_null(err);
+
+    err = tokenlist_alloc(list);
+    munit_assert_null(err);
+
+    err = tokenlist_fill(*list, lex);
+    munit_assert_null(err);
+
+    parse_result_t result = parse((*list)->head);
+    // Close the lexer before checking the parse result so it is released
+    // even when the assertions below fail
+    lexer_close(lex);
+    munit_assert_null(result.err);
+    munit_assert_not_null(result.node);
+
+    *node = result.node;
+}
+
+// A freshly allocated symbol table must be non-null, report no error, start
+// empty with a capacity of 64, and own a symbols array.
+MunitResult test_symbol_table_alloc(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    symbol_table_t *table = nullptr;
+    error_t *err = symbol_table_alloc(&table);
+
+    munit_assert_ptr_not_null(table);
+    munit_assert_ptr_null(err);
+    munit_assert_size(table->cap, ==, 64); // Default capacity
+    munit_assert_size(table->len, ==, 0);
+    munit_assert_ptr_not_null(table->symbols);
+
+    symbol_table_free(table);
+    return MUNIT_OK;
+}
+
+// Looking up any name in an empty table must return null.
+MunitResult test_symbol_table_lookup_empty(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    symbol_table_t *table = nullptr;
+    // Fail here, not inside the lookup, if the table could not be allocated
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+
+    symbol_t *symbol = symbol_table_lookup(table, "nonexistent");
+    munit_assert_ptr_null(symbol);
+
+    symbol_table_free(table);
+    return MUNIT_OK;
+}
+
+// Adding a label reference to an empty table must create one SYMBOL_REFERENCE
+// entry named "test" with no statement attached.
+MunitResult test_symbol_add_reference(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+    ast_node_t *root;
+    tokenlist_t *list;
+    symbol_table_t *table = nullptr;
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    // Fail here, not on table->len below, if the table could not be allocated
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+
+    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
+    ast_node_t *statement = root->children[3]; // The containing statement
+    munit_assert_int(reference->id, ==, NODE_LABEL_REFERENCE);
+    munit_assert_size(table->len, ==, 0);
+
+    err = symbol_table_update(table, reference, statement);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    symbol_t *symbol = symbol_table_lookup(table, "test");
+    munit_assert_not_null(symbol);
+    munit_assert_int(SYMBOL_REFERENCE, ==, symbol->kind);
+    // For references, the statement should be nullptr
+    munit_assert_ptr_null(symbol->statement);
+    munit_assert_string_equal(symbol->name, "test");
+
+    symbol_table_free(table);
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Adding a label to an empty table must create one SYMBOL_LOCAL entry named
+// "test" whose statement is the label node itself.
+MunitResult test_symbol_add_label(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+    ast_node_t *root;
+    tokenlist_t *list;
+    symbol_table_t *table = nullptr;
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    // Fail here, not on table->len below, if the table could not be allocated
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+
+    ast_node_t *label = root->children[2];
+    munit_assert_int(label->id, ==, NODE_LABEL);
+    munit_assert_size(table->len, ==, 0);
+
+    err = symbol_table_update(table, label, label);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    symbol_t *symbol = symbol_table_lookup(table, "test");
+    munit_assert_not_null(symbol);
+    munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
+    munit_assert_ptr_equal(label, symbol->statement);
+    munit_assert_string_equal(symbol->name, "test");
+
+    symbol_table_free(table);
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Adding an import directive to an empty table must create one SYMBOL_IMPORT
+// entry named "test" with no statement attached.
+MunitResult test_symbol_add_import(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+    ast_node_t *root;
+    tokenlist_t *list;
+    symbol_table_t *table = nullptr;
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    // Fail here, not on table->len below, if the table could not be allocated
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+
+    ast_node_t *import_directive = root->children[0]->children[1];
+    ast_node_t *statement = root->children[0]; // The containing statement
+    munit_assert_int(import_directive->id, ==, NODE_IMPORT_DIRECTIVE);
+    munit_assert_size(table->len, ==, 0);
+
+    err = symbol_table_update(table, import_directive, statement);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    symbol_t *symbol = symbol_table_lookup(table, "test");
+    munit_assert_not_null(symbol);
+    munit_assert_int(SYMBOL_IMPORT, ==, symbol->kind);
+    // For import directives, the statement should be nullptr
+    munit_assert_ptr_null(symbol->statement);
+    munit_assert_string_equal(symbol->name, "test");
+
+    symbol_table_free(table);
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Shared driver for the upgrade tests. Inserts `first` into a fresh table,
+// checks the resulting symbol, applies `second`, and verifies the outcome.
+//   name                symbol name both nodes refer to
+//   first / second      nodes passed to symbol_table_update, in that order
+//   *_kind              symbol kind each node is expected to produce
+//   *_statement         statement passed alongside each node
+//   should_succeed      true if the second update must return no error; when
+//                       false it must return err_symbol_table_incompatible_symbols
+//   should_update       true if the kind must become second_kind, false if it
+//                       must stay first_kind
+//   expected_statement  statement the symbol must hold after the second update
+void test_symbol_update(const char *name, ast_node_t *first, symbol_kind_t first_kind, ast_node_t *first_statement,
+                        ast_node_t *second, symbol_kind_t second_kind, ast_node_t *second_statement,
+                        bool should_succeed, bool should_update, ast_node_t *expected_statement) {
+    symbol_table_t *table = nullptr;
+    // Fail here, not on table->len below, if the table could not be allocated
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+
+    // Add the first symbol
+    err = symbol_table_update(table, first, first_statement);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    // Verify first symbol state
+    symbol_t *symbol = symbol_table_lookup(table, name);
+    munit_assert_not_null(symbol);
+    munit_assert_int(first_kind, ==, symbol->kind);
+    munit_assert_string_equal(symbol->name, name);
+
+    // Check statement based on symbol kind
+    if (first_kind == SYMBOL_LOCAL) {
+        munit_assert_ptr_equal(first_statement, symbol->statement);
+    } else {
+        munit_assert_ptr_null(symbol->statement);
+    }
+
+    // Attempt the second update
+    err = symbol_table_update(table, second, second_statement);
+
+    // Check if update succeeded as expected
+    if (should_succeed) {
+        munit_assert_null(err);
+    } else {
+        // A rejected update ends the scenario: nothing further to verify
+        munit_assert_ptr_equal(err, err_symbol_table_incompatible_symbols);
+        symbol_table_free(table);
+        return;
+    }
+
+    // Verify symbol after second update
+    symbol = symbol_table_lookup(table, name);
+    munit_assert_not_null(symbol);
+
+    // Check if kind updated as expected
+    if (should_update) {
+        munit_assert_int(second_kind, ==, symbol->kind);
+    } else {
+        munit_assert_int(first_kind, ==, symbol->kind);
+    }
+
+    // Simply check against the expected statement value
+    munit_assert_ptr_equal(expected_statement, symbol->statement);
+
+    symbol_table_free(table);
+}
+
+// Exercises every accepted pair of symbol updates on the name "test": real
+// upgrades (the kind changes), identity updates (same kind twice) and
+// downgrades that are accepted but leave the kind unchanged.
+MunitResult test_symbol_upgrade_valid(const MunitParameter params[], void *data) {
+    // Unused; cast to void like every other test in this file to avoid
+    // unused-parameter warnings
+    (void)params;
+    (void)data;
+    ast_node_t *root;
+    tokenlist_t *list;
+
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+
+    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
+    ast_node_t *reference_statement = root->children[3];
+    ast_node_t *label = root->children[2];
+    ast_node_t *import_directive = root->children[0]->children[1];
+    ast_node_t *import_statement = root->children[0];
+    ast_node_t *export_directive = root->children[1]->children[1];
+    ast_node_t *export_statement = root->children[1];
+
+    // real upgrades
+    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, label, SYMBOL_LOCAL, label, true, true,
+                       label);
+    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, import_directive, SYMBOL_IMPORT,
+                       import_statement, true, true, nullptr);
+    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, export_directive, SYMBOL_EXPORT,
+                       export_statement, true, true, nullptr);
+    test_symbol_update("test", label, SYMBOL_LOCAL, label, export_directive, SYMBOL_EXPORT, export_statement, true,
+                       true, label);
+
+    // identity upgrades
+    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, reference, SYMBOL_REFERENCE,
+                       reference_statement, true, false, nullptr);
+    test_symbol_update("test", label, SYMBOL_LOCAL, label, label, SYMBOL_LOCAL, label, true, false, label);
+    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, import_directive, SYMBOL_IMPORT,
+                       import_statement, true, false, nullptr);
+    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, export_directive, SYMBOL_EXPORT,
+                       export_statement, true, false, nullptr);
+
+    // downgrades that are allowed and ignored
+    test_symbol_update("test", label, SYMBOL_LOCAL, label, reference, SYMBOL_REFERENCE, reference_statement, true,
+                       false, label);
+    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, reference, SYMBOL_REFERENCE,
+                       reference_statement, true, false, nullptr);
+    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, reference, SYMBOL_REFERENCE,
+                       reference_statement, true, false, nullptr);
+    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, label, SYMBOL_LOCAL, label, true,
+                       false, label);
+    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, label, SYMBOL_LOCAL, label, true,
+                       false, label);
+
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Exercises the pairs of symbol updates that must be rejected with
+// err_symbol_table_incompatible_symbols: local -> import, export -> import
+// and import -> export.
+MunitResult test_symbol_upgrade_invalid(const MunitParameter params[], void *data) {
+    // Unused; cast to void like every other test in this file to avoid
+    // unused-parameter warnings
+    (void)params;
+    (void)data;
+    ast_node_t *root;
+    tokenlist_t *list;
+
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+
+    ast_node_t *label = root->children[2];
+    ast_node_t *import_directive = root->children[0]->children[1];
+    ast_node_t *import_statement = root->children[0];
+    ast_node_t *export_directive = root->children[1]->children[1];
+    ast_node_t *export_statement = root->children[1];
+
+    // invalid upgrades
+    test_symbol_update("test", label, SYMBOL_LOCAL, label, import_directive, SYMBOL_IMPORT, import_statement, false,
+                       false, nullptr);
+    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, import_directive, SYMBOL_IMPORT,
+                       import_statement, false, false, nullptr);
+    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, export_directive, SYMBOL_EXPORT,
+                       export_statement, false, false, nullptr);
+
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Adding an export directive to an empty symbol table must create exactly one
+// SYMBOL_EXPORT entry named "test" that has no defining statement attached.
+MunitResult test_symbol_add_export(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    ast_node_t *tree;
+    tokenlist_t *tokens;
+    symbols_setup_test(&tree, &tokens, "tests/input/symbols.asm");
+
+    symbol_table_t *symtab = nullptr;
+    symbol_table_alloc(&symtab);
+
+    // The second statement of the input file holds the export directive
+    ast_node_t *owner = tree->children[1];
+    ast_node_t *directive = owner->children[1];
+    munit_assert_int(directive->id, ==, NODE_EXPORT_DIRECTIVE);
+    munit_assert_size(symtab->len, ==, 0);
+
+    error_t *result = symbol_table_update(symtab, directive, owner);
+    munit_assert_null(result);
+    munit_assert_size(symtab->len, ==, 1);
+
+    symbol_t *found = symbol_table_lookup(symtab, "test");
+    munit_assert_not_null(found);
+    munit_assert_int(SYMBOL_EXPORT, ==, found->kind);
+    // An export on its own does not define the symbol, so no statement is stored
+    munit_assert_ptr_null(found->statement);
+    munit_assert_string_equal(found->name, "test");
+
+    symbol_table_free(symtab);
+    ast_node_free(tree);
+    tokenlist_free(tokens);
+    return MUNIT_OK;
+}
+
+// Fills a fresh symbol table up to its initial capacity, adds one more label
+// and verifies that the capacity doubles while every symbol stays reachable
+// through symbol_table_lookup.
+//
+// The input file is expected to contain at least initial capacity + 1 label
+// statements named lbl_0, lbl_1, ... in order.
+MunitResult test_symbol_table_growth(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+    ast_node_t *root;
+    tokenlist_t *list;
+    symbol_table_t *table = nullptr;
+
+    // Set up with our manysymbols.asm file
+    symbols_setup_test(&root, &list, "tests/input/manysymbols.asm");
+    symbol_table_alloc(&table);
+
+    // Initial capacity should be the default (64)
+    munit_assert_size(table->cap, ==, 64);
+    munit_assert_size(table->len, ==, 0);
+
+    // Fill the table exactly up to its initial capacity. All later bounds are
+    // derived from initial_cap so the default is only spelled out once above.
+    const size_t initial_cap = table->cap;
+    for (size_t i = 0; i < initial_cap; i++) {
+        ast_node_t *label = root->children[i];
+        munit_assert_int(label->id, ==, NODE_LABEL);
+
+        error_t *err = symbol_table_update(table, label, label);
+        munit_assert_null(err);
+        munit_assert_size(table->len, ==, i + 1);
+
+        // Capacity should remain the same while the table is not yet full
+        munit_assert_size(table->cap, ==, initial_cap);
+    }
+
+    // Now add one label beyond the initial capacity, which should trigger growth
+    ast_node_t *final_label = root->children[initial_cap];
+    munit_assert_int(final_label->id, ==, NODE_LABEL);
+
+    error_t *err = symbol_table_update(table, final_label, final_label);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, initial_cap + 1);
+
+    // Capacity should have doubled
+    munit_assert_size(table->cap, ==, initial_cap * 2);
+
+    // Validate we can look up all the symbols
+    for (size_t i = 0; i <= initial_cap; i++) {
+        // Large enough for "lbl_" plus any size_t value; snprintf is bounded
+        // and its result is checked so a truncated name can never be used.
+        char name[32];
+        int written = snprintf(name, sizeof(name), "lbl_%zu", i);
+        munit_assert_int(written, >, 0);
+        munit_assert_size((size_t)written, <, sizeof(name));
+
+        symbol_t *symbol = symbol_table_lookup(table, name);
+        munit_assert_not_null(symbol);
+        munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
+        munit_assert_string_equal(symbol->name, name);
+        munit_assert_ptr_equal(symbol->statement, root->children[i]);
+    }
+
+    symbol_table_free(table);
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Passing the AST root to symbol_table_update must be rejected with
+// err_symbol_table_invalid_node and must leave the table empty.
+MunitResult test_symbol_invalid_node(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    ast_node_t *tree;
+    tokenlist_t *tokens;
+    symbols_setup_test(&tree, &tokens, "tests/input/symbols.asm");
+
+    symbol_table_t *symtab = nullptr;
+    symbol_table_alloc(&symtab);
+    munit_assert_size(symtab->len, ==, 0);
+
+    // The root node is used both as the node and as its containing statement
+    error_t *result = symbol_table_update(symtab, tree, tree);
+    munit_assert_ptr_equal(result, err_symbol_table_invalid_node);
+    munit_assert_size(symtab->len, ==, 0);
+
+    symbol_table_free(symtab);
+    ast_node_free(tree);
+    tokenlist_free(tokens);
+    return MUNIT_OK;
+}
+
+// Table of all symbol table tests. Each entry pairs a test path with the
+// function implementing it; every other field is left at nullptr or
+// MUNIT_TEST_OPTION_NONE. The final entry with a nullptr name and function
+// closes the list (presumably the end marker the µnit runner looks for --
+// confirm against munit.h).
+MunitTest symbols_tests[] = {
+    {"/table_alloc",        test_symbol_table_alloc,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/table_lookup_empty", test_symbol_table_lookup_empty, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/add_reference",      test_symbol_add_reference,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/add_label",          test_symbol_add_label,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/add_import",         test_symbol_add_import,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/add_export",         test_symbol_add_export,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/upgrade_valid",      test_symbol_upgrade_valid,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/upgrade_invalid",    test_symbol_upgrade_invalid,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/table_growth",       test_symbol_table_growth,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/invalid_node",       test_symbol_invalid_node,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {nullptr,               nullptr,                        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
+};
@@ -2,19 +2,17 @@

 set -euo pipefail

-# Start with static analysis
-make clean all
-mkdir -p reports/static-analysis
-scan-build -o reports/static-analysis/ -plist-html --status-bugs make all
+make analyze debug asan msan

-# Run the sanitizer builds and valgrind
-make clean sanitize all
+ASAN=build/asan/oas
+MSAN=build/msan/oas
+DEBUG=build/debug/oas

 ARGUMENTS=("tokens" "text" "ast")
 while IFS= read -r INPUT_FILE; do
    for ARGS in ${ARGUMENTS[@]}; do
-        ./oas-asan $ARGS $INPUT_FILE > /dev/null
-        ./oas-msan $ARGS $INPUT_FILE > /dev/null
-        valgrind --leak-check=full --error-exitcode=1 ./oas $ARGS $INPUT_FILE >/dev/null
+        "$ASAN" "$ARGS" "$INPUT_FILE" > /dev/null  # quoted: input paths from find may contain spaces or glob characters
+        "$MSAN" "$ARGS" "$INPUT_FILE" > /dev/null
+        valgrind --leak-check=full --error-exitcode=1 "$DEBUG" "$ARGS" "$INPUT_FILE" >/dev/null
    done
 done < <(find tests/input/ -type f -name '*.asm')
Author	SHA1	Message	Date
omicron	7cefc3564d	Implement one immediate label reference operand Validate the build / validate-build (push) Successful in 43s Details Also adds opcode data for jmp and call	2025-04-24 14:45:57 +02:00
omicron	c848995ad6	Implement two pass encoding First pass: - collect information for numbers, registers and which instructions contain label references - encode all instructions that don't contain label references - Set (temporary) addresses for each instruction Second pass: - Collect information about label references (address, offset, size) - encode all instructions that contain label references - Update (if necessary) addresses for each instruction The second pass is iterated 10 times or until no instructions change size, whichever comes first.	2025-04-24 14:45:46 +02:00
omicron	5272fdb227	Add more values to the ast to facilitate encoding - Add an instruction value that contains the encoding, the address and a flag to indicate if this instruction contains label references - Add label value that contains an address - Add reference value that contains offset, an absolute address and an operand size - define types for all value options in the union - define accessor functions for all the values in the union	2025-04-23 15:57:04 +02:00
omicron	0acc3f27f3	Update symbols tests for new API	2025-04-23 15:56:46 +02:00
omicron	9c6b69e187	Symbol table now keeps track of label statements Before it kept track of a more specific node that referenced the symbol in some way. Now it will only keep track of the actual label defining statements. This is done to facilitate encoding. The encoder can now go from a symbol name to the statement that defines the symbol. Restructure the encoder to deal with this and pass the correct statement to the symbol update function.	2025-04-18 14:00:08 +02:00
omicron	530e3fb423	Fix parse_memory_expression to use parse_label_reference Validate the build / validate-build (push) Successful in 37s Details	2025-04-17 23:28:44 +02:00
omicron	ea5164e584	Make compiler-rt version match the clang version in the gitea action Validate the build / validate-build (push) Successful in 38s Details	2025-04-17 16:42:23 +02:00
omicron	bf3fd83b64	Let the release build error on warnings Add -Werror to the release configuration. Also add the release build as a dependency of the make validate rule. The idea is that builds should not pass validation if they have warnings but it shouldn't stop debug builds during development from compiling while work is in progress.	2025-04-17 15:18:28 +02:00
omicron	6f78d26ea1	Change the n argument of lexer_shift_buffer to size_t from int Validate the build / validate-build (push) Failing after 35s Details	2025-04-17 15:12:56 +02:00
omicron	1a79bf050e	Remove unused ast_node_free_value Values are all inside the ast struct and require no cleanup other than freeing the ast struct.	2025-04-17 15:10:36 +02:00
omicron	26cb374c1d	Update gitignore, add /build and remove old build artifacts	2025-04-17 15:09:29 +02:00
omicron	d97cfb97be	Implement printing the encoding in main Validate the build / validate-build (push) Successful in 33s Details	2025-04-16 23:10:17 +02:00
omicron	99c9dcd985	Incomplete second pass encoding	2025-04-16 23:10:09 +02:00
omicron	7e9c1bfda2	Add bytes type and tests bytes_t is a local (automatic) allocation array that carries the length and capacity with it.	2025-04-16 23:10:09 +02:00
omicron	d8ae126e9a	Add opcode encoding value for NODE_INSTRUCTION entries in the AST	2025-04-16 23:10:09 +02:00
omicron	68dcd9dcce	Add first encoding pass First pass collects all the symbols and interprets number and register tokens into usable data for the later passes.	2025-04-16 23:10:00 +02:00
omicron	dcf90b72e0	Add register and number values to AST nodes	2025-04-16 23:10:00 +02:00
omicron	2cf69f5e18	Add initial limited opcode data	2025-04-16 23:09:47 +02:00
omicron	d59559d327	Add registers data table Change the validated primitive parse_register so that it uses the data table instead	2025-04-16 13:46:19 +02:00
omicron	ac14925a0a	Add symbols tests	2025-04-16 13:46:19 +02:00
omicron	2a7bb479ac	initial symbol table implementation	2025-04-16 13:46:19 +02:00
omicron	ef22c0b620	Add .import and .export to the input test file	2025-04-16 13:46:19 +02:00
omicron	8c0e9926c5	Make main properly return with failure on parsing errors	2025-04-16 13:46:19 +02:00
omicron	d3d69b82d5	Add .import and .export directive to the grammar and parser	2025-04-16 13:46:10 +02:00
omicron	dc210e409c	fix parse_immediate to accept label_reference instead of identifier	2025-04-16 13:41:28 +02:00
omicron	00272d69bf	Add regression test for parse zero operands at eof Validate the build / validate-build (push) Successful in 30s Details	2025-04-16 13:16:55 +02:00
omicron	2385d38608	Prune the parse tree of NODE_NEWLINE after parsing succeeds	2025-04-16 13:01:02 +02:00
omicron	242fd9baa5	Fix grammar not being able to disambiguate some instructions When two identifiers follow each other it could be two instruction mnemonics or one instruction mnemonic and one operand. To fix this TOKEN_NEWLINE has been reintroduced as a semantic token. The grammar has been changed to allow empty statements and every instruction and directive has to end in a newline. Labels do not have to end in a newline. In addition to updating the grammar, the implementation of tokenlist, ast and parser has been updated to reflect these changes.	2025-04-16 12:34:44 +02:00
omicron	1574ec6249	Fix parse_consecutive behavior when the token stream runs out	2025-04-16 12:13:02 +02:00
omicron	92c63092a1	Add regression test for trivia at the head of tokenlist Validate the build / validate-build (push) Successful in 29s Details	2025-04-09 01:17:09 +02:00
omicron	5560de2904	Make sure parse skips past initial trivia in the tokenlist	2025-04-09 01:15:51 +02:00
omicron	2bea87b39a	Run tests in the validate gitea action Validate the build / validate-build (push) Successful in 29s Details	2025-04-06 09:23:25 +02:00
omicron	2eb7b3c2f1	use llvm to generate test coverage	2025-04-06 09:17:51 +02:00
omicron	f1f4c93a8e	Fix bug in lexer_next_number not correctly tracking character number Validate the build / validate-build (push) Successful in 28s Details When a number has a suffix the lexer state didn't record the number of characters consumed for this suffix. This made the lexer state be 2-3 characters short in its line location reporting until it encountered a newline character. It did not otherwise corrupt the state of the lexer.	2025-04-05 01:41:40 +02:00
omicron	27099c9899	Add initial unit tests - Add µnit source and header files - Add test target to the build system - Implement a thorough lexer test suite - Implement a minimal AST test suite	2025-04-05 01:37:04 +02:00
omicron	3fead8017b	Rename lexer errors	2025-04-05 01:37:04 +02:00
omicron	af66790cff	Clean up error definitions, location and expose them in the headers - Exposes all errors in the header file so any user of the api can test for the specific error conditions - Mark all static error pointers as const - Move generic errors into error.h - Name all errors err_modulename_* for errors that belong to a specific module and err_* for generic errors.	2025-04-05 01:37:04 +02:00
omicron	cb8768b1d0	Make clangd aware of the _POSIX_C_SOURCE define in the build system	2025-04-05 01:37:04 +02:00
omicron	1571c52012	Add some building documentation that clarifies the make targets Validate the build / validate-build (push) Successful in 26s Details	2025-04-04 02:18:11 +02:00
omicron	0f9ced8eb1	Rework the build system to be more modular Split most of the work off into make/base.mk and allow for easy wrappers to be created around that that can build with different instrumentation in their own build directory. Create wrappers for the following: - release build - debug build - afl++ fuzzing build - static analysis with clang - clang memory sanitizer - clang address/undefined sanitizer	2025-04-04 02:18:02 +02:00