Add more symbols tests

expose symbols table errors in the header
Add tests for all 4 kinds of symbols being added
2025-04-08 21:57:59 +02:00 · 2025-04-08 21:57:28 +02:00 · 2025-04-08 21:02:49 +02:00 · 2025-04-08 21:01:59 +02:00 · 2025-04-08 20:38:08 +02:00 · 2025-04-08 20:37:09 +02:00
37 changed files with 4299 additions and 97 deletions
@@ -1,2 +1,2 @@
 CompileFlags:
-  Add: ["-std=c23", "-x", "c"]
+  Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]
@@ -34,3 +34,7 @@ jobs:
      - name: make validate
        run: |
          make validate
+
+      - name: make test
+        run: |
+          make test
@@ -1,54 +1,46 @@
-.PHONY: all clean clean-objects clean-reports run sanitize validate fuzz
+.PHONY: all clean distclean release debug afl asan msan validate analyze fuzz test

-CC=clang
-LD=clang
-CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
-LDFLAGS?=
+debug: ## First rule, hence the default goal: build via make/debug.mk
+	$(MAKE) -rRf make/debug.mk all

-SOURCES = $(shell find src/ -type f -name '*.c')
-OBJECTS = $(SOURCES:.c=.o)
-DEPENDENCIES = $(SOURCES:.c=.d)
-TARGET?=oas
-OUTPUTS=oas oas-asan oas-msan oas-afl
-RUNARGUMENTS?=ast tests/input/valid.asm
-
-all: $(TARGET)
+all: debug release afl asan msan ## Build every binary variant
 	

-run: $(TARGET)
-	./$(TARGET) $(RUNARGUMENTS)
+release: ## Build via make/release.mk
+	$(MAKE) -rRf make/release.mk all
+
+afl: ## Build via make/afl.mk (afl-clang-fast)
+	$(MAKE) -rRf make/afl.mk all

 fuzz:
-	make CC="afl-clang-fast" LD="afl-clang-fast" TARGET="oas-afl" clean-objects all
-	make clean-objects
-	mkdir -p reports/afl
-	afl-fuzz -i tests/input -o reports/afl -m none -- ./oas-afl -tokens @@
+	$(MAKE) -rRf make/afl.mk fuzz

-sanitize:
-	make CFLAGS="$(CFLAGS) -fsanitize=address,undefined" \
-		LDFLAGS="-fsanitize=address,undefined" \
-		TARGET="oas-asan" clean-objects all
-	make CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" \
-		LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" \
-		TARGET="oas-msan" clean-objects all 
-	make clean-objects
+asan: ## Build via make/asan.mk (-fsanitize=address,undefined)
+	$(MAKE) -rRf make/asan.mk all

-validate:
+msan: ## Build via make/msan.mk (-fsanitize=memory)
+	$(MAKE) -rRf make/msan.mk all
+
+validate: asan msan debug ## Build the three variants, then run validate.sh
 	./validate.sh

-$(TARGET): $(OBJECTS)
-	$(LD) $(LDFLAGS) -o $@ $^
+analyze: ## Clean rebuild through make/analyze.mk
+	$(MAKE) -rRf make/analyze.mk clean all

-%.o: %.c
-	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
+test: ## Build and run the test binary via make/test.mk
+	$(MAKE) -rRf make/test.mk test

-include $(DEPENDENCIES)
+clean: ## Run every variant's own clean, then drop build/ entirely
+	$(MAKE) -rRf make/release.mk clean
+	$(MAKE) -rRf make/debug.mk clean
+	$(MAKE) -rRf make/afl.mk clean
+	$(MAKE) -rRf make/msan.mk clean
+	$(MAKE) -rRf make/asan.mk clean
+	$(MAKE) -rRf make/analyze.mk clean
+	$(MAKE) -rRf make/test.mk clean
+	rm -rf build/

-clean-objects:
-	rm -f $(OBJECTS) $(DEPENDENCIES)
-
-clean-reports:
+distclean: clean ## clean, plus generated reports
+	$(MAKE) -rRf make/afl.mk distclean
+	$(MAKE) -rRf make/analyze.mk distclean
 	rm -rf reports/
-
-clean: clean-objects
-	rm -f $(TARGET) $(OUTPUTS)
@@ -0,0 +1,29 @@
+# Building
+
+To build oas in the default configuration you just need GNU make and a
+sufficiently modern clang.
+
+```
+make
+```
+
+## Make targets
+
+There are a number of make targets available to build various instrumented
+builds that are used in validation, analysis and sanitizing. Some of these may
+require extra dependencies.
+
+
+ - `debug`: Creates the debug build in `build/debug`. This is the default target.
+ - `all`: Builds all binary executable targets. These are
+   `debug`, `release`, `msan`, `asan` and `afl`. All executables can be found
+   in `build/` in a subdirectory matching their target names.
+ - `release`: Creates the release build in `build/release`
+ - `afl`: Creates a build with AFL++ instrumentation for fuzzing
+ - `fuzz`: Starts the fuzzer with the instrumented afl executable
+ - `asan`: builds with the address and undefined clang sanitizers
+ - `msan`: builds with the memory clang sanitizer
+ - `validate`: Builds `debug`, `msan`, and `asan` targets, then runs the
+   validation script. This script executes the sanitizer targets and runs
+   Valgrind on the debug target across multiple modes and test input files.
+
@@ -3,10 +3,14 @@

 <label> ::= <identifier> <colon>

-<directive> ::= <dot> <section_directive>
+<directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> )

 <section_directive> ::= "section" <identifier>

+<export_directive> ::= "export" <identifier>
+
+<import_directive> ::= "import" <identifier>
+
 <instruction> ::= <identifier> <operands>

 <operands> ::= <operand> ( <comma> <operand> )*
@@ -0,0 +1,14 @@
+.PHONY: fuzz distclean
+
+CC=afl-clang-fast
+LD=afl-clang-fast
+BUILD_DIR=build/afl/
+
+include make/base.mk # mandatory: a missing base.mk must be a hard error
+
+fuzz: $(BUILD_DIR)$(TARGET) ## Run afl-fuzz on the instrumented binary; output goes to reports/afl
+	mkdir -p reports/afl
+	afl-fuzz -i tests/input -o reports/afl -m none -- ./$< -tokens @@
+
+distclean: clean ## clean, plus the fuzzer output directory
+	rm -rf reports/afl
@@ -0,0 +1,10 @@
+.PHONY: analyze distclean
+BUILD_DIR=build/analyze/
+include make/base.mk # mandatory: a missing base.mk must be a hard error
+
+analyze: ## Wrap the `all` build in scan-build; reports go to reports/static-analysis
+	mkdir -p reports/static-analysis
+	scan-build -o reports/static-analysis/ -plist-html --status-bugs $(MAKE) -rRf make/analyze.mk all
+
+distclean: clean ## clean, plus the static-analysis reports
+	rm -rf reports/static-analysis
@@ -0,0 +1,5 @@
+CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=address,undefined
+LDFLAGS=-fsanitize=address,undefined
+BUILD_DIR=build/asan/
+
+include make/base.mk # mandatory: a missing base.mk must be a hard error
@@ -0,0 +1,27 @@
+.PHONY: all clean
+
+CC?=clang
+LD?=clang
+CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
+LDFLAGS?=
+BUILD_DIR?=build/debug/
+
+SOURCES?=$(shell find src/ -type f -name '*.c')
+OBJECTS:=$(patsubst %.c,$(BUILD_DIR)%.o,$(SOURCES))
+DEPENDENCIES:=$(OBJECTS:.o=.d)
+TARGET?=oas
+
+all: $(BUILD_DIR)$(TARGET) ## Link the binary for the including configuration
+
+
+$(BUILD_DIR)$(TARGET): $(OBJECTS)
+	$(LD) $(LDFLAGS) -o $@ $^
+
+$(BUILD_DIR)%.o: %.c
+	mkdir -p $(@D)
+	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
+
+-include $(DEPENDENCIES)
+
+clean: ## Remove this configuration's build directory
+	rm -rf $(BUILD_DIR)
@@ -0,0 +1 @@
+include make/base.mk # debug uses base.mk's defaults as-is; a missing base.mk must be a hard error
@@ -0,0 +1,5 @@
+CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=memory
+LDFLAGS=-fsanitize=memory
+BUILD_DIR=build/msan/
+
+include make/base.mk # mandatory: a missing base.mk must be a hard error
@@ -0,0 +1,5 @@
+CFLAGS?=-Wall -Wextra -Wpedantic -O2 -std=c23 -flto -fomit-frame-pointer -DNDEBUG -D_POSIX_C_SOURCE=200809L
+LDFLAGS?=-flto -s -Wl,--gc-sections
+BUILD_DIR?=build/release/
+
+include make/base.mk # mandatory: a missing base.mk must be a hard error
@@ -0,0 +1,25 @@
+.PHONY: test clean-coverage
+
+CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fprofile-instr-generate -fcoverage-mapping
+LDFLAGS?=-fprofile-instr-generate
+BUILD_DIR=build/test/
+TARGET=oas-tests
+SOURCES := $(filter-out src/main.c, $(shell find src/ tests/ -type f -name '*.c'))
+include make/base.mk # mandatory: a missing base.mk must be a hard error
+
+test: $(BUILD_DIR)$(TARGET) ## Run the test binary and render an HTML coverage report
+	mkdir -p reports/coverage
+	LLVM_PROFILE_FILE="reports/coverage/tests.profraw" $(BUILD_DIR)$(TARGET)
+	llvm-profdata merge -sparse reports/coverage/tests.profraw -o reports/coverage/tests.profdata
+	llvm-cov show $(BUILD_DIR)$(TARGET) -instr-profile=reports/coverage/tests.profdata -format=html -output-dir=reports/coverage/html -ignore-filename-regex="tests/.*"
+	@echo "--"
+	@echo "Test coverage:"
+	@echo "file://$$(realpath reports/coverage/html/index.html)"
+	@echo "--"
+
+# base.mk already gives `clean` a recipe (rm -rf $(BUILD_DIR)). A second recipe
+# here would replace it, so the coverage cleanup hangs off a prerequisite instead.
+clean: clean-coverage
+
+clean-coverage:
+	rm -rf reports/coverage
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <string.h>

-error_t *err_node_children_cap = &(error_t){
+error_t *const err_ast_children_cap = &(error_t){
    .message = "Failed to increase ast node children, max capacity reached"};

 error_t *ast_node_alloc(ast_node_t **output) {
@@ -50,7 +50,7 @@ error_t *ast_node_alloc_children(ast_node_t *node) {

 error_t *ast_node_grow_cap(ast_node_t *node) {
    if (node->cap >= node_max_children_cap) {
-        return err_node_children_cap;
+        return err_ast_children_cap;
    }

    size_t new_cap = node->cap * 2;
@@ -123,6 +123,10 @@ const char *ast_node_id_to_cstr(node_id_t id) {
        return "NODE_PLUS_OR_MINUS";
    case NODE_SECTION_DIRECTIVE:
        return "NODE_SECTION_DIRECTIVE";
+    case NODE_IMPORT_DIRECTIVE:
+        return "NODE_IMPORT_DIRECTIVE";
+    case NODE_EXPORT_DIRECTIVE:
+        return "NODE_EXPORT_DIRECTIVE";
    case NODE_REGISTER:
        return "NODE_REGISTER";
    case NODE_SECTION:
@@ -157,6 +161,10 @@ const char *ast_node_id_to_cstr(node_id_t id) {
        return "NODE_ASTERISK";
    case NODE_DOT:
        return "NODE_DOT";
+    case NODE_IMPORT:
+        return "NODE_IMPORT";
+    case NODE_EXPORT:
+        return "NODE_EXPORT";
    }
    assert(!"Unreachable, weird node id" && id);
    __builtin_unreachable();
@@ -7,6 +7,8 @@
 #include <stddef.h>
 #include <stdint.h>

+extern error_t *const err_ast_children_cap;
+
 typedef enum node_id {
    NODE_INVALID,

@@ -27,10 +29,14 @@ typedef enum node_id {
    NODE_REGISTER_OFFSET,
    NODE_PLUS_OR_MINUS,
    NODE_SECTION_DIRECTIVE,
+    NODE_IMPORT_DIRECTIVE,
+    NODE_EXPORT_DIRECTIVE,

    // Validated primitives
    NODE_REGISTER,
    NODE_SECTION,
+    NODE_IMPORT,
+    NODE_EXPORT,

    // Primitive nodes
    NODE_IDENTIFIER,
@@ -0,0 +1,159 @@
+#include "symbols.h"
+#include "../error.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+constexpr size_t symbol_table_default_cap = 64;
+constexpr size_t symbol_table_max_cap = 1 << 16;
+
+error_t *const err_symbol_table_invalid_node = &(error_t){
+    .message = "Unexpected node id when adding symbol to symbol table"};
+error_t *const err_symbol_table_max_cap = &(error_t){
+    .message = "Failed to increase symbol table length, max capacity reached"};
+error_t *const err_symbol_table_incompatible_symbols =
+    &(error_t){.message = "Failed to update symbol with incompatible kind"};
+
+error_t *symbol_table_alloc(symbol_table_t **output) {
+    *output = nullptr;
+
+    symbol_table_t *table = calloc(1, sizeof(symbol_table_t));
+    if (table == nullptr)
+        return err_allocation_failed;
+
+    table->symbols = calloc(symbol_table_default_cap, sizeof(symbol_t));
+    if (table->symbols == nullptr) {
+        free(table);
+        return err_allocation_failed;
+    }
+
+    table->cap = symbol_table_default_cap;
+    table->len = 0;
+
+    *output = table;
+    return nullptr;
+}
+
+void symbol_table_free(symbol_table_t *table) {
+    free(table ? table->symbols : nullptr); // null table: no-op
+    free(table);
+}
+
+error_t *symbol_table_grow_cap(symbol_table_t *table) {
+    if (table->cap >= symbol_table_max_cap)
+        return err_symbol_table_max_cap;
+
+    size_t new_cap = table->cap * 2;
+    symbol_t *new_symbols = realloc(table->symbols, new_cap * sizeof(symbol_t));
+    if (new_symbols == nullptr)
+        return err_allocation_failed;
+
+    table->symbols = new_symbols;
+    table->cap = new_cap;
+
+    return nullptr;
+}
+
+error_t *symbol_table_get_node_info(ast_node_t *node, symbol_kind_t *kind,
+                                    char **name) {
+    switch (node->id) {
+    case NODE_LABEL:
+        *kind = SYMBOL_LOCAL;
+        *name = node->children[0]->token_entry->token.value;
+        return nullptr;
+    case NODE_LABEL_REFERENCE:
+        *kind = SYMBOL_REFERENCE;
+        *name = node->token_entry->token.value;
+        return nullptr;
+    case NODE_IMPORT_DIRECTIVE:
+        *kind = SYMBOL_IMPORT;
+        *name = node->children[1]->token_entry->token.value;
+        return nullptr;
+    case NODE_EXPORT_DIRECTIVE:
+        *kind = SYMBOL_EXPORT;
+        *name = node->children[1]->token_entry->token.value;
+        return nullptr;
+    default:
+        return err_symbol_table_invalid_node;
+    }
+    __builtin_unreachable();
+}
+
+/*
+old  \  new  | REFERENCE | LOCAL    | IMPORT   | EXPORT   |
+-------------|-----------|----------|----------|----------|
+REFERENCE    |           | replace  | replace  | replace  |
+-------------|-----------|----------|----------|----------|
+LOCAL        |           |          |   ERR    | replace  |
+-------------|-----------|----------|----------|----------|
+IMPORT       |           |          |          |   ERR    |
+-------------|-----------|----------|----------|----------|
+EXPORT       |           |          |   ERR    |          |
+-------------|-----------|----------|----------|----------|
+*/
+
+bool symbol_table_should_update(symbol_kind_t old, symbol_kind_t new) {
+    // Mirrors the "replace" cells of the table above; all else keeps `old`.
+    const bool from_reference = old == SYMBOL_REFERENCE;
+    const bool local_to_export = old == SYMBOL_LOCAL && new == SYMBOL_EXPORT;
+    return from_reference ? new != SYMBOL_REFERENCE : local_to_export;
+
+}
+
+bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
+    // Errors: IMPORT arriving on LOCAL/EXPORT, or EXPORT arriving on IMPORT.
+    const bool old_is_defined = old == SYMBOL_LOCAL || old == SYMBOL_EXPORT;
+    const bool import_on_defined = new == SYMBOL_IMPORT && old_is_defined;
+    const bool export_on_import = new == SYMBOL_EXPORT && old == SYMBOL_IMPORT;
+    return import_on_defined || export_on_import;
+}
+
+/**
+ * @pre The symbol _must not_ already be in the table.
+ */
+error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
+                          ast_node_t *node) {
+    if (table->len >= table->cap) {
+        error_t *err = symbol_table_grow_cap(table);
+        if (err)
+            return err;
+    }
+
+    table->symbols[table->len] = (symbol_t){
+        .name = name,
+        .kind = kind,
+        .node = node,
+    };
+
+    table->len += 1;
+
+    return nullptr;
+}
+
+error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) {
+    char *name;
+    symbol_kind_t kind;
+    error_t *err = symbol_table_get_node_info(node, &kind, &name);
+    if (err)
+        return err;
+
+    symbol_t *symbol = symbol_table_lookup(table, name);
+    if (!symbol)
+        return symbol_table_add(table, name, kind, node);
+    if (symbol_table_should_error(symbol->kind, kind))
+        return err_symbol_table_incompatible_symbols;
+    if (symbol_table_should_update(symbol->kind, kind)) {
+        symbol->name = name;
+        symbol->kind = kind;
+        symbol->node = node;
+    }
+    return nullptr;
+}
+
+symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name) {
+    // Linear scan for the first entry named `name`; nullptr when absent.
+    size_t i = 0;
+    while (i < table->len && strcmp(table->symbols[i].name, name) != 0)
+        ++i;
+    return i < table->len ? &table->symbols[i] : nullptr;
+}
@@ -0,0 +1,46 @@
+#ifndef INCLUDE_ENCODER_SYMBOLS_H_
+#define INCLUDE_ENCODER_SYMBOLS_H_
+
+#include "../ast.h"
+
+extern error_t *const err_symbol_table_invalid_node;
+extern error_t *const err_symbol_table_max_cap;
+extern error_t *const err_symbol_table_incompatible_symbols;
+
+typedef enum symbol_kind {
+    SYMBOL_REFERENCE,
+    SYMBOL_LOCAL,
+    SYMBOL_EXPORT,
+    SYMBOL_IMPORT,
+} symbol_kind_t;
+
+/**
+ * Represent a symbol in the program
+ *
+ * Symbols with the same name can only be in the table once. IMPORT or EXPORT
+ * symbols take precedence over REFERENCE symbols. If any reference symbols
+ * remain after the first encoding pass this indicates an error. Trying to add
+ * an IMPORT or EXPORT symbol if the same name already exists as the other kind
+ * is an error.
+ *
+ * The symbol table never takes ownership of the name string; it is borrowed
+ * straight from the AST's token_entry->token.value.
+ */
+typedef struct symbol {
+    char *name;
+    symbol_kind_t kind;
+    ast_node_t *node;
+} symbol_t;
+
+typedef struct symbol_table {
+    size_t cap;
+    size_t len;
+    symbol_t *symbols;
+} symbol_table_t;
+
+error_t *symbol_table_alloc(symbol_table_t **table);
+void symbol_table_free(symbol_table_t *table);
+error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node);
+symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
+
+#endif // INCLUDE_ENCODER_SYMBOLS_H_
@@ -9,8 +9,13 @@ error_t *const err_errorf_alloc = &(error_t){
 error_t *const err_errorf_length = &(error_t){
    .message =
        "Formatting of another error failed to determine the error length"};
+error_t *const err_eof =
+    &(error_t){.message = "Read failed because EOF is reached"};

-error_t *err_allocation_failed =
+error_t *const err_unknown_read_failure =
+    &(error_t){.message = "Unknown read error"};
+
+error_t *const err_allocation_failed =
    &(error_t){.message = "Memory allocation failed"};

 error_t *errorf(const char *fmt, ...) {
@@ -19,6 +19,8 @@ static inline void error_free(error_t *err) {
 }

 /* Some global errors */
-extern error_t *err_allocation_failed;
+extern error_t *const err_allocation_failed;
+extern error_t *const err_eof;
+extern error_t *const err_unknown_read_failure;

 #endif // INCLUDE_SRC_ERROR_H_
@@ -5,21 +5,16 @@
 #include <errno.h>
 #include <string.h>

-error_t *err_lexer_already_open = &(error_t){
+error_t *const err_lexer_already_open = &(error_t){
    .message =
        "Can't open on a lexer object that is already opened. Close it first."};
-error_t *err_prefix_too_large =
+error_t *const err_lexer_prefix_too_large =
    &(error_t){.message = "Prefix too large for internal lexer buffer"};
-error_t *err_buffer_underrun = &(error_t){
+error_t *const err_lexer_buffer_underrun = &(error_t){
    .message = "Buffer does not contain enough characters for lexer_consume_n"};
-error_t *err_consume_excessive_length =
+error_t *const err_lexer_consume_excessive_length =
    &(error_t){.message = "Too many valid characters to consume"};

-error_t *err_eof =
-    &(error_t){.message = "Can't read from file because EOF is reached"};
-
-error_t *err_unknown_read = &(error_t){.message = "Unknown read error"};
-
 typedef bool (*char_predicate_t)(char);

 const char *lexer_token_id_to_cstr(lexer_token_id_t id) {
@@ -112,7 +107,7 @@ error_t *lexer_fill_buffer(lexer_t *lex) {
        if (n == 0 && ferror(lex->fp))
            return errorf("Read error: %s", strerror(errno));
        if (n == 0)
-            return err_unknown_read;
+            return err_unknown_read_failure;
        remaining -= n;
        lex->buffer_count += n;
    }
@@ -182,9 +177,9 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
 error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                         char buffer[static len], const size_t n) {
    if (lex->buffer_count < n)
-        return err_buffer_underrun;
+        return err_lexer_buffer_underrun;
    if (n > len)
-        return err_consume_excessive_length;
+        return err_lexer_consume_excessive_length;

    memcpy(buffer, lex->buffer, n);
    lexer_shift_buffer(lex, n);
@@ -229,7 +224,7 @@ error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
                (lex->buffer_count > 0 && is_valid(lex->buffer[0]));

        if (have_more_characters && *n_consumed == buffer_size) {
-            return err_consume_excessive_length;
+            return err_lexer_consume_excessive_length;
        }
    } while (have_more_characters);
    return nullptr;
@@ -299,11 +294,12 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {

    error_t *err = lexer_consume(lex, max_number_length - so_far,
                                 buffer + so_far, is_valid, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Number length exceeds the maximum of 128 characters";
    }
+    lex->character_number += n;
    so_far += n;
    if (n == 0) {
        token->id = TOKEN_ERROR;
@@ -329,14 +325,15 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    if (suffix_length > 0) {
        err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far,
                              suffix_length);
-        if (err == err_consume_excessive_length) {
+        if (err == err_lexer_consume_excessive_length) {
            token->id = TOKEN_ERROR;
            token->explanation =
                "Number length exceeds the maximum of 128 characters";
+        } else {
+            lex->character_number += suffix_length;
        }
    }

-    lex->character_number += n;
    token->value = strdup(buffer);
    return nullptr;
 }
@@ -406,7 +403,7 @@ error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {

    error_t *err = lexer_consume(lex, max_identifier_length, buffer,
                                 is_identifier_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Identifier length exceeds the maximum of 128 characters";
@@ -449,7 +446,7 @@ error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {

    error_t *err = lexer_consume(lex, max_whitespace_length, buffer,
                                 is_whitespace_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Whitespace length exceeds the maximum of 1024 characters";
@@ -484,7 +481,7 @@ error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {

    error_t *err = lexer_consume(lex, max_comment_length, buffer,
                                 is_comment_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Comment length exceeds the maximum of 1024 characters";
@@ -5,7 +5,10 @@
 #include <stddef.h>
 #include <stdio.h>

-extern error_t *err_eof;
+extern error_t *const err_lexer_already_open;
+extern error_t *const err_lexer_prefix_too_large;
+extern error_t *const err_lexer_buffer_underrun;
+extern error_t *const err_lexer_consume_excessive_length;

 typedef enum {
    TOKEN_ERROR,
@@ -32,21 +32,22 @@ void print_text(tokenlist_t *list) {
    }
 }

-void print_ast(tokenlist_t *list) {
+error_t *print_ast(tokenlist_t *list) {
    parse_result_t result = parse(list->head);
-    if (result.err) {
-        puts(result.err->message);
-        error_free(result.err);
-        return;
-    }
+    if (result.err)
+        return result.err;
+
    ast_node_print(result.node);

    if (result.next != nullptr) {
        puts("First unparsed token:");
        lexer_token_print(&result.next->token);
    }
-
    ast_node_free(result.node);
+    if (result.next != nullptr) {
+        return errorf("did not parse entire input token stream");
+    }
+    return nullptr;
 }

 int get_execution_mode(int argc, char *argv[]) {
@@ -63,6 +64,20 @@ int get_execution_mode(int argc, char *argv[]) {
    return MODE_AST;
 }

+error_t *do_action(mode_t mode, tokenlist_t *list) {
+    switch (mode) {
+    case MODE_TOKENS:
+        print_tokens(list);
+        return nullptr;
+    case MODE_TEXT:
+        print_text(list);
+        return nullptr;
+    case MODE_AST:
+        return print_ast(list);
+    }
+    __builtin_unreachable();
+}
+
 int main(int argc, char *argv[]) {
    mode_t mode = get_execution_mode(argc, argv);
    char *filename = argv[2];
@@ -81,17 +96,9 @@ int main(int argc, char *argv[]) {
    if (err)
        goto cleanup_tokens;

-    switch (mode) {
-    case MODE_TOKENS:
-        print_tokens(list);
-        break;
-    case MODE_TEXT:
-        print_text(list);
-        break;
-    case MODE_AST:
-        print_ast(list);
-        break;
-    }
+    err = do_action(mode, list);
+    if (err)
+        goto cleanup_tokens;

    tokenlist_free(list);
    error_free(err);
@@ -83,7 +83,7 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
 }

 parse_result_t parse_immediate(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_number, parse_identifier, nullptr};
+    parser_t parsers[] = {parse_number, parse_label_reference, nullptr};
    parse_result_t result = parse_any(current, parsers);
    return parse_result_wrap(NODE_IMMEDIATE, result);
 }
@@ -119,8 +119,24 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
    return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
 }

+parse_result_t parse_import_directive(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_import, parse_identifier, nullptr};
+    return parse_consecutive(current, NODE_IMPORT_DIRECTIVE, parsers);
+}
+
+parse_result_t parse_export_directive(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_export, parse_identifier, nullptr};
+    return parse_consecutive(current, NODE_EXPORT_DIRECTIVE, parsers);
+}
+
+parse_result_t parse_directive_options(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_section_directive, parse_import_directive,
+                          parse_export_directive, nullptr};
+    return parse_any(current, parsers);
+}
+
 parse_result_t parse_directive(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
+    parser_t parsers[] = {parse_dot, parse_directive_options, nullptr};
    return parse_consecutive(current, NODE_DIRECTIVE, parsers);
 }

@@ -101,3 +101,19 @@ parse_result_t parse_section(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
                       is_section_token);
 }
+
+bool is_import_token(lexer_token_t *token) {
+    return strcmp(token->value, "import") == 0;
+}
+
+parse_result_t parse_import(tokenlist_entry_t *current) {
+    return parse_token(current, TOKEN_IDENTIFIER, NODE_IMPORT, is_import_token);
+}
+
+bool is_export_token(lexer_token_t *token) {
+    return strcmp(token->value, "export") == 0;
+}
+
+parse_result_t parse_export(tokenlist_entry_t *current) {
+    return parse_token(current, TOKEN_IDENTIFIER, NODE_EXPORT, is_export_token);
+}
@@ -26,5 +26,7 @@ parse_result_t parse_label_reference(tokenlist_entry_t *current);
 */
 parse_result_t parse_register(tokenlist_entry_t *current);
 parse_result_t parse_section(tokenlist_entry_t *current);
+parse_result_t parse_import(tokenlist_entry_t *current);
+parse_result_t parse_export(tokenlist_entry_t *current);

 #endif // INCLUDE_PARSER_PRIMITIVES_H_
@@ -1,7 +1,7 @@
 #include "util.h"
 #include "../tokenlist.h"

-error_t *err_parse_no_match =
+error_t *const err_parse_no_match =
    &(error_t){.message = "parsing failed to find the correct token sequence"};

 parse_result_t parse_error(error_t *err) {
@@ -21,6 +21,6 @@ parse_result_t parse_token(tokenlist_entry_t *current,
                           token_validator_t is_valid);
 parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);

-extern error_t *err_parse_no_match;
+extern error_t *const err_parse_no_match;

 #endif // INCLUDE_PARSER_UTIL_H_
@@ -0,0 +1,6 @@
+BasedOnStyle:    LLVM
+IndentWidth:     4
+Cpp11BracedListStyle: true
+AlignArrayOfStructures: Left
+AllowShortFunctionsOnASingleLine: Empty
+ColumnLimit: 120
@@ -0,0 +1,22 @@
+#include "../src/ast.h"
+#include "munit.h"
+
+MunitResult test_ast_node_alloc(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    ast_node_t *node = nullptr;
+    error_t *err = ast_node_alloc(&node);
+
+    munit_assert_ptr_not_null(node);
+    munit_assert_ptr_null(err);
+
+    ast_node_free(node);
+
+    return MUNIT_OK;
+}
+
+MunitTest ast_tests[] = {
+    {"/node_alloc", test_ast_node_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {nullptr,       nullptr,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
+};
@@ -0,0 +1,12 @@
+.import test
+.export test
+test:
+    call test
+.import more
+.export more
+more:
+    call more
+.import other
+.export other
+other:
+    call other
@@ -2,6 +2,9 @@

 ; Small valid code snippet that should contain all different AST nodes

+.export _start
+.import exit
+
 _start:
    mov eax, ebx
    lea eax, [eax + ebx * 4 + 8]
@@ -19,3 +22,5 @@ _start:
    push 0xffff:64
    push 0o777:16
    push 0b0001:16
+    mov rax, 0
+    call exit
@@ -0,0 +1,896 @@
+#include "../src/lexer.h"
+#include "../src/error.h"
+#include "munit.h"
+#include <string.h>
+
+/*
+ * Attaches an in-memory read stream over `input` to a lexer that has no stream
+ * yet, and resets its line number, character number and buffer count to zero.
+ * Asserts that `lex->fp` was null beforehand and that the stream was opened.
+ */
+void lexer_setup_memory_test(lexer_t *lex, const char *input) {
+    munit_assert_null(lex->fp);
+
+    const size_t input_length = strlen(input);
+    FILE *memory_stream = fmemopen((void *)input, input_length, "rb");
+    munit_assert_not_null(memory_stream);
+
+    lex->buffer_count = 0;
+    lex->character_number = 0;
+    lex->line_number = 0;
+    lex->fp = memory_stream;
+}
+
+/*
+ * Reads the next token from `lex` and asserts that lexing succeeded and that
+ * the token carries the expected id, text, line and column. The token is
+ * cleaned up before returning.
+ */
+void lexer_expect_one_token(lexer_t *lex, lexer_token_id_t id, const char *value, size_t line, size_t column) {
+    lexer_token_t token = {};
+
+    error_t *err = lexer_next(lex, &token);
+    munit_assert_null(err);
+
+    munit_assert_int(token.id, ==, id);
+    munit_assert_string_equal(token.value, value);
+    // The expected positions are size_t; compare them as such instead of
+    // narrowing both sides to int.
+    munit_assert_size(token.line_number, ==, line);
+    munit_assert_size(token.character_number, ==, column);
+    lexer_token_cleanup(&token);
+}
+
+void lexer_expect_eof(lexer_t *lex) {
+    lexer_token_t token = {};
+    error_t *err = lexer_next(lex, &token);
+    munit_assert_ptr_equal(err, err_eof);
+}
+
+/*
+ * Lexes `value` from memory and asserts that it forms exactly one token of
+ * kind `id` whose text equals the whole input, positioned at line 0, column 0
+ * and followed by end of input.
+ */
+void lexer_test_one_token(lexer_token_id_t id, const char *value) {
+    lexer_t lexer = {};
+
+    lexer_setup_memory_test(&lexer, value);
+    lexer_expect_one_token(&lexer, id, value, 0, 0);
+    lexer_expect_eof(&lexer);
+
+    lexer_close(&lexer);
+}
+
+/* Identifiers (with leading underscore, digits and inner underscores) lex as one TOKEN_IDENTIFIER. */
+MunitResult test_lexer_identifier(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"identifier", "_identifier", "_identifier123_55"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_IDENTIFIER, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* Expected properties of a single lexed token. */
+typedef struct token_data {
+    lexer_token_id_t id; // expected token kind
+    const char *value;   // expected token text; only ever points at string literals, hence const
+    size_t line;         // expected line number
+    size_t column;       // expected character number within the line
+} token_data_t;
+
+/* One boundary case: an input and the two tokens it is expected to lex into. */
+typedef struct boundary {
+    const char *input;   // text handed to the lexer; a null input marks the end of a table
+    token_data_t first;  // expected first token
+    token_data_t second; // expected second token
+} boundary_t;
+
+/*
+ * Runs every boundary case in `boundaries` up to (not including) the entry
+ * whose `input` is null. Each input must lex into exactly the two expected
+ * tokens followed by end of input.
+ */
+void test_lexer_boundary(boundary_t boundaries[]) {
+    for (boundary_t *entry = boundaries; entry->input != nullptr; ++entry) {
+        const token_data_t *lhs = &entry->first;
+        const token_data_t *rhs = &entry->second;
+
+        lexer_t lexer = {};
+        lexer_setup_memory_test(&lexer, entry->input);
+        lexer_expect_one_token(&lexer, lhs->id, lhs->value, lhs->line, lhs->column);
+        lexer_expect_one_token(&lexer, rhs->id, rhs->value, rhs->line, rhs->column);
+        lexer_expect_eof(&lexer);
+        lexer_close(&lexer);
+    }
+}
+
+/*
+ * An identifier must end at the first character that starts another token.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_identifier_boundary(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    boundary_t boundaries[] = {
+        // Identifier followed by a comma, as in "mov eax, ebx"; the other
+        // boundary tables cover the comma as well.
+        {"id,",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COMMA, ",", 0, 2}         },
+        {"id:",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COLON, ":", 0, 2}         },
+        {"id[",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_LBRACKET, "[", 0, 2}      },
+        {"id]",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_RBRACKET, "]", 0, 2}      },
+        {"id+",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_PLUS, "+", 0, 2}          },
+        {"id-",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_MINUS, "-", 0, 2}         },
+        {"id*",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_ASTERISK, "*", 0, 2}      },
+        {"id.",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_DOT, ".", 0, 2}           },
+        {"id;comment", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COMMENT, ";comment", 0, 2}},
+        {"id\n",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 2}      },
+        {"id\r\n",     {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 2}    },
+        {"id ",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 2}    },
+        {"id\t",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 2}   },
+        {nullptr,      {},                             {}                               },
+    };
+
+    test_lexer_boundary(boundaries);
+
+    return MUNIT_OK;
+}
+
+/* Plain decimal literals lex as a single TOKEN_DECIMAL. */
+MunitResult test_lexer_decimal(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"123", "0", "42"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* Decimal literals with a `:8`, `:16`, `:32` or `:64` suffix stay one TOKEN_DECIMAL including the suffix. */
+MunitResult test_lexer_decimal_with_suffix(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"123:8", "0:16", "42:32", "69:64"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* `0x`-prefixed literals, with lower- and upper-case digits, lex as a single TOKEN_HEXADECIMAL. */
+MunitResult test_lexer_hexadecimal(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"0x123", "0xDEAD", "0x0", "0xabcdef", "0xABCDEF"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* Hexadecimal literals with a `:8`, `:16`, `:32` or `:64` suffix stay one TOKEN_HEXADECIMAL including the suffix. */
+MunitResult test_lexer_hexadecimal_with_suffix(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"0x123:8", "0xDEAD:16", "0xABC:32", "0xffff:64"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* `0o`-prefixed literals lex as a single TOKEN_OCTAL. */
+MunitResult test_lexer_octal(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"0o777", "0o0", "0o123"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* Octal literals with a `:8`, `:16`, `:32` or `:64` suffix stay one TOKEN_OCTAL including the suffix. */
+MunitResult test_lexer_octal_with_suffix(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"0o777:8", "0o123:16", "0o777:32", "0o123:64"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* `0b`-prefixed literals lex as a single TOKEN_BINARY. */
+MunitResult test_lexer_binary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"0b101", "0b0", "0b1", "0b01010101"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* Binary literals with a `:8`, `:16`, `:32` or `:64` suffix stay one TOKEN_BINARY including the suffix. */
+MunitResult test_lexer_binary_with_suffix(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {"0b101:8", "0b0:16", "0b1:32", "0b01010101:64"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* A lone ":" lexes as a single TOKEN_COLON. */
+MunitResult test_lexer_colon(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    lexer_test_one_token(TOKEN_COLON, ":");
+
+    return MUNIT_OK;
+}
+
+/* A lone "," lexes as a single TOKEN_COMMA. */
+MunitResult test_lexer_comma(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    lexer_test_one_token(TOKEN_COMMA, ",");
+
+    return MUNIT_OK;
+}
+
+/* A lone "[" lexes as a single TOKEN_LBRACKET. */
+MunitResult test_lexer_lbracket(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    lexer_test_one_token(TOKEN_LBRACKET, "[");
+
+    return MUNIT_OK;
+}
+
+/* A lone "]" lexes as a single TOKEN_RBRACKET. */
+MunitResult test_lexer_rbracket(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    lexer_test_one_token(TOKEN_RBRACKET, "]");
+
+    return MUNIT_OK;
+}
+
+/* A lone "+" lexes as a single TOKEN_PLUS. */
+MunitResult test_lexer_plus(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    lexer_test_one_token(TOKEN_PLUS, "+");
+
+    return MUNIT_OK;
+}
+
+/* A lone "-" lexes as a single TOKEN_MINUS. */
+MunitResult test_lexer_minus(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    lexer_test_one_token(TOKEN_MINUS, "-");
+
+    return MUNIT_OK;
+}
+
+/* A lone "*" lexes as a single TOKEN_ASTERISK. */
+MunitResult test_lexer_asterisk(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    lexer_test_one_token(TOKEN_ASTERISK, "*");
+
+    return MUNIT_OK;
+}
+
+/* A lone "." lexes as a single TOKEN_DOT. */
+MunitResult test_lexer_dot(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    lexer_test_one_token(TOKEN_DOT, ".");
+
+    return MUNIT_OK;
+}
+
+/* Text starting with ";" (including a bare ";") lexes as a single TOKEN_COMMENT. */
+MunitResult test_lexer_comment(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {";This is a comment", "; Another comment", ";"};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_COMMENT, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* Runs of spaces and tabs lex as a single TOKEN_WHITESPACE. */
+MunitResult test_lexer_whitespace(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const char *const inputs[] = {" ", "  ", "\t", " \t "};
+    const size_t count = sizeof(inputs) / sizeof(inputs[0]);
+    for (size_t i = 0; i < count; ++i) {
+        lexer_test_one_token(TOKEN_WHITESPACE, inputs[i]);
+    }
+
+    return MUNIT_OK;
+}
+
+/* Both "\n" and "\r\n" lex as a single TOKEN_NEWLINE at line 0, column 0, followed by EOF. */
+MunitResult test_lexer_newlines(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    // lexer_test_one_token performs setup, one-token check, EOF check and close
+    // in that order, on a fresh lexer for each input.
+    lexer_test_one_token(TOKEN_NEWLINE, "\n");   // simple newline
+    lexer_test_one_token(TOKEN_NEWLINE, "\r\n"); // Windows-style newline
+
+    return MUNIT_OK;
+}
+
+/*
+ * Lexes "a\nb\nc" and checks each token's line and column: a newline token is
+ * reported on the line it ends, and the token after it on the next line at
+ * column 0.
+ */
+MunitResult test_lexer_line_numbers(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    const token_data_t expected[] = {
+        {TOKEN_IDENTIFIER, "a", 0, 0},
+        {TOKEN_NEWLINE, "\n", 0, 1},
+        {TOKEN_IDENTIFIER, "b", 1, 0},
+        {TOKEN_NEWLINE, "\n", 1, 1},
+        {TOKEN_IDENTIFIER, "c", 2, 0},
+    };
+    const size_t count = sizeof(expected) / sizeof(expected[0]);
+
+    lexer_t lexer = {};
+    lexer_setup_memory_test(&lexer, "a\nb\nc");
+
+    for (size_t i = 0; i < count; ++i) {
+        lexer_expect_one_token(&lexer, expected[i].id, expected[i].value, expected[i].line, expected[i].column);
+    }
+    lexer_expect_eof(&lexer);
+    lexer_close(&lexer);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A decimal literal must end at the first character that starts another token.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_decimal_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"123,",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMA, ",", 0, 3}      },
+        {"123:",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COLON, ":", 0, 3}      },
+        {"123[",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 3}   },
+        {"123]",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 3}   },
+        {"123+",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_PLUS, "+", 0, 3}       },
+        {"123-",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_MINUS, "-", 0, 3}      },
+        {"123*",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 3}   },
+        {"123.",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_DOT, ".", 0, 3}        },
+        {"123;",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMENT, ";", 0, 3}    },
+        {"123\n",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 3}   },
+        {"123\r\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 3} },
+        {"123 ",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 3} },
+        {"123\t",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 3}},
+        {nullptr,   {},                           {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A hexadecimal literal must end at the first character that starts another token.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_hexadecimal_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"0x123,",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
+        {"0x123:",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
+        {"0x123[",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
+        {"0x123]",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
+        {"0x123+",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
+        {"0x123-",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
+        {"0x123*",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
+        {"0x123.",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
+        {"0x123;",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
+        {"0x123\n",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
+        {"0x123\r\n", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
+        {"0x123 ",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
+        {"0x123\t",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
+        {nullptr,     {},                                 {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * An octal literal must end at the first character that starts another token.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_octal_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"0o123,",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
+        {"0o123:",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
+        {"0o123[",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
+        {"0o123]",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
+        {"0o123+",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
+        {"0o123-",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
+        {"0o123*",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
+        {"0o123.",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
+        {"0o123;",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
+        {"0o123\n",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
+        {"0o123\r\n", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
+        {"0o123 ",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
+        {"0o123\t",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
+        {nullptr,     {},                           {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A binary literal must end at the first character that starts another token.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_binary_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"0b101,",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
+        {"0b101:",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
+        {"0b101[",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
+        {"0b101]",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
+        {"0b101+",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
+        {"0b101-",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
+        {"0b101*",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
+        {"0b101.",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
+        {"0b101;",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
+        {"0b101\n",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
+        {"0b101\r\n", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
+        {"0b101 ",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
+        {"0b101\t",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
+        {nullptr,     {},                            {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A ":" is always a token of its own, whatever follows it.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_colon_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {":,",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"::",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {":[",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {":]",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {":+",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {":-",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {":*",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {":.",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {":;",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        {":\n",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {":\r\n", {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        {": ",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {":\t",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        {nullptr, {},                       {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A "," is always a token of its own, whatever follows it.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_comma_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {",,",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {",:",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {",[",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {",]",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {",+",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {",-",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {",*",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {",.",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {",;",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        {",\n",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {",\r\n", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        {", ",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {",\t",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        {nullptr, {},                       {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A "[" is always a token of its own, whatever follows it.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_lbracket_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"[,",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"[:",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"[[",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"[]",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"[+",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"[-",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"[*",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"[.",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"[;",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        {"[\n",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"[\r\n", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        {"[ ",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"[\t",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        {nullptr, {},                          {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A "]" is always a token of its own, whatever follows it.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_rbracket_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"],",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"]:",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"][",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"]]",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"]+",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"]-",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"]*",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"].",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"];",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        {"]\n",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"]\r\n", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        {"] ",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"]\t",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        {nullptr, {},                          {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A "+" is always a token of its own, whatever follows it.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_plus_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"+,",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"+:",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"+[",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"+]",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"++",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"+-",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"+*",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"+.",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"+;",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        {"+\n",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"+\r\n", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        {"+ ",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"+\t",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        {nullptr, {},                      {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A "-" is always a token of its own, whatever follows it.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_minus_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"-,",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"-:",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"-[",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"-]",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"-+",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"--",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"-*",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"-.",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"-;",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        {"-\n",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"-\r\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        {"- ",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"-\t",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        {nullptr, {},                       {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A "*" is always a token of its own, whatever follows it.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_asterisk_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"*,",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {"*:",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {"*[",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {"*]",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {"*+",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {"*-",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {"**",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"*.",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {"*;",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        {"*\n",   {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {"*\r\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        {"* ",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {"*\t",   {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        {nullptr, {},                          {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A "." is always a token of its own, whatever follows it.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_dot_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {".,",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
+        {".:",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
+        {".[",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
+        {".]",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
+        {".+",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
+        {".-",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
+        {".*",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
+        {"..",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
+        {".;",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
+        {".\n",   {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
+        {".\r\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
+        {". ",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
+        {".\t",   {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
+        {nullptr, {},                     {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A comment must stop before the line ending ("\n" or "\r\n"), which is lexed
+ * as a separate TOKEN_NEWLINE on the same line.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_comment_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {";comment\n",   {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}  },
+        {";comment\r\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}},
+        {nullptr,        {},                                {}                           },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A single space must end at the first non-whitespace character that follows.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_whitespace_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {" ,",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMA, ",", 0, 1}     },
+        {" :",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COLON, ":", 0, 1}     },
+        {" [",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}  },
+        {" ]",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}  },
+        {" +",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_PLUS, "+", 0, 1}      },
+        {" -",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_MINUS, "-", 0, 1}     },
+        {" *",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}  },
+        {" .",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_DOT, ".", 0, 1}       },
+        {" ;",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}   },
+        {" \n",   {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}  },
+        {" \r\n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
+        {nullptr, {},                            {}                           },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * After a "\n" the next token must be reported on line 1, column 0.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_newline_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"\n,",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}      },
+        {"\n:",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}      },
+        {"\n[",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}   },
+        {"\n]",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}   },
+        {"\n+",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}       },
+        {"\n-",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}      },
+        {"\n*",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}   },
+        {"\n.",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}        },
+        {"\n;",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}    },
+        {"\n\n",   {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}   },
+        {"\n\r\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0} },
+        {"\n ",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0} },
+        {"\n\t",   {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
+        {nullptr,  {},                          {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * After a "\r\n" the next token must be reported on line 1, column 0.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_crlf_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"\r\n,",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}      },
+        {"\r\n:",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}      },
+        {"\r\n[",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}   },
+        {"\r\n]",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}   },
+        {"\r\n+",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}       },
+        {"\r\n-",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}      },
+        {"\r\n*",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}   },
+        {"\r\n.",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}        },
+        {"\r\n;",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}    },
+        {"\r\n\n",   {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}   },
+        {"\r\n\r\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0} },
+        {"\r\n ",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0} },
+        {"\r\n\t",   {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
+        {nullptr,    {},                            {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+/*
+ * A number with a `:N` suffix must end right after the suffix; the following
+ * character (including a second ":") starts a new token.
+ * Each row: input, expected first token, expected second token
+ * (id, text, line, column); the row with a null input ends the table.
+ */
+MunitResult test_lexer_number_boundary(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    boundary_t cases[] = {
+        {"0x123:8,",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_COMMA, ",", 0, 7}      },
+        {"0x123:16:",    {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_COLON, ":", 0, 8}      },
+        {"0o777:32[",    {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_LBRACKET, "[", 0, 8}   },
+        {"0b101:64]",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_RBRACKET, "]", 0, 8}   },
+        {"0x123:8+",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_PLUS, "+", 0, 7}       },
+        {"0x123:16-",    {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_MINUS, "-", 0, 8}      },
+        {"0o777:32*",    {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_ASTERISK, "*", 0, 8}   },
+        {"0b101:64.",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_DOT, ".", 0, 8}        },
+        {"0x123:8;",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_COMMENT, ";", 0, 7}    },
+        {"0x123:16\n",   {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}   },
+        {"0o777:32\r\n", {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_NEWLINE, "\r\n", 0, 8} },
+        {"0b101:64 ",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_WHITESPACE, " ", 0, 8} },
+        {"0x123:8\t",    {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_WHITESPACE, "\t", 0, 7}},
+        {nullptr,        {},                                    {}                            },
+    };
+    test_lexer_boundary(cases);
+
+    return MUNIT_OK;
+}
+
+// Number literals of exactly 128 characters - plain or 0x-prefixed, with or
+// without a ":64" suffix - must come back from the lexer as one token whose
+// value is still 128 characters long.
+MunitResult test_lexer_maximum_length_numbers(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    // nullptr-terminated so the loop below needs no element count.
+    char *numbers[] = {
+        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "9999999999999999999988",
+        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "9999999999999999998:64",
+        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "9999999999999999999988",
+        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "9999999999999999998:64",
+        nullptr,
+    };
+
+    for (char **cursor = numbers; *cursor != nullptr; ++cursor) {
+        char *number = *cursor;
+
+        // Guard against a miscounted fixture before blaming the lexer.
+        munit_assert_size(128, ==, strlen(number));
+
+        lexer_token_t token = {};
+        lexer_t lex = {};
+        lexer_setup_memory_test(&lex, number);
+        lexer_next(&lex, &token);
+
+        munit_assert_true(token.id == TOKEN_DECIMAL || token.id == TOKEN_HEXADECIMAL);
+        munit_assert_size(128, ==, strlen(token.value));
+
+        lexer_token_cleanup(&token);
+        lexer_close(&lex);
+    }
+
+    return MUNIT_OK;
+}
+
+// Number literals of 129 characters exceed the lexer's limit and must yield
+// TOKEN_ERROR. The inputs are kept in two groups because the expected
+// follow-up differs depending on whether a ":64" suffix is present.
+MunitResult test_lexer_too_long_numbers(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    char *without_suffix[] = {
+        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "99999999999999999999988",
+        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "99999999999999999999988",
+    };
+    char *with_suffix[] = {
+        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "99999999999999999998:64",
+        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
+        "99999999999999999998:64",
+    };
+    const size_t without_suffix_count = sizeof(without_suffix) / sizeof(without_suffix[0]);
+    const size_t with_suffix_count = sizeof(with_suffix) / sizeof(with_suffix[0]);
+
+    // Without suffix we expect 128 characters and then failure
+    for (size_t idx = 0; idx < without_suffix_count; ++idx) {
+        char *number = without_suffix[idx];
+        munit_assert_size(129, ==, strlen(number));
+
+        lexer_token_t token = {};
+        lexer_t lex = {};
+        lexer_setup_memory_test(&lex, number);
+        lexer_next(&lex, &token);
+
+        munit_assert_int(TOKEN_ERROR, ==, token.id);
+        munit_assert_size(128, ==, strlen(token.value));
+
+        lexer_token_cleanup(&token);
+        lexer_close(&lex);
+    }
+
+    // With suffix we fail at the suffix boundary
+    for (size_t idx = 0; idx < with_suffix_count; ++idx) {
+        char *number = with_suffix[idx];
+        munit_assert_size(129, ==, strlen(number));
+
+        lexer_token_t token = {};
+        lexer_t lex = {};
+        lexer_setup_memory_test(&lex, number);
+        lexer_next(&lex, &token);
+
+        munit_assert_int(TOKEN_ERROR, ==, token.id);
+        munit_assert_size(128, >=, strlen(token.value));
+        lexer_token_cleanup(&token);
+
+        // After the error token the lexer is expected to resume at the colon
+        // (offset 126) and then lex the width "64" as its own decimal token.
+        lexer_expect_one_token(&lex, TOKEN_COLON, ":", 0, 126);
+        lexer_expect_one_token(&lex, TOKEN_DECIMAL, "64", 0, 127);
+        lexer_close(&lex);
+    }
+
+    return MUNIT_OK;
+}
+
+// A run of exactly 1024 spaces must be returned as a single TOKEN_WHITESPACE
+// whose value contains all 1024 characters.
+MunitResult test_lexer_max_whitespace_length(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    // Zero the buffer first so the final byte is already the terminator, then
+    // fill every byte except that one with spaces.
+    char whitespace[1025] = {};
+    memset(whitespace, ' ', sizeof(whitespace) - 1);
+
+    munit_assert_size(1024, ==, strlen(whitespace));
+
+    lexer_token_t token = {};
+    lexer_t lex = {};
+    lexer_setup_memory_test(&lex, whitespace);
+    lexer_next(&lex, &token);
+
+    munit_assert_int(TOKEN_WHITESPACE, ==, token.id);
+    munit_assert_size(1024, ==, strlen(token.value));
+
+    lexer_token_cleanup(&token);
+    lexer_close(&lex);
+
+    return MUNIT_OK;
+}
+
+// A run of 1025 spaces is one character over the limit: the first token must
+// be a TOKEN_ERROR holding 1024 characters, and the remaining single space is
+// then lexed as an ordinary whitespace token at column 1024.
+MunitResult test_lexer_too_long_whitespace(const MunitParameter params[], void *data) {
+    (void)data;
+    (void)params;
+
+    // Zero the buffer first so the final byte is already the terminator, then
+    // fill every byte except that one with spaces.
+    char whitespace[1026] = {};
+    memset(whitespace, ' ', sizeof(whitespace) - 1);
+
+    munit_assert_size(1025, ==, strlen(whitespace));
+
+    lexer_token_t token = {};
+    lexer_t lex = {};
+    lexer_setup_memory_test(&lex, whitespace);
+    lexer_next(&lex, &token);
+
+    munit_assert_int(TOKEN_ERROR, ==, token.id);
+    munit_assert_size(1024, ==, strlen(token.value));
+    lexer_token_cleanup(&token);
+
+    lexer_expect_one_token(&lex, TOKEN_WHITESPACE, " ", 0, 1024);
+    lexer_close(&lex);
+
+    return MUNIT_OK;
+}
+
+MunitTest lexer_tests[] = {
+    {"/identifier",              test_lexer_identifier,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/identifier_boundary",     test_lexer_identifier_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/decimal",                 test_lexer_decimal,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/decimal_boundary",        test_lexer_decimal_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/hexadecimal",             test_lexer_hexadecimal,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/hexadecimal_with_suffix", test_lexer_hexadecimal_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/hexadecimal_boundary",    test_lexer_hexadecimal_boundary,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/octal",                   test_lexer_octal,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/octal_with_suffix",       test_lexer_octal_with_suffix,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/octal_boundary",          test_lexer_octal_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/binary",                  test_lexer_binary,                  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/binary_with_suffix",      test_lexer_binary_with_suffix,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/binary_boundary",         test_lexer_binary_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/number_boundary",         test_lexer_number_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/colon",                   test_lexer_colon,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/colon_boundary",          test_lexer_colon_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/comma",                   test_lexer_comma,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/comma_boundary",          test_lexer_comma_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/lbracket",                test_lexer_lbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/lbracket_boundary",       test_lexer_lbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/rbracket",                test_lexer_rbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/rbracket_boundary",       test_lexer_rbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/plus",                    test_lexer_plus,                    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/plus_boundary",           test_lexer_plus_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/minus",                   test_lexer_minus,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/minus_boundary",          test_lexer_minus_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/asterisk",                test_lexer_asterisk,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/asterisk_boundary",       test_lexer_asterisk_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/dot",                     test_lexer_dot,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/dot_boundary",            test_lexer_dot_boundary,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/comment",                 test_lexer_comment,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/comment_boundary",        test_lexer_comment_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/whitespace",              test_lexer_whitespace,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/whitespace_boundary",     test_lexer_whitespace_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/newlines",                test_lexer_newlines,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/newline_boundary",        test_lexer_newline_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/crlf_boundary",           test_lexer_crlf_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/line_numbers",            test_lexer_line_numbers,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/maximum_length_numbers",  test_lexer_maximum_length_numbers,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/too_long_numbers",        test_lexer_too_long_numbers,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/max_whitespace_length",   test_lexer_max_whitespace_length,   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/too_long_whitespace",     test_lexer_too_long_whitespace,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {nullptr,                    nullptr,                            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
+};
@@ -0,0 +1,18 @@
+#include "munit.h"
+
+extern MunitTest ast_tests[];
+extern MunitTest lexer_tests[];
+extern MunitTest symbols_tests[];
+
+// Entry point of the test binary. Collects the per-module suites under one
+// "/oas" root suite and hands the command-line arguments to munit; the value
+// returned by munit_suite_main becomes the process exit status.
+int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
+    // One child suite per module, each run for a single iteration; the
+    // all-null row ends the list.
+    MunitSuite module_suites[] = {
+        {"/ast",     ast_tests,     nullptr, 1, MUNIT_SUITE_OPTION_NONE},
+        {"/lexer",   lexer_tests,   nullptr, 1, MUNIT_SUITE_OPTION_NONE},
+        {"/symbols", symbols_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
+        {nullptr,    nullptr,       nullptr, 0, MUNIT_SUITE_OPTION_NONE},
+    };
+
+    // The root suite owns no tests itself, only the child suites.
+    MunitSuite root_suite = {
+        .prefix = "/oas",
+        .tests = nullptr,
+        .suites = module_suites,
+        .iterations = 1,
+        .options = MUNIT_SUITE_OPTION_NONE,
+    };
+
+    return munit_suite_main(&root_suite, nullptr, argc, argv);
+}
@@ -0,0 +1,535 @@
+/* µnit Testing Framework
+ * Copyright (c) 2013-2017 Evan Nemerson <evan@nemerson.com>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(MUNIT_H)
+#define MUNIT_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+
+/* Packs a (major, minor, revision) triple into a single integer so versions
+ * can be compared numerically: major is shifted left by 16 bits, minor by 8,
+ * and revision occupies the low bits. */
+#define MUNIT_VERSION(major, minor, revision) \
+  (((major) << 16) | ((minor) << 8) | (revision))
+
+/* Version of this copy of the header (0.4.1), encoded with MUNIT_VERSION. */
+#define MUNIT_CURRENT_VERSION MUNIT_VERSION(0, 4, 1)
+
+/* Fixed-width integer aliases used throughout the header. MSVC releases with
+ * _MSC_VER below 1600 get the compiler-specific __intN types; every other
+ * compiler includes <stdint.h> and maps the aliases to the standard types. */
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+#  define munit_int8_t   __int8
+#  define munit_uint8_t  unsigned __int8
+#  define munit_int16_t  __int16
+#  define munit_uint16_t unsigned __int16
+#  define munit_int32_t  __int32
+#  define munit_uint32_t unsigned __int32
+#  define munit_int64_t  __int64
+#  define munit_uint64_t unsigned __int64
+#else
+#  include <stdint.h>
+#  define munit_int8_t   int8_t
+#  define munit_uint8_t  uint8_t
+#  define munit_int16_t  int16_t
+#  define munit_uint16_t uint16_t
+#  define munit_int32_t  int32_t
+#  define munit_uint32_t uint32_t
+#  define munit_int64_t  int64_t
+#  define munit_uint64_t uint64_t
+#endif
+
+/* printf conversion macros for the fixed-width types. MSVC releases with
+ * _MSC_VER below 1800 get hand-written fallbacks (each one only if it is not
+ * already defined, with the 64-bit variants using the "I64" length prefix);
+ * every other compiler takes the definitions from <inttypes.h>. */
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#  if !defined(PRIi8)
+#    define PRIi8 "i"
+#  endif
+#  if !defined(PRIi16)
+#    define PRIi16 "i"
+#  endif
+#  if !defined(PRIi32)
+#    define PRIi32 "i"
+#  endif
+#  if !defined(PRIi64)
+#    define PRIi64 "I64i"
+#  endif
+#  if !defined(PRId8)
+#    define PRId8 "d"
+#  endif
+#  if !defined(PRId16)
+#    define PRId16 "d"
+#  endif
+#  if !defined(PRId32)
+#    define PRId32 "d"
+#  endif
+#  if !defined(PRId64)
+#    define PRId64 "I64d"
+#  endif
+#  if !defined(PRIx8)
+#    define PRIx8 "x"
+#  endif
+#  if !defined(PRIx16)
+#    define PRIx16 "x"
+#  endif
+#  if !defined(PRIx32)
+#    define PRIx32 "x"
+#  endif
+#  if !defined(PRIx64)
+#    define PRIx64 "I64x"
+#  endif
+#  if !defined(PRIu8)
+#    define PRIu8 "u"
+#  endif
+#  if !defined(PRIu16)
+#    define PRIu16 "u"
+#  endif
+#  if !defined(PRIu32)
+#    define PRIu32 "u"
+#  endif
+#  if !defined(PRIu64)
+#    define PRIu64 "I64u"
+#  endif
+#else
+#  include <inttypes.h>
+#endif
+
+/* Boolean type used in the public API. It can be pre-defined by the includer;
+ * otherwise it resolves to `bool` when that is a macro, to `_Bool` on C99 and
+ * later, and to `int` as the last resort. */
+#if !defined(munit_bool)
+#  if defined(bool)
+#    define munit_bool bool
+#  elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+#    define munit_bool _Bool
+#  else
+#    define munit_bool int
+#  endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Compiler hints. With __GNUC__ defined, MUNIT_LIKELY / MUNIT_UNLIKELY wrap
+ * __builtin_expect (expected value 1 and 0 respectively) and MUNIT_UNUSED
+ * expands to the `unused` attribute. Elsewhere the first two pass the
+ * expression through unchanged and MUNIT_UNUSED expands to nothing. */
+#if defined(__GNUC__)
+#  define MUNIT_LIKELY(expr) (__builtin_expect ((expr), 1))
+#  define MUNIT_UNLIKELY(expr) (__builtin_expect ((expr), 0))
+#  define MUNIT_UNUSED __attribute__((__unused__))
+#else
+#  define MUNIT_LIKELY(expr) (expr)
+#  define MUNIT_UNLIKELY(expr) (expr)
+#  define MUNIT_UNUSED
+#endif
+
+/* Size expression for array parameters, e.g. `argv[MUNIT_ARRAY_PARAM(argc + 1)]`.
+ * On C99 and later (except when __PGI is defined) the expression is kept, so
+ * the declaration spells out the expected array length; otherwise it expands
+ * to nothing and the parameter is declared as a plain `[]` array. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__PGI)
+#  define MUNIT_ARRAY_PARAM(name) name
+#else
+#  define MUNIT_ARRAY_PARAM(name)
+#endif
+
+/* printf length modifiers for size_t, char and short arguments. Outside
+ * Windows the standard "z", "hh" and "h" are used. On Windows the size
+ * modifier is "I64" for 64-bit x86 targets and empty otherwise, and the char
+ * and short modifiers are empty. */
+#if !defined(_WIN32)
+#  define MUNIT_SIZE_MODIFIER "z"
+#  define MUNIT_CHAR_MODIFIER "hh"
+#  define MUNIT_SHORT_MODIFIER "h"
+#else
+#  if defined(_M_X64) || defined(__amd64__)
+#    define MUNIT_SIZE_MODIFIER "I64"
+#  else
+#    define MUNIT_SIZE_MODIFIER ""
+#  endif
+#  define MUNIT_CHAR_MODIFIER ""
+#  define MUNIT_SHORT_MODIFIER ""
+#endif
+
+/* Marks a function that never returns to its caller. Uses the C11 `_Noreturn`
+ * keyword when available, then the GNU attribute, then the MSVC declspec;
+ * on any other compiler it expands to nothing. */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#  define MUNIT_NO_RETURN _Noreturn
+#elif defined(__GNUC__)
+#  define MUNIT_NO_RETURN __attribute__((__noreturn__))
+#elif defined(_MSC_VER)
+#  define MUNIT_NO_RETURN __declspec(noreturn)
+#else
+#  define MUNIT_NO_RETURN
+#endif
+
+/* Pragma pair that suppresses MSVC warning 4127 around the `while (0)` that
+ * closes each assertion macro, and restores the previous warning state
+ * afterwards. Both macros expand to nothing unless the compiler is MSVC with
+ * _MSC_VER of at least 1500. */
+#if defined(_MSC_VER) &&  (_MSC_VER >= 1500)
+#  define MUNIT_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127))
+#  define MUNIT_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))
+#else
+#  define MUNIT_PUSH_DISABLE_MSVC_C4127_
+#  define MUNIT_POP_DISABLE_MSVC_C4127_
+#endif
+
+/* Severity passed to munit_logf_ex, listed from least to most severe. */
+typedef enum {
+  MUNIT_LOG_DEBUG,
+  MUNIT_LOG_INFO,
+  MUNIT_LOG_WARNING,
+  MUNIT_LOG_ERROR
+} MunitLogLevel;
+
+/* Lets the compiler type-check printf-style arguments. `string_index` is the
+ * 1-based position of the format string and `first_to_check` that of the
+ * first variadic argument. Expands to nothing on MinGW and on compilers that
+ * do not define __GNUC__. */
+#if defined(__GNUC__) && !defined(__MINGW32__)
+#  define MUNIT_PRINTF(string_index, first_to_check) __attribute__((format (printf, string_index, first_to_check)))
+#else
+#  define MUNIT_PRINTF(string_index, first_to_check)
+#endif
+
+/* Logs a printf-formatted message at the given level, tagged with the source
+ * file name and line number supplied by the caller. */
+MUNIT_PRINTF(4, 5)
+void munit_logf_ex(MunitLogLevel level, const char* filename, int line, const char* format, ...);
+
+/* Convenience wrapper that fills in __FILE__ and __LINE__ of the call site.
+ * __VA_ARGS__ follows a comma, so at least one argument after `format` is
+ * required; use munit_log for a message without arguments. */
+#define munit_logf(level, format, ...) \
+  munit_logf_ex(level, __FILE__, __LINE__, format, __VA_ARGS__)
+
+/* Logs a plain string. The message is passed as a "%s" argument, so any '%'
+ * it contains is printed literally. */
+#define munit_log(level, msg) \
+  munit_logf(level, "%s", msg)
+
+/* Reports a printf-formatted error for the given source location. Declared
+ * with MUNIT_NO_RETURN: control does not come back to the caller, which is
+ * why the assertion macros need no explicit exit path after calling it. */
+MUNIT_NO_RETURN
+MUNIT_PRINTF(3, 4)
+void munit_errorf_ex(const char* filename, int line, const char* format, ...);
+
+/* Convenience wrapper that fills in __FILE__ and __LINE__ of the call site.
+ * Requires at least one argument after `format`; use munit_error for a
+ * message without arguments. */
+#define munit_errorf(format, ...) \
+  munit_errorf_ex(__FILE__, __LINE__, format, __VA_ARGS__)
+
+/* Reports a plain string as an error; the message is passed as a "%s"
+ * argument, so any '%' it contains is printed literally. */
+#define munit_error(msg) \
+  munit_errorf("%s", msg)
+
+/* Basic boolean assertions. Each evaluates `expr` once and, when the check
+ * fails, reports an error whose message contains the source text of `expr`.
+ * The do/while(0) wrapper makes every macro usable as a single statement; the
+ * MSVC push/pop pair silences warning 4127 on the constant loop condition. */
+
+/* Fails unless `expr` is true. */
+#define munit_assert(expr) \
+  do { \
+    if (!MUNIT_LIKELY(expr)) { \
+      munit_error("assertion failed: " #expr); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Same check as munit_assert, with " is not true" appended to the message. */
+#define munit_assert_true(expr) \
+  do { \
+    if (!MUNIT_LIKELY(expr)) { \
+      munit_error("assertion failed: " #expr " is not true"); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Fails unless `expr` is false. */
+#define munit_assert_false(expr) \
+  do { \
+    if (!MUNIT_LIKELY(!(expr))) { \
+      munit_error("assertion failed: " #expr " is not false"); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Generic comparison assertion behind all the typed munit_assert_* macros.
+ *   prefix, suffix - string literals printed around each value
+ *   T              - type both operands are converted to before comparing
+ *   fmt            - printf conversion (without '%') used to print a T
+ *   a, op, b       - the comparison `a op b`, e.g. (x, ==, 3)
+ * `a` and `b` are each evaluated exactly once, into temporaries of type T.
+ * On failure the message shows both the source text of the comparison and
+ * the two values. */
+#define munit_assert_type_full(prefix, suffix, T, fmt, a, op, b)   \
+  do { \
+    T munit_tmp_a_ = (a); \
+    T munit_tmp_b_ = (b); \
+    if (!(munit_tmp_a_ op munit_tmp_b_)) {                               \
+      munit_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")", \
+                   #a, #op, #b, munit_tmp_a_, #op, munit_tmp_b_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* munit_assert_type_full without any decoration around the printed values. */
+#define munit_assert_type(T, fmt, a, op, b) \
+  munit_assert_type_full("", "", T, fmt, a, op, b)
+
+/* Typed comparison assertions, all of the form munit_assert_<type>(a, op, b).
+ * Both operands are converted to the named type before `a op b` is checked,
+ * so pick the variant that matches the values being compared. */
+
+/* Character variants print each value as a two-digit hex escape, '\xNN'. */
+#define munit_assert_char(a, op, b) \
+  munit_assert_type_full("'\\x", "'", char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
+#define munit_assert_uchar(a, op, b) \
+  munit_assert_type_full("'\\x", "'", unsigned char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
+#define munit_assert_short(a, op, b) \
+  munit_assert_type(short, MUNIT_SHORT_MODIFIER "d", a, op, b)
+#define munit_assert_ushort(a, op, b) \
+  munit_assert_type(unsigned short, MUNIT_SHORT_MODIFIER "u", a, op, b)
+#define munit_assert_int(a, op, b) \
+  munit_assert_type(int, "d", a, op, b)
+#define munit_assert_uint(a, op, b) \
+  munit_assert_type(unsigned int, "u", a, op, b)
+#define munit_assert_long(a, op, b) \
+  munit_assert_type(long int, "ld", a, op, b)
+#define munit_assert_ulong(a, op, b) \
+  munit_assert_type(unsigned long int, "lu", a, op, b)
+#define munit_assert_llong(a, op, b) \
+  munit_assert_type(long long int, "lld", a, op, b)
+#define munit_assert_ullong(a, op, b) \
+  munit_assert_type(unsigned long long int, "llu", a, op, b)
+
+/* size_t comparison; printed with the platform's size length modifier. */
+#define munit_assert_size(a, op, b) \
+  munit_assert_type(size_t, MUNIT_SIZE_MODIFIER "u", a, op, b)
+
+/* Floating-point variants compare with the plain operator; see
+ * munit_assert_double_equal for a comparison with a tolerance. */
+#define munit_assert_float(a, op, b) \
+  munit_assert_type(float, "f", a, op, b)
+#define munit_assert_double(a, op, b) \
+  munit_assert_type(double, "g", a, op, b)
+/* Pointer comparison; both operands are converted to `const void*`. */
+#define munit_assert_ptr(a, op, b) \
+  munit_assert_type(const void*, "p", a, op, b)
+
+/* Fixed-width integer variants. */
+#define munit_assert_int8(a, op, b)             \
+  munit_assert_type(munit_int8_t, PRIi8, a, op, b)
+#define munit_assert_uint8(a, op, b) \
+  munit_assert_type(munit_uint8_t, PRIu8, a, op, b)
+#define munit_assert_int16(a, op, b) \
+  munit_assert_type(munit_int16_t, PRIi16, a, op, b)
+#define munit_assert_uint16(a, op, b) \
+  munit_assert_type(munit_uint16_t, PRIu16, a, op, b)
+#define munit_assert_int32(a, op, b) \
+  munit_assert_type(munit_int32_t, PRIi32, a, op, b)
+#define munit_assert_uint32(a, op, b) \
+  munit_assert_type(munit_uint32_t, PRIu32, a, op, b)
+#define munit_assert_int64(a, op, b) \
+  munit_assert_type(munit_int64_t, PRIi64, a, op, b)
+#define munit_assert_uint64(a, op, b) \
+  munit_assert_type(munit_uint64_t, PRIu64, a, op, b)
+
+/* Asserts that two doubles differ by no more than 10^-precision.
+ * `precision` is pasted into the literal `1e-<precision>` and into the format
+ * string, so it must be a literal non-negative integer (e.g. 6), not a
+ * variable or an expression. `a` and `b` are each evaluated once. */
+#define munit_assert_double_equal(a, b, precision) \
+  do { \
+    const double munit_tmp_a_ = (a); \
+    const double munit_tmp_b_ = (b); \
+    const double munit_tmp_diff_ = ((munit_tmp_a_ - munit_tmp_b_) < 0) ? \
+      -(munit_tmp_a_ - munit_tmp_b_) : \
+      (munit_tmp_a_ - munit_tmp_b_); \
+    if (MUNIT_UNLIKELY(munit_tmp_diff_ > 1e-##precision)) { \
+      munit_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)", \
+		   #a, #b, munit_tmp_a_, munit_tmp_b_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+#include <string.h>
+/* Asserts that two NUL-terminated strings have identical contents (strcmp).
+ * Both arguments are handed to strcmp without a null check, so neither may
+ * be a null pointer. Each argument is evaluated once. */
+#define munit_assert_string_equal(a, b) \
+  do { \
+    const char* munit_tmp_a_ = a; \
+    const char* munit_tmp_b_ = b; \
+    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) != 0)) { \
+      munit_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")", \
+                   #a, #b, munit_tmp_a_, munit_tmp_b_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Asserts that two NUL-terminated strings differ (strcmp). The same null
+ * pointer restriction as for munit_assert_string_equal applies. */
+#define munit_assert_string_not_equal(a, b) \
+  do { \
+    const char* munit_tmp_a_ = a; \
+    const char* munit_tmp_b_ = b; \
+    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) == 0)) { \
+      munit_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")", \
+                   #a, #b, munit_tmp_a_, munit_tmp_b_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Asserts that the first `size` bytes at `a` and `b` are identical. On
+ * failure the message reports the offset of the first differing byte.
+ * `size`, `a` and `b` are each evaluated once. The whole comparison sits
+ * inside MUNIT_UNLIKELY so the branch hint describes the failure condition
+ * rather than the raw memcmp result. */
+#define munit_assert_memory_equal(size, a, b) \
+  do { \
+    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
+    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
+    const size_t munit_tmp_size_ = (size); \
+    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) != 0)) { \
+      size_t munit_tmp_pos_; \
+      for (munit_tmp_pos_ = 0 ; munit_tmp_pos_ < munit_tmp_size_ ; munit_tmp_pos_++) { \
+        if (munit_tmp_a_[munit_tmp_pos_] != munit_tmp_b_[munit_tmp_pos_]) { \
+          munit_errorf("assertion failed: memory %s == %s, at offset %" MUNIT_SIZE_MODIFIER "u", \
+                       #a, #b, munit_tmp_pos_); \
+          break; \
+        } \
+      } \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Asserts that the first `size` bytes at `a` and `b` differ in at least one
+ * position. `size`, `a` and `b` are each evaluated once. The byte count is
+ * printed with MUNIT_SIZE_MODIFIER, like every other size_t in this header,
+ * instead of a hard-coded "%zu". */
+#define munit_assert_memory_not_equal(size, a, b) \
+  do { \
+    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
+    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
+    const size_t munit_tmp_size_ = (size); \
+    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) == 0)) { \
+      munit_errorf("assertion failed: memory %s != %s (%" MUNIT_SIZE_MODIFIER "u bytes)", \
+                   #a, #b, munit_tmp_size_); \
+    } \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
+  } while (0) \
+  MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Pointer convenience assertions built on munit_assert_ptr.
+ * munit_assert_null / munit_assert_ptr_null are two names for the same check,
+ * as are munit_assert_not_null / munit_assert_ptr_not_null. */
+#define munit_assert_ptr_equal(a, b) \
+  munit_assert_ptr(a, ==, b)
+#define munit_assert_ptr_not_equal(a, b) \
+  munit_assert_ptr(a, !=, b)
+#define munit_assert_null(ptr) \
+  munit_assert_ptr(ptr, ==, NULL)
+#define munit_assert_not_null(ptr) \
+  munit_assert_ptr(ptr, !=, NULL)
+#define munit_assert_ptr_null(ptr) \
+  munit_assert_ptr(ptr, ==, NULL)
+#define munit_assert_ptr_not_null(ptr) \
+  munit_assert_ptr(ptr, !=, NULL)
+
+/*** Memory allocation ***/
+
+/* Allocates `size` bytes on behalf of the given source location.
+ * NOTE(review): the implementation is not part of this header - presumably it
+ * reports allocation failure through the error path rather than returning
+ * NULL; confirm in munit.c before relying on that. */
+void* munit_malloc_ex(const char* filename, int line, size_t size);
+
+/* Allocates `size` bytes, recording the call site's file and line. */
+#define munit_malloc(size) \
+  munit_malloc_ex(__FILE__, __LINE__, (size))
+
+/* Allocates storage for one object of `type` and returns it as `type*`. */
+#define munit_new(type) \
+  ((type*) munit_malloc(sizeof(type)))
+
+/* Allocates nmemb * size bytes. The product is computed without an overflow
+ * check, unlike the standard calloc. */
+#define munit_calloc(nmemb, size) \
+  munit_malloc((nmemb) * (size))
+
+/* Allocates an array of `nmemb` objects of `type`, returned as `type*`. */
+#define munit_newa(type, nmemb) \
+  ((type*) munit_calloc((nmemb), sizeof(type)))
+
+/*** Random number generation ***/
+
+/* Pseudo-random helpers for tests. Only the prototypes are visible here; the
+ * value distributions and whether range bounds are inclusive are defined by
+ * the implementation in munit.c. */
+
+/* Seeds the generator. */
+void munit_rand_seed(munit_uint32_t seed);
+/* Returns a random 32-bit unsigned value. */
+munit_uint32_t munit_rand_uint32(void);
+/* Returns a random int between `min` and `max`. */
+int munit_rand_int_range(int min, int max);
+/* Returns a random double. */
+double munit_rand_double(void);
+/* Fills `buffer` with `size` random bytes. */
+void munit_rand_memory(size_t size, munit_uint8_t buffer[MUNIT_ARRAY_PARAM(size)]);
+
+/*** Tests and Suites ***/
+
+/* Outcome a test function reports back to the runner. */
+typedef enum {
+  /* Test successful */
+  MUNIT_OK,
+  /* Test failed */
+  MUNIT_FAIL,
+  /* Test was skipped */
+  MUNIT_SKIP,
+  /* Test failed due to circumstances not intended to be tested
+   * (things like network errors, invalid parameter value, failure to
+   * allocate memory in the test harness, etc.). */
+  MUNIT_ERROR
+} MunitResult;
+
+/* Declares a test parameter: its name and the list of values it may take. */
+typedef struct {
+  char*  name;
+  char** values;
+} MunitParameterEnum;
+
+/* One concrete name/value pair, as passed to a test function in `params`. */
+typedef struct {
+  char* name;
+  char* value;
+} MunitParameter;
+
+/* Looks up the value stored under `key` in a parameter array. */
+const char* munit_parameters_get(const MunitParameter params[], const char* key);
+
+/* Per-test option flags; the values are distinct bits and can be OR-ed. */
+typedef enum {
+  MUNIT_TEST_OPTION_NONE             = 0,
+  MUNIT_TEST_OPTION_SINGLE_ITERATION = 1 << 0,
+  MUNIT_TEST_OPTION_TODO             = 1 << 1
+} MunitTestOptions;
+
+/* Callback signatures. A test receives its parameters plus either the user
+ * data or a fixture; setup receives the user data and returns the fixture;
+ * tear-down receives the fixture. */
+typedef MunitResult (* MunitTestFunc)(const MunitParameter params[], void* user_data_or_fixture);
+typedef void*       (* MunitTestSetup)(const MunitParameter params[], void* user_data);
+typedef void        (* MunitTestTearDown)(void* fixture);
+
+/* Description of a single test case: the name it is registered under, the
+ * test function, optional setup and tear-down callbacks, option flags, and an
+ * optional list of parameter declarations. */
+typedef struct {
+  char*               name;
+  MunitTestFunc       test;
+  MunitTestSetup      setup;
+  MunitTestTearDown   tear_down;
+  MunitTestOptions    options;
+  MunitParameterEnum* parameters;
+} MunitTest;
+
+/* Suite option flags; only the "no options" value exists. */
+typedef enum {
+  MUNIT_SUITE_OPTION_NONE = 0
+} MunitSuiteOptions;
+
+typedef struct MunitSuite_ MunitSuite;
+
+/* A named group of tests. Suites can nest: `suites` points to child suites
+ * and `tests` to the tests owned directly by this suite. `prefix` is the name
+ * of the suite and `iterations` an iteration count for it. */
+struct MunitSuite_ {
+  char*             prefix;
+  MunitTest*        tests;
+  MunitSuite*       suites;
+  unsigned int      iterations;
+  MunitSuiteOptions options;
+};
+
+/* Runs `suite` with the given command-line arguments. `user_data` is an
+ * opaque pointer supplied by the caller. The return value is an int suitable
+ * for returning from main(). */
+int munit_suite_main(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
+
+/* Note: I'm not very happy with this API; it's likely to change if I
+ * figure out something better.  Suggestions welcome. */
+
+typedef struct MunitArgument_ MunitArgument;
+
+/* Describes one custom command-line argument: its name, a callback that
+ * parses it, and a callback that writes its help text. */
+struct MunitArgument_ {
+  char* name;
+  munit_bool (* parse_argument)(const MunitSuite* suite, void* user_data, int* arg, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
+  void (* write_help)(const MunitArgument* argument, void* user_data);
+};
+
+/* Variant of munit_suite_main that additionally takes an array of custom
+ * argument descriptions. */
+int munit_suite_main_custom(const MunitSuite* suite,
+                            void* user_data,
+                            int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)],
+                            const MunitArgument arguments[]);
+
+/* Optional short names. Defining MUNIT_ENABLE_ASSERT_ALIASES before including
+ * this header makes every munit_assert_* macro available without the
+ * "munit_" prefix. Each alias forwards its arguments unchanged. */
+#if defined(MUNIT_ENABLE_ASSERT_ALIASES)
+
+#define assert_true(expr) munit_assert_true(expr)
+#define assert_false(expr) munit_assert_false(expr)
+#define assert_char(a, op, b) munit_assert_char(a, op, b)
+#define assert_uchar(a, op, b) munit_assert_uchar(a, op, b)
+#define assert_short(a, op, b) munit_assert_short(a, op, b)
+#define assert_ushort(a, op, b) munit_assert_ushort(a, op, b)
+#define assert_int(a, op, b) munit_assert_int(a, op, b)
+#define assert_uint(a, op, b) munit_assert_uint(a, op, b)
+#define assert_long(a, op, b) munit_assert_long(a, op, b)
+#define assert_ulong(a, op, b) munit_assert_ulong(a, op, b)
+#define assert_llong(a, op, b) munit_assert_llong(a, op, b)
+#define assert_ullong(a, op, b) munit_assert_ullong(a, op, b)
+#define assert_size(a, op, b) munit_assert_size(a, op, b)
+#define assert_float(a, op, b) munit_assert_float(a, op, b)
+#define assert_double(a, op, b) munit_assert_double(a, op, b)
+#define assert_ptr(a, op, b) munit_assert_ptr(a, op, b)
+
+#define assert_int8(a, op, b) munit_assert_int8(a, op, b)
+#define assert_uint8(a, op, b) munit_assert_uint8(a, op, b)
+#define assert_int16(a, op, b) munit_assert_int16(a, op, b)
+#define assert_uint16(a, op, b) munit_assert_uint16(a, op, b)
+#define assert_int32(a, op, b) munit_assert_int32(a, op, b)
+#define assert_uint32(a, op, b) munit_assert_uint32(a, op, b)
+#define assert_int64(a, op, b) munit_assert_int64(a, op, b)
+#define assert_uint64(a, op, b) munit_assert_uint64(a, op, b)
+
+#define assert_double_equal(a, b, precision) munit_assert_double_equal(a, b, precision)
+#define assert_string_equal(a, b) munit_assert_string_equal(a, b)
+#define assert_string_not_equal(a, b) munit_assert_string_not_equal(a, b)
+#define assert_memory_equal(size, a, b) munit_assert_memory_equal(size, a, b)
+#define assert_memory_not_equal(size, a, b) munit_assert_memory_not_equal(size, a, b)
+#define assert_ptr_equal(a, b) munit_assert_ptr_equal(a, b)
+#define assert_ptr_not_equal(a, b) munit_assert_ptr_not_equal(a, b)
+/* Forwards to munit_assert_ptr_null; the previous target,
+ * munit_assert_null_equal, is not defined anywhere in this header, so any use
+ * of this alias failed to compile. */
+#define assert_ptr_null(ptr) munit_assert_ptr_null(ptr)
+#define assert_ptr_not_null(ptr) munit_assert_not_null(ptr)
+
+#define assert_null(ptr) munit_assert_null(ptr)
+#define assert_not_null(ptr) munit_assert_not_null(ptr)
+
+#endif /* defined(MUNIT_ENABLE_ASSERT_ALIASES) */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* !defined(MUNIT_H) */
+
+/* With aliases enabled, replace any existing `assert` macro by munit_assert.
+ * This section sits outside the MUNIT_H include guard, so it is processed on
+ * every inclusion of the header, not only the first one. */
+#if defined(MUNIT_ENABLE_ASSERT_ALIASES)
+#  if defined(assert)
+#    undef assert
+#  endif
+#  define assert(expr) munit_assert(expr)
+#endif
@@ -0,0 +1,272 @@
+#include "../src/encoder/symbols.h"
+#include "../src/ast.h"
+#include "../src/error.h"
+#include "../src/lexer.h"
+#include "../src/parser/parser.h"
+#include "munit.h"
+#include <string.h>
+
+// Lexes and parses the assembly file at `path` for use as a test fixture.
+//
+// On return *list holds the token list that was allocated and filled from the
+// file, and *node holds the root node produced by parse(). The tests in this
+// file release them with ast_node_free() and tokenlist_free() respectively.
+//
+// NOTE(review): the results of lexer_open, tokenlist_alloc and tokenlist_fill
+// are not inspected here; if they can report errors those should be asserted
+// as well.
+void symbols_setup_test(ast_node_t **node, tokenlist_t **list, char *path) {
+    lexer_t lexer = {};
+    lexer_open(&lexer, path);
+    tokenlist_alloc(list);
+    tokenlist_fill(*list, &lexer);
+    parse_result_t result = parse((*list)->head);
+    lexer_close(&lexer);
+
+    // Every caller dereferences the root straight away; fail here with a clear
+    // assertion instead of crashing on a null AST inside the test body.
+    munit_assert_not_null(result.node);
+    *node = result.node;
+}
+
+MunitResult test_symbol_table_alloc(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    symbol_table_t *table = nullptr;
+    error_t *err = symbol_table_alloc(&table);
+
+    munit_assert_ptr_not_null(table);
+    munit_assert_ptr_null(err);
+    munit_assert_size(table->cap, ==, 64); // Default capacity
+    munit_assert_size(table->len, ==, 0);
+    munit_assert_ptr_not_null(table->symbols);
+
+    symbol_table_free(table);
+    return MUNIT_OK;
+}
+
+// Looking up any name in an empty symbol table must yield no symbol.
+MunitResult test_symbol_table_lookup_empty(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    symbol_table_t *table = nullptr;
+    // Make sure the table really exists before it is handed to the lookup.
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+    munit_assert_not_null(table);
+
+    symbol_t *symbol = symbol_table_lookup(table, "nonexistent");
+    munit_assert_ptr_null(symbol);
+
+    symbol_table_free(table);
+    return MUNIT_OK;
+}
+
+// Updating an empty table with a label reference node must create exactly one
+// SYMBOL_REFERENCE entry named "test" that points back at that node.
+MunitResult test_symbol_add_reference(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+    ast_node_t *root = nullptr;
+    tokenlist_t *list = nullptr;
+    symbol_table_t *table = nullptr;
+
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    munit_assert_not_null(root);
+
+    // An allocation failure would otherwise show up as a null dereference of
+    // table->len further down.
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+    munit_assert_not_null(table);
+
+    // Fixed path into the AST parsed from the input file; the id check
+    // confirms that it really lands on a label reference.
+    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
+    munit_assert_int(reference->id, ==, NODE_LABEL_REFERENCE);
+    munit_assert_size(table->len, ==, 0);
+
+    err = symbol_table_update(table, reference);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    symbol_t *symbol = symbol_table_lookup(table, "test");
+    munit_assert_not_null(symbol);
+    munit_assert_int(SYMBOL_REFERENCE, ==, symbol->kind);
+    munit_assert_ptr_equal(reference, symbol->node);
+    munit_assert_string_equal(symbol->name, "test");
+
+    symbol_table_free(table);
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Updating an empty table with a label node must create exactly one
+// SYMBOL_LOCAL entry named "test" that points back at that node.
+MunitResult test_symbol_add_label(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+    ast_node_t *root = nullptr;
+    tokenlist_t *list = nullptr;
+    symbol_table_t *table = nullptr;
+
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    munit_assert_not_null(root);
+
+    // An allocation failure would otherwise show up as a null dereference of
+    // table->len further down.
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+    munit_assert_not_null(table);
+
+    // Fixed position in the AST parsed from the input file; the id check
+    // confirms that it really is a label.
+    ast_node_t *label = root->children[2];
+    munit_assert_int(label->id, ==, NODE_LABEL);
+    munit_assert_size(table->len, ==, 0);
+
+    err = symbol_table_update(table, label);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    symbol_t *symbol = symbol_table_lookup(table, "test");
+    munit_assert_not_null(symbol);
+    munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
+    munit_assert_ptr_equal(label, symbol->node);
+    munit_assert_string_equal(symbol->name, "test");
+
+    symbol_table_free(table);
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Updating an empty table with an import directive node must create exactly
+// one SYMBOL_IMPORT entry named "test" that points back at that node.
+MunitResult test_symbol_add_import(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+    ast_node_t *root = nullptr;
+    tokenlist_t *list = nullptr;
+    symbol_table_t *table = nullptr;
+
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    munit_assert_not_null(root);
+
+    // An allocation failure would otherwise show up as a null dereference of
+    // table->len further down.
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+    munit_assert_not_null(table);
+
+    // Fixed position in the AST parsed from the input file; the id check
+    // confirms that it really is an import directive.
+    ast_node_t *import_directive = root->children[0]->children[1];
+    munit_assert_int(import_directive->id, ==, NODE_IMPORT_DIRECTIVE);
+    munit_assert_size(table->len, ==, 0);
+
+    err = symbol_table_update(table, import_directive);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    symbol_t *symbol = symbol_table_lookup(table, "test");
+    munit_assert_not_null(symbol);
+    munit_assert_int(SYMBOL_IMPORT, ==, symbol->kind);
+    munit_assert_ptr_equal(import_directive, symbol->node);
+    munit_assert_string_equal(symbol->name, "test");
+
+    symbol_table_free(table);
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Shared helper for the upgrade tests: applies two consecutive updates for the
+// same symbol name to a fresh table and checks the outcome of the second one.
+//
+//   name           - symbol name both nodes are expected to resolve to
+//   first          - node used for the initial update, which must succeed
+//   first_kind     - kind the symbol must have after the initial update
+//   second         - node used for the follow-up update
+//   second_kind    - kind the symbol must have if the follow-up takes effect
+//   should_succeed - true if the follow-up must return no error, false if it
+//                    must return err_symbol_table_incompatible_symbols
+//   should_update  - true if the stored symbol must now describe `second`,
+//                    false if it must still describe `first`
+//
+// In every case the table must still hold exactly one symbol afterwards.
+void test_symbol_update(const char *name, ast_node_t *first, symbol_kind_t first_kind, ast_node_t *second,
+                        symbol_kind_t second_kind, bool should_succeed, bool should_update) {
+    symbol_table_t *table = nullptr;
+    // An allocation failure would otherwise show up as a null dereference of
+    // table->len on the next assertion.
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+    munit_assert_not_null(table);
+
+    // Initial update: always expected to insert the symbol as `first_kind`.
+    munit_assert_size(table->len, ==, 0);
+    err = symbol_table_update(table, first);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    symbol_t *symbol = symbol_table_lookup(table, name);
+    munit_assert_not_null(symbol);
+    munit_assert_int(first_kind, ==, symbol->kind);
+    munit_assert_ptr_equal(first, symbol->node);
+    munit_assert_string_equal(symbol->name, name);
+
+    // Follow-up update: either accepted or rejected as incompatible, but it
+    // never adds a second entry.
+    err = symbol_table_update(table, second);
+    if (should_succeed) {
+        munit_assert_null(err);
+    } else {
+        munit_assert_ptr_equal(err, err_symbol_table_incompatible_symbols);
+    }
+    munit_assert_size(table->len, ==, 1);
+
+    // The stored symbol describes `second` only when the update took effect.
+    ast_node_t *expected_node = should_update ? second : first;
+    symbol_kind_t expected_kind = should_update ? second_kind : first_kind;
+
+    symbol = symbol_table_lookup(table, name);
+    munit_assert_not_null(symbol);
+    munit_assert_int(expected_kind, ==, symbol->kind);
+    munit_assert_ptr_equal(expected_node, symbol->node);
+    munit_assert_string_equal(symbol->name, name);
+
+    symbol_table_free(table);
+}
+
+// Exercises every pair of consecutive updates that must be accepted: real
+// upgrades that replace the stored symbol, identity updates, and downgrades
+// that are accepted but leave the stored symbol untouched.
+MunitResult test_symbol_upgrade_valid(const MunitParameter params[], void *data) {
+    // Unused; silenced the same way as in the other tests of this file.
+    (void)params;
+    (void)data;
+    ast_node_t *root = nullptr;
+    tokenlist_t *list = nullptr;
+
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    munit_assert_not_null(root);
+
+    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
+    ast_node_t *label = root->children[2];
+    ast_node_t *import_directive = root->children[0]->children[1];
+    ast_node_t *export_directive = root->children[1]->children[1];
+
+    // Guard against the input file or grammar drifting: the fixed AST paths
+    // above must still land on the node kinds this test is about.
+    munit_assert_int(reference->id, ==, NODE_LABEL_REFERENCE);
+    munit_assert_int(label->id, ==, NODE_LABEL);
+    munit_assert_int(import_directive->id, ==, NODE_IMPORT_DIRECTIVE);
+    munit_assert_int(export_directive->id, ==, NODE_EXPORT_DIRECTIVE);
+
+    // real upgrades
+    test_symbol_update("test", reference, SYMBOL_REFERENCE, label, SYMBOL_LOCAL, true, true);
+    test_symbol_update("test", reference, SYMBOL_REFERENCE, import_directive, SYMBOL_IMPORT, true, true);
+    test_symbol_update("test", reference, SYMBOL_REFERENCE, export_directive, SYMBOL_EXPORT, true, true);
+    test_symbol_update("test", label, SYMBOL_LOCAL, export_directive, SYMBOL_EXPORT, true, true);
+
+    // identity upgrades
+    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference, SYMBOL_REFERENCE, true, false);
+    test_symbol_update("test", label, SYMBOL_LOCAL, label, SYMBOL_LOCAL, true, false);
+    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_directive, SYMBOL_IMPORT, true, false);
+    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_directive, SYMBOL_EXPORT, true, false);
+
+    // downgrades that are allowed and ignored
+    test_symbol_update("test", label, SYMBOL_LOCAL, reference, SYMBOL_REFERENCE, true, false);
+    test_symbol_update("test", import_directive, SYMBOL_IMPORT, reference, SYMBOL_REFERENCE, true, false);
+    test_symbol_update("test", export_directive, SYMBOL_EXPORT, reference, SYMBOL_REFERENCE, true, false);
+    test_symbol_update("test", export_directive, SYMBOL_EXPORT, label, SYMBOL_LOCAL, true, false);
+    test_symbol_update("test", import_directive, SYMBOL_IMPORT, label, SYMBOL_LOCAL, true, false);
+
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Exercises the pairs of consecutive updates that must be rejected with
+// err_symbol_table_incompatible_symbols while leaving the first symbol intact.
+MunitResult test_symbol_upgrade_invalid(const MunitParameter params[], void *data) {
+    // Unused; silenced the same way as in the other tests of this file.
+    (void)params;
+    (void)data;
+    ast_node_t *root = nullptr;
+    tokenlist_t *list = nullptr;
+
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    munit_assert_not_null(root);
+
+    ast_node_t *label = root->children[2];
+    ast_node_t *import_directive = root->children[0]->children[1];
+    ast_node_t *export_directive = root->children[1]->children[1];
+
+    // Guard against the input file or grammar drifting: the fixed AST paths
+    // above must still land on the node kinds this test is about.
+    munit_assert_int(label->id, ==, NODE_LABEL);
+    munit_assert_int(import_directive->id, ==, NODE_IMPORT_DIRECTIVE);
+    munit_assert_int(export_directive->id, ==, NODE_EXPORT_DIRECTIVE);
+
+    // invalid upgrades
+    test_symbol_update("test", label, SYMBOL_LOCAL, import_directive, SYMBOL_IMPORT, false, false);
+    test_symbol_update("test", export_directive, SYMBOL_EXPORT, import_directive, SYMBOL_IMPORT, false, false);
+    test_symbol_update("test", import_directive, SYMBOL_IMPORT, export_directive, SYMBOL_EXPORT, false, false);
+
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Updating an empty table with an export directive node must create exactly
+// one SYMBOL_EXPORT entry named "test" that points back at that node.
+MunitResult test_symbol_add_export(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+    ast_node_t *root = nullptr;
+    tokenlist_t *list = nullptr;
+    symbol_table_t *table = nullptr;
+
+    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
+    munit_assert_not_null(root);
+
+    // An allocation failure would otherwise show up as a null dereference of
+    // table->len further down.
+    error_t *err = symbol_table_alloc(&table);
+    munit_assert_null(err);
+    munit_assert_not_null(table);
+
+    // Fixed position in the AST parsed from the input file; the id check
+    // confirms that it really is an export directive.
+    ast_node_t *export_directive = root->children[1]->children[1];
+    munit_assert_int(export_directive->id, ==, NODE_EXPORT_DIRECTIVE);
+    munit_assert_size(table->len, ==, 0);
+
+    err = symbol_table_update(table, export_directive);
+    munit_assert_null(err);
+    munit_assert_size(table->len, ==, 1);
+
+    symbol_t *symbol = symbol_table_lookup(table, "test");
+    munit_assert_not_null(symbol);
+    munit_assert_int(SYMBOL_EXPORT, ==, symbol->kind);
+    munit_assert_ptr_equal(export_directive, symbol->node);
+    munit_assert_string_equal(symbol->name, "test");
+
+    symbol_table_free(table);
+    ast_node_free(root);
+    tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+// Registration table for the symbol table tests defined in this file. Each
+// entry pairs a test path with its function; none of them uses setup/teardown
+// callbacks, special options or parameters. The trailing all-null entry marks
+// the end of the array.
+MunitTest symbols_tests[] = {
+    {"/table_alloc",        test_symbol_table_alloc,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/table_lookup_empty", test_symbol_table_lookup_empty, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/add_reference",      test_symbol_add_reference,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/add_label",          test_symbol_add_label,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/add_import",         test_symbol_add_import,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/add_export",         test_symbol_add_export,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/upgrade_valid",      test_symbol_upgrade_valid,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/upgrade_invalid",    test_symbol_upgrade_invalid,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {nullptr,               nullptr,                        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
+};
@@ -2,19 +2,17 @@

 set -euo pipefail

-# Start with static analysis
-make clean all
-mkdir -p reports/static-analysis
-scan-build -o reports/static-analysis/ -plist-html --status-bugs make all
+make analyze debug asan msan

-# Run the sanitizer builds and valgrind
-make clean sanitize all
+ASAN=build/asan/oas
+MSAN=build/msan/oas
+DEBUG=build/debug/oas

 ARGUMENTS=("tokens" "text" "ast")
 while IFS= read -r INPUT_FILE; do
    for ARGS in ${ARGUMENTS[@]}; do
-        ./oas-asan $ARGS $INPUT_FILE > /dev/null
-        ./oas-msan $ARGS $INPUT_FILE > /dev/null
-        valgrind --leak-check=full --error-exitcode=1 ./oas $ARGS $INPUT_FILE >/dev/null
+        $ASAN $ARGS $INPUT_FILE > /dev/null
+        $MSAN $ARGS $INPUT_FILE > /dev/null
+        valgrind --leak-check=full --error-exitcode=1 $DEBUG $ARGS $INPUT_FILE >/dev/null
    done
 done < <(find tests/input/ -type f -name '*.asm')
Author	SHA1	Message	Date
omicron	d7a6f39068	Add more symbols tests Validate the build / validate-build (push) Successful in 31s Details	2025-04-08 21:57:59 +02:00
omicron	8c62924b63	expose symbols table errors in the header	2025-04-08 21:57:28 +02:00
omicron	347512e599	Add tests for all 4 kinds of symbols being added Validate the build / validate-build (push) Successful in 33s Details	2025-04-08 21:02:49 +02:00
omicron	abf5e3063a	Implement support for import and export directives in the symbols table	2025-04-08 21:01:59 +02:00
omicron	ac45c1ea84	Add symbols tests	2025-04-08 20:38:08 +02:00
omicron	b514f5d78b	Fix bug in symbol_table_add where it did not increment the length	2025-04-08 20:37:09 +02:00
omicron	2710784872	fix parse_immediate to accept label_reference instead of identifier	2025-04-07 12:50:39 +02:00
omicron	b38b5d220a	Add .import and .export to the input test file Validate the build / validate-build (push) Successful in 41s Details	2025-04-07 10:52:49 +02:00
omicron	9549951fe1	Make main properly return with failure on parsing errors	2025-04-07 10:50:57 +02:00
omicron	0afc1d869a	Add .import and .export directive to the grammar and parser	2025-04-07 10:49:57 +02:00
omicron	d3141e764c	initial symbol table implementation	2025-04-06 20:55:04 +02:00
omicron	2bea87b39a	Run tests in the validate gitea action Validate the build / validate-build (push) Successful in 29s Details	2025-04-06 09:23:25 +02:00
omicron	2eb7b3c2f1	use llvm to generate test coverage	2025-04-06 09:17:51 +02:00
omicron	f1f4c93a8e	Fix bug in lexer_next_number not correctly tracking character number Validate the build / validate-build (push) Successful in 28s Details When a number has a suffix the lexer state didn't record the number of characters consumed for this suffix. This made the lexer state be 2-3 characters short in its line location reporting until it encountered a newline character. It did not otherwise corrupt the state of the lexer.	2025-04-05 01:41:40 +02:00
omicron	27099c9899	Add initial unit tests - Add µnit source and header files - Add test target to the build system - Implement a thorough lexer test suite - Implement a minimal AST test suite	2025-04-05 01:37:04 +02:00
omicron	3fead8017b	Rename lexer errors	2025-04-05 01:37:04 +02:00
omicron	af66790cff	Clean up error definitions, location and expose them in the headers - Exposes all errors in the header file so any user of the api can test for the specific error conditions - Mark all static error pointers as const - Move generic errors into error.h - Name all errors err_modulename_* for errors that belong to a specific module and err_* for generic errors.	2025-04-05 01:37:04 +02:00
omicron	cb8768b1d0	Make clangd aware of the _POSIX_C_SOURCE define in the build system	2025-04-05 01:37:04 +02:00
omicron	1571c52012	Add some building documentation that clarifies the make targets Validate the build / validate-build (push) Successful in 26s Details	2025-04-04 02:18:11 +02:00
omicron	0f9ced8eb1	Rework the build system to be more modular Split most of the work off into make/base.mk and allow for easy wrappers to be created around that that can build with different instrumentation in their own build directory. Create wrappers for the following: - release build - debug build - afl++ fuzzing build - static analysis with clang - clang memory sanitizer - clang address/undefined sanitizer	2025-04-04 02:18:02 +02:00