Add regression test for parse zero operands at eof

Prune the parse tree of NODE_NEWLINE after parsing succeeds
Fix grammar not being able to disambiguate some instructions
2025-04-16 13:16:55 +02:00 · 2025-04-16 13:01:02 +02:00 · 2025-04-16 12:34:44 +02:00 · 2025-04-16 12:13:02 +02:00 · 2025-04-09 01:17:09 +02:00 · 2025-04-09 01:15:51 +02:00
36 changed files with 3865 additions and 85 deletions
--- a/.clangd
+++ b/.clangd
@@ -1,2 +1,2 @@
 CompileFlags:
-  Add: ["-std=c23", "-x", "c"]
+  Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]
--- a/.gitea/workflows/validate.yaml
+++ b/.gitea/workflows/validate.yaml
@@ -34,3 +34,7 @@ jobs:
      - name: make validate
        run: |
          make validate
      - name: make test
        run: |
          make test
--- a/72
+++ b/72
@@ -1,54 +1,46 @@
-.PHONY: all clean clean-objects clean-reports run sanitize validate fuzz
+.PHONY: all clean distclean release debug afl asan msan validate analyze fuzz
-CC=clang
+debug: 
-LD=clang
+	make -rRf make/debug.mk all
 CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
 LDFLAGS?=
-SOURCES = $(shell find src/ -type f -name '*.c')
+all: debug release afl asan msan
 OBJECTS = $(SOURCES:.c=.o)
 DEPENDENCIES = $(SOURCES:.c=.d)
 TARGET?=oas
 OUTPUTS=oas oas-asan oas-msan oas-afl
 RUNARGUMENTS?=ast tests/input/valid.asm
 all: $(TARGET)
-run: $(TARGET)
+release: 
-	./$(TARGET) $(RUNARGUMENTS)
+	make -rRf make/release.mk all
 afl:
 	make -rRf make/afl.mk all
 fuzz:
-	make CC="afl-clang-fast" LD="afl-clang-fast" TARGET="oas-afl" clean-objects all
+	make -rRf make/afl.mk fuzz
 	make clean-objects
 	mkdir -p reports/afl
 	afl-fuzz -i tests/input -o reports/afl -m none -- ./oas-afl -tokens @@
-sanitize:
+asan:
-	make CFLAGS="$(CFLAGS) -fsanitize=address,undefined" \
+	make -rRf make/asan.mk all
 		LDFLAGS="-fsanitize=address,undefined" \
 		TARGET="oas-asan" clean-objects all
 	make CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" \
 		LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" \
 		TARGET="oas-msan" clean-objects all 
 	make clean-objects
-validate:
+msan:
 	make -rRf make/msan.mk all
 validate: asan msan debug
 	./validate.sh
-$(TARGET): $(OBJECTS)
+analyze:
-	$(LD) $(LDFLAGS) -o $@ $^
+	make -rRf make/analyze.mk clean all
-%.o: %.c
+test:
-	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
+	make -rRf make/test.mk test
-include $(DEPENDENCIES)
+clean:
 	make -rRf make/release.mk clean
 	make -rRf make/debug.mk clean
 	make -rRf make/afl.mk clean
 	make -rRf make/msan.mk clean
 	make -rRf make/asan.mk clean
 	make -rRf make/analyze.mk clean
 	make -rRf make/test.mk clean
 	rm -rf build/
-clean-objects:
+distclean: clean
-	rm -f $(OBJECTS) $(DEPENDENCIES)
+	make -rRf make/afl.mk distclean
-
+	make -rRf make/analyze.mk distclean
 clean-reports:
 	rm -rf reports/
 clean: clean-objects
 	rm -f $(TARGET) $(OUTPUTS)
--- a/doc/BUILDING.md
+++ b/doc/BUILDING.md
@@ -0,0 +1,29 @@
 # Building
 To build oas in the default configuration you only need GNU make and a
 sufficiently modern clang.
 ```
 make
 ```
 ## Make targets
 There are a number of make targets available to build various instrumented
 builds that are used in validation, analysis and sanitizing. Some of these may
 require extra dependencies.
 - `debug`: Creates the debug build in `build/debug`. This is the default target.
 - `all`: Builds all binary executable targets. These are
   `debug`, `release`, `msan`, `asan` and `afl`. All executables can be found
   in `build/` in a subdirectory matching their target names.
 - `release`: Creates the release build in `build/release`
 - `afl`: Creates a build with AFL++ instrumentation for fuzzing
 - `fuzz`: Starts the fuzzer with the instrumented afl executable
 - `asan`: Builds with the clang address and undefined-behavior sanitizers
 - `msan`: Builds with the clang memory sanitizer
 - `validate`: Builds `debug`, `msan`, and `asan` targets, then runs the
   validation script. This script executes the sanitizer targets and runs
   Valgrind on the debug target across multiple modes and test input files.
--- a/doc/parser_grammar.txt
+++ b/doc/parser_grammar.txt
@@ -1,13 +1,13 @@
 <program>   ::= <statement>*
-<statement> ::= <label> | <directive> | <instruction>
+<statement> ::= <label> | <directive> | <instruction> | <newline>
 <label> ::= <identifier> <colon>
-<directive> ::= <dot> <section_directive>
+<directive> ::= <dot> <section_directive> <newline>
 <section_directive> ::= "section" <identifier>
-<instruction> ::= <identifier> <operands>
+<instruction> ::= <identifier> <operands> <newline>
 <operands> ::= <operand> ( <comma> <operand> )*
--- a/make/afl.mk
+++ b/make/afl.mk
@@ -0,0 +1,14 @@
 # AFL++ instrumented build. The compile and link rules come from make/base.mk;
 # this file only selects the afl compiler, the build directory and adds the
 # fuzzing targets.
 .PHONY: fuzz distclean
 CC=afl-clang-fast
 LD=afl-clang-fast
 BUILD_DIR=build/afl/
 # base.mk defines $(TARGET), `all` and `clean`, so it is mandatory: use a plain
 # `include` so a missing file is reported instead of silently ignored.
 include make/base.mk
 # Build the instrumented executable, then fuzz it in "-tokens" mode using the
 # files under tests/input as the seed corpus.
 fuzz: $(BUILD_DIR)$(TARGET)
 	mkdir -p reports/afl
 	afl-fuzz -i tests/input -o reports/afl -m none -- ./$< -tokens @@
 # In addition to `clean`, remove the fuzzer findings.
 distclean: clean
 	rm -rf reports/afl
--- a/make/analyze.mk
+++ b/make/analyze.mk
@@ -0,0 +1,9 @@
 # Static-analysis build driven by clang's scan-build. Objects go to their own
 # build directory so the analyzer always sees a full compile.
 # `analyze` and `distclean` are commands, not files.
 .PHONY: analyze distclean
 BUILD_DIR=build/analyze/
 # base.mk provides `all` and `clean`; it is mandatory, so fail loudly when it
 # is missing instead of silently skipping it.
 include make/base.mk
 # Re-run this makefile's `all` target under scan-build. $(MAKE) is used rather
 # than a literal `make` so the same make binary and flags are used for the
 # nested build.
 analyze:
 	mkdir -p reports/static-analysis
 	scan-build -o reports/static-analysis/ -plist-html --status-bugs $(MAKE) -rRf make/analyze.mk all
 # In addition to `clean`, remove the generated analysis reports.
 distclean: clean
 	rm -rf reports/static-analysis
--- a/make/asan.mk
+++ b/make/asan.mk
@@ -0,0 +1,5 @@
 # Address + undefined-behavior sanitizer build: the debug flags with
 # -fsanitize=address,undefined added at both compile and link time.
 CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=address,undefined
 LDFLAGS=-fsanitize=address,undefined
 BUILD_DIR=build/asan/
 # base.mk holds every rule, so it is mandatory: a plain `include` reports a
 # missing file instead of silently skipping it.
 include make/base.mk
--- a/make/base.mk
+++ b/make/base.mk
@@ -0,0 +1,27 @@
 # Shared build rules. The per-configuration makefiles (debug, release, asan,
 # msan, afl, analyze, test) set CC/LD/CFLAGS/LDFLAGS/BUILD_DIR/TARGET/SOURCES
 # as needed and then include this file; anything left unset falls back to the
 # `?=` defaults below.
 .PHONY: all clean
 # Delete a half-written object or executable when its recipe fails, so a
 # broken file is never mistaken for an up-to-date one on the next run.
 .DELETE_ON_ERROR:
 CC?=clang
 LD?=clang
 CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
 LDFLAGS?=
 BUILD_DIR?=build/debug/
 SOURCES?=$(shell find src/ -type f -name '*.c')
 # Simple (:=) assignment: SOURCES, and therefore its `find`, is expanded once
 # here instead of on every later reference to OBJECTS or DEPENDENCIES.
 OBJECTS:=$(patsubst %.c,$(BUILD_DIR)%.o,$(SOURCES))
 DEPENDENCIES:=$(OBJECTS:.o=.d)
 TARGET?=oas
 all: $(BUILD_DIR)$(TARGET)
 # Link all objects into the final executable.
 $(BUILD_DIR)$(TARGET): $(OBJECTS)
 	$(LD) $(LDFLAGS) -o $@ $^
 # Compile one source file into the mirrored path below BUILD_DIR. -MMD -MP also
 # writes a .d file with the header dependencies next to the object.
 $(BUILD_DIR)%.o: %.c
 	mkdir -p $(dir $@)
 	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
 # Pull in the generated header dependencies; they do not exist before the first
 # build, hence the leading `-`.
 -include $(DEPENDENCIES)
 clean:
 	rm -rf $(BUILD_DIR)
--- a/make/debug.mk
+++ b/make/debug.mk
@@ -0,0 +1 @@
 # Debug build: uses the defaults from base.mk unchanged. base.mk is mandatory,
 # so a plain `include` is used to report a missing file instead of skipping it.
 include make/base.mk
--- a/make/msan.mk
+++ b/make/msan.mk
@@ -0,0 +1,5 @@
 # Memory sanitizer build: the debug flags with -fsanitize=memory added at both
 # compile and link time.
 CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=memory
 LDFLAGS=-fsanitize=memory
 BUILD_DIR=build/msan/
 # base.mk holds every rule, so it is mandatory: a plain `include` reports a
 # missing file instead of silently skipping it.
 include make/base.mk
--- a/make/release.mk
+++ b/make/release.mk
@@ -0,0 +1,5 @@
 # Release build: optimized, link-time optimization, assertions disabled via
 # -DNDEBUG, and a stripped executable (-s). The `?=` assignments only apply
 # when the variable is not already set.
 CFLAGS?=-Wall -Wextra -Wpedantic -O2 -std=c23 -flto -fomit-frame-pointer -DNDEBUG -D_POSIX_C_SOURCE=200809L
 LDFLAGS?=-flto -s -Wl,--gc-sections
 BUILD_DIR?=build/release/
 # base.mk holds every rule, so it is mandatory: a plain `include` reports a
 # missing file instead of silently skipping it.
 include make/base.mk
--- a/make/test.mk
+++ b/make/test.mk
@@ -0,0 +1,21 @@
 # Unit-test build: compiles everything under src/ and tests/ except
 # src/main.c into one coverage-instrumented executable, runs it and renders an
 # HTML coverage report.
 .PHONY: test clean-coverage
 CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fprofile-instr-generate -fcoverage-mapping
 LDFLAGS?=-fprofile-instr-generate
 BUILD_DIR=build/test/
 TARGET=oas-tests
 # Simple (:=) assignment so `find` runs once, not on every expansion.
 SOURCES := $(filter-out src/main.c, $(shell find src/ tests/ -type f -name '*.c'))
 # base.mk holds the compile/link rules and `clean`; it is mandatory.
 include make/base.mk
 # Run the tests with profiling enabled, merge the raw profile and generate the
 # HTML report (files under tests/ are excluded from the coverage numbers).
 test: $(BUILD_DIR)$(TARGET)
 	mkdir -p reports/coverage
 	LLVM_PROFILE_FILE="reports/coverage/tests.profraw" $(BUILD_DIR)$(TARGET)
 	llvm-profdata merge -sparse reports/coverage/tests.profraw -o reports/coverage/tests.profdata
 	llvm-cov show $(BUILD_DIR)$(TARGET) -instr-profile=reports/coverage/tests.profdata -format=html -output-dir=reports/coverage/html -ignore-filename-regex="tests/.*"
 	@echo "--"
 	@echo "Test coverage:"
 	@echo "file://$$(realpath reports/coverage/html/index.html)"
 	@echo "--"
 # Extend base.mk's `clean` with a prerequisite rather than giving `clean` a
 # second recipe: a second recipe would replace the one from base.mk (with an
 # "overriding recipe" warning) and leave $(BUILD_DIR) behind.
 clean: clean-coverage
 clean-coverage:
 	rm -rf reports/coverage
--- a/src/ast.c
+++ b/src/ast.c
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <string.h>
-error_t *err_node_children_cap = &(error_t){
+error_t *const err_ast_children_cap = &(error_t){
    .message = "Failed to increase ast node children, max capacity reached"};
 error_t *ast_node_alloc(ast_node_t **output) {
@@ -50,7 +50,7 @@ error_t *ast_node_alloc_children(ast_node_t *node) {
 error_t *ast_node_grow_cap(ast_node_t *node) {
    if (node->cap >= node_max_children_cap) {
-        return err_node_children_cap;
+        return err_ast_children_cap;
    }
    size_t new_cap = node->cap * 2;
@@ -157,6 +157,8 @@ const char *ast_node_id_to_cstr(node_id_t id) {
        return "NODE_ASTERISK";
    case NODE_DOT:
        return "NODE_DOT";
    case NODE_NEWLINE:
        return "NODE_NEWLINE";
    }
    assert(!"Unreachable, weird node id" && id);
    __builtin_unreachable();
@@ -172,7 +174,8 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
    }
    printf("%s", ast_node_id_to_cstr(node->id));
-    if (node->token_entry && node->token_entry->token.value) {
+    if (node->token_entry && node->token_entry->token.value &&
        node->id != NODE_NEWLINE) {
        printf(" \"%s\"", node->token_entry->token.value);
    }
    printf("\n");
@@ -185,3 +188,18 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
 void ast_node_print(ast_node_t *node) {
    ast_node_print_internal(node, 0);
 }
 void ast_node_prune(ast_node_t *node, node_id_t id) {
    // Compact the children array in place: `kept` is the write position for
    // the next surviving child, `scan` walks over every original child.
    size_t kept = 0;
    size_t scan = 0;
    while (scan < node->len) {
        auto candidate = node->children[scan++];
        if (candidate->id != id) {
            // Survivor: prune its own subtree first, then move it down.
            ast_node_prune(candidate, id);
            node->children[kept++] = candidate;
        } else {
            // Matching child: release it and do not descend into it.
            ast_node_free(candidate);
        }
    }
    node->len = kept;
 }
--- a/src/ast.h
+++ b/src/ast.h
@@ -7,6 +7,8 @@
 #include <stddef.h>
 #include <stdint.h>
 extern error_t *const err_ast_children_cap;
 typedef enum node_id {
    NODE_INVALID,
@@ -48,6 +50,7 @@ typedef enum node_id {
    NODE_MINUS,
    NODE_ASTERISK,
    NODE_DOT,
    NODE_NEWLINE,
 } node_id_t;
 typedef struct ast_node ast_node_t;
@@ -117,4 +120,17 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
 */
 void ast_node_print(ast_node_t *node);
 /**
 * Prune the children with a given id
 *
 * The tree is visited recursively and all child nodes with the given id are
 * pruned completely. If a node has the given id, it is removed along with all
 * its children, even if some of those children have different ids. The root
 * node's id is never checked, so the root is guaranteed to remain allocated
 * and valid.
 *
 * @param node The root of the tree you want to prune
 * @param id The id of the nodes you want to prune
 */
 void ast_node_prune(ast_node_t *node, node_id_t id);
 #endif // INCLUDE_SRC_AST_H_
--- a/src/error.c
+++ b/src/error.c
@@ -9,8 +9,13 @@ error_t *const err_errorf_alloc = &(error_t){
 error_t *const err_errorf_length = &(error_t){
    .message =
        "Formatting of another error failed to determine the error length"};
 error_t *const err_eof =
    &(error_t){.message = "Read failed because EOF is reached"};
-error_t *err_allocation_failed =
+error_t *const err_unknown_read_failure =
    &(error_t){.message = "Unknown read error"};
 error_t *const err_allocation_failed =
    &(error_t){.message = "Memory allocation failed"};
 error_t *errorf(const char *fmt, ...) {
--- a/src/error.h
+++ b/src/error.h
@@ -19,6 +19,8 @@ static inline void error_free(error_t *err) {
 }
 /* Some global errors */
-extern error_t *err_allocation_failed;
+extern error_t *const err_allocation_failed;
 extern error_t *const err_eof;
 extern error_t *const err_unknown_read_failure;
 #endif // INCLUDE_SRC_ERROR_H_
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -5,21 +5,16 @@
 #include <errno.h>
 #include <string.h>
-error_t *err_lexer_already_open = &(error_t){
+error_t *const err_lexer_already_open = &(error_t){
    .message =
        "Can't open on a lexer object that is already opened. Close it first."};
-error_t *err_prefix_too_large =
+error_t *const err_lexer_prefix_too_large =
    &(error_t){.message = "Prefix too large for internal lexer buffer"};
-error_t *err_buffer_underrun = &(error_t){
+error_t *const err_lexer_buffer_underrun = &(error_t){
    .message = "Buffer does not contain enough characters for lexer_consume_n"};
-error_t *err_consume_excessive_length =
+error_t *const err_lexer_consume_excessive_length =
    &(error_t){.message = "Too many valid characters to consume"};
 error_t *err_eof =
    &(error_t){.message = "Can't read from file because EOF is reached"};
 error_t *err_unknown_read = &(error_t){.message = "Unknown read error"};
 typedef bool (*char_predicate_t)(char);
 const char *lexer_token_id_to_cstr(lexer_token_id_t id) {
@@ -112,7 +107,7 @@ error_t *lexer_fill_buffer(lexer_t *lex) {
        if (n == 0 && ferror(lex->fp))
            return errorf("Read error: %s", strerror(errno));
        if (n == 0)
-            return err_unknown_read;
+            return err_unknown_read_failure;
        remaining -= n;
        lex->buffer_count += n;
    }
@@ -182,9 +177,9 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
 error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                         char buffer[static len], const size_t n) {
    if (lex->buffer_count < n)
-        return err_buffer_underrun;
+        return err_lexer_buffer_underrun;
    if (n > len)
-        return err_consume_excessive_length;
+        return err_lexer_consume_excessive_length;
    memcpy(buffer, lex->buffer, n);
    lexer_shift_buffer(lex, n);
@@ -229,7 +224,7 @@ error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
                (lex->buffer_count > 0 && is_valid(lex->buffer[0]));
        if (have_more_characters && *n_consumed == buffer_size) {
-            return err_consume_excessive_length;
+            return err_lexer_consume_excessive_length;
        }
    } while (have_more_characters);
    return nullptr;
@@ -299,11 +294,12 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_number_length - so_far,
                                 buffer + so_far, is_valid, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Number length exceeds the maximum of 128 characters";
    }
    lex->character_number += n;
    so_far += n;
    if (n == 0) {
        token->id = TOKEN_ERROR;
@@ -329,14 +325,15 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    if (suffix_length > 0) {
        err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far,
                              suffix_length);
-        if (err == err_consume_excessive_length) {
+        if (err == err_lexer_consume_excessive_length) {
            token->id = TOKEN_ERROR;
            token->explanation =
                "Number length exceeds the maximum of 128 characters";
        } else {
            lex->character_number += suffix_length;
        }
    }
    lex->character_number += n;
    token->value = strdup(buffer);
    return nullptr;
 }
@@ -406,7 +403,7 @@ error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_identifier_length, buffer,
                                 is_identifier_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Identifier length exceeds the maximum of 128 characters";
@@ -449,7 +446,7 @@ error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_whitespace_length, buffer,
                                 is_whitespace_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Whitespace length exceeds the maximum of 1024 characters";
@@ -484,7 +481,7 @@ error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_comment_length, buffer,
                                 is_comment_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Comment length exceeds the maximum of 1024 characters";
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -5,7 +5,10 @@
 #include <stddef.h>
 #include <stdio.h>
-extern error_t *err_eof;
+extern error_t *const err_lexer_already_open;
 extern error_t *const err_lexer_prefix_too_large;
 extern error_t *const err_lexer_buffer_underrun;
 extern error_t *const err_lexer_consume_excessive_length;
 typedef enum {
    TOKEN_ERROR,
--- a/src/parser/combinators.c
+++ b/src/parser/combinators.c
@@ -1,4 +1,5 @@
 #include "combinators.h"
 #include "util.h"
 // Parse a list of the given parser delimited by the given token id. Does not
 // store the delimiters in the parent node
@@ -122,5 +123,12 @@ parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
        }
        current = result.next;
    }
    // token stream ended before we matched all parsers
    if (parser != nullptr) {
        ast_node_free(all);
        return parse_no_match();
    }
    return parse_success(all, current);
 }
--- a/src/parser/parser.c
+++ b/src/parser/parser.c
@@ -120,21 +120,28 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
 }
 parse_result_t parse_directive(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
+    parser_t parsers[] = {parse_dot, parse_section_directive, parse_newline,
                          nullptr};
    return parse_consecutive(current, NODE_DIRECTIVE, parsers);
 }
 parse_result_t parse_instruction(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
+    parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
                          nullptr};
    return parse_consecutive(current, NODE_INSTRUCTION, parsers);
 }
 parse_result_t parse_statement(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
-                          nullptr};
+                          parse_newline, nullptr};
    return parse_any(current, parsers);
 }
 parse_result_t parse(tokenlist_entry_t *current) {
-    return parse_many(current, NODE_PROGRAM, true, parse_statement);
+    current = tokenlist_skip_trivia(current);
    parse_result_t result =
        parse_many(current, NODE_PROGRAM, true, parse_statement);
    if (result.node != nullptr)
        ast_node_prune(result.node, NODE_NEWLINE);
    return result;
 }
--- a/src/parser/primitives.c
+++ b/src/parser/primitives.c
@@ -62,6 +62,10 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
 }
 // Match a single TOKEN_NEWLINE and wrap it in a NODE_NEWLINE node. No extra
 // validator is supplied (the last argument is nullptr).
 parse_result_t parse_newline(tokenlist_entry_t *current) {
    const parse_result_t newline =
        parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
    return newline;
 }
 parse_result_t parse_label_reference(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
                       nullptr);
--- a/src/parser/primitives.h
+++ b/src/parser/primitives.h
@@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
 parse_result_t parse_minus(tokenlist_entry_t *current);
 parse_result_t parse_asterisk(tokenlist_entry_t *current);
 parse_result_t parse_dot(tokenlist_entry_t *current);
 parse_result_t parse_newline(tokenlist_entry_t *current);
 parse_result_t parse_label_reference(tokenlist_entry_t *current);
 /* These are "primitives" with a different name and some extra validation on top
--- a/src/parser/util.c
+++ b/src/parser/util.c
@@ -1,7 +1,7 @@
 #include "util.h"
 #include "../tokenlist.h"
-error_t *err_parse_no_match =
+error_t *const err_parse_no_match =
    &(error_t){.message = "parsing failed to find the correct token sequence"};
 parse_result_t parse_error(error_t *err) {
--- a/src/parser/util.h
+++ b/src/parser/util.h
@@ -21,6 +21,6 @@ parse_result_t parse_token(tokenlist_entry_t *current,
                           token_validator_t is_valid);
 parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);
-extern error_t *err_parse_no_match;
+extern error_t *const err_parse_no_match;
 #endif // INCLUDE_PARSER_UTIL_H_
--- a/src/tokenlist.c
+++ b/src/tokenlist.c
@@ -86,7 +86,6 @@ bool is_trivia(tokenlist_entry_t *trivia) {
    switch (trivia->token.id) {
    case TOKEN_WHITESPACE:
    case TOKEN_COMMENT:
    case TOKEN_NEWLINE:
        return true;
    default:
        return false;
--- a/tests/.clang-format
+++ b/tests/.clang-format
@@ -0,0 +1,6 @@
 BasedOnStyle:    LLVM
 IndentWidth:     4
 Cpp11BracedListStyle: true
 AlignArrayOfStructures: Left
 AllowShortFunctionsOnASingleLine: Empty
 ColumnLimit: 120
--- a/tests/ast.c
+++ b/tests/ast.c
@@ -0,0 +1,22 @@
 #include "../src/ast.h"
 #include "munit.h"
 // Allocating a node must yield a non-null node and no error; the node is
 // released again afterwards.
 MunitResult test_ast_node_alloc(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    ast_node_t *allocated = nullptr;
    error_t *failure = ast_node_alloc(&allocated);
    munit_assert_ptr_not_null(allocated);
    munit_assert_ptr_null(failure);
    ast_node_free(allocated);
    return MUNIT_OK;
 }
 // Test table for the AST module. Each row names a test and the function that
 // implements it. The final all-null row presumably acts as the list
 // terminator expected by munit -- NOTE(review): confirm against munit.h.
 MunitTest ast_tests[] = {
    {"/node_alloc", test_ast_node_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr,       nullptr,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
--- a/tests/input/regression/test_no_operands_eof.asm
+++ b/tests/input/regression/test_no_operands_eof.asm
@@ -0,0 +1,5 @@
 ; regression test for two issues:
 ;  - parsing two zero operand instructions in a row
 ;  - a zero operand instruction just before eof
    syscall
    ret
--- a/tests/input/regression/test_trivia_head.asm
+++ b/tests/input/regression/test_trivia_head.asm
@@ -0,0 +1,5 @@
 ; sample program with trivia on the head of the tokenlist
 _start:
    xor rax, rax
    call exit
--- a/tests/lexer.c
+++ b/tests/lexer.c
@@ -0,0 +1,896 @@
 #include "../src/lexer.h"
 #include "../src/error.h"
 #include "munit.h"
 #include <string.h>
 // Attach an in-memory read stream over `input` to a lexer that has no file
 // open yet, and reset its position and buffer counters to zero.
 void lexer_setup_memory_test(lexer_t *lex, const char *input) {
    munit_assert_null(lex->fp);
    const size_t input_length = strlen(input);
    // fmemopen takes a non-const buffer; the stream is opened read-only.
    FILE *memory_stream = fmemopen((void *)input, input_length, "rb");
    munit_assert_not_null(memory_stream);
    lex->buffer_count = 0;
    lex->character_number = 0;
    lex->line_number = 0;
    lex->fp = memory_stream;
 }
 // Read the next token and assert that it was produced without error and has
 // the expected id, text, line and column. The token is cleaned up afterwards.
 void lexer_expect_one_token(lexer_t *lex, lexer_token_id_t id, const char *value, size_t line, size_t column) {
    lexer_token_t actual = {};
    error_t *failure = lexer_next(lex, &actual);
    munit_assert_null(failure);
    munit_assert_int(actual.id, ==, id);
    munit_assert_string_equal(actual.value, value);
    munit_assert_int(actual.line_number, ==, line);
    munit_assert_int(actual.character_number, ==, column);
    lexer_token_cleanup(&actual);
 }
 // Assert that the next read from the lexer reports err_eof.
 void lexer_expect_eof(lexer_t *lex) {
    lexer_token_t unused = {};
    error_t *outcome = lexer_next(lex, &unused);
    munit_assert_ptr_equal(outcome, err_eof);
 }
 // Lex `value` from memory and require exactly one token with the given id,
 // whose text equals the whole input, at line 0 / column 0, followed by EOF.
 void lexer_test_one_token(lexer_token_id_t id, const char *value) {
    lexer_t lexer = {};
    lexer_setup_memory_test(&lexer, value);
    lexer_expect_one_token(&lexer, id, value, 0, 0);
    lexer_expect_eof(&lexer);
    lexer_close(&lexer);
 }
 // Each input must lex as a single TOKEN_IDENTIFIER.
 MunitResult test_lexer_identifier(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const identifiers[] = {"identifier", "_identifier", "_identifier123_55"};
    const size_t count = sizeof(identifiers) / sizeof(identifiers[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_IDENTIFIER, identifiers[i]);
    }
    return MUNIT_OK;
 }
 // Expected properties of a single token in a table-driven lexer test.
 typedef struct token_data {
    lexer_token_id_t id; // expected token id
    const char *value;   // expected token text; const because the tables store string literals
    size_t line;         // expected line number
    size_t column;       // expected character (column) number
 } token_data_t;
 // One token-boundary test case: an input string and the two tokens it is
 // expected to lex into. test_lexer_boundary stops at the first entry whose
 // `input` is null, so tables end with such an entry.
 typedef struct boundary {
    const char *input;   // text handed to the lexer
    token_data_t first;  // expected first token
    token_data_t second; // expected second token
 } boundary_t;
 // Run every case of a table that ends with a null `input`: lex the input and
 // require exactly the two expected tokens followed by EOF.
 void test_lexer_boundary(boundary_t boundaries[]) {
    for (boundary_t *current = boundaries; current->input; ++current) {
        const token_data_t head = current->first;
        const token_data_t tail = current->second;
        lexer_t lexer = {};
        lexer_setup_memory_test(&lexer, current->input);
        lexer_expect_one_token(&lexer, head.id, head.value, head.line, head.column);
        lexer_expect_one_token(&lexer, tail.id, tail.value, tail.line, tail.column);
        lexer_expect_eof(&lexer);
        lexer_close(&lexer);
    }
 }
 // An identifier must end at the first character that starts another token;
 // every case checks the identifier "id" and the token that follows it.
 MunitResult test_lexer_identifier_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"id:", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COLON, ":", 0, 2}},
        {"id[", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_LBRACKET, "[", 0, 2}},
        {"id]", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_RBRACKET, "]", 0, 2}},
        {"id+", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_PLUS, "+", 0, 2}},
        {"id-", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_MINUS, "-", 0, 2}},
        {"id*", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_ASTERISK, "*", 0, 2}},
        {"id.", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_DOT, ".", 0, 2}},
        {"id;comment", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COMMENT, ";comment", 0, 2}},
        {"id\n", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 2}},
        {"id\r\n", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 2}},
        {"id ", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 2}},
        {"id\t", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 2}},
        // terminator: null input ends the table
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Each input must lex as a single TOKEN_DECIMAL.
 MunitResult test_lexer_decimal(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const decimals[] = {"123", "0", "42"};
    const size_t count = sizeof(decimals) / sizeof(decimals[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_DECIMAL, decimals[i]);
    }
    return MUNIT_OK;
 }
 // Decimal literals with a ":8", ":16", ":32" or ":64" suffix must still lex
 // as one TOKEN_DECIMAL that includes the suffix.
 MunitResult test_lexer_decimal_with_suffix(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const decimals[] = {"123:8", "0:16", "42:32", "69:64"};
    const size_t count = sizeof(decimals) / sizeof(decimals[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_DECIMAL, decimals[i]);
    }
    return MUNIT_OK;
 }
 // Each "0x" literal (upper- and lower-case digits) must lex as a single
 // TOKEN_HEXADECIMAL.
 MunitResult test_lexer_hexadecimal(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const literals[] = {"0x123", "0xDEAD", "0x0", "0xabcdef", "0xABCDEF"};
    const size_t count = sizeof(literals) / sizeof(literals[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_HEXADECIMAL, literals[i]);
    }
    return MUNIT_OK;
 }
 // Hexadecimal literals with a size suffix must lex as one TOKEN_HEXADECIMAL
 // that includes the suffix.
 MunitResult test_lexer_hexadecimal_with_suffix(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const literals[] = {"0x123:8", "0xDEAD:16", "0xABC:32", "0xffff:64"};
    const size_t count = sizeof(literals) / sizeof(literals[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_HEXADECIMAL, literals[i]);
    }
    return MUNIT_OK;
 }
 // Each "0o" literal must lex as a single TOKEN_OCTAL.
 MunitResult test_lexer_octal(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const literals[] = {"0o777", "0o0", "0o123"};
    const size_t count = sizeof(literals) / sizeof(literals[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_OCTAL, literals[i]);
    }
    return MUNIT_OK;
 }
 // Octal literals with a size suffix must lex as one TOKEN_OCTAL that includes
 // the suffix.
 MunitResult test_lexer_octal_with_suffix(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const literals[] = {"0o777:8", "0o123:16", "0o777:32", "0o123:64"};
    const size_t count = sizeof(literals) / sizeof(literals[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_OCTAL, literals[i]);
    }
    return MUNIT_OK;
 }
 // Each "0b" literal must lex as a single TOKEN_BINARY.
 MunitResult test_lexer_binary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const literals[] = {"0b101", "0b0", "0b1", "0b01010101"};
    const size_t count = sizeof(literals) / sizeof(literals[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_BINARY, literals[i]);
    }
    return MUNIT_OK;
 }
 // Binary literals with a size suffix must lex as one TOKEN_BINARY that
 // includes the suffix.
 MunitResult test_lexer_binary_with_suffix(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const literals[] = {"0b101:8", "0b0:16", "0b1:32", "0b01010101:64"};
    const size_t count = sizeof(literals) / sizeof(literals[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_BINARY, literals[i]);
    }
    return MUNIT_OK;
 }
 // A lone ":" must lex as exactly one TOKEN_COLON followed by EOF.
 MunitResult test_lexer_colon(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const source = ":";
    lexer_test_one_token(TOKEN_COLON, source);
    return MUNIT_OK;
 }
 // A lone "," must lex as exactly one TOKEN_COMMA followed by EOF.
 MunitResult test_lexer_comma(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const source = ",";
    lexer_test_one_token(TOKEN_COMMA, source);
    return MUNIT_OK;
 }
 // A lone "[" must lex as exactly one TOKEN_LBRACKET followed by EOF.
 MunitResult test_lexer_lbracket(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const source = "[";
    lexer_test_one_token(TOKEN_LBRACKET, source);
    return MUNIT_OK;
 }
 // A lone "]" must lex as exactly one TOKEN_RBRACKET followed by EOF.
 MunitResult test_lexer_rbracket(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const source = "]";
    lexer_test_one_token(TOKEN_RBRACKET, source);
    return MUNIT_OK;
 }
 // A lone "+" must lex as exactly one TOKEN_PLUS followed by EOF.
 MunitResult test_lexer_plus(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const source = "+";
    lexer_test_one_token(TOKEN_PLUS, source);
    return MUNIT_OK;
 }
 // A lone "-" must lex as exactly one TOKEN_MINUS followed by EOF.
 MunitResult test_lexer_minus(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const source = "-";
    lexer_test_one_token(TOKEN_MINUS, source);
    return MUNIT_OK;
 }
 // A lone "*" must lex as exactly one TOKEN_ASTERISK followed by EOF.
 MunitResult test_lexer_asterisk(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const source = "*";
    lexer_test_one_token(TOKEN_ASTERISK, source);
    return MUNIT_OK;
 }
 // A lone "." must lex as exactly one TOKEN_DOT followed by EOF.
 MunitResult test_lexer_dot(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    const char *const source = ".";
    lexer_test_one_token(TOKEN_DOT, source);
    return MUNIT_OK;
 }
 // Text starting with ";" must lex as a single TOKEN_COMMENT, including the
 // case of a bare ";".
 MunitResult test_lexer_comment(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const comments[] = {";This is a comment", "; Another comment", ";"};
    const size_t count = sizeof(comments) / sizeof(comments[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_COMMENT, comments[i]);
    }
    return MUNIT_OK;
 }
 // Runs of spaces and tabs must lex as a single TOKEN_WHITESPACE.
 MunitResult test_lexer_whitespace(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    const char *const blanks[] = {" ", "  ", "\t", " \t "};
    const size_t count = sizeof(blanks) / sizeof(blanks[0]);
    for (size_t i = 0; i < count; ++i) {
        lexer_test_one_token(TOKEN_WHITESPACE, blanks[i]);
    }
    return MUNIT_OK;
 }
 // Both "\n" and the Windows-style "\r\n" must lex as one TOKEN_NEWLINE.
 MunitResult test_lexer_newlines(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    // lexer_test_one_token performs the same setup / one token at 0:0 / EOF /
    // close sequence that would otherwise be spelled out for each input.
    lexer_test_one_token(TOKEN_NEWLINE, "\n");
    lexer_test_one_token(TOKEN_NEWLINE, "\r\n");
    return MUNIT_OK;
 }
 MunitResult test_lexer_line_numbers(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    lexer_t lex = {};
    lexer_setup_memory_test(&lex, "a\nb\nc");
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "a", 0, 0);
    lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 0, 1);
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "b", 1, 0);
    lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 1, 1);
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "c", 2, 0);
    lexer_expect_eof(&lex);
    lexer_close(&lex);
    return MUNIT_OK;
 }
 // A decimal literal must end at the first character that starts another
 // token; every case checks the literal "123" and the token that follows it.
 MunitResult test_lexer_decimal_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"123,", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMA, ",", 0, 3}},
        {"123:", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COLON, ":", 0, 3}},
        {"123[", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 3}},
        {"123]", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 3}},
        {"123+", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_PLUS, "+", 0, 3}},
        {"123-", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_MINUS, "-", 0, 3}},
        {"123*", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 3}},
        {"123.", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_DOT, ".", 0, 3}},
        {"123;", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMENT, ";", 0, 3}},
        {"123\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 3}},
        {"123\r\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 3}},
        {"123 ", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 3}},
        {"123\t", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 3}},
        // terminator: null input ends the table
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for hexadecimal literals: every row pairs an input with the
 // two tokens expected from it (id, text, then what appear to be line and
 // column). "0x123" must end right before each following delimiter. The
 // nullptr row terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_hexadecimal_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"0x123,",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0x123:",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0x123[",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0x123]",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0x123+",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0x123-",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0x123*",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0x123.",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0x123;",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0x123\n",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0x123\r\n", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0x123 ",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0x123\t",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                                 {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for octal literals: every row pairs an input with the two
 // tokens expected from it (id, text, then what appear to be line and column).
 // "0o123" must end right before each following delimiter. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_octal_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"0o123,",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0o123:",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0o123[",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0o123]",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0o123+",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0o123-",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0o123*",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0o123.",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0o123;",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0o123\n",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0o123\r\n", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0o123 ",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0o123\t",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                           {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for binary literals: every row pairs an input with the two
 // tokens expected from it (id, text, then what appear to be line and column).
 // "0b101" must end right before each following delimiter. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_binary_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"0b101,",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0b101:",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0b101[",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0b101]",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0b101+",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0b101-",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0b101*",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0b101.",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0b101;",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0b101\n",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0b101\r\n", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0b101 ",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0b101\t",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                            {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for ':': every row pairs an input with the two tokens
 // expected from it (id, text, then what appear to be line and column). The
 // colon must be a token of its own whatever follows it. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_colon_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {":,",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"::",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {":[",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {":]",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {":+",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {":-",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {":*",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {":.",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {":;",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {":\n",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {":\r\n", {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {": ",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {":\t",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                       {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for ',': every row pairs an input with the two tokens
 // expected from it (id, text, then what appear to be line and column). The
 // comma must be a token of its own whatever follows it. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_comma_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {",,",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {",:",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {",[",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {",]",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {",+",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {",-",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {",*",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {",.",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {",;",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {",\n",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {",\r\n", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {", ",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {",\t",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                       {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for '[': every row pairs an input with the two tokens
 // expected from it (id, text, then what appear to be line and column). The
 // bracket must be a token of its own whatever follows it. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_lbracket_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"[,",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"[:",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"[[",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"[]",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"[+",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"[-",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"[*",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"[.",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"[;",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"[\n",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"[\r\n", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"[ ",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"[\t",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                          {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for ']': every row pairs an input with the two tokens
 // expected from it (id, text, then what appear to be line and column). The
 // bracket must be a token of its own whatever follows it. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_rbracket_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"],",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"]:",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"][",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"]]",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"]+",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"]-",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"]*",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"].",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"];",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"]\n",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"]\r\n", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"] ",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"]\t",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                          {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for '+': every row pairs an input with the two tokens
 // expected from it (id, text, then what appear to be line and column). The
 // plus sign must be a token of its own whatever follows it. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_plus_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"+,",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"+:",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"+[",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"+]",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"++",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"+-",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"+*",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"+.",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"+;",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"+\n",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"+\r\n", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"+ ",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"+\t",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                      {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for '-': every row pairs an input with the two tokens
 // expected from it (id, text, then what appear to be line and column). The
 // minus sign must be a token of its own whatever follows it. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_minus_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"-,",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"-:",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"-[",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"-]",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"-+",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"--",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"-*",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"-.",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"-;",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"-\n",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"-\r\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"- ",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"-\t",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                       {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for '*': every row pairs an input with the two tokens
 // expected from it (id, text, then what appear to be line and column). The
 // asterisk must be a token of its own whatever follows it. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_asterisk_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"*,",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"*:",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"*[",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"*]",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"*+",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"*-",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"**",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"*.",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"*;",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"*\n",   {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"*\r\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"* ",    {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"*\t",   {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                          {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for '.': every row pairs an input with the two tokens
 // expected from it (id, text, then what appear to be line and column). The
 // dot must be a token of its own whatever follows it. The nullptr row
 // terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_dot_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {".,",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {".:",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {".[",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {".]",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {".+",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {".-",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {".*",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"..",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {".;",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {".\n",   {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {".\r\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {". ",    {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {".\t",   {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                     {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for comments: the only rows are a comment followed by "\n"
 // and by "\r\n", and in both the comment token stops before the line ending.
 // Each row is {input, first expected token, second expected token}. The
 // nullptr row terminates the table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_comment_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {";comment\n",   {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}  },
        {";comment\r\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}},
        {nullptr,        {},                                {}                           },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for whitespace: a single space followed by each
 // non-whitespace delimiter must yield a one-character TOKEN_WHITESPACE and
 // then the delimiter's own token. Each row is {input, first expected token,
 // second expected token}. The nullptr row terminates the table, which is
 // passed to test_lexer_boundary.
 MunitResult test_lexer_whitespace_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {" ,",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMA, ",", 0, 1}     },
        {" :",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COLON, ":", 0, 1}     },
        {" [",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}  },
        {" ]",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}  },
        {" +",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_PLUS, "+", 0, 1}      },
        {" -",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_MINUS, "-", 0, 1}     },
        {" *",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}  },
        {" .",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_DOT, ".", 0, 1}       },
        {" ;",    {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}   },
        {" \n",   {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}  },
        {" \r\n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {nullptr, {},                            {}                           },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for "\n": every row pairs an input with the two tokens
 // expected from it. The second token is always expected at (1, 0), i.e. its
 // position restarts after the newline. The nullptr row terminates the table,
 // which is passed to test_lexer_boundary.
 MunitResult test_lexer_newline_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"\n,",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}      },
        {"\n:",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}      },
        {"\n[",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}   },
        {"\n]",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}   },
        {"\n+",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}       },
        {"\n-",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}      },
        {"\n*",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}   },
        {"\n.",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}        },
        {"\n;",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}    },
        {"\n\n",   {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}   },
        {"\n\r\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0} },
        {"\n ",    {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0} },
        {"\n\t",   {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
        {nullptr,  {},                          {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for "\r\n": every row pairs an input with the two tokens
 // expected from it. The CRLF pair is expected as one TOKEN_NEWLINE, and the
 // second token is always expected at (1, 0). The nullptr row terminates the
 // table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_crlf_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"\r\n,",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}      },
        {"\r\n:",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}      },
        {"\r\n[",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}   },
        {"\r\n]",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}   },
        {"\r\n+",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}       },
        {"\r\n-",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}      },
        {"\r\n*",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}   },
        {"\r\n.",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}        },
        {"\r\n;",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}    },
        {"\r\n\n",   {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}   },
        {"\r\n\r\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0} },
        {"\r\n ",    {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0} },
        {"\r\n\t",   {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
        {nullptr,    {},                            {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Boundary table for numbers with a ":<width>" suffix (8, 16, 32, 64). The
 // suffix is expected to be part of the number token's text, and the following
 // delimiter must start a new token right after it. Each row is {input, first
 // expected token, second expected token}. The nullptr row terminates the
 // table, which is passed to test_lexer_boundary.
 MunitResult test_lexer_number_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"0x123:8,",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_COMMA, ",", 0, 7}      },
        {"0x123:16:",    {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_COLON, ":", 0, 8}      },
        {"0o777:32[",    {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_LBRACKET, "[", 0, 8}   },
        {"0b101:64]",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_RBRACKET, "]", 0, 8}   },
        {"0x123:8+",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_PLUS, "+", 0, 7}       },
        {"0x123:16-",    {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_MINUS, "-", 0, 8}      },
        {"0o777:32*",    {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_ASTERISK, "*", 0, 8}   },
        {"0b101:64.",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_DOT, ".", 0, 8}        },
        {"0x123:8;",     {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_COMMENT, ";", 0, 7}    },
        {"0x123:16\n",   {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}   },
        {"0o777:32\r\n", {TOKEN_OCTAL, "0o777:32", 0, 0},       {TOKEN_NEWLINE, "\r\n", 0, 8} },
        {"0b101:64 ",    {TOKEN_BINARY, "0b101:64", 0, 0},      {TOKEN_WHITESPACE, " ", 0, 8} },
        {"0x123:8\t",    {TOKEN_HEXADECIMAL, "0x123:8", 0, 0},  {TOKEN_WHITESPACE, "\t", 0, 7}},
        {nullptr,        {},                                    {}                            },
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Feeds four 128-character number literals (decimal and hexadecimal, each
 // with and without a ":64" suffix) to a fresh lexer and checks that the first
 // token is a decimal or hexadecimal token whose text is 128 characters long.
 MunitResult test_lexer_maximum_length_numbers(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    // nullptr-terminated; every literal is exactly 128 characters
    char *numbers[] = {
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999999988",
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999998:64",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999999988",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999998:64",
        nullptr,
    };
    for (char **cursor = numbers; *cursor; ++cursor) {
        char *number = *cursor;
        // Guard against a miscounted fixture before involving the lexer
        munit_assert_size(128, ==, strlen(number));
        lexer_token_t token = {};
        lexer_t lx = {};
        lexer_setup_memory_test(&lx, number);
        lexer_next(&lx, &token);
        munit_assert_true(token.id == TOKEN_DECIMAL || token.id == TOKEN_HEXADECIMAL);
        munit_assert_size(128, ==, strlen(token.value));
        lexer_token_cleanup(&token);
        lexer_close(&lx);
    }
    return MUNIT_OK;
 }
 MunitResult test_lexer_too_long_numbers(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    char *numbers[] = {
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999999988",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999999988",
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999998:64",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999998:64",
    };
    // Without suffix we expect 128 characters and then failure
    for (size_t i = 0; i < 2; ++i) {
        auto number = numbers[i];
        munit_assert_size(129, ==, strlen(number));
        lexer_t lex = {};
        lexer_token_t token = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);
        munit_assert_int(TOKEN_ERROR, ==, token.id);
        munit_assert_size(128, ==, strlen(token.value));
        lexer_token_cleanup(&token);
        lexer_close(&lex);
    }
    // With suffix we fail at the suffix boundary
    for (size_t i = 2; i < 4; ++i) {
        auto number = numbers[i];
        munit_assert_size(129, ==, strlen(number));
        lexer_t lex = {};
        lexer_token_t token = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);
        munit_assert_int(TOKEN_ERROR, ==, token.id);
        munit_assert_size(128, >=, strlen(token.value));
        lexer_token_cleanup(&token);
        lexer_expect_one_token(&lex, TOKEN_COLON, ":", 0, 126);
        lexer_expect_one_token(&lex, TOKEN_DECIMAL, "64", 0, 127);
        lexer_close(&lex);
    }
    return MUNIT_OK;
 }
 // Lexes a run of exactly 1024 spaces and expects one TOKEN_WHITESPACE whose
 // text is all 1024 characters.
 MunitResult test_lexer_max_whitespace_length(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    // 1024 spaces plus the terminating NUL
    char whitespace[1025];
    memset(whitespace, ' ', sizeof(whitespace) - 1);
    whitespace[sizeof(whitespace) - 1] = '\0';
    munit_assert_size(1024, ==, strlen(whitespace));
    lexer_token_t token = {};
    lexer_t lx = {};
    lexer_setup_memory_test(&lx, whitespace);
    lexer_next(&lx, &token);
    munit_assert_int(TOKEN_WHITESPACE, ==, token.id);
    munit_assert_size(1024, ==, strlen(token.value));
    lexer_token_cleanup(&token);
    lexer_close(&lx);
    return MUNIT_OK;
 }
 // Lexes a run of 1025 spaces and expects a TOKEN_ERROR carrying the first
 // 1024 characters, followed by a one-space TOKEN_WHITESPACE for the remaining
 // character at column 1024.
 MunitResult test_lexer_too_long_whitespace(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    // 1025 spaces plus the terminating NUL
    char whitespace[1026];
    memset(whitespace, ' ', sizeof(whitespace) - 1);
    whitespace[sizeof(whitespace) - 1] = '\0';
    munit_assert_size(1025, ==, strlen(whitespace));
    lexer_token_t token = {};
    lexer_t lx = {};
    lexer_setup_memory_test(&lx, whitespace);
    lexer_next(&lx, &token);
    munit_assert_int(TOKEN_ERROR, ==, token.id);
    munit_assert_size(1024, ==, strlen(token.value));
    lexer_token_cleanup(&token);
    // The one space that did not fit is lexed as its own token
    lexer_expect_one_token(&lx, TOKEN_WHITESPACE, " ", 0, 1024);
    lexer_close(&lx);
    return MUNIT_OK;
 }
 // Registration table for the lexer tests. Each row gives the test's path
 // name and its function; the next two slots (setup/tear-down in munit's
 // MunitTest layout) are unused, no special options are set, and no parameter
 // list is supplied. The all-nullptr row terminates the table.
 MunitTest lexer_tests[] = {
    {"/identifier",              test_lexer_identifier,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/identifier_boundary",     test_lexer_identifier_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/decimal",                 test_lexer_decimal,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/decimal_boundary",        test_lexer_decimal_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal",             test_lexer_hexadecimal,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal_with_suffix", test_lexer_hexadecimal_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal_boundary",    test_lexer_hexadecimal_boundary,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal",                   test_lexer_octal,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal_with_suffix",       test_lexer_octal_with_suffix,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal_boundary",          test_lexer_octal_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary",                  test_lexer_binary,                  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary_with_suffix",      test_lexer_binary_with_suffix,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary_boundary",         test_lexer_binary_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/number_boundary",         test_lexer_number_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/colon",                   test_lexer_colon,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/colon_boundary",          test_lexer_colon_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comma",                   test_lexer_comma,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comma_boundary",          test_lexer_comma_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/lbracket",                test_lexer_lbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/lbracket_boundary",       test_lexer_lbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/rbracket",                test_lexer_rbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/rbracket_boundary",       test_lexer_rbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/plus",                    test_lexer_plus,                    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/plus_boundary",           test_lexer_plus_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/minus",                   test_lexer_minus,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/minus_boundary",          test_lexer_minus_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/asterisk",                test_lexer_asterisk,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/asterisk_boundary",       test_lexer_asterisk_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/dot",                     test_lexer_dot,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/dot_boundary",            test_lexer_dot_boundary,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comment",                 test_lexer_comment,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comment_boundary",        test_lexer_comment_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/whitespace",              test_lexer_whitespace,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/whitespace_boundary",     test_lexer_whitespace_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/newlines",                test_lexer_newlines,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/newline_boundary",        test_lexer_newline_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/crlf_boundary",           test_lexer_crlf_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/line_numbers",            test_lexer_line_numbers,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/maximum_length_numbers",  test_lexer_maximum_length_numbers,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/too_long_numbers",        test_lexer_too_long_numbers,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/max_whitespace_length",   test_lexer_max_whitespace_length,   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/too_long_whitespace",     test_lexer_too_long_whitespace,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    // sentinel: marks the end of the table
    {nullptr,                    nullptr,                            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
--- a/tests/main.c
+++ b/tests/main.c
@@ -0,0 +1,18 @@
 #include "munit.h"
 extern MunitTest ast_tests[];
 extern MunitTest lexer_tests[];
 extern MunitTest regression_tests[];
 int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
    MunitSuite suites[] = {
        {"/regression", regression_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/ast",        ast_tests,        nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/lexer",      lexer_tests,      nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {nullptr,       nullptr,          nullptr, 0, MUNIT_SUITE_OPTION_NONE},
    };
    MunitSuite master_suite = {"/oas", nullptr, suites, 1, MUNIT_SUITE_OPTION_NONE};
    return munit_suite_main(&master_suite, nullptr, argc, argv);
 }
--- a/tests/munit.c
+++ b/tests/munit.c
--- a/tests/munit.h
+++ b/tests/munit.h
@@ -0,0 +1,535 @@
 /* µnit Testing Framework
 * Copyright (c) 2013-2017 Evan Nemerson <evan@nemerson.com>
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 #if !defined(MUNIT_H)
 #define MUNIT_H
 #include <stdarg.h>
 #include <stdlib.h>
 #define MUNIT_VERSION(major, minor, revision) \
  (((major) << 16) | ((minor) << 8) | (revision))
 #define MUNIT_CURRENT_VERSION MUNIT_VERSION(0, 4, 1)
 #if defined(_MSC_VER) && (_MSC_VER < 1600)
 #  define munit_int8_t   __int8
 #  define munit_uint8_t  unsigned __int8
 #  define munit_int16_t  __int16
 #  define munit_uint16_t unsigned __int16
 #  define munit_int32_t  __int32
 #  define munit_uint32_t unsigned __int32
 #  define munit_int64_t  __int64
 #  define munit_uint64_t unsigned __int64
 #else
 #  include <stdint.h>
 #  define munit_int8_t   int8_t
 #  define munit_uint8_t  uint8_t
 #  define munit_int16_t  int16_t
 #  define munit_uint16_t uint16_t
 #  define munit_int32_t  int32_t
 #  define munit_uint32_t uint32_t
 #  define munit_int64_t  int64_t
 #  define munit_uint64_t uint64_t
 #endif
 #if defined(_MSC_VER) && (_MSC_VER < 1800)
 #  if !defined(PRIi8)
 #    define PRIi8 "i"
 #  endif
 #  if !defined(PRIi16)
 #    define PRIi16 "i"
 #  endif
 #  if !defined(PRIi32)
 #    define PRIi32 "i"
 #  endif
 #  if !defined(PRIi64)
 #    define PRIi64 "I64i"
 #  endif
 #  if !defined(PRId8)
 #    define PRId8 "d"
 #  endif
 #  if !defined(PRId16)
 #    define PRId16 "d"
 #  endif
 #  if !defined(PRId32)
 #    define PRId32 "d"
 #  endif
 #  if !defined(PRId64)
 #    define PRId64 "I64d"
 #  endif
 #  if !defined(PRIx8)
 #    define PRIx8 "x"
 #  endif
 #  if !defined(PRIx16)
 #    define PRIx16 "x"
 #  endif
 #  if !defined(PRIx32)
 #    define PRIx32 "x"
 #  endif
 #  if !defined(PRIx64)
 #    define PRIx64 "I64x"
 #  endif
 #  if !defined(PRIu8)
 #    define PRIu8 "u"
 #  endif
 #  if !defined(PRIu16)
 #    define PRIu16 "u"
 #  endif
 #  if !defined(PRIu32)
 #    define PRIu32 "u"
 #  endif
 #  if !defined(PRIu64)
 #    define PRIu64 "I64u"
 #  endif
 #else
 #  include <inttypes.h>
 #endif
 #if !defined(munit_bool)
 #  if defined(bool)
 #    define munit_bool bool
 #  elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
 #    define munit_bool _Bool
 #  else
 #    define munit_bool int
 #  endif
 #endif
 #if defined(__cplusplus)
 extern "C" {
 #endif
 #if defined(__GNUC__)
 #  define MUNIT_LIKELY(expr) (__builtin_expect ((expr), 1))
 #  define MUNIT_UNLIKELY(expr) (__builtin_expect ((expr), 0))
 #  define MUNIT_UNUSED __attribute__((__unused__))
 #else
 #  define MUNIT_LIKELY(expr) (expr)
 #  define MUNIT_UNLIKELY(expr) (expr)
 #  define MUNIT_UNUSED
 #endif
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__PGI)
 #  define MUNIT_ARRAY_PARAM(name) name
 #else
 #  define MUNIT_ARRAY_PARAM(name)
 #endif
 #if !defined(_WIN32)
 #  define MUNIT_SIZE_MODIFIER "z"
 #  define MUNIT_CHAR_MODIFIER "hh"
 #  define MUNIT_SHORT_MODIFIER "h"
 #else
 #  if defined(_M_X64) || defined(__amd64__)
 #    define MUNIT_SIZE_MODIFIER "I64"
 #  else
 #    define MUNIT_SIZE_MODIFIER ""
 #  endif
 #  define MUNIT_CHAR_MODIFIER ""
 #  define MUNIT_SHORT_MODIFIER ""
 #endif
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
 #  define MUNIT_NO_RETURN _Noreturn
 #elif defined(__GNUC__)
 #  define MUNIT_NO_RETURN __attribute__((__noreturn__))
 #elif defined(_MSC_VER)
 #  define MUNIT_NO_RETURN __declspec(noreturn)
 #else
 #  define MUNIT_NO_RETURN
 #endif
 #if defined(_MSC_VER) &&  (_MSC_VER >= 1500)
 #  define MUNIT_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127))
 #  define MUNIT_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))
 #else
 #  define MUNIT_PUSH_DISABLE_MSVC_C4127_
 #  define MUNIT_POP_DISABLE_MSVC_C4127_
 #endif
 typedef enum {
  MUNIT_LOG_DEBUG,
  MUNIT_LOG_INFO,
  MUNIT_LOG_WARNING,
  MUNIT_LOG_ERROR
 } MunitLogLevel;
 #if defined(__GNUC__) && !defined(__MINGW32__)
 #  define MUNIT_PRINTF(string_index, first_to_check) __attribute__((format (printf, string_index, first_to_check)))
 #else
 #  define MUNIT_PRINTF(string_index, first_to_check)
 #endif
 MUNIT_PRINTF(4, 5)
 void munit_logf_ex(MunitLogLevel level, const char* filename, int line, const char* format, ...);
 #define munit_logf(level, format, ...) \
  munit_logf_ex(level, __FILE__, __LINE__, format, __VA_ARGS__)
 #define munit_log(level, msg) \
  munit_logf(level, "%s", msg)
 MUNIT_NO_RETURN
 MUNIT_PRINTF(3, 4)
 void munit_errorf_ex(const char* filename, int line, const char* format, ...);
 #define munit_errorf(format, ...) \
  munit_errorf_ex(__FILE__, __LINE__, format, __VA_ARGS__)
 #define munit_error(msg) \
  munit_errorf("%s", msg)
 #define munit_assert(expr) \
  do { \
    if (!MUNIT_LIKELY(expr)) { \
      munit_error("assertion failed: " #expr); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #define munit_assert_true(expr) \
  do { \
    if (!MUNIT_LIKELY(expr)) { \
      munit_error("assertion failed: " #expr " is not true"); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #define munit_assert_false(expr) \
  do { \
    if (!MUNIT_LIKELY(!(expr))) { \
      munit_error("assertion failed: " #expr " is not false"); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #define munit_assert_type_full(prefix, suffix, T, fmt, a, op, b)   \
  do { \
    T munit_tmp_a_ = (a); \
    T munit_tmp_b_ = (b); \
    if (!(munit_tmp_a_ op munit_tmp_b_)) {                               \
      munit_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")", \
                   #a, #op, #b, munit_tmp_a_, #op, munit_tmp_b_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #define munit_assert_type(T, fmt, a, op, b) \
  munit_assert_type_full("", "", T, fmt, a, op, b)
 #define munit_assert_char(a, op, b) \
  munit_assert_type_full("'\\x", "'", char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
 #define munit_assert_uchar(a, op, b) \
  munit_assert_type_full("'\\x", "'", unsigned char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
 #define munit_assert_short(a, op, b) \
  munit_assert_type(short, MUNIT_SHORT_MODIFIER "d", a, op, b)
 #define munit_assert_ushort(a, op, b) \
  munit_assert_type(unsigned short, MUNIT_SHORT_MODIFIER "u", a, op, b)
 #define munit_assert_int(a, op, b) \
  munit_assert_type(int, "d", a, op, b)
 #define munit_assert_uint(a, op, b) \
  munit_assert_type(unsigned int, "u", a, op, b)
 #define munit_assert_long(a, op, b) \
  munit_assert_type(long int, "ld", a, op, b)
 #define munit_assert_ulong(a, op, b) \
  munit_assert_type(unsigned long int, "lu", a, op, b)
 #define munit_assert_llong(a, op, b) \
  munit_assert_type(long long int, "lld", a, op, b)
 #define munit_assert_ullong(a, op, b) \
  munit_assert_type(unsigned long long int, "llu", a, op, b)
 #define munit_assert_size(a, op, b) \
  munit_assert_type(size_t, MUNIT_SIZE_MODIFIER "u", a, op, b)
 #define munit_assert_float(a, op, b) \
  munit_assert_type(float, "f", a, op, b)
 #define munit_assert_double(a, op, b) \
  munit_assert_type(double, "g", a, op, b)
 #define munit_assert_ptr(a, op, b) \
  munit_assert_type(const void*, "p", a, op, b)
 #define munit_assert_int8(a, op, b)             \
  munit_assert_type(munit_int8_t, PRIi8, a, op, b)
 #define munit_assert_uint8(a, op, b) \
  munit_assert_type(munit_uint8_t, PRIu8, a, op, b)
 #define munit_assert_int16(a, op, b) \
  munit_assert_type(munit_int16_t, PRIi16, a, op, b)
 #define munit_assert_uint16(a, op, b) \
  munit_assert_type(munit_uint16_t, PRIu16, a, op, b)
 #define munit_assert_int32(a, op, b) \
  munit_assert_type(munit_int32_t, PRIi32, a, op, b)
 #define munit_assert_uint32(a, op, b) \
  munit_assert_type(munit_uint32_t, PRIu32, a, op, b)
 #define munit_assert_int64(a, op, b) \
  munit_assert_type(munit_int64_t, PRIi64, a, op, b)
 #define munit_assert_uint64(a, op, b) \
  munit_assert_type(munit_uint64_t, PRIu64, a, op, b)
 #define munit_assert_double_equal(a, b, precision) \
  do { \
    const double munit_tmp_a_ = (a); \
    const double munit_tmp_b_ = (b); \
    const double munit_tmp_diff_ = ((munit_tmp_a_ - munit_tmp_b_) < 0) ? \
      -(munit_tmp_a_ - munit_tmp_b_) : \
      (munit_tmp_a_ - munit_tmp_b_); \
    if (MUNIT_UNLIKELY(munit_tmp_diff_ > 1e-##precision)) { \
      munit_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)", \
 		   #a, #b, munit_tmp_a_, munit_tmp_b_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #include <string.h>
 #define munit_assert_string_equal(a, b) \
  do { \
    const char* munit_tmp_a_ = a; \
    const char* munit_tmp_b_ = b; \
    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) != 0)) { \
      munit_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")", \
                   #a, #b, munit_tmp_a_, munit_tmp_b_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #define munit_assert_string_not_equal(a, b) \
  do { \
    const char* munit_tmp_a_ = a; \
    const char* munit_tmp_b_ = b; \
    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) == 0)) { \
      munit_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")", \
                   #a, #b, munit_tmp_a_, munit_tmp_b_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 /* Assert that the first `size` bytes at `a` and `b` compare equal with memcmp.
  * On a mismatch, scans for the first differing byte and reports its offset
  * through munit_errorf. The branch hint wraps the whole comparison, matching
  * the string assertions in this header. */
 #define munit_assert_memory_equal(size, a, b) \
  do { \
    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
    const size_t munit_tmp_size_ = (size); \
    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) != 0)) { \
      size_t munit_tmp_pos_; \
      for (munit_tmp_pos_ = 0 ; munit_tmp_pos_ < munit_tmp_size_ ; munit_tmp_pos_++) { \
        if (munit_tmp_a_[munit_tmp_pos_] != munit_tmp_b_[munit_tmp_pos_]) { \
          munit_errorf("assertion failed: memory %s == %s, at offset %" MUNIT_SIZE_MODIFIER "u", \
                       #a, #b, munit_tmp_pos_); \
          break; \
        } \
      } \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 /* Assert that the first `size` bytes at `a` and `b` differ (memcmp != 0).
  * The branch hint wraps the whole `== 0` comparison so the failure path is
  * the one marked unlikely, and the byte count is printed with
  * MUNIT_SIZE_MODIFIER like the other size_t values in this header. */
 #define munit_assert_memory_not_equal(size, a, b) \
  do { \
    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
    const size_t munit_tmp_size_ = (size); \
    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) == 0)) { \
      munit_errorf("assertion failed: memory %s != %s (%" MUNIT_SIZE_MODIFIER "u bytes)", \
                   #a, #b, munit_tmp_size_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
 #define munit_assert_ptr_equal(a, b) \
  munit_assert_ptr(a, ==, b)
 #define munit_assert_ptr_not_equal(a, b) \
  munit_assert_ptr(a, !=, b)
 #define munit_assert_null(ptr) \
  munit_assert_ptr(ptr, ==, NULL)
 #define munit_assert_not_null(ptr) \
  munit_assert_ptr(ptr, !=, NULL)
 #define munit_assert_ptr_null(ptr) \
  munit_assert_ptr(ptr, ==, NULL)
 #define munit_assert_ptr_not_null(ptr) \
  munit_assert_ptr(ptr, !=, NULL)
 /*** Memory allocation ***/
 /* Allocation helper; takes the caller's file name and line number in
  * addition to the requested size. Its implementation is not part of this
  * header. */
 void* munit_malloc_ex(const char* filename, int line, size_t size);
 /* Allocate `size` bytes, passing the call site's __FILE__ and __LINE__. */
 #define munit_malloc(size) \
  munit_malloc_ex(__FILE__, __LINE__, (size))
 /* Allocate storage for one object of `type` and cast the result to `type*`. */
 #define munit_new(type) \
  ((type*) munit_malloc(sizeof(type)))
 /* Allocate `nmemb * size` bytes through munit_malloc.
  * NOTE(review): the multiplication is performed in the macro without an
  * overflow check; whether the memory is zeroed depends on munit_malloc_ex,
  * which is not visible here. */
 #define munit_calloc(nmemb, size) \
  munit_malloc((nmemb) * (size))
 /* Allocate an array of `nmemb` objects of `type` and cast to `type*`. */
 #define munit_newa(type, nmemb) \
  ((type*) munit_calloc((nmemb), sizeof(type)))
 /*** Random number generation ***/
 void munit_rand_seed(munit_uint32_t seed);
 munit_uint32_t munit_rand_uint32(void);
 int munit_rand_int_range(int min, int max);
 double munit_rand_double(void);
 void munit_rand_memory(size_t size, munit_uint8_t buffer[MUNIT_ARRAY_PARAM(size)]);
 /*** Tests and Suites ***/
 typedef enum {
  /* Test successful */
  MUNIT_OK,
  /* Test failed */
  MUNIT_FAIL,
  /* Test was skipped */
  MUNIT_SKIP,
  /* Test failed due to circumstances not intended to be tested
   * (things like network errors, invalid parameter value, failure to
   * allocate memory in the test harness, etc.). */
  MUNIT_ERROR
 } MunitResult;
 typedef struct {
  char*  name;
  char** values;
 } MunitParameterEnum;
 typedef struct {
  char* name;
  char* value;
 } MunitParameter;
 const char* munit_parameters_get(const MunitParameter params[], const char* key);
 typedef enum {
  MUNIT_TEST_OPTION_NONE             = 0,
  MUNIT_TEST_OPTION_SINGLE_ITERATION = 1 << 0,
  MUNIT_TEST_OPTION_TODO             = 1 << 1
 } MunitTestOptions;
 typedef MunitResult (* MunitTestFunc)(const MunitParameter params[], void* user_data_or_fixture);
 typedef void*       (* MunitTestSetup)(const MunitParameter params[], void* user_data);
 typedef void        (* MunitTestTearDown)(void* fixture);
 typedef struct {
  char*               name;
  MunitTestFunc       test;
  MunitTestSetup      setup;
  MunitTestTearDown   tear_down;
  MunitTestOptions    options;
  MunitParameterEnum* parameters;
 } MunitTest;
 typedef enum {
  MUNIT_SUITE_OPTION_NONE = 0
 } MunitSuiteOptions;
 typedef struct MunitSuite_ MunitSuite;
 struct MunitSuite_ {
  char*             prefix;
  MunitTest*        tests;
  MunitSuite*       suites;
  unsigned int      iterations;
  MunitSuiteOptions options;
 };
 int munit_suite_main(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
 /* Note: I'm not very happy with this API; it's likely to change if I
 * figure out something better.  Suggestions welcome. */
 typedef struct MunitArgument_ MunitArgument;
 struct MunitArgument_ {
  char* name;
  munit_bool (* parse_argument)(const MunitSuite* suite, void* user_data, int* arg, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
  void (* write_help)(const MunitArgument* argument, void* user_data);
 };
 int munit_suite_main_custom(const MunitSuite* suite,
                            void* user_data,
                            int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)],
                            const MunitArgument arguments[]);
 /* Optional short aliases: when MUNIT_ENABLE_ASSERT_ALIASES is defined, every
  * munit_assert_* macro is also available without the munit_ prefix.
  * Each alias forwards its arguments unchanged to a macro defined above. */
 #if defined(MUNIT_ENABLE_ASSERT_ALIASES)
 #define assert_true(expr) munit_assert_true(expr)
 #define assert_false(expr) munit_assert_false(expr)
 #define assert_char(a, op, b) munit_assert_char(a, op, b)
 #define assert_uchar(a, op, b) munit_assert_uchar(a, op, b)
 #define assert_short(a, op, b) munit_assert_short(a, op, b)
 #define assert_ushort(a, op, b) munit_assert_ushort(a, op, b)
 #define assert_int(a, op, b) munit_assert_int(a, op, b)
 #define assert_uint(a, op, b) munit_assert_uint(a, op, b)
 #define assert_long(a, op, b) munit_assert_long(a, op, b)
 #define assert_ulong(a, op, b) munit_assert_ulong(a, op, b)
 #define assert_llong(a, op, b) munit_assert_llong(a, op, b)
 #define assert_ullong(a, op, b) munit_assert_ullong(a, op, b)
 #define assert_size(a, op, b) munit_assert_size(a, op, b)
 #define assert_float(a, op, b) munit_assert_float(a, op, b)
 #define assert_double(a, op, b) munit_assert_double(a, op, b)
 #define assert_ptr(a, op, b) munit_assert_ptr(a, op, b)
 #define assert_int8(a, op, b) munit_assert_int8(a, op, b)
 #define assert_uint8(a, op, b) munit_assert_uint8(a, op, b)
 #define assert_int16(a, op, b) munit_assert_int16(a, op, b)
 #define assert_uint16(a, op, b) munit_assert_uint16(a, op, b)
 #define assert_int32(a, op, b) munit_assert_int32(a, op, b)
 #define assert_uint32(a, op, b) munit_assert_uint32(a, op, b)
 #define assert_int64(a, op, b) munit_assert_int64(a, op, b)
 #define assert_uint64(a, op, b) munit_assert_uint64(a, op, b)
 #define assert_double_equal(a, b, precision) munit_assert_double_equal(a, b, precision)
 #define assert_string_equal(a, b) munit_assert_string_equal(a, b)
 #define assert_string_not_equal(a, b) munit_assert_string_not_equal(a, b)
 #define assert_memory_equal(size, a, b) munit_assert_memory_equal(size, a, b)
 #define assert_memory_not_equal(size, a, b) munit_assert_memory_not_equal(size, a, b)
 #define assert_ptr_equal(a, b) munit_assert_ptr_equal(a, b)
 #define assert_ptr_not_equal(a, b) munit_assert_ptr_not_equal(a, b)
 /* Forwards to munit_assert_ptr_null; this header defines no
  * munit_assert_null_equal macro. */
 #define assert_ptr_null(ptr) munit_assert_ptr_null(ptr)
 #define assert_ptr_not_null(ptr) munit_assert_not_null(ptr)
 #define assert_null(ptr) munit_assert_null(ptr)
 #define assert_not_null(ptr) munit_assert_not_null(ptr)
 #endif /* defined(MUNIT_ENABLE_ASSERT_ALIASES) */
 #if defined(__cplusplus)
 }
 #endif
 #endif /* !defined(MUNIT_H) */
 #if defined(MUNIT_ENABLE_ASSERT_ALIASES)
 #  if defined(assert)
 #    undef assert
 #  endif
 #  define assert(expr) munit_assert(expr)
 #endif
--- a/tests/regression.c
+++ b/tests/regression.c
@@ -0,0 +1,68 @@
 #include "../src/ast.h"
 #include "../src/parser/parser.h"
 #include "munit.h"
 MunitResult test_regression_trivia_head(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, "tests/input/regression/test_trivia_head.asm");
    munit_assert_null(err);
    tokenlist_t *list;
    err = tokenlist_alloc(&list);
    munit_assert_null(err);
    err = tokenlist_fill(list, lex);
    munit_assert_null(err);
    parse_result_t result = parse(list->head);
    munit_assert_null(result.err);
    munit_assert_null(result.next);
    ast_node_free(result.node);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 // Regression test: parse tests/input/regression/test_no_operands_eof.asm and
 // check that the tree holds exactly two instruction nodes, each with two
 // children, the second of which (the operands) is empty.
 MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
    munit_assert_null(err);
    tokenlist_t *list;
    err = tokenlist_alloc(&list);
    munit_assert_null(err);
    err = tokenlist_fill(list, lex);
    munit_assert_null(err);
    parse_result_t result = parse(list->head);
    munit_assert_null(result.err);
    munit_assert_null(result.next);
    // Fail the test cleanly instead of dereferencing a null root node below
    munit_assert_not_null(result.node);
    // Both children should be instructions
    munit_assert_size(result.node->len, ==, 2);
    munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
    munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);
    // And they should have empty operands; the length checks come first so
    // that indexing children[1] of each instruction is known to be in range
    munit_assert_size(result.node->children[0]->len, ==, 2);
    munit_assert_size(result.node->children[1]->len, ==, 2);
    munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
    munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);
    ast_node_free(result.node);
    tokenlist_free(list);
    return MUNIT_OK;
 }
 MunitTest regression_tests[] = {
    {"/trivia_head",     test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/no_operands_eof", test_no_operands_eof,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr,            nullptr,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
--- a/validate.sh
+++ b/validate.sh
@@ -2,19 +2,17 @@
 set -euo pipefail
-# Start with static analysis
+make analyze debug asan msan
 make clean all
 mkdir -p reports/static-analysis
 scan-build -o reports/static-analysis/ -plist-html --status-bugs make all
-# Run the sanitizer builds and valgrind
+ASAN=build/asan/oas
-make clean sanitize all
+MSAN=build/msan/oas
 DEBUG=build/debug/oas
 ARGUMENTS=("tokens" "text" "ast")
 while IFS= read -r INPUT_FILE; do
    for ARGS in ${ARGUMENTS[@]}; do
-        ./oas-asan $ARGS $INPUT_FILE > /dev/null
+        $ASAN $ARGS $INPUT_FILE > /dev/null
-        ./oas-msan $ARGS $INPUT_FILE > /dev/null
+        $MSAN $ARGS $INPUT_FILE > /dev/null
-        valgrind --leak-check=full --error-exitcode=1 ./oas $ARGS $INPUT_FILE >/dev/null
+        valgrind --leak-check=full --error-exitcode=1 $DEBUG $ARGS $INPUT_FILE >/dev/null
    done
 done < <(find tests/input/ -type f -name '*.asm')
Author	SHA1	Message	Date
omicron	00272d69bf	Add regression test for parse zero operands at eof All checks were successful Validate the build / validate-build (push) Successful in 30s Details	2025-04-16 13:16:55 +02:00
omicron	2385d38608	Prune the parse tree of NODE_NEWLINE after parsing succeeds	2025-04-16 13:01:02 +02:00
omicron	242fd9baa5	Fix grammar not being able to disambiguate some instructions When two identifiers follow each other it could be two instruction mnemonics or one instruction mnemonic and one operand. To fix this TOKEN_NEWLINE has been reintroduced as a semantic token. The grammar has been changed to allow empty statements and every instruction and directive has to end in a newline. Labels do not have to end in a newline. In addition to updating the grammar, the implementation of tokenlist, ast and parser have been updated to reflect these changes.	2025-04-16 12:34:44 +02:00
omicron	1574ec6249	Fix parse_consecutive behavior when the token stream runs out	2025-04-16 12:13:02 +02:00
omicron	92c63092a1	Add regression test for trivia at the head of tokenlist All checks were successful Validate the build / validate-build (push) Successful in 29s Details	2025-04-09 01:17:09 +02:00
omicron	5560de2904	Make sure parse skips past initial trivia in the tokenlist	2025-04-09 01:15:51 +02:00
omicron	2bea87b39a	Run tests in the validate gitea action All checks were successful Validate the build / validate-build (push) Successful in 29s Details	2025-04-06 09:23:25 +02:00
omicron	2eb7b3c2f1	use llvm to generate test coverage	2025-04-06 09:17:51 +02:00
omicron	f1f4c93a8e	Fix bug in lexer_next_number not correctly tracking character number All checks were successful Validate the build / validate-build (push) Successful in 28s Details When a number has a suffix the lexer state didn't record the number of characters consumed for this suffix. This made the lexer state be 2-3 characters short in its line location reporting until it encountered a newline character. It did not otherwise corrupt the state of the lexer.	2025-04-05 01:41:40 +02:00
omicron	27099c9899	Add initial unit tests - Add µnit source and header files - Add test target to the build system - Implement a thorough lexer test suite - Implement a minimal AST test suite	2025-04-05 01:37:04 +02:00
omicron	3fead8017b	Rename lexer errors	2025-04-05 01:37:04 +02:00
omicron	af66790cff	Clean up error definitions, location and expose them in the headers - Exposes all errors in the header file so any user of the api can test for the specific error conditions - Mark all static error pointers as const - Move generic errors into error.h - Name all errors err_modulename_* for errors that belong to a specific module and err_* for generic errors.	2025-04-05 01:37:04 +02:00
omicron	cb8768b1d0	Make clangd aware of the _POSIX_C_SOURCE define in the build system	2025-04-05 01:37:04 +02:00
omicron	1571c52012	Add some building documentation that clarifies the make targets All checks were successful Validate the build / validate-build (push) Successful in 26s Details	2025-04-04 02:18:11 +02:00
omicron	0f9ced8eb1	Rework the build system to be more modular Split most of the work off into make/base.mk and allow for easy wrappers to be created around that that can build with different instrumentation in their own build directory. Create wrappers for the following: - release build - debug build - afl++ fuzzing build - static analysis with clang - clang memory sanitizer - clang address/undefined sanitizer	2025-04-04 02:18:02 +02:00
`@@ -1,2 +1,2 @@`
	`CompileFlags:`	`CompileFlags:`
	`Add: ["-std=c23", "-x", "c"]`	`Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]`