Fix bug in lexer_next_number not correctly tracking character number

When a number has a suffix, the lexer state did not record the number of characters consumed for that suffix. As a result, the character position reported by the lexer was 2-3 characters short until the next newline character was encountered. The lexer state was not otherwise corrupted.
Add initial unit tests
2025-04-05 01:41:40 +02:00 · 2025-04-05 01:37:04 +02:00 · 2025-04-05 01:37:04 +02:00 · 2025-04-05 01:37:04 +02:00 · 2025-04-05 01:37:04 +02:00
12 changed files with 942 additions and 36 deletions
--- a/.clangd
+++ b/.clangd
@@ -1,2 +1,2 @@
 CompileFlags:
-  Add: ["-std=c23", "-x", "c"]
+  Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]
--- a/src/ast.c
+++ b/src/ast.c
@@ -3,7 +3,7 @@
 #include <assert.h>
 #include <string.h>
-error_t *err_node_children_cap = &(error_t){
+error_t *const err_ast_children_cap = &(error_t){
    .message = "Failed to increase ast node children, max capacity reached"};
 error_t *ast_node_alloc(ast_node_t **output) {
@@ -50,7 +50,7 @@ error_t *ast_node_alloc_children(ast_node_t *node) {
 error_t *ast_node_grow_cap(ast_node_t *node) {
    if (node->cap >= node_max_children_cap) {
-        return err_node_children_cap;
+        return err_ast_children_cap;
    }
    size_t new_cap = node->cap * 2;
--- a/src/ast.h
+++ b/src/ast.h
@@ -7,6 +7,8 @@
 #include <stddef.h>
 #include <stdint.h>
 extern error_t *const err_ast_children_cap;
 typedef enum node_id {
    NODE_INVALID,
--- a/src/error.c
+++ b/src/error.c
@@ -9,8 +9,13 @@ error_t *const err_errorf_alloc = &(error_t){
 error_t *const err_errorf_length = &(error_t){
    .message =
        "Formatting of another error failed to determine the error length"};
 error_t *const err_eof =
    &(error_t){.message = "Read failed because EOF is reached"};
-error_t *err_allocation_failed =
+error_t *const err_unknown_read_failure =
    &(error_t){.message = "Unknown read error"};
 error_t *const err_allocation_failed =
    &(error_t){.message = "Memory allocation failed"};
 error_t *errorf(const char *fmt, ...) {
--- a/src/error.h
+++ b/src/error.h
@@ -19,6 +19,8 @@ static inline void error_free(error_t *err) {
 }
 /* Some global errors */
-extern error_t *err_allocation_failed;
+extern error_t *const err_allocation_failed;
 extern error_t *const err_eof;
 extern error_t *const err_unknown_read_failure;
 #endif // INCLUDE_SRC_ERROR_H_
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -5,21 +5,16 @@
 #include <errno.h>
 #include <string.h>
-error_t *err_lexer_already_open = &(error_t){
+error_t *const err_lexer_already_open = &(error_t){
    .message =
        "Can't open on a lexer object that is already opened. Close it first."};
-error_t *err_prefix_too_large =
+error_t *const err_lexer_prefix_too_large =
    &(error_t){.message = "Prefix too large for internal lexer buffer"};
-error_t *err_buffer_underrun = &(error_t){
+error_t *const err_lexer_buffer_underrun = &(error_t){
    .message = "Buffer does not contain enough characters for lexer_consume_n"};
-error_t *err_consume_excessive_length =
+error_t *const err_lexer_consume_excessive_length =
    &(error_t){.message = "Too many valid characters to consume"};
 error_t *err_eof =
    &(error_t){.message = "Can't read from file because EOF is reached"};
 error_t *err_unknown_read = &(error_t){.message = "Unknown read error"};
 typedef bool (*char_predicate_t)(char);
 const char *lexer_token_id_to_cstr(lexer_token_id_t id) {
@@ -112,7 +107,7 @@ error_t *lexer_fill_buffer(lexer_t *lex) {
        if (n == 0 && ferror(lex->fp))
            return errorf("Read error: %s", strerror(errno));
        if (n == 0)
-            return err_unknown_read;
+            return err_unknown_read_failure;
        remaining -= n;
        lex->buffer_count += n;
    }
@@ -182,9 +177,9 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
 error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                         char buffer[static len], const size_t n) {
    if (lex->buffer_count < n)
-        return err_buffer_underrun;
+        return err_lexer_buffer_underrun;
    if (n > len)
-        return err_consume_excessive_length;
+        return err_lexer_consume_excessive_length;
    memcpy(buffer, lex->buffer, n);
    lexer_shift_buffer(lex, n);
@@ -229,7 +224,7 @@ error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
                (lex->buffer_count > 0 && is_valid(lex->buffer[0]));
        if (have_more_characters && *n_consumed == buffer_size) {
-            return err_consume_excessive_length;
+            return err_lexer_consume_excessive_length;
        }
    } while (have_more_characters);
    return nullptr;
@@ -299,11 +294,12 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_number_length - so_far,
                                 buffer + so_far, is_valid, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Number length exceeds the maximum of 128 characters";
    }
    lex->character_number += n;
    so_far += n;
    if (n == 0) {
        token->id = TOKEN_ERROR;
@@ -329,14 +325,15 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    if (suffix_length > 0) {
        err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far,
                              suffix_length);
-        if (err == err_consume_excessive_length) {
+        if (err == err_lexer_consume_excessive_length) {
            token->id = TOKEN_ERROR;
            token->explanation =
                "Number length exceeds the maximum of 128 characters";
        } else {
            lex->character_number += suffix_length;
        }
    }
    lex->character_number += n;
    token->value = strdup(buffer);
    return nullptr;
 }
@@ -406,7 +403,7 @@ error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_identifier_length, buffer,
                                 is_identifier_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Identifier length exceeds the maximum of 128 characters";
@@ -449,7 +446,7 @@ error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_whitespace_length, buffer,
                                 is_whitespace_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Whitespace length exceeds the maximum of 1024 characters";
@@ -484,7 +481,7 @@ error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {
    error_t *err = lexer_consume(lex, max_comment_length, buffer,
                                 is_comment_character, &n);
-    if (err == err_consume_excessive_length) {
+    if (err == err_lexer_consume_excessive_length) {
        token->id = TOKEN_ERROR;
        token->explanation =
            "Comment length exceeds the maximum of 1024 characters";
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -5,7 +5,10 @@
 #include <stddef.h>
 #include <stdio.h>
-extern error_t *err_eof;
+extern error_t *const err_lexer_already_open;
 extern error_t *const err_lexer_prefix_too_large;
 extern error_t *const err_lexer_buffer_underrun;
 extern error_t *const err_lexer_consume_excessive_length;
 typedef enum {
    TOKEN_ERROR,
--- a/src/parser/util.c
+++ b/src/parser/util.c
@@ -1,7 +1,7 @@
 #include "util.h"
 #include "../tokenlist.h"
-error_t *err_parse_no_match =
+error_t *const err_parse_no_match =
    &(error_t){.message = "parsing failed to find the correct token sequence"};
 parse_result_t parse_error(error_t *err) {
--- a/src/parser/util.h
+++ b/src/parser/util.h
@@ -21,6 +21,6 @@ parse_result_t parse_token(tokenlist_entry_t *current,
                           token_validator_t is_valid);
 parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);
-extern error_t *err_parse_no_match;
+extern error_t *const err_parse_no_match;
 #endif // INCLUDE_PARSER_UTIL_H_
--- a/tests/ast.c
+++ b/tests/ast.c
@@ -1,7 +1,7 @@
 #include "../src/ast.h"
 #include "munit.h"
-static MunitResult test_ast_node_alloc(const MunitParameter params[], void *data) {
+MunitResult test_ast_node_alloc(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
@@ -16,9 +16,7 @@ static MunitResult test_ast_node_alloc(const MunitParameter params[], void *data
    return MUNIT_OK;
 }
-static MunitTest ast_tests[] = {
+MunitTest ast_tests[] = {
    {"/node_alloc", test_ast_node_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr,       nullptr,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };
 const MunitSuite ast_test_suite = {"/ast", ast_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE};
--- a/tests/lexer.c
+++ b/tests/lexer.c
@@ -0,0 +1,896 @@
 #include "../src/lexer.h"
 #include "../src/error.h"
 #include "munit.h"
 #include <string.h>
 // Attaches a read-only in-memory stream over `input` to an unopened lexer and
 // resets its position counters and buffer count to zero.
 // The lexer must not already have a stream attached (lex->fp must be null).
 void lexer_setup_memory_test(lexer_t *lex, const char *input) {
    munit_assert_null(lex->fp);

    // fmemopen takes a non-const buffer, hence the cast; the stream is opened
    // in read mode.
    const size_t input_length = strlen(input);
    FILE *stream = fmemopen((void *)input, input_length, "rb");
    munit_assert_not_null(stream);

    lex->buffer_count = 0;
    lex->character_number = 0;
    lex->line_number = 0;
    lex->fp = stream;
 }
 // Reads the next token from `lex` and asserts that lexer_next reported no
 // error and that the token has the expected id, text, line and column.
 // The token is cleaned up before returning.
 //
 // lex:    lexer to read from
 // id:     expected token id
 // value:  expected token text
 // line:   expected token.line_number
 // column: expected token.character_number
 void lexer_expect_one_token(lexer_t *lex, lexer_token_id_t id, const char *value, size_t line, size_t column) {
    lexer_token_t token = {};
    error_t *err = lexer_next(lex, &token);
    munit_assert_null(err);
    munit_assert_int(token.id, ==, id);
    // Fail with a readable assertion instead of handing a null pointer to the
    // string comparison below.
    munit_assert_not_null(token.value);
    munit_assert_string_equal(token.value, value);
    // line and column are size_t, so compare them as sizes rather than
    // narrowing them to int.
    munit_assert_size(token.line_number, ==, line);
    munit_assert_size(token.character_number, ==, column);
    lexer_token_cleanup(&token);
 }
 // Asserts that the next read from `lex` fails with err_eof.
 void lexer_expect_eof(lexer_t *lex) {
    // The token is only a destination for lexer_next; its content is not checked.
    lexer_token_t scratch = {};
    error_t *err = lexer_next(lex, &scratch);

    munit_assert_ptr_equal(err, err_eof);
 }
 // Lexes `value` with a fresh lexer and expects exactly one token: it must have
 // the given id, its text must equal the whole input, and it must start at
 // line 0, character 0. The lexer must then report EOF.
 void lexer_test_one_token(lexer_token_id_t id, const char *value) {
    const size_t start_line = 0;
    const size_t start_column = 0;

    lexer_t lexer = {};
    lexer_setup_memory_test(&lexer, value);

    lexer_expect_one_token(&lexer, id, value, start_line, start_column);
    lexer_expect_eof(&lexer);

    lexer_close(&lexer);
 }
 // Each input must lex as a single TOKEN_IDENTIFIER covering the whole input.
 MunitResult test_lexer_identifier(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {
        "identifier",
        "_identifier",
        "_identifier123_55",
    };
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_IDENTIFIER, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Expected properties of a single token, as checked by lexer_expect_one_token.
 // Member order matters: the test tables initialize this struct positionally.
 typedef struct token_data {
    lexer_token_id_t id; // expected token id
    // Expected token text. The tables only ever store string literals here and
    // the value is only read, so it is pointer-to-const.
    const char *value;
    size_t line;   // expected line_number of the token
    size_t column; // expected character_number of the token
 } token_data_t;
 // One boundary test case: an input string and the two tokens it is expected to
 // lex into. Member order matters: the test tables initialize this struct
 // positionally.
 typedef struct boundary {
    // Text fed to the lexer; test_lexer_boundary stops at the first entry
    // whose input is null.
    const char *input;
    // Expected first token.
    token_data_t first;
    // Expected second token.
    token_data_t second;
 } boundary_t;
 // Runs every case in `boundaries` (terminated by an entry whose input is null):
 // the input is lexed with a fresh lexer and must produce exactly the `first`
 // token, then the `second` token, then EOF.
 void test_lexer_boundary(boundary_t boundaries[]) {
    for (const boundary_t *entry = boundaries; entry->input; ++entry) {
        const token_data_t *first = &entry->first;
        const token_data_t *second = &entry->second;

        lexer_t lex = {};
        lexer_setup_memory_test(&lex, entry->input);

        lexer_expect_one_token(&lex, first->id, first->value, first->line, first->column);
        lexer_expect_one_token(&lex, second->id, second->value, second->line, second->column);
        lexer_expect_eof(&lex);

        lexer_close(&lex);
    }
 }
 MunitResult test_lexer_identifier_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {"id:",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COLON, ":", 0, 2}         },
        {"id[",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_LBRACKET, "[", 0, 2}      },
        {"id]",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_RBRACKET, "]", 0, 2}      },
        {"id+",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_PLUS, "+", 0, 2}          },
        {"id-",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_MINUS, "-", 0, 2}         },
        {"id*",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_ASTERISK, "*", 0, 2}      },
        {"id.",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_DOT, ".", 0, 2}           },
        {"id;comment", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COMMENT, ";comment", 0, 2}},
        {"id\n",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 2}      },
        {"id\r\n",     {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 2}    },
        {"id ",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 2}    },
        {"id\t",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 2}   },
        {nullptr,      {},                             {}                               },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 // Each input must lex as a single TOKEN_DECIMAL covering the whole input.
 MunitResult test_lexer_decimal(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {"123", "0", "42"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Decimal numbers followed by a ":8", ":16", ":32" or ":64" suffix must lex as
 // a single TOKEN_DECIMAL whose text includes the suffix.
 MunitResult test_lexer_decimal_with_suffix(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {"123:8", "0:16", "42:32", "69:64"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Each "0x"-prefixed input must lex as a single TOKEN_HEXADECIMAL covering
 // the whole input; both lower- and upper-case digits are exercised.
 MunitResult test_lexer_hexadecimal(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {
        "0x123", "0xDEAD", "0x0", "0xabcdef", "0xABCDEF",
    };
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Hexadecimal numbers followed by a ":8", ":16", ":32" or ":64" suffix must
 // lex as a single TOKEN_HEXADECIMAL whose text includes the suffix.
 MunitResult test_lexer_hexadecimal_with_suffix(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {
        "0x123:8", "0xDEAD:16", "0xABC:32", "0xffff:64",
    };
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Each "0o"-prefixed input must lex as a single TOKEN_OCTAL covering the
 // whole input.
 MunitResult test_lexer_octal(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {"0o777", "0o0", "0o123"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Octal numbers followed by a ":8", ":16", ":32" or ":64" suffix must lex as
 // a single TOKEN_OCTAL whose text includes the suffix.
 MunitResult test_lexer_octal_with_suffix(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {
        "0o777:8", "0o123:16", "0o777:32", "0o123:64",
    };
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Each "0b"-prefixed input must lex as a single TOKEN_BINARY covering the
 // whole input.
 MunitResult test_lexer_binary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {"0b101", "0b0", "0b1", "0b01010101"};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Binary numbers followed by a ":8", ":16", ":32" or ":64" suffix must lex as
 // a single TOKEN_BINARY whose text includes the suffix.
 MunitResult test_lexer_binary_with_suffix(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {
        "0b101:8", "0b0:16", "0b1:32", "0b01010101:64",
    };
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
    }
    return MUNIT_OK;
 }
 // A lone ":" must lex as a single TOKEN_COLON.
 MunitResult test_lexer_colon(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    const char *const input = ":";
    lexer_test_one_token(TOKEN_COLON, input);
    return MUNIT_OK;
 }
 // A lone "," must lex as a single TOKEN_COMMA.
 MunitResult test_lexer_comma(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    const char *const input = ",";
    lexer_test_one_token(TOKEN_COMMA, input);
    return MUNIT_OK;
 }
 // A lone "[" must lex as a single TOKEN_LBRACKET.
 MunitResult test_lexer_lbracket(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    const char *const input = "[";
    lexer_test_one_token(TOKEN_LBRACKET, input);
    return MUNIT_OK;
 }
 // A lone "]" must lex as a single TOKEN_RBRACKET.
 MunitResult test_lexer_rbracket(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    const char *const input = "]";
    lexer_test_one_token(TOKEN_RBRACKET, input);
    return MUNIT_OK;
 }
 // A lone "+" must lex as a single TOKEN_PLUS.
 MunitResult test_lexer_plus(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    const char *const input = "+";
    lexer_test_one_token(TOKEN_PLUS, input);
    return MUNIT_OK;
 }
 // A lone "-" must lex as a single TOKEN_MINUS.
 MunitResult test_lexer_minus(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    const char *const input = "-";
    lexer_test_one_token(TOKEN_MINUS, input);
    return MUNIT_OK;
 }
 // A lone "*" must lex as a single TOKEN_ASTERISK.
 MunitResult test_lexer_asterisk(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    const char *const input = "*";
    lexer_test_one_token(TOKEN_ASTERISK, input);
    return MUNIT_OK;
 }
 // A lone "." must lex as a single TOKEN_DOT.
 MunitResult test_lexer_dot(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    const char *const input = ".";
    lexer_test_one_token(TOKEN_DOT, input);
    return MUNIT_OK;
 }
 // Input starting with ";" must lex as a single TOKEN_COMMENT covering the
 // whole input, including the case of a bare ";".
 MunitResult test_lexer_comment(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {
        ";This is a comment",
        "; Another comment",
        ";",
    };
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_COMMENT, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Runs of spaces and tabs must lex as a single TOKEN_WHITESPACE covering the
 // whole run.
 MunitResult test_lexer_whitespace(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    static const char *const inputs[] = {" ", "  ", "\t", " \t "};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; ++i) {
        lexer_test_one_token(TOKEN_WHITESPACE, inputs[i]);
    }
    return MUNIT_OK;
 }
 // Both "\n" and "\r\n" must lex as a single TOKEN_NEWLINE at line 0,
 // character 0, followed by EOF.
 MunitResult test_lexer_newlines(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    // lexer_test_one_token performs the same setup / expect-token / expect-EOF /
    // close sequence with a fresh lexer for each input.

    // Simple newline
    lexer_test_one_token(TOKEN_NEWLINE, "\n");

    // Windows-style newline
    lexer_test_one_token(TOKEN_NEWLINE, "\r\n");

    return MUNIT_OK;
 }
 MunitResult test_lexer_line_numbers(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    lexer_t lex = {};
    lexer_setup_memory_test(&lex, "a\nb\nc");
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "a", 0, 0);
    lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 0, 1);
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "b", 1, 0);
    lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 1, 1);
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "c", 2, 0);
    lexer_expect_eof(&lex);
    lexer_close(&lex);
    return MUNIT_OK;
 }
 MunitResult test_lexer_decimal_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {"123,",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMA, ",", 0, 3}      },
        {"123:",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COLON, ":", 0, 3}      },
        {"123[",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 3}   },
        {"123]",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 3}   },
        {"123+",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_PLUS, "+", 0, 3}       },
        {"123-",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_MINUS, "-", 0, 3}      },
        {"123*",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 3}   },
        {"123.",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_DOT, ".", 0, 3}        },
        {"123;",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMENT, ";", 0, 3}    },
        {"123\n",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 3}   },
        {"123\r\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 3} },
        {"123 ",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 3} },
        {"123\t",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 3}},
        {nullptr,   {},                           {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 MunitResult test_lexer_hexadecimal_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {"0x123,",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0x123:",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0x123[",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0x123]",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0x123+",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0x123-",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0x123*",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0x123.",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0x123;",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0x123\n",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0x123\r\n", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0x123 ",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0x123\t",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                                 {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 MunitResult test_lexer_octal_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {"0o123,",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0o123:",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0o123[",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0o123]",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0o123+",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0o123-",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0o123*",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0o123.",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0o123;",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0o123\n",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0o123\r\n", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0o123 ",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0o123\t",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                           {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 MunitResult test_lexer_binary_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {"0b101,",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      },
        {"0b101:",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COLON, ":", 0, 5}      },
        {"0b101[",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   },
        {"0b101]",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   },
        {"0b101+",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       },
        {"0b101-",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      },
        {"0b101*",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   },
        {"0b101.",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_DOT, ".", 0, 5}        },
        {"0b101;",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    },
        {"0b101\n",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   },
        {"0b101\r\n", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} },
        {"0b101 ",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} },
        {"0b101\t",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr,     {},                            {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 MunitResult test_lexer_colon_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {":,",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"::",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {":[",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {":]",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {":+",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {":-",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {":*",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {":.",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {":;",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {":\n",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {":\r\n", {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {": ",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {":\t",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                       {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 MunitResult test_lexer_comma_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {",,",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {",:",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {",[",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {",]",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {",+",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {",-",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {",*",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {",.",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {",;",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {",\n",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {",\r\n", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {", ",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {",\t",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                       {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 MunitResult test_lexer_lbracket_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {"[,",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"[:",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"[[",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"[]",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"[+",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"[-",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"[*",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"[.",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"[;",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"[\n",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"[\r\n", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"[ ",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"[\t",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                          {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 MunitResult test_lexer_rbracket_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {"],",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"]:",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"][",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"]]",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"]+",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"]-",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"]*",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"].",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"];",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"]\n",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"]\r\n", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"] ",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"]\t",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                          {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 MunitResult test_lexer_plus_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {"+,",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      },
        {"+:",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COLON, ":", 0, 1}      },
        {"+[",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   },
        {"+]",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   },
        {"++",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       },
        {"+-",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      },
        {"+*",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   },
        {"+.",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_DOT, ".", 0, 1}        },
        {"+;",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    },
        {"+\n",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   },
        {"+\r\n", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} },
        {"+ ",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} },
        {"+\t",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {},                      {}                            },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 // Token-boundary cases for '-'.
 // Every row is: input text, expected first token, expected second token.
 // The table ends with a nullptr-input sentinel row and is handed to
 // test_lexer_boundary() (defined outside this excerpt).
 // NOTE(review): the two trailing integers of each token look like
 // (line, column) -- confirm against the boundary_t definition.
 MunitResult test_lexer_minus_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"-,", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {"-:", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {"-[", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {"-]", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {"-+", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {"--", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {"-*", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"-.", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {"-;", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {"-\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {"-\r\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {"- ", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {"-\t", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Token-boundary cases for '*'.
 // Every row is: input text, expected first token, expected second token.
 // The table ends with a nullptr-input sentinel row and is handed to
 // test_lexer_boundary() (defined outside this excerpt).
 // NOTE(review): the two trailing integers of each token look like
 // (line, column) -- confirm against the boundary_t definition.
 MunitResult test_lexer_asterisk_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"*,", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {"*:", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {"*[", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {"*]", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {"*+", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {"*-", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {"**", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"*.", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {"*;", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {"*\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {"*\r\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {"* ", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {"*\t", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Token-boundary cases for '.'.
 // Every row is: input text, expected first token, expected second token.
 // The table ends with a nullptr-input sentinel row and is handed to
 // test_lexer_boundary() (defined outside this excerpt).
 // NOTE(review): the two trailing integers of each token look like
 // (line, column) -- confirm against the boundary_t definition.
 MunitResult test_lexer_dot_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {".,", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {".:", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {".[", {TOKEN_DOT, ".", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {".]", {TOKEN_DOT, ".", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {".+", {TOKEN_DOT, ".", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {".-", {TOKEN_DOT, ".", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {".*", {TOKEN_DOT, ".", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"..", {TOKEN_DOT, ".", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {".;", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {".\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {".\r\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {". ", {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {".\t", {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 MunitResult test_lexer_comment_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;
    boundary_t boundaries[] = {
        {";comment\n",   {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}  },
        {";comment\r\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}},
        {nullptr,        {},                                {}                           },
    };
    test_lexer_boundary(boundaries);
    return MUNIT_OK;
 }
 // Token-boundary cases for a single space followed by a non-whitespace token.
 // Every row is: input text, expected first token, expected second token.
 // There are no space-then-whitespace rows in this table. The table ends
 // with a nullptr-input sentinel row and is handed to test_lexer_boundary()
 // (defined outside this excerpt).
 MunitResult test_lexer_whitespace_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {" ,", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {" :", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {" [", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {" ]", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {" +", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {" -", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {" *", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {" .", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {" ;", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {" \n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {" \r\n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Token-boundary cases for an LF newline followed by each other token kind.
 // The second token of every row is expected with trailing integers (1, 0)
 // rather than (0, 1).
 // NOTE(review): that pair looks like (line, column), i.e. the token after a
 // newline starts the next line -- confirm against boundary_t.
 // The table ends with a nullptr-input sentinel row and is handed to
 // test_lexer_boundary() (defined outside this excerpt).
 MunitResult test_lexer_newline_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"\n,", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}},
        {"\n:", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}},
        {"\n[", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}},
        {"\n]", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}},
        {"\n+", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}},
        {"\n-", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}},
        {"\n*", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}},
        {"\n.", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}},
        {"\n;", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}},
        {"\n\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}},
        {"\n\r\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0}},
        {"\n ", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0}},
        {"\n\t", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Token-boundary cases for a CRLF newline followed by each other token kind.
 // The "\r\n" pair is expected as one TOKEN_NEWLINE, and the second token of
 // every row is expected with trailing integers (1, 0).
 // NOTE(review): that pair looks like (line, column) -- confirm against
 // boundary_t.
 // The table ends with a nullptr-input sentinel row and is handed to
 // test_lexer_boundary() (defined outside this excerpt).
 MunitResult test_lexer_crlf_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"\r\n,", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}},
        {"\r\n:", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}},
        {"\r\n[", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}},
        {"\r\n]", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}},
        {"\r\n+", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}},
        {"\r\n-", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}},
        {"\r\n*", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}},
        {"\r\n.", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}},
        {"\r\n;", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}},
        {"\r\n\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}},
        {"\r\n\r\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0}},
        {"\r\n ", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0}},
        {"\r\n\t", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Token-boundary cases for suffixed numbers (":8", ":16", ":32", ":64")
 // in hexadecimal, octal and binary notation. In every row the suffix is
 // expected inside the number token's value, and the second token's last
 // integer equals the full length of the number text (7 or 8).
 // NOTE(review): that last integer looks like the column -- confirm against
 // boundary_t.
 // The table ends with a nullptr-input sentinel row and is handed to
 // test_lexer_boundary() (defined outside this excerpt).
 MunitResult test_lexer_number_boundary(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    boundary_t cases[] = {
        {"0x123:8,", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_COMMA, ",", 0, 7}},
        {"0x123:16:", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_COLON, ":", 0, 8}},
        {"0o777:32[", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_LBRACKET, "[", 0, 8}},
        {"0b101:64]", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_RBRACKET, "]", 0, 8}},
        {"0x123:8+", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_PLUS, "+", 0, 7}},
        {"0x123:16-", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_MINUS, "-", 0, 8}},
        {"0o777:32*", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_ASTERISK, "*", 0, 8}},
        {"0b101:64.", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_DOT, ".", 0, 8}},
        {"0x123:8;", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_COMMENT, ";", 0, 7}},
        {"0x123:16\n", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}},
        {"0o777:32\r\n", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}},
        {"0b101:64 ", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 8}},
        {"0x123:8\t", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 7}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
 }
 // Lexes four number literals that are each exactly 128 characters long
 // (decimal and 0x-prefixed, with and without a ":64" suffix) and checks that
 // each comes back as a single decimal or hexadecimal token whose value is
 // also 128 characters long.
 MunitResult test_lexer_maximum_length_numbers(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    // nullptr-terminated list of inputs; the literal length is asserted below.
    char *inputs[] = {
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999999988",
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999998:64",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999999988",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "9999999999999999998:64",
        nullptr,
    };
    for (char **cursor = inputs; *cursor; ++cursor) {
        char *number = *cursor;
        munit_assert_size(128, ==, strlen(number));

        // Fresh lexer and token per input so the cases stay independent.
        lexer_t lex = {};
        lexer_token_t token = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);

        munit_assert_true(token.id == TOKEN_DECIMAL || token.id == TOKEN_HEXADECIMAL);
        munit_assert_size(128, ==, strlen(token.value));

        lexer_token_cleanup(&token);
        lexer_close(&lex);
    }
    return MUNIT_OK;
 }
 // Lexes four 129-character number literals and expects an error token for
 // each one.
 //   - Entries 0 and 1 have no suffix: the error token's value is expected to
 //     be exactly 128 characters long.
 //   - Entries 2 and 3 end in ":64": the error token's value is expected to be
 //     at most 128 characters long, and the lexer is then expected to produce
 //     ":" followed by "64" as separate tokens.
 MunitResult test_lexer_too_long_numbers(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;
    char *inputs[] = {
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999999988",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999999988",
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999998:64",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999998:64",
    };
    for (size_t index = 0; index < 4; ++index) {
        char *number = inputs[index];
        munit_assert_size(129, ==, strlen(number));

        lexer_t lex = {};
        lexer_token_t token = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);
        munit_assert_int(TOKEN_ERROR, ==, token.id);

        if (index < 2) {
            // Without suffix we expect 128 characters and then failure
            munit_assert_size(128, ==, strlen(token.value));
            lexer_token_cleanup(&token);
        } else {
            // With suffix we fail at the suffix boundary
            munit_assert_size(128, >=, strlen(token.value));
            lexer_token_cleanup(&token);
            lexer_expect_one_token(&lex, TOKEN_COLON, ":", 0, 126);
            lexer_expect_one_token(&lex, TOKEN_DECIMAL, "64", 0, 127);
        }
        lexer_close(&lex);
    }
    return MUNIT_OK;
 }
 // Lexes a run of exactly 1024 spaces and expects it back as one whitespace
 // token whose value is 1024 characters long.
 MunitResult test_lexer_max_whitespace_length(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    // Zero-filled buffer, then every byte except the final NUL becomes ' '.
    char whitespace[1025] = {0};
    memset(whitespace, ' ', sizeof whitespace - 1);
    munit_assert_size(1024, ==, strlen(whitespace));

    lexer_t lex = {};
    lexer_token_t token = {};
    lexer_setup_memory_test(&lex, whitespace);
    lexer_next(&lex, &token);

    munit_assert_int(TOKEN_WHITESPACE, ==, token.id);
    munit_assert_size(1024, ==, strlen(token.value));

    lexer_token_cleanup(&token);
    lexer_close(&lex);
    return MUNIT_OK;
 }
 // Lexes a run of 1025 spaces and expects an error token whose value is 1024
 // characters long, followed by a one-space whitespace token.
 // NOTE(review): the trailing (0, 1024) passed to lexer_expect_one_token looks
 // like (line, column) -- confirm against that helper's signature.
 MunitResult test_lexer_too_long_whitespace(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    // Zero-filled buffer, then every byte except the final NUL becomes ' '.
    char whitespace[1026] = {0};
    memset(whitespace, ' ', sizeof whitespace - 1);
    munit_assert_size(1025, ==, strlen(whitespace));

    lexer_t lex = {};
    lexer_token_t token = {};
    lexer_setup_memory_test(&lex, whitespace);
    lexer_next(&lex, &token);

    munit_assert_int(TOKEN_ERROR, ==, token.id);
    munit_assert_size(1024, ==, strlen(token.value));
    lexer_token_cleanup(&token);

    // The one space left over after the error must still be lexed.
    lexer_expect_one_token(&lex, TOKEN_WHITESPACE, " ", 0, 1024);
    lexer_close(&lex);
    return MUNIT_OK;
 }
 // Registry of the lexer tests, terminated by an entry whose name and test
 // function are both nullptr. Every entry differs only in its name and test
 // function; the four remaining initializers are the same for all of them, so
 // they are supplied by a helper macro that is undefined again right after
 // the table.
 #define LEXER_TEST_ENTRY(path, fn) {path, fn, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 MunitTest lexer_tests[] = {
    LEXER_TEST_ENTRY("/identifier", test_lexer_identifier),
    LEXER_TEST_ENTRY("/identifier_boundary", test_lexer_identifier_boundary),
    LEXER_TEST_ENTRY("/decimal", test_lexer_decimal),
    LEXER_TEST_ENTRY("/decimal_boundary", test_lexer_decimal_boundary),
    LEXER_TEST_ENTRY("/hexadecimal", test_lexer_hexadecimal),
    LEXER_TEST_ENTRY("/hexadecimal_with_suffix", test_lexer_hexadecimal_with_suffix),
    LEXER_TEST_ENTRY("/hexadecimal_boundary", test_lexer_hexadecimal_boundary),
    LEXER_TEST_ENTRY("/octal", test_lexer_octal),
    LEXER_TEST_ENTRY("/octal_with_suffix", test_lexer_octal_with_suffix),
    LEXER_TEST_ENTRY("/octal_boundary", test_lexer_octal_boundary),
    LEXER_TEST_ENTRY("/binary", test_lexer_binary),
    LEXER_TEST_ENTRY("/binary_with_suffix", test_lexer_binary_with_suffix),
    LEXER_TEST_ENTRY("/binary_boundary", test_lexer_binary_boundary),
    LEXER_TEST_ENTRY("/number_boundary", test_lexer_number_boundary),
    LEXER_TEST_ENTRY("/colon", test_lexer_colon),
    LEXER_TEST_ENTRY("/colon_boundary", test_lexer_colon_boundary),
    LEXER_TEST_ENTRY("/comma", test_lexer_comma),
    LEXER_TEST_ENTRY("/comma_boundary", test_lexer_comma_boundary),
    LEXER_TEST_ENTRY("/lbracket", test_lexer_lbracket),
    LEXER_TEST_ENTRY("/lbracket_boundary", test_lexer_lbracket_boundary),
    LEXER_TEST_ENTRY("/rbracket", test_lexer_rbracket),
    LEXER_TEST_ENTRY("/rbracket_boundary", test_lexer_rbracket_boundary),
    LEXER_TEST_ENTRY("/plus", test_lexer_plus),
    LEXER_TEST_ENTRY("/plus_boundary", test_lexer_plus_boundary),
    LEXER_TEST_ENTRY("/minus", test_lexer_minus),
    LEXER_TEST_ENTRY("/minus_boundary", test_lexer_minus_boundary),
    LEXER_TEST_ENTRY("/asterisk", test_lexer_asterisk),
    LEXER_TEST_ENTRY("/asterisk_boundary", test_lexer_asterisk_boundary),
    LEXER_TEST_ENTRY("/dot", test_lexer_dot),
    LEXER_TEST_ENTRY("/dot_boundary", test_lexer_dot_boundary),
    LEXER_TEST_ENTRY("/comment", test_lexer_comment),
    LEXER_TEST_ENTRY("/comment_boundary", test_lexer_comment_boundary),
    LEXER_TEST_ENTRY("/whitespace", test_lexer_whitespace),
    LEXER_TEST_ENTRY("/whitespace_boundary", test_lexer_whitespace_boundary),
    LEXER_TEST_ENTRY("/newlines", test_lexer_newlines),
    LEXER_TEST_ENTRY("/newline_boundary", test_lexer_newline_boundary),
    LEXER_TEST_ENTRY("/crlf_boundary", test_lexer_crlf_boundary),
    LEXER_TEST_ENTRY("/line_numbers", test_lexer_line_numbers),
    LEXER_TEST_ENTRY("/maximum_length_numbers", test_lexer_maximum_length_numbers),
    LEXER_TEST_ENTRY("/too_long_numbers", test_lexer_too_long_numbers),
    LEXER_TEST_ENTRY("/max_whitespace_length", test_lexer_max_whitespace_length),
    LEXER_TEST_ENTRY("/too_long_whitespace", test_lexer_too_long_whitespace),
    LEXER_TEST_ENTRY(nullptr, nullptr)
 };
 #undef LEXER_TEST_ENTRY
--- a/tests/main.c
+++ b/tests/main.c
@@ -1,13 +1,16 @@
 #include "munit.h"
-extern const MunitSuite ast_test_suite;
+extern MunitTest ast_tests[];
 extern MunitTest lexer_tests[];
 int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
-    MunitSuite master_suite = {"/oas", nullptr, nullptr, 1, MUNIT_SUITE_OPTION_NONE};
+    MunitSuite suites[] = {
        {"/ast",   ast_tests,   nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/lexer", lexer_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {nullptr,  nullptr,     nullptr, 0, MUNIT_SUITE_OPTION_NONE},
    };
-    MunitSuite suites[] = {ast_test_suite, nullptr};
+    MunitSuite master_suite = {"/oas", nullptr, suites, 1, MUNIT_SUITE_OPTION_NONE};
    master_suite.suites = suites;
    return munit_suite_main(&master_suite, nullptr, argc, argv);
 }
Author	SHA1	Message	Date
omicron	f1f4c93a8e	Fix bug in lexer_next_number not correctly tracking character number All checks were successful Validate the build / validate-build (push) Successful in 28s Details When a number has a suffix the lexer state didn't record the number of characters consumed for this suffix. This made the lexer state be 2-3 characters short in its line location reporting until it encountered a newline character. It did not otherwise corrupt the state of the lexer.	2025-04-05 01:41:40 +02:00
omicron	27099c9899	Add initial unit tests - Add µnit source and header files - Add test target to the build system - Implement a thorough lexer test suite - Implement a minimal AST test suite	2025-04-05 01:37:04 +02:00
omicron	3fead8017b	Rename lexer errors	2025-04-05 01:37:04 +02:00
omicron	af66790cff	Clean up error definitions, location and expose them in the headers - Exposes all errors in the header file so any user of the api can test for the specific error conditions - Mark all static error pointers as const - Move generic errors into error.h - Name all errors err_modulename_* for errors that belong to a specific module and err_* for generic errors.	2025-04-05 01:37:04 +02:00
omicron	cb8768b1d0	Make clangd aware of the _POSIX_C_SOURCE define in the build system	2025-04-05 01:37:04 +02:00
`@@ -1,2 +1,2 @@`
	`CompileFlags:`	`CompileFlags:`
	`Add: ["-std=c23", "-x", "c"]`	`Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]`