Add more grammar rules to the parser

Add a parser combinator to parse a delimited list
make parse_success always skip past trivia in the tokenlist
2025-04-01 23:43:05 +02:00 · 2025-04-01 23:39:48 +02:00 · 2025-04-01 23:39:01 +02:00 · 2025-04-01 23:36:08 +02:00 · 2025-04-01 23:22:29 +02:00 · 2025-04-01 22:09:36 +02:00
17 changed files with 528 additions and 42 deletions
--- a/2
+++ b/2
@ -10,7 +10,7 @@ OBJECTS = $(SOURCES:.c=.o)
 DEPENDENCIES = $(SOURCES:.c=.d)
 TARGET?=oas
 OUTPUTS=oas oas-asan oas-msan oas-afl
-RUNARGUMENTS?=-tokens tests/input/valid.asm
+RUNARGUMENTS?=ast tests/input/valid.asm
 all: $(TARGET)
--- a/doc/parser_grammar.txt
+++ b/doc/parser_grammar.txt
@ -1,24 +1,20 @@
 /* string literals are lexer identifier tokens with that particular value */
 <program>   ::= <statement>*
-<statement> ::= ( <label> | <directive> | <instruction> ) <newline>
+<statement> ::= <label> | <directive> | <instruction>
 <label> ::= <identifier> <colon>
-<directive> ::= <dot> <section>
+<directive> ::= <dot> <section_directive>
-<section>   ::= "section" <identifier>
+<section_directive> ::= "section" <identifier>
 <instruction> ::= <identifier> <operands>
-<operands> ::= <operand> ( <comma> <operands> )*
+<operands> ::= <operand> ( <comma> <operand> )*
 <operand>  ::= <register> | <immediate> | <memory>
 <register> ::= <register_base> | <register_extra>
 <register_base> ::= "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi" | "rbp" | "rsp"
 <register_extra> ::= "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15" 
 <immediate> ::= <number> | <label_reference>
 <number> ::= <octal> | <binary> | <decimal> | <hexadecimal>
 <label_reference> ::= <identifier>
@ -34,3 +30,10 @@
 <register_offset> ::= <plus_or_minus> <number>
 <plus_or_minus> ::= <plus> | <minus>
 /* These are lexer identifiers with the correct string value */
 <section> ::= "section"
 <register> ::= "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi" | "rbp" | "rsp" |
 "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15"
--- a/src/ast.c
+++ b/src/ast.c
@ -26,6 +26,7 @@ void ast_node_free(ast_node_t *node) {
    if (node->children) {
        for (size_t i = 0; i < node->len; ++i)
            ast_node_free(node->children[i]);
        free(node->children);
    }
    ast_node_free_value(node);
--- a/src/ast.h
+++ b/src/ast.h
@ -3,14 +3,51 @@
 #include "error.h"
 #include "lexer.h"
 #include "tokenlist.h"
 #include <stddef.h>
 #include <stdint.h>
 typedef enum node_id {
    NODE_INVALID,
    NODE_PROGRAM,
-    NODE_DIRECTIVE,
+    NODE_STATEMENT,
    NODE_LABEL,
-    NODE_INSTRUCTION
+    NODE_DIRECTIVE,
    NODE_INSTRUCTION,
    NODE_OPERANDS,
    NODE_OPERAND,
    NODE_IMMEDIATE,
    NODE_MEMORY,
    NODE_NUMBER,
    NODE_LABEL_REFERENCE,
    NODE_MEMORY_EXPRESSION,
    NODE_REGISTER_EXPRESSION,
    NODE_REGISTER_INDEX,
    NODE_REGISTER_OFFSET,
    NODE_PLUS_OR_MINUS,
    NODE_SECTION_DIRECTIVE,
    // Validated primitives
    NODE_REGISTER,
    NODE_SECTION,
    // Primitive nodes
    NODE_IDENTIFIER,
    NODE_DECIMAL,
    NODE_HEXADECIMAL,
    NODE_OCTAL,
    NODE_BINARY,
    NODE_CHAR,
    NODE_STRING,
    NODE_COLON,
    NODE_COMMA,
    NODE_LBRACKET,
    NODE_RBRACKET,
    NODE_PLUS,
    NODE_MINUS,
    NODE_ASTERISK,
    NODE_DOT,
 } node_id_t;
 typedef struct ast_node ast_node_t;
@ -21,7 +58,7 @@ constexpr size_t node_max_children_cap = 1 << 16;
 struct ast_node {
    node_id_t id;
-    lexer_token_t *token;
+    tokenlist_entry_t *token_entry;
    size_t len;
    size_t cap;
    ast_node_t **children;
--- a/src/lexer.c
+++ b/src/lexer.c
@ -183,7 +183,7 @@ error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                         char buffer[static len], const size_t n) {
    if (lex->buffer_count < n)
        return err_buffer_underrun;
-    if (len > n)
+    if (n > len)
        return err_consume_excessive_length;
    memcpy(buffer, lex->buffer, n);
--- a/src/main.c
+++ b/src/main.c
@ -1,5 +1,6 @@
 #include "error.h"
 #include "lexer.h"
 #include "parser.h"
 #include "tokenlist.h"
 #include <limits.h>
@ -7,38 +8,57 @@
 #include <stdlib.h>
 #include <string.h>
-bool print_token(lexer_token_t *token) {
+typedef enum mode { MODE_AST, MODE_TEXT, MODE_TOKENS } mode_t;
 void print_tokens(tokenlist_t *list) {
    for (auto entry = list->head; entry; entry = entry->next) {
        auto token = &entry->token;
        lexer_token_print(token);
-    return true;
+    }
 }
-bool print_value(lexer_token_t *token) {
+void print_text(tokenlist_t *list) {
    for (auto entry = list->head; entry; entry = entry->next) {
        auto token = &entry->token;
        if (token->id == TOKEN_ERROR) {
            printf("%s\n", token->value);
            for (size_t i = 0; i < token->character_number; ++i)
                printf(" ");
            printf("^-- %s\n", token->explanation);
            return;
        } else {
            printf("%s", token->value);
        }
-    return token->id != TOKEN_ERROR;
+    }
 }
 // Run the parser over the token list. When parsing fails the error message is
 // printed and the error is released; when it succeeds the resulting tree is
 // freed again immediately without being printed.
 void parse_ast(tokenlist_t *list) {
    parse_result_t outcome = parse(list->head);
    if (!outcome.err) {
        ast_node_free(outcome.node);
        return;
    }
    puts(outcome.err->message);
    error_free(outcome.err);
 }
 // Translate the command line into an execution mode. Exactly two arguments
 // are expected: the mode name ("tokens", "text" or "ast") and a filename.
 // Anything else prints the usage line and terminates the process with exit
 // status 1.
 int get_execution_mode(int argc, char *argv[]) {
    if (argc == 3) {
        const char *requested = argv[1];
        if (strcmp(requested, "tokens") == 0)
            return MODE_TOKENS;
        if (strcmp(requested, "text") == 0)
            return MODE_TEXT;
        if (strcmp(requested, "ast") == 0)
            return MODE_AST;
    }
    puts("Usage: oas [tokens|text|ast] <filename>");
    exit(1);
 }
 int main(int argc, char *argv[]) {
-    if (argc != 3 ||
+    mode_t mode = get_execution_mode(argc, argv);
        (strcmp(argv[1], "-tokens") != 0 && strcmp(argv[1], "-text") != 0)) {
        puts("Usage: oas -tokens <filename>");
        puts("Usage: oas -text <filename>");
        return 1;
    }
    bool (*print_fn)(lexer_token_t *);
    char *filename = argv[2];
    if (strcmp(argv[1], "-tokens") == 0) {
        print_fn = print_token;
    } else {
        print_fn = print_value;
    }
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, filename);
@ -54,9 +74,18 @@ int main(int argc, char *argv[]) {
    if (err)
        goto cleanup_tokens;
-    for (auto entry = list->head; entry; entry = entry->next) {
+    switch (mode) {
-        print_fn(&entry->token);
+    case MODE_TOKENS:
        print_tokens(list);
        break;
    case MODE_TEXT:
        print_text(list);
        break;
    case MODE_AST:
        parse_ast(list);
        break;
    }
    tokenlist_free(list);
    error_free(err);
    return 0;
--- a/src/parser.c
+++ b/src/parser.c
@ -0,0 +1,53 @@
 #include "parser.h"
 #include "ast.h"
 #include "lexer.h"
 #include "parser_combinators.h"
 #include "parser_primitives.h"
 #include "parser_util.h"
 #include "tokenlist.h"
 // <number> ::= <octal> | <binary> | <decimal> | <hexadecimal>
 // The alternatives are attempted in the order they appear in the table.
 parse_result_t parse_number(tokenlist_entry_t *current) {
    parser_t alternatives[] = {
        parse_octal,
        parse_decimal,
        parse_hexadecimal,
        parse_binary,
        nullptr, // end-of-table marker
    };
    return parse_any(current, alternatives);
 }
 // Parse a single operand: currently either a register or a number.
 parse_result_t parse_operand(tokenlist_entry_t *current) {
    // FIXME: not the correct set of parsers
    parser_t alternatives[] = {
        parse_register,
        parse_number,
        nullptr, // end-of-table marker
    };
    return parse_any(current, alternatives);
 }
 // Parse a comma separated list of operands into a NODE_OPERANDS node. An
 // empty list is accepted.
 parse_result_t parse_operands(tokenlist_entry_t *current) {
    const bool allow_none = true;
    return parse_list(current, NODE_OPERANDS, allow_none, TOKEN_COMMA,
                      parse_operand);
 }
 // <label> ::= <identifier> <colon>
 parse_result_t parse_label(tokenlist_entry_t *current) {
    parser_t sequence[] = {
        parse_identifier,
        parse_colon,
        nullptr, // end-of-table marker
    };
    return parse_consecutive(current, NODE_LABEL, sequence);
 }
 // <section_directive> ::= "section" <identifier>
 parse_result_t parse_section_directive(tokenlist_entry_t *current) {
    parser_t sequence[] = {
        parse_section,
        parse_identifier,
        nullptr, // end-of-table marker
    };
    return parse_consecutive(current, NODE_SECTION_DIRECTIVE, sequence);
 }
 // <directive> ::= <dot> <section_directive>
 // The resulting node is tagged NODE_DIRECTIVE so that later passes can tell
 // directives apart from labels.
 parse_result_t parse_directive(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
    return parse_consecutive(current, NODE_DIRECTIVE, parsers);
 }
 // <instruction> ::= <identifier> <operands>
 parse_result_t parse_instruction(tokenlist_entry_t *current) {
    parser_t sequence[] = {
        parse_identifier,
        parse_operands,
        nullptr, // end-of-table marker
    };
    return parse_consecutive(current, NODE_INSTRUCTION, sequence);
 }
 // <statement> ::= <label> | <directive> | <instruction>
 // The alternatives are attempted in exactly this order.
 parse_result_t parse_statement(tokenlist_entry_t *current) {
    parser_t alternatives[] = {
        parse_label,
        parse_directive,
        parse_instruction,
        nullptr, // end-of-table marker
    };
    return parse_any(current, alternatives);
 }
 // <program> ::= <statement>*
 // Entry point of the parser: collects as many statements as possible into a
 // NODE_PROGRAM node. A program without any statement is accepted.
 parse_result_t parse(tokenlist_entry_t *current) {
    const bool allow_none = true;
    return parse_many(current, NODE_PROGRAM, allow_none, parse_statement);
 }
--- a/src/parser.h
+++ b/src/parser.h
@ -0,0 +1,11 @@
 #ifndef INCLUDE_SRC_PARSER_H_
 #define INCLUDE_SRC_PARSER_H_
 #include "ast.h"
 #include "error.h"
 #include "parser_util.h"
 #include "tokenlist.h"
 parse_result_t parse(tokenlist_entry_t *current);
 #endif // INCLUDE_SRC_PARSER_H_
--- a/src/parser_combinators.c
+++ b/src/parser_combinators.c
@ -0,0 +1,124 @@
 #include "parser_combinators.h"
 // Parse a list of items accepted by `parser`, separated by tokens with the id
 // `delimiter_id`. The parsed items become the children of a new node with the
 // given `id`; the delimiters themselves are not stored in that node.
 //
 // A delimiter is only consumed when an item follows it. If the input ends
 // after a delimiter, or the item after it does not match, the returned `next`
 // entry is the one right after the last parsed item, so the dangling
 // delimiter stays available to the caller.
 //
 // Returns a no-match result when no item was found and `allow_none` is
 // false. Any other error reported by `parser` or by the ast allocation
 // functions is passed on and the partially built node is freed.
 parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id,
                           bool allow_none, lexer_token_id_t delimiter_id,
                           parser_t parser) {
    ast_node_t *many;
    error_t *err = ast_node_alloc(&many);
    if (err)
        return parse_error(err);
    many->id = id;

    // Entry following the last item that was parsed successfully. This is
    // what gets reported as `next`, which undoes a speculatively skipped
    // delimiter when no item follows it.
    tokenlist_entry_t *rest = current;
    while (current) {
        // Skip beyond the delimiter on all but the first iteration
        if (many->len > 0) {
            if (current->token.id != delimiter_id)
                break;
            current = tokenlist_next(current);
            if (current == nullptr)
                break;
        }
        parse_result_t result = parser(current);
        if (result.err == err_parse_no_match)
            break;
        if (result.err) {
            ast_node_free(many);
            return result;
        }
        err = ast_node_add_child(many, result.node);
        if (err) {
            ast_node_free(many);
            ast_node_free(result.node);
            return parse_error(err);
        }
        current = result.next;
        rest = current;
    }
    if (!allow_none && many->len == 0) {
        ast_node_free(many);
        return parse_no_match();
    }
    return parse_success(many, rest);
 }
 // Try every parser of the nullptr-terminated `parsers` table against
 // `current`, in table order, and return the result of the first one that
 // succeeds. Each parser is tried exactly once.
 //
 // A parser that reports "no match" simply lets the next alternative have a
 // go. Any other error is returned to the caller right away instead of being
 // turned into a "no match". If no alternative matches a no-match result is
 // returned.
 parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]) {
    for (size_t i = 0; parsers[i] != nullptr; ++i) {
        parse_result_t result = parsers[i](current);
        // Either a success (err == nullptr) or a real error: both end the
        // search.
        if (result.err != err_parse_no_match)
            return result;
    }
    return parse_no_match();
 }
 // Apply `parser` over and over, for as long as it keeps matching, and collect
 // the parsed nodes as children of a new ast node with the given node id.
 // When nothing matched at all the outcome depends on `allow_none`: either an
 // empty node is returned or a no-match result. Errors other than "no match"
 // abort the loop and are passed on to the caller.
 parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id,
                           bool allow_none, parser_t parser) {
    ast_node_t *collection;
    error_t *failure = ast_node_alloc(&collection);
    if (failure)
        return parse_error(failure);
    collection->id = id;

    while (current != nullptr) {
        parse_result_t attempt = parser(current);
        if (attempt.err == err_parse_no_match)
            break;
        if (attempt.err != nullptr) {
            ast_node_free(collection);
            return attempt;
        }
        failure = ast_node_add_child(collection, attempt.node);
        if (failure != nullptr) {
            ast_node_free(collection);
            ast_node_free(attempt.node);
            return parse_error(failure);
        }
        current = attempt.next;
    }

    const bool found_nothing = collection->len == 0;
    if (found_nothing && !allow_none) {
        ast_node_free(collection);
        return parse_no_match();
    }
    return parse_success(collection, current);
 }
 // Run every parser of the nullptr-terminated `parsers` table one after the
 // other, each exactly once and each starting where the previous one stopped.
 // If all of them succeed the parsed nodes are wrapped, in order, in a new
 // parent node with the given `id`.
 //
 // As soon as one parser fails its result is returned unchanged and the
 // partially built parent node is freed.
 //
 // NOTE(review): when the token list runs out before every parser had its
 // turn, the loop stops and the nodes parsed so far are returned as a
 // success. That keeps a trailing optional element working at the end of the
 // input but also accepts a truncated sequence -- confirm this is intended.
 parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
                                  parser_t parsers[]) {
    ast_node_t *all;
    error_t *err = ast_node_alloc(&all);
    if (err)
        return parse_error(err);
    all->id = id;

    for (size_t i = 0; parsers[i] != nullptr && current != nullptr; ++i) {
        parse_result_t result = parsers[i](current);
        if (result.err) {
            ast_node_free(all);
            return result;
        }
        err = ast_node_add_child(all, result.node);
        if (err) {
            ast_node_free(result.node);
            ast_node_free(all);
            return parse_error(err);
        }
        current = result.next;
    }
    return parse_success(all, current);
 }
--- a/src/parser_combinators.h
+++ b/src/parser_combinators.h
@ -0,0 +1,20 @@
 #ifndef INCLUDE_SRC_PARSER_COMBINATORS_H_
 #define INCLUDE_SRC_PARSER_COMBINATORS_H_
 #include "parser_util.h"
 // A parser takes the token list entry to start at and returns a parse result
 typedef parse_result_t (*parser_t)(tokenlist_entry_t *);
 // Try the parsers of the nullptr-terminated array in order and return the
 // result of the first one that succeeds
 parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]);
 // Parse as many of the given parser's objects in a row as possible,
 // potentially allowing none. Wraps the found objects in a new ast node with
 // the given node id
 parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id,
                           bool allow_none, parser_t parser);
 // Parse a list of the given parser delimited by the given token id. Does not
 // store the delimiters in the parent node
 parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id,
                           bool allow_none, lexer_token_id_t delimiter_id,
                           parser_t parser);
 // Tries to run all parsers of the nullptr-terminated array consecutively and
 // if that succeeds it wraps the parsed nodes in a new parent node.
 parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
                                  parser_t parsers[]);
 #endif // INCLUDE_SRC_PARSER_COMBINATORS_H_
--- a/src/parser_primitives.c
+++ b/src/parser_primitives.c
@ -0,0 +1,84 @@
 #include "parser_primitives.h"
 #include "ast.h"
 #include <string.h>
 parse_result_t parse_identifier(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_IDENTIFIER, nullptr);
 }
 parse_result_t parse_decimal(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_DECIMAL, NODE_DECIMAL, nullptr);
 }
 parse_result_t parse_hexadecimal(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_HEXADECIMAL, NODE_HEXADECIMAL, nullptr);
 }
 parse_result_t parse_binary(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_BINARY, NODE_BINARY, nullptr);
 }
 parse_result_t parse_octal(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_OCTAL, NODE_OCTAL, nullptr);
 }
 parse_result_t parse_string(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_STRING, NODE_STRING, nullptr);
 }
 parse_result_t parse_char(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_CHAR, NODE_CHAR, nullptr);
 }
 parse_result_t parse_colon(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_COLON, NODE_COLON, nullptr);
 }
 parse_result_t parse_comma(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_COMMA, NODE_COMMA, nullptr);
 }
 parse_result_t parse_lbracket(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_LBRACKET, NODE_LBRACKET, nullptr);
 }
 parse_result_t parse_rbracket(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_RBRACKET, NODE_RBRACKET, nullptr);
 }
 parse_result_t parse_plus(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_PLUS, NODE_PLUS, nullptr);
 }
 parse_result_t parse_minus(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_MINUS, NODE_MINUS, nullptr);
 }
 parse_result_t parse_asterisk(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_ASTERISK, NODE_ASTERISK, nullptr);
 }
 parse_result_t parse_dot(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
 }
 // Names accepted as registers; the table is terminated by a nullptr entry.
 const char *registers[] = {
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    nullptr,
 };
 // Validator for parse_token: true when the token's value is exactly one of
 // the names in the `registers` table.
 bool is_register_token(lexer_token_t *token) {
    for (const char **name = registers; *name != nullptr; ++name) {
        if (strcmp(token->value, *name) == 0)
            return true;
    }
    return false;
 }
 // A register is an identifier token whose value passes is_register_token; it
 // is stored as a NODE_REGISTER node.
 parse_result_t parse_register(tokenlist_entry_t *current) {
    const token_validator_t validator = is_register_token;
    return parse_token(current, TOKEN_IDENTIFIER, NODE_REGISTER, validator);
 }
 // Validator for parse_token: true when the token's value is exactly
 // "section".
 bool is_section_token(lexer_token_t *token) {
    const int difference = strcmp(token->value, "section");
    return difference == 0;
 }
 // The "section" keyword is an identifier token whose value passes
 // is_section_token; it is stored as a NODE_SECTION node.
 parse_result_t parse_section(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
                       is_section_token);
 }
--- a/src/parser_primitives.h
+++ b/src/parser_primitives.h
@ -0,0 +1,29 @@
 #ifndef INCLUDE_SRC_PARSER_PRIMITIVES_H_
 #define INCLUDE_SRC_PARSER_PRIMITIVES_H_
 #include "parser_util.h"
 parse_result_t parse_identifier(tokenlist_entry_t *current);
 parse_result_t parse_decimal(tokenlist_entry_t *current);
 parse_result_t parse_hexadecimal(tokenlist_entry_t *current);
 parse_result_t parse_binary(tokenlist_entry_t *current);
 parse_result_t parse_octal(tokenlist_entry_t *current);
 parse_result_t parse_string(tokenlist_entry_t *current);
 parse_result_t parse_char(tokenlist_entry_t *current);
 parse_result_t parse_colon(tokenlist_entry_t *current);
 parse_result_t parse_comma(tokenlist_entry_t *current);
 parse_result_t parse_lbracket(tokenlist_entry_t *current);
 parse_result_t parse_rbracket(tokenlist_entry_t *current);
 parse_result_t parse_plus(tokenlist_entry_t *current);
 parse_result_t parse_minus(tokenlist_entry_t *current);
 parse_result_t parse_asterisk(tokenlist_entry_t *current);
 parse_result_t parse_dot(tokenlist_entry_t *current);
 /* These are "primitives" with a different name and some extra validation on top
 * for example, register is just an identifier but it only matches a limited set
 * of values
 */
 parse_result_t parse_register(tokenlist_entry_t *current);
 parse_result_t parse_section(tokenlist_entry_t *current);
 #endif // INCLUDE_SRC_PARSER_PRIMITIVES_H_
--- a/src/parser_util.c
+++ b/src/parser_util.c
@ -0,0 +1,35 @@
 #include "parser_util.h"
 #include "tokenlist.h"
 // Sentinel error shared by every parser to signal "the tokens at this
 // position do not match". The parser combinators recognise it by comparing
 // the pointer itself, not the message.
 // NOTE(review): this object is not heap allocated -- confirm that error_free
 // copes with being handed this pointer.
 error_t *err_parse_no_match =
    &(error_t){.message = "parsing failed to find the correct token sequence"};
 // Build a failed parse result that carries `err`; the node and next entry of
 // the result are left empty.
 parse_result_t parse_error(error_t *err) {
    parse_result_t failure = {.err = err};
    return failure;
 }
 parse_result_t parse_no_match() {
    return parse_error(err_parse_no_match);
 }
 // Build a successful parse result for the node `ast`. Trivia entries starting
 // at `next` are skipped, so the result's next entry is either a non-trivia
 // token or null.
 parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next) {
    parse_result_t success = {
        .node = ast,
        .next = tokenlist_skip_trivia(next),
    };
    return success;
 }
 // Match a single token and wrap it in a new ast node.
 //
 // current:  entry to inspect; a null entry (end of the token list) never
 //           matches
 // token_id: lexer token id the entry must have
 // ast_id:   node id given to the created node
 // is_valid: optional extra check on the token, may be null
 //
 // Returns a no-match result when the entry is null, has a different token id
 // or is rejected by `is_valid`. Returns an error result when the node can't
 // be allocated. On success the node refers to `current` and the result's next
 // entry is the first non-trivia entry after it.
 parse_result_t parse_token(tokenlist_entry_t *current,
                            lexer_token_id_t token_id, node_id_t ast_id,
                            token_validator_t is_valid) {
    // Guard against callers that ran out of tokens instead of dereferencing
    // a null entry below.
    if (current == nullptr)
        return parse_no_match();
    if (current->token.id != token_id ||
        (is_valid && !is_valid(&current->token)))
        return parse_no_match();
    ast_node_t *node;
    error_t *err = ast_node_alloc(&node);
    if (err)
        return parse_error(err);
    node->id = ast_id;
    node->token_entry = current;
    return parse_success(node, current->next);
 }
--- a/src/parser_util.h
+++ b/src/parser_util.h
@ -0,0 +1,27 @@
 #ifndef INCLUDE_SRC_PARSER_UTIL_H_
 #define INCLUDE_SRC_PARSER_UTIL_H_
 #include "ast.h"
 #include "error.h"
 #include "tokenlist.h"
 // Outcome of running a parser on a position in the token list.
 typedef struct parse_result {
    // Set when parsing failed (see parse_error); left null by parse_success
    error_t *err;
    // Set by parse_success: the entry where parsing continues, with trivia
    // already skipped
    tokenlist_entry_t *next;
    // Set by parse_success: the node that was produced
    ast_node_t *node;
 } parse_result_t;
 typedef bool (*token_validator_t)(lexer_token_t *);
 parse_result_t parse_error(error_t *err);
 parse_result_t parse_no_match();
 parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next);
 parse_result_t parse_token(tokenlist_entry_t *current,
                           lexer_token_id_t token_id, node_id_t ast_id,
                           token_validator_t is_valid);
 tokenlist_entry_t *skip_insignificant(tokenlist_entry_t *);
 extern error_t *err_parse_no_match;
 #endif // INCLUDE_SRC_PARSER_UTIL_H_
--- a/src/tokenlist.c
+++ b/src/tokenlist.c
@ -81,3 +81,26 @@ error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex) {
        return err;
    return nullptr;
 }
 // True when the entry holds a whitespace, comment or newline token.
 bool is_trivia(tokenlist_entry_t *trivia) {
    return trivia->token.id == TOKEN_WHITESPACE ||
           trivia->token.id == TOKEN_COMMENT ||
           trivia->token.id == TOKEN_NEWLINE;
 }
 // Starting at `current` itself, walk forward to the first entry that is not
 // trivia. Returns null when the list ends before such an entry is found.
 tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current) {
    for (; current != nullptr; current = current->next) {
        if (!is_trivia(current))
            break;
    }
    return current;
 }
 // Step past `current` and return the first following entry that is not
 // trivia. A null `current` yields null.
 tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current) {
    return current ? tokenlist_skip_trivia(current->next) : nullptr;
 }
--- a/src/tokenlist.h
+++ b/src/tokenlist.h
@ -27,4 +27,14 @@ error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex);
 void tokenlist_free(tokenlist_t *list);
 /**
 * Return the first token entry that isn't whitespace, newline or comment
 */
 tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current);
 /**
 * Return the next token entry that isn't whitespace, newline or comment
 */
 tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current);
 #endif // INCLUDE_SRC_TOKENLIST_H_
--- a/validate.sh
+++ b/validate.sh
@ -10,7 +10,7 @@ scan-build -o reports/static-analysis/ -plist-html --status-bugs make all
 # Run the sanitizer builds and valgrind
 make clean sanitize all
-ARGUMENTS=("-tokens" "-text")
+ARGUMENTS=("tokens" "text" "ast")
 while IFS= read -r INPUT_FILE; do
    for ARGS in ${ARGUMENTS[@]}; do
        ./oas-asan $ARGS $INPUT_FILE > /dev/null
Author	SHA1	Message	Date
omicron	71f1b0aa64	Add more grammar rules to the parser All checks were successful Validate the build / validate-build (push) Successful in 26s Details	2025-04-01 23:43:05 +02:00
omicron	c3fcb917fc	Add a parser combinator to parse a delimited list	2025-04-01 23:39:48 +02:00
omicron	333991e05e	make parse_success always skip past trivia in the tokenlist	2025-04-01 23:39:01 +02:00
omicron	a1b4cc21f4	TODO: REVIEW ME AND WRITE PROPER MESSAGE Fix lexer issue where consuming n tokens always fails if there are n tokens and always succeeds if they aren't n tokens	2025-04-01 23:36:08 +02:00
omicron	80957326bc	fix operands list grammar rule	2025-04-01 23:22:29 +02:00
omicron	da51d66bb2	Match ast nodes to new grammar	2025-04-01 22:09:36 +02:00
omicron	42445338a4	Update grammar to match implementation	2025-04-01 22:01:33 +02:00
omicron	03fc44f339	Use new validator function for parse_token calls on all primitives Also adds new validated primitives for NODE_SECTION and NODE_REGISTER	2025-04-01 21:54:27 +02:00
omicron	eec02d6237	Fix incorrect error returned in parse_consecutive	2025-04-01 21:53:19 +02:00
omicron	39a4b2b0a7	Fix memory leak in ast. If a node has children the array of children was never freed.	2025-04-01 21:51:48 +02:00
omicron	048b8fcf9d	Extend parse_token to accept an optional validator function	2025-04-01 21:10:19 +02:00
omicron	1b21364939	Expose err_parse_no_match in parser_util.h	2025-04-01 20:57:34 +02:00
omicron	41114d7f9c	Add basic parser combinators	2025-04-01 20:05:35 +02:00
omicron	dfc89a7493	Add "primitive" parsers for all the semantic tokens in the lexer grammar	2025-04-01 20:03:53 +02:00
omicron	43a62095bf	Add basic parser utilities	2025-04-01 20:03:28 +02:00
omicron	ff7d33bf2a	Add functions to skip over trivia in a tokenlist	2025-04-01 19:55:00 +02:00
omicron	208f30ac48	Expand AST node ids to support the lexer tokens and grammar rules	2025-04-01 19:26:54 +02:00
omicron	988b54aee3	Adjust grammar so that it never depends on newline tokens	2025-04-01 19:26:27 +02:00
omicron	ed1491db33	Fix parse_token to add the correct information to a parse node	2025-04-01 17:20:50 +02:00
omicron	0bf4ba3a1b	Fix ast nodes now containing token entry instead of token	2025-04-01 17:20:32 +02:00
omicron	e632764bf2	Partial parser implementation	2025-04-01 17:16:21 +02:00
omicron	a298e99895	Add invalid ast node id	2025-04-01 15:06:42 +02:00
omicron	126905a092	FIXME REORDER COMMIT -- Change main so it can parse the ast FIXME THIS COMMIT NEEDS TO BE REORDERED FIXME THIS COMMIT NEEDS TO BE REORDERED FIXME THIS COMMIT NEEDS TO BE REORDERED FIXME THIS COMMIT NEEDS TO BE REORDERED	2025-04-01 15:06:20 +02:00