Update the test input file to contain all AST nodes

add functionality to main to parse and print the ast
Add parse_result_wrap to wrap a result with another parent node
2025-04-02 21:41:27 +02:00 · 2025-04-02 20:57:02 +02:00 · 2025-04-02 20:57:02 +02:00 · 2025-04-02 20:56:59 +02:00 · 2025-04-02 20:41:49 +02:00 · 2025-04-02 20:41:49 +02:00
6 changed files with 47 additions and 8 deletions
@@ -9,7 +9,8 @@
 parse_result_t parse_number(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal,
                          parse_binary, nullptr};
-    return parse_any(current, parsers);
+    parse_result_t result = parse_any(current, parsers);
    return parse_result_wrap(NODE_NUMBER, result);
 }
 parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) {
@@ -20,12 +21,12 @@ parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) {
 parse_result_t parse_register_index(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_plus, parse_register, parse_asterisk,
                          parse_number, nullptr};
-    return parse_consecutive(current, NODE_LABEL, parsers);
+    return parse_consecutive(current, NODE_REGISTER_INDEX, parsers);
 }
 parse_result_t parse_register_offset(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr};
-    return parse_consecutive(current, NODE_LABEL, parsers);
+    return parse_consecutive(current, NODE_REGISTER_OFFSET, parsers);
 }
 parse_result_t parse_register_expression(tokenlist_entry_t *current) {
@@ -35,6 +36,7 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
    error_t *err = ast_node_alloc(&expr);
    if (err)
        return parse_error(err);
    expr->id = NODE_REGISTER_EXPRESSION;
    // <register>
    result = parse_register(current);
@@ -82,7 +84,8 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
 parse_result_t parse_immediate(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_number, parse_identifier, nullptr};
-    return parse_any(current, parsers);
+    parse_result_t result = parse_any(current, parsers);
    return parse_result_wrap(NODE_IMMEDIATE, result);
 }
 parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
@@ -93,7 +96,7 @@ parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
 parse_result_t parse_memory(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_lbracket, parse_memory_expression,
                          parse_rbracket, nullptr};
-    return parse_consecutive(current, NODE_LABEL, parsers);
+    return parse_consecutive(current, NODE_MEMORY, parsers);
 }
 parse_result_t parse_operand(tokenlist_entry_t *current) {
@@ -62,6 +62,11 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
 }
 /* Parse a label reference: one TOKEN_IDENTIFIER token turned into a
  * NODE_LABEL_REFERENCE node. No validator is passed to parse_token. */
 parse_result_t parse_label_reference(tokenlist_entry_t *current) {
     parse_result_t reference =
         parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE, nullptr);
     return reference;
 }
 const char *registers[] = {
    // 64-bit registers
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",
@@ -75,6 +80,7 @@ const char *registers[] = {
    // 8-bit low registers
    "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b",
    "r11b", "r12b", "r13b", "r14b", "r15b", nullptr};
 bool is_register_token(lexer_token_t *token) {
    for (size_t i = 0; registers[i] != nullptr; ++i)
        if (strcmp(token->value, registers[i]) == 0)
@@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
 parse_result_t parse_minus(tokenlist_entry_t *current);
 parse_result_t parse_asterisk(tokenlist_entry_t *current);
 parse_result_t parse_dot(tokenlist_entry_t *current);
 parse_result_t parse_label_reference(tokenlist_entry_t *current);
 /* These are "primitives" with a different name and some extra validation on top
 * for example, register is just an identifier but it only matches a limited set
@@ -33,3 +33,24 @@ parse_result_t parse_token(tokenlist_entry_t *current,
    return parse_success(node, current->next);
 }
 /**
  * Wrap the node of a parse result in a newly allocated parent node.
  *
  * @param id     node id assigned to the new parent node
  * @param result parse result to wrap; if it carries an error it is returned
  *               unchanged and nothing is allocated
  * @return on success, a result whose node is the new parent (with result.node
  *         added as its child) and whose next token is result.next; on an
  *         allocation or add-child failure, a parse_error result. In both
  *         failure cases result.node is freed here.
  */
 parse_result_t parse_result_wrap(node_id_t id, parse_result_t result) {
     if (result.err)
         return result;

     ast_node_t *node;
     error_t *err = ast_node_alloc(&node);
     if (err) {
         ast_node_free(result.node);
         return parse_error(err);
     }
     node->id = id;

     err = ast_node_add_child(node, result.node);
     if (err) {
         // Release the parent as well, otherwise it leaks on this path.
         // NOTE(review): assumes a failed ast_node_add_child leaves the child
         // unattached (the child was already freed separately here) - confirm.
         ast_node_free(node);
         ast_node_free(result.node);
         return parse_error(err);
     }

     return parse_success(node, result.next);
 }
@@ -19,8 +19,7 @@ parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next);
 parse_result_t parse_token(tokenlist_entry_t *current,
                           lexer_token_id_t token_id, node_id_t ast_id,
                           token_validator_t is_valid);
-
+parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);
 tokenlist_entry_t *skip_insignificant(tokenlist_entry_t *);
 extern error_t *err_parse_no_match;
@@ -1,8 +1,17 @@
 .section text
 ; Small valid code snippet that should contain all different AST nodes
 _start:
    mov eax, ebx
-    mov eax, 555            ; move 555 into eax
+    lea eax, [eax + ebx * 4 + 8]
    lea eax, [eax + 8]
    lea eax, [eax + ebx * 8]
    lea eax, [esp - 24]
    lea eax, [eax + ebx * 4 - 8]
    lea eax, [_start]
    mov eax, _start
    mov eax, 555
    push 0o777
    xor eax, 0xDEADBEEF
    and ecx, 0o770
Author	SHA1	Message	Date
omicron	0d3881f680	Update the test input file to contain all AST nodes (CI: Validate the build / validate-build (push), successful in 36s)	2025-04-02 21:41:27 +02:00
omicron	5ea942024f	add functionality to main to parse and print the ast	2025-04-02 20:57:02 +02:00
omicron	b4757e008c	Add parse_result_wrap to wrap a result with another parent node. Use the new wrap function to wrap numbers and immediate nodes.	2025-04-02 20:57:02 +02:00
omicron	b70b6896bf	Partial parser implementation	2025-04-02 20:56:59 +02:00
omicron	6ca7bb3661	Fix incorrect size comparison in lexer_consume_n. The buffer length len and the requested number of tokens n are mixed up in an invalid comparison. This causes all valid requests for n < len tokens to be denied and all invalid requests for n > len tokens to be accepted. This may cause a buffer overflow if the caller requests more characters than they provide space for.	2025-04-02 20:41:49 +02:00
omicron	d424c0f886	Add a parser combinator to parse a delimited list	2025-04-02 20:41:49 +02:00
omicron	c66489dd90	Add basic parser combinators	2025-04-02 20:41:49 +02:00
omicron	44fa66c2b7	Add "primitive" parsers for all the non-trivia tokens in the lexer grammar	2025-04-02 20:41:42 +02:00
omicron	c48adb1306	Add basic parser utilities	2025-04-02 20:38:35 +02:00