Fix zero operand parser bugs #17

Merged
omicron merged 4 commits from fix_parser_bugs into main 2025-04-16 11:32:47 +00:00
10 changed files with 103 additions and 11 deletions

View File

@ -1,13 +1,13 @@
<program> ::= <statement>* <program> ::= <statement>*
<statement> ::= <label> | <directive> | <instruction> <statement> ::= <label> | <directive> | <instruction> | <newline>
<label> ::= <identifier> <colon> <label> ::= <identifier> <colon>
<directive> ::= <dot> <section_directive> <directive> ::= <dot> <section_directive> <newline>
<section_directive> ::= "section" <identifier> <section_directive> ::= "section" <identifier>
<instruction> ::= <identifier> <operands> <instruction> ::= <identifier> <operands> <newline>
<operands> ::= <operand> ( <comma> <operand> )* <operands> ::= <operand> ( <comma> <operand> )*

View File

@ -157,6 +157,8 @@ const char *ast_node_id_to_cstr(node_id_t id) {
return "NODE_ASTERISK"; return "NODE_ASTERISK";
case NODE_DOT: case NODE_DOT:
return "NODE_DOT"; return "NODE_DOT";
case NODE_NEWLINE:
return "NODE_NEWLINE";
} }
assert(!"Unreachable, weird node id" && id); assert(!"Unreachable, weird node id" && id);
__builtin_unreachable(); __builtin_unreachable();
@ -172,7 +174,8 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
} }
printf("%s", ast_node_id_to_cstr(node->id)); printf("%s", ast_node_id_to_cstr(node->id));
if (node->token_entry && node->token_entry->token.value) { if (node->token_entry && node->token_entry->token.value &&
node->id != NODE_NEWLINE) {
printf(" \"%s\"", node->token_entry->token.value); printf(" \"%s\"", node->token_entry->token.value);
} }
printf("\n"); printf("\n");
@ -185,3 +188,18 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
void ast_node_print(ast_node_t *node) { void ast_node_print(ast_node_t *node) {
ast_node_print_internal(node, 0); ast_node_print_internal(node, 0);
} }
void ast_node_prune(ast_node_t *node, node_id_t id) {
size_t new_len = 0;
for (size_t i = 0; i < node->len; i++) {
auto child = node->children[i];
if (child->id == id) {
ast_node_free(child);
continue;
}
ast_node_prune(child, id);
node->children[new_len] = child;
new_len++;
}
node->len = new_len;
}

View File

@ -50,6 +50,7 @@ typedef enum node_id {
NODE_MINUS, NODE_MINUS,
NODE_ASTERISK, NODE_ASTERISK,
NODE_DOT, NODE_DOT,
NODE_NEWLINE,
} node_id_t; } node_id_t;
typedef struct ast_node ast_node_t; typedef struct ast_node ast_node_t;
@ -119,4 +120,17 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
*/ */
void ast_node_print(ast_node_t *node); void ast_node_print(ast_node_t *node);
/**
* Prune the children with a given id
*
* The tree is recursively visited and all child nodes of a given ID are pruned
* completely. If a node has the giver id, it will get removed along wih all its
* children, even if some of those children have different ids. The root node id
* is never checked so the tree is guaranteed to remain and allocated valid.
*
* @param node The root of the tree you want to prune
* @param id The id of the nodes you want to prune
*/
void ast_node_prune(ast_node_t *node, node_id_t id);
#endif // INCLUDE_SRC_AST_H_ #endif // INCLUDE_SRC_AST_H_

View File

@ -1,4 +1,5 @@
#include "combinators.h" #include "combinators.h"
#include "util.h"
// Parse a list of the given parser delimited by the given token id. Does not // Parse a list of the given parser delimited by the given token id. Does not
// store the delimiters in the parent node // store the delimiters in the parent node
@ -122,5 +123,12 @@ parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
} }
current = result.next; current = result.next;
} }
// token stream ended before we matched all parsers
if (parser != nullptr) {
ast_node_free(all);
return parse_no_match();
}
return parse_success(all, current); return parse_success(all, current);
} }

View File

@ -120,22 +120,28 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
} }
parse_result_t parse_directive(tokenlist_entry_t *current) { parse_result_t parse_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_dot, parse_section_directive, nullptr}; parser_t parsers[] = {parse_dot, parse_section_directive, parse_newline,
nullptr};
return parse_consecutive(current, NODE_DIRECTIVE, parsers); return parse_consecutive(current, NODE_DIRECTIVE, parsers);
} }
parse_result_t parse_instruction(tokenlist_entry_t *current) { parse_result_t parse_instruction(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_identifier, parse_operands, nullptr}; parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
nullptr};
return parse_consecutive(current, NODE_INSTRUCTION, parsers); return parse_consecutive(current, NODE_INSTRUCTION, parsers);
} }
parse_result_t parse_statement(tokenlist_entry_t *current) { parse_result_t parse_statement(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_label, parse_directive, parse_instruction, parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
nullptr}; parse_newline, nullptr};
return parse_any(current, parsers); return parse_any(current, parsers);
} }
parse_result_t parse(tokenlist_entry_t *current) { parse_result_t parse(tokenlist_entry_t *current) {
current = tokenlist_skip_trivia(current); current = tokenlist_skip_trivia(current);
return parse_many(current, NODE_PROGRAM, true, parse_statement); parse_result_t result =
parse_many(current, NODE_PROGRAM, true, parse_statement);
if (result.node != nullptr)
ast_node_prune(result.node, NODE_NEWLINE);
return result;
} }

View File

@ -62,6 +62,10 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr); return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
} }
parse_result_t parse_newline(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
}
parse_result_t parse_label_reference(tokenlist_entry_t *current) { parse_result_t parse_label_reference(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE, return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
nullptr); nullptr);

View File

@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
parse_result_t parse_minus(tokenlist_entry_t *current); parse_result_t parse_minus(tokenlist_entry_t *current);
parse_result_t parse_asterisk(tokenlist_entry_t *current); parse_result_t parse_asterisk(tokenlist_entry_t *current);
parse_result_t parse_dot(tokenlist_entry_t *current); parse_result_t parse_dot(tokenlist_entry_t *current);
parse_result_t parse_newline(tokenlist_entry_t *current);
parse_result_t parse_label_reference(tokenlist_entry_t *current); parse_result_t parse_label_reference(tokenlist_entry_t *current);
/* These are "primitives" with a different name and some extra validation on top /* These are "primitives" with a different name and some extra validation on top

View File

@ -86,7 +86,6 @@ bool is_trivia(tokenlist_entry_t *trivia) {
switch (trivia->token.id) { switch (trivia->token.id) {
case TOKEN_WHITESPACE: case TOKEN_WHITESPACE:
case TOKEN_COMMENT: case TOKEN_COMMENT:
case TOKEN_NEWLINE:
return true; return true;
default: default:
return false; return false;

View File

@ -0,0 +1,5 @@
; regression test for two issues:
; - parsing two zero operand instructions in a row
; - a zero operand instruction just before eof
syscall
ret

View File

@ -23,9 +23,46 @@ MunitResult test_regression_trivia_head(const MunitParameter params[], void *dat
ast_node_free(result.node); ast_node_free(result.node);
tokenlist_free(list); tokenlist_free(list);
return MUNIT_OK;
}
MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
(void)params;
(void)data;
lexer_t *lex = &(lexer_t){};
error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
munit_assert_null(err);
tokenlist_t *list;
err = tokenlist_alloc(&list);
munit_assert_null(err);
err = tokenlist_fill(list, lex);
munit_assert_null(err);
parse_result_t result = parse(list->head);
munit_assert_null(result.err);
munit_assert_null(result.next);
// Both children should be instructions
munit_assert_size(result.node->len, ==, 2);
munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);
// And they should have empty operands
munit_assert_size(result.node->children[0]->len, ==, 2);
munit_assert_size(result.node->children[1]->len, ==, 2);
munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);
ast_node_free(result.node);
tokenlist_free(list);
return MUNIT_OK;
} }
MunitTest regression_tests[] = { MunitTest regression_tests[] = {
{"/trivia_head", test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, {"/trivia_head", test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr} {"/no_operands_eof", test_no_operands_eof, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
}; };