Fix zero operand parser bugs #17
@ -1,13 +1,13 @@
|
||||
<program> ::= <statement>*
|
||||
<statement> ::= <label> | <directive> | <instruction>
|
||||
<statement> ::= <label> | <directive> | <instruction> | <newline>
|
||||
|
||||
<label> ::= <identifier> <colon>
|
||||
|
||||
<directive> ::= <dot> <section_directive>
|
||||
<directive> ::= <dot> <section_directive> <newline>
|
||||
|
||||
<section_directive> ::= "section" <identifier>
|
||||
|
||||
<instruction> ::= <identifier> <operands>
|
||||
<instruction> ::= <identifier> <operands> <newline>
|
||||
|
||||
<operands> ::= <operand> ( <comma> <operand> )*
|
||||
|
||||
|
20
src/ast.c
20
src/ast.c
@ -157,6 +157,8 @@ const char *ast_node_id_to_cstr(node_id_t id) {
|
||||
return "NODE_ASTERISK";
|
||||
case NODE_DOT:
|
||||
return "NODE_DOT";
|
||||
case NODE_NEWLINE:
|
||||
return "NODE_NEWLINE";
|
||||
}
|
||||
assert(!"Unreachable, weird node id" && id);
|
||||
__builtin_unreachable();
|
||||
@ -172,7 +174,8 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
|
||||
}
|
||||
printf("%s", ast_node_id_to_cstr(node->id));
|
||||
|
||||
if (node->token_entry && node->token_entry->token.value) {
|
||||
if (node->token_entry && node->token_entry->token.value &&
|
||||
node->id != NODE_NEWLINE) {
|
||||
printf(" \"%s\"", node->token_entry->token.value);
|
||||
}
|
||||
printf("\n");
|
||||
@ -185,3 +188,18 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
|
||||
void ast_node_print(ast_node_t *node) {
|
||||
ast_node_print_internal(node, 0);
|
||||
}
|
||||
|
||||
void ast_node_prune(ast_node_t *node, node_id_t id) {
|
||||
size_t new_len = 0;
|
||||
for (size_t i = 0; i < node->len; i++) {
|
||||
auto child = node->children[i];
|
||||
if (child->id == id) {
|
||||
ast_node_free(child);
|
||||
continue;
|
||||
}
|
||||
ast_node_prune(child, id);
|
||||
node->children[new_len] = child;
|
||||
new_len++;
|
||||
}
|
||||
node->len = new_len;
|
||||
}
|
||||
|
14
src/ast.h
14
src/ast.h
@ -50,6 +50,7 @@ typedef enum node_id {
|
||||
NODE_MINUS,
|
||||
NODE_ASTERISK,
|
||||
NODE_DOT,
|
||||
NODE_NEWLINE,
|
||||
} node_id_t;
|
||||
|
||||
typedef struct ast_node ast_node_t;
|
||||
@ -119,4 +120,17 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
|
||||
*/
|
||||
void ast_node_print(ast_node_t *node);
|
||||
|
||||
/**
|
||||
* Prune the children with a given id
|
||||
*
|
||||
* The tree is recursively visited and all child nodes of a given ID are pruned
|
||||
* completely. If a node has the giver id, it will get removed along wih all its
|
||||
* children, even if some of those children have different ids. The root node id
|
||||
* is never checked so the tree is guaranteed to remain and allocated valid.
|
||||
*
|
||||
* @param node The root of the tree you want to prune
|
||||
* @param id The id of the nodes you want to prune
|
||||
*/
|
||||
void ast_node_prune(ast_node_t *node, node_id_t id);
|
||||
|
||||
#endif // INCLUDE_SRC_AST_H_
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "combinators.h"
|
||||
#include "util.h"
|
||||
|
||||
// Parse a list of the given parser delimited by the given token id. Does not
|
||||
// store the delimiters in the parent node
|
||||
@ -122,5 +123,12 @@ parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
|
||||
}
|
||||
current = result.next;
|
||||
}
|
||||
|
||||
// token stream ended before we matched all parsers
|
||||
if (parser != nullptr) {
|
||||
ast_node_free(all);
|
||||
return parse_no_match();
|
||||
}
|
||||
|
||||
return parse_success(all, current);
|
||||
}
|
||||
|
@ -120,22 +120,28 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
|
||||
}
|
||||
|
||||
parse_result_t parse_directive(tokenlist_entry_t *current) {
|
||||
parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
|
||||
parser_t parsers[] = {parse_dot, parse_section_directive, parse_newline,
|
||||
nullptr};
|
||||
return parse_consecutive(current, NODE_DIRECTIVE, parsers);
|
||||
}
|
||||
|
||||
parse_result_t parse_instruction(tokenlist_entry_t *current) {
|
||||
parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
|
||||
parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
|
||||
nullptr};
|
||||
return parse_consecutive(current, NODE_INSTRUCTION, parsers);
|
||||
}
|
||||
|
||||
parse_result_t parse_statement(tokenlist_entry_t *current) {
|
||||
parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
|
||||
nullptr};
|
||||
parse_newline, nullptr};
|
||||
return parse_any(current, parsers);
|
||||
}
|
||||
|
||||
parse_result_t parse(tokenlist_entry_t *current) {
|
||||
current = tokenlist_skip_trivia(current);
|
||||
return parse_many(current, NODE_PROGRAM, true, parse_statement);
|
||||
parse_result_t result =
|
||||
parse_many(current, NODE_PROGRAM, true, parse_statement);
|
||||
if (result.node != nullptr)
|
||||
ast_node_prune(result.node, NODE_NEWLINE);
|
||||
return result;
|
||||
}
|
||||
|
@ -62,6 +62,10 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
|
||||
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
|
||||
}
|
||||
|
||||
parse_result_t parse_newline(tokenlist_entry_t *current) {
|
||||
return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
|
||||
}
|
||||
|
||||
parse_result_t parse_label_reference(tokenlist_entry_t *current) {
|
||||
return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
|
||||
nullptr);
|
||||
|
@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
|
||||
parse_result_t parse_minus(tokenlist_entry_t *current);
|
||||
parse_result_t parse_asterisk(tokenlist_entry_t *current);
|
||||
parse_result_t parse_dot(tokenlist_entry_t *current);
|
||||
parse_result_t parse_newline(tokenlist_entry_t *current);
|
||||
parse_result_t parse_label_reference(tokenlist_entry_t *current);
|
||||
|
||||
/* These are "primitives" with a different name and some extra validation on top
|
||||
|
@ -86,7 +86,6 @@ bool is_trivia(tokenlist_entry_t *trivia) {
|
||||
switch (trivia->token.id) {
|
||||
case TOKEN_WHITESPACE:
|
||||
case TOKEN_COMMENT:
|
||||
case TOKEN_NEWLINE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
5
tests/input/regression/test_no_operands_eof.asm
Normal file
5
tests/input/regression/test_no_operands_eof.asm
Normal file
@ -0,0 +1,5 @@
|
||||
; regression test for two issues:
|
||||
; - parsing two zero operand instructions in a row
|
||||
; - a zero operand instruction just before eof
|
||||
syscall
|
||||
ret
|
@ -23,9 +23,46 @@ MunitResult test_regression_trivia_head(const MunitParameter params[], void *dat
|
||||
|
||||
ast_node_free(result.node);
|
||||
tokenlist_free(list);
|
||||
return MUNIT_OK;
|
||||
}
|
||||
|
||||
MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
|
||||
(void)params;
|
||||
(void)data;
|
||||
|
||||
lexer_t *lex = &(lexer_t){};
|
||||
error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
|
||||
munit_assert_null(err);
|
||||
|
||||
tokenlist_t *list;
|
||||
err = tokenlist_alloc(&list);
|
||||
munit_assert_null(err);
|
||||
|
||||
err = tokenlist_fill(list, lex);
|
||||
munit_assert_null(err);
|
||||
|
||||
parse_result_t result = parse(list->head);
|
||||
munit_assert_null(result.err);
|
||||
munit_assert_null(result.next);
|
||||
|
||||
// Both children should be instructions
|
||||
munit_assert_size(result.node->len, ==, 2);
|
||||
munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
|
||||
munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);
|
||||
|
||||
// And they should have empty operands
|
||||
munit_assert_size(result.node->children[0]->len, ==, 2);
|
||||
munit_assert_size(result.node->children[1]->len, ==, 2);
|
||||
munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
|
||||
munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);
|
||||
|
||||
ast_node_free(result.node);
|
||||
tokenlist_free(list);
|
||||
return MUNIT_OK;
|
||||
}
|
||||
|
||||
MunitTest regression_tests[] = {
|
||||
{"/trivia_head", test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
|
||||
{"/no_operands_eof", test_no_operands_eof, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
|
||||
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user