Fix zero operand parser bugs #17
@@ -1,13 +1,13 @@
 <program> ::= <statement>*
-<statement> ::= <label> | <directive> | <instruction>
+<statement> ::= <label> | <directive> | <instruction> | <newline>
 
 <label> ::= <identifier> <colon>
 
-<directive> ::= <dot> <section_directive>
+<directive> ::= <dot> <section_directive> <newline>
 
 <section_directive> ::= "section" <identifier>
 
-<instruction> ::= <identifier> <operands>
+<instruction> ::= <identifier> <operands> <newline>
 
 <operands> ::= <operand> ( <comma> <operand> )*
 
src/ast.c (20 changed lines)
@@ -157,6 +157,8 @@ const char *ast_node_id_to_cstr(node_id_t id) {
         return "NODE_ASTERISK";
     case NODE_DOT:
         return "NODE_DOT";
+    case NODE_NEWLINE:
+        return "NODE_NEWLINE";
     }
     assert(!"Unreachable, weird node id" && id);
     __builtin_unreachable();
@@ -172,7 +174,8 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
     }
     printf("%s", ast_node_id_to_cstr(node->id));
 
-    if (node->token_entry && node->token_entry->token.value) {
+    if (node->token_entry && node->token_entry->token.value &&
+        node->id != NODE_NEWLINE) {
         printf(" \"%s\"", node->token_entry->token.value);
     }
     printf("\n");
@@ -185,3 +188,18 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
 void ast_node_print(ast_node_t *node) {
     ast_node_print_internal(node, 0);
 }
+
+void ast_node_prune(ast_node_t *node, node_id_t id) {
+    size_t new_len = 0;
+    for (size_t i = 0; i < node->len; i++) {
+        auto child = node->children[i];
+        if (child->id == id) {
+            ast_node_free(child);
+            continue;
+        }
+        ast_node_prune(child, id);
+        node->children[new_len] = child;
+        new_len++;
+    }
+    node->len = new_len;
+}
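ast_node_prune above removes matching children with the usual in-place compaction idiom: a write index trails the read index, survivors are shifted down, and len shrinks at the end. A standalone sketch of the same pattern on a plain int array (illustration only, not project code; the real function additionally recurses into survivors and frees the pruned subtrees):

#include <stddef.h>

/* Remove every element equal to `drop` from arr[0..*len), compacting the
 * survivors to the front and shrinking *len, the same way ast_node_prune
 * compacts node->children (minus the recursion and the ast_node_free). */
static void compact_remove(int *arr, size_t *len, int drop) {
    size_t new_len = 0;
    for (size_t i = 0; i < *len; i++) {
        if (arr[i] == drop)
            continue;            /* pruned: skip instead of copying */
        arr[new_len++] = arr[i]; /* survivor: move down to the next free slot */
    }
    *len = new_len;
}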
src/ast.h (14 changed lines)
@@ -50,6 +50,7 @@ typedef enum node_id {
     NODE_MINUS,
     NODE_ASTERISK,
     NODE_DOT,
+    NODE_NEWLINE,
 } node_id_t;
 
 typedef struct ast_node ast_node_t;
@@ -119,4 +120,17 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
  */
 void ast_node_print(ast_node_t *node);
 
+/**
+ * Prune the children with a given id
+ *
+ * The tree is recursively visited and all child nodes of a given ID are pruned
+ * completely. If a node has the given id, it will get removed along with all its
+ * children, even if some of those children have different ids. The root node id
+ * is never checked so the tree is guaranteed to remain allocated and valid.
+ *
+ * @param node The root of the tree you want to prune
+ * @param id The id of the nodes you want to prune
+ */
+void ast_node_prune(ast_node_t *node, node_id_t id);
+
 #endif // INCLUDE_SRC_AST_H_
@@ -1,4 +1,5 @@
 #include "combinators.h"
+#include "util.h"
 
 // Parse a list of the given parser delimited by the given token id. Does not
 // store the delimiters in the parent node
@@ -122,5 +123,12 @@ parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
         }
         current = result.next;
     }
+
+    // token stream ended before we matched all parsers
+    if (parser != nullptr) {
+        ast_node_free(all);
+        return parse_no_match();
+    }
+
     return parse_success(all, current);
 }
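The new post-loop check in parse_consecutive is the heart of the zero-operand fix: if the token stream runs dry while parsers are still pending, the partially built node is freed and the whole sequence reports no match instead of a truncated success. A minimal standalone sketch of that shape (hypothetical step_fn/run_sequence names, assuming C23 just as the project's use of nullptr does):

#include <stdbool.h>

typedef bool (*step_fn)(const char **cursor); /* hypothetical step: consume or fail */

/* Walk a nullptr-terminated list of steps over a cursor. Exiting the loop
 * while a step is still pending means the input ended mid-sequence, which
 * must count as failure rather than "success so far". */
static bool run_sequence(const char **cursor, step_fn *steps) {
    step_fn *step = steps;
    for (; *step != nullptr; step++) {
        if (**cursor == '\0')
            break;           /* input exhausted before the sequence finished */
        if (!(*step)(cursor))
            return false;    /* a step rejected the input outright */
    }
    return *step == nullptr; /* success only if every step got to run */
}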
@@ -120,22 +120,28 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
 }
 
 parse_result_t parse_directive(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
+    parser_t parsers[] = {parse_dot, parse_section_directive, parse_newline,
+                          nullptr};
     return parse_consecutive(current, NODE_DIRECTIVE, parsers);
 }
 
 parse_result_t parse_instruction(tokenlist_entry_t *current) {
-    parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
+    parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
+                          nullptr};
     return parse_consecutive(current, NODE_INSTRUCTION, parsers);
 }
 
 parse_result_t parse_statement(tokenlist_entry_t *current) {
     parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
-                          nullptr};
+                          parse_newline, nullptr};
     return parse_any(current, parsers);
 }
 
 parse_result_t parse(tokenlist_entry_t *current) {
     current = tokenlist_skip_trivia(current);
-    return parse_many(current, NODE_PROGRAM, true, parse_statement);
+    parse_result_t result =
+        parse_many(current, NODE_PROGRAM, true, parse_statement);
+    if (result.node != nullptr)
+        ast_node_prune(result.node, NODE_NEWLINE);
+    return result;
 }
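With parse() pruning NODE_NEWLINE before returning, the newline markers are purely internal: callers should never see them in the finished tree. A small check a test could perform (hypothetical helper, not part of this PR; it only relies on the ast_node_t fields visible in this diff, and the ast.h include path is assumed):

#include <stdbool.h>
#include <stddef.h>
#include "ast.h" /* assumed include path for ast_node_t / node_id_t */

/* Recursively search the tree for any surviving node with the given id;
 * after parse() this should return false for NODE_NEWLINE. */
static bool tree_contains(const ast_node_t *node, node_id_t id) {
    if (node->id == id)
        return true;
    for (size_t i = 0; i < node->len; i++)
        if (tree_contains(node->children[i], id))
            return true;
    return false;
}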
@@ -62,6 +62,10 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
     return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
 }
 
+parse_result_t parse_newline(tokenlist_entry_t *current) {
+    return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
+}
+
 parse_result_t parse_label_reference(tokenlist_entry_t *current) {
     return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
                        nullptr);
@@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
 parse_result_t parse_minus(tokenlist_entry_t *current);
 parse_result_t parse_asterisk(tokenlist_entry_t *current);
 parse_result_t parse_dot(tokenlist_entry_t *current);
+parse_result_t parse_newline(tokenlist_entry_t *current);
 parse_result_t parse_label_reference(tokenlist_entry_t *current);
 
 /* These are "primitives" with a different name and some extra validation on top
@@ -86,7 +86,6 @@ bool is_trivia(tokenlist_entry_t *trivia) {
     switch (trivia->token.id) {
     case TOKEN_WHITESPACE:
     case TOKEN_COMMENT:
-    case TOKEN_NEWLINE:
         return true;
     default:
         return false;
tests/input/regression/test_no_operands_eof.asm (new file, 5 lines)
@@ -0,0 +1,5 @@
+; regression test for two issues:
+; - parsing two zero operand instructions in a row
+; - a zero operand instruction just before eof
+syscall
+ret
@@ -23,9 +23,46 @@ MunitResult test_regression_trivia_head(const MunitParameter params[], void *data) {
 
     ast_node_free(result.node);
     tokenlist_free(list);
+    return MUNIT_OK;
+}
+
+MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
+    (void)params;
+    (void)data;
+
+    lexer_t *lex = &(lexer_t){};
+    error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
+    munit_assert_null(err);
+
+    tokenlist_t *list;
+    err = tokenlist_alloc(&list);
+    munit_assert_null(err);
+
+    err = tokenlist_fill(list, lex);
+    munit_assert_null(err);
+
+    parse_result_t result = parse(list->head);
+    munit_assert_null(result.err);
+    munit_assert_null(result.next);
+
+    // Both children should be instructions
+    munit_assert_size(result.node->len, ==, 2);
+    munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
+    munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);
+
+    // And they should have empty operands
+    munit_assert_size(result.node->children[0]->len, ==, 2);
+    munit_assert_size(result.node->children[1]->len, ==, 2);
+    munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
+    munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);
+
+    ast_node_free(result.node);
+    tokenlist_free(list);
+    return MUNIT_OK;
 }
 
 MunitTest regression_tests[] = {
     {"/trivia_head", test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
+    {"/no_operands_eof", test_no_operands_eof, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
     {nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
 };