Fix zero operand parser bugs #17
| @@ -1,13 +1,13 @@ | |||||||
| <program>   ::= <statement>* | <program>   ::= <statement>* | ||||||
| <statement> ::= <label> | <directive> | <instruction> | <statement> ::= <label> | <directive> | <instruction> | <newline> | ||||||
|  |  | ||||||
| <label> ::= <identifier> <colon> | <label> ::= <identifier> <colon> | ||||||
|  |  | ||||||
| <directive> ::= <dot> <section_directive> | <directive> ::= <dot> <section_directive> <newline> | ||||||
|  |  | ||||||
| <section_directive> ::= "section" <identifier> | <section_directive> ::= "section" <identifier> | ||||||
|  |  | ||||||
| <instruction> ::= <identifier> <operands> | <instruction> ::= <identifier> <operands> <newline> | ||||||
|  |  | ||||||
| <operands> ::= <operand> ( <comma> <operand> )* | <operands> ::= <operand> ( <comma> <operand> )* | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										20
									
								
								src/ast.c
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								src/ast.c
									
									
									
									
									
								
							| @@ -157,6 +157,8 @@ const char *ast_node_id_to_cstr(node_id_t id) { | |||||||
|         return "NODE_ASTERISK"; |         return "NODE_ASTERISK"; | ||||||
|     case NODE_DOT: |     case NODE_DOT: | ||||||
|         return "NODE_DOT"; |         return "NODE_DOT"; | ||||||
|  |     case NODE_NEWLINE: | ||||||
|  |         return "NODE_NEWLINE"; | ||||||
|     } |     } | ||||||
|     assert(!"Unreachable, weird node id" && id); |     assert(!"Unreachable, weird node id" && id); | ||||||
|     __builtin_unreachable(); |     __builtin_unreachable(); | ||||||
| @@ -172,7 +174,8 @@ static void ast_node_print_internal(ast_node_t *node, int indent) { | |||||||
|     } |     } | ||||||
|     printf("%s", ast_node_id_to_cstr(node->id)); |     printf("%s", ast_node_id_to_cstr(node->id)); | ||||||
|  |  | ||||||
|     if (node->token_entry && node->token_entry->token.value) { |     if (node->token_entry && node->token_entry->token.value && | ||||||
|  |         node->id != NODE_NEWLINE) { | ||||||
|         printf(" \"%s\"", node->token_entry->token.value); |         printf(" \"%s\"", node->token_entry->token.value); | ||||||
|     } |     } | ||||||
|     printf("\n"); |     printf("\n"); | ||||||
| @@ -185,3 +188,18 @@ static void ast_node_print_internal(ast_node_t *node, int indent) { | |||||||
| void ast_node_print(ast_node_t *node) { | void ast_node_print(ast_node_t *node) { | ||||||
|     ast_node_print_internal(node, 0); |     ast_node_print_internal(node, 0); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void ast_node_prune(ast_node_t *node, node_id_t id) { | ||||||
|  |     size_t new_len = 0; | ||||||
|  |     for (size_t i = 0; i < node->len; i++) { | ||||||
|  |         auto child = node->children[i]; | ||||||
|  |         if (child->id == id) { | ||||||
|  |             ast_node_free(child); | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         ast_node_prune(child, id); | ||||||
|  |         node->children[new_len] = child; | ||||||
|  |         new_len++; | ||||||
|  |     } | ||||||
|  |     node->len = new_len; | ||||||
|  | } | ||||||
|   | |||||||
							
								
								
									
										14
									
								
								src/ast.h
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								src/ast.h
									
									
									
									
									
								
							| @@ -50,6 +50,7 @@ typedef enum node_id { | |||||||
|     NODE_MINUS, |     NODE_MINUS, | ||||||
|     NODE_ASTERISK, |     NODE_ASTERISK, | ||||||
|     NODE_DOT, |     NODE_DOT, | ||||||
|  |     NODE_NEWLINE, | ||||||
| } node_id_t; | } node_id_t; | ||||||
|  |  | ||||||
| typedef struct ast_node ast_node_t; | typedef struct ast_node ast_node_t; | ||||||
| @@ -119,4 +120,17 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child); | |||||||
|  */ |  */ | ||||||
| void ast_node_print(ast_node_t *node); | void ast_node_print(ast_node_t *node); | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Prune the children with a given id | ||||||
|  |  * | ||||||
|  |  * The tree is recursively visited and all child nodes of a given ID are pruned | ||||||
|  |  * completely. If a node has the giver id, it will get removed along wih all its | ||||||
|  |  * children, even if some of those children have different ids. The root node id | ||||||
|  |  * is never checked so the tree is guaranteed to remain and allocated valid. | ||||||
|  |  * | ||||||
|  |  * @param node The root of the tree you want to prune | ||||||
|  |  * @param id The id of the nodes you want to prune | ||||||
|  |  */ | ||||||
|  | void ast_node_prune(ast_node_t *node, node_id_t id); | ||||||
|  |  | ||||||
| #endif // INCLUDE_SRC_AST_H_ | #endif // INCLUDE_SRC_AST_H_ | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| #include "combinators.h" | #include "combinators.h" | ||||||
|  | #include "util.h" | ||||||
|  |  | ||||||
| // Parse a list of the given parser delimited by the given token id. Does not | // Parse a list of the given parser delimited by the given token id. Does not | ||||||
| // store the delimiters in the parent node | // store the delimiters in the parent node | ||||||
| @@ -122,5 +123,12 @@ parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id, | |||||||
|         } |         } | ||||||
|         current = result.next; |         current = result.next; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // token stream ended before we matched all parsers | ||||||
|  |     if (parser != nullptr) { | ||||||
|  |         ast_node_free(all); | ||||||
|  |         return parse_no_match(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     return parse_success(all, current); |     return parse_success(all, current); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -120,22 +120,28 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) { | |||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse_directive(tokenlist_entry_t *current) { | parse_result_t parse_directive(tokenlist_entry_t *current) { | ||||||
|     parser_t parsers[] = {parse_dot, parse_section_directive, nullptr}; |     parser_t parsers[] = {parse_dot, parse_section_directive, parse_newline, | ||||||
|  |                           nullptr}; | ||||||
|     return parse_consecutive(current, NODE_DIRECTIVE, parsers); |     return parse_consecutive(current, NODE_DIRECTIVE, parsers); | ||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse_instruction(tokenlist_entry_t *current) { | parse_result_t parse_instruction(tokenlist_entry_t *current) { | ||||||
|     parser_t parsers[] = {parse_identifier, parse_operands, nullptr}; |     parser_t parsers[] = {parse_identifier, parse_operands, parse_newline, | ||||||
|  |                           nullptr}; | ||||||
|     return parse_consecutive(current, NODE_INSTRUCTION, parsers); |     return parse_consecutive(current, NODE_INSTRUCTION, parsers); | ||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse_statement(tokenlist_entry_t *current) { | parse_result_t parse_statement(tokenlist_entry_t *current) { | ||||||
|     parser_t parsers[] = {parse_label, parse_directive, parse_instruction, |     parser_t parsers[] = {parse_label, parse_directive, parse_instruction, | ||||||
|                           nullptr}; |                           parse_newline, nullptr}; | ||||||
|     return parse_any(current, parsers); |     return parse_any(current, parsers); | ||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse(tokenlist_entry_t *current) { | parse_result_t parse(tokenlist_entry_t *current) { | ||||||
|     current = tokenlist_skip_trivia(current); |     current = tokenlist_skip_trivia(current); | ||||||
|     return parse_many(current, NODE_PROGRAM, true, parse_statement); |     parse_result_t result = | ||||||
|  |         parse_many(current, NODE_PROGRAM, true, parse_statement); | ||||||
|  |     if (result.node != nullptr) | ||||||
|  |         ast_node_prune(result.node, NODE_NEWLINE); | ||||||
|  |     return result; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -62,6 +62,10 @@ parse_result_t parse_dot(tokenlist_entry_t *current) { | |||||||
|     return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr); |     return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | parse_result_t parse_newline(tokenlist_entry_t *current) { | ||||||
|  |     return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr); | ||||||
|  | } | ||||||
|  |  | ||||||
| parse_result_t parse_label_reference(tokenlist_entry_t *current) { | parse_result_t parse_label_reference(tokenlist_entry_t *current) { | ||||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE, |     return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE, | ||||||
|                        nullptr); |                        nullptr); | ||||||
|   | |||||||
| @@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current); | |||||||
| parse_result_t parse_minus(tokenlist_entry_t *current); | parse_result_t parse_minus(tokenlist_entry_t *current); | ||||||
| parse_result_t parse_asterisk(tokenlist_entry_t *current); | parse_result_t parse_asterisk(tokenlist_entry_t *current); | ||||||
| parse_result_t parse_dot(tokenlist_entry_t *current); | parse_result_t parse_dot(tokenlist_entry_t *current); | ||||||
|  | parse_result_t parse_newline(tokenlist_entry_t *current); | ||||||
| parse_result_t parse_label_reference(tokenlist_entry_t *current); | parse_result_t parse_label_reference(tokenlist_entry_t *current); | ||||||
|  |  | ||||||
| /* These are "primitives" with a different name and some extra validation on top | /* These are "primitives" with a different name and some extra validation on top | ||||||
|   | |||||||
| @@ -86,7 +86,6 @@ bool is_trivia(tokenlist_entry_t *trivia) { | |||||||
|     switch (trivia->token.id) { |     switch (trivia->token.id) { | ||||||
|     case TOKEN_WHITESPACE: |     case TOKEN_WHITESPACE: | ||||||
|     case TOKEN_COMMENT: |     case TOKEN_COMMENT: | ||||||
|     case TOKEN_NEWLINE: |  | ||||||
|         return true; |         return true; | ||||||
|     default: |     default: | ||||||
|         return false; |         return false; | ||||||
|   | |||||||
							
								
								
									
										5
									
								
								tests/input/regression/test_no_operands_eof.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								tests/input/regression/test_no_operands_eof.asm
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | |||||||
|  | ; regression test for two issues: | ||||||
|  | ;  - parsing two zero operand instructions in a row | ||||||
|  | ;  - a zero operand instruction just before eof | ||||||
|  |     syscall | ||||||
|  |     ret | ||||||
| @@ -23,9 +23,46 @@ MunitResult test_regression_trivia_head(const MunitParameter params[], void *dat | |||||||
|  |  | ||||||
|     ast_node_free(result.node); |     ast_node_free(result.node); | ||||||
|     tokenlist_free(list); |     tokenlist_free(list); | ||||||
|  |     return MUNIT_OK; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | MunitResult test_no_operands_eof(const MunitParameter params[], void *data) { | ||||||
|  |     (void)params; | ||||||
|  |     (void)data; | ||||||
|  |  | ||||||
|  |     lexer_t *lex = &(lexer_t){}; | ||||||
|  |     error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm"); | ||||||
|  |     munit_assert_null(err); | ||||||
|  |  | ||||||
|  |     tokenlist_t *list; | ||||||
|  |     err = tokenlist_alloc(&list); | ||||||
|  |     munit_assert_null(err); | ||||||
|  |  | ||||||
|  |     err = tokenlist_fill(list, lex); | ||||||
|  |     munit_assert_null(err); | ||||||
|  |  | ||||||
|  |     parse_result_t result = parse(list->head); | ||||||
|  |     munit_assert_null(result.err); | ||||||
|  |     munit_assert_null(result.next); | ||||||
|  |  | ||||||
|  |     // Both children should be instructions | ||||||
|  |     munit_assert_size(result.node->len, ==, 2); | ||||||
|  |     munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION); | ||||||
|  |     munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION); | ||||||
|  |  | ||||||
|  |     // And they should have empty operands | ||||||
|  |     munit_assert_size(result.node->children[0]->len, ==, 2); | ||||||
|  |     munit_assert_size(result.node->children[1]->len, ==, 2); | ||||||
|  |     munit_assert_size(result.node->children[0]->children[1]->len, ==, 0); | ||||||
|  |     munit_assert_size(result.node->children[1]->children[1]->len, ==, 0); | ||||||
|  |  | ||||||
|  |     ast_node_free(result.node); | ||||||
|  |     tokenlist_free(list); | ||||||
|  |     return MUNIT_OK; | ||||||
| } | } | ||||||
|  |  | ||||||
| MunitTest regression_tests[] = { | MunitTest regression_tests[] = { | ||||||
|     {"/trivia_head", test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |     {"/trivia_head",     test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, | ||||||
|     {nullptr,        nullptr,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr} |     {"/no_operands_eof", test_no_operands_eof,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, | ||||||
|  |     {nullptr,            nullptr,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr} | ||||||
| }; | }; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user