Add more grammar rules to the parser
This commit is contained in:
		
							
								
								
									
										138
									
								
								src/parser.c
									
									
									
									
									
								
							
							
						
						
									
										138
									
								
								src/parser.c
									
									
									
									
									
								
							| @@ -1,137 +1,51 @@ | ||||
| #include "parser.h" | ||||
| #include "ast.h" | ||||
| #include "lexer.h" | ||||
| #include "parser_combinators.h" | ||||
| #include "parser_primitives.h" | ||||
| #include "parser_util.h" | ||||
| #include "tokenlist.h" | ||||
|  | ||||
| error_t *err_parse_no_match = | ||||
|     &(error_t){.message = "parsing failed to find the correct token sequence"}; | ||||
|  | ||||
| typedef parse_result_t (*parser_t)(tokenlist_entry_t *); | ||||
|  | ||||
| // Builds a failed parse result that carries `err`; the node and next fields | ||||
| // are left zero-initialised. | ||||
| parse_result_t parse_error(error_t *err) { | ||||
|     parse_result_t failure = {.err = err}; | ||||
|     return failure; | ||||
| } | ||||
| parse_result_t parse_no_match() { | ||||
|     return parse_error(err_parse_no_match); | ||||
| // Parses a number by handing the octal, decimal, hexadecimal and binary | ||||
| // parsers to parse_any as a nullptr-terminated list. | ||||
| parse_result_t parse_number(tokenlist_entry_t *current) { | ||||
|     parser_t number_parsers[] = { | ||||
|         parse_octal, | ||||
|         parse_decimal, | ||||
|         parse_hexadecimal, | ||||
|         parse_binary, | ||||
|         nullptr, | ||||
|     }; | ||||
|     parse_result_t outcome = parse_any(current, number_parsers); | ||||
|     return outcome; | ||||
| } | ||||
|  | ||||
| parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next) { | ||||
|     return (parse_result_t){.node = ast, .next = next}; | ||||
| // Parses a single operand by handing the register and number parsers to | ||||
| // parse_any as a nullptr-terminated list. | ||||
| parse_result_t parse_operand(tokenlist_entry_t *current) { | ||||
|     // FIXME: not the correct set of parsers | ||||
|     parser_t operand_parsers[] = { | ||||
|         parse_register, | ||||
|         parse_number, | ||||
|         nullptr, | ||||
|     }; | ||||
|     parse_result_t outcome = parse_any(current, operand_parsers); | ||||
|     return outcome; | ||||
| } | ||||
|  | ||||
| // Tries each parser in the nullptr-terminated array `parsers` against | ||||
| // `current`, in order, and returns the first result whose err is nullptr. | ||||
| // Returns a no-match result when every parser fails or the array is empty. | ||||
| // | ||||
| // NOTE(review): a failure that is not err_parse_no_match is also skipped | ||||
| // here and ends up reported as a no-match; confirm that this is intended. | ||||
| parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]) { | ||||
|     // Walk the array with a pointer so each parser is invoked exactly once. | ||||
|     // The previous step expression `parser = *parsers++` re-read element 0 | ||||
|     // before advancing, which ran the first parser twice. | ||||
|     for (parser_t *candidate = parsers; *candidate != nullptr; candidate++) { | ||||
|         parse_result_t result = (*candidate)(current); | ||||
|         if (result.err == nullptr) { | ||||
|             return result; | ||||
|         } | ||||
|     } | ||||
|     return parse_no_match(); | ||||
| } | ||||
|  | ||||
| // Matches exactly one token. | ||||
| // | ||||
| // When `current` holds a token whose id equals `token_id`, allocates an ast | ||||
| // node, tags it with `ast_id`, points it at `current` and returns it together | ||||
| // with `current->next` as the position to continue from. | ||||
| // | ||||
| // Returns a no-match result when `current` is null or the token id differs, | ||||
| // and the allocation error when the node cannot be allocated. | ||||
| parse_result_t parse_token(tokenlist_entry_t *current, | ||||
|                            lexer_token_id_t token_id, node_id_t ast_id) { | ||||
|     // A null entry means there is no token left to inspect; report a | ||||
|     // no-match instead of dereferencing it. | ||||
|     if (current == nullptr || current->token.id != token_id) { | ||||
|         return parse_no_match(); | ||||
|     } | ||||
|  | ||||
|     ast_node_t *node = nullptr; | ||||
|     error_t *err = ast_node_alloc(&node); | ||||
|     if (err) { | ||||
|         return parse_error(err); | ||||
|     } | ||||
|     node->id = ast_id; | ||||
|     node->token_entry = current; | ||||
|  | ||||
|     return parse_success(node, current->next); | ||||
| } | ||||
|  | ||||
| // Applies `parser` repeatedly, starting at `current`, until it reports | ||||
| // err_parse_no_match or the token list runs out, and collects every parsed | ||||
| // node as a child of a freshly allocated node tagged with `id`. | ||||
| // | ||||
| // When nothing was parsed the result is a no-match unless `allow_none` is | ||||
| // set. Any error other than err_parse_no_match aborts the run and is returned | ||||
| // as-is after the wrapper node is freed. | ||||
| parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id, | ||||
|                           bool allow_none, parser_t parser) { | ||||
|     ast_node_t *wrapper; | ||||
|     error_t *err = ast_node_alloc(&wrapper); | ||||
|     if (err) { | ||||
|         return parse_error(err); | ||||
|     } | ||||
|     wrapper->id = id; | ||||
|  | ||||
|     while (current) { | ||||
|         parse_result_t item = parser(current); | ||||
|         if (item.err == err_parse_no_match) { | ||||
|             break; | ||||
|         } | ||||
|         if (item.err) { | ||||
|             ast_node_free(wrapper); | ||||
|             return item; | ||||
|         } | ||||
|  | ||||
|         err = ast_node_add_child(wrapper, item.node); | ||||
|         if (err) { | ||||
|             // Adding the child failed, so it is released separately. | ||||
|             ast_node_free(wrapper); | ||||
|             ast_node_free(item.node); | ||||
|             return parse_error(err); | ||||
|         } | ||||
|         current = item.next; | ||||
|     } | ||||
|  | ||||
|     const bool found_none = wrapper->len == 0; | ||||
|     if (found_none && !allow_none) { | ||||
|         ast_node_free(wrapper); | ||||
|         return parse_no_match(); | ||||
|     } | ||||
|     return parse_success(wrapper, current); | ||||
| } | ||||
|  | ||||
| // parse_consecutive runs every parser in the list one after another and, if | ||||
| // all of them succeed, wraps the parsed nodes in a new parent node. | ||||
| parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id, | ||||
|                                  parser_t parsers[]) { | ||||
|     ast_node_t *all; | ||||
|     error_t *err = ast_node_alloc(&all); | ||||
|     parse_result_t result; | ||||
|     if (err) | ||||
|         return parse_no_match(); | ||||
|  | ||||
|     all->id = id; | ||||
|  | ||||
|     for (parser_t parser = *parsers; parser && current; parser = *parsers++) { | ||||
|         result = parser(current); | ||||
|         if (result.err) { | ||||
|             ast_node_free(all); | ||||
|             return result; | ||||
|         } | ||||
|         err = ast_node_add_child(all, result.node); | ||||
|         if (err) { | ||||
|             ast_node_free(result.node); | ||||
|             ast_node_free(all); | ||||
|             return parse_error(err); | ||||
|         } | ||||
|         current = result.next; | ||||
|     } | ||||
|     return parse_success(all, current); | ||||
| parse_result_t parse_operands(tokenlist_entry_t *current) { | ||||
|     return parse_list(current, NODE_OPERANDS, true, TOKEN_COMMA, parse_operand); | ||||
| } | ||||
|  | ||||
| parse_result_t parse_label(tokenlist_entry_t *current) { | ||||
|     return (parse_result_t){.err = err_parse_no_match}; | ||||
|     parser_t parsers[] = {parse_identifier, parse_colon, nullptr}; | ||||
|     return parse_consecutive(current, NODE_LABEL, parsers); | ||||
| } | ||||
|  | ||||
| // Parses a section directive: hands the section parser followed by the | ||||
| // identifier parser to parse_consecutive, tagged NODE_SECTION_DIRECTIVE. | ||||
| parse_result_t parse_section_directive(tokenlist_entry_t *current) { | ||||
|     parser_t sequence[] = { | ||||
|         parse_section, | ||||
|         parse_identifier, | ||||
|         nullptr, | ||||
|     }; | ||||
|     parse_result_t outcome = | ||||
|         parse_consecutive(current, NODE_SECTION_DIRECTIVE, sequence); | ||||
|     return outcome; | ||||
| } | ||||
|  | ||||
| parse_result_t parse_directive(tokenlist_entry_t *current) { | ||||
|     return (parse_result_t){.err = err_parse_no_match}; | ||||
|     parser_t parsers[] = {parse_dot, parse_section_directive, nullptr}; | ||||
|     return parse_consecutive(current, NODE_LABEL, parsers); | ||||
| } | ||||
|  | ||||
| parse_result_t parse_instruction(tokenlist_entry_t *current) { | ||||
|     return (parse_result_t){.err = err_parse_no_match}; | ||||
|     parser_t parsers[] = {parse_identifier, parse_operands, nullptr}; | ||||
|     return parse_consecutive(current, NODE_INSTRUCTION, parsers); | ||||
| } | ||||
|  | ||||
| parse_result_t parse_statement(tokenlist_entry_t *current) { | ||||
|     parser_t options[] = {parse_label, parse_directive, parse_instruction, | ||||
|     parser_t parsers[] = {parse_label, parse_directive, parse_instruction, | ||||
|                           nullptr}; | ||||
|     parse_result_t result = parse_any(current, options); | ||||
|     if (result.err) | ||||
|         return result; | ||||
|  | ||||
|     if (result.next->token.id == TOKEN_NEWLINE) { | ||||
|         result.next = result.next->next; | ||||
|         return result; | ||||
|     } | ||||
|     return parse_no_match(); | ||||
|     return parse_any(current, parsers); | ||||
| } | ||||
|  | ||||
| parse_result_t parse(tokenlist_entry_t *current) { | ||||
|   | ||||
| @@ -3,14 +3,9 @@ | ||||
|  | ||||
| #include "ast.h" | ||||
| #include "error.h" | ||||
| #include "parser_util.h" | ||||
| #include "tokenlist.h" | ||||
|  | ||||
| // Outcome of a single parser invocation. | ||||
| typedef struct parse_result { | ||||
|     // Null on success; otherwise the error describing why parsing failed. | ||||
|     error_t *err; | ||||
|     // Token list entry at which parsing should continue after a success. | ||||
|     tokenlist_entry_t *next; | ||||
|     // Ast node produced by a successful parse. | ||||
|     ast_node_t *node; | ||||
| } parse_result_t; | ||||
|  | ||||
| parse_result_t parse(tokenlist_entry_t *current); | ||||
|  | ||||
| #endif // INCLUDE_SRC_PARSER_H_ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user