From 4dc3ef89673954dd6b7c5f52ed822f932423ccde Mon Sep 17 00:00:00 2001
From: omicron
Date: Tue, 1 Apr 2025 23:43:05 +0200
Subject: [PATCH] Add more grammar rules to the parser

---
Review notes (commentary only; not part of the commit message or the diff):

Grammar rules added to src/parser.c by this patch:
  * parse_number            : parse_any over parse_octal, parse_decimal,
                              parse_hexadecimal, parse_binary
  * parse_operand           : parse_any over parse_register, parse_number
                              (marked FIXME in the code: parser set incomplete)
  * parse_operands          : parse_list of parse_operand with TOKEN_COMMA,
                              producing NODE_OPERANDS
  * parse_label             : parse_identifier then parse_colon -> NODE_LABEL
  * parse_section_directive : parse_section then parse_identifier
                              -> NODE_SECTION_DIRECTIVE
  * parse_directive         : parse_dot then parse_section_directive
                              -> NODE_DIRECTIVE
  * parse_instruction       : parse_identifier then parse_operands
                              -> NODE_INSTRUCTION
  * parse_statement         : parse_any over parse_label, parse_directive,
                              parse_instruction

Changes made during review:
  * parse_directive used NODE_LABEL as its node id, which made directive
    nodes indistinguishable from labels; it now uses NODE_DIRECTIVE.
    Assumes NODE_DIRECTIVE is declared in ast.h -- TODO confirm.
  * The blob hash on the src/parser.c "index" line predates that change.

Open questions:
  * parse_statement no longer checks for or consumes a trailing
    TOKEN_NEWLINE; presumably statement termination is handled elsewhere
    now -- verify against the caller.

 src/parser.c | 138 ++++++++++-----------------------------------------
 src/parser.h |   7 +--
 2 files changed, 27 insertions(+), 118 deletions(-)

diff --git a/src/parser.c b/src/parser.c
index 61d0589..eeffac7 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,137 +1,51 @@
 #include "parser.h"
 #include "ast.h"
 #include "lexer.h"
+#include "parser_combinators.h"
+#include "parser_primitives.h"
+#include "parser_util.h"
 #include "tokenlist.h"
 
-error_t *err_parse_no_match =
-    &(error_t){.message = "parsing failed to find the correct token sequence"};
-
-typedef parse_result_t (*parser_t)(tokenlist_entry_t *);
-
-parse_result_t parse_error(error_t *err) {
-    return (parse_result_t){.err = err};
-}
-parse_result_t parse_no_match() {
-    return parse_error(err_parse_no_match);
+parse_result_t parse_number(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal,
+                          parse_binary, nullptr};
+    return parse_any(current, parsers);
 }
 
-parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next) {
-    return (parse_result_t){.node = ast, .next = next};
+parse_result_t parse_operand(tokenlist_entry_t *current) {
+    // FIXME: not the correct set of parsers
+    parser_t parsers[] = {parse_register, parse_number, nullptr};
+    return parse_any(current, parsers);
 }
 
-parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]) {
-    for (parser_t parser = *parsers; parser; parser = *parsers++) {
-        parse_result_t result = parser(current);
-        if (result.err == nullptr)
-            return result;
-    }
-    return parse_no_match();
-}
-
-parse_result_t parse_token(tokenlist_entry_t *current,
-                           lexer_token_id_t token_id, node_id_t ast_id) {
-    if (current->token.id != token_id)
-        return parse_no_match();
-
-    ast_node_t *node;
-    error_t *err = ast_node_alloc(&node);
-    if (err)
-        return parse_error(err);
-    node->id = ast_id;
-    node->token_entry = current;
-
-    return parse_success(node, current->next);
-}
-
-// parse as many of the giver parsers objects in a row as possible, potentially
-// allowing none wraps the found objects in a new ast node with the given note
-// id
-parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id,
-                          bool allow_none, parser_t parser) {
-    ast_node_t *many;
-    error_t *err = ast_node_alloc(&many);
-    parse_result_t result;
-    if (err)
-        return parse_error(err);
-    many->id = id;
-
-    while (current) {
-        result = parser(current);
-        if (result.err == err_parse_no_match)
-            break;
-        if (result.err) {
-            ast_node_free(many);
-            return result;
-        }
-        err = ast_node_add_child(many, result.node);
-        if (err) {
-            ast_node_free(many);
-            ast_node_free(result.node);
-            return parse_error(err);
-        }
-        current = result.next;
-    }
-
-    if (!allow_none && many->len == 0) {
-        ast_node_free(many);
-        return parse_no_match();
-    }
-    return parse_success(many, current);
-}
-
-// Parse all tries to parse all parsers consecutively and if it succeeds it
-// wraps the parsed nodes in a new parent node.
-parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
-                                 parser_t parsers[]) {
-    ast_node_t *all;
-    error_t *err = ast_node_alloc(&all);
-    parse_result_t result;
-    if (err)
-        return parse_no_match();
-
-    all->id = id;
-
-    for (parser_t parser = *parsers; parser && current; parser = *parsers++) {
-        result = parser(current);
-        if (result.err) {
-            ast_node_free(all);
-            return result;
-        }
-        err = ast_node_add_child(all, result.node);
-        if (err) {
-            ast_node_free(result.node);
-            ast_node_free(all);
-            return parse_error(err);
-        }
-        current = result.next;
-    }
-    return parse_success(all, current);
+parse_result_t parse_operands(tokenlist_entry_t *current) {
+    return parse_list(current, NODE_OPERANDS, true, TOKEN_COMMA, parse_operand);
 }
 
 parse_result_t parse_label(tokenlist_entry_t *current) {
-    return (parse_result_t){.err = err_parse_no_match};
+    parser_t parsers[] = {parse_identifier, parse_colon, nullptr};
+    return parse_consecutive(current, NODE_LABEL, parsers);
+}
+
+parse_result_t parse_section_directive(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_section, parse_identifier, nullptr};
+    return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
 }
 
 parse_result_t parse_directive(tokenlist_entry_t *current) {
-    return (parse_result_t){.err = err_parse_no_match};
+    parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
+    return parse_consecutive(current, NODE_DIRECTIVE, parsers);
 }
 
 parse_result_t parse_instruction(tokenlist_entry_t *current) {
-    return (parse_result_t){.err = err_parse_no_match};
+    parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
+    return parse_consecutive(current, NODE_INSTRUCTION, parsers);
 }
 
 parse_result_t parse_statement(tokenlist_entry_t *current) {
-    parser_t options[] = {parse_label, parse_directive, parse_instruction,
+    parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
                           nullptr};
-    parse_result_t result = parse_any(current, options);
-    if (result.err)
-        return result;
-
-    if (result.next->token.id == TOKEN_NEWLINE) {
-        result.next = result.next->next;
-        return result;
-    }
-    return parse_no_match();
+    return parse_any(current, parsers);
 }
 
 parse_result_t parse(tokenlist_entry_t *current) {
diff --git a/src/parser.h b/src/parser.h
index 46c3b46..958618e 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -3,14 +3,9 @@
 
 #include "ast.h"
 #include "error.h"
+#include "parser_util.h"
 #include "tokenlist.h"
 
-typedef struct parse_result {
-    error_t *err;
-    tokenlist_entry_t *next;
-    ast_node_t *node;
-} parse_result_t;
-
 parse_result_t parse(tokenlist_entry_t *current);
 
 #endif // INCLUDE_SRC_PARSER_H_