diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..4071e76
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,174 @@
+#include "parser.h"
+#include "ast.h"
+#include "lexer.h"
+#include "tokenlist.h"
+
+// Sentinel error: the parser did not recognise the tokens at the current
+// position. Combinators compare against this pointer to tell "try something
+// else" apart from hard failures such as allocation errors.
+error_t *err_parse_no_match =
+    &(error_t){.message = "parsing failed to find the correct token sequence"};
+
+// Signature shared by all parsers that can be handed to the combinators below.
+typedef parse_result_t (*parser_t)(tokenlist_entry_t *);
+
+// Build a failed result carrying err; node and next are left null.
+parse_result_t parse_error(error_t *err) {
+    return (parse_result_t){.err = err};
+}
+
+// Build the "did not match" result.
+parse_result_t parse_no_match(void) {
+    return parse_error(err_parse_no_match);
+}
+
+// Build a successful result: ast is the parsed node, next is the first token
+// that was not consumed.
+parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next) {
+    return (parse_result_t){.node = ast, .next = next};
+}
+
+// Try each parser of the nullptr-terminated array at the same position and
+// return the first result that is not err_parse_no_match. That is either a
+// success or a hard error; hard errors are propagated instead of being
+// reported as "no match". Returns no match when no alternative matches.
+parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]) {
+    // Walk the array by pointer so that every parser is tried exactly once.
+    for (parser_t *parser = parsers; *parser; ++parser) {
+        parse_result_t result = (*parser)(current);
+        if (result.err != err_parse_no_match)
+            return result;
+    }
+    return parse_no_match();
+}
+
+// Match a single token with the given token_id and return a freshly allocated
+// ast node tagged with ast_id. Returns no match when the input is exhausted or
+// the token id differs.
+parse_result_t parse_token(tokenlist_entry_t *current,
+                           lexer_token_id_t token_id, node_id_t ast_id) {
+    if (current == nullptr || current->token.id != token_id)
+        return parse_no_match();
+
+    ast_node_t *node;
+    error_t *err = ast_node_alloc(&node);
+    if (err)
+        return parse_error(err);
+    node->id = ast_id;
+
+    return parse_success(node, current->next);
+}
+
+// Parse as many of the given parser's objects in a row as possible and wrap
+// the found objects in a new ast node with the given node id. When allow_none
+// is false, at least one object has to be found for the parse to match.
+parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id,
+                          bool allow_none, parser_t parser) {
+    ast_node_t *many;
+    error_t *err = ast_node_alloc(&many);
+    if (err)
+        return parse_error(err);
+    many->id = id;
+
+    while (current) {
+        parse_result_t result = parser(current);
+        if (result.err == err_parse_no_match)
+            break;
+        if (result.err) {
+            ast_node_free(many);
+            return result;
+        }
+        err = ast_node_add_child(many, result.node);
+        if (err) {
+            ast_node_free(many);
+            ast_node_free(result.node);
+            return parse_error(err);
+        }
+        current = result.next;
+    }
+
+    if (!allow_none && many->len == 0) {
+        ast_node_free(many);
+        return parse_no_match();
+    }
+    return parse_success(many, current);
+}
+
+// Parse all parsers of the nullptr-terminated array consecutively, each one
+// exactly once and in order. If every parser succeeds, the parsed nodes are
+// wrapped in a new parent node with the given node id.
+parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
+                                 parser_t parsers[]) {
+    ast_node_t *all;
+    error_t *err = ast_node_alloc(&all);
+    if (err)
+        return parse_error(err);
+    all->id = id;
+
+    for (parser_t *parser = parsers; *parser; ++parser) {
+        // Running out of tokens before the sequence is complete is a failed
+        // match, not a shorter success.
+        if (current == nullptr) {
+            ast_node_free(all);
+            return parse_no_match();
+        }
+
+        parse_result_t result = (*parser)(current);
+        if (result.err) {
+            ast_node_free(all);
+            return result;
+        }
+        err = ast_node_add_child(all, result.node);
+        if (err) {
+            ast_node_free(result.node);
+            ast_node_free(all);
+            return parse_error(err);
+        }
+        current = result.next;
+    }
+    return parse_success(all, current);
+}
+
+// Placeholder: labels are not parsed yet, so this never matches.
+parse_result_t parse_label(tokenlist_entry_t *current) {
+    (void)current;
+    return parse_no_match();
+}
+
+// Placeholder: directives are not parsed yet, so this never matches.
+parse_result_t parse_directive(tokenlist_entry_t *current) {
+    (void)current;
+    return parse_no_match();
+}
+
+// Placeholder: instructions are not parsed yet, so this never matches.
+parse_result_t parse_instruction(tokenlist_entry_t *current) {
+    (void)current;
+    return parse_no_match();
+}
+
+// Parse one statement: a label, directive or instruction that is followed by
+// a newline token. The newline is consumed but not added to the ast.
+parse_result_t parse_statement(tokenlist_entry_t *current) {
+    parser_t options[] = {parse_label, parse_directive, parse_instruction,
+                          nullptr};
+    parse_result_t result = parse_any(current, options);
+    if (result.err)
+        return result;
+
+    // The statement node is owned by this function until it is returned, so
+    // release it when the terminating newline is missing.
+    if (result.next == nullptr || result.next->token.id != TOKEN_NEWLINE) {
+        ast_node_free(result.node);
+        return parse_no_match();
+    }
+    result.next = result.next->next;
+    return result;
+}
+
+// Parse a token list into a NODE_PROGRAM node holding zero or more statements.
+// Parsing stops at the first token that does not start a statement; that token
+// is returned in the result's next field (null when all input was consumed).
+parse_result_t parse(tokenlist_entry_t *current) {
+    return parse_many(current, NODE_PROGRAM, true, parse_statement);
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..46c3b46
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,23 @@
+#ifndef INCLUDE_SRC_PARSER_H_
+#define INCLUDE_SRC_PARSER_H_
+
+#include "ast.h"
+#include "error.h"
+#include "tokenlist.h"
+
+// Outcome of a parse step.
+typedef struct parse_result {
+    error_t *err;            // null on success
+    tokenlist_entry_t *next; // on success: first token that was not consumed
+    ast_node_t *node;        // on success: the parsed ast node
+} parse_result_t;
+
+// Sentinel error used for input that does not match the grammar.
+extern error_t *err_parse_no_match;
+
+// Parse a token list into a NODE_PROGRAM ast node. On success the caller
+// should inspect the result's next field: it is non-null when parsing stopped
+// before the end of the token list.
+parse_result_t parse(tokenlist_entry_t *current);
+
+#endif // INCLUDE_SRC_PARSER_H_