From b1391b91bd81522db795e19617716f2d3706c7a7 Mon Sep 17 00:00:00 2001
From: omicron
Date: Tue, 1 Apr 2025 17:16:21 +0200
Subject: [PATCH] Partial parser implementation

---
Note (review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Code tokens are unchanged. Indentation, and
the spacing of the context and removed lines in src/parser_primitives.c, are
reconstructed; check them against the target tree before applying
(`git apply --ignore-whitespace` may be needed).

 src/parser.c            | 53 +++++++++++++++++++++++++++++++++++++++++
 src/parser.h            | 11 +++++++++
 src/parser_primitives.c | 21 ++++++++++++----
 3 files changed, 81 insertions(+), 4 deletions(-)
 create mode 100644 src/parser.c
 create mode 100644 src/parser.h

diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..f3c4f23
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,53 @@
+#include "parser.h"
+#include "ast.h"
+#include "lexer.h"
+#include "parser_combinators.h"
+#include "parser_primitives.h"
+#include "parser_util.h"
+#include "tokenlist.h"
+
+parse_result_t parse_number(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal,
+                          parse_binary, nullptr};
+    return parse_any(current, parsers);
+}
+
+parse_result_t parse_operand(tokenlist_entry_t *current) {
+    // FIXME: not the correct set of parsers
+    parser_t parsers[] = {parse_register, parse_number, nullptr};
+    return parse_any(current, parsers);
+}
+
+parse_result_t parse_operands(tokenlist_entry_t *current) {
+    return parse_list(current, NODE_OPERANDS, true, TOKEN_COMMA, parse_operand);
+}
+
+parse_result_t parse_label(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_identifier, parse_colon, nullptr};
+    return parse_consecutive(current, NODE_LABEL, parsers);
+}
+
+parse_result_t parse_section_directive(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_section, parse_identifier, nullptr};
+    return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
+}
+
+parse_result_t parse_directive(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
+    return parse_consecutive(current, NODE_DIRECTIVE, parsers);
+}
+
+parse_result_t parse_instruction(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
+    return parse_consecutive(current, NODE_INSTRUCTION, parsers);
+}
+
+parse_result_t parse_statement(tokenlist_entry_t *current) {
+    parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
+                          nullptr};
+    return parse_any(current, parsers);
+}
+
+parse_result_t parse(tokenlist_entry_t *current) {
+    return parse_many(current, NODE_PROGRAM, true, parse_statement);
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..958618e
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,11 @@
+#ifndef INCLUDE_SRC_PARSER_H_
+#define INCLUDE_SRC_PARSER_H_
+
+#include "ast.h"
+#include "error.h"
+#include "parser_util.h"
+#include "tokenlist.h"
+
+parse_result_t parse(tokenlist_entry_t *current);
+
+#endif // INCLUDE_SRC_PARSER_H_
diff --git a/src/parser_primitives.c b/src/parser_primitives.c
index 29bd254..0e19030 100644
--- a/src/parser_primitives.c
+++ b/src/parser_primitives.c
@@ -62,9 +62,19 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
     return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
 }
 
-const char *registers[] = {"rax", "rcx", "rdx", "rbx", "rsp", "rbp",
-                           "rsi", "rdi", "r8", "r9", "r10", "r11",
-                           "r12", "r13", "r14", "r15", nullptr};
+const char *registers[] = {
+    // 64-bit registers
+    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",
+    "r11", "r12", "r13", "r14", "r15",
+    // 32-bit registers
+    "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d",
+    "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
+    // 16-bit registers
+    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w",
+    "r11w", "r12w", "r13w", "r14w", "r15w",
+    // 8-bit low registers
+    "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b",
+    "r11b", "r12b", "r13b", "r14b", "r15b", nullptr};
 bool is_register_token(lexer_token_t *token) {
     for (size_t i = 0; registers[i] != nullptr; ++i)
         if (strcmp(token->value, registers[i]) == 0)
@@ -81,4 +91,7 @@ bool is_section_token(lexer_token_t *token) {
     return strcmp(token->value, "section") == 0;
 }
 
-parse_result_t parse_section(tokenlist_entry_t *current) {}
+parse_result_t parse_section(tokenlist_entry_t *current) {
+    return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
+                       is_section_token);
+}