Compare commits

..

10 Commits

Author SHA1 Message Date
d40273b329 Fix parse_operand
All checks were successful
Validate the build / validate-build (push) Successful in 26s
2025-04-02 15:49:49 +02:00
000756fca9 More grammar
All checks were successful
Validate the build / validate-build (push) Successful in 27s
2025-04-02 15:39:41 +02:00
dabd3fd86f Slightly change the valid test input file
All checks were successful
Validate the build / validate-build (push) Successful in 27s
2025-04-02 13:00:06 +02:00
ccf8f52b6e Add functionality to main to parse and print the AST 2025-04-02 13:00:01 +02:00
35c471f8d4 Partial parser implementation 2025-04-02 12:59:55 +02:00
44fab4c678 Fix incorrect size comparison in lexer_consume_n
The buffer length len and the requested number of tokens n are mixed up
in an invalid comparison. This causes all valid requests for n < len
tokens to be denied and all invalid requests for n > len tokens to be
accepted. This may cause a buffer overflow if the caller requests more
characters than they provide space for.
2025-04-02 12:59:55 +02:00
bcc1569b39 Add a parser combinator to parse a delimited list 2025-04-02 12:59:55 +02:00
5746ef1c5a Add basic parser combinators 2025-04-02 12:59:51 +02:00
2cab530eed Add "primitive" parsers for all the non-trivia tokens in the lexer grammar 2025-04-02 12:59:47 +02:00
7ac4eac37f Add basic parser utilities 2025-04-02 12:59:41 +02:00
6 changed files with 8 additions and 47 deletions

View File

@ -9,8 +9,7 @@
parse_result_t parse_number(tokenlist_entry_t *current) { parse_result_t parse_number(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal, parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal,
parse_binary, nullptr}; parse_binary, nullptr};
parse_result_t result = parse_any(current, parsers); return parse_any(current, parsers);
return parse_result_wrap(NODE_NUMBER, result);
} }
parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) { parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) {
@ -21,12 +20,12 @@ parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) {
parse_result_t parse_register_index(tokenlist_entry_t *current) { parse_result_t parse_register_index(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_plus, parse_register, parse_asterisk, parser_t parsers[] = {parse_plus, parse_register, parse_asterisk,
parse_number, nullptr}; parse_number, nullptr};
return parse_consecutive(current, NODE_REGISTER_INDEX, parsers); return parse_consecutive(current, NODE_LABEL, parsers);
} }
parse_result_t parse_register_offset(tokenlist_entry_t *current) { parse_result_t parse_register_offset(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr}; parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr};
return parse_consecutive(current, NODE_REGISTER_OFFSET, parsers); return parse_consecutive(current, NODE_LABEL, parsers);
} }
parse_result_t parse_register_expression(tokenlist_entry_t *current) { parse_result_t parse_register_expression(tokenlist_entry_t *current) {
@ -36,7 +35,6 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
error_t *err = ast_node_alloc(&expr); error_t *err = ast_node_alloc(&expr);
if (err) if (err)
return parse_error(err); return parse_error(err);
expr->id = NODE_REGISTER_EXPRESSION;
// <register> // <register>
result = parse_register(current); result = parse_register(current);
@ -84,8 +82,7 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
parse_result_t parse_immediate(tokenlist_entry_t *current) { parse_result_t parse_immediate(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_number, parse_identifier, nullptr}; parser_t parsers[] = {parse_number, parse_identifier, nullptr};
parse_result_t result = parse_any(current, parsers); return parse_any(current, parsers);
return parse_result_wrap(NODE_IMMEDIATE, result);
} }
parse_result_t parse_memory_expression(tokenlist_entry_t *current) { parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
@ -96,7 +93,7 @@ parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
parse_result_t parse_memory(tokenlist_entry_t *current) { parse_result_t parse_memory(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_lbracket, parse_memory_expression, parser_t parsers[] = {parse_lbracket, parse_memory_expression,
parse_rbracket, nullptr}; parse_rbracket, nullptr};
return parse_consecutive(current, NODE_MEMORY, parsers); return parse_consecutive(current, NODE_LABEL, parsers);
} }
parse_result_t parse_operand(tokenlist_entry_t *current) { parse_result_t parse_operand(tokenlist_entry_t *current) {

View File

@ -62,11 +62,6 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr); return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
} }
parse_result_t parse_label_reference(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
nullptr);
}
const char *registers[] = { const char *registers[] = {
// 64-bit registers // 64-bit registers
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",
@ -80,7 +75,6 @@ const char *registers[] = {
// 8-bit low registers // 8-bit low registers
"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b",
"r11b", "r12b", "r13b", "r14b", "r15b", nullptr}; "r11b", "r12b", "r13b", "r14b", "r15b", nullptr};
bool is_register_token(lexer_token_t *token) { bool is_register_token(lexer_token_t *token) {
for (size_t i = 0; registers[i] != nullptr; ++i) for (size_t i = 0; registers[i] != nullptr; ++i)
if (strcmp(token->value, registers[i]) == 0) if (strcmp(token->value, registers[i]) == 0)

View File

@ -18,7 +18,6 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
parse_result_t parse_minus(tokenlist_entry_t *current); parse_result_t parse_minus(tokenlist_entry_t *current);
parse_result_t parse_asterisk(tokenlist_entry_t *current); parse_result_t parse_asterisk(tokenlist_entry_t *current);
parse_result_t parse_dot(tokenlist_entry_t *current); parse_result_t parse_dot(tokenlist_entry_t *current);
parse_result_t parse_label_reference(tokenlist_entry_t *current);
/* These are "primitives" with a different name and some extra validation on top /* These are "primitives" with a different name and some extra validation on top
* for example, register is just an identifier but it only matches a limited set * for example, register is just an identifier but it only matches a limited set

View File

@ -33,24 +33,3 @@ parse_result_t parse_token(tokenlist_entry_t *current,
return parse_success(node, current->next); return parse_success(node, current->next);
} }
parse_result_t parse_result_wrap(node_id_t id, parse_result_t result) {
if (result.err)
return result;
ast_node_t *node;
error_t *err = ast_node_alloc(&node);
if (err) {
ast_node_free(result.node);
return parse_error(err);
}
node->id = id;
err = ast_node_add_child(node, result.node);
if (err) {
ast_node_free(result.node);
return parse_error(err);
}
return parse_success(node, result.next);
}

View File

@ -19,7 +19,8 @@ parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next);
parse_result_t parse_token(tokenlist_entry_t *current, parse_result_t parse_token(tokenlist_entry_t *current,
lexer_token_id_t token_id, node_id_t ast_id, lexer_token_id_t token_id, node_id_t ast_id,
token_validator_t is_valid); token_validator_t is_valid);
parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);
tokenlist_entry_t *skip_insignificant(tokenlist_entry_t *);
extern error_t *err_parse_no_match; extern error_t *err_parse_no_match;

View File

@ -1,17 +1,8 @@
.section text .section text
; Small valid code snippet that should contain all different AST nodes
_start: _start:
mov eax, ebx mov eax, ebx
lea eax, [eax + ebx * 4 + 8] mov eax, 555 ; move 555 into eax
lea eax, [eax + 8]
lea eax, [eax + ebx * 8]
lea eax, [esp - 24]
lea eax, [eax + ebx * 4 - 8]
lea eax, [_start]
mov eax, _start
mov eax, 555
push 0o777 push 0o777
xor eax, 0xDEADBEEF xor eax, 0xDEADBEEF
and ecx, 0o770 and ecx, 0o770