Compare commits

..

9 Commits

Author SHA1 Message Date
0d3881f680 Update the test input file to contain all AST nodes
All checks were successful
Validate the build / validate-build (push) Successful in 36s
2025-04-02 21:41:27 +02:00
5ea942024f add functionality to main to parse and print the ast 2025-04-02 20:57:02 +02:00
b4757e008c Add parse_result_wrap to wrap a result with another parent node
Use the new wrap function to wrap numbers and immediate nodes
2025-04-02 20:57:02 +02:00
b70b6896bf Partial parser implementation 2025-04-02 20:56:59 +02:00
6ca7bb3661 Fix incorrect size comparison in lexer_consume_n
The buffer length len and the requested number of tokens n are mixed up
in an invalid comparison. This causes all valid requests for n < len
tokens to be denied and all invalid requests for n > len tokens to be
accepted. This may cause a buffer overflow if the caller requests more
characters than they provide space for.
2025-04-02 20:41:49 +02:00
d424c0f886 Add a parser combinator to parse a delimited list 2025-04-02 20:41:49 +02:00
c66489dd90 Add basic parser combinators 2025-04-02 20:41:49 +02:00
44fa66c2b7 Add "primitive" parsers for all the non-trivia tokens in the lexer grammar 2025-04-02 20:41:42 +02:00
c48adb1306 Add basic parser utilities 2025-04-02 20:38:35 +02:00
6 changed files with 47 additions and 8 deletions

View File

@ -9,7 +9,8 @@
/**
 * Parse a number written in any of the supported notations.
 *
 * Hands the octal, decimal, hexadecimal and binary parsers to parse_any and
 * passes the outcome through parse_result_wrap with the NODE_NUMBER id.
 *
 * @param current token list entry to start parsing at
 * @return the result of parse_result_wrap(NODE_NUMBER, ...) applied to the
 *         parse_any result
 */
parse_result_t parse_number(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal,
                          parse_binary, nullptr};
    // The parse_any result must not be returned directly: doing so skips the
    // NODE_NUMBER wrapping below and leaves it as unreachable code.
    parse_result_t result = parse_any(current, parsers);
    return parse_result_wrap(NODE_NUMBER, result);
}
parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) {
@ -20,12 +21,12 @@ parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) {
/**
 * Parse the index part of a register expression: plus, register, asterisk,
 * number, in that order.
 *
 * The four parsers are handed to parse_consecutive together with the
 * NODE_REGISTER_INDEX id (previously a stale NODE_LABEL return came first and
 * made the correct return unreachable).
 *
 * @param current token list entry to start parsing at
 * @return whatever parse_consecutive produces for this sequence
 */
parse_result_t parse_register_index(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_plus, parse_register, parse_asterisk,
                          parse_number, nullptr};
    return parse_consecutive(current, NODE_REGISTER_INDEX, parsers);
}
/**
 * Parse the offset part of a register expression: a plus-or-minus sign
 * followed by a number.
 *
 * Both parsers are handed to parse_consecutive together with the
 * NODE_REGISTER_OFFSET id (previously a stale NODE_LABEL return came first
 * and made the correct return unreachable).
 *
 * @param current token list entry to start parsing at
 * @return whatever parse_consecutive produces for this sequence
 */
parse_result_t parse_register_offset(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr};
    return parse_consecutive(current, NODE_REGISTER_OFFSET, parsers);
}
parse_result_t parse_register_expression(tokenlist_entry_t *current) {
@ -35,6 +36,7 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
error_t *err = ast_node_alloc(&expr);
if (err)
return parse_error(err);
expr->id = NODE_REGISTER_EXPRESSION;
// <register>
result = parse_register(current);
@ -82,7 +84,8 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
/**
 * Parse an immediate operand: either a number or an identifier.
 *
 * Hands both alternatives to parse_any and passes the outcome through
 * parse_result_wrap with the NODE_IMMEDIATE id.
 *
 * @param current token list entry to start parsing at
 * @return the result of parse_result_wrap(NODE_IMMEDIATE, ...) applied to the
 *         parse_any result
 */
parse_result_t parse_immediate(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_number, parse_identifier, nullptr};
    // The parse_any result must not be returned directly: doing so skips the
    // NODE_IMMEDIATE wrapping below and leaves it as unreachable code.
    parse_result_t result = parse_any(current, parsers);
    return parse_result_wrap(NODE_IMMEDIATE, result);
}
parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
@ -93,7 +96,7 @@ parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
/**
 * Parse a memory operand: left bracket, memory expression, right bracket.
 *
 * The three parsers are handed to parse_consecutive together with the
 * NODE_MEMORY id (previously a stale NODE_LABEL return came first and made
 * the correct return unreachable).
 *
 * @param current token list entry to start parsing at
 * @return whatever parse_consecutive produces for this sequence
 */
parse_result_t parse_memory(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_lbracket, parse_memory_expression,
                          parse_rbracket, nullptr};
    return parse_consecutive(current, NODE_MEMORY, parsers);
}
parse_result_t parse_operand(tokenlist_entry_t *current) {

View File

@ -62,6 +62,11 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
}
/**
 * Primitive parser for a label reference.
 *
 * Delegates to parse_token, asking for a TOKEN_IDENTIFIER token and the
 * NODE_LABEL_REFERENCE node id; no validator callback is supplied.
 *
 * @param current token list entry to start parsing at
 * @return the parse_token result, unmodified
 */
parse_result_t parse_label_reference(tokenlist_entry_t *current) {
    parse_result_t outcome =
        parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE, nullptr);
    return outcome;
}
const char *registers[] = {
// 64-bit registers
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",
@ -75,6 +80,7 @@ const char *registers[] = {
// 8-bit low registers
"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b",
"r11b", "r12b", "r13b", "r14b", "r15b", nullptr};
bool is_register_token(lexer_token_t *token) {
for (size_t i = 0; registers[i] != nullptr; ++i)
if (strcmp(token->value, registers[i]) == 0)

View File

@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
parse_result_t parse_minus(tokenlist_entry_t *current);
parse_result_t parse_asterisk(tokenlist_entry_t *current);
parse_result_t parse_dot(tokenlist_entry_t *current);
/* Matches a TOKEN_IDENTIFIER token and yields a NODE_LABEL_REFERENCE node. */
parse_result_t parse_label_reference(tokenlist_entry_t *current);
/* These are "primitives" with a different name and some extra validation on top
* for example, register is just an identifier but it only matches a limited set

View File

@ -33,3 +33,24 @@ parse_result_t parse_token(tokenlist_entry_t *current,
return parse_success(node, current->next);
}
/**
 * Wrap a successful parse result in a new parent node.
 *
 * A failed result (result.err set) is returned unchanged. Otherwise a new
 * node with the given id is allocated, result.node is added as its child, and
 * a success result holding the new node and the original result.next is
 * returned.
 *
 * On any failure inside this function every node it holds is released before
 * the error is returned, so nothing is leaked to the caller.
 *
 * @param id     node id to assign to the new parent node
 * @param result parse result whose node should become the child
 * @return the wrapped success result, the untouched failed result, or a
 *         parse_error carrying the allocation / add-child error
 */
parse_result_t parse_result_wrap(node_id_t id, parse_result_t result) {
    if (result.err)
        return result;

    ast_node_t *node = nullptr;
    error_t *err = ast_node_alloc(&node);
    if (err) {
        ast_node_free(result.node);
        return parse_error(err);
    }
    node->id = id;

    err = ast_node_add_child(node, result.node);
    if (err) {
        // The wrapper was allocated above and is not handed to anyone on this
        // path, so it must be released here too or it leaks.
        // NOTE(review): assumes a failed ast_node_add_child leaves the child
        // unattached, so the two frees cannot overlap - confirm.
        ast_node_free(node);
        ast_node_free(result.node);
        return parse_error(err);
    }

    return parse_success(node, result.next);
}

View File

@ -19,8 +19,7 @@ parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next);
parse_result_t parse_token(tokenlist_entry_t *current,
lexer_token_id_t token_id, node_id_t ast_id,
token_validator_t is_valid);
tokenlist_entry_t *skip_insignificant(tokenlist_entry_t *);
/* Wraps the node of a successful result as the child of a newly allocated
 * node with the given id; a failed result is returned unchanged. */
parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);
extern error_t *err_parse_no_match;

View File

@ -1,8 +1,17 @@
.section text
; Small valid code snippet that should contain all different AST nodes
_start:
mov eax, ebx
mov eax, 555 ; move 555 into eax
lea eax, [eax + ebx * 4 + 8]
lea eax, [eax + 8]
lea eax, [eax + ebx * 8]
lea eax, [esp - 24]
lea eax, [eax + ebx * 4 - 8]
lea eax, [_start]
mov eax, _start
mov eax, 555
push 0o777
xor eax, 0xDEADBEEF
and ecx, 0o770