Add statement index to the symbol table
Some checks failed
Validate the build / validate-build (push) Failing after 37s

This allows going from a label to the statement/address. Restructure the
encoder to deal with this and pass the correct statement index to the
symbol update.
This commit is contained in:
omicron 2025-04-18 02:31:21 +02:00
parent 530e3fb423
commit 79e0120d52
5 changed files with 60 additions and 20 deletions

View File

@ -23,13 +23,15 @@ error_t *const err_encoder_not_implemented =
error_t *const err_encoder_unexpected_length = error_t *const err_encoder_unexpected_length =
&(error_t){.message = "Unexpectedly long encoding"}; &(error_t){.message = "Unexpectedly long encoding"};
error_t *encoder_alloc(encoder_t **output) { error_t *encoder_alloc(encoder_t **output, ast_node_t *ast) {
*output = nullptr; *output = nullptr;
encoder_t *encoder = calloc(1, sizeof(encoder_t)); encoder_t *encoder = calloc(1, sizeof(encoder_t));
if (encoder == nullptr) if (encoder == nullptr)
return err_allocation_failed; return err_allocation_failed;
encoder->ast = ast;
error_t *err = symbol_table_alloc(&encoder->symbols); error_t *err = symbol_table_alloc(&encoder->symbols);
if (err) { if (err) {
free(encoder); free(encoder);
@ -213,15 +215,12 @@ static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) {
return (modrm & ~modrm_rm_mask) | (id & 0b111); return (modrm & ~modrm_rm_mask) | (id & 0b111);
} }
/** error_t *encoder_collect_info(encoder_t *encoder, ast_node_t *node,
* Perform the initial pass over the AST. Records all symbols and sets the size_t statement_index) {
* values of registers and numbers.
*/
error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
error_t *err = nullptr; error_t *err = nullptr;
if (encoder_is_symbols_node(node)) if (encoder_is_symbols_node(node))
err = symbol_table_update(encoder->symbols, node); err = symbol_table_update(encoder->symbols, node, statement_index);
else if (node->id == NODE_NUMBER) else if (node->id == NODE_NUMBER)
err = encoder_set_number_value(node); err = encoder_set_number_value(node);
else if (node->id == NODE_REGISTER) else if (node->id == NODE_REGISTER)
@ -230,7 +229,8 @@ error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
return err; return err;
for (size_t i = 0; i < node->len; ++i) { for (size_t i = 0; i < node->len; ++i) {
error_t *err = encoder_first_pass(encoder, node->children[i]); error_t *err =
encoder_collect_info(encoder, node->children[i], statement_index);
if (err) if (err)
return err; return err;
} }
@ -238,6 +238,33 @@ error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
return nullptr; return nullptr;
} }
/**
 * Perform the initial pass over the AST.
 *
 * Walks every top-level statement of the program stored in encoder->ast and
 * passes it to encoder_collect_info() together with its position in the
 * program node (the statement index).
 *
 * Planned for this pass but not implemented yet (TODO):
 * - determine if label references are used by an instruction
 * - encode instructions that don't use label references
 * - determine estimated addresses of each statement
 *
 * @pre encoder->ast is a NODE_PROGRAM node (enforced with assert).
 * @return nullptr on success, otherwise the first error returned by
 *         encoder_collect_info().
 */
error_t *encoder_first_pass(encoder_t *encoder) {
  ast_node_t *root = encoder->ast;
  assert(root->id == NODE_PROGRAM);

  for (size_t i = 0; i < root->len; ++i) {
    // The child's position in the program node is its statement index.
    error_t *err = encoder_collect_info(encoder, root->children[i], i);
    if (err)
      return err;
    // TODO: per-instruction work (only for NODE_INSTRUCTION statements) goes
    // here; until then nothing further is done for any statement kind.
  }

  return nullptr;
}
bool is_operand_match(operand_info_t *info, ast_node_t *operand) { bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
switch (info->kind) { switch (info->kind) {
case OPERAND_REGISTER: case OPERAND_REGISTER:
@ -485,7 +512,9 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
* placeholder values for label references because instruction size has not * placeholder values for label references because instruction size has not
* yet been determined. * yet been determined.
*/ */
error_t *encoder_encoding_pass(encoder_t *encoder, ast_node_t *root) { error_t *encoder_second_pass(encoder_t *encoder) {
ast_node_t *root = encoder->ast;
for (size_t i = 0; i < root->len; ++i) { for (size_t i = 0; i < root->len; ++i) {
if (root->children[i]->id != NODE_INSTRUCTION) if (root->children[i]->id != NODE_INSTRUCTION)
continue; continue;
@ -515,12 +544,12 @@ error_t *encoder_check_symbols(encoder_t *encoder) {
return nullptr; return nullptr;
} }
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast) { error_t *encoder_encode(encoder_t *encoder) {
error_t *err = encoder_first_pass(encoder, ast); error_t *err = encoder_first_pass(encoder);
if (err) if (err)
return err; return err;
err = encoder_check_symbols(encoder); err = encoder_check_symbols(encoder);
if (err) if (err)
return err; return err;
return encoder_encoding_pass(encoder, ast); return encoder_second_pass(encoder);
} }

View File

@ -5,6 +5,7 @@
typedef struct encoder { typedef struct encoder {
symbol_table_t *symbols; symbol_table_t *symbols;
ast_node_t *ast;
} encoder_t; } encoder_t;
constexpr uint8_t modrm_mod_memory = 0b00'000'000; constexpr uint8_t modrm_mod_memory = 0b00'000'000;
@ -16,8 +17,8 @@ constexpr uint8_t modrm_reg_mask = 0b00'111'000;
constexpr uint8_t modrm_rm_mask = 0b00'000'111; constexpr uint8_t modrm_rm_mask = 0b00'000'111;
constexpr uint8_t modrm_mod_mask = 0b11'000'000; constexpr uint8_t modrm_mod_mask = 0b11'000'000;
error_t *encoder_alloc(encoder_t **encoder); error_t *encoder_alloc(encoder_t **encoder, ast_node_t *ast);
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast); error_t *encoder_encode(encoder_t *encoder);
void encoder_free(encoder_t *encoder); void encoder_free(encoder_t *encoder);
extern error_t *const err_encoder_invalid_register; extern error_t *const err_encoder_invalid_register;

View File

@ -112,7 +112,7 @@ bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
* @pre The symbol _must not_ already be in the table. * @pre The symbol _must not_ already be in the table.
*/ */
error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind, error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
ast_node_t *node) { ast_node_t *node, size_t statement_index) {
if (table->len >= table->cap) { if (table->len >= table->cap) {
error_t *err = symbol_table_grow_cap(table); error_t *err = symbol_table_grow_cap(table);
if (err) if (err)
@ -123,6 +123,7 @@ error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
.name = name, .name = name,
.kind = kind, .kind = kind,
.node = node, .node = node,
.statement_index = statement_index,
}; };
table->len += 1; table->len += 1;
@ -130,7 +131,8 @@ error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
return nullptr; return nullptr;
} }
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) { error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
size_t statement_index) {
char *name; char *name;
symbol_kind_t kind; symbol_kind_t kind;
error_t *err = symbol_table_get_node_info(node, &kind, &name); error_t *err = symbol_table_get_node_info(node, &kind, &name);
@ -139,13 +141,19 @@ error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) {
symbol_t *symbol = symbol_table_lookup(table, name); symbol_t *symbol = symbol_table_lookup(table, name);
if (!symbol) if (!symbol)
return symbol_table_add(table, name, kind, node); return symbol_table_add(table, name, kind, node, statement_index);
if (symbol_table_should_error(symbol->kind, kind)) if (symbol_table_should_error(symbol->kind, kind))
return err_symbol_table_incompatible_symbols; return err_symbol_table_incompatible_symbols;
if (symbol_table_should_update(symbol->kind, kind)) { if (symbol_table_should_update(symbol->kind, kind)) {
symbol->name = name; symbol->name = name;
symbol->kind = kind; symbol->kind = kind;
symbol->node = node; symbol->node = node;
// Deviation from the regular update: the statement index is only taken
// from a label definition (SYMBOL_LOCAL), because the label's statement
// determines the address, even if the symbol is promoted beyond local.
if (kind == SYMBOL_LOCAL)
symbol->statement_index = statement_index;
} }
return nullptr; return nullptr;
} }

View File

@ -30,6 +30,7 @@ typedef struct symbol {
char *name; char *name;
symbol_kind_t kind; symbol_kind_t kind;
ast_node_t *node; ast_node_t *node;
size_t statement_index;
} symbol_t; } symbol_t;
typedef struct symbol_table { typedef struct symbol_table {
@ -40,7 +41,8 @@ typedef struct symbol_table {
error_t *symbol_table_alloc(symbol_table_t **table); error_t *symbol_table_alloc(symbol_table_t **table);
void symbol_table_free(symbol_table_t *table); void symbol_table_free(symbol_table_t *table);
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node); error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
size_t statement_index);
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name); symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
#endif // INCLUDE_ENCODER_SYMBOLS_H_ #endif // INCLUDE_ENCODER_SYMBOLS_H_

View File

@ -74,11 +74,11 @@ error_t *print_encoding(tokenlist_t *list) {
return result.err; return result.err;
encoder_t *encoder; encoder_t *encoder;
error_t *err = encoder_alloc(&encoder); error_t *err = encoder_alloc(&encoder, result.node);
if (err) if (err)
goto cleanup_ast; goto cleanup_ast;
err = encoder_encode(encoder, result.node); err = encoder_encode(encoder);
if (err) if (err)
goto cleanup_ast; goto cleanup_ast;