Add statement index to the symbol table
Some checks failed
Validate the build / validate-build (push) Failing after 37s
Some checks failed
Validate the build / validate-build (push) Failing after 37s
This allows going from a label to the statement (and thus the address) it marks. Restructure the encoder to support this and pass the correct statement index to the symbol table update.
This commit is contained in:
parent
530e3fb423
commit
79e0120d52
@ -23,13 +23,15 @@ error_t *const err_encoder_not_implemented =
|
|||||||
error_t *const err_encoder_unexpected_length =
|
error_t *const err_encoder_unexpected_length =
|
||||||
&(error_t){.message = "Unexpectedly long encoding"};
|
&(error_t){.message = "Unexpectedly long encoding"};
|
||||||
|
|
||||||
error_t *encoder_alloc(encoder_t **output) {
|
error_t *encoder_alloc(encoder_t **output, ast_node_t *ast) {
|
||||||
*output = nullptr;
|
*output = nullptr;
|
||||||
encoder_t *encoder = calloc(1, sizeof(encoder_t));
|
encoder_t *encoder = calloc(1, sizeof(encoder_t));
|
||||||
|
|
||||||
if (encoder == nullptr)
|
if (encoder == nullptr)
|
||||||
return err_allocation_failed;
|
return err_allocation_failed;
|
||||||
|
|
||||||
|
encoder->ast = ast;
|
||||||
|
|
||||||
error_t *err = symbol_table_alloc(&encoder->symbols);
|
error_t *err = symbol_table_alloc(&encoder->symbols);
|
||||||
if (err) {
|
if (err) {
|
||||||
free(encoder);
|
free(encoder);
|
||||||
@ -213,15 +215,12 @@ static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) {
|
|||||||
return (modrm & ~modrm_rm_mask) | (id & 0b111);
|
return (modrm & ~modrm_rm_mask) | (id & 0b111);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
error_t *encoder_collect_info(encoder_t *encoder, ast_node_t *node,
|
||||||
* Perform the initial pass over the AST. Records all symbols and sets the
|
size_t statement_index) {
|
||||||
* values of registers and numbers.
|
|
||||||
*/
|
|
||||||
error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
|
|
||||||
error_t *err = nullptr;
|
error_t *err = nullptr;
|
||||||
|
|
||||||
if (encoder_is_symbols_node(node))
|
if (encoder_is_symbols_node(node))
|
||||||
err = symbol_table_update(encoder->symbols, node);
|
err = symbol_table_update(encoder->symbols, node, statement_index);
|
||||||
else if (node->id == NODE_NUMBER)
|
else if (node->id == NODE_NUMBER)
|
||||||
err = encoder_set_number_value(node);
|
err = encoder_set_number_value(node);
|
||||||
else if (node->id == NODE_REGISTER)
|
else if (node->id == NODE_REGISTER)
|
||||||
@ -230,7 +229,8 @@ error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
|
|||||||
return err;
|
return err;
|
||||||
|
|
||||||
for (size_t i = 0; i < node->len; ++i) {
|
for (size_t i = 0; i < node->len; ++i) {
|
||||||
error_t *err = encoder_first_pass(encoder, node->children[i]);
|
error_t *err =
|
||||||
|
encoder_collect_info(encoder, node->children[i], statement_index);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -238,6 +238,33 @@ error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform the initial pass over the AST.
|
||||||
|
*
|
||||||
|
* - Collect information about the operands
|
||||||
|
* - parse and set number values
|
||||||
|
* - set the register values
|
||||||
|
* - determine if label references are used by an instruction
|
||||||
|
* - encode instructions that don't use label references
|
||||||
|
* - determine estimated addresses of each statement
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
error_t *encoder_first_pass(encoder_t *encoder) {
|
||||||
|
ast_node_t *root = encoder->ast;
|
||||||
|
assert(root->id == NODE_PROGRAM);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < root->len; ++i) {
|
||||||
|
ast_node_t *statement = root->children[i];
|
||||||
|
error_t *err = encoder_collect_info(encoder, statement, i);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
if (statement->id != NODE_INSTRUCTION)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
|
bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
|
||||||
switch (info->kind) {
|
switch (info->kind) {
|
||||||
case OPERAND_REGISTER:
|
case OPERAND_REGISTER:
|
||||||
@ -485,7 +512,9 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
|
|||||||
* placeholder values for label references because instruction size has not
|
* placeholder values for label references because instruction size has not
|
||||||
* yet been determined.
|
* yet been determined.
|
||||||
*/
|
*/
|
||||||
error_t *encoder_encoding_pass(encoder_t *encoder, ast_node_t *root) {
|
error_t *encoder_second_pass(encoder_t *encoder) {
|
||||||
|
ast_node_t *root = encoder->ast;
|
||||||
|
|
||||||
for (size_t i = 0; i < root->len; ++i) {
|
for (size_t i = 0; i < root->len; ++i) {
|
||||||
if (root->children[i]->id != NODE_INSTRUCTION)
|
if (root->children[i]->id != NODE_INSTRUCTION)
|
||||||
continue;
|
continue;
|
||||||
@ -515,12 +544,12 @@ error_t *encoder_check_symbols(encoder_t *encoder) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast) {
|
error_t *encoder_encode(encoder_t *encoder) {
|
||||||
error_t *err = encoder_first_pass(encoder, ast);
|
error_t *err = encoder_first_pass(encoder);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
err = encoder_check_symbols(encoder);
|
err = encoder_check_symbols(encoder);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
return encoder_encoding_pass(encoder, ast);
|
return encoder_second_pass(encoder);
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
typedef struct encoder {
|
typedef struct encoder {
|
||||||
symbol_table_t *symbols;
|
symbol_table_t *symbols;
|
||||||
|
ast_node_t *ast;
|
||||||
} encoder_t;
|
} encoder_t;
|
||||||
|
|
||||||
constexpr uint8_t modrm_mod_memory = 0b00'000'000;
|
constexpr uint8_t modrm_mod_memory = 0b00'000'000;
|
||||||
@ -16,8 +17,8 @@ constexpr uint8_t modrm_reg_mask = 0b00'111'000;
|
|||||||
constexpr uint8_t modrm_rm_mask = 0b00'000'111;
|
constexpr uint8_t modrm_rm_mask = 0b00'000'111;
|
||||||
constexpr uint8_t modrm_mod_mask = 0b11'000'000;
|
constexpr uint8_t modrm_mod_mask = 0b11'000'000;
|
||||||
|
|
||||||
error_t *encoder_alloc(encoder_t **encoder);
|
error_t *encoder_alloc(encoder_t **encoder, ast_node_t *ast);
|
||||||
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast);
|
error_t *encoder_encode(encoder_t *encoder);
|
||||||
void encoder_free(encoder_t *encoder);
|
void encoder_free(encoder_t *encoder);
|
||||||
|
|
||||||
extern error_t *const err_encoder_invalid_register;
|
extern error_t *const err_encoder_invalid_register;
|
||||||
|
@ -112,7 +112,7 @@ bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
|
|||||||
* @pre The symbol _must not_ already be in the table.
|
* @pre The symbol _must not_ already be in the table.
|
||||||
*/
|
*/
|
||||||
error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
|
error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
|
||||||
ast_node_t *node) {
|
ast_node_t *node, size_t statement_index) {
|
||||||
if (table->len >= table->cap) {
|
if (table->len >= table->cap) {
|
||||||
error_t *err = symbol_table_grow_cap(table);
|
error_t *err = symbol_table_grow_cap(table);
|
||||||
if (err)
|
if (err)
|
||||||
@ -123,6 +123,7 @@ error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
|
|||||||
.name = name,
|
.name = name,
|
||||||
.kind = kind,
|
.kind = kind,
|
||||||
.node = node,
|
.node = node,
|
||||||
|
.statement_index = statement_index,
|
||||||
};
|
};
|
||||||
|
|
||||||
table->len += 1;
|
table->len += 1;
|
||||||
@ -130,7 +131,8 @@ error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) {
|
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
|
||||||
|
size_t statement_index) {
|
||||||
char *name;
|
char *name;
|
||||||
symbol_kind_t kind;
|
symbol_kind_t kind;
|
||||||
error_t *err = symbol_table_get_node_info(node, &kind, &name);
|
error_t *err = symbol_table_get_node_info(node, &kind, &name);
|
||||||
@ -139,13 +141,19 @@ error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) {
|
|||||||
|
|
||||||
symbol_t *symbol = symbol_table_lookup(table, name);
|
symbol_t *symbol = symbol_table_lookup(table, name);
|
||||||
if (!symbol)
|
if (!symbol)
|
||||||
return symbol_table_add(table, name, kind, node);
|
return symbol_table_add(table, name, kind, node, statement_index);
|
||||||
if (symbol_table_should_error(symbol->kind, kind))
|
if (symbol_table_should_error(symbol->kind, kind))
|
||||||
return err_symbol_table_incompatible_symbols;
|
return err_symbol_table_incompatible_symbols;
|
||||||
if (symbol_table_should_update(symbol->kind, kind)) {
|
if (symbol_table_should_update(symbol->kind, kind)) {
|
||||||
symbol->name = name;
|
symbol->name = name;
|
||||||
symbol->kind = kind;
|
symbol->kind = kind;
|
||||||
symbol->node = node;
|
symbol->node = node;
|
||||||
|
|
||||||
|
// Deviation from the regular update: the statement that matters most is
|
||||||
|
// the one holding the actual label, even if the symbol gets promoted
|
||||||
|
// beyond a local symbol, because the label determines the address.
|
||||||
|
if (kind == SYMBOL_LOCAL)
|
||||||
|
symbol->statement_index = statement_index;
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@ typedef struct symbol {
|
|||||||
char *name;
|
char *name;
|
||||||
symbol_kind_t kind;
|
symbol_kind_t kind;
|
||||||
ast_node_t *node;
|
ast_node_t *node;
|
||||||
|
size_t statement_index;
|
||||||
} symbol_t;
|
} symbol_t;
|
||||||
|
|
||||||
typedef struct symbol_table {
|
typedef struct symbol_table {
|
||||||
@ -40,7 +41,8 @@ typedef struct symbol_table {
|
|||||||
|
|
||||||
error_t *symbol_table_alloc(symbol_table_t **table);
|
error_t *symbol_table_alloc(symbol_table_t **table);
|
||||||
void symbol_table_free(symbol_table_t *table);
|
void symbol_table_free(symbol_table_t *table);
|
||||||
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node);
|
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
|
||||||
|
size_t statement_index);
|
||||||
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
|
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
|
||||||
|
|
||||||
#endif // INCLUDE_ENCODER_SYMBOLS_H_
|
#endif // INCLUDE_ENCODER_SYMBOLS_H_
|
||||||
|
@ -74,11 +74,11 @@ error_t *print_encoding(tokenlist_t *list) {
|
|||||||
return result.err;
|
return result.err;
|
||||||
|
|
||||||
encoder_t *encoder;
|
encoder_t *encoder;
|
||||||
error_t *err = encoder_alloc(&encoder);
|
error_t *err = encoder_alloc(&encoder, result.node);
|
||||||
if (err)
|
if (err)
|
||||||
goto cleanup_ast;
|
goto cleanup_ast;
|
||||||
|
|
||||||
err = encoder_encode(encoder, result.node);
|
err = encoder_encode(encoder);
|
||||||
if (err)
|
if (err)
|
||||||
goto cleanup_ast;
|
goto cleanup_ast;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user