Compare commits
3 Commits
79e0120d52
...
fab5bedf3d
Author | SHA1 | Date | |
---|---|---|---|
fab5bedf3d | |||
9a1570e3e5 | |||
9c6b69e187 |
16
src/ast.h
16
src/ast.h
@ -75,7 +75,7 @@ typedef struct register_ {
|
||||
} register_t;
|
||||
|
||||
typedef struct opcode_encoding {
|
||||
uint8_t encoding[32];
|
||||
uint8_t buffer[32];
|
||||
size_t len;
|
||||
} opcode_encoding_t;
|
||||
|
||||
@ -89,7 +89,19 @@ struct ast_node {
|
||||
union {
|
||||
register_t reg;
|
||||
number_t number;
|
||||
opcode_encoding_t encoding;
|
||||
struct {
|
||||
bool has_reference;
|
||||
opcode_encoding_t encoding;
|
||||
int64_t address;
|
||||
} instruction;
|
||||
struct {
|
||||
int64_t offset;
|
||||
int64_t address;
|
||||
operand_size_t size;
|
||||
} reference;
|
||||
struct {
|
||||
int64_t address;
|
||||
} label;
|
||||
} value;
|
||||
};
|
||||
|
||||
|
@ -6,6 +6,31 @@
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* General encoder flow:
|
||||
*
|
||||
* There are 2 major passes the encoder does:
|
||||
*
|
||||
* First pass:
|
||||
* - Run through the AST and collect information:
|
||||
* - Set register values
|
||||
* - Parse/set number values
|
||||
* - Mark all instructions that use label references
|
||||
* - Encode all instructions that don't use label references
|
||||
* - Update addresses of all labels and instructions. Use an estimated
|
||||
* instruction size for those instructions that use label references.
|
||||
*
|
||||
* Second pass:
|
||||
* - Run through the AST for all instructions that use label references and
|
||||
* collect size information using the estimated addresses from pass 1
|
||||
* - Encode label references with the estimated addresses, this fixes their
|
||||
* size.
|
||||
* - Update all addresses
|
||||
*
|
||||
* Iteration:
|
||||
* - Repeat the second pass until addresses converge
|
||||
*/
|
||||
|
||||
error_t *const err_encoder_invalid_register =
|
||||
&(error_t){.message = "Invalid register"};
|
||||
error_t *const err_encoder_number_overflow =
|
||||
@ -23,13 +48,15 @@ error_t *const err_encoder_not_implemented =
|
||||
error_t *const err_encoder_unexpected_length =
|
||||
&(error_t){.message = "Unexpectedly long encoding"};
|
||||
|
||||
error_t *encoder_alloc(encoder_t **output) {
|
||||
error_t *encoder_alloc(encoder_t **output, ast_node_t *ast) {
|
||||
*output = nullptr;
|
||||
encoder_t *encoder = calloc(1, sizeof(encoder_t));
|
||||
|
||||
if (encoder == nullptr)
|
||||
return err_allocation_failed;
|
||||
|
||||
encoder->ast = ast;
|
||||
|
||||
error_t *err = symbol_table_alloc(&encoder->symbols);
|
||||
if (err) {
|
||||
free(encoder);
|
||||
@ -213,16 +240,15 @@ static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) {
|
||||
return (modrm & ~modrm_rm_mask) | (id & 0b111);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the initial pass over the AST. Records all symbols and sets the
|
||||
* values of registers and numbers.
|
||||
*/
|
||||
error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
|
||||
error_t *encoder_collect_info(encoder_t *encoder, ast_node_t *node,
|
||||
ast_node_t *statement) {
|
||||
error_t *err = nullptr;
|
||||
|
||||
if (encoder_is_symbols_node(node))
|
||||
err = symbol_table_update(encoder->symbols, node);
|
||||
else if (node->id == NODE_NUMBER)
|
||||
if (encoder_is_symbols_node(node)) {
|
||||
err = symbol_table_update(encoder->symbols, node, statement);
|
||||
if (statement->id == NODE_INSTRUCTION)
|
||||
statement->value.instruction.has_reference = true;
|
||||
} else if (node->id == NODE_NUMBER)
|
||||
err = encoder_set_number_value(node);
|
||||
else if (node->id == NODE_REGISTER)
|
||||
err = encoder_set_register_value(node);
|
||||
@ -230,7 +256,8 @@ error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
|
||||
return err;
|
||||
|
||||
for (size_t i = 0; i < node->len; ++i) {
|
||||
error_t *err = encoder_first_pass(encoder, node->children[i]);
|
||||
error_t *err =
|
||||
encoder_collect_info(encoder, node->children[i], statement);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
@ -456,7 +483,7 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
|
||||
return err;
|
||||
|
||||
// produce the actual encoding output in the NODE_INSTRUCTION value
|
||||
uint8_t *output = instruction->value.encoding.encoding;
|
||||
uint8_t *output = instruction->value.instruction.encoding.buffer;
|
||||
size_t output_len = 0;
|
||||
|
||||
// Handle prefixes
|
||||
@ -475,24 +502,136 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
|
||||
memcpy(output + output_len, encoding->buffer, encoding->len);
|
||||
output_len += encoding->len;
|
||||
|
||||
instruction->value.encoding.len = output_len;
|
||||
instruction->value.instruction.encoding.len = output_len;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the second pass that performs actual encoding. Will use
|
||||
* placeholder values for label references because instruction size has not
|
||||
* yet been determined.
|
||||
* Perform the initial pass over the AST.
|
||||
*
|
||||
* - Collect information about the operands
|
||||
* - parse and set number values
|
||||
* - set the register values
|
||||
* - determine if label references are used by an instruction
|
||||
* - encode instructions that don't use label references
|
||||
* - determine estimated addresses of each statement
|
||||
*
|
||||
*/
|
||||
error_t *encoder_encoding_pass(encoder_t *encoder, ast_node_t *root) {
|
||||
constexpr size_t instruction_size_estimate = 10;
|
||||
error_t *encoder_first_pass(encoder_t *encoder) {
|
||||
ast_node_t *root = encoder->ast;
|
||||
assert(root->id == NODE_PROGRAM);
|
||||
|
||||
uintptr_t address = 0;
|
||||
|
||||
for (size_t i = 0; i < root->len; ++i) {
|
||||
if (root->children[i]->id != NODE_INSTRUCTION)
|
||||
continue;
|
||||
ast_node_t *instruction = root->children[i];
|
||||
error_t *err = encoder_encode_instruction(encoder, instruction);
|
||||
ast_node_t *statement = root->children[i];
|
||||
error_t *err = encoder_collect_info(encoder, statement, statement);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (statement->id == NODE_INSTRUCTION &&
|
||||
statement->value.instruction.has_reference == false) {
|
||||
err = encoder_encode_instruction(encoder, statement);
|
||||
if (err)
|
||||
return err;
|
||||
statement->value.instruction.address = address;
|
||||
address += statement->value.instruction.encoding.len;
|
||||
} else if (statement->id == NODE_INSTRUCTION) {
|
||||
statement->value.instruction.encoding.len =
|
||||
instruction_size_estimate;
|
||||
statement->value.instruction.address = address;
|
||||
address += instruction_size_estimate;
|
||||
} else if (statement->id == NODE_LABEL) {
|
||||
statement->value.instruction.address = address;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
operand_size_t signed_to_size_mask(int64_t value) {
|
||||
operand_size_t size = OPERAND_SIZE_64;
|
||||
|
||||
if (value >= INT8_MIN && value <= INT8_MAX)
|
||||
size |= OPERAND_SIZE_8;
|
||||
|
||||
if (value >= INT16_MIN && value <= INT16_MAX)
|
||||
size |= OPERAND_SIZE_16;
|
||||
|
||||
if (value >= INT32_MIN && value <= INT32_MAX)
|
||||
size |= OPERAND_SIZE_32;
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
 * Compute the signed distance from the end of one statement to the start of
 * another.
 *
 * The end of `from` is its address plus its current encoding length; the
 * start of `to` is its address. Both nodes are read through
 * `value.instruction`, also when the node is a NODE_LABEL.
 *
 * @param from statement the offset is measured from (label or instruction)
 * @param to   statement the offset points at (label or instruction)
 * @return `to`'s address minus the address just past `from`
 */
int64_t statement_offset(ast_node_t *from, ast_node_t *to) {
    assert(to->id == NODE_LABEL || to->id == NODE_INSTRUCTION);
    assert(from->id == NODE_LABEL || from->id == NODE_INSTRUCTION);

    int64_t target = to->value.instruction.address;

    // Address of the first byte after `from`.
    int64_t origin = from->value.instruction.address;
    origin += from->value.instruction.encoding.len;

    return target - origin;
}
|
||||
|
||||
/**
 * Resolve every label reference at or below `node` for one instruction.
 *
 * For each NODE_LABEL_REFERENCE found, the referenced symbol is looked up and
 * the reference node is given:
 *   - `address`: the address currently stored on the label's statement
 *   - `offset`:  distance from the end of `statement` to that label
 *   - `size`:    mask of operand sizes able to hold `offset`
 *
 * The whole subtree is walked. The caller hands in the instruction node
 * itself, so the references live in its children and would never be reached
 * by looking at `node` alone.
 *
 * @param encoder   encoder owning the symbol table
 * @param node      subtree to search for label references
 * @param statement the instruction that owns `node`; must be NODE_INSTRUCTION
 * @return nullptr on success, otherwise the first error from a child
 */
error_t *encoder_collect_label_info(encoder_t *encoder, ast_node_t *node,
                                    ast_node_t *statement) {
    assert(statement->id == NODE_INSTRUCTION);

    if (node->id == NODE_LABEL_REFERENCE) {
        const char *name = node->token_entry->token.value;
        symbol_t *symbol = symbol_table_lookup(encoder->symbols, name);
        // NOTE(review): symbol_table_update clears `statement` for symbols
        // that are not SYMBOL_LOCAL, so a reference to such a symbol trips
        // this assertion - confirm encoder_check_symbols rules that out.
        assert(symbol && symbol->statement &&
               symbol->statement->id == NODE_LABEL);

        int64_t offset = statement_offset(statement, symbol->statement);
        int64_t absolute = symbol->statement->value.instruction.address;
        operand_size_t size = signed_to_size_mask(offset);

        node->value.reference.address = absolute;
        node->value.reference.offset = offset;
        node->value.reference.size = size;
    }

    // Descend into the operands so references nested below the instruction
    // node are resolved too.
    for (size_t i = 0; i < node->len; ++i) {
        error_t *err =
            encoder_collect_label_info(encoder, node->children[i], statement);
        if (err)
            return err;
    }

    return nullptr;
}
|
||||
|
||||
/**
|
||||
* Perform the second pass. Updates the label info and encodes all instructions
|
||||
* that have a label reference.that performs actual encoding.
|
||||
*/
|
||||
error_t *encoder_second_pass(encoder_t *encoder, bool *did_update) {
|
||||
ast_node_t *root = encoder->ast;
|
||||
|
||||
*did_update = false;
|
||||
int64_t address = 0;
|
||||
for (size_t i = 0; i < root->len; ++i) {
|
||||
ast_node_t *statement = root->children[i];
|
||||
|
||||
if (statement->id == NODE_INSTRUCTION &&
|
||||
statement->value.instruction.has_reference) {
|
||||
statement->value.instruction.address = address;
|
||||
size_t before = statement->value.instruction.encoding.len;
|
||||
error_t *err =
|
||||
encoder_collect_label_info(encoder, statement, statement);
|
||||
if (err)
|
||||
return err;
|
||||
err = encoder_encode_instruction(encoder, statement);
|
||||
if (err)
|
||||
return err;
|
||||
size_t after = statement->value.instruction.encoding.len;
|
||||
address += after;
|
||||
*did_update = *did_update || (before != after);
|
||||
} else if (statement->id == NODE_INSTRUCTION &&
|
||||
statement->value.instruction.has_reference) {
|
||||
statement->value.instruction.address = address;
|
||||
address += statement->value.instruction.encoding.len;
|
||||
} else if (statement->id == NODE_LABEL) {
|
||||
statement->value.label.address = address;
|
||||
}
|
||||
address += statement->value.instruction.encoding.len;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@ -515,12 +654,19 @@ error_t *encoder_check_symbols(encoder_t *encoder) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast) {
|
||||
error_t *err = encoder_first_pass(encoder, ast);
|
||||
error_t *encoder_encode(encoder_t *encoder) {
|
||||
error_t *err = encoder_first_pass(encoder);
|
||||
if (err)
|
||||
return err;
|
||||
err = encoder_check_symbols(encoder);
|
||||
if (err)
|
||||
return err;
|
||||
return encoder_encoding_pass(encoder, ast);
|
||||
|
||||
bool did_update = true;
|
||||
for (int i = 0; i < 10 && did_update; ++i) {
|
||||
err = encoder_second_pass(encoder, &did_update);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
typedef struct encoder {
|
||||
symbol_table_t *symbols;
|
||||
ast_node_t *ast;
|
||||
} encoder_t;
|
||||
|
||||
constexpr uint8_t modrm_mod_memory = 0b00'000'000;
|
||||
@ -16,8 +17,8 @@ constexpr uint8_t modrm_reg_mask = 0b00'111'000;
|
||||
constexpr uint8_t modrm_rm_mask = 0b00'000'111;
|
||||
constexpr uint8_t modrm_mod_mask = 0b11'000'000;
|
||||
|
||||
error_t *encoder_alloc(encoder_t **encoder);
|
||||
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast);
|
||||
error_t *encoder_alloc(encoder_t **encoder, ast_node_t *ast);
|
||||
error_t *encoder_encode(encoder_t *encoder);
|
||||
void encoder_free(encoder_t *encoder);
|
||||
|
||||
extern error_t *const err_encoder_invalid_register;
|
||||
|
@ -92,7 +92,7 @@ EXPORT | | | ERR | |
|
||||
-------------|-----------|----------|----------|----------|
|
||||
*/
|
||||
|
||||
bool symbol_table_should_update(symbol_kind_t old, symbol_kind_t new) {
|
||||
bool symbol_table_should_upgrade(symbol_kind_t old, symbol_kind_t new) {
|
||||
if (old == SYMBOL_REFERENCE)
|
||||
return new != SYMBOL_REFERENCE;
|
||||
if (old == SYMBOL_LOCAL)
|
||||
@ -112,7 +112,7 @@ bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
|
||||
* @pre The symbol _must not_ already be in the table.
|
||||
*/
|
||||
error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
|
||||
ast_node_t *node) {
|
||||
ast_node_t *statement) {
|
||||
if (table->len >= table->cap) {
|
||||
error_t *err = symbol_table_grow_cap(table);
|
||||
if (err)
|
||||
@ -122,7 +122,7 @@ error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
|
||||
table->symbols[table->len] = (symbol_t){
|
||||
.name = name,
|
||||
.kind = kind,
|
||||
.node = node,
|
||||
.statement = statement,
|
||||
};
|
||||
|
||||
table->len += 1;
|
||||
@ -130,23 +130,29 @@ error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) {
|
||||
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
|
||||
ast_node_t *statement) {
|
||||
char *name;
|
||||
symbol_kind_t kind;
|
||||
error_t *err = symbol_table_get_node_info(node, &kind, &name);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (kind != SYMBOL_LOCAL)
|
||||
statement = nullptr;
|
||||
|
||||
symbol_t *symbol = symbol_table_lookup(table, name);
|
||||
if (!symbol)
|
||||
return symbol_table_add(table, name, kind, node);
|
||||
return symbol_table_add(table, name, kind, statement);
|
||||
if (symbol_table_should_error(symbol->kind, kind))
|
||||
return err_symbol_table_incompatible_symbols;
|
||||
if (symbol_table_should_update(symbol->kind, kind)) {
|
||||
symbol->name = name;
|
||||
if (symbol_table_should_upgrade(symbol->kind, kind)) {
|
||||
symbol->kind = kind;
|
||||
symbol->node = node;
|
||||
}
|
||||
|
||||
if (kind == SYMBOL_LOCAL && symbol->statement == nullptr)
|
||||
symbol->statement = statement;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,7 @@ typedef enum symbol_kind {
|
||||
typedef struct symbol {
|
||||
char *name;
|
||||
symbol_kind_t kind;
|
||||
ast_node_t *node;
|
||||
ast_node_t *statement;
|
||||
} symbol_t;
|
||||
|
||||
typedef struct symbol_table {
|
||||
@ -40,7 +40,8 @@ typedef struct symbol_table {
|
||||
|
||||
error_t *symbol_table_alloc(symbol_table_t **table);
|
||||
void symbol_table_free(symbol_table_t *table);
|
||||
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node);
|
||||
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
|
||||
ast_node_t *statement);
|
||||
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
|
||||
|
||||
#endif // INCLUDE_ENCODER_SYMBOLS_H_
|
||||
|
@ -74,11 +74,11 @@ error_t *print_encoding(tokenlist_t *list) {
|
||||
return result.err;
|
||||
|
||||
encoder_t *encoder;
|
||||
error_t *err = encoder_alloc(&encoder);
|
||||
error_t *err = encoder_alloc(&encoder, result.node);
|
||||
if (err)
|
||||
goto cleanup_ast;
|
||||
|
||||
err = encoder_encode(encoder, result.node);
|
||||
err = encoder_encode(encoder);
|
||||
if (err)
|
||||
goto cleanup_ast;
|
||||
|
||||
@ -88,7 +88,8 @@ error_t *print_encoding(tokenlist_t *list) {
|
||||
if (node->id != NODE_INSTRUCTION)
|
||||
continue;
|
||||
|
||||
print_hex(node->value.encoding.len, node->value.encoding.encoding);
|
||||
print_hex(node->value.instruction.encoding.len,
|
||||
node->value.instruction.encoding.buffer);
|
||||
}
|
||||
|
||||
encoder_free(encoder);
|
||||
|
Loading…
x
Reference in New Issue
Block a user