Compare commits
3 Commits
eab2202f23
...
0323e1282e
Author | SHA1 | Date | |
---|---|---|---|
0323e1282e | |||
5272fdb227 | |||
0acc3f27f3 |
58
src/ast.h
58
src/ast.h
@ -5,6 +5,7 @@
|
||||
#include "error.h"
|
||||
#include "lexer.h"
|
||||
#include "tokenlist.h"
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@ -79,6 +80,22 @@ typedef struct opcode_encoding {
|
||||
size_t len;
|
||||
} opcode_encoding_t;
|
||||
|
||||
/**
 * Payload carried by a NODE_INSTRUCTION ast node; it is stored in the node's
 * value union and reached through ast_node_instruction_value().
 */
typedef struct instruction {
|
||||
bool has_reference; /* NOTE(review): presumably true when an operand refers to a label; the setter is not visible here */
|
||||
opcode_encoding_t encoding; /* encoded bytes (buffer) and their length (len), filled in by the encoder */
|
||||
int64_t address; /* address assigned to this instruction by the encoder passes */
|
||||
} instruction_t;
|
||||
|
||||
/**
 * Payload carried by a NODE_LABEL_REFERENCE ast node; it is stored in the
 * node's value union and reached through ast_node_reference_value().
 */
typedef struct reference {
|
||||
int64_t offset; /* NOTE(review): presumably the relative distance from the referencing instruction to the label; confirm against the encoder */
|
||||
int64_t address; /* absolute address of the referenced label */
|
||||
operand_size_t size; /* NOTE(review): presumably the size mask derived from offset via signed_to_size_mask(); confirm */
|
||||
} reference_t;
|
||||
|
||||
/**
 * Payload carried by a NODE_LABEL ast node; it is stored in the node's value
 * union and reached through ast_node_label_value().
 */
typedef struct {
|
||||
int64_t address; /* address assigned to the label by the encoder passes */
|
||||
} label_t;
|
||||
|
||||
struct ast_node {
|
||||
node_id_t id;
|
||||
tokenlist_entry_t *token_entry;
|
||||
@ -89,22 +106,37 @@ struct ast_node {
|
||||
union {
|
||||
register_t reg;
|
||||
number_t number;
|
||||
struct {
|
||||
bool has_reference;
|
||||
opcode_encoding_t encoding;
|
||||
int64_t address;
|
||||
} instruction;
|
||||
struct {
|
||||
int64_t offset;
|
||||
int64_t address;
|
||||
operand_size_t size;
|
||||
} reference;
|
||||
struct {
|
||||
int64_t address;
|
||||
} label;
|
||||
instruction_t instruction;
|
||||
reference_t reference;
|
||||
label_t label;
|
||||
} value;
|
||||
};
|
||||
|
||||
/**
 * @brief Gives access to the register payload of an AST node
 *
 * The node must be tagged NODE_REGISTER; this is enforced with an assert.
 *
 * @param node the node whose value union holds a register
 * @return pointer to the register_t stored inside the node
 */
static inline register_t *ast_node_register_value(ast_node_t *node) {
    assert(node->id == NODE_REGISTER);

    register_t *reg = &node->value.reg;
    return reg;
}
|
||||
|
||||
/**
 * @brief Gives access to the number payload of an AST node
 *
 * The node must be tagged NODE_NUMBER; this is enforced with an assert.
 *
 * @param node the node whose value union holds a number
 * @return pointer to the number_t stored inside the node
 */
static inline number_t *ast_node_number_value(ast_node_t *node) {
    assert(node->id == NODE_NUMBER);

    number_t *number = &node->value.number;
    return number;
}
|
||||
|
||||
/**
 * @brief Gives access to the instruction payload of an AST node
 *
 * The node must be tagged NODE_INSTRUCTION; this is enforced with an assert.
 *
 * @param node the node whose value union holds an instruction
 * @return pointer to the instruction_t stored inside the node
 */
static inline instruction_t *ast_node_instruction_value(ast_node_t *node) {
    assert(node->id == NODE_INSTRUCTION);

    instruction_t *instruction = &node->value.instruction;
    return instruction;
}
|
||||
|
||||
/**
 * @brief Gives access to the label-reference payload of an AST node
 *
 * The node must be tagged NODE_LABEL_REFERENCE; this is enforced with an
 * assert.
 *
 * @param node the node whose value union holds a reference
 * @return pointer to the reference_t stored inside the node
 */
static inline reference_t *ast_node_reference_value(ast_node_t *node) {
    assert(node->id == NODE_LABEL_REFERENCE);

    reference_t *reference = &node->value.reference;
    return reference;
}
|
||||
|
||||
/**
 * @brief Gives access to the label payload of an AST node
 *
 * The node must be tagged NODE_LABEL; this is enforced with an assert.
 *
 * @param node the node whose value union holds a label
 * @return pointer to the label_t stored inside the node
 */
static inline label_t *ast_node_label_value(ast_node_t *node) {
    assert(node->id == NODE_LABEL);

    label_t *label = &node->value.label;
    return label;
}
|
||||
|
||||
/**
|
||||
* @brief Allocates a new AST node
|
||||
*
|
||||
|
@ -269,7 +269,7 @@ bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
|
||||
switch (info->kind) {
|
||||
case OPERAND_REGISTER:
|
||||
return operand->id == NODE_REGISTER &&
|
||||
operand->value.reg.size == info->size;
|
||||
ast_node_register_value(operand)->size == info->size;
|
||||
case OPERAND_MEMORY:
|
||||
return operand->id == NODE_MEMORY;
|
||||
case OPERAND_IMMEDIATE: {
|
||||
@ -278,7 +278,7 @@ bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
|
||||
ast_node_t *child = operand->children[0];
|
||||
|
||||
if (child->id == NODE_NUMBER)
|
||||
return (child->value.number.size & info->size) > 0;
|
||||
return (ast_node_number_value(child)->size & info->size) > 0;
|
||||
else if (child->id == NODE_LABEL_REFERENCE)
|
||||
return info->size == OPERAND_SIZE_32;
|
||||
// FIXME: first pass should give us information about the distance of
|
||||
@ -340,7 +340,7 @@ error_t *encode_one_register_in_opcode(encoder_t *encoder,
|
||||
(void)encoder;
|
||||
(void)opcode;
|
||||
|
||||
register_id_t id = operands->children[0]->value.reg.id;
|
||||
register_id_t id = ast_node_register_value(operands->children[0])->id;
|
||||
encoding->buffer[encoding->len - 1] |= id & 0b111;
|
||||
if ((id & 0b1000) > 0) {
|
||||
*rex |= rex_prefix_r;
|
||||
@ -355,7 +355,7 @@ error_t *encode_one_register(encoder_t *encoder, opcode_data_t *opcode,
|
||||
assert(operands->len == 1);
|
||||
assert(operands->children[0]->id == NODE_REGISTER);
|
||||
|
||||
register_id_t id = operands->children[0]->value.reg.id;
|
||||
register_id_t id = ast_node_register_value(operands->children[0])->id;
|
||||
|
||||
uint8_t modrm = modrm_mod_register;
|
||||
|
||||
@ -390,7 +390,7 @@ error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
|
||||
immediate->id == NODE_LABEL_REFERENCE);
|
||||
|
||||
if (immediate->id == NODE_NUMBER) {
|
||||
uint64_t value = immediate->value.number.value;
|
||||
uint64_t value = ast_node_number_value(immediate)->value;
|
||||
operand_size_t size = opcode->operands[0].size;
|
||||
error_t *err = nullptr;
|
||||
switch (size) {
|
||||
@ -483,7 +483,8 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
|
||||
return err;
|
||||
|
||||
// produce the actual encoding output in the NODE_INSTRUCTION value
|
||||
uint8_t *output = instruction->value.instruction.encoding.buffer;
|
||||
instruction_t *instruction_value = ast_node_instruction_value(instruction);
|
||||
uint8_t *output = instruction_value->encoding.buffer;
|
||||
size_t output_len = 0;
|
||||
|
||||
// Handle prefixes
|
||||
@ -502,11 +503,17 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
|
||||
memcpy(output + output_len, encoding->buffer, encoding->len);
|
||||
output_len += encoding->len;
|
||||
|
||||
instruction->value.instruction.encoding.len = output_len;
|
||||
instruction_value->encoding.len = output_len;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initial guess for instruction size of instructions that contain a label
|
||||
* reference
|
||||
*/
|
||||
constexpr size_t instruction_size_estimate = 10;
|
||||
|
||||
/**
|
||||
* Perform the initial pass over the AST.
|
||||
*
|
||||
@ -518,7 +525,6 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
|
||||
* - determine estimated addresses of each statement
|
||||
*
|
||||
*/
|
||||
constexpr size_t instruction_size_estimate = 10;
|
||||
error_t *encoder_first_pass(encoder_t *encoder) {
|
||||
ast_node_t *root = encoder->ast;
|
||||
assert(root->id == NODE_PROGRAM);
|
||||
@ -532,19 +538,21 @@ error_t *encoder_first_pass(encoder_t *encoder) {
|
||||
return err;
|
||||
|
||||
if (statement->id == NODE_INSTRUCTION &&
|
||||
statement->value.instruction.has_reference == false) {
|
||||
ast_node_instruction_value(statement)->has_reference == false) {
|
||||
err = encoder_encode_instruction(encoder, statement);
|
||||
if (err)
|
||||
return err;
|
||||
statement->value.instruction.address = address;
|
||||
address += statement->value.instruction.encoding.len;
|
||||
instruction_t *instruction = ast_node_instruction_value(statement);
|
||||
instruction->address = address;
|
||||
address += instruction->encoding.len;
|
||||
} else if (statement->id == NODE_INSTRUCTION) {
|
||||
statement->value.instruction.encoding.len =
|
||||
instruction_size_estimate;
|
||||
statement->value.instruction.address = address;
|
||||
instruction_t *instruction = ast_node_instruction_value(statement);
|
||||
instruction->encoding.len = instruction_size_estimate;
|
||||
instruction->address = address;
|
||||
address += instruction_size_estimate;
|
||||
} else if (statement->id == NODE_LABEL) {
|
||||
statement->value.instruction.address = address;
|
||||
label_t *label = ast_node_label_value(statement);
|
||||
label->address = address;
|
||||
}
|
||||
}
|
||||
|
||||
@ -567,18 +575,18 @@ operand_size_t signed_to_size_mask(int64_t value) {
|
||||
}
|
||||
|
||||
int64_t statement_offset(ast_node_t *from, ast_node_t *to) {
|
||||
assert(from->id == NODE_LABEL || from->id == NODE_INSTRUCTION);
|
||||
assert(to->id == NODE_LABEL || to->id == NODE_INSTRUCTION);
|
||||
assert(from->id == NODE_INSTRUCTION);
|
||||
assert(to->id == NODE_LABEL);
|
||||
|
||||
int64_t from_addr =
|
||||
from->value.instruction.address + from->value.instruction.encoding.len;
|
||||
int64_t to_addr = to->value.instruction.address;
|
||||
instruction_t *instruction = ast_node_instruction_value(from);
|
||||
int64_t from_addr = instruction->address + instruction->encoding.len;
|
||||
int64_t to_addr = ast_node_label_value(to)->address;
|
||||
|
||||
return to_addr - from_addr;
|
||||
}
|
||||
|
||||
error_t *encoder_collect_label_info(encoder_t *encoder, ast_node_t *node,
|
||||
ast_node_t *statement) {
|
||||
error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node,
|
||||
ast_node_t *statement) {
|
||||
assert(statement->id == NODE_INSTRUCTION);
|
||||
if (node->id == NODE_LABEL_REFERENCE) {
|
||||
const char *name = node->token_entry->token.value;
|
||||
@ -587,7 +595,7 @@ error_t *encoder_collect_label_info(encoder_t *encoder, ast_node_t *node,
|
||||
symbol->statement->id == NODE_LABEL);
|
||||
|
||||
int64_t offset = statement_offset(statement, symbol->statement);
|
||||
int64_t absolute = symbol->statement->value.instruction.address;
|
||||
int64_t absolute = ast_node_label_value(symbol->statement)->address;
|
||||
operand_size_t size = signed_to_size_mask(offset);
|
||||
|
||||
node->value.reference.address = absolute;
|
||||
@ -598,6 +606,28 @@ error_t *encoder_collect_label_info(encoder_t *encoder, ast_node_t *node,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
 * Decide whether the second pass has to (re-)encode a statement.
 *
 * Only instructions that contain a label reference qualify: instructions
 * without one are already encoded with their final bytes during the first
 * pass, while reference-carrying instructions only receive a size estimate
 * there and need their real encoding once label addresses are known.
 *
 * The previous implementation returned `has_reference == false`, which
 * inverted the condition and left reference-carrying instructions stuck at
 * their estimated size.
 *
 * @param statement a top-level statement of the program node
 * @return true if the statement is a NODE_INSTRUCTION with a label reference,
 *         false for every other statement
 */
bool encoder_should_reencode(ast_node_t *statement) {
    if (statement->id != NODE_INSTRUCTION) {
        return false;
    }

    instruction_t *instruction = ast_node_instruction_value(statement);
    return instruction->has_reference;
}
|
||||
|
||||
/**
 * Store an address in a statement node.
 *
 * Instructions and labels each carry an address in their payload; any other
 * kind of statement is left untouched.
 *
 * @param statement the statement node to update
 * @param address the address to record
 */
void set_statement_address(ast_node_t *statement, int64_t address) {
    switch (statement->id) {
    case NODE_INSTRUCTION: {
        instruction_t *instruction = ast_node_instruction_value(statement);
        instruction->address = address;
        break;
    }
    case NODE_LABEL: {
        label_t *label = ast_node_label_value(statement);
        label->address = address;
        break;
    }
    default:
        // statements without an address payload are ignored
        break;
    }
}
|
||||
|
||||
/**
 * Report how many encoded bytes a statement occupies.
 *
 * @param statement the statement node to inspect
 * @return the current encoding length for a NODE_INSTRUCTION, 0 for any other
 *         statement
 */
size_t get_statement_length(ast_node_t *statement) {
    if (statement->id == NODE_INSTRUCTION) {
        instruction_t *instruction = ast_node_instruction_value(statement);
        return instruction->encoding.len;
    }

    return 0;
}
|
||||
|
||||
/**
|
||||
* Perform the second pass. Updates the label info and encodes all instructions
|
||||
* that have a label reference; this is the pass that performs their actual encoding.
|
||||
@ -610,28 +640,22 @@ error_t *encoder_second_pass(encoder_t *encoder, bool *did_update) {
|
||||
for (size_t i = 0; i < root->len; ++i) {
|
||||
ast_node_t *statement = root->children[i];
|
||||
|
||||
if (statement->id == NODE_INSTRUCTION &&
|
||||
statement->value.instruction.has_reference) {
|
||||
statement->value.instruction.address = address;
|
||||
size_t before = statement->value.instruction.encoding.len;
|
||||
set_statement_address(statement, address);
|
||||
size_t before = get_statement_length(statement);
|
||||
|
||||
if (encoder_should_reencode(statement)) {
|
||||
error_t *err =
|
||||
encoder_collect_label_info(encoder, statement, statement);
|
||||
encoder_collect_reference_info(encoder, statement, statement);
|
||||
if (err)
|
||||
return err;
|
||||
err = encoder_encode_instruction(encoder, statement);
|
||||
if (err)
|
||||
return err;
|
||||
size_t after = statement->value.instruction.encoding.len;
|
||||
address += after;
|
||||
*did_update = *did_update || (before != after);
|
||||
} else if (statement->id == NODE_INSTRUCTION &&
|
||||
statement->value.instruction.has_reference) {
|
||||
statement->value.instruction.address = address;
|
||||
address += statement->value.instruction.encoding.len;
|
||||
} else if (statement->id == NODE_LABEL) {
|
||||
statement->value.label.address = address;
|
||||
}
|
||||
address += statement->value.instruction.encoding.len;
|
||||
|
||||
size_t after = get_statement_length(statement);
|
||||
*did_update = *did_update || (before != after);
|
||||
address += after;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user