Compare commits
	
		
			3 Commits
		
	
	
		
			0323e1282e
			...
			eab2202f23
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| eab2202f23 | |||
| fab5bedf3d | |||
| 9a1570e3e5 | 
							
								
								
									
										58
									
								
								src/ast.h
									
									
									
									
									
								
							
							
						
						
									
										58
									
								
								src/ast.h
									
									
									
									
									
								
							@@ -5,7 +5,6 @@
 | 
			
		||||
#include "error.h"
 | 
			
		||||
#include "lexer.h"
 | 
			
		||||
#include "tokenlist.h"
 | 
			
		||||
#include <assert.h>
 | 
			
		||||
#include <stddef.h>
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
 | 
			
		||||
@@ -80,22 +79,6 @@ typedef struct opcode_encoding {
 | 
			
		||||
    size_t len;
 | 
			
		||||
} opcode_encoding_t;
 | 
			
		||||
 | 
			
		||||
typedef struct instruction {
 | 
			
		||||
    bool has_reference;
 | 
			
		||||
    opcode_encoding_t encoding;
 | 
			
		||||
    int64_t address;
 | 
			
		||||
} instruction_t;
 | 
			
		||||
 | 
			
		||||
typedef struct reference {
 | 
			
		||||
    int64_t offset;
 | 
			
		||||
    int64_t address;
 | 
			
		||||
    operand_size_t size;
 | 
			
		||||
} reference_t;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
    int64_t address;
 | 
			
		||||
} label_t;
 | 
			
		||||
 | 
			
		||||
struct ast_node {
 | 
			
		||||
    node_id_t id;
 | 
			
		||||
    tokenlist_entry_t *token_entry;
 | 
			
		||||
@@ -106,37 +89,22 @@ struct ast_node {
 | 
			
		||||
    union {
 | 
			
		||||
        register_t reg;
 | 
			
		||||
        number_t number;
 | 
			
		||||
        instruction_t instruction;
 | 
			
		||||
        reference_t reference;
 | 
			
		||||
        label_t label;
 | 
			
		||||
        struct {
 | 
			
		||||
            bool has_reference;
 | 
			
		||||
            opcode_encoding_t encoding;
 | 
			
		||||
            int64_t address;
 | 
			
		||||
        } instruction;
 | 
			
		||||
        struct {
 | 
			
		||||
            int64_t offset;
 | 
			
		||||
            int64_t address;
 | 
			
		||||
            operand_size_t size;
 | 
			
		||||
        } reference;
 | 
			
		||||
        struct {
 | 
			
		||||
            int64_t address;
 | 
			
		||||
        } label;
 | 
			
		||||
    } value;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static inline register_t *ast_node_register_value(ast_node_t *node) {
 | 
			
		||||
    assert(node->id == NODE_REGISTER);
 | 
			
		||||
    return &node->value.reg;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline number_t *ast_node_number_value(ast_node_t *node) {
 | 
			
		||||
    assert(node->id == NODE_NUMBER);
 | 
			
		||||
    return &node->value.number;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline instruction_t *ast_node_instruction_value(ast_node_t *node) {
 | 
			
		||||
    assert(node->id == NODE_INSTRUCTION);
 | 
			
		||||
    return &node->value.instruction;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline reference_t *ast_node_reference_value(ast_node_t *node) {
 | 
			
		||||
    assert(node->id == NODE_LABEL_REFERENCE);
 | 
			
		||||
    return &node->value.reference;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline label_t *ast_node_label_value(ast_node_t *node) {
 | 
			
		||||
    assert(node->id == NODE_LABEL);
 | 
			
		||||
    return &node->value.label;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @brief Allocates a new AST node
 | 
			
		||||
 *
 | 
			
		||||
 
 | 
			
		||||
@@ -269,7 +269,7 @@ bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
 | 
			
		||||
    switch (info->kind) {
 | 
			
		||||
    case OPERAND_REGISTER:
 | 
			
		||||
        return operand->id == NODE_REGISTER &&
 | 
			
		||||
               ast_node_register_value(operand)->size == info->size;
 | 
			
		||||
               operand->value.reg.size == info->size;
 | 
			
		||||
    case OPERAND_MEMORY:
 | 
			
		||||
        return operand->id == NODE_MEMORY;
 | 
			
		||||
    case OPERAND_IMMEDIATE: {
 | 
			
		||||
@@ -278,7 +278,7 @@ bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
 | 
			
		||||
        ast_node_t *child = operand->children[0];
 | 
			
		||||
 | 
			
		||||
        if (child->id == NODE_NUMBER)
 | 
			
		||||
            return (ast_node_number_value(child)->size & info->size) > 0;
 | 
			
		||||
            return (child->value.number.size & info->size) > 0;
 | 
			
		||||
        else if (child->id == NODE_LABEL_REFERENCE)
 | 
			
		||||
            return info->size == OPERAND_SIZE_32;
 | 
			
		||||
        // FIXME: first pass should give us information about the distance of
 | 
			
		||||
@@ -340,7 +340,7 @@ error_t *encode_one_register_in_opcode(encoder_t *encoder,
 | 
			
		||||
    (void)encoder;
 | 
			
		||||
    (void)opcode;
 | 
			
		||||
 | 
			
		||||
    register_id_t id = ast_node_register_value(operands->children[0])->id;
 | 
			
		||||
    register_id_t id = operands->children[0]->value.reg.id;
 | 
			
		||||
    encoding->buffer[encoding->len - 1] |= id & 0b111;
 | 
			
		||||
    if ((id & 0b1000) > 0) {
 | 
			
		||||
        *rex |= rex_prefix_r;
 | 
			
		||||
@@ -355,7 +355,7 @@ error_t *encode_one_register(encoder_t *encoder, opcode_data_t *opcode,
 | 
			
		||||
    assert(operands->len == 1);
 | 
			
		||||
    assert(operands->children[0]->id == NODE_REGISTER);
 | 
			
		||||
 | 
			
		||||
    register_id_t id = ast_node_register_value(operands->children[0])->id;
 | 
			
		||||
    register_id_t id = operands->children[0]->value.reg.id;
 | 
			
		||||
 | 
			
		||||
    uint8_t modrm = modrm_mod_register;
 | 
			
		||||
 | 
			
		||||
@@ -390,7 +390,7 @@ error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
 | 
			
		||||
           immediate->id == NODE_LABEL_REFERENCE);
 | 
			
		||||
 | 
			
		||||
    if (immediate->id == NODE_NUMBER) {
 | 
			
		||||
        uint64_t value = ast_node_number_value(immediate)->value;
 | 
			
		||||
        uint64_t value = immediate->value.number.value;
 | 
			
		||||
        operand_size_t size = opcode->operands[0].size;
 | 
			
		||||
        error_t *err = nullptr;
 | 
			
		||||
        switch (size) {
 | 
			
		||||
@@ -483,8 +483,7 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
 | 
			
		||||
        return err;
 | 
			
		||||
 | 
			
		||||
    // produce the actual encoding output in the NODE_INSTRUCTION value
 | 
			
		||||
    instruction_t *instruction_value = ast_node_instruction_value(instruction);
 | 
			
		||||
    uint8_t *output = instruction_value->encoding.buffer;
 | 
			
		||||
    uint8_t *output = instruction->value.instruction.encoding.buffer;
 | 
			
		||||
    size_t output_len = 0;
 | 
			
		||||
 | 
			
		||||
    // Handle prefixes
 | 
			
		||||
@@ -503,17 +502,11 @@ error_t *encoder_encode_instruction(encoder_t *encoder,
 | 
			
		||||
    memcpy(output + output_len, encoding->buffer, encoding->len);
 | 
			
		||||
    output_len += encoding->len;
 | 
			
		||||
 | 
			
		||||
    instruction_value->encoding.len = output_len;
 | 
			
		||||
    instruction->value.instruction.encoding.len = output_len;
 | 
			
		||||
 | 
			
		||||
    return nullptr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Initial guess for instruction size of instructions that contain a label
 | 
			
		||||
 * reference
 | 
			
		||||
 */
 | 
			
		||||
constexpr size_t instruction_size_estimate = 10;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Perform the initial pass over the AST.
 | 
			
		||||
 *
 | 
			
		||||
@@ -525,6 +518,7 @@ constexpr size_t instruction_size_estimate = 10;
 | 
			
		||||
 * - determine estimated addresses of each statement
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
constexpr size_t instruction_size_estimate = 10;
 | 
			
		||||
error_t *encoder_first_pass(encoder_t *encoder) {
 | 
			
		||||
    ast_node_t *root = encoder->ast;
 | 
			
		||||
    assert(root->id == NODE_PROGRAM);
 | 
			
		||||
@@ -538,21 +532,19 @@ error_t *encoder_first_pass(encoder_t *encoder) {
 | 
			
		||||
            return err;
 | 
			
		||||
 | 
			
		||||
        if (statement->id == NODE_INSTRUCTION &&
 | 
			
		||||
            ast_node_instruction_value(statement)->has_reference == false) {
 | 
			
		||||
            statement->value.instruction.has_reference == false) {
 | 
			
		||||
            err = encoder_encode_instruction(encoder, statement);
 | 
			
		||||
            if (err)
 | 
			
		||||
                return err;
 | 
			
		||||
            instruction_t *instruction = ast_node_instruction_value(statement);
 | 
			
		||||
            instruction->address = address;
 | 
			
		||||
            address += instruction->encoding.len;
 | 
			
		||||
            statement->value.instruction.address = address;
 | 
			
		||||
            address += statement->value.instruction.encoding.len;
 | 
			
		||||
        } else if (statement->id == NODE_INSTRUCTION) {
 | 
			
		||||
            instruction_t *instruction = ast_node_instruction_value(statement);
 | 
			
		||||
            instruction->encoding.len = instruction_size_estimate;
 | 
			
		||||
            instruction->address = address;
 | 
			
		||||
            statement->value.instruction.encoding.len =
 | 
			
		||||
                instruction_size_estimate;
 | 
			
		||||
            statement->value.instruction.address = address;
 | 
			
		||||
            address += instruction_size_estimate;
 | 
			
		||||
        } else if (statement->id == NODE_LABEL) {
 | 
			
		||||
            label_t *label = ast_node_label_value(statement);
 | 
			
		||||
            label->address = address;
 | 
			
		||||
            statement->value.instruction.address = address;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -575,17 +567,17 @@ operand_size_t signed_to_size_mask(int64_t value) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int64_t statement_offset(ast_node_t *from, ast_node_t *to) {
 | 
			
		||||
    assert(from->id == NODE_INSTRUCTION);
 | 
			
		||||
    assert(to->id == NODE_LABEL);
 | 
			
		||||
    assert(from->id == NODE_LABEL || from->id == NODE_INSTRUCTION);
 | 
			
		||||
    assert(to->id == NODE_LABEL || to->id == NODE_INSTRUCTION);
 | 
			
		||||
 | 
			
		||||
    instruction_t *instruction = ast_node_instruction_value(from);
 | 
			
		||||
    int64_t from_addr = instruction->address + instruction->encoding.len;
 | 
			
		||||
    int64_t to_addr = ast_node_label_value(to)->address;
 | 
			
		||||
    int64_t from_addr =
 | 
			
		||||
        from->value.instruction.address + from->value.instruction.encoding.len;
 | 
			
		||||
    int64_t to_addr = to->value.instruction.address;
 | 
			
		||||
 | 
			
		||||
    return to_addr - from_addr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node,
 | 
			
		||||
error_t *encoder_collect_label_info(encoder_t *encoder, ast_node_t *node,
 | 
			
		||||
                                    ast_node_t *statement) {
 | 
			
		||||
    assert(statement->id == NODE_INSTRUCTION);
 | 
			
		||||
    if (node->id == NODE_LABEL_REFERENCE) {
 | 
			
		||||
@@ -595,7 +587,7 @@ error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node,
 | 
			
		||||
               symbol->statement->id == NODE_LABEL);
 | 
			
		||||
 | 
			
		||||
        int64_t offset = statement_offset(statement, symbol->statement);
 | 
			
		||||
        int64_t absolute = ast_node_label_value(symbol->statement)->address;
 | 
			
		||||
        int64_t absolute = symbol->statement->value.instruction.address;
 | 
			
		||||
        operand_size_t size = signed_to_size_mask(offset);
 | 
			
		||||
 | 
			
		||||
        node->value.reference.address = absolute;
 | 
			
		||||
@@ -606,28 +598,6 @@ error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node,
 | 
			
		||||
    return nullptr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool encoder_should_reencode(ast_node_t *statement) {
 | 
			
		||||
    if (statement->id != NODE_INSTRUCTION)
 | 
			
		||||
        return false;
 | 
			
		||||
 | 
			
		||||
    instruction_t *instruction = ast_node_instruction_value(statement);
 | 
			
		||||
    return instruction->has_reference == false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void set_statement_address(ast_node_t *statement, int64_t address) {
 | 
			
		||||
    if (statement->id == NODE_INSTRUCTION) {
 | 
			
		||||
        ast_node_instruction_value(statement)->address = address;
 | 
			
		||||
    } else if (statement->id == NODE_LABEL) {
 | 
			
		||||
        ast_node_label_value(statement)->address = address;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
size_t get_statement_length(ast_node_t *statement) {
 | 
			
		||||
    if (statement->id != NODE_INSTRUCTION)
 | 
			
		||||
        return 0;
 | 
			
		||||
    return ast_node_instruction_value(statement)->encoding.len;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Perform the second pass. Updates the label info and encodes all instructions
 | 
			
		||||
 * that have a label reference.that performs actual encoding.
 | 
			
		||||
@@ -640,22 +610,28 @@ error_t *encoder_second_pass(encoder_t *encoder, bool *did_update) {
 | 
			
		||||
    for (size_t i = 0; i < root->len; ++i) {
 | 
			
		||||
        ast_node_t *statement = root->children[i];
 | 
			
		||||
 | 
			
		||||
        set_statement_address(statement, address);
 | 
			
		||||
        size_t before = get_statement_length(statement);
 | 
			
		||||
 | 
			
		||||
        if (encoder_should_reencode(statement)) {
 | 
			
		||||
        if (statement->id == NODE_INSTRUCTION &&
 | 
			
		||||
            statement->value.instruction.has_reference) {
 | 
			
		||||
            statement->value.instruction.address = address;
 | 
			
		||||
            size_t before = statement->value.instruction.encoding.len;
 | 
			
		||||
            error_t *err =
 | 
			
		||||
                encoder_collect_reference_info(encoder, statement, statement);
 | 
			
		||||
                encoder_collect_label_info(encoder, statement, statement);
 | 
			
		||||
            if (err)
 | 
			
		||||
                return err;
 | 
			
		||||
            err = encoder_encode_instruction(encoder, statement);
 | 
			
		||||
            if (err)
 | 
			
		||||
                return err;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        size_t after = get_statement_length(statement);
 | 
			
		||||
        *did_update = *did_update || (before != after);
 | 
			
		||||
            size_t after = statement->value.instruction.encoding.len;
 | 
			
		||||
            address += after;
 | 
			
		||||
            *did_update = *did_update || (before != after);
 | 
			
		||||
        } else if (statement->id == NODE_INSTRUCTION &&
 | 
			
		||||
                   statement->value.instruction.has_reference) {
 | 
			
		||||
            statement->value.instruction.address = address;
 | 
			
		||||
            address += statement->value.instruction.encoding.len;
 | 
			
		||||
        } else if (statement->id == NODE_LABEL) {
 | 
			
		||||
            statement->value.label.address = address;
 | 
			
		||||
        }
 | 
			
		||||
        address += statement->value.instruction.encoding.len;
 | 
			
		||||
    }
 | 
			
		||||
    return nullptr;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user