Compare commits

..

2 Commits

Author SHA1 Message Date
7cefc3564d Implement one immediate label reference operand
All checks were successful
Validate the build / validate-build (push) Successful in 43s
Also adds opcode data for jmp and call
2025-04-24 14:45:57 +02:00
c848995ad6 Implement two pass encoding
First pass:
 - Collect information for numbers, registers and which instructions
   contain label references
 - Encode all instructions that don't contain label references
 - Set (temporary) addresses for each instruction

Second pass:
 - Collect information about label references (address, offset, size)
 - Encode all instructions that contain label references
 - Update (if necessary) addresses for each instruction

The second pass is repeated up to 10 times or until no instruction
changes size, whichever comes first.
2025-04-24 14:45:46 +02:00
2 changed files with 146 additions and 11 deletions

View File

@ -138,8 +138,128 @@ opcode_data_t *const opcodes[] = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// CALL rel32
&(opcode_data_t) {
.mnemonic = "call",
.opcode = 0xE8,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_DEFAULT,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
},
},
// CALL reg64
&(opcode_data_t) {
.mnemonic = "call",
.opcode = 0xFF,
.opcode_extension = 2,
.encoding_class = ENCODING_DEFAULT,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// CALL mem64
&(opcode_data_t) {
.mnemonic = "call",
.opcode = 0xFF,
.opcode_extension = 2,
.encoding_class = ENCODING_DEFAULT,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 },
},
},
// JMP rel8 (short jump)
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xEB,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_DEFAULT,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8 },
},
},
// JMP rel16
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xE9,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_DEFAULT,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
},
},
// JMP reg16
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xFF,
.opcode_extension = 4,
.encoding_class = ENCODING_DEFAULT,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
},
},
// JMP rel32 (near jump)
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xE9,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_DEFAULT,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
},
},
// JMP reg32
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xFF,
.opcode_extension = 4,
.encoding_class = ENCODING_DEFAULT,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
},
},
// JMP reg64
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xFF,
.opcode_extension = 4,
.encoding_class = ENCODING_DEFAULT,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// JMP mem64
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xFF,
.opcode_extension = 4,
.encoding_class = ENCODING_DEFAULT,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 },
},
},
nullptr,
};

View File

@ -279,12 +279,9 @@ bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
if (child->id == NODE_NUMBER)
return (ast_node_number_value(child)->size & info->size) > 0;
else if (child->id == NODE_LABEL_REFERENCE)
return info->size == OPERAND_SIZE_32;
// FIXME: first pass should give us information about the distance of
// the label reference so we can pick a size more appropriately instead
// of just defaulting to 32 bits
break;
else if (child->id == NODE_LABEL_REFERENCE) {
return info->size &= ast_node_reference_value(child)->size;
}
} // end OPERAND_IMMEDIATE case
}
assert(false && "unreachable");
@ -389,9 +386,9 @@ error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
assert(immediate->id == NODE_NUMBER ||
immediate->id == NODE_LABEL_REFERENCE);
operand_size_t size = opcode->operands[0].size;
if (immediate->id == NODE_NUMBER) {
uint64_t value = ast_node_number_value(immediate)->value;
operand_size_t size = opcode->operands[0].size;
error_t *err = nullptr;
switch (size) {
case OPERAND_SIZE_8:
@ -411,10 +408,21 @@ error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
}
return err;
} else {
// FIXME: this still assumes references are always 32 bit
uint32_t value = 0xDEADBEEF;
return bytes_append_uint32(encoding, value);
reference_t *reference = ast_node_reference_value(immediate);
switch (size) {
case OPERAND_SIZE_64:
return bytes_append_uint64(encoding, reference->address);
case OPERAND_SIZE_32:
return bytes_append_uint32(encoding, reference->offset);
case OPERAND_SIZE_16:
return bytes_append_uint16(encoding, reference->offset);
case OPERAND_SIZE_8:
return bytes_append_uint8(encoding, reference->offset);
default:
assert(false && "intentionally unhandled");
}
}
__builtin_unreachable();
}
error_t *encode_one_memory(encoder_t *encoder, opcode_data_t *opcode,
@ -603,6 +611,13 @@ error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node,
node->value.reference.size = size;
}
for (size_t i = 0; i < node->len; ++i) {
error_t *err = encoder_collect_reference_info(
encoder, node->children[i], statement);
if (err)
return err;
}
return nullptr;
}
@ -611,7 +626,7 @@ bool encoder_should_reencode(ast_node_t *statement) {
return false;
instruction_t *instruction = ast_node_instruction_value(statement);
return instruction->has_reference == false;
return instruction->has_reference;
}
void set_statement_address(ast_node_t *statement, int64_t address) {