Compare commits

...

18 Commits

Author SHA1 Message Date
d97cfb97be Implement printing the encoding in main
All checks were successful
Validate the build / validate-build (push) Successful in 33s
2025-04-16 23:10:17 +02:00
99c9dcd985 Incomplete second pass encoding 2025-04-16 23:10:09 +02:00
7e9c1bfda2 Add bytes type and tests
bytes_t is a local (automatic) allocation array that carries the length
and capacity with it.
2025-04-16 23:10:09 +02:00
d8ae126e9a Add opcode encoding value for NODE_INSTRUCTION entries in the AST 2025-04-16 23:10:09 +02:00
68dcd9dcce Add first encoding pass
First pass collects all the symbols and interprets number and register
tokens into usable data for the later passes.
2025-04-16 23:10:00 +02:00
dcf90b72e0 Add register and number values to AST nodes 2025-04-16 23:10:00 +02:00
2cf69f5e18 Add initial limited opcode data 2025-04-16 23:09:47 +02:00
d59559d327 Add registers data table
Change the validated primitive parse_register so that it uses the data
table instead
2025-04-16 13:46:19 +02:00
ac14925a0a Add symbols tests 2025-04-16 13:46:19 +02:00
2a7bb479ac initial symbol table implementation 2025-04-16 13:46:19 +02:00
ef22c0b620 Add .import and .export to the input test file 2025-04-16 13:46:19 +02:00
8c0e9926c5 Make main properly return with failure on parsing errors 2025-04-16 13:46:19 +02:00
d3d69b82d5 Add .import and .export directive to the grammar and parser 2025-04-16 13:46:10 +02:00
dc210e409c fix parse_immediate to accept label_reference instead of identifier 2025-04-16 13:41:28 +02:00
00272d69bf Add regression test for parse zero operands at eof
All checks were successful
Validate the build / validate-build (push) Successful in 30s
2025-04-16 13:16:55 +02:00
2385d38608 Prune the parse tree of NODE_NEWLINE after parsing succeeds 2025-04-16 13:01:02 +02:00
242fd9baa5 Fix grammar not being able to disambiguate some instructions
When two identifiers follow each other it could be two instruction
mnemonics or one instruction mnemonic and one operand. To fix this
TOKEN_NEWLINE has been reintroduced as a semantic token. The grammar has
been changed to allow empty statements and every instruction and
directive has to end in a newline. Labels do not have to end in a
newline.

In addition to updating the grammar, the implementation of tokenlist,
ast and parser has been updated to reflect these changes.
2025-04-16 12:34:44 +02:00
1574ec6249 Fix parse_consecutive behavior when the token stream runs out 2025-04-16 12:13:02 +02:00
27 changed files with 2071 additions and 57 deletions

View File

@ -1,13 +1,17 @@
<program> ::= <statement>*
<statement> ::= <label> | <directive> | <instruction>
<statement> ::= <label> | <directive> | <instruction> | <newline>
<label> ::= <identifier> <colon>
<directive> ::= <dot> <section_directive>
<directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> ) <newline>
<section_directive> ::= "section" <identifier>
<instruction> ::= <identifier> <operands>
<export_directive> ::= "export" <identifier>
<import_directive> ::= "import" <identifier>
<instruction> ::= <identifier> <operands> <newline>
<operands> ::= <operand> ( <comma> <operand> )*

View File

@ -123,6 +123,10 @@ const char *ast_node_id_to_cstr(node_id_t id) {
return "NODE_PLUS_OR_MINUS";
case NODE_SECTION_DIRECTIVE:
return "NODE_SECTION_DIRECTIVE";
case NODE_IMPORT_DIRECTIVE:
return "NODE_IMPORT_DIRECTIVE";
case NODE_EXPORT_DIRECTIVE:
return "NODE_EXPORT_DIRECTIVE";
case NODE_REGISTER:
return "NODE_REGISTER";
case NODE_SECTION:
@ -157,6 +161,12 @@ const char *ast_node_id_to_cstr(node_id_t id) {
return "NODE_ASTERISK";
case NODE_DOT:
return "NODE_DOT";
case NODE_NEWLINE:
return "NODE_NEWLINE";
case NODE_IMPORT:
return "NODE_IMPORT";
case NODE_EXPORT:
return "NODE_EXPORT";
}
assert(!"Unreachable, weird node id" && id);
__builtin_unreachable();
@ -172,7 +182,8 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
}
printf("%s", ast_node_id_to_cstr(node->id));
if (node->token_entry && node->token_entry->token.value) {
if (node->token_entry && node->token_entry->token.value &&
node->id != NODE_NEWLINE) {
printf(" \"%s\"", node->token_entry->token.value);
}
printf("\n");
@ -185,3 +196,18 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
/* Print the tree rooted at node, starting at indentation level zero. */
void ast_node_print(ast_node_t *node) {
    const int root_indent = 0;
    ast_node_print_internal(node, root_indent);
}
/*
 * Remove every descendant of node whose id equals id, together with its whole
 * subtree, and compact the remaining children in place (order is preserved).
 *
 * The id of node itself is never inspected, so the root always survives.
 * Passing a null node is a no-op.
 */
void ast_node_prune(ast_node_t *node, node_id_t id) {
    if (node == nullptr)
        return;

    const size_t old_len = node->len;
    size_t kept = 0;
    for (size_t i = 0; i < old_len; i++) {
        ast_node_t *child = node->children[i];
        if (child->id == id) {
            // Matching nodes are dropped wholesale, children included
            ast_node_free(child);
            continue;
        }
        ast_node_prune(child, id);
        node->children[kept] = child;
        kept++;
    }

    // The slots past the kept range now hold freed or duplicated pointers;
    // clear them so nothing can follow a dangling pointer later on.
    for (size_t i = kept; i < old_len; i++)
        node->children[i] = nullptr;

    node->len = kept;
}

View File

@ -1,6 +1,7 @@
#ifndef INCLUDE_SRC_AST_H_
#define INCLUDE_SRC_AST_H_
#include "data/registers.h"
#include "error.h"
#include "lexer.h"
#include "tokenlist.h"
@ -29,10 +30,14 @@ typedef enum node_id {
NODE_REGISTER_OFFSET,
NODE_PLUS_OR_MINUS,
NODE_SECTION_DIRECTIVE,
NODE_IMPORT_DIRECTIVE,
NODE_EXPORT_DIRECTIVE,
// Validated primitives
NODE_REGISTER,
NODE_SECTION,
NODE_IMPORT,
NODE_EXPORT,
// Primitive nodes
NODE_IDENTIFIER,
@ -50,6 +55,7 @@ typedef enum node_id {
NODE_MINUS,
NODE_ASTERISK,
NODE_DOT,
NODE_NEWLINE,
} node_id_t;
typedef struct ast_node ast_node_t;
@ -58,6 +64,21 @@ constexpr size_t node_default_children_cap = 8;
/* 65K ought to be enough for anybody */
constexpr size_t node_max_children_cap = 1 << 16;
/* Interpreted value of a NODE_NUMBER, filled in by the encoder's first pass. */
typedef struct number {
// The parsed numeric value
uint64_t value;
// Operand size of the value; when the literal has no explicit size suffix
// this is a mask of every operand size the value fits in
operand_size_t size;
} number_t;
/* Interpreted value of a NODE_REGISTER, filled in by the encoder's first pass. */
typedef struct register_ {
// Register id taken from the registers data table
register_id_t id;
// Register width taken from the registers data table
operand_size_t size;
} register_t;
/* Encoded machine code bytes stored on a NODE_INSTRUCTION by the encoder. */
typedef struct opcode_encoding {
// Prefixes, opcode and operand bytes in output order
uint8_t encoding[32];
// Number of bytes of encoding that are in use
size_t len;
} opcode_encoding_t;
struct ast_node {
node_id_t id;
tokenlist_entry_t *token_entry;
@ -66,11 +87,9 @@ struct ast_node {
ast_node_t **children;
union {
struct {
uint64_t value;
int size;
} integer;
char *name;
register_t reg;
number_t number;
opcode_encoding_t encoding;
} value;
};
@ -119,4 +138,17 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
*/
void ast_node_print(ast_node_t *node);
/**
* Prune the children with a given id
*
* The tree is recursively visited and all child nodes of a given ID are pruned
* completely. If a node has the given id, it will get removed along with all
* its children, even if some of those children have different ids. The root
* node id is never checked so the tree is guaranteed to remain allocated and
* valid.
*
* @param node The root of the tree you want to prune
* @param id The id of the nodes you want to prune
*/
void ast_node_prune(ast_node_t *node, node_id_t id);
#endif // INCLUDE_SRC_AST_H_

6
src/bytes.c Normal file
View File

@ -0,0 +1,6 @@
#include "bytes.h"
#include "error.h"
// Error returned by the bytes_append_* helpers when the value does not fit in
// the remaining capacity of the destination buffer.
error_t *const err_bytes_no_capacity = &(error_t){
.message = "Not enough capacity in bytes buffer",
};

60
src/bytes.h Normal file
View File

@ -0,0 +1,60 @@
#ifndef INCLUDE_SRC_BYTES_H_
#define INCLUDE_SRC_BYTES_H_
#include "error.h"
#include <stddef.h>
#include <stdint.h>
#include <string.h>
// Returned by the append helpers when the destination has too little room left
extern error_t *const err_bytes_no_capacity;
/*
 * Byte buffer that carries its own length and capacity.
 *
 * The storage is a flexible array member, so an instance is only usable when
 * it is created with room for cap bytes behind the header (see LOCAL_BYTES).
 */
typedef struct bytes {
// Number of bytes currently stored in buffer
size_t len;
// Total number of bytes buffer can hold
size_t cap;
// The stored bytes; only the first len entries are meaningful
uint8_t buffer[];
} bytes_t;
/*
 * Create an empty byte buffer with room for N bytes as a compound literal and
 * yield a pointer to it. Used inside a function, the storage is automatic and
 * only lives until the enclosing block ends, so the pointer must not escape
 * that block.
 *
 * N is expanded twice (array size and cap), so it must be a constant
 * expression without side effects.
 *
 * The expansion is fully parenthesized so it behaves as a single expression
 * in any context, e.g. LOCAL_BYTES_ANONYMOUS(8)->cap.
 */
#define LOCAL_BYTES_ANONYMOUS(N)                                               \
    (&(struct {                                                                \
        size_t len;                                                            \
        size_t cap;                                                            \
        uint8_t buffer[(N)];                                                   \
    }){0, (N), {}})

/*
 * Same as LOCAL_BYTES_ANONYMOUS but typed as bytes_t *. The macro does not
 * end in a semicolon; the caller terminates the statement as usual:
 *
 *     bytes_t *scratch = LOCAL_BYTES(32);
 */
#define LOCAL_BYTES(N) ((bytes_t *)LOCAL_BYTES_ANONYMOUS(N))
/*
 * Append a single byte to bytes.
 *
 * Returns err_bytes_no_capacity when the buffer is already full, nullptr on
 * success.
 */
static inline error_t *bytes_append_uint8(bytes_t *bytes, uint8_t value) {
    if (bytes->cap <= bytes->len)
        return err_bytes_no_capacity;

    const size_t slot = bytes->len;
    bytes->buffer[slot] = value;
    bytes->len = slot + 1;
    return nullptr;
}
/*
 * Append the n bytes of buffer to dst.
 *
 * Returns err_bytes_no_capacity when fewer than n bytes of capacity remain,
 * nullptr on success. An append that fills the buffer exactly is accepted,
 * matching bytes_append_uint8.
 */
static inline error_t *bytes_append_array(bytes_t *dst, size_t n,
                                          uint8_t buffer[static n]) {
    // Compare against the remaining room instead of computing len + n, which
    // could wrap around for very large n.
    if (dst->len > dst->cap || n > dst->cap - dst->len)
        return err_bytes_no_capacity;
    memcpy(dst->buffer + dst->len, buffer, n);
    dst->len += n;
    return nullptr;
}
/*
 * Append the stored contents of src (its first src->len bytes) to dst.
 * The result is whatever bytes_append_array returns.
 */
static inline error_t *bytes_append_bytes(bytes_t *dst, bytes_t *src) {
    const size_t count = src->len;
    uint8_t *payload = src->buffer;
    return bytes_append_array(dst, count, payload);
}
/*
 * Append the two bytes of value to dst, in the byte order the value has in
 * memory on the host.
 */
static inline error_t *bytes_append_uint16(bytes_t *dst, uint16_t value) {
    uint8_t raw[sizeof(value)];
    memcpy(raw, &value, sizeof(raw));
    return bytes_append_array(dst, sizeof(raw), raw);
}
/*
 * Append the four bytes of value to dst, in the byte order the value has in
 * memory on the host.
 */
static inline error_t *bytes_append_uint32(bytes_t *dst, uint32_t value) {
    uint8_t raw[sizeof(value)];
    memcpy(raw, &value, sizeof(raw));
    return bytes_append_array(dst, sizeof(raw), raw);
}
/*
 * Append the eight bytes of value to dst, in the byte order the value has in
 * memory on the host.
 */
static inline error_t *bytes_append_uint64(bytes_t *dst, uint64_t value) {
    uint8_t raw[sizeof(value)];
    memcpy(raw, &value, sizeof(raw));
    return bytes_append_array(dst, sizeof(raw), raw);
}
#endif // INCLUDE_SRC_BYTES_H_

145
src/data/opcodes.c Normal file
View File

@ -0,0 +1,145 @@
#include "opcodes.h"
// clang-format off
/*
 * Table of the instruction encodings the encoder knows about, terminated by a
 * nullptr entry. Entries are tried in order and the first one whose mnemonic
 * and operands match is used, so for immediates without a size suffix the
 * smallest matching encoding should come first.
 */
opcode_data_t *const opcodes[] = {
    // RET
    &(opcode_data_t) {
        .mnemonic = "ret",
        .opcode = 0xC3,
        .opcode_extension = opcode_extension_none,
        .operand_count = 0,
    },
    // RET imm16
    &(opcode_data_t) {
        .mnemonic = "ret",
        .opcode = 0xC2,
        .opcode_extension = opcode_extension_none,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
        },
    },
    // PUSH imm8
    &(opcode_data_t) {
        .mnemonic = "push",
        .opcode = 0x6A,
        .opcode_extension = opcode_extension_none,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8 },
        },
    },
    // PUSH imm16
    &(opcode_data_t) {
        .mnemonic = "push",
        .opcode = 0x68,
        .opcode_extension = opcode_extension_none,
        .operand_size_prefix = true,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
        },
    },
    // PUSH imm32
    &(opcode_data_t) {
        .mnemonic = "push",
        .opcode = 0x68,
        .opcode_extension = opcode_extension_none,
        .operand_size_prefix = false,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
        },
    },
    // PUSH reg16
    // Shares opcode 0x50 with PUSH reg64; the operand size prefix is what
    // selects the 16-bit form.
    &(opcode_data_t) {
        .mnemonic = "push",
        .opcode = 0x50,
        .opcode_extension = opcode_extension_none,
        .encoding_class = ENCODING_OPCODE_REGISTER,
        .operand_size_prefix = true,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
        },
    },
    // PUSH reg64
    &(opcode_data_t) {
        .mnemonic = "push",
        .opcode = 0x50,
        .opcode_extension = opcode_extension_none,
        .encoding_class = ENCODING_OPCODE_REGISTER,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
        },
    },
    // NOT reg16
    &(opcode_data_t) {
        .mnemonic = "not",
        .opcode = 0xF7,
        .opcode_extension = 2,
        .operand_size_prefix = true,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
        },
    },
    // NOT reg32
    &(opcode_data_t) {
        .mnemonic = "not",
        .opcode = 0xF7,
        .opcode_extension = 2,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
        },
    },
    // NOT reg64
    &(opcode_data_t) {
        .mnemonic = "not",
        .opcode = 0xF7,
        .opcode_extension = 2,
        .rex_w_prefix = true,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
        },
    },
    // NEG reg16
    &(opcode_data_t) {
        .mnemonic = "neg",
        .opcode = 0xF7,
        .opcode_extension = 3,
        .operand_size_prefix = true,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
        },
    },
    // NEG reg32
    &(opcode_data_t) {
        .mnemonic = "neg",
        .opcode = 0xF7,
        .opcode_extension = 3,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
        },
    },
    // NEG reg64
    &(opcode_data_t) {
        .mnemonic = "neg",
        .opcode = 0xF7,
        .opcode_extension = 3,
        .rex_w_prefix = true,
        .operand_count = 1,
        .operands = {
            { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
        },
    },
    nullptr,
};

56
src/data/opcodes.h Normal file
View File

@ -0,0 +1,56 @@
#ifndef INCLUDE_DATA_OPCODES_H_
#define INCLUDE_DATA_OPCODES_H_
#include "../data/registers.h"
#include <stddef.h>
#include <stdint.h>
// REX prefix bytes. Every flag constant already contains the 0x40 base value,
// so flags can be combined by OR-ing the constants together.
constexpr uint8_t rex_prefix = 0x40;
constexpr uint8_t rex_prefix_w = 0x48;
constexpr uint8_t rex_prefix_r = 0x44;
constexpr uint8_t rex_prefix_x = 0x42;
constexpr uint8_t rex_prefix_b = 0x41;
// Single byte prefixes that are emitted in front of the opcode
constexpr uint8_t operand_size_prefix = 0x66;
constexpr uint8_t memory_size_prefix = 0x67;
constexpr uint8_t lock_prefix = 0xF0;
constexpr uint8_t repne_prefix = 0xF2;
constexpr uint8_t rep_prefix = 0xF3;
// How the operands of an opcode are placed in the encoding. ENCODING_DEFAULT
// has the value zero, so table entries that do not set a class get it.
typedef enum encoding_class {
ENCODING_DEFAULT, // use modrm+sib for registers and memory, append
// immediates
ENCODING_OPCODE_REGISTER, // encode the register in the last 3 bits of the
// opcode
} encoding_class_t;
// The kind of operand an opcode table entry accepts in a given position
typedef enum operand_kind {
OPERAND_REGISTER,
OPERAND_MEMORY,
OPERAND_IMMEDIATE,
} operand_kind_t;
// Describes one operand slot of an opcode table entry
typedef struct operand_info {
// Which kind of AST operand fits this slot
operand_kind_t kind;
// The operand size this slot requires
operand_size_t size;
} operand_info_t;
// Marker for opcode_extension meaning the opcode has no extension
constexpr uint8_t opcode_extension_none = 0xFF;
// One entry of the opcode table: a mnemonic plus everything needed to match
// its operands and to produce its encoding.
typedef struct opcode_data {
const char *mnemonic;
// Opcode value; values above 0xFF are emitted as two bytes, high byte first
uint16_t opcode;
uint8_t opcode_extension; // 3 bits for the opcode extension in the reg
// field of a modr/m byte
encoding_class_t encoding_class;
// Emit the 0x66 operand size prefix in front of the opcode
bool operand_size_prefix;
// Emit the 0x67 address size prefix in front of the opcode
bool address_size_prefix;
// Emit a REX prefix with the W flag set
bool rex_w_prefix;
// Number of entries of operands that are in use
size_t operand_count;
operand_info_t operands[3];
} opcode_data_t;
// The opcode table, terminated by a nullptr entry
extern opcode_data_t *const opcodes[];
#endif // INCLUDE_DATA_OPCODES_H_

92
src/data/registers.c Normal file
View File

@ -0,0 +1,92 @@
#include "registers.h"
// Table of all known register names with their id and width, terminated by a
// nullptr entry. Each entry is {name, id, size}; registers that are different
// widths of the same underlying register share one id.
register_data_t *const registers[] = {
// Instruction pointer
&(register_data_t){"rip", REG_RIP, OPERAND_SIZE_64},
&(register_data_t){"eip", REG_RIP, OPERAND_SIZE_32},
&(register_data_t){"ip", REG_RIP, OPERAND_SIZE_16},
// 64-bit general purpose registers
&(register_data_t){"rax", REG_A, OPERAND_SIZE_64},
&(register_data_t){"rcx", REG_C, OPERAND_SIZE_64},
&(register_data_t){"rdx", REG_D, OPERAND_SIZE_64},
&(register_data_t){"rbx", REG_B, OPERAND_SIZE_64},
&(register_data_t){"rsp", REG_SP, OPERAND_SIZE_64},
&(register_data_t){"rbp", REG_BP, OPERAND_SIZE_64},
&(register_data_t){"rsi", REG_SI, OPERAND_SIZE_64},
&(register_data_t){"rdi", REG_DI, OPERAND_SIZE_64},
&(register_data_t){"r8", REG_8, OPERAND_SIZE_64},
&(register_data_t){"r9", REG_9, OPERAND_SIZE_64},
&(register_data_t){"r10", REG_10, OPERAND_SIZE_64},
&(register_data_t){"r11", REG_11, OPERAND_SIZE_64},
&(register_data_t){"r12", REG_12, OPERAND_SIZE_64},
&(register_data_t){"r13", REG_13, OPERAND_SIZE_64},
&(register_data_t){"r14", REG_14, OPERAND_SIZE_64},
&(register_data_t){"r15", REG_15, OPERAND_SIZE_64},
// 32-bit general purpose registers
&(register_data_t){"eax", REG_A, OPERAND_SIZE_32},
&(register_data_t){"ecx", REG_C, OPERAND_SIZE_32},
&(register_data_t){"edx", REG_D, OPERAND_SIZE_32},
&(register_data_t){"ebx", REG_B, OPERAND_SIZE_32},
&(register_data_t){"esp", REG_SP, OPERAND_SIZE_32},
&(register_data_t){"ebp", REG_BP, OPERAND_SIZE_32},
&(register_data_t){"esi", REG_SI, OPERAND_SIZE_32},
&(register_data_t){"edi", REG_DI, OPERAND_SIZE_32},
&(register_data_t){"r8d", REG_8, OPERAND_SIZE_32},
&(register_data_t){"r9d", REG_9, OPERAND_SIZE_32},
&(register_data_t){"r10d", REG_10, OPERAND_SIZE_32},
&(register_data_t){"r11d", REG_11, OPERAND_SIZE_32},
&(register_data_t){"r12d", REG_12, OPERAND_SIZE_32},
&(register_data_t){"r13d", REG_13, OPERAND_SIZE_32},
&(register_data_t){"r14d", REG_14, OPERAND_SIZE_32},
&(register_data_t){"r15d", REG_15, OPERAND_SIZE_32},
// 16-bit general purpose registers
&(register_data_t){"ax", REG_A, OPERAND_SIZE_16},
&(register_data_t){"cx", REG_C, OPERAND_SIZE_16},
&(register_data_t){"dx", REG_D, OPERAND_SIZE_16},
&(register_data_t){"bx", REG_B, OPERAND_SIZE_16},
&(register_data_t){"sp", REG_SP, OPERAND_SIZE_16},
&(register_data_t){"bp", REG_BP, OPERAND_SIZE_16},
&(register_data_t){"si", REG_SI, OPERAND_SIZE_16},
&(register_data_t){"di", REG_DI, OPERAND_SIZE_16},
&(register_data_t){"r8w", REG_8, OPERAND_SIZE_16},
&(register_data_t){"r9w", REG_9, OPERAND_SIZE_16},
&(register_data_t){"r10w", REG_10, OPERAND_SIZE_16},
&(register_data_t){"r11w", REG_11, OPERAND_SIZE_16},
&(register_data_t){"r12w", REG_12, OPERAND_SIZE_16},
&(register_data_t){"r13w", REG_13, OPERAND_SIZE_16},
&(register_data_t){"r14w", REG_14, OPERAND_SIZE_16},
&(register_data_t){"r15w", REG_15, OPERAND_SIZE_16},
// 8-bit general purpose registers (low byte)
&(register_data_t){"al", REG_A, OPERAND_SIZE_8 },
&(register_data_t){"cl", REG_C, OPERAND_SIZE_8 },
&(register_data_t){"dl", REG_D, OPERAND_SIZE_8 },
&(register_data_t){"bl", REG_B, OPERAND_SIZE_8 },
&(register_data_t){"spl", REG_SP, OPERAND_SIZE_8 },
&(register_data_t){"bpl", REG_BP, OPERAND_SIZE_8 },
&(register_data_t){"sil", REG_SI, OPERAND_SIZE_8 },
&(register_data_t){"dil", REG_DI, OPERAND_SIZE_8 },
&(register_data_t){"r8b", REG_8, OPERAND_SIZE_8 },
&(register_data_t){"r9b", REG_9, OPERAND_SIZE_8 },
&(register_data_t){"r10b", REG_10, OPERAND_SIZE_8 },
&(register_data_t){"r11b", REG_11, OPERAND_SIZE_8 },
&(register_data_t){"r12b", REG_12, OPERAND_SIZE_8 },
&(register_data_t){"r13b", REG_13, OPERAND_SIZE_8 },
&(register_data_t){"r14b", REG_14, OPERAND_SIZE_8 },
&(register_data_t){"r15b", REG_15, OPERAND_SIZE_8 },
// x87 floating point registers
&(register_data_t){"st0", REG_ST0, OPERAND_SIZE_80},
&(register_data_t){"st1", REG_ST1, OPERAND_SIZE_80},
&(register_data_t){"st2", REG_ST2, OPERAND_SIZE_80},
&(register_data_t){"st3", REG_ST3, OPERAND_SIZE_80},
&(register_data_t){"st4", REG_ST4, OPERAND_SIZE_80},
&(register_data_t){"st5", REG_ST5, OPERAND_SIZE_80},
&(register_data_t){"st6", REG_ST6, OPERAND_SIZE_80},
&(register_data_t){"st7", REG_ST7, OPERAND_SIZE_80},
nullptr,
};

82
src/data/registers.h Normal file
View File

@ -0,0 +1,82 @@
#ifndef INCLUDE_DATA_REGISTERS_H_
#define INCLUDE_DATA_REGISTERS_H_
/*
 * Operand sizes in bits. Every size is a distinct single bit so that several
 * sizes can be OR-ed together into a mask of acceptable sizes.
 */
typedef enum operand_size {
    OPERAND_SIZE_INVALID = 0,
    OPERAND_SIZE_8 = 1 << 0,
    OPERAND_SIZE_16 = 1 << 1,
    OPERAND_SIZE_32 = 1 << 2,
    OPERAND_SIZE_64 = 1 << 3,
    OPERAND_SIZE_80 = 1 << 4,
    OPERAND_SIZE_128 = 1 << 5,
    OPERAND_SIZE_256 = 1 << 6,
    OPERAND_SIZE_512 = 1 << 7,
} operand_size_t;

/*
 * Translate a width in bits into the matching operand_size_t flag.
 * Any width that is not one of the known sizes yields OPERAND_SIZE_INVALID.
 */
static inline operand_size_t bits_to_operand_size(int bits) {
    static const struct {
        int bits;
        operand_size_t size;
    } known_sizes[] = {
        {8, OPERAND_SIZE_8},     {16, OPERAND_SIZE_16},
        {32, OPERAND_SIZE_32},   {64, OPERAND_SIZE_64},
        {80, OPERAND_SIZE_80},   {128, OPERAND_SIZE_128},
        {256, OPERAND_SIZE_256}, {512, OPERAND_SIZE_512},
    };
    const int count = (int)(sizeof(known_sizes) / sizeof(known_sizes[0]));

    for (int i = 0; i < count; ++i) {
        if (known_sizes[i].bits == bits)
            return known_sizes[i].size;
    }
    return OPERAND_SIZE_INVALID;
}
// Identifies a register independent of the width it is accessed with.
// The general purpose ids count up from zero in declaration order; the
// encoder uses the low three bits of the id as the register field value and
// bit 3 to decide whether a REX extension bit is needed.
typedef enum register_id {
// Special registers
REG_RIP = -1,
// General purpose registers
REG_A = 0x0000,
REG_C,
REG_D,
REG_B,
REG_SP,
REG_BP,
REG_SI,
REG_DI,
REG_8,
REG_9,
REG_10,
REG_11,
REG_12,
REG_13,
REG_14,
REG_15,
// x87 floating point registers, numbered from 0x1000
REG_ST0 = 0x1000,
REG_ST1,
REG_ST2,
REG_ST3,
REG_ST4,
REG_ST5,
REG_ST6,
REG_ST7,
} register_id_t;
// One entry of the registers table
typedef struct register_data {
// Register name as it is written in the assembly source
const char *name;
// Register id; shared by the different widths of the same register
register_id_t id;
// Width of the register when accessed by this name
operand_size_t size;
} register_data_t;
// The registers table, terminated by a nullptr entry
extern register_data_t *const registers[];
#endif // INCLUDE_DATA_REGISTERS_H_

526
src/encoder/encoder.c Normal file
View File

@ -0,0 +1,526 @@
#include "encoder.h"
#include "../bytes.h"
#include "../data/opcodes.h"
#include "symbols.h"
#include <assert.h>
#include <errno.h>
#include <string.h>
// Error values the encoder can return. They are compared by address, the
// message is only meant for display.

// The register name was not found in the registers table
error_t *const err_encoder_invalid_register =
&(error_t){.message = "Invalid register"};
// The number does not fit in 64 bits or in its explicit size suffix
error_t *const err_encoder_number_overflow =
&(error_t){.message = "Number overflows the storage"};
// The number token could not be parsed completely
error_t *const err_encoder_invalid_number_format =
&(error_t){.message = "Invalid number format"};
// The size suffix is not one of the supported bit widths
error_t *const err_encoder_invalid_size_suffix =
&(error_t){.message = "Invalid number size suffix"};
// A symbol is still only a reference after the first pass
error_t *const err_encoder_unknown_symbol_reference =
&(error_t){.message = "Referenced an unknown symbol"};
// No opcode table entry matches the mnemonic and operands
error_t *const err_encoder_no_encoding_found =
&(error_t){.message = "No encoding found for instruction"};
// The matched opcode needs an encoder that has not been written yet
error_t *const err_encoder_not_implemented =
&(error_t){.message = "Implementation for this opcode is missing"};
// The opcode and operand bytes are longer than the encoder accepts
error_t *const err_encoder_unexpected_length =
&(error_t){.message = "Unexpectedly long encoding"};
/*
 * Allocate an encoder together with its symbol table.
 *
 * On success *output receives the new encoder and nullptr is returned. On
 * failure *output is nullptr and the error is returned; nothing stays
 * allocated.
 */
error_t *encoder_alloc(encoder_t **output) {
    *output = nullptr;

    encoder_t *created = calloc(1, sizeof(*created));
    if (!created)
        return err_allocation_failed;

    error_t *symbols_err = symbol_table_alloc(&created->symbols);
    if (symbols_err != nullptr) {
        free(created);
        return symbols_err;
    }

    *output = created;
    return nullptr;
}
/* Release an encoder and its symbol table. A null encoder is ignored. */
void encoder_free(encoder_t *encoder) {
    if (encoder != nullptr) {
        symbol_table_free(encoder->symbols);
        free(encoder);
    }
}
/*
 * Tell whether a node is one the first pass hands to the symbol table:
 * labels, label references and import/export directives.
 */
bool encoder_is_symbols_node(ast_node_t *node) {
    const node_id_t id = node->id;
    return id == NODE_LABEL || id == NODE_LABEL_REFERENCE ||
           id == NODE_EXPORT_DIRECTIVE || id == NODE_IMPORT_DIRECTIVE;
}
/*
 * Return the numeric base (2, 8, 10 or 16) of a number node, determined by
 * the id of its first child. Any other child id is a programming error.
 */
int encoder_get_number_base(ast_node_t *number) {
    const node_id_t literal_kind = number->children[0]->id;

    if (literal_kind == NODE_BINARY)
        return 2;
    if (literal_kind == NODE_OCTAL)
        return 8;
    if (literal_kind == NODE_DECIMAL)
        return 10;
    if (literal_kind == NODE_HEXADECIMAL)
        return 16;

    assert(false);
    __builtin_unreachable();
}
/*
 * Check a size suffix in bits. Accepted are 8, 16, 32 and 64, plus 0 which
 * stands for "no suffix given".
 */
bool is_valid_size_suffix(int bits) {
    return bits == 0 || bits == 8 || bits == 16 || bits == 32 || bits == 64;
}
/*
 * Tell whether value needs more than bits bits of storage. A width of 0 (no
 * explicit size) or of 64 and more can hold every uint64_t, so those never
 * overflow.
 */
bool is_overflow(uint64_t value, int bits) {
    const bool fits_any_value = bits == 0 || bits >= 64;
    if (fits_any_value)
        return false;
    // Anything left after dropping the low bits does not fit
    return (value >> bits) != 0;
}
/*
 * Determine the operand size(s) of a number.
 *
 * With an explicit width (bits != 0) the result is exactly that size. Without
 * one the result is a mask of every size from 8 to 64 bits that can hold the
 * value; 64 bits is always part of it.
 */
operand_size_t encoder_get_size_mask(uint64_t value, int bits) {
    if (bits != 0)
        return bits_to_operand_size(bits);

    operand_size_t fits = OPERAND_SIZE_64;
    if (value <= UINT32_MAX)
        fits |= OPERAND_SIZE_32;
    if (value <= UINT16_MAX)
        fits |= OPERAND_SIZE_16;
    if (value <= UINT8_MAX)
        fits |= OPERAND_SIZE_8;
    return fits;
}
/*
 * Interpret the token of a NODE_NUMBER and store value and size on the node.
 *
 * The token is the number in the base indicated by the node's first child,
 * optionally followed by ":<bits>" as explicit size suffix.
 *
 * Returns nullptr on success, or
 *  - err_encoder_number_overflow when the value does not fit in 64 bits or in
 *    the explicit size,
 *  - err_encoder_invalid_number_format when the token is not fully consumed,
 *  - err_encoder_invalid_size_suffix when the suffix is not 8, 16, 32 or 64.
 */
error_t *encoder_set_number_value(ast_node_t *node) {
    assert(node->id == NODE_NUMBER);
    assert(node->children[0]);
    const char *number = node->children[0]->token_entry->token.value;
    int base = encoder_get_number_base(node);
    if (base != 10)
        number += 2; // all except base 10 use a 0x, 0o or 0b prefix

    char *endptr;
    errno = 0;
    uint64_t value = strtoull(number, &endptr, base);
    if (errno == ERANGE)
        return err_encoder_number_overflow;
    if (endptr == number)
        return err_encoder_invalid_number_format;

    // Keep the suffix as long until it is validated: narrowing it to int
    // first could turn an out of range suffix into a valid looking one.
    long suffix_bits = 0;
    if (*endptr == ':') {
        const char *suffix = endptr + 1;
        suffix_bits = strtol(suffix, &endptr, 10);
        if (endptr == suffix)
            return err_encoder_invalid_number_format;
    }
    if (*endptr != '\0')
        return err_encoder_invalid_number_format;

    if (suffix_bits < 0 || suffix_bits > 64 ||
        !is_valid_size_suffix((int)suffix_bits))
        return err_encoder_invalid_size_suffix;
    int bits = (int)suffix_bits;

    if (is_overflow(value, bits))
        return err_encoder_number_overflow;

    node->value.number.value = value;
    node->value.number.size = encoder_get_size_mask(value, bits);
    return nullptr;
}
/*
 * Look up the token of a NODE_REGISTER in the registers table and store the
 * id and size of the match on the node.
 *
 * Returns err_encoder_invalid_register when the name is not in the table.
 */
error_t *encoder_set_register_value(ast_node_t *node) {
    assert(node->id == NODE_REGISTER);
    const char *name = node->token_entry->token.value;

    for (register_data_t *const *entry = registers; *entry != nullptr;
         ++entry) {
        if (strcmp(name, (*entry)->name) != 0)
            continue;
        node->value.reg.id = (*entry)->id;
        node->value.reg.size = (*entry)->size;
        return nullptr;
    }
    return err_encoder_invalid_register;
}
/**
 * Store an opcode extension in the reg field of a modrm byte. The other
 * fields of modrm are kept as they are.
 */
static inline uint8_t modrm_extension(uint8_t modrm, uint8_t extension) {
    assert(extension != opcode_extension_none);
    assert((extension & 0b111) == extension);
    const uint8_t without_reg = modrm & ~modrm_reg_mask;
    return without_reg | extension << 3;
}
/**
 * Add the rex bit that belongs to the reg field of modrm when the register
 * id has bit 3 set; otherwise rex is returned unchanged.
 */
static inline uint8_t modrm_reg_rex(uint8_t rex, register_id_t id) {
    const bool needs_extension = (id & 0b1000) != 0;
    return needs_extension ? (rex | rex_prefix_r) : rex;
}
/**
 * Store the low three bits of the register id in the reg field of modrm.
 * Must be used alongside modrm_reg_rex, which takes care of bit 3.
 */
static inline uint8_t modrm_reg(uint8_t modrm, register_id_t id) {
    const uint8_t without_reg = modrm & ~modrm_reg_mask;
    return without_reg | (id & 0b111) << 3;
}
/**
 * Add the rex bit that belongs to the rm field of modrm when the register id
 * has bit 3 set; otherwise rex is returned unchanged.
 */
static inline uint8_t modrm_rm_rex(uint8_t rex, register_id_t id) {
    const bool needs_extension = (id & 0b1000) != 0;
    return needs_extension ? (rex | rex_prefix_b) : rex;
}
/**
 * Store the low three bits of the register id in the rm field of modrm.
 * Only valid when the mod field selects register mode. Must be used
 * alongside modrm_rm_rex, which takes care of bit 3.
 */
static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) {
    assert((modrm & modrm_mod_mask) == modrm_mod_register);
    const uint8_t without_rm = modrm & ~modrm_rm_mask;
    return without_rm | (id & 0b111);
}
/**
 * First pass over the AST. Visits node and then all of its children
 * recursively: symbol nodes are recorded in the symbol table, number and
 * register nodes get their value set. Stops at the first error and returns
 * it.
 */
error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
    error_t *node_err = nullptr;
    if (encoder_is_symbols_node(node)) {
        node_err = symbol_table_update(encoder->symbols, node);
    } else {
        switch (node->id) {
        case NODE_NUMBER:
            node_err = encoder_set_number_value(node);
            break;
        case NODE_REGISTER:
            node_err = encoder_set_register_value(node);
            break;
        default:
            break;
        }
    }
    if (node_err != nullptr)
        return node_err;

    for (size_t child = 0; child < node->len; ++child) {
        error_t *child_err = encoder_first_pass(encoder, node->children[child]);
        if (child_err != nullptr)
            return child_err;
    }
    return nullptr;
}
/*
 * Tell whether an AST operand fits the operand slot described by info.
 *
 *  - register slots need a register node of exactly the slot size
 *  - memory slots accept any memory node
 *  - immediate slots accept a number whose size mask contains the slot size,
 *    or a label reference when the slot is 32 bits wide
 */
bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
    if (info->kind == OPERAND_REGISTER) {
        if (operand->id != NODE_REGISTER)
            return false;
        return operand->value.reg.size == info->size;
    }

    if (info->kind == OPERAND_MEMORY)
        return operand->id == NODE_MEMORY;

    if (info->kind == OPERAND_IMMEDIATE) {
        if (operand->id != NODE_IMMEDIATE)
            return false;

        ast_node_t *payload = operand->children[0];
        if (payload->id == NODE_NUMBER)
            return (payload->value.number.size & info->size) > 0;
        // FIXME: first pass should give us information about the distance of
        // the label reference so we can pick a size more appropriately instead
        // of just defaulting to 32 bits
        if (payload->id == NODE_LABEL_REFERENCE)
            return info->size == OPERAND_SIZE_32;
    }

    // Unknown operand kind, or an immediate with an unexpected child
    assert(false && "unreachable");
    __builtin_unreachable();
}
/*
 * Tell whether an opcode table entry fits an instruction: the mnemonic has to
 * be equal, the number of operands has to be equal and every operand has to
 * match its slot.
 */
bool is_opcode_match(opcode_data_t *opcode, const char *mnemonic,
                     ast_node_t *operands) {
    const bool same_mnemonic = strcmp(opcode->mnemonic, mnemonic) == 0;
    if (!same_mnemonic || opcode->operand_count != operands->len)
        return false;

    size_t slot = 0;
    while (slot < operands->len) {
        if (!is_operand_match(&opcode->operands[slot], operands->children[slot]))
            return false;
        ++slot;
    }
    return true;
}
/*
 * Find the first opcode table entry that matches the instruction and store
 * it in *opcode_out. The mnemonic is the token of the instruction's first
 * child.
 *
 * Returns err_encoder_no_encoding_found when nothing matches; *opcode_out is
 * not touched in that case.
 */
error_t *encoder_get_opcode_data(ast_node_t *instruction, ast_node_t *operands,
                                 opcode_data_t **opcode_out) {
    const char *mnemonic = instruction->children[0]->token_entry->token.value;

    for (opcode_data_t *const *candidate = opcodes; *candidate; ++candidate) {
        if (!is_opcode_match(*candidate, mnemonic, operands))
            continue;
        *opcode_out = *candidate;
        return nullptr;
    }
    return err_encoder_no_encoding_found;
}
/*
 * Placeholder for instructions with two operands. Only checks that the
 * opcode is in the buffer already and reports err_encoder_not_implemented.
 */
error_t *encode_two_operand(encoder_t *encoder, opcode_data_t *opcode,
                            ast_node_t *operands, bytes_t *encoding,
                            uint8_t *rex) {
    // Nothing is used yet, silence the unused parameter warnings
    (void)encoder, (void)opcode, (void)operands, (void)encoding, (void)rex;

    assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");
    return err_encoder_not_implemented;
}
/*
 * Encode a single register operand in the low three bits of the opcode byte
 * (ENCODING_OPCODE_REGISTER). The opcode has to be the last byte in encoding
 * already.
 *
 * When the register id has bit 3 set, the B flag is added to *rex: that is
 * the REX flag that extends a register stored in the opcode byte. The R flag
 * only extends the reg field of a modrm byte, which is not used here.
 */
error_t *encode_one_register_in_opcode(encoder_t *encoder,
                                       opcode_data_t *opcode,
                                       ast_node_t *operands, bytes_t *encoding,
                                       uint8_t *rex) {
    (void)encoder;
    (void)opcode;
    assert(operands->len == 1);
    assert(operands->children[0]->id == NODE_REGISTER);
    assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");

    register_id_t id = operands->children[0]->value.reg.id;
    encoding->buffer[encoding->len - 1] |= id & 0b111;
    if ((id & 0b1000) != 0)
        *rex |= rex_prefix_b;
    return nullptr;
}
/*
 * Encode a single register operand in a modrm byte and append that byte to
 * encoding. The rex flag needed for the register, if any, is added to *rex.
 */
error_t *encode_one_register(encoder_t *encoder, opcode_data_t *opcode,
                             ast_node_t *operands, bytes_t *encoding,
                             uint8_t *rex) {
    (void)encoder;
    assert(operands->len == 1);
    assert(operands->children[0]->id == NODE_REGISTER);

    const register_id_t reg = operands->children[0]->value.reg.id;
    const bool has_extension = opcode->opcode_extension != opcode_extension_none;
    uint8_t modrm = modrm_mod_register;

    if (!has_extension) {
        // Without an extension the reg field is free, the register goes there.
        // NOTE:
        // it's actually likely this case just doesn't exist at all and all
        // opcodes that take one register in modr/m _all_ have extended opcodes
        modrm = modrm_reg(modrm, reg);
        *rex = modrm_reg_rex(*rex, reg);
        return bytes_append_uint8(encoding, modrm);
    }

    // The extension occupies the reg field, so the register goes in rm
    modrm = modrm_extension(modrm, opcode->opcode_extension);
    modrm = modrm_rm(modrm, reg);
    *rex = modrm_rm_rex(*rex, reg);
    return bytes_append_uint8(encoding, modrm);
}
/*
 * Append a single immediate operand to encoding.
 *
 * Numbers are written with the width of the opcode's operand slot. Label
 * references are written as a 32 bit placeholder value because their final
 * value is not known yet.
 *
 * Returns the error of the append, or err_encoder_not_implemented when the
 * slot has a size immediates are not supported for.
 */
error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
                              ast_node_t *operands, bytes_t *encoding,
                              uint8_t *rex) {
    (void)encoder;
    (void)rex;
    assert(operands->len == 1);
    assert(operands->children[0]->id == NODE_IMMEDIATE);
    assert(operands->children[0]->len == 1);
    ast_node_t *immediate = operands->children[0]->children[0];
    assert(immediate->id == NODE_NUMBER ||
           immediate->id == NODE_LABEL_REFERENCE);

    if (immediate->id != NODE_NUMBER) {
        // FIXME: this still assumes references are always 32 bit
        uint32_t value = 0xDEADBEEF;
        return bytes_append_uint32(encoding, value);
    }

    uint64_t value = immediate->value.number.value;
    switch (opcode->operands[0].size) {
    case OPERAND_SIZE_8:
        return bytes_append_uint8(encoding, (uint8_t)value);
    case OPERAND_SIZE_16:
        return bytes_append_uint16(encoding, (uint16_t)value);
    case OPERAND_SIZE_32:
        return bytes_append_uint32(encoding, (uint32_t)value);
    case OPERAND_SIZE_64:
        return bytes_append_uint64(encoding, value);
    default:
        assert(false && "intentionally unhandled");
        // With asserts compiled out, report the problem instead of claiming
        // success without having written the immediate.
        return err_encoder_not_implemented;
    }
}
/*
 * Placeholder for instructions with a single memory operand. Always reports
 * err_encoder_not_implemented.
 */
error_t *encode_one_memory(encoder_t *encoder, opcode_data_t *opcode,
                           ast_node_t *operands, bytes_t *encoding,
                           uint8_t *rex) {
    // Nothing is used yet, silence the unused parameter warnings
    (void)encoder, (void)opcode, (void)operands, (void)encoding, (void)rex;
    return err_encoder_not_implemented;
}
/*
 * Encode the operand of a one operand instruction by dispatching on the kind
 * of the opcode's first operand slot. Register operands are further split by
 * the encoding class of the opcode.
 *
 * Returns the error of the selected encoder, or err_encoder_not_implemented
 * for an operand kind that is not handled.
 */
error_t *encode_one_operand(encoder_t *encoder, opcode_data_t *opcode,
                            ast_node_t *operands, bytes_t *encoding,
                            uint8_t *rex) {
    switch (opcode->operands[0].kind) {
    case OPERAND_REGISTER:
        if (opcode->encoding_class == ENCODING_OPCODE_REGISTER)
            return encode_one_register_in_opcode(encoder, opcode, operands,
                                                 encoding, rex);
        return encode_one_register(encoder, opcode, operands, encoding, rex);
    case OPERAND_MEMORY:
        return encode_one_memory(encoder, opcode, operands, encoding, rex);
    case OPERAND_IMMEDIATE:
        return encode_one_immediate(encoder, opcode, operands, encoding, rex);
    }

    // Only reached with a kind outside of operand_kind_t. Without this the
    // function would end without returning a value.
    assert(false && "unknown operand kind");
    return err_encoder_not_implemented;
}
/*
 * Encode a single NODE_INSTRUCTION and store the result in the node's
 * encoding value.
 *
 * The opcode and operands are first encoded in a local buffer, because the
 * prefixes that have to precede them are only known afterwards. The output is
 * then assembled as: address size prefix, operand size prefix, rex prefix,
 * opcode and operands.
 *
 * Returns nullptr on success or the first error encountered.
 */
error_t *encoder_encode_instruction(encoder_t *encoder,
                                    ast_node_t *instruction) {
    ast_node_t *operands = instruction->children[1];

    opcode_data_t *opcode = nullptr;
    error_t *err = encoder_get_opcode_data(instruction, operands, &opcode);
    if (err)
        return err;

    uint8_t rex = 0;
    bytes_t *encoding = LOCAL_BYTES(32);

    // Two byte opcodes are stored high byte first
    if (opcode->opcode > 0xFF &&
        (err = bytes_append_uint8(encoding, opcode->opcode >> 8)))
        return err;
    if ((err = bytes_append_uint8(encoding, opcode->opcode & 0xFF)))
        return err;

    // NOTE:operand encoders all expect the opcode to be in the buffer already.
    // Some of them rely on this to encode the register value in the opcode
    // byte.
    switch (opcode->operand_count) {
    case 0:
        break;
    case 1:
        err = encode_one_operand(encoder, opcode, operands, encoding, &rex);
        break;
    case 2:
        err = encode_two_operand(encoder, opcode, operands, encoding, &rex);
        break;
    default:
        err = err_encoder_not_implemented;
    }
    if (err)
        return err;

    // produce the actual encoding output in the NODE_INSTRUCTION value
    uint8_t *output = instruction->value.encoding.encoding;
    size_t output_len = 0;

    // Handle prefixes
    // The W flag is added to the flags the operand encoders have set; plain
    // assignment would throw away the register extension bits.
    if (opcode->rex_w_prefix)
        rex |= rex_prefix_w;
    if (opcode->address_size_prefix)
        output[output_len++] = memory_size_prefix;
    if (opcode->operand_size_prefix)
        output[output_len++] = operand_size_prefix;
    if (rex > 0)
        output[output_len++] = rex;

    // copy the encoded opcode and operands
    if (encoding->len > 20)
        return err_encoder_unexpected_length;
    memcpy(output + output_len, encoding->buffer, encoding->len);
    output_len += encoding->len;

    instruction->value.encoding.len = output_len;
    return nullptr;
}
/**
 * Second pass: encode every NODE_INSTRUCTION that is a direct child of root,
 * all other children are skipped. Label references get placeholder values
 * because instruction sizes have not been determined yet. Stops at the first
 * error and returns it.
 */
error_t *encoder_encoding_pass(encoder_t *encoder, ast_node_t *root) {
    for (size_t i = 0; i < root->len; ++i) {
        ast_node_t *statement = root->children[i];
        if (statement->id == NODE_INSTRUCTION) {
            error_t *err = encoder_encode_instruction(encoder, statement);
            if (err != nullptr)
                return err;
        }
    }
    return nullptr;
}
/*
 * Return the first opcode table entry that matches the mnemonic and operands
 * of the instruction, or nullptr when there is none.
 */
opcode_data_t *encoder_find_opcode(ast_node_t *instruction) {
    size_t index = 0;
    while (opcodes[index] != nullptr) {
        opcode_data_t *candidate = opcodes[index];
        ast_node_t *operands = instruction->children[1];
        const char *mnemonic =
            instruction->children[0]->token_entry->token.value;
        if (is_opcode_match(candidate, mnemonic, operands))
            return candidate;
        ++index;
    }
    return nullptr;
}
/*
 * Check the symbol table for symbols whose kind is still SYMBOL_REFERENCE.
 * Returns err_encoder_unknown_symbol_reference when there is one, nullptr
 * otherwise.
 */
error_t *encoder_check_symbols(encoder_t *encoder) {
    size_t index = 0;
    while (index < encoder->symbols->len) {
        if (encoder->symbols->symbols[index].kind == SYMBOL_REFERENCE)
            return err_encoder_unknown_symbol_reference;
        ++index;
    }
    return nullptr;
}
/*
 * Run the encoder on an AST: the first pass, then the symbol check, then the
 * encoding pass. The first step that fails ends the run and its error is
 * returned.
 */
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast) {
    error_t *err;

    if ((err = encoder_first_pass(encoder, ast)) != nullptr)
        return err;
    if ((err = encoder_check_symbols(encoder)) != nullptr)
        return err;
    return encoder_encoding_pass(encoder, ast);
}

32
src/encoder/encoder.h Normal file
View File

@ -0,0 +1,32 @@
#ifndef INCLUDE_ENCODER_ENCODER_H_
#define INCLUDE_ENCODER_ENCODER_H_
#include "symbols.h"
// State of the encoder; allocated by encoder_alloc, released by encoder_free
typedef struct encoder {
// Symbol table that is filled during the first pass
symbol_table_t *symbols;
} encoder_t;
// Values for the mod field (top two bits) of a modr/m byte
constexpr uint8_t modrm_mod_memory = 0b00'000'000;
constexpr uint8_t modrm_mod_memory_displacement8 = 0b01'000'000;
constexpr uint8_t modrm_mod_memory_displacement32 = 0b10'000'000;
constexpr uint8_t modrm_mod_register = 0b11'000'000;
// Masks that select the reg, rm and mod field of a modr/m byte
constexpr uint8_t modrm_reg_mask = 0b00'111'000;
constexpr uint8_t modrm_rm_mask = 0b00'000'111;
constexpr uint8_t modrm_mod_mask = 0b11'000'000;
// Allocate an encoder; *encoder is set to nullptr when an error is returned
error_t *encoder_alloc(encoder_t **encoder);
// Run all encoder passes over the AST; returns the first error encountered
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast);
// Release an encoder; passing nullptr is allowed
void encoder_free(encoder_t *encoder);
// Errors that the encoder can return
extern error_t *const err_encoder_invalid_register;
extern error_t *const err_encoder_number_overflow;
extern error_t *const err_encoder_invalid_number_format;
extern error_t *const err_encoder_invalid_size_suffix;
extern error_t *const err_encoder_unknown_symbol_reference;
extern error_t *const err_encoder_no_encoding_found;
extern error_t *const err_encoder_not_implemented;
extern error_t *const err_encoder_unexpected_length;
#endif // INCLUDE_ENCODER_ENCODER_H_

159
src/encoder/symbols.c Normal file
View File

@ -0,0 +1,159 @@
#include "symbols.h"
#include "../error.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
// Number of symbol slots a table created by symbol_table_alloc starts with.
constexpr size_t symbol_table_default_cap = 64;
// Once the capacity reaches this value (65536 slots) symbol_table_grow_cap
// refuses to grow the table any further.
constexpr size_t symbol_table_max_cap = 1 << 16;
// Returned when a node that is not a label, label reference, import directive
// or export directive is handed to the symbol table.
error_t *const err_symbol_table_invalid_node = &(error_t){
.message = "Unexpected node id when adding symbol to symbol table"};
// Returned when the table is full and already at its maximum capacity.
error_t *const err_symbol_table_max_cap = &(error_t){
.message = "Failed to increase symbol table length, max capacity reached"};
// Returned when an existing symbol is seen again with a conflicting kind.
error_t *const err_symbol_table_incompatible_symbols =
&(error_t){.message = "Failed to update symbol with incompatible kind"};
/**
 * Allocate an empty symbol table with the default capacity.
 *
 * @param output receives the new table; set to nullptr when allocation fails
 * @return nullptr on success, err_allocation_failed when either the table or
 *         its symbol array cannot be allocated
 */
error_t *symbol_table_alloc(symbol_table_t **output) {
    *output = nullptr;

    symbol_table_t *fresh = calloc(1, sizeof(symbol_table_t));
    if (!fresh)
        return err_allocation_failed;

    symbol_t *slots = calloc(symbol_table_default_cap, sizeof(symbol_t));
    if (!slots) {
        // Do not leak the table header when the slot array fails.
        free(fresh);
        return err_allocation_failed;
    }

    fresh->symbols = slots;
    fresh->cap = symbol_table_default_cap;
    fresh->len = 0;
    *output = fresh;
    return nullptr;
}
/**
 * Release a symbol table and its symbol array.
 *
 * The symbol names are not freed; the table never owns them. Passing nullptr
 * is a no-op, mirroring free(), so cleanup paths can call this
 * unconditionally.
 */
void symbol_table_free(symbol_table_t *table) {
    if (table == nullptr)
        return;
    free(table->symbols);
    free(table);
}
/**
 * Double the capacity of the symbol array.
 *
 * @return nullptr on success; err_symbol_table_max_cap when the capacity has
 *         already reached symbol_table_max_cap; err_allocation_failed when
 *         realloc fails (the table is left untouched in both error cases)
 */
error_t *symbol_table_grow_cap(symbol_table_t *table) {
    if (table->cap >= symbol_table_max_cap)
        return err_symbol_table_max_cap;

    const size_t doubled = table->cap * 2;
    symbol_t *resized = realloc(table->symbols, doubled * sizeof(symbol_t));
    if (!resized)
        return err_allocation_failed;

    table->cap = doubled;
    table->symbols = resized;
    return nullptr;
}
/**
 * Derive the symbol kind and name that an AST node contributes.
 *
 * - NODE_LABEL            -> SYMBOL_LOCAL, name from the first child
 * - NODE_LABEL_REFERENCE  -> SYMBOL_REFERENCE, name from the node itself
 * - NODE_IMPORT_DIRECTIVE -> SYMBOL_IMPORT, name from the second child
 * - NODE_EXPORT_DIRECTIVE -> SYMBOL_EXPORT, name from the second child
 *
 * @param kind receives the symbol kind (untouched on error)
 * @param name receives the token value, not a copy (untouched on error)
 * @return nullptr on success, err_symbol_table_invalid_node for any other
 *         node id
 */
error_t *symbol_table_get_node_info(ast_node_t *node, symbol_kind_t *kind,
                                    char **name) {
    symbol_kind_t found;
    ast_node_t *name_node;

    switch (node->id) {
    case NODE_LABEL:
        found = SYMBOL_LOCAL;
        name_node = node->children[0];
        break;
    case NODE_LABEL_REFERENCE:
        found = SYMBOL_REFERENCE;
        name_node = node;
        break;
    case NODE_IMPORT_DIRECTIVE:
        found = SYMBOL_IMPORT;
        name_node = node->children[1];
        break;
    case NODE_EXPORT_DIRECTIVE:
        found = SYMBOL_EXPORT;
        name_node = node->children[1];
        break;
    default:
        return err_symbol_table_invalid_node;
    }

    *kind = found;
    *name = name_node->token_entry->token.value;
    return nullptr;
}
/*
 * What happens when a name that is already in the table is seen again.
 * Rows are the stored kind, columns the incoming kind. "replace" overwrites
 * the stored entry, "ERR" is rejected and an empty cell keeps the stored
 * entry unchanged.
 *
 *   old \ new    | REFERENCE | LOCAL    | IMPORT   | EXPORT   |
 *   -------------|-----------|----------|----------|----------|
 *   REFERENCE    |           | replace  | replace  | replace  |
 *   -------------|-----------|----------|----------|----------|
 *   LOCAL        |           |          | ERR      | replace  |
 *   -------------|-----------|----------|----------|----------|
 *   IMPORT       |           |          |          | ERR      |
 *   -------------|-----------|----------|----------|----------|
 *   EXPORT       |           |          | ERR      |          |
 *   -------------|-----------|----------|----------|----------|
 */
/**
 * Report whether a stored symbol of kind old must be replaced by an incoming
 * symbol of kind new (the "replace" cells of the table above).
 */
bool symbol_table_should_update(symbol_kind_t old, symbol_kind_t new) {
    switch (old) {
    case SYMBOL_REFERENCE:
        return new != SYMBOL_REFERENCE;
    case SYMBOL_LOCAL:
        return new == SYMBOL_EXPORT;
    default:
        return false;
    }
}
/**
 * Report whether seeing kind new for a symbol stored as kind old is a
 * conflict: importing a name that is already local or exported, or exporting
 * a name that is already imported.
 */
bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
    const bool import_conflict =
        new == SYMBOL_IMPORT && (old == SYMBOL_LOCAL || old == SYMBOL_EXPORT);
    const bool export_conflict = new == SYMBOL_EXPORT && old == SYMBOL_IMPORT;
    return import_conflict || export_conflict;
}
/**
 * Append a new symbol to the table, growing the table first when it is full.
 *
 * @pre The symbol _must not_ already be in the table.
 * @param name stored as is; the table does not copy the string
 * @return nullptr on success, or the error from symbol_table_grow_cap (the
 *         table is unchanged in that case)
 */
error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
                          ast_node_t *node) {
    if (table->len >= table->cap) {
        error_t *failure = symbol_table_grow_cap(table);
        if (failure)
            return failure;
    }

    symbol_t *slot = &table->symbols[table->len];
    *slot = (symbol_t){.name = name, .kind = kind, .node = node};
    table->len += 1;
    return nullptr;
}
/**
 * Record the symbol described by node in the table.
 *
 * A name that is not yet known is added. For a known name the stored entry is
 * either replaced, kept, or the call fails, depending on the stored and the
 * incoming kind (see symbol_table_should_error / symbol_table_should_update).
 *
 * @return nullptr on success; err_symbol_table_invalid_node for unsupported
 *         node ids; err_symbol_table_incompatible_symbols on a kind conflict;
 *         or the error from symbol_table_add
 */
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) {
    char *name;
    symbol_kind_t kind;
    error_t *failure = symbol_table_get_node_info(node, &kind, &name);
    if (failure)
        return failure;

    symbol_t *existing = symbol_table_lookup(table, name);
    if (existing == nullptr)
        return symbol_table_add(table, name, kind, node);

    if (symbol_table_should_error(existing->kind, kind))
        return err_symbol_table_incompatible_symbols;

    if (!symbol_table_should_update(existing->kind, kind))
        return nullptr;

    existing->name = name;
    existing->kind = kind;
    existing->node = node;
    return nullptr;
}
/**
 * Find a symbol by exact name using a linear scan in insertion order.
 *
 * @return pointer to the entry inside the table, or nullptr when no symbol
 *         has that name
 */
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name) {
    size_t remaining = table->len;
    symbol_t *entry = table->symbols;
    while (remaining > 0) {
        if (strcmp(entry->name, name) == 0)
            return entry;
        ++entry;
        --remaining;
    }
    return nullptr;
}

46
src/encoder/symbols.h Normal file
View File

@ -0,0 +1,46 @@
#ifndef INCLUDE_ENCODER_SYMBOLS_H_
#define INCLUDE_ENCODER_SYMBOLS_H_
#include "../ast.h"
// Returned by symbol_table_update for nodes that are not a label, label
// reference, import directive or export directive.
extern error_t *const err_symbol_table_invalid_node;
// Returned when a symbol must be added but the table is already at its
// maximum capacity.
extern error_t *const err_symbol_table_max_cap;
// Returned by symbol_table_update when the incoming kind conflicts with the
// kind already stored for that name.
extern error_t *const err_symbol_table_incompatible_symbols;
/**
 * How a name entered the symbol table.
 */
typedef enum symbol_kind {
// Name was only used (label reference); unresolved until upgraded.
SYMBOL_REFERENCE,
// Name is defined by a label in this file.
SYMBOL_LOCAL,
// Name appears in an export directive.
SYMBOL_EXPORT,
// Name appears in an import directive.
SYMBOL_IMPORT,
} symbol_kind_t;
/**
 * Represent a symbol in the program
 *
 * Symbols with the same name can only be in the table once. IMPORT or EXPORT
 * symbols take precedence over REFERENCE symbols. If any reference symbols
 * remain after the first encoding pass this indicates an error. Trying to add
 * an IMPORT or EXPORT symbol if the same name already exists as the other kind
 * is an error.
 *
 * The symbol table never takes ownership of the name string; it is lifted
 * straight from the node's token value.
 */
typedef struct symbol {
// Borrowed pointer to the token value; not freed by the table.
char *name;
symbol_kind_t kind;
// AST node that most recently set this entry's kind.
ast_node_t *node;
} symbol_t;
/**
 * Growable array of symbols.
 */
typedef struct symbol_table {
// Number of slots allocated in symbols.
size_t cap;
// Number of slots currently in use; always <= cap.
size_t len;
symbol_t *symbols;
} symbol_table_t;
/**
 * Allocate an empty table with the default capacity. *table is set to nullptr
 * when allocation fails.
 */
error_t *symbol_table_alloc(symbol_table_t **table);
/** Free the table and its symbol array; symbol names are not freed. */
void symbol_table_free(symbol_table_t *table);
/**
 * Add the symbol described by node, or update / keep / reject it when the name
 * is already present. Returns nullptr on success.
 */
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node);
/** Look up a symbol by exact name; returns nullptr when it is not present. */
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
#endif // INCLUDE_ENCODER_SYMBOLS_H_

View File

@ -1,3 +1,5 @@
#include "ast.h"
#include "encoder/encoder.h"
#include "error.h"
#include "lexer.h"
#include "parser/parser.h"
@ -8,7 +10,13 @@
#include <stdlib.h>
#include <string.h>
typedef enum mode { MODE_AST, MODE_TEXT, MODE_TOKENS } mode_t;
typedef enum mode {
MODE_INVALID = -1,
MODE_AST,
MODE_TEXT,
MODE_TOKENS,
MODE_ENCODING,
} mode_t;
void print_tokens(tokenlist_t *list) {
for (auto entry = list->head; entry; entry = entry->next) {
@ -32,39 +40,105 @@ void print_text(tokenlist_t *list) {
}
}
void print_ast(tokenlist_t *list) {
error_t *print_ast(tokenlist_t *list) {
parse_result_t result = parse(list->head);
if (result.err) {
puts(result.err->message);
error_free(result.err);
return;
}
if (result.err)
return result.err;
ast_node_print(result.node);
if (result.next != nullptr) {
puts("First unparsed token:");
lexer_token_print(&result.next->token);
}
ast_node_free(result.node);
if (result.next != nullptr) {
return errorf("did not parse entire input token stream");
}
return nullptr;
}
/**
 * Print len bytes to stdout as two-digit lowercase hex values separated by
 * single spaces, followed by a newline. With len == 0 only the newline is
 * printed.
 */
void print_hex(size_t len, uint8_t bytes[static len]) {
    for (size_t index = 0; index < len; ++index) {
        // Separator goes in front of every byte except the first, which
        // leaves no trailing space at the end of the line.
        if (index > 0)
            printf(" ");
        printf("%02x", bytes[index]);
    }
    printf("\n");
}
/**
 * Parse the token list, encode the resulting program and print the encoding
 * of every top-level instruction as one line of hex bytes.
 *
 * @return nullptr on success, otherwise the parse, allocation or encoding
 *         error. The AST and the encoder are released on every path on which
 *         they were created.
 */
error_t *print_encoding(tokenlist_t *list) {
    parse_result_t result = parse(list->head);
    if (result.err)
        return result.err;

    encoder_t *encoder;
    error_t *err = encoder_alloc(&encoder);
    if (err)
        goto cleanup_ast;

    // From here on the encoder exists, so failures must release it as well.
    err = encoder_encode(encoder, result.node);
    if (err)
        goto cleanup_encoder;

    ast_node_t *root = result.node;
    for (size_t i = 0; i < root->len; ++i) {
        ast_node_t *node = root->children[i];
        if (node->id != NODE_INSTRUCTION)
            continue;
        print_hex(node->value.encoding.len, node->value.encoding.encoding);
    }

    // Success falls through the cleanup labels with err == nullptr.
cleanup_encoder:
    encoder_free(encoder);
cleanup_ast:
    ast_node_free(result.node);
    return err;
}
int get_execution_mode(int argc, char *argv[]) {
if (argc != 3 || (strcmp(argv[1], "tokens") != 0 &&
strcmp(argv[1], "text") != 0 && strcmp(argv[1], "ast"))) {
puts("Usage: oas [tokens|text|ast] <filename>");
exit(1);
}
if (argc != 3)
return MODE_INVALID;
if (strcmp(argv[1], "tokens") == 0)
return MODE_TOKENS;
if (strcmp(argv[1], "text") == 0)
return MODE_TEXT;
if (strcmp(argv[1], "ast") == 0)
return MODE_AST;
if (strcmp(argv[1], "encoding") == 0)
return MODE_ENCODING;
return MODE_INVALID;
}
/**
 * Run the action selected on the command line against the token list.
 *
 * The token and text printers cannot fail and always yield nullptr; the AST
 * and encoding printers return their own result.
 */
error_t *do_action(mode_t mode, tokenlist_t *list) {
    switch (mode) {
    case MODE_INVALID:
        /* can't happen: main exits on MODE_INVALID before calling this */
        break;
    case MODE_AST:
        return print_ast(list);
    case MODE_ENCODING:
        return print_encoding(list);
    case MODE_TEXT:
        print_text(list);
        return nullptr;
    case MODE_TOKENS:
        print_tokens(list);
        return nullptr;
    }
    __builtin_unreachable();
}
int main(int argc, char *argv[]) {
mode_t mode = get_execution_mode(argc, argv);
if (mode == MODE_INVALID) {
puts("Usage: oas [tokens|text|ast|encoding] <filename>");
exit(1);
}
char *filename = argv[2];
lexer_t *lex = &(lexer_t){};
@ -81,17 +155,9 @@ int main(int argc, char *argv[]) {
if (err)
goto cleanup_tokens;
switch (mode) {
case MODE_TOKENS:
print_tokens(list);
break;
case MODE_TEXT:
print_text(list);
break;
case MODE_AST:
print_ast(list);
break;
}
err = do_action(mode, list);
if (err)
goto cleanup_tokens;
tokenlist_free(list);
error_free(err);

View File

@ -1,4 +1,5 @@
#include "combinators.h"
#include "util.h"
// Parse a list of the given parser delimited by the given token id. Does not
// store the delimiters in the parent node
@ -122,5 +123,12 @@ parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
}
current = result.next;
}
// token stream ended before we matched all parsers
if (parser != nullptr) {
ast_node_free(all);
return parse_no_match();
}
return parse_success(all, current);
}

View File

@ -83,7 +83,7 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) {
}
parse_result_t parse_immediate(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_number, parse_identifier, nullptr};
parser_t parsers[] = {parse_number, parse_label_reference, nullptr};
parse_result_t result = parse_any(current, parsers);
return parse_result_wrap(NODE_IMMEDIATE, result);
}
@ -119,23 +119,45 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
}
// Parse the part of an import directive that follows the dot: the `import`
// keyword and then an identifier, collected under a NODE_IMPORT_DIRECTIVE.
parse_result_t parse_import_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_import, parse_identifier, nullptr};
return parse_consecutive(current, NODE_IMPORT_DIRECTIVE, parsers);
}
// Parse the part of an export directive that follows the dot: the `export`
// keyword and then an identifier, collected under a NODE_EXPORT_DIRECTIVE.
parse_result_t parse_export_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_export, parse_identifier, nullptr};
return parse_consecutive(current, NODE_EXPORT_DIRECTIVE, parsers);
}
// Parse one of the supported directive kinds: section, import or export.
// The alternatives are handed to parse_any in that order.
parse_result_t parse_directive_options(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_section_directive, parse_import_directive,
parse_export_directive, nullptr};
return parse_any(current, parsers);
}
parse_result_t parse_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
parser_t parsers[] = {parse_dot, parse_directive_options, parse_newline,
nullptr};
return parse_consecutive(current, NODE_DIRECTIVE, parsers);
}
parse_result_t parse_instruction(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
nullptr};
return parse_consecutive(current, NODE_INSTRUCTION, parsers);
}
parse_result_t parse_statement(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
nullptr};
parse_newline, nullptr};
return parse_any(current, parsers);
}
parse_result_t parse(tokenlist_entry_t *current) {
current = tokenlist_skip_trivia(current);
return parse_many(current, NODE_PROGRAM, true, parse_statement);
parse_result_t result =
parse_many(current, NODE_PROGRAM, true, parse_statement);
if (result.node != nullptr)
ast_node_prune(result.node, NODE_NEWLINE);
return result;
}

View File

@ -1,5 +1,6 @@
#include "primitives.h"
#include "../ast.h"
#include "../data/registers.h"
#include <string.h>
parse_result_t parse_identifier(tokenlist_entry_t *current) {
@ -62,28 +63,18 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
}
// Match a TOKEN_NEWLINE token and produce a NODE_NEWLINE node; no extra
// validation callback is supplied.
parse_result_t parse_newline(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
}
parse_result_t parse_label_reference(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
nullptr);
}
const char *registers[] = {
// 64-bit registers
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15",
// 32-bit registers
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d",
"r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
// 16-bit registers
"ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w",
"r11w", "r12w", "r13w", "r14w", "r15w",
// 8-bit low registers
"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b",
"r11b", "r12b", "r13b", "r14b", "r15b", nullptr};
bool is_register_token(lexer_token_t *token) {
for (size_t i = 0; registers[i] != nullptr; ++i)
if (strcmp(token->value, registers[i]) == 0)
if (strcmp(token->value, registers[i]->name) == 0)
return true;
return false;
}
@ -101,3 +92,19 @@ parse_result_t parse_section(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
is_section_token);
}
// True when the token's text is exactly "import" (case-sensitive).
bool is_import_token(lexer_token_t *token) {
return strcmp(token->value, "import") == 0;
}
// Match an identifier token whose text is "import" and produce a NODE_IMPORT
// node.
parse_result_t parse_import(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_IMPORT, is_import_token);
}
// True when the token's text is exactly "export" (case-sensitive).
bool is_export_token(lexer_token_t *token) {
return strcmp(token->value, "export") == 0;
}
// Match an identifier token whose text is "export" and produce a NODE_EXPORT
// node.
parse_result_t parse_export(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_EXPORT, is_export_token);
}

View File

@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
parse_result_t parse_minus(tokenlist_entry_t *current);
parse_result_t parse_asterisk(tokenlist_entry_t *current);
parse_result_t parse_dot(tokenlist_entry_t *current);
parse_result_t parse_newline(tokenlist_entry_t *current);
parse_result_t parse_label_reference(tokenlist_entry_t *current);
/* These are "primitives" with a different name and some extra validation on top
@ -26,5 +27,7 @@ parse_result_t parse_label_reference(tokenlist_entry_t *current);
*/
parse_result_t parse_register(tokenlist_entry_t *current);
parse_result_t parse_section(tokenlist_entry_t *current);
parse_result_t parse_import(tokenlist_entry_t *current);
parse_result_t parse_export(tokenlist_entry_t *current);
#endif // INCLUDE_PARSER_PRIMITIVES_H_

View File

@ -86,7 +86,6 @@ bool is_trivia(tokenlist_entry_t *trivia) {
switch (trivia->token.id) {
case TOKEN_WHITESPACE:
case TOKEN_COMMENT:
case TOKEN_NEWLINE:
return true;
default:
return false;

164
tests/bytes.c Normal file
View File

@ -0,0 +1,164 @@
#include "../src/bytes.h"
#include "munit.h"
// A freshly created LOCAL_BYTES(16) must report len 0, cap 16 and a buffer of
// 16 zero bytes.
MunitResult test_bytes_initializer(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
for (size_t i = 0; i < 16; ++i)
munit_assert_uint8(bytes->buffer[i], ==, 0);
return MUNIT_OK;
}
// Appending single bytes must succeed until the capacity (16) is reached and
// fail with err_bytes_no_capacity on the 17th byte.
MunitResult test_bytes_append_uint8(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
// Fill the buffer exactly to capacity, checking each stored value.
for (size_t i = 0; i < 16; ++i) {
error_t *err = bytes_append_uint8(bytes, (uint8_t)i);
munit_assert_null(err);
munit_assert_uint8(bytes->buffer[i], ==, (uint8_t)i);
}
// One byte more than fits.
error_t *err = bytes_append_uint8(bytes, 0xFF);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
return MUNIT_OK;
}
// Appending arrays must copy the data after the existing content, and an
// array that does not fit must be rejected without changing the length.
MunitResult test_bytes_append_array(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
// First append: 5 bytes into an empty buffer.
uint8_t test_array[] = {0x01, 0x02, 0x03, 0x04, 0x05};
size_t array_len = sizeof(test_array) / sizeof(test_array[0]);
error_t *err = bytes_append_array(bytes, array_len, test_array);
munit_assert_null(err);
munit_assert_size(bytes->len, ==, array_len);
for (size_t i = 0; i < array_len; ++i) {
munit_assert_uint8(bytes->buffer[i], ==, test_array[i]);
}
// Second append: 3 more bytes must land directly after the first 5.
uint8_t second_array[] = {0x06, 0x07, 0x08};
size_t second_len = sizeof(second_array) / sizeof(second_array[0]);
err = bytes_append_array(bytes, second_len, second_array);
munit_assert_null(err);
munit_assert_size(bytes->len, ==, array_len + second_len);
for (size_t i = 0; i < second_len; ++i) {
munit_assert_uint8(bytes->buffer[array_len + i], ==, second_array[i]);
}
// 8 bytes are in use, so 10 more exceed the capacity of 16.
uint8_t overflow_array[10] = {0}; // Array that would exceed capacity
err = bytes_append_array(bytes, sizeof(overflow_array), overflow_array);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
munit_assert_size(bytes->len, ==, array_len + second_len);
return MUNIT_OK;
}
// Appending one bytes_t to another must copy the source's current content,
// and a source that does not fit must be rejected without changing the
// destination length.
MunitResult test_bytes_append_bytes(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *src = LOCAL_BYTES(8);
bytes_t *dst = LOCAL_BYTES(16);
// Fill source bytes with test data
for (uint8_t i = 0; i < 5; ++i) {
error_t *err = bytes_append_uint8(src, i + 1);
munit_assert_null(err);
}
munit_assert_size(src->len, ==, 5);
// Append source to destination
error_t *err = bytes_append_bytes(dst, src);
munit_assert_null(err);
munit_assert_size(dst->len, ==, src->len);
// Verify destination contents match source
for (size_t i = 0; i < src->len; ++i) {
munit_assert_uint8(dst->buffer[i], ==, src->buffer[i]);
}
// Fill source with more data and append again
for (uint8_t i = 0; i < 3; ++i) {
err = bytes_append_uint8(src, i + 6);
munit_assert_null(err);
}
munit_assert_size(src->len, ==, 8);
// Append updated source
err = bytes_append_bytes(dst, src);
munit_assert_null(err);
munit_assert_size(dst->len, ==, 13); // 5 + 8
// Test capacity boundary: 13 bytes used + 4 = 17, one more than the cap of 16
src->len = 4; // manually set length to barely not fit
err = bytes_append_bytes(dst, src);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
munit_assert_size(dst->len, ==, 13); // Length unchanged after error
return MUNIT_OK;
}
// A 16-bit value must be appended as two bytes, least significant byte first.
MunitResult test_bytes_append_uint16(const MunitParameter params[], void *data) {
    // Unused munit arguments; silenced like in the other tests of this file.
    (void)params;
    (void)data;

    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);

    bytes_append_uint16(bytes, 0xFFAA);
    munit_assert_size(bytes->len, ==, 2);
    munit_assert_uint8(bytes->buffer[0], ==, 0xAA);
    munit_assert_uint8(bytes->buffer[1], ==, 0xFF);
    return MUNIT_OK;
}
// A 32-bit value must be appended as four bytes, least significant byte first.
MunitResult test_bytes_append_uint32(const MunitParameter params[], void *data) {
    // Unused munit arguments; silenced like in the other tests of this file.
    (void)params;
    (void)data;

    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);

    bytes_append_uint32(bytes, 0xAABBCCDD);
    munit_assert_size(bytes->len, ==, 4);
    munit_assert_uint8(bytes->buffer[0], ==, 0xDD);
    munit_assert_uint8(bytes->buffer[1], ==, 0xCC);
    munit_assert_uint8(bytes->buffer[2], ==, 0xBB);
    munit_assert_uint8(bytes->buffer[3], ==, 0xAA);
    return MUNIT_OK;
}
// A 64-bit value must be appended as eight bytes, least significant byte
// first.
MunitResult test_bytes_append_uint64(const MunitParameter params[], void *data) {
    // Unused munit arguments; silenced like in the other tests of this file.
    (void)params;
    (void)data;

    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);

    bytes_append_uint64(bytes, 0xAABBCCDDEEFF9988);
    munit_assert_size(bytes->len, ==, 8);
    munit_assert_uint8(bytes->buffer[0], ==, 0x88);
    munit_assert_uint8(bytes->buffer[1], ==, 0x99);
    munit_assert_uint8(bytes->buffer[2], ==, 0xFF);
    munit_assert_uint8(bytes->buffer[3], ==, 0xEE);
    munit_assert_uint8(bytes->buffer[4], ==, 0xDD);
    munit_assert_uint8(bytes->buffer[5], ==, 0xCC);
    munit_assert_uint8(bytes->buffer[6], ==, 0xBB);
    munit_assert_uint8(bytes->buffer[7], ==, 0xAA);
    return MUNIT_OK;
}
MunitTest bytes_tests[] = {
{"/initializer", test_bytes_initializer, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint8", test_bytes_append_uint8, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_array", test_bytes_append_array, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_bytes", test_bytes_append_bytes, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint16", test_bytes_append_uint16, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint32", test_bytes_append_uint32, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint64", test_bytes_append_uint64, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};

View File

@ -0,0 +1,65 @@
lbl_0: ; 65 symbols used for testing growing the symbols table
lbl_1:
lbl_2:
lbl_3:
lbl_4:
lbl_5:
lbl_6:
lbl_7:
lbl_8:
lbl_9:
lbl_10:
lbl_11:
lbl_12:
lbl_13:
lbl_14:
lbl_15:
lbl_16:
lbl_17:
lbl_18:
lbl_19:
lbl_20:
lbl_21:
lbl_22:
lbl_23:
lbl_24:
lbl_25:
lbl_26:
lbl_27:
lbl_28:
lbl_29:
lbl_30:
lbl_31:
lbl_32:
lbl_33:
lbl_34:
lbl_35:
lbl_36:
lbl_37:
lbl_38:
lbl_39:
lbl_40:
lbl_41:
lbl_42:
lbl_43:
lbl_44:
lbl_45:
lbl_46:
lbl_47:
lbl_48:
lbl_49:
lbl_50:
lbl_51:
lbl_52:
lbl_53:
lbl_54:
lbl_55:
lbl_56:
lbl_57:
lbl_58:
lbl_59:
lbl_60:
lbl_61:
lbl_62:
lbl_63:
lbl_64:

View File

@ -0,0 +1,5 @@
; regression test for two issues:
; - parsing two zero operand instructions in a row
; - a zero operand instruction just before eof
syscall
ret

12
tests/input/symbols.asm Normal file
View File

@ -0,0 +1,12 @@
.import test
.export test
test:
call test
.import more
.export more
more:
call more
.import other
.export other
other:
call other

View File

@ -2,6 +2,9 @@
; Small valid code snippet that should contain all different AST nodes
.export _start
.import exit
_start:
mov eax, ebx
lea eax, [eax + ebx * 4 + 8]
@ -19,3 +22,5 @@ _start:
push 0xffff:64
push 0o777:16
push 0b0001:16
mov rax, 0
call exit

View File

@ -3,12 +3,16 @@
extern MunitTest ast_tests[];
extern MunitTest lexer_tests[];
extern MunitTest regression_tests[];
extern MunitTest symbols_tests[];
extern MunitTest bytes_tests[];
int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
MunitSuite suites[] = {
{"/regression", regression_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{"/ast", ast_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{"/lexer", lexer_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{"/symbols", symbols_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{"/bytes", bytes_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{nullptr, nullptr, nullptr, 0, MUNIT_SUITE_OPTION_NONE},
};

View File

@ -23,9 +23,46 @@ MunitResult test_regression_trivia_head(const MunitParameter params[], void *dat
ast_node_free(result.node);
tokenlist_free(list);
return MUNIT_OK;
}
// Regression test: two zero-operand instructions in a row, the last one
// directly before end of file, must parse into two NODE_INSTRUCTION children
// that each carry an empty operands node.
MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
    munit_assert_null(err);

    tokenlist_t *list;
    err = tokenlist_alloc(&list);
    munit_assert_null(err);

    err = tokenlist_fill(list, lex);
    munit_assert_null(err);

    parse_result_t result = parse(list->head);
    // The lexer is no longer needed once the token list is filled and parsed;
    // close it so the test does not leak the opened input.
    lexer_close(lex);
    munit_assert_null(result.err);
    munit_assert_null(result.next);

    // Both children should be instructions
    munit_assert_size(result.node->len, ==, 2);
    munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
    munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);

    // And they should have empty operands
    munit_assert_size(result.node->children[0]->len, ==, 2);
    munit_assert_size(result.node->children[1]->len, ==, 2);
    munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
    munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);

    ast_node_free(result.node);
    tokenlist_free(list);
    return MUNIT_OK;
}
MunitTest regression_tests[] = {
{"/trivia_head", test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/no_operands_eof", test_no_operands_eof, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};

351
tests/symbols.c Normal file
View File

@ -0,0 +1,351 @@
#include "../src/encoder/symbols.h"
#include "../src/ast.h"
#include "../src/error.h"
#include "../src/lexer.h"
#include "../src/parser/parser.h"
#include "munit.h"
#include <string.h>
// Shared fixture: lex and parse the file at path, returning the AST root in
// *node and the token list in *list. The caller frees both. Every step is
// asserted so that a missing or unparsable fixture fails here with a clear
// message instead of as a null dereference in the calling test.
void symbols_setup_test(ast_node_t **node, tokenlist_t **list, char *path) {
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, path);
    munit_assert_null(err);

    err = tokenlist_alloc(list);
    munit_assert_null(err);

    err = tokenlist_fill(*list, lex);
    munit_assert_null(err);

    parse_result_t result = parse((*list)->head);
    lexer_close(lex);
    munit_assert_null(result.err);
    munit_assert_not_null(result.node);

    *node = result.node;
}
// A newly allocated table must be non-null, empty, and have the default
// capacity with an allocated symbol array.
MunitResult test_symbol_table_alloc(const MunitParameter params[], void *data) {
(void)params;
(void)data;
symbol_table_t *table = nullptr;
error_t *err = symbol_table_alloc(&table);
munit_assert_ptr_not_null(table);
munit_assert_ptr_null(err);
munit_assert_size(table->cap, ==, 64); // Default capacity
munit_assert_size(table->len, ==, 0);
munit_assert_ptr_not_null(table->symbols);
symbol_table_free(table);
return MUNIT_OK;
}
MunitResult test_symbol_table_lookup_empty(const MunitParameter params[], void *data) {
(void)params;
(void)data;
symbol_table_t *table = nullptr;
symbol_table_alloc(&table);
symbol_t *symbol = symbol_table_lookup(table, "nonexistent");
munit_assert_ptr_null(symbol);
symbol_table_free(table);
return MUNIT_OK;
}
// Adding a label reference to an empty table must create a single
// SYMBOL_REFERENCE entry named "test" that points at the reference node.
MunitResult test_symbol_add_reference(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    // Fail the test cleanly if the table could not be created.
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);

    // Fourth statement is `call test`; drill down to its label reference.
    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    munit_assert_int(reference->id, ==, NODE_LABEL_REFERENCE);
    munit_assert_size(table->len, ==, 0);

    err = symbol_table_update(table, reference);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_REFERENCE, ==, symbol->kind);
    munit_assert_ptr_equal(reference, symbol->node);
    munit_assert_string_equal(symbol->name, "test");

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
// Adding a label to an empty table must create a single SYMBOL_LOCAL entry
// named "test" that points at the label node.
MunitResult test_symbol_add_label(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    // Fail the test cleanly if the table could not be created.
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);

    // Third statement is the `test:` label.
    ast_node_t *label = root->children[2];
    munit_assert_int(label->id, ==, NODE_LABEL);
    munit_assert_size(table->len, ==, 0);

    err = symbol_table_update(table, label);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
    munit_assert_ptr_equal(label, symbol->node);
    munit_assert_string_equal(symbol->name, "test");

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
// Adding an import directive to an empty table must create a single
// SYMBOL_IMPORT entry named "test" that points at the directive node.
MunitResult test_symbol_add_import(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    // Fail the test cleanly if the table could not be created.
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);

    // First statement is `.import test`; child 1 is the import directive.
    ast_node_t *import_directive = root->children[0]->children[1];
    munit_assert_int(import_directive->id, ==, NODE_IMPORT_DIRECTIVE);
    munit_assert_size(table->len, ==, 0);

    err = symbol_table_update(table, import_directive);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_IMPORT, ==, symbol->kind);
    munit_assert_ptr_equal(import_directive, symbol->node);
    munit_assert_string_equal(symbol->name, "test");

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
// Helper: insert `first` into a fresh table, then apply `second` for the same
// name and check the outcome.
//   should_succeed - the second update must return nullptr; otherwise it must
//                    return err_symbol_table_incompatible_symbols
//   should_update  - the stored entry must afterwards describe `second`;
//                    otherwise it must still describe `first`
// The table must contain exactly one entry in every case.
void test_symbol_update(const char *name, ast_node_t *first, symbol_kind_t first_kind, ast_node_t *second,
                        symbol_kind_t second_kind, bool should_succeed, bool should_update) {
    symbol_table_t *table = nullptr;
    // Fail the test cleanly if the table could not be created.
    error_t *err = symbol_table_alloc(&table);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 0);

    err = symbol_table_update(table, first);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    symbol_t *symbol = symbol_table_lookup(table, name);
    munit_assert_not_null(symbol);
    munit_assert_int(first_kind, ==, symbol->kind);
    munit_assert_ptr_equal(first, symbol->node);
    munit_assert_string_equal(symbol->name, name);

    err = symbol_table_update(table, second);
    if (should_succeed)
        munit_assert_null(err);
    else
        munit_assert_ptr_equal(err, err_symbol_table_incompatible_symbols);
    munit_assert_size(table->len, ==, 1);

    symbol = symbol_table_lookup(table, name);
    if (should_update) {
        munit_assert_not_null(symbol);
        munit_assert_int(second_kind, ==, symbol->kind);
        munit_assert_ptr_equal(second, symbol->node);
        munit_assert_string_equal(symbol->name, name);
    } else {
        munit_assert_not_null(symbol);
        munit_assert_int(first_kind, ==, symbol->kind);
        munit_assert_ptr_equal(first, symbol->node);
        munit_assert_string_equal(symbol->name, name);
    }

    symbol_table_free(table);
}
// Every kind transition that must succeed: real upgrades replace the stored
// entry; identity transitions and ignored downgrades leave it unchanged.
MunitResult test_symbol_upgrade_valid(const MunitParameter params[], void *data) {
    // Unused munit arguments; silenced like in the other tests of this file.
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");

    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    ast_node_t *label = root->children[2];
    ast_node_t *import_directive = root->children[0]->children[1];
    ast_node_t *export_directive = root->children[1]->children[1];

    // real upgrades
    test_symbol_update("test", reference, SYMBOL_REFERENCE, label, SYMBOL_LOCAL, true, true);
    test_symbol_update("test", reference, SYMBOL_REFERENCE, import_directive, SYMBOL_IMPORT, true, true);
    test_symbol_update("test", reference, SYMBOL_REFERENCE, export_directive, SYMBOL_EXPORT, true, true);
    test_symbol_update("test", label, SYMBOL_LOCAL, export_directive, SYMBOL_EXPORT, true, true);

    // identity upgrades
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference, SYMBOL_REFERENCE, true, false);
    test_symbol_update("test", label, SYMBOL_LOCAL, label, SYMBOL_LOCAL, true, false);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_directive, SYMBOL_IMPORT, true, false);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_directive, SYMBOL_EXPORT, true, false);

    // downgrades that are allowed and ignored
    test_symbol_update("test", label, SYMBOL_LOCAL, reference, SYMBOL_REFERENCE, true, false);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, reference, SYMBOL_REFERENCE, true, false);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, reference, SYMBOL_REFERENCE, true, false);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, label, SYMBOL_LOCAL, true, false);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, label, SYMBOL_LOCAL, true, false);

    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
// Every kind transition that must be rejected with
// err_symbol_table_incompatible_symbols while leaving the stored entry as is.
MunitResult test_symbol_upgrade_invalid(const MunitParameter params[], void *data) {
    // Unused munit arguments; silenced like in the other tests of this file.
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");

    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    ast_node_t *label = root->children[2];
    ast_node_t *import_directive = root->children[0]->children[1];
    ast_node_t *export_directive = root->children[1]->children[1];

    // invalid upgrades
    test_symbol_update("test", label, SYMBOL_LOCAL, import_directive, SYMBOL_IMPORT, false, false);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, import_directive, SYMBOL_IMPORT, false, false);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, export_directive, SYMBOL_EXPORT, false, false);

    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/**
 * Adds the export directive from tests/input/symbols.asm to an empty symbol
 * table and checks the single resulting entry.
 *
 * The entry must be found under the name "test", have kind SYMBOL_EXPORT,
 * and point back at the export directive node.
 *
 * @param params unused munit parameters
 * @param data   unused munit fixture data
 * @return MUNIT_OK when all assertions hold
 */
MunitResult test_symbol_add_export(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    symbol_table_t *symtab = nullptr;
    tokenlist_t *tokens;
    ast_node_t *tree;

    symbols_setup_test(&tree, &tokens, "tests/input/symbols.asm");
    symbol_table_alloc(&symtab);

    // The export directive is the second child of the second top-level node.
    ast_node_t *export_node = tree->children[1]->children[1];
    munit_assert_int(export_node->id, ==, NODE_EXPORT_DIRECTIVE);

    // The table starts empty and holds exactly one symbol after the update.
    munit_assert_size(symtab->len, ==, 0);
    error_t *update_err = symbol_table_update(symtab, export_node);
    munit_assert_null(update_err);
    munit_assert_size(symtab->len, ==, 1);

    symbol_t *entry = symbol_table_lookup(symtab, "test");
    munit_assert_not_null(entry);
    munit_assert_int(SYMBOL_EXPORT, ==, entry->kind);
    munit_assert_ptr_equal(export_node, entry->node);
    munit_assert_string_equal(entry->name, "test");

    symbol_table_free(symtab);
    ast_node_free(tree);
    tokenlist_free(tokens);
    return MUNIT_OK;
}
MunitResult test_symbol_table_growth(const MunitParameter params[], void *data) {
(void)params;
(void)data;
ast_node_t *root;
tokenlist_t *list;
symbol_table_t *table = nullptr;
// Set up with our manysymbols.asm file
symbols_setup_test(&root, &list, "tests/input/manysymbols.asm");
symbol_table_alloc(&table);
// Initial capacity should be the default (64)
munit_assert_size(table->cap, ==, 64);
munit_assert_size(table->len, ==, 0);
// Add the first 64 labels (indices 0-63)
size_t initial_cap = table->cap;
for (size_t i = 0; i < 64; i++) {
ast_node_t *label = root->children[i];
munit_assert_int(label->id, ==, NODE_LABEL);
error_t *err = symbol_table_update(table, label);
munit_assert_null(err);
munit_assert_size(table->len, ==, i + 1);
// Capacity should remain the same for the first 64 labels
munit_assert_size(table->cap, ==, initial_cap);
}
// Now add the 65th label (index 64), which should trigger growth
ast_node_t *final_label = root->children[64];
munit_assert_int(final_label->id, ==, NODE_LABEL);
error_t *err = symbol_table_update(table, final_label);
munit_assert_null(err);
munit_assert_size(table->len, ==, 65);
// Capacity should have doubled
munit_assert_size(table->cap, ==, initial_cap * 2);
// Validate we can look up all the symbols
for (size_t i = 0; i <= 64; i++) {
char name[10];
sprintf(name, "lbl_%zu", i);
symbol_t *symbol = symbol_table_lookup(table, name);
munit_assert_not_null(symbol);
munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
munit_assert_string_equal(symbol->name, name);
}
symbol_table_free(table);
ast_node_free(root);
tokenlist_free(list);
return MUNIT_OK;
}
/**
 * Passes the root node of the parsed tree to symbol_table_update() and
 * expects it to be refused.
 *
 * The call must return err_symbol_table_invalid_node and the table must
 * still be empty afterwards.
 *
 * @param params unused munit parameters
 * @param data   unused munit fixture data
 * @return MUNIT_OK when all assertions hold
 */
MunitResult test_symbol_invalid_node(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    symbol_table_t *symtab = nullptr;
    tokenlist_t *tokens;
    ast_node_t *tree;

    symbols_setup_test(&tree, &tokens, "tests/input/symbols.asm");
    symbol_table_alloc(&symtab);
    munit_assert_size(symtab->len, ==, 0);

    // Updating with the root node must fail and leave the table untouched.
    error_t *update_err = symbol_table_update(symtab, tree);
    munit_assert_ptr_equal(update_err, err_symbol_table_invalid_node);
    munit_assert_size(symtab->len, ==, 0);

    symbol_table_free(symtab);
    ast_node_free(tree);
    tokenlist_free(tokens);
    return MUNIT_OK;
}
// Registration table for the symbol table tests.
//
// Each entry pairs a test path (e.g. "/add_label") with the function that
// implements it. The remaining positional fields are nullptr or
// MUNIT_TEST_OPTION_NONE for every test.
// NOTE(review): those fields presumably correspond to munit's setup,
// tear-down, options and parameters slots -- confirm against munit.h.
MunitTest symbols_tests[] = {
{"/table_alloc", test_symbol_table_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/table_lookup_empty", test_symbol_table_lookup_empty, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/add_reference", test_symbol_add_reference, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/add_label", test_symbol_add_label, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/add_import", test_symbol_add_import, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/add_export", test_symbol_add_export, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/upgrade_valid", test_symbol_upgrade_valid, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/upgrade_invalid", test_symbol_upgrade_invalid, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/table_growth", test_symbol_table_growth, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/invalid_node", test_symbol_invalid_node, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
// All-null entry closing the list.
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};