Compare commits

..

10 Commits

Author SHA1 Message Date
7b2cee0533 Add first encoding pass
All checks were successful
Validate the build / validate-build (push) Successful in 42s
First pass collects all the symbols and interprets number and register
tokens into usable data for the later passes.
2025-04-15 00:05:14 +02:00
3a164de8d4 Add register and number values to AST nodes 2025-04-15 00:05:14 +02:00
32ca7b942c Add initial limited opcode data 2025-04-15 00:05:14 +02:00
43ea0042b6 Add registers data table
Change the validated primitive parse_register so that it uses the data
table instead
2025-04-15 00:05:06 +02:00
b16ba10b0d Add symbols tests
All checks were successful
Validate the build / validate-build (push) Successful in 32s
2025-04-09 19:49:10 +02:00
678e71129d initial symbol table implementation 2025-04-09 19:43:44 +02:00
fba3cd6660 Add .import and .export to the input test file 2025-04-09 19:43:44 +02:00
cf59c1e75b Make main properly return with failure on parsing errors 2025-04-09 19:43:44 +02:00
c932c3c609 Add .import and .export directive to the grammar and parser 2025-04-09 19:43:44 +02:00
f061419edf fix parse_immediate to accept label_reference instead of identifier 2025-04-09 19:43:44 +02:00
18 changed files with 21 additions and 816 deletions

View File

@ -1,9 +1,9 @@
<program> ::= <statement>*
<statement> ::= <label> | <directive> | <instruction> | <newline>
<statement> ::= <label> | <directive> | <instruction>
<label> ::= <identifier> <colon>
<directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> ) <newline>
<directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> )
<section_directive> ::= "section" <identifier>
@ -11,7 +11,7 @@
<import_directive> ::= "import" <identifier>
<instruction> ::= <identifier> <operands> <newline>
<instruction> ::= <identifier> <operands>
<operands> ::= <operand> ( <comma> <operand> )*

View File

@ -161,8 +161,6 @@ const char *ast_node_id_to_cstr(node_id_t id) {
return "NODE_ASTERISK";
case NODE_DOT:
return "NODE_DOT";
case NODE_NEWLINE:
return "NODE_NEWLINE";
case NODE_IMPORT:
return "NODE_IMPORT";
case NODE_EXPORT:
@ -182,8 +180,7 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
}
printf("%s", ast_node_id_to_cstr(node->id));
if (node->token_entry && node->token_entry->token.value &&
node->id != NODE_NEWLINE) {
if (node->token_entry && node->token_entry->token.value) {
printf(" \"%s\"", node->token_entry->token.value);
}
printf("\n");
@ -196,18 +193,3 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
void ast_node_print(ast_node_t *node) {
ast_node_print_internal(node, 0);
}
void ast_node_prune(ast_node_t *node, node_id_t id) {
size_t new_len = 0;
for (size_t i = 0; i < node->len; i++) {
auto child = node->children[i];
if (child->id == id) {
ast_node_free(child);
continue;
}
ast_node_prune(child, id);
node->children[new_len] = child;
new_len++;
}
node->len = new_len;
}

View File

@ -55,7 +55,6 @@ typedef enum node_id {
NODE_MINUS,
NODE_ASTERISK,
NODE_DOT,
NODE_NEWLINE,
} node_id_t;
typedef struct ast_node ast_node_t;
@ -74,11 +73,6 @@ typedef struct register_ {
operand_size_t size;
} register_t;
typedef struct opcode_encoding {
uint8_t encoding[32];
size_t len;
} opcode_encoding_t;
struct ast_node {
node_id_t id;
tokenlist_entry_t *token_entry;
@ -89,7 +83,6 @@ struct ast_node {
union {
register_t reg;
number_t number;
opcode_encoding_t encoding;
} value;
};
@ -138,17 +131,4 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
*/
void ast_node_print(ast_node_t *node);
/**
* Prune the children with a given id
*
* The tree is recursively visited and all child nodes of a given ID are pruned
* completely. If a node has the giver id, it will get removed along wih all its
* children, even if some of those children have different ids. The root node id
* is never checked so the tree is guaranteed to remain and allocated valid.
*
* @param node The root of the tree you want to prune
* @param id The id of the nodes you want to prune
*/
void ast_node_prune(ast_node_t *node, node_id_t id);
#endif // INCLUDE_SRC_AST_H_

View File

@ -1,6 +0,0 @@
#include "bytes.h"
#include "error.h"
error_t *const err_bytes_no_capacity = &(error_t){
.message = "Not enough capacity in bytes buffer",
};

View File

@ -1,60 +0,0 @@
#ifndef INCLUDE_SRC_BYTES_H_
#define INCLUDE_SRC_BYTES_H_
#include "error.h"
#include <stddef.h>
#include <stdint.h>
#include <string.h>
extern error_t *const err_bytes_no_capacity;
typedef struct bytes {
size_t len;
size_t cap;
uint8_t buffer[];
} bytes_t;
#define LOCAL_BYTES_ANONYMOUS(N) \
&(struct { \
size_t len; \
size_t cap; \
uint8_t buffer[(N)]; \
}) { \
0, (N), {} \
}
#define LOCAL_BYTES(N) (bytes_t *)LOCAL_BYTES_ANONYMOUS(N);
static inline error_t *bytes_append_uint8(bytes_t *bytes, uint8_t value) {
if (bytes->len >= bytes->cap)
return err_bytes_no_capacity;
bytes->buffer[bytes->len++] = value;
return nullptr;
}
static inline error_t *bytes_append_array(bytes_t *dst, size_t n,
uint8_t buffer[static n]) {
if (dst->len + n >= dst->cap)
return err_bytes_no_capacity;
memcpy(dst->buffer + dst->len, buffer, n);
dst->len += n;
return nullptr;
}
static inline error_t *bytes_append_bytes(bytes_t *dst, bytes_t *src) {
return bytes_append_array(dst, src->len, src->buffer);
}
static inline error_t *bytes_append_uint16(bytes_t *dst, uint16_t value) {
return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
}
static inline error_t *bytes_append_uint32(bytes_t *dst, uint32_t value) {
return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
}
static inline error_t *bytes_append_uint64(bytes_t *dst, uint64_t value) {
return bytes_append_array(dst, sizeof(value), (uint8_t *)&value);
}
#endif // INCLUDE_SRC_BYTES_H_

View File

@ -51,7 +51,7 @@ opcode_data_t *const opcodes[] = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32},
},
},
// PUSH reg16,
// Push reg16,
&(opcode_data_t) {
.mnemonic = "push",
.opcode = 0x50,
@ -59,86 +59,9 @@ opcode_data_t *const opcodes[] = {
.encoding_class = ENCODING_OPCODE_REGISTER,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 | OPERAND_SIZE_32 | OPERAND_SIZE_64 },
},
},
// PUSH reg64
&(opcode_data_t) {
.mnemonic = "push",
.opcode = 0x50,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_OPCODE_REGISTER,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// NOT reg16
&(opcode_data_t) {
.mnemonic = "not",
.opcode = 0xF7,
.opcode_extension = 2,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
},
},
// NOT reg32
&(opcode_data_t) {
.mnemonic = "not",
.opcode = 0xF7,
.opcode_extension = 2,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
},
},
// NOT reg64
&(opcode_data_t) {
.mnemonic = "not",
.opcode = 0xF7,
.opcode_extension = 2,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// NEG reg16
&(opcode_data_t) {
.mnemonic = "neg",
.opcode = 0xF7,
.opcode_extension = 3,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
},
},
// NEG reg32
&(opcode_data_t) {
.mnemonic = "neg",
.opcode = 0xF7,
.opcode_extension = 3,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
},
},
// NEG reg64
&(opcode_data_t) {
.mnemonic = "neg",
.opcode = 0xF7,
.opcode_extension = 3,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
nullptr,
};

View File

@ -1,5 +1,4 @@
#include "encoder.h"
#include "../bytes.h"
#include "../data/opcodes.h"
#include "symbols.h"
#include <assert.h>
@ -16,12 +15,6 @@ error_t *const err_encoder_invalid_size_suffix =
&(error_t){.message = "Invalid number size suffix"};
error_t *const err_encoder_unknown_symbol_reference =
&(error_t){.message = "Referenced an unknown symbol"};
error_t *const err_encoder_no_encoding_found =
&(error_t){.message = "No encoding found for instruction"};
error_t *const err_encoder_not_implemented =
&(error_t){.message = "Implementation for this opcode is missing"};
error_t *const err_encoder_unexpected_length =
&(error_t){.message = "Unexpectedly long encoding"};
error_t *encoder_alloc(encoder_t **output) {
*output = nullptr;
@ -163,54 +156,8 @@ error_t *encoder_set_register_value(ast_node_t *node) {
if (strcmp(value, registers[i]->name) == 0) {
node->value.reg.id = registers[i]->id;
node->value.reg.size = registers[i]->size;
return nullptr;
}
}
return err_encoder_invalid_register;
}
/**
* Set the opcode extension in the modrm field
*/
static inline uint8_t modrm_extension(uint8_t modrm, uint8_t extension) {
assert(extension != opcode_extension_none);
assert((extension & 0b111) == extension);
return (modrm & ~modrm_reg_mask) | extension << 3;
}
/**
* Return the rex bit for reg field in modrm
*/
static inline uint8_t modrm_reg_rex(uint8_t rex, register_id_t id) {
if (id & 0b1000)
rex |= rex_prefix_r;
return rex;
}
/**
* update modrm reg field with the given register, must be used alongside
* modrm_reg_rex
*/
static inline uint8_t modrm_reg(uint8_t modrm, register_id_t id) {
return (modrm & ~modrm_reg_mask) | (id & 0b111) << 3;
}
/**
* Return the rex bit for rm field in modrm
*/
static inline uint8_t modrm_rm_rex(uint8_t rex, register_id_t id) {
if (id & 0b1000)
rex |= rex_prefix_b;
return rex;
}
/**
* update modrm rm field with the given register, must be used alongside
* modrm_rm_rex
*/
static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) {
assert((modrm & modrm_mod_mask) == modrm_mod_register);
return (modrm & ~modrm_rm_mask) | (id & 0b111);
}
/**
@ -238,272 +185,8 @@ error_t *encoder_first_pass(encoder_t *encoder, ast_node_t *node) {
return nullptr;
}
bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
switch (info->kind) {
case OPERAND_REGISTER:
return operand->id == NODE_REGISTER &&
operand->value.reg.size == info->size;
case OPERAND_MEMORY:
return operand->id == NODE_MEMORY;
case OPERAND_IMMEDIATE: {
if (operand->id != NODE_IMMEDIATE)
return false;
ast_node_t *child = operand->children[0];
if (child->id == NODE_NUMBER)
return (child->value.number.size & info->size) > 0;
else if (child->id == NODE_LABEL_REFERENCE)
return info->size == OPERAND_SIZE_32;
// FIXME: first pass should give us information about the distance of
// the label reference so we can pick a size more appropriately instead
// of just defaulting to 32 bits
break;
} // end OPERAND_IMMEDIATE case
}
assert(false && "unreachable");
__builtin_unreachable();
}
bool is_opcode_match(opcode_data_t *opcode, const char *mnemonic,
ast_node_t *operands) {
if (strcmp(opcode->mnemonic, mnemonic) != 0)
return false;
if (opcode->operand_count != operands->len)
return false;
for (size_t i = 0; i < operands->len; ++i) {
if (!is_operand_match(&opcode->operands[i], operands->children[i]))
return false;
}
return true;
}
error_t *encoder_get_opcode_data(ast_node_t *instruction, ast_node_t *operands,
opcode_data_t **opcode_out) {
const char *mnemonic = instruction->children[0]->token_entry->token.value;
for (size_t i = 0; opcodes[i]; ++i) {
opcode_data_t *opcode = opcodes[i];
if (is_opcode_match(opcode, mnemonic, operands)) {
*opcode_out = opcode;
return nullptr;
}
}
return err_encoder_no_encoding_found;
}
error_t *encode_two_operand(encoder_t *encoder, opcode_data_t *opcode,
ast_node_t *operands, bytes_t *encoding,
uint8_t *rex) {
(void)encoder;
(void)opcode;
(void)operands;
(void)encoding;
(void)rex;
assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");
return err_encoder_not_implemented;
}
error_t *encode_one_register_in_opcode(encoder_t *encoder,
opcode_data_t *opcode,
ast_node_t *operands, bytes_t *encoding,
uint8_t *rex) {
(void)encoder;
(void)opcode;
register_id_t id = operands->children[0]->value.reg.id;
encoding->buffer[encoding->len - 1] |= id & 0b111;
if ((id & 0b1000) > 0) {
*rex |= rex_prefix_r;
}
return nullptr;
}
error_t *encode_one_register(encoder_t *encoder, opcode_data_t *opcode,
ast_node_t *operands, bytes_t *encoding,
uint8_t *rex) {
(void)encoder;
assert(operands->len == 1);
assert(operands->children[0]->id == NODE_REGISTER);
register_id_t id = operands->children[0]->value.reg.id;
uint8_t modrm = modrm_mod_register;
if (opcode->opcode_extension != opcode_extension_none) {
// register goes in rm field, extension goes in mod field
modrm = modrm_extension(modrm, opcode->opcode_extension);
modrm = modrm_rm(modrm, id);
*rex = modrm_rm_rex(*rex, id);
} else {
// register goes in reg field
// NOTE:
// it's actually likely this case just doesn't exist at all and all
// opcodes that take one register in modr/m _all_ have extended opcdes
modrm = modrm_reg(modrm, id);
*rex = modrm_reg_rex(*rex, id);
}
return bytes_append_uint8(encoding, modrm);
}
error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
ast_node_t *operands, bytes_t *encoding,
uint8_t *rex) {
(void)encoder;
(void)opcode;
(void)rex;
assert(operands->len == 1);
assert(operands->children[0]->id == NODE_IMMEDIATE);
assert(operands->children[0]->len == 1);
ast_node_t *immediate = operands->children[0]->children[0];
assert(immediate->id == NODE_NUMBER ||
immediate->id == NODE_LABEL_REFERENCE);
if (immediate->id == NODE_NUMBER) {
uint64_t value = immediate->value.number.value;
operand_size_t size = opcode->operands[0].size;
error_t *err = nullptr;
switch (size) {
case OPERAND_SIZE_8:
err = bytes_append_uint8(encoding, value);
break;
case OPERAND_SIZE_16:
err = bytes_append_uint16(encoding, value);
break;
case OPERAND_SIZE_32:
err = bytes_append_uint32(encoding, value);
break;
case OPERAND_SIZE_64:
err = bytes_append_uint64(encoding, value);
break;
default:
assert(false && "intentionally unhandled");
}
return err;
} else {
// FIXME: this still assumes references are always 32 bit
uint32_t value = 0xDEADBEEF;
return bytes_append_uint32(encoding, value);
}
}
error_t *encode_one_memory(encoder_t *encoder, opcode_data_t *opcode,
ast_node_t *operands, bytes_t *encoding,
uint8_t *rex) {
(void)encoder;
(void)opcode;
(void)operands;
(void)encoding;
(void)rex;
return err_encoder_not_implemented;
}
error_t *encode_one_operand(encoder_t *encoder, opcode_data_t *opcode,
ast_node_t *operands, bytes_t *encoding,
uint8_t *rex) {
switch (opcode->operands[0].kind) {
case OPERAND_REGISTER:
if (opcode->encoding_class == ENCODING_OPCODE_REGISTER)
return encode_one_register_in_opcode(encoder, opcode, operands,
encoding, rex);
else
return encode_one_register(encoder, opcode, operands, encoding,
rex);
case OPERAND_MEMORY:
return encode_one_memory(encoder, opcode, operands, encoding, rex);
case OPERAND_IMMEDIATE:
return encode_one_immediate(encoder, opcode, operands, encoding, rex);
}
}
error_t *encoder_encode_instruction(encoder_t *encoder,
ast_node_t *instruction) {
ast_node_t *operands = instruction->children[1];
opcode_data_t *opcode = nullptr;
error_t *err = encoder_get_opcode_data(instruction, operands, &opcode);
if (err)
return err;
uint8_t rex = 0;
bytes_t *encoding = LOCAL_BYTES(32);
if (opcode->opcode > 0xFF &&
(err = bytes_append_uint8(encoding, opcode->opcode >> 8)))
return err;
if ((err = bytes_append_uint8(encoding, opcode->opcode & 0xFF)))
return err;
// NOTE:operand encoders all expect the opcode to be in the buffer already.
// Some of them rely on this to encode the register value in the opcode
// byte.
switch (opcode->operand_count) {
case 0:
break;
case 1:
err = encode_one_operand(encoder, opcode, operands, encoding, &rex);
break;
case 2:
err = encode_two_operand(encoder, opcode, operands, encoding, &rex);
break;
default:
err = err_encoder_not_implemented;
}
if (err)
return err;
// produce the actual encoding output in the NODE_INSTRUCTION value
uint8_t *output = instruction->value.encoding.encoding;
size_t output_len = 0;
// Handle prefixes
if (opcode->rex_w_prefix)
rex = rex_prefix_w;
if (opcode->address_size_prefix)
output[output_len++] = memory_size_prefix;
if (opcode->operand_size_prefix)
output[output_len++] = operand_size_prefix;
if (rex > 0)
output[output_len++] = rex;
// copy the encoded opcode and operands
if (encoding->len > 20)
return err_encoder_unexpected_length;
memcpy(output + output_len, encoding->buffer, encoding->len);
output_len += encoding->len;
instruction->value.encoding.len = output_len;
return nullptr;
}
/**
* Perform the second pass that performs actual encoding. Will use
* placeholder values for label references because instruction size has not
* yet been determined.
*/
error_t *encoder_encoding_pass(encoder_t *encoder, ast_node_t *root) {
for (size_t i = 0; i < root->len; ++i) {
if (root->children[i]->id != NODE_INSTRUCTION)
continue;
ast_node_t *instruction = root->children[i];
error_t *err = encoder_encode_instruction(encoder, instruction);
if (err)
return err;
}
return nullptr;
}
opcode_data_t *encoder_find_opcode(ast_node_t *instruction) {
for (size_t i = 0; opcodes[i] != nullptr; ++i) {
const char *mnemonic =
instruction->children[0]->token_entry->token.value;
ast_node_t *operands = instruction->children[1];
if (is_opcode_match(opcodes[i], mnemonic, operands))
return opcodes[i];
}
return nullptr;
}
@ -522,5 +205,5 @@ error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast) {
err = encoder_check_symbols(encoder);
if (err)
return err;
return encoder_encoding_pass(encoder, ast);
return nullptr;
}

View File

@ -7,15 +7,6 @@ typedef struct encoder {
symbol_table_t *symbols;
} encoder_t;
constexpr uint8_t modrm_mod_memory = 0b00'000'000;
constexpr uint8_t modrm_mod_memory_displacement8 = 0b01'000'000;
constexpr uint8_t modrm_mod_memory_displacement32 = 0b10'000'000;
constexpr uint8_t modrm_mod_register = 0b11'000'000;
constexpr uint8_t modrm_reg_mask = 0b00'111'000;
constexpr uint8_t modrm_rm_mask = 0b00'000'111;
constexpr uint8_t modrm_mod_mask = 0b11'000'000;
error_t *encoder_alloc(encoder_t **encoder);
error_t *encoder_encode(encoder_t *encoder, ast_node_t *ast);
void encoder_free(encoder_t *encoder);
@ -25,8 +16,5 @@ extern error_t *const err_encoder_number_overflow;
extern error_t *const err_encoder_invalid_number_format;
extern error_t *const err_encoder_invalid_size_suffix;
extern error_t *const err_encoder_unknown_symbol_reference;
extern error_t *const err_encoder_no_encoding_found;
extern error_t *const err_encoder_not_implemented;
extern error_t *const err_encoder_unexpected_length;
#endif // INCLUDE_ENCODER_ENCODER_H_

View File

@ -1,5 +1,3 @@
#include "ast.h"
#include "encoder/encoder.h"
#include "error.h"
#include "lexer.h"
#include "parser/parser.h"
@ -10,13 +8,7 @@
#include <stdlib.h>
#include <string.h>
typedef enum mode {
MODE_INVALID = -1,
MODE_AST,
MODE_TEXT,
MODE_TOKENS,
MODE_ENCODING,
} mode_t;
typedef enum mode { MODE_AST, MODE_TEXT, MODE_TOKENS } mode_t;
void print_tokens(tokenlist_t *list) {
for (auto entry = list->head; entry; entry = entry->next) {
@ -58,61 +50,18 @@ error_t *print_ast(tokenlist_t *list) {
return nullptr;
}
void print_hex(size_t len, uint8_t bytes[static len]) {
for (size_t i = 0; i < len; i++) {
printf("%02x", bytes[i]);
if (i < len - 1) {
printf(" ");
}
}
printf("\n");
}
error_t *print_encoding(tokenlist_t *list) {
parse_result_t result = parse(list->head);
if (result.err)
return result.err;
encoder_t *encoder;
error_t *err = encoder_alloc(&encoder);
if (err)
goto cleanup_ast;
err = encoder_encode(encoder, result.node);
if (err)
goto cleanup_ast;
ast_node_t *root = result.node;
for (size_t i = 0; i < root->len; ++i) {
ast_node_t *node = root->children[i];
if (node->id != NODE_INSTRUCTION)
continue;
print_hex(node->value.encoding.len, node->value.encoding.encoding);
}
encoder_free(encoder);
ast_node_free(result.node);
return nullptr;
cleanup_ast:
ast_node_free(result.node);
return err;
}
int get_execution_mode(int argc, char *argv[]) {
if (argc != 3)
return MODE_INVALID;
if (argc != 3 || (strcmp(argv[1], "tokens") != 0 &&
strcmp(argv[1], "text") != 0 && strcmp(argv[1], "ast"))) {
puts("Usage: oas [tokens|text|ast] <filename>");
exit(1);
}
if (strcmp(argv[1], "tokens") == 0)
return MODE_TOKENS;
if (strcmp(argv[1], "text") == 0)
return MODE_TEXT;
if (strcmp(argv[1], "ast") == 0)
return MODE_AST;
if (strcmp(argv[1], "encoding") == 0)
return MODE_ENCODING;
return MODE_INVALID;
}
error_t *do_action(mode_t mode, tokenlist_t *list) {
@ -125,20 +74,12 @@ error_t *do_action(mode_t mode, tokenlist_t *list) {
return nullptr;
case MODE_AST:
return print_ast(list);
case MODE_ENCODING:
return print_encoding(list);
case MODE_INVALID:
/* can't happen */
}
__builtin_unreachable();
}
int main(int argc, char *argv[]) {
mode_t mode = get_execution_mode(argc, argv);
if (mode == MODE_INVALID) {
puts("Usage: oas [tokens|text|ast|encoding] <filename>");
exit(1);
}
char *filename = argv[2];
lexer_t *lex = &(lexer_t){};

View File

@ -1,5 +1,4 @@
#include "combinators.h"
#include "util.h"
// Parse a list of the given parser delimited by the given token id. Does not
// store the delimiters in the parent node
@ -123,12 +122,5 @@ parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
}
current = result.next;
}
// token stream ended before we matched all parsers
if (parser != nullptr) {
ast_node_free(all);
return parse_no_match();
}
return parse_success(all, current);
}

View File

@ -136,28 +136,22 @@ parse_result_t parse_directive_options(tokenlist_entry_t *current) {
}
parse_result_t parse_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_dot, parse_directive_options, parse_newline,
nullptr};
parser_t parsers[] = {parse_dot, parse_directive_options, nullptr};
return parse_consecutive(current, NODE_DIRECTIVE, parsers);
}
parse_result_t parse_instruction(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
nullptr};
parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
return parse_consecutive(current, NODE_INSTRUCTION, parsers);
}
parse_result_t parse_statement(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
parse_newline, nullptr};
nullptr};
return parse_any(current, parsers);
}
parse_result_t parse(tokenlist_entry_t *current) {
current = tokenlist_skip_trivia(current);
parse_result_t result =
parse_many(current, NODE_PROGRAM, true, parse_statement);
if (result.node != nullptr)
ast_node_prune(result.node, NODE_NEWLINE);
return result;
return parse_many(current, NODE_PROGRAM, true, parse_statement);
}

View File

@ -63,10 +63,6 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
}
parse_result_t parse_newline(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
}
parse_result_t parse_label_reference(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
nullptr);

View File

@ -18,7 +18,6 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
parse_result_t parse_minus(tokenlist_entry_t *current);
parse_result_t parse_asterisk(tokenlist_entry_t *current);
parse_result_t parse_dot(tokenlist_entry_t *current);
parse_result_t parse_newline(tokenlist_entry_t *current);
parse_result_t parse_label_reference(tokenlist_entry_t *current);
/* These are "primitives" with a different name and some extra validation on top

View File

@ -86,6 +86,7 @@ bool is_trivia(tokenlist_entry_t *trivia) {
switch (trivia->token.id) {
case TOKEN_WHITESPACE:
case TOKEN_COMMENT:
case TOKEN_NEWLINE:
return true;
default:
return false;

View File

@ -1,164 +0,0 @@
#include "../src/bytes.h"
#include "munit.h"
MunitResult test_bytes_initializer(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
for (size_t i = 0; i < 16; ++i)
munit_assert_uint8(bytes->buffer[i], ==, 0);
return MUNIT_OK;
}
MunitResult test_bytes_append_uint8(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
for (size_t i = 0; i < 16; ++i) {
error_t *err = bytes_append_uint8(bytes, (uint8_t)i);
munit_assert_null(err);
munit_assert_uint8(bytes->buffer[i], ==, (uint8_t)i);
}
error_t *err = bytes_append_uint8(bytes, 0xFF);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
return MUNIT_OK;
}
MunitResult test_bytes_append_array(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
uint8_t test_array[] = {0x01, 0x02, 0x03, 0x04, 0x05};
size_t array_len = sizeof(test_array) / sizeof(test_array[0]);
error_t *err = bytes_append_array(bytes, array_len, test_array);
munit_assert_null(err);
munit_assert_size(bytes->len, ==, array_len);
for (size_t i = 0; i < array_len; ++i) {
munit_assert_uint8(bytes->buffer[i], ==, test_array[i]);
}
uint8_t second_array[] = {0x06, 0x07, 0x08};
size_t second_len = sizeof(second_array) / sizeof(second_array[0]);
err = bytes_append_array(bytes, second_len, second_array);
munit_assert_null(err);
munit_assert_size(bytes->len, ==, array_len + second_len);
for (size_t i = 0; i < second_len; ++i) {
munit_assert_uint8(bytes->buffer[array_len + i], ==, second_array[i]);
}
uint8_t overflow_array[10] = {0}; // Array that would exceed capacity
err = bytes_append_array(bytes, sizeof(overflow_array), overflow_array);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
munit_assert_size(bytes->len, ==, array_len + second_len);
return MUNIT_OK;
}
MunitResult test_bytes_append_bytes(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *src = LOCAL_BYTES(8);
bytes_t *dst = LOCAL_BYTES(16);
// Fill source bytes with test data
for (uint8_t i = 0; i < 5; ++i) {
error_t *err = bytes_append_uint8(src, i + 1);
munit_assert_null(err);
}
munit_assert_size(src->len, ==, 5);
// Append source to destination
error_t *err = bytes_append_bytes(dst, src);
munit_assert_null(err);
munit_assert_size(dst->len, ==, src->len);
// Verify destination contents match source
for (size_t i = 0; i < src->len; ++i) {
munit_assert_uint8(dst->buffer[i], ==, src->buffer[i]);
}
// Fill source with more data and append again
for (uint8_t i = 0; i < 3; ++i) {
err = bytes_append_uint8(src, i + 6);
munit_assert_null(err);
}
munit_assert_size(src->len, ==, 8);
// Append updated source
err = bytes_append_bytes(dst, src);
munit_assert_null(err);
munit_assert_size(dst->len, ==, 13); // 5 + 8
// Test capacity boundary
src->len = 4; // manually set length to barely not fit
err = bytes_append_bytes(dst, src);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
munit_assert_size(dst->len, ==, 13); // Length unchanged after error
return MUNIT_OK;
}
MunitResult test_bytes_append_uint16(const MunitParameter params[], void *data) {
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
bytes_append_uint16(bytes, 0xFFAA);
munit_assert_size(bytes->len, ==, 2);
munit_assert_uint8(bytes->buffer[0], ==, 0xAA);
munit_assert_uint8(bytes->buffer[1], ==, 0xFF);
return MUNIT_OK;
}
MunitResult test_bytes_append_uint32(const MunitParameter params[], void *data) {
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
bytes_append_uint32(bytes, 0xAABBCCDD);
munit_assert_size(bytes->len, ==, 4);
munit_assert_uint8(bytes->buffer[0], ==, 0xDD);
munit_assert_uint8(bytes->buffer[1], ==, 0xCC);
munit_assert_uint8(bytes->buffer[2], ==, 0xBB);
munit_assert_uint8(bytes->buffer[3], ==, 0xAA);
return MUNIT_OK;
}
MunitResult test_bytes_append_uint64(const MunitParameter params[], void *data) {
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
bytes_append_uint64(bytes, 0xAABBCCDDEEFF9988);
munit_assert_size(bytes->len, ==, 8);
munit_assert_uint8(bytes->buffer[0], ==, 0x88);
munit_assert_uint8(bytes->buffer[1], ==, 0x99);
munit_assert_uint8(bytes->buffer[2], ==, 0xFF);
munit_assert_uint8(bytes->buffer[3], ==, 0xEE);
munit_assert_uint8(bytes->buffer[4], ==, 0xDD);
munit_assert_uint8(bytes->buffer[5], ==, 0xCC);
munit_assert_uint8(bytes->buffer[6], ==, 0xBB);
munit_assert_uint8(bytes->buffer[7], ==, 0xAA);
return MUNIT_OK;
}
MunitTest bytes_tests[] = {
{"/initializer", test_bytes_initializer, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint8", test_bytes_append_uint8, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_array", test_bytes_append_array, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_bytes", test_bytes_append_bytes, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint16", test_bytes_append_uint16, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint32", test_bytes_append_uint32, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint64", test_bytes_append_uint64, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};

View File

@ -1,5 +0,0 @@
; regression test for two issues:
; - parsing two zero operand instructions in a row
; - a zero operand instruction just before eof
syscall
ret

View File

@ -4,7 +4,6 @@ extern MunitTest ast_tests[];
extern MunitTest lexer_tests[];
extern MunitTest regression_tests[];
extern MunitTest symbols_tests[];
extern MunitTest bytes_tests[];
int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
MunitSuite suites[] = {
@ -12,7 +11,6 @@ int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
{"/ast", ast_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{"/lexer", lexer_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{"/symbols", symbols_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{"/bytes", bytes_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
{nullptr, nullptr, nullptr, 0, MUNIT_SUITE_OPTION_NONE},
};

View File

@ -23,46 +23,9 @@ MunitResult test_regression_trivia_head(const MunitParameter params[], void *dat
ast_node_free(result.node);
tokenlist_free(list);
return MUNIT_OK;
}
MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
(void)params;
(void)data;
lexer_t *lex = &(lexer_t){};
error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
munit_assert_null(err);
tokenlist_t *list;
err = tokenlist_alloc(&list);
munit_assert_null(err);
err = tokenlist_fill(list, lex);
munit_assert_null(err);
parse_result_t result = parse(list->head);
munit_assert_null(result.err);
munit_assert_null(result.next);
// Both children should be instructions
munit_assert_size(result.node->len, ==, 2);
munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);
// And they should have empty operands
munit_assert_size(result.node->children[0]->len, ==, 2);
munit_assert_size(result.node->children[1]->len, ==, 2);
munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);
ast_node_free(result.node);
tokenlist_free(list);
return MUNIT_OK;
}
MunitTest regression_tests[] = {
{"/trivia_head", test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/no_operands_eof", test_no_operands_eof, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};