Compare commits
4 Commits
2bea87b39a
...
b38b5d220a
Author | SHA1 | Date | |
---|---|---|---|
b38b5d220a | |||
9549951fe1 | |||
0afc1d869a | |||
d3141e764c |
@ -3,10 +3,14 @@
|
|||||||
|
|
||||||
<label> ::= <identifier> <colon>
|
<label> ::= <identifier> <colon>
|
||||||
|
|
||||||
<directive> ::= <dot> <section_directive>
|
<directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> )
|
||||||
|
|
||||||
<section_directive> ::= "section" <identifier>
|
<section_directive> ::= "section" <identifier>
|
||||||
|
|
||||||
|
<export_directive> ::= "export" <identifier>
|
||||||
|
|
||||||
|
<import_directive> ::= "import" <identifier>
|
||||||
|
|
||||||
<instruction> ::= <identifier> <operands>
|
<instruction> ::= <identifier> <operands>
|
||||||
|
|
||||||
<operands> ::= <operand> ( <comma> <operand> )*
|
<operands> ::= <operand> ( <comma> <operand> )*
|
||||||
|
@ -123,6 +123,10 @@ const char *ast_node_id_to_cstr(node_id_t id) {
|
|||||||
return "NODE_PLUS_OR_MINUS";
|
return "NODE_PLUS_OR_MINUS";
|
||||||
case NODE_SECTION_DIRECTIVE:
|
case NODE_SECTION_DIRECTIVE:
|
||||||
return "NODE_SECTION_DIRECTIVE";
|
return "NODE_SECTION_DIRECTIVE";
|
||||||
|
case NODE_IMPORT_DIRECTIVE:
|
||||||
|
return "NODE_IMPORT_DIRECTIVE";
|
||||||
|
case NODE_EXPORT_DIRECTIVE:
|
||||||
|
return "NODE_EXPORT_DIRECTIVE";
|
||||||
case NODE_REGISTER:
|
case NODE_REGISTER:
|
||||||
return "NODE_REGISTER";
|
return "NODE_REGISTER";
|
||||||
case NODE_SECTION:
|
case NODE_SECTION:
|
||||||
@ -157,6 +161,10 @@ const char *ast_node_id_to_cstr(node_id_t id) {
|
|||||||
return "NODE_ASTERISK";
|
return "NODE_ASTERISK";
|
||||||
case NODE_DOT:
|
case NODE_DOT:
|
||||||
return "NODE_DOT";
|
return "NODE_DOT";
|
||||||
|
case NODE_IMPORT:
|
||||||
|
return "NODE_IMPORT";
|
||||||
|
case NODE_EXPORT:
|
||||||
|
return "NODE_EXPORT";
|
||||||
}
|
}
|
||||||
assert(!"Unreachable, weird node id" && id);
|
assert(!"Unreachable, weird node id" && id);
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
|
@ -29,10 +29,14 @@ typedef enum node_id {
|
|||||||
NODE_REGISTER_OFFSET,
|
NODE_REGISTER_OFFSET,
|
||||||
NODE_PLUS_OR_MINUS,
|
NODE_PLUS_OR_MINUS,
|
||||||
NODE_SECTION_DIRECTIVE,
|
NODE_SECTION_DIRECTIVE,
|
||||||
|
NODE_IMPORT_DIRECTIVE,
|
||||||
|
NODE_EXPORT_DIRECTIVE,
|
||||||
|
|
||||||
// Validated primitives
|
// Validated primitives
|
||||||
NODE_REGISTER,
|
NODE_REGISTER,
|
||||||
NODE_SECTION,
|
NODE_SECTION,
|
||||||
|
NODE_IMPORT,
|
||||||
|
NODE_EXPORT,
|
||||||
|
|
||||||
// Primitive nodes
|
// Primitive nodes
|
||||||
NODE_IDENTIFIER,
|
NODE_IDENTIFIER,
|
||||||
|
149
src/encoder/symbols.c
Normal file
149
src/encoder/symbols.c
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
#include "symbols.h"
|
||||||
|
#include "../error.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
constexpr size_t symbol_table_default_cap = 64;
|
||||||
|
constexpr size_t symbol_table_max_cap = 1 << 16;
|
||||||
|
|
||||||
|
error_t *err_symbol_table_invalid_node = &(error_t){
|
||||||
|
.message = "Unexpected node id when adding symbol to symbol table"};
|
||||||
|
error_t *err_symbol_table_max_cap = &(error_t){
|
||||||
|
.message = "Failed to increase symbol table length, max capacity reached"};
|
||||||
|
error_t *err_symbol_table_incompatible_symbols =
|
||||||
|
&(error_t){.message = "Failed to update symbol with incompatible kind"};
|
||||||
|
|
||||||
|
/**
 * Allocate an empty symbol table with the default capacity.
 *
 * @param output receives the new table on success; it is set to nullptr first,
 *               so it stays nullptr when an allocation fails
 * @return nullptr on success, err_allocation_failed if either allocation fails
 */
error_t *symbol_table_alloc(symbol_table_t **output) {
    *output = nullptr;

    symbol_table_t *fresh = calloc(1, sizeof(symbol_table_t));
    if (!fresh)
        return err_allocation_failed;

    symbol_t *slots = calloc(symbol_table_default_cap, sizeof(symbol_t));
    if (!slots) {
        // Do not leak the table header when the symbol array cannot be made
        free(fresh);
        return err_allocation_failed;
    }

    fresh->symbols = slots;
    fresh->cap = symbol_table_default_cap;
    fresh->len = 0;

    *output = fresh;
    return nullptr;
}
|
||||||
|
|
||||||
|
/**
 * Release a symbol table and its backing symbol array.
 *
 * Passing nullptr is a no-op (mirroring free), so cleanup paths can call this
 * unconditionally, including after a failed symbol_table_alloc that left the
 * output pointer set to nullptr.
 *
 * Only the table and its symbol array are freed here; the name strings and
 * AST nodes the symbols point at are left untouched.
 */
void symbol_table_free(symbol_table_t *table) {
    if (table == nullptr)
        return;

    free(table->symbols);
    free(table);
}
|
||||||
|
|
||||||
|
/**
 * Double the capacity of the symbol array.
 *
 * @return nullptr on success, err_symbol_table_max_cap when the capacity has
 *         already reached symbol_table_max_cap, or err_allocation_failed when
 *         realloc fails (the table is left unchanged in both error cases)
 */
error_t *symbol_table_grow_cap(symbol_table_t *table) {
    if (table->cap >= symbol_table_max_cap)
        return err_symbol_table_max_cap;

    const size_t doubled = 2 * table->cap;

    // Keep the old pointer until realloc succeeds so a failure does not leak
    symbol_t *resized = realloc(table->symbols, doubled * sizeof(symbol_t));
    if (!resized)
        return err_allocation_failed;

    table->cap = doubled;
    table->symbols = resized;
    return nullptr;
}
|
||||||
|
|
||||||
|
/**
 * Derive the symbol kind and symbol name described by an AST node.
 *
 * @param node the node to inspect; only NODE_LABEL and NODE_LABEL_REFERENCE
 *             are supported
 * @param kind receives SYMBOL_LOCAL for a label, SYMBOL_REFERENCE for a label
 *             reference
 * @param name receives the token value naming the symbol; the string is
 *             borrowed from the token, not copied
 * @return nullptr on success, err_symbol_table_invalid_node for any other
 *         node id (kind and name are not written in that case)
 */
error_t *symbol_table_get_node_info(ast_node_t *node, symbol_kind_t *kind,
                                    char **name) {
    switch (node->id) {
    case NODE_LABEL:
        *kind = SYMBOL_LOCAL;
        // The label's name is read from the token of its first child
        *name = node->children[0]->token_entry->token.value;
        return nullptr;
    case NODE_LABEL_REFERENCE:
        *kind = SYMBOL_REFERENCE;
        *name = node->token_entry->token.value;
        return nullptr;
    // TODO: when .import and .export directives are created add them here
    default:
        return err_symbol_table_invalid_node;
    }
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
old \ new | REFERENCE | LOCAL | IMPORT | EXPORT |
|
||||||
|
-------------|-----------|----------|----------|----------|
|
||||||
|
REFERENCE | | replace | replace | replace |
|
||||||
|
-------------|-----------|----------|----------|----------|
|
||||||
|
LOCAL | | | ERR | replace |
|
||||||
|
-------------|-----------|----------|----------|----------|
|
||||||
|
IMPORT | | | | ERR |
|
||||||
|
-------------|-----------|----------|----------|----------|
|
||||||
|
EXPORT | | | ERR | |
|
||||||
|
-------------|-----------|----------|----------|----------|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Decide whether an existing symbol of kind `old` should be replaced by a
 * newly seen symbol of kind `new`.
 *
 * A REFERENCE is replaced by anything that is not another REFERENCE, and a
 * LOCAL is replaced only by an EXPORT. Every other combination keeps the
 * existing entry.
 */
bool symbol_table_should_update(symbol_kind_t old, symbol_kind_t new) {
    switch (old) {
    case SYMBOL_REFERENCE:
        return new != SYMBOL_REFERENCE;
    case SYMBOL_LOCAL:
        return new == SYMBOL_EXPORT;
    default:
        return false;
    }
}
|
||||||
|
|
||||||
|
/**
 * Decide whether seeing a symbol of kind `new` conflicts with an existing
 * symbol of kind `old`.
 *
 * An IMPORT conflicts with an existing LOCAL or EXPORT, and an EXPORT
 * conflicts with an existing IMPORT. No other combination is an error.
 */
bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
    switch (new) {
    case SYMBOL_IMPORT:
        return old == SYMBOL_LOCAL || old == SYMBOL_EXPORT;
    case SYMBOL_EXPORT:
        return old == SYMBOL_IMPORT;
    default:
        return false;
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @pre The symbol _must not_ already be in the table.
|
||||||
|
*/
|
||||||
|
error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
|
||||||
|
ast_node_t *node) {
|
||||||
|
if (table->len >= table->cap) {
|
||||||
|
error_t *err = symbol_table_grow_cap(table);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
table->symbols[table->len] = (symbol_t){
|
||||||
|
.name = name,
|
||||||
|
.kind = kind,
|
||||||
|
.node = node,
|
||||||
|
};
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Record the symbol described by `node` in the table.
 *
 * A name that is not in the table yet is added. For a name that already
 * exists, the existing entry is overwritten when symbol_table_should_update
 * allows it and otherwise left as it is.
 *
 * @return nullptr on success; the error from symbol_table_get_node_info for
 *         an unsupported node; the error from symbol_table_add when adding
 *         fails; err_symbol_table_incompatible_symbols when the new kind
 *         conflicts with the existing one
 */
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) {
    symbol_kind_t new_kind;
    char *new_name;
    error_t *err = symbol_table_get_node_info(node, &new_kind, &new_name);
    if (err != nullptr)
        return err;

    symbol_t *existing = symbol_table_lookup(table, new_name);
    if (existing == nullptr)
        return symbol_table_add(table, new_name, new_kind, node);

    if (symbol_table_should_error(existing->kind, new_kind))
        return err_symbol_table_incompatible_symbols;

    if (!symbol_table_should_update(existing->kind, new_kind))
        return nullptr;

    // Overwrite all three fields of the existing entry in place
    *existing = (symbol_t){
        .name = new_name,
        .kind = new_kind,
        .node = node,
    };
    return nullptr;
}
|
||||||
|
|
||||||
|
/**
 * Find a symbol by name using a linear scan over the stored symbols.
 *
 * @return pointer to the first entry whose name compares equal to `name`, or
 *         nullptr when there is none. The pointer refers into the table's
 *         symbol array.
 */
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name) {
    symbol_t *const end = table->symbols + table->len;

    for (symbol_t *entry = table->symbols; entry != end; ++entry) {
        if (strcmp(entry->name, name) == 0)
            return entry;
    }

    return nullptr;
}
|
42
src/encoder/symbols.h
Normal file
42
src/encoder/symbols.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#ifndef INCLUDE_ENCODER_SYMBOLS_H_
|
||||||
|
#define INCLUDE_ENCODER_SYMBOLS_H_
|
||||||
|
|
||||||
|
#include "../ast.h"
|
||||||
|
|
||||||
|
typedef enum symbol_kind {
|
||||||
|
SYMBOL_REFERENCE,
|
||||||
|
SYMBOL_LOCAL,
|
||||||
|
SYMBOL_EXPORT,
|
||||||
|
SYMBOL_IMPORT,
|
||||||
|
} symbol_kind_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represent a symbol in the program
|
||||||
|
*
|
||||||
|
* Symbols with the same name can only be in the table once. IMPORT or EXPORT
|
||||||
|
* symbols take precedence over REFERENCE symbols. If any reference symbols
|
||||||
|
* remain after the first encoding pass this indicates an error. Trying to add
|
||||||
|
* an IMPORT or EXPORT symbol if the same name already exists as the other kind
|
||||||
|
* is an error.
|
||||||
|
*
|
||||||
|
* This symbol table never takes ownership of the name string; it's lifted
|
||||||
|
* straight from the node's token_entry->token.value.
|
||||||
|
*/
|
||||||
|
typedef struct symbol {
|
||||||
|
char *name;
|
||||||
|
symbol_kind_t kind;
|
||||||
|
ast_node_t *node;
|
||||||
|
} symbol_t;
|
||||||
|
|
||||||
|
typedef struct symbol_table {
|
||||||
|
size_t cap;
|
||||||
|
size_t len;
|
||||||
|
symbol_t *symbols;
|
||||||
|
} symbol_table_t;
|
||||||
|
|
||||||
|
error_t *symbol_table_alloc(symbol_table_t **table);
|
||||||
|
void symbol_table_free(symbol_table_t *table);
|
||||||
|
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node);
|
||||||
|
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
|
||||||
|
|
||||||
|
#endif // INCLUDE_ENCODER_SYMBOLS_H_
|
43
src/main.c
43
src/main.c
@ -32,21 +32,22 @@ void print_text(tokenlist_t *list) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_ast(tokenlist_t *list) {
|
error_t *print_ast(tokenlist_t *list) {
|
||||||
parse_result_t result = parse(list->head);
|
parse_result_t result = parse(list->head);
|
||||||
if (result.err) {
|
if (result.err)
|
||||||
puts(result.err->message);
|
return result.err;
|
||||||
error_free(result.err);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
ast_node_print(result.node);
|
ast_node_print(result.node);
|
||||||
|
|
||||||
if (result.next != nullptr) {
|
if (result.next != nullptr) {
|
||||||
puts("First unparsed token:");
|
puts("First unparsed token:");
|
||||||
lexer_token_print(&result.next->token);
|
lexer_token_print(&result.next->token);
|
||||||
}
|
}
|
||||||
|
|
||||||
ast_node_free(result.node);
|
ast_node_free(result.node);
|
||||||
|
if (result.next != nullptr) {
|
||||||
|
return errorf("did not parse entire input token stream");
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
int get_execution_mode(int argc, char *argv[]) {
|
int get_execution_mode(int argc, char *argv[]) {
|
||||||
@ -63,6 +64,20 @@ int get_execution_mode(int argc, char *argv[]) {
|
|||||||
return MODE_AST;
|
return MODE_AST;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
error_t *do_action(mode_t mode, tokenlist_t *list) {
|
||||||
|
switch (mode) {
|
||||||
|
case MODE_TOKENS:
|
||||||
|
print_tokens(list);
|
||||||
|
return nullptr;
|
||||||
|
case MODE_TEXT:
|
||||||
|
print_text(list);
|
||||||
|
return nullptr;
|
||||||
|
case MODE_AST:
|
||||||
|
return print_ast(list);
|
||||||
|
}
|
||||||
|
__builtin_unreachable();
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
mode_t mode = get_execution_mode(argc, argv);
|
mode_t mode = get_execution_mode(argc, argv);
|
||||||
char *filename = argv[2];
|
char *filename = argv[2];
|
||||||
@ -81,17 +96,9 @@ int main(int argc, char *argv[]) {
|
|||||||
if (err)
|
if (err)
|
||||||
goto cleanup_tokens;
|
goto cleanup_tokens;
|
||||||
|
|
||||||
switch (mode) {
|
err = do_action(mode, list);
|
||||||
case MODE_TOKENS:
|
if (err)
|
||||||
print_tokens(list);
|
goto cleanup_tokens;
|
||||||
break;
|
|
||||||
case MODE_TEXT:
|
|
||||||
print_text(list);
|
|
||||||
break;
|
|
||||||
case MODE_AST:
|
|
||||||
print_ast(list);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
tokenlist_free(list);
|
tokenlist_free(list);
|
||||||
error_free(err);
|
error_free(err);
|
||||||
|
@ -119,8 +119,24 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
|
|||||||
return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
|
return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parse_result_t parse_import_directive(tokenlist_entry_t *current) {
|
||||||
|
parser_t parsers[] = {parse_import, parse_identifier, nullptr};
|
||||||
|
return parse_consecutive(current, NODE_IMPORT_DIRECTIVE, parsers);
|
||||||
|
}
|
||||||
|
|
||||||
|
parse_result_t parse_export_directive(tokenlist_entry_t *current) {
|
||||||
|
parser_t parsers[] = {parse_export, parse_identifier, nullptr};
|
||||||
|
return parse_consecutive(current, NODE_EXPORT_DIRECTIVE, parsers);
|
||||||
|
}
|
||||||
|
|
||||||
|
parse_result_t parse_directive_options(tokenlist_entry_t *current) {
|
||||||
|
parser_t parsers[] = {parse_section_directive, parse_import_directive,
|
||||||
|
parse_export_directive, nullptr};
|
||||||
|
return parse_any(current, parsers);
|
||||||
|
}
|
||||||
|
|
||||||
parse_result_t parse_directive(tokenlist_entry_t *current) {
|
parse_result_t parse_directive(tokenlist_entry_t *current) {
|
||||||
parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
|
parser_t parsers[] = {parse_dot, parse_directive_options, nullptr};
|
||||||
return parse_consecutive(current, NODE_DIRECTIVE, parsers);
|
return parse_consecutive(current, NODE_DIRECTIVE, parsers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,3 +101,19 @@ parse_result_t parse_section(tokenlist_entry_t *current) {
|
|||||||
return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
|
return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
|
||||||
is_section_token);
|
is_section_token);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_import_token(lexer_token_t *token) {
|
||||||
|
return strcmp(token->value, "import") == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
parse_result_t parse_import(tokenlist_entry_t *current) {
|
||||||
|
return parse_token(current, TOKEN_IDENTIFIER, NODE_IMPORT, is_import_token);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_export_token(lexer_token_t *token) {
|
||||||
|
return strcmp(token->value, "export") == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
parse_result_t parse_export(tokenlist_entry_t *current) {
|
||||||
|
return parse_token(current, TOKEN_IDENTIFIER, NODE_EXPORT, is_export_token);
|
||||||
|
}
|
||||||
|
@ -26,5 +26,7 @@ parse_result_t parse_label_reference(tokenlist_entry_t *current);
|
|||||||
*/
|
*/
|
||||||
parse_result_t parse_register(tokenlist_entry_t *current);
|
parse_result_t parse_register(tokenlist_entry_t *current);
|
||||||
parse_result_t parse_section(tokenlist_entry_t *current);
|
parse_result_t parse_section(tokenlist_entry_t *current);
|
||||||
|
parse_result_t parse_import(tokenlist_entry_t *current);
|
||||||
|
parse_result_t parse_export(tokenlist_entry_t *current);
|
||||||
|
|
||||||
#endif // INCLUDE_PARSER_PRIMITIVES_H_
|
#endif // INCLUDE_PARSER_PRIMITIVES_H_
|
||||||
|
@ -2,6 +2,9 @@
|
|||||||
|
|
||||||
; Small valid code snippet that should contain all different AST nodes
|
; Small valid code snippet that should contain all different AST nodes
|
||||||
|
|
||||||
|
.export _start
|
||||||
|
.import exit
|
||||||
|
|
||||||
_start:
|
_start:
|
||||||
mov eax, ebx
|
mov eax, ebx
|
||||||
lea eax, [eax + ebx * 4 + 8]
|
lea eax, [eax + ebx * 4 + 8]
|
||||||
@ -19,3 +22,5 @@ _start:
|
|||||||
push 0xffff:64
|
push 0xffff:64
|
||||||
push 0o777:16
|
push 0o777:16
|
||||||
push 0b0001:16
|
push 0b0001:16
|
||||||
|
mov rax, 0
|
||||||
|
call exit
|
||||||
|
Loading…
x
Reference in New Issue
Block a user