From d3141e764c216aa66d9485ac8c137548fcba3a90 Mon Sep 17 00:00:00 2001
From: omicron <omicron.me@protonmail.com>
Date: Thu, 3 Apr 2025 03:43:56 +0200
Subject: [PATCH] initial symbol table implementation

---
 src/encoder/symbols.c | 149 ++++++++++++++++++++++++++++++++++++++++++
 src/encoder/symbols.h |  42 ++++++++++++
 2 files changed, 191 insertions(+)
 create mode 100644 src/encoder/symbols.c
 create mode 100644 src/encoder/symbols.h

diff --git a/src/encoder/symbols.c b/src/encoder/symbols.c
new file mode 100644
index 0000000..413d259
--- /dev/null
+++ b/src/encoder/symbols.c
@@ -0,0 +1,149 @@
+#include "symbols.h"
+#include "../error.h"
+#include <stdlib.h>
+#include <string.h>
+
+constexpr size_t symbol_table_default_cap = 64; // entries allocated up front
+constexpr size_t symbol_table_max_cap = 1 << 16; // growth is refused at this cap
+// Error values returned by the functions in this file.
+error_t *err_symbol_table_invalid_node = &(error_t){
+    .message = "Unexpected node id when adding symbol to symbol table"};
+error_t *err_symbol_table_max_cap = &(error_t){
+    .message = "Failed to increase symbol table length, max capacity reached"};
+error_t *err_symbol_table_incompatible_symbols =
+    &(error_t){.message = "Failed to update symbol with incompatible kind"};
+
+/** Creates an empty table in *output; *output is nullptr when this fails. */
+error_t *symbol_table_alloc(symbol_table_t **output) {
+    *output = nullptr;
+
+    symbol_table_t *fresh = calloc(1, sizeof *fresh);
+    if (!fresh)
+        return err_allocation_failed;
+
+    symbol_t *slots = calloc(symbol_table_default_cap, sizeof *slots);
+    if (!slots) {
+        free(fresh);
+        return err_allocation_failed;
+    }
+
+    *fresh = (symbol_table_t){
+        .cap = symbol_table_default_cap, .len = 0, .symbols = slots};
+    *output = fresh;
+    return nullptr;
+}
+
+void symbol_table_free(symbol_table_t *table) {
+    if (table != nullptr) free(table->symbols); // nullptr is a no-op, like free()
+    free(table); // entry names are borrowed (never owned) and are not freed here
+}
+
+/** Doubles the capacity; the table is left untouched when growing fails. */
+error_t *symbol_table_grow_cap(symbol_table_t *table) {
+    if (table->cap >= symbol_table_max_cap)
+        return err_symbol_table_max_cap;
+
+    // Keep the old pointer until realloc succeeds so nothing leaks on failure.
+    const size_t doubled = 2 * table->cap;
+    symbol_t *grown = realloc(table->symbols, doubled * sizeof *grown);
+    if (!grown)
+        return err_allocation_failed;
+    table->cap = doubled;
+    table->symbols = grown;
+    return nullptr;
+}
+
+/** Sets *kind and *name (borrowed from the token); nullptr on success. */
+error_t *symbol_table_get_node_info(ast_node_t *node, symbol_kind_t *kind,
+                                    char **name) {
+    switch (node->id) {
+    case NODE_LABEL:
+        *kind = SYMBOL_LOCAL;
+        *name = node->children[0]->token_entry->token.value;
+        return nullptr;
+    case NODE_LABEL_REFERENCE:
+        *kind = SYMBOL_REFERENCE;
+        *name = node->token_entry->token.value;
+        return nullptr;
+    // TODO: when .import and .export directives are created add them here
+    default:
+        return err_symbol_table_invalid_node;
+    }
+}
+
+/*
+old  \  new  | REFERENCE | LOCAL    | IMPORT   | EXPORT   |
+-------------|-----------|----------|----------|----------|
+REFERENCE    |           | replace  | replace  | replace  |
+-------------|-----------|----------|----------|----------|
+LOCAL        |           |          |   ERR    | replace  |
+-------------|-----------|----------|----------|----------|
+IMPORT       |           |          |          |   ERR    |
+-------------|-----------|----------|----------|----------|
+EXPORT       |           |          |   ERR    |          |
+-------------|-----------|----------|----------|----------|
+*/
+
+/** True when an entry of kind `old` must be replaced by one of kind `new`. */
+bool symbol_table_should_update(symbol_kind_t old, symbol_kind_t new) {
+    const bool was_reference = old == SYMBOL_REFERENCE;
+    const bool is_export_of_local = old == SYMBOL_LOCAL && new == SYMBOL_EXPORT;
+    // References yield to any other kind; locals are only upgraded to exports.
+    return was_reference ? new != SYMBOL_REFERENCE : is_export_of_local;
+}
+
+/** True when kind `new` conflicts with an existing entry of kind `old`. */
+bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
+    // Importing a name that is defined here, or exporting an imported one.
+    const bool import_clash =
+        new == SYMBOL_IMPORT && (old == SYMBOL_LOCAL || old == SYMBOL_EXPORT);
+    return import_clash || (new == SYMBOL_EXPORT && old == SYMBOL_IMPORT);
+}
+
+/**
+ * Appends a new entry; `name` and `node` are borrowed, not copied.
+ *
+ * @pre The symbol _must not_ already be in the table.
+ * @return nullptr on success, or the error from growing the table.
+ */
+error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
+                          ast_node_t *node) {
+    if (table->len >= table->cap) {
+        error_t *err = symbol_table_grow_cap(table);
+        if (err)
+            return err;
+    }
+
+    // Advance len so the entry is visible to lookup and not overwritten.
+    table->symbols[table->len++] =
+        (symbol_t){.name = name, .kind = kind, .node = node};
+    return nullptr;
+}
+
+/** Inserts the symbol described by `node`, or merges it into its entry. */
+error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node) {
+    char *name;
+    symbol_kind_t kind;
+    error_t *err = symbol_table_get_node_info(node, &kind, &name);
+    if (err != nullptr)
+        return err;
+
+    symbol_t *existing = symbol_table_lookup(table, name);
+    if (existing == nullptr)
+        return symbol_table_add(table, name, kind, node);
+    const symbol_kind_t old_kind = existing->kind;
+    if (symbol_table_should_error(old_kind, kind))
+        return err_symbol_table_incompatible_symbols;
+    // Either the newcomer supersedes the stored entry or the entry is kept.
+    if (symbol_table_should_update(old_kind, kind))
+        *existing = (symbol_t){.name = name, .kind = kind, .node = node};
+    return nullptr;
+}
+
+// Linear scan by name: returns the first matching entry, nullptr if absent.
+symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name) {
+    size_t i = 0;
+    while (i < table->len && strcmp(table->symbols[i].name, name) != 0)
+        ++i;
+    return i < table->len ? &table->symbols[i] : nullptr;
+}
diff --git a/src/encoder/symbols.h b/src/encoder/symbols.h
new file mode 100644
index 0000000..344d601
--- /dev/null
+++ b/src/encoder/symbols.h
@@ -0,0 +1,42 @@
+#ifndef INCLUDE_ENCODER_SYMBOLS_H_
+#define INCLUDE_ENCODER_SYMBOLS_H_
+
+#include "../ast.h"
+
+typedef enum symbol_kind {
+    SYMBOL_REFERENCE, // name has only been referenced so far
+    SYMBOL_LOCAL,     // name is defined by a label
+    SYMBOL_EXPORT,    // name is exported; replaces a LOCAL or REFERENCE entry
+    SYMBOL_IMPORT,    // name is imported; conflicts with LOCAL and EXPORT
+} symbol_kind_t;
+
+/**
+ * Represents a symbol in the program.
+ *
+ * Symbols with the same name can only be in the table once. LOCAL, IMPORT and
+ * EXPORT symbols take precedence over REFERENCE symbols. If any reference
+ * symbols remain after the first encoding pass this indicates an error. Trying
+ * to add an IMPORT or EXPORT symbol if the same name already exists as the
+ * other kind is an error.
+ *
+ * The symbol table never takes ownership of the name string; it is borrowed
+ * straight from the token value of the node that introduced the symbol.
+ */
+typedef struct symbol {
+    char *name;         // borrowed from the node's token; not owned by the table
+    symbol_kind_t kind; // current kind; may be replaced by a later update
+    ast_node_t *node;   // node that most recently set this entry
+} symbol_t;
+
+typedef struct symbol_table {
+    size_t cap;        // number of entries `symbols` has room for
+    size_t len;        // number of entries currently in use
+    symbol_t *symbols; // heap-allocated array of `cap` entries
+} symbol_table_t;
+
+error_t *symbol_table_alloc(symbol_table_t **table); // nullptr on success
+void symbol_table_free(symbol_table_t *table); // frees table and entry array
+error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node); // add/merge
+symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name); // or nullptr
+
+#endif // INCLUDE_ENCODER_SYMBOLS_H_