Compare commits

..

1 Commits

Author SHA1 Message Date
b4301ed650 Add a parser grammar
All checks were successful
Validate the build / validate-build (push) Successful in 24s
Currently this is a subset of the grammar, enough to get reasonable work
going.
2025-03-31 14:47:58 +02:00
56 changed files with 132 additions and 7261 deletions

View File

@ -1,2 +1,2 @@
CompileFlags:
Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"]
Add: ["-std=c23", "-x", "c"]

View File

@ -16,10 +16,8 @@ jobs:
echo "http://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories
echo "http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories
# determine correct clang version and then install it
apk update
RT_VERSION=$(apk search -v compiler-rt | grep -o "compiler-rt-[0-9]*" | head -1 | grep -o "[0-9]*")
apk add --no-cache llvm${RT_VERSION} clang${RT_VERSION} clang${RT_VERSION}-analyzer compiler-rt valgrind
apk add --no-cache llvm19 clang19 clang19-analyzer compiler-rt valgrind
# Verify versions
echo "---------------------"
@ -36,7 +34,3 @@ jobs:
- name: make validate
run: |
make validate
- name: make test
run: |
make test

4
.gitignore vendored
View File

@ -1,5 +1,7 @@
*.o
*.d
/core
/build
/oas
/oas-asan
/oas-msan
/reports

View File

@ -1,46 +1,54 @@
.PHONY: all clean distclean release debug afl asan msan validate analyze fuzz
.PHONY: all clean clean-objects clean-reports run sanitize validate fuzz
# Build the debug configuration by delegating to make/debug.mk.
# -r/-R switch off make's built-in rules and variables for the sub-make.
# $(MAKE) (instead of a literal `make`) lets the sub-make share the parent's
# jobserver and honour flags such as -n.
debug:
	$(MAKE) -rRf make/debug.mk all
CC=clang
LD=clang
CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
LDFLAGS?=
all: debug release afl asan msan
SOURCES = $(shell find src/ -type f -name '*.c')
OBJECTS = $(SOURCES:.c=.o)
DEPENDENCIES = $(SOURCES:.c=.d)
TARGET?=oas
OUTPUTS=oas oas-asan oas-msan oas-afl
RUNARGUMENTS?=-tokens tests/input/valid.asm
all: $(TARGET)
# Build the release configuration by delegating to make/release.mk.
# $(MAKE) (instead of a literal `make`) lets the sub-make share the parent's
# jobserver and honour flags such as -n.
release:
	$(MAKE) -rRf make/release.mk all
# Build the AFL++ instrumented configuration by delegating to make/afl.mk.
# $(MAKE) (instead of a literal `make`) lets the sub-make share the parent's
# jobserver and honour flags such as -n.
afl:
	$(MAKE) -rRf make/afl.mk all
run: $(TARGET)
./$(TARGET) $(RUNARGUMENTS)
fuzz:
make -rRf make/afl.mk fuzz
make CC="afl-clang-fast" LD="afl-clang-fast" TARGET="oas-afl" clean-objects all
make clean-objects
mkdir -p reports/afl
afl-fuzz -i tests/input -o reports/afl -m none -- ./oas-afl -tokens @@
# Build the address/undefined-behaviour sanitizer configuration by delegating
# to make/asan.mk.
# $(MAKE) (instead of a literal `make`) lets the sub-make share the parent's
# jobserver and honour flags such as -n.
asan:
	$(MAKE) -rRf make/asan.mk all
# Build both sanitizer executables (oas-asan and oas-msan) in the source tree.
#
# Each configuration reuses the same object file names, so stale objects are
# removed before every build and once more at the end. The cleanup and the
# build are separate sub-make invocations because goals listed on one command
# line are not guaranteed to run in order under -j. $(MAKE) is used so the
# sub-makes share the parent's jobserver and honour flags such as -n.
sanitize:
	$(MAKE) clean-objects
	$(MAKE) CFLAGS="$(CFLAGS) -fsanitize=address,undefined" \
		LDFLAGS="-fsanitize=address,undefined" \
		TARGET="oas-asan" all
	$(MAKE) clean-objects
	$(MAKE) CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" \
		LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" \
		TARGET="oas-msan" all
	$(MAKE) clean-objects
# Build the memory sanitizer configuration by delegating to make/msan.mk.
# $(MAKE) (instead of a literal `make`) lets the sub-make share the parent's
# jobserver and honour flags such as -n.
msan:
	$(MAKE) -rRf make/msan.mk all
validate: asan msan debug release
validate:
./validate.sh
# Rebuild the analyze configuration from scratch via make/analyze.mk.
# `clean` and `all` run as two separate sub-makes: goals given on a single
# command line are not ordered under -j, so `clean all` could delete objects
# while they are being built. $(MAKE) lets the sub-makes share the parent's
# jobserver and honour flags such as -n.
analyze:
	$(MAKE) -rRf make/analyze.mk clean
	$(MAKE) -rRf make/analyze.mk all
$(TARGET): $(OBJECTS)
$(LD) $(LDFLAGS) -o $@ $^
# Build and run the unit tests (with coverage report) via make/test.mk.
# $(MAKE) (instead of a literal `make`) lets the sub-make share the parent's
# jobserver and honour flags such as -n.
test:
	$(MAKE) -rRf make/test.mk test
%.o: %.c
$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
# Remove the build products of every configuration, then the build/ tree
# itself. Each configuration's own `clean` target is run first so that
# anything it removes outside build/ (e.g. test.mk's coverage report) goes too.
# $(MAKE) (instead of a literal `make`) lets the sub-makes share the parent's
# jobserver and honour flags such as -n.
clean:
	$(MAKE) -rRf make/release.mk clean
	$(MAKE) -rRf make/debug.mk clean
	$(MAKE) -rRf make/afl.mk clean
	$(MAKE) -rRf make/msan.mk clean
	$(MAKE) -rRf make/asan.mk clean
	$(MAKE) -rRf make/analyze.mk clean
	$(MAKE) -rRf make/test.mk clean
	rm -rf build/
-include $(DEPENDENCIES)
# Everything `clean` removes, plus the generated fuzzing and static-analysis
# reports (delegated to the makefiles that own those report directories).
# $(MAKE) (instead of a literal `make`) lets the sub-makes share the parent's
# jobserver and honour flags such as -n.
distclean: clean
	$(MAKE) -rRf make/afl.mk distclean
	$(MAKE) -rRf make/analyze.mk distclean
clean-objects:
rm -f $(OBJECTS) $(DEPENDENCIES)
clean-reports:
rm -rf reports/
clean: clean-objects
rm -f $(TARGET) $(OUTPUTS)

View File

@ -1,29 +0,0 @@
# Building
To build oas in the default configuration you just need (gnu) make and a
sufficiently modern clang.
```
make
```
## Make targets
There are a number of make targets available to build various instrumented
builds that are used in validation, analysis and sanitizing. Some of these may
require extra dependencies.
- `debug`: Creates the debug build in `build/debug`. This is the default target.
- `all`: Builds all binary executable targets. These are
`debug`, `release`, `msan`, `asan` and `afl`. All executables can be found
in `build/` in a subdirectory matching their target names.
- `release`: Creates the release build in `build/release`
- `afl`: Creates a build with AFL++ instrumentation for fuzzing
- `fuzz`: Starts the fuzzer with the instrumented afl executable
- `asan`: builds with the address and undefined clang sanitizers
- `msan`: builds with the memory clang sanitizer
- `validate`: Builds the `asan`, `msan`, `debug` and `release` targets, then
runs the validation script. This script executes the sanitizer targets and runs
Valgrind on the debug target across multiple modes and test input files.

View File

@ -1,24 +1,24 @@
/* string literals are lexer identifier tokens with that particular value */
<program> ::= <statement>*
<statement> ::= <label> | <directive> | <instruction> | <newline>
<statement> ::= ( <label> | <directive> | <instruction> ) <newline>
<label> ::= <identifier> <colon>
<directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> ) <newline>
<directive> ::= <dot> <section>
<section_directive> ::= "section" <identifier>
<section> ::= "section" <identifier>
<export_directive> ::= "export" <identifier>
<instruction> ::= <identifier> <operands>
<import_directive> ::= "import" <identifier>
<instruction> ::= <identifier> <operands> <newline>
<operands> ::= <operand> ( <comma> <operand> )*
<operands> ::= <operand> ( <comma> <operands> )*
<operand> ::= <register> | <immediate> | <memory>
<immediate> ::= <number> | <label_reference>
<register> ::= <register_base> | <register_extra>
<register_base> ::= "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi" | "rbp" | "rsp"
<register_extra> ::= "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15"
<immediate> ::= <number> | <label_reference>
<number> ::= <octal> | <binary> | <decimal> | <hexadecimal>
<label_reference> ::= <identifier>
@ -27,17 +27,10 @@
<memory_expression> ::= <label_reference> | <register_expression>
<register_expression> ::= <register> <register_index>? <register_offset>?
<register_expression> ::= <register> ( <plus> <register> <asterisk> <number> )? ( <plus_or_minus> <number> )?
<register_index> ::= <plus> <register> <asterisk> <number>
<register_displacement> ::= <plus> <register> <asterisk> <number>
<register_offset> ::= <plus_or_minus> <number>
<plus_or_minus> ::= <plus> | <minus>
/* These are lexer identifiers with the correct string value */
<section> ::= "section"
<register> ::= "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi" | "rbp" | "rsp" |
"r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15"

View File

@ -1,14 +0,0 @@
# AFL++ configuration: compiles and links with afl-clang-fast into build/afl/
# using the shared rules from make/base.mk, and adds the fuzzing targets.
.PHONY: fuzz distclean

CC=afl-clang-fast
LD=afl-clang-fast
BUILD_DIR=build/afl/

-include make/base.mk

# Start the fuzzer on the instrumented executable. tests/input is the seed
# corpus, findings are written to reports/afl.
fuzz: $(BUILD_DIR)$(TARGET)
	mkdir -p reports/afl
	afl-fuzz -i tests/input -o reports/afl -m none -- ./$< -tokens @@

# On top of `clean`, also drop the fuzzer's output directory.
distclean: clean
	rm -rf reports/afl
View File

@ -1,9 +0,0 @@
# Static-analysis configuration: builds into build/analyze/ using the shared
# rules from make/base.mk.
#
# `analyze` and `distclean` name commands, not files, so they are declared
# phony; otherwise a file with one of those names would silently disable them.
.PHONY: analyze distclean

BUILD_DIR=build/analyze/

-include make/base.mk

# Run the build under scan-build and store the report in
# reports/static-analysis. $(MAKE) is used for the nested build so it runs the
# same make binary and inherits the parent's flags.
analyze:
	mkdir -p reports/static-analysis
	scan-build -o reports/static-analysis/ -plist-html --status-bugs $(MAKE) -rRf make/analyze.mk all

# On top of `clean`, also drop the generated analysis reports.
distclean: clean
	rm -rf reports/static-analysis

View File

@ -1,5 +0,0 @@
CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=address,undefined
LDFLAGS=-fsanitize=address,undefined
BUILD_DIR=build/asan/
-include make/base.mk

View File

@ -1,27 +0,0 @@
# Shared build rules. The per-configuration makefiles set whatever they want
# to override (CC, LD, CFLAGS, LDFLAGS, BUILD_DIR, TARGET, SOURCES) and then
# include this file; everything left unset falls back to the debug defaults
# below.
.PHONY: all clean

# Delete a half-written object or executable when its recipe fails, so a
# failed build can never leave a file behind that looks up to date.
.DELETE_ON_ERROR:

# NOTE: GNU make predefines CC and LD, so these two `?=` defaults only take
# effect when make runs with -R (no built-in variables), which is how the
# top-level Makefile invokes this file.
CC?=clang
LD?=clang
CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L
LDFLAGS?=
BUILD_DIR?=build/debug/

SOURCES?=$(shell find src/ -type f -name '*.c')
# `?=` always defines a recursively expanded variable, which would re-run
# `find` every time SOURCES (or anything derived from it) is expanded.
# Snapshot the list once and derive the other lists from the snapshot.
SOURCES:=$(SOURCES)
OBJECTS:=$(patsubst %.c,$(BUILD_DIR)%.o,$(SOURCES))
DEPENDENCIES:=$(OBJECTS:.o=.d)
TARGET?=oas

all: $(BUILD_DIR)$(TARGET)

# Link all objects into the executable.
$(BUILD_DIR)$(TARGET): $(OBJECTS)
	$(LD) $(LDFLAGS) -o $@ $^

# Compile one source file. The object tree mirrors the source tree below
# BUILD_DIR, so the output directory is created on demand. -MMD -MP write a
# .d file with the header dependencies next to the object.
$(BUILD_DIR)%.o: %.c
	mkdir -p $(@D)
	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@

# Pull in the generated header dependencies; the leading `-` tolerates their
# absence on a first build.
-include $(DEPENDENCIES)

clean:
	rm -rf $(BUILD_DIR)

View File

@ -1 +0,0 @@
-include make/base.mk

View File

@ -1,5 +0,0 @@
CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=memory
LDFLAGS=-fsanitize=memory
BUILD_DIR=build/msan/
-include make/base.mk

View File

@ -1,5 +0,0 @@
CFLAGS?=-Wall -Wextra -Wpedantic -Werror -O2 -std=c23 -flto -fomit-frame-pointer -DNDEBUG -D_POSIX_C_SOURCE=200809L
LDFLAGS?=-flto -s -Wl,--gc-sections
BUILD_DIR?=build/release/
-include make/base.mk

View File

@ -1,21 +0,0 @@
# Unit-test configuration: builds the oas-tests executable with clang source
# coverage instrumentation into build/test/ and produces an HTML coverage
# report under reports/coverage.
.PHONY: test clean-coverage

CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fprofile-instr-generate -fcoverage-mapping
LDFLAGS?=-fprofile-instr-generate
BUILD_DIR=build/test/
TARGET=oas-tests
# Every C file under src/ and tests/ except src/main.c.
SOURCES := $(filter-out src/main.c, $(shell find src/ tests/ -type f -name '*.c'))

-include make/base.mk

# Run the tests, then turn the raw profile into an HTML report. Files under
# tests/ are excluded from the report.
test: $(BUILD_DIR)$(TARGET)
	mkdir -p reports/coverage
	LLVM_PROFILE_FILE="reports/coverage/tests.profraw" $(BUILD_DIR)$(TARGET)
	llvm-profdata merge -sparse reports/coverage/tests.profraw -o reports/coverage/tests.profdata
	llvm-cov show $(BUILD_DIR)$(TARGET) -instr-profile=reports/coverage/tests.profdata -format=html -output-dir=reports/coverage/html -ignore-filename-regex="tests/.*"
	@echo "--"
	@echo "Test coverage:"
	@echo "file://$$(realpath reports/coverage/html/index.html)"
	@echo "--"

# base.mk already provides the recipe for `clean` (it removes BUILD_DIR).
# Giving `clean` a second recipe here would override that one, so the coverage
# cleanup is attached as a prerequisite instead and both run.
clean: clean-coverage

clean-coverage:
	rm -rf reports/coverage

207
src/ast.c
View File

@ -1,207 +0,0 @@
#include "ast.h"
#include "error.h"
#include <assert.h>
#include <string.h>
/** Error returned when a node's children array is already at its maximum capacity. */
error_t *const err_ast_children_cap = &(error_t){
    .message = "Failed to increase ast node children, max capacity reached",
};
/**
 * Allocate one zero-initialised AST node.
 *
 * @param[out] output receives the new node, or nullptr when allocation fails
 * @return nullptr on success, err_allocation_failed otherwise
 */
error_t *ast_node_alloc(ast_node_t **output) {
    ast_node_t *fresh = calloc(1, sizeof(*fresh));
    /* On failure this stores nullptr, so the caller never sees a stale value */
    *output = fresh;
    return fresh != nullptr ? nullptr : err_allocation_failed;
}
/**
 * Release a node together with its whole subtree.
 *
 * Children are freed depth-first, then the children array, then the node
 * itself. The node is zeroed before it is handed back to the allocator.
 * Passing nullptr is a no-op.
 */
void ast_node_free(ast_node_t *node) {
    if (!node)
        return;

    ast_node_t **kids = node->children;
    if (kids != nullptr) {
        const size_t count = node->len;
        for (size_t idx = 0; idx != count; idx++)
            ast_node_free(kids[idx]);
        free(kids);
    }

    memset(node, 0, sizeof(*node));
    free(node);
}
/**
 * Give a node its initial, zero-filled children array of
 * node_default_children_cap slots.
 *
 * @pre node->children must be nullptr
 * @return nullptr on success, err_allocation_failed otherwise
 */
error_t *ast_node_alloc_children(ast_node_t *node) {
    ast_node_t **slots = calloc(node_default_children_cap, sizeof(*slots));
    node->children = slots;
    if (!slots)
        return err_allocation_failed;

    node->cap = node_default_children_cap;
    return nullptr;
}
/**
 * Double the capacity of a node's children array, clamped to
 * node_max_children_cap.
 *
 * On failure the node is left untouched.
 *
 * @return nullptr on success, err_ast_children_cap when the array is already
 *         at the maximum, err_allocation_failed when realloc fails
 */
error_t *ast_node_grow_cap(ast_node_t *node) {
    const size_t old_cap = node->cap;
    if (old_cap >= node_max_children_cap)
        return err_ast_children_cap;

    const size_t doubled = old_cap * 2;
    const size_t target =
        doubled > node_max_children_cap ? node_max_children_cap : doubled;

    ast_node_t **resized = realloc(node->children, target * sizeof(*resized));
    if (!resized)
        return err_allocation_failed;

    node->cap = target;
    node->children = resized;
    return nullptr;
}
/**
 * Append child to node's children, making room first when necessary: the
 * array is created on the first append and grown whenever it is full.
 *
 * @return nullptr on success, otherwise the error from allocating or growing
 *         the children array (the child is not added in that case)
 */
error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child) {
    error_t *failure = nullptr;

    if (!node->children) {
        failure = ast_node_alloc_children(node);
    } else if (node->cap <= node->len) {
        failure = ast_node_grow_cap(node);
    }
    if (failure != nullptr)
        return failure;

    node->children[node->len++] = child;
    return nullptr;
}
/**
 * Map a node id to its enumerator name as a static string, e.g.
 * NODE_PROGRAM -> "NODE_PROGRAM".
 *
 * The switch deliberately has no default arm so the compiler can report
 * enumerators that are not handled. An id outside the enum trips the assert
 * (when asserts are enabled) and is otherwise treated as unreachable.
 */
const char *ast_node_id_to_cstr(node_id_t id) {
/* One switch arm that returns the enumerator's own spelling */
#define NODE_ID_CASE(name)                                                     \
    case name:                                                                 \
        return #name;

    switch (id) {
        NODE_ID_CASE(NODE_INVALID)
        NODE_ID_CASE(NODE_PROGRAM)
        NODE_ID_CASE(NODE_STATEMENT)
        NODE_ID_CASE(NODE_LABEL)
        NODE_ID_CASE(NODE_DIRECTIVE)
        NODE_ID_CASE(NODE_INSTRUCTION)
        NODE_ID_CASE(NODE_OPERANDS)
        NODE_ID_CASE(NODE_OPERAND)
        NODE_ID_CASE(NODE_IMMEDIATE)
        NODE_ID_CASE(NODE_MEMORY)
        NODE_ID_CASE(NODE_NUMBER)
        NODE_ID_CASE(NODE_LABEL_REFERENCE)
        NODE_ID_CASE(NODE_MEMORY_EXPRESSION)
        NODE_ID_CASE(NODE_REGISTER_EXPRESSION)
        NODE_ID_CASE(NODE_REGISTER_INDEX)
        NODE_ID_CASE(NODE_REGISTER_OFFSET)
        NODE_ID_CASE(NODE_PLUS_OR_MINUS)
        NODE_ID_CASE(NODE_SECTION_DIRECTIVE)
        NODE_ID_CASE(NODE_IMPORT_DIRECTIVE)
        NODE_ID_CASE(NODE_EXPORT_DIRECTIVE)
        NODE_ID_CASE(NODE_REGISTER)
        NODE_ID_CASE(NODE_SECTION)
        NODE_ID_CASE(NODE_IDENTIFIER)
        NODE_ID_CASE(NODE_DECIMAL)
        NODE_ID_CASE(NODE_HEXADECIMAL)
        NODE_ID_CASE(NODE_OCTAL)
        NODE_ID_CASE(NODE_BINARY)
        NODE_ID_CASE(NODE_CHAR)
        NODE_ID_CASE(NODE_STRING)
        NODE_ID_CASE(NODE_COLON)
        NODE_ID_CASE(NODE_COMMA)
        NODE_ID_CASE(NODE_LBRACKET)
        NODE_ID_CASE(NODE_RBRACKET)
        NODE_ID_CASE(NODE_PLUS)
        NODE_ID_CASE(NODE_MINUS)
        NODE_ID_CASE(NODE_ASTERISK)
        NODE_ID_CASE(NODE_DOT)
        NODE_ID_CASE(NODE_NEWLINE)
        NODE_ID_CASE(NODE_IMPORT)
        NODE_ID_CASE(NODE_EXPORT)
    }

#undef NODE_ID_CASE

    /* !"..." is always false, so this assert always fires when reached */
    assert(!"Unreachable, weird node id" && id);
    __builtin_unreachable();
}
/**
 * Print node and its subtree to stdout, one node per line.
 *
 * Each line starts with one space per nesting level, followed by the node id
 * name. When the node carries a token with a value, the value follows in
 * double quotes; newline nodes never print their value.
 *
 * @param node   subtree to print, nullptr prints nothing
 * @param indent nesting level of node
 */
static void ast_node_print_internal(ast_node_t *node, int indent) {
    if (!node)
        return;

    int remaining = indent;
    while (remaining-- > 0)
        printf(" ");

    printf("%s", ast_node_id_to_cstr(node->id));

    const bool is_newline = node->id == NODE_NEWLINE;
    if (!is_newline && node->token_entry && node->token_entry->token.value)
        printf(" \"%s\"", node->token_entry->token.value);
    printf("\n");

    const int child_indent = indent + 1;
    for (size_t idx = 0; idx < node->len; ++idx)
        ast_node_print_internal(node->children[idx], child_indent);
}
/** Print the tree rooted at node to stdout, with the root at nesting level 0. */
void ast_node_print(ast_node_t *node) {
    const int root_indent = 0;
    ast_node_print_internal(node, root_indent);
}
/**
 * Remove every descendant of node whose id matches, together with that
 * descendant's own subtree.
 *
 * Surviving children are compacted to the front of the children array in
 * their original order and are pruned recursively. The id of node itself is
 * never checked.
 */
void ast_node_prune(ast_node_t *node, node_id_t id) {
    size_t kept = 0;

    for (size_t idx = 0; idx < node->len; ++idx) {
        ast_node_t *candidate = node->children[idx];
        if (candidate->id != id) {
            ast_node_prune(candidate, id);
            node->children[kept++] = candidate;
        } else {
            ast_node_free(candidate);
        }
    }

    node->len = kept;
}

198
src/ast.h
View File

@ -1,198 +0,0 @@
#ifndef INCLUDE_SRC_AST_H_
#define INCLUDE_SRC_AST_H_
#include "data/registers.h"
#include "error.h"
#include "lexer.h"
#include "tokenlist.h"
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
extern error_t *const err_ast_children_cap;
/**
 * Kind of grammar element an AST node represents.
 *
 * NODE_INVALID is the first enumerator and therefore 0, which is also the id
 * of a node freshly returned by ast_node_alloc (the node is zero-initialised).
 * ast_node_id_to_cstr has one case per enumerator; keep the two in sync.
 */
typedef enum node_id {
NODE_INVALID,
NODE_PROGRAM,
NODE_STATEMENT,
NODE_LABEL,
NODE_DIRECTIVE,
NODE_INSTRUCTION,
NODE_OPERANDS,
NODE_OPERAND,
NODE_IMMEDIATE,
NODE_MEMORY,
NODE_NUMBER,
NODE_LABEL_REFERENCE,
NODE_MEMORY_EXPRESSION,
NODE_REGISTER_EXPRESSION,
NODE_REGISTER_INDEX,
NODE_REGISTER_OFFSET,
NODE_PLUS_OR_MINUS,
NODE_SECTION_DIRECTIVE,
NODE_IMPORT_DIRECTIVE,
NODE_EXPORT_DIRECTIVE,
// Validated primitives
NODE_REGISTER,
NODE_SECTION,
NODE_IMPORT,
NODE_EXPORT,
// Primitive nodes
NODE_IDENTIFIER,
NODE_DECIMAL,
NODE_HEXADECIMAL,
NODE_OCTAL,
NODE_BINARY,
NODE_CHAR,
NODE_STRING,
NODE_COLON,
NODE_COMMA,
NODE_LBRACKET,
NODE_RBRACKET,
NODE_PLUS,
NODE_MINUS,
NODE_ASTERISK,
NODE_DOT,
NODE_NEWLINE,
} node_id_t;
typedef struct ast_node ast_node_t;
/* Number of child slots a node gets when its children array is first allocated */
constexpr size_t node_default_children_cap = 8;
/* Upper bound on the number of child slots of a single node */
/* 65K ought to be enough for anybody */
constexpr size_t node_max_children_cap = 1 << 16;
/* Payload of a NODE_NUMBER node (see ast_node_number_value): a value and its operand size */
typedef struct number {
uint64_t value;
operand_size_t size;
} number_t;
/*
 * Payload of a NODE_REGISTER node (see ast_node_register_value).
 * The tag is spelled register_ because `register` is a reserved word in C.
 */
typedef struct register_ {
register_id_t id;
operand_size_t size;
} register_t;
/* Fixed-size byte buffer plus the number of bytes in use */
/* NOTE(review): presumably the machine code of one instruction - confirm against the encoder */
typedef struct opcode_encoding {
uint8_t buffer[32];
size_t len;
} opcode_encoding_t;
/* Payload of a NODE_INSTRUCTION node (see ast_node_instruction_value) */
typedef struct instruction {
// NOTE(review): presumably set for instructions that use a label reference - confirm
bool has_reference;
opcode_encoding_t encoding;
int64_t address;
} instruction_t;
/* Payload of a NODE_LABEL_REFERENCE node (see ast_node_reference_value) */
typedef struct reference {
int64_t offset;
int64_t address;
operand_size_t size;
} reference_t;
/* Payload of a NODE_LABEL node (see ast_node_label_value) */
typedef struct {
int64_t address;
} label_t;
/**
 * One node of the abstract syntax tree.
 *
 * Nodes are created with ast_node_alloc, linked with ast_node_add_child and
 * released (with their whole subtree) by ast_node_free.
 */
struct ast_node {
// What this node represents; also decides which member of `value` is valid
node_id_t id;
// Token this node was built from; may be null (ast_node_print checks for that)
tokenlist_entry_t *token_entry;
// Number of children in use
size_t len;
// Number of allocated slots in `children`
size_t cap;
// Child pointers; nullptr until the first child is added
ast_node_t **children;
// Per-kind payload; use the ast_node_*_value accessors, which assert the id
union {
register_t reg;
number_t number;
instruction_t instruction;
reference_t reference;
label_t label;
} value;
};
/*
 * Typed accessors for the payload union of a node. Each one asserts that the
 * node has the id matching the union member it hands out and returns a
 * pointer into the node, so the result stays valid as long as the node does.
 */

/** Payload of a NODE_REGISTER node. */
static inline register_t *ast_node_register_value(ast_node_t *node) {
    assert(NODE_REGISTER == node->id);
    register_t *payload = &node->value.reg;
    return payload;
}

/** Payload of a NODE_NUMBER node. */
static inline number_t *ast_node_number_value(ast_node_t *node) {
    assert(NODE_NUMBER == node->id);
    number_t *payload = &node->value.number;
    return payload;
}

/** Payload of a NODE_INSTRUCTION node. */
static inline instruction_t *ast_node_instruction_value(ast_node_t *node) {
    assert(NODE_INSTRUCTION == node->id);
    instruction_t *payload = &node->value.instruction;
    return payload;
}

/** Payload of a NODE_LABEL_REFERENCE node. */
static inline reference_t *ast_node_reference_value(ast_node_t *node) {
    assert(NODE_LABEL_REFERENCE == node->id);
    reference_t *payload = &node->value.reference;
    return payload;
}

/** Payload of a NODE_LABEL node. */
static inline label_t *ast_node_label_value(ast_node_t *node) {
    assert(NODE_LABEL == node->id);
    label_t *payload = &node->value.label;
    return payload;
}
/**
* @brief Allocates a new AST node
*
* Creates and initializes a new AST node with default (zero) values.
*
* @param[out] node Pointer to store the allocated node; set to nullptr when
*                  the allocation fails
* @return error_t* nullptr on success, allocation error on failure
*/
error_t *ast_node_alloc(ast_node_t **node);
/**
* @brief Frees an AST node and all its children recursively
*
* Recursively frees all children of the node, then frees the node itself.
* If node is nullptr, the function returns without doing anything.
*
* @param node The node to free
*/
void ast_node_free(ast_node_t *node);
/**
* @brief Adds a child node to a parent node
*
* Adds the specified child node to the parent's children array.
* If this is the first child, the function allocates the children array.
* If the children array is full, the function increases its capacity.
*
* @param node The parent node to add the child to
* @param child The child node to add
* @return error_t* nullptr on success, allocation error on failure,
* or err_ast_children_cap if maximum capacity is reached
*/
error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child);
/**
* @brief Prints an AST starting from the given node
*
* Prints a representation of the AST with indentation to show structure.
* Each node's type is shown, and if a node has an associated token value,
* that value is printed in quotes.
*
* @param node The root node of the AST to print
*/
void ast_node_print(ast_node_t *node);
/**
* Prune the children with a given id
*
* The tree is recursively visited and all child nodes of a given ID are pruned
* completely. If a node has the given id, it will get removed along with all
* its children, even if some of those children have different ids. The root
* node id is never checked, so the root is guaranteed to remain allocated and
* valid.
*
* @param node The root of the tree you want to prune
* @param id The id of the nodes you want to prune
*/
void ast_node_prune(ast_node_t *node, node_id_t id);
#endif // INCLUDE_SRC_AST_H_

View File

@ -1,6 +0,0 @@
#include "bytes.h"
#include "error.h"
/** Error returned by the bytes_append_* helpers when the data does not fit. */
error_t *const err_bytes_no_capacity =
    &(error_t){.message = "Not enough capacity in bytes buffer"};

View File

@ -1,60 +0,0 @@
#ifndef INCLUDE_SRC_BYTES_H_
#define INCLUDE_SRC_BYTES_H_
#include "error.h"
#include <stddef.h>
#include <stdint.h>
#include <string.h>
extern error_t *const err_bytes_no_capacity;
/**
 * Fixed-capacity byte buffer.
 *
 * The storage is a flexible array member, so the struct is only usable through
 * a pointer to memory that provides room for `cap` bytes after the header
 * (see LOCAL_BYTES). The bytes_append_* helpers never grow the buffer; they
 * fail with err_bytes_no_capacity instead.
 */
typedef struct bytes {
// Number of bytes currently stored in `buffer`
size_t len;
// Total number of bytes `buffer` can hold
size_t cap;
uint8_t buffer[];
} bytes_t;
/**
 * Pointer to an empty anonymous buffer with room for N bytes, created as a
 * compound literal. The struct mirrors the layout of bytes_t but has a
 * fixed-size array instead of the flexible array member. The storage has the
 * lifetime of the enclosing block when used inside a function.
 *
 * The expansion is fully parenthesised so it can be used inside any
 * expression.
 */
#define LOCAL_BYTES_ANONYMOUS(N)                                               \
    (&(struct {                                                                \
        size_t len;                                                            \
        size_t cap;                                                            \
        uint8_t buffer[(N)];                                                   \
    }){0, (N), {}})

/**
 * Same as LOCAL_BYTES_ANONYMOUS, but typed as bytes_t * so the result can be
 * passed to the bytes_append_* helpers:
 *
 *     bytes_t *scratch = LOCAL_BYTES(16);
 *
 * The macro expands to an expression without a trailing semicolon; the caller
 * supplies it, as with any other expression.
 */
#define LOCAL_BYTES(N) ((bytes_t *)LOCAL_BYTES_ANONYMOUS(N))
/**
 * Append a single byte.
 *
 * @return nullptr on success, err_bytes_no_capacity when the buffer is full
 *         (the buffer is left unchanged in that case)
 */
static inline error_t *bytes_append_uint8(bytes_t *bytes, uint8_t value) {
    const size_t at = bytes->len;
    if (at < bytes->cap) {
        bytes->buffer[at] = value;
        bytes->len = at + 1;
        return nullptr;
    }
    return err_bytes_no_capacity;
}
/**
 * Append n bytes from buffer to dst.
 *
 * The append succeeds whenever the bytes fit, including when they fill the
 * buffer exactly; this matches bytes_append_uint8, which accepts a byte as
 * long as len < cap. The free space is computed by subtraction so the check
 * cannot wrap around for large n.
 *
 * @param dst    destination buffer
 * @param n      number of bytes to append
 * @param buffer source of at least n bytes; must not overlap dst's storage
 * @return nullptr on success, err_bytes_no_capacity when fewer than n bytes
 *         are free (dst is left unchanged in that case)
 */
static inline error_t *bytes_append_array(bytes_t *dst, size_t n,
                                          uint8_t buffer[static n]) {
    /* The first test guards the subtraction in case the len <= cap invariant
     * was broken elsewhere */
    if (dst->len > dst->cap || n > dst->cap - dst->len)
        return err_bytes_no_capacity;
    memcpy(dst->buffer + dst->len, buffer, n);
    dst->len += n;
    return nullptr;
}
/**
 * Append the used part (the first src->len bytes) of src to dst.
 *
 * @return whatever bytes_append_array returns for that range
 */
static inline error_t *bytes_append_bytes(bytes_t *dst, bytes_t *src) {
    const size_t count = src->len;
    return bytes_append_array(dst, count, src->buffer);
}
/*
 * Fixed-width integer appenders. Each one copies the in-memory representation
 * of the value, so the bytes end up in the byte order of the machine running
 * the code. All return whatever bytes_append_array returns.
 */

/** Append the 2 bytes of value. */
static inline error_t *bytes_append_uint16(bytes_t *dst, uint16_t value) {
    uint8_t *raw = (uint8_t *)&value;
    return bytes_append_array(dst, sizeof value, raw);
}

/** Append the 4 bytes of value. */
static inline error_t *bytes_append_uint32(bytes_t *dst, uint32_t value) {
    uint8_t *raw = (uint8_t *)&value;
    return bytes_append_array(dst, sizeof value, raw);
}

/** Append the 8 bytes of value. */
static inline error_t *bytes_append_uint64(bytes_t *dst, uint64_t value) {
    uint8_t *raw = (uint8_t *)&value;
    return bytes_append_array(dst, sizeof value, raw);
}
#endif // INCLUDE_SRC_BYTES_H_

View File

@ -1,265 +0,0 @@
#include "opcodes.h"
// clang-format off
opcode_data_t *const opcodes[] = {
// RET
&(opcode_data_t) {
.mnemonic = "ret",
.opcode = 0xC3,
.opcode_extension = opcode_extension_none,
.operand_count = 0,
},
// RET imm16
&(opcode_data_t) {
.mnemonic = "ret",
.opcode = 0xC2,
.opcode_extension = opcode_extension_none,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
},
},
// PUSH imm8
&(opcode_data_t) {
.mnemonic = "push",
.opcode = 0x6A,
.opcode_extension = opcode_extension_none,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8},
},
},
// PUSH imm16
&(opcode_data_t) {
.mnemonic = "push",
.opcode = 0x68,
.opcode_extension = opcode_extension_none,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16},
},
},
// PUSH imm32
&(opcode_data_t) {
.mnemonic = "push",
.opcode = 0x68,
.opcode_extension = opcode_extension_none,
.operand_size_prefix = false,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32},
},
},
// PUSH reg16
// Shares opcode 0x50+r with PUSH reg64 below; the operand size prefix is what
// tells the two apart, exactly as for the other 16 bit entries in this table
// (PUSH imm16, NOT reg16, NEG reg16, JMP reg16).
&(opcode_data_t) {
    .mnemonic = "push",
    .opcode = 0x50,
    .opcode_extension = opcode_extension_none,
    .encoding_class = ENCODING_OPCODE_REGISTER,
    .operand_size_prefix = true,
    .operand_count = 1,
    .operands = {
        { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
    },
},
// PUSH reg64
&(opcode_data_t) {
.mnemonic = "push",
.opcode = 0x50,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_OPCODE_REGISTER,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// NOT reg16
&(opcode_data_t) {
.mnemonic = "not",
.opcode = 0xF7,
.opcode_extension = 2,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
},
},
// NOT reg32
&(opcode_data_t) {
.mnemonic = "not",
.opcode = 0xF7,
.opcode_extension = 2,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
},
},
// NOT reg64
&(opcode_data_t) {
.mnemonic = "not",
.opcode = 0xF7,
.opcode_extension = 2,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// NEG reg16
&(opcode_data_t) {
.mnemonic = "neg",
.opcode = 0xF7,
.opcode_extension = 3,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
},
},
// NEG reg32
&(opcode_data_t) {
.mnemonic = "neg",
.opcode = 0xF7,
.opcode_extension = 3,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
},
},
// NEG reg64
&(opcode_data_t) {
.mnemonic = "neg",
.opcode = 0xF7,
.opcode_extension = 3,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// CALL rel32
&(opcode_data_t) {
.mnemonic = "call",
.opcode = 0xE8,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_DEFAULT,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
},
},
// CALL reg64
&(opcode_data_t) {
.mnemonic = "call",
.opcode = 0xFF,
.opcode_extension = 2,
.encoding_class = ENCODING_DEFAULT,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// CALL mem64
&(opcode_data_t) {
.mnemonic = "call",
.opcode = 0xFF,
.opcode_extension = 2,
.encoding_class = ENCODING_DEFAULT,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 },
},
},
// JMP rel8 (short jump)
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xEB,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_DEFAULT,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8 },
},
},
// JMP rel16
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xE9,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_DEFAULT,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 },
},
},
// JMP reg16
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xFF,
.opcode_extension = 4,
.encoding_class = ENCODING_DEFAULT,
.operand_size_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 },
},
},
// JMP rel32 (near jump)
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xE9,
.opcode_extension = opcode_extension_none,
.encoding_class = ENCODING_DEFAULT,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 },
},
},
// JMP reg32
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xFF,
.opcode_extension = 4,
.encoding_class = ENCODING_DEFAULT,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 },
},
},
// JMP reg64
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xFF,
.opcode_extension = 4,
.encoding_class = ENCODING_DEFAULT,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 },
},
},
// JMP mem64
&(opcode_data_t) {
.mnemonic = "jmp",
.opcode = 0xFF,
.opcode_extension = 4,
.encoding_class = ENCODING_DEFAULT,
.rex_w_prefix = true,
.operand_count = 1,
.operands = {
{ .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 },
},
},
nullptr,
};

View File

@ -1,56 +0,0 @@
#ifndef INCLUDE_DATA_OPCODES_H_
#define INCLUDE_DATA_OPCODES_H_
#include "../data/registers.h"
#include <stddef.h>
#include <stdint.h>
/* Instruction prefix bytes */
// REX prefix: 0x40 is the bare prefix, the four variants below each add one
// of the low bits (W = 0x08, R = 0x04, X = 0x02, B = 0x01)
constexpr uint8_t rex_prefix = 0x40;
constexpr uint8_t rex_prefix_w = 0x48;
constexpr uint8_t rex_prefix_r = 0x44;
constexpr uint8_t rex_prefix_x = 0x42;
constexpr uint8_t rex_prefix_b = 0x41;
constexpr uint8_t operand_size_prefix = 0x66;
// NOTE(review): 0x67 is what the Intel manual calls the address-size override
// prefix; presumably the counterpart of opcode_data_t.address_size_prefix - confirm
constexpr uint8_t memory_size_prefix = 0x67;
constexpr uint8_t lock_prefix = 0xF0;
constexpr uint8_t repne_prefix = 0xF2;
constexpr uint8_t rep_prefix = 0xF3;
typedef enum encoding_class {
ENCODING_DEFAULT, // use modrm+sib for registers and memory, append
// immediates
ENCODING_OPCODE_REGISTER, // encode the register in the last 3 bits of the
// opcode
} encoding_class_t;
typedef enum operand_kind {
OPERAND_REGISTER,
OPERAND_MEMORY,
OPERAND_IMMEDIATE,
} operand_kind_t;
typedef struct operand_info {
operand_kind_t kind;
operand_size_t size;
} operand_info_t;
// Value of opcode_data_t.opcode_extension for encodings that have no opcode
// extension (a real extension only uses 3 bits, so 0xFF can never be one)
constexpr uint8_t opcode_extension_none = 0xFF;
/**
 * Describes one encoding of an instruction: the mnemonic, the opcode and
 * prefixes to emit, and the operands (kind and size) this encoding accepts.
 * The same mnemonic appears once per supported operand combination.
 */
typedef struct opcode_data {
const char *mnemonic;
uint16_t opcode;
uint8_t opcode_extension; // 3 bits for the opcode extension in the reg
// field of a modr/m byte
encoding_class_t encoding_class;
// Prefix flags; false when left out of a designated initializer
bool operand_size_prefix;
bool address_size_prefix;
bool rex_w_prefix;
// Number of entries of `operands` that are in use (at most 3)
size_t operand_count;
operand_info_t operands[3];
} opcode_data_t;
// Table of all known encodings, terminated by a nullptr entry
extern opcode_data_t *const opcodes[];
#endif // INCLUDE_DATA_OPCODES_H_

View File

@ -1,92 +0,0 @@
#include "registers.h"
register_data_t *const registers[] = {
// Instruction pointer
&(register_data_t){"rip", REG_RIP, OPERAND_SIZE_64},
&(register_data_t){"eip", REG_RIP, OPERAND_SIZE_32},
&(register_data_t){"ip", REG_RIP, OPERAND_SIZE_16},
// 64-bit general purpose registers
&(register_data_t){"rax", REG_A, OPERAND_SIZE_64},
&(register_data_t){"rcx", REG_C, OPERAND_SIZE_64},
&(register_data_t){"rdx", REG_D, OPERAND_SIZE_64},
&(register_data_t){"rbx", REG_B, OPERAND_SIZE_64},
&(register_data_t){"rsp", REG_SP, OPERAND_SIZE_64},
&(register_data_t){"rbp", REG_BP, OPERAND_SIZE_64},
&(register_data_t){"rsi", REG_SI, OPERAND_SIZE_64},
&(register_data_t){"rdi", REG_DI, OPERAND_SIZE_64},
&(register_data_t){"r8", REG_8, OPERAND_SIZE_64},
&(register_data_t){"r9", REG_9, OPERAND_SIZE_64},
&(register_data_t){"r10", REG_10, OPERAND_SIZE_64},
&(register_data_t){"r11", REG_11, OPERAND_SIZE_64},
&(register_data_t){"r12", REG_12, OPERAND_SIZE_64},
&(register_data_t){"r13", REG_13, OPERAND_SIZE_64},
&(register_data_t){"r14", REG_14, OPERAND_SIZE_64},
&(register_data_t){"r15", REG_15, OPERAND_SIZE_64},
// 32-bit general purpose registers
&(register_data_t){"eax", REG_A, OPERAND_SIZE_32},
&(register_data_t){"ecx", REG_C, OPERAND_SIZE_32},
&(register_data_t){"edx", REG_D, OPERAND_SIZE_32},
&(register_data_t){"ebx", REG_B, OPERAND_SIZE_32},
&(register_data_t){"esp", REG_SP, OPERAND_SIZE_32},
&(register_data_t){"ebp", REG_BP, OPERAND_SIZE_32},
&(register_data_t){"esi", REG_SI, OPERAND_SIZE_32},
&(register_data_t){"edi", REG_DI, OPERAND_SIZE_32},
&(register_data_t){"r8d", REG_8, OPERAND_SIZE_32},
&(register_data_t){"r9d", REG_9, OPERAND_SIZE_32},
&(register_data_t){"r10d", REG_10, OPERAND_SIZE_32},
&(register_data_t){"r11d", REG_11, OPERAND_SIZE_32},
&(register_data_t){"r12d", REG_12, OPERAND_SIZE_32},
&(register_data_t){"r13d", REG_13, OPERAND_SIZE_32},
&(register_data_t){"r14d", REG_14, OPERAND_SIZE_32},
&(register_data_t){"r15d", REG_15, OPERAND_SIZE_32},
// 16-bit general purpose registers
&(register_data_t){"ax", REG_A, OPERAND_SIZE_16},
&(register_data_t){"cx", REG_C, OPERAND_SIZE_16},
&(register_data_t){"dx", REG_D, OPERAND_SIZE_16},
&(register_data_t){"bx", REG_B, OPERAND_SIZE_16},
&(register_data_t){"sp", REG_SP, OPERAND_SIZE_16},
&(register_data_t){"bp", REG_BP, OPERAND_SIZE_16},
&(register_data_t){"si", REG_SI, OPERAND_SIZE_16},
&(register_data_t){"di", REG_DI, OPERAND_SIZE_16},
&(register_data_t){"r8w", REG_8, OPERAND_SIZE_16},
&(register_data_t){"r9w", REG_9, OPERAND_SIZE_16},
&(register_data_t){"r10w", REG_10, OPERAND_SIZE_16},
&(register_data_t){"r11w", REG_11, OPERAND_SIZE_16},
&(register_data_t){"r12w", REG_12, OPERAND_SIZE_16},
&(register_data_t){"r13w", REG_13, OPERAND_SIZE_16},
&(register_data_t){"r14w", REG_14, OPERAND_SIZE_16},
&(register_data_t){"r15w", REG_15, OPERAND_SIZE_16},
// 8-bit general purpose registers (low byte)
&(register_data_t){"al", REG_A, OPERAND_SIZE_8 },
&(register_data_t){"cl", REG_C, OPERAND_SIZE_8 },
&(register_data_t){"dl", REG_D, OPERAND_SIZE_8 },
&(register_data_t){"bl", REG_B, OPERAND_SIZE_8 },
&(register_data_t){"spl", REG_SP, OPERAND_SIZE_8 },
&(register_data_t){"bpl", REG_BP, OPERAND_SIZE_8 },
&(register_data_t){"sil", REG_SI, OPERAND_SIZE_8 },
&(register_data_t){"dil", REG_DI, OPERAND_SIZE_8 },
&(register_data_t){"r8b", REG_8, OPERAND_SIZE_8 },
&(register_data_t){"r9b", REG_9, OPERAND_SIZE_8 },
&(register_data_t){"r10b", REG_10, OPERAND_SIZE_8 },
&(register_data_t){"r11b", REG_11, OPERAND_SIZE_8 },
&(register_data_t){"r12b", REG_12, OPERAND_SIZE_8 },
&(register_data_t){"r13b", REG_13, OPERAND_SIZE_8 },
&(register_data_t){"r14b", REG_14, OPERAND_SIZE_8 },
&(register_data_t){"r15b", REG_15, OPERAND_SIZE_8 },
// x87 floating point registers
&(register_data_t){"st0", REG_ST0, OPERAND_SIZE_80},
&(register_data_t){"st1", REG_ST1, OPERAND_SIZE_80},
&(register_data_t){"st2", REG_ST2, OPERAND_SIZE_80},
&(register_data_t){"st3", REG_ST3, OPERAND_SIZE_80},
&(register_data_t){"st4", REG_ST4, OPERAND_SIZE_80},
&(register_data_t){"st5", REG_ST5, OPERAND_SIZE_80},
&(register_data_t){"st6", REG_ST6, OPERAND_SIZE_80},
&(register_data_t){"st7", REG_ST7, OPERAND_SIZE_80},
nullptr,
};

View File

@ -1,82 +0,0 @@
#ifndef INCLUDE_DATA_REGISTERS_H_
#define INCLUDE_DATA_REGISTERS_H_
/**
 * Size of an operand in bits.
 *
 * Every valid size is a distinct single bit and OPERAND_SIZE_INVALID is 0.
 * NOTE(review): the bit-flag layout suggests sizes may be OR-ed into masks
 * somewhere - confirm against the users of this enum.
 */
typedef enum operand_size {
OPERAND_SIZE_INVALID = 0,
OPERAND_SIZE_8 = 1 << 0,
OPERAND_SIZE_16 = 1 << 1,
OPERAND_SIZE_32 = 1 << 2,
OPERAND_SIZE_64 = 1 << 3,
OPERAND_SIZE_80 = 1 << 4,
OPERAND_SIZE_128 = 1 << 5,
OPERAND_SIZE_256 = 1 << 6,
OPERAND_SIZE_512 = 1 << 7,
} operand_size_t;
/**
 * Convert a width in bits to the matching operand size.
 *
 * @param bits one of 8, 16, 32, 64, 80, 128, 256 or 512
 * @return the corresponding OPERAND_SIZE_* value, or OPERAND_SIZE_INVALID for
 *         any other width
 */
static inline operand_size_t bits_to_operand_size(int bits) {
    if (bits == 8)
        return OPERAND_SIZE_8;
    if (bits == 16)
        return OPERAND_SIZE_16;
    if (bits == 32)
        return OPERAND_SIZE_32;
    if (bits == 64)
        return OPERAND_SIZE_64;
    if (bits == 80)
        return OPERAND_SIZE_80;
    if (bits == 128)
        return OPERAND_SIZE_128;
    if (bits == 256)
        return OPERAND_SIZE_256;
    if (bits == 512)
        return OPERAND_SIZE_512;
    return OPERAND_SIZE_INVALID;
}
/**
 * Identifies a register independently of the width it is accessed with; the
 * registers table maps every register name to one of these ids plus a size.
 */
typedef enum register_id {
// Special registers
REG_RIP = -1,
// General purpose registers
// Numbered 0..15 in the order A, C, D, B, SP, BP, SI, DI, 8..15
// NOTE(review): this looks like the hardware register number used in
// instruction encodings - confirm before reordering
REG_A = 0x0000,
REG_C,
REG_D,
REG_B,
REG_SP,
REG_BP,
REG_SI,
REG_DI,
REG_8,
REG_9,
REG_10,
REG_11,
REG_12,
REG_13,
REG_14,
REG_15,
// x87 floating point stack registers, numbered from 0x1000
REG_ST0 = 0x1000,
REG_ST1,
REG_ST2,
REG_ST3,
REG_ST4,
REG_ST5,
REG_ST6,
REG_ST7,
} register_id_t;
/**
 * Description of one named register: the spelling used in assembly source,
 * its id and its operand size.
 */
typedef struct register_data {
const char *name;
register_id_t id;
operand_size_t size;
} register_data_t;
// Table of all known registers, terminated by a nullptr entry.
extern register_data_t *const registers[];
#endif // INCLUDE_DATA_REGISTERS_H_

View File

@ -1,711 +0,0 @@
#include "encoder.h"
#include "../bytes.h"
#include "../data/opcodes.h"
#include "symbols.h"
#include <assert.h>
#include <errno.h>
#include <string.h>
/**
* General encoder flow:
*
* There are 2 major passes the encoder does:
*
* First pass:
* - Run through the AST and collect information:
* - Set register values
* - Parse/set number values
* - Mark all instructions that use label references
* - Encode all instructions that don't use label references
* - Update addresses of all labels and instructions. Use an estimated
* instruction size for those instructions that use label references.
*
* Second pass:
* - Run through the AST for all instructions that use label references and
* collect size information using the estimated addresses from pass 1
* - Encode label references with the estimated addresses, this fixes their
* size.
* - Update all addresses
*
* Iteration:
* - Repeat the second pass until addresses converge
*/
// Statically allocated error values returned by the encoder functions in this
// file. They are declared extern in encoder.h.
error_t *const err_encoder_invalid_register =
&(error_t){.message = "Invalid register"};
error_t *const err_encoder_number_overflow =
&(error_t){.message = "Number overflows the storage"};
error_t *const err_encoder_invalid_number_format =
&(error_t){.message = "Invalid number format"};
error_t *const err_encoder_invalid_size_suffix =
&(error_t){.message = "Invalid number size suffix"};
error_t *const err_encoder_unknown_symbol_reference =
&(error_t){.message = "Referenced an unknown symbol"};
error_t *const err_encoder_no_encoding_found =
&(error_t){.message = "No encoding found for instruction"};
error_t *const err_encoder_not_implemented =
&(error_t){.message = "Implementation for this opcode is missing"};
error_t *const err_encoder_unexpected_length =
&(error_t){.message = "Unexpectedly long encoding"};
/**
 * Allocate an encoder for the given AST together with its symbol table.
 *
 * @param output receives the new encoder, or nullptr on failure
 * @param ast    root node to encode; stored in the encoder, not copied
 * @return nullptr on success, err_allocation_failed or the error reported by
 *         symbol_table_alloc otherwise
 */
error_t *encoder_alloc(encoder_t **output, ast_node_t *ast) {
    *output = nullptr;

    encoder_t *result = calloc(1, sizeof(encoder_t));
    if (!result)
        return err_allocation_failed;
    result->ast = ast;

    error_t *table_err = symbol_table_alloc(&result->symbols);
    if (table_err != nullptr) {
        free(result);
        return table_err;
    }

    *output = result;
    return nullptr;
}
/**
 * Release an encoder and its symbol table. Passing nullptr is a no-op. The
 * AST the encoder points at is not freed here.
 */
void encoder_free(encoder_t *encoder) {
    if (encoder != nullptr) {
        symbol_table_free(encoder->symbols);
        free(encoder);
    }
}
/**
 * Report whether a node contributes to the symbol table: label definitions,
 * label references and import/export directives.
 */
bool encoder_is_symbols_node(ast_node_t *node) {
    return node->id == NODE_LABEL || node->id == NODE_LABEL_REFERENCE ||
           node->id == NODE_EXPORT_DIRECTIVE ||
           node->id == NODE_IMPORT_DIRECTIVE;
}
/**
 * Determine the numeric base of a NODE_NUMBER from the kind of its first
 * child (binary, octal, decimal or hexadecimal literal).
 *
 * Any other child kind is a programming error and trips an assertion.
 */
int encoder_get_number_base(ast_node_t *number) {
    ast_node_t *literal = number->children[0];
    if (literal->id == NODE_BINARY)
        return 2;
    if (literal->id == NODE_OCTAL)
        return 8;
    if (literal->id == NODE_DECIMAL)
        return 10;
    if (literal->id == NODE_HEXADECIMAL)
        return 16;
    assert(false);
    __builtin_unreachable();
}
/**
 * Check whether a size suffix is acceptable for a number literal.
 *
 * 0 means "no suffix given"; otherwise only 8, 16, 32 and 64 are accepted.
 */
bool is_valid_size_suffix(int bits) {
    return bits == 0 || bits == 8 || bits == 16 || bits == 32 || bits == 64;
}
/**
 * Check whether value needs more than `bits` bits of storage.
 *
 * A width of 0 (no explicit size) or of 64 and above can hold every uint64_t,
 * so those never overflow.
 */
bool is_overflow(uint64_t value, int bits) {
    const bool unbounded = (bits == 0) || (bits >= 64);
    if (unbounded)
        return false;
    // Anything left after dropping the low `bits` bits does not fit.
    return (value >> bits) != 0;
}
/**
 * Compute the operand sizes a number may be encoded with.
 *
 * With an explicit size (bits != 0) exactly that size is returned. Otherwise
 * the result is a mask of every size from 8 to 64 bits that can hold value;
 * 64 bits is always part of the mask.
 */
operand_size_t encoder_get_size_mask(uint64_t value, int bits) {
    if (bits != 0)
        return bits_to_operand_size(bits);

    operand_size_t fitting = OPERAND_SIZE_64;
    if (value <= UINT32_MAX)
        fitting |= OPERAND_SIZE_32;
    if (value <= UINT16_MAX)
        fitting |= OPERAND_SIZE_16;
    if (value <= UINT8_MAX)
        fitting |= OPERAND_SIZE_8;
    return fitting;
}
/**
 * Parse the text of a NODE_NUMBER and store its value and size mask in the
 * node.
 *
 * The literal has the form `<digits>[:<bits>]`; non-decimal literals carry a
 * two character prefix (0x, 0o or 0b) that is skipped before parsing.
 *
 * @return nullptr on success, or
 *         - err_encoder_number_overflow if the value does not fit in 64 bits
 *           or in the requested size
 *         - err_encoder_invalid_number_format if digits are missing or
 *           trailing characters remain
 *         - err_encoder_invalid_size_suffix if the suffix is not 8/16/32/64
 */
error_t *encoder_set_number_value(ast_node_t *node) {
    assert(node->id == NODE_NUMBER);
    assert(node->children[0]);
    const char *number = node->children[0]->token_entry->token.value;
    int base = encoder_get_number_base(node);
    if (base != 10)
        number += 2; // all except base 10 use a 0x, 0o or 0b prefix

    char *endptr;
    errno = 0;
    uint64_t value = strtoull(number, &endptr, base);
    if (errno == ERANGE)
        return err_encoder_number_overflow;
    if (endptr == number)
        return err_encoder_invalid_number_format;

    // Keep the suffix as a long until it has been range checked. Narrowing
    // it to int straight away would let huge suffixes wrap around to a valid
    // width (e.g. 2^32 + 8 becoming 8).
    long suffix_bits = 0;
    if (*endptr == ':') {
        const char *suffix = endptr + 1;
        suffix_bits = strtol(suffix, &endptr, 10);
        if (endptr == suffix)
            return err_encoder_invalid_number_format;
    }
    if (*endptr != '\0')
        return err_encoder_invalid_number_format;

    // strtol saturates to LONG_MIN/LONG_MAX on overflow, which this range
    // check rejects as well.
    if (suffix_bits < 0 || suffix_bits > 64)
        return err_encoder_invalid_size_suffix;
    int bits = (int)suffix_bits;
    if (!is_valid_size_suffix(bits))
        return err_encoder_invalid_size_suffix;
    if (is_overflow(value, bits))
        return err_encoder_number_overflow;

    node->value.number.value = value;
    node->value.number.size = encoder_get_size_mask(value, bits);
    return nullptr;
}
/**
 * Look up the register named by a NODE_REGISTER token in the registers table
 * and store its id and size in the node.
 *
 * @return nullptr on success, err_encoder_invalid_register if the name is not
 *         in the table
 */
error_t *encoder_set_register_value(ast_node_t *node) {
    assert(node->id == NODE_REGISTER);
    const char *name = node->token_entry->token.value;

    size_t index = 0;
    while (registers[index] != nullptr) {
        register_data_t *candidate = registers[index++];
        if (strcmp(name, candidate->name) != 0)
            continue;
        node->value.reg.id = candidate->id;
        node->value.reg.size = candidate->size;
        return nullptr;
    }
    return err_encoder_invalid_register;
}
/**
 * Store an opcode extension in the reg field (bits 5-3) of a modrm byte.
 * The extension must be a real 3-bit value, not opcode_extension_none.
 */
static inline uint8_t modrm_extension(uint8_t modrm, uint8_t extension) {
    assert(extension != opcode_extension_none);
    assert((extension & 0b111) == extension);
    const uint8_t cleared = modrm & ~modrm_reg_mask;
    return cleared | extension << 3;
}
/**
 * Add the REX.R bit to rex when the register placed in the modrm reg field
 * has bit 3 of its id set; otherwise rex is returned unchanged.
 */
static inline uint8_t modrm_reg_rex(uint8_t rex, register_id_t id) {
    return (id & 0b1000) ? (rex | rex_prefix_r) : rex;
}
/**
 * Write the low three bits of the register id into the reg field of modrm.
 * Pair this with modrm_reg_rex, which handles bit 3 of the id.
 */
static inline uint8_t modrm_reg(uint8_t modrm, register_id_t id) {
    const uint8_t cleared = modrm & ~modrm_reg_mask;
    return cleared | (id & 0b111) << 3;
}
/**
 * Add the REX.B bit to rex when the register placed in the modrm rm field
 * has bit 3 of its id set; otherwise rex is returned unchanged.
 */
static inline uint8_t modrm_rm_rex(uint8_t rex, register_id_t id) {
    return (id & 0b1000) ? (rex | rex_prefix_b) : rex;
}
/**
 * Write the low three bits of the register id into the rm field of modrm.
 * Only valid when modrm is in register mode; pair this with modrm_rm_rex,
 * which handles bit 3 of the id.
 */
static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) {
    assert((modrm & modrm_mod_mask) == modrm_mod_register);
    const uint8_t cleared = modrm & ~modrm_rm_mask;
    return cleared | (id & 0b111);
}
/**
 * Walk a statement's subtree and record what the encoder needs later:
 * - symbol nodes are entered into the symbol table and, when the enclosing
 *   statement is an instruction, mark it as having a reference
 * - number nodes get their parsed value and size mask
 * - register nodes get their id and size
 *
 * @param node      node currently being visited
 * @param statement top level statement that contains node
 * @return the first error encountered, nullptr otherwise
 */
error_t *encoder_collect_info(encoder_t *encoder, ast_node_t *node,
                              ast_node_t *statement) {
    error_t *err = nullptr;

    if (encoder_is_symbols_node(node)) {
        err = symbol_table_update(encoder->symbols, node, statement);
        if (statement->id == NODE_INSTRUCTION)
            statement->value.instruction.has_reference = true;
    } else {
        switch (node->id) {
        case NODE_NUMBER:
            err = encoder_set_number_value(node);
            break;
        case NODE_REGISTER:
            err = encoder_set_register_value(node);
            break;
        default:
            break;
        }
    }
    if (err)
        return err;

    for (size_t child = 0; child < node->len; ++child) {
        err = encoder_collect_info(encoder, node->children[child], statement);
        if (err)
            return err;
    }
    return nullptr;
}
/**
 * Check whether an operand from the AST satisfies the operand description of
 * an opcode.
 *
 * - Registers must have exactly the size the opcode asks for.
 * - Memory operands match on node kind alone.
 * - Immediates match when the sizes the number or label reference can be
 *   encoded with overlap with the sizes the opcode accepts.
 *
 * This function only inspects its arguments; in particular it must not
 * modify info, which points into the shared opcode table.
 */
bool is_operand_match(operand_info_t *info, ast_node_t *operand) {
    switch (info->kind) {
    case OPERAND_REGISTER:
        return operand->id == NODE_REGISTER &&
               ast_node_register_value(operand)->size == info->size;
    case OPERAND_MEMORY:
        return operand->id == NODE_MEMORY;
    case OPERAND_IMMEDIATE: {
        if (operand->id != NODE_IMMEDIATE)
            return false;
        ast_node_t *child = operand->children[0];
        if (child->id == NODE_NUMBER)
            return (ast_node_number_value(child)->size & info->size) != 0;
        if (child->id == NODE_LABEL_REFERENCE) {
            // Test the overlap with `&`. The previous `&=` narrowed the
            // opcode table entry itself and so changed which operands the
            // opcode would accept on every later lookup.
            return (ast_node_reference_value(child)->size & info->size) != 0;
        }
        break;
    } // end OPERAND_IMMEDIATE case
    }
    assert(false && "unreachable");
    __builtin_unreachable();
}
/**
 * Check whether an opcode table entry fits an instruction: same mnemonic,
 * same number of operands and every operand matching its description.
 */
bool is_opcode_match(opcode_data_t *opcode, const char *mnemonic,
                     ast_node_t *operands) {
    const bool same_mnemonic = strcmp(opcode->mnemonic, mnemonic) == 0;
    if (!same_mnemonic || opcode->operand_count != operands->len)
        return false;

    size_t index = 0;
    while (index < operands->len &&
           is_operand_match(&opcode->operands[index],
                            operands->children[index]))
        ++index;
    // Every operand matched if and only if the scan ran to the end.
    return index == operands->len;
}
/**
 * Find the first entry of the opcodes table that matches the instruction's
 * mnemonic and operands.
 *
 * @param opcode_out receives the matching entry; left untouched on failure
 * @return nullptr on success, err_encoder_no_encoding_found otherwise
 */
error_t *encoder_get_opcode_data(ast_node_t *instruction, ast_node_t *operands,
                                 opcode_data_t **opcode_out) {
    const char *mnemonic = instruction->children[0]->token_entry->token.value;

    size_t index = 0;
    while (opcodes[index]) {
        opcode_data_t *candidate = opcodes[index++];
        if (!is_opcode_match(candidate, mnemonic, operands))
            continue;
        *opcode_out = candidate;
        return nullptr;
    }
    return err_encoder_no_encoding_found;
}
/**
 * Placeholder for encoding instructions with two operands.
 *
 * Only verifies that the opcode byte is already in the buffer and then
 * reports err_encoder_not_implemented.
 */
error_t *encode_two_operand(encoder_t *encoder, opcode_data_t *opcode,
                            ast_node_t *operands, bytes_t *encoding,
                            uint8_t *rex) {
    assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");
    // Nothing is used yet; the casts keep unused parameter warnings quiet,
    // including for `encoding` when asserts are compiled out.
    (void)encoder;
    (void)opcode;
    (void)operands;
    (void)encoding;
    (void)rex;
    return err_encoder_not_implemented;
}
/**
 * Encode a single register operand into the opcode byte itself.
 *
 * The low three bits of the register id are OR-ed into the last byte of the
 * encoding buffer, which must already hold the opcode. Bit 3 of the id is
 * carried by the REX.B bit: REX.B is the bit that extends the register field
 * of the opcode byte, whereas REX.R only extends the reg field of a modrm
 * byte, which this form does not have.
 *
 * @param rex accumulates the REX bits required by the operand
 * @return always nullptr
 */
error_t *encode_one_register_in_opcode(encoder_t *encoder,
                                       opcode_data_t *opcode,
                                       ast_node_t *operands, bytes_t *encoding,
                                       uint8_t *rex) {
    (void)encoder;
    (void)opcode;
    assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already");
    register_id_t id = ast_node_register_value(operands->children[0])->id;
    encoding->buffer[encoding->len - 1] |= id & 0b111;
    if ((id & 0b1000) > 0) {
        *rex |= rex_prefix_b;
    }
    return nullptr;
}
/**
 * Encode a single register operand through a modrm byte in register mode and
 * append that byte to the encoding.
 *
 * @param rex accumulates the REX bits required by the operand
 * @return the result of appending the modrm byte
 */
error_t *encode_one_register(encoder_t *encoder, opcode_data_t *opcode,
                             ast_node_t *operands, bytes_t *encoding,
                             uint8_t *rex) {
    (void)encoder;
    assert(operands->len == 1);
    assert(operands->children[0]->id == NODE_REGISTER);

    const register_id_t reg =
        ast_node_register_value(operands->children[0])->id;
    const bool has_extension =
        opcode->opcode_extension != opcode_extension_none;

    uint8_t modrm = modrm_mod_register;
    if (!has_extension) {
        // Without an opcode extension the register goes in the reg field.
        // NOTE:
        // it's actually likely this case just doesn't exist at all and all
        // opcodes that take one register in modr/m _all_ have extended
        // opcodes
        modrm = modrm_reg(modrm, reg);
        *rex = modrm_reg_rex(*rex, reg);
    } else {
        // The opcode extension occupies the reg field, so the register goes
        // in the rm field.
        modrm = modrm_extension(modrm, opcode->opcode_extension);
        modrm = modrm_rm(modrm, reg);
        *rex = modrm_rm_rex(*rex, reg);
    }
    return bytes_append_uint8(encoding, modrm);
}
/**
 * Encode a single immediate operand by appending it to the encoding.
 *
 * The width written is the operand size of the matched opcode. Numbers are
 * written as-is. Label references are written as their absolute address for
 * 64-bit immediates and as their offset for 8, 16 and 32-bit immediates.
 *
 * @return the result of the append, or err_encoder_not_implemented when the
 *         opcode asks for an immediate size that is not handled here
 */
error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode,
                              ast_node_t *operands, bytes_t *encoding,
                              uint8_t *rex) {
    (void)encoder;
    (void)rex;
    assert(operands->len == 1);
    assert(operands->children[0]->id == NODE_IMMEDIATE);
    assert(operands->children[0]->len == 1);
    ast_node_t *immediate = operands->children[0]->children[0];
    assert(immediate->id == NODE_NUMBER ||
           immediate->id == NODE_LABEL_REFERENCE);

    operand_size_t size = opcode->operands[0].size;
    if (immediate->id == NODE_NUMBER) {
        uint64_t value = ast_node_number_value(immediate)->value;
        switch (size) {
        case OPERAND_SIZE_8:
            return bytes_append_uint8(encoding, value);
        case OPERAND_SIZE_16:
            return bytes_append_uint16(encoding, value);
        case OPERAND_SIZE_32:
            return bytes_append_uint32(encoding, value);
        case OPERAND_SIZE_64:
            return bytes_append_uint64(encoding, value);
        default:
            break;
        }
    } else {
        reference_t *reference = ast_node_reference_value(immediate);
        switch (size) {
        case OPERAND_SIZE_64:
            return bytes_append_uint64(encoding, reference->address);
        case OPERAND_SIZE_32:
            return bytes_append_uint32(encoding, reference->offset);
        case OPERAND_SIZE_16:
            return bytes_append_uint16(encoding, reference->offset);
        case OPERAND_SIZE_8:
            return bytes_append_uint8(encoding, reference->offset);
        default:
            break;
        }
    }

    // Unhandled sizes are a bug in the opcode table. Fail loudly in debug
    // builds and report an error when asserts are compiled out, instead of
    // claiming success without having emitted any bytes.
    assert(false && "intentionally unhandled");
    return err_encoder_not_implemented;
}
/**
 * Placeholder for encoding a single memory operand; always reports
 * err_encoder_not_implemented.
 */
error_t *encode_one_memory(encoder_t *encoder, opcode_data_t *opcode,
                           ast_node_t *operands, bytes_t *encoding,
                           uint8_t *rex) {
    // All parameters are intentionally unused for now.
    (void)rex;
    (void)encoding;
    (void)operands;
    (void)opcode;
    (void)encoder;
    return err_encoder_not_implemented;
}
/**
 * Encode the operand of a one-operand instruction by dispatching on the kind
 * of operand the opcode expects.
 *
 * Register operands are placed either in the opcode byte (encoding class
 * ENCODING_OPCODE_REGISTER) or in a modrm byte.
 *
 * @return the error reported by the selected encoder, or
 *         err_encoder_not_implemented for an operand kind not handled here
 */
error_t *encode_one_operand(encoder_t *encoder, opcode_data_t *opcode,
                            ast_node_t *operands, bytes_t *encoding,
                            uint8_t *rex) {
    switch (opcode->operands[0].kind) {
    case OPERAND_REGISTER:
        if (opcode->encoding_class == ENCODING_OPCODE_REGISTER)
            return encode_one_register_in_opcode(encoder, opcode, operands,
                                                 encoding, rex);
        else
            return encode_one_register(encoder, opcode, operands, encoding,
                                       rex);
    case OPERAND_MEMORY:
        return encode_one_memory(encoder, opcode, operands, encoding, rex);
    case OPERAND_IMMEDIATE:
        return encode_one_immediate(encoder, opcode, operands, encoding, rex);
    }
    // Never fall off the end of a non-void function: an operand kind outside
    // the cases above is a bug in the opcode table.
    assert(false && "unreachable");
    return err_encoder_not_implemented;
}
/**
 * Encode one instruction and store the resulting bytes in the instruction
 * node's value.
 *
 * The opcode and operands are first assembled in a local buffer while the
 * operand encoders collect the REX bits they need. The final output is then
 * written as: address size prefix, operand size prefix, REX prefix (each only
 * when required), followed by the opcode and operand bytes.
 *
 * @return nullptr on success, otherwise the error from the opcode lookup, an
 *         operand encoder or a byte append, err_encoder_not_implemented for
 *         more than two operands, or err_encoder_unexpected_length when the
 *         opcode and operand bytes exceed 20 bytes
 */
error_t *encoder_encode_instruction(encoder_t *encoder,
                                    ast_node_t *instruction) {
    ast_node_t *operands = instruction->children[1];

    opcode_data_t *opcode = nullptr;
    error_t *err = encoder_get_opcode_data(instruction, operands, &opcode);
    if (err)
        return err;

    uint8_t rex = 0;
    bytes_t *encoding = LOCAL_BYTES(32);

    // Two-byte opcodes are emitted high byte first.
    if (opcode->opcode > 0xFF &&
        (err = bytes_append_uint8(encoding, opcode->opcode >> 8)))
        return err;
    if ((err = bytes_append_uint8(encoding, opcode->opcode & 0xFF)))
        return err;

    // NOTE:operand encoders all expect the opcode to be in the buffer already.
    // Some of them rely on this to encode the register value in the opcode
    // byte.
    switch (opcode->operand_count) {
    case 0:
        break;
    case 1:
        err = encode_one_operand(encoder, opcode, operands, encoding, &rex);
        break;
    case 2:
        err = encode_two_operand(encoder, opcode, operands, encoding, &rex);
        break;
    default:
        err = err_encoder_not_implemented;
    }
    if (err)
        return err;

    // produce the actual encoding output in the NODE_INSTRUCTION value
    instruction_t *instruction_value = ast_node_instruction_value(instruction);
    uint8_t *output = instruction_value->encoding.buffer;
    size_t output_len = 0;

    // Handle prefixes
    // REX.W is combined with the bits collected by the operand encoders. A
    // plain assignment here would throw away the register extension bits.
    if (opcode->rex_w_prefix)
        rex |= rex_prefix_w;
    if (opcode->address_size_prefix)
        output[output_len++] = memory_size_prefix;
    if (opcode->operand_size_prefix)
        output[output_len++] = operand_size_prefix;
    if (rex > 0)
        output[output_len++] = rex;

    // copy the encoded opcode and operands
    if (encoding->len > 20)
        return err_encoder_unexpected_length;
    memcpy(output + output_len, encoding->buffer, encoding->len);
    output_len += encoding->len;
    instruction_value->encoding.len = output_len;
    return nullptr;
}
/**
* Initial guess for instruction size of instructions that contain a label
* reference
*/
constexpr size_t instruction_size_estimate = 10;
/**
 * First pass over the program.
 *
 * For every top level statement:
 * - collect operand information (number values, register values, symbols and
 *   whether the statement uses a label reference)
 * - encode instructions without label references right away
 * - give instructions with label references the estimated size
 * - assign the running address to instructions and labels
 *
 * @return the first error encountered, nullptr otherwise
 */
error_t *encoder_first_pass(encoder_t *encoder) {
    ast_node_t *root = encoder->ast;
    assert(root->id == NODE_PROGRAM);

    uintptr_t address = 0;
    for (size_t index = 0; index < root->len; ++index) {
        ast_node_t *statement = root->children[index];

        error_t *err = encoder_collect_info(encoder, statement, statement);
        if (err)
            return err;

        if (statement->id == NODE_LABEL) {
            ast_node_label_value(statement)->address = address;
            continue;
        }
        if (statement->id != NODE_INSTRUCTION)
            continue;

        instruction_t *instruction = ast_node_instruction_value(statement);
        if (instruction->has_reference) {
            // Real size is only known once label addresses are available.
            instruction->encoding.len = instruction_size_estimate;
        } else {
            err = encoder_encode_instruction(encoder, statement);
            if (err)
                return err;
        }
        instruction->address = address;
        address += instruction->encoding.len;
    }
    return nullptr;
}
/**
 * Compute the mask of operand sizes able to hold a signed value.
 *
 * 64 bits is always included; 32, 16 and 8 bits are added when value lies in
 * the range of the corresponding signed integer type.
 */
operand_size_t signed_to_size_mask(int64_t value) {
    operand_size_t mask = OPERAND_SIZE_64;
    // The ranges nest, so a narrower size only needs checking once the wider
    // one is known to fit.
    if (INT32_MIN <= value && value <= INT32_MAX) {
        mask |= OPERAND_SIZE_32;
        if (INT16_MIN <= value && value <= INT16_MAX) {
            mask |= OPERAND_SIZE_16;
            if (INT8_MIN <= value && value <= INT8_MAX)
                mask |= OPERAND_SIZE_8;
        }
    }
    return mask;
}
/**
 * Distance from the end of an instruction to a label.
 *
 * The offset is measured from the address directly after the instruction
 * (its address plus its current encoding length) to the label's address.
 */
int64_t statement_offset(ast_node_t *from, ast_node_t *to) {
    assert(from->id == NODE_INSTRUCTION);
    assert(to->id == NODE_LABEL);

    instruction_t *source = ast_node_instruction_value(from);
    int64_t after_source = source->address + source->encoding.len;
    int64_t target = ast_node_label_value(to)->address;
    return target - after_source;
}
/**
 * Walk an instruction's subtree and fill in every label reference with the
 * label's current absolute address, its offset relative to the end of the
 * instruction and the mask of sizes that offset fits in.
 *
 * @param node      node currently being visited
 * @param statement instruction that contains node
 * @return nullptr on success,
 *         err_encoder_unknown_symbol_reference if the name is not in the
 *         symbol table, or err_encoder_not_implemented if the symbol has no
 *         label statement to take an address from
 */
error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node,
                                        ast_node_t *statement) {
    assert(statement->id == NODE_INSTRUCTION);

    if (node->id == NODE_LABEL_REFERENCE) {
        const char *name = node->token_entry->token.value;
        symbol_t *symbol = symbol_table_lookup(encoder->symbols, name);
        if (symbol == nullptr)
            return err_encoder_unknown_symbol_reference;
        // The symbol table only keeps a statement for label definitions.
        // Imported symbols, and exported ones that are never defined, have
        // none and pass encoder_check_symbols, so this is reachable from
        // user input and must not be left to an assert.
        if (symbol->statement == nullptr ||
            symbol->statement->id != NODE_LABEL)
            return err_encoder_not_implemented;

        int64_t offset = statement_offset(statement, symbol->statement);
        int64_t absolute = ast_node_label_value(symbol->statement)->address;
        operand_size_t size = signed_to_size_mask(offset);
        node->value.reference.address = absolute;
        node->value.reference.offset = offset;
        node->value.reference.size = size;
    }

    for (size_t i = 0; i < node->len; ++i) {
        error_t *err = encoder_collect_reference_info(
            encoder, node->children[i], statement);
        if (err)
            return err;
    }
    return nullptr;
}
/**
 * Only instructions that use a label reference are re-encoded in the second
 * pass.
 */
bool encoder_should_reencode(ast_node_t *statement) {
    return statement->id == NODE_INSTRUCTION &&
           ast_node_instruction_value(statement)->has_reference;
}
/**
 * Store an address in an instruction or label statement. Other statement
 * kinds carry no address and are left alone.
 */
void set_statement_address(ast_node_t *statement, int64_t address) {
    switch (statement->id) {
    case NODE_INSTRUCTION:
        ast_node_instruction_value(statement)->address = address;
        break;
    case NODE_LABEL:
        ast_node_label_value(statement)->address = address;
        break;
    default:
        break;
    }
}
/**
 * Number of bytes a statement occupies: the encoding length for instructions,
 * zero for everything else.
 */
size_t get_statement_length(ast_node_t *statement) {
    return statement->id == NODE_INSTRUCTION
               ? ast_node_instruction_value(statement)->encoding.len
               : 0;
}
/**
 * Second pass over the program.
 *
 * Recomputes the address of every statement and re-encodes all instructions
 * that use a label reference, based on the addresses known so far.
 *
 * @param did_update set to true when at least one statement changed length
 *                   during this pass, false otherwise
 * @return the first error encountered, nullptr otherwise
 */
error_t *encoder_second_pass(encoder_t *encoder, bool *did_update) {
    ast_node_t *root = encoder->ast;
    *did_update = false;

    int64_t address = 0;
    for (size_t index = 0; index < root->len; ++index) {
        ast_node_t *statement = root->children[index];
        set_statement_address(statement, address);

        const size_t len_before = get_statement_length(statement);
        size_t len_after = len_before;
        if (encoder_should_reencode(statement)) {
            error_t *err =
                encoder_collect_reference_info(encoder, statement, statement);
            if (err)
                return err;
            err = encoder_encode_instruction(encoder, statement);
            if (err)
                return err;
            len_after = get_statement_length(statement);
        }

        if (len_before != len_after)
            *did_update = true;
        address += len_after;
    }
    return nullptr;
}
/**
 * Return the first opcode table entry matching the instruction's mnemonic and
 * operands, or nullptr when there is none.
 */
opcode_data_t *encoder_find_opcode(ast_node_t *instruction) {
    size_t index = 0;
    while (opcodes[index] != nullptr) {
        opcode_data_t *candidate = opcodes[index++];
        const char *mnemonic =
            instruction->children[0]->token_entry->token.value;
        ast_node_t *operands = instruction->children[1];
        if (is_opcode_match(candidate, mnemonic, operands))
            return candidate;
    }
    return nullptr;
}
/**
 * Verify that no symbol is still a bare reference, i.e. that every referenced
 * name was also defined, imported or exported.
 *
 * @return err_encoder_unknown_symbol_reference for the first symbol still of
 *         kind SYMBOL_REFERENCE, nullptr otherwise
 */
error_t *encoder_check_symbols(encoder_t *encoder) {
    symbol_table_t *table = encoder->symbols;
    size_t index = 0;
    while (index < table->len) {
        if (table->symbols[index].kind == SYMBOL_REFERENCE)
            return err_encoder_unknown_symbol_reference;
        ++index;
    }
    return nullptr;
}
error_t *encoder_encode(encoder_t *encoder) {
error_t *err = encoder_first_pass(encoder);
if (err)
return err;
err = encoder_check_symbols(encoder);
if (err)
return err;
bool did_update = true;
for (int i = 0; i < 10 && did_update; ++i) {
err = encoder_second_pass(encoder, &did_update);
if (err)
return err;
}
return nullptr;
}

View File

@ -1,33 +0,0 @@
#ifndef INCLUDE_ENCODER_ENCODER_H_
#define INCLUDE_ENCODER_ENCODER_H_
#include "symbols.h"
/**
 * State of one encoding run.
 */
typedef struct encoder {
// Symbol table; allocated by encoder_alloc and released by encoder_free.
symbol_table_t *symbols;
// Root of the AST handed to encoder_alloc; encoder_free does not free it.
ast_node_t *ast;
} encoder_t;
// Values for the mod field (bits 7-6) of a modrm byte.
constexpr uint8_t modrm_mod_memory = 0b00'000'000;
constexpr uint8_t modrm_mod_memory_displacement8 = 0b01'000'000;
constexpr uint8_t modrm_mod_memory_displacement32 = 0b10'000'000;
constexpr uint8_t modrm_mod_register = 0b11'000'000;
// Masks selecting the reg (bits 5-3), rm (bits 2-0) and mod (bits 7-6) fields.
constexpr uint8_t modrm_reg_mask = 0b00'111'000;
constexpr uint8_t modrm_rm_mask = 0b00'000'111;
constexpr uint8_t modrm_mod_mask = 0b11'000'000;
error_t *encoder_alloc(encoder_t **encoder, ast_node_t *ast);
error_t *encoder_encode(encoder_t *encoder);
void encoder_free(encoder_t *encoder);
extern error_t *const err_encoder_invalid_register;
extern error_t *const err_encoder_number_overflow;
extern error_t *const err_encoder_invalid_number_format;
extern error_t *const err_encoder_invalid_size_suffix;
extern error_t *const err_encoder_unknown_symbol_reference;
extern error_t *const err_encoder_no_encoding_found;
extern error_t *const err_encoder_not_implemented;
extern error_t *const err_encoder_unexpected_length;
#endif // INCLUDE_ENCODER_ENCODER_H_

View File

@ -1,165 +0,0 @@
#include "symbols.h"
#include "../error.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
// Number of symbol slots a new table starts with.
constexpr size_t symbol_table_default_cap = 64;
// Once the capacity has reached this value the table refuses to grow further.
constexpr size_t symbol_table_max_cap = 1 << 16;
// Statically allocated error values returned by the symbol table functions.
error_t *const err_symbol_table_invalid_node = &(error_t){
.message = "Unexpected node id when adding symbol to symbol table"};
error_t *const err_symbol_table_max_cap = &(error_t){
.message = "Failed to increase symbol table length, max capacity reached"};
error_t *const err_symbol_table_incompatible_symbols =
&(error_t){.message = "Failed to update symbol with incompatible kind"};
/**
 * Allocate an empty symbol table with the default capacity.
 *
 * @param output receives the new table, or nullptr on failure
 * @return nullptr on success, err_allocation_failed otherwise
 */
error_t *symbol_table_alloc(symbol_table_t **output) {
    *output = nullptr;

    symbol_table_t *result = calloc(1, sizeof(symbol_table_t));
    if (!result)
        return err_allocation_failed;

    result->symbols = calloc(symbol_table_default_cap, sizeof(symbol_t));
    if (!result->symbols) {
        free(result);
        return err_allocation_failed;
    }
    result->len = 0;
    result->cap = symbol_table_default_cap;

    *output = result;
    return nullptr;
}
/**
 * Release a symbol table and its symbol storage. Passing nullptr is a no-op,
 * matching encoder_free. The symbol names are not freed; the table never owns
 * them.
 */
void symbol_table_free(symbol_table_t *table) {
    if (table == nullptr)
        return;
    free(table->symbols);
    free(table);
}
/**
 * Double the capacity of the table.
 *
 * @return nullptr on success, err_symbol_table_max_cap when the capacity has
 *         already reached the maximum, err_allocation_failed when the
 *         reallocation fails (the table is left unchanged in both cases)
 */
error_t *symbol_table_grow_cap(symbol_table_t *table) {
    if (table->cap >= symbol_table_max_cap)
        return err_symbol_table_max_cap;

    const size_t doubled = 2 * table->cap;
    symbol_t *resized = realloc(table->symbols, doubled * sizeof(symbol_t));
    if (!resized)
        return err_allocation_failed;

    table->cap = doubled;
    table->symbols = resized;
    return nullptr;
}
/**
 * Derive the symbol kind and name a node stands for.
 *
 * Labels take their name from their first child, label references from their
 * own token, and import/export directives from their second child.
 *
 * @param kind receives the symbol kind; untouched on failure
 * @param name receives the token text holding the name; untouched on failure
 * @return nullptr on success, err_symbol_table_invalid_node for any other
 *         node kind
 */
error_t *symbol_table_get_node_info(ast_node_t *node, symbol_kind_t *kind,
                                    char **name) {
    symbol_kind_t found_kind;
    ast_node_t *name_node;

    switch (node->id) {
    case NODE_LABEL:
        found_kind = SYMBOL_LOCAL;
        name_node = node->children[0];
        break;
    case NODE_LABEL_REFERENCE:
        found_kind = SYMBOL_REFERENCE;
        name_node = node;
        break;
    case NODE_IMPORT_DIRECTIVE:
        found_kind = SYMBOL_IMPORT;
        name_node = node->children[1];
        break;
    case NODE_EXPORT_DIRECTIVE:
        found_kind = SYMBOL_EXPORT;
        name_node = node->children[1];
        break;
    default:
        return err_symbol_table_invalid_node;
    }

    *kind = found_kind;
    *name = name_node->token_entry->token.value;
    return nullptr;
}
/*
old \ new | REFERENCE | LOCAL | IMPORT | EXPORT |
-------------|-----------|----------|----------|----------|
REFERENCE | | replace | replace | replace |
-------------|-----------|----------|----------|----------|
LOCAL | | | ERR | replace |
-------------|-----------|----------|----------|----------|
IMPORT | | | | ERR |
-------------|-----------|----------|----------|----------|
EXPORT | | | ERR | |
-------------|-----------|----------|----------|----------|
*/
// True when an existing symbol of kind `old` must take over the kind `new`:
// a reference is replaced by any other kind, a local symbol by an export.
bool symbol_table_should_upgrade(symbol_kind_t old, symbol_kind_t new) {
    switch (old) {
    case SYMBOL_REFERENCE:
        return new != SYMBOL_REFERENCE;
    case SYMBOL_LOCAL:
        return new == SYMBOL_EXPORT;
    default:
        return false;
    }
}
// True when a symbol of kind `old` cannot be combined with kind `new`:
// importing a name that is already local or exported, or exporting a name
// that is already imported.
bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) {
    const bool import_conflict =
        new == SYMBOL_IMPORT &&
        (old == SYMBOL_LOCAL || old == SYMBOL_EXPORT);
    const bool export_conflict =
        new == SYMBOL_EXPORT && old == SYMBOL_IMPORT;
    return import_conflict || export_conflict;
}
/**
 * Append a new symbol to the table, growing the storage when it is full.
 *
 * @pre The symbol _must not_ already be in the table.
 * @return nullptr on success, or the error from growing the table
 */
error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind,
                          ast_node_t *statement) {
    const bool is_full = table->len >= table->cap;
    if (is_full) {
        error_t *grow_err = symbol_table_grow_cap(table);
        if (grow_err)
            return grow_err;
    }

    const symbol_t entry = {
        .name = name,
        .kind = kind,
        .statement = statement,
    };
    table->symbols[table->len] = entry;
    table->len += 1;
    return nullptr;
}
/**
 * Record the symbol a node stands for.
 *
 * A name that is not yet known is added. For a known name the kind is
 * upgraded where the rules allow it, incompatible combinations are rejected,
 * and a label definition fills in the defining statement if none is stored
 * yet. Only label definitions keep a statement; all other kinds store
 * nullptr.
 *
 * @param node      label, label reference or import/export directive
 * @param statement top level statement that contains node
 * @return nullptr on success, err_symbol_table_invalid_node,
 *         err_symbol_table_incompatible_symbols or an error from adding the
 *         symbol otherwise
 */
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
                             ast_node_t *statement) {
    char *name;
    symbol_kind_t kind;
    error_t *err = symbol_table_get_node_info(node, &kind, &name);
    if (err)
        return err;

    ast_node_t *definition = (kind == SYMBOL_LOCAL) ? statement : nullptr;

    symbol_t *existing = symbol_table_lookup(table, name);
    if (existing == nullptr)
        return symbol_table_add(table, name, kind, definition);

    if (symbol_table_should_error(existing->kind, kind))
        return err_symbol_table_incompatible_symbols;
    if (symbol_table_should_upgrade(existing->kind, kind))
        existing->kind = kind;
    if (kind == SYMBOL_LOCAL && existing->statement == nullptr)
        existing->statement = definition;
    return nullptr;
}
/**
 * Find a symbol by name with a linear scan.
 *
 * @return pointer to the first entry with that name, nullptr if there is none
 */
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name) {
    size_t index = 0;
    while (index < table->len) {
        symbol_t *candidate = &table->symbols[index++];
        if (strcmp(candidate->name, name) == 0)
            return candidate;
    }
    return nullptr;
}

View File

@ -1,47 +0,0 @@
#ifndef INCLUDE_ENCODER_SYMBOLS_H_
#define INCLUDE_ENCODER_SYMBOLS_H_
#include "../ast.h"
extern error_t *const err_symbol_table_invalid_node;
extern error_t *const err_symbol_table_max_cap;
extern error_t *const err_symbol_table_incompatible_symbols;
/**
 * How a name entered the symbol table.
 */
typedef enum symbol_kind {
// Seen only as a label reference so far
SYMBOL_REFERENCE,
// Defined by a label in this program
SYMBOL_LOCAL,
// Named by an export directive
SYMBOL_EXPORT,
// Named by an import directive
SYMBOL_IMPORT,
} symbol_kind_t;
/**
* Represent a symbol in the program
*
* Symbols with the same name can only be in the table once. IMPORT or EXPORT
* symbols take precedence over REFERENCE symbols. If any reference symbols
* remain after the first encoding pass this indicates an error. Trying to add
* an IMPORT or EXPORT symbol if the same name already exists as the other kind
* is an error.
*
* The symbol table never takes ownership of the name string; it is lifted
* straight from the node's token value.
*/
typedef struct symbol {
// Name of the symbol; borrowed from the token, never freed by the table
char *name;
symbol_kind_t kind;
// Label statement defining the symbol, nullptr when no definition was seen
ast_node_t *statement;
} symbol_t;
/**
 * Growable array of symbols.
 */
typedef struct symbol_table {
// Number of entries the symbols array has room for
size_t cap;
// Number of entries currently in use
size_t len;
symbol_t *symbols;
} symbol_table_t;
error_t *symbol_table_alloc(symbol_table_t **table);
void symbol_table_free(symbol_table_t *table);
error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node,
ast_node_t *statement);
symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name);
#endif // INCLUDE_ENCODER_SYMBOLS_H_

View File

@ -9,14 +9,6 @@ error_t *const err_errorf_alloc = &(error_t){
error_t *const err_errorf_length = &(error_t){
.message =
"Formatting of another error failed to determine the error length"};
error_t *const err_eof =
&(error_t){.message = "Read failed because EOF is reached"};
error_t *const err_unknown_read_failure =
&(error_t){.message = "Unknown read error"};
error_t *const err_allocation_failed =
&(error_t){.message = "Memory allocation failed"};
error_t *errorf(const char *fmt, ...) {
error_t *err = calloc(1, sizeof(error_t));

View File

@ -18,9 +18,4 @@ static inline void error_free(error_t *err) {
free(err);
}
/* Some global errors */
extern error_t *const err_allocation_failed;
extern error_t *const err_eof;
extern error_t *const err_unknown_read_failure;
#endif // INCLUDE_SRC_ERROR_H_

View File

@ -5,16 +5,24 @@
#include <errno.h>
#include <string.h>
error_t *const err_lexer_already_open = &(error_t){
error_t *err_lexer_already_open = &(error_t){
.message =
"Can't open on a lexer object that is already opened. Close it first."};
error_t *const err_lexer_prefix_too_large =
error_t *err_prefix_too_large =
&(error_t){.message = "Prefix too large for internal lexer buffer"};
error_t *const err_lexer_buffer_underrun = &(error_t){
error_t *err_buffer_underrun = &(error_t){
.message = "Buffer does not contain enough characters for lexer_consume_n"};
error_t *const err_lexer_consume_excessive_length =
error_t *err_consume_excessive_length =
&(error_t){.message = "Too many valid characters to consume"};
error_t *err_eof =
&(error_t){.message = "Can't read from file because EOF is reached"};
error_t *err_unknown_read = &(error_t){.message = "Unknown read error"};
error_t *err_allocation_failed =
&(error_t){.message = "Memory allocation failed"};
typedef bool (*char_predicate_t)(char);
const char *lexer_token_id_to_cstr(lexer_token_id_t id) {
@ -107,7 +115,7 @@ error_t *lexer_fill_buffer(lexer_t *lex) {
if (n == 0 && ferror(lex->fp))
return errorf("Read error: %s", strerror(errno));
if (n == 0)
return err_unknown_read_failure;
return err_unknown_read;
remaining -= n;
lex->buffer_count += n;
}
@ -136,7 +144,7 @@ error_t *lexer_open(lexer_t *lex, char *path) {
*
* @pre There must be at least n characters in the input buffer
*/
void lexer_shift_buffer(lexer_t *lex, size_t n) {
void lexer_shift_buffer(lexer_t *lex, int n) {
assert(lex->buffer_count >= n);
lex->buffer_count -= n;
memmove(lex->buffer, lex->buffer + n, lex->buffer_count);
@ -177,9 +185,9 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
error_t *lexer_consume_n(lexer_t *lex, const size_t len,
char buffer[static len], const size_t n) {
if (lex->buffer_count < n)
return err_lexer_buffer_underrun;
if (n > len)
return err_lexer_consume_excessive_length;
return err_buffer_underrun;
if (len > n)
return err_consume_excessive_length;
memcpy(buffer, lex->buffer, n);
lexer_shift_buffer(lex, n);
@ -224,7 +232,7 @@ error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
(lex->buffer_count > 0 && is_valid(lex->buffer[0]));
if (have_more_characters && *n_consumed == buffer_size) {
return err_lexer_consume_excessive_length;
return err_consume_excessive_length;
}
} while (have_more_characters);
return nullptr;
@ -294,12 +302,11 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
error_t *err = lexer_consume(lex, max_number_length - so_far,
buffer + so_far, is_valid, &n);
if (err == err_lexer_consume_excessive_length) {
if (err == err_consume_excessive_length) {
token->id = TOKEN_ERROR;
token->explanation =
"Number length exceeds the maximum of 128 characters";
}
lex->character_number += n;
so_far += n;
if (n == 0) {
token->id = TOKEN_ERROR;
@ -325,15 +332,14 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
if (suffix_length > 0) {
err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far,
suffix_length);
if (err == err_lexer_consume_excessive_length) {
if (err == err_consume_excessive_length) {
token->id = TOKEN_ERROR;
token->explanation =
"Number length exceeds the maximum of 128 characters";
} else {
lex->character_number += suffix_length;
}
}
lex->character_number += n;
token->value = strdup(buffer);
return nullptr;
}
@ -403,7 +409,7 @@ error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {
error_t *err = lexer_consume(lex, max_identifier_length, buffer,
is_identifier_character, &n);
if (err == err_lexer_consume_excessive_length) {
if (err == err_consume_excessive_length) {
token->id = TOKEN_ERROR;
token->explanation =
"Identifier length exceeds the maximum of 128 characters";
@ -446,7 +452,7 @@ error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {
error_t *err = lexer_consume(lex, max_whitespace_length, buffer,
is_whitespace_character, &n);
if (err == err_lexer_consume_excessive_length) {
if (err == err_consume_excessive_length) {
token->id = TOKEN_ERROR;
token->explanation =
"Whitespace length exceeds the maximum of 1024 characters";
@ -481,7 +487,7 @@ error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {
error_t *err = lexer_consume(lex, max_comment_length, buffer,
is_comment_character, &n);
if (err == err_lexer_consume_excessive_length) {
if (err == err_consume_excessive_length) {
token->id = TOKEN_ERROR;
token->explanation =
"Comment length exceeds the maximum of 1024 characters";

View File

@ -5,10 +5,7 @@
#include <stddef.h>
#include <stdio.h>
extern error_t *const err_lexer_already_open;
extern error_t *const err_lexer_prefix_too_large;
extern error_t *const err_lexer_buffer_underrun;
extern error_t *const err_lexer_consume_excessive_length;
extern error_t *err_eof;
typedef enum {
TOKEN_ERROR,

View File

@ -1,175 +1,62 @@
#include "ast.h"
#include "encoder/encoder.h"
#include "error.h"
#include "lexer.h"
#include "parser/parser.h"
#include "tokenlist.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef enum mode {
MODE_INVALID = -1,
MODE_AST,
MODE_TEXT,
MODE_TOKENS,
MODE_ENCODING,
} mode_t;
void print_tokens(tokenlist_t *list) {
for (auto entry = list->head; entry; entry = entry->next) {
auto token = &entry->token;
bool print_token(lexer_token_t *token) {
lexer_token_print(token);
}
return true;
}
void print_text(tokenlist_t *list) {
for (auto entry = list->head; entry; entry = entry->next) {
auto token = &entry->token;
bool print_value(lexer_token_t *token) {
if (token->id == TOKEN_ERROR) {
printf("%s\n", token->value);
for (size_t i = 0; i < token->character_number; ++i)
printf(" ");
printf("^-- %s\n", token->explanation);
return;
} else {
printf("%s", token->value);
}
}
}
// Parse the token list and print the resulting AST. When the parser stopped
// before the end of the token stream, the first unparsed token is printed as
// well and an error is returned. The AST is always released before returning.
error_t *print_ast(tokenlist_t *list) {
    parse_result_t parsed = parse(list->head);
    if (parsed.err != nullptr)
        return parsed.err;

    // A non-null `next` means some tokens were left unconsumed by the parser.
    const bool has_leftover = parsed.next != nullptr;

    ast_node_print(parsed.node);
    if (has_leftover) {
        puts("First unparsed token:");
        lexer_token_print(&parsed.next->token);
    }
    ast_node_free(parsed.node);

    return has_leftover ? errorf("did not parse entire input token stream")
                        : nullptr;
}
// Print `len` bytes as two-digit lowercase hex values separated by single
// spaces, followed by a newline. An empty buffer prints just the newline.
void print_hex(size_t len, uint8_t bytes[static len]) {
    for (size_t index = 0; index < len; ++index) {
        // Separator goes in front of every byte except the first one.
        if (index != 0)
            printf(" ");
        printf("%02x", bytes[index]);
    }
    printf("\n");
}
// Parse the token list, run the encoder over the resulting AST and print the
// encoded bytes of every instruction node that is a direct child of the root.
// Returns nullptr on success, or the first error reported by the parser or
// the encoder. The encoder and the AST are released on every path.
error_t *print_encoding(tokenlist_t *list) {
    parse_result_t result = parse(list->head);
    if (result.err)
        return result.err;

    ast_node_t *root = result.node;
    encoder_t *encoder = nullptr;
    error_t *err = encoder_alloc(&encoder, root);
    if (err)
        goto cleanup_ast;

    err = encoder_encode(encoder);
    if (err)
        goto cleanup_encoder; // the encoder exists here and must be freed too

    for (size_t i = 0; i < root->len; ++i) {
        ast_node_t *node = root->children[i];
        if (node->id != NODE_INSTRUCTION)
            continue;
        print_hex(node->value.instruction.encoding.len,
                  node->value.instruction.encoding.buffer);
    }

    // Success falls through the cleanup labels with err == nullptr.
cleanup_encoder:
    encoder_free(encoder);
cleanup_ast:
    ast_node_free(root);
    return err;
}
// Map the command line onto an execution mode. Exactly two arguments are
// expected (mode name and file name); anything else, or an unknown mode
// name, yields MODE_INVALID.
int get_execution_mode(int argc, char *argv[]) {
    static const struct {
        const char *name;
        int mode;
    } known_modes[] = {
        {"tokens", MODE_TOKENS},
        {"text", MODE_TEXT},
        {"ast", MODE_AST},
        {"encoding", MODE_ENCODING},
    };

    if (argc != 3)
        return MODE_INVALID;

    const size_t count = sizeof(known_modes) / sizeof(known_modes[0]);
    for (size_t i = 0; i < count; ++i) {
        if (strcmp(argv[1], known_modes[i].name) == 0)
            return known_modes[i].mode;
    }
    return MODE_INVALID;
}
error_t *do_action(mode_t mode, tokenlist_t *list) {
switch (mode) {
case MODE_TOKENS:
print_tokens(list);
return nullptr;
case MODE_TEXT:
print_text(list);
return nullptr;
case MODE_AST:
return print_ast(list);
case MODE_ENCODING:
return print_encoding(list);
case MODE_INVALID:
/* can't happen */
}
__builtin_unreachable();
return token->id != TOKEN_ERROR;
}
int main(int argc, char *argv[]) {
mode_t mode = get_execution_mode(argc, argv);
if (mode == MODE_INVALID) {
puts("Usage: oas [tokens|text|ast|encoding] <filename>");
exit(1);
if (argc != 3 ||
(strcmp(argv[1], "-tokens") != 0 && strcmp(argv[1], "-text") != 0)) {
puts("Usage: oas -tokens <filename>");
puts("Usage: oas -text <filename>");
return 1;
}
bool (*print_fn)(lexer_token_t *);
char *filename = argv[2];
if (strcmp(argv[1], "-tokens") == 0) {
print_fn = print_token;
} else {
print_fn = print_value;
}
lexer_t *lex = &(lexer_t){};
error_t *err = lexer_open(lex, filename);
if (err)
goto cleanup_error;
tokenlist_t *list;
err = tokenlist_alloc(&list);
if (err)
goto cleanup_lexer;
err = tokenlist_fill(list, lex);
if (err)
goto cleanup_tokens;
err = do_action(mode, list);
if (err)
goto cleanup_tokens;
tokenlist_free(list);
error_free(err);
return 0;
cleanup_tokens:
tokenlist_free(list);
cleanup_lexer:
lexer_close(lex);
cleanup_error:
lexer_t lex = {0};
lexer_token_t token;
error_t *err = lexer_open(&lex, filename);
if (err) {
puts(err->message);
error_free(err);
return 1;
}
bool keep_going = true;
while (keep_going && (err = lexer_next(&lex, &token)) == nullptr) {
keep_going = print_fn(&token);
free(token.value);
}
if (err && err != err_eof) {
puts(err->message);
}
error_free(err);
return 0;
}

View File

@ -1,134 +0,0 @@
#include "combinators.h"
#include "util.h"
// Parse a list of elements matched by `parser`, separated by tokens with the
// id `delimiter_id`. The elements become children of a new node with the
// given `id`; the delimiters themselves are not stored.
//
// A delimiter is only consumed when an element successfully follows it. If
// the element after a delimiter does not match (or the stream ends), the
// returned `next` still points at that delimiter so the caller can reject or
// handle the dangling delimiter.
//
// Returns no-match when nothing was parsed and `allow_none` is false, and
// propagates any other error from `parser` or from node allocation.
parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id,
                          bool allow_none, lexer_token_id_t delimiter_id,
                          parser_t parser) {
    ast_node_t *many;
    error_t *err = ast_node_alloc(&many);
    if (err)
        return parse_error(err);
    many->id = id;

    while (current) {
        // Where the next element is expected to start. `current` is only
        // advanced once that element has actually been parsed.
        tokenlist_entry_t *element = current;

        // Every element but the first must be preceded by a delimiter.
        if (many->len > 0) {
            if (current->token.id != delimiter_id)
                break;
            element = tokenlist_next(current);
            if (element == nullptr)
                break;
        }

        parse_result_t result = parser(element);
        if (result.err == err_parse_no_match)
            break;
        if (result.err) {
            ast_node_free(many);
            return result;
        }
        err = ast_node_add_child(many, result.node);
        if (err) {
            ast_node_free(many);
            ast_node_free(result.node);
            return parse_error(err);
        }
        current = result.next;
    }

    if (!allow_none && many->len == 0) {
        ast_node_free(many);
        return parse_no_match();
    }
    return parse_success(many, current);
}
// Try each parser of the nullptr-terminated `parsers` array in order at the
// same position and return the result of the first one that matches.
//
// A parser that reports err_parse_no_match simply hands over to the next
// alternative. Any other error (for example an allocation failure) is
// returned to the caller immediately instead of being reported as a plain
// no-match, which mirrors how parse_many and parse_list treat such errors.
parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]) {
    for (parser_t *candidate = parsers; *candidate != nullptr; ++candidate) {
        parse_result_t result = (*candidate)(current);
        if (result.err != err_parse_no_match)
            return result; // success, or an error that must not be hidden
    }
    return parse_no_match();
}
// Apply `parser` repeatedly for as long as it keeps matching and collect the
// parsed nodes as children of a new AST node with the given node id. When
// `allow_none` is false at least one match is required, otherwise the result
// is a no-match.
parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id,
                          bool allow_none, parser_t parser) {
    ast_node_t *collection;
    error_t *err = ast_node_alloc(&collection);
    if (err != nullptr)
        return parse_error(err);
    collection->id = id;

    while (current != nullptr) {
        parse_result_t item = parser(current);
        if (item.err == err_parse_no_match)
            break;
        if (item.err != nullptr) {
            ast_node_free(collection);
            return item;
        }

        err = ast_node_add_child(collection, item.node);
        if (err != nullptr) {
            ast_node_free(collection);
            ast_node_free(item.node);
            return parse_error(err);
        }
        current = item.next;
    }

    const bool matched = allow_none || collection->len > 0;
    if (matched)
        return parse_success(collection, current);

    ast_node_free(collection);
    return parse_no_match();
}
// parse_consecutive runs every parser of the nullptr-terminated `parsers`
// array one after another. Only if all of them match are the parsed nodes
// wrapped, in order, in a new parent node with the given id.
parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
                                 parser_t parsers[]) {
    ast_node_t *parent;
    error_t *err = ast_node_alloc(&parent);
    if (err != nullptr)
        return parse_error(err);
    parent->id = id;

    size_t index = 0;
    for (; parsers[index] != nullptr && current != nullptr; ++index) {
        parse_result_t step = parsers[index](current);
        if (step.err != nullptr) {
            ast_node_free(parent);
            return step;
        }

        err = ast_node_add_child(parent, step.node);
        if (err != nullptr) {
            ast_node_free(step.node);
            ast_node_free(parent);
            return parse_error(err);
        }
        current = step.next;
    }

    // Parsers left over means the token stream ended before all matched.
    if (parsers[index] != nullptr) {
        ast_node_free(parent);
        return parse_no_match();
    }
    return parse_success(parent, current);
}

View File

@ -1,25 +0,0 @@
#ifndef INCLUDE_PARSER_COMBINATORS_H_
#define INCLUDE_PARSER_COMBINATORS_H_
#include "util.h"
typedef parse_result_t (*parser_t)(tokenlist_entry_t *);
parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]);
// Parse as many objects matched by the given parser in a row as possible,
// potentially allowing none, and wrap the found objects in a new AST node with
// the given node id
parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id,
bool allow_none, parser_t parser);
parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id,
bool allow_none, lexer_token_id_t delimiter_id,
parser_t parser);
// parse_consecutive tries to run all parsers consecutively and, if every one
// of them succeeds, wraps the parsed nodes in a new parent node.
parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id,
parser_t parsers[]);
#endif // INCLUDE_PARSER_COMBINATORS_H_

View File

@ -1,164 +0,0 @@
#include "parser.h"
#include "../ast.h"
#include "../lexer.h"
#include "../tokenlist.h"
#include "combinators.h"
#include "primitives.h"
#include "util.h"
// A number is any one of the octal, decimal, hexadecimal or binary literal
// tokens, wrapped in a NODE_NUMBER node.
parse_result_t parse_number(tokenlist_entry_t *current) {
    parser_t alternatives[] = {
        parse_octal, parse_decimal, parse_hexadecimal, parse_binary, nullptr,
    };
    return parse_result_wrap(NODE_NUMBER, parse_any(current, alternatives));
}
parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_plus, parse_minus, nullptr};
return parse_any(current, parsers);
}
parse_result_t parse_register_index(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_plus, parse_register, parse_asterisk,
parse_number, nullptr};
return parse_consecutive(current, NODE_REGISTER_INDEX, parsers);
}
parse_result_t parse_register_offset(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr};
return parse_consecutive(current, NODE_REGISTER_OFFSET, parsers);
}
// Parse `<register> <register_index>? <register_offset>?` into a
// NODE_REGISTER_EXPRESSION node. The register is mandatory; the index and the
// offset are each added as a child only when they parse successfully.
parse_result_t parse_register_expression(tokenlist_entry_t *current) {
    ast_node_t *expression;
    error_t *err = ast_node_alloc(&expression);
    if (err)
        return parse_error(err);
    expression->id = NODE_REGISTER_EXPRESSION;

    // <register>
    parse_result_t part = parse_register(current);
    if (part.err) {
        ast_node_free(expression);
        return part;
    }
    err = ast_node_add_child(expression, part.node);
    if (err) {
        ast_node_free(part.node);
        ast_node_free(expression);
        return parse_error(err);
    }
    current = part.next;

    // <register_index>? followed by <register_offset>?
    parser_t optional_parts[] = {parse_register_index, parse_register_offset};
    const size_t count = sizeof(optional_parts) / sizeof(optional_parts[0]);
    for (size_t i = 0; i < count; ++i) {
        part = optional_parts[i](current);
        if (part.err) {
            // An optional part that fails to parse is dropped, not reported.
            error_free(part.err);
            continue;
        }
        err = ast_node_add_child(expression, part.node);
        if (err) {
            ast_node_free(part.node);
            ast_node_free(expression);
            return parse_error(err);
        }
        current = part.next;
    }

    return parse_success(expression, current);
}
// An immediate is either a number or a label reference, wrapped in a
// NODE_IMMEDIATE node.
parse_result_t parse_immediate(tokenlist_entry_t *current) {
    parser_t alternatives[] = {parse_number, parse_label_reference, nullptr};
    return parse_result_wrap(NODE_IMMEDIATE, parse_any(current, alternatives));
}
parse_result_t parse_memory_expression(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_register_expression, parse_label_reference,
nullptr};
return parse_any(current, parsers);
}
parse_result_t parse_memory(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_lbracket, parse_memory_expression,
parse_rbracket, nullptr};
return parse_consecutive(current, NODE_MEMORY, parsers);
}
parse_result_t parse_operand(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_register, parse_memory, parse_immediate,
nullptr};
return parse_any(current, parsers);
}
parse_result_t parse_operands(tokenlist_entry_t *current) {
return parse_list(current, NODE_OPERANDS, true, TOKEN_COMMA, parse_operand);
}
parse_result_t parse_label(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_identifier, parse_colon, nullptr};
return parse_consecutive(current, NODE_LABEL, parsers);
}
parse_result_t parse_section_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_section, parse_identifier, nullptr};
return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
}
parse_result_t parse_import_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_import, parse_identifier, nullptr};
return parse_consecutive(current, NODE_IMPORT_DIRECTIVE, parsers);
}
parse_result_t parse_export_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_export, parse_identifier, nullptr};
return parse_consecutive(current, NODE_EXPORT_DIRECTIVE, parsers);
}
parse_result_t parse_directive_options(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_section_directive, parse_import_directive,
parse_export_directive, nullptr};
return parse_any(current, parsers);
}
parse_result_t parse_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_dot, parse_directive_options, parse_newline,
nullptr};
return parse_consecutive(current, NODE_DIRECTIVE, parsers);
}
parse_result_t parse_instruction(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
nullptr};
return parse_consecutive(current, NODE_INSTRUCTION, parsers);
}
parse_result_t parse_statement(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
parse_newline, nullptr};
return parse_any(current, parsers);
}
// Parser entry point: skip leading trivia, parse zero or more statements into
// a NODE_PROGRAM node and prune NODE_NEWLINE nodes from the resulting tree.
parse_result_t parse(tokenlist_entry_t *current) {
    tokenlist_entry_t *first = tokenlist_skip_trivia(current);
    parse_result_t program =
        parse_many(first, NODE_PROGRAM, true, parse_statement);

    // No node was produced, so there is nothing to prune.
    if (program.node == nullptr)
        return program;

    ast_node_prune(program.node, NODE_NEWLINE);
    return program;
}

View File

@ -1,9 +0,0 @@
#ifndef INCLUDE_PARSER_PARSER_H_
#define INCLUDE_PARSER_PARSER_H_
#include "../tokenlist.h"
#include "util.h"
parse_result_t parse(tokenlist_entry_t *current);
#endif // INCLUDE_PARSER_PARSER_H_

View File

@ -1,110 +0,0 @@
#include "primitives.h"
#include "../ast.h"
#include "../data/registers.h"
#include <string.h>
parse_result_t parse_identifier(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_IDENTIFIER, nullptr);
}
parse_result_t parse_decimal(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DECIMAL, NODE_DECIMAL, nullptr);
}
parse_result_t parse_hexadecimal(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_HEXADECIMAL, NODE_HEXADECIMAL, nullptr);
}
parse_result_t parse_binary(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_BINARY, NODE_BINARY, nullptr);
}
parse_result_t parse_octal(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_OCTAL, NODE_OCTAL, nullptr);
}
parse_result_t parse_string(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_STRING, NODE_STRING, nullptr);
}
parse_result_t parse_char(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_CHAR, NODE_CHAR, nullptr);
}
parse_result_t parse_colon(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_COLON, NODE_COLON, nullptr);
}
parse_result_t parse_comma(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_COMMA, NODE_COMMA, nullptr);
}
parse_result_t parse_lbracket(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_LBRACKET, NODE_LBRACKET, nullptr);
}
parse_result_t parse_rbracket(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_RBRACKET, NODE_RBRACKET, nullptr);
}
parse_result_t parse_plus(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_PLUS, NODE_PLUS, nullptr);
}
parse_result_t parse_minus(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_MINUS, NODE_MINUS, nullptr);
}
parse_result_t parse_asterisk(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_ASTERISK, NODE_ASTERISK, nullptr);
}
parse_result_t parse_dot(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
}
parse_result_t parse_newline(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
}
parse_result_t parse_label_reference(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
nullptr);
}
// Validator: true when the token's text equals the name of one of the entries
// in the nullptr-terminated `registers` table.
bool is_register_token(lexer_token_t *token) {
    size_t index = 0;
    while (registers[index] != nullptr) {
        if (strcmp(token->value, registers[index]->name) == 0)
            return true;
        ++index;
    }
    return false;
}
parse_result_t parse_register(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_REGISTER,
is_register_token);
}
bool is_section_token(lexer_token_t *token) {
return strcmp(token->value, "section") == 0;
}
parse_result_t parse_section(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
is_section_token);
}
bool is_import_token(lexer_token_t *token) {
return strcmp(token->value, "import") == 0;
}
parse_result_t parse_import(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_IMPORT, is_import_token);
}
bool is_export_token(lexer_token_t *token) {
return strcmp(token->value, "export") == 0;
}
parse_result_t parse_export(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_EXPORT, is_export_token);
}

View File

@ -1,33 +0,0 @@
#ifndef INCLUDE_PARSER_PRIMITIVES_H_
#define INCLUDE_PARSER_PRIMITIVES_H_
#include "util.h"
parse_result_t parse_identifier(tokenlist_entry_t *current);
parse_result_t parse_decimal(tokenlist_entry_t *current);
parse_result_t parse_hexadecimal(tokenlist_entry_t *current);
parse_result_t parse_binary(tokenlist_entry_t *current);
parse_result_t parse_octal(tokenlist_entry_t *current);
parse_result_t parse_string(tokenlist_entry_t *current);
parse_result_t parse_char(tokenlist_entry_t *current);
parse_result_t parse_colon(tokenlist_entry_t *current);
parse_result_t parse_comma(tokenlist_entry_t *current);
parse_result_t parse_lbracket(tokenlist_entry_t *current);
parse_result_t parse_rbracket(tokenlist_entry_t *current);
parse_result_t parse_plus(tokenlist_entry_t *current);
parse_result_t parse_minus(tokenlist_entry_t *current);
parse_result_t parse_asterisk(tokenlist_entry_t *current);
parse_result_t parse_dot(tokenlist_entry_t *current);
parse_result_t parse_newline(tokenlist_entry_t *current);
parse_result_t parse_label_reference(tokenlist_entry_t *current);
/* These are "primitives" with a different name and some extra validation on top
* for example, register is just an identifier but it only matches a limited set
* of values
*/
parse_result_t parse_register(tokenlist_entry_t *current);
parse_result_t parse_section(tokenlist_entry_t *current);
parse_result_t parse_import(tokenlist_entry_t *current);
parse_result_t parse_export(tokenlist_entry_t *current);
#endif // INCLUDE_PARSER_PRIMITIVES_H_

View File

@ -1,56 +0,0 @@
#include "util.h"
#include "../tokenlist.h"
error_t *const err_parse_no_match =
&(error_t){.message = "parsing failed to find the correct token sequence"};
parse_result_t parse_error(error_t *err) {
return (parse_result_t){.err = err};
}
parse_result_t parse_no_match() {
return parse_error(err_parse_no_match);
}
// Build a successful parse result for `ast`. The continuation point is moved
// past any trivia so the next parser starts on a significant token.
parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next) {
    parse_result_t success = {
        .node = ast,
        .next = tokenlist_skip_trivia(next),
    };
    return success;
}
// Match a single token at `current`.
//
// The match succeeds when the token has the id `token_id` and, if `is_valid`
// is not null, the validator accepts the token. On success a new node with
// the id `ast_id` is allocated that refers to the matched token entry, and
// parsing continues at the entry following it.
//
// Returns no-match when there is no token left (`current` is null), when the
// token id differs or when the validator rejects the token; returns the
// allocation error if the node cannot be allocated.
parse_result_t parse_token(tokenlist_entry_t *current,
                           lexer_token_id_t token_id, node_id_t ast_id,
                           token_validator_t is_valid) {
    // End of the token stream: nothing can match. Callers such as parse_any
    // forward `current` without checking it first.
    if (current == nullptr)
        return parse_no_match();

    if (current->token.id != token_id ||
        (is_valid && !is_valid(&current->token)))
        return parse_no_match();

    ast_node_t *node;
    error_t *err = ast_node_alloc(&node);
    if (err)
        return parse_error(err);

    node->id = ast_id;
    node->token_entry = current;
    return parse_success(node, current->next);
}
// Wrap the node of a successful parse result in a new parent node with the
// given id. A failed result is passed through untouched.
//
// If the wrapper cannot be allocated, or the child cannot be attached, both
// the wrapper and the wrapped node are released and the error is returned.
parse_result_t parse_result_wrap(node_id_t id, parse_result_t result) {
    if (result.err)
        return result;

    ast_node_t *node;
    error_t *err = ast_node_alloc(&node);
    if (err) {
        ast_node_free(result.node);
        return parse_error(err);
    }
    node->id = id;

    err = ast_node_add_child(node, result.node);
    if (err) {
        // The child was not attached, so each node must be freed on its own.
        ast_node_free(result.node);
        ast_node_free(node);
        return parse_error(err);
    }
    return parse_success(node, result.next);
}

View File

@ -1,26 +0,0 @@
#ifndef INCLUDE_PARSER_UTIL_H_
#define INCLUDE_PARSER_UTIL_H_
#include "../ast.h"
#include "../error.h"
#include "../tokenlist.h"
typedef struct parse_result {
error_t *err;
tokenlist_entry_t *next;
ast_node_t *node;
} parse_result_t;
typedef bool (*token_validator_t)(lexer_token_t *);
parse_result_t parse_error(error_t *err);
parse_result_t parse_no_match();
parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next);
parse_result_t parse_token(tokenlist_entry_t *current,
lexer_token_id_t token_id, node_id_t ast_id,
token_validator_t is_valid);
parse_result_t parse_result_wrap(node_id_t id, parse_result_t result);
extern error_t *const err_parse_no_match;
#endif // INCLUDE_PARSER_UTIL_H_

View File

@ -1,105 +0,0 @@
#include "tokenlist.h"
#include "error.h"
#include "lexer.h"
#include <stdlib.h>
// Allocate an empty token list and store it in `*output`. On allocation
// failure `*output` is set to nullptr and err_allocation_failed is returned.
error_t *tokenlist_alloc(tokenlist_t **output) {
    tokenlist_t *fresh = calloc(1, sizeof(*fresh));
    *output = fresh;
    if (fresh == nullptr)
        return err_allocation_failed;

    fresh->head = nullptr;
    fresh->tail = nullptr;
    return nullptr;
}
// Allocate a zeroed, unlinked list entry and store it in `*output`. On
// allocation failure `*output` is set to nullptr and err_allocation_failed
// is returned.
error_t *tokenlist_entry_alloc(tokenlist_entry_t **output) {
    tokenlist_entry_t *fresh = calloc(1, sizeof(*fresh));
    *output = fresh;
    if (fresh == nullptr)
        return err_allocation_failed;

    fresh->next = nullptr;
    fresh->prev = nullptr;
    return nullptr;
}
// Link `entry` in as the new tail of `list`. For an empty list the entry
// becomes both head and tail.
void tokenlist_append(tokenlist_t *list, tokenlist_entry_t *entry) {
    const bool was_empty = list->head == nullptr;

    entry->next = nullptr;
    entry->prev = was_empty ? nullptr : list->tail;

    if (was_empty)
        list->head = entry;
    else
        list->tail->next = entry;
    list->tail = entry;
}
void tokenlist_entry_free(tokenlist_entry_t *entry) {
lexer_token_cleanup(&entry->token);
free(entry);
}
// Release every entry of the list and then the list itself. Passing nullptr
// is a no-op.
void tokenlist_free(tokenlist_t *list) {
    if (!list)
        return;

    // Remember the successor before an entry is freed.
    for (tokenlist_entry_t *entry = list->head, *following; entry != nullptr;
         entry = following) {
        following = entry->next;
        tokenlist_entry_free(entry);
    }
    free(list);
}
// Read tokens from the lexer until it reports an error and append each one to
// `list`. Reaching err_eof counts as success (nullptr is returned); any other
// lexer error, or a failed entry allocation, is returned to the caller.
error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex) {
    lexer_token_t token = {};

    for (;;) {
        error_t *err = lexer_next(lex, &token);
        if (err != nullptr)
            return err == err_eof ? nullptr : err;

        tokenlist_entry_t *entry;
        err = tokenlist_entry_alloc(&entry);
        if (err != nullptr) {
            // The token never made it into the list, so clean it up here.
            lexer_token_cleanup(&token);
            return err;
        }

        // The entry takes over the token contents by value.
        entry->token = token;
        tokenlist_append(list, entry);
    }
}
// Trivia are the tokens the parser skips over: whitespace and comments.
bool is_trivia(tokenlist_entry_t *trivia) {
    return trivia->token.id == TOKEN_WHITESPACE ||
           trivia->token.id == TOKEN_COMMENT;
}
// Starting at `current` (inclusive), return the first entry that is not
// trivia, or nullptr when the list ends first.
tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current) {
    tokenlist_entry_t *entry = current;
    for (; entry != nullptr; entry = entry->next) {
        if (!is_trivia(entry))
            break;
    }
    return entry;
}
// Return the first non-trivia entry after `current`, or nullptr when
// `current` is nullptr or no such entry exists.
tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current) {
    return current ? tokenlist_skip_trivia(current->next) : nullptr;
}

View File

@ -1,40 +0,0 @@
#ifndef INCLUDE_SRC_TOKENLIST_H_
#define INCLUDE_SRC_TOKENLIST_H_
#include "lexer.h"
typedef struct tokenlist_entry tokenlist_entry_t;
struct tokenlist_entry {
lexer_token_t token;
tokenlist_entry_t *next;
tokenlist_entry_t *prev;
};
typedef struct tokenlist {
tokenlist_entry_t *head;
tokenlist_entry_t *tail;
} tokenlist_t;
/**
* @brief Allocate a new doubly linked list of lexer tokens
*/
error_t *tokenlist_alloc(tokenlist_t **list);
/**
* Consume all tokens from the lexer and add them to the list
*/
error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex);
void tokenlist_free(tokenlist_t *list);
/**
* Return the first token entry, starting at current, that isn't whitespace or a comment
*/
tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current);
/**
* Return the next token entry after current that isn't whitespace or a comment
*/
tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current);
#endif // INCLUDE_SRC_TOKENLIST_H_

View File

@ -1,6 +0,0 @@
BasedOnStyle: LLVM
IndentWidth: 4
Cpp11BracedListStyle: true
AlignArrayOfStructures: Left
AllowShortFunctionsOnASingleLine: Empty
ColumnLimit: 120

View File

@ -1,22 +0,0 @@
#include "../src/ast.h"
#include "munit.h"
MunitResult test_ast_node_alloc(const MunitParameter params[], void *data) {
(void)params;
(void)data;
ast_node_t *node = nullptr;
error_t *err = ast_node_alloc(&node);
munit_assert_ptr_not_null(node);
munit_assert_ptr_null(err);
ast_node_free(node);
return MUNIT_OK;
}
MunitTest ast_tests[] = {
{"/node_alloc", test_ast_node_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};

View File

@ -1,164 +0,0 @@
#include "../src/bytes.h"
#include "munit.h"
MunitResult test_bytes_initializer(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
for (size_t i = 0; i < 16; ++i)
munit_assert_uint8(bytes->buffer[i], ==, 0);
return MUNIT_OK;
}
MunitResult test_bytes_append_uint8(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
for (size_t i = 0; i < 16; ++i) {
error_t *err = bytes_append_uint8(bytes, (uint8_t)i);
munit_assert_null(err);
munit_assert_uint8(bytes->buffer[i], ==, (uint8_t)i);
}
error_t *err = bytes_append_uint8(bytes, 0xFF);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
return MUNIT_OK;
}
MunitResult test_bytes_append_array(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *bytes = LOCAL_BYTES(16);
munit_assert_size(bytes->len, ==, 0);
munit_assert_size(bytes->cap, ==, 16);
uint8_t test_array[] = {0x01, 0x02, 0x03, 0x04, 0x05};
size_t array_len = sizeof(test_array) / sizeof(test_array[0]);
error_t *err = bytes_append_array(bytes, array_len, test_array);
munit_assert_null(err);
munit_assert_size(bytes->len, ==, array_len);
for (size_t i = 0; i < array_len; ++i) {
munit_assert_uint8(bytes->buffer[i], ==, test_array[i]);
}
uint8_t second_array[] = {0x06, 0x07, 0x08};
size_t second_len = sizeof(second_array) / sizeof(second_array[0]);
err = bytes_append_array(bytes, second_len, second_array);
munit_assert_null(err);
munit_assert_size(bytes->len, ==, array_len + second_len);
for (size_t i = 0; i < second_len; ++i) {
munit_assert_uint8(bytes->buffer[array_len + i], ==, second_array[i]);
}
uint8_t overflow_array[10] = {0}; // Array that would exceed capacity
err = bytes_append_array(bytes, sizeof(overflow_array), overflow_array);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
munit_assert_size(bytes->len, ==, array_len + second_len);
return MUNIT_OK;
}
MunitResult test_bytes_append_bytes(const MunitParameter params[], void *data) {
(void)params;
(void)data;
bytes_t *src = LOCAL_BYTES(8);
bytes_t *dst = LOCAL_BYTES(16);
// Fill source bytes with test data
for (uint8_t i = 0; i < 5; ++i) {
error_t *err = bytes_append_uint8(src, i + 1);
munit_assert_null(err);
}
munit_assert_size(src->len, ==, 5);
// Append source to destination
error_t *err = bytes_append_bytes(dst, src);
munit_assert_null(err);
munit_assert_size(dst->len, ==, src->len);
// Verify destination contents match source
for (size_t i = 0; i < src->len; ++i) {
munit_assert_uint8(dst->buffer[i], ==, src->buffer[i]);
}
// Fill source with more data and append again
for (uint8_t i = 0; i < 3; ++i) {
err = bytes_append_uint8(src, i + 6);
munit_assert_null(err);
}
munit_assert_size(src->len, ==, 8);
// Append updated source
err = bytes_append_bytes(dst, src);
munit_assert_null(err);
munit_assert_size(dst->len, ==, 13); // 5 + 8
// Test capacity boundary
src->len = 4; // manually set length to barely not fit
err = bytes_append_bytes(dst, src);
munit_assert_ptr(err, ==, err_bytes_no_capacity);
munit_assert_size(dst->len, ==, 13); // Length unchanged after error
return MUNIT_OK;
}
// Appending a 16-bit value must store it as two bytes, least significant
// byte first.
MunitResult test_bytes_append_uint16(const MunitParameter params[], void *data) {
    // Unused munit arguments; cast to void like the other tests in this file.
    (void)params;
    (void)data;

    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);

    bytes_append_uint16(bytes, 0xFFAA);
    munit_assert_size(bytes->len, ==, 2);
    munit_assert_uint8(bytes->buffer[0], ==, 0xAA);
    munit_assert_uint8(bytes->buffer[1], ==, 0xFF);
    return MUNIT_OK;
}
// Appending a 32-bit value must store it as four bytes, least significant
// byte first.
MunitResult test_bytes_append_uint32(const MunitParameter params[], void *data) {
    // Unused munit arguments; cast to void like the other tests in this file.
    (void)params;
    (void)data;

    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);

    bytes_append_uint32(bytes, 0xAABBCCDD);
    munit_assert_size(bytes->len, ==, 4);
    munit_assert_uint8(bytes->buffer[0], ==, 0xDD);
    munit_assert_uint8(bytes->buffer[1], ==, 0xCC);
    munit_assert_uint8(bytes->buffer[2], ==, 0xBB);
    munit_assert_uint8(bytes->buffer[3], ==, 0xAA);
    return MUNIT_OK;
}
// Appending a 64-bit value must store it as eight bytes, least significant
// byte first.
MunitResult test_bytes_append_uint64(const MunitParameter params[], void *data) {
    // Unused munit arguments; cast to void like the other tests in this file.
    (void)params;
    (void)data;

    bytes_t *bytes = LOCAL_BYTES(16);
    munit_assert_size(bytes->len, ==, 0);
    munit_assert_size(bytes->cap, ==, 16);

    bytes_append_uint64(bytes, 0xAABBCCDDEEFF9988);
    munit_assert_size(bytes->len, ==, 8);
    munit_assert_uint8(bytes->buffer[0], ==, 0x88);
    munit_assert_uint8(bytes->buffer[1], ==, 0x99);
    munit_assert_uint8(bytes->buffer[2], ==, 0xFF);
    munit_assert_uint8(bytes->buffer[3], ==, 0xEE);
    munit_assert_uint8(bytes->buffer[4], ==, 0xDD);
    munit_assert_uint8(bytes->buffer[5], ==, 0xCC);
    munit_assert_uint8(bytes->buffer[6], ==, 0xBB);
    munit_assert_uint8(bytes->buffer[7], ==, 0xAA);
    return MUNIT_OK;
}
MunitTest bytes_tests[] = {
{"/initializer", test_bytes_initializer, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint8", test_bytes_append_uint8, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_array", test_bytes_append_array, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_bytes", test_bytes_append_bytes, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint16", test_bytes_append_uint16, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint32", test_bytes_append_uint32, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/append_uint64", test_bytes_append_uint64, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};

View File

@ -1,65 +0,0 @@
lbl_0: ; 65 symbols used for testing growing the symbols table
lbl_1:
lbl_2:
lbl_3:
lbl_4:
lbl_5:
lbl_6:
lbl_7:
lbl_8:
lbl_9:
lbl_10:
lbl_11:
lbl_12:
lbl_13:
lbl_14:
lbl_15:
lbl_16:
lbl_17:
lbl_18:
lbl_19:
lbl_20:
lbl_21:
lbl_22:
lbl_23:
lbl_24:
lbl_25:
lbl_26:
lbl_27:
lbl_28:
lbl_29:
lbl_30:
lbl_31:
lbl_32:
lbl_33:
lbl_34:
lbl_35:
lbl_36:
lbl_37:
lbl_38:
lbl_39:
lbl_40:
lbl_41:
lbl_42:
lbl_43:
lbl_44:
lbl_45:
lbl_46:
lbl_47:
lbl_48:
lbl_49:
lbl_50:
lbl_51:
lbl_52:
lbl_53:
lbl_54:
lbl_55:
lbl_56:
lbl_57:
lbl_58:
lbl_59:
lbl_60:
lbl_61:
lbl_62:
lbl_63:
lbl_64:

View File

@ -1,5 +0,0 @@
; regression test for two issues:
; - parsing two zero operand instructions in a row
; - a zero operand instruction just before eof
syscall
ret

View File

@ -1,5 +0,0 @@
; sample program with trivia on the head of the tokenlist
_start:
xor rax, rax
call exit

View File

@ -1,12 +0,0 @@
.import test
.export test
test:
call test
.import more
.export more
more:
call more
.import other
.export other
other:
call other

View File

@ -1,20 +1,5 @@
.section text
; Small valid code snippet that should contain all different AST nodes
.export _start
.import exit
_start:
mov eax, ebx
lea eax, [eax + ebx * 4 + 8]
lea eax, [eax + 8]
lea eax, [eax + ebx * 8]
lea eax, [esp - 24]
lea eax, [eax + ebx * 4 - 8]
lea eax, [_start]
mov eax, _start
mov eax, 555
mov eax, 555 ; move 555 into eax
push 0o777
xor eax, 0xDEADBEEF
and ecx, 0o770
@ -22,5 +7,3 @@ _start:
push 0xffff:64
push 0o777:16
push 0b0001:16
mov rax, 0
call exit

View File

@ -1,896 +0,0 @@
#include "../src/lexer.h"
#include "../src/error.h"
#include "munit.h"
#include <string.h>
void lexer_setup_memory_test(lexer_t *lex, const char *input) {
munit_assert_null(lex->fp);
FILE *stream = fmemopen((void *)input, strlen(input), "rb");
munit_assert_not_null(stream);
lex->fp = stream;
lex->line_number = 0;
lex->character_number = 0;
lex->buffer_count = 0;
}
void lexer_expect_one_token(lexer_t *lex, lexer_token_id_t id, const char *value, size_t line, size_t column) {
lexer_token_t token = {};
error_t *err = lexer_next(lex, &token);
munit_assert_null(err);
munit_assert_int(token.id, ==, id);
munit_assert_string_equal(token.value, value);
munit_assert_int(token.line_number, ==, line);
munit_assert_int(token.character_number, ==, column);
lexer_token_cleanup(&token);
}
void lexer_expect_eof(lexer_t *lex) {
lexer_token_t token = {};
error_t *err = lexer_next(lex, &token);
munit_assert_ptr_equal(err, err_eof);
}
void lexer_test_one_token(lexer_token_id_t id, const char *value) {
lexer_t lex = {};
lexer_setup_memory_test(&lex, value);
lexer_expect_one_token(&lex, id, value, 0, 0);
lexer_expect_eof(&lex);
lexer_close(&lex);
}
MunitResult test_lexer_identifier(const MunitParameter params[], void *data) {
(void)params;
(void)data;
lexer_test_one_token(TOKEN_IDENTIFIER, "identifier");
lexer_test_one_token(TOKEN_IDENTIFIER, "_identifier");
lexer_test_one_token(TOKEN_IDENTIFIER, "_identifier123_55");
return MUNIT_OK;
}
typedef struct token_data {
lexer_token_id_t id;
char *value;
size_t line;
size_t column;
} token_data_t;
/**
 * One boundary test case: an input that must lex into exactly two tokens.
 * A case whose input is null terminates a table of cases.
 */
struct boundary {
    const char *input;   // text handed to the lexer
    token_data_t first;  // expectations for the first token
    token_data_t second; // expectations for the second token
};
typedef struct boundary boundary_t;
/**
 * Run every boundary case in a table.
 *
 * For each case the input is lexed from memory and must produce the expected
 * first token, the expected second token and then end of input.
 *
 * @param boundaries table of cases, terminated by an entry with a null input
 */
void test_lexer_boundary(boundary_t boundaries[]) {
    for (boundary_t *current = boundaries; current->input; ++current) {
        const token_data_t *first = &current->first;
        const token_data_t *second = &current->second;

        lexer_t lex = {};
        lexer_setup_memory_test(&lex, current->input);

        lexer_expect_one_token(&lex, first->id, first->value, first->line, first->column);
        lexer_expect_one_token(&lex, second->id, second->value, second->line, second->column);
        lexer_expect_eof(&lex);

        lexer_close(&lex);
    }
}
MunitResult test_lexer_identifier_boundary(const MunitParameter params[], void *data) {
(void)params;
(void)data;
boundary_t boundaries[] = {
{"id:", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COLON, ":", 0, 2} },
{"id[", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_LBRACKET, "[", 0, 2} },
{"id]", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_RBRACKET, "]", 0, 2} },
{"id+", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_PLUS, "+", 0, 2} },
{"id-", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_MINUS, "-", 0, 2} },
{"id*", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_ASTERISK, "*", 0, 2} },
{"id.", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_DOT, ".", 0, 2} },
{"id;comment", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COMMENT, ";comment", 0, 2}},
{"id\n", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 2} },
{"id\r\n", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 2} },
{"id ", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 2} },
{"id\t", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 2} },
{nullptr, {}, {} },
};
test_lexer_boundary(boundaries);
return MUNIT_OK;
}
/** Plain decimal literals each lex as a single TOKEN_DECIMAL. */
MunitResult test_lexer_decimal(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {"123", "0", "42"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
    }
    return MUNIT_OK;
}
/**
 * Decimal literals followed by a ":8", ":16", ":32" or ":64" suffix lex as a
 * single TOKEN_DECIMAL whose text includes the suffix.
 */
MunitResult test_lexer_decimal_with_suffix(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {"123:8", "0:16", "42:32", "69:64"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_DECIMAL, inputs[i]);
    }
    return MUNIT_OK;
}
/**
 * "0x"-prefixed literals with upper- or lower-case digits each lex as a
 * single TOKEN_HEXADECIMAL.
 */
MunitResult test_lexer_hexadecimal(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {"0x123", "0xDEAD", "0x0", "0xabcdef", "0xABCDEF"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
    }
    return MUNIT_OK;
}
/**
 * Hexadecimal literals followed by a ":8", ":16", ":32" or ":64" suffix lex
 * as a single TOKEN_HEXADECIMAL whose text includes the suffix.
 */
MunitResult test_lexer_hexadecimal_with_suffix(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {"0x123:8", "0xDEAD:16", "0xABC:32", "0xffff:64"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_HEXADECIMAL, inputs[i]);
    }
    return MUNIT_OK;
}
/** "0o"-prefixed literals each lex as a single TOKEN_OCTAL. */
MunitResult test_lexer_octal(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {"0o777", "0o0", "0o123"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
    }
    return MUNIT_OK;
}
/**
 * Octal literals followed by a ":8", ":16", ":32" or ":64" suffix lex as a
 * single TOKEN_OCTAL whose text includes the suffix.
 */
MunitResult test_lexer_octal_with_suffix(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {"0o777:8", "0o123:16", "0o777:32", "0o123:64"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_OCTAL, inputs[i]);
    }
    return MUNIT_OK;
}
/** "0b"-prefixed literals each lex as a single TOKEN_BINARY. */
MunitResult test_lexer_binary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {"0b101", "0b0", "0b1", "0b01010101"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
    }
    return MUNIT_OK;
}
/**
 * Binary literals followed by a ":8", ":16", ":32" or ":64" suffix lex as a
 * single TOKEN_BINARY whose text includes the suffix.
 */
MunitResult test_lexer_binary_with_suffix(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {"0b101:8", "0b0:16", "0b1:32", "0b01010101:64"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_BINARY, inputs[i]);
    }
    return MUNIT_OK;
}
/** A lone ":" lexes as a single TOKEN_COLON followed by end of input. */
MunitResult test_lexer_colon(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    lexer_test_one_token(TOKEN_COLON, ":");

    return MUNIT_OK;
}
/** A lone "," lexes as a single TOKEN_COMMA followed by end of input. */
MunitResult test_lexer_comma(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    lexer_test_one_token(TOKEN_COMMA, ",");

    return MUNIT_OK;
}
/** A lone "[" lexes as a single TOKEN_LBRACKET followed by end of input. */
MunitResult test_lexer_lbracket(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    lexer_test_one_token(TOKEN_LBRACKET, "[");

    return MUNIT_OK;
}
/** A lone "]" lexes as a single TOKEN_RBRACKET followed by end of input. */
MunitResult test_lexer_rbracket(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    lexer_test_one_token(TOKEN_RBRACKET, "]");

    return MUNIT_OK;
}
/** A lone "+" lexes as a single TOKEN_PLUS followed by end of input. */
MunitResult test_lexer_plus(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    lexer_test_one_token(TOKEN_PLUS, "+");

    return MUNIT_OK;
}
/** A lone "-" lexes as a single TOKEN_MINUS followed by end of input. */
MunitResult test_lexer_minus(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    lexer_test_one_token(TOKEN_MINUS, "-");

    return MUNIT_OK;
}
/** A lone "*" lexes as a single TOKEN_ASTERISK followed by end of input. */
MunitResult test_lexer_asterisk(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    lexer_test_one_token(TOKEN_ASTERISK, "*");

    return MUNIT_OK;
}
/** A lone "." lexes as a single TOKEN_DOT followed by end of input. */
MunitResult test_lexer_dot(const MunitParameter params[], void *data) {
    (void)data;
    (void)params;

    lexer_test_one_token(TOKEN_DOT, ".");

    return MUNIT_OK;
}
/**
 * Text starting with ";" lexes as a single TOKEN_COMMENT that keeps the
 * semicolon, including the case of a semicolon with nothing after it.
 */
MunitResult test_lexer_comment(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {";This is a comment", "; Another comment", ";"};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_COMMENT, inputs[i]);
    }
    return MUNIT_OK;
}
/**
 * Runs of spaces and tabs lex as a single TOKEN_WHITESPACE carrying the whole
 * run as its text.
 */
MunitResult test_lexer_whitespace(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    static const char *const inputs[] = {" ", " ", "\t", " \t "};
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
        lexer_test_one_token(TOKEN_WHITESPACE, inputs[i]);
    }
    return MUNIT_OK;
}
/**
 * Both the Unix newline ("\n") and the Windows newline ("\r\n") lex as one
 * TOKEN_NEWLINE at line 0, column 0, followed by end of input.
 */
MunitResult test_lexer_newlines(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    // Simple newline first, then the Windows-style one.
    static const char *const newlines[] = {"\n", "\r\n"};
    for (size_t i = 0; i < sizeof(newlines) / sizeof(newlines[0]); ++i) {
        lexer_t lex = {};
        lexer_setup_memory_test(&lex, newlines[i]);
        lexer_expect_one_token(&lex, TOKEN_NEWLINE, newlines[i], 0, 0);
        lexer_expect_eof(&lex);
        lexer_close(&lex);
    }
    return MUNIT_OK;
}
/**
 * Line and column tracking across newlines.
 *
 * For the input "a\nb\nc" each newline token is reported on the line it ends
 * (at column 1), and the identifier after it starts at column 0 of the next
 * line. Lines and columns are counted from zero.
 */
MunitResult test_lexer_line_numbers(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    lexer_t lex = {};
    lexer_setup_memory_test(&lex, "a\nb\nc");

    // Line 0
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "a", 0, 0);
    lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 0, 1);
    // Line 1
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "b", 1, 0);
    lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 1, 1);
    // Line 2
    lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "c", 2, 0);
    lexer_expect_eof(&lex);

    lexer_close(&lex);
    return MUNIT_OK;
}
/**
 * A decimal literal ends at the first character that starts another token.
 * Each case lexes "123" plus one follower, which is expected at column 3.
 */
MunitResult test_lexer_decimal_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"123,", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMA, ",", 0, 3}},
        {"123:", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COLON, ":", 0, 3}},
        {"123[", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 3}},
        {"123]", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 3}},
        {"123+", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_PLUS, "+", 0, 3}},
        {"123-", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_MINUS, "-", 0, 3}},
        {"123*", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 3}},
        {"123.", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_DOT, ".", 0, 3}},
        {"123;", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMENT, ";", 0, 3}},
        {"123\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 3}},
        {"123\r\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 3}},
        {"123 ", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 3}},
        {"123\t", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 3}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * A hexadecimal literal ends at the first character that starts another
 * token. Each case lexes "0x123" plus one follower, expected at column 5.
 */
MunitResult test_lexer_hexadecimal_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"0x123,", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}},
        {"0x123:", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COLON, ":", 0, 5}},
        {"0x123[", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}},
        {"0x123]", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}},
        {"0x123+", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}},
        {"0x123-", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}},
        {"0x123*", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}},
        {"0x123.", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_DOT, ".", 0, 5}},
        {"0x123;", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}},
        {"0x123\n", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}},
        {"0x123\r\n", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5}},
        {"0x123 ", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5}},
        {"0x123\t", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * An octal literal ends at the first character that starts another token.
 * Each case lexes "0o123" plus one follower, expected at column 5.
 */
MunitResult test_lexer_octal_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"0o123,", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}},
        {"0o123:", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COLON, ":", 0, 5}},
        {"0o123[", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}},
        {"0o123]", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}},
        {"0o123+", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}},
        {"0o123-", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}},
        {"0o123*", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}},
        {"0o123.", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_DOT, ".", 0, 5}},
        {"0o123;", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}},
        {"0o123\n", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}},
        {"0o123\r\n", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5}},
        {"0o123 ", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5}},
        {"0o123\t", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * A binary literal ends at the first character that starts another token.
 * Each case lexes "0b101" plus one follower, expected at column 5.
 */
MunitResult test_lexer_binary_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"0b101,", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMA, ",", 0, 5}},
        {"0b101:", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COLON, ":", 0, 5}},
        {"0b101[", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}},
        {"0b101]", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}},
        {"0b101+", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_PLUS, "+", 0, 5}},
        {"0b101-", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_MINUS, "-", 0, 5}},
        {"0b101*", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}},
        {"0b101.", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_DOT, ".", 0, 5}},
        {"0b101;", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}},
        {"0b101\n", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}},
        {"0b101\r\n", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5}},
        {"0b101 ", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5}},
        {"0b101\t", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * A colon is always a one-character token: whatever follows it, including a
 * second colon, is lexed as a separate token at column 1.
 */
MunitResult test_lexer_colon_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {":,", {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {"::", {TOKEN_COLON, ":", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {":[", {TOKEN_COLON, ":", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {":]", {TOKEN_COLON, ":", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {":+", {TOKEN_COLON, ":", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {":-", {TOKEN_COLON, ":", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {":*", {TOKEN_COLON, ":", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {":.", {TOKEN_COLON, ":", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {":;", {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {":\n", {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {":\r\n", {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {": ", {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {":\t", {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * A comma is always a one-character token: whatever follows it, including a
 * second comma, is lexed as a separate token at column 1.
 */
MunitResult test_lexer_comma_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {",,", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {",:", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {",[", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {",]", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {",+", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {",-", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {",*", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {",.", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {",;", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {",\n", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {",\r\n", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {", ", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {",\t", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * "[" is always a one-character token: whatever follows it, including a
 * second "[", is lexed as a separate token at column 1.
 */
MunitResult test_lexer_lbracket_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"[,", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {"[:", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {"[[", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {"[]", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {"[+", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {"[-", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {"[*", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"[.", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {"[;", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {"[\n", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {"[\r\n", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {"[ ", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {"[\t", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * "]" is always a one-character token: whatever follows it, including a
 * second "]", is lexed as a separate token at column 1.
 */
MunitResult test_lexer_rbracket_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"],", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {"]:", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {"][", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {"]]", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {"]+", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {"]-", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {"]*", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"].", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {"];", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {"]\n", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {"]\r\n", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {"] ", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {"]\t", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * "+" is always a one-character token: whatever follows it, including a
 * second "+", is lexed as a separate token at column 1.
 */
MunitResult test_lexer_plus_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"+,", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {"+:", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {"+[", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {"+]", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {"++", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {"+-", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {"+*", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"+.", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {"+;", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {"+\n", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {"+\r\n", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {"+ ", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {"+\t", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * "-" is always a one-character token: whatever follows it, including a
 * second "-", is lexed as a separate token at column 1.
 */
MunitResult test_lexer_minus_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"-,", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {"-:", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {"-[", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {"-]", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {"-+", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {"--", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {"-*", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"-.", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {"-;", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {"-\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {"-\r\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {"- ", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {"-\t", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * "*" is always a one-character token: whatever follows it, including a
 * second "*", is lexed as a separate token at column 1.
 */
MunitResult test_lexer_asterisk_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"*,", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {"*:", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {"*[", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {"*]", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {"*+", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {"*-", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {"**", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"*.", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {"*;", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {"*\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {"*\r\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {"* ", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {"*\t", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * "." is always a one-character token: whatever follows it, including a
 * second ".", is lexed as a separate token at column 1.
 */
MunitResult test_lexer_dot_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {".,", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {".:", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {".[", {TOKEN_DOT, ".", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {".]", {TOKEN_DOT, ".", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {".+", {TOKEN_DOT, ".", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {".-", {TOKEN_DOT, ".", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {".*", {TOKEN_DOT, ".", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {"..", {TOKEN_DOT, ".", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {".;", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {".\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {".\r\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {". ", {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}},
        {".\t", {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
MunitResult test_lexer_comment_boundary(const MunitParameter params[], void *data) {
(void)params;
(void)data;
boundary_t boundaries[] = {
{";comment\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8} },
{";comment\r\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}},
{nullptr, {}, {} },
};
test_lexer_boundary(boundaries);
return MUNIT_OK;
}
/**
 * Whitespace ends at the first non-whitespace character. Each case lexes a
 * single space plus one follower, which is expected at column 1.
 */
MunitResult test_lexer_whitespace_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {" ,", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMA, ",", 0, 1}},
        {" :", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COLON, ":", 0, 1}},
        {" [", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}},
        {" ]", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}},
        {" +", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_PLUS, "+", 0, 1}},
        {" -", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_MINUS, "-", 0, 1}},
        {" *", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}},
        {" .", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_DOT, ".", 0, 1}},
        {" ;", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}},
        {" \n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}},
        {" \r\n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * The token after a "\n" starts a new line: each follower is expected at
 * line 1, column 0, while the newline itself is reported at line 0, column 0.
 */
MunitResult test_lexer_newline_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"\n,", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}},
        {"\n:", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}},
        {"\n[", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}},
        {"\n]", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}},
        {"\n+", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}},
        {"\n-", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}},
        {"\n*", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}},
        {"\n.", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}},
        {"\n;", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}},
        {"\n\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}},
        {"\n\r\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0}},
        {"\n ", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0}},
        {"\n\t", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * The token after a "\r\n" starts a new line: each follower is expected at
 * line 1, column 0, while the newline itself is reported at line 0, column 0.
 */
MunitResult test_lexer_crlf_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"\r\n,", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}},
        {"\r\n:", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}},
        {"\r\n[", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}},
        {"\r\n]", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}},
        {"\r\n+", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}},
        {"\r\n-", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}},
        {"\r\n*", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}},
        {"\r\n.", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}},
        {"\r\n;", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}},
        {"\r\n\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}},
        {"\r\n\r\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0}},
        {"\r\n ", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0}},
        {"\r\n\t", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
/**
 * A number with a size suffix ends right after the suffix: the follower is a
 * separate token located immediately behind the suffixed literal.
 */
MunitResult test_lexer_number_boundary(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    boundary_t cases[] = {
        {"0x123:8,", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_COMMA, ",", 0, 7}},
        {"0x123:16:", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_COLON, ":", 0, 8}},
        {"0o777:32[", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_LBRACKET, "[", 0, 8}},
        {"0b101:64]", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_RBRACKET, "]", 0, 8}},
        {"0x123:8+", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_PLUS, "+", 0, 7}},
        {"0x123:16-", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_MINUS, "-", 0, 8}},
        {"0o777:32*", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_ASTERISK, "*", 0, 8}},
        {"0b101:64.", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_DOT, ".", 0, 8}},
        {"0x123:8;", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_COMMENT, ";", 0, 7}},
        {"0x123:16\n", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}},
        {"0o777:32\r\n", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}},
        {"0b101:64 ", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 8}},
        {"0x123:8\t", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 7}},
        {nullptr, {}, {}},
    };
    test_lexer_boundary(cases);
    return MUNIT_OK;
}
MunitResult test_lexer_maximum_length_numbers(const MunitParameter params[], void *data) {
(void)params;
(void)data;
char *numbers[] = {
"9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
"9999999999999999999988",
"9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
"9999999999999999998:64",
"0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
"9999999999999999999988",
"0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
"9999999999999999998:64",
nullptr,
};
for (size_t i = 0; numbers[i]; ++i) {
auto number = numbers[i];
munit_assert_size(128, ==, strlen(number));
lexer_t lex = {};
lexer_token_t token = {};
lexer_setup_memory_test(&lex, number);
lexer_next(&lex, &token);
munit_assert_true(token.id == TOKEN_DECIMAL || token.id == TOKEN_HEXADECIMAL);
munit_assert_size(128, ==, strlen(token.value));
lexer_token_cleanup(&token);
lexer_close(&lex);
}
return MUNIT_OK;
}
/**
 * Numbers of 129 characters are rejected with a TOKEN_ERROR.
 *
 * The first two inputs have no suffix and must give an error token holding
 * 128 characters. The last two carry a ":64" suffix: the error token holds at
 * most 128 characters and lexing then continues with the colon at column 126
 * and the decimal "64" at column 127.
 */
MunitResult test_lexer_too_long_numbers(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    char *numbers[] = {
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999999988",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999999988",
        "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999998:64",
        "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"
        "99999999999999999998:64",
    };

    // Without suffix we expect 128 characters and then failure
    for (size_t i = 0; i < 2; ++i) {
        auto number = numbers[i];
        munit_assert_size(129, ==, strlen(number));

        lexer_t lex = {};
        lexer_token_t token = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);

        munit_assert_int(TOKEN_ERROR, ==, token.id);
        munit_assert_size(128, ==, strlen(token.value));

        lexer_token_cleanup(&token);
        lexer_close(&lex);
    }

    // With suffix we fail at the suffix boundary
    for (size_t i = 2; i < 4; ++i) {
        auto number = numbers[i];
        munit_assert_size(129, ==, strlen(number));

        lexer_t lex = {};
        lexer_token_t token = {};
        lexer_setup_memory_test(&lex, number);
        lexer_next(&lex, &token);

        munit_assert_int(TOKEN_ERROR, ==, token.id);
        munit_assert_size(128, >=, strlen(token.value));
        lexer_token_cleanup(&token);

        // The lexer resumes with the suffix as ordinary tokens.
        lexer_expect_one_token(&lex, TOKEN_COLON, ":", 0, 126);
        lexer_expect_one_token(&lex, TOKEN_DECIMAL, "64", 0, 127);

        lexer_close(&lex);
    }
    return MUNIT_OK;
}
MunitResult test_lexer_max_whitespace_length(const MunitParameter params[], void *data) {
(void)params;
(void)data;
char whitespace[1025];
memset(whitespace, ' ', 1024);
whitespace[1024] = '\0';
munit_assert_size(1024, ==, strlen(whitespace));
lexer_t lex = {};
lexer_token_t token = {};
lexer_setup_memory_test(&lex, whitespace);
lexer_next(&lex, &token);
munit_assert_int(TOKEN_WHITESPACE, ==, token.id);
munit_assert_size(1024, ==, strlen(token.value));
lexer_token_cleanup(&token);
lexer_close(&lex);
return MUNIT_OK;
}
/**
 * A run of 1025 spaces is one character too long for a single token.
 *
 * The first read must give a TOKEN_ERROR holding 1024 characters; the
 * remaining space is then lexed as a normal TOKEN_WHITESPACE at column 1024.
 */
MunitResult test_lexer_too_long_whitespace(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    // 1025 spaces plus the terminating NUL (the buffer has one spare byte).
    char whitespace[1026];
    memset(whitespace, ' ', 1025);
    whitespace[1025] = '\0';
    munit_assert_size(1025, ==, strlen(whitespace));

    lexer_t lex = {};
    lexer_token_t token = {};
    lexer_setup_memory_test(&lex, whitespace);
    lexer_next(&lex, &token);

    munit_assert_int(TOKEN_ERROR, ==, token.id);
    munit_assert_size(1024, ==, strlen(token.value));
    lexer_token_cleanup(&token);

    lexer_expect_one_token(&lex, TOKEN_WHITESPACE, " ", 0, 1024);

    lexer_close(&lex);
    return MUNIT_OK;
}
/**
 * Registration table for the lexer test suite.
 *
 * Each entry pairs a test name with its function; the all-null entry ends the
 * table. "/decimal_with_suffix" is registered alongside the other suffix
 * tests so that test_lexer_decimal_with_suffix actually runs.
 */
MunitTest lexer_tests[] = {
    {"/identifier", test_lexer_identifier, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/identifier_boundary", test_lexer_identifier_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/decimal", test_lexer_decimal, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/decimal_with_suffix", test_lexer_decimal_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/decimal_boundary", test_lexer_decimal_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal", test_lexer_hexadecimal, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal_with_suffix", test_lexer_hexadecimal_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/hexadecimal_boundary", test_lexer_hexadecimal_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal", test_lexer_octal, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal_with_suffix", test_lexer_octal_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/octal_boundary", test_lexer_octal_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary", test_lexer_binary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary_with_suffix", test_lexer_binary_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/binary_boundary", test_lexer_binary_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/number_boundary", test_lexer_number_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/colon", test_lexer_colon, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/colon_boundary", test_lexer_colon_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comma", test_lexer_comma, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comma_boundary", test_lexer_comma_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/lbracket", test_lexer_lbracket, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/lbracket_boundary", test_lexer_lbracket_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/rbracket", test_lexer_rbracket, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/rbracket_boundary", test_lexer_rbracket_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/plus", test_lexer_plus, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/plus_boundary", test_lexer_plus_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/minus", test_lexer_minus, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/minus_boundary", test_lexer_minus_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/asterisk", test_lexer_asterisk, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/asterisk_boundary", test_lexer_asterisk_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/dot", test_lexer_dot, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/dot_boundary", test_lexer_dot_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comment", test_lexer_comment, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/comment_boundary", test_lexer_comment_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/whitespace", test_lexer_whitespace, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/whitespace_boundary", test_lexer_whitespace_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/newlines", test_lexer_newlines, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/newline_boundary", test_lexer_newline_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/crlf_boundary", test_lexer_crlf_boundary, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/line_numbers", test_lexer_line_numbers, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/maximum_length_numbers", test_lexer_maximum_length_numbers, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/too_long_numbers", test_lexer_too_long_numbers, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/max_whitespace_length", test_lexer_max_whitespace_length, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {"/too_long_whitespace", test_lexer_too_long_whitespace, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
    {nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};

View File

@ -1,22 +0,0 @@
#include "munit.h"
extern MunitTest ast_tests[];
extern MunitTest lexer_tests[];
extern MunitTest regression_tests[];
extern MunitTest symbols_tests[];
extern MunitTest bytes_tests[];
/* Test-runner entry point: groups the per-module test arrays into child
 * suites below the "/oas" root suite and passes the command line to munit. */
int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) {
    /* One child suite per module, each run for one iteration; the entry with
     * null members closes the list. */
    MunitSuite children[] = {
        {"/regression", regression_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/ast", ast_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/lexer", lexer_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/symbols", symbols_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {"/bytes", bytes_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE},
        {nullptr, nullptr, nullptr, 0, MUNIT_SUITE_OPTION_NONE},
    };

    /* The root suite carries no tests of its own, only the children. */
    MunitSuite root = {"/oas", nullptr, children, 1, MUNIT_SUITE_OPTION_NONE};

    int exit_status = munit_suite_main(&root, nullptr, argc, argv);
    return exit_status;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,535 +0,0 @@
/* µnit Testing Framework
* Copyright (c) 2013-2017 Evan Nemerson <evan@nemerson.com>
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(MUNIT_H)
#define MUNIT_H
#include <stdarg.h>
#include <stdlib.h>
/* Combine a version triple into a single integer: major shifted left by 16,
 * minor shifted left by 8, revision OR-ed into the low bits. */
#define MUNIT_VERSION(major, minor, revision) \
(((major) << 16) | ((minor) << 8) | (revision))
/* Version of this copy of the header, encoded with MUNIT_VERSION: 0.4.1. */
#define MUNIT_CURRENT_VERSION MUNIT_VERSION(0, 4, 1)
#if defined(_MSC_VER) && (_MSC_VER < 1600)
# define munit_int8_t __int8
# define munit_uint8_t unsigned __int8
# define munit_int16_t __int16
# define munit_uint16_t unsigned __int16
# define munit_int32_t __int32
# define munit_uint32_t unsigned __int32
# define munit_int64_t __int64
# define munit_uint64_t unsigned __int64
#else
# include <stdint.h>
# define munit_int8_t int8_t
# define munit_uint8_t uint8_t
# define munit_int16_t int16_t
# define munit_uint16_t uint16_t
# define munit_int32_t int32_t
# define munit_uint32_t uint32_t
# define munit_int64_t int64_t
# define munit_uint64_t uint64_t
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1800)
# if !defined(PRIi8)
# define PRIi8 "i"
# endif
# if !defined(PRIi16)
# define PRIi16 "i"
# endif
# if !defined(PRIi32)
# define PRIi32 "i"
# endif
# if !defined(PRIi64)
# define PRIi64 "I64i"
# endif
# if !defined(PRId8)
# define PRId8 "d"
# endif
# if !defined(PRId16)
# define PRId16 "d"
# endif
# if !defined(PRId32)
# define PRId32 "d"
# endif
# if !defined(PRId64)
# define PRId64 "I64d"
# endif
# if !defined(PRIx8)
# define PRIx8 "x"
# endif
# if !defined(PRIx16)
# define PRIx16 "x"
# endif
# if !defined(PRIx32)
# define PRIx32 "x"
# endif
# if !defined(PRIx64)
# define PRIx64 "I64x"
# endif
# if !defined(PRIu8)
# define PRIu8 "u"
# endif
# if !defined(PRIu16)
# define PRIu16 "u"
# endif
# if !defined(PRIu32)
# define PRIu32 "u"
# endif
# if !defined(PRIu64)
# define PRIu64 "I64u"
# endif
#else
# include <inttypes.h>
#endif
#if !defined(munit_bool)
# if defined(bool)
# define munit_bool bool
# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
# define munit_bool _Bool
# else
# define munit_bool int
# endif
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/* Branch-prediction hints and an "unused" attribute. With __GNUC__ defined
 * these map to __builtin_expect / __attribute__((__unused__)); otherwise the
 * hints reduce to the bare expression and MUNIT_UNUSED expands to nothing. */
#if defined(__GNUC__)
# define MUNIT_LIKELY(expr) (__builtin_expect ((expr), 1))
# define MUNIT_UNLIKELY(expr) (__builtin_expect ((expr), 0))
# define MUNIT_UNUSED __attribute__((__unused__))
#else
# define MUNIT_LIKELY(expr) (expr)
# define MUNIT_UNLIKELY(expr) (expr)
# define MUNIT_UNUSED
#endif
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__PGI)
# define MUNIT_ARRAY_PARAM(name) name
#else
# define MUNIT_ARRAY_PARAM(name)
#endif
#if !defined(_WIN32)
# define MUNIT_SIZE_MODIFIER "z"
# define MUNIT_CHAR_MODIFIER "hh"
# define MUNIT_SHORT_MODIFIER "h"
#else
# if defined(_M_X64) || defined(__amd64__)
# define MUNIT_SIZE_MODIFIER "I64"
# else
# define MUNIT_SIZE_MODIFIER ""
# endif
# define MUNIT_CHAR_MODIFIER ""
# define MUNIT_SHORT_MODIFIER ""
#endif
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
# define MUNIT_NO_RETURN _Noreturn
#elif defined(__GNUC__)
# define MUNIT_NO_RETURN __attribute__((__noreturn__))
#elif defined(_MSC_VER)
# define MUNIT_NO_RETURN __declspec(noreturn)
#else
# define MUNIT_NO_RETURN
#endif
#if defined(_MSC_VER) && (_MSC_VER >= 1500)
# define MUNIT_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127))
# define MUNIT_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))
#else
# define MUNIT_PUSH_DISABLE_MSVC_C4127_
# define MUNIT_POP_DISABLE_MSVC_C4127_
#endif
typedef enum {
MUNIT_LOG_DEBUG,
MUNIT_LOG_INFO,
MUNIT_LOG_WARNING,
MUNIT_LOG_ERROR
} MunitLogLevel;
/* MUNIT_PRINTF(string_index, first_to_check) marks a function as taking a
 * printf-style format so the compiler can check its arguments. It is only
 * active for __GNUC__ compilers other than MinGW; elsewhere it is empty. */
#if defined(__GNUC__) && !defined(__MINGW32__)
# define MUNIT_PRINTF(string_index, first_to_check) __attribute__((format (printf, string_index, first_to_check)))
#else
# define MUNIT_PRINTF(string_index, first_to_check)
#endif
/* Log a printf-formatted message at `level`, tagged with a source file name
 * and line number. The format string is argument 4, its values start at 5. */
MUNIT_PRINTF(4, 5)
void munit_logf_ex(MunitLogLevel level, const char* filename, int line, const char* format, ...);
/* Convenience wrapper that supplies __FILE__/__LINE__ of the call site.
 * Requires at least one argument after `format` (plain __VA_ARGS__ is used). */
#define munit_logf(level, format, ...) \
munit_logf_ex(level, __FILE__, __LINE__, format, __VA_ARGS__)
/* Log a fixed string; routed through "%s" so `msg` is never used as a format. */
#define munit_log(level, msg) \
munit_logf(level, "%s", msg)
/* Report a printf-formatted error for the given file/line. Declared
 * MUNIT_NO_RETURN: control does not come back to the caller. */
MUNIT_NO_RETURN
MUNIT_PRINTF(3, 4)
void munit_errorf_ex(const char* filename, int line, const char* format, ...);
/* Call-site wrapper for munit_errorf_ex; also needs at least one argument
 * after `format`. */
#define munit_errorf(format, ...) \
munit_errorf_ex(__FILE__, __LINE__, format, __VA_ARGS__)
/* Report a fixed error string, again passed through "%s". */
#define munit_error(msg) \
munit_errorf("%s", msg)
#define munit_assert(expr) \
do { \
if (!MUNIT_LIKELY(expr)) { \
munit_error("assertion failed: " #expr); \
} \
MUNIT_PUSH_DISABLE_MSVC_C4127_ \
} while (0) \
MUNIT_POP_DISABLE_MSVC_C4127_
#define munit_assert_true(expr) \
do { \
if (!MUNIT_LIKELY(expr)) { \
munit_error("assertion failed: " #expr " is not true"); \
} \
MUNIT_PUSH_DISABLE_MSVC_C4127_ \
} while (0) \
MUNIT_POP_DISABLE_MSVC_C4127_
#define munit_assert_false(expr) \
do { \
if (!MUNIT_LIKELY(!(expr))) { \
munit_error("assertion failed: " #expr " is not false"); \
} \
MUNIT_PUSH_DISABLE_MSVC_C4127_ \
} while (0) \
MUNIT_POP_DISABLE_MSVC_C4127_
/* Generic typed comparison used by all munit_assert_<type> macros.
 *   prefix, suffix - string literals printed around each value in the message
 *   T              - type both operands are converted to
 *   fmt            - printf conversion for T, WITHOUT the leading '%'
 *   a, op, b       - operands and the comparison operator token
 * `a` and `b` are each evaluated exactly once into temporaries of type T, so
 * operands with side effects are safe. On failure the message shows the
 * source text of the comparison followed by the two actual values. */
#define munit_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
do { \
T munit_tmp_a_ = (a); \
T munit_tmp_b_ = (b); \
if (!(munit_tmp_a_ op munit_tmp_b_)) { \
munit_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")", \
#a, #op, #b, munit_tmp_a_, #op, munit_tmp_b_); \
} \
MUNIT_PUSH_DISABLE_MSVC_C4127_ \
} while (0) \
MUNIT_POP_DISABLE_MSVC_C4127_
#define munit_assert_type(T, fmt, a, op, b) \
munit_assert_type_full("", "", T, fmt, a, op, b)
#define munit_assert_char(a, op, b) \
munit_assert_type_full("'\\x", "'", char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
#define munit_assert_uchar(a, op, b) \
munit_assert_type_full("'\\x", "'", unsigned char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
#define munit_assert_short(a, op, b) \
munit_assert_type(short, MUNIT_SHORT_MODIFIER "d", a, op, b)
#define munit_assert_ushort(a, op, b) \
munit_assert_type(unsigned short, MUNIT_SHORT_MODIFIER "u", a, op, b)
#define munit_assert_int(a, op, b) \
munit_assert_type(int, "d", a, op, b)
#define munit_assert_uint(a, op, b) \
munit_assert_type(unsigned int, "u", a, op, b)
#define munit_assert_long(a, op, b) \
munit_assert_type(long int, "ld", a, op, b)
#define munit_assert_ulong(a, op, b) \
munit_assert_type(unsigned long int, "lu", a, op, b)
#define munit_assert_llong(a, op, b) \
munit_assert_type(long long int, "lld", a, op, b)
#define munit_assert_ullong(a, op, b) \
munit_assert_type(unsigned long long int, "llu", a, op, b)
#define munit_assert_size(a, op, b) \
munit_assert_type(size_t, MUNIT_SIZE_MODIFIER "u", a, op, b)
#define munit_assert_float(a, op, b) \
munit_assert_type(float, "f", a, op, b)
#define munit_assert_double(a, op, b) \
munit_assert_type(double, "g", a, op, b)
#define munit_assert_ptr(a, op, b) \
munit_assert_type(const void*, "p", a, op, b)
#define munit_assert_int8(a, op, b) \
munit_assert_type(munit_int8_t, PRIi8, a, op, b)
#define munit_assert_uint8(a, op, b) \
munit_assert_type(munit_uint8_t, PRIu8, a, op, b)
#define munit_assert_int16(a, op, b) \
munit_assert_type(munit_int16_t, PRIi16, a, op, b)
#define munit_assert_uint16(a, op, b) \
munit_assert_type(munit_uint16_t, PRIu16, a, op, b)
#define munit_assert_int32(a, op, b) \
munit_assert_type(munit_int32_t, PRIi32, a, op, b)
#define munit_assert_uint32(a, op, b) \
munit_assert_type(munit_uint32_t, PRIu32, a, op, b)
#define munit_assert_int64(a, op, b) \
munit_assert_type(munit_int64_t, PRIi64, a, op, b)
#define munit_assert_uint64(a, op, b) \
munit_assert_type(munit_uint64_t, PRIu64, a, op, b)
/* Assert that two doubles differ by no more than 10^-precision.
 * `precision` is token-pasted into the literal `1e-<precision>` and also
 * stringified into the "%0.<precision>g" format, so it must be a plain
 * integer literal token, not a variable or expression.
 * The absolute difference is computed by hand, without calling fabs(). */
#define munit_assert_double_equal(a, b, precision) \
do { \
const double munit_tmp_a_ = (a); \
const double munit_tmp_b_ = (b); \
const double munit_tmp_diff_ = ((munit_tmp_a_ - munit_tmp_b_) < 0) ? \
-(munit_tmp_a_ - munit_tmp_b_) : \
(munit_tmp_a_ - munit_tmp_b_); \
if (MUNIT_UNLIKELY(munit_tmp_diff_ > 1e-##precision)) { \
munit_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)", \
#a, #b, munit_tmp_a_, munit_tmp_b_); \
} \
MUNIT_PUSH_DISABLE_MSVC_C4127_ \
} while (0) \
MUNIT_POP_DISABLE_MSVC_C4127_
#include <string.h>
/* Assert that two C strings compare equal with strcmp(). Both arguments are
 * evaluated once and passed straight to strcmp, so neither may be a null
 * pointer. On failure both strings are printed. */
#define munit_assert_string_equal(a, b) \
do { \
const char* munit_tmp_a_ = a; \
const char* munit_tmp_b_ = b; \
if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) != 0)) { \
munit_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")", \
#a, #b, munit_tmp_a_, munit_tmp_b_); \
} \
MUNIT_PUSH_DISABLE_MSVC_C4127_ \
} while (0) \
MUNIT_POP_DISABLE_MSVC_C4127_
/* Assert that two C strings differ according to strcmp(). The failure
 * message prints the two values joined by "==", i.e. it shows that they
 * turned out equal. */
#define munit_assert_string_not_equal(a, b) \
do { \
const char* munit_tmp_a_ = a; \
const char* munit_tmp_b_ = b; \
if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) == 0)) { \
munit_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")", \
#a, #b, munit_tmp_a_, munit_tmp_b_); \
} \
MUNIT_PUSH_DISABLE_MSVC_C4127_ \
} while (0) \
MUNIT_POP_DISABLE_MSVC_C4127_
/* Assert that the first `size` bytes at `a` and `b` are identical.
 * All three arguments are evaluated once. When memcmp() reports a
 * difference, the buffers are scanned byte by byte and the offset of the
 * first mismatch is reported.
 * The whole comparison is wrapped in MUNIT_UNLIKELY, matching the string
 * assertion macros; previously only the memcmp() call was inside the hint. */
#define munit_assert_memory_equal(size, a, b) \
  do { \
    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
    const size_t munit_tmp_size_ = (size); \
    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) != 0)) { \
      size_t munit_tmp_pos_; \
      for (munit_tmp_pos_ = 0 ; munit_tmp_pos_ < munit_tmp_size_ ; munit_tmp_pos_++) { \
        if (munit_tmp_a_[munit_tmp_pos_] != munit_tmp_b_[munit_tmp_pos_]) { \
          munit_errorf("assertion failed: memory %s == %s, at offset %" MUNIT_SIZE_MODIFIER "u", \
                       #a, #b, munit_tmp_pos_); \
          break; \
        } \
      } \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
/* Assert that the first `size` bytes at `a` and `b` are NOT identical.
 * All three arguments are evaluated once.
 * Two fixes over the previous version:
 *  - MUNIT_UNLIKELY now wraps the whole `memcmp(...) == 0` test. Before, only
 *    memcmp() was inside the hint, which told the compiler the buffers were
 *    expected to be equal, i.e. that the failure branch was the likely one.
 *  - The size is printed with MUNIT_SIZE_MODIFIER instead of a hard-coded
 *    "%zu", consistent with munit_assert_memory_equal and munit_assert_size. */
#define munit_assert_memory_not_equal(size, a, b) \
  do { \
    const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \
    const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \
    const size_t munit_tmp_size_ = (size); \
    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) == 0)) { \
      munit_errorf("assertion failed: memory %s != %s (%" MUNIT_SIZE_MODIFIER "u bytes)", \
                   #a, #b, munit_tmp_size_); \
    } \
    MUNIT_PUSH_DISABLE_MSVC_C4127_ \
  } while (0) \
  MUNIT_POP_DISABLE_MSVC_C4127_
#define munit_assert_ptr_equal(a, b) \
munit_assert_ptr(a, ==, b)
#define munit_assert_ptr_not_equal(a, b) \
munit_assert_ptr(a, !=, b)
#define munit_assert_null(ptr) \
munit_assert_ptr(ptr, ==, NULL)
#define munit_assert_not_null(ptr) \
munit_assert_ptr(ptr, !=, NULL)
#define munit_assert_ptr_null(ptr) \
munit_assert_ptr(ptr, ==, NULL)
#define munit_assert_ptr_not_null(ptr) \
munit_assert_ptr(ptr, !=, NULL)
/*** Memory allocation ***/
/* Allocate `size` bytes on behalf of a test; `filename`/`line` identify the
 * requesting call site. Behaviour on allocation failure and the initial
 * contents of the memory are defined by the implementation, which is not
 * part of this header. */
void* munit_malloc_ex(const char* filename, int line, size_t size);
/* Allocate `size` bytes, recording the call site automatically. */
#define munit_malloc(size) \
munit_malloc_ex(__FILE__, __LINE__, (size))
/* Allocate storage for one object of `type` and cast the result to type*. */
#define munit_new(type) \
((type*) munit_malloc(sizeof(type)))
/* Allocate nmemb * size bytes via munit_malloc.
 * NOTE(review): the product is computed without an overflow check. */
#define munit_calloc(nmemb, size) \
munit_malloc((nmemb) * (size))
/* Allocate an array of `nmemb` objects of `type` and cast to type*. */
#define munit_newa(type, nmemb) \
((type*) munit_calloc((nmemb), sizeof(type)))
/*** Random number generation ***/
void munit_rand_seed(munit_uint32_t seed);
munit_uint32_t munit_rand_uint32(void);
int munit_rand_int_range(int min, int max);
double munit_rand_double(void);
void munit_rand_memory(size_t size, munit_uint8_t buffer[MUNIT_ARRAY_PARAM(size)]);
/*** Tests and Suites ***/
typedef enum {
/* Test successful */
MUNIT_OK,
/* Test failed */
MUNIT_FAIL,
/* Test was skipped */
MUNIT_SKIP,
/* Test failed due to circumstances not intended to be tested
* (things like network errors, invalid parameter value, failure to
* allocate memory in the test harness, etc.). */
MUNIT_ERROR
} MunitResult;
typedef struct {
char* name;
char** values;
} MunitParameterEnum;
typedef struct {
char* name;
char* value;
} MunitParameter;
const char* munit_parameters_get(const MunitParameter params[], const char* key);
typedef enum {
MUNIT_TEST_OPTION_NONE = 0,
MUNIT_TEST_OPTION_SINGLE_ITERATION = 1 << 0,
MUNIT_TEST_OPTION_TODO = 1 << 1
} MunitTestOptions;
typedef MunitResult (* MunitTestFunc)(const MunitParameter params[], void* user_data_or_fixture);
typedef void* (* MunitTestSetup)(const MunitParameter params[], void* user_data);
typedef void (* MunitTestTearDown)(void* fixture);
/* Description of a single test case. Test tables are arrays of this struct;
 * the tables in this project end with an entry whose pointers are all null.
 * Member order matters because those tables use positional initializers. */
typedef struct {
/* Test name (the tables in this project use names such as "/plus"). */
char* name;
/* The test body. */
MunitTestFunc test;
/* Optional fixture constructor (MunitTestSetup), or null. */
MunitTestSetup setup;
/* Optional fixture destructor (MunitTestTearDown), or null. */
MunitTestTearDown tear_down;
/* Bitwise combination of MunitTestOptions flags. */
MunitTestOptions options;
/* Optional parameter definitions (MunitParameterEnum), or null. */
MunitParameterEnum* parameters;
} MunitTest;
typedef enum {
MUNIT_SUITE_OPTION_NONE = 0
} MunitSuiteOptions;
/* A suite groups tests and/or nested suites under a common name prefix.
 * The typedef is declared first so the struct can point to its own type. */
typedef struct MunitSuite_ MunitSuite;
struct MunitSuite_ {
/* Name prefix for this suite (e.g. "/oas", "/lexer"). */
char* prefix;
/* Array of tests belonging directly to this suite, or null. */
MunitTest* tests;
/* Array of child suites, or null. */
MunitSuite* suites;
/* Iteration count for the suite's tests. */
unsigned int iterations;
/* Suite-level option flags (only MUNIT_SUITE_OPTION_NONE is defined). */
MunitSuiteOptions options;
};
int munit_suite_main(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
/* Note: I'm not very happy with this API; it's likely to change if I
* figure out something better. Suggestions welcome. */
typedef struct MunitArgument_ MunitArgument;
struct MunitArgument_ {
char* name;
munit_bool (* parse_argument)(const MunitSuite* suite, void* user_data, int* arg, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]);
void (* write_help)(const MunitArgument* argument, void* user_data);
};
int munit_suite_main_custom(const MunitSuite* suite,
void* user_data,
int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)],
const MunitArgument arguments[]);
#if defined(MUNIT_ENABLE_ASSERT_ALIASES)
#define assert_true(expr) munit_assert_true(expr)
#define assert_false(expr) munit_assert_false(expr)
#define assert_char(a, op, b) munit_assert_char(a, op, b)
#define assert_uchar(a, op, b) munit_assert_uchar(a, op, b)
#define assert_short(a, op, b) munit_assert_short(a, op, b)
#define assert_ushort(a, op, b) munit_assert_ushort(a, op, b)
#define assert_int(a, op, b) munit_assert_int(a, op, b)
#define assert_uint(a, op, b) munit_assert_uint(a, op, b)
#define assert_long(a, op, b) munit_assert_long(a, op, b)
#define assert_ulong(a, op, b) munit_assert_ulong(a, op, b)
#define assert_llong(a, op, b) munit_assert_llong(a, op, b)
#define assert_ullong(a, op, b) munit_assert_ullong(a, op, b)
#define assert_size(a, op, b) munit_assert_size(a, op, b)
#define assert_float(a, op, b) munit_assert_float(a, op, b)
#define assert_double(a, op, b) munit_assert_double(a, op, b)
#define assert_ptr(a, op, b) munit_assert_ptr(a, op, b)
#define assert_int8(a, op, b) munit_assert_int8(a, op, b)
#define assert_uint8(a, op, b) munit_assert_uint8(a, op, b)
#define assert_int16(a, op, b) munit_assert_int16(a, op, b)
#define assert_uint16(a, op, b) munit_assert_uint16(a, op, b)
#define assert_int32(a, op, b) munit_assert_int32(a, op, b)
#define assert_uint32(a, op, b) munit_assert_uint32(a, op, b)
#define assert_int64(a, op, b) munit_assert_int64(a, op, b)
#define assert_uint64(a, op, b) munit_assert_uint64(a, op, b)
#define assert_double_equal(a, b, precision) munit_assert_double_equal(a, b, precision)
#define assert_string_equal(a, b) munit_assert_string_equal(a, b)
#define assert_string_not_equal(a, b) munit_assert_string_not_equal(a, b)
#define assert_memory_equal(size, a, b) munit_assert_memory_equal(size, a, b)
#define assert_memory_not_equal(size, a, b) munit_assert_memory_not_equal(size, a, b)
#define assert_ptr_equal(a, b) munit_assert_ptr_equal(a, b)
#define assert_ptr_not_equal(a, b) munit_assert_ptr_not_equal(a, b)
/* Alias for munit_assert_ptr_null. The previous expansion named
 * munit_assert_null_equal, which this header never defines, so any use of
 * assert_ptr_null failed to compile/link. */
#define assert_ptr_null(ptr) munit_assert_ptr_null(ptr)
#define assert_ptr_not_null(ptr) munit_assert_not_null(ptr)
#define assert_null(ptr) munit_assert_null(ptr)
#define assert_not_null(ptr) munit_assert_not_null(ptr)
#endif /* defined(MUNIT_ENABLE_ASSERT_ALIASES) */
#if defined(__cplusplus)
}
#endif
#endif /* !defined(MUNIT_H) */
/* When MUNIT_ENABLE_ASSERT_ALIASES is defined, redirect the plain assert()
 * macro to munit_assert(). This section sits after the MUNIT_H include guard
 * has been closed, so it is processed on every inclusion of this header and
 * replaces whatever definition of assert is in effect at that point. */
#if defined(MUNIT_ENABLE_ASSERT_ALIASES)
# if defined(assert)
# undef assert
# endif
# define assert(expr) munit_assert(expr)
#endif

View File

@ -1,68 +0,0 @@
#include "../src/ast.h"
#include "../src/parser/parser.h"
#include "munit.h"
/* Regression test for tests/input/regression/test_trivia_head.asm: the file
 * must tokenize and parse without error, and the parser must leave no
 * unconsumed token (result.next is null). */
MunitResult test_regression_trivia_head(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, "tests/input/regression/test_trivia_head.asm");
    munit_assert_null(err);

    tokenlist_t *list;
    err = tokenlist_alloc(&list);
    munit_assert_null(err);

    err = tokenlist_fill(list, lex);
    munit_assert_null(err);

    parse_result_t result = parse(list->head);
    munit_assert_null(result.err);
    munit_assert_null(result.next);

    ast_node_free(result.node);
    tokenlist_free(list);
    /* Close the lexer opened above; the test used to leave it open. */
    lexer_close(lex);
    return MUNIT_OK;
}
/* Regression test for tests/input/regression/test_no_operands_eof.asm: the
 * file must parse into exactly two instruction nodes, each with two children
 * of which the second (the operands) is empty. */
MunitResult test_no_operands_eof(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm");
    munit_assert_null(err);

    tokenlist_t *list;
    err = tokenlist_alloc(&list);
    munit_assert_null(err);

    err = tokenlist_fill(list, lex);
    munit_assert_null(err);

    parse_result_t result = parse(list->head);
    munit_assert_null(result.err);
    munit_assert_null(result.next);
    /* Fail cleanly instead of dereferencing a missing root node below. */
    munit_assert_not_null(result.node);

    // Both children should be instructions
    munit_assert_size(result.node->len, ==, 2);
    munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION);
    munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION);

    // And they should have empty operands
    munit_assert_size(result.node->children[0]->len, ==, 2);
    munit_assert_size(result.node->children[1]->len, ==, 2);
    munit_assert_size(result.node->children[0]->children[1]->len, ==, 0);
    munit_assert_size(result.node->children[1]->children[1]->len, ==, 0);

    ast_node_free(result.node);
    tokenlist_free(list);
    /* Close the lexer opened above; the test used to leave it open. */
    lexer_close(lex);
    return MUNIT_OK;
}
/* Test table for the regression tests defined above. Each entry is
 * {name, function, setup, tear_down, options, parameters}; no test here uses
 * a fixture or parameters. The entry with null pointers ends the table. */
MunitTest regression_tests[] = {
{"/trivia_head", test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/no_operands_eof", test_no_operands_eof, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};

View File

@ -1,393 +0,0 @@
#include "../src/encoder/symbols.h"
#include "../src/ast.h"
#include "../src/error.h"
#include "../src/lexer.h"
#include "../src/parser/parser.h"
#include "munit.h"
#include <string.h>
/* Shared fixture builder for the symbol-table tests: lexes and parses the
 * assembly file at `path`.
 *   node - receives the root of the parsed AST
 *   list - receives the token list backing that AST
 *   path - input file to read
 * The caller releases both outputs with ast_node_free() and tokenlist_free().
 * Every step is asserted so a missing or unparsable input file fails the
 * test at this point; previously all error results were discarded and the
 * callers went on to dereference whatever came back. */
void symbols_setup_test(ast_node_t **node, tokenlist_t **list, char *path) {
    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, path);
    munit_assert_null(err);

    err = tokenlist_alloc(list);
    munit_assert_null(err);

    err = tokenlist_fill(*list, lex);
    munit_assert_null(err);

    parse_result_t result = parse((*list)->head);
    lexer_close(lex);
    munit_assert_null(result.err);
    munit_assert_not_null(result.node);

    *node = result.node;
}
/* symbol_table_alloc() must hand back a usable, empty table: a non-null
 * table, no error, capacity 64, length 0 and a non-null symbols array. */
MunitResult test_symbol_table_alloc(const MunitParameter params[], void *data) {
(void)params;
(void)data;
symbol_table_t *table = nullptr;
error_t *err = symbol_table_alloc(&table);
/* The table pointer is checked before it is dereferenced below. */
munit_assert_ptr_not_null(table);
munit_assert_ptr_null(err);
munit_assert_size(table->cap, ==, 64); // Default capacity
munit_assert_size(table->len, ==, 0);
munit_assert_ptr_not_null(table->symbols);
symbol_table_free(table);
return MUNIT_OK;
}
/* Looking up any name in a freshly allocated table must return null. */
MunitResult test_symbol_table_lookup_empty(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    symbol_table_t *table = nullptr;
    /* Allocation result is now checked instead of being ignored. */
    munit_assert_null(symbol_table_alloc(&table));
    munit_assert_not_null(table);

    symbol_t *symbol = symbol_table_lookup(table, "nonexistent");
    munit_assert_ptr_null(symbol);

    symbol_table_free(table);
    return MUNIT_OK;
}
/* Adding a label reference from tests/input/symbols.asm to an empty table
 * must create one SYMBOL_REFERENCE entry named "test" with no statement. */
MunitResult test_symbol_add_reference(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    /* Allocation result is now checked instead of being ignored. */
    munit_assert_null(symbol_table_alloc(&table));
    munit_assert_not_null(table);

    /* Path into the AST of the fixture file down to the reference node; the
     * node kind is verified right below. */
    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    ast_node_t *statement = root->children[3]; // The containing statement
    munit_assert_int(reference->id, ==, NODE_LABEL_REFERENCE);
    munit_assert_size(table->len, ==, 0);

    error_t *err = symbol_table_update(table, reference, statement);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_REFERENCE, ==, symbol->kind);
    // For references, the statement should be nullptr
    munit_assert_ptr_null(symbol->statement);
    munit_assert_string_equal(symbol->name, "test");

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/* Adding a label node from tests/input/symbols.asm to an empty table must
 * create one SYMBOL_LOCAL entry named "test" whose statement is the label. */
MunitResult test_symbol_add_label(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    /* Allocation result is now checked instead of being ignored. */
    munit_assert_null(symbol_table_alloc(&table));
    munit_assert_not_null(table);

    ast_node_t *label = root->children[2];
    munit_assert_int(label->id, ==, NODE_LABEL);
    munit_assert_size(table->len, ==, 0);

    /* A label is its own containing statement. */
    error_t *err = symbol_table_update(table, label, label);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
    munit_assert_ptr_equal(label, symbol->statement);
    munit_assert_string_equal(symbol->name, "test");

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/* Adding an import directive from tests/input/symbols.asm to an empty table
 * must create one SYMBOL_IMPORT entry named "test" with no statement. */
MunitResult test_symbol_add_import(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    /* Allocation result is now checked instead of being ignored. */
    munit_assert_null(symbol_table_alloc(&table));
    munit_assert_not_null(table);

    ast_node_t *import_directive = root->children[0]->children[1];
    ast_node_t *statement = root->children[0]; // The containing statement
    munit_assert_int(import_directive->id, ==, NODE_IMPORT_DIRECTIVE);
    munit_assert_size(table->len, ==, 0);

    error_t *err = symbol_table_update(table, import_directive, statement);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_IMPORT, ==, symbol->kind);
    // For import directives, the statement should be nullptr
    munit_assert_ptr_null(symbol->statement);
    munit_assert_string_equal(symbol->name, "test");

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/* Helper that checks one two-step update sequence on a fresh symbol table.
 *   name               - symbol name both nodes are expected to map to
 *   first              - node inserted first
 *   first_kind         - kind expected after the first insertion
 *   first_statement    - statement passed with the first node
 *   second             - node used for the second update
 *   second_kind        - kind expected afterwards when should_update is true
 *   second_statement   - statement passed with the second node
 *   should_succeed     - false if the second update must fail with
 *                        err_symbol_table_incompatible_symbols
 *   should_update      - true if the kind must change to second_kind,
 *                        false if it must stay first_kind
 *   expected_statement - statement the symbol must hold at the end
 * The table is created and released inside this function. */
void test_symbol_update(const char *name, ast_node_t *first, symbol_kind_t first_kind, ast_node_t *first_statement,
                        ast_node_t *second, symbol_kind_t second_kind, ast_node_t *second_statement,
                        bool should_succeed, bool should_update, ast_node_t *expected_statement) {
    symbol_table_t *table = nullptr;
    /* Allocation result is now checked instead of being ignored. */
    munit_assert_null(symbol_table_alloc(&table));
    munit_assert_not_null(table);

    // Add the first symbol
    error_t *err = symbol_table_update(table, first, first_statement);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    // Verify first symbol state
    symbol_t *symbol = symbol_table_lookup(table, name);
    munit_assert_not_null(symbol);
    munit_assert_int(first_kind, ==, symbol->kind);
    munit_assert_string_equal(symbol->name, name);

    // Check statement based on symbol kind
    if (first_kind == SYMBOL_LOCAL) {
        munit_assert_ptr_equal(first_statement, symbol->statement);
    } else {
        munit_assert_ptr_null(symbol->statement);
    }

    // Attempt the second update
    err = symbol_table_update(table, second, second_statement);

    // Check if update succeeded as expected
    if (should_succeed) {
        munit_assert_null(err);
    } else {
        munit_assert_ptr_equal(err, err_symbol_table_incompatible_symbols);
        symbol_table_free(table);
        return;
    }

    // Verify symbol after second update
    symbol = symbol_table_lookup(table, name);
    munit_assert_not_null(symbol);

    // Check if kind updated as expected
    if (should_update) {
        munit_assert_int(second_kind, ==, symbol->kind);
    } else {
        munit_assert_int(first_kind, ==, symbol->kind);
    }

    // Simply check against the expected statement value
    munit_assert_ptr_equal(expected_statement, symbol->statement);

    symbol_table_free(table);
}
/* Runs every update sequence that symbol_table_update() must accept, using
 * the nodes of tests/input/symbols.asm: genuine upgrades, repeated identical
 * updates, and downgrades that succeed but leave the kind unchanged. */
MunitResult test_symbol_upgrade_valid(const MunitParameter params[], void *data) {
    /* Unused; silenced like in every other test of this file. */
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");

    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    ast_node_t *reference_statement = root->children[3];
    ast_node_t *label = root->children[2];
    ast_node_t *import_directive = root->children[0]->children[1];
    ast_node_t *import_statement = root->children[0];
    ast_node_t *export_directive = root->children[1]->children[1];
    ast_node_t *export_statement = root->children[1];

    // real upgrades
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, label, SYMBOL_LOCAL, label, true, true,
                       label);
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, import_directive, SYMBOL_IMPORT,
                       import_statement, true, true, nullptr);
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, export_directive, SYMBOL_EXPORT,
                       export_statement, true, true, nullptr);
    test_symbol_update("test", label, SYMBOL_LOCAL, label, export_directive, SYMBOL_EXPORT, export_statement, true,
                       true, label);

    // identity upgrades
    test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, reference, SYMBOL_REFERENCE,
                       reference_statement, true, false, nullptr);
    test_symbol_update("test", label, SYMBOL_LOCAL, label, label, SYMBOL_LOCAL, label, true, false, label);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, import_directive, SYMBOL_IMPORT,
                       import_statement, true, false, nullptr);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, export_directive, SYMBOL_EXPORT,
                       export_statement, true, false, nullptr);

    // downgrades that are allowed and ignored
    test_symbol_update("test", label, SYMBOL_LOCAL, label, reference, SYMBOL_REFERENCE, reference_statement, true,
                       false, label);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, reference, SYMBOL_REFERENCE,
                       reference_statement, true, false, nullptr);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, reference, SYMBOL_REFERENCE,
                       reference_statement, true, false, nullptr);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, label, SYMBOL_LOCAL, label, true,
                       false, label);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, label, SYMBOL_LOCAL, label, true,
                       false, label);

    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/* Runs the update sequences that symbol_table_update() must reject with
 * err_symbol_table_incompatible_symbols, using the nodes of
 * tests/input/symbols.asm. */
MunitResult test_symbol_upgrade_invalid(const MunitParameter params[], void *data) {
    /* Unused; silenced like in every other test of this file. */
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");

    ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0];
    ast_node_t *reference_statement = root->children[3];
    ast_node_t *label = root->children[2];
    ast_node_t *import_directive = root->children[0]->children[1];
    ast_node_t *import_statement = root->children[0];
    ast_node_t *export_directive = root->children[1]->children[1];
    ast_node_t *export_statement = root->children[1];
    /* The reference nodes take part in no rejected sequence; mark them used
     * so the declarations stay parallel to test_symbol_upgrade_valid without
     * triggering unused-variable warnings. */
    (void)reference;
    (void)reference_statement;

    // invalid upgrades
    test_symbol_update("test", label, SYMBOL_LOCAL, label, import_directive, SYMBOL_IMPORT, import_statement, false,
                       false, nullptr);
    test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, import_directive, SYMBOL_IMPORT,
                       import_statement, false, false, nullptr);
    test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, export_directive, SYMBOL_EXPORT,
                       export_statement, false, false, nullptr);

    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/* Adding an export directive from tests/input/symbols.asm to an empty table
 * must create one SYMBOL_EXPORT entry named "test" with no statement. */
MunitResult test_symbol_add_export(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    /* Allocation result is now checked instead of being ignored. */
    munit_assert_null(symbol_table_alloc(&table));
    munit_assert_not_null(table);

    ast_node_t *export_directive = root->children[1]->children[1];
    ast_node_t *statement = root->children[1]; // The containing statement
    munit_assert_int(export_directive->id, ==, NODE_EXPORT_DIRECTIVE);
    munit_assert_size(table->len, ==, 0);

    error_t *err = symbol_table_update(table, export_directive, statement);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 1);

    symbol_t *symbol = symbol_table_lookup(table, "test");
    munit_assert_not_null(symbol);
    munit_assert_int(SYMBOL_EXPORT, ==, symbol->kind);
    // For export directives, the statement should be nullptr
    munit_assert_ptr_null(symbol->statement);
    munit_assert_string_equal(symbol->name, "test");

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/* Fills a table with the first 65 labels of tests/input/manysymbols.asm.
 * The capacity must stay at its initial value for the first 64 insertions
 * and double on the 65th; afterwards every label lbl_0 .. lbl_64 must be
 * retrievable and point at its own AST node. */
MunitResult test_symbol_table_growth(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    // Set up with our manysymbols.asm file
    symbols_setup_test(&root, &list, "tests/input/manysymbols.asm");
    /* Allocation result is now checked instead of being ignored. */
    munit_assert_null(symbol_table_alloc(&table));
    munit_assert_not_null(table);

    // Initial capacity should be the default (64)
    munit_assert_size(table->cap, ==, 64);
    munit_assert_size(table->len, ==, 0);

    // Add the first 64 labels (indices 0-63)
    size_t initial_cap = table->cap;
    for (size_t i = 0; i < 64; i++) {
        ast_node_t *label = root->children[i];
        munit_assert_int(label->id, ==, NODE_LABEL);
        error_t *err = symbol_table_update(table, label, label);
        munit_assert_null(err);
        munit_assert_size(table->len, ==, i + 1);
        // Capacity should remain the same for the first 64 labels
        munit_assert_size(table->cap, ==, initial_cap);
    }

    // Now add the 65th label (index 64), which should trigger growth
    ast_node_t *final_label = root->children[64];
    munit_assert_int(final_label->id, ==, NODE_LABEL);
    error_t *err = symbol_table_update(table, final_label, final_label);
    munit_assert_null(err);
    munit_assert_size(table->len, ==, 65);
    // Capacity should have doubled
    munit_assert_size(table->cap, ==, initial_cap * 2);

    // Validate we can look up all the symbols
    for (size_t i = 0; i <= 64; i++) {
        char name[10];
        /* Bounded formatting: the buffer size is enforced by snprintf rather
         * than relying on the loop limit keeping the name short enough. */
        snprintf(name, sizeof(name), "lbl_%zu", i);
        symbol_t *symbol = symbol_table_lookup(table, name);
        munit_assert_not_null(symbol);
        munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind);
        munit_assert_string_equal(symbol->name, name);
        munit_assert_ptr_equal(symbol->statement, root->children[i]);
    }

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/* Passing the AST root (which is not a symbol-bearing node) to
 * symbol_table_update() must fail with err_symbol_table_invalid_node and
 * leave the table empty. */
MunitResult test_symbol_invalid_node(const MunitParameter params[], void *data) {
    (void)params;
    (void)data;

    ast_node_t *root;
    tokenlist_t *list;
    symbol_table_t *table = nullptr;
    symbols_setup_test(&root, &list, "tests/input/symbols.asm");
    /* Allocation result is now checked instead of being ignored. */
    munit_assert_null(symbol_table_alloc(&table));
    munit_assert_not_null(table);

    munit_assert_size(table->len, ==, 0);
    error_t *err = symbol_table_update(table, root, root);
    munit_assert_ptr_equal(err, err_symbol_table_invalid_node);
    munit_assert_size(table->len, ==, 0);

    symbol_table_free(table);
    ast_node_free(root);
    tokenlist_free(list);
    return MUNIT_OK;
}
/* Test table for the symbol-table tests defined above. Each entry is
 * {name, function, setup, tear_down, options, parameters}; no test here uses
 * a fixture or parameters. The entry with null pointers ends the table.
 * test_symbol_update is a helper called by the upgrade tests and is
 * therefore not listed. */
MunitTest symbols_tests[] = {
{"/table_alloc", test_symbol_table_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/table_lookup_empty", test_symbol_table_lookup_empty, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/add_reference", test_symbol_add_reference, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/add_label", test_symbol_add_label, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/add_import", test_symbol_add_import, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/add_export", test_symbol_add_export, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/upgrade_valid", test_symbol_upgrade_valid, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/upgrade_invalid", test_symbol_upgrade_invalid, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/table_growth", test_symbol_table_growth, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{"/invalid_node", test_symbol_invalid_node, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr},
{nullptr, nullptr, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}
};

View File

@ -2,17 +2,19 @@
set -euo pipefail
make analyze debug asan msan
# Start with static analysis
make clean all
mkdir -p reports/static-analysis
scan-build -o reports/static-analysis/ -plist-html --status-bugs make all
ASAN=build/asan/oas
MSAN=build/msan/oas
DEBUG=build/debug/oas
# Run the sanitizer builds and valgrind
make clean sanitize all
ARGUMENTS=("tokens" "text" "ast")
ARGUMENTS=("-tokens" "-text")
while IFS= read -r INPUT_FILE; do
for ARGS in ${ARGUMENTS[@]}; do
$ASAN $ARGS $INPUT_FILE > /dev/null
$MSAN $ARGS $INPUT_FILE > /dev/null
valgrind --leak-check=full --error-exitcode=1 $DEBUG $ARGS $INPUT_FILE >/dev/null
./oas-asan $ARGS $INPUT_FILE > /dev/null
./oas-msan $ARGS $INPUT_FILE > /dev/null
valgrind --leak-check=full --error-exitcode=1 ./oas $ARGS $INPUT_FILE >/dev/null
done
done < <(find tests/input/ -type f -name '*.asm')