Compare commits

...

4 Commits

Author SHA1 Message Date
2474e0c773 Slightly change the valid test input file
All checks were successful
Validate the build / validate-build (push) Successful in 27s
2025-04-02 12:33:36 +02:00
6b840ad888 add functionality to main to parse and print the ast 2025-04-02 12:33:36 +02:00
b1391b91bd Partial parser implementation 2025-04-02 12:33:36 +02:00
c9b29e10e8 Fix incorrect size comparison in lexer_consume_n
The buffer length len and the requested number of characters n are mixed up
in an invalid comparison. This causes all valid requests for n < len
characters to be denied and all invalid requests for n > len characters to
be accepted. This may cause a buffer overflow if the caller requests more
characters than they provide space for.
2025-04-02 12:33:30 +02:00
8 changed files with 150 additions and 34 deletions

View File

@ -10,7 +10,7 @@ OBJECTS = $(SOURCES:.c=.o)
DEPENDENCIES = $(SOURCES:.c=.d)
TARGET?=oas
OUTPUTS=oas oas-asan oas-msan oas-afl
RUNARGUMENTS?=-tokens tests/input/valid.asm
RUNARGUMENTS?=ast tests/input/valid.asm
all: $(TARGET)

View File

@ -183,7 +183,7 @@ error_t *lexer_consume_n(lexer_t *lex, const size_t len,
char buffer[static len], const size_t n) {
if (lex->buffer_count < n)
return err_buffer_underrun;
if (len > n)
if (n > len)
return err_consume_excessive_length;
memcpy(buffer, lex->buffer, n);

View File

@ -1,5 +1,6 @@
#include "error.h"
#include "lexer.h"
#include "parser.h"
#include "tokenlist.h"
#include <limits.h>
@ -7,38 +8,64 @@
#include <stdlib.h>
#include <string.h>
bool print_token(lexer_token_t *token) {
typedef enum mode { MODE_AST, MODE_TEXT, MODE_TOKENS } mode_t;
void print_tokens(tokenlist_t *list) {
for (auto entry = list->head; entry; entry = entry->next) {
auto token = &entry->token;
lexer_token_print(token);
return true;
}
}
bool print_value(lexer_token_t *token) {
void print_text(tokenlist_t *list) {
for (auto entry = list->head; entry; entry = entry->next) {
auto token = &entry->token;
if (token->id == TOKEN_ERROR) {
printf("%s\n", token->value);
for (size_t i = 0; i < token->character_number; ++i)
printf(" ");
printf("^-- %s\n", token->explanation);
return;
} else {
printf("%s", token->value);
}
return token->id != TOKEN_ERROR;
}
}
// Parse the token list and print the resulting AST.
// On a parse error the error message is printed and the error freed
// instead. If parsing stopped before consuming the whole list, the first
// unparsed token is printed as a diagnostic. The AST is freed on return,
// so this function owns the entire parse result.
void print_ast(tokenlist_t *list) {
    parse_result_t result = parse(list->head);
    if (result.err) {
        puts(result.err->message);
        error_free(result.err);
        return;
    }
    ast_node_print(result.node);
    if (result.next != nullptr) {
        puts("First unparsed token:");
        lexer_token_print(&result.next->token);
    }
    ast_node_free(result.node);
}
// Determine the execution mode from the command line arguments.
// Expects exactly: oas [tokens|text|ast] <filename>; any other
// invocation prints usage and exits with status 1.
// Returns MODE_TOKENS, MODE_TEXT or MODE_AST.
int get_execution_mode(int argc, char *argv[]) {
    // Use an explicit `!= 0` on every strcmp for consistency; the
    // original omitted it on the "ast" comparison, which was correct
    // (nonzero means "not equal") but obscured the intent.
    if (argc != 3 || (strcmp(argv[1], "tokens") != 0 &&
                      strcmp(argv[1], "text") != 0 &&
                      strcmp(argv[1], "ast") != 0)) {
        puts("Usage: oas [tokens|text|ast] <filename>");
        exit(1);
    }
    if (strcmp(argv[1], "tokens") == 0)
        return MODE_TOKENS;
    if (strcmp(argv[1], "text") == 0)
        return MODE_TEXT;
    return MODE_AST;
}
int main(int argc, char *argv[]) {
if (argc != 3 ||
(strcmp(argv[1], "-tokens") != 0 && strcmp(argv[1], "-text") != 0)) {
puts("Usage: oas -tokens <filename>");
puts("Usage: oas -text <filename>");
return 1;
}
bool (*print_fn)(lexer_token_t *);
mode_t mode = get_execution_mode(argc, argv);
char *filename = argv[2];
if (strcmp(argv[1], "-tokens") == 0) {
print_fn = print_token;
} else {
print_fn = print_value;
}
lexer_t *lex = &(lexer_t){};
error_t *err = lexer_open(lex, filename);
@ -54,9 +81,18 @@ int main(int argc, char *argv[]) {
if (err)
goto cleanup_tokens;
for (auto entry = list->head; entry; entry = entry->next) {
print_fn(&entry->token);
switch (mode) {
case MODE_TOKENS:
print_tokens(list);
break;
case MODE_TEXT:
print_text(list);
break;
case MODE_AST:
print_ast(list);
break;
}
tokenlist_free(list);
error_free(err);
return 0;

53
src/parser.c Normal file
View File

@ -0,0 +1,53 @@
#include "parser.h"
#include "ast.h"
#include "lexer.h"
#include "parser_combinators.h"
#include "parser_primitives.h"
#include "parser_util.h"
#include "tokenlist.h"
// A number literal may be written in octal, decimal, hexadecimal or
// binary notation; try each base-specific parser until one matches.
parse_result_t parse_number(tokenlist_entry_t *current) {
    parser_t alternatives[] = {parse_octal, parse_decimal, parse_hexadecimal,
                               parse_binary, nullptr};
    return parse_any(current, alternatives);
}
// Parse a single instruction operand: currently a register or a number.
// FIXME: not the correct set of parsers
parse_result_t parse_operand(tokenlist_entry_t *current) {
    parser_t alternatives[] = {parse_register, parse_number, nullptr};
    return parse_any(current, alternatives);
}
// Parse a comma-separated operand list into a NODE_OPERANDS node.
// NOTE(review): the `true` flag presumably permits an empty list —
// confirm against parse_list's contract.
parse_result_t parse_operands(tokenlist_entry_t *current) {
    return parse_list(current, NODE_OPERANDS, true, TOKEN_COMMA, parse_operand);
}
// A label is an identifier immediately followed by a colon.
parse_result_t parse_label(tokenlist_entry_t *current) {
    parser_t sequence[] = {parse_identifier, parse_colon, nullptr};
    return parse_consecutive(current, NODE_LABEL, sequence);
}
// A section directive is the "section" keyword followed by the
// section's name.
parse_result_t parse_section_directive(tokenlist_entry_t *current) {
    parser_t sequence[] = {parse_section, parse_identifier, nullptr};
    return parse_consecutive(current, NODE_SECTION_DIRECTIVE, sequence);
}
// A directive starts with a dot; the section directive is the only
// directive body parsed so far.
parse_result_t parse_directive(tokenlist_entry_t *current) {
    parser_t sequence[] = {parse_dot, parse_section_directive, nullptr};
    return parse_consecutive(current, NODE_DIRECTIVE, sequence);
}
// An instruction is a mnemonic (identifier) followed by its operands.
parse_result_t parse_instruction(tokenlist_entry_t *current) {
    parser_t sequence[] = {parse_identifier, parse_operands, nullptr};
    return parse_consecutive(current, NODE_INSTRUCTION, sequence);
}
// A statement is a label, a directive, or an instruction; the first
// parser that matches wins.
parse_result_t parse_statement(tokenlist_entry_t *current) {
    parser_t alternatives[] = {parse_label, parse_directive, parse_instruction,
                               nullptr};
    return parse_any(current, alternatives);
}
// Entry point: parse a sequence of statements under a single
// NODE_PROGRAM root. NOTE(review): the `true` flag presumably allows an
// empty program — confirm against parse_many's contract.
parse_result_t parse(tokenlist_entry_t *current) {
    return parse_many(current, NODE_PROGRAM, true, parse_statement);
}

11
src/parser.h Normal file
View File

@ -0,0 +1,11 @@
#ifndef INCLUDE_SRC_PARSER_H_
#define INCLUDE_SRC_PARSER_H_
#include "ast.h"
#include "error.h"
#include "parser_util.h"
#include "tokenlist.h"
/**
 * Parse the token list starting at `current` into an AST.
 * The returned parse_result_t carries either an error (which the caller
 * must free with error_free) or the root node (which the caller must
 * free with ast_node_free) plus, in `next`, the first unconsumed token.
 */
parse_result_t parse(tokenlist_entry_t *current);
#endif // INCLUDE_SRC_PARSER_H_

View File

@ -62,9 +62,19 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
}
const char *registers[] = {"rax", "rcx", "rdx", "rbx", "rsp", "rbp",
"rsi", "rdi", "r8", "r9", "r10", "r11",
"r12", "r13", "r14", "r15", nullptr};
// x86-64 general-purpose register names, grouped by operand width.
// The table is nullptr-terminated so callers can scan it without a
// separate count (see is_register_token).
const char *registers[] = {
    // 64-bit registers
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",
    "r11", "r12", "r13", "r14", "r15",
    // 32-bit registers
    "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d",
    "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
    // 16-bit registers
    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w",
    "r11w", "r12w", "r13w", "r14w", "r15w",
    // 8-bit low registers
    "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b",
    "r11b", "r12b", "r13b", "r14b", "r15b", nullptr};
bool is_register_token(lexer_token_t *token) {
for (size_t i = 0; registers[i] != nullptr; ++i)
if (strcmp(token->value, registers[i]) == 0)
@ -81,4 +91,7 @@ bool is_section_token(lexer_token_t *token) {
return strcmp(token->value, "section") == 0;
}
parse_result_t parse_section(tokenlist_entry_t *current) {}
// Parse the "section" keyword: an identifier token whose text is
// exactly "section" (checked by is_section_token), producing a
// NODE_SECTION node.
parse_result_t parse_section(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
                       is_section_token);
}

View File

@ -1,4 +1,7 @@
.section text
_start:
mov eax, ebx
mov eax, 555 ; move 555 into eax
push 0o777
xor eax, 0xDEADBEEF

View File

@ -10,7 +10,7 @@ scan-build -o reports/static-analysis/ -plist-html --status-bugs make all
# Run the sanitizer builds and valgrind
make clean sanitize all
ARGUMENTS=("-tokens" "-text")
ARGUMENTS=("tokens" "text" "ast")
while IFS= read -r INPUT_FILE; do
for ARGS in ${ARGUMENTS[@]}; do
./oas-asan $ARGS $INPUT_FILE > /dev/null