commit df948b18c6158473ec3d4fb6bec56828e0a704af
Author: omicron
Date:   Sun Mar 30 17:45:51 2025 +0200

    Initial commit, basic lexer structure

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..d2b0355
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,5 @@
+BasedOnStyle: LLVM
+IndentWidth: 4
+Cpp11BracedListStyle: true
+AlignArrayOfStructures: Left
+AllowShortFunctionsOnASingleLine: Empty
diff --git a/.clangd b/.clangd
new file mode 100644
index 0000000..6f7b3be
--- /dev/null
+++ b/.clangd
@@ -0,0 +1,2 @@
+CompileFlags:
+  Add: ["-std=c23", "-x", "c"]
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ab02f7f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+*.o
+*.d
+/core
+/oas
+/oas-asan
+/oas-msan
+/reports
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c0d882d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2025 omicron
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..091f129
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,42 @@
+.PHONY: all clean clean-objects run sanitize validate
+
+CC=clang
+LD=clang
+CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls
+LDFLAGS?=
+
+SOURCES = $(shell find src/ -type f -name '*.c')
+OBJECTS = $(SOURCES:.c=.o)
+DEPENDENCIES = $(SOURCES:.c=.d)
+TARGET?=oas
+OUTPUTS=oas oas-asan oas-msan
+RUNARGUMENTS=-tokens test.asm
+
+all: $(TARGET)
+
+
+run: $(TARGET)
+	./$(TARGET) $(RUNARGUMENTS)
+
+sanitize:
+	make CFLAGS="$(CFLAGS) -fsanitize=address,undefined" LDFLAGS="-fsanitize=address,undefined" TARGET="oas-asan" clean-objects all
+	make CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" TARGET="oas-msan" clean-objects all
+	make clean-objects
+
+validate:
+	./validate.sh
+
+$(TARGET): $(OBJECTS)
+	$(LD) $(LDFLAGS) -o $@ $^
+
+%.o: %.c
+	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
+
+-include $(DEPENDENCIES)
+
+clean-objects:
+	rm -f $(OBJECTS) $(DEPENDENCIES)
+
+clean: clean-objects
+	rm -f $(TARGET) $(OUTPUTS)
+	rm -rf reports/
diff --git a/doc/lexer_grammar.txt b/doc/lexer_grammar.txt
new file mode 100644
index 0000000..189ba87
--- /dev/null
+++ b/doc/lexer_grammar.txt
@@ -0,0 +1,46 @@
+/* These non-terminals are the actual tokens the lexer emits */
+<identifier> ::= <identifier_first_character> <identifier_character>*
+
+<decimal> ::= [0-9]+ <number_suffix>?
+
+<hexadecimal> ::= "0x" <hexadecimal_character>+ <number_suffix>?
+<binary> ::= "0b" [0-1]+ <number_suffix>?
+<octal> ::= "0o" [0-7]+ <number_suffix>?
+
+<string> ::= "\"" <string_character>+ "\""
+<character> ::= "'" <char_character> "'"
+<colon> ::= ":"
+<comma> ::= ","
+<lbracket> ::= "["
+<rbracket> ::= "]"
+<plus> ::= "+"
+<minus> ::= "-"
+<asterisk> ::= "*"
+<dot> ::= "."
+<comment> ::= ";" <comment_character>*
+<newline> ::= "\r"? "\n"
+<whitespace> ::= ( " " | "\t" )+
+
+/* helper non-terminals to make it easier to define the tokens */
+<number_suffix> ::= ":" ( "8" | "16" | "32" | "64" )
+
+<identifier_first_character> ::= [a-z] | [A-Z] | "_"
+<identifier_character> ::= [a-z] | [A-Z] | [0-9] | "_"
+
+<hexadecimal_character> ::= [0-9] | [a-f] | [A-F]
+
+<char_character> ::= <plain_char_character> | <escape_sequence>
+<string_character> ::= <plain_string_character> | <escape_sequence>
+
+<escape_sequence> ::= "\\" ( <escaped_character> | <hexadecimal_escape> )
+<escaped_character> ::= "\\" | "n" | "r" | "t" | "0" | "\"" | "'"
+<hexadecimal_escape> ::= "x" <hexadecimal_character> <hexadecimal_character>
+
+/* alternative definitions to support bnfplayground, use the ones below instead */
+<comment_character> ::= <printable_character> | "'" | "\""
+<plain_string_character> ::= <printable_character> | "'"
+<plain_char_character> ::= <printable_character> | "\""
+<printable_character> ::= [a-z] | [A-Z] | [0-9] | " " | "+" | "-" | "#" | "\t" | "_" | "$" | "&" | "{" | "}" | "(" | ")" | "|"
+
+/* actual definition we're implementing */
+/* <comment_character> ::= [^\r\n] */
+/* <plain_char_character> ::= [^\\'] */
+/* <plain_string_character> ::= [^\\"] */
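The number rules fold the optional <number_suffix> into the number token itself, so a suffixed literal derives from a single non-terminal. A few concrete lexemes and the rules they match, drawn from tests/input/valid.asm further down (the table is an illustrative C snippet, not part of the commit):

    static const struct {
        const char *lexeme;
        const char *rule;
    } number_examples[] = {
        {"555",        "<decimal>"},
        {"0xDEADBEEF", "<hexadecimal>"},
        {"0o777:16",   "<octal> with <number_suffix>"},
        {"0b0001:16",  "<binary> with <number_suffix>"},
    };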
+ ::= "\"" + "\"" + ::= "'" "'" + ::= ":" + ::= "," + ::= "[" + ::= "]" + ::= "+" + ::= "-" + ::= "*" + ::= "." + ::= ";" * + ::= "\r"? "\n" + ::= ( " " | "\t" )+ + +/* helper non-terminals to make it easier to define the tokens */ + ::= ":" ( "8" | "16" | "32" | "64" ) + + ::= [a-z] | [A-Z] | "_" + ::= [a-z] | [A-Z] | [0-9] | "_" + + ::= [a-f] | [A-F] + + ::= | + ::= | + + ::= "\\" ( | ) + ::= "\\" | "n" | "r" | "t" | "0" | "\"" | "'" + ::= "x" + +/* alternative definitions to support bnfplayground, use the ones below instead */ + ::= | "'" | "\"" + ::= | "'" + ::= | "\"" + ::= [a-z] | [A-Z] | [0-9] | " " | "+" | "-" | "#" | "\t" | "_" | "$" | "&" | "{" | "}" | "(" | ")" | "|" + +/* actual definition we're implementing */ +/* ::= [^\r\n] */ +/* ::= [^\\'] */ +/* ::= [^\\"] */ diff --git a/src/error.c b/src/error.c new file mode 100644 index 0000000..008c588 --- /dev/null +++ b/src/error.c @@ -0,0 +1,42 @@ +#include "error.h" + +#include +#include +#include + +error_t *const err_errorf_alloc = &(error_t){ + .message = "Allocation failed during formatting of another error"}; +error_t *const err_errorf_length = &(error_t){ + .message = + "Formatting of another error failed to determine the error length"}; + +error_t *errorf(const char *fmt, ...) { + error_t *err = calloc(1, sizeof(error_t)); + if (err == nullptr) + return err_errorf_alloc; + + va_list args; + va_list args_count; + va_start(args, fmt); + va_copy(args_count, args); + + int size = vsnprintf(nullptr, 0, fmt, args_count) + 1; + va_end(args_count); + if (size <= 0) { + free(err); + va_end(args); + return err_errorf_length; + } + + err->message = malloc(size); + if (err->message == nullptr) { + free(err); + va_end(args); + return err_errorf_alloc; + } + + vsnprintf(err->message, size, fmt, args); + va_end(args); + err->is_heap_allocated = true; + return err; +} diff --git a/src/error.h b/src/error.h new file mode 100644 index 0000000..5a6f143 --- /dev/null +++ b/src/error.h @@ -0,0 +1,21 @@ +#ifndef INCLUDE_SRC_ERROR_H_ +#define INCLUDE_SRC_ERROR_H_ + +#include + +typedef struct error { + char *message; + bool is_heap_allocated; +} error_t; + +error_t *errorf(const char *fmt, ...); +static inline void error_free(error_t *err) { + if (err == nullptr) + return; + if (!err->is_heap_allocated) + return; + free(err->message); + free(err); +} + +#endif // INCLUDE_SRC_ERROR_H_ diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..8c9dd90 --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,465 @@ +#include "lexer.h" +#include "error.h" +#include +#include +#include +#include + +error_t *err_lexer_already_open = &(error_t){ + .message = + "Can't open on a lexer object that is already opened. 
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..8c9dd90
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,465 @@
+#include "lexer.h"
+#include "error.h"
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+
+error_t *err_lexer_already_open = &(error_t){
+    .message =
+        "Can't open a lexer object that is already open. Close it first."};
+error_t *err_prefix_too_large =
+    &(error_t){.message = "Prefix too large for internal lexer buffer"};
+error_t *err_buffer_underrun = &(error_t){
+    .message = "Buffer does not contain enough characters for lexer_consume_n"};
+error_t *err_consume_excessive_length =
+    &(error_t){.message = "Too many valid characters to consume"};
+
+error_t *err_eof =
+    &(error_t){.message = "Can't read from file because EOF is reached"};
+
+error_t *err_unknown_read = &(error_t){.message = "Unknown read error"};
+
+error_t *err_allocation_failed =
+    &(error_t){.message = "Memory allocation failed"};
+
+typedef bool (*char_predicate_t)(char);
+
+const char *lexer_token_id_to_cstr(lexer_token_id_t id) {
+    switch (id) {
+    case TOKEN_ERROR:
+        return "TOKEN_ERROR";
+    case TOKEN_IDENTIFIER:
+        return "TOKEN_IDENTIFIER";
+    case TOKEN_DECIMAL:
+        return "TOKEN_DECIMAL";
+    case TOKEN_HEXADECIMAL:
+        return "TOKEN_HEXADECIMAL";
+    case TOKEN_OCTAL:
+        return "TOKEN_OCTAL";
+    case TOKEN_BINARY:
+        return "TOKEN_BINARY";
+    case TOKEN_CHAR:
+        return "TOKEN_CHAR";
+    case TOKEN_STRING:
+        return "TOKEN_STRING";
+    case TOKEN_COLON:
+        return "TOKEN_COLON";
+    case TOKEN_COMMA:
+        return "TOKEN_COMMA";
+    case TOKEN_LBRACKET:
+        return "TOKEN_LBRACKET";
+    case TOKEN_RBRACKET:
+        return "TOKEN_RBRACKET";
+    case TOKEN_PLUS:
+        return "TOKEN_PLUS";
+    case TOKEN_MINUS:
+        return "TOKEN_MINUS";
+    case TOKEN_ASTERISK:
+        return "TOKEN_ASTERISK";
+    case TOKEN_DOT:
+        return "TOKEN_DOT";
+    case TOKEN_COMMENT:
+        return "TOKEN_COMMENT";
+    case TOKEN_NEWLINE:
+        return "TOKEN_NEWLINE";
+    case TOKEN_WHITESPACE:
+        return "TOKEN_WHITESPACE";
+    }
+    assert(!"Unreachable, weird token id" && id);
+    __builtin_unreachable();
+}
+
+void lexer_token_print(lexer_token_t *token) {
+    printf("(%zu, %zu) %s[%d]%s%s\n", token->line_number,
+           token->character_number, lexer_token_id_to_cstr(token->id),
+           token->id, token->value ? ": " : "",
+           token->value ? token->value : "");
+    if (token->id == TOKEN_ERROR)
+        printf(" `--> %s\n", token->explanation);
+}
+
+void lexer_token_cleanup(lexer_token_t *token) {
+    free(token->value);
+    memset(token, 0, sizeof(lexer_token_t));
+}
+
+void lexer_close(lexer_t *lex) {
+    fclose(lex->fp);
+    memset(lex, 0, sizeof(lexer_t));
+}
+
+error_t *lexer_fill_buffer(lexer_t *lex) {
+    if (feof(lex->fp) && lex->buffer_count == 0)
+        return err_eof;
+    if (feof(lex->fp))
+        return nullptr;
+    if (lex->buffer_count == lexer_buffer_size)
+        return nullptr;
+
+    size_t remaining = lexer_buffer_size - lex->buffer_count;
+    while (remaining > 0) {
+        char *buffer = lex->buffer + lex->buffer_count;
+        size_t n = fread(buffer, 1, remaining, lex->fp);
+        if (n == 0 && feof(lex->fp))
+            break;
+        if (n == 0 && ferror(lex->fp))
+            return errorf("Read error: %s", strerror(errno));
+        if (n == 0)
+            return err_unknown_read;
+        remaining -= n;
+        lex->buffer_count += n;
+    }
+    return nullptr;
+}
token->value : ""); + if (token->id == TOKEN_ERROR) + printf(" `--> %s\n", token->explanation); +} + +void lexer_token_cleanup(lexer_token_t *token) { + free(token->value); + memset(token, 0, sizeof(lexer_token_t)); +} + +void lexer_close(lexer_t *lex) { + fclose(lex->fp); + memset(lex, 0, sizeof(lexer_t)); +} + +error_t *lexer_fill_buffer(lexer_t *lex) { + if (feof(lex->fp) && lex->buffer_count == 0) + return err_eof; + if (feof(lex->fp)) + return nullptr; + if (lex->buffer_count == lexer_buffer_size) + return nullptr; + + size_t remaining = lexer_buffer_size - lex->buffer_count; + while (remaining > 0) { + char *buffer = lex->buffer + lex->buffer_count; + size_t n = fread(buffer, 1, remaining, lex->fp); + if (n == 0 && feof(lex->fp)) + break; + if (n == 0 && ferror(lex->fp)) + return errorf("Read error: %s", strerror(errno)); + if (n == 0) + return err_unknown_read; + remaining -= n; + lex->buffer_count += n; + } + return nullptr; +} + +error_t *lexer_open(lexer_t *lex, char *path) { + if (lex->fp != nullptr) + return err_lexer_already_open; + + lex->fp = fopen(path, "rb"); + if (lex->fp == nullptr) + return errorf("Failed to open file '%s': %s", path, strerror(errno)); + lex->line_number = 0; + lex->character_number = 0; + lex->buffer_count = 0; + return nullptr; +} + +void lexer_shift_buffer(lexer_t *lex, int n) { + lex->buffer_count -= n; + memmove(lex->buffer, lex->buffer + n, lex->buffer_count); +} + +error_t *lexer_peek(lexer_t *lex, char *c) { + error_t *err = lexer_fill_buffer(lex); + if (err) + return err; + if (lex->buffer_count == 0) + return err_eof; + *c = lex->buffer[0]; + lexer_shift_buffer(lex, 1); + return nullptr; +} + +// This does _not_ fill the internal lexer buffer and you _must_ call +// lexer_fill_buffer() before calling this. 
+
+error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
+    (void)token;
+    return errorf("Not implemented, character %02x (%c) at (%zu, %zu).\n",
+                  lex->buffer[0], lex->buffer[0], lex->line_number,
+                  lex->character_number);
+}
+
+error_t *lexer_consume_n(lexer_t *lex, const size_t len,
+                         char buffer[static len], const size_t n) {
+    if (lex->buffer_count < n)
+        return err_buffer_underrun;
+    // The n requested bytes must fit in the caller's len-sized buffer.
+    if (n > len)
+        return err_consume_excessive_length;
+
+    memcpy(buffer, lex->buffer, n);
+    lexer_shift_buffer(lex, n);
+    return nullptr;
+}
+
+error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
+                       char_predicate_t is_valid, size_t *n_consumed) {
+    const size_t buffer_size = n;
+    bool have_more_characters = false;
+    *n_consumed = 0;
+    do {
+        size_t i = 0;
+        while (i < lex->buffer_count && i < buffer_size - *n_consumed &&
+               is_valid(lex->buffer[i])) {
+            ++i;
+        }
+        memcpy(buffer + *n_consumed, lex->buffer, i);
+        lexer_shift_buffer(lex, i);
+        *n_consumed += i;
+
+        error_t *err = lexer_fill_buffer(lex);
+        if (err == err_eof)
+            have_more_characters = false;
+        else if (err)
+            return err;
+        else
+            have_more_characters =
+                (lex->buffer_count > 0 && is_valid(lex->buffer[0]));
+
+        if (have_more_characters && *n_consumed == buffer_size) {
+            return err_consume_excessive_length;
+        }
+    } while (have_more_characters);
+    return nullptr;
+}
+
+bool is_hexadecimal_character(char c) {
+    return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+}
+
+bool is_octal_character(char c) {
+    return c >= '0' && c <= '7';
+}
+
+bool is_binary_character(char c) {
+    return c == '0' || c == '1';
+}
+
+bool is_decimal_character(char c) {
+    return isdigit(c);
+}
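lexer_consume() alternates between draining the window and refilling it, so a token may be longer than lexer_buffer_size; it stops when the predicate rejects a byte, the input ends, or the caller's buffer is full while more valid bytes are still pending (the err_consume_excessive_length case). A caller sketch with an arbitrary 16-byte limit:

    char digits[16 + 1] = {}; // zero-initialized, so it stays NUL-terminated
    size_t n = 0;
    error_t *err = lexer_consume(lex, 16, digits, is_decimal_character, &n);
    if (err == err_consume_excessive_length) {
        /* a 17th consecutive digit exists: the token is too long */
    }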
+
+error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
+    constexpr size_t max_number_length = 128;
+    size_t so_far = 0;
+    size_t n = 0;
+    char buffer[max_number_length + 1] = {};
+
+    token->line_number = lex->line_number;
+    token->character_number = lex->character_number;
+    char_predicate_t is_valid;
+    if (lexer_has_prefix(lex, "0x")) {
+        is_valid = is_hexadecimal_character;
+        token->id = TOKEN_HEXADECIMAL;
+        strcpy(buffer, "0x");
+        so_far = 2;
+    } else if (lexer_has_prefix(lex, "0o")) {
+        is_valid = is_octal_character;
+        token->id = TOKEN_OCTAL;
+        strcpy(buffer, "0o");
+        so_far = 2;
+    } else if (lexer_has_prefix(lex, "0b")) {
+        token->id = TOKEN_BINARY;
+        is_valid = is_binary_character;
+        strcpy(buffer, "0b");
+        so_far = 2;
+    } else {
+        token->id = TOKEN_DECIMAL;
+        is_valid = is_decimal_character;
+        so_far = 0;
+    }
+    if (so_far > 0) {
+        lex->character_number += so_far;
+        lexer_shift_buffer(lex, so_far);
+    }
+
+    error_t *err = lexer_consume(lex, max_number_length - so_far,
+                                 buffer + so_far, is_valid, &n);
+    if (err == err_consume_excessive_length) {
+        token->id = TOKEN_ERROR;
+        token->explanation =
+            "Number length exceeds the maximum of 128 characters";
+    } else if (err) {
+        return err;
+    }
+    so_far += n;
+    if (n == 0) {
+        token->id = TOKEN_ERROR;
+        token->explanation = "Invalid number format";
+    }
+
+    err = lexer_fill_buffer(lex);
+    if (err != err_eof && err) {
+        return err;
+    }
+
+    size_t suffix_length = 0;
+    if (lexer_has_prefix(lex, ":8")) {
+        suffix_length = 2;
+    } else if (lexer_has_prefix(lex, ":16")) {
+        suffix_length = 3;
+    } else if (lexer_has_prefix(lex, ":32")) {
+        suffix_length = 3;
+    } else if (lexer_has_prefix(lex, ":64")) {
+        suffix_length = 3;
+    }
+
+    if (suffix_length > 0) {
+        err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far,
+                              suffix_length);
+        if (err == err_consume_excessive_length) {
+            token->id = TOKEN_ERROR;
+            token->explanation =
+                "Number length exceeds the maximum of 128 characters";
+        }
+    }
+
+    lex->character_number += n + suffix_length;
+    token->value = strdup(buffer);
+    return nullptr;
+}
+
+error_t *lexer_next_newline(lexer_t *lex, lexer_token_t *token) {
+    token->line_number = lex->line_number;
+    token->character_number = lex->character_number;
+    token->id = TOKEN_NEWLINE;
+
+    if (lexer_has_prefix(lex, "\r\n")) {
+        lexer_shift_buffer(lex, 2);
+        token->value = strdup("\r\n");
+        lex->character_number = 0;
+        lex->line_number += 1;
+    } else {
+        if (lexer_has_prefix(lex, "\n")) {
+            lexer_shift_buffer(lex, 1);
+            token->value = strdup("\n");
+            lex->character_number = 0;
+            lex->line_number += 1;
+        } else {
+            // A lone '\r' is not a valid newline; consume the offending
+            // byte so the lexer keeps making progress.
+            token->id = TOKEN_ERROR;
+            token->value = strdup((char[]){lex->buffer[0], 0});
+            lexer_shift_buffer(lex, 1);
+            lex->character_number += 1;
+            token->explanation = "Invalid newline format";
+        }
+    }
+    return nullptr;
+}
+
+bool is_identifier_character(char c) {
+    return isalnum(c) || c == '_';
+}
+
+error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {
+    constexpr size_t max_identifier_length = 128;
+    size_t n = 0;
+    char buffer[max_identifier_length + 1] = {};
+
+    token->id = TOKEN_IDENTIFIER;
+    token->line_number = lex->line_number;
+    token->character_number = lex->character_number;
+
+    error_t *err = lexer_consume(lex, max_identifier_length, buffer,
+                                 is_identifier_character, &n);
+    if (err == err_consume_excessive_length) {
+        token->id = TOKEN_ERROR;
+        token->explanation =
+            "Identifier length exceeds the maximum of 128 characters";
+    }
+    lex->character_number += n;
+    token->value = strdup(buffer);
+    return nullptr;
+}
+
+error_t *lexer_next_character(lexer_t *lex, lexer_token_t *token) {
+    return lexer_not_implemented(lex, token);
+}
+
+error_t *lexer_next_string(lexer_t *lex, lexer_token_t *token) {
+    return lexer_not_implemented(lex, token);
+}
+
+bool is_whitespace_character(char c) {
+    return c == ' ' || c == '\t';
+}
+
+error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {
+    constexpr size_t max_whitespace_length = 1024;
+    size_t n = 0;
+    char buffer[max_whitespace_length + 1] = {};
+
+    token->id = TOKEN_WHITESPACE;
+    token->line_number = lex->line_number;
+    token->character_number = lex->character_number;
+
+    error_t *err = lexer_consume(lex, max_whitespace_length, buffer,
+                                 is_whitespace_character, &n);
+    if (err == err_consume_excessive_length) {
+        token->id = TOKEN_ERROR;
+        token->explanation =
+            "Whitespace length exceeds the maximum of 1024 characters";
+    }
+    lex->character_number += n;
+    token->value = strdup(buffer);
+    return nullptr;
+}
+
+bool is_comment_character(char c) {
+    return c != '\r' && c != '\n';
+}
+
+error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {
+    constexpr size_t max_comment_length = 1024;
+    size_t n = 0;
+    char buffer[max_comment_length + 1] = {};
+
+    token->id = TOKEN_COMMENT;
+    token->line_number = lex->line_number;
+    token->character_number = lex->character_number;
+
+    error_t *err = lexer_consume(lex, max_comment_length, buffer,
+                                 is_comment_character, &n);
+    if (err == err_consume_excessive_length) {
+        token->id = TOKEN_ERROR;
+        token->explanation =
+            "Comment length exceeds the maximum of 1024 characters";
+    }
+    lex->character_number += n;
+    token->value = strdup(buffer);
+    return nullptr;
+}
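Since the suffix is appended to the same buffer before the token value is duplicated, a literal like 0xffff:64 from the test input comes back as one TOKEN_HEXADECIMAL rather than a HEXADECIMAL/COLON/DECIMAL triple. What a caller of lexer_next() (defined next) should observe, as a sketch:

    lexer_token_t tok = {0};
    error_t *err = lexer_next(&lex, &tok);
    // err == nullptr, tok.id == TOKEN_HEXADECIMAL, tok.value == "0xffff:64"
    lexer_token_cleanup(&tok);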
+
+error_t *lexer_next(lexer_t *lex, lexer_token_t *token) {
+    memset(token, 0, sizeof(lexer_token_t));
+    error_t *err = lexer_fill_buffer(lex);
+    if (err)
+        return err;
+    char first = lex->buffer[0];
+    if (isalpha(first) || first == '_')
+        return lexer_next_identifier(lex, token);
+    if (isdigit(first))
+        return lexer_next_number(lex, token);
+
+    switch (first) {
+    case '\'':
+        return lexer_next_character(lex, token);
+    case '"':
+        return lexer_next_string(lex, token);
+    case ' ':
+    case '\t':
+        return lexer_next_whitespace(lex, token);
+    case ';':
+        return lexer_next_comment(lex, token);
+    case ':':
+        token->id = TOKEN_COLON;
+        break;
+    case ',':
+        token->id = TOKEN_COMMA;
+        break;
+    case '[':
+        token->id = TOKEN_LBRACKET;
+        break;
+    case ']':
+        token->id = TOKEN_RBRACKET;
+        break;
+    case '+':
+        token->id = TOKEN_PLUS;
+        break;
+    case '-':
+        token->id = TOKEN_MINUS;
+        break;
+    case '*':
+        token->id = TOKEN_ASTERISK;
+        break;
+    case '.':
+        token->id = TOKEN_DOT;
+        break;
+    case '\r':
+    case '\n':
+        return lexer_next_newline(lex, token);
+    default:
+        token->id = TOKEN_ERROR;
+        break;
+    }
+    token->value = strdup((char[]){first, 0});
+    lexer_shift_buffer(lex, 1);
+    token->line_number = lex->line_number;
+    token->character_number = lex->character_number;
+    if (token->id == TOKEN_ERROR) {
+        token->explanation =
+            "unexpected character during lexing (first of token)";
+    }
+    lex->character_number += 1;
+    return nullptr;
+}
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..62ffedf
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,56 @@
+#ifndef INCLUDE_SRC_LEXER_H_
+#define INCLUDE_SRC_LEXER_H_
+
+#include "error.h"
+#include <stddef.h>
+#include <stdio.h>
+
+extern error_t *err_eof;
+
+typedef enum {
+    TOKEN_ERROR,
+    TOKEN_IDENTIFIER,
+    TOKEN_DECIMAL,
+    TOKEN_HEXADECIMAL,
+    TOKEN_OCTAL,
+    TOKEN_BINARY,
+    TOKEN_CHAR,
+    TOKEN_STRING,
+    TOKEN_COLON,
+    TOKEN_COMMA,
+    TOKEN_LBRACKET,
+    TOKEN_RBRACKET,
+    TOKEN_PLUS,
+    TOKEN_MINUS,
+    TOKEN_ASTERISK,
+    TOKEN_DOT,
+    TOKEN_COMMENT,
+    TOKEN_NEWLINE,
+    TOKEN_WHITESPACE,
+} lexer_token_id_t;
+
+typedef struct lexer_token {
+    lexer_token_id_t id;
+    size_t line_number;
+    size_t character_number;
+    char *value;
+    const char *explanation;
+} lexer_token_t;
+
+constexpr size_t lexer_buffer_size = 32;
+
+typedef struct lexer {
+    size_t line_number;
+    size_t character_number;
+    size_t buffer_count;
+    char buffer[lexer_buffer_size];
+    FILE *fp;
+} lexer_t;
+
+void lexer_close(lexer_t *lex);
+error_t *lexer_open(lexer_t *lex, char *path);
+error_t *lexer_next(lexer_t *lex, lexer_token_t *token);
+void lexer_token_print(lexer_token_t *token);
+void lexer_token_cleanup(lexer_token_t *token);
+
+#endif // INCLUDE_SRC_LEXER_H_
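An ownership rule follows from this header: lexer_next() heap-allocates value via strdup(), while explanation always points at a static string, so consumers must free value (or call lexer_token_cleanup()) once per token. The driver in main.c below frees token.value directly; the equivalent cleanup-based loop, sketched:

    lexer_token_t token;
    error_t *err;
    while ((err = lexer_next(&lex, &token)) == nullptr) {
        lexer_token_print(&token);
        lexer_token_cleanup(&token); // frees value, zeroes the struct
    }
    if (err != err_eof) {
        /* a real lexing/read error, not end of input */
    }
    error_free(err); // no-op for the static err_eof sentinel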
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..94af906
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,62 @@
+#include "error.h"
+#include "lexer.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+bool print_token(lexer_token_t *token) {
+    lexer_token_print(token);
+    return true;
+}
+
+bool print_value(lexer_token_t *token) {
+    if (token->id == TOKEN_ERROR) {
+        printf("%s\n", token->value);
+        for (size_t i = 0; i < token->character_number; ++i)
+            printf(" ");
+        printf("^-- %s\n", token->explanation);
+    } else {
+        printf("%s", token->value);
+    }
+    return token->id != TOKEN_ERROR;
+}
+
+int main(int argc, char *argv[]) {
+    if (argc != 3 ||
+        (strcmp(argv[1], "-tokens") != 0 && strcmp(argv[1], "-text") != 0)) {
+        puts("Usage: oas -tokens <file>");
+        puts("Usage: oas -text <file>");
+        return 1;
+    }
+
+    bool (*print_fn)(lexer_token_t *);
+    char *filename = argv[2];
+    if (strcmp(argv[1], "-tokens") == 0) {
+        print_fn = print_token;
+    } else {
+        print_fn = print_value;
+    }
+
+    lexer_t lex = {0};
+    lexer_token_t token;
+    error_t *err = lexer_open(&lex, filename);
+    if (err) {
+        puts(err->message);
+        error_free(err);
+        return 1;
+    }
+
+    bool keep_going = true;
+    while (keep_going && (err = lexer_next(&lex, &token)) == nullptr) {
+        keep_going = print_fn(&token);
+        free(token.value);
+    }
+
+    if (err && err != err_eof) {
+        puts(err->message);
+    }
+    error_free(err);
+    lexer_close(&lex);
+    return 0;
+}
diff --git a/tests/input/valid.asm b/tests/input/valid.asm
new file mode 100644
index 0000000..de95bbe
--- /dev/null
+++ b/tests/input/valid.asm
@@ -0,0 +1,9 @@
+_start:
+    mov eax, 555 ; move 555 into eax
+    push 0o777
+    xor eax, 0xDEADBEEF
+    and ecx, 0o770
+    mov edx, 0b01010101
+    push 0xffff:64
+    push 0o777:16
+    push 0b0001:16
diff --git a/validate.sh b/validate.sh
new file mode 100755
index 0000000..6169159
--- /dev/null
+++ b/validate.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Start with static analysis
+scan-build -o reports/ -plist-html --status-bugs make clean all
+
+# Run the sanitizer builds and valgrind
+make clean sanitize all
+
+ARGUMENTS=("-tokens" "-text")
+while IFS= read -r INPUT_FILE; do
+    for ARGS in "${ARGUMENTS[@]}"; do
+        ./oas-asan "$ARGS" "$INPUT_FILE" > /dev/null
+        ./oas-msan "$ARGS" "$INPUT_FILE" > /dev/null
+        valgrind --leak-check=full --error-exitcode=1 ./oas "$ARGS" "$INPUT_FILE" > /dev/null
+    done
+done < <(find tests/input/ -type f -name '*.asm')