From 42da7b1d05952eb5951994d96f8a7e82fd4c609b Mon Sep 17 00:00:00 2001 From: omicron Date: Mon, 31 Mar 2025 15:08:29 +0200 Subject: [PATCH 01/15] Move err_allocation_failed into error.c and make it available to everyone. --- src/error.c | 3 +++ src/error.h | 3 +++ src/lexer.c | 3 --- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/error.c b/src/error.c index 008c588..9ede6cb 100644 --- a/src/error.c +++ b/src/error.c @@ -10,6 +10,9 @@ error_t *const err_errorf_length = &(error_t){ .message = "Formatting of another error failed to determine the error length"}; +error_t *err_allocation_failed = + &(error_t){.message = "Memory allocation failed"}; + error_t *errorf(const char *fmt, ...) { error_t *err = calloc(1, sizeof(error_t)); if (err == nullptr) diff --git a/src/error.h b/src/error.h index 5a6f143..96374e0 100644 --- a/src/error.h +++ b/src/error.h @@ -18,4 +18,7 @@ static inline void error_free(error_t *err) { free(err); } +/* Some global errors */ +extern error_t *err_allocation_failed; + #endif // INCLUDE_SRC_ERROR_H_ diff --git a/src/lexer.c b/src/lexer.c index be808cc..08a96bd 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -20,9 +20,6 @@ error_t *err_eof = error_t *err_unknown_read = &(error_t){.message = "Unknown read error"}; -error_t *err_allocation_failed = - &(error_t){.message = "Memory allocation failed"}; - typedef bool (*char_predicate_t)(char); const char *lexer_token_id_to_cstr(lexer_token_id_t id) { -- 2.47.2 From bd37ddaeea3c4e2e3deadfbd465436c1e2804378 Mon Sep 17 00:00:00 2001 From: omicron Date: Mon, 31 Mar 2025 18:38:46 +0200 Subject: [PATCH 02/15] Add tokenlist, a linked list of lexer tokens The linked list is doubly linked so the parser can look forward into it and error reporting can look backward. This commit also reworks main to use the tokenlist instead of dealing with the lexer manually. 
--- src/main.c | 40 +++++++++++++++--------- src/tokenlist.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ src/tokenlist.h | 30 ++++++++++++++++++ 3 files changed, 138 insertions(+), 15 deletions(-) create mode 100644 src/tokenlist.c create mode 100644 src/tokenlist.h diff --git a/src/main.c b/src/main.c index 94af906..d74d710 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,6 @@ #include "error.h" #include "lexer.h" +#include "tokenlist.h" #include #include @@ -39,24 +40,33 @@ int main(int argc, char *argv[]) { print_fn = print_value; } - lexer_t lex = {0}; - lexer_token_t token; - error_t *err = lexer_open(&lex, filename); - if (err) { - puts(err->message); - error_free(err); - return 1; - } + lexer_t *lex = &(lexer_t){}; + error_t *err = lexer_open(lex, filename); + if (err) + goto cleanup_error; - bool keep_going = true; - while (keep_going && (err = lexer_next(&lex, &token)) == nullptr) { - keep_going = print_fn(&token); - free(token.value); - } + tokenlist_t *list; + err = tokenlist_alloc(&list); + if (err) + goto cleanup_lexer; - if (err && err != err_eof) { - puts(err->message); + err = tokenlist_fill(list, lex); + if (err) + goto cleanup_tokens; + + for (auto entry = list->head; entry; entry = entry->next) { + print_fn(&entry->token); } + tokenlist_free(list); error_free(err); return 0; + +cleanup_tokens: + tokenlist_free(list); +cleanup_lexer: + lexer_close(lex); +cleanup_error: + puts(err->message); + error_free(err); + return 1; } diff --git a/src/tokenlist.c b/src/tokenlist.c new file mode 100644 index 0000000..0a102b1 --- /dev/null +++ b/src/tokenlist.c @@ -0,0 +1,83 @@ +#include "tokenlist.h" +#include "error.h" +#include "lexer.h" +#include + +error_t *tokenlist_alloc(tokenlist_t **output) { + *output = nullptr; + + tokenlist_t *list = calloc(1, sizeof(tokenlist_t)); + if (list == nullptr) + return err_allocation_failed; + + list->head = nullptr; + list->tail = nullptr; + + *output = list; + return nullptr; +} + +error_t 
*tokenlist_entry_alloc(tokenlist_entry_t **output) { + *output = nullptr; + + tokenlist_entry_t *entry = calloc(1, sizeof(tokenlist_entry_t)); + if (entry == nullptr) + return err_allocation_failed; + + entry->next = nullptr; + entry->prev = nullptr; + + *output = entry; + return nullptr; +} + +void tokenlist_append(tokenlist_t *list, tokenlist_entry_t *entry) { + if (list->head == nullptr) { + list->head = entry; + list->tail = entry; + entry->next = nullptr; + entry->prev = nullptr; + } else { + entry->prev = list->tail; + entry->next = nullptr; + list->tail->next = entry; + list->tail = entry; + } +} + +void tokenlist_entry_free(tokenlist_entry_t *entry) { + lexer_token_cleanup(&entry->token); + free(entry); +} + +void tokenlist_free(tokenlist_t *list) { + if (list == nullptr) + return; + + tokenlist_entry_t *current = list->head; + while (current) { + tokenlist_entry_t *next = current->next; + tokenlist_entry_free(current); + current = next; + } + + free(list); +} + +error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex) { + error_t *err = nullptr; + lexer_token_t token = {}; + while ((err = lexer_next(lex, &token)) == nullptr) { + tokenlist_entry_t *entry; + err = tokenlist_entry_alloc(&entry); + if (err) { + lexer_token_cleanup(&token); + return err; + } + entry->token = token; + tokenlist_append(list, entry); + } + if (err != err_eof) + return err; + return nullptr; +} diff --git a/src/tokenlist.h b/src/tokenlist.h new file mode 100644 index 0000000..25d75e3 --- /dev/null +++ b/src/tokenlist.h @@ -0,0 +1,30 @@ +#ifndef INCLUDE_SRC_TOKENLIST_H_ +#define INCLUDE_SRC_TOKENLIST_H_ +#include "lexer.h" + +typedef struct tokenlist_entry tokenlist_entry_t; + +struct tokenlist_entry { + lexer_token_t token; + tokenlist_entry_t *next; + tokenlist_entry_t *prev; +}; + +typedef struct tokenlist { + tokenlist_entry_t *head; + tokenlist_entry_t *tail; +} tokenlist_t; + +/** + * @brief Allocate a new doubly linked list of lexer tokens + */ +error_t 
*tokenlist_alloc(tokenlist_t **list); + +/** + * Consume all tokens from the lexer and add them to the list + */ +error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex); + +void tokenlist_free(tokenlist_t *list); + +#endif // INCLUDE_SRC_TOKENLIST_H_ -- 2.47.2 From 34ace3692023409aca270bd2b8e44c9ad70b6f04 Mon Sep 17 00:00:00 2001 From: omicron Date: Mon, 31 Mar 2025 14:47:58 +0200 Subject: [PATCH 03/15] Add a parser grammar --- doc/parser_grammar.txt | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 doc/parser_grammar.txt diff --git a/doc/parser_grammar.txt b/doc/parser_grammar.txt new file mode 100644 index 0000000..e7e9315 --- /dev/null +++ b/doc/parser_grammar.txt @@ -0,0 +1,39 @@ + ::= * + ::=