From e5830daac92fc1cc7b326a373870392f33ddc0aa Mon Sep 17 00:00:00 2001
From: omicron <omicron.me@protonmail.com>
Date: Sun, 30 Mar 2025 22:51:15 +0200
Subject: [PATCH] Add documentation comments to the lexer code

---
 src/lexer.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/lexer.h |  33 +++++++++++++++
 2 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
index fac9b48..cf3f6d5 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -89,6 +89,15 @@ void lexer_close(lexer_t *lex) {
     memset(lex, 0, sizeof(lexer_t));
 }
 
+/**
+ * Attempts to fill the lexer's internal buffer with more data from the file.
+ * Only reads data if the buffer isn't already full and the file hasn't reached
+ * EOF.
+ *
+ * @param lex The lexer to fill the buffer for
+ * @return nullptr on success, an error otherwise (including err_eof if EOF
+ * reached with empty buffer)
+ */
 error_t *lexer_fill_buffer(lexer_t *lex) {
     if (feof(lex->fp) && lex->buffer_count == 0)
         return err_eof;
@@ -126,7 +135,17 @@ error_t *lexer_open(lexer_t *lex, char *path) {
     return nullptr;
 }
 
+/**
+ * Shifts the lexer's buffer by n characters, discarding the first n characters
+ * and moving the remaining characters to the beginning of the buffer.
+ *
+ * @param lex The lexer whose buffer to shift
+ * @param n Number of characters to shift out
+ *
+ * @pre There must be at least n characters in the input buffer
+ */
 void lexer_shift_buffer(lexer_t *lex, int n) {
+    assert(lex->buffer_count >= n);
     lex->buffer_count -= n;
     memmove(lex->buffer, lex->buffer + n, lex->buffer_count);
 }
@@ -142,9 +161,13 @@ error_t *lexer_peek(lexer_t *lex, char *c) {
     return nullptr;
 }
 
-// This does _not_ fill the internal lexer buffer and you _must_ call
-// lexer_fill_buffer() before calling this. It will always return false if your
-// prefix is larger than lexer_buffer_size
+/**
+ * Checks if the lexer's buffer starts with the given prefix.
+ *
+ * @param lex The lexer to check
+ * @param prefix The string prefix to check for
+ * @return true if the buffer starts with the prefix, false otherwise
+ */
 bool lexer_has_prefix(lexer_t *lex, char *prefix) {
     size_t len = strlen(prefix);
     if (len > lex->buffer_count)
@@ -159,6 +182,17 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
                   lex->character_number);
 }
 
+/**
+ * Consumes exactly n characters from the buffer into the provided output
+ * buffer.
+ *
+ * @param lex The lexer to consume from
+ * @param len Size of the output buffer
+ * @param buffer Output buffer to store the consumed characters
+ * @param n Number of characters to consume
+ * @return nullptr on success, an error otherwise (err_buffer_underrun if buffer
+ * contains fewer than n characters)
+ */
 error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                          char buffer[static len], const size_t n) {
     if (lex->buffer_count < n)
@@ -170,6 +204,20 @@ error_t *lexer_consume_n(lexer_t *lex, const size_t len,
     lexer_shift_buffer(lex, n);
     return nullptr;
 }
+
+/**
+ * Consumes characters from the lexer buffer that satisfy the predicate
+ * function. Will attempt to refill the buffer if more valid characters are
+ * available.
+ *
+ * @param lex The lexer to consume from
+ * @param n Maximum number of characters to consume
+ * @param buffer Output buffer to store consumed characters
+ * @param is_valid Function that determines if a character should be consumed
+ * @param n_consumed Output parameter that will contain the number of characters
+ * consumed
+ * @return nullptr on success, an error otherwise
+ */
 error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
                        char_predicate_t is_valid, size_t *n_consumed) {
     const size_t buffer_size = n;
@@ -217,6 +265,18 @@ bool is_decimal_character(char c) {
     return isdigit(c);
 }
 
+/**
+ * Processes a number token (decimal, hexadecimal, octal, or binary).
+ * Handles number formats with optional size suffixes.
+ *
+ * @param lex The lexer to read from
+ * @param token Output parameter that will be populated with the token
+ * information
+ * @return nullptr on success, an error otherwise
+ *
+ * @pre There must be at least one character in the input buffer and it should
+ * be [0-9]
+ */
 error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
     constexpr size_t max_number_length = 128;
     size_t so_far = 0;
@@ -294,6 +354,19 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
     token->value = strdup(buffer);
     return nullptr;
 }
+
+/**
+ * Processes a newline token (\n or \r\n).
+ * Updates the lexer's line and character position tracking.
+ *
+ * @param lex The lexer to read from
+ * @param token Output parameter that will be populated with the token
+ * information
+ * @return nullptr on success, an error otherwise
+ *
+ * @pre There must be at least on character in the input buffer and it must
+ * be [\r\n]
+ */
 error_t *lexer_next_newline(lexer_t *lex, lexer_token_t *token) {
     token->line_number = lex->line_number;
     token->character_number = lex->character_number;
@@ -323,6 +396,19 @@ bool is_identifier_character(char c) {
     return isalnum(c) || c == '_';
 }
 
+/**
+ * Processes an identifier token.
+ * Identifiers start with a letter or underscore and can contain alphanumeric
+ * characters or underscores.
+ *
+ * @param lex The lexer to read from
+ * @param token Output parameter that will be populated with the token
+ * information
+ * @return nullptr on success, an error otherwise
+ *
+ * @pre There must be at least 1 character in the read buffer and it must be
+ * [a-zA-Z_]
+ */
 error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {
     constexpr size_t max_identifier_length = 128;
     size_t n = 0;
@@ -355,6 +441,17 @@ bool is_whitespace_character(char c) {
     return c == ' ' || c == '\t';
 }
 
+/**
+ * Processes a whitespace token (spaces and tabs).
+ *
+ * @param lex The lexer to read from
+ * @param token Output parameter that will be populated with the token
+ * information
+ * @return nullptr on success, an error otherwise
+ *
+ * @pre There must be at least one character in the buffer and it must be
+ * [ \t]
+ */
 error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {
     constexpr size_t max_whitespace_length = 1024;
     size_t n = 0;
@@ -380,6 +477,16 @@ bool is_comment_character(char c) {
     return c != '\r' && c != '\n';
 }
 
+/**
+ * Processes a comment token (starts with ';' and continues to end of line).
+ *
+ * @param lex The lexer to read from
+ * @param token Output parameter that will be populated with the token
+ * information
+ * @return nullptr on success, an error otherwise
+ *
+ * @pre There must be at least one character in the buffer and it must be ';'
+ */
 error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {
     constexpr size_t max_comment_length = 1024;
     size_t n = 0;
diff --git a/src/lexer.h b/src/lexer.h
index 62ffedf..3265f37 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -47,10 +47,43 @@ typedef struct lexer {
     FILE *fp;
 } lexer_t;
 
+/**
+ * @brief Closes a lexer and releases associated resources
+ *
+ * @param lex Pointer to the lexer to close
+ */
 void lexer_close(lexer_t *lex);
+
+/**
+ * @brief Opens a file for lexical analysis
+ *
+ * @param lex Pointer to the lexer to initialize
+ * @param path Path to the file to open
+ * @return error_t* nullptr on success, or error describing the failure
+ */
 error_t *lexer_open(lexer_t *lex, char *path);
+
+/**
+ * @brief Reads the next token from the input stream
+ *
+ * @param lex Pointer to an initialized lexer
+ * @param token Pointer to a token structure to fill with the next token
+ * @return error_t* nullptr on success, err_eof at end of file, or other error
+ */
 error_t *lexer_next(lexer_t *lex, lexer_token_t *token);
+
+/**
+ * @brief Prints a token to stdout for debugging purposes
+ *
+ * @param token Pointer to the token to print
+ */
 void lexer_token_print(lexer_token_t *token);
+
+/**
+ * @brief Frees any resources associated with a token
+ *
+ * @param token Pointer to the token to clean up
+ */
 void lexer_token_cleanup(lexer_token_t *token);
 
 #endif // INCLUDE_SRC_LEXER_H_