Add documentation comments to the lexer code

2025-03-30 22:51:15 +02:00
parent 4becfb868e
commit e5830daac9
2 changed files with 143 additions and 3 deletions
--- a/src/lexer.c
+++ b/src/lexer.c
@ -89,6 +89,15 @@ void lexer_close(lexer_t *lex) {
    memset(lex, 0, sizeof(lexer_t));
 }
 /**
 * Attempts to fill the lexer's internal buffer with more data from the file.
 * Only reads data if the buffer isn't already full and the file hasn't reached
 * EOF.
 *
 * @param lex The lexer to fill the buffer for
 * @return nullptr on success, an error otherwise (including err_eof if EOF
 * reached with empty buffer)
 */
 error_t *lexer_fill_buffer(lexer_t *lex) {
    if (feof(lex->fp) && lex->buffer_count == 0)
        return err_eof;
@ -126,7 +135,17 @@ error_t *lexer_open(lexer_t *lex, char *path) {
    return nullptr;
 }
 /**
 * Shifts the lexer's buffer by n characters, discarding the first n characters
 * and moving the remaining characters to the beginning of the buffer.
 *
 * @param lex The lexer whose buffer to shift
 * @param n Number of characters to shift out
 *
 * @pre There must be at least n characters in the input buffer
 */
 void lexer_shift_buffer(lexer_t *lex, int n) {
    assert(lex->buffer_count >= n);
    lex->buffer_count -= n;
    memmove(lex->buffer, lex->buffer + n, lex->buffer_count);
 }
@ -142,9 +161,13 @@ error_t *lexer_peek(lexer_t *lex, char *c) {
    return nullptr;
 }
-// This does _not_ fill the internal lexer buffer and you _must_ call
+/**
-// lexer_fill_buffer() before calling this. It will always return false if your
+ * Checks if the lexer's buffer starts with the given prefix.
-// prefix is larger than lexer_buffer_size
+ *
 * @param lex The lexer to check
 * @param prefix The string prefix to check for
 * @return true if the buffer starts with the prefix, false otherwise
 */
 bool lexer_has_prefix(lexer_t *lex, char *prefix) {
    size_t len = strlen(prefix);
    if (len > lex->buffer_count)
@ -159,6 +182,17 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
                  lex->character_number);
 }
 /**
 * Consumes exactly n characters from the buffer into the provided output
 * buffer.
 *
 * @param lex The lexer to consume from
 * @param len Size of the output buffer
 * @param buffer Output buffer to store the consumed characters
 * @param n Number of characters to consume
 * @return nullptr on success, an error otherwise (err_buffer_underrun if buffer
 * contains fewer than n characters)
 */
 error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                         char buffer[static len], const size_t n) {
    if (lex->buffer_count < n)
@ -170,6 +204,20 @@ error_t *lexer_consume_n(lexer_t *lex, const size_t len,
    lexer_shift_buffer(lex, n);
    return nullptr;
 }
 /**
 * Consumes characters from the lexer buffer that satisfy the predicate
 * function. Will attempt to refill the buffer if more valid characters are
 * available.
 *
 * @param lex The lexer to consume from
 * @param n Maximum number of characters to consume
 * @param buffer Output buffer to store consumed characters
 * @param is_valid Function that determines if a character should be consumed
 * @param n_consumed Output parameter that will contain the number of characters
 * consumed
 * @return nullptr on success, an error otherwise
 */
 error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
                       char_predicate_t is_valid, size_t *n_consumed) {
    const size_t buffer_size = n;
@ -217,6 +265,18 @@ bool is_decimal_character(char c) {
    return isdigit(c);
 }
 /**
 * Processes a number token (decimal, hexadecimal, octal, or binary).
 * Handles number formats with optional size suffixes.
 *
 * @param lex The lexer to read from
 * @param token Output parameter that will be populated with the token
 * information
 * @return nullptr on success, an error otherwise
 *
 * @pre There must be at least one character in the input buffer and it should
 * be [0-9]
 */
 error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    constexpr size_t max_number_length = 128;
    size_t so_far = 0;
@ -294,6 +354,19 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
    token->value = strdup(buffer);
    return nullptr;
 }
 /**
 * Processes a newline token (\n or \r\n).
 * Updates the lexer's line and character position tracking.
 *
 * @param lex The lexer to read from
 * @param token Output parameter that will be populated with the token
 * information
 * @return nullptr on success, an error otherwise
 *
 * @pre There must be at least on character in the input buffer and it must
 * be [\r\n]
 */
 error_t *lexer_next_newline(lexer_t *lex, lexer_token_t *token) {
    token->line_number = lex->line_number;
    token->character_number = lex->character_number;
@ -323,6 +396,19 @@ bool is_identifier_character(char c) {
    return isalnum(c) || c == '_';
 }
 /**
 * Processes an identifier token.
 * Identifiers start with a letter or underscore and can contain alphanumeric
 * characters or underscores.
 *
 * @param lex The lexer to read from
 * @param token Output parameter that will be populated with the token
 * information
 * @return nullptr on success, an error otherwise
 *
 * @pre There must be at least 1 character in the read buffer and it must be
 * [a-zA-Z_]
 */
 error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {
    constexpr size_t max_identifier_length = 128;
    size_t n = 0;
@ -355,6 +441,17 @@ bool is_whitespace_character(char c) {
    return c == ' ' || c == '\t';
 }
 /**
 * Processes a whitespace token (spaces and tabs).
 *
 * @param lex The lexer to read from
 * @param token Output parameter that will be populated with the token
 * information
 * @return nullptr on success, an error otherwise
 *
 * @pre There must be at least one character in the buffer and it must be
 * [ \t]
 */
 error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {
    constexpr size_t max_whitespace_length = 1024;
    size_t n = 0;
@ -380,6 +477,16 @@ bool is_comment_character(char c) {
    return c != '\r' && c != '\n';
 }
 /**
 * Processes a comment token (starts with ';' and continues to end of line).
 *
 * @param lex The lexer to read from
 * @param token Output parameter that will be populated with the token
 * information
 * @return nullptr on success, an error otherwise
 *
 * @pre There must be at least one character in the buffer and it must be ';'
 */
 error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {
    constexpr size_t max_comment_length = 1024;
    size_t n = 0;
--- a/src/lexer.h
+++ b/src/lexer.h
@ -47,10 +47,43 @@ typedef struct lexer {
    FILE *fp;
 } lexer_t;
 /**
 * @brief Closes a lexer and releases associated resources
 *
 * @param lex Pointer to the lexer to close
 */
 void lexer_close(lexer_t *lex);
 /**
 * @brief Opens a file for lexical analysis
 *
 * @param lex Pointer to the lexer to initialize
 * @param path Path to the file to open
 * @return error_t* nullptr on success, or error describing the failure
 */
 error_t *lexer_open(lexer_t *lex, char *path);
 /**
 * @brief Reads the next token from the input stream
 *
 * @param lex Pointer to an initialized lexer
 * @param token Pointer to a token structure to fill with the next token
 * @return error_t* nullptr on success, err_eof at end of file, or other error
 */
 error_t *lexer_next(lexer_t *lex, lexer_token_t *token);
 /**
 * @brief Prints a token to stdout for debugging purposes
 *
 * @param token Pointer to the token to print
 */
 void lexer_token_print(lexer_token_t *token);
 /**
 * @brief Frees any resources associated with a token
 *
 * @param token Pointer to the token to clean up
 */
 void lexer_token_cleanup(lexer_token_t *token);
 #endif // INCLUDE_SRC_LEXER_H_