Add documentation comments to the lexer code

This commit is contained in:
omicron 2025-03-30 22:51:15 +02:00
parent 4becfb868e
commit e5830daac9
2 changed files with 143 additions and 3 deletions

View File

@ -89,6 +89,15 @@ void lexer_close(lexer_t *lex) {
memset(lex, 0, sizeof(lexer_t)); memset(lex, 0, sizeof(lexer_t));
} }
/**
* Attempts to fill the lexer's internal buffer with more data from the file.
* Only reads data if the buffer isn't already full and the file hasn't reached
* EOF.
*
* @param lex The lexer to fill the buffer for
* @return nullptr on success, an error otherwise (including err_eof if EOF
* reached with empty buffer)
*/
error_t *lexer_fill_buffer(lexer_t *lex) { error_t *lexer_fill_buffer(lexer_t *lex) {
if (feof(lex->fp) && lex->buffer_count == 0) if (feof(lex->fp) && lex->buffer_count == 0)
return err_eof; return err_eof;
@ -126,7 +135,17 @@ error_t *lexer_open(lexer_t *lex, char *path) {
return nullptr; return nullptr;
} }
/**
* Shifts the lexer's buffer by n characters, discarding the first n characters
* and moving the remaining characters to the beginning of the buffer.
*
* @param lex The lexer whose buffer to shift
* @param n Number of characters to shift out
*
* @pre There must be at least n characters in the input buffer
*/
void lexer_shift_buffer(lexer_t *lex, int n) { void lexer_shift_buffer(lexer_t *lex, int n) {
assert(lex->buffer_count >= n);
lex->buffer_count -= n; lex->buffer_count -= n;
memmove(lex->buffer, lex->buffer + n, lex->buffer_count); memmove(lex->buffer, lex->buffer + n, lex->buffer_count);
} }
@ -142,9 +161,13 @@ error_t *lexer_peek(lexer_t *lex, char *c) {
return nullptr; return nullptr;
} }
// This does _not_ fill the internal lexer buffer and you _must_ call /**
// lexer_fill_buffer() before calling this. It will always return false if your * Checks if the lexer's buffer starts with the given prefix.
// prefix is larger than lexer_buffer_size *
* @param lex The lexer to check
* @param prefix The string prefix to check for
* @return true if the buffer starts with the prefix, false otherwise
*/
bool lexer_has_prefix(lexer_t *lex, char *prefix) { bool lexer_has_prefix(lexer_t *lex, char *prefix) {
size_t len = strlen(prefix); size_t len = strlen(prefix);
if (len > lex->buffer_count) if (len > lex->buffer_count)
@ -159,6 +182,17 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) {
lex->character_number); lex->character_number);
} }
/**
* Consumes exactly n characters from the buffer into the provided output
* buffer.
*
* @param lex The lexer to consume from
* @param len Size of the output buffer
* @param buffer Output buffer to store the consumed characters
* @param n Number of characters to consume
* @return nullptr on success, an error otherwise (err_buffer_underrun if buffer
* contains fewer than n characters)
*/
error_t *lexer_consume_n(lexer_t *lex, const size_t len, error_t *lexer_consume_n(lexer_t *lex, const size_t len,
char buffer[static len], const size_t n) { char buffer[static len], const size_t n) {
if (lex->buffer_count < n) if (lex->buffer_count < n)
@ -170,6 +204,20 @@ error_t *lexer_consume_n(lexer_t *lex, const size_t len,
lexer_shift_buffer(lex, n); lexer_shift_buffer(lex, n);
return nullptr; return nullptr;
} }
/**
* Consumes characters from the lexer buffer that satisfy the predicate
* function. Will attempt to refill the buffer if more valid characters are
* available.
*
* @param lex The lexer to consume from
* @param n Maximum number of characters to consume
* @param buffer Output buffer to store consumed characters
* @param is_valid Function that determines if a character should be consumed
* @param n_consumed Output parameter that will contain the number of characters
* consumed
* @return nullptr on success, an error otherwise
*/
error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n], error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n],
char_predicate_t is_valid, size_t *n_consumed) { char_predicate_t is_valid, size_t *n_consumed) {
const size_t buffer_size = n; const size_t buffer_size = n;
@ -217,6 +265,18 @@ bool is_decimal_character(char c) {
return isdigit(c); return isdigit(c);
} }
/**
* Processes a number token (decimal, hexadecimal, octal, or binary).
* Handles number formats with optional size suffixes.
*
* @param lex The lexer to read from
* @param token Output parameter that will be populated with the token
* information
* @return nullptr on success, an error otherwise
*
* @pre There must be at least one character in the input buffer and it should
* be [0-9]
*/
error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) { error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
constexpr size_t max_number_length = 128; constexpr size_t max_number_length = 128;
size_t so_far = 0; size_t so_far = 0;
@ -294,6 +354,19 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) {
token->value = strdup(buffer); token->value = strdup(buffer);
return nullptr; return nullptr;
} }
/**
* Processes a newline token (\n or \r\n).
* Updates the lexer's line and character position tracking.
*
* @param lex The lexer to read from
* @param token Output parameter that will be populated with the token
* information
* @return nullptr on success, an error otherwise
*
* @pre There must be at least on character in the input buffer and it must
* be [\r\n]
*/
error_t *lexer_next_newline(lexer_t *lex, lexer_token_t *token) { error_t *lexer_next_newline(lexer_t *lex, lexer_token_t *token) {
token->line_number = lex->line_number; token->line_number = lex->line_number;
token->character_number = lex->character_number; token->character_number = lex->character_number;
@ -323,6 +396,19 @@ bool is_identifier_character(char c) {
return isalnum(c) || c == '_'; return isalnum(c) || c == '_';
} }
/**
* Processes an identifier token.
* Identifiers start with a letter or underscore and can contain alphanumeric
* characters or underscores.
*
* @param lex The lexer to read from
* @param token Output parameter that will be populated with the token
* information
* @return nullptr on success, an error otherwise
*
* @pre There must be at least 1 character in the read buffer and it must be
* [a-zA-Z_]
*/
error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) { error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) {
constexpr size_t max_identifier_length = 128; constexpr size_t max_identifier_length = 128;
size_t n = 0; size_t n = 0;
@ -355,6 +441,17 @@ bool is_whitespace_character(char c) {
return c == ' ' || c == '\t'; return c == ' ' || c == '\t';
} }
/**
* Processes a whitespace token (spaces and tabs).
*
* @param lex The lexer to read from
* @param token Output parameter that will be populated with the token
* information
* @return nullptr on success, an error otherwise
*
* @pre There must be at least one character in the buffer and it must be
* [ \t]
*/
error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) { error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) {
constexpr size_t max_whitespace_length = 1024; constexpr size_t max_whitespace_length = 1024;
size_t n = 0; size_t n = 0;
@ -380,6 +477,16 @@ bool is_comment_character(char c) {
return c != '\r' && c != '\n'; return c != '\r' && c != '\n';
} }
/**
* Processes a comment token (starts with ';' and continues to end of line).
*
* @param lex The lexer to read from
* @param token Output parameter that will be populated with the token
* information
* @return nullptr on success, an error otherwise
*
* @pre There must be at least one character in the buffer and it must be ';'
*/
error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) { error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) {
constexpr size_t max_comment_length = 1024; constexpr size_t max_comment_length = 1024;
size_t n = 0; size_t n = 0;

View File

@ -47,10 +47,43 @@ typedef struct lexer {
FILE *fp; FILE *fp;
} lexer_t; } lexer_t;
/**
* @brief Closes a lexer and releases associated resources
*
* @param lex Pointer to the lexer to close
*/
void lexer_close(lexer_t *lex); void lexer_close(lexer_t *lex);
/**
* @brief Opens a file for lexical analysis
*
* @param lex Pointer to the lexer to initialize
* @param path Path to the file to open
* @return error_t* nullptr on success, or error describing the failure
*/
error_t *lexer_open(lexer_t *lex, char *path); error_t *lexer_open(lexer_t *lex, char *path);
/**
* @brief Reads the next token from the input stream
*
* @param lex Pointer to an initialized lexer
* @param token Pointer to a token structure to fill with the next token
* @return error_t* nullptr on success, err_eof at end of file, or other error
*/
error_t *lexer_next(lexer_t *lex, lexer_token_t *token); error_t *lexer_next(lexer_t *lex, lexer_token_t *token);
/**
* @brief Prints a token to stdout for debugging purposes
*
* @param token Pointer to the token to print
*/
void lexer_token_print(lexer_token_t *token); void lexer_token_print(lexer_token_t *token);
/**
* @brief Frees any resources associated with a token
*
* @param token Pointer to the token to clean up
*/
void lexer_token_cleanup(lexer_token_t *token); void lexer_token_cleanup(lexer_token_t *token);
#endif // INCLUDE_SRC_LEXER_H_ #endif // INCLUDE_SRC_LEXER_H_