Compare commits
	
		
			3 Commits
		
	
	
		
			942dd444cc
			...
			5cdb60d395
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 5cdb60d395 | |||
| e5830daac9 | |||
| 4becfb868e | 
							
								
								
									
										8
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								Makefile
									
									
									
									
									
								
							| @@ -25,8 +25,12 @@ fuzz: | ||||
| 	afl-fuzz -i tests/input -o reports/afl -m none -- ./oas-afl -tokens @@ | ||||
|  | ||||
| sanitize: | ||||
| 	make CFLAGS="$(CFLAGS) -fsanitize=address,undefined" LDFLAGS="-fsanitize=address,undefined" TARGET="oas-asan" clean-objects all | ||||
| 	make CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" TARGET="oas-msan" clean-objects all  | ||||
| 	make CFLAGS="$(CFLAGS) -fsanitize=address,undefined" \ | ||||
| 		LDFLAGS="-fsanitize=address,undefined" \ | ||||
| 		TARGET="oas-asan" clean-objects all | ||||
| 	make CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" \ | ||||
| 		LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" \ | ||||
| 		TARGET="oas-msan" clean-objects all  | ||||
| 	make clean-objects | ||||
|  | ||||
| validate: | ||||
|   | ||||
							
								
								
									
										124
									
								
								src/lexer.c
									
									
									
									
									
								
							
							
						
						
									
										124
									
								
								src/lexer.c
									
									
									
									
									
								
							| @@ -89,6 +89,15 @@ void lexer_close(lexer_t *lex) { | ||||
|     memset(lex, 0, sizeof(lexer_t)); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Attempts to fill the lexer's internal buffer with more data from the file. | ||||
|  * Only reads data if the buffer isn't already full and the file hasn't reached | ||||
|  * EOF. | ||||
|  * | ||||
|  * @param lex The lexer to fill the buffer for | ||||
|  * @return nullptr on success, an error otherwise (including err_eof if EOF | ||||
|  * reached with empty buffer) | ||||
|  */ | ||||
| error_t *lexer_fill_buffer(lexer_t *lex) { | ||||
|     if (feof(lex->fp) && lex->buffer_count == 0) | ||||
|         return err_eof; | ||||
| @@ -126,25 +135,28 @@ error_t *lexer_open(lexer_t *lex, char *path) { | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Shifts the lexer's buffer by n characters, discarding the first n characters | ||||
|  * and moving the remaining characters to the beginning of the buffer. | ||||
|  * | ||||
|  * The remaining bytes are moved with memmove because the source and | ||||
|  * destination ranges lie in the same buffer and may overlap. | ||||
|  * | ||||
|  * @param lex The lexer whose buffer to shift | ||||
|  * @param n Number of characters to shift out | ||||
|  * | ||||
|  * @pre There must be at least n characters in the input buffer. This is only | ||||
|  * checked with assert(), so it is not enforced when NDEBUG is defined. | ||||
|  * | ||||
|  * NOTE(review): n is an int while lexer_consume_n passes a size_t here; | ||||
|  * confirm that the narrowing conversion is intended. | ||||
|  */ | ||||
| void lexer_shift_buffer(lexer_t *lex, int n) { | ||||
|     assert(lex->buffer_count >= n); | ||||
|     lex->buffer_count -= n; | ||||
|     memmove(lex->buffer, lex->buffer + n, lex->buffer_count); | ||||
| } | ||||
|  | ||||
| error_t *lexer_peek(lexer_t *lex, char *c) { | ||||
|     error_t *err = lexer_fill_buffer(lex); | ||||
|     if (err) | ||||
|         return err; | ||||
|     if (lex->buffer_count == 0) | ||||
|         return err_eof; | ||||
|     *c = lex->buffer[0]; | ||||
|     lexer_shift_buffer(lex, 1); | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| // This does _not_ fill the internal lexer buffer and you _must_ call | ||||
| // lexer_fill_buffer() before calling this. It will always return false if your | ||||
| // prefix is larger than lexer_buffer_size | ||||
| /** | ||||
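|  * Checks if the lexer's buffer starts with the given prefix. | ||||
|  * | ||||
|  * This does not fill the lexer's internal buffer; call lexer_fill_buffer() | ||||
|  * before calling this. | ||||
|  * | ||||
|  * @param lex The lexer to check | ||||
|  * @param prefix The string prefix to check for | ||||
|  * @return true if the buffer starts with the prefix, false otherwise (always | ||||
|  * false if the prefix is larger than lexer_buffer_size) | ||||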
|  */ | ||||
| bool lexer_has_prefix(lexer_t *lex, char *prefix) { | ||||
|     size_t len = strlen(prefix); | ||||
|     if (len > lex->buffer_count) | ||||
| @@ -159,6 +171,17 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) { | ||||
|                   lex->character_number); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Consumes exactly n characters from the buffer into the provided output | ||||
|  * buffer. | ||||
|  * | ||||
|  * @param lex The lexer to consume from | ||||
|  * @param len Size of the output buffer | ||||
|  * @param buffer Output buffer to store the consumed characters | ||||
|  * @param n Number of characters to consume | ||||
|  * @return nullptr on success, an error otherwise (err_buffer_underrun if buffer | ||||
|  * contains fewer than n characters) | ||||
|  */ | ||||
| error_t *lexer_consume_n(lexer_t *lex, const size_t len, | ||||
|                          char buffer[static len], const size_t n) { | ||||
|     if (lex->buffer_count < n) | ||||
| @@ -170,6 +193,20 @@ error_t *lexer_consume_n(lexer_t *lex, const size_t len, | ||||
|     lexer_shift_buffer(lex, n); | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Consumes characters from the lexer buffer that satisfy the predicate | ||||
|  * function. Will attempt to refill the buffer if more valid characters are | ||||
|  * available. | ||||
|  * | ||||
|  * @param lex The lexer to consume from | ||||
|  * @param n Maximum number of characters to consume | ||||
|  * @param buffer Output buffer to store consumed characters | ||||
|  * @param is_valid Function that determines if a character should be consumed | ||||
|  * @param n_consumed Output parameter that will contain the number of characters | ||||
|  * consumed | ||||
|  * @return nullptr on success, an error otherwise | ||||
|  */ | ||||
| error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n], | ||||
|                        char_predicate_t is_valid, size_t *n_consumed) { | ||||
|     const size_t buffer_size = n; | ||||
| @@ -217,6 +254,18 @@ bool is_decimal_character(char c) { | ||||
|     return isdigit(c); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Processes a number token (decimal, hexadecimal, octal, or binary). | ||||
|  * Handles number formats with optional size suffixes. | ||||
|  * | ||||
|  * @param lex The lexer to read from | ||||
|  * @param token Output parameter that will be populated with the token | ||||
|  * information | ||||
|  * @return nullptr on success, an error otherwise | ||||
|  * | ||||
|  * @pre There must be at least one character in the input buffer and it should | ||||
|  * be [0-9] | ||||
|  */ | ||||
| error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) { | ||||
|     constexpr size_t max_number_length = 128; | ||||
|     size_t so_far = 0; | ||||
| @@ -294,6 +343,19 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) { | ||||
|     token->value = strdup(buffer); | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Processes a newline token (\n or \r\n). | ||||
|  * Updates the lexer's line and character position tracking. | ||||
|  * | ||||
|  * @param lex The lexer to read from | ||||
|  * @param token Output parameter that will be populated with the token | ||||
|  * information | ||||
|  * @return nullptr on success, an error otherwise | ||||
|  * | ||||
|  * @pre There must be at least one character in the input buffer and it must | ||||
|  * be [\r\n] | ||||
|  */ | ||||
| error_t *lexer_next_newline(lexer_t *lex, lexer_token_t *token) { | ||||
|     token->line_number = lex->line_number; | ||||
|     token->character_number = lex->character_number; | ||||
| @@ -323,6 +385,19 @@ bool is_identifier_character(char c) { | ||||
|     return isalnum(c) || c == '_'; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Processes an identifier token. | ||||
|  * Identifiers start with a letter or underscore and can contain alphanumeric | ||||
|  * characters or underscores. | ||||
|  * | ||||
|  * @param lex The lexer to read from | ||||
|  * @param token Output parameter that will be populated with the token | ||||
|  * information | ||||
|  * @return nullptr on success, an error otherwise | ||||
|  * | ||||
|  * @pre There must be at least 1 character in the read buffer and it must be | ||||
|  * [a-zA-Z_] | ||||
|  */ | ||||
| error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) { | ||||
|     constexpr size_t max_identifier_length = 128; | ||||
|     size_t n = 0; | ||||
| @@ -355,6 +430,17 @@ bool is_whitespace_character(char c) { | ||||
|     return c == ' ' || c == '\t'; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Processes a whitespace token (spaces and tabs). | ||||
|  * | ||||
|  * @param lex The lexer to read from | ||||
|  * @param token Output parameter that will be populated with the token | ||||
|  * information | ||||
|  * @return nullptr on success, an error otherwise | ||||
|  * | ||||
|  * @pre There must be at least one character in the buffer and it must be | ||||
|  * [ \t] | ||||
|  */ | ||||
| error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) { | ||||
|     constexpr size_t max_whitespace_length = 1024; | ||||
|     size_t n = 0; | ||||
| @@ -380,6 +466,16 @@ bool is_comment_character(char c) { | ||||
|     return c != '\r' && c != '\n'; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Processes a comment token (starts with ';' and continues to end of line). | ||||
|  * | ||||
|  * @param lex The lexer to read from | ||||
|  * @param token Output parameter that will be populated with the token | ||||
|  * information | ||||
|  * @return nullptr on success, an error otherwise | ||||
|  * | ||||
|  * @pre There must be at least one character in the buffer and it must be ';' | ||||
|  */ | ||||
| error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) { | ||||
|     constexpr size_t max_comment_length = 1024; | ||||
|     size_t n = 0; | ||||
|   | ||||
							
								
								
									
										33
									
								
								src/lexer.h
									
									
									
									
									
								
							
							
						
						
									
										33
									
								
								src/lexer.h
									
									
									
									
									
								
							| @@ -47,10 +47,43 @@ typedef struct lexer { | ||||
|     FILE *fp; | ||||
| } lexer_t; | ||||
|  | ||||
| /** | ||||
|  * @brief Closes a lexer and releases associated resources | ||||
|  * | ||||
|  * @param lex Pointer to the lexer to close | ||||
|  */ | ||||
| void lexer_close(lexer_t *lex); | ||||
|  | ||||
| /** | ||||
|  * @brief Opens a file for lexical analysis | ||||
|  * | ||||
|  * @param lex Pointer to the lexer to initialize | ||||
|  * @param path Path to the file to open | ||||
|  * @return error_t* nullptr on success, or error describing the failure | ||||
|  */ | ||||
| error_t *lexer_open(lexer_t *lex, char *path); | ||||
|  | ||||
| /** | ||||
|  * @brief Reads the next token from the input stream | ||||
|  * | ||||
|  * @param lex Pointer to an initialized lexer | ||||
|  * @param token Pointer to a token structure to fill with the next token | ||||
|  * @return error_t* nullptr on success, err_eof at end of file, or other error | ||||
|  */ | ||||
| error_t *lexer_next(lexer_t *lex, lexer_token_t *token); | ||||
|  | ||||
| /** | ||||
|  * @brief Prints a token to stdout for debugging purposes | ||||
|  * | ||||
|  * @param token Pointer to the token to print | ||||
|  */ | ||||
| void lexer_token_print(lexer_token_t *token); | ||||
|  | ||||
| /** | ||||
|  * @brief Frees any resources associated with a token | ||||
|  * | ||||
|  * @param token Pointer to the token to clean up | ||||
|  */ | ||||
| void lexer_token_cleanup(lexer_token_t *token); | ||||
|  | ||||
| #endif // INCLUDE_SRC_LEXER_H_ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user