Compare commits
	
		
			22 Commits
		
	
	
		
			297ad863c3
			...
			3a737c05d5
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 3a737c05d5 | |||
| 44254614c1 | |||
| 6230ade289 | |||
| a436f23601 | |||
| 3e325e4abd | |||
| c427adbd22 | |||
| 1bb9425546 | |||
| 5c620870c1 | |||
| 110a9bc31e | |||
| 3af255baeb | |||
| d13b6102c1 | |||
| 4a4523a1f0 | |||
| 2733d4fd7e | |||
| cbe49b2db5 | |||
| b92248ec47 | |||
| 018bb6ef9a | |||
| 85fd507004 | |||
| 0f9e1886cb | |||
| d8f3838c50 | |||
| d3881ac19d | |||
| e5be1a527e | |||
| 935da30257 | 
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							| @@ -10,7 +10,7 @@ OBJECTS = $(SOURCES:.c=.o) | ||||
| DEPENDENCIES = $(SOURCES:.c=.d) | ||||
| TARGET?=oas | ||||
| OUTPUTS=oas oas-asan oas-msan oas-afl | ||||
| RUNARGUMENTS?=-tokens tests/input/valid.asm | ||||
| RUNARGUMENTS?=ast tests/input/valid.asm | ||||
|  | ||||
| all: $(TARGET) | ||||
| 	 | ||||
|   | ||||
							
								
								
									
										207
									
								
								src/ast.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								src/ast.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,207 @@ | ||||
| #include "ast.h" | ||||
| #include "error.h" | ||||
| #include <assert.h> | ||||
| #include <string.h> | ||||
|  | ||||
| error_t *err_node_children_cap = &(error_t){ | ||||
|     .message = "Failed to increase ast node children, max capacity reached"}; | ||||
|  | ||||
| /** | ||||
|  * Allocates one zero-initialised AST node. | ||||
|  * | ||||
|  * @param[out] output receives the new node, or nullptr if allocation fails | ||||
|  * @return nullptr on success, err_allocation_failed otherwise | ||||
|  */ | ||||
| error_t *ast_node_alloc(ast_node_t **output) { | ||||
|     ast_node_t *fresh = calloc(1, sizeof *fresh); | ||||
|  | ||||
|     // A failed calloc yields nullptr, which is exactly what the caller | ||||
|     // must see in *output on the error path. | ||||
|     *output = fresh; | ||||
|     if (fresh == nullptr) | ||||
|         return err_allocation_failed; | ||||
|  | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Placeholder for releasing whatever node->value owns; currently a no-op. | ||||
|  * | ||||
|  * TODO: decide how value ownership will work and clean it up here | ||||
|  */ | ||||
| void ast_node_free_value(ast_node_t *node) { | ||||
|     (void)node; // nothing to release yet | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Releases a node, its children array and every child, recursively. | ||||
|  * | ||||
|  * @param node node to free; nullptr is accepted and ignored | ||||
|  */ | ||||
| void ast_node_free(ast_node_t *node) { | ||||
|     if (node == nullptr) | ||||
|         return; | ||||
|  | ||||
|     ast_node_t **kids = node->children; | ||||
|     if (kids != nullptr) { | ||||
|         const size_t count = node->len; | ||||
|         for (size_t idx = 0; idx != count; ++idx) | ||||
|             ast_node_free(kids[idx]); | ||||
|         free(kids); | ||||
|     } | ||||
|  | ||||
|     ast_node_free_value(node); | ||||
|  | ||||
|     // Zero the node before handing the memory back | ||||
|     memset(node, 0, sizeof *node); | ||||
|     free(node); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Gives a node its first children array: node_default_children_cap zeroed | ||||
|  * slots. | ||||
|  * | ||||
|  * @pre node->children must be nullptr | ||||
|  * @return nullptr on success, err_allocation_failed otherwise | ||||
|  */ | ||||
| error_t *ast_node_alloc_children(ast_node_t *node) { | ||||
|     ast_node_t **slots = calloc(node_default_children_cap, sizeof *slots); | ||||
|  | ||||
|     node->children = slots; | ||||
|     if (slots == nullptr) | ||||
|         return err_allocation_failed; | ||||
|  | ||||
|     node->cap = node_default_children_cap; | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Doubles the capacity of a node's children array, clamped to | ||||
|  * node_max_children_cap. | ||||
|  * | ||||
|  * @return nullptr on success, err_node_children_cap when the array is | ||||
|  *         already at the maximum, err_allocation_failed when realloc fails | ||||
|  */ | ||||
| error_t *ast_node_grow_cap(ast_node_t *node) { | ||||
|     const size_t old_cap = node->cap; | ||||
|     if (old_cap >= node_max_children_cap) | ||||
|         return err_node_children_cap; | ||||
|  | ||||
|     const size_t doubled = old_cap * 2; | ||||
|     const size_t new_cap = | ||||
|         doubled > node_max_children_cap ? node_max_children_cap : doubled; | ||||
|  | ||||
|     // Go through a temporary so a failed realloc leaves node->children valid | ||||
|     ast_node_t **grown = realloc(node->children, new_cap * sizeof *grown); | ||||
|     if (grown == nullptr) | ||||
|         return err_allocation_failed; | ||||
|  | ||||
|     node->children = grown; | ||||
|     node->cap = new_cap; | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Appends child to node's children, allocating or growing the array first | ||||
|  * when needed. | ||||
|  * | ||||
|  * @return nullptr on success; on error the child has not been stored | ||||
|  */ | ||||
| error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child) { | ||||
|     if (node->children == nullptr) { | ||||
|         error_t *alloc_err = ast_node_alloc_children(node); | ||||
|         if (alloc_err != nullptr) | ||||
|             return alloc_err; | ||||
|     } else if (node->len >= node->cap) { | ||||
|         error_t *grow_err = ast_node_grow_cap(node); | ||||
|         if (grow_err != nullptr) | ||||
|             return grow_err; | ||||
|     } | ||||
|  | ||||
|     node->children[node->len++] = child; | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| const char *ast_node_id_to_cstr(node_id_t id) { | ||||
|     switch (id) { | ||||
|     case NODE_INVALID: | ||||
|         return "NODE_INVALID"; | ||||
|     case NODE_PROGRAM: | ||||
|         return "NODE_PROGRAM"; | ||||
|     case NODE_STATEMENT: | ||||
|         return "NODE_STATEMENT"; | ||||
|     case NODE_LABEL: | ||||
|         return "NODE_LABEL"; | ||||
|     case NODE_DIRECTIVE: | ||||
|         return "NODE_DIRECTIVE"; | ||||
|     case NODE_INSTRUCTION: | ||||
|         return "NODE_INSTRUCTION"; | ||||
|     case NODE_OPERANDS: | ||||
|         return "NODE_OPERANDS"; | ||||
|     case NODE_OPERAND: | ||||
|         return "NODE_OPERAND"; | ||||
|     case NODE_IMMEDIATE: | ||||
|         return "NODE_IMMEDIATE"; | ||||
|     case NODE_MEMORY: | ||||
|         return "NODE_MEMORY"; | ||||
|     case NODE_NUMBER: | ||||
|         return "NODE_NUMBER"; | ||||
|     case NODE_LABEL_REFERENCE: | ||||
|         return "NODE_LABEL_REFERENCE"; | ||||
|     case NODE_MEMORY_EXPRESSION: | ||||
|         return "NODE_MEMORY_EXPRESSION"; | ||||
|     case NODE_REGISTER_EXPRESSION: | ||||
|         return "NODE_REGISTER_EXPRESSION"; | ||||
|     case NODE_REGISTER_INDEX: | ||||
|         return "NODE_REGISTER_INDEX"; | ||||
|     case NODE_REGISTER_OFFSET: | ||||
|         return "NODE_REGISTER_OFFSET"; | ||||
|     case NODE_PLUS_OR_MINUS: | ||||
|         return "NODE_PLUS_OR_MINUS"; | ||||
|     case NODE_SECTION_DIRECTIVE: | ||||
|         return "NODE_SECTION_DIRECTIVE"; | ||||
|     case NODE_REGISTER: | ||||
|         return "NODE_REGISTER"; | ||||
|     case NODE_SECTION: | ||||
|         return "NODE_SECTION"; | ||||
|     case NODE_IDENTIFIER: | ||||
|         return "NODE_IDENTIFIER"; | ||||
|     case NODE_DECIMAL: | ||||
|         return "NODE_DECIMAL"; | ||||
|     case NODE_HEXADECIMAL: | ||||
|         return "NODE_HEXADECIMAL"; | ||||
|     case NODE_OCTAL: | ||||
|         return "NODE_OCTAL"; | ||||
|     case NODE_BINARY: | ||||
|         return "NODE_BINARY"; | ||||
|     case NODE_CHAR: | ||||
|         return "NODE_CHAR"; | ||||
|     case NODE_STRING: | ||||
|         return "NODE_STRING"; | ||||
|     case NODE_COLON: | ||||
|         return "NODE_COLON"; | ||||
|     case NODE_COMMA: | ||||
|         return "NODE_COMMA"; | ||||
|     case NODE_LBRACKET: | ||||
|         return "NODE_LBRACKET"; | ||||
|     case NODE_RBRACKET: | ||||
|         return "NODE_RBRACKET"; | ||||
|     case NODE_PLUS: | ||||
|         return "NODE_PLUS"; | ||||
|     case NODE_MINUS: | ||||
|         return "NODE_MINUS"; | ||||
|     case NODE_ASTERISK: | ||||
|         return "NODE_ASTERISK"; | ||||
|     case NODE_DOT: | ||||
|         return "NODE_DOT"; | ||||
|     } | ||||
|     assert(!"Unreachable, weird node id" && id); | ||||
|     __builtin_unreachable(); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * @brief Prints one node and, recursively, its children to stdout | ||||
|  * | ||||
|  * Each node takes one line: two spaces per indentation level, the node id | ||||
|  * name, and the token text in double quotes when the node has one. | ||||
|  * | ||||
|  * @param node The node to print; nullptr prints nothing | ||||
|  * @param indent Indentation level of this node | ||||
|  */ | ||||
| static void ast_node_print_internal(ast_node_t *node, int indent) { | ||||
|     if (node == nullptr) | ||||
|         return; | ||||
|  | ||||
|     for (int remaining = indent; remaining > 0; --remaining) | ||||
|         fputs("  ", stdout); | ||||
|  | ||||
|     fputs(ast_node_id_to_cstr(node->id), stdout); | ||||
|  | ||||
|     // The token text is optional: not every node carries a token entry | ||||
|     const tokenlist_entry_t *entry = node->token_entry; | ||||
|     if (entry != nullptr && entry->token.value != nullptr) | ||||
|         printf(" \"%s\"", entry->token.value); | ||||
|     putchar('\n'); | ||||
|  | ||||
|     const int child_indent = indent + 1; | ||||
|     for (size_t idx = 0; idx < node->len; ++idx) | ||||
|         ast_node_print_internal(node->children[idx], child_indent); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * @brief Prints the tree rooted at the given node | ||||
|  * | ||||
|  * Output is one line per node, indented by depth, showing the node type | ||||
|  * and, where a token value is attached, that value in quotes. | ||||
|  * | ||||
|  * @param node The root node of the AST to print | ||||
|  */ | ||||
| void ast_node_print(ast_node_t *node) { | ||||
|     const int root_indent = 0; | ||||
|     ast_node_print_internal(node, root_indent); | ||||
| } | ||||
							
								
								
									
										112
									
								
								src/ast.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										112
									
								
								src/ast.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,112 @@ | ||||
| #ifndef INCLUDE_SRC_AST_H_ | ||||
| #define INCLUDE_SRC_AST_H_ | ||||
|  | ||||
| #include "error.h" | ||||
| #include "lexer.h" | ||||
| #include "tokenlist.h" | ||||
| #include <stddef.h> | ||||
| #include <stdint.h> | ||||
|  | ||||
| // Identifies what an AST node represents. Every enumerator has a matching | ||||
| // case in ast_node_id_to_cstr. | ||||
| typedef enum node_id { | ||||
|     NODE_INVALID = 0, | ||||
|  | ||||
|     // Composite nodes | ||||
|     NODE_PROGRAM, | ||||
|     NODE_STATEMENT, | ||||
|     NODE_LABEL, | ||||
|     NODE_DIRECTIVE, | ||||
|     NODE_INSTRUCTION, | ||||
|     NODE_OPERANDS, | ||||
|     NODE_OPERAND, | ||||
|     NODE_IMMEDIATE, | ||||
|     NODE_MEMORY, | ||||
|     NODE_NUMBER, | ||||
|     NODE_LABEL_REFERENCE, | ||||
|     NODE_MEMORY_EXPRESSION, | ||||
|     NODE_REGISTER_EXPRESSION, | ||||
|     NODE_REGISTER_INDEX, | ||||
|     NODE_REGISTER_OFFSET, | ||||
|     NODE_PLUS_OR_MINUS, | ||||
|     NODE_SECTION_DIRECTIVE, | ||||
|  | ||||
|     // Validated primitives | ||||
|     NODE_REGISTER, | ||||
|     NODE_SECTION, | ||||
|  | ||||
|     // Primitive nodes | ||||
|     NODE_IDENTIFIER, | ||||
|     NODE_DECIMAL, | ||||
|     NODE_HEXADECIMAL, | ||||
|     NODE_OCTAL, | ||||
|     NODE_BINARY, | ||||
|     NODE_CHAR, | ||||
|     NODE_STRING, | ||||
|     NODE_COLON, | ||||
|     NODE_COMMA, | ||||
|     NODE_LBRACKET, | ||||
|     NODE_RBRACKET, | ||||
|     NODE_PLUS, | ||||
|     NODE_MINUS, | ||||
|     NODE_ASTERISK, | ||||
|     NODE_DOT, | ||||
| } node_id_t; | ||||
|  | ||||
| typedef struct ast_node ast_node_t; | ||||
|  | ||||
| // Capacity of a children array when it is first allocated | ||||
| constexpr size_t node_default_children_cap = 8; | ||||
| /* Upper bound for a children array: 65K ought to be enough for anybody */ | ||||
| constexpr size_t node_max_children_cap = 1 << 16; | ||||
|  | ||||
| struct ast_node { | ||||
|     node_id_t id;                   // what this node represents | ||||
|     tokenlist_entry_t *token_entry; // token attached to the node, may be null | ||||
|     size_t len;                     // number of children in use | ||||
|     size_t cap;                     // number of allocated child slots | ||||
|     struct ast_node **children;     // child array, nullptr until first add | ||||
|  | ||||
|     // Node payload. NOTE(review): nothing in the struct records which | ||||
|     // member is active - presumably implied by id, confirm. | ||||
|     union { | ||||
|         struct { | ||||
|             uint64_t value; | ||||
|             int size; | ||||
|         } integer; | ||||
|         char *name; | ||||
|     } value; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Allocates a new AST node | ||||
|  * | ||||
|  * Creates and initializes a new AST node with default (zero) values. | ||||
|  * | ||||
|  * @param[out] output Pointer to store the allocated node; set to nullptr | ||||
|  *                    when the allocation fails | ||||
|  * @return error_t* nullptr on success, allocation error on failure | ||||
|  */ | ||||
| error_t *ast_node_alloc(ast_node_t **output); | ||||
|  | ||||
| /** | ||||
|  * @brief Frees an AST node and all its children recursively | ||||
|  * | ||||
|  * Recursively frees all children of the node and the children array, then | ||||
|  * frees the node itself. Every child stored in the node is released too, so | ||||
|  * children must not be freed separately by the caller. | ||||
|  * If node is nullptr, the function returns without doing anything. | ||||
|  * | ||||
|  * @param node The node to free, may be nullptr | ||||
|  */ | ||||
| void ast_node_free(ast_node_t *node); | ||||
|  | ||||
| /** | ||||
|  * @brief Adds a child node to a parent node | ||||
|  * | ||||
|  * Appends the specified child node to the parent's children array. | ||||
|  * If this is the first child, the function allocates the children array. | ||||
|  * If the children array is full, the function increases its capacity. | ||||
|  * | ||||
|  * On success the parent holds the child pointer and ast_node_free on the | ||||
|  * parent will free the child as well. On failure the child is not stored, | ||||
|  * so the caller remains responsible for it. | ||||
|  * | ||||
|  * @param node The parent node to add the child to | ||||
|  * @param child The child node to add | ||||
|  * @return error_t* nullptr on success, allocation error on failure, | ||||
|  *                  or err_node_children_cap if maximum capacity is reached | ||||
|  */ | ||||
| error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child); | ||||
|  | ||||
| /** | ||||
|  * @brief Returns the enumerator name of a node id as a string literal | ||||
|  * | ||||
|  * @param id The node id to name | ||||
|  * @return const char* e.g. "NODE_PROGRAM"; the caller must not free it | ||||
|  */ | ||||
| const char *ast_node_id_to_cstr(node_id_t id); | ||||
|  | ||||
| /** | ||||
|  * @brief Prints the tree rooted at node to stdout, one indented line per | ||||
|  *        node | ||||
|  * | ||||
|  * @param node The root node to print; nullptr prints nothing | ||||
|  */ | ||||
| void ast_node_print(ast_node_t *node); | ||||
|  | ||||
| #endif // INCLUDE_SRC_AST_H_ | ||||
| @@ -183,7 +183,7 @@ error_t *lexer_consume_n(lexer_t *lex, const size_t len, | ||||
|                          char buffer[static len], const size_t n) { | ||||
|     if (lex->buffer_count < n) | ||||
|         return err_buffer_underrun; | ||||
|     if (len > n) | ||||
|     if (n > len) | ||||
|         return err_consume_excessive_length; | ||||
|  | ||||
|     memcpy(buffer, lex->buffer, n); | ||||
|   | ||||
							
								
								
									
										90
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										90
									
								
								src/main.c
									
									
									
									
									
								
							| @@ -1,5 +1,6 @@ | ||||
| #include "error.h" | ||||
| #include "lexer.h" | ||||
| #include "parser.h" | ||||
| #include "tokenlist.h" | ||||
|  | ||||
| #include <limits.h> | ||||
| @@ -7,38 +8,64 @@ | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
|  | ||||
| bool print_token(lexer_token_t *token) { | ||||
|     lexer_token_print(token); | ||||
|     return true; | ||||
| typedef enum mode { MODE_AST, MODE_TEXT, MODE_TOKENS } mode_t; | ||||
|  | ||||
| // Prints every token of the list, in order, using lexer_token_print | ||||
| void print_tokens(tokenlist_t *list) { | ||||
|     tokenlist_entry_t *cursor = list->head; | ||||
|     while (cursor != nullptr) { | ||||
|         lexer_token_print(&cursor->token); | ||||
|         cursor = cursor->next; | ||||
|     } | ||||
| } | ||||
|  | ||||
| bool print_value(lexer_token_t *token) { | ||||
|     if (token->id == TOKEN_ERROR) { | ||||
|         printf("%s\n", token->value); | ||||
|         for (size_t i = 0; i < token->character_number; ++i) | ||||
|             printf(" "); | ||||
|         printf("^-- %s\n", token->explanation); | ||||
|     } else { | ||||
|         printf("%s", token->value); | ||||
| void print_text(tokenlist_t *list) { | ||||
|     for (auto entry = list->head; entry; entry = entry->next) { | ||||
|         auto token = &entry->token; | ||||
|         if (token->id == TOKEN_ERROR) { | ||||
|             printf("%s\n", token->value); | ||||
|             for (size_t i = 0; i < token->character_number; ++i) | ||||
|                 printf(" "); | ||||
|             printf("^-- %s\n", token->explanation); | ||||
|             return; | ||||
|         } else { | ||||
|             printf("%s", token->value); | ||||
|         } | ||||
|     } | ||||
|     return token->id != TOKEN_ERROR; | ||||
| } | ||||
|  | ||||
| void print_ast(tokenlist_t *list) { | ||||
|     parse_result_t result = parse(list->head); | ||||
|     if (result.err) { | ||||
|         puts(result.err->message); | ||||
|         error_free(result.err); | ||||
|         return; | ||||
|     } | ||||
|     ast_node_print(result.node); | ||||
|  | ||||
|     if (result.next != nullptr) { | ||||
|         puts("First unparsed token:"); | ||||
|         lexer_token_print(&result.next->token); | ||||
|     } | ||||
|  | ||||
|     ast_node_free(result.node); | ||||
| } | ||||
|  | ||||
| // Translates the command line into an execution mode. Anything other than | ||||
| // exactly "<mode> <filename>" with a known mode prints the usage line and | ||||
| // terminates the process with exit status 1. | ||||
| int get_execution_mode(int argc, char *argv[]) { | ||||
|     if (argc == 3) { | ||||
|         const char *requested = argv[1]; | ||||
|         if (strcmp(requested, "tokens") == 0) | ||||
|             return MODE_TOKENS; | ||||
|         if (strcmp(requested, "text") == 0) | ||||
|             return MODE_TEXT; | ||||
|         if (strcmp(requested, "ast") == 0) | ||||
|             return MODE_AST; | ||||
|     } | ||||
|  | ||||
|     puts("Usage: oas [tokens|text|ast] <filename>"); | ||||
|     exit(1); | ||||
| } | ||||
|  | ||||
| int main(int argc, char *argv[]) { | ||||
|     if (argc != 3 || | ||||
|         (strcmp(argv[1], "-tokens") != 0 && strcmp(argv[1], "-text") != 0)) { | ||||
|         puts("Usage: oas -tokens <filename>"); | ||||
|         puts("Usage: oas -text <filename>"); | ||||
|         return 1; | ||||
|     } | ||||
|  | ||||
|     bool (*print_fn)(lexer_token_t *); | ||||
|     mode_t mode = get_execution_mode(argc, argv); | ||||
|     char *filename = argv[2]; | ||||
|     if (strcmp(argv[1], "-tokens") == 0) { | ||||
|         print_fn = print_token; | ||||
|     } else { | ||||
|         print_fn = print_value; | ||||
|     } | ||||
|  | ||||
|     lexer_t *lex = &(lexer_t){}; | ||||
|     error_t *err = lexer_open(lex, filename); | ||||
| @@ -54,9 +81,18 @@ int main(int argc, char *argv[]) { | ||||
|     if (err) | ||||
|         goto cleanup_tokens; | ||||
|  | ||||
|     for (auto entry = list->head; entry; entry = entry->next) { | ||||
|         print_fn(&entry->token); | ||||
|     switch (mode) { | ||||
|     case MODE_TOKENS: | ||||
|         print_tokens(list); | ||||
|         break; | ||||
|     case MODE_TEXT: | ||||
|         print_text(list); | ||||
|         break; | ||||
|     case MODE_AST: | ||||
|         print_ast(list); | ||||
|         break; | ||||
|     } | ||||
|  | ||||
|     tokenlist_free(list); | ||||
|     error_free(err); | ||||
|     return 0; | ||||
|   | ||||
							
								
								
									
										53
									
								
								src/parser.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								src/parser.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| #include "parser.h" | ||||
| #include "ast.h" | ||||
| #include "lexer.h" | ||||
| #include "parser_combinators.h" | ||||
| #include "parser_primitives.h" | ||||
| #include "parser_util.h" | ||||
| #include "tokenlist.h" | ||||
|  | ||||
| // Matches one numeric literal: octal, decimal, hexadecimal or binary, | ||||
| // tried in that order. | ||||
| parse_result_t parse_number(tokenlist_entry_t *current) { | ||||
|     parser_t alternatives[] = { | ||||
|         parse_octal, | ||||
|         parse_decimal, | ||||
|         parse_hexadecimal, | ||||
|         parse_binary, | ||||
|         nullptr, // terminator for parse_any | ||||
|     }; | ||||
|     return parse_any(current, alternatives); | ||||
| } | ||||
|  | ||||
| // Matches one operand: currently a register or a number. | ||||
| parse_result_t parse_operand(tokenlist_entry_t *current) { | ||||
|     // FIXME: not the correct set of parsers | ||||
|     parser_t alternatives[] = { | ||||
|         parse_register, | ||||
|         parse_number, | ||||
|         nullptr, // terminator for parse_any | ||||
|     }; | ||||
|     return parse_any(current, alternatives); | ||||
| } | ||||
|  | ||||
| // Matches a comma separated, possibly empty, list of operands and wraps | ||||
| // them in a NODE_OPERANDS node. | ||||
| parse_result_t parse_operands(tokenlist_entry_t *current) { | ||||
|     const bool allow_none = true; | ||||
|     return parse_list(current, NODE_OPERANDS, allow_none, TOKEN_COMMA, | ||||
|                       parse_operand); | ||||
| } | ||||
|  | ||||
| // Matches an identifier followed by a colon and wraps both in a NODE_LABEL | ||||
| // node. | ||||
| parse_result_t parse_label(tokenlist_entry_t *current) { | ||||
|     parser_t sequence[] = { | ||||
|         parse_identifier, | ||||
|         parse_colon, | ||||
|         nullptr, // terminator for parse_consecutive | ||||
|     }; | ||||
|     return parse_consecutive(current, NODE_LABEL, sequence); | ||||
| } | ||||
|  | ||||
| // Matches the "section" keyword followed by an identifier and wraps both | ||||
| // in a NODE_SECTION_DIRECTIVE node. | ||||
| parse_result_t parse_section_directive(tokenlist_entry_t *current) { | ||||
|     parser_t sequence[] = { | ||||
|         parse_section, | ||||
|         parse_identifier, | ||||
|         nullptr, // terminator for parse_consecutive | ||||
|     }; | ||||
|     return parse_consecutive(current, NODE_SECTION_DIRECTIVE, sequence); | ||||
| } | ||||
|  | ||||
| // Matches a dot followed by a section directive and wraps both in a | ||||
| // NODE_DIRECTIVE node. | ||||
| parse_result_t parse_directive(tokenlist_entry_t *current) { | ||||
|     parser_t sequence[] = { | ||||
|         parse_dot, | ||||
|         parse_section_directive, | ||||
|         nullptr, // terminator for parse_consecutive | ||||
|     }; | ||||
|     return parse_consecutive(current, NODE_DIRECTIVE, sequence); | ||||
| } | ||||
|  | ||||
| // Matches an identifier followed by its operand list and wraps both in a | ||||
| // NODE_INSTRUCTION node. | ||||
| parse_result_t parse_instruction(tokenlist_entry_t *current) { | ||||
|     parser_t sequence[] = { | ||||
|         parse_identifier, | ||||
|         parse_operands, | ||||
|         nullptr, // terminator for parse_consecutive | ||||
|     }; | ||||
|     return parse_consecutive(current, NODE_INSTRUCTION, sequence); | ||||
| } | ||||
|  | ||||
| // Matches one statement: a label, a directive or an instruction, tried in | ||||
| // that order. | ||||
| parse_result_t parse_statement(tokenlist_entry_t *current) { | ||||
|     parser_t alternatives[] = { | ||||
|         parse_label, | ||||
|         parse_directive, | ||||
|         parse_instruction, | ||||
|         nullptr, // terminator for parse_any | ||||
|     }; | ||||
|     return parse_any(current, alternatives); | ||||
| } | ||||
|  | ||||
| // Entry point: matches as many statements as possible (none is allowed) | ||||
| // and wraps them in a NODE_PROGRAM node. | ||||
| parse_result_t parse(tokenlist_entry_t *current) { | ||||
|     const bool allow_none = true; | ||||
|     return parse_many(current, NODE_PROGRAM, allow_none, parse_statement); | ||||
| } | ||||
							
								
								
									
										11
									
								
								src/parser.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								src/parser.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| #ifndef INCLUDE_SRC_PARSER_H_ | ||||
| #define INCLUDE_SRC_PARSER_H_ | ||||
|  | ||||
| #include "ast.h" | ||||
| #include "error.h" | ||||
| #include "parser_util.h" | ||||
| #include "tokenlist.h" | ||||
|  | ||||
| /** | ||||
|  * @brief Parses a token list into an AST | ||||
|  * | ||||
|  * @param current First token entry to parse | ||||
|  * @return parse_result_t The parse outcome; on success `node` is the root | ||||
|  *         of the tree and `next` the first unparsed token, on failure `err` | ||||
|  *         is set | ||||
|  */ | ||||
| parse_result_t parse(tokenlist_entry_t *current); | ||||
|  | ||||
| #endif // INCLUDE_SRC_PARSER_H_ | ||||
							
								
								
									
										126
									
								
								src/parser_combinators.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										126
									
								
								src/parser_combinators.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,126 @@ | ||||
| #include "parser_combinators.h" | ||||
|  | ||||
| /** | ||||
|  * Parses a delimiter separated list of elements into a new node. | ||||
|  * | ||||
|  * Elements are parsed with `parser`; between two elements a token with id | ||||
|  * `delimiter_id` is required. Delimiters are not stored in the parent node. | ||||
|  * A delimiter is only consumed when an element follows it, so a dangling | ||||
|  * delimiter is handed back to the caller as the first unparsed token. | ||||
|  * | ||||
|  * @param current      first token to look at | ||||
|  * @param id           node id given to the list node | ||||
|  * @param allow_none   whether an empty list counts as a match | ||||
|  * @param delimiter_id token id that separates the elements | ||||
|  * @param parser       parser used for every element | ||||
|  * @return the list node and the first unconsumed token on success, a | ||||
|  *         no-match result for a disallowed empty list, or the error that | ||||
|  *         stopped parsing | ||||
|  */ | ||||
| parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id, | ||||
|                           bool allow_none, lexer_token_id_t delimiter_id, | ||||
|                           parser_t parser) { | ||||
|     ast_node_t *many; | ||||
|     error_t *err = ast_node_alloc(&many); | ||||
|     parse_result_t result; | ||||
|     if (err) | ||||
|         return parse_error(err); | ||||
|     many->id = id; | ||||
|  | ||||
|     while (current) { | ||||
|         // Look ahead without committing: `current` only moves once the | ||||
|         // element after the delimiter has actually been parsed. | ||||
|         tokenlist_entry_t *element = current; | ||||
|         if (many->len > 0) { | ||||
|             if (current->token.id != delimiter_id) | ||||
|                 break; | ||||
|             element = tokenlist_next(current); | ||||
|             if (element == nullptr) | ||||
|                 break; // delimiter at end of input stays unconsumed | ||||
|         } | ||||
|  | ||||
|         result = parser(element); | ||||
|         if (result.err == err_parse_no_match) | ||||
|             break; // `current` still points at the delimiter, if any | ||||
|         if (result.err) { | ||||
|             ast_node_free(many); | ||||
|             return result; | ||||
|         } | ||||
|         err = ast_node_add_child(many, result.node); | ||||
|         if (err) { | ||||
|             ast_node_free(many); | ||||
|             ast_node_free(result.node); | ||||
|             return parse_error(err); | ||||
|         } | ||||
|         current = result.next; | ||||
|     } | ||||
|  | ||||
|     if (!allow_none && many->len == 0) { | ||||
|         ast_node_free(many); | ||||
|         return parse_no_match(); | ||||
|     } | ||||
|     return parse_success(many, current); | ||||
| } | ||||
|  | ||||
| // Tries each parser of the nullptr terminated array on the same token and | ||||
| // returns the first result without an error; if none succeeds the result | ||||
| // is a no-match. | ||||
| // NOTE(review): every error of an alternative is discarded and reported as | ||||
| // no-match, including errors other than err_parse_no_match - confirm that | ||||
| // is intended. | ||||
| parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]) { | ||||
|     for (size_t idx = 0; parsers[idx] != nullptr; ++idx) { | ||||
|         parse_result_t attempt = parsers[idx](current); | ||||
|         if (attempt.err == nullptr) | ||||
|             return attempt; | ||||
|     } | ||||
|     return parse_no_match(); | ||||
| } | ||||
|  | ||||
| // Parses as many objects of the given parser in a row as possible and | ||||
| // wraps them in a new ast node with the given node id. With allow_none | ||||
| // set, finding no object at all still counts as a match. | ||||
| // NOTE(review): the loop only advances through result.next, so a parser | ||||
| // that succeeds without consuming a token would repeat until | ||||
| // ast_node_add_child fails - confirm no such parser is passed in. | ||||
| parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id, | ||||
|                           bool allow_none, parser_t parser) { | ||||
|     ast_node_t *many; | ||||
|     error_t *err = ast_node_alloc(&many); | ||||
|     if (err != nullptr) | ||||
|         return parse_error(err); | ||||
|     many->id = id; | ||||
|  | ||||
|     while (current != nullptr) { | ||||
|         parse_result_t step = parser(current); | ||||
|         if (step.err == err_parse_no_match) | ||||
|             break; | ||||
|         if (step.err != nullptr) { | ||||
|             ast_node_free(many); | ||||
|             return step; | ||||
|         } | ||||
|  | ||||
|         err = ast_node_add_child(many, step.node); | ||||
|         if (err != nullptr) { | ||||
|             // The child never made it into `many`, free it separately | ||||
|             ast_node_free(many); | ||||
|             ast_node_free(step.node); | ||||
|             return parse_error(err); | ||||
|         } | ||||
|         current = step.next; | ||||
|     } | ||||
|  | ||||
|     if (many->len == 0 && !allow_none) { | ||||
|         ast_node_free(many); | ||||
|         return parse_no_match(); | ||||
|     } | ||||
|     return parse_success(many, current); | ||||
| } | ||||
|  | ||||
| // Runs the parsers of the nullptr terminated array one after another, each | ||||
| // starting where the previous one stopped, and wraps the parsed nodes in a | ||||
| // new parent node with the given id. The first failing parser aborts the | ||||
| // whole sequence and its result is returned. | ||||
| // NOTE(review): when the tokens run out before every parser has run, the | ||||
| // nodes collected so far are still returned as a success - confirm that a | ||||
| // partial sequence at end of input is intended. | ||||
| parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id, | ||||
|                                  parser_t parsers[]) { | ||||
|     ast_node_t *all; | ||||
|     error_t *err = ast_node_alloc(&all); | ||||
|     if (err != nullptr) | ||||
|         return parse_error(err); | ||||
|     all->id = id; | ||||
|  | ||||
|     for (parser_t *step = parsers; *step != nullptr && current != nullptr; | ||||
|          ++step) { | ||||
|         parse_result_t result = (*step)(current); | ||||
|         if (result.err != nullptr) { | ||||
|             ast_node_free(all); | ||||
|             return result; | ||||
|         } | ||||
|  | ||||
|         err = ast_node_add_child(all, result.node); | ||||
|         if (err != nullptr) { | ||||
|             // The child never made it into `all`, free it separately | ||||
|             ast_node_free(result.node); | ||||
|             ast_node_free(all); | ||||
|             return parse_error(err); | ||||
|         } | ||||
|         current = result.next; | ||||
|     } | ||||
|     return parse_success(all, current); | ||||
| } | ||||
							
								
								
									
										20
									
								
								src/parser_combinators.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								src/parser_combinators.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | ||||
| #ifndef INCLUDE_SRC_PARSER_COMBINATORS_H_ | ||||
| #define INCLUDE_SRC_PARSER_COMBINATORS_H_ | ||||
|  | ||||
| #include "parser_util.h" | ||||
|  | ||||
| // A parser looks at the given token entry and reports what it matched | ||||
| typedef parse_result_t (*parser_t)(tokenlist_entry_t *); | ||||
|  | ||||
| // Tries every parser of the nullptr terminated array on the same token and | ||||
| // returns the first successful result, or a no-match if none succeeds | ||||
| parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]); | ||||
|  | ||||
| // Parses as many objects of the given parser in a row as possible, | ||||
| // potentially allowing none, and wraps the found objects in a new ast node | ||||
| // with the given node id | ||||
| parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id, | ||||
|                           bool allow_none, parser_t parser); | ||||
|  | ||||
| // Parses a list of objects of the given parser delimited by the given token | ||||
| // id. Does not store the delimiters in the parent node | ||||
| parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id, | ||||
|                           bool allow_none, lexer_token_id_t delimiter_id, | ||||
|                           parser_t parser); | ||||
|  | ||||
| // Tries to run all parsers consecutively and, if that succeeds, wraps the | ||||
| // parsed nodes in a new parent node with the given node id | ||||
| parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id, | ||||
|                                  parser_t parsers[]); | ||||
|  | ||||
| #endif // INCLUDE_SRC_PARSER_COMBINATORS_H_ | ||||
							
								
								
									
										97
									
								
								src/parser_primitives.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								src/parser_primitives.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | ||||
| #include "parser_primitives.h" | ||||
| #include "ast.h" | ||||
| #include <string.h> | ||||
|  | ||||
| // Shared body of the primitive parsers below: matches a single token of | ||||
| // the given id, without an extra validation callback, and produces a node | ||||
| // of the given id. | ||||
| static parse_result_t parse_plain_token(tokenlist_entry_t *current, | ||||
|                                         lexer_token_id_t token_id, | ||||
|                                         node_id_t node_id) { | ||||
|     return parse_token(current, token_id, node_id, nullptr); | ||||
| } | ||||
|  | ||||
| // TOKEN_IDENTIFIER -> NODE_IDENTIFIER | ||||
| parse_result_t parse_identifier(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_IDENTIFIER, NODE_IDENTIFIER); | ||||
| } | ||||
|  | ||||
| // TOKEN_DECIMAL -> NODE_DECIMAL | ||||
| parse_result_t parse_decimal(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_DECIMAL, NODE_DECIMAL); | ||||
| } | ||||
|  | ||||
| // TOKEN_HEXADECIMAL -> NODE_HEXADECIMAL | ||||
| parse_result_t parse_hexadecimal(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_HEXADECIMAL, NODE_HEXADECIMAL); | ||||
| } | ||||
|  | ||||
| // TOKEN_BINARY -> NODE_BINARY | ||||
| parse_result_t parse_binary(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_BINARY, NODE_BINARY); | ||||
| } | ||||
|  | ||||
| // TOKEN_OCTAL -> NODE_OCTAL | ||||
| parse_result_t parse_octal(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_OCTAL, NODE_OCTAL); | ||||
| } | ||||
|  | ||||
| // TOKEN_STRING -> NODE_STRING | ||||
| parse_result_t parse_string(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_STRING, NODE_STRING); | ||||
| } | ||||
|  | ||||
| // TOKEN_CHAR -> NODE_CHAR | ||||
| parse_result_t parse_char(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_CHAR, NODE_CHAR); | ||||
| } | ||||
|  | ||||
| // TOKEN_COLON -> NODE_COLON | ||||
| parse_result_t parse_colon(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_COLON, NODE_COLON); | ||||
| } | ||||
|  | ||||
| // TOKEN_COMMA -> NODE_COMMA | ||||
| parse_result_t parse_comma(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_COMMA, NODE_COMMA); | ||||
| } | ||||
|  | ||||
| // TOKEN_LBRACKET -> NODE_LBRACKET | ||||
| parse_result_t parse_lbracket(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_LBRACKET, NODE_LBRACKET); | ||||
| } | ||||
|  | ||||
| // TOKEN_RBRACKET -> NODE_RBRACKET | ||||
| parse_result_t parse_rbracket(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_RBRACKET, NODE_RBRACKET); | ||||
| } | ||||
|  | ||||
| // TOKEN_PLUS -> NODE_PLUS | ||||
| parse_result_t parse_plus(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_PLUS, NODE_PLUS); | ||||
| } | ||||
|  | ||||
| // TOKEN_MINUS -> NODE_MINUS | ||||
| parse_result_t parse_minus(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_MINUS, NODE_MINUS); | ||||
| } | ||||
|  | ||||
| // TOKEN_ASTERISK -> NODE_ASTERISK | ||||
| parse_result_t parse_asterisk(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_ASTERISK, NODE_ASTERISK); | ||||
| } | ||||
|  | ||||
| // TOKEN_DOT -> NODE_DOT | ||||
| parse_result_t parse_dot(tokenlist_entry_t *current) { | ||||
|     return parse_plain_token(current, TOKEN_DOT, NODE_DOT); | ||||
| } | ||||
|  | ||||
| // Register names recognised by is_register_token; the list is terminated | ||||
| // by nullptr. | ||||
| const char *registers[] = { | ||||
|     // 64-bit registers | ||||
|     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", | ||||
|     "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", | ||||
|     // 32-bit registers | ||||
|     "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", | ||||
|     "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d", | ||||
|     // 16-bit registers | ||||
|     "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", | ||||
|     "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w", | ||||
|     // 8-bit low registers | ||||
|     "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", | ||||
|     "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b", | ||||
|     // end of list marker | ||||
|     nullptr, | ||||
| }; | ||||
| // Returns true when the token's text is exactly one of the names in | ||||
| // `registers` (case sensitive comparison). | ||||
| bool is_register_token(lexer_token_t *token) { | ||||
|     for (const char **name = registers; *name != nullptr; ++name) { | ||||
|         if (strcmp(token->value, *name) == 0) | ||||
|             return true; | ||||
|     } | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Match a TOKEN_IDENTIFIER whose value passes is_register_token and produce | ||||
|  * a NODE_REGISTER node for it. | ||||
|  */ | ||||
| parse_result_t parse_register(tokenlist_entry_t *current) { | ||||
|     token_validator_t validator = is_register_token; | ||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_REGISTER, validator); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Report whether the token's value is exactly the string "section". | ||||
|  */ | ||||
| bool is_section_token(lexer_token_t *token) { | ||||
|     const bool matches = strcmp(token->value, "section") == 0; | ||||
|     return matches; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Match a TOKEN_IDENTIFIER whose value passes is_section_token and produce | ||||
|  * a NODE_SECTION node for it. | ||||
|  */ | ||||
| parse_result_t parse_section(tokenlist_entry_t *current) { | ||||
|     token_validator_t validator = is_section_token; | ||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION, validator); | ||||
| } | ||||
							
								
								
									
										29
									
								
								src/parser_primitives.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								src/parser_primitives.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | ||||
| #ifndef INCLUDE_SRC_PARSER_PRIMITIVES_H_ | ||||
| #define INCLUDE_SRC_PARSER_PRIMITIVES_H_ | ||||
|  | ||||
| #include "parser_util.h" | ||||
|  | ||||
| /* Parsers for individual token kinds. Each one takes the token list entry to | ||||
|  * start at and reports its outcome as a parse_result_t. | ||||
|  */ | ||||
| parse_result_t parse_identifier(tokenlist_entry_t *current); | ||||
| parse_result_t parse_decimal(tokenlist_entry_t *current); | ||||
| parse_result_t parse_hexadecimal(tokenlist_entry_t *current); | ||||
| parse_result_t parse_binary(tokenlist_entry_t *current); | ||||
| parse_result_t parse_octal(tokenlist_entry_t *current); | ||||
| parse_result_t parse_string(tokenlist_entry_t *current); | ||||
| parse_result_t parse_char(tokenlist_entry_t *current); | ||||
| parse_result_t parse_colon(tokenlist_entry_t *current); | ||||
| parse_result_t parse_comma(tokenlist_entry_t *current); | ||||
| parse_result_t parse_lbracket(tokenlist_entry_t *current); | ||||
| parse_result_t parse_rbracket(tokenlist_entry_t *current); | ||||
| parse_result_t parse_plus(tokenlist_entry_t *current); | ||||
| parse_result_t parse_minus(tokenlist_entry_t *current); | ||||
| parse_result_t parse_asterisk(tokenlist_entry_t *current); | ||||
| parse_result_t parse_dot(tokenlist_entry_t *current); | ||||
|  | ||||
| /* These are "primitives" under a different name, with some extra validation | ||||
|  * on top. For example, a register is just an identifier token, but it only | ||||
|  * matches a limited set of values. | ||||
|  */ | ||||
| parse_result_t parse_register(tokenlist_entry_t *current); | ||||
| parse_result_t parse_section(tokenlist_entry_t *current); | ||||
|  | ||||
| #endif // INCLUDE_SRC_PARSER_PRIMITIVES_H_ | ||||
							
								
								
									
										35
									
								
								src/parser_util.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								src/parser_util.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| #include "parser_util.h" | ||||
| #include "tokenlist.h" | ||||
|  | ||||
| /* Shared error object returned by parse_no_match(); callers can compare a | ||||
|  * result's err pointer against it to tell "did not match" from other errors. | ||||
|  */ | ||||
| error_t *err_parse_no_match = | ||||
|     &(error_t){.message = "parsing failed to find the correct token sequence"}; | ||||
|  | ||||
| /** | ||||
|  * Build a failed parse result that carries err; node and next are left null. | ||||
|  */ | ||||
| parse_result_t parse_error(error_t *err) { | ||||
|     parse_result_t failure = {.err = err, .next = nullptr, .node = nullptr}; | ||||
|     return failure; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Build the failed parse result used when the tokens simply do not match; | ||||
|  * its err is the shared err_parse_no_match object. | ||||
|  */ | ||||
| parse_result_t parse_no_match() { | ||||
|     return (parse_result_t){.err = err_parse_no_match}; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Build a successful parse result for ast. The stored next entry is advanced | ||||
|  * past any trivia (whitespace, comments, newlines) starting at next; err is | ||||
|  * left null. | ||||
|  */ | ||||
| parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next) { | ||||
|     parse_result_t result = {.node = ast}; | ||||
|     result.next = tokenlist_skip_trivia(next); | ||||
|     return result; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Try to consume exactly one token and turn it into an AST node. | ||||
|  * | ||||
|  * @param current  token list entry to inspect; nullptr is treated as no match | ||||
|  * @param token_id lexer token id the entry must have | ||||
|  * @param ast_id   id stored in the node created on success | ||||
|  * @param is_valid optional extra predicate on the token; may be nullptr | ||||
|  * @return parse_no_match() when the entry is missing, has a different id or | ||||
|  *         is rejected by is_valid; parse_error() carrying the allocation | ||||
|  *         error if ast_node_alloc fails; otherwise parse_success() with the | ||||
|  *         new node and the entry following current | ||||
|  */ | ||||
| parse_result_t parse_token(tokenlist_entry_t *current, | ||||
|                            lexer_token_id_t token_id, node_id_t ast_id, | ||||
|                            token_validator_t is_valid) { | ||||
|     // parse_success() can hand back a null next at the end of the list, so | ||||
|     // running out of tokens has to be a plain non-match rather than a crash | ||||
|     if (current == nullptr) | ||||
|         return parse_no_match(); | ||||
|  | ||||
|     if (current->token.id != token_id || | ||||
|         (is_valid && !is_valid(&current->token))) | ||||
|         return parse_no_match(); | ||||
|  | ||||
|     ast_node_t *node; | ||||
|     error_t *err = ast_node_alloc(&node); | ||||
|     if (err) | ||||
|         return parse_error(err); | ||||
|     node->id = ast_id; | ||||
|     node->token_entry = current; | ||||
|  | ||||
|     return parse_success(node, current->next); | ||||
| } | ||||
							
								
								
									
										27
									
								
								src/parser_util.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								src/parser_util.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| #ifndef INCLUDE_SRC_PARSER_UTIL_H_ | ||||
| #define INCLUDE_SRC_PARSER_UTIL_H_ | ||||
|  | ||||
| #include "ast.h" | ||||
| #include "error.h" | ||||
| #include "tokenlist.h" | ||||
|  | ||||
| /* Outcome of a single parse attempt. parse_error() fills only err; | ||||
|  * parse_success() fills node and next and leaves err null. | ||||
|  */ | ||||
| typedef struct parse_result { | ||||
|     error_t *err;            // non-null when the parse failed | ||||
|     tokenlist_entry_t *next; // entry to continue parsing from | ||||
|     ast_node_t *node;        // node produced by a successful parse | ||||
| } parse_result_t; | ||||
|  | ||||
| /* Optional extra check parse_token() applies to a token whose id already | ||||
|  * matched; returning false turns the attempt into a non-match. | ||||
|  */ | ||||
| typedef bool (*token_validator_t)(lexer_token_t *); | ||||
|  | ||||
| parse_result_t parse_error(error_t *err); | ||||
| parse_result_t parse_no_match(); | ||||
| parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next); | ||||
| parse_result_t parse_token(tokenlist_entry_t *current, | ||||
|                            lexer_token_id_t token_id, node_id_t ast_id, | ||||
|                            token_validator_t is_valid); | ||||
|  | ||||
| // NOTE(review): parser_util.c does not define skip_insignificant; | ||||
| // tokenlist_skip_trivia appears to fill this role now. Confirm whether this | ||||
| // declaration is stale. | ||||
| tokenlist_entry_t *skip_insignificant(tokenlist_entry_t *); | ||||
|  | ||||
| // Error object carried by every parse_no_match() result | ||||
| extern error_t *err_parse_no_match; | ||||
|  | ||||
| #endif // INCLUDE_SRC_PARSER_UTIL_H_ | ||||
| @@ -81,3 +81,26 @@ error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex) { | ||||
|         return err; | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Report whether the entry holds a whitespace, comment or newline token. | ||||
|  */ | ||||
| bool is_trivia(tokenlist_entry_t *trivia) { | ||||
|     return trivia->token.id == TOKEN_WHITESPACE || | ||||
|            trivia->token.id == TOKEN_COMMENT || | ||||
|            trivia->token.id == TOKEN_NEWLINE; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Starting at current itself, step forward until an entry that is not trivia | ||||
|  * is found. Yields nullptr when the list ends first (or current is nullptr). | ||||
|  */ | ||||
| tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current) { | ||||
|     for (; current != nullptr; current = current->next) { | ||||
|         if (!is_trivia(current)) | ||||
|             break; | ||||
|     } | ||||
|     return current; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Find the first non-trivia entry strictly after current. Yields nullptr | ||||
|  * when current is nullptr or no such entry exists. | ||||
|  */ | ||||
| tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current) { | ||||
|     if (current != nullptr) | ||||
|         return tokenlist_skip_trivia(current->next); | ||||
|     return nullptr; | ||||
| } | ||||
|   | ||||
| @@ -27,4 +27,14 @@ error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex); | ||||
|  | ||||
| void tokenlist_free(tokenlist_t *list); | ||||
|  | ||||
| /** | ||||
|  * Return the first token entry, starting at current itself, that isn't | ||||
|  * whitespace, newline or comment. Returns nullptr if current is nullptr or | ||||
|  * only such entries remain. | ||||
|  */ | ||||
| tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current); | ||||
|  | ||||
| /** | ||||
|  * Return the next token entry after current that isn't whitespace, newline | ||||
|  * or comment. Returns nullptr if current is nullptr or no such entry follows. | ||||
|  */ | ||||
| tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current); | ||||
|  | ||||
| #endif // INCLUDE_SRC_TOKENLIST_H_ | ||||
|   | ||||
| @@ -1,4 +1,7 @@ | ||||
| .section text | ||||
|  | ||||
| _start: | ||||
|     mov eax, ebx | ||||
|     mov eax, 555            ; move 555 into eax | ||||
|     push 0o777 | ||||
|     xor eax, 0xDEADBEEF | ||||
|   | ||||
| @@ -10,7 +10,7 @@ scan-build -o reports/static-analysis/ -plist-html --status-bugs make all | ||||
| # Run the sanitizer builds and valgrind | ||||
| make clean sanitize all | ||||
|  | ||||
| ARGUMENTS=("-tokens" "-text") | ||||
| ARGUMENTS=("tokens" "text" "ast") | ||||
| while IFS= read -r INPUT_FILE; do | ||||
|     for ARGS in ${ARGUMENTS[@]}; do | ||||
|         ./oas-asan $ARGS $INPUT_FILE > /dev/null | ||||
|   | ||||
		Reference in New Issue
	
	Block a user