Compare commits

4 Commits: 28283dd381 ... 2474e0c773
| Author | SHA1 | Date |
|---|---|---|
| | 2474e0c773 | |
| | 6b840ad888 | |
| | b1391b91bd | |
| | c9b29e10e8 | |

Makefile (2 changes)
@@ -10,7 +10,7 @@ OBJECTS = $(SOURCES:.c=.o)
DEPENDENCIES = $(SOURCES:.c=.d)
TARGET?=oas
OUTPUTS=oas oas-asan oas-msan oas-afl
RUNARGUMENTS?=-tokens tests/input/valid.asm
RUNARGUMENTS?=ast tests/input/valid.asm

all: $(TARGET)
@@ -183,7 +183,7 @@ error_t *lexer_consume_n(lexer_t *lex, const size_t len,
                         char buffer[static len], const size_t n) {
    if (lex->buffer_count < n)
        return err_buffer_underrun;
    if (len > n)
    if (n > len)
        return err_consume_excessive_length;

    memcpy(buffer, lex->buffer, n);
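The hunk above flips the guard in lexer_consume_n from (len > n) to (n > len): len is the size of the caller-supplied buffer and n is the number of bytes about to be memcpy'd into it, so the call has to be rejected when n exceeds len, not the other way around. Below is a minimal, self-contained sketch of the same pattern; it is a hypothetical stand-in, not the project's actual lexer code.

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for lexer_consume_n; only the order of the length
 * check is the point here. */
static const char *consume_n(const char *src, size_t available, size_t len,
                             char buffer[static len], size_t n) {
    if (available < n)
        return "buffer underrun";
    if (n > len) /* corrected check: the request must fit the caller's buffer */
        return "consume length exceeds caller buffer";
    memcpy(buffer, src, n);
    return NULL;
}

int main(void) {
    char out[4];
    /* With the old check (len > n), 4 > 8 is false and memcpy would write
     * 8 bytes into a 4-byte buffer; with the fix the call is rejected. */
    const char *err = consume_n("abcdefgh", 8, sizeof(out), out, 8);
    puts(err ? err : "ok");
    return 0;
}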
src/main.c (90 changes)
@@ -1,5 +1,6 @@
#include "error.h"
#include "lexer.h"
#include "parser.h"
#include "tokenlist.h"

#include <limits.h>
@@ -7,38 +8,64 @@
#include <stdlib.h>
#include <string.h>

bool print_token(lexer_token_t *token) {
    lexer_token_print(token);
    return true;
typedef enum mode { MODE_AST, MODE_TEXT, MODE_TOKENS } mode_t;

void print_tokens(tokenlist_t *list) {
    for (auto entry = list->head; entry; entry = entry->next) {
        auto token = &entry->token;
        lexer_token_print(token);
    }
}

bool print_value(lexer_token_t *token) {
    if (token->id == TOKEN_ERROR) {
        printf("%s\n", token->value);
        for (size_t i = 0; i < token->character_number; ++i)
            printf(" ");
        printf("^-- %s\n", token->explanation);
    } else {
        printf("%s", token->value);
void print_text(tokenlist_t *list) {
    for (auto entry = list->head; entry; entry = entry->next) {
        auto token = &entry->token;
        if (token->id == TOKEN_ERROR) {
            printf("%s\n", token->value);
            for (size_t i = 0; i < token->character_number; ++i)
                printf(" ");
            printf("^-- %s\n", token->explanation);
            return;
        } else {
            printf("%s", token->value);
        }
    }
    return token->id != TOKEN_ERROR;
}

void print_ast(tokenlist_t *list) {
    parse_result_t result = parse(list->head);
    if (result.err) {
        puts(result.err->message);
        error_free(result.err);
        return;
    }
    ast_node_print(result.node);

    if (result.next != nullptr) {
        puts("First unparsed token:");
        lexer_token_print(&result.next->token);
    }

    ast_node_free(result.node);
}

int get_execution_mode(int argc, char *argv[]) {
    if (argc != 3 || (strcmp(argv[1], "tokens") != 0 &&
                      strcmp(argv[1], "text") != 0 && strcmp(argv[1], "ast"))) {
        puts("Usage: oas [tokens|text|ast] <filename>");
        exit(1);
    }

    if (strcmp(argv[1], "tokens") == 0)
        return MODE_TOKENS;
    if (strcmp(argv[1], "text") == 0)
        return MODE_TEXT;
    return MODE_AST;
}

int main(int argc, char *argv[]) {
    if (argc != 3 ||
        (strcmp(argv[1], "-tokens") != 0 && strcmp(argv[1], "-text") != 0)) {
        puts("Usage: oas -tokens <filename>");
        puts("Usage: oas -text <filename>");
        return 1;
    }

    bool (*print_fn)(lexer_token_t *);
    mode_t mode = get_execution_mode(argc, argv);
    char *filename = argv[2];
    if (strcmp(argv[1], "-tokens") == 0) {
        print_fn = print_token;
    } else {
        print_fn = print_value;
    }

    lexer_t *lex = &(lexer_t){};
    error_t *err = lexer_open(lex, filename);
@@ -54,9 +81,18 @@ int main(int argc, char *argv[]) {
    if (err)
        goto cleanup_tokens;

    for (auto entry = list->head; entry; entry = entry->next) {
        print_fn(&entry->token);
    switch (mode) {
    case MODE_TOKENS:
        print_tokens(list);
        break;
    case MODE_TEXT:
        print_text(list);
        break;
    case MODE_AST:
        print_ast(list);
        break;
    }

    tokenlist_free(list);
    error_free(err);
    return 0;
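Note that the mode arguments no longer take a leading dash ("tokens", "text" and "ast" replace "-tokens" and "-text"); the RUNARGUMENTS default in the Makefile hunk above and the ARGUMENTS array in the test-runner hunk further down are updated to match.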
src/parser.c (53 changes, new file)
@@ -0,0 +1,53 @@
#include "parser.h"
#include "ast.h"
#include "lexer.h"
#include "parser_combinators.h"
#include "parser_primitives.h"
#include "parser_util.h"
#include "tokenlist.h"

parse_result_t parse_number(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal,
                          parse_binary, nullptr};
    return parse_any(current, parsers);
}

parse_result_t parse_operand(tokenlist_entry_t *current) {
    // FIXME: not the correct set of parsers
    parser_t parsers[] = {parse_register, parse_number, nullptr};
    return parse_any(current, parsers);
}

parse_result_t parse_operands(tokenlist_entry_t *current) {
    return parse_list(current, NODE_OPERANDS, true, TOKEN_COMMA, parse_operand);
}

parse_result_t parse_label(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_identifier, parse_colon, nullptr};
    return parse_consecutive(current, NODE_LABEL, parsers);
}

parse_result_t parse_section_directive(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_section, parse_identifier, nullptr};
    return parse_consecutive(current, NODE_SECTION_DIRECTIVE, parsers);
}

parse_result_t parse_directive(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_dot, parse_section_directive, nullptr};
    return parse_consecutive(current, NODE_DIRECTIVE, parsers);
}

parse_result_t parse_instruction(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_identifier, parse_operands, nullptr};
    return parse_consecutive(current, NODE_INSTRUCTION, parsers);
}

parse_result_t parse_statement(tokenlist_entry_t *current) {
    parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
                          nullptr};
    return parse_any(current, parsers);
}

parse_result_t parse(tokenlist_entry_t *current) {
    return parse_many(current, NODE_PROGRAM, true, parse_statement);
}
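Taken together, the new parse functions implement roughly the grammar sketched below. This is inferred from the combinator calls above; parse_any, parse_consecutive, parse_list and parse_many come from headers that are not part of this diff, so the exact meaning of their boolean arguments is an assumption here.

/* Approximate grammar (a sketch, not authoritative):
 *
 *   program           -> statement*
 *   statement         -> label | directive | instruction
 *   label             -> identifier ':'
 *   directive         -> '.' section_directive
 *   section_directive -> "section" identifier
 *   instruction       -> identifier operands
 *   operands          -> operand ( ',' operand )*
 *   operand           -> register | number     (marked FIXME as incomplete)
 *   number            -> octal | decimal | hexadecimal | binary
 */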
src/parser.h (11 changes, new file)
@@ -0,0 +1,11 @@
#ifndef INCLUDE_SRC_PARSER_H_
#define INCLUDE_SRC_PARSER_H_

#include "ast.h"
#include "error.h"
#include "parser_util.h"
#include "tokenlist.h"

parse_result_t parse(tokenlist_entry_t *current);

#endif // INCLUDE_SRC_PARSER_H_
@@ -62,9 +62,19 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
}

const char *registers[] = {"rax", "rcx", "rdx", "rbx", "rsp",  "rbp",
                           "rsi", "rdi", "r8",  "r9",  "r10",  "r11",
                           "r12", "r13", "r14", "r15", nullptr};
const char *registers[] = {
    // 64-bit registers
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10",
    "r11", "r12", "r13", "r14", "r15",
    // 32-bit registers
    "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d",
    "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
    // 16-bit registers
    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w",
    "r11w", "r12w", "r13w", "r14w", "r15w",
    // 8-bit low registers
    "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b",
    "r11b", "r12b", "r13b", "r14b", "r15b", nullptr};
bool is_register_token(lexer_token_t *token) {
    for (size_t i = 0; registers[i] != nullptr; ++i)
        if (strcmp(token->value, registers[i]) == 0)
@@ -81,4 +91,7 @@ bool is_section_token(lexer_token_t *token) {
    return strcmp(token->value, "section") == 0;
}

parse_result_t parse_section(tokenlist_entry_t *current) {}
parse_result_t parse_section(tokenlist_entry_t *current) {
    return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION,
                       is_section_token);
}
@@ -1,4 +1,7 @@
.section text

_start:
    mov eax, ebx
    mov eax, 555            ; move 555 into eax
    push 0o777
    xor eax, 0xDEADBEEF
@@ -10,7 +10,7 @@ scan-build -o reports/static-analysis/ -plist-html --status-bugs make all
# Run the sanitizer builds and valgrind
make clean sanitize all

ARGUMENTS=("-tokens" "-text")
ARGUMENTS=("tokens" "text" "ast")
while IFS= read -r INPUT_FILE; do
    for ARGS in ${ARGUMENTS[@]}; do
        ./oas-asan $ARGS $INPUT_FILE > /dev/null