Compare commits
	
		
			10 Commits
		
	
	
		
			d40273b329
			...
			object_for
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 3db9fd9b8f | |||
| 0d3881f680 | |||
| 5ea942024f | |||
| b4757e008c | |||
| b70b6896bf | |||
| 6ca7bb3661 | |||
| d424c0f886 | |||
| c66489dd90 | |||
| 44fa66c2b7 | |||
| c48adb1306 | 
							
								
								
									
										55
									
								
								doc/object_format.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								doc/object_format.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | |||||||
|  | # Linker file format | ||||||
|  |  | ||||||
|  | ```C | ||||||
|  |  | ||||||
|  | struct object_file { | ||||||
|  |     uint64_t    magic;   // ".oo-bin" | ||||||
|  |     uint64_t    version; // 1 | ||||||
|  |     uint64_t    architecture; // AMD64(0) | ||||||
|  |     uint64_t    offsets_offset; | ||||||
|  |  | ||||||
|  |     struct offsets { | ||||||
|  |         uint64_t strings; | ||||||
|  |         uint64_t sections; | ||||||
|  |         uint64_t symbols; | ||||||
|  |         uint64_t relocations; | ||||||
|  |     } offsets; | ||||||
|  |  | ||||||
|  |     struct string_table { | ||||||
|  |         uint64_t size; | ||||||
|  |         uint8_t data[static size]; | ||||||
|  |     } strings; | ||||||
|  |  | ||||||
|  |     struct section_table { | ||||||
|  |         uint32_t count; | ||||||
|  |         struct section_entry { | ||||||
|  |             uint32_t name; | ||||||
|  |             uint64_t offset; | ||||||
|  |             uint64_t size_on_disk; | ||||||
|  |             uint64_t size_in_memory; | ||||||
|  |             uint64_t flags; | ||||||
|  |         } sections[static count]; | ||||||
|  |     } sections; | ||||||
|  |  | ||||||
|  |     struct symbol_table { | ||||||
|  |         uint32_t count; | ||||||
|  |         struct symbol_entry { | ||||||
|  |             uint32_t name; | ||||||
|  |             uint8_t  kind;  // IMPORT(0) \| EXPORT(1) \| LOCAL(2) | ||||||
|  |             uint32_t section; | ||||||
|  |             uint64_t offset; | ||||||
|  |         } symbols[static count]; | ||||||
|  |     } symbols; | ||||||
|  |  | ||||||
|  |     struct relocation_table { | ||||||
|  |         uint32_t count; | ||||||
|  |         struct relocation_entry { | ||||||
|  |             uint32_t section; | ||||||
|  |             uint64_t offset; | ||||||
|  |             uint8_t  size; | ||||||
|  |             uint32_t symbol; | ||||||
|  |             uint8_t  kind;  // ABSOLUTE(0) \| RELATIVE(1) | ||||||
|  |         } relocations[static count]; | ||||||
|  |     } relocations; | ||||||
|  | }; | ||||||
|  | ``` | ||||||
| @@ -9,7 +9,8 @@ | |||||||
| parse_result_t parse_number(tokenlist_entry_t *current) { | parse_result_t parse_number(tokenlist_entry_t *current) { | ||||||
|     parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal, |     parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal, | ||||||
|                           parse_binary, nullptr}; |                           parse_binary, nullptr}; | ||||||
|     return parse_any(current, parsers); |     parse_result_t result = parse_any(current, parsers); | ||||||
|  |     return parse_result_wrap(NODE_NUMBER, result); | ||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) { | parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) { | ||||||
| @@ -20,12 +21,12 @@ parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) { | |||||||
| parse_result_t parse_register_index(tokenlist_entry_t *current) { | parse_result_t parse_register_index(tokenlist_entry_t *current) { | ||||||
|     parser_t parsers[] = {parse_plus, parse_register, parse_asterisk, |     parser_t parsers[] = {parse_plus, parse_register, parse_asterisk, | ||||||
|                           parse_number, nullptr}; |                           parse_number, nullptr}; | ||||||
|     return parse_consecutive(current, NODE_LABEL, parsers); |     return parse_consecutive(current, NODE_REGISTER_INDEX, parsers); | ||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse_register_offset(tokenlist_entry_t *current) { | parse_result_t parse_register_offset(tokenlist_entry_t *current) { | ||||||
|     parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr}; |     parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr}; | ||||||
|     return parse_consecutive(current, NODE_LABEL, parsers); |     return parse_consecutive(current, NODE_REGISTER_OFFSET, parsers); | ||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse_register_expression(tokenlist_entry_t *current) { | parse_result_t parse_register_expression(tokenlist_entry_t *current) { | ||||||
| @@ -35,6 +36,7 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) { | |||||||
|     error_t *err = ast_node_alloc(&expr); |     error_t *err = ast_node_alloc(&expr); | ||||||
|     if (err) |     if (err) | ||||||
|         return parse_error(err); |         return parse_error(err); | ||||||
|  |     expr->id = NODE_REGISTER_EXPRESSION; | ||||||
|  |  | ||||||
|     // <register> |     // <register> | ||||||
|     result = parse_register(current); |     result = parse_register(current); | ||||||
| @@ -82,7 +84,8 @@ parse_result_t parse_register_expression(tokenlist_entry_t *current) { | |||||||
|  |  | ||||||
| parse_result_t parse_immediate(tokenlist_entry_t *current) { | parse_result_t parse_immediate(tokenlist_entry_t *current) { | ||||||
|     parser_t parsers[] = {parse_number, parse_identifier, nullptr}; |     parser_t parsers[] = {parse_number, parse_identifier, nullptr}; | ||||||
|     return parse_any(current, parsers); |     parse_result_t result = parse_any(current, parsers); | ||||||
|  |     return parse_result_wrap(NODE_IMMEDIATE, result); | ||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse_memory_expression(tokenlist_entry_t *current) { | parse_result_t parse_memory_expression(tokenlist_entry_t *current) { | ||||||
| @@ -93,7 +96,7 @@ parse_result_t parse_memory_expression(tokenlist_entry_t *current) { | |||||||
| parse_result_t parse_memory(tokenlist_entry_t *current) { | parse_result_t parse_memory(tokenlist_entry_t *current) { | ||||||
|     parser_t parsers[] = {parse_lbracket, parse_memory_expression, |     parser_t parsers[] = {parse_lbracket, parse_memory_expression, | ||||||
|                           parse_rbracket, nullptr}; |                           parse_rbracket, nullptr}; | ||||||
|     return parse_consecutive(current, NODE_LABEL, parsers); |     return parse_consecutive(current, NODE_MEMORY, parsers); | ||||||
| } | } | ||||||
|  |  | ||||||
| parse_result_t parse_operand(tokenlist_entry_t *current) { | parse_result_t parse_operand(tokenlist_entry_t *current) { | ||||||
|   | |||||||
| @@ -62,6 +62,11 @@ parse_result_t parse_dot(tokenlist_entry_t *current) { | |||||||
|     return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr); |     return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | parse_result_t parse_label_reference(tokenlist_entry_t *current) { | ||||||
|  |     return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE, | ||||||
|  |                        nullptr); | ||||||
|  | } | ||||||
|  |  | ||||||
| const char *registers[] = { | const char *registers[] = { | ||||||
|     // 64-bit registers |     // 64-bit registers | ||||||
|     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", |     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", | ||||||
| @@ -75,6 +80,7 @@ const char *registers[] = { | |||||||
|     // 8-bit low registers |     // 8-bit low registers | ||||||
|     "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", |     "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", | ||||||
|     "r11b", "r12b", "r13b", "r14b", "r15b", nullptr}; |     "r11b", "r12b", "r13b", "r14b", "r15b", nullptr}; | ||||||
|  |  | ||||||
| bool is_register_token(lexer_token_t *token) { | bool is_register_token(lexer_token_t *token) { | ||||||
|     for (size_t i = 0; registers[i] != nullptr; ++i) |     for (size_t i = 0; registers[i] != nullptr; ++i) | ||||||
|         if (strcmp(token->value, registers[i]) == 0) |         if (strcmp(token->value, registers[i]) == 0) | ||||||
|   | |||||||
| @@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current); | |||||||
| parse_result_t parse_minus(tokenlist_entry_t *current); | parse_result_t parse_minus(tokenlist_entry_t *current); | ||||||
| parse_result_t parse_asterisk(tokenlist_entry_t *current); | parse_result_t parse_asterisk(tokenlist_entry_t *current); | ||||||
| parse_result_t parse_dot(tokenlist_entry_t *current); | parse_result_t parse_dot(tokenlist_entry_t *current); | ||||||
|  | parse_result_t parse_label_reference(tokenlist_entry_t *current); | ||||||
|  |  | ||||||
| /* These are "primitives" with a different name and some extra validation on top | /* These are "primitives" with a different name and some extra validation on top | ||||||
|  * for example, register is just an identifier but it only matches a limited set |  * for example, register is just an identifier but it only matches a limited set | ||||||
|   | |||||||
| @@ -33,3 +33,24 @@ parse_result_t parse_token(tokenlist_entry_t *current, | |||||||
|  |  | ||||||
|     return parse_success(node, current->next); |     return parse_success(node, current->next); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | parse_result_t parse_result_wrap(node_id_t id, parse_result_t result) { | ||||||
|  |     if (result.err) | ||||||
|  |         return result; | ||||||
|  |  | ||||||
|  |     ast_node_t *node; | ||||||
|  |     error_t *err = ast_node_alloc(&node); | ||||||
|  |     if (err) { | ||||||
|  |         ast_node_free(result.node); | ||||||
|  |         return parse_error(err); | ||||||
|  |     } | ||||||
|  |     node->id = id; | ||||||
|  |  | ||||||
|  |     err = ast_node_add_child(node, result.node); | ||||||
|  |     if (err) { | ||||||
|  |         ast_node_free(result.node); | ||||||
|  |         return parse_error(err); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return parse_success(node, result.next); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -19,8 +19,7 @@ parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next); | |||||||
| parse_result_t parse_token(tokenlist_entry_t *current, | parse_result_t parse_token(tokenlist_entry_t *current, | ||||||
|                            lexer_token_id_t token_id, node_id_t ast_id, |                            lexer_token_id_t token_id, node_id_t ast_id, | ||||||
|                            token_validator_t is_valid); |                            token_validator_t is_valid); | ||||||
|  | parse_result_t parse_result_wrap(node_id_t id, parse_result_t result); | ||||||
| tokenlist_entry_t *skip_insignificant(tokenlist_entry_t *); |  | ||||||
|  |  | ||||||
| extern error_t *err_parse_no_match; | extern error_t *err_parse_no_match; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,8 +1,17 @@ | |||||||
| .section text | .section text | ||||||
|  |  | ||||||
|  | ; Small, valid code snippet that should contain every kind of AST node | ||||||
|  |  | ||||||
| _start: | _start: | ||||||
|     mov eax, ebx |     mov eax, ebx | ||||||
|     mov eax, 555            ; move 555 into eax |     lea eax, [eax + ebx * 4 + 8] | ||||||
|  |     lea eax, [eax + 8] | ||||||
|  |     lea eax, [eax + ebx * 8] | ||||||
|  |     lea eax, [esp - 24] | ||||||
|  |     lea eax, [eax + ebx * 4 - 8] | ||||||
|  |     lea eax, [_start] | ||||||
|  |     mov eax, _start | ||||||
|  |     mov eax, 555 | ||||||
|     push 0o777 |     push 0o777 | ||||||
|     xor eax, 0xDEADBEEF |     xor eax, 0xDEADBEEF | ||||||
|     and ecx, 0o770 |     and ecx, 0o770 | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user