Compare commits
	
		
			2 Commits
		
	
	
		
			main
			...
			0f6efa8050
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 0f6efa8050 | |||
| 36af377ba0 | 
							
								
								
									
										2
									
								
								.clangd
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								.clangd
									
									
									
									
									
								
							| @@ -1,2 +1,2 @@ | |||||||
| CompileFlags: | CompileFlags: | ||||||
|   Add: ["-std=c23", "-x", "c", "-D_POSIX_C_SOURCE=200809L"] |   Add: ["-std=c23", "-x", "c"] | ||||||
|   | |||||||
| @@ -16,10 +16,8 @@ jobs: | |||||||
|           echo "http://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories |           echo "http://dl-cdn.alpinelinux.org/alpine/edge/main" >> /etc/apk/repositories | ||||||
|           echo "http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories |           echo "http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories | ||||||
|  |  | ||||||
|           # determine correct clang version and then install it |  | ||||||
|           apk update |           apk update | ||||||
|           RT_VERSION=$(apk search -v compiler-rt | grep -o "compiler-rt-[0-9]*" | head -1 | grep -o "[0-9]*") |           apk add --no-cache llvm19 clang19 clang19-analyzer compiler-rt valgrind | ||||||
|           apk add --no-cache llvm${RT_VERSION} clang${RT_VERSION} clang${RT_VERSION}-analyzer compiler-rt valgrind |  | ||||||
|  |  | ||||||
|           # Verify versions |           # Verify versions | ||||||
|           echo "---------------------" |           echo "---------------------" | ||||||
| @@ -36,7 +34,3 @@ jobs: | |||||||
|       - name: make validate |       - name: make validate | ||||||
|         run: | |         run: | | ||||||
|           make validate |           make validate | ||||||
|  |  | ||||||
|       - name: make test |  | ||||||
|         run: | |  | ||||||
|           make test |  | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1,5 +1,7 @@ | |||||||
| *.o | *.o | ||||||
| *.d | *.d | ||||||
| /core | /core | ||||||
| /build | /oas | ||||||
|  | /oas-asan | ||||||
|  | /oas-msan | ||||||
| /reports | /reports | ||||||
|   | |||||||
							
								
								
									
										72
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										72
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,46 +1,54 @@ | |||||||
| .PHONY: all clean distclean release debug afl asan msan validate analyze fuzz | .PHONY: all clean clean-objects clean-reports run sanitize validate fuzz | ||||||
|  |  | ||||||
| debug:  | CC=clang | ||||||
| 	make -rRf make/debug.mk all | LD=clang | ||||||
|  | CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L | ||||||
|  | LDFLAGS?= | ||||||
|  |  | ||||||
| all: debug release afl asan msan | SOURCES = $(shell find src/ -type f -name '*.c') | ||||||
|  | OBJECTS = $(SOURCES:.c=.o) | ||||||
|  | DEPENDENCIES = $(SOURCES:.c=.d) | ||||||
|  | TARGET?=oas | ||||||
|  | OUTPUTS=oas oas-asan oas-msan oas-afl | ||||||
|  | RUNARGUMENTS?=-tokens tests/input/valid.asm | ||||||
|  |  | ||||||
|  | all: $(TARGET) | ||||||
| 	 | 	 | ||||||
|  |  | ||||||
| release:  | run: $(TARGET) | ||||||
| 	make -rRf make/release.mk all | 	./$(TARGET) $(RUNARGUMENTS) | ||||||
|  |  | ||||||
| afl: |  | ||||||
| 	make -rRf make/afl.mk all |  | ||||||
|  |  | ||||||
| fuzz: | fuzz: | ||||||
| 	make -rRf make/afl.mk fuzz | 	make CC="afl-clang-fast" LD="afl-clang-fast" TARGET="oas-afl" clean-objects all | ||||||
|  | 	make clean-objects | ||||||
|  | 	mkdir -p reports/afl | ||||||
|  | 	afl-fuzz -i tests/input -o reports/afl -m none -- ./oas-afl -tokens @@ | ||||||
|  |  | ||||||
| asan: | sanitize: | ||||||
| 	make -rRf make/asan.mk all | 	make CFLAGS="$(CFLAGS) -fsanitize=address,undefined" \ | ||||||
|  | 		LDFLAGS="-fsanitize=address,undefined" \ | ||||||
|  | 		TARGET="oas-asan" clean-objects all | ||||||
|  | 	make CFLAGS="$(CFLAGS) -fsanitize=memory -fsanitize-memory-track-origins=2" \ | ||||||
|  | 		LDFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2" \ | ||||||
|  | 		TARGET="oas-msan" clean-objects all  | ||||||
|  | 	make clean-objects | ||||||
|  |  | ||||||
| msan: | validate: | ||||||
| 	make -rRf make/msan.mk all |  | ||||||
|  |  | ||||||
| validate: asan msan debug release |  | ||||||
| 	./validate.sh | 	./validate.sh | ||||||
|  |  | ||||||
| analyze: | $(TARGET): $(OBJECTS) | ||||||
| 	make -rRf make/analyze.mk clean all | 	$(LD) $(LDFLAGS) -o $@ $^ | ||||||
|  |  | ||||||
| test: | %.o: %.c | ||||||
| 	make -rRf make/test.mk test | 	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@ | ||||||
|  |  | ||||||
| clean: | -include $(DEPENDENCIES) | ||||||
| 	make -rRf make/release.mk clean |  | ||||||
| 	make -rRf make/debug.mk clean |  | ||||||
| 	make -rRf make/afl.mk clean |  | ||||||
| 	make -rRf make/msan.mk clean |  | ||||||
| 	make -rRf make/asan.mk clean |  | ||||||
| 	make -rRf make/analyze.mk clean |  | ||||||
| 	make -rRf make/test.mk clean |  | ||||||
| 	rm -rf build/ |  | ||||||
|  |  | ||||||
| distclean: clean | clean-objects: | ||||||
| 	make -rRf make/afl.mk distclean | 	rm -f $(OBJECTS) $(DEPENDENCIES) | ||||||
| 	make -rRf make/analyze.mk distclean |  | ||||||
|  | clean-reports: | ||||||
| 	rm -rf reports/ | 	rm -rf reports/ | ||||||
|  |  | ||||||
|  | clean: clean-objects | ||||||
|  | 	rm -f $(TARGET) $(OUTPUTS) | ||||||
|   | |||||||
| @@ -1,29 +0,0 @@ | |||||||
| # Building |  | ||||||
|  |  | ||||||
| To build oas in the default configuration you just need (gnu) make and a |  | ||||||
| sufficiently modern clang. |  | ||||||
|  |  | ||||||
| ``` |  | ||||||
| make |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| ## Make targets |  | ||||||
|  |  | ||||||
| There are a number of make targets available to build various instrumented |  | ||||||
| builds that are used in validation, analysis and sanitizing. Some of these may |  | ||||||
| require extra dependencies. |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  - `debug`: Creates the debug build in `build/debug`. This is the default target. |  | ||||||
|  - `all`: Builds all binary executable targets. These are |  | ||||||
|    `debug`, `release`, `msan`, `asan` and `afl`. All executables can be found |  | ||||||
|    in `build/` in a subdirectory matching their target names. |  | ||||||
|  - `release`: Creates the release build in `build/release` |  | ||||||
|  - `afl`: Creates a build with AFL++ instrumentation for fuzzing |  | ||||||
|  - `fuzz`: Starts the fuzzer with the instrumented afl executable |  | ||||||
|  - `asan`: builds with the address and undefined clang sanitizers |  | ||||||
|  - `msan`: builds with the memory clang sanitizer |  | ||||||
|  - `validate`: Builds `debug`, `msan`, and `asan` targets, then runs the |  | ||||||
|    validation script. This script executes the sanitizer targets and runs |  | ||||||
|    Valgrind on the debug target across multiple modes and test input files. |  | ||||||
|  |  | ||||||
| @@ -1,24 +1,24 @@ | |||||||
|  | /* string literals are lexer identifier tokens with that particular value */ | ||||||
| <program>   ::= <statement>* | <program>   ::= <statement>* | ||||||
| <statement> ::= <label> | <directive> | <instruction> | <newline> | <statement> ::= ( <label> | <directive> | <instruction> ) <newline> | ||||||
|  |  | ||||||
| <label> ::= <identifier> <colon> | <label> ::= <identifier> <colon> | ||||||
|  |  | ||||||
| <directive> ::= <dot> (<section_directive> | <export_directive> | <import_directive> ) <newline> | <directive> ::= <dot> <section> | ||||||
|  |  | ||||||
| <section_directive> ::= "section" <identifier> | <section>   ::= "section" <identifier> | ||||||
|  |  | ||||||
| <export_directive> ::= "export" <identifier> | <instruction> ::= <identifier> <operands> | ||||||
|  |  | ||||||
| <import_directive> ::= "import" <identifier> | <operands> ::= <operand> ( <comma> <operands> )* | ||||||
|  |  | ||||||
| <instruction> ::= <identifier> <operands> <newline> |  | ||||||
|  |  | ||||||
| <operands> ::= <operand> ( <comma> <operand> )* |  | ||||||
|  |  | ||||||
| <operand>  ::= <register> | <immediate> | <memory> | <operand>  ::= <register> | <immediate> | <memory> | ||||||
|  |  | ||||||
| <immediate> ::= <number> | <label_reference> | <register> ::= <register_base> | <register_extra> | ||||||
|  | <register_base> ::= "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi" | "rbp" | "rsp" | ||||||
|  | <register_extra> ::= "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15"  | ||||||
|  |  | ||||||
|  | <immediate> ::= <number> | <label_reference> | ||||||
| <number> ::= <octal> | <binary> | <decimal> | <hexadecimal> | <number> ::= <octal> | <binary> | <decimal> | <hexadecimal> | ||||||
|  |  | ||||||
| <label_reference> ::= <identifier> | <label_reference> ::= <identifier> | ||||||
| @@ -34,10 +34,3 @@ | |||||||
| <register_offset> ::= <plus_or_minus> <number> | <register_offset> ::= <plus_or_minus> <number> | ||||||
|  |  | ||||||
| <plus_or_minus> ::= <plus> | <minus> | <plus_or_minus> ::= <plus> | <minus> | ||||||
|  |  | ||||||
|  |  | ||||||
| /* These are lexer identifiers with the correct string value */ |  | ||||||
| <section> ::= "section" |  | ||||||
|  |  | ||||||
| <register> ::= "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi" | "rbp" | "rsp" | |  | ||||||
| "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15" |  | ||||||
|   | |||||||
							
								
								
									
										14
									
								
								make/afl.mk
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								make/afl.mk
									
									
									
									
									
								
							| @@ -1,14 +0,0 @@ | |||||||
| .PHONY: fuzz distclean |  | ||||||
|  |  | ||||||
| CC=afl-clang-fast |  | ||||||
| LD=afl-clang-fast |  | ||||||
| BUILD_DIR=build/afl/ |  | ||||||
|  |  | ||||||
| -include make/base.mk |  | ||||||
|  |  | ||||||
| fuzz: $(BUILD_DIR)$(TARGET) |  | ||||||
| 	mkdir -p reports/afl |  | ||||||
| 	afl-fuzz -i tests/input -o reports/afl -m none -- ./$< -tokens @@ |  | ||||||
|  |  | ||||||
| distclean: clean |  | ||||||
| 	rm -rf reports/afl |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| BUILD_DIR=build/analyze/ |  | ||||||
| -include make/base.mk |  | ||||||
|  |  | ||||||
| analyze: |  | ||||||
| 	mkdir -p reports/static-analysis |  | ||||||
| 	scan-build -o reports/static-analysis/ -plist-html --status-bugs make -rRf make/analyze.mk all |  | ||||||
|  |  | ||||||
| distclean: clean |  | ||||||
| 	rm -rf reports/static-analysis |  | ||||||
| @@ -1,5 +0,0 @@ | |||||||
| CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=address,undefined |  | ||||||
| LDFLAGS=-fsanitize=address,undefined |  | ||||||
| BUILD_DIR=build/asan/ |  | ||||||
|  |  | ||||||
| -include make/base.mk |  | ||||||
							
								
								
									
										27
									
								
								make/base.mk
									
									
									
									
									
								
							
							
						
						
									
										27
									
								
								make/base.mk
									
									
									
									
									
								
							| @@ -1,27 +0,0 @@ | |||||||
| .PHONY: all clean |  | ||||||
|  |  | ||||||
| CC?=clang |  | ||||||
| LD?=clang |  | ||||||
| CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L |  | ||||||
| LDFLAGS?= |  | ||||||
| BUILD_DIR?=build/debug/ |  | ||||||
|  |  | ||||||
| SOURCES?=$(shell find src/ -type f -name '*.c') |  | ||||||
| OBJECTS=$(patsubst %.c,$(BUILD_DIR)%.o,$(SOURCES)) |  | ||||||
| DEPENDENCIES=$(OBJECTS:.o=.d) |  | ||||||
| TARGET?=oas |  | ||||||
|  |  | ||||||
| all: $(BUILD_DIR)$(TARGET) |  | ||||||
| 	 |  | ||||||
|  |  | ||||||
| $(BUILD_DIR)$(TARGET): $(OBJECTS) |  | ||||||
| 	$(LD) $(LDFLAGS) -o $@ $^ |  | ||||||
|  |  | ||||||
| $(BUILD_DIR)%.o: %.c |  | ||||||
| 	mkdir -p $(dir $@) |  | ||||||
| 	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@ |  | ||||||
|  |  | ||||||
| -include $(DEPENDENCIES) |  | ||||||
|  |  | ||||||
| clean: |  | ||||||
| 	rm -rf $(BUILD_DIR) |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| -include make/base.mk |  | ||||||
| @@ -1,5 +0,0 @@ | |||||||
| CFLAGS=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fsanitize=memory |  | ||||||
| LDFLAGS=-fsanitize=memory |  | ||||||
| BUILD_DIR=build/msan/ |  | ||||||
|  |  | ||||||
| -include make/base.mk |  | ||||||
| @@ -1,5 +0,0 @@ | |||||||
| CFLAGS?=-Wall -Wextra -Wpedantic -Werror -O2 -std=c23 -flto -fomit-frame-pointer -DNDEBUG -D_POSIX_C_SOURCE=200809L |  | ||||||
| LDFLAGS?=-flto -s -Wl,--gc-sections |  | ||||||
| BUILD_DIR?=build/release/ |  | ||||||
|  |  | ||||||
| -include make/base.mk |  | ||||||
							
								
								
									
										21
									
								
								make/test.mk
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								make/test.mk
									
									
									
									
									
								
							| @@ -1,21 +0,0 @@ | |||||||
| .PHONY: test |  | ||||||
|  |  | ||||||
| CFLAGS?=-Wall -Wextra -Wpedantic -O0 -g3 -std=c23 -fno-omit-frame-pointer -fno-optimize-sibling-calls -D_POSIX_C_SOURCE=200809L -fprofile-instr-generate -fcoverage-mapping |  | ||||||
| LDFLAGS?=-fprofile-instr-generate |  | ||||||
| BUILD_DIR=build/test/ |  | ||||||
| TARGET=oas-tests |  | ||||||
| SOURCES = $(filter-out src/main.c, $(shell find src/ tests/ -type f -name '*.c')) |  | ||||||
| -include make/base.mk |  | ||||||
|  |  | ||||||
| test: $(BUILD_DIR)$(TARGET) |  | ||||||
| 	mkdir -p reports/coverage |  | ||||||
| 	LLVM_PROFILE_FILE="reports/coverage/tests.profraw" $(BUILD_DIR)$(TARGET) |  | ||||||
| 	llvm-profdata merge -sparse reports/coverage/tests.profraw -o reports/coverage/tests.profdata |  | ||||||
| 	llvm-cov show $(BUILD_DIR)$(TARGET) -instr-profile=reports/coverage/tests.profdata -format=html -output-dir=reports/coverage/html -ignore-filename-regex="tests/.*" |  | ||||||
| 	@echo "--" |  | ||||||
| 	@echo "Test coverage:" |  | ||||||
| 	@echo "file://$$(realpath reports/coverage/html/index.html)" |  | ||||||
| 	@echo "--" |  | ||||||
|  |  | ||||||
| clean: |  | ||||||
| 	rm -rf reports/coverage |  | ||||||
							
								
								
									
										139
									
								
								src/ast.c
									
									
									
									
									
								
							
							
						
						
									
										139
									
								
								src/ast.c
									
									
									
									
									
								
							| @@ -1,9 +1,8 @@ | |||||||
| #include "ast.h" | #include "ast.h" | ||||||
| #include "error.h" | #include "error.h" | ||||||
| #include <assert.h> |  | ||||||
| #include <string.h> | #include <string.h> | ||||||
|  |  | ||||||
| error_t *const err_ast_children_cap = &(error_t){ | error_t *err_node_children_cap = &(error_t){ | ||||||
|     .message = "Failed to increase ast node children, max capacity reached"}; |     .message = "Failed to increase ast node children, max capacity reached"}; | ||||||
|  |  | ||||||
| error_t *ast_node_alloc(ast_node_t **output) { | error_t *ast_node_alloc(ast_node_t **output) { | ||||||
| @@ -17,15 +16,20 @@ error_t *ast_node_alloc(ast_node_t **output) { | |||||||
|     return nullptr; |     return nullptr; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void ast_node_free_value(ast_node_t *node) { | ||||||
|  |     // TODO: decide how value ownership will work and clean it up here | ||||||
|  | } | ||||||
|  |  | ||||||
| void ast_node_free(ast_node_t *node) { | void ast_node_free(ast_node_t *node) { | ||||||
|     if (node == nullptr) |     if (node == nullptr) | ||||||
|         return; |         return; | ||||||
|     if (node->children) { |     if (node->children) { | ||||||
|         for (size_t i = 0; i < node->len; ++i) |         for (size_t i = 0; i < node->len; ++i) | ||||||
|             ast_node_free(node->children[i]); |             ast_node_free(node->children[i]); | ||||||
|         free(node->children); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     ast_node_free_value(node); | ||||||
|  |  | ||||||
|     memset(node, 0, sizeof(ast_node_t)); |     memset(node, 0, sizeof(ast_node_t)); | ||||||
|     free(node); |     free(node); | ||||||
| } | } | ||||||
| @@ -44,7 +48,7 @@ error_t *ast_node_alloc_children(ast_node_t *node) { | |||||||
|  |  | ||||||
| error_t *ast_node_grow_cap(ast_node_t *node) { | error_t *ast_node_grow_cap(ast_node_t *node) { | ||||||
|     if (node->cap >= node_max_children_cap) { |     if (node->cap >= node_max_children_cap) { | ||||||
|         return err_ast_children_cap; |         return err_node_children_cap; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     size_t new_cap = node->cap * 2; |     size_t new_cap = node->cap * 2; | ||||||
| @@ -78,130 +82,3 @@ error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child) { | |||||||
|  |  | ||||||
|     return nullptr; |     return nullptr; | ||||||
| } | } | ||||||
|  |  | ||||||
| const char *ast_node_id_to_cstr(node_id_t id) { |  | ||||||
|     switch (id) { |  | ||||||
|     case NODE_INVALID: |  | ||||||
|         return "NODE_INVALID"; |  | ||||||
|     case NODE_PROGRAM: |  | ||||||
|         return "NODE_PROGRAM"; |  | ||||||
|     case NODE_STATEMENT: |  | ||||||
|         return "NODE_STATEMENT"; |  | ||||||
|     case NODE_LABEL: |  | ||||||
|         return "NODE_LABEL"; |  | ||||||
|     case NODE_DIRECTIVE: |  | ||||||
|         return "NODE_DIRECTIVE"; |  | ||||||
|     case NODE_INSTRUCTION: |  | ||||||
|         return "NODE_INSTRUCTION"; |  | ||||||
|     case NODE_OPERANDS: |  | ||||||
|         return "NODE_OPERANDS"; |  | ||||||
|     case NODE_OPERAND: |  | ||||||
|         return "NODE_OPERAND"; |  | ||||||
|     case NODE_IMMEDIATE: |  | ||||||
|         return "NODE_IMMEDIATE"; |  | ||||||
|     case NODE_MEMORY: |  | ||||||
|         return "NODE_MEMORY"; |  | ||||||
|     case NODE_NUMBER: |  | ||||||
|         return "NODE_NUMBER"; |  | ||||||
|     case NODE_LABEL_REFERENCE: |  | ||||||
|         return "NODE_LABEL_REFERENCE"; |  | ||||||
|     case NODE_MEMORY_EXPRESSION: |  | ||||||
|         return "NODE_MEMORY_EXPRESSION"; |  | ||||||
|     case NODE_REGISTER_EXPRESSION: |  | ||||||
|         return "NODE_REGISTER_EXPRESSION"; |  | ||||||
|     case NODE_REGISTER_INDEX: |  | ||||||
|         return "NODE_REGISTER_INDEX"; |  | ||||||
|     case NODE_REGISTER_OFFSET: |  | ||||||
|         return "NODE_REGISTER_OFFSET"; |  | ||||||
|     case NODE_PLUS_OR_MINUS: |  | ||||||
|         return "NODE_PLUS_OR_MINUS"; |  | ||||||
|     case NODE_SECTION_DIRECTIVE: |  | ||||||
|         return "NODE_SECTION_DIRECTIVE"; |  | ||||||
|     case NODE_IMPORT_DIRECTIVE: |  | ||||||
|         return "NODE_IMPORT_DIRECTIVE"; |  | ||||||
|     case NODE_EXPORT_DIRECTIVE: |  | ||||||
|         return "NODE_EXPORT_DIRECTIVE"; |  | ||||||
|     case NODE_REGISTER: |  | ||||||
|         return "NODE_REGISTER"; |  | ||||||
|     case NODE_SECTION: |  | ||||||
|         return "NODE_SECTION"; |  | ||||||
|     case NODE_IDENTIFIER: |  | ||||||
|         return "NODE_IDENTIFIER"; |  | ||||||
|     case NODE_DECIMAL: |  | ||||||
|         return "NODE_DECIMAL"; |  | ||||||
|     case NODE_HEXADECIMAL: |  | ||||||
|         return "NODE_HEXADECIMAL"; |  | ||||||
|     case NODE_OCTAL: |  | ||||||
|         return "NODE_OCTAL"; |  | ||||||
|     case NODE_BINARY: |  | ||||||
|         return "NODE_BINARY"; |  | ||||||
|     case NODE_CHAR: |  | ||||||
|         return "NODE_CHAR"; |  | ||||||
|     case NODE_STRING: |  | ||||||
|         return "NODE_STRING"; |  | ||||||
|     case NODE_COLON: |  | ||||||
|         return "NODE_COLON"; |  | ||||||
|     case NODE_COMMA: |  | ||||||
|         return "NODE_COMMA"; |  | ||||||
|     case NODE_LBRACKET: |  | ||||||
|         return "NODE_LBRACKET"; |  | ||||||
|     case NODE_RBRACKET: |  | ||||||
|         return "NODE_RBRACKET"; |  | ||||||
|     case NODE_PLUS: |  | ||||||
|         return "NODE_PLUS"; |  | ||||||
|     case NODE_MINUS: |  | ||||||
|         return "NODE_MINUS"; |  | ||||||
|     case NODE_ASTERISK: |  | ||||||
|         return "NODE_ASTERISK"; |  | ||||||
|     case NODE_DOT: |  | ||||||
|         return "NODE_DOT"; |  | ||||||
|     case NODE_NEWLINE: |  | ||||||
|         return "NODE_NEWLINE"; |  | ||||||
|     case NODE_IMPORT: |  | ||||||
|         return "NODE_IMPORT"; |  | ||||||
|     case NODE_EXPORT: |  | ||||||
|         return "NODE_EXPORT"; |  | ||||||
|     } |  | ||||||
|     assert(!"Unreachable, weird node id" && id); |  | ||||||
|     __builtin_unreachable(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void ast_node_print_internal(ast_node_t *node, int indent) { |  | ||||||
|     if (node == NULL) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for (int i = 0; i < indent; i++) { |  | ||||||
|         printf("  "); |  | ||||||
|     } |  | ||||||
|     printf("%s", ast_node_id_to_cstr(node->id)); |  | ||||||
|  |  | ||||||
|     if (node->token_entry && node->token_entry->token.value && |  | ||||||
|         node->id != NODE_NEWLINE) { |  | ||||||
|         printf(" \"%s\"", node->token_entry->token.value); |  | ||||||
|     } |  | ||||||
|     printf("\n"); |  | ||||||
|  |  | ||||||
|     for (size_t i = 0; i < node->len; i++) { |  | ||||||
|         ast_node_print_internal(node->children[i], indent + 1); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void ast_node_print(ast_node_t *node) { |  | ||||||
|     ast_node_print_internal(node, 0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void ast_node_prune(ast_node_t *node, node_id_t id) { |  | ||||||
|     size_t new_len = 0; |  | ||||||
|     for (size_t i = 0; i < node->len; i++) { |  | ||||||
|         auto child = node->children[i]; |  | ||||||
|         if (child->id == id) { |  | ||||||
|             ast_node_free(child); |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         ast_node_prune(child, id); |  | ||||||
|         node->children[new_len] = child; |  | ||||||
|         new_len++; |  | ||||||
|     } |  | ||||||
|     node->len = new_len; |  | ||||||
| } |  | ||||||
|   | |||||||
							
								
								
									
										142
									
								
								src/ast.h
									
									
									
									
									
								
							
							
						
						
									
										142
									
								
								src/ast.h
									
									
									
									
									
								
							| @@ -1,62 +1,16 @@ | |||||||
| #ifndef INCLUDE_SRC_AST_H_ | #ifndef INCLUDE_SRC_AST_H_ | ||||||
| #define INCLUDE_SRC_AST_H_ | #define INCLUDE_SRC_AST_H_ | ||||||
|  |  | ||||||
| #include "data/registers.h" |  | ||||||
| #include "error.h" | #include "error.h" | ||||||
| #include "lexer.h" | #include "lexer.h" | ||||||
| #include "tokenlist.h" |  | ||||||
| #include <assert.h> |  | ||||||
| #include <stddef.h> | #include <stddef.h> | ||||||
| #include <stdint.h> | #include <stdint.h> | ||||||
|  |  | ||||||
| extern error_t *const err_ast_children_cap; |  | ||||||
|  |  | ||||||
| typedef enum node_id { | typedef enum node_id { | ||||||
|     NODE_INVALID, |  | ||||||
|  |  | ||||||
|     NODE_PROGRAM, |     NODE_PROGRAM, | ||||||
|     NODE_STATEMENT, |  | ||||||
|     NODE_LABEL, |  | ||||||
|     NODE_DIRECTIVE, |     NODE_DIRECTIVE, | ||||||
|     NODE_INSTRUCTION, |     NODE_LABEL, | ||||||
|     NODE_OPERANDS, |     NODE_INSTRUCTION | ||||||
|     NODE_OPERAND, |  | ||||||
|     NODE_IMMEDIATE, |  | ||||||
|     NODE_MEMORY, |  | ||||||
|     NODE_NUMBER, |  | ||||||
|     NODE_LABEL_REFERENCE, |  | ||||||
|     NODE_MEMORY_EXPRESSION, |  | ||||||
|     NODE_REGISTER_EXPRESSION, |  | ||||||
|     NODE_REGISTER_INDEX, |  | ||||||
|     NODE_REGISTER_OFFSET, |  | ||||||
|     NODE_PLUS_OR_MINUS, |  | ||||||
|     NODE_SECTION_DIRECTIVE, |  | ||||||
|     NODE_IMPORT_DIRECTIVE, |  | ||||||
|     NODE_EXPORT_DIRECTIVE, |  | ||||||
|  |  | ||||||
|     // Validated primitives |  | ||||||
|     NODE_REGISTER, |  | ||||||
|     NODE_SECTION, |  | ||||||
|     NODE_IMPORT, |  | ||||||
|     NODE_EXPORT, |  | ||||||
|  |  | ||||||
|     // Primitive nodes |  | ||||||
|     NODE_IDENTIFIER, |  | ||||||
|     NODE_DECIMAL, |  | ||||||
|     NODE_HEXADECIMAL, |  | ||||||
|     NODE_OCTAL, |  | ||||||
|     NODE_BINARY, |  | ||||||
|     NODE_CHAR, |  | ||||||
|     NODE_STRING, |  | ||||||
|     NODE_COLON, |  | ||||||
|     NODE_COMMA, |  | ||||||
|     NODE_LBRACKET, |  | ||||||
|     NODE_RBRACKET, |  | ||||||
|     NODE_PLUS, |  | ||||||
|     NODE_MINUS, |  | ||||||
|     NODE_ASTERISK, |  | ||||||
|     NODE_DOT, |  | ||||||
|     NODE_NEWLINE, |  | ||||||
| } node_id_t; | } node_id_t; | ||||||
|  |  | ||||||
| typedef struct ast_node ast_node_t; | typedef struct ast_node ast_node_t; | ||||||
| @@ -65,78 +19,22 @@ constexpr size_t node_default_children_cap = 8; | |||||||
| /* 65K ought to be enough for anybody */ | /* 65K ought to be enough for anybody */ | ||||||
| constexpr size_t node_max_children_cap = 1 << 16; | constexpr size_t node_max_children_cap = 1 << 16; | ||||||
|  |  | ||||||
| typedef struct number { |  | ||||||
|     uint64_t value; |  | ||||||
|     operand_size_t size; |  | ||||||
| } number_t; |  | ||||||
|  |  | ||||||
| typedef struct register_ { |  | ||||||
|     register_id_t id; |  | ||||||
|     operand_size_t size; |  | ||||||
| } register_t; |  | ||||||
|  |  | ||||||
| typedef struct opcode_encoding { |  | ||||||
|     uint8_t buffer[32]; |  | ||||||
|     size_t len; |  | ||||||
| } opcode_encoding_t; |  | ||||||
|  |  | ||||||
| typedef struct instruction { |  | ||||||
|     bool has_reference; |  | ||||||
|     opcode_encoding_t encoding; |  | ||||||
|     int64_t address; |  | ||||||
| } instruction_t; |  | ||||||
|  |  | ||||||
| typedef struct reference { |  | ||||||
|     int64_t offset; |  | ||||||
|     int64_t address; |  | ||||||
|     operand_size_t size; |  | ||||||
| } reference_t; |  | ||||||
|  |  | ||||||
| typedef struct { |  | ||||||
|     int64_t address; |  | ||||||
| } label_t; |  | ||||||
|  |  | ||||||
| struct ast_node { | struct ast_node { | ||||||
|     node_id_t id; |     node_id_t id; | ||||||
|     tokenlist_entry_t *token_entry; |     lexer_token_t *token; | ||||||
|     size_t len; |     size_t len; | ||||||
|     size_t cap; |     size_t cap; | ||||||
|     ast_node_t **children; |     ast_node_t **children; | ||||||
|  |  | ||||||
|     union { |     union { | ||||||
|         register_t reg; |         struct { | ||||||
|         number_t number; |             uint64_t value; | ||||||
|         instruction_t instruction; |             int size; | ||||||
|         reference_t reference; |         } integer; | ||||||
|         label_t label; |         char *name; | ||||||
|     } value; |     } value; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| static inline register_t *ast_node_register_value(ast_node_t *node) { |  | ||||||
|     assert(node->id == NODE_REGISTER); |  | ||||||
|     return &node->value.reg; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline number_t *ast_node_number_value(ast_node_t *node) { |  | ||||||
|     assert(node->id == NODE_NUMBER); |  | ||||||
|     return &node->value.number; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline instruction_t *ast_node_instruction_value(ast_node_t *node) { |  | ||||||
|     assert(node->id == NODE_INSTRUCTION); |  | ||||||
|     return &node->value.instruction; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline reference_t *ast_node_reference_value(ast_node_t *node) { |  | ||||||
|     assert(node->id == NODE_LABEL_REFERENCE); |  | ||||||
|     return &node->value.reference; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline label_t *ast_node_label_value(ast_node_t *node) { |  | ||||||
|     assert(node->id == NODE_LABEL); |  | ||||||
|     return &node->value.label; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * @brief Allocates a new AST node |  * @brief Allocates a new AST node | ||||||
|  * |  * | ||||||
| @@ -171,28 +69,4 @@ void ast_node_free(ast_node_t *node); | |||||||
|  */ |  */ | ||||||
| error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child); | error_t *ast_node_add_child(ast_node_t *node, ast_node_t *child); | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @brief Prints an AST starting from the given node |  | ||||||
|  * |  | ||||||
|  * Prints a representation of the AST with indentation to show structure. |  | ||||||
|  * Each node's type is shown, and if a node has an associated token value, |  | ||||||
|  * that value is printed in quotes. |  | ||||||
|  * |  | ||||||
|  * @param node The root node of the AST to print |  | ||||||
|  */ |  | ||||||
| void ast_node_print(ast_node_t *node); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Prune the children with a given id |  | ||||||
|  * |  | ||||||
|  * The tree is recursively visited and all child nodes of a given ID are pruned |  | ||||||
|  * completely. If a node has the giver id, it will get removed along wih all its |  | ||||||
|  * children, even if some of those children have different ids. The root node id |  | ||||||
|  * is never checked so the tree is guaranteed to remain and allocated valid. |  | ||||||
|  * |  | ||||||
|  * @param node The root of the tree you want to prune |  | ||||||
|  * @param id The id of the nodes you want to prune |  | ||||||
|  */ |  | ||||||
| void ast_node_prune(ast_node_t *node, node_id_t id); |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_SRC_AST_H_ | #endif // INCLUDE_SRC_AST_H_ | ||||||
|   | |||||||
| @@ -1,6 +0,0 @@ | |||||||
| #include "bytes.h" |  | ||||||
| #include "error.h" |  | ||||||
|  |  | ||||||
| error_t *const err_bytes_no_capacity = &(error_t){ |  | ||||||
|     .message = "Not enough capacity in bytes buffer", |  | ||||||
| }; |  | ||||||
							
								
								
									
										60
									
								
								src/bytes.h
									
									
									
									
									
								
							
							
						
						
									
										60
									
								
								src/bytes.h
									
									
									
									
									
								
							| @@ -1,60 +0,0 @@ | |||||||
| #ifndef INCLUDE_SRC_BYTES_H_ |  | ||||||
| #define INCLUDE_SRC_BYTES_H_ |  | ||||||
|  |  | ||||||
| #include "error.h" |  | ||||||
| #include <stddef.h> |  | ||||||
| #include <stdint.h> |  | ||||||
| #include <string.h> |  | ||||||
|  |  | ||||||
| extern error_t *const err_bytes_no_capacity; |  | ||||||
|  |  | ||||||
| typedef struct bytes { |  | ||||||
|     size_t len; |  | ||||||
|     size_t cap; |  | ||||||
|     uint8_t buffer[]; |  | ||||||
| } bytes_t; |  | ||||||
|  |  | ||||||
/**
 * Yield a pointer to an empty buffer with room for N bytes, created as a
 * compound literal. The anonymous struct repeats the field layout of
 * struct bytes but with a fixed-size array, so no heap allocation is needed.
 *
 * When used inside a function the compound literal only lives until the end
 * of the enclosing block; do not return or store the pointer beyond that.
 *
 * The expansion is fully parenthesized so it can be used inside a larger
 * expression (e.g. as a function argument or with ->).
 */
#define LOCAL_BYTES_ANONYMOUS(N)                                               \
    (&(struct {                                                                \
        size_t len;                                                            \
        size_t cap;                                                            \
        uint8_t buffer[(N)];                                                   \
    }){0, (N), {}})

/**
 * Same as LOCAL_BYTES_ANONYMOUS but typed as bytes_t * so the result can be
 * passed straight to the bytes_append_* helpers.
 *
 * The macro deliberately carries no trailing semicolon: the caller writes it,
 * which keeps `bytes_t *b = LOCAL_BYTES(16);` working and also allows
 * `some_call(LOCAL_BYTES(16))`.
 */
#define LOCAL_BYTES(N) ((bytes_t *)LOCAL_BYTES_ANONYMOUS(N))
|  |  | ||||||
| static inline error_t *bytes_append_uint8(bytes_t *bytes, uint8_t value) { |  | ||||||
|     if (bytes->len >= bytes->cap) |  | ||||||
|         return err_bytes_no_capacity; |  | ||||||
|     bytes->buffer[bytes->len++] = value; |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline error_t *bytes_append_array(bytes_t *dst, size_t n, |  | ||||||
|                                           uint8_t buffer[static n]) { |  | ||||||
|     if (dst->len + n >= dst->cap) |  | ||||||
|         return err_bytes_no_capacity; |  | ||||||
|     memcpy(dst->buffer + dst->len, buffer, n); |  | ||||||
|     dst->len += n; |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline error_t *bytes_append_bytes(bytes_t *dst, bytes_t *src) { |  | ||||||
|     return bytes_append_array(dst, src->len, src->buffer); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline error_t *bytes_append_uint16(bytes_t *dst, uint16_t value) { |  | ||||||
|     return bytes_append_array(dst, sizeof(value), (uint8_t *)&value); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline error_t *bytes_append_uint32(bytes_t *dst, uint32_t value) { |  | ||||||
|     return bytes_append_array(dst, sizeof(value), (uint8_t *)&value); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static inline error_t *bytes_append_uint64(bytes_t *dst, uint64_t value) { |  | ||||||
|     return bytes_append_array(dst, sizeof(value), (uint8_t *)&value); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_SRC_BYTES_H_ |  | ||||||
| @@ -1,265 +0,0 @@ | |||||||
| #include "opcodes.h" |  | ||||||
|  |  | ||||||
| // clang-format off |  | ||||||
| opcode_data_t *const opcodes[] = { |  | ||||||
|     // RET |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "ret", |  | ||||||
|         .opcode = 0xC3, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .operand_count = 0, |  | ||||||
|     }, |  | ||||||
|     // RET imm16 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "ret", |  | ||||||
|         .opcode = 0xC2, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // PUSH imm8 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "push", |  | ||||||
|         .opcode = 0x6A, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8}, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // PUSH imm16 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "push", |  | ||||||
|         .opcode = 0x68, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .operand_size_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16}, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // PUSH imm32 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "push", |  | ||||||
|         .opcode = 0x68, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .operand_size_prefix = false, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32}, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // PUSH reg16,  |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "push", |  | ||||||
|         .opcode = 0x50, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .encoding_class = ENCODING_OPCODE_REGISTER, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // PUSH reg64 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "push", |  | ||||||
|         .opcode = 0x50, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .encoding_class = ENCODING_OPCODE_REGISTER, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // NOT reg16 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "not", |  | ||||||
|         .opcode = 0xF7, |  | ||||||
|         .opcode_extension = 2, |  | ||||||
|         .operand_size_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // NOT reg32 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "not", |  | ||||||
|         .opcode = 0xF7, |  | ||||||
|         .opcode_extension = 2, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // NOT reg64 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "not", |  | ||||||
|         .opcode = 0xF7, |  | ||||||
|         .opcode_extension = 2, |  | ||||||
|         .rex_w_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|  |  | ||||||
|     // NEG reg16 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "neg", |  | ||||||
|         .opcode = 0xF7, |  | ||||||
|         .opcode_extension = 3, |  | ||||||
|         .operand_size_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // NEG reg32 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "neg", |  | ||||||
|         .opcode = 0xF7, |  | ||||||
|         .opcode_extension = 3, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // NEG reg64 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "neg", |  | ||||||
|         .opcode = 0xF7, |  | ||||||
|         .opcode_extension = 3, |  | ||||||
|         .rex_w_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // CALL rel32 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "call", |  | ||||||
|         .opcode = 0xE8, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // CALL reg64 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "call", |  | ||||||
|         .opcode = 0xFF, |  | ||||||
|         .opcode_extension = 2, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .rex_w_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // CALL mem64 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "call", |  | ||||||
|         .opcode = 0xFF, |  | ||||||
|         .opcode_extension = 2, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .rex_w_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     // JMP rel8 (short jump) |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "jmp", |  | ||||||
|         .opcode = 0xEB, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_8 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|  |  | ||||||
|     // JMP rel16 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "jmp", |  | ||||||
|         .opcode = 0xE9, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .operand_size_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_16 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|  |  | ||||||
|     // JMP reg16 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "jmp", |  | ||||||
|         .opcode = 0xFF, |  | ||||||
|         .opcode_extension = 4, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .operand_size_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_16 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|  |  | ||||||
|     // JMP rel32 (near jump) |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "jmp", |  | ||||||
|         .opcode = 0xE9, |  | ||||||
|         .opcode_extension = opcode_extension_none, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_IMMEDIATE, .size = OPERAND_SIZE_32 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|  |  | ||||||
|     // JMP reg32 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "jmp", |  | ||||||
|         .opcode = 0xFF, |  | ||||||
|         .opcode_extension = 4, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_32 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|  |  | ||||||
|     // JMP reg64 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "jmp", |  | ||||||
|         .opcode = 0xFF, |  | ||||||
|         .opcode_extension = 4, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .rex_w_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_REGISTER, .size = OPERAND_SIZE_64 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|  |  | ||||||
|     // JMP mem64 |  | ||||||
|     &(opcode_data_t) { |  | ||||||
|         .mnemonic = "jmp", |  | ||||||
|         .opcode = 0xFF, |  | ||||||
|         .opcode_extension = 4, |  | ||||||
|         .encoding_class = ENCODING_DEFAULT, |  | ||||||
|         .rex_w_prefix = true, |  | ||||||
|         .operand_count = 1, |  | ||||||
|         .operands = { |  | ||||||
|             { .kind = OPERAND_MEMORY, .size = OPERAND_SIZE_64 }, |  | ||||||
|         }, |  | ||||||
|     }, |  | ||||||
|     nullptr, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| @@ -1,56 +0,0 @@ | |||||||
| #ifndef INCLUDE_DATA_OPCODES_H_ |  | ||||||
| #define INCLUDE_DATA_OPCODES_H_ |  | ||||||
|  |  | ||||||
| #include "../data/registers.h" |  | ||||||
| #include <stddef.h> |  | ||||||
| #include <stdint.h> |  | ||||||
|  |  | ||||||
| constexpr uint8_t rex_prefix = 0x40; |  | ||||||
| constexpr uint8_t rex_prefix_w = 0x48; |  | ||||||
| constexpr uint8_t rex_prefix_r = 0x44; |  | ||||||
| constexpr uint8_t rex_prefix_x = 0x42; |  | ||||||
| constexpr uint8_t rex_prefix_b = 0x41; |  | ||||||
|  |  | ||||||
| constexpr uint8_t operand_size_prefix = 0x66; |  | ||||||
| constexpr uint8_t memory_size_prefix = 0x67; |  | ||||||
| constexpr uint8_t lock_prefix = 0xF0; |  | ||||||
| constexpr uint8_t repne_prefix = 0xF2; |  | ||||||
| constexpr uint8_t rep_prefix = 0xF3; |  | ||||||
|  |  | ||||||
// Selects how the operands of an instruction are placed into its encoding.
enum encoding_class {
    // use modrm+sib for registers and memory, append immediates
    ENCODING_DEFAULT,
    // encode the register in the last 3 bits of the opcode
    ENCODING_OPCODE_REGISTER,
};

typedef enum encoding_class encoding_class_t;
|  |  | ||||||
// The category of operand an encoding accepts in a given position.
enum operand_kind {
    OPERAND_REGISTER,
    OPERAND_MEMORY,
    OPERAND_IMMEDIATE,
};

typedef enum operand_kind operand_kind_t;
|  |  | ||||||
| typedef struct operand_info { |  | ||||||
|     operand_kind_t kind; |  | ||||||
|     operand_size_t size; |  | ||||||
| } operand_info_t; |  | ||||||
|  |  | ||||||
| constexpr uint8_t opcode_extension_none = 0xFF; |  | ||||||
|  |  | ||||||
| typedef struct opcode_data { |  | ||||||
|     const char *mnemonic; |  | ||||||
|  |  | ||||||
|     uint16_t opcode; |  | ||||||
|     uint8_t opcode_extension; // 3 bits for the opcode extension in the reg |  | ||||||
|                               // field of a modr/m byte |  | ||||||
|     encoding_class_t encoding_class; |  | ||||||
|     bool operand_size_prefix; |  | ||||||
|     bool address_size_prefix; |  | ||||||
|     bool rex_w_prefix; |  | ||||||
|     size_t operand_count; |  | ||||||
|     operand_info_t operands[3]; |  | ||||||
| } opcode_data_t; |  | ||||||
|  |  | ||||||
| extern opcode_data_t *const opcodes[]; |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_DATA_OPCODES_H_ |  | ||||||
| @@ -1,92 +0,0 @@ | |||||||
| #include "registers.h" |  | ||||||
|  |  | ||||||
| register_data_t *const registers[] = { |  | ||||||
|     // Instruction pointer |  | ||||||
|     &(register_data_t){"rip",  REG_RIP, OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"eip",  REG_RIP, OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"ip",   REG_RIP, OPERAND_SIZE_16}, |  | ||||||
|  |  | ||||||
|     // 64-bit general purpose registers |  | ||||||
|     &(register_data_t){"rax",  REG_A,   OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"rcx",  REG_C,   OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"rdx",  REG_D,   OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"rbx",  REG_B,   OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"rsp",  REG_SP,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"rbp",  REG_BP,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"rsi",  REG_SI,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"rdi",  REG_DI,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"r8",   REG_8,   OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"r9",   REG_9,   OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"r10",  REG_10,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"r11",  REG_11,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"r12",  REG_12,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"r13",  REG_13,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"r14",  REG_14,  OPERAND_SIZE_64}, |  | ||||||
|     &(register_data_t){"r15",  REG_15,  OPERAND_SIZE_64}, |  | ||||||
|  |  | ||||||
|     // 32-bit general purpose registers |  | ||||||
|     &(register_data_t){"eax",  REG_A,   OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"ecx",  REG_C,   OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"edx",  REG_D,   OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"ebx",  REG_B,   OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"esp",  REG_SP,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"ebp",  REG_BP,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"esi",  REG_SI,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"edi",  REG_DI,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"r8d",  REG_8,   OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"r9d",  REG_9,   OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"r10d", REG_10,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"r11d", REG_11,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"r12d", REG_12,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"r13d", REG_13,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"r14d", REG_14,  OPERAND_SIZE_32}, |  | ||||||
|     &(register_data_t){"r15d", REG_15,  OPERAND_SIZE_32}, |  | ||||||
|  |  | ||||||
|     // 16-bit general purpose registers |  | ||||||
|     &(register_data_t){"ax",   REG_A,   OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"cx",   REG_C,   OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"dx",   REG_D,   OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"bx",   REG_B,   OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"sp",   REG_SP,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"bp",   REG_BP,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"si",   REG_SI,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"di",   REG_DI,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"r8w",  REG_8,   OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"r9w",  REG_9,   OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"r10w", REG_10,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"r11w", REG_11,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"r12w", REG_12,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"r13w", REG_13,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"r14w", REG_14,  OPERAND_SIZE_16}, |  | ||||||
|     &(register_data_t){"r15w", REG_15,  OPERAND_SIZE_16}, |  | ||||||
|  |  | ||||||
|     // 8-bit general purpose registers (low byte) |  | ||||||
|     &(register_data_t){"al",   REG_A,   OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"cl",   REG_C,   OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"dl",   REG_D,   OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"bl",   REG_B,   OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"spl",  REG_SP,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"bpl",  REG_BP,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"sil",  REG_SI,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"dil",  REG_DI,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"r8b",  REG_8,   OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"r9b",  REG_9,   OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"r10b", REG_10,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"r11b", REG_11,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"r12b", REG_12,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"r13b", REG_13,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"r14b", REG_14,  OPERAND_SIZE_8 }, |  | ||||||
|     &(register_data_t){"r15b", REG_15,  OPERAND_SIZE_8 }, |  | ||||||
|  |  | ||||||
|     // x87 floating point registers |  | ||||||
|     &(register_data_t){"st0",  REG_ST0, OPERAND_SIZE_80}, |  | ||||||
|     &(register_data_t){"st1",  REG_ST1, OPERAND_SIZE_80}, |  | ||||||
|     &(register_data_t){"st2",  REG_ST2, OPERAND_SIZE_80}, |  | ||||||
|     &(register_data_t){"st3",  REG_ST3, OPERAND_SIZE_80}, |  | ||||||
|     &(register_data_t){"st4",  REG_ST4, OPERAND_SIZE_80}, |  | ||||||
|     &(register_data_t){"st5",  REG_ST5, OPERAND_SIZE_80}, |  | ||||||
|     &(register_data_t){"st6",  REG_ST6, OPERAND_SIZE_80}, |  | ||||||
|     &(register_data_t){"st7",  REG_ST7, OPERAND_SIZE_80}, |  | ||||||
|  |  | ||||||
|     nullptr, |  | ||||||
| }; |  | ||||||
| @@ -1,82 +0,0 @@ | |||||||
| #ifndef INCLUDE_DATA_REGISTERS_H_ |  | ||||||
| #define INCLUDE_DATA_REGISTERS_H_ |  | ||||||
|  |  | ||||||
// Operand widths in bits. Every width occupies its own bit so that several
// widths can be OR-ed together into a mask; zero means "no valid size".
enum operand_size {
    OPERAND_SIZE_INVALID = 0x00,

    OPERAND_SIZE_8 = 0x01,
    OPERAND_SIZE_16 = 0x02,
    OPERAND_SIZE_32 = 0x04,
    OPERAND_SIZE_64 = 0x08,

    OPERAND_SIZE_80 = 0x10,
    OPERAND_SIZE_128 = 0x20,
    OPERAND_SIZE_256 = 0x40,
    OPERAND_SIZE_512 = 0x80,
};

typedef enum operand_size operand_size_t;
|  |  | ||||||
| static inline operand_size_t bits_to_operand_size(int bits) { |  | ||||||
|     switch (bits) { |  | ||||||
|     case 8: |  | ||||||
|         return OPERAND_SIZE_8; |  | ||||||
|     case 16: |  | ||||||
|         return OPERAND_SIZE_16; |  | ||||||
|     case 32: |  | ||||||
|         return OPERAND_SIZE_32; |  | ||||||
|     case 64: |  | ||||||
|         return OPERAND_SIZE_64; |  | ||||||
|     case 80: |  | ||||||
|         return OPERAND_SIZE_80; |  | ||||||
|     case 128: |  | ||||||
|         return OPERAND_SIZE_128; |  | ||||||
|     case 256: |  | ||||||
|         return OPERAND_SIZE_256; |  | ||||||
|     case 512: |  | ||||||
|         return OPERAND_SIZE_512; |  | ||||||
|     default: |  | ||||||
|         return OPERAND_SIZE_INVALID; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
// Identifies a register independent of the size it is accessed with. The
// general purpose registers count up from 0 in the order listed, the x87
// registers start at 0x1000.
enum register_id {
    // Special registers
    REG_RIP = -1,

    // General purpose registers
    REG_A = 0x0000,
    REG_C,
    REG_D,
    REG_B,
    REG_SP,
    REG_BP,
    REG_SI,
    REG_DI,

    // General purpose registers r8 - r15
    REG_8,
    REG_9,
    REG_10,
    REG_11,
    REG_12,
    REG_13,
    REG_14,
    REG_15,

    // x87 floating point registers
    REG_ST0 = 0x1000,
    REG_ST1,
    REG_ST2,
    REG_ST3,
    REG_ST4,
    REG_ST5,
    REG_ST6,
    REG_ST7,
};

typedef enum register_id register_id_t;
|  |  | ||||||
| typedef struct register_data { |  | ||||||
|     const char *name; |  | ||||||
|     register_id_t id; |  | ||||||
|     operand_size_t size; |  | ||||||
| } register_data_t; |  | ||||||
|  |  | ||||||
| extern register_data_t *const registers[]; |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_DATA_REGISTERS_H_ |  | ||||||
| @@ -1,711 +0,0 @@ | |||||||
| #include "encoder.h" |  | ||||||
| #include "../bytes.h" |  | ||||||
| #include "../data/opcodes.h" |  | ||||||
| #include "symbols.h" |  | ||||||
| #include <assert.h> |  | ||||||
| #include <errno.h> |  | ||||||
| #include <string.h> |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * General encoder flow: |  | ||||||
|  * |  | ||||||
|  * There are 2 major passes the encoder does: |  | ||||||
|  * |  | ||||||
|  * First pass: |  | ||||||
|  *   - Run through the AST and collect information: |  | ||||||
|  *     - Set register values |  | ||||||
|  *     - Parse/set number values |  | ||||||
|  *     - Mark all instructions that use label references |  | ||||||
|  *   - Encode all instructions that don't use label references |  | ||||||
|  *   - Update addresses of all labels and instructions. Use an estimated |  | ||||||
|  *     instruction size for those instructions that use label references. |  | ||||||
|  * |  | ||||||
|  * Second pass: |  | ||||||
|  *   - Run through the AST for all instructions that use label references and |  | ||||||
|  *     collect size information using the estimated addresses from pass 1 |  | ||||||
|  *   - Encode label references with the estimated addresses, this fixes their |  | ||||||
|  *     size. |  | ||||||
|  *   - Update all addresses |  | ||||||
|  * |  | ||||||
|  * Iteration: |  | ||||||
|  *   - Repeat the second pass until addresses converge |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| error_t *const err_encoder_invalid_register = |  | ||||||
|     &(error_t){.message = "Invalid register"}; |  | ||||||
| error_t *const err_encoder_number_overflow = |  | ||||||
|     &(error_t){.message = "Number overflows the storage"}; |  | ||||||
| error_t *const err_encoder_invalid_number_format = |  | ||||||
|     &(error_t){.message = "Invalid number format"}; |  | ||||||
| error_t *const err_encoder_invalid_size_suffix = |  | ||||||
|     &(error_t){.message = "Invalid number size suffix"}; |  | ||||||
| error_t *const err_encoder_unknown_symbol_reference = |  | ||||||
|     &(error_t){.message = "Referenced an unknown symbol"}; |  | ||||||
| error_t *const err_encoder_no_encoding_found = |  | ||||||
|     &(error_t){.message = "No encoding found for instruction"}; |  | ||||||
| error_t *const err_encoder_not_implemented = |  | ||||||
|     &(error_t){.message = "Implementation for this opcode is missing"}; |  | ||||||
| error_t *const err_encoder_unexpected_length = |  | ||||||
|     &(error_t){.message = "Unexpectedly long encoding"}; |  | ||||||
|  |  | ||||||
| error_t *encoder_alloc(encoder_t **output, ast_node_t *ast) { |  | ||||||
|     *output = nullptr; |  | ||||||
|     encoder_t *encoder = calloc(1, sizeof(encoder_t)); |  | ||||||
|  |  | ||||||
|     if (encoder == nullptr) |  | ||||||
|         return err_allocation_failed; |  | ||||||
|  |  | ||||||
|     encoder->ast = ast; |  | ||||||
|  |  | ||||||
|     error_t *err = symbol_table_alloc(&encoder->symbols); |  | ||||||
|     if (err) { |  | ||||||
|         free(encoder); |  | ||||||
|         return err; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     *output = encoder; |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void encoder_free(encoder_t *encoder) { |  | ||||||
|     if (encoder == nullptr) |  | ||||||
|         return; |  | ||||||
|     symbol_table_free(encoder->symbols); |  | ||||||
|     free(encoder); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool encoder_is_symbols_node(ast_node_t *node) { |  | ||||||
|     switch (node->id) { |  | ||||||
|     case NODE_LABEL: |  | ||||||
|     case NODE_LABEL_REFERENCE: |  | ||||||
|     case NODE_EXPORT_DIRECTIVE: |  | ||||||
|     case NODE_IMPORT_DIRECTIVE: |  | ||||||
|         return true; |  | ||||||
|     default: |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| int encoder_get_number_base(ast_node_t *number) { |  | ||||||
|     switch (number->children[0]->id) { |  | ||||||
|     case NODE_BINARY: |  | ||||||
|         return 2; |  | ||||||
|     case NODE_OCTAL: |  | ||||||
|         return 8; |  | ||||||
|     case NODE_DECIMAL: |  | ||||||
|         return 10; |  | ||||||
|     case NODE_HEXADECIMAL: |  | ||||||
|         return 16; |  | ||||||
|     default: |  | ||||||
|         assert(false); |  | ||||||
|     } |  | ||||||
|     __builtin_unreachable(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
/**
 * Check whether a size suffix is acceptable for a number.
 *
 * @param bits The suffix value; 0 is accepted alongside 8, 16, 32 and 64
 * @return true if bits is one of 0, 8, 16, 32 or 64
 */
bool is_valid_size_suffix(int bits) {
    return bits == 0 || bits == 8 || bits == 16 || bits == 32 || bits == 64;
}
|  |  | ||||||
/**
 * Check whether a value needs more than the given number of bits.
 *
 * @param value The value to test
 * @param bits The available width; 0 and anything from 64 upwards never
 *             overflow
 * @return true if value does not fit into bits bits
 */
bool is_overflow(uint64_t value, int bits) {
    const bool unlimited = bits == 0 || bits >= 64;
    if (unlimited)
        return false;

    // anything left after dropping the low `bits` bits does not fit
    return (value >> bits) != 0;
}
|  |  | ||||||
| operand_size_t encoder_get_size_mask(uint64_t value, int bits) { |  | ||||||
|     if (bits != 0) |  | ||||||
|         return bits_to_operand_size(bits); |  | ||||||
|  |  | ||||||
|     operand_size_t mask = OPERAND_SIZE_64; |  | ||||||
|     if (value < (1ULL << 8)) |  | ||||||
|         mask |= OPERAND_SIZE_8; |  | ||||||
|     if (value < (1ULL << 16)) |  | ||||||
|         mask |= OPERAND_SIZE_16; |  | ||||||
|     if (value < (1ULL << 32)) |  | ||||||
|         mask |= OPERAND_SIZE_32; |  | ||||||
|     return mask; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encoder_set_number_value(ast_node_t *node) { |  | ||||||
|     assert(node->id == NODE_NUMBER); |  | ||||||
|     assert(node->children[0]); |  | ||||||
|     const char *number = node->children[0]->token_entry->token.value; |  | ||||||
|     int base = encoder_get_number_base(node); |  | ||||||
|  |  | ||||||
|     if (base != 10) |  | ||||||
|         number += 2; // all except base 10 use a 0x, 0o or 0b prefix |  | ||||||
|  |  | ||||||
|     char *endptr; |  | ||||||
|     errno = 0; |  | ||||||
|     uint64_t value = strtoull(number, &endptr, base); |  | ||||||
|  |  | ||||||
|     if (errno == ERANGE) |  | ||||||
|         return err_encoder_number_overflow; |  | ||||||
|  |  | ||||||
|     if (endptr == number) |  | ||||||
|         return err_encoder_invalid_number_format; |  | ||||||
|  |  | ||||||
|     int bits = 0; |  | ||||||
|     if (*endptr == ':') { |  | ||||||
|         const char *suffix = endptr + 1; |  | ||||||
|  |  | ||||||
|         bits = strtol(suffix, &endptr, 10); |  | ||||||
|  |  | ||||||
|         if (endptr == suffix) |  | ||||||
|             return err_encoder_invalid_number_format; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (*endptr != '\0') |  | ||||||
|         return err_encoder_invalid_number_format; |  | ||||||
|  |  | ||||||
|     if (!is_valid_size_suffix(bits)) |  | ||||||
|         return err_encoder_invalid_size_suffix; |  | ||||||
|  |  | ||||||
|     if (is_overflow(value, bits)) |  | ||||||
|         return err_encoder_number_overflow; |  | ||||||
|  |  | ||||||
|     node->value.number.value = value; |  | ||||||
|     node->value.number.size = encoder_get_size_mask(value, bits); |  | ||||||
|  |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encoder_set_register_value(ast_node_t *node) { |  | ||||||
|     assert(node->id == NODE_REGISTER); |  | ||||||
|  |  | ||||||
|     const char *value = node->token_entry->token.value; |  | ||||||
|  |  | ||||||
|     for (size_t i = 0; registers[i] != nullptr; ++i) { |  | ||||||
|         if (strcmp(value, registers[i]->name) == 0) { |  | ||||||
|             node->value.reg.id = registers[i]->id; |  | ||||||
|             node->value.reg.size = registers[i]->size; |  | ||||||
|             return nullptr; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return err_encoder_invalid_register; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Set the opcode extension in the modrm field |  | ||||||
|  */ |  | ||||||
| static inline uint8_t modrm_extension(uint8_t modrm, uint8_t extension) { |  | ||||||
|     assert(extension != opcode_extension_none); |  | ||||||
|     assert((extension & 0b111) == extension); |  | ||||||
|     return (modrm & ~modrm_reg_mask) | extension << 3; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Return the rex bit for reg field in modrm |  | ||||||
|  */ |  | ||||||
| static inline uint8_t modrm_reg_rex(uint8_t rex, register_id_t id) { |  | ||||||
|     if (id & 0b1000) |  | ||||||
|         rex |= rex_prefix_r; |  | ||||||
|     return rex; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * update modrm reg field with the given register, must be used alongside |  | ||||||
|  * modrm_reg_rex |  | ||||||
|  */ |  | ||||||
| static inline uint8_t modrm_reg(uint8_t modrm, register_id_t id) { |  | ||||||
|     return (modrm & ~modrm_reg_mask) | (id & 0b111) << 3; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Return the rex bit for rm field in modrm |  | ||||||
|  */ |  | ||||||
| static inline uint8_t modrm_rm_rex(uint8_t rex, register_id_t id) { |  | ||||||
|     if (id & 0b1000) |  | ||||||
|         rex |= rex_prefix_b; |  | ||||||
|     return rex; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * update modrm rm field with the given register, must be used alongside |  | ||||||
|  * modrm_rm_rex |  | ||||||
|  */ |  | ||||||
| static inline uint8_t modrm_rm(uint8_t modrm, register_id_t id) { |  | ||||||
|     assert((modrm & modrm_mod_mask) == modrm_mod_register); |  | ||||||
|     return (modrm & ~modrm_rm_mask) | (id & 0b111); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encoder_collect_info(encoder_t *encoder, ast_node_t *node, |  | ||||||
|                               ast_node_t *statement) { |  | ||||||
|     error_t *err = nullptr; |  | ||||||
|  |  | ||||||
|     if (encoder_is_symbols_node(node)) { |  | ||||||
|         err = symbol_table_update(encoder->symbols, node, statement); |  | ||||||
|         if (statement->id == NODE_INSTRUCTION) |  | ||||||
|             statement->value.instruction.has_reference = true; |  | ||||||
|     } else if (node->id == NODE_NUMBER) |  | ||||||
|         err = encoder_set_number_value(node); |  | ||||||
|     else if (node->id == NODE_REGISTER) |  | ||||||
|         err = encoder_set_register_value(node); |  | ||||||
|     if (err) |  | ||||||
|         return err; |  | ||||||
|  |  | ||||||
|     for (size_t i = 0; i < node->len; ++i) { |  | ||||||
|         error_t *err = |  | ||||||
|             encoder_collect_info(encoder, node->children[i], statement); |  | ||||||
|         if (err) |  | ||||||
|             return err; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool is_operand_match(operand_info_t *info, ast_node_t *operand) { |  | ||||||
|     switch (info->kind) { |  | ||||||
|     case OPERAND_REGISTER: |  | ||||||
|         return operand->id == NODE_REGISTER && |  | ||||||
|                ast_node_register_value(operand)->size == info->size; |  | ||||||
|     case OPERAND_MEMORY: |  | ||||||
|         return operand->id == NODE_MEMORY; |  | ||||||
|     case OPERAND_IMMEDIATE: { |  | ||||||
|         if (operand->id != NODE_IMMEDIATE) |  | ||||||
|             return false; |  | ||||||
|         ast_node_t *child = operand->children[0]; |  | ||||||
|  |  | ||||||
|         if (child->id == NODE_NUMBER) |  | ||||||
|             return (ast_node_number_value(child)->size & info->size) > 0; |  | ||||||
|         else if (child->id == NODE_LABEL_REFERENCE) { |  | ||||||
|             return info->size &= ast_node_reference_value(child)->size; |  | ||||||
|         } |  | ||||||
|     } // end OPERAND_IMMEDIATE case |  | ||||||
|     } |  | ||||||
|     assert(false && "unreachable"); |  | ||||||
|     __builtin_unreachable(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool is_opcode_match(opcode_data_t *opcode, const char *mnemonic, |  | ||||||
|                      ast_node_t *operands) { |  | ||||||
|     if (strcmp(opcode->mnemonic, mnemonic) != 0) |  | ||||||
|         return false; |  | ||||||
|  |  | ||||||
|     if (opcode->operand_count != operands->len) |  | ||||||
|         return false; |  | ||||||
|  |  | ||||||
|     for (size_t i = 0; i < operands->len; ++i) { |  | ||||||
|         if (!is_operand_match(&opcode->operands[i], operands->children[i])) |  | ||||||
|             return false; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return true; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encoder_get_opcode_data(ast_node_t *instruction, ast_node_t *operands, |  | ||||||
|                                  opcode_data_t **opcode_out) { |  | ||||||
|     const char *mnemonic = instruction->children[0]->token_entry->token.value; |  | ||||||
|  |  | ||||||
|     for (size_t i = 0; opcodes[i]; ++i) { |  | ||||||
|         opcode_data_t *opcode = opcodes[i]; |  | ||||||
|         if (is_opcode_match(opcode, mnemonic, operands)) { |  | ||||||
|             *opcode_out = opcode; |  | ||||||
|             return nullptr; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return err_encoder_no_encoding_found; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encode_two_operand(encoder_t *encoder, opcode_data_t *opcode, |  | ||||||
|                             ast_node_t *operands, bytes_t *encoding, |  | ||||||
|                             uint8_t *rex) { |  | ||||||
|     (void)encoder; |  | ||||||
|     (void)opcode; |  | ||||||
|     (void)operands; |  | ||||||
|     (void)encoding; |  | ||||||
|     (void)rex; |  | ||||||
|     assert(encoding->len >= 1 && "must have 1+ opcode byte in buffer already"); |  | ||||||
|     return err_encoder_not_implemented; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encode_one_register_in_opcode(encoder_t *encoder, |  | ||||||
|                                        opcode_data_t *opcode, |  | ||||||
|                                        ast_node_t *operands, bytes_t *encoding, |  | ||||||
|                                        uint8_t *rex) { |  | ||||||
|     (void)encoder; |  | ||||||
|     (void)opcode; |  | ||||||
|  |  | ||||||
|     register_id_t id = ast_node_register_value(operands->children[0])->id; |  | ||||||
|     encoding->buffer[encoding->len - 1] |= id & 0b111; |  | ||||||
|     if ((id & 0b1000) > 0) { |  | ||||||
|         *rex |= rex_prefix_r; |  | ||||||
|     } |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encode_one_register(encoder_t *encoder, opcode_data_t *opcode, |  | ||||||
|                              ast_node_t *operands, bytes_t *encoding, |  | ||||||
|                              uint8_t *rex) { |  | ||||||
|     (void)encoder; |  | ||||||
|     assert(operands->len == 1); |  | ||||||
|     assert(operands->children[0]->id == NODE_REGISTER); |  | ||||||
|  |  | ||||||
|     register_id_t id = ast_node_register_value(operands->children[0])->id; |  | ||||||
|  |  | ||||||
|     uint8_t modrm = modrm_mod_register; |  | ||||||
|  |  | ||||||
|     if (opcode->opcode_extension != opcode_extension_none) { |  | ||||||
|         // register goes in rm field, extension goes in mod field |  | ||||||
|         modrm = modrm_extension(modrm, opcode->opcode_extension); |  | ||||||
|         modrm = modrm_rm(modrm, id); |  | ||||||
|         *rex = modrm_rm_rex(*rex, id); |  | ||||||
|     } else { |  | ||||||
|         // register goes in reg field |  | ||||||
|         // NOTE: |  | ||||||
|         // it's actually likely this case just doesn't exist at all and all |  | ||||||
|         // opcodes that take one register in modr/m _all_ have extended opcdes |  | ||||||
|         modrm = modrm_reg(modrm, id); |  | ||||||
|         *rex = modrm_reg_rex(*rex, id); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return bytes_append_uint8(encoding, modrm); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encode_one_immediate(encoder_t *encoder, opcode_data_t *opcode, |  | ||||||
|                               ast_node_t *operands, bytes_t *encoding, |  | ||||||
|                               uint8_t *rex) { |  | ||||||
|     (void)encoder; |  | ||||||
|     (void)opcode; |  | ||||||
|     (void)rex; |  | ||||||
|     assert(operands->len == 1); |  | ||||||
|     assert(operands->children[0]->id == NODE_IMMEDIATE); |  | ||||||
|     assert(operands->children[0]->len == 1); |  | ||||||
|     ast_node_t *immediate = operands->children[0]->children[0]; |  | ||||||
|     assert(immediate->id == NODE_NUMBER || |  | ||||||
|            immediate->id == NODE_LABEL_REFERENCE); |  | ||||||
|  |  | ||||||
|     operand_size_t size = opcode->operands[0].size; |  | ||||||
|     if (immediate->id == NODE_NUMBER) { |  | ||||||
|         uint64_t value = ast_node_number_value(immediate)->value; |  | ||||||
|         error_t *err = nullptr; |  | ||||||
|         switch (size) { |  | ||||||
|         case OPERAND_SIZE_8: |  | ||||||
|             err = bytes_append_uint8(encoding, value); |  | ||||||
|             break; |  | ||||||
|         case OPERAND_SIZE_16: |  | ||||||
|             err = bytes_append_uint16(encoding, value); |  | ||||||
|             break; |  | ||||||
|         case OPERAND_SIZE_32: |  | ||||||
|             err = bytes_append_uint32(encoding, value); |  | ||||||
|             break; |  | ||||||
|         case OPERAND_SIZE_64: |  | ||||||
|             err = bytes_append_uint64(encoding, value); |  | ||||||
|             break; |  | ||||||
|         default: |  | ||||||
|             assert(false && "intentionally unhandled"); |  | ||||||
|         } |  | ||||||
|         return err; |  | ||||||
|     } else { |  | ||||||
|         reference_t *reference = ast_node_reference_value(immediate); |  | ||||||
|         switch (size) { |  | ||||||
|         case OPERAND_SIZE_64: |  | ||||||
|             return bytes_append_uint64(encoding, reference->address); |  | ||||||
|         case OPERAND_SIZE_32: |  | ||||||
|             return bytes_append_uint32(encoding, reference->offset); |  | ||||||
|         case OPERAND_SIZE_16: |  | ||||||
|             return bytes_append_uint16(encoding, reference->offset); |  | ||||||
|         case OPERAND_SIZE_8: |  | ||||||
|             return bytes_append_uint8(encoding, reference->offset); |  | ||||||
|         default: |  | ||||||
|             assert(false && "intentionally unhandled"); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     __builtin_unreachable(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encode_one_memory(encoder_t *encoder, opcode_data_t *opcode, |  | ||||||
|                            ast_node_t *operands, bytes_t *encoding, |  | ||||||
|                            uint8_t *rex) { |  | ||||||
|     (void)encoder; |  | ||||||
|     (void)opcode; |  | ||||||
|     (void)operands; |  | ||||||
|     (void)encoding; |  | ||||||
|     (void)rex; |  | ||||||
|     return err_encoder_not_implemented; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encode_one_operand(encoder_t *encoder, opcode_data_t *opcode, |  | ||||||
|                             ast_node_t *operands, bytes_t *encoding, |  | ||||||
|                             uint8_t *rex) { |  | ||||||
|     switch (opcode->operands[0].kind) { |  | ||||||
|     case OPERAND_REGISTER: |  | ||||||
|         if (opcode->encoding_class == ENCODING_OPCODE_REGISTER) |  | ||||||
|             return encode_one_register_in_opcode(encoder, opcode, operands, |  | ||||||
|                                                  encoding, rex); |  | ||||||
|         else |  | ||||||
|             return encode_one_register(encoder, opcode, operands, encoding, |  | ||||||
|                                        rex); |  | ||||||
|     case OPERAND_MEMORY: |  | ||||||
|         return encode_one_memory(encoder, opcode, operands, encoding, rex); |  | ||||||
|     case OPERAND_IMMEDIATE: |  | ||||||
|         return encode_one_immediate(encoder, opcode, operands, encoding, rex); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encoder_encode_instruction(encoder_t *encoder, |  | ||||||
|                                     ast_node_t *instruction) { |  | ||||||
|     ast_node_t *operands = instruction->children[1]; |  | ||||||
|  |  | ||||||
|     opcode_data_t *opcode = nullptr; |  | ||||||
|     error_t *err = encoder_get_opcode_data(instruction, operands, &opcode); |  | ||||||
|     if (err) |  | ||||||
|         return err; |  | ||||||
|  |  | ||||||
|     uint8_t rex = 0; |  | ||||||
|     bytes_t *encoding = LOCAL_BYTES(32); |  | ||||||
|  |  | ||||||
|     if (opcode->opcode > 0xFF && |  | ||||||
|         (err = bytes_append_uint8(encoding, opcode->opcode >> 8))) |  | ||||||
|         return err; |  | ||||||
|     if ((err = bytes_append_uint8(encoding, opcode->opcode & 0xFF))) |  | ||||||
|         return err; |  | ||||||
|  |  | ||||||
|     // NOTE:operand encoders all expect the opcode to be in the buffer already. |  | ||||||
|     // Some of them rely on this to encode the register value in the opcode |  | ||||||
|     // byte. |  | ||||||
|     switch (opcode->operand_count) { |  | ||||||
|     case 0: |  | ||||||
|         break; |  | ||||||
|     case 1: |  | ||||||
|         err = encode_one_operand(encoder, opcode, operands, encoding, &rex); |  | ||||||
|         break; |  | ||||||
|     case 2: |  | ||||||
|         err = encode_two_operand(encoder, opcode, operands, encoding, &rex); |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         err = err_encoder_not_implemented; |  | ||||||
|     } |  | ||||||
|     if (err) |  | ||||||
|         return err; |  | ||||||
|  |  | ||||||
|     // produce the actual encoding output in the NODE_INSTRUCTION value |  | ||||||
|     instruction_t *instruction_value = ast_node_instruction_value(instruction); |  | ||||||
|     uint8_t *output = instruction_value->encoding.buffer; |  | ||||||
|     size_t output_len = 0; |  | ||||||
|  |  | ||||||
|     // Handle prefixes |  | ||||||
|     if (opcode->rex_w_prefix) |  | ||||||
|         rex = rex_prefix_w; |  | ||||||
|     if (opcode->address_size_prefix) |  | ||||||
|         output[output_len++] = memory_size_prefix; |  | ||||||
|     if (opcode->operand_size_prefix) |  | ||||||
|         output[output_len++] = operand_size_prefix; |  | ||||||
|     if (rex > 0) |  | ||||||
|         output[output_len++] = rex; |  | ||||||
|  |  | ||||||
|     // copy the encoded opcode and operands |  | ||||||
|     if (encoding->len > 20) |  | ||||||
|         return err_encoder_unexpected_length; |  | ||||||
|     memcpy(output + output_len, encoding->buffer, encoding->len); |  | ||||||
|     output_len += encoding->len; |  | ||||||
|  |  | ||||||
|     instruction_value->encoding.len = output_len; |  | ||||||
|  |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
/**
 * Initial guess for instruction size of instructions that contain a label
 * reference.
 *
 * The first pass stores this as the provisional encoding length of such
 * instructions and advances the running address by it; later passes replace
 * it with the length of the real encoding.
 */
constexpr size_t instruction_size_estimate = 10;
|  |  | ||||||
| /** |  | ||||||
|  * Perform the initial pass over the AST. |  | ||||||
|  * |  | ||||||
|  * - Collect information about the operands |  | ||||||
|  *   - parse and set number values |  | ||||||
|  *   - set the register values |  | ||||||
|  *   - determine if label references are used by an instruction |  | ||||||
|  * - encode instructions that don't use label references |  | ||||||
|  * - determine estimated addresses of each statement |  | ||||||
|  * |  | ||||||
|  */ |  | ||||||
| error_t *encoder_first_pass(encoder_t *encoder) { |  | ||||||
|     ast_node_t *root = encoder->ast; |  | ||||||
|     assert(root->id == NODE_PROGRAM); |  | ||||||
|  |  | ||||||
|     uintptr_t address = 0; |  | ||||||
|  |  | ||||||
|     for (size_t i = 0; i < root->len; ++i) { |  | ||||||
|         ast_node_t *statement = root->children[i]; |  | ||||||
|         error_t *err = encoder_collect_info(encoder, statement, statement); |  | ||||||
|         if (err) |  | ||||||
|             return err; |  | ||||||
|  |  | ||||||
|         if (statement->id == NODE_INSTRUCTION && |  | ||||||
|             ast_node_instruction_value(statement)->has_reference == false) { |  | ||||||
|             err = encoder_encode_instruction(encoder, statement); |  | ||||||
|             if (err) |  | ||||||
|                 return err; |  | ||||||
|             instruction_t *instruction = ast_node_instruction_value(statement); |  | ||||||
|             instruction->address = address; |  | ||||||
|             address += instruction->encoding.len; |  | ||||||
|         } else if (statement->id == NODE_INSTRUCTION) { |  | ||||||
|             instruction_t *instruction = ast_node_instruction_value(statement); |  | ||||||
|             instruction->encoding.len = instruction_size_estimate; |  | ||||||
|             instruction->address = address; |  | ||||||
|             address += instruction_size_estimate; |  | ||||||
|         } else if (statement->id == NODE_LABEL) { |  | ||||||
|             label_t *label = ast_node_label_value(statement); |  | ||||||
|             label->address = address; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| operand_size_t signed_to_size_mask(int64_t value) { |  | ||||||
|     operand_size_t size = OPERAND_SIZE_64; |  | ||||||
|  |  | ||||||
|     if (value >= INT8_MIN && value <= INT8_MAX) |  | ||||||
|         size |= OPERAND_SIZE_8; |  | ||||||
|  |  | ||||||
|     if (value >= INT16_MIN && value <= INT16_MAX) |  | ||||||
|         size |= OPERAND_SIZE_16; |  | ||||||
|  |  | ||||||
|     if (value >= INT32_MIN && value <= INT32_MAX) |  | ||||||
|         size |= OPERAND_SIZE_32; |  | ||||||
|  |  | ||||||
|     return size; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| int64_t statement_offset(ast_node_t *from, ast_node_t *to) { |  | ||||||
|     assert(from->id == NODE_INSTRUCTION); |  | ||||||
|     assert(to->id == NODE_LABEL); |  | ||||||
|  |  | ||||||
|     instruction_t *instruction = ast_node_instruction_value(from); |  | ||||||
|     int64_t from_addr = instruction->address + instruction->encoding.len; |  | ||||||
|     int64_t to_addr = ast_node_label_value(to)->address; |  | ||||||
|  |  | ||||||
|     return to_addr - from_addr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encoder_collect_reference_info(encoder_t *encoder, ast_node_t *node, |  | ||||||
|                                         ast_node_t *statement) { |  | ||||||
|     assert(statement->id == NODE_INSTRUCTION); |  | ||||||
|     if (node->id == NODE_LABEL_REFERENCE) { |  | ||||||
|         const char *name = node->token_entry->token.value; |  | ||||||
|         symbol_t *symbol = symbol_table_lookup(encoder->symbols, name); |  | ||||||
|         assert(symbol && symbol->statement && |  | ||||||
|                symbol->statement->id == NODE_LABEL); |  | ||||||
|  |  | ||||||
|         int64_t offset = statement_offset(statement, symbol->statement); |  | ||||||
|         int64_t absolute = ast_node_label_value(symbol->statement)->address; |  | ||||||
|         operand_size_t size = signed_to_size_mask(offset); |  | ||||||
|  |  | ||||||
|         node->value.reference.address = absolute; |  | ||||||
|         node->value.reference.offset = offset; |  | ||||||
|         node->value.reference.size = size; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for (size_t i = 0; i < node->len; ++i) { |  | ||||||
|         error_t *err = encoder_collect_reference_info( |  | ||||||
|             encoder, node->children[i], statement); |  | ||||||
|         if (err) |  | ||||||
|             return err; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool encoder_should_reencode(ast_node_t *statement) { |  | ||||||
|     if (statement->id != NODE_INSTRUCTION) |  | ||||||
|         return false; |  | ||||||
|  |  | ||||||
|     instruction_t *instruction = ast_node_instruction_value(statement); |  | ||||||
|     return instruction->has_reference; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void set_statement_address(ast_node_t *statement, int64_t address) { |  | ||||||
|     if (statement->id == NODE_INSTRUCTION) { |  | ||||||
|         ast_node_instruction_value(statement)->address = address; |  | ||||||
|     } else if (statement->id == NODE_LABEL) { |  | ||||||
|         ast_node_label_value(statement)->address = address; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| size_t get_statement_length(ast_node_t *statement) { |  | ||||||
|     if (statement->id != NODE_INSTRUCTION) |  | ||||||
|         return 0; |  | ||||||
|     return ast_node_instruction_value(statement)->encoding.len; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Perform the second pass. Updates the label info and encodes all instructions |  | ||||||
|  * that have a label reference.that performs actual encoding. |  | ||||||
|  */ |  | ||||||
| error_t *encoder_second_pass(encoder_t *encoder, bool *did_update) { |  | ||||||
|     ast_node_t *root = encoder->ast; |  | ||||||
|  |  | ||||||
|     *did_update = false; |  | ||||||
|     int64_t address = 0; |  | ||||||
|     for (size_t i = 0; i < root->len; ++i) { |  | ||||||
|         ast_node_t *statement = root->children[i]; |  | ||||||
|  |  | ||||||
|         set_statement_address(statement, address); |  | ||||||
|         size_t before = get_statement_length(statement); |  | ||||||
|  |  | ||||||
|         if (encoder_should_reencode(statement)) { |  | ||||||
|             error_t *err = |  | ||||||
|                 encoder_collect_reference_info(encoder, statement, statement); |  | ||||||
|             if (err) |  | ||||||
|                 return err; |  | ||||||
|             err = encoder_encode_instruction(encoder, statement); |  | ||||||
|             if (err) |  | ||||||
|                 return err; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         size_t after = get_statement_length(statement); |  | ||||||
|         *did_update = *did_update || (before != after); |  | ||||||
|         address += after; |  | ||||||
|     } |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| opcode_data_t *encoder_find_opcode(ast_node_t *instruction) { |  | ||||||
|     for (size_t i = 0; opcodes[i] != nullptr; ++i) { |  | ||||||
|         const char *mnemonic = |  | ||||||
|             instruction->children[0]->token_entry->token.value; |  | ||||||
|         ast_node_t *operands = instruction->children[1]; |  | ||||||
|         if (is_opcode_match(opcodes[i], mnemonic, operands)) |  | ||||||
|             return opcodes[i]; |  | ||||||
|     } |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encoder_check_symbols(encoder_t *encoder) { |  | ||||||
|     for (size_t i = 0; i < encoder->symbols->len; ++i) |  | ||||||
|         if (encoder->symbols->symbols[i].kind == SYMBOL_REFERENCE) |  | ||||||
|             return err_encoder_unknown_symbol_reference; |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *encoder_encode(encoder_t *encoder) { |  | ||||||
|     error_t *err = encoder_first_pass(encoder); |  | ||||||
|     if (err) |  | ||||||
|         return err; |  | ||||||
|     err = encoder_check_symbols(encoder); |  | ||||||
|     if (err) |  | ||||||
|         return err; |  | ||||||
|  |  | ||||||
|     bool did_update = true; |  | ||||||
|     for (int i = 0; i < 10 && did_update; ++i) { |  | ||||||
|         err = encoder_second_pass(encoder, &did_update); |  | ||||||
|         if (err) |  | ||||||
|             return err; |  | ||||||
|     } |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
| @@ -1,33 +0,0 @@ | |||||||
| #ifndef INCLUDE_ENCODER_ENCODER_H_ |  | ||||||
| #define INCLUDE_ENCODER_ENCODER_H_ |  | ||||||
|  |  | ||||||
| #include "symbols.h" |  | ||||||
|  |  | ||||||
/**
 * State shared by the encoding passes.
 */
typedef struct encoder {
    // table of the symbols found while walking the AST
    symbol_table_t *symbols;
    // tree being encoded; the first pass asserts its root is NODE_PROGRAM
    ast_node_t *ast;
} encoder_t;
|  |  | ||||||
// Values for the mod field of a modrm byte (the two highest bits, see
// modrm_mod_mask).
constexpr uint8_t modrm_mod_memory = 0b00'000'000;
constexpr uint8_t modrm_mod_memory_displacement8 = 0b01'000'000;
constexpr uint8_t modrm_mod_memory_displacement32 = 0b10'000'000;
constexpr uint8_t modrm_mod_register = 0b11'000'000;

// Bit masks selecting the reg (bits 5-3), rm (bits 2-0) and mod (bits 7-6)
// fields of a modrm byte.
constexpr uint8_t modrm_reg_mask = 0b00'111'000;
constexpr uint8_t modrm_rm_mask = 0b00'000'111;
constexpr uint8_t modrm_mod_mask = 0b11'000'000;
|  |  | ||||||
| error_t *encoder_alloc(encoder_t **encoder, ast_node_t *ast); |  | ||||||
| error_t *encoder_encode(encoder_t *encoder); |  | ||||||
| void encoder_free(encoder_t *encoder); |  | ||||||
|  |  | ||||||
| extern error_t *const err_encoder_invalid_register; |  | ||||||
| extern error_t *const err_encoder_number_overflow; |  | ||||||
| extern error_t *const err_encoder_invalid_number_format; |  | ||||||
| extern error_t *const err_encoder_invalid_size_suffix; |  | ||||||
| extern error_t *const err_encoder_unknown_symbol_reference; |  | ||||||
| extern error_t *const err_encoder_no_encoding_found; |  | ||||||
| extern error_t *const err_encoder_not_implemented; |  | ||||||
| extern error_t *const err_encoder_unexpected_length; |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_ENCODER_ENCODER_H_ |  | ||||||
| @@ -1,165 +0,0 @@ | |||||||
| #include "symbols.h" |  | ||||||
| #include "../error.h" |  | ||||||
| #include <assert.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <string.h> |  | ||||||
|  |  | ||||||
// Number of symbol slots a freshly allocated table starts with.
constexpr size_t symbol_table_default_cap = 64;
// Once the capacity reaches this value the table refuses to grow further.
constexpr size_t symbol_table_max_cap = 1 << 16;
|  |  | ||||||
// Returned when a node that does not describe a symbol is handed to the
// symbol table.
error_t *const err_symbol_table_invalid_node = &(error_t){
    .message = "Unexpected node id when adding symbol to symbol table"};
// Returned when the table is full and already at its maximum capacity.
error_t *const err_symbol_table_max_cap = &(error_t){
    .message = "Failed to increase symbol table length, max capacity reached"};
// Returned when an existing symbol is updated with a kind it cannot be
// combined with.
error_t *const err_symbol_table_incompatible_symbols =
    &(error_t){.message = "Failed to update symbol with incompatible kind"};
|  |  | ||||||
| error_t *symbol_table_alloc(symbol_table_t **output) { |  | ||||||
|     *output = nullptr; |  | ||||||
|  |  | ||||||
|     symbol_table_t *table = calloc(1, sizeof(symbol_table_t)); |  | ||||||
|     if (table == nullptr) |  | ||||||
|         return err_allocation_failed; |  | ||||||
|  |  | ||||||
|     table->symbols = calloc(symbol_table_default_cap, sizeof(symbol_t)); |  | ||||||
|     if (table->symbols == nullptr) { |  | ||||||
|         free(table); |  | ||||||
|         return err_allocation_failed; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     table->cap = symbol_table_default_cap; |  | ||||||
|     table->len = 0; |  | ||||||
|  |  | ||||||
|     *output = table; |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void symbol_table_free(symbol_table_t *table) { |  | ||||||
|     free(table->symbols); |  | ||||||
|     free(table); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *symbol_table_grow_cap(symbol_table_t *table) { |  | ||||||
|     if (table->cap >= symbol_table_max_cap) |  | ||||||
|         return err_symbol_table_max_cap; |  | ||||||
|  |  | ||||||
|     size_t new_cap = table->cap * 2; |  | ||||||
|     symbol_t *new_symbols = realloc(table->symbols, new_cap * sizeof(symbol_t)); |  | ||||||
|     if (new_symbols == nullptr) |  | ||||||
|         return err_allocation_failed; |  | ||||||
|  |  | ||||||
|     table->symbols = new_symbols; |  | ||||||
|     table->cap = new_cap; |  | ||||||
|  |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *symbol_table_get_node_info(ast_node_t *node, symbol_kind_t *kind, |  | ||||||
|                                     char **name) { |  | ||||||
|     switch (node->id) { |  | ||||||
|     case NODE_LABEL: |  | ||||||
|         *kind = SYMBOL_LOCAL; |  | ||||||
|         *name = node->children[0]->token_entry->token.value; |  | ||||||
|         return nullptr; |  | ||||||
|     case NODE_LABEL_REFERENCE: |  | ||||||
|         *kind = SYMBOL_REFERENCE; |  | ||||||
|         *name = node->token_entry->token.value; |  | ||||||
|         return nullptr; |  | ||||||
|     case NODE_IMPORT_DIRECTIVE: |  | ||||||
|         *kind = SYMBOL_IMPORT; |  | ||||||
|         *name = node->children[1]->token_entry->token.value; |  | ||||||
|         return nullptr; |  | ||||||
|     case NODE_EXPORT_DIRECTIVE: |  | ||||||
|         *kind = SYMBOL_EXPORT; |  | ||||||
|         *name = node->children[1]->token_entry->token.value; |  | ||||||
|         return nullptr; |  | ||||||
|     default: |  | ||||||
|         return err_symbol_table_invalid_node; |  | ||||||
|     } |  | ||||||
|     __builtin_unreachable(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* |  | ||||||
| old  \  new  | REFERENCE | LOCAL    | IMPORT   | EXPORT   | |  | ||||||
| -------------|-----------|----------|----------|----------| |  | ||||||
| REFERENCE    |           | replace  | replace  | replace  | |  | ||||||
| -------------|-----------|----------|----------|----------| |  | ||||||
| LOCAL        |           |          |   ERR    | replace  | |  | ||||||
| -------------|-----------|----------|----------|----------| |  | ||||||
| IMPORT       |           |          |          |   ERR    | |  | ||||||
| -------------|-----------|----------|----------|----------| |  | ||||||
| EXPORT       |           |          |   ERR    |          | |  | ||||||
| -------------|-----------|----------|----------|----------| |  | ||||||
| */ |  | ||||||
|  |  | ||||||
| bool symbol_table_should_upgrade(symbol_kind_t old, symbol_kind_t new) { |  | ||||||
|     if (old == SYMBOL_REFERENCE) |  | ||||||
|         return new != SYMBOL_REFERENCE; |  | ||||||
|     if (old == SYMBOL_LOCAL) |  | ||||||
|         return new == SYMBOL_EXPORT; |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool symbol_table_should_error(symbol_kind_t old, symbol_kind_t new) { |  | ||||||
|     if (new == SYMBOL_IMPORT) |  | ||||||
|         return old == SYMBOL_LOCAL || old == SYMBOL_EXPORT; |  | ||||||
|     if (new == SYMBOL_EXPORT) |  | ||||||
|         return old == SYMBOL_IMPORT; |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * @pre The symbol _must not_ already be in the table. |  | ||||||
|  */ |  | ||||||
| error_t *symbol_table_add(symbol_table_t *table, char *name, symbol_kind_t kind, |  | ||||||
|                           ast_node_t *statement) { |  | ||||||
|     if (table->len >= table->cap) { |  | ||||||
|         error_t *err = symbol_table_grow_cap(table); |  | ||||||
|         if (err) |  | ||||||
|             return err; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     table->symbols[table->len] = (symbol_t){ |  | ||||||
|         .name = name, |  | ||||||
|         .kind = kind, |  | ||||||
|         .statement = statement, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     table->len += 1; |  | ||||||
|  |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node, |  | ||||||
|                              ast_node_t *statement) { |  | ||||||
|     char *name; |  | ||||||
|     symbol_kind_t kind; |  | ||||||
|     error_t *err = symbol_table_get_node_info(node, &kind, &name); |  | ||||||
|     if (err) |  | ||||||
|         return err; |  | ||||||
|  |  | ||||||
|     if (kind != SYMBOL_LOCAL) |  | ||||||
|         statement = nullptr; |  | ||||||
|  |  | ||||||
|     symbol_t *symbol = symbol_table_lookup(table, name); |  | ||||||
|     if (!symbol) |  | ||||||
|         return symbol_table_add(table, name, kind, statement); |  | ||||||
|     if (symbol_table_should_error(symbol->kind, kind)) |  | ||||||
|         return err_symbol_table_incompatible_symbols; |  | ||||||
|     if (symbol_table_should_upgrade(symbol->kind, kind)) { |  | ||||||
|         symbol->kind = kind; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (kind == SYMBOL_LOCAL && symbol->statement == nullptr) |  | ||||||
|         symbol->statement = statement; |  | ||||||
|  |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name) { |  | ||||||
|     for (size_t i = 0; i < table->len; ++i) { |  | ||||||
|         if (strcmp(table->symbols[i].name, name) == 0) |  | ||||||
|             return &table->symbols[i]; |  | ||||||
|     } |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
| @@ -1,47 +0,0 @@ | |||||||
| #ifndef INCLUDE_ENCODER_SYMBOLS_H_ |  | ||||||
| #define INCLUDE_ENCODER_SYMBOLS_H_ |  | ||||||
|  |  | ||||||
| #include "../ast.h" |  | ||||||
|  |  | ||||||
| extern error_t *const err_symbol_table_invalid_node; |  | ||||||
| extern error_t *const err_symbol_table_max_cap; |  | ||||||
| extern error_t *const err_symbol_table_incompatible_symbols; |  | ||||||
|  |  | ||||||
| typedef enum symbol_kind { |  | ||||||
|     SYMBOL_REFERENCE, |  | ||||||
|     SYMBOL_LOCAL, |  | ||||||
|     SYMBOL_EXPORT, |  | ||||||
|     SYMBOL_IMPORT, |  | ||||||
| } symbol_kind_t; |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Represent a symbol in the program |  | ||||||
|  * |  | ||||||
|  * Symbols with the same name can only be in the table once. IMPORT or EXPORT |  | ||||||
|  * symbols take precedence over REFERENCE symbols. If any reference symbols |  | ||||||
|  * remain after the first encoding pass this indicates an error. Trying to add |  | ||||||
|  * an IMPORT or EXPORT symbol if the same name already exists as the other kind |  | ||||||
|  * is an error. |  | ||||||
|  * |  | ||||||
|  * This symbol table never taken ownership of the name string, it's lifted |  | ||||||
|  * straight from the node->token.value. |  | ||||||
|  */ |  | ||||||
| typedef struct symbol { |  | ||||||
|     char *name; |  | ||||||
|     symbol_kind_t kind; |  | ||||||
|     ast_node_t *statement; |  | ||||||
| } symbol_t; |  | ||||||
|  |  | ||||||
| typedef struct symbol_table { |  | ||||||
|     size_t cap; |  | ||||||
|     size_t len; |  | ||||||
|     symbol_t *symbols; |  | ||||||
| } symbol_table_t; |  | ||||||
|  |  | ||||||
| error_t *symbol_table_alloc(symbol_table_t **table); |  | ||||||
| void symbol_table_free(symbol_table_t *table); |  | ||||||
| error_t *symbol_table_update(symbol_table_t *table, ast_node_t *node, |  | ||||||
|                              ast_node_t *statement); |  | ||||||
| symbol_t *symbol_table_lookup(symbol_table_t *table, const char *name); |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_ENCODER_SYMBOLS_H_ |  | ||||||
| @@ -9,13 +9,8 @@ error_t *const err_errorf_alloc = &(error_t){ | |||||||
| error_t *const err_errorf_length = &(error_t){ | error_t *const err_errorf_length = &(error_t){ | ||||||
|     .message = |     .message = | ||||||
|         "Formatting of another error failed to determine the error length"}; |         "Formatting of another error failed to determine the error length"}; | ||||||
| error_t *const err_eof = |  | ||||||
|     &(error_t){.message = "Read failed because EOF is reached"}; |  | ||||||
|  |  | ||||||
| error_t *const err_unknown_read_failure = | error_t *err_allocation_failed = | ||||||
|     &(error_t){.message = "Unknown read error"}; |  | ||||||
|  |  | ||||||
| error_t *const err_allocation_failed = |  | ||||||
|     &(error_t){.message = "Memory allocation failed"}; |     &(error_t){.message = "Memory allocation failed"}; | ||||||
|  |  | ||||||
| error_t *errorf(const char *fmt, ...) { | error_t *errorf(const char *fmt, ...) { | ||||||
|   | |||||||
| @@ -19,8 +19,6 @@ static inline void error_free(error_t *err) { | |||||||
| } | } | ||||||
|  |  | ||||||
| /* Some global errors */ | /* Some global errors */ | ||||||
| extern error_t *const err_allocation_failed; | extern error_t *err_allocation_failed; | ||||||
| extern error_t *const err_eof; |  | ||||||
| extern error_t *const err_unknown_read_failure; |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_SRC_ERROR_H_ | #endif // INCLUDE_SRC_ERROR_H_ | ||||||
|   | |||||||
							
								
								
									
										39
									
								
								src/lexer.c
									
									
									
									
									
								
							
							
						
						
									
										39
									
								
								src/lexer.c
									
									
									
									
									
								
							| @@ -5,16 +5,21 @@ | |||||||
| #include <errno.h> | #include <errno.h> | ||||||
| #include <string.h> | #include <string.h> | ||||||
|  |  | ||||||
| error_t *const err_lexer_already_open = &(error_t){ | error_t *err_lexer_already_open = &(error_t){ | ||||||
|     .message = |     .message = | ||||||
|         "Can't open on a lexer object that is already opened. Close it first."}; |         "Can't open on a lexer object that is already opened. Close it first."}; | ||||||
| error_t *const err_lexer_prefix_too_large = | error_t *err_prefix_too_large = | ||||||
|     &(error_t){.message = "Prefix too large for internal lexer buffer"}; |     &(error_t){.message = "Prefix too large for internal lexer buffer"}; | ||||||
| error_t *const err_lexer_buffer_underrun = &(error_t){ | error_t *err_buffer_underrun = &(error_t){ | ||||||
|     .message = "Buffer does not contain enough characters for lexer_consume_n"}; |     .message = "Buffer does not contain enough characters for lexer_consume_n"}; | ||||||
| error_t *const err_lexer_consume_excessive_length = | error_t *err_consume_excessive_length = | ||||||
|     &(error_t){.message = "Too many valid characters to consume"}; |     &(error_t){.message = "Too many valid characters to consume"}; | ||||||
|  |  | ||||||
|  | error_t *err_eof = | ||||||
|  |     &(error_t){.message = "Can't read from file because EOF is reached"}; | ||||||
|  |  | ||||||
|  | error_t *err_unknown_read = &(error_t){.message = "Unknown read error"}; | ||||||
|  |  | ||||||
| typedef bool (*char_predicate_t)(char); | typedef bool (*char_predicate_t)(char); | ||||||
|  |  | ||||||
| const char *lexer_token_id_to_cstr(lexer_token_id_t id) { | const char *lexer_token_id_to_cstr(lexer_token_id_t id) { | ||||||
| @@ -107,7 +112,7 @@ error_t *lexer_fill_buffer(lexer_t *lex) { | |||||||
|         if (n == 0 && ferror(lex->fp)) |         if (n == 0 && ferror(lex->fp)) | ||||||
|             return errorf("Read error: %s", strerror(errno)); |             return errorf("Read error: %s", strerror(errno)); | ||||||
|         if (n == 0) |         if (n == 0) | ||||||
|             return err_unknown_read_failure; |             return err_unknown_read; | ||||||
|         remaining -= n; |         remaining -= n; | ||||||
|         lex->buffer_count += n; |         lex->buffer_count += n; | ||||||
|     } |     } | ||||||
| @@ -136,7 +141,7 @@ error_t *lexer_open(lexer_t *lex, char *path) { | |||||||
|  * |  * | ||||||
|  * @pre There must be at least n characters in the input buffer |  * @pre There must be at least n characters in the input buffer | ||||||
|  */ |  */ | ||||||
| void lexer_shift_buffer(lexer_t *lex, size_t n) { | void lexer_shift_buffer(lexer_t *lex, int n) { | ||||||
|     assert(lex->buffer_count >= n); |     assert(lex->buffer_count >= n); | ||||||
|     lex->buffer_count -= n; |     lex->buffer_count -= n; | ||||||
|     memmove(lex->buffer, lex->buffer + n, lex->buffer_count); |     memmove(lex->buffer, lex->buffer + n, lex->buffer_count); | ||||||
| @@ -177,9 +182,9 @@ error_t *lexer_not_implemented(lexer_t *lex, lexer_token_t *token) { | |||||||
| error_t *lexer_consume_n(lexer_t *lex, const size_t len, | error_t *lexer_consume_n(lexer_t *lex, const size_t len, | ||||||
|                          char buffer[static len], const size_t n) { |                          char buffer[static len], const size_t n) { | ||||||
|     if (lex->buffer_count < n) |     if (lex->buffer_count < n) | ||||||
|         return err_lexer_buffer_underrun; |         return err_buffer_underrun; | ||||||
|     if (n > len) |     if (len > n) | ||||||
|         return err_lexer_consume_excessive_length; |         return err_consume_excessive_length; | ||||||
|  |  | ||||||
|     memcpy(buffer, lex->buffer, n); |     memcpy(buffer, lex->buffer, n); | ||||||
|     lexer_shift_buffer(lex, n); |     lexer_shift_buffer(lex, n); | ||||||
| @@ -224,7 +229,7 @@ error_t *lexer_consume(lexer_t *lex, const size_t n, char buffer[static n], | |||||||
|                 (lex->buffer_count > 0 && is_valid(lex->buffer[0])); |                 (lex->buffer_count > 0 && is_valid(lex->buffer[0])); | ||||||
|  |  | ||||||
|         if (have_more_characters && *n_consumed == buffer_size) { |         if (have_more_characters && *n_consumed == buffer_size) { | ||||||
|             return err_lexer_consume_excessive_length; |             return err_consume_excessive_length; | ||||||
|         } |         } | ||||||
|     } while (have_more_characters); |     } while (have_more_characters); | ||||||
|     return nullptr; |     return nullptr; | ||||||
| @@ -294,12 +299,11 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) { | |||||||
|  |  | ||||||
|     error_t *err = lexer_consume(lex, max_number_length - so_far, |     error_t *err = lexer_consume(lex, max_number_length - so_far, | ||||||
|                                  buffer + so_far, is_valid, &n); |                                  buffer + so_far, is_valid, &n); | ||||||
|     if (err == err_lexer_consume_excessive_length) { |     if (err == err_consume_excessive_length) { | ||||||
|         token->id = TOKEN_ERROR; |         token->id = TOKEN_ERROR; | ||||||
|         token->explanation = |         token->explanation = | ||||||
|             "Number length exceeds the maximum of 128 characters"; |             "Number length exceeds the maximum of 128 characters"; | ||||||
|     } |     } | ||||||
|     lex->character_number += n; |  | ||||||
|     so_far += n; |     so_far += n; | ||||||
|     if (n == 0) { |     if (n == 0) { | ||||||
|         token->id = TOKEN_ERROR; |         token->id = TOKEN_ERROR; | ||||||
| @@ -325,15 +329,14 @@ error_t *lexer_next_number(lexer_t *lex, lexer_token_t *token) { | |||||||
|     if (suffix_length > 0) { |     if (suffix_length > 0) { | ||||||
|         err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far, |         err = lexer_consume_n(lex, max_number_length - so_far, buffer + so_far, | ||||||
|                               suffix_length); |                               suffix_length); | ||||||
|         if (err == err_lexer_consume_excessive_length) { |         if (err == err_consume_excessive_length) { | ||||||
|             token->id = TOKEN_ERROR; |             token->id = TOKEN_ERROR; | ||||||
|             token->explanation = |             token->explanation = | ||||||
|                 "Number length exceeds the maximum of 128 characters"; |                 "Number length exceeds the maximum of 128 characters"; | ||||||
|         } else { |  | ||||||
|             lex->character_number += suffix_length; |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     lex->character_number += n; | ||||||
|     token->value = strdup(buffer); |     token->value = strdup(buffer); | ||||||
|     return nullptr; |     return nullptr; | ||||||
| } | } | ||||||
| @@ -403,7 +406,7 @@ error_t *lexer_next_identifier(lexer_t *lex, lexer_token_t *token) { | |||||||
|  |  | ||||||
|     error_t *err = lexer_consume(lex, max_identifier_length, buffer, |     error_t *err = lexer_consume(lex, max_identifier_length, buffer, | ||||||
|                                  is_identifier_character, &n); |                                  is_identifier_character, &n); | ||||||
|     if (err == err_lexer_consume_excessive_length) { |     if (err == err_consume_excessive_length) { | ||||||
|         token->id = TOKEN_ERROR; |         token->id = TOKEN_ERROR; | ||||||
|         token->explanation = |         token->explanation = | ||||||
|             "Identifier length exceeds the maximum of 128 characters"; |             "Identifier length exceeds the maximum of 128 characters"; | ||||||
| @@ -446,7 +449,7 @@ error_t *lexer_next_whitespace(lexer_t *lex, lexer_token_t *token) { | |||||||
|  |  | ||||||
|     error_t *err = lexer_consume(lex, max_whitespace_length, buffer, |     error_t *err = lexer_consume(lex, max_whitespace_length, buffer, | ||||||
|                                  is_whitespace_character, &n); |                                  is_whitespace_character, &n); | ||||||
|     if (err == err_lexer_consume_excessive_length) { |     if (err == err_consume_excessive_length) { | ||||||
|         token->id = TOKEN_ERROR; |         token->id = TOKEN_ERROR; | ||||||
|         token->explanation = |         token->explanation = | ||||||
|             "Whitespace length exceeds the maximum of 1024 characters"; |             "Whitespace length exceeds the maximum of 1024 characters"; | ||||||
| @@ -481,7 +484,7 @@ error_t *lexer_next_comment(lexer_t *lex, lexer_token_t *token) { | |||||||
|  |  | ||||||
|     error_t *err = lexer_consume(lex, max_comment_length, buffer, |     error_t *err = lexer_consume(lex, max_comment_length, buffer, | ||||||
|                                  is_comment_character, &n); |                                  is_comment_character, &n); | ||||||
|     if (err == err_lexer_consume_excessive_length) { |     if (err == err_consume_excessive_length) { | ||||||
|         token->id = TOKEN_ERROR; |         token->id = TOKEN_ERROR; | ||||||
|         token->explanation = |         token->explanation = | ||||||
|             "Comment length exceeds the maximum of 1024 characters"; |             "Comment length exceeds the maximum of 1024 characters"; | ||||||
|   | |||||||
| @@ -5,10 +5,7 @@ | |||||||
| #include <stddef.h> | #include <stddef.h> | ||||||
| #include <stdio.h> | #include <stdio.h> | ||||||
|  |  | ||||||
| extern error_t *const err_lexer_already_open; | extern error_t *err_eof; | ||||||
| extern error_t *const err_lexer_prefix_too_large; |  | ||||||
| extern error_t *const err_lexer_buffer_underrun; |  | ||||||
| extern error_t *const err_lexer_consume_excessive_length; |  | ||||||
|  |  | ||||||
| typedef enum { | typedef enum { | ||||||
|     TOKEN_ERROR, |     TOKEN_ERROR, | ||||||
|   | |||||||
							
								
								
									
										155
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										155
									
								
								src/main.c
									
									
									
									
									
								
							| @@ -1,8 +1,5 @@ | |||||||
| #include "ast.h" |  | ||||||
| #include "encoder/encoder.h" |  | ||||||
| #include "error.h" | #include "error.h" | ||||||
| #include "lexer.h" | #include "lexer.h" | ||||||
| #include "parser/parser.h" |  | ||||||
| #include "tokenlist.h" | #include "tokenlist.h" | ||||||
|  |  | ||||||
| #include <limits.h> | #include <limits.h> | ||||||
| @@ -10,137 +7,38 @@ | |||||||
| #include <stdlib.h> | #include <stdlib.h> | ||||||
| #include <string.h> | #include <string.h> | ||||||
|  |  | ||||||
| typedef enum mode { | bool print_token(lexer_token_t *token) { | ||||||
|     MODE_INVALID = -1, |     lexer_token_print(token); | ||||||
|     MODE_AST, |     return true; | ||||||
|     MODE_TEXT, |  | ||||||
|     MODE_TOKENS, |  | ||||||
|     MODE_ENCODING, |  | ||||||
| } mode_t; |  | ||||||
|  |  | ||||||
| void print_tokens(tokenlist_t *list) { |  | ||||||
|     for (auto entry = list->head; entry; entry = entry->next) { |  | ||||||
|         auto token = &entry->token; |  | ||||||
|         lexer_token_print(token); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void print_text(tokenlist_t *list) { | bool print_value(lexer_token_t *token) { | ||||||
|     for (auto entry = list->head; entry; entry = entry->next) { |     if (token->id == TOKEN_ERROR) { | ||||||
|         auto token = &entry->token; |         printf("%s\n", token->value); | ||||||
|         if (token->id == TOKEN_ERROR) { |         for (size_t i = 0; i < token->character_number; ++i) | ||||||
|             printf("%s\n", token->value); |  | ||||||
|             for (size_t i = 0; i < token->character_number; ++i) |  | ||||||
|                 printf(" "); |  | ||||||
|             printf("^-- %s\n", token->explanation); |  | ||||||
|             return; |  | ||||||
|         } else { |  | ||||||
|             printf("%s", token->value); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *print_ast(tokenlist_t *list) { |  | ||||||
|     parse_result_t result = parse(list->head); |  | ||||||
|     if (result.err) |  | ||||||
|         return result.err; |  | ||||||
|  |  | ||||||
|     ast_node_print(result.node); |  | ||||||
|  |  | ||||||
|     if (result.next != nullptr) { |  | ||||||
|         puts("First unparsed token:"); |  | ||||||
|         lexer_token_print(&result.next->token); |  | ||||||
|     } |  | ||||||
|     ast_node_free(result.node); |  | ||||||
|     if (result.next != nullptr) { |  | ||||||
|         return errorf("did not parse entire input token stream"); |  | ||||||
|     } |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void print_hex(size_t len, uint8_t bytes[static len]) { |  | ||||||
|     for (size_t i = 0; i < len; i++) { |  | ||||||
|         printf("%02x", bytes[i]); |  | ||||||
|         if (i < len - 1) { |  | ||||||
|             printf(" "); |             printf(" "); | ||||||
|         } |         printf("^-- %s\n", token->explanation); | ||||||
|  |     } else { | ||||||
|  |         printf("%s", token->value); | ||||||
|     } |     } | ||||||
|     printf("\n"); |     return token->id != TOKEN_ERROR; | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *print_encoding(tokenlist_t *list) { |  | ||||||
|     parse_result_t result = parse(list->head); |  | ||||||
|     if (result.err) |  | ||||||
|         return result.err; |  | ||||||
|  |  | ||||||
|     encoder_t *encoder; |  | ||||||
|     error_t *err = encoder_alloc(&encoder, result.node); |  | ||||||
|     if (err) |  | ||||||
|         goto cleanup_ast; |  | ||||||
|  |  | ||||||
|     err = encoder_encode(encoder); |  | ||||||
|     if (err) |  | ||||||
|         goto cleanup_ast; |  | ||||||
|  |  | ||||||
|     ast_node_t *root = result.node; |  | ||||||
|     for (size_t i = 0; i < root->len; ++i) { |  | ||||||
|         ast_node_t *node = root->children[i]; |  | ||||||
|         if (node->id != NODE_INSTRUCTION) |  | ||||||
|             continue; |  | ||||||
|  |  | ||||||
|         print_hex(node->value.instruction.encoding.len, |  | ||||||
|                   node->value.instruction.encoding.buffer); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     encoder_free(encoder); |  | ||||||
|     ast_node_free(result.node); |  | ||||||
|     return nullptr; |  | ||||||
|  |  | ||||||
| cleanup_ast: |  | ||||||
|     ast_node_free(result.node); |  | ||||||
|     return err; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| int get_execution_mode(int argc, char *argv[]) { |  | ||||||
|     if (argc != 3) |  | ||||||
|         return MODE_INVALID; |  | ||||||
|  |  | ||||||
|     if (strcmp(argv[1], "tokens") == 0) |  | ||||||
|         return MODE_TOKENS; |  | ||||||
|     if (strcmp(argv[1], "text") == 0) |  | ||||||
|         return MODE_TEXT; |  | ||||||
|     if (strcmp(argv[1], "ast") == 0) |  | ||||||
|         return MODE_AST; |  | ||||||
|     if (strcmp(argv[1], "encoding") == 0) |  | ||||||
|         return MODE_ENCODING; |  | ||||||
|     return MODE_INVALID; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| error_t *do_action(mode_t mode, tokenlist_t *list) { |  | ||||||
|     switch (mode) { |  | ||||||
|     case MODE_TOKENS: |  | ||||||
|         print_tokens(list); |  | ||||||
|         return nullptr; |  | ||||||
|     case MODE_TEXT: |  | ||||||
|         print_text(list); |  | ||||||
|         return nullptr; |  | ||||||
|     case MODE_AST: |  | ||||||
|         return print_ast(list); |  | ||||||
|     case MODE_ENCODING: |  | ||||||
|         return print_encoding(list); |  | ||||||
|     case MODE_INVALID: |  | ||||||
|         /* can't happen */ |  | ||||||
|     } |  | ||||||
|     __builtin_unreachable(); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| int main(int argc, char *argv[]) { | int main(int argc, char *argv[]) { | ||||||
|     mode_t mode = get_execution_mode(argc, argv); |     if (argc != 3 || | ||||||
|     if (mode == MODE_INVALID) { |         (strcmp(argv[1], "-tokens") != 0 && strcmp(argv[1], "-text") != 0)) { | ||||||
|         puts("Usage: oas [tokens|text|ast|encoding] <filename>"); |         puts("Usage: oas -tokens <filename>"); | ||||||
|         exit(1); |         puts("Usage: oas -text <filename>"); | ||||||
|  |         return 1; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     bool (*print_fn)(lexer_token_t *); | ||||||
|     char *filename = argv[2]; |     char *filename = argv[2]; | ||||||
|  |     if (strcmp(argv[1], "-tokens") == 0) { | ||||||
|  |         print_fn = print_token; | ||||||
|  |     } else { | ||||||
|  |         print_fn = print_value; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     lexer_t *lex = &(lexer_t){}; |     lexer_t *lex = &(lexer_t){}; | ||||||
|     error_t *err = lexer_open(lex, filename); |     error_t *err = lexer_open(lex, filename); | ||||||
| @@ -156,10 +54,9 @@ int main(int argc, char *argv[]) { | |||||||
|     if (err) |     if (err) | ||||||
|         goto cleanup_tokens; |         goto cleanup_tokens; | ||||||
|  |  | ||||||
|     err = do_action(mode, list); |     for (auto entry = list->head; entry; entry = entry->next) { | ||||||
|     if (err) |         print_fn(&entry->token); | ||||||
|         goto cleanup_tokens; |     } | ||||||
|  |  | ||||||
|     tokenlist_free(list); |     tokenlist_free(list); | ||||||
|     error_free(err); |     error_free(err); | ||||||
|     return 0; |     return 0; | ||||||
|   | |||||||
| @@ -1,134 +0,0 @@ | |||||||
| #include "combinators.h" |  | ||||||
| #include "util.h" |  | ||||||
|  |  | ||||||
| // Parse a list of the given parser delimited by the given token id. Does not |  | ||||||
| // store the delimiters in the parent node |  | ||||||
| parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id, |  | ||||||
|                           bool allow_none, lexer_token_id_t delimiter_id, |  | ||||||
|                           parser_t parser) { |  | ||||||
|     ast_node_t *many; |  | ||||||
|     error_t *err = ast_node_alloc(&many); |  | ||||||
|     parse_result_t result; |  | ||||||
|     if (err) |  | ||||||
|         return parse_error(err); |  | ||||||
|     many->id = id; |  | ||||||
|  |  | ||||||
|     while (current) { |  | ||||||
|         // Skip beyond the delimiter on all but the first iteration |  | ||||||
|         if (many->len > 0) { |  | ||||||
|             if (current->token.id != delimiter_id) |  | ||||||
|                 break; |  | ||||||
|             current = tokenlist_next(current); |  | ||||||
|             if (current == nullptr) { |  | ||||||
|                 // FIXME: this isn't quite right, we can't consume the delimiter |  | ||||||
|                 // if the next element will fail to parse but it's late and I |  | ||||||
|                 // must think this through tomorrow |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         result = parser(current); |  | ||||||
|         if (result.err == err_parse_no_match) |  | ||||||
|             break; |  | ||||||
|         if (result.err) { |  | ||||||
|             ast_node_free(many); |  | ||||||
|             return result; |  | ||||||
|         } |  | ||||||
|         err = ast_node_add_child(many, result.node); |  | ||||||
|         if (err) { |  | ||||||
|             ast_node_free(many); |  | ||||||
|             ast_node_free(result.node); |  | ||||||
|             return parse_error(err); |  | ||||||
|         } |  | ||||||
|         current = result.next; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (!allow_none && many->len == 0) { |  | ||||||
|         ast_node_free(many); |  | ||||||
|         return parse_no_match(); |  | ||||||
|     } |  | ||||||
|     return parse_success(many, current); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]) { |  | ||||||
|     parser_t parser; |  | ||||||
|     while ((parser = *parsers++)) { |  | ||||||
|         parse_result_t result = parser(current); |  | ||||||
|         if (result.err == nullptr) |  | ||||||
|             return result; |  | ||||||
|     } |  | ||||||
|     return parse_no_match(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // parse as many of the giver parsers objects in a row as possible, |  | ||||||
| // potentially allowing none wraps the found objects in a new ast node with |  | ||||||
| // the given note id |  | ||||||
| parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id, |  | ||||||
|                           bool allow_none, parser_t parser) { |  | ||||||
|     ast_node_t *many; |  | ||||||
|     error_t *err = ast_node_alloc(&many); |  | ||||||
|     parse_result_t result; |  | ||||||
|     if (err) |  | ||||||
|         return parse_error(err); |  | ||||||
|     many->id = id; |  | ||||||
|  |  | ||||||
|     while (current) { |  | ||||||
|         result = parser(current); |  | ||||||
|         if (result.err == err_parse_no_match) |  | ||||||
|             break; |  | ||||||
|         if (result.err) { |  | ||||||
|             ast_node_free(many); |  | ||||||
|             return result; |  | ||||||
|         } |  | ||||||
|         err = ast_node_add_child(many, result.node); |  | ||||||
|         if (err) { |  | ||||||
|             ast_node_free(many); |  | ||||||
|             ast_node_free(result.node); |  | ||||||
|             return parse_error(err); |  | ||||||
|         } |  | ||||||
|         current = result.next; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (!allow_none && many->len == 0) { |  | ||||||
|         ast_node_free(many); |  | ||||||
|         return parse_no_match(); |  | ||||||
|     } |  | ||||||
|     return parse_success(many, current); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Parse all tries to parse all parsers consecutively and if it succeeds it |  | ||||||
| // wraps the parsed nodes in a new parent node. |  | ||||||
| parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id, |  | ||||||
|                                  parser_t parsers[]) { |  | ||||||
|     ast_node_t *all; |  | ||||||
|     error_t *err = ast_node_alloc(&all); |  | ||||||
|     parse_result_t result; |  | ||||||
|     if (err) |  | ||||||
|         return parse_error(err); |  | ||||||
|  |  | ||||||
|     all->id = id; |  | ||||||
|  |  | ||||||
|     parser_t parser; |  | ||||||
|     while ((parser = *parsers++) && current) { |  | ||||||
|         result = parser(current); |  | ||||||
|         if (result.err) { |  | ||||||
|             ast_node_free(all); |  | ||||||
|             return result; |  | ||||||
|         } |  | ||||||
|         err = ast_node_add_child(all, result.node); |  | ||||||
|         if (err) { |  | ||||||
|             ast_node_free(result.node); |  | ||||||
|             ast_node_free(all); |  | ||||||
|             return parse_error(err); |  | ||||||
|         } |  | ||||||
|         current = result.next; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // token stream ended before we matched all parsers |  | ||||||
|     if (parser != nullptr) { |  | ||||||
|         ast_node_free(all); |  | ||||||
|         return parse_no_match(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return parse_success(all, current); |  | ||||||
| } |  | ||||||
| @@ -1,25 +0,0 @@ | |||||||
| #ifndef INCLUDE_PARSER_COMBINATORS_H_ |  | ||||||
| #define INCLUDE_PARSER_COMBINATORS_H_ |  | ||||||
|  |  | ||||||
| #include "util.h" |  | ||||||
|  |  | ||||||
| typedef parse_result_t (*parser_t)(tokenlist_entry_t *); |  | ||||||
|  |  | ||||||
| parse_result_t parse_any(tokenlist_entry_t *current, parser_t parsers[]); |  | ||||||
|  |  | ||||||
| // parse as many of the giver parsers objects in a row as possible, potentially |  | ||||||
| // allowing none wraps the found objects in a new ast node with the given note |  | ||||||
| // id |  | ||||||
| parse_result_t parse_many(tokenlist_entry_t *current, node_id_t id, |  | ||||||
|                           bool allow_none, parser_t parser); |  | ||||||
|  |  | ||||||
| parse_result_t parse_list(tokenlist_entry_t *current, node_id_t id, |  | ||||||
|                           bool allow_none, lexer_token_id_t delimiter_id, |  | ||||||
|                           parser_t parser); |  | ||||||
|  |  | ||||||
| // Parse all tries to parse all parsers consecutively and if it succeeds it |  | ||||||
| // wraps the parsed nodes in a new parent node. |  | ||||||
| parse_result_t parse_consecutive(tokenlist_entry_t *current, node_id_t id, |  | ||||||
|                                  parser_t parsers[]); |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_PARSER_COMBINATORS_H_ |  | ||||||
| @@ -1,164 +0,0 @@ | |||||||
| #include "parser.h" |  | ||||||
| #include "../ast.h" |  | ||||||
| #include "../lexer.h" |  | ||||||
| #include "../tokenlist.h" |  | ||||||
| #include "combinators.h" |  | ||||||
| #include "primitives.h" |  | ||||||
| #include "util.h" |  | ||||||
|  |  | ||||||
| parse_result_t parse_number(tokenlist_entry_t *current) { |  | ||||||
|     parser_t parsers[] = {parse_octal, parse_decimal, parse_hexadecimal, |  | ||||||
|                           parse_binary, nullptr}; |  | ||||||
|     parse_result_t result = parse_any(current, parsers); |  | ||||||
|     return parse_result_wrap(NODE_NUMBER, result); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_plus_or_minus(tokenlist_entry_t *current) { |  | ||||||
|     parser_t parsers[] = {parse_plus, parse_minus, nullptr}; |  | ||||||
|     return parse_any(current, parsers); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_register_index(tokenlist_entry_t *current) { |  | ||||||
|     parser_t parsers[] = {parse_plus, parse_register, parse_asterisk, |  | ||||||
|                           parse_number, nullptr}; |  | ||||||
|     return parse_consecutive(current, NODE_REGISTER_INDEX, parsers); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_register_offset(tokenlist_entry_t *current) { |  | ||||||
|     parser_t parsers[] = {parse_plus_or_minus, parse_number, nullptr}; |  | ||||||
|     return parse_consecutive(current, NODE_REGISTER_OFFSET, parsers); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Run an optional sub-parser at *cursor and, when it matches, append its node |  | ||||||
|  * to parent and advance *cursor. A sub-parser that reports an error is not a |  | ||||||
|  * failure here: its error is released and *cursor is left where it was. |  | ||||||
|  * Returns a non-null error only when appending the child failed; the child |  | ||||||
|  * node has been freed in that case, the parent has not. |  | ||||||
|  */ |  | ||||||
| static error_t *attach_optional_child(ast_node_t *parent, |  | ||||||
|                                       tokenlist_entry_t **cursor, |  | ||||||
|                                       parser_t parser) { |  | ||||||
|     parse_result_t attempt = parser(*cursor); |  | ||||||
|     if (attempt.err) { |  | ||||||
|         error_free(attempt.err); |  | ||||||
|         return nullptr; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     error_t *err = ast_node_add_child(parent, attempt.node); |  | ||||||
|     if (err) { |  | ||||||
|         ast_node_free(attempt.node); |  | ||||||
|         return err; |  | ||||||
|     } |  | ||||||
|     *cursor = attempt.next; |  | ||||||
|     return nullptr; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* <register_expression>: <register> <register_index>? <register_offset>? |  | ||||||
|  * |  | ||||||
|  * Builds a NODE_REGISTER_EXPRESSION node whose children are the register and |  | ||||||
|  * whichever optional parts matched. If the leading register does not parse, |  | ||||||
|  * that parser's result is returned unchanged. Allocation or add-child failures |  | ||||||
|  * free everything built so far and are returned through parse_error. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_register_expression(tokenlist_entry_t *current) { |  | ||||||
|     ast_node_t *expression; |  | ||||||
|     error_t *err = ast_node_alloc(&expression); |  | ||||||
|     if (err) |  | ||||||
|         return parse_error(err); |  | ||||||
|     expression->id = NODE_REGISTER_EXPRESSION; |  | ||||||
|  |  | ||||||
|     // mandatory leading <register> |  | ||||||
|     parse_result_t base = parse_register(current); |  | ||||||
|     if (base.err) { |  | ||||||
|         ast_node_free(expression); |  | ||||||
|         return base; |  | ||||||
|     } |  | ||||||
|     err = ast_node_add_child(expression, base.node); |  | ||||||
|     if (err) { |  | ||||||
|         ast_node_free(base.node); |  | ||||||
|         ast_node_free(expression); |  | ||||||
|         return parse_error(err); |  | ||||||
|     } |  | ||||||
|     tokenlist_entry_t *cursor = base.next; |  | ||||||
|  |  | ||||||
|     // <register_index>? then <register_offset>? |  | ||||||
|     err = attach_optional_child(expression, &cursor, parse_register_index); |  | ||||||
|     if (!err) |  | ||||||
|         err = attach_optional_child(expression, &cursor, parse_register_offset); |  | ||||||
|     if (err) { |  | ||||||
|         ast_node_free(expression); |  | ||||||
|         return parse_error(err); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return parse_success(expression, cursor); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* <immediate>: a number or a label reference (tried through parse_any), |  | ||||||
|  * wrapped in a NODE_IMMEDIATE node. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_immediate(tokenlist_entry_t *current) { |  | ||||||
|     parser_t value_parsers[] = {parse_number, parse_label_reference, nullptr}; |  | ||||||
|     return parse_result_wrap(NODE_IMMEDIATE, parse_any(current, value_parsers)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Contents of a memory operand: a register expression or a label reference. */ |  | ||||||
| parse_result_t parse_memory_expression(tokenlist_entry_t *current) { |  | ||||||
|     parser_t alternatives[] = {parse_register_expression, parse_label_reference, |  | ||||||
|                                nullptr}; |  | ||||||
|     parse_result_t inner = parse_any(current, alternatives); |  | ||||||
|     return inner; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* <memory>: left bracket, memory expression, right bracket in sequence, |  | ||||||
|  * collected under a NODE_MEMORY node. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_memory(tokenlist_entry_t *current) { |  | ||||||
|     parser_t sequence[] = {parse_lbracket, parse_memory_expression, |  | ||||||
|                            parse_rbracket, nullptr}; |  | ||||||
|     parse_result_t memory = parse_consecutive(current, NODE_MEMORY, sequence); |  | ||||||
|     return memory; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* A single operand: register, memory reference or immediate, handed to |  | ||||||
|  * parse_any in that order. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_operand(tokenlist_entry_t *current) { |  | ||||||
|     parser_t alternatives[] = {parse_register, parse_memory, parse_immediate, |  | ||||||
|                                nullptr}; |  | ||||||
|     parse_result_t operand = parse_any(current, alternatives); |  | ||||||
|     return operand; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Operand list: parse_operand items separated by TOKEN_COMMA, collected under |  | ||||||
|  * a NODE_OPERANDS node by parse_list. |  | ||||||
|  * NOTE(review): the meaning of the `true` flag is defined by parse_list, which |  | ||||||
|  * is not visible here; presumably it permits an empty list. Confirm there. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_operands(tokenlist_entry_t *current) { |  | ||||||
|     return parse_list(current, NODE_OPERANDS, true, TOKEN_COMMA, parse_operand); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* <label>: an identifier followed by a colon, collected under NODE_LABEL. */ |  | ||||||
| parse_result_t parse_label(tokenlist_entry_t *current) { |  | ||||||
|     parser_t sequence[] = {parse_identifier, parse_colon, nullptr}; |  | ||||||
|     parse_result_t label = parse_consecutive(current, NODE_LABEL, sequence); |  | ||||||
|     return label; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* `section <identifier>`, collected under NODE_SECTION_DIRECTIVE. */ |  | ||||||
| parse_result_t parse_section_directive(tokenlist_entry_t *current) { |  | ||||||
|     parser_t sequence[] = {parse_section, parse_identifier, nullptr}; |  | ||||||
|     parse_result_t directive = |  | ||||||
|         parse_consecutive(current, NODE_SECTION_DIRECTIVE, sequence); |  | ||||||
|     return directive; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* `import <identifier>`, collected under NODE_IMPORT_DIRECTIVE. */ |  | ||||||
| parse_result_t parse_import_directive(tokenlist_entry_t *current) { |  | ||||||
|     parser_t sequence[] = {parse_import, parse_identifier, nullptr}; |  | ||||||
|     parse_result_t directive = |  | ||||||
|         parse_consecutive(current, NODE_IMPORT_DIRECTIVE, sequence); |  | ||||||
|     return directive; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* `export <identifier>`, collected under NODE_EXPORT_DIRECTIVE. */ |  | ||||||
| parse_result_t parse_export_directive(tokenlist_entry_t *current) { |  | ||||||
|     parser_t sequence[] = {parse_export, parse_identifier, nullptr}; |  | ||||||
|     parse_result_t directive = |  | ||||||
|         parse_consecutive(current, NODE_EXPORT_DIRECTIVE, sequence); |  | ||||||
|     return directive; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Body of a directive: a section, import or export directive. */ |  | ||||||
| parse_result_t parse_directive_options(tokenlist_entry_t *current) { |  | ||||||
|     parser_t alternatives[] = {parse_section_directive, parse_import_directive, |  | ||||||
|                                parse_export_directive, nullptr}; |  | ||||||
|     parse_result_t body = parse_any(current, alternatives); |  | ||||||
|     return body; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* <directive>: a dot, one of the directive bodies, then a newline, collected |  | ||||||
|  * under NODE_DIRECTIVE. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_directive(tokenlist_entry_t *current) { |  | ||||||
|     parser_t sequence[] = {parse_dot, parse_directive_options, parse_newline, |  | ||||||
|                            nullptr}; |  | ||||||
|     parse_result_t directive = |  | ||||||
|         parse_consecutive(current, NODE_DIRECTIVE, sequence); |  | ||||||
|     return directive; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* <instruction>: an identifier (the mnemonic), its operands, then a newline, |  | ||||||
|  * collected under NODE_INSTRUCTION. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_instruction(tokenlist_entry_t *current) { |  | ||||||
|     parser_t sequence[] = {parse_identifier, parse_operands, parse_newline, |  | ||||||
|                            nullptr}; |  | ||||||
|     parse_result_t instruction = |  | ||||||
|         parse_consecutive(current, NODE_INSTRUCTION, sequence); |  | ||||||
|     return instruction; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* One statement: a label, a directive, an instruction or a bare newline, |  | ||||||
|  * handed to parse_any in that order. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_statement(tokenlist_entry_t *current) { |  | ||||||
|     parser_t alternatives[] = {parse_label, parse_directive, parse_instruction, |  | ||||||
|                                parse_newline, nullptr}; |  | ||||||
|     parse_result_t statement = parse_any(current, alternatives); |  | ||||||
|     return statement; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Parser entry point. Skips trivia at the head of the token list, parses |  | ||||||
|  * statements into a NODE_PROGRAM node with parse_many, and, when a node was |  | ||||||
|  * produced, prunes NODE_NEWLINE nodes from it before returning the result. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse(tokenlist_entry_t *current) { |  | ||||||
|     tokenlist_entry_t *first = tokenlist_skip_trivia(current); |  | ||||||
|     parse_result_t program = |  | ||||||
|         parse_many(first, NODE_PROGRAM, true, parse_statement); |  | ||||||
|     if (program.node == nullptr) |  | ||||||
|         return program; |  | ||||||
|  |  | ||||||
|     ast_node_prune(program.node, NODE_NEWLINE); |  | ||||||
|     return program; |  | ||||||
| } |  | ||||||
| @@ -1,9 +0,0 @@ | |||||||
| #ifndef INCLUDE_PARSER_PARSER_H_ |  | ||||||
| #define INCLUDE_PARSER_PARSER_H_ |  | ||||||
|  |  | ||||||
| #include "../tokenlist.h" |  | ||||||
| #include "util.h" |  | ||||||
|  |  | ||||||
| parse_result_t parse(tokenlist_entry_t *current); |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_PARSER_PARSER_H_ |  | ||||||
| @@ -1,110 +0,0 @@ | |||||||
| #include "primitives.h" |  | ||||||
| #include "../ast.h" |  | ||||||
| #include "../data/registers.h" |  | ||||||
| #include <string.h> |  | ||||||
|  |  | ||||||
| parse_result_t parse_identifier(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_IDENTIFIER, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_decimal(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_DECIMAL, NODE_DECIMAL, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_hexadecimal(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_HEXADECIMAL, NODE_HEXADECIMAL, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_binary(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_BINARY, NODE_BINARY, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_octal(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_OCTAL, NODE_OCTAL, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_string(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_STRING, NODE_STRING, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_char(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_CHAR, NODE_CHAR, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_colon(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_COLON, NODE_COLON, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_comma(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_COMMA, NODE_COMMA, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_lbracket(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_LBRACKET, NODE_LBRACKET, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_rbracket(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_RBRACKET, NODE_RBRACKET, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_plus(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_PLUS, NODE_PLUS, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_minus(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_MINUS, NODE_MINUS, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_asterisk(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_ASTERISK, NODE_ASTERISK, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_dot(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_newline(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_label_reference(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE, |  | ||||||
|                        nullptr); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* True when the token's value equals the name of an entry in the |  | ||||||
|  * nullptr-terminated `registers` table (exact strcmp match). |  | ||||||
|  */ |  | ||||||
| bool is_register_token(lexer_token_t *token) { |  | ||||||
|     size_t index = 0; |  | ||||||
|     while (registers[index] != nullptr) { |  | ||||||
|         if (strcmp(token->value, registers[index]->name) == 0) |  | ||||||
|             return true; |  | ||||||
|         ++index; |  | ||||||
|     } |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_register(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_REGISTER, |  | ||||||
|                        is_register_token); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool is_section_token(lexer_token_t *token) { |  | ||||||
|     return strcmp(token->value, "section") == 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_section(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_SECTION, |  | ||||||
|                        is_section_token); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool is_import_token(lexer_token_t *token) { |  | ||||||
|     return strcmp(token->value, "import") == 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_import(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_IMPORT, is_import_token); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool is_export_token(lexer_token_t *token) { |  | ||||||
|     return strcmp(token->value, "export") == 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_export(tokenlist_entry_t *current) { |  | ||||||
|     return parse_token(current, TOKEN_IDENTIFIER, NODE_EXPORT, is_export_token); |  | ||||||
| } |  | ||||||
| @@ -1,33 +0,0 @@ | |||||||
| #ifndef INCLUDE_PARSER_PRIMITIVES_H_ |  | ||||||
| #define INCLUDE_PARSER_PRIMITIVES_H_ |  | ||||||
|  |  | ||||||
| #include "util.h" |  | ||||||
|  |  | ||||||
| parse_result_t parse_identifier(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_decimal(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_hexadecimal(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_binary(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_octal(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_string(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_char(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_colon(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_comma(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_lbracket(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_rbracket(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_plus(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_minus(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_asterisk(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_dot(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_newline(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_label_reference(tokenlist_entry_t *current); |  | ||||||
|  |  | ||||||
| /* These are "primitives" with a different name and some extra validation on top |  | ||||||
|  * for example, register is just an identifier but it only matches a limited set |  | ||||||
|  * of values |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_register(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_section(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_import(tokenlist_entry_t *current); |  | ||||||
| parse_result_t parse_export(tokenlist_entry_t *current); |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_PARSER_PRIMITIVES_H_ |  | ||||||
| @@ -1,56 +0,0 @@ | |||||||
| #include "util.h" |  | ||||||
| #include "../tokenlist.h" |  | ||||||
|  |  | ||||||
| error_t *const err_parse_no_match = |  | ||||||
|     &(error_t){.message = "parsing failed to find the correct token sequence"}; |  | ||||||
|  |  | ||||||
| /* Build a failed parse result carrying err; node and next are left null. */ |  | ||||||
| parse_result_t parse_error(error_t *err) { |  | ||||||
|     parse_result_t failure = {.err = err}; |  | ||||||
|     return failure; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| parse_result_t parse_no_match() { |  | ||||||
|     return parse_error(err_parse_no_match); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Successful parse result for ast. The stored continuation is `next` advanced |  | ||||||
|  * past any trivia, so the following parser starts on a significant token. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next) { |  | ||||||
|     return (parse_result_t){.node = ast, .next = tokenlist_skip_trivia(next)}; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Match exactly one token and turn it into a leaf AST node. |  | ||||||
|  * |  | ||||||
|  * current:  token list entry to inspect; nullptr (end of list) is a non-match |  | ||||||
|  * token_id: token id the entry must have |  | ||||||
|  * ast_id:   node id given to the created node |  | ||||||
|  * is_valid: optional extra predicate on the token; may be nullptr |  | ||||||
|  * |  | ||||||
|  * Returns parse_no_match() when the entry is missing, has a different id or is |  | ||||||
|  * rejected by is_valid; a parse_error() when node allocation fails; otherwise |  | ||||||
|  * a success whose node points back at `current` and whose continuation is the |  | ||||||
|  * entry after it. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_token(tokenlist_entry_t *current, |  | ||||||
|                            lexer_token_id_t token_id, node_id_t ast_id, |  | ||||||
|                            token_validator_t is_valid) { |  | ||||||
|     // tokenlist_skip_trivia can hand back nullptr at the end of the list, so |  | ||||||
|     // running out of tokens has to be a plain non-match rather than a crash |  | ||||||
|     if (current == nullptr) |  | ||||||
|         return parse_no_match(); |  | ||||||
|  |  | ||||||
|     if (current->token.id != token_id || |  | ||||||
|         (is_valid && !is_valid(&current->token))) |  | ||||||
|         return parse_no_match(); |  | ||||||
|  |  | ||||||
|     ast_node_t *node; |  | ||||||
|     error_t *err = ast_node_alloc(&node); |  | ||||||
|     if (err) |  | ||||||
|         return parse_error(err); |  | ||||||
|     node->id = ast_id; |  | ||||||
|     node->token_entry = current; |  | ||||||
|  |  | ||||||
|     return parse_success(node, current->next); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Wrap a successful parse result in a new parent node with the given id. |  | ||||||
|  * |  | ||||||
|  * A failed `result` is passed through untouched. On success a node with `id` |  | ||||||
|  * is allocated, result.node becomes its only child, and the continuation of |  | ||||||
|  * `result` is kept. If allocating the wrapper or attaching the child fails, |  | ||||||
|  * both the child and (when it exists) the wrapper are freed and the error is |  | ||||||
|  * returned through parse_error. |  | ||||||
|  */ |  | ||||||
| parse_result_t parse_result_wrap(node_id_t id, parse_result_t result) { |  | ||||||
|     if (result.err) |  | ||||||
|         return result; |  | ||||||
|  |  | ||||||
|     ast_node_t *node; |  | ||||||
|     error_t *err = ast_node_alloc(&node); |  | ||||||
|     if (err) { |  | ||||||
|         ast_node_free(result.node); |  | ||||||
|         return parse_error(err); |  | ||||||
|     } |  | ||||||
|     node->id = id; |  | ||||||
|  |  | ||||||
|     err = ast_node_add_child(node, result.node); |  | ||||||
|     if (err) { |  | ||||||
|         // the child was not attached, so both nodes must be released here; |  | ||||||
|         // freeing only the child would leak the wrapper |  | ||||||
|         ast_node_free(result.node); |  | ||||||
|         ast_node_free(node); |  | ||||||
|         return parse_error(err); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return parse_success(node, result.next); |  | ||||||
| } |  | ||||||
| @@ -1,26 +0,0 @@ | |||||||
| #ifndef INCLUDE_PARSER_UTIL_H_ |  | ||||||
| #define INCLUDE_PARSER_UTIL_H_ |  | ||||||
|  |  | ||||||
| #include "../ast.h" |  | ||||||
| #include "../error.h" |  | ||||||
| #include "../tokenlist.h" |  | ||||||
|  |  | ||||||
| typedef struct parse_result { |  | ||||||
|     error_t *err; |  | ||||||
|     tokenlist_entry_t *next; |  | ||||||
|     ast_node_t *node; |  | ||||||
| } parse_result_t; |  | ||||||
|  |  | ||||||
| typedef bool (*token_validator_t)(lexer_token_t *); |  | ||||||
|  |  | ||||||
| parse_result_t parse_error(error_t *err); |  | ||||||
| parse_result_t parse_no_match(); |  | ||||||
| parse_result_t parse_success(ast_node_t *ast, tokenlist_entry_t *next); |  | ||||||
| parse_result_t parse_token(tokenlist_entry_t *current, |  | ||||||
|                            lexer_token_id_t token_id, node_id_t ast_id, |  | ||||||
|                            token_validator_t is_valid); |  | ||||||
| parse_result_t parse_result_wrap(node_id_t id, parse_result_t result); |  | ||||||
|  |  | ||||||
| extern error_t *const err_parse_no_match; |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_PARSER_UTIL_H_ |  | ||||||
| @@ -81,25 +81,3 @@ error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex) { | |||||||
|         return err; |         return err; | ||||||
|     return nullptr; |     return nullptr; | ||||||
| } | } | ||||||
|  |  | ||||||
| /* True for entries whose token is whitespace or a comment; every other token |  | ||||||
|  * id is significant. The entry must not be null. |  | ||||||
|  */ |  | ||||||
| bool is_trivia(tokenlist_entry_t *trivia) { |  | ||||||
|     return trivia->token.id == TOKEN_WHITESPACE || |  | ||||||
|            trivia->token.id == TOKEN_COMMENT; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* Starting at current itself, walk forward to the first entry that is not |  | ||||||
|  * trivia. Returns nullptr when current is nullptr or the list ends first. |  | ||||||
|  */ |  | ||||||
| tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current) { |  | ||||||
|     for (; current != nullptr; current = current->next) { |  | ||||||
|         if (!is_trivia(current)) |  | ||||||
|             break; |  | ||||||
|     } |  | ||||||
|     return current; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /* First non-trivia entry strictly after current; nullptr when current is |  | ||||||
|  * nullptr or nothing significant follows. |  | ||||||
|  */ |  | ||||||
| tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current) { |  | ||||||
|     return current ? tokenlist_skip_trivia(current->next) : nullptr; |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -27,14 +27,4 @@ error_t *tokenlist_fill(tokenlist_t *list, lexer_t *lex); | |||||||
|  |  | ||||||
| void tokenlist_free(tokenlist_t *list); | void tokenlist_free(tokenlist_t *list); | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Return the first token entry, starting at current itself, that isn't |  | ||||||
|  * whitespace or a comment (newline tokens are significant and not skipped) |  | ||||||
|  */ |  | ||||||
| tokenlist_entry_t *tokenlist_skip_trivia(tokenlist_entry_t *current); |  | ||||||
|  |  | ||||||
| /** |  | ||||||
|  * Return the first token entry after current that isn't whitespace or a |  | ||||||
|  * comment (newline tokens are significant and not skipped) |  | ||||||
|  */ |  | ||||||
| tokenlist_entry_t *tokenlist_next(tokenlist_entry_t *current); |  | ||||||
|  |  | ||||||
| #endif // INCLUDE_SRC_TOKENLIST_H_ | #endif // INCLUDE_SRC_TOKENLIST_H_ | ||||||
|   | |||||||
| @@ -1,6 +0,0 @@ | |||||||
| BasedOnStyle:    LLVM |  | ||||||
| IndentWidth:     4 |  | ||||||
| Cpp11BracedListStyle: true |  | ||||||
| AlignArrayOfStructures: Left |  | ||||||
| AllowShortFunctionsOnASingleLine: Empty |  | ||||||
| ColumnLimit: 120 |  | ||||||
							
								
								
									
										22
									
								
								tests/ast.c
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								tests/ast.c
									
									
									
									
									
								
							| @@ -1,22 +0,0 @@ | |||||||
| #include "../src/ast.h" |  | ||||||
| #include "munit.h" |  | ||||||
|  |  | ||||||
| MunitResult test_ast_node_alloc(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     ast_node_t *node = nullptr; |  | ||||||
|     error_t *err = ast_node_alloc(&node); |  | ||||||
|  |  | ||||||
|     munit_assert_ptr_not_null(node); |  | ||||||
|     munit_assert_ptr_null(err); |  | ||||||
|  |  | ||||||
|     ast_node_free(node); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitTest ast_tests[] = { |  | ||||||
|     {"/node_alloc", test_ast_node_alloc, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {nullptr,       nullptr,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr} |  | ||||||
| }; |  | ||||||
							
								
								
									
										164
									
								
								tests/bytes.c
									
									
									
									
									
								
							
							
						
						
									
										164
									
								
								tests/bytes.c
									
									
									
									
									
								
							| @@ -1,164 +0,0 @@ | |||||||
| #include "../src/bytes.h" |  | ||||||
| #include "munit.h" |  | ||||||
|  |  | ||||||
| MunitResult test_bytes_initializer(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     bytes_t *bytes = LOCAL_BYTES(16); |  | ||||||
|     munit_assert_size(bytes->len, ==, 0); |  | ||||||
|     munit_assert_size(bytes->cap, ==, 16); |  | ||||||
|     for (size_t i = 0; i < 16; ++i) |  | ||||||
|         munit_assert_uint8(bytes->buffer[i], ==, 0); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_bytes_append_uint8(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     bytes_t *bytes = LOCAL_BYTES(16); |  | ||||||
|     munit_assert_size(bytes->len, ==, 0); |  | ||||||
|     munit_assert_size(bytes->cap, ==, 16); |  | ||||||
|     for (size_t i = 0; i < 16; ++i) { |  | ||||||
|         error_t *err = bytes_append_uint8(bytes, (uint8_t)i); |  | ||||||
|         munit_assert_null(err); |  | ||||||
|         munit_assert_uint8(bytes->buffer[i], ==, (uint8_t)i); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     error_t *err = bytes_append_uint8(bytes, 0xFF); |  | ||||||
|     munit_assert_ptr(err, ==, err_bytes_no_capacity); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_bytes_append_array(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     bytes_t *bytes = LOCAL_BYTES(16); |  | ||||||
|     munit_assert_size(bytes->len, ==, 0); |  | ||||||
|     munit_assert_size(bytes->cap, ==, 16); |  | ||||||
|  |  | ||||||
|     uint8_t test_array[] = {0x01, 0x02, 0x03, 0x04, 0x05}; |  | ||||||
|     size_t array_len = sizeof(test_array) / sizeof(test_array[0]); |  | ||||||
|     error_t *err = bytes_append_array(bytes, array_len, test_array); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(bytes->len, ==, array_len); |  | ||||||
|  |  | ||||||
|     for (size_t i = 0; i < array_len; ++i) { |  | ||||||
|         munit_assert_uint8(bytes->buffer[i], ==, test_array[i]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     uint8_t second_array[] = {0x06, 0x07, 0x08}; |  | ||||||
|     size_t second_len = sizeof(second_array) / sizeof(second_array[0]); |  | ||||||
|     err = bytes_append_array(bytes, second_len, second_array); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(bytes->len, ==, array_len + second_len); |  | ||||||
|     for (size_t i = 0; i < second_len; ++i) { |  | ||||||
|         munit_assert_uint8(bytes->buffer[array_len + i], ==, second_array[i]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     uint8_t overflow_array[10] = {0}; // Array that would exceed capacity |  | ||||||
|     err = bytes_append_array(bytes, sizeof(overflow_array), overflow_array); |  | ||||||
|     munit_assert_ptr(err, ==, err_bytes_no_capacity); |  | ||||||
|     munit_assert_size(bytes->len, ==, array_len + second_len); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_bytes_append_bytes(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     bytes_t *src = LOCAL_BYTES(8); |  | ||||||
|     bytes_t *dst = LOCAL_BYTES(16); |  | ||||||
|  |  | ||||||
|     // Fill source bytes with test data |  | ||||||
|     for (uint8_t i = 0; i < 5; ++i) { |  | ||||||
|         error_t *err = bytes_append_uint8(src, i + 1); |  | ||||||
|         munit_assert_null(err); |  | ||||||
|     } |  | ||||||
|     munit_assert_size(src->len, ==, 5); |  | ||||||
|  |  | ||||||
|     // Append source to destination |  | ||||||
|     error_t *err = bytes_append_bytes(dst, src); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(dst->len, ==, src->len); |  | ||||||
|  |  | ||||||
|     // Verify destination contents match source |  | ||||||
|     for (size_t i = 0; i < src->len; ++i) { |  | ||||||
|         munit_assert_uint8(dst->buffer[i], ==, src->buffer[i]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Fill source with more data and append again |  | ||||||
|     for (uint8_t i = 0; i < 3; ++i) { |  | ||||||
|         err = bytes_append_uint8(src, i + 6); |  | ||||||
|         munit_assert_null(err); |  | ||||||
|     } |  | ||||||
|     munit_assert_size(src->len, ==, 8); |  | ||||||
|  |  | ||||||
|     // Append updated source |  | ||||||
|     err = bytes_append_bytes(dst, src); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(dst->len, ==, 13); // 5 + 8 |  | ||||||
|  |  | ||||||
|     // Test capacity boundary |  | ||||||
|     src->len = 4; // manually set length to barely not fit |  | ||||||
|     err = bytes_append_bytes(dst, src); |  | ||||||
|     munit_assert_ptr(err, ==, err_bytes_no_capacity); |  | ||||||
|     munit_assert_size(dst->len, ==, 13); // Length unchanged after error |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
| /* Appending a 16-bit value stores two bytes, least significant byte first. */ |  | ||||||
| MunitResult test_bytes_append_uint16(const MunitParameter params[], void *data) { |  | ||||||
|     // unused by this test; silenced the same way as in the sibling tests |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     bytes_t *bytes = LOCAL_BYTES(16); |  | ||||||
|     munit_assert_size(bytes->len, ==, 0); |  | ||||||
|     munit_assert_size(bytes->cap, ==, 16); |  | ||||||
|  |  | ||||||
|     // NOTE(review): the result of bytes_append_uint16 is not checked here; |  | ||||||
|     // consider asserting on it once its return type is confirmed |  | ||||||
|     bytes_append_uint16(bytes, 0xFFAA); |  | ||||||
|     munit_assert_size(bytes->len, ==, 2); |  | ||||||
|     munit_assert_uint8(bytes->buffer[0], ==, 0xAA); |  | ||||||
|     munit_assert_uint8(bytes->buffer[1], ==, 0xFF); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
| /* Appending a 32-bit value stores four bytes, least significant byte first. */ |  | ||||||
| MunitResult test_bytes_append_uint32(const MunitParameter params[], void *data) { |  | ||||||
|     // unused by this test; silenced the same way as in the sibling tests |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     bytes_t *bytes = LOCAL_BYTES(16); |  | ||||||
|     munit_assert_size(bytes->len, ==, 0); |  | ||||||
|     munit_assert_size(bytes->cap, ==, 16); |  | ||||||
|  |  | ||||||
|     // NOTE(review): the result of bytes_append_uint32 is not checked here; |  | ||||||
|     // consider asserting on it once its return type is confirmed |  | ||||||
|     bytes_append_uint32(bytes, 0xAABBCCDD); |  | ||||||
|     munit_assert_size(bytes->len, ==, 4); |  | ||||||
|     munit_assert_uint8(bytes->buffer[0], ==, 0xDD); |  | ||||||
|     munit_assert_uint8(bytes->buffer[1], ==, 0xCC); |  | ||||||
|     munit_assert_uint8(bytes->buffer[2], ==, 0xBB); |  | ||||||
|     munit_assert_uint8(bytes->buffer[3], ==, 0xAA); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
| /* Appending a 64-bit value stores eight bytes, least significant byte first. */ |  | ||||||
| MunitResult test_bytes_append_uint64(const MunitParameter params[], void *data) { |  | ||||||
|     // unused by this test; silenced the same way as in the sibling tests |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     bytes_t *bytes = LOCAL_BYTES(16); |  | ||||||
|     munit_assert_size(bytes->len, ==, 0); |  | ||||||
|     munit_assert_size(bytes->cap, ==, 16); |  | ||||||
|  |  | ||||||
|     // NOTE(review): the result of bytes_append_uint64 is not checked here; |  | ||||||
|     // consider asserting on it once its return type is confirmed |  | ||||||
|     bytes_append_uint64(bytes, 0xAABBCCDDEEFF9988); |  | ||||||
|     munit_assert_size(bytes->len, ==, 8); |  | ||||||
|     munit_assert_uint8(bytes->buffer[0], ==, 0x88); |  | ||||||
|     munit_assert_uint8(bytes->buffer[1], ==, 0x99); |  | ||||||
|     munit_assert_uint8(bytes->buffer[2], ==, 0xFF); |  | ||||||
|     munit_assert_uint8(bytes->buffer[3], ==, 0xEE); |  | ||||||
|     munit_assert_uint8(bytes->buffer[4], ==, 0xDD); |  | ||||||
|     munit_assert_uint8(bytes->buffer[5], ==, 0xCC); |  | ||||||
|     munit_assert_uint8(bytes->buffer[6], ==, 0xBB); |  | ||||||
|     munit_assert_uint8(bytes->buffer[7], ==, 0xAA); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitTest bytes_tests[] = { |  | ||||||
|     {"/initializer",   test_bytes_initializer,   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/append_uint8",  test_bytes_append_uint8,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/append_array",  test_bytes_append_array,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/append_bytes",  test_bytes_append_bytes,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/append_uint16", test_bytes_append_uint16, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/append_uint32", test_bytes_append_uint32, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/append_uint64", test_bytes_append_uint64, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {nullptr,          nullptr,                  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr} |  | ||||||
| }; |  | ||||||
| @@ -1,65 +0,0 @@ | |||||||
| lbl_0:  ; 65 symbols used for testing growing the symbols table |  | ||||||
| lbl_1: |  | ||||||
| lbl_2: |  | ||||||
| lbl_3: |  | ||||||
| lbl_4: |  | ||||||
| lbl_5: |  | ||||||
| lbl_6: |  | ||||||
| lbl_7: |  | ||||||
| lbl_8: |  | ||||||
| lbl_9: |  | ||||||
| lbl_10: |  | ||||||
| lbl_11: |  | ||||||
| lbl_12: |  | ||||||
| lbl_13: |  | ||||||
| lbl_14: |  | ||||||
| lbl_15: |  | ||||||
| lbl_16: |  | ||||||
| lbl_17: |  | ||||||
| lbl_18: |  | ||||||
| lbl_19: |  | ||||||
| lbl_20: |  | ||||||
| lbl_21: |  | ||||||
| lbl_22: |  | ||||||
| lbl_23: |  | ||||||
| lbl_24: |  | ||||||
| lbl_25: |  | ||||||
| lbl_26: |  | ||||||
| lbl_27: |  | ||||||
| lbl_28: |  | ||||||
| lbl_29: |  | ||||||
| lbl_30: |  | ||||||
| lbl_31: |  | ||||||
| lbl_32: |  | ||||||
| lbl_33: |  | ||||||
| lbl_34: |  | ||||||
| lbl_35: |  | ||||||
| lbl_36: |  | ||||||
| lbl_37: |  | ||||||
| lbl_38: |  | ||||||
| lbl_39: |  | ||||||
| lbl_40: |  | ||||||
| lbl_41: |  | ||||||
| lbl_42: |  | ||||||
| lbl_43: |  | ||||||
| lbl_44: |  | ||||||
| lbl_45: |  | ||||||
| lbl_46: |  | ||||||
| lbl_47: |  | ||||||
| lbl_48: |  | ||||||
| lbl_49: |  | ||||||
| lbl_50: |  | ||||||
| lbl_51: |  | ||||||
| lbl_52: |  | ||||||
| lbl_53: |  | ||||||
| lbl_54: |  | ||||||
| lbl_55: |  | ||||||
| lbl_56: |  | ||||||
| lbl_57: |  | ||||||
| lbl_58: |  | ||||||
| lbl_59: |  | ||||||
| lbl_60: |  | ||||||
| lbl_61: |  | ||||||
| lbl_62: |  | ||||||
| lbl_63: |  | ||||||
| lbl_64: |  | ||||||
| @@ -1,5 +0,0 @@ | |||||||
| ; regression test for two issues: |  | ||||||
| ;  - parsing two zero operand instructions in a row |  | ||||||
| ;  - a zero operand instruction just before eof |  | ||||||
|     syscall |  | ||||||
|     ret |  | ||||||
| @@ -1,5 +0,0 @@ | |||||||
| ; sample program with trivia on the head of the tokenlist |  | ||||||
|  |  | ||||||
| _start: |  | ||||||
|     xor rax, rax |  | ||||||
|     call exit |  | ||||||
| @@ -1,12 +0,0 @@ | |||||||
| .import test |  | ||||||
| .export test |  | ||||||
| test: |  | ||||||
|     call test |  | ||||||
| .import more |  | ||||||
| .export more |  | ||||||
| more: |  | ||||||
|     call more |  | ||||||
| .import other |  | ||||||
| .export other |  | ||||||
| other: |  | ||||||
|     call other |  | ||||||
| @@ -1,20 +1,5 @@ | |||||||
| .section text |  | ||||||
|  |  | ||||||
| ; Small valid code snippet that should contain all different AST nodes |  | ||||||
|  |  | ||||||
| .export _start |  | ||||||
| .import exit |  | ||||||
|  |  | ||||||
| _start: | _start: | ||||||
|     mov eax, ebx |     mov eax, 555            ; move 555 into eax | ||||||
|     lea eax, [eax + ebx * 4 + 8] |  | ||||||
|     lea eax, [eax + 8] |  | ||||||
|     lea eax, [eax + ebx * 8] |  | ||||||
|     lea eax, [esp - 24] |  | ||||||
|     lea eax, [eax + ebx * 4 - 8] |  | ||||||
|     lea eax, [_start] |  | ||||||
|     mov eax, _start |  | ||||||
|     mov eax, 555 |  | ||||||
|     push 0o777 |     push 0o777 | ||||||
|     xor eax, 0xDEADBEEF |     xor eax, 0xDEADBEEF | ||||||
|     and ecx, 0o770 |     and ecx, 0o770 | ||||||
| @@ -22,5 +7,3 @@ _start: | |||||||
|     push 0xffff:64 |     push 0xffff:64 | ||||||
|     push 0o777:16 |     push 0o777:16 | ||||||
|     push 0b0001:16 |     push 0b0001:16 | ||||||
|     mov rax, 0 |  | ||||||
|     call exit |  | ||||||
|   | |||||||
							
								
								
									
										896
									
								
								tests/lexer.c
									
									
									
									
									
								
							
							
						
						
									
										896
									
								
								tests/lexer.c
									
									
									
									
									
								
							| @@ -1,896 +0,0 @@ | |||||||
| #include "../src/lexer.h" |  | ||||||
| #include "../src/error.h" |  | ||||||
| #include "munit.h" |  | ||||||
| #include <string.h> |  | ||||||
|  |  | ||||||
| void lexer_setup_memory_test(lexer_t *lex, const char *input) { |  | ||||||
|     munit_assert_null(lex->fp); |  | ||||||
|     FILE *stream = fmemopen((void *)input, strlen(input), "rb"); |  | ||||||
|     munit_assert_not_null(stream); |  | ||||||
|     lex->fp = stream; |  | ||||||
|     lex->line_number = 0; |  | ||||||
|     lex->character_number = 0; |  | ||||||
|     lex->buffer_count = 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void lexer_expect_one_token(lexer_t *lex, lexer_token_id_t id, const char *value, size_t line, size_t column) { |  | ||||||
|     lexer_token_t token = {}; |  | ||||||
|  |  | ||||||
|     error_t *err = lexer_next(lex, &token); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     munit_assert_int(token.id, ==, id); |  | ||||||
|     munit_assert_string_equal(token.value, value); |  | ||||||
|     munit_assert_int(token.line_number, ==, line); |  | ||||||
|     munit_assert_int(token.character_number, ==, column); |  | ||||||
|     lexer_token_cleanup(&token); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void lexer_expect_eof(lexer_t *lex) { |  | ||||||
|     lexer_token_t token = {}; |  | ||||||
|     error_t *err = lexer_next(lex, &token); |  | ||||||
|     munit_assert_ptr_equal(err, err_eof); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void lexer_test_one_token(lexer_token_id_t id, const char *value) { |  | ||||||
|     lexer_t lex = {}; |  | ||||||
|     lexer_setup_memory_test(&lex, value); |  | ||||||
|     lexer_expect_one_token(&lex, id, value, 0, 0); |  | ||||||
|     lexer_expect_eof(&lex); |  | ||||||
|     lexer_close(&lex); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_identifier(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_IDENTIFIER, "identifier"); |  | ||||||
|     lexer_test_one_token(TOKEN_IDENTIFIER, "_identifier"); |  | ||||||
|     lexer_test_one_token(TOKEN_IDENTIFIER, "_identifier123_55"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| typedef struct token_data { |  | ||||||
|     lexer_token_id_t id; |  | ||||||
|     char *value; |  | ||||||
|     size_t line; |  | ||||||
|     size_t column; |  | ||||||
| } token_data_t; |  | ||||||
|  |  | ||||||
| typedef struct boundary { |  | ||||||
|     const char *input; |  | ||||||
|     token_data_t first; |  | ||||||
|     token_data_t second; |  | ||||||
| } boundary_t; |  | ||||||
|  |  | ||||||
| void test_lexer_boundary(boundary_t boundaries[]) { |  | ||||||
|     for (size_t i = 0; boundaries[i].input; ++i) { |  | ||||||
|         auto boundary = boundaries[i]; |  | ||||||
|         auto first = boundary.first; |  | ||||||
|         auto second = boundary.second; |  | ||||||
|  |  | ||||||
|         lexer_t lex = {}; |  | ||||||
|         lexer_setup_memory_test(&lex, boundary.input); |  | ||||||
|         lexer_expect_one_token(&lex, first.id, first.value, first.line, first.column); |  | ||||||
|         lexer_expect_one_token(&lex, second.id, second.value, second.line, second.column); |  | ||||||
|         lexer_expect_eof(&lex); |  | ||||||
|         lexer_close(&lex); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_identifier_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"id:",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COLON, ":", 0, 2}         }, |  | ||||||
|         {"id[",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_LBRACKET, "[", 0, 2}      }, |  | ||||||
|         {"id]",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_RBRACKET, "]", 0, 2}      }, |  | ||||||
|         {"id+",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_PLUS, "+", 0, 2}          }, |  | ||||||
|         {"id-",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_MINUS, "-", 0, 2}         }, |  | ||||||
|         {"id*",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_ASTERISK, "*", 0, 2}      }, |  | ||||||
|         {"id.",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_DOT, ".", 0, 2}           }, |  | ||||||
|         {"id;comment", {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_COMMENT, ";comment", 0, 2}}, |  | ||||||
|         {"id\n",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 2}      }, |  | ||||||
|         {"id\r\n",     {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 2}    }, |  | ||||||
|         {"id ",        {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 2}    }, |  | ||||||
|         {"id\t",       {TOKEN_IDENTIFIER, "id", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 2}   }, |  | ||||||
|         {nullptr,      {},                             {}                               }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_decimal(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_DECIMAL, "123"); |  | ||||||
|     lexer_test_one_token(TOKEN_DECIMAL, "0"); |  | ||||||
|     lexer_test_one_token(TOKEN_DECIMAL, "42"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_decimal_with_suffix(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_DECIMAL, "123:8"); |  | ||||||
|     lexer_test_one_token(TOKEN_DECIMAL, "0:16"); |  | ||||||
|     lexer_test_one_token(TOKEN_DECIMAL, "42:32"); |  | ||||||
|     lexer_test_one_token(TOKEN_DECIMAL, "69:64"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_hexadecimal(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0x123"); |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0xDEAD"); |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0x0"); |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0xabcdef"); |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0xABCDEF"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_hexadecimal_with_suffix(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0x123:8"); |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0xDEAD:16"); |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0xABC:32"); |  | ||||||
|     lexer_test_one_token(TOKEN_HEXADECIMAL, "0xffff:64"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_octal(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_OCTAL, "0o777"); |  | ||||||
|     lexer_test_one_token(TOKEN_OCTAL, "0o0"); |  | ||||||
|     lexer_test_one_token(TOKEN_OCTAL, "0o123"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_octal_with_suffix(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_OCTAL, "0o777:8"); |  | ||||||
|     lexer_test_one_token(TOKEN_OCTAL, "0o123:16"); |  | ||||||
|     lexer_test_one_token(TOKEN_OCTAL, "0o777:32"); |  | ||||||
|     lexer_test_one_token(TOKEN_OCTAL, "0o123:64"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_binary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_BINARY, "0b101"); |  | ||||||
|     lexer_test_one_token(TOKEN_BINARY, "0b0"); |  | ||||||
|     lexer_test_one_token(TOKEN_BINARY, "0b1"); |  | ||||||
|     lexer_test_one_token(TOKEN_BINARY, "0b01010101"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_binary_with_suffix(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_BINARY, "0b101:8"); |  | ||||||
|     lexer_test_one_token(TOKEN_BINARY, "0b0:16"); |  | ||||||
|     lexer_test_one_token(TOKEN_BINARY, "0b1:32"); |  | ||||||
|     lexer_test_one_token(TOKEN_BINARY, "0b01010101:64"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_colon(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_COLON, ":"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_comma(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_COMMA, ","); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_lbracket(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_LBRACKET, "["); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_rbracket(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_RBRACKET, "]"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_plus(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_PLUS, "+"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_minus(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_MINUS, "-"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_asterisk(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_ASTERISK, "*"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_dot(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_DOT, "."); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_comment(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_COMMENT, ";This is a comment"); |  | ||||||
|     lexer_test_one_token(TOKEN_COMMENT, "; Another comment"); |  | ||||||
|     lexer_test_one_token(TOKEN_COMMENT, ";"); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_whitespace(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     lexer_test_one_token(TOKEN_WHITESPACE, " "); |  | ||||||
|     lexer_test_one_token(TOKEN_WHITESPACE, "  "); |  | ||||||
|     lexer_test_one_token(TOKEN_WHITESPACE, "\t"); |  | ||||||
|     lexer_test_one_token(TOKEN_WHITESPACE, " \t "); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_newlines(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     // Test simple newline |  | ||||||
|     lexer_t lex = {}; |  | ||||||
|     lexer_setup_memory_test(&lex, "\n"); |  | ||||||
|     lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 0, 0); |  | ||||||
|     lexer_expect_eof(&lex); |  | ||||||
|     lexer_close(&lex); |  | ||||||
|  |  | ||||||
|     // Test Windows-style newline |  | ||||||
|     lexer_t lex2 = {}; |  | ||||||
|     lexer_setup_memory_test(&lex2, "\r\n"); |  | ||||||
|     lexer_expect_one_token(&lex2, TOKEN_NEWLINE, "\r\n", 0, 0); |  | ||||||
|     lexer_expect_eof(&lex2); |  | ||||||
|     lexer_close(&lex2); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_line_numbers(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     lexer_t lex = {}; |  | ||||||
|     lexer_setup_memory_test(&lex, "a\nb\nc"); |  | ||||||
|  |  | ||||||
|     lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "a", 0, 0); |  | ||||||
|     lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 0, 1); |  | ||||||
|     lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "b", 1, 0); |  | ||||||
|     lexer_expect_one_token(&lex, TOKEN_NEWLINE, "\n", 1, 1); |  | ||||||
|     lexer_expect_one_token(&lex, TOKEN_IDENTIFIER, "c", 2, 0); |  | ||||||
|     lexer_expect_eof(&lex); |  | ||||||
|     lexer_close(&lex); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_decimal_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"123,",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMA, ",", 0, 3}      }, |  | ||||||
|         {"123:",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COLON, ":", 0, 3}      }, |  | ||||||
|         {"123[",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 3}   }, |  | ||||||
|         {"123]",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 3}   }, |  | ||||||
|         {"123+",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_PLUS, "+", 0, 3}       }, |  | ||||||
|         {"123-",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_MINUS, "-", 0, 3}      }, |  | ||||||
|         {"123*",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 3}   }, |  | ||||||
|         {"123.",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_DOT, ".", 0, 3}        }, |  | ||||||
|         {"123;",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_COMMENT, ";", 0, 3}    }, |  | ||||||
|         {"123\n",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 3}   }, |  | ||||||
|         {"123\r\n", {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 3} }, |  | ||||||
|         {"123 ",    {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 3} }, |  | ||||||
|         {"123\t",   {TOKEN_DECIMAL, "123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 3}}, |  | ||||||
|         {nullptr,   {},                           {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_hexadecimal_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"0x123,",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      }, |  | ||||||
|         {"0x123:",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      }, |  | ||||||
|         {"0x123[",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   }, |  | ||||||
|         {"0x123]",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   }, |  | ||||||
|         {"0x123+",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       }, |  | ||||||
|         {"0x123-",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      }, |  | ||||||
|         {"0x123*",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   }, |  | ||||||
|         {"0x123.",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        }, |  | ||||||
|         {"0x123;",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    }, |  | ||||||
|         {"0x123\n",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   }, |  | ||||||
|         {"0x123\r\n", {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} }, |  | ||||||
|         {"0x123 ",    {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} }, |  | ||||||
|         {"0x123\t",   {TOKEN_HEXADECIMAL, "0x123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}}, |  | ||||||
|         {nullptr,     {},                                 {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_octal_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"0o123,",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      }, |  | ||||||
|         {"0o123:",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COLON, ":", 0, 5}      }, |  | ||||||
|         {"0o123[",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   }, |  | ||||||
|         {"0o123]",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   }, |  | ||||||
|         {"0o123+",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       }, |  | ||||||
|         {"0o123-",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      }, |  | ||||||
|         {"0o123*",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   }, |  | ||||||
|         {"0o123.",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_DOT, ".", 0, 5}        }, |  | ||||||
|         {"0o123;",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    }, |  | ||||||
|         {"0o123\n",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   }, |  | ||||||
|         {"0o123\r\n", {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} }, |  | ||||||
|         {"0o123 ",    {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} }, |  | ||||||
|         {"0o123\t",   {TOKEN_OCTAL, "0o123", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}}, |  | ||||||
|         {nullptr,     {},                           {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_binary_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"0b101,",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMA, ",", 0, 5}      }, |  | ||||||
|         {"0b101:",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COLON, ":", 0, 5}      }, |  | ||||||
|         {"0b101[",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_LBRACKET, "[", 0, 5}   }, |  | ||||||
|         {"0b101]",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_RBRACKET, "]", 0, 5}   }, |  | ||||||
|         {"0b101+",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_PLUS, "+", 0, 5}       }, |  | ||||||
|         {"0b101-",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_MINUS, "-", 0, 5}      }, |  | ||||||
|         {"0b101*",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_ASTERISK, "*", 0, 5}   }, |  | ||||||
|         {"0b101.",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_DOT, ".", 0, 5}        }, |  | ||||||
|         {"0b101;",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_COMMENT, ";", 0, 5}    }, |  | ||||||
|         {"0b101\n",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 5}   }, |  | ||||||
|         {"0b101\r\n", {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 5} }, |  | ||||||
|         {"0b101 ",    {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 5} }, |  | ||||||
|         {"0b101\t",   {TOKEN_BINARY, "0b101", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 5}}, |  | ||||||
|         {nullptr,     {},                            {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_colon_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {":,",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      }, |  | ||||||
|         {"::",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COLON, ":", 0, 1}      }, |  | ||||||
|         {":[",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   }, |  | ||||||
|         {":]",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   }, |  | ||||||
|         {":+",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       }, |  | ||||||
|         {":-",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      }, |  | ||||||
|         {":*",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   }, |  | ||||||
|         {":.",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_DOT, ".", 0, 1}        }, |  | ||||||
|         {":;",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    }, |  | ||||||
|         {":\n",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   }, |  | ||||||
|         {":\r\n", {TOKEN_COLON, ":", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} }, |  | ||||||
|         {": ",    {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} }, |  | ||||||
|         {":\t",   {TOKEN_COLON, ":", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}}, |  | ||||||
|         {nullptr, {},                       {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_comma_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {",,",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      }, |  | ||||||
|         {",:",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COLON, ":", 0, 1}      }, |  | ||||||
|         {",[",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   }, |  | ||||||
|         {",]",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   }, |  | ||||||
|         {",+",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       }, |  | ||||||
|         {",-",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      }, |  | ||||||
|         {",*",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   }, |  | ||||||
|         {",.",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_DOT, ".", 0, 1}        }, |  | ||||||
|         {",;",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    }, |  | ||||||
|         {",\n",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   }, |  | ||||||
|         {",\r\n", {TOKEN_COMMA, ",", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} }, |  | ||||||
|         {", ",    {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} }, |  | ||||||
|         {",\t",   {TOKEN_COMMA, ",", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}}, |  | ||||||
|         {nullptr, {},                       {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_lbracket_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"[,",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      }, |  | ||||||
|         {"[:",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COLON, ":", 0, 1}      }, |  | ||||||
|         {"[[",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   }, |  | ||||||
|         {"[]",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   }, |  | ||||||
|         {"[+",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       }, |  | ||||||
|         {"[-",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      }, |  | ||||||
|         {"[*",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   }, |  | ||||||
|         {"[.",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_DOT, ".", 0, 1}        }, |  | ||||||
|         {"[;",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    }, |  | ||||||
|         {"[\n",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   }, |  | ||||||
|         {"[\r\n", {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} }, |  | ||||||
|         {"[ ",    {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} }, |  | ||||||
|         {"[\t",   {TOKEN_LBRACKET, "[", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}}, |  | ||||||
|         {nullptr, {},                          {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_rbracket_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"],",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      }, |  | ||||||
|         {"]:",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COLON, ":", 0, 1}      }, |  | ||||||
|         {"][",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   }, |  | ||||||
|         {"]]",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   }, |  | ||||||
|         {"]+",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       }, |  | ||||||
|         {"]-",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      }, |  | ||||||
|         {"]*",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   }, |  | ||||||
|         {"].",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_DOT, ".", 0, 1}        }, |  | ||||||
|         {"];",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    }, |  | ||||||
|         {"]\n",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   }, |  | ||||||
|         {"]\r\n", {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} }, |  | ||||||
|         {"] ",    {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} }, |  | ||||||
|         {"]\t",   {TOKEN_RBRACKET, "]", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}}, |  | ||||||
|         {nullptr, {},                          {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_plus_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"+,",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      }, |  | ||||||
|         {"+:",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COLON, ":", 0, 1}      }, |  | ||||||
|         {"+[",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   }, |  | ||||||
|         {"+]",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   }, |  | ||||||
|         {"++",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       }, |  | ||||||
|         {"+-",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      }, |  | ||||||
|         {"+*",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   }, |  | ||||||
|         {"+.",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_DOT, ".", 0, 1}        }, |  | ||||||
|         {"+;",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    }, |  | ||||||
|         {"+\n",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   }, |  | ||||||
|         {"+\r\n", {TOKEN_PLUS, "+", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} }, |  | ||||||
|         {"+ ",    {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} }, |  | ||||||
|         {"+\t",   {TOKEN_PLUS, "+", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}}, |  | ||||||
|         {nullptr, {},                      {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_lexer_minus_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     boundary_t boundaries[] = { |  | ||||||
|         {"-,",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMA, ",", 0, 1}      }, |  | ||||||
|         {"-:",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COLON, ":", 0, 1}      }, |  | ||||||
|         {"-[",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}   }, |  | ||||||
|         {"-]",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}   }, |  | ||||||
|         {"-+",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_PLUS, "+", 0, 1}       }, |  | ||||||
|         {"--",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_MINUS, "-", 0, 1}      }, |  | ||||||
|         {"-*",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}   }, |  | ||||||
|         {"-.",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_DOT, ".", 0, 1}        }, |  | ||||||
|         {"-;",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}    }, |  | ||||||
|         {"-\n",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}   }, |  | ||||||
|         {"-\r\n", {TOKEN_MINUS, "-", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1} }, |  | ||||||
|         {"- ",    {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1} }, |  | ||||||
|         {"-\t",   {TOKEN_MINUS, "-", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}}, |  | ||||||
|         {nullptr, {},                       {}                            }, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(boundaries); |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Boundary table for '*': every input starts with an asterisk followed by one |  | ||||||
| // other lexeme. Each row looks like {input, first expected token, second |  | ||||||
| // expected token}, with token fields presumably {id, text, line, column} -- |  | ||||||
| // confirm against the boundary_t declaration. |  | ||||||
| MunitResult test_lexer_asterisk_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     boundary_t cases[] = { |  | ||||||
|         {"*,", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMA, ",", 0, 1}}, |  | ||||||
|         {"*:", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COLON, ":", 0, 1}}, |  | ||||||
|         {"*[", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}}, |  | ||||||
|         {"*]", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}}, |  | ||||||
|         {"*+", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_PLUS, "+", 0, 1}}, |  | ||||||
|         {"*-", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_MINUS, "-", 0, 1}}, |  | ||||||
|         {"**", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}}, |  | ||||||
|         {"*.", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_DOT, ".", 0, 1}}, |  | ||||||
|         {"*;", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}}, |  | ||||||
|         {"*\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}}, |  | ||||||
|         {"*\r\n", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}}, |  | ||||||
|         {"* ", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}}, |  | ||||||
|         {"*\t", {TOKEN_ASTERISK, "*", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}}, |  | ||||||
|         // Sentinel row: a null input marks the end of the table. |  | ||||||
|         {nullptr, {}, {}}, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(cases); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Boundary table for '.': every input starts with a dot followed by one other |  | ||||||
| // lexeme (including a second dot). Rows look like {input, first expected |  | ||||||
| // token, second expected token}; token fields are presumably {id, text, line, |  | ||||||
| // column} -- confirm against the boundary_t declaration. |  | ||||||
| MunitResult test_lexer_dot_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     boundary_t cases[] = { |  | ||||||
|         {".,", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMA, ",", 0, 1}}, |  | ||||||
|         {".:", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COLON, ":", 0, 1}}, |  | ||||||
|         {".[", {TOKEN_DOT, ".", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}}, |  | ||||||
|         {".]", {TOKEN_DOT, ".", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}}, |  | ||||||
|         {".+", {TOKEN_DOT, ".", 0, 0}, {TOKEN_PLUS, "+", 0, 1}}, |  | ||||||
|         {".-", {TOKEN_DOT, ".", 0, 0}, {TOKEN_MINUS, "-", 0, 1}}, |  | ||||||
|         {".*", {TOKEN_DOT, ".", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}}, |  | ||||||
|         {"..", {TOKEN_DOT, ".", 0, 0}, {TOKEN_DOT, ".", 0, 1}}, |  | ||||||
|         {".;", {TOKEN_DOT, ".", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}}, |  | ||||||
|         {".\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}}, |  | ||||||
|         {".\r\n", {TOKEN_DOT, ".", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}}, |  | ||||||
|         {". ", {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 1}}, |  | ||||||
|         {".\t", {TOKEN_DOT, ".", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 1}}, |  | ||||||
|         // Sentinel row: a null input marks the end of the table. |  | ||||||
|         {nullptr, {}, {}}, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(cases); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Boundary table for comments: the expected comment token is ";comment" and the |  | ||||||
| // line terminator ("\n" or "\r\n") is expected as a separate newline token at |  | ||||||
| // column 8. |  | ||||||
| MunitResult test_lexer_comment_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     boundary_t cases[] = { |  | ||||||
|         {";comment\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}}, |  | ||||||
|         {";comment\r\n", {TOKEN_COMMENT, ";comment", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}}, |  | ||||||
|         // Sentinel row: a null input marks the end of the table. |  | ||||||
|         {nullptr, {}, {}}, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(cases); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Boundary table for whitespace: every input is a single space followed by one |  | ||||||
| // non-whitespace lexeme. The table has no space-then-space or space-then-tab |  | ||||||
| // row. |  | ||||||
| MunitResult test_lexer_whitespace_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     boundary_t cases[] = { |  | ||||||
|         {" ,", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMA, ",", 0, 1}}, |  | ||||||
|         {" :", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COLON, ":", 0, 1}}, |  | ||||||
|         {" [", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_LBRACKET, "[", 0, 1}}, |  | ||||||
|         {" ]", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_RBRACKET, "]", 0, 1}}, |  | ||||||
|         {" +", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_PLUS, "+", 0, 1}}, |  | ||||||
|         {" -", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_MINUS, "-", 0, 1}}, |  | ||||||
|         {" *", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_ASTERISK, "*", 0, 1}}, |  | ||||||
|         {" .", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_DOT, ".", 0, 1}}, |  | ||||||
|         {" ;", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_COMMENT, ";", 0, 1}}, |  | ||||||
|         {" \n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 1}}, |  | ||||||
|         {" \r\n", {TOKEN_WHITESPACE, " ", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 1}}, |  | ||||||
|         // Sentinel row: a null input marks the end of the table. |  | ||||||
|         {nullptr, {}, {}}, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(cases); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Boundary table for "\n": every input starts with a line feed followed by one |  | ||||||
| // other lexeme. The second expected token carries the pair 1, 0 where the |  | ||||||
| // single-line tables carry 0, 1 (presumably line 1, column 0 -- confirm against |  | ||||||
| // the token declaration). |  | ||||||
| MunitResult test_lexer_newline_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     boundary_t cases[] = { |  | ||||||
|         {"\n,", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}}, |  | ||||||
|         {"\n:", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}}, |  | ||||||
|         {"\n[", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}}, |  | ||||||
|         {"\n]", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}}, |  | ||||||
|         {"\n+", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}}, |  | ||||||
|         {"\n-", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}}, |  | ||||||
|         {"\n*", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}}, |  | ||||||
|         {"\n.", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}}, |  | ||||||
|         {"\n;", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}}, |  | ||||||
|         {"\n\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}}, |  | ||||||
|         {"\n\r\n", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0}}, |  | ||||||
|         {"\n ", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0}}, |  | ||||||
|         {"\n\t", {TOKEN_NEWLINE, "\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}}, |  | ||||||
|         // Sentinel row: a null input marks the end of the table. |  | ||||||
|         {nullptr, {}, {}}, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(cases); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Boundary table for "\r\n": the CR LF pair is expected as one newline token, |  | ||||||
| // and the following lexeme is expected with the pair 1, 0 (presumably line 1, |  | ||||||
| // column 0 -- confirm against the token declaration). |  | ||||||
| MunitResult test_lexer_crlf_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     boundary_t cases[] = { |  | ||||||
|         {"\r\n,", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMA, ",", 1, 0}}, |  | ||||||
|         {"\r\n:", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COLON, ":", 1, 0}}, |  | ||||||
|         {"\r\n[", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_LBRACKET, "[", 1, 0}}, |  | ||||||
|         {"\r\n]", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_RBRACKET, "]", 1, 0}}, |  | ||||||
|         {"\r\n+", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_PLUS, "+", 1, 0}}, |  | ||||||
|         {"\r\n-", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_MINUS, "-", 1, 0}}, |  | ||||||
|         {"\r\n*", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_ASTERISK, "*", 1, 0}}, |  | ||||||
|         {"\r\n.", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_DOT, ".", 1, 0}}, |  | ||||||
|         {"\r\n;", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_COMMENT, ";", 1, 0}}, |  | ||||||
|         {"\r\n\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\n", 1, 0}}, |  | ||||||
|         {"\r\n\r\n", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_NEWLINE, "\r\n", 1, 0}}, |  | ||||||
|         {"\r\n ", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, " ", 1, 0}}, |  | ||||||
|         {"\r\n\t", {TOKEN_NEWLINE, "\r\n", 0, 0}, {TOKEN_WHITESPACE, "\t", 1, 0}}, |  | ||||||
|         // Sentinel row: a null input marks the end of the table. |  | ||||||
|         {nullptr, {}, {}}, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(cases); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Boundary table for suffixed numbers: hexadecimal, octal and binary literals |  | ||||||
| // with a ":<digits>" suffix, each followed by one other lexeme. The second |  | ||||||
| // token's last field equals the length of the literal (7 for "0x123:8", 8 for |  | ||||||
| // the others). |  | ||||||
| MunitResult test_lexer_number_boundary(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     boundary_t cases[] = { |  | ||||||
|         {"0x123:8,", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_COMMA, ",", 0, 7}}, |  | ||||||
|         {"0x123:16:", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_COLON, ":", 0, 8}}, |  | ||||||
|         {"0o777:32[", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_LBRACKET, "[", 0, 8}}, |  | ||||||
|         {"0b101:64]", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_RBRACKET, "]", 0, 8}}, |  | ||||||
|         {"0x123:8+", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_PLUS, "+", 0, 7}}, |  | ||||||
|         {"0x123:16-", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_MINUS, "-", 0, 8}}, |  | ||||||
|         {"0o777:32*", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_ASTERISK, "*", 0, 8}}, |  | ||||||
|         {"0b101:64.", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_DOT, ".", 0, 8}}, |  | ||||||
|         {"0x123:8;", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_COMMENT, ";", 0, 7}}, |  | ||||||
|         {"0x123:16\n", {TOKEN_HEXADECIMAL, "0x123:16", 0, 0}, {TOKEN_NEWLINE, "\n", 0, 8}}, |  | ||||||
|         {"0o777:32\r\n", {TOKEN_OCTAL, "0o777:32", 0, 0}, {TOKEN_NEWLINE, "\r\n", 0, 8}}, |  | ||||||
|         {"0b101:64 ", {TOKEN_BINARY, "0b101:64", 0, 0}, {TOKEN_WHITESPACE, " ", 0, 8}}, |  | ||||||
|         {"0x123:8\t", {TOKEN_HEXADECIMAL, "0x123:8", 0, 0}, {TOKEN_WHITESPACE, "\t", 0, 7}}, |  | ||||||
|         // Sentinel row: a null input marks the end of the table. |  | ||||||
|         {nullptr, {}, {}}, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     test_lexer_boundary(cases); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Verifies that numeric literals of exactly 128 characters are returned as a |  | ||||||
| // single decimal or hexadecimal token whose value is also 128 characters long. |  | ||||||
| // Inputs cover decimal and 0x-prefixed literals, with and without ":64". |  | ||||||
| MunitResult test_lexer_maximum_length_numbers(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     // Null-terminated so the loop needs no explicit element count. |  | ||||||
|     char *inputs[] = { |  | ||||||
|         "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" |  | ||||||
|         "9999999999999999999988", |  | ||||||
|         "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" |  | ||||||
|         "9999999999999999998:64", |  | ||||||
|         "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" |  | ||||||
|         "9999999999999999999988", |  | ||||||
|         "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" |  | ||||||
|         "9999999999999999998:64", |  | ||||||
|         nullptr, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     for (char **cursor = inputs; *cursor; ++cursor) { |  | ||||||
|         char *number = *cursor; |  | ||||||
|  |  | ||||||
|         // Catch a miscounted literal before the lexer is involved. |  | ||||||
|         munit_assert_size(128, ==, strlen(number)); |  | ||||||
|  |  | ||||||
|         lexer_token_t token = {}; |  | ||||||
|         lexer_t lex = {}; |  | ||||||
|         lexer_setup_memory_test(&lex, number); |  | ||||||
|         lexer_next(&lex, &token); |  | ||||||
|  |  | ||||||
|         munit_assert_true(token.id == TOKEN_DECIMAL || token.id == TOKEN_HEXADECIMAL); |  | ||||||
|         munit_assert_size(128, ==, strlen(token.value)); |  | ||||||
|  |  | ||||||
|         lexer_token_cleanup(&token); |  | ||||||
|         lexer_close(&lex); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Verifies that 129-character numeric literals, one character longer than the |  | ||||||
| // inputs of the maximum-length test, produce TOKEN_ERROR. The first two inputs |  | ||||||
| // have no suffix; the last two end in ":64". |  | ||||||
| MunitResult test_lexer_too_long_numbers(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     char *inputs[] = { |  | ||||||
|         "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" |  | ||||||
|         "99999999999999999999988", |  | ||||||
|         "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" |  | ||||||
|         "99999999999999999999988", |  | ||||||
|         "9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" |  | ||||||
|         "99999999999999999998:64", |  | ||||||
|         "0x99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999" |  | ||||||
|         "99999999999999999998:64", |  | ||||||
|     }; |  | ||||||
|     char **const suffixed = inputs + 2; // first input that carries ":64" |  | ||||||
|     char **const end = inputs + 4;      // one past the last input |  | ||||||
|  |  | ||||||
|     // No suffix: the error token is expected to hold exactly 128 characters. |  | ||||||
|     for (char **cursor = inputs; cursor < suffixed; ++cursor) { |  | ||||||
|         char *number = *cursor; |  | ||||||
|         munit_assert_size(129, ==, strlen(number)); |  | ||||||
|  |  | ||||||
|         lexer_token_t token = {}; |  | ||||||
|         lexer_t lex = {}; |  | ||||||
|         lexer_setup_memory_test(&lex, number); |  | ||||||
|         lexer_next(&lex, &token); |  | ||||||
|  |  | ||||||
|         munit_assert_int(TOKEN_ERROR, ==, token.id); |  | ||||||
|         munit_assert_size(128, ==, strlen(token.value)); |  | ||||||
|  |  | ||||||
|         lexer_token_cleanup(&token); |  | ||||||
|         lexer_close(&lex); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // With suffix: the error token holds at most 128 characters, and the next |  | ||||||
|     // two tokens are expected to be ":" at column 126 and "64" at column 127. |  | ||||||
|     for (char **cursor = suffixed; cursor < end; ++cursor) { |  | ||||||
|         char *number = *cursor; |  | ||||||
|         munit_assert_size(129, ==, strlen(number)); |  | ||||||
|  |  | ||||||
|         lexer_token_t token = {}; |  | ||||||
|         lexer_t lex = {}; |  | ||||||
|         lexer_setup_memory_test(&lex, number); |  | ||||||
|         lexer_next(&lex, &token); |  | ||||||
|  |  | ||||||
|         munit_assert_int(TOKEN_ERROR, ==, token.id); |  | ||||||
|         munit_assert_size(128, >=, strlen(token.value)); |  | ||||||
|         lexer_token_cleanup(&token); |  | ||||||
|  |  | ||||||
|         lexer_expect_one_token(&lex, TOKEN_COLON, ":", 0, 126); |  | ||||||
|         lexer_expect_one_token(&lex, TOKEN_DECIMAL, "64", 0, 127); |  | ||||||
|         lexer_close(&lex); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Verifies that a run of exactly 1024 spaces is returned as one whitespace |  | ||||||
| // token whose value is 1024 characters long. |  | ||||||
| MunitResult test_lexer_max_whitespace_length(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     // 1024 spaces plus the terminating NUL. |  | ||||||
|     char whitespace[1025]; |  | ||||||
|     const size_t last = sizeof(whitespace) - 1; |  | ||||||
|     memset(whitespace, ' ', last); |  | ||||||
|     whitespace[last] = '\0'; |  | ||||||
|     munit_assert_size(1024, ==, strlen(whitespace)); |  | ||||||
|  |  | ||||||
|     lexer_token_t token = {}; |  | ||||||
|     lexer_t lex = {}; |  | ||||||
|     lexer_setup_memory_test(&lex, whitespace); |  | ||||||
|     lexer_next(&lex, &token); |  | ||||||
|  |  | ||||||
|     munit_assert_int(TOKEN_WHITESPACE, ==, token.id); |  | ||||||
|     munit_assert_size(1024, ==, strlen(token.value)); |  | ||||||
|  |  | ||||||
|     lexer_token_cleanup(&token); |  | ||||||
|     lexer_close(&lex); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Verifies that a run of 1025 spaces yields TOKEN_ERROR carrying the first 1024 |  | ||||||
| // characters, followed by a one-space whitespace token at column 1024. |  | ||||||
| MunitResult test_lexer_too_long_whitespace(const MunitParameter params[], void *data) { |  | ||||||
|     (void)data; |  | ||||||
|     (void)params; |  | ||||||
|  |  | ||||||
|     // 1025 spaces plus the terminating NUL. |  | ||||||
|     char whitespace[1026]; |  | ||||||
|     const size_t last = sizeof(whitespace) - 1; |  | ||||||
|     memset(whitespace, ' ', last); |  | ||||||
|     whitespace[last] = '\0'; |  | ||||||
|     munit_assert_size(1025, ==, strlen(whitespace)); |  | ||||||
|  |  | ||||||
|     lexer_token_t token = {}; |  | ||||||
|     lexer_t lex = {}; |  | ||||||
|     lexer_setup_memory_test(&lex, whitespace); |  | ||||||
|     lexer_next(&lex, &token); |  | ||||||
|  |  | ||||||
|     munit_assert_int(TOKEN_ERROR, ==, token.id); |  | ||||||
|     munit_assert_size(1024, ==, strlen(token.value)); |  | ||||||
|     lexer_token_cleanup(&token); |  | ||||||
|  |  | ||||||
|     // The single space left over must still come back as its own token. |  | ||||||
|     lexer_expect_one_token(&lex, TOKEN_WHITESPACE, " ", 0, 1024); |  | ||||||
|     lexer_close(&lex); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitTest lexer_tests[] = { |  | ||||||
|     {"/identifier",              test_lexer_identifier,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/identifier_boundary",     test_lexer_identifier_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/decimal",                 test_lexer_decimal,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/decimal_boundary",        test_lexer_decimal_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/hexadecimal",             test_lexer_hexadecimal,             nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/hexadecimal_with_suffix", test_lexer_hexadecimal_with_suffix, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/hexadecimal_boundary",    test_lexer_hexadecimal_boundary,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/octal",                   test_lexer_octal,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/octal_with_suffix",       test_lexer_octal_with_suffix,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/octal_boundary",          test_lexer_octal_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/binary",                  test_lexer_binary,                  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/binary_with_suffix",      test_lexer_binary_with_suffix,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/binary_boundary",         test_lexer_binary_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/number_boundary",         test_lexer_number_boundary,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/colon",                   test_lexer_colon,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/colon_boundary",          test_lexer_colon_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/comma",                   test_lexer_comma,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/comma_boundary",          test_lexer_comma_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/lbracket",                test_lexer_lbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/lbracket_boundary",       test_lexer_lbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/rbracket",                test_lexer_rbracket,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/rbracket_boundary",       test_lexer_rbracket_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/plus",                    test_lexer_plus,                    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/plus_boundary",           test_lexer_plus_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/minus",                   test_lexer_minus,                   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/minus_boundary",          test_lexer_minus_boundary,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/asterisk",                test_lexer_asterisk,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/asterisk_boundary",       test_lexer_asterisk_boundary,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/dot",                     test_lexer_dot,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/dot_boundary",            test_lexer_dot_boundary,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/comment",                 test_lexer_comment,                 nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/comment_boundary",        test_lexer_comment_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/whitespace",              test_lexer_whitespace,              nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/whitespace_boundary",     test_lexer_whitespace_boundary,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/newlines",                test_lexer_newlines,                nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/newline_boundary",        test_lexer_newline_boundary,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/crlf_boundary",           test_lexer_crlf_boundary,           nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/line_numbers",            test_lexer_line_numbers,            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/maximum_length_numbers",  test_lexer_maximum_length_numbers,  nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/too_long_numbers",        test_lexer_too_long_numbers,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/max_whitespace_length",   test_lexer_max_whitespace_length,   nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/too_long_whitespace",     test_lexer_too_long_whitespace,     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {nullptr,                    nullptr,                            nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr} |  | ||||||
| }; |  | ||||||
							
								
								
									
										22
									
								
								tests/main.c
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								tests/main.c
									
									
									
									
									
								
							| @@ -1,22 +0,0 @@ | |||||||
| #include "munit.h" |  | ||||||
|  |  | ||||||
| // Test tables provided by the other test translation units. |  | ||||||
| extern MunitTest regression_tests[]; |  | ||||||
| extern MunitTest ast_tests[]; |  | ||||||
| extern MunitTest lexer_tests[]; |  | ||||||
| extern MunitTest symbols_tests[]; |  | ||||||
| extern MunitTest bytes_tests[]; |  | ||||||
|  |  | ||||||
| // Test runner entry point: mounts every test table as a child suite of "/oas" |  | ||||||
| // and passes the command line to munit. The process exit status is whatever |  | ||||||
| // munit_suite_main returns. |  | ||||||
| int main(int argc, char *argv[MUNIT_ARRAY_PARAM(argc + 1)]) { |  | ||||||
|     // Child suites in the original order; the all-null row ends the list. |  | ||||||
|     MunitSuite children[] = { |  | ||||||
|         {"/regression", regression_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE}, |  | ||||||
|         {"/ast", ast_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE}, |  | ||||||
|         {"/lexer", lexer_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE}, |  | ||||||
|         {"/symbols", symbols_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE}, |  | ||||||
|         {"/bytes", bytes_tests, nullptr, 1, MUNIT_SUITE_OPTION_NONE}, |  | ||||||
|         {nullptr, nullptr, nullptr, 0, MUNIT_SUITE_OPTION_NONE}, |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     // The root has no tests of its own; it only groups the children. |  | ||||||
|     MunitSuite root = {"/oas", nullptr, children, 1, MUNIT_SUITE_OPTION_NONE}; |  | ||||||
|     return munit_suite_main(&root, nullptr, argc, argv); |  | ||||||
| } |  | ||||||
							
								
								
									
										2055
									
								
								tests/munit.c
									
									
									
									
									
								
							
							
						
						
									
										2055
									
								
								tests/munit.c
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										535
									
								
								tests/munit.h
									
									
									
									
									
								
							
							
						
						
									
										535
									
								
								tests/munit.h
									
									
									
									
									
								
							| @@ -1,535 +0,0 @@ | |||||||
| /* µnit Testing Framework |  | ||||||
|  * Copyright (c) 2013-2017 Evan Nemerson <evan@nemerson.com> |  | ||||||
|  * |  | ||||||
|  * Permission is hereby granted, free of charge, to any person |  | ||||||
|  * obtaining a copy of this software and associated documentation |  | ||||||
|  * files (the "Software"), to deal in the Software without |  | ||||||
|  * restriction, including without limitation the rights to use, copy, |  | ||||||
|  * modify, merge, publish, distribute, sublicense, and/or sell copies |  | ||||||
|  * of the Software, and to permit persons to whom the Software is |  | ||||||
|  * furnished to do so, subject to the following conditions: |  | ||||||
|  * |  | ||||||
|  * The above copyright notice and this permission notice shall be |  | ||||||
|  * included in all copies or substantial portions of the Software. |  | ||||||
|  * |  | ||||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |  | ||||||
|  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |  | ||||||
|  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |  | ||||||
|  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |  | ||||||
|  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |  | ||||||
|  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |  | ||||||
|  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |  | ||||||
|  * SOFTWARE. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| #if !defined(MUNIT_H) |  | ||||||
| #define MUNIT_H |  | ||||||
|  |  | ||||||
| #include <stdarg.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| /* Pack a version triple into one integer: major shifted left by 16, minor shifted left by 8, revision in the low bits. */ |  | ||||||
| #define MUNIT_VERSION(major, minor, revision) \ |  | ||||||
|   (((major) << 16) | ((minor) << 8) | (revision)) |  | ||||||
| /* Version of this copy of the header (0.4.1), encoded with MUNIT_VERSION. */ |  | ||||||
| #define MUNIT_CURRENT_VERSION MUNIT_VERSION(0, 4, 1) |  | ||||||
| /* Fixed-width integer aliases. MSVC with _MSC_VER below 1600 gets the __intN |  | ||||||
|  * built-in types; every other compiler gets the <stdint.h> types. */ |  | ||||||
| #if defined(_MSC_VER) && (_MSC_VER < 1600) |  | ||||||
| #  define munit_int8_t   __int8 |  | ||||||
| #  define munit_uint8_t  unsigned __int8 |  | ||||||
| #  define munit_int16_t  __int16 |  | ||||||
| #  define munit_uint16_t unsigned __int16 |  | ||||||
| #  define munit_int32_t  __int32 |  | ||||||
| #  define munit_uint32_t unsigned __int32 |  | ||||||
| #  define munit_int64_t  __int64 |  | ||||||
| #  define munit_uint64_t unsigned __int64 |  | ||||||
| #else |  | ||||||
| #  include <stdint.h> |  | ||||||
| #  define munit_int8_t   int8_t |  | ||||||
| #  define munit_uint8_t  uint8_t |  | ||||||
| #  define munit_int16_t  int16_t |  | ||||||
| #  define munit_uint16_t uint16_t |  | ||||||
| #  define munit_int32_t  int32_t |  | ||||||
| #  define munit_uint32_t uint32_t |  | ||||||
| #  define munit_int64_t  int64_t |  | ||||||
| #  define munit_uint64_t uint64_t |  | ||||||
| #endif |  | ||||||
| /* printf conversion macros. MSVC with _MSC_VER below 1800 gets a fallback for |  | ||||||
|  * each PRI* macro that is not already defined (the 64-bit ones use the "I64" |  | ||||||
|  * prefix); every other compiler gets <inttypes.h>. */ |  | ||||||
| #if defined(_MSC_VER) && (_MSC_VER < 1800) |  | ||||||
| #  if !defined(PRIi8) |  | ||||||
| #    define PRIi8 "i" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIi16) |  | ||||||
| #    define PRIi16 "i" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIi32) |  | ||||||
| #    define PRIi32 "i" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIi64) |  | ||||||
| #    define PRIi64 "I64i" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRId8) |  | ||||||
| #    define PRId8 "d" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRId16) |  | ||||||
| #    define PRId16 "d" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRId32) |  | ||||||
| #    define PRId32 "d" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRId64) |  | ||||||
| #    define PRId64 "I64d" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIx8) |  | ||||||
| #    define PRIx8 "x" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIx16) |  | ||||||
| #    define PRIx16 "x" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIx32) |  | ||||||
| #    define PRIx32 "x" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIx64) |  | ||||||
| #    define PRIx64 "I64x" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIu8) |  | ||||||
| #    define PRIu8 "u" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIu16) |  | ||||||
| #    define PRIu16 "u" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIu32) |  | ||||||
| #    define PRIu32 "u" |  | ||||||
| #  endif |  | ||||||
| #  if !defined(PRIu64) |  | ||||||
| #    define PRIu64 "I64u" |  | ||||||
| #  endif |  | ||||||
| #else |  | ||||||
| #  include <inttypes.h> |  | ||||||
| #endif |  | ||||||
| /* Boolean type used by munit unless the includer has already defined |  | ||||||
|  * munit_bool: the `bool` macro when one is defined, else _Bool when |  | ||||||
|  * __STDC_VERSION__ reports C99 or later, else plain int. */ |  | ||||||
| #if !defined(munit_bool) |  | ||||||
| #  if defined(bool) |  | ||||||
| #    define munit_bool bool |  | ||||||
| #  elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) |  | ||||||
| #    define munit_bool _Bool |  | ||||||
| #  else |  | ||||||
| #    define munit_bool int |  | ||||||
| #  endif |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #if defined(__cplusplus) |  | ||||||
| extern "C" { |  | ||||||
| #endif |  | ||||||
| /* Branch-prediction hints and an "unused" marker. With __GNUC__ defined they |  | ||||||
|  * map to __builtin_expect and __attribute__((__unused__)); otherwise the hints |  | ||||||
|  * expand to the bare expression and MUNIT_UNUSED expands to nothing. */ |  | ||||||
| #if defined(__GNUC__) |  | ||||||
| #  define MUNIT_LIKELY(expr) (__builtin_expect ((expr), 1)) |  | ||||||
| #  define MUNIT_UNLIKELY(expr) (__builtin_expect ((expr), 0)) |  | ||||||
| #  define MUNIT_UNUSED __attribute__((__unused__)) |  | ||||||
| #else |  | ||||||
| #  define MUNIT_LIKELY(expr) (expr) |  | ||||||
| #  define MUNIT_UNLIKELY(expr) (expr) |  | ||||||
| #  define MUNIT_UNUSED |  | ||||||
| #endif |  | ||||||
| /* Size expression for an array parameter declarator, e.g. |  | ||||||
|  * argv[MUNIT_ARRAY_PARAM(argc + 1)]. Expands to its argument for C99 and later |  | ||||||
|  * (except when __PGI is defined) and to nothing otherwise. */ |  | ||||||
| #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__PGI) |  | ||||||
| #  define MUNIT_ARRAY_PARAM(name) name |  | ||||||
| #else |  | ||||||
| #  define MUNIT_ARRAY_PARAM(name) |  | ||||||
| #endif |  | ||||||
| /* Length-modifier strings, presumably spliced into printf formats by the |  | ||||||
|  * typed assertion macros (not visible here -- confirm). Outside _WIN32 they |  | ||||||
|  * are "z", "hh" and "h"; on _WIN32 the size modifier is "I64" for x64/amd64 |  | ||||||
|  * and empty otherwise, and the char/short modifiers are empty. */ |  | ||||||
| #if !defined(_WIN32) |  | ||||||
| #  define MUNIT_SIZE_MODIFIER "z" |  | ||||||
| #  define MUNIT_CHAR_MODIFIER "hh" |  | ||||||
| #  define MUNIT_SHORT_MODIFIER "h" |  | ||||||
| #else |  | ||||||
| #  if defined(_M_X64) || defined(__amd64__) |  | ||||||
| #    define MUNIT_SIZE_MODIFIER "I64" |  | ||||||
| #  else |  | ||||||
| #    define MUNIT_SIZE_MODIFIER "" |  | ||||||
| #  endif |  | ||||||
| #  define MUNIT_CHAR_MODIFIER "" |  | ||||||
| #  define MUNIT_SHORT_MODIFIER "" |  | ||||||
| #endif |  | ||||||
| /* "Does not return" function specifier: _Noreturn for C11 and later, else the |  | ||||||
|  * GCC attribute, else the MSVC __declspec, else nothing. */ |  | ||||||
| #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L |  | ||||||
| #  define MUNIT_NO_RETURN _Noreturn |  | ||||||
| #elif defined(__GNUC__) |  | ||||||
| #  define MUNIT_NO_RETURN __attribute__((__noreturn__)) |  | ||||||
| #elif defined(_MSC_VER) |  | ||||||
| #  define MUNIT_NO_RETURN __declspec(noreturn) |  | ||||||
| #else |  | ||||||
| #  define MUNIT_NO_RETURN |  | ||||||
| #endif |  | ||||||
| /* Push/pop pair that disables MSVC warning 4127 (_MSC_VER 1500 and later); |  | ||||||
|  * both expand to nothing elsewhere. The assertion macros place the pair |  | ||||||
|  * around their closing `while (0)`. */ |  | ||||||
| #if defined(_MSC_VER) &&  (_MSC_VER >= 1500) |  | ||||||
| #  define MUNIT_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) |  | ||||||
| #  define MUNIT_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) |  | ||||||
| #else |  | ||||||
| #  define MUNIT_PUSH_DISABLE_MSVC_C4127_ |  | ||||||
| #  define MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| /* Level argument accepted by munit_logf_ex and the munit_logf / munit_log |  | ||||||
|  * macros.  Enumerators are declared in the order debug, info, warning, |  | ||||||
|  * error, so their values increase in that order. */ |  | ||||||
| typedef enum { |  | ||||||
|   MUNIT_LOG_DEBUG, |  | ||||||
|   MUNIT_LOG_INFO, |  | ||||||
|   MUNIT_LOG_WARNING, |  | ||||||
|   MUNIT_LOG_ERROR |  | ||||||
| } MunitLogLevel; |  | ||||||
|  |  | ||||||
| /* MUNIT_PRINTF(string_index, first_to_check) attaches the GNU printf format |  | ||||||
|  * attribute to the declaration that follows, using the given 1-based |  | ||||||
|  * argument positions.  It is empty for non-GNU compilers and for MinGW. */ |  | ||||||
| #if defined(__GNUC__) && !defined(__MINGW32__) |  | ||||||
| #  define MUNIT_PRINTF(string_index, first_to_check) __attribute__((format (printf, string_index, first_to_check))) |  | ||||||
| #else |  | ||||||
| #  define MUNIT_PRINTF(string_index, first_to_check) |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| /* Logging entry point: takes a level, the source location and a |  | ||||||
|  * printf-style format (argument 4) with its variadic arguments. */ |  | ||||||
| MUNIT_PRINTF(4, 5) |  | ||||||
| void munit_logf_ex(MunitLogLevel level, const char* filename, int line, const char* format, ...); |  | ||||||
|  |  | ||||||
| /* Calls munit_logf_ex with the caller's __FILE__ and __LINE__.  __VA_ARGS__ |  | ||||||
|  * follows a comma, so at least one argument after the format is required. */ |  | ||||||
| #define munit_logf(level, format, ...) \ |  | ||||||
|   munit_logf_ex(level, __FILE__, __LINE__, format, __VA_ARGS__) |  | ||||||
|  |  | ||||||
| /* Logs a plain message; it is passed through "%s" and therefore never |  | ||||||
|  * interpreted as a format string. */ |  | ||||||
| #define munit_log(level, msg) \ |  | ||||||
|   munit_logf(level, "%s", msg) |  | ||||||
|  |  | ||||||
| /* Failure entry point used by every assertion macro: takes the source |  | ||||||
|  * location and a printf-style format (argument 3).  Declared with |  | ||||||
|  * MUNIT_NO_RETURN, i.e. it does not return to the caller. */ |  | ||||||
| MUNIT_NO_RETURN |  | ||||||
| MUNIT_PRINTF(3, 4) |  | ||||||
| void munit_errorf_ex(const char* filename, int line, const char* format, ...); |  | ||||||
|  |  | ||||||
| /* Calls munit_errorf_ex with the caller's __FILE__ and __LINE__.  __VA_ARGS__ |  | ||||||
|  * follows a comma, so at least one argument after the format is required. */ |  | ||||||
| #define munit_errorf(format, ...) \ |  | ||||||
|   munit_errorf_ex(__FILE__, __LINE__, format, __VA_ARGS__) |  | ||||||
|  |  | ||||||
| /* Reports a plain message; it is passed through "%s" and therefore never |  | ||||||
|  * interpreted as a format string. */ |  | ||||||
| #define munit_error(msg) \ |  | ||||||
|   munit_errorf("%s", msg) |  | ||||||
|  |  | ||||||
| /* Fails through munit_error when `expr` is false.  The message is |  | ||||||
|  * "assertion failed: " followed by the source text of `expr`. */ |  | ||||||
| #define munit_assert(expr) \ |  | ||||||
|   do { \ |  | ||||||
|     if (!MUNIT_LIKELY(expr)) { \ |  | ||||||
|       munit_error("assertion failed: " #expr); \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| /* Same check as munit_assert; the failure message additionally ends with |  | ||||||
|  * " is not true". */ |  | ||||||
| #define munit_assert_true(expr) \ |  | ||||||
|   do { \ |  | ||||||
|     if (!MUNIT_LIKELY(expr)) { \ |  | ||||||
|       munit_error("assertion failed: " #expr " is not true"); \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| /* Fails through munit_error when `expr` is true.  The message is the source |  | ||||||
|  * text of `expr` followed by " is not false". */ |  | ||||||
| #define munit_assert_false(expr) \ |  | ||||||
|   do { \ |  | ||||||
|     if (!MUNIT_LIKELY(!(expr))) { \ |  | ||||||
|       munit_error("assertion failed: " #expr " is not false"); \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| /* Generic comparison assertion.  `a` and `b` are each evaluated once into |  | ||||||
|  * temporaries of type T and compared with `a op b`.  On failure the message |  | ||||||
|  * shows the source text of both operands and the operator, then both values |  | ||||||
|  * printed with "%" fmt, each wrapped in the `prefix` and `suffix` string |  | ||||||
|  * literals.  `fmt`, `prefix` and `suffix` must be string literals because |  | ||||||
|  * they are concatenated into the format string. */ |  | ||||||
| #define munit_assert_type_full(prefix, suffix, T, fmt, a, op, b)   \ |  | ||||||
|   do { \ |  | ||||||
|     T munit_tmp_a_ = (a); \ |  | ||||||
|     T munit_tmp_b_ = (b); \ |  | ||||||
|     if (!(munit_tmp_a_ op munit_tmp_b_)) {                               \ |  | ||||||
|       munit_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")", \ |  | ||||||
|                    #a, #op, #b, munit_tmp_a_, #op, munit_tmp_b_); \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| /* munit_assert_type_full with empty prefix and suffix. */ |  | ||||||
| #define munit_assert_type(T, fmt, a, op, b) \ |  | ||||||
|   munit_assert_type_full("", "", T, fmt, a, op, b) |  | ||||||
|  |  | ||||||
| /* Comparison assertions for the built-in types.  Each one fixes the |  | ||||||
|  * temporary type and the printf conversion and forwards `a op b` to |  | ||||||
|  * munit_assert_type / munit_assert_type_full. */ |  | ||||||
|  |  | ||||||
| /* char and unsigned char values are shown as '\xNN' (two hex digits). */ |  | ||||||
| #define munit_assert_char(a, op, b) \ |  | ||||||
|   munit_assert_type_full("'\\x", "'", char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b) |  | ||||||
| #define munit_assert_uchar(a, op, b) \ |  | ||||||
|   munit_assert_type_full("'\\x", "'", unsigned char, "02" MUNIT_CHAR_MODIFIER "x", a, op, b) |  | ||||||
| #define munit_assert_short(a, op, b) \ |  | ||||||
|   munit_assert_type(short, MUNIT_SHORT_MODIFIER "d", a, op, b) |  | ||||||
| #define munit_assert_ushort(a, op, b) \ |  | ||||||
|   munit_assert_type(unsigned short, MUNIT_SHORT_MODIFIER "u", a, op, b) |  | ||||||
| #define munit_assert_int(a, op, b) \ |  | ||||||
|   munit_assert_type(int, "d", a, op, b) |  | ||||||
| #define munit_assert_uint(a, op, b) \ |  | ||||||
|   munit_assert_type(unsigned int, "u", a, op, b) |  | ||||||
| #define munit_assert_long(a, op, b) \ |  | ||||||
|   munit_assert_type(long int, "ld", a, op, b) |  | ||||||
| #define munit_assert_ulong(a, op, b) \ |  | ||||||
|   munit_assert_type(unsigned long int, "lu", a, op, b) |  | ||||||
| #define munit_assert_llong(a, op, b) \ |  | ||||||
|   munit_assert_type(long long int, "lld", a, op, b) |  | ||||||
| #define munit_assert_ullong(a, op, b) \ |  | ||||||
|   munit_assert_type(unsigned long long int, "llu", a, op, b) |  | ||||||
|  |  | ||||||
| /* size_t uses the platform-dependent MUNIT_SIZE_MODIFIER. */ |  | ||||||
| #define munit_assert_size(a, op, b) \ |  | ||||||
|   munit_assert_type(size_t, MUNIT_SIZE_MODIFIER "u", a, op, b) |  | ||||||
|  |  | ||||||
| /* float is printed with "%f", double with "%g". */ |  | ||||||
| #define munit_assert_float(a, op, b) \ |  | ||||||
|   munit_assert_type(float, "f", a, op, b) |  | ||||||
| #define munit_assert_double(a, op, b) \ |  | ||||||
|   munit_assert_type(double, "g", a, op, b) |  | ||||||
| /* Both operands are converted to const void* before the comparison. */ |  | ||||||
| #define munit_assert_ptr(a, op, b) \ |  | ||||||
|   munit_assert_type(const void*, "p", a, op, b) |  | ||||||
|  |  | ||||||
| /* Comparison assertions for the munit_intN_t / munit_uintN_t types, printed |  | ||||||
|  * with the matching PRIiN / PRIuN conversion macros. |  | ||||||
|  * NOTE(review): the PRI* macros are presumably those of <inttypes.h>; the |  | ||||||
|  * include and the munit_*_t typedefs are not visible in this part of the |  | ||||||
|  * file - confirm. */ |  | ||||||
| #define munit_assert_int8(a, op, b)             \ |  | ||||||
|   munit_assert_type(munit_int8_t, PRIi8, a, op, b) |  | ||||||
| #define munit_assert_uint8(a, op, b) \ |  | ||||||
|   munit_assert_type(munit_uint8_t, PRIu8, a, op, b) |  | ||||||
| #define munit_assert_int16(a, op, b) \ |  | ||||||
|   munit_assert_type(munit_int16_t, PRIi16, a, op, b) |  | ||||||
| #define munit_assert_uint16(a, op, b) \ |  | ||||||
|   munit_assert_type(munit_uint16_t, PRIu16, a, op, b) |  | ||||||
| #define munit_assert_int32(a, op, b) \ |  | ||||||
|   munit_assert_type(munit_int32_t, PRIi32, a, op, b) |  | ||||||
| #define munit_assert_uint32(a, op, b) \ |  | ||||||
|   munit_assert_type(munit_uint32_t, PRIu32, a, op, b) |  | ||||||
| #define munit_assert_int64(a, op, b) \ |  | ||||||
|   munit_assert_type(munit_int64_t, PRIi64, a, op, b) |  | ||||||
| #define munit_assert_uint64(a, op, b) \ |  | ||||||
|   munit_assert_type(munit_uint64_t, PRIu64, a, op, b) |  | ||||||
|  |  | ||||||
| /* Fails when the absolute difference between `a` and `b` exceeds |  | ||||||
|  * 1e-<precision>.  `precision` is token-pasted into the constant |  | ||||||
|  * 1e-<precision> and stringified into the "%0.<precision>g" conversions, so |  | ||||||
|  * it has to be a literal non-negative integer rather than an expression. |  | ||||||
|  * `a` and `b` are each evaluated once. */ |  | ||||||
| #define munit_assert_double_equal(a, b, precision) \ |  | ||||||
|   do { \ |  | ||||||
|     const double munit_tmp_a_ = (a); \ |  | ||||||
|     const double munit_tmp_b_ = (b); \ |  | ||||||
|     const double munit_tmp_diff_ = ((munit_tmp_a_ - munit_tmp_b_) < 0) ? \ |  | ||||||
|       -(munit_tmp_a_ - munit_tmp_b_) : \ |  | ||||||
|       (munit_tmp_a_ - munit_tmp_b_); \ |  | ||||||
|     if (MUNIT_UNLIKELY(munit_tmp_diff_ > 1e-##precision)) { \ |  | ||||||
|       munit_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)", \ |  | ||||||
| 		   #a, #b, munit_tmp_a_, munit_tmp_b_); \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| #include <string.h> |  | ||||||
| /* Fails when strcmp(a, b) is non-zero.  `a` and `b` are each evaluated once |  | ||||||
|  * and handed to strcmp and to "%s" unchecked, so neither may be NULL. */ |  | ||||||
| #define munit_assert_string_equal(a, b) \ |  | ||||||
|   do { \ |  | ||||||
|     const char* munit_tmp_a_ = a; \ |  | ||||||
|     const char* munit_tmp_b_ = b; \ |  | ||||||
|     if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) != 0)) { \ |  | ||||||
|       munit_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")", \ |  | ||||||
|                    #a, #b, munit_tmp_a_, munit_tmp_b_); \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| /* Fails when strcmp(a, b) is zero.  `a` and `b` are each evaluated once and |  | ||||||
|  * handed to strcmp unchecked, so neither may be NULL.  The failure message |  | ||||||
|  * prints the two (identical) values joined by "==". */ |  | ||||||
| #define munit_assert_string_not_equal(a, b) \ |  | ||||||
|   do { \ |  | ||||||
|     const char* munit_tmp_a_ = a; \ |  | ||||||
|     const char* munit_tmp_b_ = b; \ |  | ||||||
|     if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) == 0)) { \ |  | ||||||
|       munit_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")", \ |  | ||||||
|                    #a, #b, munit_tmp_a_, munit_tmp_b_); \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| /* Fails when the first `size` bytes at `a` and `b` differ, reporting the |  | ||||||
|  * offset of the first differing byte.  `size`, `a` and `b` are each |  | ||||||
|  * evaluated once.  The whole comparison sits inside MUNIT_UNLIKELY so the |  | ||||||
|  * branch hint applies to the mismatch condition, as in the string |  | ||||||
|  * assertions. */ |  | ||||||
| #define munit_assert_memory_equal(size, a, b) \ |  | ||||||
|   do { \ |  | ||||||
|     const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \ |  | ||||||
|     const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \ |  | ||||||
|     const size_t munit_tmp_size_ = (size); \ |  | ||||||
|     if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) != 0)) { \ |  | ||||||
|       size_t munit_tmp_pos_; \ |  | ||||||
|       for (munit_tmp_pos_ = 0 ; munit_tmp_pos_ < munit_tmp_size_ ; munit_tmp_pos_++) { \ |  | ||||||
|         if (munit_tmp_a_[munit_tmp_pos_] != munit_tmp_b_[munit_tmp_pos_]) { \ |  | ||||||
|           munit_errorf("assertion failed: memory %s == %s, at offset %" MUNIT_SIZE_MODIFIER "u", \ |  | ||||||
|                        #a, #b, munit_tmp_pos_); \ |  | ||||||
|           break; \ |  | ||||||
|         } \ |  | ||||||
|       } \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| /* Fails when the first `size` bytes at `a` and `b` are identical.  `size`, |  | ||||||
|  * `a` and `b` are each evaluated once.  The whole comparison sits inside |  | ||||||
|  * MUNIT_UNLIKELY so the hint marks the failure path as unlikely, and the |  | ||||||
|  * byte count is printed with MUNIT_SIZE_MODIFIER like the other size_t |  | ||||||
|  * values in this header. */ |  | ||||||
| #define munit_assert_memory_not_equal(size, a, b) \ |  | ||||||
|   do { \ |  | ||||||
|     const unsigned char* munit_tmp_a_ = (const unsigned char*) (a); \ |  | ||||||
|     const unsigned char* munit_tmp_b_ = (const unsigned char*) (b); \ |  | ||||||
|     const size_t munit_tmp_size_ = (size); \ |  | ||||||
|     if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_) == 0)) { \ |  | ||||||
|       munit_errorf("assertion failed: memory %s != %s (%" MUNIT_SIZE_MODIFIER "u bytes)", \ |  | ||||||
|                    #a, #b, munit_tmp_size_); \ |  | ||||||
|     } \ |  | ||||||
|     MUNIT_PUSH_DISABLE_MSVC_C4127_ \ |  | ||||||
|   } while (0) \ |  | ||||||
|   MUNIT_POP_DISABLE_MSVC_C4127_ |  | ||||||
|  |  | ||||||
| /* Pointer convenience assertions, all expressed through munit_assert_ptr. |  | ||||||
|  * munit_assert_null / munit_assert_ptr_null are two names for the same |  | ||||||
|  * check, as are munit_assert_not_null / munit_assert_ptr_not_null. */ |  | ||||||
| #define munit_assert_ptr_equal(a, b) \ |  | ||||||
|   munit_assert_ptr(a, ==, b) |  | ||||||
| #define munit_assert_ptr_not_equal(a, b) \ |  | ||||||
|   munit_assert_ptr(a, !=, b) |  | ||||||
| #define munit_assert_null(ptr) \ |  | ||||||
|   munit_assert_ptr(ptr, ==, NULL) |  | ||||||
| #define munit_assert_not_null(ptr) \ |  | ||||||
|   munit_assert_ptr(ptr, !=, NULL) |  | ||||||
| #define munit_assert_ptr_null(ptr) \ |  | ||||||
|   munit_assert_ptr(ptr, ==, NULL) |  | ||||||
| #define munit_assert_ptr_not_null(ptr) \ |  | ||||||
|   munit_assert_ptr(ptr, !=, NULL) |  | ||||||
|  |  | ||||||
| /*** Memory allocation ***/ |  | ||||||
|  |  | ||||||
| /* Allocation entry point; receives the caller's source location together |  | ||||||
|  * with the requested size.  Its failure behaviour is defined by the |  | ||||||
|  * implementation, which is not part of this header. */ |  | ||||||
| void* munit_malloc_ex(const char* filename, int line, size_t size); |  | ||||||
|  |  | ||||||
| /* munit_malloc_ex with the caller's __FILE__ and __LINE__. */ |  | ||||||
| #define munit_malloc(size) \ |  | ||||||
|   munit_malloc_ex(__FILE__, __LINE__, (size)) |  | ||||||
|  |  | ||||||
| /* Allocates room for one object of `type` and casts the result to type*. */ |  | ||||||
| #define munit_new(type) \ |  | ||||||
|   ((type*) munit_malloc(sizeof(type))) |  | ||||||
|  |  | ||||||
| /* Allocates nmemb * size bytes through munit_malloc. |  | ||||||
|  * NOTE(review): the multiplication is not checked for overflow. */ |  | ||||||
| #define munit_calloc(nmemb, size) \ |  | ||||||
|   munit_malloc((nmemb) * (size)) |  | ||||||
|  |  | ||||||
| /* Allocates an array of `nmemb` objects of `type` and casts to type*. */ |  | ||||||
| #define munit_newa(type, nmemb) \ |  | ||||||
|   ((type*) munit_calloc((nmemb), sizeof(type))) |  | ||||||
|  |  | ||||||
| /*** Random number generation ***/ |  | ||||||
|  |  | ||||||
| /* Declarations of the random-number helpers; the implementations are not |  | ||||||
|  * part of this header.  munit_rand_memory takes the buffer length first so |  | ||||||
|  * that it can appear in the array declarator via MUNIT_ARRAY_PARAM. |  | ||||||
|  * NOTE(review): whether munit_rand_int_range includes `max` cannot be told |  | ||||||
|  * from the declaration - check the implementation. */ |  | ||||||
| void munit_rand_seed(munit_uint32_t seed); |  | ||||||
| munit_uint32_t munit_rand_uint32(void); |  | ||||||
| int munit_rand_int_range(int min, int max); |  | ||||||
| double munit_rand_double(void); |  | ||||||
| void munit_rand_memory(size_t size, munit_uint8_t buffer[MUNIT_ARRAY_PARAM(size)]); |  | ||||||
|  |  | ||||||
| /*** Tests and Suites ***/ |  | ||||||
|  |  | ||||||
| /* Outcome of a single test; this is the return type of MunitTestFunc. */ |  | ||||||
| typedef enum { |  | ||||||
|   /* Test successful */ |  | ||||||
|   MUNIT_OK, |  | ||||||
|   /* Test failed */ |  | ||||||
|   MUNIT_FAIL, |  | ||||||
|   /* Test was skipped */ |  | ||||||
|   MUNIT_SKIP, |  | ||||||
|   /* Test failed due to circumstances not intended to be tested |  | ||||||
|    * (things like network errors, invalid parameter value, failure to |  | ||||||
|    * allocate memory in the test harness, etc.). */ |  | ||||||
|   MUNIT_ERROR |  | ||||||
| } MunitResult; |  | ||||||
|  |  | ||||||
| /* A parameter name together with an array of string values for it; this is |  | ||||||
|  * the element type of MunitTest.parameters. |  | ||||||
|  * NOTE(review): how the end of `values` is marked is not visible here. */ |  | ||||||
| typedef struct { |  | ||||||
|   char*  name; |  | ||||||
|   char** values; |  | ||||||
| } MunitParameterEnum; |  | ||||||
|  |  | ||||||
| /* A single name/value pair; arrays of these are passed to test and setup |  | ||||||
|  * functions as `params`. */ |  | ||||||
| typedef struct { |  | ||||||
|   char* name; |  | ||||||
|   char* value; |  | ||||||
| } MunitParameter; |  | ||||||
|  |  | ||||||
| /* Looks up `key` in `params`. |  | ||||||
|  * NOTE(review): presumably returns the matching value, or NULL when the |  | ||||||
|  * key is absent - confirm against the implementation. */ |  | ||||||
| const char* munit_parameters_get(const MunitParameter params[], const char* key); |  | ||||||
|  |  | ||||||
| /* Per-test option flags; the non-zero values are distinct bits and can be |  | ||||||
|  * combined. */ |  | ||||||
| typedef enum { |  | ||||||
|   MUNIT_TEST_OPTION_NONE             = 0, |  | ||||||
|   MUNIT_TEST_OPTION_SINGLE_ITERATION = 1 << 0, |  | ||||||
|   MUNIT_TEST_OPTION_TODO             = 1 << 1 |  | ||||||
| } MunitTestOptions; |  | ||||||
|  |  | ||||||
| /* Callback signatures of a test.  MunitTestSetup returns a void* and |  | ||||||
|  * MunitTestTearDown receives a `fixture` pointer. |  | ||||||
|  * NOTE(review): judging by the parameter names, the test function receives |  | ||||||
|  * the setup result when a setup exists and the user data otherwise - |  | ||||||
|  * confirm against the runner. */ |  | ||||||
| typedef MunitResult (* MunitTestFunc)(const MunitParameter params[], void* user_data_or_fixture); |  | ||||||
| typedef void*       (* MunitTestSetup)(const MunitParameter params[], void* user_data); |  | ||||||
| typedef void        (* MunitTestTearDown)(void* fixture); |  | ||||||
|  |  | ||||||
| /* Description of one test: its name, the three callbacks, option flags and |  | ||||||
|  * an array of parameter definitions.  Initialisers list the members in this |  | ||||||
|  * order. */ |  | ||||||
| typedef struct { |  | ||||||
|   char*               name; |  | ||||||
|   MunitTestFunc       test; |  | ||||||
|   MunitTestSetup      setup; |  | ||||||
|   MunitTestTearDown   tear_down; |  | ||||||
|   MunitTestOptions    options; |  | ||||||
|   MunitParameterEnum* parameters; |  | ||||||
| } MunitTest; |  | ||||||
|  |  | ||||||
| /* Suite option flags; only the empty value is defined. */ |  | ||||||
| typedef enum { |  | ||||||
|   MUNIT_SUITE_OPTION_NONE = 0 |  | ||||||
| } MunitSuiteOptions; |  | ||||||
|  |  | ||||||
| /* Forward declaration so that the struct can point to nested suites. */ |  | ||||||
| typedef struct MunitSuite_ MunitSuite; |  | ||||||
|  |  | ||||||
| /* A suite: a name prefix, an array of tests, an array of child suites, an |  | ||||||
|  * iteration count and option flags. */ |  | ||||||
| struct MunitSuite_ { |  | ||||||
|   char*             prefix; |  | ||||||
|   MunitTest*        tests; |  | ||||||
|   MunitSuite*       suites; |  | ||||||
|   unsigned int      iterations; |  | ||||||
|   MunitSuiteOptions options; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /* Entry point that runs `suite` with the program's command-line arguments; |  | ||||||
|  * `argv` is declared with argc + 1 elements.  Returns an int. |  | ||||||
|  * NOTE(review): presumably a process exit status - confirm. */ |  | ||||||
| int munit_suite_main(const MunitSuite* suite, void* user_data, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]); |  | ||||||
|  |  | ||||||
| /* Note: I'm not very happy with this API; it's likely to change if I |  | ||||||
|  * figure out something better.  Suggestions welcome. */ |  | ||||||
|  |  | ||||||
| typedef struct MunitArgument_ MunitArgument; |  | ||||||
|  |  | ||||||
| /* Description of a custom command-line argument: its name, a callback that |  | ||||||
|  * parses it (receiving a pointer to the current argument index plus |  | ||||||
|  * argc / argv) and a callback that writes its help text. */ |  | ||||||
| struct MunitArgument_ { |  | ||||||
|   char* name; |  | ||||||
|   munit_bool (* parse_argument)(const MunitSuite* suite, void* user_data, int* arg, int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)]); |  | ||||||
|   void (* write_help)(const MunitArgument* argument, void* user_data); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /* Variant of munit_suite_main that additionally takes an array of custom |  | ||||||
|  * argument descriptions. */ |  | ||||||
| int munit_suite_main_custom(const MunitSuite* suite, |  | ||||||
|                             void* user_data, |  | ||||||
|                             int argc, char* const argv[MUNIT_ARRAY_PARAM(argc + 1)], |  | ||||||
|                             const MunitArgument arguments[]); |  | ||||||
|  |  | ||||||
| /* Optional short aliases: with MUNIT_ENABLE_ASSERT_ALIASES defined, every |  | ||||||
|  * assert_xxx name forwards to the munit_assert_xxx macro of the same |  | ||||||
|  * name. */ |  | ||||||
| #if defined(MUNIT_ENABLE_ASSERT_ALIASES) |  | ||||||
|  |  | ||||||
| #define assert_true(expr) munit_assert_true(expr) |  | ||||||
| #define assert_false(expr) munit_assert_false(expr) |  | ||||||
| #define assert_char(a, op, b) munit_assert_char(a, op, b) |  | ||||||
| #define assert_uchar(a, op, b) munit_assert_uchar(a, op, b) |  | ||||||
| #define assert_short(a, op, b) munit_assert_short(a, op, b) |  | ||||||
| #define assert_ushort(a, op, b) munit_assert_ushort(a, op, b) |  | ||||||
| #define assert_int(a, op, b) munit_assert_int(a, op, b) |  | ||||||
| #define assert_uint(a, op, b) munit_assert_uint(a, op, b) |  | ||||||
| #define assert_long(a, op, b) munit_assert_long(a, op, b) |  | ||||||
| #define assert_ulong(a, op, b) munit_assert_ulong(a, op, b) |  | ||||||
| #define assert_llong(a, op, b) munit_assert_llong(a, op, b) |  | ||||||
| #define assert_ullong(a, op, b) munit_assert_ullong(a, op, b) |  | ||||||
| #define assert_size(a, op, b) munit_assert_size(a, op, b) |  | ||||||
| #define assert_float(a, op, b) munit_assert_float(a, op, b) |  | ||||||
| #define assert_double(a, op, b) munit_assert_double(a, op, b) |  | ||||||
| #define assert_ptr(a, op, b) munit_assert_ptr(a, op, b) |  | ||||||
|  |  | ||||||
| #define assert_int8(a, op, b) munit_assert_int8(a, op, b) |  | ||||||
| #define assert_uint8(a, op, b) munit_assert_uint8(a, op, b) |  | ||||||
| #define assert_int16(a, op, b) munit_assert_int16(a, op, b) |  | ||||||
| #define assert_uint16(a, op, b) munit_assert_uint16(a, op, b) |  | ||||||
| #define assert_int32(a, op, b) munit_assert_int32(a, op, b) |  | ||||||
| #define assert_uint32(a, op, b) munit_assert_uint32(a, op, b) |  | ||||||
| #define assert_int64(a, op, b) munit_assert_int64(a, op, b) |  | ||||||
| #define assert_uint64(a, op, b) munit_assert_uint64(a, op, b) |  | ||||||
|  |  | ||||||
| #define assert_double_equal(a, b, precision) munit_assert_double_equal(a, b, precision) |  | ||||||
| #define assert_string_equal(a, b) munit_assert_string_equal(a, b) |  | ||||||
| #define assert_string_not_equal(a, b) munit_assert_string_not_equal(a, b) |  | ||||||
| #define assert_memory_equal(size, a, b) munit_assert_memory_equal(size, a, b) |  | ||||||
| #define assert_memory_not_equal(size, a, b) munit_assert_memory_not_equal(size, a, b) |  | ||||||
| #define assert_ptr_equal(a, b) munit_assert_ptr_equal(a, b) |  | ||||||
| #define assert_ptr_not_equal(a, b) munit_assert_ptr_not_equal(a, b) |  | ||||||
| /* The two pointer-null aliases forward to their munit_assert_ptr_* |  | ||||||
|  * namesakes; assert_ptr_null used to name a macro that does not exist. */ |  | ||||||
| #define assert_ptr_null(ptr) munit_assert_ptr_null(ptr) |  | ||||||
| #define assert_ptr_not_null(ptr) munit_assert_ptr_not_null(ptr) |  | ||||||
|  |  | ||||||
| #define assert_null(ptr) munit_assert_null(ptr) |  | ||||||
| #define assert_not_null(ptr) munit_assert_not_null(ptr) |  | ||||||
|  |  | ||||||
| #endif /* defined(MUNIT_ENABLE_ASSERT_ALIASES) */ |  | ||||||
|  |  | ||||||
| #if defined(__cplusplus) |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #endif /* !defined(MUNIT_H) */ |  | ||||||
|  |  | ||||||
| /* This block sits after the MUNIT_H include guard, so it is processed on |  | ||||||
|  * every inclusion: with aliases enabled, any existing `assert` macro is |  | ||||||
|  * replaced by one that forwards to munit_assert. */ |  | ||||||
| #if defined(MUNIT_ENABLE_ASSERT_ALIASES) |  | ||||||
| #  if defined(assert) |  | ||||||
| #    undef assert |  | ||||||
| #  endif |  | ||||||
| #  define assert(expr) munit_assert(expr) |  | ||||||
| #endif |  | ||||||
| @@ -1,68 +0,0 @@ | |||||||
| #include "../src/ast.h" |  | ||||||
| #include "../src/parser/parser.h" |  | ||||||
| #include "munit.h" |  | ||||||
|  |  | ||||||
| // Regression test: lexes and parses tests/input/regression/test_trivia_head.asm |  | ||||||
| // and expects every stage to succeed with no token left over after parsing. |  | ||||||
| MunitResult test_regression_trivia_head(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     lexer_t *lex = &(lexer_t){}; |  | ||||||
|     error_t *err = lexer_open(lex, "tests/input/regression/test_trivia_head.asm"); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     tokenlist_t *list; |  | ||||||
|     err = tokenlist_alloc(&list); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     err = tokenlist_fill(list, lex); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     parse_result_t result = parse(list->head); |  | ||||||
|     // The lexer is no longer needed once parsing has run; close it here so |  | ||||||
|     // the opened input is released before the result is inspected. |  | ||||||
|     lexer_close(lex); |  | ||||||
|     munit_assert_null(result.err); |  | ||||||
|     munit_assert_null(result.next); |  | ||||||
|  |  | ||||||
|     ast_node_free(result.node); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Regression test: parses tests/input/regression/test_no_operands_eof.asm and |  | ||||||
| // expects two instruction nodes whose second child (the operands) is empty. |  | ||||||
| MunitResult test_no_operands_eof(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     lexer_t *lex = &(lexer_t){}; |  | ||||||
|     error_t *err = lexer_open(lex, "tests/input/regression/test_no_operands_eof.asm"); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     tokenlist_t *list; |  | ||||||
|     err = tokenlist_alloc(&list); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     err = tokenlist_fill(list, lex); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     parse_result_t result = parse(list->head); |  | ||||||
|     // The lexer is no longer needed once parsing has run; close it here so |  | ||||||
|     // the opened input is released before the result is inspected. |  | ||||||
|     lexer_close(lex); |  | ||||||
|     munit_assert_null(result.err); |  | ||||||
|     munit_assert_null(result.next); |  | ||||||
|  |  | ||||||
|     // Both children should be instructions |  | ||||||
|     munit_assert_size(result.node->len, ==, 2); |  | ||||||
|     munit_assert_int(result.node->children[0]->id, ==, NODE_INSTRUCTION); |  | ||||||
|     munit_assert_int(result.node->children[1]->id, ==, NODE_INSTRUCTION); |  | ||||||
|  |  | ||||||
|     // And they should have empty operands |  | ||||||
|     munit_assert_size(result.node->children[0]->len, ==, 2); |  | ||||||
|     munit_assert_size(result.node->children[1]->len, ==, 2); |  | ||||||
|     munit_assert_size(result.node->children[0]->children[1]->len, ==, 0); |  | ||||||
|     munit_assert_size(result.node->children[1]->children[1]->len, ==, 0); |  | ||||||
|  |  | ||||||
|     ast_node_free(result.node); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Test table of the regression suite.  Each entry lists name, test function, |  | ||||||
| // setup, tear_down, options and parameters; the last entry has a nullptr |  | ||||||
| // name and test function. |  | ||||||
| MunitTest regression_tests[] = { |  | ||||||
|     {"/trivia_head",     test_regression_trivia_head, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/no_operands_eof", test_no_operands_eof,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {nullptr,            nullptr,                     nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr} |  | ||||||
| }; |  | ||||||
							
								
								
									
										393
									
								
								tests/symbols.c
									
									
									
									
									
								
							
							
						
						
									
										393
									
								
								tests/symbols.c
									
									
									
									
									
								
							| @@ -1,393 +0,0 @@ | |||||||
| #include "../src/encoder/symbols.h" |  | ||||||
| #include "../src/ast.h" |  | ||||||
| #include "../src/error.h" |  | ||||||
| #include "../src/lexer.h" |  | ||||||
| #include "../src/parser/parser.h" |  | ||||||
| #include "munit.h" |  | ||||||
| #include <string.h> |  | ||||||
|  |  | ||||||
| // Shared fixture helper: lexes and parses the file at `path`, storing the |  | ||||||
| // token list in *list and the parsed root node in *node.  Every stage is |  | ||||||
| // asserted to succeed so that a broken input fails here, with a clear |  | ||||||
| // message, instead of crashing later in the calling test.  The caller owns |  | ||||||
| // both results and releases them with ast_node_free / tokenlist_free. |  | ||||||
| void symbols_setup_test(ast_node_t **node, tokenlist_t **list, char *path) { |  | ||||||
|     lexer_t *lex = &(lexer_t){}; |  | ||||||
|     error_t *err = lexer_open(lex, path); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     err = tokenlist_alloc(list); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     err = tokenlist_fill(*list, lex); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     parse_result_t result = parse((*list)->head); |  | ||||||
|     lexer_close(lex); |  | ||||||
|     munit_assert_null(result.err); |  | ||||||
|  |  | ||||||
|     *node = result.node; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Checks that symbol_table_alloc reports no error and yields an empty table |  | ||||||
| // with capacity 64 and a non-null symbols array. |  | ||||||
| MunitResult test_symbol_table_alloc(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|     error_t *err = symbol_table_alloc(&table); |  | ||||||
|  |  | ||||||
|     munit_assert_ptr_not_null(table); |  | ||||||
|     munit_assert_ptr_null(err); |  | ||||||
|     munit_assert_size(table->cap, ==, 64); // Default capacity |  | ||||||
|     munit_assert_size(table->len, ==, 0); |  | ||||||
|     munit_assert_ptr_not_null(table->symbols); |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Looking up a name in a freshly allocated table must yield no symbol. |  | ||||||
| MunitResult test_symbol_table_lookup_empty(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|  |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|     // Fail right away if the table could not be created instead of handing |  | ||||||
|     // a null table to the lookup below. |  | ||||||
|     error_t *err = symbol_table_alloc(&table); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     symbol_t *symbol = symbol_table_lookup(table, "nonexistent"); |  | ||||||
|     munit_assert_ptr_null(symbol); |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Adding a label reference to an empty table must create one |  | ||||||
| // SYMBOL_REFERENCE entry named "test" with no statement attached. |  | ||||||
| MunitResult test_symbol_add_reference(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     ast_node_t *root; |  | ||||||
|     tokenlist_t *list; |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|     symbols_setup_test(&root, &list, "tests/input/symbols.asm"); |  | ||||||
|     // Fail right away if the table could not be created. |  | ||||||
|     error_t *err = symbol_table_alloc(&table); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0]; |  | ||||||
|     ast_node_t *statement = root->children[3]; // The containing statement |  | ||||||
|     munit_assert_int(reference->id, ==, NODE_LABEL_REFERENCE); |  | ||||||
|     munit_assert_size(table->len, ==, 0); |  | ||||||
|  |  | ||||||
|     err = symbol_table_update(table, reference, statement); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(table->len, ==, 1); |  | ||||||
|  |  | ||||||
|     symbol_t *symbol = symbol_table_lookup(table, "test"); |  | ||||||
|     munit_assert_not_null(symbol); |  | ||||||
|     munit_assert_int(SYMBOL_REFERENCE, ==, symbol->kind); |  | ||||||
|     // For references, the statement should be nullptr |  | ||||||
|     munit_assert_ptr_null(symbol->statement); |  | ||||||
|     munit_assert_string_equal(symbol->name, "test"); |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
|     ast_node_free(root); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Adding a label to an empty table must create one SYMBOL_LOCAL entry named |  | ||||||
| // "test" whose statement is the label node itself. |  | ||||||
| MunitResult test_symbol_add_label(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     ast_node_t *root; |  | ||||||
|     tokenlist_t *list; |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|     symbols_setup_test(&root, &list, "tests/input/symbols.asm"); |  | ||||||
|     // Fail right away if the table could not be created. |  | ||||||
|     error_t *err = symbol_table_alloc(&table); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     ast_node_t *label = root->children[2]; |  | ||||||
|     munit_assert_int(label->id, ==, NODE_LABEL); |  | ||||||
|     munit_assert_size(table->len, ==, 0); |  | ||||||
|  |  | ||||||
|     err = symbol_table_update(table, label, label); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(table->len, ==, 1); |  | ||||||
|  |  | ||||||
|     symbol_t *symbol = symbol_table_lookup(table, "test"); |  | ||||||
|     munit_assert_not_null(symbol); |  | ||||||
|     munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind); |  | ||||||
|     munit_assert_ptr_equal(label, symbol->statement); |  | ||||||
|     munit_assert_string_equal(symbol->name, "test"); |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
|     ast_node_free(root); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Adding an import directive to an empty table must create one |  | ||||||
| // SYMBOL_IMPORT entry named "test" with no statement attached. |  | ||||||
| MunitResult test_symbol_add_import(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     ast_node_t *root; |  | ||||||
|     tokenlist_t *list; |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|     symbols_setup_test(&root, &list, "tests/input/symbols.asm"); |  | ||||||
|     // Fail right away if the table could not be created. |  | ||||||
|     error_t *err = symbol_table_alloc(&table); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     ast_node_t *import_directive = root->children[0]->children[1]; |  | ||||||
|     ast_node_t *statement = root->children[0]; // The containing statement |  | ||||||
|     munit_assert_int(import_directive->id, ==, NODE_IMPORT_DIRECTIVE); |  | ||||||
|     munit_assert_size(table->len, ==, 0); |  | ||||||
|  |  | ||||||
|     err = symbol_table_update(table, import_directive, statement); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(table->len, ==, 1); |  | ||||||
|  |  | ||||||
|     symbol_t *symbol = symbol_table_lookup(table, "test"); |  | ||||||
|     munit_assert_not_null(symbol); |  | ||||||
|     munit_assert_int(SYMBOL_IMPORT, ==, symbol->kind); |  | ||||||
|     // For import directives, the statement should be nullptr |  | ||||||
|     munit_assert_ptr_null(symbol->statement); |  | ||||||
|     munit_assert_string_equal(symbol->name, "test"); |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
|     ast_node_free(root); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Shared driver for the upgrade/downgrade tests.  It inserts `first` into a |  | ||||||
| // fresh table, checks the resulting symbol, then applies `second` and checks |  | ||||||
| // the outcome: |  | ||||||
| //   should_succeed     - whether the second update must return no error; if |  | ||||||
| //                        not, err_symbol_table_incompatible_symbols is expected |  | ||||||
| //   should_update      - whether the kind must become second_kind (otherwise |  | ||||||
| //                        it must stay first_kind) |  | ||||||
| //   expected_statement - statement the symbol must reference afterwards |  | ||||||
| void test_symbol_update(const char *name, ast_node_t *first, symbol_kind_t first_kind, ast_node_t *first_statement, |  | ||||||
|                         ast_node_t *second, symbol_kind_t second_kind, ast_node_t *second_statement, |  | ||||||
|                         bool should_succeed, bool should_update, ast_node_t *expected_statement) { |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|     // Fail right away if the table could not be created. |  | ||||||
|     error_t *err = symbol_table_alloc(&table); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     // Add the first symbol |  | ||||||
|     err = symbol_table_update(table, first, first_statement); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(table->len, ==, 1); |  | ||||||
|  |  | ||||||
|     // Verify first symbol state |  | ||||||
|     symbol_t *symbol = symbol_table_lookup(table, name); |  | ||||||
|     munit_assert_not_null(symbol); |  | ||||||
|     munit_assert_int(first_kind, ==, symbol->kind); |  | ||||||
|     munit_assert_string_equal(symbol->name, name); |  | ||||||
|  |  | ||||||
|     // Only local symbols keep a statement; every other kind stores none |  | ||||||
|     if (first_kind == SYMBOL_LOCAL) { |  | ||||||
|         munit_assert_ptr_equal(first_statement, symbol->statement); |  | ||||||
|     } else { |  | ||||||
|         munit_assert_ptr_null(symbol->statement); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Attempt the second update |  | ||||||
|     err = symbol_table_update(table, second, second_statement); |  | ||||||
|  |  | ||||||
|     // A rejected update must report the incompatibility error; there is |  | ||||||
|     // nothing further to verify in that case |  | ||||||
|     if (!should_succeed) { |  | ||||||
|         munit_assert_ptr_equal(err, err_symbol_table_incompatible_symbols); |  | ||||||
|         symbol_table_free(table); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     munit_assert_null(err); |  | ||||||
|  |  | ||||||
|     // Verify symbol after second update |  | ||||||
|     symbol = symbol_table_lookup(table, name); |  | ||||||
|     munit_assert_not_null(symbol); |  | ||||||
|  |  | ||||||
|     // Check if kind updated as expected |  | ||||||
|     if (should_update) { |  | ||||||
|         munit_assert_int(second_kind, ==, symbol->kind); |  | ||||||
|     } else { |  | ||||||
|         munit_assert_int(first_kind, ==, symbol->kind); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Simply check against the expected statement value |  | ||||||
|     munit_assert_ptr_equal(expected_statement, symbol->statement); |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_symbol_upgrade_valid(const MunitParameter params[], void *data) { |  | ||||||
|     ast_node_t *root; |  | ||||||
|     tokenlist_t *list; |  | ||||||
|  |  | ||||||
|     symbols_setup_test(&root, &list, "tests/input/symbols.asm"); |  | ||||||
|  |  | ||||||
|     ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0]; |  | ||||||
|     ast_node_t *reference_statement = root->children[3]; |  | ||||||
|     ast_node_t *label = root->children[2]; |  | ||||||
|     ast_node_t *import_directive = root->children[0]->children[1]; |  | ||||||
|     ast_node_t *import_statement = root->children[0]; |  | ||||||
|     ast_node_t *export_directive = root->children[1]->children[1]; |  | ||||||
|     ast_node_t *export_statement = root->children[1]; |  | ||||||
|  |  | ||||||
|     // real upgrades |  | ||||||
|     test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, label, SYMBOL_LOCAL, label, true, true, |  | ||||||
|                        label); |  | ||||||
|     test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, import_directive, SYMBOL_IMPORT, |  | ||||||
|                        import_statement, true, true, nullptr); |  | ||||||
|     test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, export_directive, SYMBOL_EXPORT, |  | ||||||
|                        export_statement, true, true, nullptr); |  | ||||||
|     test_symbol_update("test", label, SYMBOL_LOCAL, label, export_directive, SYMBOL_EXPORT, export_statement, true, |  | ||||||
|                        true, label); |  | ||||||
|  |  | ||||||
|     // identity upgrades |  | ||||||
|     test_symbol_update("test", reference, SYMBOL_REFERENCE, reference_statement, reference, SYMBOL_REFERENCE, |  | ||||||
|                        reference_statement, true, false, nullptr); |  | ||||||
|     test_symbol_update("test", label, SYMBOL_LOCAL, label, label, SYMBOL_LOCAL, label, true, false, label); |  | ||||||
|     test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, import_directive, SYMBOL_IMPORT, |  | ||||||
|                        import_statement, true, false, nullptr); |  | ||||||
|     test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, export_directive, SYMBOL_EXPORT, |  | ||||||
|                        export_statement, true, false, nullptr); |  | ||||||
|  |  | ||||||
|     // downgrades that are allowed and ignored |  | ||||||
|     test_symbol_update("test", label, SYMBOL_LOCAL, label, reference, SYMBOL_REFERENCE, reference_statement, true, |  | ||||||
|                        false, label); |  | ||||||
|     test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, reference, SYMBOL_REFERENCE, |  | ||||||
|                        reference_statement, true, false, nullptr); |  | ||||||
|     test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, reference, SYMBOL_REFERENCE, |  | ||||||
|                        reference_statement, true, false, nullptr); |  | ||||||
|     test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, label, SYMBOL_LOCAL, label, true, |  | ||||||
|                        false, label); |  | ||||||
|     test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, label, SYMBOL_LOCAL, label, true, |  | ||||||
|                        false, label); |  | ||||||
|  |  | ||||||
|     ast_node_free(root); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_symbol_upgrade_invalid(const MunitParameter params[], void *data) { |  | ||||||
|     ast_node_t *root; |  | ||||||
|     tokenlist_t *list; |  | ||||||
|  |  | ||||||
|     symbols_setup_test(&root, &list, "tests/input/symbols.asm"); |  | ||||||
|  |  | ||||||
|     ast_node_t *reference = root->children[3]->children[1]->children[0]->children[0]; |  | ||||||
|     ast_node_t *reference_statement = root->children[3]; |  | ||||||
|     ast_node_t *label = root->children[2]; |  | ||||||
|     ast_node_t *import_directive = root->children[0]->children[1]; |  | ||||||
|     ast_node_t *import_statement = root->children[0]; |  | ||||||
|     ast_node_t *export_directive = root->children[1]->children[1]; |  | ||||||
|     ast_node_t *export_statement = root->children[1]; |  | ||||||
|  |  | ||||||
|     // invalid upgrades |  | ||||||
|     test_symbol_update("test", label, SYMBOL_LOCAL, label, import_directive, SYMBOL_IMPORT, import_statement, false, |  | ||||||
|                        false, nullptr); |  | ||||||
|     test_symbol_update("test", export_directive, SYMBOL_EXPORT, export_statement, import_directive, SYMBOL_IMPORT, |  | ||||||
|                        import_statement, false, false, nullptr); |  | ||||||
|     test_symbol_update("test", import_directive, SYMBOL_IMPORT, import_statement, export_directive, SYMBOL_EXPORT, |  | ||||||
|                        export_statement, false, false, nullptr); |  | ||||||
|  |  | ||||||
|     ast_node_free(root); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_symbol_add_export(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     ast_node_t *root; |  | ||||||
|     tokenlist_t *list; |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|     symbols_setup_test(&root, &list, "tests/input/symbols.asm"); |  | ||||||
|     symbol_table_alloc(&table); |  | ||||||
|  |  | ||||||
|     ast_node_t *export_directive = root->children[1]->children[1]; |  | ||||||
|     ast_node_t *statement = root->children[1]; // The containing statement |  | ||||||
|     munit_assert_int(export_directive->id, ==, NODE_EXPORT_DIRECTIVE); |  | ||||||
|     munit_assert_size(table->len, ==, 0); |  | ||||||
|  |  | ||||||
|     error_t *err = symbol_table_update(table, export_directive, statement); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(table->len, ==, 1); |  | ||||||
|  |  | ||||||
|     symbol_t *symbol = symbol_table_lookup(table, "test"); |  | ||||||
|     munit_assert_not_null(symbol); |  | ||||||
|     munit_assert_int(SYMBOL_EXPORT, ==, symbol->kind); |  | ||||||
|     // For export directives, the statement should be nullptr |  | ||||||
|     munit_assert_ptr_null(symbol->statement); |  | ||||||
|     munit_assert_string_equal(symbol->name, "test"); |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
|     ast_node_free(root); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_symbol_table_growth(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     ast_node_t *root; |  | ||||||
|     tokenlist_t *list; |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|  |  | ||||||
|     // Set up with our manysymbols.asm file |  | ||||||
|     symbols_setup_test(&root, &list, "tests/input/manysymbols.asm"); |  | ||||||
|     symbol_table_alloc(&table); |  | ||||||
|  |  | ||||||
|     // Initial capacity should be the default (64) |  | ||||||
|     munit_assert_size(table->cap, ==, 64); |  | ||||||
|     munit_assert_size(table->len, ==, 0); |  | ||||||
|  |  | ||||||
|     // Add the first 64 labels (indices 0-63) |  | ||||||
|     size_t initial_cap = table->cap; |  | ||||||
|     for (size_t i = 0; i < 64; i++) { |  | ||||||
|         ast_node_t *label = root->children[i]; |  | ||||||
|         munit_assert_int(label->id, ==, NODE_LABEL); |  | ||||||
|  |  | ||||||
|         error_t *err = symbol_table_update(table, label, label); |  | ||||||
|         munit_assert_null(err); |  | ||||||
|         munit_assert_size(table->len, ==, i + 1); |  | ||||||
|  |  | ||||||
|         // Capacity should remain the same for the first 64 labels |  | ||||||
|         munit_assert_size(table->cap, ==, initial_cap); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Now add the 65th label (index 64), which should trigger growth |  | ||||||
|     ast_node_t *final_label = root->children[64]; |  | ||||||
|     munit_assert_int(final_label->id, ==, NODE_LABEL); |  | ||||||
|  |  | ||||||
|     error_t *err = symbol_table_update(table, final_label, final_label); |  | ||||||
|     munit_assert_null(err); |  | ||||||
|     munit_assert_size(table->len, ==, 65); |  | ||||||
|  |  | ||||||
|     // Capacity should have doubled |  | ||||||
|     munit_assert_size(table->cap, ==, initial_cap * 2); |  | ||||||
|  |  | ||||||
|     // Validate we can look up all the symbols |  | ||||||
|     for (size_t i = 0; i <= 64; i++) { |  | ||||||
|         char name[10]; |  | ||||||
|         sprintf(name, "lbl_%zu", i); |  | ||||||
|  |  | ||||||
|         symbol_t *symbol = symbol_table_lookup(table, name); |  | ||||||
|         munit_assert_not_null(symbol); |  | ||||||
|         munit_assert_int(SYMBOL_LOCAL, ==, symbol->kind); |  | ||||||
|         munit_assert_string_equal(symbol->name, name); |  | ||||||
|         munit_assert_ptr_equal(symbol->statement, root->children[i]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
|     ast_node_free(root); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitResult test_symbol_invalid_node(const MunitParameter params[], void *data) { |  | ||||||
|     (void)params; |  | ||||||
|     (void)data; |  | ||||||
|     ast_node_t *root; |  | ||||||
|     tokenlist_t *list; |  | ||||||
|     symbol_table_t *table = nullptr; |  | ||||||
|     symbols_setup_test(&root, &list, "tests/input/symbols.asm"); |  | ||||||
|     symbol_table_alloc(&table); |  | ||||||
|  |  | ||||||
|     munit_assert_size(table->len, ==, 0); |  | ||||||
|     error_t *err = symbol_table_update(table, root, root); |  | ||||||
|     munit_assert_ptr_equal(err, err_symbol_table_invalid_node); |  | ||||||
|     munit_assert_size(table->len, ==, 0); |  | ||||||
|  |  | ||||||
|     symbol_table_free(table); |  | ||||||
|     ast_node_free(root); |  | ||||||
|     tokenlist_free(list); |  | ||||||
|     return MUNIT_OK; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MunitTest symbols_tests[] = { |  | ||||||
|     {"/table_alloc",        test_symbol_table_alloc,        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/table_lookup_empty", test_symbol_table_lookup_empty, nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/add_reference",      test_symbol_add_reference,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/add_label",          test_symbol_add_label,          nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/add_import",         test_symbol_add_import,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/add_export",         test_symbol_add_export,         nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/upgrade_valid",      test_symbol_upgrade_valid,      nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/upgrade_invalid",    test_symbol_upgrade_invalid,    nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/table_growth",       test_symbol_table_growth,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {"/invalid_node",       test_symbol_invalid_node,       nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr}, |  | ||||||
|     {nullptr,               nullptr,                        nullptr, nullptr, MUNIT_TEST_OPTION_NONE, nullptr} |  | ||||||
| }; |  | ||||||
							
								
								
									
										18
									
								
								validate.sh
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								validate.sh
									
									
									
									
									
								
							| @@ -2,17 +2,19 @@ | |||||||
|  |  | ||||||
| set -euo pipefail | set -euo pipefail | ||||||
|  |  | ||||||
| make analyze debug asan msan | # Start with static analysis | ||||||
|  | make clean all | ||||||
|  | mkdir -p reports/static-analysis | ||||||
|  | scan-build -o reports/static-analysis/ -plist-html --status-bugs make all | ||||||
|  |  | ||||||
| ASAN=build/asan/oas | # Run the sanitizer builds and valgrind | ||||||
| MSAN=build/msan/oas | make clean sanitize all | ||||||
| DEBUG=build/debug/oas |  | ||||||
|  |  | ||||||
| ARGUMENTS=("tokens" "text" "ast") | ARGUMENTS=("-tokens" "-text") | ||||||
| while IFS= read -r INPUT_FILE; do | while IFS= read -r INPUT_FILE; do | ||||||
|     for ARGS in ${ARGUMENTS[@]}; do |     for ARGS in ${ARGUMENTS[@]}; do | ||||||
|         $ASAN $ARGS $INPUT_FILE > /dev/null |         ./oas-asan $ARGS $INPUT_FILE > /dev/null | ||||||
|         $MSAN $ARGS $INPUT_FILE > /dev/null |         ./oas-msan $ARGS $INPUT_FILE > /dev/null | ||||||
|         valgrind --leak-check=full --error-exitcode=1 $DEBUG $ARGS $INPUT_FILE >/dev/null |         valgrind --leak-check=full --error-exitcode=1 ./oas $ARGS $INPUT_FILE >/dev/null | ||||||
|     done |     done | ||||||
| done < <(find tests/input/ -type f -name '*.asm') | done < <(find tests/input/ -type f -name '*.asm') | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user