Fix grammar not being able to disambiguate some instructions

When two identifiers follow each other, they could be either two
instruction mnemonics or one instruction mnemonic and one operand. To fix
this, TOKEN_NEWLINE has been reintroduced as a semantic token. The grammar
has been changed to allow empty statements, and every instruction and
directive now has to end in a newline. Labels do not have to end in a
newline.

In addition to updating the grammar, the implementation of tokenlist,
ast and parser has been updated to reflect these changes.
This commit is contained in:
2025-04-16 12:34:44 +02:00
parent 1574ec6249
commit 242fd9baa5
7 changed files with 18 additions and 8 deletions

View File

@ -1,13 +1,13 @@
<program> ::= <statement>* <program> ::= <statement>*
<statement> ::= <label> | <directive> | <instruction> <statement> ::= <label> | <directive> | <instruction> | <newline>
<label> ::= <identifier> <colon> <label> ::= <identifier> <colon>
<directive> ::= <dot> <section_directive> <directive> ::= <dot> <section_directive> <newline>
<section_directive> ::= "section" <identifier> <section_directive> ::= "section" <identifier>
<instruction> ::= <identifier> <operands> <instruction> ::= <identifier> <operands> <newline>
<operands> ::= <operand> ( <comma> <operand> )* <operands> ::= <operand> ( <comma> <operand> )*

View File

@ -157,6 +157,8 @@ const char *ast_node_id_to_cstr(node_id_t id) {
return "NODE_ASTERISK"; return "NODE_ASTERISK";
case NODE_DOT: case NODE_DOT:
return "NODE_DOT"; return "NODE_DOT";
case NODE_NEWLINE:
return "NODE_NEWLINE";
} }
assert(!"Unreachable, weird node id" && id); assert(!"Unreachable, weird node id" && id);
__builtin_unreachable(); __builtin_unreachable();
@ -172,7 +174,8 @@ static void ast_node_print_internal(ast_node_t *node, int indent) {
} }
printf("%s", ast_node_id_to_cstr(node->id)); printf("%s", ast_node_id_to_cstr(node->id));
if (node->token_entry && node->token_entry->token.value) { if (node->token_entry && node->token_entry->token.value &&
node->id != NODE_NEWLINE) {
printf(" \"%s\"", node->token_entry->token.value); printf(" \"%s\"", node->token_entry->token.value);
} }
printf("\n"); printf("\n");

View File

@ -50,6 +50,7 @@ typedef enum node_id {
NODE_MINUS, NODE_MINUS,
NODE_ASTERISK, NODE_ASTERISK,
NODE_DOT, NODE_DOT,
NODE_NEWLINE,
} node_id_t; } node_id_t;
typedef struct ast_node ast_node_t; typedef struct ast_node ast_node_t;

View File

@ -120,18 +120,20 @@ parse_result_t parse_section_directive(tokenlist_entry_t *current) {
} }
parse_result_t parse_directive(tokenlist_entry_t *current) { parse_result_t parse_directive(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_dot, parse_section_directive, nullptr}; parser_t parsers[] = {parse_dot, parse_section_directive, parse_newline,
nullptr};
return parse_consecutive(current, NODE_DIRECTIVE, parsers); return parse_consecutive(current, NODE_DIRECTIVE, parsers);
} }
parse_result_t parse_instruction(tokenlist_entry_t *current) { parse_result_t parse_instruction(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_identifier, parse_operands, nullptr}; parser_t parsers[] = {parse_identifier, parse_operands, parse_newline,
nullptr};
return parse_consecutive(current, NODE_INSTRUCTION, parsers); return parse_consecutive(current, NODE_INSTRUCTION, parsers);
} }
parse_result_t parse_statement(tokenlist_entry_t *current) { parse_result_t parse_statement(tokenlist_entry_t *current) {
parser_t parsers[] = {parse_label, parse_directive, parse_instruction, parser_t parsers[] = {parse_label, parse_directive, parse_instruction,
nullptr}; parse_newline, nullptr};
return parse_any(current, parsers); return parse_any(current, parsers);
} }

View File

@ -62,6 +62,10 @@ parse_result_t parse_dot(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr); return parse_token(current, TOKEN_DOT, NODE_DOT, nullptr);
} }
/* Primitive parser for a line terminator: delegates to parse_token, asking it
 * to match TOKEN_NEWLINE at `current` and tag the result as NODE_NEWLINE.
 * No extra validation callback is supplied (the last argument is nullptr).
 * The grammar uses this both as the terminator of directives and instructions
 * and as a standalone (empty) statement. */
parse_result_t parse_newline(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_NEWLINE, NODE_NEWLINE, nullptr);
}
parse_result_t parse_label_reference(tokenlist_entry_t *current) { parse_result_t parse_label_reference(tokenlist_entry_t *current) {
return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE, return parse_token(current, TOKEN_IDENTIFIER, NODE_LABEL_REFERENCE,
nullptr); nullptr);

View File

@ -18,6 +18,7 @@ parse_result_t parse_plus(tokenlist_entry_t *current);
parse_result_t parse_minus(tokenlist_entry_t *current); parse_result_t parse_minus(tokenlist_entry_t *current);
parse_result_t parse_asterisk(tokenlist_entry_t *current); parse_result_t parse_asterisk(tokenlist_entry_t *current);
parse_result_t parse_dot(tokenlist_entry_t *current); parse_result_t parse_dot(tokenlist_entry_t *current);
parse_result_t parse_newline(tokenlist_entry_t *current);
parse_result_t parse_label_reference(tokenlist_entry_t *current); parse_result_t parse_label_reference(tokenlist_entry_t *current);
/* These are "primitives" with a different name and some extra validation on top /* These are "primitives" with a different name and some extra validation on top

View File

@ -86,7 +86,6 @@ bool is_trivia(tokenlist_entry_t *trivia) {
switch (trivia->token.id) { switch (trivia->token.id) {
case TOKEN_WHITESPACE: case TOKEN_WHITESPACE:
case TOKEN_COMMENT: case TOKEN_COMMENT:
case TOKEN_NEWLINE:
return true; return true;
default: default:
return false; return false;