From d551ba3d12515b38d409d84f7f56ff64bc044301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Mon, 30 Dec 2024 22:36:35 +0200 Subject: [PATCH] a few bugfixes and a new TODO --- parser.c | 128 ++++++++++++++++++++++++++++++++----------------------- parser.h | 7 --- 2 files changed, 74 insertions(+), 61 deletions(-) diff --git a/parser.c b/parser.c index dcb2a9b..f2dd5cd 100644 --- a/parser.c +++ b/parser.c @@ -30,14 +30,35 @@ typedef struct { } payload; } SmallSpan; -void parseRoot(Parser* p) { - p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT; - p->nodes.main_tokens[p->nodes.len] = 0; - - // members root_members = parseContainerMembers(p); +static AstSubRange listToSpan(Parser* p, const AstNodeIndex* list, uint32_t count) { + SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count); + memcpy(&p->extra_data.arr, list, count * sizeof(AstNodeIndex)); + p->extra_data.len += count; + return (AstSubRange) { + .start = p->extra_data.len - count, + .end = p->extra_data.len, + }; } -static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; } +typedef struct { + uint32_t len; + AstNodeIndex lhs; + AstNodeIndex rhs; + bool trailing; +} Members; + +static AstSubRange membersToSpan(const Members self, Parser* p) { + if (self.len <= 2) { + const AstNodeIndex nodes[] = { self.lhs, self.rhs }; + return listToSpan(p, nodes, self.len); + } else { + return (AstSubRange) { .start = self.lhs, .end = self.rhs }; + } +} + +static AstTokenIndex nextToken(Parser* p) { + return p->tok_i++; +} static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) { if (p->token_tags[p->tok_i] == tag) { @@ -103,15 +124,6 @@ static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t coun return result; } -static AstNodeIndex parseTypeExpr(Parser* p); - -static AstNodeIndex expectTypeExpr(Parser* p) { - const AstNodeIndex node = parseTypeExpr(p); - if (node == 0) - exit(1); - return node; -} - static AstNodeIndex parseByteAlign(Parser* p) { bool ok; eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, &ok); @@ -152,6 +164,8 @@ static AstNodeIndex parseCallconv(Parser* p) { return 0; // tcc } +static AstNodeIndex parseTypeExpr(Parser*); + typedef struct { AstNodeIndex align_expr, value_expr; } NodeContainerField; @@ -162,7 +176,7 @@ static AstNodeIndex expectContainerField(Parser* p) { if (p->token_tags[p->tok_i] == TOKENIZER_TAG_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) p->tok_i += 2; - const AstNodeIndex type_expr = expectTypeExpr(p); + const AstNodeIndex type_expr = parseTypeExpr(p); const AstNodeIndex align_expr = parseByteAlign(p); bool ok; eatToken(p, TOKENIZER_TAG_EQUAL, &ok); @@ -291,6 +305,9 @@ static AstNodeIndex parseSuffixExpr(Parser* p) { fprintf(stderr, "parseSuffixExpr does not support expr with parens\n"); exit(1); } + // TODO more work + // const bool comma = p->token_tags[p->tok_i - 2] == TOKENIZER_TAG_COMMA; + return res; } } @@ -322,8 +339,9 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) { .main_token = bang, .data = { .lhs = suffix_expr, - .rhs = expectTypeExpr(p), - } }); + .rhs = parseTypeExpr(p), + }, + }); } static AstNodeIndex parseTypeExpr(Parser* p) { @@ -536,7 +554,8 @@ static AstNodeIndex parseBlock(Parser* p) { expectToken(p, TOKENIZER_TAG_R_BRACE, NULL); const bool semicolon = (p->token_tags[p->tok_i] - 2 == TOKENIZER_TAG_SEMICOLON); - switch (p->scratch.len - scratch_top.old_len) { + const uint32_t statements_len = p->scratch.len - scratch_top.old_len; + switch (statements_len) { case 0: return addNode( &p->nodes, @@ -571,21 +590,15 @@ static AstNodeIndex parseBlock(Parser* p) { }, }); default:; - const uint32_t extra = p->scratch.len - scratch_top.old_len; - SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->scratch, extra); - memcpy( - &p->extra_data.arr[p->extra_data.len], - &p->scratch.arr[scratch_top.old_len], - sizeof(AstNodeIndex) * extra); - p->extra_data.len += extra; + const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], statements_len); return addNode( &p->nodes, (AstNodeItem) { .tag = semicolon ? AST_NODE_TAG_BLOCK_SEMICOLON : AST_NODE_TAG_BLOCK, .main_token = lbrace, .data = { - .lhs = p->scratch.arr[scratch_top.old_len], - .rhs = p->scratch.arr[p->scratch.len], + .lhs = span.start, + .rhs = span.end, }, }); } @@ -748,30 +761,29 @@ void findNextContainerMember(Parser* p) { } static Members parseContainerMembers(Parser* p) { - const uint32_t scratch_top = p->scratch.len; - // ast_token_index last_field; + CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = { + .scratch = &p->scratch, + .old_len = p->scratch.len, + }; bool ok; - while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok) + while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok), ok) ; FieldState field_state = { .tag = FIELD_STATE_NONE }; bool trailing = false; - AstNodeIndex top_level_decl; while (1) { eatDocComments(p); - switch (p->token_tags[p->tok_i]) { - case TOKENIZER_TAG_KEYWORD_TEST: case TOKENIZER_TAG_KEYWORD_COMPTIME: case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:; const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]); fprintf(stderr, "%s not implemented in parseContainerMembers\n", str); exit(1); - case TOKENIZER_TAG_KEYWORD_PUB: + case TOKENIZER_TAG_KEYWORD_PUB: { p->tok_i++; - top_level_decl = expectTopLevelDecl(p); + AstNodeIndex top_level_decl = expectTopLevelDecl(p); if (top_level_decl != 0) { if (field_state.tag == FIELD_STATE_SEEN) { field_state.tag = FIELD_STATE_END; @@ -779,9 +791,9 @@ static Members parseContainerMembers(Parser* p) { } SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); } - trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON); + trailing = p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON; break; - + } case TOKENIZER_TAG_KEYWORD_CONST: case TOKENIZER_TAG_KEYWORD_VAR: case TOKENIZER_TAG_KEYWORD_THREADLOCAL: @@ -789,8 +801,8 @@ static Members parseContainerMembers(Parser* p) { case TOKENIZER_TAG_KEYWORD_EXTERN: case TOKENIZER_TAG_KEYWORD_INLINE: case TOKENIZER_TAG_KEYWORD_NOINLINE: - case TOKENIZER_TAG_KEYWORD_FN:; - top_level_decl = expectTopLevelDecl(p); + case TOKENIZER_TAG_KEYWORD_FN: { + const AstNodeIndex top_level_decl = expectTopLevelDecl(p); if (top_level_decl != 0) { if (field_state.tag == FIELD_STATE_SEEN) { field_state.tag = FIELD_STATE_END; @@ -800,6 +812,7 @@ static Members parseContainerMembers(Parser* p) { } trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON); break; + } case TOKENIZER_TAG_EOF: case TOKENIZER_TAG_R_BRACE: goto break_loop; @@ -826,8 +839,7 @@ static Members parseContainerMembers(Parser* p) { case TOKENIZER_TAG_EOF: trailing = false; goto break_loop; - default: - continue; + default:; } findNextContainerMember(p); @@ -837,11 +849,8 @@ static Members parseContainerMembers(Parser* p) { break_loop:; - const uint32_t scratch_len = p->scratch.len; - p->scratch.len = scratch_top; - - const uint32_t n_items = scratch_len - scratch_top; - switch (n_items) { + const uint32_t items_len = p->scratch.len - scratch_top.old_len; + switch (items_len) { case 0: return (Members) { .len = 0, @@ -852,23 +861,34 @@ break_loop:; case 1: return (Members) { .len = 1, - .lhs = p->scratch.arr[scratch_top], + .lhs = p->scratch.arr[scratch_top.old_len], .rhs = 0, .trailing = trailing, }; case 2: return (Members) { .len = 2, - .lhs = p->scratch.arr[scratch_top], - .rhs = p->scratch.arr[scratch_top + 1], + .lhs = p->scratch.arr[scratch_top.old_len], + .rhs = p->scratch.arr[scratch_top.old_len + 1], .trailing = trailing, }; - default: + default:; + const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], items_len); return (Members) { - .len = n_items, - .lhs = p->scratch.arr[scratch_top], - .rhs = p->scratch.arr[scratch_len], + .len = items_len, + .lhs = span.start, + .rhs = span.end, .trailing = trailing, }; } } + +void parseRoot(Parser* p) { + addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_TAG_ROOT, .main_token = 0 }); + + Members root_members = parseContainerMembers(p); + AstSubRange root_decls = membersToSpan(root_members, p); + + p->nodes.datas[0].lhs = root_decls.start; + p->nodes.datas[0].rhs = root_decls.end; +} diff --git a/parser.h b/parser.h index ca6990f..922d52e 100644 --- a/parser.h +++ b/parser.h @@ -7,13 +7,6 @@ #include #include -typedef struct { - uint32_t len; - AstNodeIndex lhs; - AstNodeIndex rhs; - bool trailing; -} Members; - typedef struct { const char* source; uint32_t source_len;