From 1bb921b8cacbdf3a5d6c5104081c0e554a299938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Tue, 10 Feb 2026 12:14:24 +0000 Subject: [PATCH] parser: add tuple struct test and container decl support Port "zig fmt: tuple struct" test from upstream parser_test.zig. Implement in parser.c: - parseContainerDeclAuto: struct/enum/union/opaque declarations - parseGlobalVarDecl: const/var with initializer - parseByteAlign: align(expr) parsing - parseTypeExpr: pointer types (*T), optional types (?T) - parsePrimaryTypeExpr: number_literal, char_literal, unreachable_literal, fn proto, grouped expressions, container decl, comptime prefix - expectContainerField: default values (= expr) - parseContainerMembers: comptime block/field handling - Fix parseFnProto: use null_token instead of null_node Co-Authored-By: Claude Opus 4.6 (1M context) --- parser.c | 279 +++++++++++++++++++++++++++++++++++++++++++----- parser_test.zig | 14 +++ 2 files changed, 267 insertions(+), 26 deletions(-) diff --git a/parser.c b/parser.c index f0c6a50c1b..f3d24657f2 100644 --- a/parser.c +++ b/parser.c @@ -11,6 +11,13 @@ const AstNodeIndex null_node = 0; const AstTokenIndex null_token = ~(AstTokenIndex)(0); +typedef struct { + uint32_t len; + AstNodeIndex lhs; + AstNodeIndex rhs; + bool trailing; +} Members; + static AstNodeIndex parsePrefixExpr(Parser*); static AstNodeIndex parseTypeExpr(Parser*); static AstNodeIndex parseBlock(Parser* p); @@ -18,6 +25,9 @@ static AstNodeIndex parseLabeledStatement(Parser*); static AstNodeIndex parseExpr(Parser*); static AstNodeIndex expectExpr(Parser*); static AstNodeIndex expectSemicolon(Parser*); +static AstTokenIndex expectToken(Parser*, TokenizerTag); +static AstNodeIndex parseFnProto(Parser*); +static Members parseContainerMembers(Parser*); typedef struct { enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag; @@ -60,13 +70,6 @@ static AstSubRange listToSpan( }; } -typedef struct { - uint32_t len; - AstNodeIndex lhs; - 
AstNodeIndex rhs; - bool trailing; -} Members; - static AstSubRange membersToSpan(const Members self, Parser* p) { if (self.len <= 2) { const AstNodeIndex nodes[] = { self.lhs, self.rhs }; @@ -140,9 +143,10 @@ static AstNodeIndex addExtra( static AstNodeIndex parseByteAlign(Parser* p) { if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token) return null_node; - fprintf(stderr, "parseByteAlign cannot parse alignment\n"); - exit(1); - return 0; // tcc + expectToken(p, TOKEN_L_PAREN); + const AstNodeIndex expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + return expr; } static AstNodeIndex parseAddrSpace(Parser* p) { @@ -182,11 +186,8 @@ static AstNodeIndex expectContainerField(Parser* p) { const AstNodeIndex type_expr = parseTypeExpr(p); const AstNodeIndex align_expr = parseByteAlign(p); - if (eatToken(p, TOKEN_EQUAL) != null_token) { - fprintf(stderr, "expectContainerField does not support expr\n"); - exit(1); - } - const AstNodeIndex value_expr = 0; + const AstNodeIndex value_expr + = eatToken(p, TOKEN_EQUAL) != null_token ? 
expectExpr(p) : 0; if (align_expr == 0) { return addNode( @@ -306,12 +307,145 @@ end_loop:; } } +static AstNodeIndex parseContainerDeclAuto(Parser* p) { + const AstTokenIndex main_token = nextToken(p); + AstNodeIndex arg_expr = null_node; + switch (p->token_tags[main_token]) { + case TOKEN_KEYWORD_OPAQUE: + break; + case TOKEN_KEYWORD_STRUCT: + case TOKEN_KEYWORD_ENUM: + if (eatToken(p, TOKEN_L_PAREN) != null_token) { + arg_expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + } + break; + case TOKEN_KEYWORD_UNION: + if (eatToken(p, TOKEN_L_PAREN) != null_token) { + if (eatToken(p, TOKEN_KEYWORD_ENUM) != null_token) { + if (eatToken(p, TOKEN_L_PAREN) != null_token) { + const AstNodeIndex enum_tag_expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + expectToken(p, TOKEN_R_PAREN); + expectToken(p, TOKEN_L_BRACE); + const Members members = parseContainerMembers(p); + const AstSubRange members_span = membersToSpan(members, p); + expectToken(p, TOKEN_R_BRACE); + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING + : AST_NODE_TAGGED_UNION_ENUM_TAG, + .main_token = main_token, + .data = { + .lhs = enum_tag_expr, + .rhs = addExtra(p, + (AstNodeIndex[]) { + members_span.start, + members_span.end }, + 2), + }, + }); + } + expectToken(p, TOKEN_R_PAREN); + expectToken(p, TOKEN_L_BRACE); + const Members members = parseContainerMembers(p); + expectToken(p, TOKEN_R_BRACE); + if (members.len <= 2) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? AST_NODE_TAGGED_UNION_TWO_TRAILING + : AST_NODE_TAGGED_UNION_TWO, + .main_token = main_token, + .data = { .lhs = members.lhs, .rhs = members.rhs }, + }); + } + const AstSubRange span = membersToSpan(members, p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? 
AST_NODE_TAGGED_UNION_TRAILING + : AST_NODE_TAGGED_UNION, + .main_token = main_token, + .data = { .lhs = span.start, .rhs = span.end }, + }); + } + arg_expr = expectExpr(p); + expectToken(p, TOKEN_R_PAREN); + } + break; + default: + fprintf(stderr, "parseContainerDeclAuto: unexpected token\n"); + exit(1); + } + + expectToken(p, TOKEN_L_BRACE); + const Members members = parseContainerMembers(p); + expectToken(p, TOKEN_R_BRACE); + + if (arg_expr == null_node) { + if (members.len <= 2) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? AST_NODE_CONTAINER_DECL_TWO_TRAILING + : AST_NODE_CONTAINER_DECL_TWO, + .main_token = main_token, + .data = { .lhs = members.lhs, .rhs = members.rhs }, + }); + } + const AstSubRange span = membersToSpan(members, p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = members.trailing ? AST_NODE_CONTAINER_DECL_TRAILING + : AST_NODE_CONTAINER_DECL, + .main_token = main_token, + .data = { .lhs = span.start, .rhs = span.end }, + }); + } + + const AstSubRange span = membersToSpan(members, p); + return addNode( + &p->nodes, + (AstNodeItem) { + .tag = members.trailing + ? 
AST_NODE_CONTAINER_DECL_ARG_TRAILING + : AST_NODE_CONTAINER_DECL_ARG, + .main_token = main_token, + .data = { + .lhs = arg_expr, + .rhs = addExtra(p, + (AstNodeIndex[]) { span.start, span.end }, 2), + }, + }); +} + static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { case TOKEN_CHAR_LITERAL: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_CHAR_LITERAL, + .main_token = nextToken(p), + .data = {}, + }); case TOKEN_NUMBER_LITERAL: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_NUMBER_LITERAL, + .main_token = nextToken(p), + .data = {}, + }); case TOKEN_KEYWORD_UNREACHABLE: + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_UNREACHABLE_LITERAL, + .main_token = nextToken(p), + .data = {}, + }); case TOKEN_KEYWORD_ANYFRAME: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); @@ -326,15 +460,34 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { case TOKEN_BUILTIN: return parseBuiltinCall(p); case TOKEN_KEYWORD_FN: + return parseFnProto(p); case TOKEN_KEYWORD_IF: case TOKEN_KEYWORD_SWITCH: + fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", + tokenizerGetTagString(tok)); + exit(1); case TOKEN_KEYWORD_EXTERN: case TOKEN_KEYWORD_PACKED: + fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", + tokenizerGetTagString(tok)); + exit(1); case TOKEN_KEYWORD_STRUCT: case TOKEN_KEYWORD_OPAQUE: case TOKEN_KEYWORD_ENUM: case TOKEN_KEYWORD_UNION: + return parseContainerDeclAuto(p); - case TOKEN_KEYWORD_COMPTIME: + case TOKEN_KEYWORD_COMPTIME: { + // Consume `comptime` before parsing its operand: initializer-list + // expressions are indeterminately sequenced (C11 6.7.9p23). + const AstTokenIndex comptime_token = nextToken(p); + const AstNodeIndex operand = parseTypeExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = comptime_token, + .data = { .lhs = operand, .rhs = 0 }, + }); + } case TOKEN_MULTILINE_STRING_LITERAL_LINE: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); @@ -357,10 +510,20 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { case
TOKEN_PERIOD: case TOKEN_KEYWORD_ERROR: - case TOKEN_L_PAREN: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); exit(1); + case TOKEN_L_PAREN: { + const AstTokenIndex lparen = nextToken(p); + const AstNodeIndex inner = expectExpr(p); + const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_GROUPED_EXPRESSION, + .main_token = lparen, + .data = { .lhs = inner, .rhs = rparen }, + }); + } default: return null_node; } @@ -494,12 +652,62 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) { } static AstNodeIndex parseTypeExpr(Parser* p) { - const AstNodeIndex tok = p->token_tags[p->tok_i]; + const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { - case TOKEN_QUESTION_MARK: + case TOKEN_QUESTION_MARK: { + // Consume `?` before parsing the child type: initializer-list + // expressions are indeterminately sequenced (C11 6.7.9p23). + const AstTokenIndex question_mark = nextToken(p); + const AstNodeIndex child_type = parseTypeExpr(p); + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_OPTIONAL_TYPE, + .main_token = question_mark, + .data = { .lhs = child_type, .rhs = 0 }, + }); + } case TOKEN_KEYWORD_ANYFRAME: - case TOKEN_ASTERISK: + fprintf(stderr, "parseTypeExpr not supported for %s\n", + tokenizerGetTagString(tok)); + exit(1); + case TOKEN_ASTERISK: { + const AstTokenIndex asterisk = nextToken(p); + const AstNodeIndex align_expr = parseByteAlign(p); + const AstNodeIndex sentinel + = eatToken(p, TOKEN_COLON) != null_token ?
parseExpr(p) : 0; + // skip const/volatile/allowzero modifiers + while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) + p->tok_i++; + const AstNodeIndex child_type = parseTypeExpr(p); + if (sentinel != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_SENTINEL, + .main_token = asterisk, + .data = { .lhs = sentinel, .rhs = child_type }, + }); + } + if (align_expr != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_ALIGNED, + .main_token = asterisk, + .data = { .lhs = align_expr, .rhs = child_type }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_ALIGNED, + .main_token = asterisk, + .data = { .lhs = 0, .rhs = child_type }, + }); + } case TOKEN_ASTERISK_ASTERISK: + fprintf(stderr, "parseTypeExpr not supported for %s\n", + tokenizerGetTagString(tok)); + exit(1); case TOKEN_L_BRACKET: fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok)); @@ -507,6 +710,7 @@ static AstNodeIndex parseTypeExpr(Parser* p) { default: return parseErrorUnionExpr(p); } + return 0; // tcc } static SmallSpan parseParamDeclList(Parser* p) { @@ -527,8 +731,8 @@ static uint32_t reserveNode(Parser* p, AstNodeTag tag) { } static AstNodeIndex parseFnProto(Parser* p) { - AstNodeIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN); - if (fn_token == null_node) + AstTokenIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN); + if (fn_token == null_token) return null_node; AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO); @@ -1110,9 +1314,13 @@ static AstNodeIndex parseGlobalVarDecl(Parser* p) { if (var_decl == 0) { return null_node; } - fprintf(stderr, "parseGlobalVarDecl does not support parsing var decls\n"); - exit(1); - return 0; // tcc + + if (eatToken(p, TOKEN_EQUAL) != null_token) { + const AstNodeIndex init_expr = expectExpr(p); + p->nodes.datas[var_decl].rhs = 
init_expr; + } + expectToken(p, TOKEN_SEMICOLON); + return var_decl; } static AstNodeIndex expectTopLevelDecl(Parser* p) { @@ -1240,12 +1448,30 @@ static Members parseContainerMembers(Parser* p) { eatDocComments(p); switch (p->token_tags[p->tok_i]) { case TOKEN_KEYWORD_TEST: - case TOKEN_KEYWORD_COMPTIME: case TOKEN_KEYWORD_USINGNAMESPACE:; const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]); fprintf( stderr, "%s not implemented in parseContainerMembers\n", str); exit(1); + case TOKEN_KEYWORD_COMPTIME: + // `comptime` opens either a comptime block (`comptime { ... }`) or a + // comptime container field; peek at the next token to decide. The + // keyword token is kept: it is the COMPTIME node's main_token. + if (p->token_tags[p->tok_i + 1] == TOKEN_L_BRACE) { + const AstTokenIndex comptime_token = nextToken(p); + const AstNodeIndex block_node = parseBlock(p); + SLICE_APPEND(AstNodeIndex, &p->scratch, + addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_COMPTIME, + .main_token = comptime_token, + .data = { .lhs = block_node, .rhs = 0 }, + })); + trailing = p->token_tags[p->tok_i - 1] == TOKEN_R_BRACE; + break; + } + // Otherwise it's a container field with comptime modifier + goto container_field; case TOKEN_KEYWORD_PUB: { p->tok_i++; AstNodeIndex top_level_decl = expectTopLevelDecl(p); @@ -1281,9 +1507,10 @@ static Members parseContainerMembers(Parser* p) { case TOKEN_EOF: case TOKEN_R_BRACE: goto break_loop; + container_field: default:; // skip parseCStyleContainer - const AstNodeIndex container_field = expectContainerField(p); + const AstNodeIndex field_node = expectContainerField(p); switch (field_state.tag) { case FIELD_STATE_NONE: field_state.tag = FIELD_STATE_SEEN; @@ -1294,7 +1521,7 @@ static Members parseContainerMembers(Parser* p) { fprintf(stderr, "parseContainerMembers error condition\n"); exit(1); } - SLICE_APPEND(AstNodeIndex, &p->scratch, container_field); + SLICE_APPEND(AstNodeIndex, &p->scratch, field_node); switch (p->token_tags[p->tok_i]) { case TOKEN_COMMA: p->tok_i++; diff --git a/parser_test.zig b/parser_test.zig index
e1037ebb76..e588ab1ff2 100644 --- a/parser_test.zig +++ b/parser_test.zig @@ -560,3 +560,17 @@ test "my function" { \\ ); } + +test "zig fmt: tuple struct" { + try testCanonical( + \\const T = struct { + \\ /// doc comment on tuple field + \\ comptime comptime u32, + \\ /// another doc comment on tuple field + \\ *u32 = 1, + \\ // needs to be wrapped in parentheses to not be parsed as a function decl + \\ (fn () void) align(1), + \\}; + \\ + ); +}