commit dd82fbcda9b44fb1e4323ce4671822ca852b9e83 (tree)
parent 18b36112ae4b8a0d53e8c054da1dd100f36f0112
Author: Motiejus Jakštys <motiejus.jakstys@chronosphere.io>
Date: Tue, 10 Feb 2026 12:14:24 +0000
parser: add tuple struct test and container decl support
Port "zig fmt: tuple struct" test from upstream parser_test.zig.
Implement in parser.c:
- parseContainerDeclAuto: struct/enum/union/opaque declarations
- parseGlobalVarDecl: const/var with initializer
- parseByteAlign: align(expr) parsing
- parseTypeExpr: pointer types (*T), optional types (?T)
- parsePrimaryTypeExpr: number_literal, char_literal,
unreachable_literal, fn proto, grouped expressions,
container decl, comptime prefix
- expectContainerField: default values (= expr)
- parseContainerMembers: comptime block/field handling
- Fix parseFnProto: use null_token instead of null_node
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
| M | parser.c | | | 279 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- |
| M | parser_test.zig | | | 14 | ++++++++++++++ |
2 files changed, 267 insertions(+), 26 deletions(-)
diff --git a/parser.c b/parser.c
@@ -11,6 +11,13 @@
const AstNodeIndex null_node = 0;
const AstTokenIndex null_token = ~(AstTokenIndex)(0);
+// Result of parsing a container body; returned by value from
+// parseContainerMembers and consumed by membersToSpan.
+typedef struct {
+ uint32_t len; // member count; when <= 2 the members are carried in lhs/rhs
+ AstNodeIndex lhs;
+ AstNodeIndex rhs;
+ bool trailing; // selects the *_TRAILING variant of the resulting node tag
+} Members;
+
static AstNodeIndex parsePrefixExpr(Parser*);
static AstNodeIndex parseTypeExpr(Parser*);
static AstNodeIndex parseBlock(Parser* p);
@@ -18,6 +25,9 @@ static AstNodeIndex parseLabeledStatement(Parser*);
static AstNodeIndex parseExpr(Parser*);
static AstNodeIndex expectExpr(Parser*);
static AstNodeIndex expectSemicolon(Parser*);
+static AstTokenIndex expectToken(Parser*, TokenizerTag);
+static AstNodeIndex parseFnProto(Parser*);
+static Members parseContainerMembers(Parser*);
typedef struct {
enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag;
@@ -60,13 +70,6 @@ static AstSubRange listToSpan(
};
}
-typedef struct {
- uint32_t len;
- AstNodeIndex lhs;
- AstNodeIndex rhs;
- bool trailing;
-} Members;
-
static AstSubRange membersToSpan(const Members self, Parser* p) {
if (self.len <= 2) {
const AstNodeIndex nodes[] = { self.lhs, self.rhs };
@@ -140,9 +143,10 @@ static AstNodeIndex addExtra(
+// Parses an optional `align(expr)` clause.
+// Returns null_node when the current token is not `align`. Otherwise expects
+// `(`, an expression and `)`, and returns the expression's node index.
static AstNodeIndex parseByteAlign(Parser* p) {
if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token)
return null_node;
- fprintf(stderr, "parseByteAlign cannot parse alignment\n");
- exit(1);
- return 0; // tcc
+ expectToken(p, TOKEN_L_PAREN);
+ const AstNodeIndex expr = expectExpr(p);
+ expectToken(p, TOKEN_R_PAREN);
+ return expr;
}
static AstNodeIndex parseAddrSpace(Parser* p) {
@@ -182,11 +186,8 @@ static AstNodeIndex expectContainerField(Parser* p) {
const AstNodeIndex type_expr = parseTypeExpr(p);
const AstNodeIndex align_expr = parseByteAlign(p);
- if (eatToken(p, TOKEN_EQUAL) != null_token) {
- fprintf(stderr, "expectContainerField does not support expr\n");
- exit(1);
- }
- const AstNodeIndex value_expr = 0;
+ const AstNodeIndex value_expr
+ = eatToken(p, TOKEN_EQUAL) != null_token ? expectExpr(p) : 0;
if (align_expr == 0) {
return addNode(
@@ -306,12 +307,145 @@ end_loop:;
}
}
+// Parses a container declaration whose keyword (`struct`, `enum`, `union` or
+// `opaque`) is the current token, followed by an optional parenthesised
+// argument and a brace-delimited member list.
+//
+// Node shapes produced (each tag has a *_TRAILING twin, picked when
+// members.trailing is set):
+//   struct(T) / enum(T) / union(T)  CONTAINER_DECL_ARG
+//                                   lhs = T, rhs = extra index of member span
+//   union(enum(T))                  TAGGED_UNION_ENUM_TAG
+//                                   lhs = T, rhs = extra index of member span
+//   union(enum)                     TAGGED_UNION_TWO or TAGGED_UNION
+//   no argument                     CONTAINER_DECL_TWO or CONTAINER_DECL
+// The *_TWO forms (len <= 2) store the members directly in lhs/rhs; the
+// others store the start/end of the span returned by membersToSpan.
+//
+// Any other leading token prints a diagnostic and exits the process.
+static AstNodeIndex parseContainerDeclAuto(Parser* p) {
+    const AstTokenIndex container_tok = nextToken(p);
+    const TokenizerTag keyword = p->token_tags[container_tok];
+    AstNodeIndex arg = null_node;
+    // Set for `union(enum) { ... }`, i.e. a tagged union without a tag type.
+    bool bare_tagged_union = false;
+
+    if (keyword == TOKEN_KEYWORD_STRUCT || keyword == TOKEN_KEYWORD_ENUM) {
+        if (eatToken(p, TOKEN_L_PAREN) != null_token) {
+            arg = expectExpr(p);
+            expectToken(p, TOKEN_R_PAREN);
+        }
+    } else if (keyword == TOKEN_KEYWORD_UNION) {
+        if (eatToken(p, TOKEN_L_PAREN) != null_token) {
+            if (eatToken(p, TOKEN_KEYWORD_ENUM) == null_token) {
+                // union(T) { ... }
+                arg = expectExpr(p);
+                expectToken(p, TOKEN_R_PAREN);
+            } else if (eatToken(p, TOKEN_L_PAREN) == null_token) {
+                // union(enum) { ... }: the body is handled by the shared
+                // tail below.
+                expectToken(p, TOKEN_R_PAREN);
+                bare_tagged_union = true;
+            } else {
+                // union(enum(T)) { ... }
+                const AstNodeIndex tag_type = expectExpr(p);
+                expectToken(p, TOKEN_R_PAREN);
+                expectToken(p, TOKEN_R_PAREN);
+                expectToken(p, TOKEN_L_BRACE);
+                const Members body = parseContainerMembers(p);
+                // The span is materialised before the closing brace is
+                // consumed on this path.
+                const AstSubRange range = membersToSpan(body, p);
+                expectToken(p, TOKEN_R_BRACE);
+                const AstNodeIndex extra = addExtra(
+                    p, (AstNodeIndex[]) { range.start, range.end }, 2);
+                AstNodeItem tagged = { .main_token = container_tok };
+                if (body.trailing)
+                    tagged.tag = AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING;
+                else
+                    tagged.tag = AST_NODE_TAGGED_UNION_ENUM_TAG;
+                tagged.data.lhs = tag_type;
+                tagged.data.rhs = extra;
+                return addNode(&p->nodes, tagged);
+            }
+        }
+    } else if (keyword != TOKEN_KEYWORD_OPAQUE) {
+        fprintf(stderr, "parseContainerDeclAuto: unexpected token\n");
+        exit(1);
+    }
+
+    expectToken(p, TOKEN_L_BRACE);
+    const Members body = parseContainerMembers(p);
+    expectToken(p, TOKEN_R_BRACE);
+
+    AstNodeItem node = { .main_token = container_tok };
+
+    if (arg != null_node) {
+        // Explicit argument: members always go through the extra array.
+        const AstSubRange range = membersToSpan(body, p);
+        node.tag = body.trailing ? AST_NODE_CONTAINER_DECL_ARG_TRAILING
+                                 : AST_NODE_CONTAINER_DECL_ARG;
+        node.data.lhs = arg;
+        node.data.rhs
+            = addExtra(p, (AstNodeIndex[]) { range.start, range.end }, 2);
+        return addNode(&p->nodes, node);
+    }
+
+    if (body.len <= 2) {
+        // Up to two members are stored inline.
+        if (bare_tagged_union)
+            node.tag = body.trailing ? AST_NODE_TAGGED_UNION_TWO_TRAILING
+                                     : AST_NODE_TAGGED_UNION_TWO;
+        else
+            node.tag = body.trailing ? AST_NODE_CONTAINER_DECL_TWO_TRAILING
+                                     : AST_NODE_CONTAINER_DECL_TWO;
+        node.data.lhs = body.lhs;
+        node.data.rhs = body.rhs;
+        return addNode(&p->nodes, node);
+    }
+
+    const AstSubRange range = membersToSpan(body, p);
+    if (bare_tagged_union)
+        node.tag = body.trailing ? AST_NODE_TAGGED_UNION_TRAILING
+                                 : AST_NODE_TAGGED_UNION;
+    else
+        node.tag = body.trailing ? AST_NODE_CONTAINER_DECL_TRAILING
+                                 : AST_NODE_CONTAINER_DECL;
+    node.data.lhs = range.start;
+    node.data.rhs = range.end;
+    return addNode(&p->nodes, node);
+}
+
static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
const TokenizerTag tok = p->token_tags[p->tok_i];
switch (tok) {
case TOKEN_CHAR_LITERAL:
+ return addNode(&p->nodes,
+ (AstNodeItem) {
+ .tag = AST_NODE_CHAR_LITERAL,
+ .main_token = nextToken(p),
+ .data = {},
+ });
case TOKEN_NUMBER_LITERAL:
+ return addNode(&p->nodes,
+ (AstNodeItem) {
+ .tag = AST_NODE_NUMBER_LITERAL,
+ .main_token = nextToken(p),
+ .data = {},
+ });
case TOKEN_KEYWORD_UNREACHABLE:
+ return addNode(&p->nodes,
+ (AstNodeItem) {
+ .tag = AST_NODE_UNREACHABLE_LITERAL,
+ .main_token = nextToken(p),
+ .data = {},
+ });
case TOKEN_KEYWORD_ANYFRAME:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok));
@@ -326,15 +460,29 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
case TOKEN_BUILTIN:
return parseBuiltinCall(p);
case TOKEN_KEYWORD_FN:
+ return parseFnProto(p);
case TOKEN_KEYWORD_IF:
case TOKEN_KEYWORD_SWITCH:
+ fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
+ tokenizerGetTagString(tok));
+ exit(1);
case TOKEN_KEYWORD_EXTERN:
case TOKEN_KEYWORD_PACKED:
+ fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
+ tokenizerGetTagString(tok));
+ exit(1);
case TOKEN_KEYWORD_STRUCT:
case TOKEN_KEYWORD_OPAQUE:
case TOKEN_KEYWORD_ENUM:
case TOKEN_KEYWORD_UNION:
+ return parseContainerDeclAuto(p);
case TOKEN_KEYWORD_COMPTIME:
+    {
+        // Consume `comptime` in its own statement before parsing the operand.
+        // Initializer-list expressions are indeterminately sequenced in C, so
+        // calling nextToken() and parseTypeExpr() inside one compound literal
+        // leaves the order in which they touch the token cursor unspecified;
+        // if the operand were parsed first it would see `comptime` again.
+        const AstTokenIndex comptime_token = nextToken(p);
+        const AstNodeIndex operand = parseTypeExpr(p);
+        return addNode(&p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_COMPTIME,
+                .main_token = comptime_token,
+                .data = { .lhs = operand, .rhs = 0 },
+            });
+    }
case TOKEN_MULTILINE_STRING_LITERAL_LINE:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok));
@@ -357,10 +505,20 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
case TOKEN_KEYWORD_WHILE:
case TOKEN_PERIOD:
case TOKEN_KEYWORD_ERROR:
- case TOKEN_L_PAREN:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok));
exit(1);
+ case TOKEN_L_PAREN: {
+ const AstTokenIndex lparen = nextToken(p);
+ const AstNodeIndex inner = expectExpr(p);
+ const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN);
+ return addNode(&p->nodes,
+ (AstNodeItem) {
+ .tag = AST_NODE_GROUPED_EXPRESSION,
+ .main_token = lparen,
+ .data = { .lhs = inner, .rhs = rparen },
+ });
+ }
default:
return null_node;
}
@@ -494,12 +652,57 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) {
}
static AstNodeIndex parseTypeExpr(Parser* p) {
- const AstNodeIndex tok = p->token_tags[p->tok_i];
+ const TokenizerTag tok = p->token_tags[p->tok_i];
switch (tok) {
case TOKEN_QUESTION_MARK:
+    {
+        // Optional type `?T`. The `?` token is consumed in a separate
+        // statement before the child type is parsed: initializer-list
+        // expressions are indeterminately sequenced in C, so doing both
+        // inside one compound literal would leave their order unspecified
+        // (parsing the child first would recurse on the same `?`).
+        const AstTokenIndex question_mark = nextToken(p);
+        const AstNodeIndex child_type = parseTypeExpr(p);
+        return addNode(&p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_OPTIONAL_TYPE,
+                .main_token = question_mark,
+                .data = { .lhs = child_type, .rhs = 0 },
+            });
+    }
case TOKEN_KEYWORD_ANYFRAME:
- case TOKEN_ASTERISK:
+ fprintf(stderr, "parseTypeExpr not supported for %s\n",
+ tokenizerGetTagString(tok));
+ exit(1);
+    case TOKEN_ASTERISK: {
+        // Pointer type introduced by a single `*`:
+        //   `*` [align(expr)] [`:` sentinel] {const|volatile|allowzero|align(expr)} child
+        const AstTokenIndex asterisk = nextToken(p);
+        AstNodeIndex align_expr = parseByteAlign(p);
+        const AstNodeIndex sentinel
+            = eatToken(p, TOKEN_COLON) != null_token ? parseExpr(p) : 0;
+        // const/volatile/allowzero are consumed but not recorded in the node.
+        // `align(...)` is accepted here as well so that it may follow a
+        // qualifier (e.g. `*const align(1) T`) instead of only preceding them.
+        bool more_modifiers = true;
+        while (more_modifiers) {
+            switch (p->token_tags[p->tok_i]) {
+            case TOKEN_KEYWORD_CONST:
+            case TOKEN_KEYWORD_VOLATILE:
+            case TOKEN_KEYWORD_ALLOWZERO:
+                p->tok_i++;
+                break;
+            case TOKEN_KEYWORD_ALIGN:
+                if (align_expr != 0) {
+                    fprintf(stderr,
+                        "parseTypeExpr: duplicate align qualifier\n");
+                    exit(1);
+                }
+                align_expr = parseByteAlign(p);
+                break;
+            default:
+                more_modifiers = false;
+                break;
+            }
+        }
+        const AstNodeIndex child_type = parseTypeExpr(p);
+        if (sentinel != 0) {
+            // PTR_TYPE_SENTINEL has no slot for an alignment; fail loudly
+            // rather than silently dropping it from the tree.
+            if (align_expr != 0) {
+                fprintf(stderr,
+                    "parseTypeExpr does not support a pointer with both "
+                    "sentinel and align\n");
+                exit(1);
+            }
+            return addNode(&p->nodes,
+                (AstNodeItem) {
+                    .tag = AST_NODE_PTR_TYPE_SENTINEL,
+                    .main_token = asterisk,
+                    .data = { .lhs = sentinel, .rhs = child_type },
+                });
+        }
+        // lhs is the alignment expression, or 0 when none was given.
+        return addNode(&p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_PTR_TYPE_ALIGNED,
+                .main_token = asterisk,
+                .data = { .lhs = align_expr, .rhs = child_type },
+            });
+    }
case TOKEN_ASTERISK_ASTERISK:
+ fprintf(stderr, "parseTypeExpr not supported for %s\n",
+ tokenizerGetTagString(tok));
+ exit(1);
case TOKEN_L_BRACKET:
fprintf(stderr, "parseTypeExpr not supported for %s\n",
tokenizerGetTagString(tok));
@@ -507,6 +710,7 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
default:
return parseErrorUnionExpr(p);
}
+ return 0; // tcc
}
static SmallSpan parseParamDeclList(Parser* p) {
@@ -527,8 +731,8 @@ static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
}
static AstNodeIndex parseFnProto(Parser* p) {
- AstNodeIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN);
- if (fn_token == null_node)
+ AstTokenIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN);
+ if (fn_token == null_token)
return null_node;
AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO);
@@ -1110,9 +1314,13 @@ static AstNodeIndex parseGlobalVarDecl(Parser* p) {
if (var_decl == 0) {
return null_node;
}
- fprintf(stderr, "parseGlobalVarDecl does not support parsing var decls\n");
- exit(1);
- return 0; // tcc
+
+ if (eatToken(p, TOKEN_EQUAL) != null_token) {
+ const AstNodeIndex init_expr = expectExpr(p);
+ p->nodes.datas[var_decl].rhs = init_expr;
+ }
+ expectToken(p, TOKEN_SEMICOLON);
+ return var_decl;
}
static AstNodeIndex expectTopLevelDecl(Parser* p) {
@@ -1240,12 +1448,30 @@ static Members parseContainerMembers(Parser* p) {
eatDocComments(p);
switch (p->token_tags[p->tok_i]) {
case TOKEN_KEYWORD_TEST:
- case TOKEN_KEYWORD_COMPTIME:
case TOKEN_KEYWORD_USINGNAMESPACE:;
const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
fprintf(
stderr, "%s not implemented in parseContainerMembers\n", str);
exit(1);
+        case TOKEN_KEYWORD_COMPTIME:
+            // `comptime` either opens a comptime block (`comptime { ... }`)
+            // or is a modifier on a container field; one token of lookahead
+            // tells the two apart.
+            if (p->token_tags[p->tok_i + 1] == TOKEN_L_BRACE) {
+                // Capture the `comptime` token before parsing the block: once
+                // parseBlock() returns, p->tok_i - 1 refers to the block's
+                // last token, not to the keyword.
+                const AstTokenIndex comptime_token = nextToken(p);
+                const AstNodeIndex block_node = parseBlock(p);
+                const AstNodeIndex comptime_node = addNode(&p->nodes,
+                    (AstNodeItem) {
+                        .tag = AST_NODE_COMPTIME,
+                        .main_token = comptime_token,
+                        .data = { .lhs = block_node, .rhs = 0 },
+                    });
+                SLICE_APPEND(AstNodeIndex, &p->scratch, comptime_node);
+                // A comptime block is a declaration, not a field, so it does
+                // not leave the member list in the "trailing" state (this
+                // mirrors upstream Parse.zig).
+                trailing = false;
+                // NOTE(review): upstream also moves field_state from "seen"
+                // to "end" at this point; confirm whether that is needed
+                // here.
+                break;
+            }
+            // Otherwise it's a container field with comptime modifier
+            goto container_field;
case TOKEN_KEYWORD_PUB: {
p->tok_i++;
AstNodeIndex top_level_decl = expectTopLevelDecl(p);
@@ -1281,9 +1507,10 @@ static Members parseContainerMembers(Parser* p) {
case TOKEN_EOF:
case TOKEN_R_BRACE:
goto break_loop;
+ container_field:
default:;
// skip parseCStyleContainer
- const AstNodeIndex container_field = expectContainerField(p);
+ const AstNodeIndex field_node = expectContainerField(p);
switch (field_state.tag) {
case FIELD_STATE_NONE:
field_state.tag = FIELD_STATE_SEEN;
@@ -1294,7 +1521,7 @@ static Members parseContainerMembers(Parser* p) {
fprintf(stderr, "parseContainerMembers error condition\n");
exit(1);
}
- SLICE_APPEND(AstNodeIndex, &p->scratch, container_field);
+ SLICE_APPEND(AstNodeIndex, &p->scratch, field_node);
switch (p->token_tags[p->tok_i]) {
case TOKEN_COMMA:
p->tok_i++;
diff --git a/parser_test.zig b/parser_test.zig
@@ -560,3 +560,17 @@ test "my function" {
\\
);
}
+
+// Ported from upstream parser_test.zig. The source passed to testCanonical
+// declares a struct with three tuple fields: a `comptime` field, a
+// pointer-typed field with a default value, and a parenthesised fn type
+// followed by `align(1)`.
+test "zig fmt: tuple struct" {
+ try testCanonical(
+ \\const T = struct {
+ \\ /// doc comment on tuple field
+ \\ comptime comptime u32,
+ \\ /// another doc comment on tuple field
+ \\ *u32 = 1,
+ \\ // needs to be wrapped in parentheses to not be parsed as a function decl
+ \\ (fn () void) align(1),
+ \\};
+ \\
+ );
+}