parser: add tuple struct test and container decl support

Port "zig fmt: tuple struct" test from upstream parser_test.zig.

Implement in parser.c:
- parseContainerDeclAuto: struct/enum/union/opaque declarations
- parseGlobalVarDecl: const/var with initializer
- parseByteAlign: align(expr) parsing
- parseTypeExpr: pointer types (*T), optional types (?T)
- parsePrimaryTypeExpr: number_literal, char_literal,
  unreachable_literal, fn proto, grouped expressions,
  container decl, comptime prefix
- expectContainerField: default values (= expr)
- parseContainerMembers: comptime block/field handling
- Fix parseFnProto: use null_token instead of null_node

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-02-10 12:14:24 +00:00
parent 745b38fde2
commit 1bb921b8ca
2 changed files with 267 additions and 26 deletions

279
parser.c
View File

@@ -11,6 +11,13 @@
const AstNodeIndex null_node = 0; const AstNodeIndex null_node = 0;
const AstTokenIndex null_token = ~(AstTokenIndex)(0); const AstTokenIndex null_token = ~(AstTokenIndex)(0);
typedef struct {
uint32_t len;
AstNodeIndex lhs;
AstNodeIndex rhs;
bool trailing;
} Members;
static AstNodeIndex parsePrefixExpr(Parser*); static AstNodeIndex parsePrefixExpr(Parser*);
static AstNodeIndex parseTypeExpr(Parser*); static AstNodeIndex parseTypeExpr(Parser*);
static AstNodeIndex parseBlock(Parser* p); static AstNodeIndex parseBlock(Parser* p);
@@ -18,6 +25,9 @@ static AstNodeIndex parseLabeledStatement(Parser*);
static AstNodeIndex parseExpr(Parser*); static AstNodeIndex parseExpr(Parser*);
static AstNodeIndex expectExpr(Parser*); static AstNodeIndex expectExpr(Parser*);
static AstNodeIndex expectSemicolon(Parser*); static AstNodeIndex expectSemicolon(Parser*);
static AstTokenIndex expectToken(Parser*, TokenizerTag);
static AstNodeIndex parseFnProto(Parser*);
static Members parseContainerMembers(Parser*);
typedef struct { typedef struct {
enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag; enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag;
@@ -60,13 +70,6 @@ static AstSubRange listToSpan(
}; };
} }
typedef struct {
uint32_t len;
AstNodeIndex lhs;
AstNodeIndex rhs;
bool trailing;
} Members;
static AstSubRange membersToSpan(const Members self, Parser* p) { static AstSubRange membersToSpan(const Members self, Parser* p) {
if (self.len <= 2) { if (self.len <= 2) {
const AstNodeIndex nodes[] = { self.lhs, self.rhs }; const AstNodeIndex nodes[] = { self.lhs, self.rhs };
@@ -140,9 +143,10 @@ static AstNodeIndex addExtra(
static AstNodeIndex parseByteAlign(Parser* p) { static AstNodeIndex parseByteAlign(Parser* p) {
if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token) if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token)
return null_node; return null_node;
fprintf(stderr, "parseByteAlign cannot parse alignment\n"); expectToken(p, TOKEN_L_PAREN);
exit(1); const AstNodeIndex expr = expectExpr(p);
return 0; // tcc expectToken(p, TOKEN_R_PAREN);
return expr;
} }
static AstNodeIndex parseAddrSpace(Parser* p) { static AstNodeIndex parseAddrSpace(Parser* p) {
@@ -182,11 +186,8 @@ static AstNodeIndex expectContainerField(Parser* p) {
const AstNodeIndex type_expr = parseTypeExpr(p); const AstNodeIndex type_expr = parseTypeExpr(p);
const AstNodeIndex align_expr = parseByteAlign(p); const AstNodeIndex align_expr = parseByteAlign(p);
if (eatToken(p, TOKEN_EQUAL) != null_token) { const AstNodeIndex value_expr
fprintf(stderr, "expectContainerField does not support expr\n"); = eatToken(p, TOKEN_EQUAL) != null_token ? expectExpr(p) : 0;
exit(1);
}
const AstNodeIndex value_expr = 0;
if (align_expr == 0) { if (align_expr == 0) {
return addNode( return addNode(
@@ -306,12 +307,145 @@ end_loop:;
} }
} }
// Parse a container declaration starting at the container keyword:
//   struct {...}, struct(expr) {...}, enum {...}, enum(expr) {...},
//   opaque {...}, union {...}, union(expr) {...},
//   union(enum) {...}, union(enum(expr)) {...}
// The keyword token becomes the node's main_token. Per the commit
// message this ports upstream Zig's parseContainerDeclAuto — TODO
// confirm node-tag choices against upstream Parse.zig.
static AstNodeIndex parseContainerDeclAuto(Parser* p) {
    const AstTokenIndex main_token = nextToken(p);
    AstNodeIndex arg_expr = null_node;
    switch (p->token_tags[main_token]) {
    case TOKEN_KEYWORD_OPAQUE:
        // opaque takes no argument list.
        break;
    case TOKEN_KEYWORD_STRUCT:
    case TOKEN_KEYWORD_ENUM:
        // Optional argument: struct(expr) / enum(expr).
        if (eatToken(p, TOKEN_L_PAREN) != null_token) {
            arg_expr = expectExpr(p);
            expectToken(p, TOKEN_R_PAREN);
        }
        break;
    case TOKEN_KEYWORD_UNION:
        if (eatToken(p, TOKEN_L_PAREN) != null_token) {
            if (eatToken(p, TOKEN_KEYWORD_ENUM) != null_token) {
                if (eatToken(p, TOKEN_L_PAREN) != null_token) {
                    // union(enum(expr)) { ... } — tagged union with an
                    // explicit enum tag type; returns early with its own
                    // node tags, members span stored via addExtra.
                    const AstNodeIndex enum_tag_expr = expectExpr(p);
                    expectToken(p, TOKEN_R_PAREN);
                    expectToken(p, TOKEN_R_PAREN);
                    expectToken(p, TOKEN_L_BRACE);
                    const Members members = parseContainerMembers(p);
                    const AstSubRange members_span = membersToSpan(members, p);
                    expectToken(p, TOKEN_R_BRACE);
                    return addNode(
                        &p->nodes,
                        (AstNodeItem) {
                            .tag = members.trailing
                                ? AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING
                                : AST_NODE_TAGGED_UNION_ENUM_TAG,
                            .main_token = main_token,
                            .data = {
                                .lhs = enum_tag_expr,
                                .rhs = addExtra(p,
                                    (AstNodeIndex[]) {
                                        members_span.start,
                                        members_span.end },
                                    2),
                            },
                        });
                }
                // union(enum) { ... } — auto-generated tag enum.
                expectToken(p, TOKEN_R_PAREN);
                expectToken(p, TOKEN_L_BRACE);
                const Members members = parseContainerMembers(p);
                expectToken(p, TOKEN_R_BRACE);
                if (members.len <= 2) {
                    // Up to two members fit inline in lhs/rhs; no
                    // extra_data needed.
                    return addNode(&p->nodes,
                        (AstNodeItem) {
                            .tag = members.trailing
                                ? AST_NODE_TAGGED_UNION_TWO_TRAILING
                                : AST_NODE_TAGGED_UNION_TWO,
                            .main_token = main_token,
                            .data = { .lhs = members.lhs, .rhs = members.rhs },
                        });
                }
                const AstSubRange span = membersToSpan(members, p);
                return addNode(&p->nodes,
                    (AstNodeItem) {
                        .tag = members.trailing
                            ? AST_NODE_TAGGED_UNION_TRAILING
                            : AST_NODE_TAGGED_UNION,
                        .main_token = main_token,
                        .data = { .lhs = span.start, .rhs = span.end },
                    });
            }
            // union(expr) { ... } — explicit tag type expression; falls
            // through to the shared CONTAINER_DECL_ARG handling below.
            arg_expr = expectExpr(p);
            expectToken(p, TOKEN_R_PAREN);
        }
        break;
    default:
        fprintf(stderr, "parseContainerDeclAuto: unexpected token\n");
        exit(1);
    }
    // Common tail for opaque/struct/enum and union(expr): brace-wrapped
    // member list, node tag selected by arg presence and member count.
    expectToken(p, TOKEN_L_BRACE);
    const Members members = parseContainerMembers(p);
    expectToken(p, TOKEN_R_BRACE);
    if (arg_expr == null_node) {
        if (members.len <= 2) {
            // Compact form: members stored directly in lhs/rhs.
            return addNode(&p->nodes,
                (AstNodeItem) {
                    .tag = members.trailing
                        ? AST_NODE_CONTAINER_DECL_TWO_TRAILING
                        : AST_NODE_CONTAINER_DECL_TWO,
                    .main_token = main_token,
                    .data = { .lhs = members.lhs, .rhs = members.rhs },
                });
        }
        const AstSubRange span = membersToSpan(members, p);
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = members.trailing ? AST_NODE_CONTAINER_DECL_TRAILING
                    : AST_NODE_CONTAINER_DECL,
                .main_token = main_token,
                .data = { .lhs = span.start, .rhs = span.end },
            });
    }
    // Argument form: lhs holds the arg expression, rhs points at the
    // members span copied into extra_data.
    const AstSubRange span = membersToSpan(members, p);
    return addNode(
        &p->nodes,
        (AstNodeItem) {
            .tag = members.trailing
                ? AST_NODE_CONTAINER_DECL_ARG_TRAILING
                : AST_NODE_CONTAINER_DECL_ARG,
            .main_token = main_token,
            .data = {
                .lhs = arg_expr,
                .rhs = addExtra(p,
                    (AstNodeIndex[]) { span.start, span.end }, 2),
            },
        });
}
static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
const TokenizerTag tok = p->token_tags[p->tok_i]; const TokenizerTag tok = p->token_tags[p->tok_i];
switch (tok) { switch (tok) {
case TOKEN_CHAR_LITERAL: case TOKEN_CHAR_LITERAL:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_CHAR_LITERAL,
.main_token = nextToken(p),
.data = {},
});
case TOKEN_NUMBER_LITERAL: case TOKEN_NUMBER_LITERAL:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_NUMBER_LITERAL,
.main_token = nextToken(p),
.data = {},
});
case TOKEN_KEYWORD_UNREACHABLE: case TOKEN_KEYWORD_UNREACHABLE:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_UNREACHABLE_LITERAL,
.main_token = nextToken(p),
.data = {},
});
case TOKEN_KEYWORD_ANYFRAME: case TOKEN_KEYWORD_ANYFRAME:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok)); tokenizerGetTagString(tok));
@@ -326,15 +460,29 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
case TOKEN_BUILTIN: case TOKEN_BUILTIN:
return parseBuiltinCall(p); return parseBuiltinCall(p);
case TOKEN_KEYWORD_FN: case TOKEN_KEYWORD_FN:
return parseFnProto(p);
case TOKEN_KEYWORD_IF: case TOKEN_KEYWORD_IF:
case TOKEN_KEYWORD_SWITCH: case TOKEN_KEYWORD_SWITCH:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok));
exit(1);
case TOKEN_KEYWORD_EXTERN: case TOKEN_KEYWORD_EXTERN:
case TOKEN_KEYWORD_PACKED: case TOKEN_KEYWORD_PACKED:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok));
exit(1);
case TOKEN_KEYWORD_STRUCT: case TOKEN_KEYWORD_STRUCT:
case TOKEN_KEYWORD_OPAQUE: case TOKEN_KEYWORD_OPAQUE:
case TOKEN_KEYWORD_ENUM: case TOKEN_KEYWORD_ENUM:
case TOKEN_KEYWORD_UNION: case TOKEN_KEYWORD_UNION:
return parseContainerDeclAuto(p);
case TOKEN_KEYWORD_COMPTIME: case TOKEN_KEYWORD_COMPTIME:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_COMPTIME,
.main_token = nextToken(p),
.data = { .lhs = parseTypeExpr(p), .rhs = 0 },
});
case TOKEN_MULTILINE_STRING_LITERAL_LINE: case TOKEN_MULTILINE_STRING_LITERAL_LINE:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok)); tokenizerGetTagString(tok));
@@ -357,10 +505,20 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
case TOKEN_KEYWORD_WHILE: case TOKEN_KEYWORD_WHILE:
case TOKEN_PERIOD: case TOKEN_PERIOD:
case TOKEN_KEYWORD_ERROR: case TOKEN_KEYWORD_ERROR:
case TOKEN_L_PAREN:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok)); tokenizerGetTagString(tok));
exit(1); exit(1);
case TOKEN_L_PAREN: {
const AstTokenIndex lparen = nextToken(p);
const AstNodeIndex inner = expectExpr(p);
const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN);
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_GROUPED_EXPRESSION,
.main_token = lparen,
.data = { .lhs = inner, .rhs = rparen },
});
}
default: default:
return null_node; return null_node;
} }
@@ -494,12 +652,57 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) {
} }
static AstNodeIndex parseTypeExpr(Parser* p) { static AstNodeIndex parseTypeExpr(Parser* p) {
const AstNodeIndex tok = p->token_tags[p->tok_i]; const TokenizerTag tok = p->token_tags[p->tok_i];
switch (tok) { switch (tok) {
case TOKEN_QUESTION_MARK: case TOKEN_QUESTION_MARK:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_OPTIONAL_TYPE,
.main_token = nextToken(p),
.data = { .lhs = parseTypeExpr(p), .rhs = 0 },
});
case TOKEN_KEYWORD_ANYFRAME: case TOKEN_KEYWORD_ANYFRAME:
case TOKEN_ASTERISK: fprintf(stderr, "parseTypeExpr not supported for %s\n",
tokenizerGetTagString(tok));
exit(1);
case TOKEN_ASTERISK: {
const AstTokenIndex asterisk = nextToken(p);
const AstNodeIndex align_expr = parseByteAlign(p);
const AstNodeIndex sentinel
= eatToken(p, TOKEN_COLON) != null_token ? parseExpr(p) : 0;
// skip const/volatile/allowzero modifiers
while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST
|| p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE
|| p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO)
p->tok_i++;
const AstNodeIndex child_type = parseTypeExpr(p);
if (sentinel != 0) {
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_PTR_TYPE_SENTINEL,
.main_token = asterisk,
.data = { .lhs = sentinel, .rhs = child_type },
});
}
if (align_expr != 0) {
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_PTR_TYPE_ALIGNED,
.main_token = asterisk,
.data = { .lhs = align_expr, .rhs = child_type },
});
}
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_PTR_TYPE_ALIGNED,
.main_token = asterisk,
.data = { .lhs = 0, .rhs = child_type },
});
}
case TOKEN_ASTERISK_ASTERISK: case TOKEN_ASTERISK_ASTERISK:
fprintf(stderr, "parseTypeExpr not supported for %s\n",
tokenizerGetTagString(tok));
exit(1);
case TOKEN_L_BRACKET: case TOKEN_L_BRACKET:
fprintf(stderr, "parseTypeExpr not supported for %s\n", fprintf(stderr, "parseTypeExpr not supported for %s\n",
tokenizerGetTagString(tok)); tokenizerGetTagString(tok));
@@ -507,6 +710,7 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
default: default:
return parseErrorUnionExpr(p); return parseErrorUnionExpr(p);
} }
return 0; // tcc
} }
static SmallSpan parseParamDeclList(Parser* p) { static SmallSpan parseParamDeclList(Parser* p) {
@@ -527,8 +731,8 @@ static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
} }
static AstNodeIndex parseFnProto(Parser* p) { static AstNodeIndex parseFnProto(Parser* p) {
AstNodeIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN); AstTokenIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN);
if (fn_token == null_node) if (fn_token == null_token)
return null_node; return null_node;
AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO); AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO);
@@ -1110,9 +1314,13 @@ static AstNodeIndex parseGlobalVarDecl(Parser* p) {
if (var_decl == 0) { if (var_decl == 0) {
return null_node; return null_node;
} }
fprintf(stderr, "parseGlobalVarDecl does not support parsing var decls\n");
exit(1); if (eatToken(p, TOKEN_EQUAL) != null_token) {
return 0; // tcc const AstNodeIndex init_expr = expectExpr(p);
p->nodes.datas[var_decl].rhs = init_expr;
}
expectToken(p, TOKEN_SEMICOLON);
return var_decl;
} }
static AstNodeIndex expectTopLevelDecl(Parser* p) { static AstNodeIndex expectTopLevelDecl(Parser* p) {
@@ -1240,12 +1448,30 @@ static Members parseContainerMembers(Parser* p) {
eatDocComments(p); eatDocComments(p);
switch (p->token_tags[p->tok_i]) { switch (p->token_tags[p->tok_i]) {
case TOKEN_KEYWORD_TEST: case TOKEN_KEYWORD_TEST:
case TOKEN_KEYWORD_COMPTIME:
case TOKEN_KEYWORD_USINGNAMESPACE:; case TOKEN_KEYWORD_USINGNAMESPACE:;
const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]); const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
fprintf( fprintf(
stderr, "%s not implemented in parseContainerMembers\n", str); stderr, "%s not implemented in parseContainerMembers\n", str);
exit(1); exit(1);
case TOKEN_KEYWORD_COMPTIME:
// comptime can be a container field modifier or a comptime
// block/decl. Check if it's followed by a block (comptime { ...
// }).
if (p->token_tags[p->tok_i + 1] == TOKEN_L_BRACE) {
p->tok_i++;
const AstNodeIndex block_node = parseBlock(p);
SLICE_APPEND(AstNodeIndex, &p->scratch,
addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_COMPTIME,
.main_token = p->tok_i - 1,
.data = { .lhs = block_node, .rhs = 0 },
}));
trailing = p->token_tags[p->tok_i - 1] == TOKEN_R_BRACE;
break;
}
// Otherwise it's a container field with comptime modifier
goto container_field;
case TOKEN_KEYWORD_PUB: { case TOKEN_KEYWORD_PUB: {
p->tok_i++; p->tok_i++;
AstNodeIndex top_level_decl = expectTopLevelDecl(p); AstNodeIndex top_level_decl = expectTopLevelDecl(p);
@@ -1281,9 +1507,10 @@ static Members parseContainerMembers(Parser* p) {
case TOKEN_EOF: case TOKEN_EOF:
case TOKEN_R_BRACE: case TOKEN_R_BRACE:
goto break_loop; goto break_loop;
container_field:
default:; default:;
// skip parseCStyleContainer // skip parseCStyleContainer
const AstNodeIndex container_field = expectContainerField(p); const AstNodeIndex field_node = expectContainerField(p);
switch (field_state.tag) { switch (field_state.tag) {
case FIELD_STATE_NONE: case FIELD_STATE_NONE:
field_state.tag = FIELD_STATE_SEEN; field_state.tag = FIELD_STATE_SEEN;
@@ -1294,7 +1521,7 @@ static Members parseContainerMembers(Parser* p) {
fprintf(stderr, "parseContainerMembers error condition\n"); fprintf(stderr, "parseContainerMembers error condition\n");
exit(1); exit(1);
} }
SLICE_APPEND(AstNodeIndex, &p->scratch, container_field); SLICE_APPEND(AstNodeIndex, &p->scratch, field_node);
switch (p->token_tags[p->tok_i]) { switch (p->token_tags[p->tok_i]) {
case TOKEN_COMMA: case TOKEN_COMMA:
p->tok_i++; p->tok_i++;

View File

@@ -560,3 +560,17 @@ test "my function" {
\\ \\
); );
} }
// Ported from upstream Zig's parser_test.zig. Round-trips a tuple
// struct through the formatter, exercising: a doc comment on a tuple
// field, the comptime field modifier, a pointer-typed field with a
// default value (= 1), and a parenthesized fn type with align(1) —
// the parentheses keep it from being parsed as a function declaration.
test "zig fmt: tuple struct" {
    try testCanonical(
        \\const T = struct {
        \\    /// doc comment on tuple field
        \\    comptime comptime u32,
        \\    /// another doc comment on tuple field
        \\    *u32 = 1,
        \\    // needs to be wrapped in parentheses to not be parsed as a function decl
        \\    (fn () void) align(1),
        \\};
        \\
    );
}