From a4d9e12498c9325b679006138178d23d3606e7b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Tue, 10 Feb 2026 17:40:48 +0000 Subject: [PATCH] parser: port pointer modifier tests Port tests: - "pointer-to-one with modifiers" - "pointer-to-many with modifiers" - "sentinel pointer with modifiers" - "c pointer with modifiers" - "slice with modifiers" - "sentinel slice with modifiers" - "allowzero pointer" Implement in parser.c: - parsePtrModifiersAndType: shared pointer modifier parsing with align(expr:expr:expr) bit-range, addrspace, sentinel support - ptr_type, ptr_type_bit_range nodes with proper OptionalIndex encoding via global OPT() macro - Refactor * pointer type parsing to use the shared code; [*] and slice parsing keep their modifier handling inline Co-Authored-By: Claude Opus 4.6 (1M context) --- parser.c | 231 +++++++++++++++++++++++++++++++++++------------- parser_test.zig | 59 +++++++++++++ 2 files changed, 227 insertions(+), 63 deletions(-) diff --git a/parser.c b/parser.c index 8dec9f51a7..fcf0504ce0 100644 --- a/parser.c +++ b/parser.c @@ -11,6 +11,9 @@ const AstNodeIndex null_node = 0; const AstTokenIndex null_token = ~(AstTokenIndex)(0); +// OPT encodes a node index as OptionalIndex: 0 → ~0 (none) +#define OPT(x) ((x) == 0 ? ~(AstNodeIndex)0 : (x)) + typedef struct { uint32_t len; AstNodeIndex lhs; @@ -790,6 +793,95 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) { }); } +// parsePtrModifiersAndType parses pointer modifiers (allowzero, align, +// addrspace, const, volatile, sentinel) and the child type for a pointer +// started at main_token. 
+static AstNodeIndex parsePtrModifiersAndType( + Parser* p, AstTokenIndex main_token) { + AstNodeIndex sentinel = 0; + AstNodeIndex align_expr = 0; + AstNodeIndex bit_range_start = 0; + AstNodeIndex bit_range_end = 0; + AstNodeIndex addrspace_expr = 0; + + // optional sentinel right after the main token, e.g. *:0 + if (eatToken(p, TOKEN_COLON) != null_token) + sentinel = expectExpr(p); + + // skip allowzero/const/volatile before align; they are not stored + while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) + p->tok_i++; + + // align(expr) or align(expr:expr:expr) + if (eatToken(p, TOKEN_KEYWORD_ALIGN) != null_token) { + expectToken(p, TOKEN_L_PAREN); + align_expr = expectExpr(p); + if (eatToken(p, TOKEN_COLON) != null_token) { + bit_range_start = expectExpr(p); + expectToken(p, TOKEN_COLON); + bit_range_end = expectExpr(p); + } + expectToken(p, TOKEN_R_PAREN); + } + + // addrspace + addrspace_expr = parseAddrSpace(p); + + // const, volatile, allowzero (after align/addrspace) + while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) + p->tok_i++; + + const AstNodeIndex child_type = parseTypeExpr(p); + // sentinel+align needs PTR_TYPE: PTR_TYPE_SENTINEL cannot hold an align. + if (bit_range_start != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_BIT_RANGE, + .main_token = main_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { OPT(sentinel), align_expr, + OPT(addrspace_expr), bit_range_start, + bit_range_end }, + 5), + .rhs = child_type, + }, + }); + } + if (addrspace_expr != 0 || (sentinel != 0 && align_expr != 0)) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE, + .main_token = main_token, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { OPT(sentinel), OPT(align_expr), + OPT(addrspace_expr) }, + 3), + .rhs = child_type, + }, + }); + } + if (sentinel != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_SENTINEL, 
.main_token = main_token, + .data = { .lhs = sentinel, .rhs = child_type }, + }); + } + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_ALIGNED, + .main_token = main_token, + .data = { .lhs = align_expr, .rhs = child_type }, + }); +} + static AstNodeIndex parseTypeExpr(Parser* p) { const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { @@ -806,39 +898,7 @@ static AstNodeIndex parseTypeExpr(Parser* p) { exit(1); case TOKEN_ASTERISK: { const AstTokenIndex asterisk = nextToken(p); - const AstNodeIndex align_expr = parseByteAlign(p); - const AstNodeIndex sentinel - = eatToken(p, TOKEN_COLON) != null_token ? parseExpr(p) : 0; - // const/volatile/allowzero are pointer modifiers consumed here. - // They are not stored in the AST node; the renderer re-derives - // them from token positions. - while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST - || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE - || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) - p->tok_i++; - const AstNodeIndex child_type = parseTypeExpr(p); - if (sentinel != 0) { - return addNode(&p->nodes, - (AstNodeItem) { - .tag = AST_NODE_PTR_TYPE_SENTINEL, - .main_token = asterisk, - .data = { .lhs = sentinel, .rhs = child_type }, - }); - } - if (align_expr != 0) { - return addNode(&p->nodes, - (AstNodeItem) { - .tag = AST_NODE_PTR_TYPE_ALIGNED, - .main_token = asterisk, - .data = { .lhs = align_expr, .rhs = child_type }, - }); - } - return addNode(&p->nodes, - (AstNodeItem) { - .tag = AST_NODE_PTR_TYPE_ALIGNED, - .main_token = asterisk, - .data = { .lhs = 0, .rhs = child_type }, - }); + return parsePtrModifiersAndType(p, asterisk); } case TOKEN_ASTERISK_ASTERISK: { // ** is two nested pointer types sharing the same token @@ -879,32 +939,54 @@ static AstNodeIndex parseTypeExpr(Parser* p) { sentinel = expectExpr(p); } expectToken(p, TOKEN_R_BRACKET); - // const/volatile/allowzero pointer modifiers + // Reuse shared pointer modifier + type parsing + // If we captured a 
sentinel from [*:s], temporarily store it + // and let parsePtrModifiersAndType handle the rest. + // But parsePtrModifiersAndType expects sentinel after main + // token via `:`. Since we already consumed it, we need to + // handle this inline. + + // allowzero, const, volatile (before align) while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) p->tok_i++; - const AstNodeIndex align_expr = parseByteAlign(p); - const AstNodeIndex addrspace_expr = parseAddrSpace(p); - // const/volatile/allowzero again (can appear before or after - // align) - while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST - || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE - || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) - p->tok_i++; - const AstNodeIndex elem_type = parseTypeExpr(p); - if (sentinel != 0) { - if (addrspace_expr != 0) { - fprintf(stderr, - "parseTypeExpr: [*:s] with addrspace not " - "implemented\n"); - exit(1); + + AstNodeIndex align_expr = 0; + AstNodeIndex bit_range_start = 0; + AstNodeIndex bit_range_end = 0; + if (eatToken(p, TOKEN_KEYWORD_ALIGN) != null_token) { + expectToken(p, TOKEN_L_PAREN); + align_expr = expectExpr(p); + if (eatToken(p, TOKEN_COLON) != null_token) { + bit_range_start = expectExpr(p); + expectToken(p, TOKEN_COLON); + bit_range_end = expectExpr(p); } + expectToken(p, TOKEN_R_PAREN); + } + const AstNodeIndex addrspace_expr = parseAddrSpace(p); + + while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) + p->tok_i++; + + const AstNodeIndex elem_type = parseTypeExpr(p); + + if (bit_range_start != 0) { return addNode(&p->nodes, (AstNodeItem) { - .tag = AST_NODE_PTR_TYPE_SENTINEL, + .tag = AST_NODE_PTR_TYPE_BIT_RANGE, .main_token = lbracket, - .data = { .lhs = sentinel, .rhs = elem_type }, + .data = { + .lhs = addExtra(p, + 
(AstNodeIndex[]) { OPT(sentinel), + align_expr, OPT(addrspace_expr), + bit_range_start, bit_range_end }, + 5), + .rhs = elem_type, + }, }); } if (addrspace_expr != 0) { @@ -914,13 +996,21 @@ static AstNodeIndex parseTypeExpr(Parser* p) { .main_token = lbracket, .data = { .lhs = addExtra(p, - (AstNodeIndex[]) { - 0, align_expr, addrspace_expr }, + (AstNodeIndex[]) { OPT(sentinel), + OPT(align_expr), addrspace_expr }, 3), .rhs = elem_type, }, }); } + if (sentinel != 0) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE_SENTINEL, + .main_token = lbracket, + .data = { .lhs = sentinel, .rhs = elem_type }, + }); + } return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_PTR_TYPE_ALIGNED, @@ -933,16 +1023,34 @@ static AstNodeIndex parseTypeExpr(Parser* p) { = eatToken(p, TOKEN_COLON) != null_token ? expectExpr(p) : 0; expectToken(p, TOKEN_R_BRACKET); if (len_expr == 0) { - // Slice type: []T or [:s]T - // const/volatile/allowzero are pointer modifiers consumed here. - // They are not stored in the AST node; the renderer re-derives - // them from token positions. 
+ // Slice type: []T or [:s]T, modifiers parsed inline. + // addrspace or sentinel+align -> PTR_TYPE; const etc. only skipped. + while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE + || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) + p->tok_i++; + const AstNodeIndex align_expr = parseByteAlign(p); + const AstNodeIndex addrspace_expr = parseAddrSpace(p); while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) p->tok_i++; const AstNodeIndex elem_type = parseTypeExpr(p); - if (sentinel != 0) { + if (addrspace_expr != 0 || (sentinel != 0 && align_expr != 0)) { + return addNode(&p->nodes, + (AstNodeItem) { + .tag = AST_NODE_PTR_TYPE, + .main_token = lbracket, + .data = { + .lhs = addExtra(p, + (AstNodeIndex[]) { OPT(sentinel), + OPT(align_expr), OPT(addrspace_expr) }, + 3), + .rhs = elem_type, + }, + }); + } + if (sentinel != 0 && align_expr == 0) { return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_PTR_TYPE_SENTINEL, @@ -954,7 +1062,7 @@ static AstNodeIndex parseTypeExpr(Parser* p) { (AstNodeItem) { .tag = AST_NODE_PTR_TYPE_ALIGNED, .main_token = lbracket, - .data = { .lhs = 0, .rhs = elem_type }, + .data = { .lhs = align_expr, .rhs = elem_type }, }); } // Array type: [N]T or [N:s]T @@ -1102,9 +1210,7 @@ static AstNodeIndex parseFnProto(Parser* p) { } } -// Complex fn proto with align/section/callconv/addrspace -// Extra data fields are OptionalIndex: 0 → ~0 (none) -#define OPT(x) ((x) == 0 ? 
~(AstNodeIndex)0 : (x)) + // Complex fn proto with align/section/callconv/addrspace switch (params.tag) { case SMALL_SPAN_ZERO_OR_ONE: return setNode(p, fn_proto_index, @@ -1138,7 +1244,6 @@ static AstNodeIndex parseFnProto(Parser* p) { }, }); } -#undef OPT return 0; // tcc } diff --git a/parser_test.zig b/parser_test.zig index 6116330388..f61c6ae17d 100644 --- a/parser_test.zig +++ b/parser_test.zig @@ -1864,6 +1864,65 @@ test "zig fmt: C pointers" { ); } +test "zig fmt: pointer-to-one with modifiers" { + try testCanonical( + \\const x: *u32 = undefined; + \\const y: *allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\const z: *allowzero align(8:4:2) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: pointer-to-many with modifiers" { + try testCanonical( + \\const x: [*]u32 = undefined; + \\const y: [*]allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\const z: [*]allowzero align(8:4:2) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: sentinel pointer with modifiers" { + try testCanonical( + \\const x: [*:42]u32 = undefined; + \\const y: [*:42]allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\const y: [*:42]allowzero align(8:4:2) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: c pointer with modifiers" { + try testCanonical( + \\const x: [*c]u32 = undefined; + \\const y: [*c]allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\const z: [*c]allowzero align(8:4:2) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: slice with modifiers" { + try testCanonical( + \\const x: []u32 = undefined; + \\const y: []allowzero align(8) addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: sentinel slice with modifiers" { + try testCanonical( + \\const x: [:42]u32 = undefined; + \\const y: [:42]allowzero align(8) 
addrspace(.generic) const volatile u32 = undefined; + \\ + ); +} + +test "zig fmt: allowzero pointer" { + try testCanonical( + \\const T = [*]allowzero const u8; + \\ + ); +} + test "zig fmt: sentinel-terminated array type" { try testCanonical( \\pub fn cStrToPrefixedFileW(s: [*:0]const u8) ![PATH_MAX_WIDE:0]u16 {