Add structural AST consistency check to parser tests

Compare the C parser's AST against Zig's std.zig.Ast.parse() output in
every testParse call. This catches structural mismatches (tokens, nodes,
extra_data) without needing a separate corpus.

Also fix two C parser bugs found by the new check:
- Empty anonymous init `.{}` now uses struct_init_dot_two (not
  array_init_dot_two), matching the Zig parser.
- for-type-expr with single input and no else now emits for_simple
  (not for with extra_data), matching the Zig parser's parseFor.

Skip the check under valgrind, since Zig's tokenizer uses SIMD instructions (AVX-512) that valgrind does not support.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-11 23:58:18 +02:00
parent 202733edbc
commit bf200f7ef9
2 changed files with 119 additions and 11 deletions

View File

@@ -1687,26 +1687,27 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
const uint32_t scratch_top2 = p->scratch.len; const uint32_t scratch_top2 = p->scratch.len;
const uint32_t inputs = forPrefix(p); const uint32_t inputs = forPrefix(p);
const AstNodeIndex body = parseTypeExpr(p); const AstNodeIndex body = parseTypeExpr(p);
bool has_else = false;
if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) { if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) {
parsePayload(p); parsePayload(p);
SLICE_APPEND(AstNodeIndex, &p->scratch, body); SLICE_APPEND(AstNodeIndex, &p->scratch, body);
const AstNodeIndex else_expr = parseTypeExpr(p); const AstNodeIndex else_expr = parseTypeExpr(p);
SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr); SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr);
const uint32_t total = p->scratch.len - scratch_top2; has_else = true;
const AstSubRange span } else if (inputs == 1) {
= listToSpan(p, &p->scratch.arr[scratch_top2], total);
p->scratch.len = scratch_top2; p->scratch.len = scratch_top2;
return addNode(&p->nodes, return addNode(&p->nodes,
(AstNodeItem) { (AstNodeItem) {
.tag = AST_NODE_FOR, .tag = AST_NODE_FOR_SIMPLE,
.main_token = for_token, .main_token = for_token,
.data = { .data = {
.lhs = span.start, .lhs = p->scratch.arr[scratch_top2],
.rhs = ((uint32_t)inputs & 0x7FFFFFFF) | (1u << 31), .rhs = body,
}, },
}); });
} else {
SLICE_APPEND(AstNodeIndex, &p->scratch, body);
} }
SLICE_APPEND(AstNodeIndex, &p->scratch, body);
const uint32_t total = p->scratch.len - scratch_top2; const uint32_t total = p->scratch.len - scratch_top2;
const AstSubRange span const AstSubRange span
= listToSpan(p, &p->scratch.arr[scratch_top2], total); = listToSpan(p, &p->scratch.arr[scratch_top2], total);
@@ -1717,7 +1718,8 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
.main_token = for_token, .main_token = for_token,
.data = { .data = {
.lhs = span.start, .lhs = span.start,
.rhs = (uint32_t)inputs & 0x7FFFFFFF, .rhs = ((uint32_t)inputs & 0x7FFFFFFF)
| (has_else ? (1u << 31) : 0),
}, },
}); });
} }
@@ -2275,8 +2277,10 @@ static AstNodeIndex parseInitList(
case 2: case 2:
return addNode(&p->nodes, return addNode(&p->nodes,
(AstNodeItem) { (AstNodeItem) {
.tag = comma ? AST_NODE_ARRAY_INIT_DOT_TWO_COMMA .tag = (elems_len == 0)
: AST_NODE_ARRAY_INIT_DOT_TWO, ? AST_NODE_STRUCT_INIT_DOT_TWO
: (comma ? AST_NODE_ARRAY_INIT_DOT_TWO_COMMA
: AST_NODE_ARRAY_INIT_DOT_TWO),
.main_token = lbrace, .main_token = lbrace,
.data = { .data = {
.lhs = elems_len >= 1 .lhs = elems_len >= 1

View File

@@ -6391,6 +6391,14 @@ fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *
var tree = try zigAst(allocator, c_tree); var tree = try zigAst(allocator, c_tree);
defer tree.deinit(allocator); defer tree.deinit(allocator);
// Skip consistency check under valgrind: Zig's tokenizer uses SIMD
// instructions (AVX-512) that valgrind does not support.
if (!@import("std").debug.inValgrind()) {
var zig_tree = try Ast.parse(allocator, source, .zig);
defer zig_tree.deinit(allocator);
try expectAstConsistent(tree, zig_tree, source);
}
if (tree.errors.len != 0) { if (tree.errors.len != 0) {
return error.ParseError; return error.ParseError;
} }
@@ -6434,6 +6442,103 @@ fn testError(source: [:0]const u8, expected_errors: []const Error) !void {
} }
} }
// Tells the consistency check how many of the two u32 words in a node's
// Node.Data should be compared for the given tag:
//   0 -> skip the data entirely,
//   1 -> compare only the first word,
//   2 -> compare both words.
fn dataFieldCount(tag: Ast.Node.Tag) u2 {
    switch (tag) {
        // Single .node / .opt_node payload: only the first word is meaningful.
        .address_of,
        .bit_not,
        .bool_not,
        .@"comptime",
        .@"defer",
        .deref,
        .negation,
        .negation_wrap,
        .@"nosuspend",
        .optional_type,
        .@"resume",
        .@"return",
        .@"suspend",
        .@"try",
        => return 1,

        // Data is unused for these tags (left undefined by the Zig parser),
        // so there is nothing to compare.
        .anyframe_literal,
        .char_literal,
        .enum_literal,
        .error_value,
        .identifier,
        .number_literal,
        .string_literal,
        .unreachable_literal,
        => return 0,

        // Every other tag is treated as using both words.
        else => return 2,
    }
}
// Compares the tree built from the C parser (`c_tree`) against the tree from
// Zig's own parser (`zig_tree`). Tokens, nodes and extra_data are checked in
// that order; the first difference found is printed and reported as
// error.TestExpectedEqual. `source` is accepted but currently unused.
fn expectAstConsistent(c_tree: Ast, zig_tree: Ast, source: [:0]const u8) !void {
    _ = source;

    // Tokens: counts must agree, then start offset and tag at every index.
    const c_token_count = c_tree.tokens.len;
    const z_token_count = zig_tree.tokens.len;
    if (c_token_count != z_token_count) {
        print("token count mismatch: c={d} zig={d}\n", .{ c_token_count, z_token_count });
        return error.TestExpectedEqual;
    }
    const c_starts = c_tree.tokens.items(.start);
    const z_starts = zig_tree.tokens.items(.start);
    const c_token_tags = c_tree.tokens.items(.tag);
    const z_token_tags = zig_tree.tokens.items(.tag);
    for (c_starts, z_starts, c_token_tags, z_token_tags, 0..) |c_start, z_start, c_kind, z_kind, idx| {
        if (c_start != z_start) {
            print("token[{d}] start mismatch: c={d} zig={d}\n", .{ idx, c_start, z_start });
            return error.TestExpectedEqual;
        }
        if (c_kind != z_kind) {
            print("token[{d}] tag mismatch: c={s} zig={s}\n", .{ idx, @tagName(c_kind), @tagName(z_kind) });
            return error.TestExpectedEqual;
        }
    }

    // Nodes: counts must agree, then tag, main_token and the meaningful part
    // of the data payload at every index.
    const c_node_count = c_tree.nodes.len;
    const z_node_count = zig_tree.nodes.len;
    if (c_node_count != z_node_count) {
        print("node count mismatch: c={d} zig={d}\n", .{ c_node_count, z_node_count });
        return error.TestExpectedEqual;
    }
    const c_node_tags = c_tree.nodes.items(.tag);
    const z_node_tags = zig_tree.nodes.items(.tag);
    const c_main_tokens = c_tree.nodes.items(.main_token);
    const z_main_tokens = zig_tree.nodes.items(.main_token);
    const c_payloads = c_tree.nodes.items(.data);
    const z_payloads = zig_tree.nodes.items(.data);
    for (0..c_node_count) |idx| {
        const c_tag = c_node_tags[idx];
        const z_tag = z_node_tags[idx];
        if (c_tag != z_tag) {
            print("node[{d}] tag mismatch: c={s} zig={s}\n", .{ idx, @tagName(c_tag), @tagName(z_tag) });
            return error.TestExpectedEqual;
        }

        const c_main = c_main_tokens[idx];
        const z_main = z_main_tokens[idx];
        if (c_main != z_main) {
            print("node[{d}] main_token mismatch: c={d} zig={d}\n", .{ idx, c_main, z_main });
            return error.TestExpectedEqual;
        }

        // Only compare as many data words as dataFieldCount says are
        // meaningful for this tag.
        const meaningful = dataFieldCount(c_tag);
        if (meaningful == 0) continue;

        // View each data payload as two raw u32 words.
        const c_words: *const [2]u32 = @ptrCast(&c_payloads[idx]);
        const z_words: *const [2]u32 = @ptrCast(&z_payloads[idx]);
        if (c_words[0] != z_words[0]) {
            print("node[{d}] data[0] mismatch: c={d} zig={d}\n", .{ idx, c_words[0], z_words[0] });
            return error.TestExpectedEqual;
        }

        if (meaningful == 1) continue;
        if (c_words[1] != z_words[1]) {
            print("node[{d}] data[1] mismatch: c={d} zig={d}\n", .{ idx, c_words[1], z_words[1] });
            return error.TestExpectedEqual;
        }
    }

    // extra_data: lengths must agree, then every word.
    const c_extra = c_tree.extra_data;
    const z_extra = zig_tree.extra_data;
    if (c_extra.len != z_extra.len) {
        print("extra_data length mismatch: c={d} zig={d}\n", .{ c_extra.len, z_extra.len });
        return error.TestExpectedEqual;
    }
    for (c_extra, z_extra, 0..) |c_word, z_word, idx| {
        if (c_word != z_word) {
            print("extra_data[{d}] mismatch: c={d} zig={d}\n", .{ idx, c_word, z_word });
            return error.TestExpectedEqual;
        }
    }
}
const testing = std.testing; const testing = std.testing;
const Ast = std.zig.Ast; const Ast = std.zig.Ast;
@@ -6914,4 +7019,3 @@ pub fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
.errors = errors, .errors = errors,
}; };
} }