Add structural AST consistency check to parser tests
Compare the C parser's AST against Zig's std.zig.Ast.parse() output in
every testParse call. This catches structural mismatches (tokens, nodes,
extra_data) without needing a separate corpus.
Also fix two C parser bugs found by the new check:
- Empty anonymous init `.{}` now uses struct_init_dot_two (not
array_init_dot_two), matching the Zig parser.
- A `for` type expression with a single input and no `else` branch now emits
`for_simple` (not `for` with extra_data), matching the Zig parser's parseFor.
Skip the check under valgrind, since Zig's tokenizer uses SIMD instructions (AVX-512) that valgrind does not support.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
24
parser.c
24
parser.c
@@ -1687,26 +1687,27 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
|
|||||||
const uint32_t scratch_top2 = p->scratch.len;
|
const uint32_t scratch_top2 = p->scratch.len;
|
||||||
const uint32_t inputs = forPrefix(p);
|
const uint32_t inputs = forPrefix(p);
|
||||||
const AstNodeIndex body = parseTypeExpr(p);
|
const AstNodeIndex body = parseTypeExpr(p);
|
||||||
|
bool has_else = false;
|
||||||
if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) {
|
if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) {
|
||||||
parsePayload(p);
|
parsePayload(p);
|
||||||
SLICE_APPEND(AstNodeIndex, &p->scratch, body);
|
SLICE_APPEND(AstNodeIndex, &p->scratch, body);
|
||||||
const AstNodeIndex else_expr = parseTypeExpr(p);
|
const AstNodeIndex else_expr = parseTypeExpr(p);
|
||||||
SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr);
|
SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr);
|
||||||
const uint32_t total = p->scratch.len - scratch_top2;
|
has_else = true;
|
||||||
const AstSubRange span
|
} else if (inputs == 1) {
|
||||||
= listToSpan(p, &p->scratch.arr[scratch_top2], total);
|
|
||||||
p->scratch.len = scratch_top2;
|
p->scratch.len = scratch_top2;
|
||||||
return addNode(&p->nodes,
|
return addNode(&p->nodes,
|
||||||
(AstNodeItem) {
|
(AstNodeItem) {
|
||||||
.tag = AST_NODE_FOR,
|
.tag = AST_NODE_FOR_SIMPLE,
|
||||||
.main_token = for_token,
|
.main_token = for_token,
|
||||||
.data = {
|
.data = {
|
||||||
.lhs = span.start,
|
.lhs = p->scratch.arr[scratch_top2],
|
||||||
.rhs = ((uint32_t)inputs & 0x7FFFFFFF) | (1u << 31),
|
.rhs = body,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
SLICE_APPEND(AstNodeIndex, &p->scratch, body);
|
||||||
}
|
}
|
||||||
SLICE_APPEND(AstNodeIndex, &p->scratch, body);
|
|
||||||
const uint32_t total = p->scratch.len - scratch_top2;
|
const uint32_t total = p->scratch.len - scratch_top2;
|
||||||
const AstSubRange span
|
const AstSubRange span
|
||||||
= listToSpan(p, &p->scratch.arr[scratch_top2], total);
|
= listToSpan(p, &p->scratch.arr[scratch_top2], total);
|
||||||
@@ -1717,7 +1718,8 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
|
|||||||
.main_token = for_token,
|
.main_token = for_token,
|
||||||
.data = {
|
.data = {
|
||||||
.lhs = span.start,
|
.lhs = span.start,
|
||||||
.rhs = (uint32_t)inputs & 0x7FFFFFFF,
|
.rhs = ((uint32_t)inputs & 0x7FFFFFFF)
|
||||||
|
| (has_else ? (1u << 31) : 0),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -2275,8 +2277,10 @@ static AstNodeIndex parseInitList(
|
|||||||
case 2:
|
case 2:
|
||||||
return addNode(&p->nodes,
|
return addNode(&p->nodes,
|
||||||
(AstNodeItem) {
|
(AstNodeItem) {
|
||||||
.tag = comma ? AST_NODE_ARRAY_INIT_DOT_TWO_COMMA
|
.tag = (elems_len == 0)
|
||||||
: AST_NODE_ARRAY_INIT_DOT_TWO,
|
? AST_NODE_STRUCT_INIT_DOT_TWO
|
||||||
|
: (comma ? AST_NODE_ARRAY_INIT_DOT_TWO_COMMA
|
||||||
|
: AST_NODE_ARRAY_INIT_DOT_TWO),
|
||||||
.main_token = lbrace,
|
.main_token = lbrace,
|
||||||
.data = {
|
.data = {
|
||||||
.lhs = elems_len >= 1
|
.lhs = elems_len >= 1
|
||||||
|
|||||||
106
parser_test.zig
106
parser_test.zig
@@ -6391,6 +6391,14 @@ fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *
|
|||||||
var tree = try zigAst(allocator, c_tree);
|
var tree = try zigAst(allocator, c_tree);
|
||||||
defer tree.deinit(allocator);
|
defer tree.deinit(allocator);
|
||||||
|
|
||||||
|
// Skip consistency check under valgrind: Zig's tokenizer uses SIMD
|
||||||
|
// instructions (AVX-512) that valgrind does not support.
|
||||||
|
if (!@import("std").debug.inValgrind()) {
|
||||||
|
var zig_tree = try Ast.parse(allocator, source, .zig);
|
||||||
|
defer zig_tree.deinit(allocator);
|
||||||
|
try expectAstConsistent(tree, zig_tree, source);
|
||||||
|
}
|
||||||
|
|
||||||
if (tree.errors.len != 0) {
|
if (tree.errors.len != 0) {
|
||||||
return error.ParseError;
|
return error.ParseError;
|
||||||
}
|
}
|
||||||
@@ -6434,6 +6442,103 @@ fn testError(source: [:0]const u8, expected_errors: []const Error) !void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports how many of the two u32 words in a node's `data` are treated as
/// meaningful for `tag`.
///
/// Returns 0 when `data` is considered unused (left undefined by the Zig
/// parser), 1 when only the first word is meaningful, and 2 when both are.
fn dataFieldCount(tag: Ast.Node.Tag) u2 {
    switch (tag) {
        // Leaf-like nodes: `data` carries nothing to compare.
        .anyframe_literal,
        .char_literal,
        .enum_literal,
        .error_value,
        .identifier,
        .number_literal,
        .string_literal,
        .unreachable_literal,
        => return 0,

        // Nodes holding a single `.node` / `.opt_node`: only the first word.
        .address_of,
        .bit_not,
        .bool_not,
        .@"comptime",
        .@"defer",
        .deref,
        .negation,
        .negation_wrap,
        .@"nosuspend",
        .optional_type,
        .@"resume",
        .@"return",
        .@"suspend",
        .@"try",
        => return 1,

        // Any other tag: both words are compared.
        else => return 2,
    }
}
|
||||||
|
|
||||||
|
/// Checks that `c_tree` and `zig_tree` are structurally identical.
///
/// Comparison proceeds in this order and stops at the first difference:
///   1. token count, then each token's start offset and tag;
///   2. node count, then each node's tag, main_token, and the `data` words
///      selected by `dataFieldCount` for that tag;
///   3. extra_data length, then each extra_data entry.
///
/// On a difference, a description is emitted through `print` and
/// `error.TestExpectedEqual` is returned. `source` is accepted but not used.
fn expectAstConsistent(c_tree: Ast, zig_tree: Ast, source: [:0]const u8) !void {
    _ = source;

    // --- Tokens ---
    const c_token_count = c_tree.tokens.len;
    const z_token_count = zig_tree.tokens.len;
    if (c_token_count != z_token_count) {
        print("token count mismatch: c={d} zig={d}\n", .{ c_token_count, z_token_count });
        return error.TestExpectedEqual;
    }

    // Fetch each column once; lengths are known to match from the check above.
    const c_token_starts = c_tree.tokens.items(.start);
    const z_token_starts = zig_tree.tokens.items(.start);
    const c_token_tags = c_tree.tokens.items(.tag);
    const z_token_tags = zig_tree.tokens.items(.tag);
    for (c_token_starts, z_token_starts, c_token_tags, z_token_tags, 0..) |c_start, z_start, c_tok, z_tok, i| {
        if (c_start != z_start) {
            print("token[{d}] start mismatch: c={d} zig={d}\n", .{ i, c_start, z_start });
            return error.TestExpectedEqual;
        }
        if (c_tok != z_tok) {
            print("token[{d}] tag mismatch: c={s} zig={s}\n", .{ i, @tagName(c_tok), @tagName(z_tok) });
            return error.TestExpectedEqual;
        }
    }

    // --- Nodes ---
    const c_node_count = c_tree.nodes.len;
    const z_node_count = zig_tree.nodes.len;
    if (c_node_count != z_node_count) {
        print("node count mismatch: c={d} zig={d}\n", .{ c_node_count, z_node_count });
        return error.TestExpectedEqual;
    }

    const c_node_tags = c_tree.nodes.items(.tag);
    const z_node_tags = zig_tree.nodes.items(.tag);
    const c_main_tokens = c_tree.nodes.items(.main_token);
    const z_main_tokens = zig_tree.nodes.items(.main_token);
    const c_node_data = c_tree.nodes.items(.data);
    const z_node_data = zig_tree.nodes.items(.data);
    for (c_node_tags, z_node_tags, c_main_tokens, z_main_tokens, 0..) |c_tag, z_tag, c_main, z_main, i| {
        if (c_tag != z_tag) {
            print("node[{d}] tag mismatch: c={s} zig={s}\n", .{ i, @tagName(c_tag), @tagName(z_tag) });
            return error.TestExpectedEqual;
        }
        if (c_main != z_main) {
            print("node[{d}] main_token mismatch: c={d} zig={d}\n", .{ i, c_main, z_main });
            return error.TestExpectedEqual;
        }

        // Only compare the data words that dataFieldCount selects for this tag.
        const field_count = dataFieldCount(c_tag);
        if (field_count == 0) continue;

        // View each node's data as two raw u32 words.
        const c_words: *const [2]u32 = @ptrCast(&c_node_data[i]);
        const z_words: *const [2]u32 = @ptrCast(&z_node_data[i]);
        if (c_words[0] != z_words[0]) {
            print("node[{d}] data[0] mismatch: c={d} zig={d}\n", .{ i, c_words[0], z_words[0] });
            return error.TestExpectedEqual;
        }
        if (field_count >= 2 and c_words[1] != z_words[1]) {
            print("node[{d}] data[1] mismatch: c={d} zig={d}\n", .{ i, c_words[1], z_words[1] });
            return error.TestExpectedEqual;
        }
    }

    // --- Extra data ---
    const c_extra = c_tree.extra_data;
    const z_extra = zig_tree.extra_data;
    if (c_extra.len != z_extra.len) {
        print("extra_data length mismatch: c={d} zig={d}\n", .{ c_extra.len, z_extra.len });
        return error.TestExpectedEqual;
    }
    for (c_extra, z_extra, 0..) |c_word, z_word, i| {
        if (c_word != z_word) {
            print("extra_data[{d}] mismatch: c={d} zig={d}\n", .{ i, c_word, z_word });
            return error.TestExpectedEqual;
        }
    }
}
|
||||||
|
|
||||||
const testing = std.testing;
|
const testing = std.testing;
|
||||||
|
|
||||||
const Ast = std.zig.Ast;
|
const Ast = std.zig.Ast;
|
||||||
@@ -6914,4 +7019,3 @@ pub fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
|
|||||||
.errors = errors,
|
.errors = errors,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user