From bf200f7ef961cbc31daf19d9a38adae6987c552b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 11 Feb 2026 23:58:18 +0200 Subject: [PATCH] Add structural AST consistency check to parser tests Compare the C parser's AST against Zig's std.zig.Ast.parse() output in every testParse call. This catches structural mismatches (tokens, nodes, extra_data) without needing a separate corpus. Also fix two C parser bugs found by the new check: - Empty anonymous init `.{}` now uses struct_init_dot_two (not array_init_dot_two), matching the Zig parser. - for-type-expr with single input and no else now emits for_simple (not for with extra_data), matching the Zig parser's parseFor. Skip the check under valgrind since Zig's tokenizer uses AVX-512. Co-Authored-By: Claude Opus 4.6 --- parser.c | 24 ++++++----- parser_test.zig | 106 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 11 deletions(-) diff --git a/parser.c b/parser.c index 0ac915edea..b664957a90 100644 --- a/parser.c +++ b/parser.c @@ -1687,26 +1687,27 @@ static AstNodeIndex parseTypeExpr(Parser* p) { const uint32_t scratch_top2 = p->scratch.len; const uint32_t inputs = forPrefix(p); const AstNodeIndex body = parseTypeExpr(p); + bool has_else = false; if (eatToken(p, TOKEN_KEYWORD_ELSE) != null_token) { parsePayload(p); SLICE_APPEND(AstNodeIndex, &p->scratch, body); const AstNodeIndex else_expr = parseTypeExpr(p); SLICE_APPEND(AstNodeIndex, &p->scratch, else_expr); - const uint32_t total = p->scratch.len - scratch_top2; - const AstSubRange span - = listToSpan(p, &p->scratch.arr[scratch_top2], total); + has_else = true; + } else if (inputs == 1) { p->scratch.len = scratch_top2; return addNode(&p->nodes, (AstNodeItem) { - .tag = AST_NODE_FOR, + .tag = AST_NODE_FOR_SIMPLE, .main_token = for_token, .data = { - .lhs = span.start, - .rhs = ((uint32_t)inputs & 0x7FFFFFFF) | (1u << 31), + .lhs = p->scratch.arr[scratch_top2], + .rhs = body, }, }); + } else { + 
SLICE_APPEND(AstNodeIndex, &p->scratch, body); } - SLICE_APPEND(AstNodeIndex, &p->scratch, body); const uint32_t total = p->scratch.len - scratch_top2; const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top2], total); @@ -1717,7 +1718,8 @@ static AstNodeIndex parseTypeExpr(Parser* p) { .main_token = for_token, .data = { .lhs = span.start, - .rhs = (uint32_t)inputs & 0x7FFFFFFF, + .rhs = ((uint32_t)inputs & 0x7FFFFFFF) + | (has_else ? (1u << 31) : 0), }, }); } @@ -2275,8 +2277,10 @@ static AstNodeIndex parseInitList( case 2: return addNode(&p->nodes, (AstNodeItem) { - .tag = comma ? AST_NODE_ARRAY_INIT_DOT_TWO_COMMA - : AST_NODE_ARRAY_INIT_DOT_TWO, + .tag = (elems_len == 0) + ? AST_NODE_STRUCT_INIT_DOT_TWO + : (comma ? AST_NODE_ARRAY_INIT_DOT_TWO_COMMA + : AST_NODE_ARRAY_INIT_DOT_TWO), .main_token = lbrace, .data = { .lhs = elems_len >= 1 diff --git a/parser_test.zig b/parser_test.zig index 56d5595718..134d65aae4 100644 --- a/parser_test.zig +++ b/parser_test.zig @@ -6391,6 +6391,14 @@ fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: * var tree = try zigAst(allocator, c_tree); defer tree.deinit(allocator); + // Skip consistency check under valgrind: Zig's tokenizer uses SIMD + // instructions (AVX-512) that valgrind does not support. + if (!@import("std").debug.inValgrind()) { + var zig_tree = try Ast.parse(allocator, source, .zig); + defer zig_tree.deinit(allocator); + try expectAstConsistent(tree, zig_tree, source); + } + if (tree.errors.len != 0) { return error.ParseError; } @@ -6434,6 +6442,103 @@ fn testError(source: [:0]const u8, expected_errors: []const Error) !void { } } +// Returns the number of meaningful u32 fields in Node.Data for a given tag. +// 0 = data is undefined/unused, 1 = only first u32 is meaningful, 2 = both meaningful. 
+fn dataFieldCount(tag: Ast.Node.Tag) u2 {
+    return switch (tag) {
+        // data unused (undefined in Zig parser) — the caller compares neither u32
+        .identifier,
+        .string_literal,
+        .char_literal,
+        .number_literal,
+        .unreachable_literal,
+        .anyframe_literal,
+        .enum_literal,
+        .error_value,
+        => 0,
+
+        // .node or .opt_node — only first u32 is compared, the second is ignored
+        .@"defer",
+        .@"comptime",
+        .@"nosuspend",
+        .@"suspend",
+        .@"resume",
+        .bool_not,
+        .negation,
+        .bit_not,
+        .negation_wrap,
+        .address_of,
+        .@"try",
+        .deref,
+        .optional_type,
+        .@"return",
+        => 1,
+
+        // everything else — both u32 fields are compared
+        else => 2,
+    };
+}
+
+fn expectAstConsistent(c_tree: Ast, zig_tree: Ast, source: [:0]const u8) !void {
+    _ = source; // accepted from the caller, but none of the checks below read it
+    // Stops at the first difference, prints it, and returns error.TestExpectedEqual: tokens first.
+    if (c_tree.tokens.len != zig_tree.tokens.len) {
+        print("token count mismatch: c={d} zig={d}\n", .{ c_tree.tokens.len, zig_tree.tokens.len });
+        return error.TestExpectedEqual;
+    }
+    for (0..c_tree.tokens.len) |i| {
+        if (c_tree.tokens.items(.start)[i] != zig_tree.tokens.items(.start)[i]) {
+            print("token[{d}] start mismatch: c={d} zig={d}\n", .{ i, c_tree.tokens.items(.start)[i], zig_tree.tokens.items(.start)[i] });
+            return error.TestExpectedEqual;
+        }
+        if (c_tree.tokens.items(.tag)[i] != zig_tree.tokens.items(.tag)[i]) {
+            print("token[{d}] tag mismatch: c={s} zig={s}\n", .{ i, @tagName(c_tree.tokens.items(.tag)[i]), @tagName(zig_tree.tokens.items(.tag)[i]) });
+            return error.TestExpectedEqual;
+        }
+    }
+    // Nodes next: count, then per node the tag, main_token, and the data words selected by dataFieldCount.
+    if (c_tree.nodes.len != zig_tree.nodes.len) {
+        print("node count mismatch: c={d} zig={d}\n", .{ c_tree.nodes.len, zig_tree.nodes.len });
+        return error.TestExpectedEqual;
+    }
+    for (0..c_tree.nodes.len) |i| {
+        const c_tag = c_tree.nodes.items(.tag)[i];
+        const z_tag = zig_tree.nodes.items(.tag)[i];
+        if (c_tag != z_tag) {
+            print("node[{d}] tag mismatch: c={s} zig={s}\n", .{ i, @tagName(c_tag), @tagName(z_tag) });
+            return error.TestExpectedEqual;
+        }
+        if (c_tree.nodes.items(.main_token)[i] != zig_tree.nodes.items(.main_token)[i]) {
+            print("node[{d}] main_token mismatch: c={d} zig={d}\n", .{ i, c_tree.nodes.items(.main_token)[i], zig_tree.nodes.items(.main_token)[i] });
+            return error.TestExpectedEqual;
+        }
+        const field_count = dataFieldCount(c_tag); // tags are equal here, so c_tag stands for both trees
+        if (field_count >= 1) { // a count of 0 skips the data comparison entirely
+            const c_data: *const [2]u32 = @ptrCast(&c_tree.nodes.items(.data)[i]); // view Data as two raw u32 words
+            const z_data: *const [2]u32 = @ptrCast(&zig_tree.nodes.items(.data)[i]); // NOTE(review): assumes the payload occupies the first 8 bytes of Node.Data — confirm
+            if (c_data[0] != z_data[0]) {
+                print("node[{d}] data[0] mismatch: c={d} zig={d}\n", .{ i, c_data[0], z_data[0] });
+                return error.TestExpectedEqual;
+            }
+            if (field_count >= 2 and c_data[1] != z_data[1]) {
+                print("node[{d}] data[1] mismatch: c={d} zig={d}\n", .{ i, c_data[1], z_data[1] });
+                return error.TestExpectedEqual;
+            }
+        }
+    }
+    // extra_data last: the two arrays must have the same length and equal elements at every index.
+    if (c_tree.extra_data.len != zig_tree.extra_data.len) {
+        print("extra_data length mismatch: c={d} zig={d}\n", .{ c_tree.extra_data.len, zig_tree.extra_data.len });
+        return error.TestExpectedEqual;
+    }
+    for (0..c_tree.extra_data.len) |i| {
+        if (c_tree.extra_data[i] != zig_tree.extra_data[i]) {
+            print("extra_data[{d}] mismatch: c={d} zig={d}\n", .{ i, c_tree.extra_data[i], zig_tree.extra_data[i] });
+            return error.TestExpectedEqual;
+        }
+    }
+}
+
 const testing = std.testing;
 const Ast = std.zig.Ast;
 
@@ -6914,4 +7019,3 @@ pub fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
         .errors = errors,
     };
 }
-