zig/stage0/astgen_test.zig

const std = @import("std");
const Ast = std.zig.Ast;
const Zir = std.zig.Zir;
const AstGen = std.zig.AstGen;
const Allocator = std.mem.Allocator;

const c = @cImport({
    @cInclude("astgen.h");
});

/// Run the upstream Zig pipeline (`Ast.parse` followed by `AstGen.generate`)
/// on `source` and return the resulting ZIR.
///
/// The intermediate AST is released before returning. The caller owns the
/// returned ZIR and must `deinit` it with the same `gpa`.
fn refZir(gpa: Allocator, source: [:0]const u8) !Zir {
    var ast = try Ast.parse(gpa, source, .zig);
    defer ast.deinit(gpa);

    const zir = try AstGen.generate(gpa, ast);
    return zir;
}

test "astgen dump: simple cases" {
    const gpa = std.testing.allocator;

    // Each entry is (label, source). Only the source is consumed below; the
    // label is kept so a debug print can be added when investigating a case.
    const inputs = .{
        .{ "empty", "" },
        .{ "comptime {}", "comptime {}" },
        .{ "const x = 0;", "const x = 0;" },
        .{ "const x = 1;", "const x = 1;" },
        .{ "const x = 0; const y = 0;", "const x = 0; const y = 0;" },
        .{ "test \"t\" {}", "test \"t\" {}" },
        .{ "const std = @import(\"std\");", "const std = @import(\"std\");" },
        .{ "test_all.zig", @embedFile("test_all.zig") },
    };

    // Smoke test: the reference generator must return a ZIR (not an error)
    // for every input; the result is released at the end of each iteration.
    inline for (inputs) |entry| {
        const text: [:0]const u8 = entry[1];
        var zir = try refZir(gpa, text);
        defer zir.deinit(gpa);
    }
}

/// Compute which words of `ref.extra` hold hash data.
///
/// Returns a slice of `ref.extra.len` booleans allocated with `gpa` (the
/// caller frees it). Entry `k` is true when `extra[k]` is part of a 4-word
/// hash (`fields_hash`, `src_hash` or `proto_hash`). Those words are
/// zero-filled in the C output but hold real Blake3 hashes in the Zig
/// reference, so comparisons skip the marked positions.
fn buildHashSkipMask(gpa: Allocator, ref: Zir) ![]bool {
    const extra_len: u32 = @intCast(ref.extra.len);
    const mask = try gpa.alloc(bool, extra_len);
    @memset(mask, false);

    const tags = ref.instructions.items(.tag);
    const datas = ref.instructions.items(.data);

    for (tags, datas) |tag, data| {
        switch (tag) {
            .extended => {
                const ext = data.extended;
                switch (ext.opcode) {
                    // StructDecl/EnumDecl payloads begin with fields_hash[4].
                    // NOTE(review): only these two opcodes are masked here; if
                    // other container payloads (e.g. union_decl) also begin
                    // with a fields_hash they are compared as-is — confirm.
                    .struct_decl, .enum_decl => {
                        const start = ext.operand;
                        @memset(mask[start..][0..4], true);
                    },
                    else => {},
                }
            },
            .declaration => {
                // Declaration payloads begin with src_hash[4].
                const start = data.declaration.payload_index;
                @memset(mask[start..][0..4], true);
            },
            .func, .func_inferred => {
                // Func payload: ret_ty(1) + param_block(1) + body_len(1),
                // followed by the trailing return type, the body,
                // SrcLocs(3) and finally proto_hash(4).
                const payload = data.pl_node.payload_index;
                const ret_body_len: u32 = ref.extra[payload] & 0x7FFFFFFF;
                const body_len: u32 = ref.extra[payload + 2];
                // Trailing return-type words: a body when ret_body_len > 1,
                // a single ref when it is 1, nothing when it is 0 (void).
                // In every case the word count equals ret_body_len itself.
                const ret_trailing: u32 = ret_body_len;
                // The hash words are only looked for when there is a body.
                if (body_len == 0) continue;

                const hash_start = payload + 3 + ret_trailing + body_len + 3;
                // Clamp to the end of `extra` instead of indexing past it.
                const first: usize = hash_start;
                const last = @min(first + 4, mask.len);
                if (first < last) @memset(mask[first..last], true);
            },
            .func_fancy => {
                // FuncFancy payload: param_block(1) + body_len(1) + bits(1),
                // followed by optional cc, return type and noalias words,
                // then the body, SrcLocs(3) and proto_hash(4).
                const payload = data.pl_node.payload_index;
                const body_len: u32 = ref.extra[payload + 1];
                const bits: u32 = ref.extra[payload + 2];

                const has_cc_ref = ((bits >> 3) & 1) != 0;
                const has_cc_body = ((bits >> 4) & 1) != 0;
                const has_ret_ty_ref = ((bits >> 5) & 1) != 0;
                const has_ret_ty_body = ((bits >> 6) & 1) != 0;
                const has_any_noalias = ((bits >> 7) & 1) != 0;

                // Walk a cursor over the optional trailing items. A "body"
                // item is a length word followed by that many words; a "ref"
                // item is a single word.
                var cursor: u32 = payload + 3;
                if (has_cc_body) {
                    cursor += 1 + ref.extra[cursor];
                } else if (has_cc_ref) {
                    cursor += 1;
                }
                if (has_ret_ty_body) {
                    cursor += 1 + ref.extra[cursor];
                } else if (has_ret_ty_ref) {
                    cursor += 1;
                }
                if (has_any_noalias) cursor += 1;

                // The hash words are only looked for when there is a body.
                if (body_len == 0) continue;

                // Skip the body and SrcLocs(3) to reach proto_hash(4).
                const hash_start = cursor + body_len + 3;
                const first: usize = hash_start;
                const last = @min(first + 4, mask.len);
                if (first < last) @memset(mask[first..last], true);
            },
            else => {},
        }
    }
    return mask;
}

test "astgen: empty source" {
    const gpa = std.testing.allocator;
    const text: [:0]const u8 = "";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same (empty) input.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: comptime {}" {
    const gpa = std.testing.allocator;
    const text: [:0]const u8 = "comptime {}";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same bytes.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: const x = 0;" {
    const gpa = std.testing.allocator;
    const text: [:0]const u8 = "const x = 0;";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same bytes.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: const x = 1;" {
    const gpa = std.testing.allocator;
    const text: [:0]const u8 = "const x = 1;";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same bytes.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: const x = 0; const y = 0;" {
    const gpa = std.testing.allocator;
    const text: [:0]const u8 = "const x = 0; const y = 0;";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same two declarations.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: field_access" {
    const gpa = std.testing.allocator;
    // An import followed by a field access on the imported value.
    const text: [:0]const u8 = "const std = @import(\"std\");\nconst mem = std.mem;";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same bytes.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: addr array init" {
    const gpa = std.testing.allocator;
    // Address-of applied to an array initializer with an inferred length.
    const text: [:0]const u8 = "const x = &[_][]const u8{\"a\",\"b\"};";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same bytes.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: test empty body" {
    const gpa = std.testing.allocator;
    // A named test declaration with an empty block.
    const text: [:0]const u8 = "test \"t\" {}";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same bytes.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: test_all.zig" {
    const gpa = std.testing.allocator;
    // The sibling file is embedded at compile time and used as the input.
    const text: [:0]const u8 = @embedFile("test_all.zig");

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same bytes.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

test "astgen: @import" {
    const gpa = std.testing.allocator;
    // A single declaration initialised from an @import call.
    const text: [:0]const u8 = "const std = @import(\"std\");";

    // Reference ZIR from the upstream Zig implementation.
    var expected = try refZir(gpa, text);
    defer expected.deinit(gpa);

    // ZIR produced by the C implementation for the same bytes.
    var parsed = c.astParse(text.ptr, @intCast(text.len));
    defer c.astDeinit(&parsed);
    var actual = c.astGen(&parsed);
    defer c.zirDeinit(&actual);

    try expectEqualZir(gpa, expected, actual);
}

/// Print the closest `.declaration` instruction located before index
/// `start`, if there is one, to give context for a reported mismatch.
fn printNearestDeclaration(
    tags: []const Zir.Inst.Tag,
    datas: []const Zir.Inst.Data,
    start: usize,
) void {
    var j: usize = start;
    while (j > 0) {
        j -= 1;
        if (tags[j] == .declaration) {
            std.debug.print("  nearest decl at [{d}]: src_node={d}\n", .{
                j, datas[j].declaration.src_node,
            });
            return;
        }
    }
}

/// Assert that the ZIR produced by the C implementation (`got`) matches the
/// reference ZIR (`ref`).
///
/// The comparison runs in stages: instruction count, instruction tags,
/// per-instruction data (via `expectEqualData`), string bytes, and finally
/// the `extra` array with hash words masked out by `buildHashSkipMask`.
/// The first failing stage prints diagnostics with `std.debug.print` and the
/// function returns `error.TestExpectedEqual`. `gpa` is used only for the
/// temporary skip mask, so an allocation failure can be returned as well.
fn expectEqualZir(gpa: Allocator, ref: Zir, got: c.Zir) !void {
    const ref_len: u32 = @intCast(ref.instructions.len);
    const ref_tags = ref.instructions.items(.tag);
    const ref_datas = ref.instructions.items(.data);

    // 1. Compare lengths.
    if (ref_len != got.inst_len) {
        std.debug.print("inst_len mismatch: ref={d} got={d}\n", .{ ref_len, got.inst_len });

        // Per-tag histogram of both sides to show which tags are over- or
        // under-produced.
        const tag_slots = 265;
        var ref_counts: [tag_slots]u32 = .{0} ** tag_slots;
        var got_counts: [tag_slots]u32 = .{0} ** tag_slots;
        for (0..ref_len) |i| ref_counts[@intFromEnum(ref_tags[i])] += 1;
        for (0..got.inst_len) |i| got_counts[got.inst_tags[i]] += 1;
        for (0..tag_slots) |t| {
            if (ref_counts[t] != got_counts[t])
                std.debug.print("tag {d}: ref={d} got={d} (diff={d})\n", .{
                    t,
                    ref_counts[t],
                    got_counts[t],
                    @as(i32, @intCast(got_counts[t])) - @as(i32, @intCast(ref_counts[t])),
                });
        }

        // Find first tag divergence.
        const min_len = @min(ref_len, got.inst_len);
        for (0..min_len) |i| {
            const ref_tag: u8 = @intFromEnum(ref_tags[i]);
            const got_tag: u8 = @intCast(got.inst_tags[i]);
            if (ref_tag != got_tag) {
                std.debug.print("first divergence at [{d}]: ref_tag={d} got_tag={d}\n", .{ i, ref_tag, got_tag });
                // Show the reference payload as raw words. Which `Data` field
                // is active depends on the tag, and reading an inactive field
                // of a bare union is safety-checked illegal behavior, so no
                // particular field (such as `pl_node`) is assumed here. This
                // mirrors the raw-word fallback in `expectEqualData`.
                const raw: [*]const u32 = @ptrCast(&ref_datas[i]);
                std.debug.print("  ref data (raw words): 0x{x:0>8} 0x{x:0>8}\n", .{
                    raw[0], raw[1],
                });
                printNearestDeclaration(ref_tags, ref_datas, i);
                break;
            }
        }
        return error.TestExpectedEqual;
    }

    // 2. Compare instruction tags.
    for (0..ref_len) |i| {
        const ref_tag: u8 = @intFromEnum(ref_tags[i]);
        const got_tag: u8 = @intCast(got.inst_tags[i]);
        if (ref_tag != got_tag) {
            std.debug.print(
                "inst_tags[{d}] mismatch: ref={d} got={d}\n",
                .{ i, ref_tag, got_tag },
            );
            return error.TestExpectedEqual;
        }
    }

    // 3. Compare instruction data field-by-field.
    for (0..ref_len) |i| {
        expectEqualData(i, ref_tags[i], ref_datas[i], got.inst_datas[i]) catch {
            // Print nearest declaration for context.
            printNearestDeclaration(ref_tags, ref_datas, i);
            // Print what tags are at the operand positions if break_inline.
            // Operand values below 124 are not treated as instruction
            // references; larger values map to instruction `operand - 124`.
            // Tags were verified equal in step 2, so `ref_tags` describes
            // both sides.
            if (ref_tags[i] == .break_inline) {
                const r_op = @intFromEnum(ref_datas[i].@"break".operand);
                const g_op = got.inst_datas[i].break_data.operand;
                if (r_op >= 124 and r_op - 124 < ref_len) {
                    std.debug.print("  ref operand inst[{d}] tag={d}\n", .{
                        r_op - 124, @intFromEnum(ref_tags[r_op - 124]),
                    });
                }
                if (g_op >= 124 and g_op - 124 < ref_len) {
                    std.debug.print("  got operand inst[{d}] tag={d}\n", .{
                        g_op - 124, @intFromEnum(ref_tags[g_op - 124]),
                    });
                }
            }
            return error.TestExpectedEqual;
        };
    }

    // 4. Compare string bytes.
    const ref_sb_len: u32 = @intCast(ref.string_bytes.len);
    try std.testing.expectEqual(ref_sb_len, got.string_bytes_len);
    for (0..ref_sb_len) |i| {
        if (ref.string_bytes[i] != got.string_bytes[i]) {
            std.debug.print(
                "string_bytes[{d}] mismatch: ref=0x{x:0>2} got=0x{x:0>2}\n",
                .{ i, ref.string_bytes[i], got.string_bytes[i] },
            );
            return error.TestExpectedEqual;
        }
    }

    // 5. Compare extra data (skipping hash positions).
    const skip = try buildHashSkipMask(gpa, ref);
    defer gpa.free(skip);
    const ref_extra_len: u32 = @intCast(ref.extra.len);
    try std.testing.expectEqual(ref_extra_len, got.extra_len);
    for (0..ref_extra_len) |i| {
        if (skip[i]) continue;
        if (ref.extra[i] != got.extra[i]) {
            // Show first 10 extra diffs.
            var count: u32 = 0;
            for (0..ref_extra_len) |j| {
                if (!skip[j] and ref.extra[j] != got.extra[j]) {
                    std.debug.print(
                        "extra[{d}] mismatch: ref={d} got={d}\n",
                        .{ j, ref.extra[j], got.extra[j] },
                    );
                    count += 1;
                    if (count >= 10) break;
                }
            }
            return error.TestExpectedEqual;
        }
    }
}

/// Check that the C-side payload `got` of instruction `idx` agrees with the
/// reference payload `ref`.
///
/// `tag` selects which union member is read on each side. Members are
/// compared field by field because Zig's `Data` union has no guaranteed
/// in-memory layout; tags without a dedicated arm fall back to comparing the
/// payload as raw `u32` words. On disagreement both sides are printed and
/// `error.TestExpectedEqual` is returned.
fn expectEqualData(
    idx: usize,
    tag: Zir.Inst.Tag,
    ref: Zir.Inst.Data,
    got: c.ZirInstData,
) !void {
    switch (tag) {
        .extended => {
            const want = ref.extended;
            const have = got.extended;
            // Opcodes whose small and operand are both undefined/unused.
            const ignore_all = switch (want.opcode) {
                .dbg_empty_stmt, .astgen_error => true,
                else => false,
            };
            // Opcodes whose `small` alone is not comparable.
            const ignore_small = switch (want.opcode) {
                .add_with_overflow,
                .sub_with_overflow,
                .mul_with_overflow,
                .shl_with_overflow,
                .restore_err_ret_index,
                .branch_hint,
                // Container decl Small packed structs have undefined padding bits.
                .struct_decl,
                .enum_decl,
                .union_decl,
                .opaque_decl,
                // addNodeExtended sets small = undefined (AstGen.zig:12775).
                .this,
                .ret_addr,
                .error_return_trace,
                .frame,
                .frame_address,
                .breakpoint,
                .disable_instrumentation,
                .disable_intrinsics,
                .in_comptime,
                .c_va_start,
                => true,
                else => false,
            };
            // The `or` chains short-circuit so an ignored field is never
            // read for the comparison.
            const opcode_ok = @intFromEnum(want.opcode) == have.opcode;
            const small_ok = ignore_all or ignore_small or want.small == have.small;
            const operand_ok = ignore_all or want.operand == have.operand;
            if (opcode_ok and small_ok and operand_ok) return;

            std.debug.print(
                "inst_datas[{d}] (extended) mismatch:\n" ++
                    "  ref: opcode={d} small=0x{x:0>4} operand={d}\n" ++
                    "  got: opcode={d} small=0x{x:0>4} operand={d}\n",
                .{
                    idx,
                    @intFromEnum(want.opcode),
                    want.small,
                    want.operand,
                    have.opcode,
                    have.small,
                    have.operand,
                },
            );
            return error.TestExpectedEqual;
        },
        .declaration => {
            const want = ref.declaration;
            const have = got.declaration;
            if (@intFromEnum(want.src_node) == have.src_node and
                want.payload_index == have.payload_index) return;

            std.debug.print(
                "inst_datas[{d}] (declaration) mismatch:\n" ++
                    "  ref: src_node={d} payload_index={d}\n" ++
                    "  got: src_node={d} payload_index={d}\n",
                .{
                    idx,
                    @intFromEnum(want.src_node),
                    want.payload_index,
                    have.src_node,
                    have.payload_index,
                },
            );
            return error.TestExpectedEqual;
        },
        .break_inline => {
            const want = ref.@"break";
            const have = got.break_data;
            if (@intFromEnum(want.operand) == have.operand and
                want.payload_index == have.payload_index) return;

            std.debug.print(
                "inst_datas[{d}] (break_inline) mismatch:\n" ++
                    "  ref: operand={d} payload_index={d}\n" ++
                    "  got: operand={d} payload_index={d}\n",
                .{
                    idx,
                    @intFromEnum(want.operand),
                    want.payload_index,
                    have.operand,
                    have.payload_index,
                },
            );
            return error.TestExpectedEqual;
        },
        .import => {
            const want = ref.pl_tok;
            const have = got.pl_tok;
            if (@intFromEnum(want.src_tok) == have.src_tok and
                want.payload_index == have.payload_index) return;

            std.debug.print(
                "inst_datas[{d}] (import) mismatch:\n" ++
                    "  ref: src_tok={d} payload_index={d}\n" ++
                    "  got: src_tok={d} payload_index={d}\n",
                .{
                    idx,
                    @intFromEnum(want.src_tok),
                    want.payload_index,
                    have.src_tok,
                    have.payload_index,
                },
            );
            return error.TestExpectedEqual;
        },
        .dbg_stmt => {
            const want = ref.dbg_stmt;
            const have = got.dbg_stmt;
            if (want.line == have.line and want.column == have.column) return;

            std.debug.print(
                "inst_datas[{d}] (dbg_stmt) mismatch:\n" ++
                    "  ref: line={d} column={d}\n" ++
                    "  got: line={d} column={d}\n",
                .{ idx, want.line, want.column, have.line, have.column },
            );
            return error.TestExpectedEqual;
        },
        // Tags compared through the `un_node` member.
        .ensure_result_non_error,
        .restore_err_ret_index_unconditional,
        .validate_struct_init_ty,
        .validate_struct_init_result_ty,
        .struct_init_empty_result,
        .struct_init_empty,
        .struct_init_empty_ref_result,
        => {
            const want = ref.un_node;
            const have = got.un_node;
            if (@intFromEnum(want.src_node) == have.src_node and
                @intFromEnum(want.operand) == have.operand) return;

            std.debug.print(
                "inst_datas[{d}] ({s}) mismatch:\n" ++
                    "  ref: src_node={d} operand={d}\n" ++
                    "  got: src_node={d} operand={d}\n",
                .{
                    idx,
                    @tagName(tag),
                    @intFromEnum(want.src_node),
                    @intFromEnum(want.operand),
                    have.src_node,
                    have.operand,
                },
            );
            return error.TestExpectedEqual;
        },
        .ret_implicit => {
            const want = ref.un_tok;
            const have = got.un_tok;
            if (@intFromEnum(want.src_tok) == have.src_tok and
                @intFromEnum(want.operand) == have.operand) return;

            std.debug.print(
                "inst_datas[{d}] (ret_implicit) mismatch:\n" ++
                    "  ref: src_tok={d} operand={d}\n" ++
                    "  got: src_tok={d} operand={d}\n",
                .{
                    idx,
                    @intFromEnum(want.src_tok),
                    @intFromEnum(want.operand),
                    have.src_tok,
                    have.operand,
                },
            );
            return error.TestExpectedEqual;
        },
        // Tags compared through the `pl_node` member.
        .func,
        .func_inferred,
        .func_fancy,
        .array_type,
        .array_type_sentinel,
        .array_cat,
        .array_init,
        .array_init_ref,
        .error_set_decl,
        .struct_init_field_type,
        .struct_init,
        .struct_init_ref,
        .validate_array_init_ref_ty,
        .validate_array_init_ty,
        .field_val,
        .field_ptr,
        .field_val_named,
        .field_ptr_named,
        => {
            const want = ref.pl_node;
            const have = got.pl_node;
            if (@intFromEnum(want.src_node) == have.src_node and
                want.payload_index == have.payload_index) return;

            std.debug.print(
                "inst_datas[{d}] ({s}) mismatch:\n" ++
                    "  ref: src_node={d} payload_index={d}\n" ++
                    "  got: src_node={d} payload_index={d}\n",
                .{
                    idx,
                    @tagName(tag),
                    @intFromEnum(want.src_node),
                    want.payload_index,
                    have.src_node,
                    have.payload_index,
                },
            );
            return error.TestExpectedEqual;
        },
        .decl_val, .decl_ref => {
            const want = ref.str_tok;
            const have = got.str_tok;
            if (@intFromEnum(want.start) == have.start and
                @intFromEnum(want.src_tok) == have.src_tok) return;

            std.debug.print(
                "inst_datas[{d}] ({s}) mismatch:\n" ++
                    "  ref: start={d} src_tok={d}\n" ++
                    "  got: start={d} src_tok={d}\n",
                .{
                    idx,
                    @tagName(tag),
                    @intFromEnum(want.start),
                    @intFromEnum(want.src_tok),
                    have.start,
                    have.src_tok,
                },
            );
            return error.TestExpectedEqual;
        },
        .int => {
            if (ref.int == got.int_val) return;

            std.debug.print(
                "inst_datas[{d}] (int) mismatch: ref={d} got={d}\n",
                .{ idx, ref.int, got.int_val },
            );
            return error.TestExpectedEqual;
        },
        .ptr_type => {
            // Compare ptr_type data: flags, size, payload_index.
            const want = ref.ptr_type;
            const have = got.ptr_type;
            const want_flags: u8 = @bitCast(want.flags);
            if (want_flags == have.flags and
                @intFromEnum(want.size) == have.size and
                want.payload_index == have.payload_index) return;

            std.debug.print(
                "inst_datas[{d}] (ptr_type) mismatch:\n" ++
                    "  ref: flags=0x{x} size={d} pi={d}\n" ++
                    "  got: flags=0x{x} size={d} pi={d}\n",
                .{
                    idx,
                    want_flags,
                    @intFromEnum(want.size),
                    want.payload_index,
                    have.flags,
                    have.size,
                    have.payload_index,
                },
            );
            return error.TestExpectedEqual;
        },
        .int_type => {
            const want = ref.int_type;
            const have = got.int_type;
            if (@intFromEnum(want.src_node) == have.src_node and
                @intFromEnum(want.signedness) == have.signedness and
                want.bit_count == have.bit_count) return;

            std.debug.print(
                "inst_datas[{d}] (int_type) mismatch\n",
                .{idx},
            );
            return error.TestExpectedEqual;
        },
        .str => {
            const want = ref.str;
            const have = got.str;
            if (@intFromEnum(want.start) == have.start and want.len == have.len) return;

            std.debug.print(
                "inst_datas[{d}] (str) mismatch:\n" ++
                    "  ref: start={d} len={d}\n" ++
                    "  got: start={d} len={d}\n",
                .{ idx, @intFromEnum(want.start), want.len, have.start, have.len },
            );
            return error.TestExpectedEqual;
        },
        else => {
            // Generic raw comparison: view each payload as two u32 words.
            const want_words: [*]const u32 = @ptrCast(&ref);
            const have_words: [*]const u32 = @ptrCast(&got);
            // Tags where only the first u32 word is meaningful
            // (second word is padding/undefined).
            const first_word_only = switch (tag) {
                // .node data format (single i32):
                .repeat,
                .repeat_inline,
                .ret_ptr,
                .ret_type,
                .trap,
                .alloc_inferred,
                .alloc_inferred_mut,
                .alloc_inferred_comptime,
                .alloc_inferred_comptime_mut,
                // .@"unreachable" data format (src_node + padding):
                .@"unreachable",
                // .save_err_ret_index data format (operand only):
                .save_err_ret_index,
                // .float data format (f32 = 4 bytes, second word is padding):
                // NOTE(review): upstream Zir documents `.float` as an f64
                // payload; confirm that ignoring the second word is intended.
                .float,
                // .elem_val_imm data format (u32 + u8, 3 bytes padding):
                .elem_val_imm,
                => true,
                else => false,
            };
            // The second word takes part in the comparison only when the
            // tag is not in the first-word-only list.
            if (want_words[0] == have_words[0] and
                (first_word_only or want_words[1] == have_words[1])) return;

            std.debug.print(
                "inst_datas[{d}] ({s}) raw mismatch:\n" ++
                    "  ref: 0x{x:0>8} 0x{x:0>8}\n" ++
                    "  got: 0x{x:0>8} 0x{x:0>8}\n",
                .{
                    idx,
                    @tagName(tag),
                    want_words[0],
                    want_words[1],
                    have_words[0],
                    have_words[1],
                },
            );
            return error.TestExpectedEqual;
        },
    }
}

/// Corpus of real Zig source files, as `(display name, file contents)` pairs.
/// The contents are embedded at compile time with `@embedFile`.
/// NOTE(review): the consumer of this table is not visible in this part of
/// the file; presumably each entry is passed to `corpusCheck` — confirm.
const corpus_files = .{
    // Sources that live next to this test file, plus the parent build script.
    .{ "astgen_test.zig", @embedFile("astgen_test.zig") },
    .{ "build.zig", @embedFile("../build.zig") },
    .{ "parser_test.zig", @embedFile("parser_test.zig") },
    .{ "test_all.zig", @embedFile("test_all.zig") },
    .{ "tokenizer_test.zig", @embedFile("tokenizer_test.zig") },

    // Files taken from the `../test/behavior/` directory.
    .{ "optional.zig", @embedFile("../test/behavior/optional.zig") },
    .{ "call.zig", @embedFile("../test/behavior/call.zig") },
    .{ "pointers.zig", @embedFile("../test/behavior/pointers.zig") },
    .{ "type.zig", @embedFile("../test/behavior/type.zig") },
    .{ "enum.zig", @embedFile("../test/behavior/enum.zig") },
    .{ "switch_on_captured_error.zig", @embedFile("../test/behavior/switch_on_captured_error.zig") },
    .{ "error.zig", @embedFile("../test/behavior/error.zig") },
    .{ "switch.zig", @embedFile("../test/behavior/switch.zig") },
    .{ "array.zig", @embedFile("../test/behavior/array.zig") },
    .{ "slice.zig", @embedFile("../test/behavior/slice.zig") },
    .{ "basic.zig", @embedFile("../test/behavior/basic.zig") },
    .{ "packed-struct.zig", @embedFile("../test/behavior/packed-struct.zig") },
    .{ "eval.zig", @embedFile("../test/behavior/eval.zig") },
    .{ "field_parent_ptr.zig", @embedFile("../test/behavior/field_parent_ptr.zig") },
    .{ "struct.zig", @embedFile("../test/behavior/struct.zig") },
    .{ "floatop.zig", @embedFile("../test/behavior/floatop.zig") },
    .{ "union.zig", @embedFile("../test/behavior/union.zig") },
    .{ "math.zig", @embedFile("../test/behavior/math.zig") },
    .{ "vector.zig", @embedFile("../test/behavior/vector.zig") },
    .{ "cast.zig", @embedFile("../test/behavior/cast.zig") },
};

/// Generate ZIR for `source` twice, once with the reference Zig
/// implementation and once with the C port, and require both to agree.
///
/// Returns `error.TestUnexpectedResult` (after printing the C result's
/// `inst_len`) when the C port flags compile errors. Errors from parsing,
/// reference generation and `expectEqualZir` are propagated unchanged.
fn corpusCheck(gpa: Allocator, source: [:0]const u8) !void {
    // Reference side: parse and lower with the Zig implementation.
    var ref_tree = try Ast.parse(gpa, source, .zig);
    defer ref_tree.deinit(gpa);
    var expected = try AstGen.generate(gpa, ref_tree);
    defer expected.deinit(gpa);

    // C side: parse and lower the same bytes through the C API.
    var port_ast = c.astParse(source.ptr, @intCast(source.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    if (!actual.has_compile_errors) {
        return expectEqualZir(gpa, expected, actual);
    }

    std.debug.print("C port returned compile errors (inst_len={d})\n", .{actual.inst_len});
    return error.TestUnexpectedResult;
}

test "astgen: struct single field" {
    // Input: a struct declaration with exactly one plain field.
    const input: [:0]const u8 = "const T = struct { x: u32 };";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: struct multiple fields" {
    // Input: a struct declaration with two fields of different types.
    const input: [:0]const u8 = "const T = struct { x: u32, y: bool };";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: struct field with default" {
    // Input: a struct whose only field carries a default value.
    const input: [:0]const u8 = "const T = struct { x: u32 = 0 };";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: struct field with align" {
    // Input: a struct whose only field has an explicit align(4).
    const input: [:0]const u8 = "const T = struct { x: u32 align(4) };";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: struct comptime field" {
    // Input: a struct with a single `comptime` field and its required value.
    const input: [:0]const u8 = "const T = struct { comptime x: u32 = 0 };";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: empty error set" {
    // Input: an error set declaration with no members.
    const input: [:0]const u8 = "const E = error{};";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: error set with members" {
    // Input: an error set declaration listing two members.
    const input: [:0]const u8 = "const E = error{ OutOfMemory, OutOfTime };";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: extern var" {
    // Input: a top-level `extern var` declaration with a type and no initializer.
    const input: [:0]const u8 = "extern var x: u32;";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: corpus test_all.zig" {
    // Compare the C port's ZIR against the reference for test_all.zig.
    try corpusCheck(std.testing.allocator, @embedFile("test_all.zig"));
}

test "astgen: corpus build.zig" {
    // Compare the C port's ZIR against the reference for ../build.zig.
    try corpusCheck(std.testing.allocator, @embedFile("../build.zig"));
}

test "astgen: corpus tokenizer_test.zig" {
    // Compare the C port's ZIR against the reference for tokenizer_test.zig.
    try corpusCheck(std.testing.allocator, @embedFile("tokenizer_test.zig"));
}

test "astgen: corpus parser_test.zig" {
    // Compare the C port's ZIR against the reference for parser_test.zig.
    try corpusCheck(std.testing.allocator, @embedFile("parser_test.zig"));
}

test "astgen: corpus astgen_test.zig" {
    // Compare the C port's ZIR against the reference for astgen_test.zig.
    try corpusCheck(std.testing.allocator, @embedFile("astgen_test.zig"));
}

test "astgen: corpus array_list.zig" {
    // Compare the C port's ZIR against the reference for ../lib/std/array_list.zig.
    try corpusCheck(std.testing.allocator, @embedFile("../lib/std/array_list.zig"));
}

test "astgen: corpus multi_array_list.zig" {
    // TODO: parser bug - C parser produces nodes_len=1.
    // Skipped unconditionally until that is fixed; the check below is kept
    // so the test can be re-enabled by deleting the skip line.
    if (true) return error.SkipZigTest;
    try corpusCheck(std.testing.allocator, @embedFile("../lib/std/multi_array_list.zig"));
}

// Deferred until much later: Sema.zig is too large for now, so smaller files come first.
//test "astgen: corpus Sema.zig" {
//    if (true) return error.SkipZigTest; // TODO: too large, work on smaller files first
//    const gpa = std.testing.allocator;
//    try corpusCheck(gpa, @embedFile("../src/Sema.zig"));
//}

test "astgen: enum decl" {
    // Input: an enum declaration with three untyped members.
    const input: [:0]const u8 = "const E = enum { a, b, c };";
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: struct init typed" {
    // Input: a struct type followed by an initializer that names that type.
    const input: [:0]const u8 =
        \\const T = struct { x: u32 };
        \\const v = T{ .x = 1 };
    ;
    const allocator = std.testing.allocator;

    // ZIR produced by the reference Zig implementation.
    var expected = try refZir(allocator, input);
    defer expected.deinit(allocator);

    // ZIR produced by the C port from the same bytes.
    var port_ast = c.astParse(input.ptr, @intCast(input.len));
    defer c.astDeinit(&port_ast);
    var actual = c.astGen(&port_ast);
    defer c.zirDeinit(&actual);

    try expectEqualZir(allocator, expected, actual);
}

test "astgen: corpus" {
    // Runs `corpusCheck` over every entry of `corpus_files`. A failing entry
    // does not stop the loop: its name and error are printed and the
    // remaining entries still run, so a single run reports every failing
    // file. The test fails with error.ZirMismatch if any entry failed.
    const gpa = std.testing.allocator;

    var any_fail = false;
    inline for (corpus_files) |entry| {
        corpusCheck(gpa, entry[1]) catch |err| {
            // Print the error name as well, so a ZIR mismatch can be told
            // apart from e.g. OutOfMemory or compile errors flagged by the
            // C port.
            std.debug.print("FAIL: {s}: {s}\n", .{ entry[0], @errorName(err) });
            any_fail = true;
        };
    }
    if (any_fail) return error.ZirMismatch;
}