zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit df2c1a77588cdc0b226441a598c52387758640ee (tree)
parent 3fa58a2654739a24503221ba3467fa7e6a3aeedc
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Mon, 23 Feb 2026 22:34:41 +0000

sema_test: make AIR comparison strict and add extra canonicalization

- Gap 1: function count check is now a hard error (was warning),
  with diagnostic listing functions missing from C output
- Gap 3: canonicalizeExtraRefs for tags with Refs in extra payload
  (StructField, Bin, UnionInit, VectorCmp, Cmpxchg, AtomicRmw,
  TryPtr, FieldParentPtr, ShuffleOne/Two)
- Gap 5: detect ambiguous name matches in precomputedFindByName
- Reduce num_passing 66→8 (addhf3.zig function count mismatch)
- Add num_sema_passing=78 (call_inside_runtime_conditional and
  6 similar tests have function count mismatches)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M stage0/corpus.zig    |   5 ++++-
M stage0/sema_test.zig | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 148 insertions(+), 15 deletions(-)

diff --git a/stage0/corpus.zig b/stage0/corpus.zig @@ -3,7 +3,7 @@ /// `num_passing` controls how many files are tested and pre-generated. /// Both build.zig and stages_test.zig import this file. /// To enable more tests: just increment `num_passing`. -pub const num_passing: usize = 66; +pub const num_passing: usize = 8; pub const files = [_][]const u8{ "lib/std/crypto/codecs.zig", // 165 @@ -203,6 +203,8 @@ pub const files = [_][]const u8{ "lib/std/math/expo2.zig", // 995 }; +pub const num_sema_passing: usize = 78; + pub const sema_unit_tests = [_][]const u8{ "stage0/sema_tests/empty.zig", "stage0/sema_tests/const_decl.zig", @@ -294,3 +296,4 @@ pub const sema_unit_tests = [_][]const u8{ "stage0/sema_tests/min_float.zig", "stage0/sema_tests/f64_div.zig", }; + diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig @@ -323,11 +323,24 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li try airCompareOne(c_name, pf.*, c_pf); } // Verify bidirectional match: Zig should not produce functions that C does not. - // Currently a diagnostic (not a hard error) because the C sema does not yet - // analyze all lazily-referenced functions. if (c_funcs.len != precomputed.len) { - std.debug.print("WARNING: function count mismatch for AIR comparison: " ++ - "C produced {d} functions, pre-computed (Zig) has {d}\n", .{ c_funcs.len, precomputed.len }); + std.debug.print("Function count mismatch: C produced {d} functions, " ++ + "pre-computed (Zig) has {d}\n", .{ c_funcs.len, precomputed.len }); + // Print which pre-computed functions C didn't produce. 
+ for (precomputed) |*pf| { + var found = false; + for (c_funcs) |*cf| { + const cn = if (cf.name) |n| std.mem.span(n) else ""; + if (std.mem.eql(u8, stripModulePrefix(pf.name), stripModulePrefix(cn))) { + found = true; + break; + } + } + if (!found) { + std.debug.print(" missing in C: '{s}'\n", .{pf.name}); + } + } + return error.AirMismatch; } } @@ -344,10 +357,18 @@ fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc { fn precomputedFindByName(funcs: []const PrecomputedFunc, name: []const u8) ?*const PrecomputedFunc { const bare_name = stripModulePrefix(name); + var result: ?*const PrecomputedFunc = null; + var match_count: usize = 0; for (funcs) |*f| { - if (std.mem.eql(u8, bare_name, stripModulePrefix(f.name))) return f; + if (std.mem.eql(u8, bare_name, stripModulePrefix(f.name))) { + if (result == null) result = f; + match_count += 1; + } + } + if (match_count > 1) { + std.debug.print("Ambiguous name match: '{s}' matches {d} pre-computed functions\n", .{ bare_name, match_count }); } - return null; + return result; } fn cNameSpan(name: [*c]u8) []const u8 { @@ -699,6 +720,105 @@ fn airDataRefSlots(tag_val: u8) [2]bool { }; } +/// Canonicalize Ref values stored in the extra array for a given instruction. +/// Each tag has a known extra layout; this function canonicalizes only the +/// Ref-typed fields, leaving payload indices, field indices, and enum values +/// untouched. +fn canonicalizeExtraRefs( + tag_val: u8, + datas: [*]const u8, + inst_idx: usize, + extra: []u32, + map: *std.AutoHashMap(u32, u32), + next_id: *u32, +) void { + // Read the payload index from data slot 1 (bytes 4-7 of the 8-byte data). 
+ const payload = std.mem.readInt(u32, datas[inst_idx * 8 + 4 ..][0..4], .little); + + switch (tag_val) { + // ty_pl with Bin extra: {lhs(Ref), rhs(Ref)} + c.AIR_INST_PTR_ADD, + c.AIR_INST_PTR_SUB, + c.AIR_INST_ADD_WITH_OVERFLOW, + c.AIR_INST_SUB_WITH_OVERFLOW, + c.AIR_INST_MUL_WITH_OVERFLOW, + c.AIR_INST_SHL_WITH_OVERFLOW, + c.AIR_INST_SLICE, + c.AIR_INST_SLICE_ELEM_PTR, + c.AIR_INST_PTR_ELEM_PTR, + => { + canonExtraRef(extra, payload, map, next_id); + canonExtraRef(extra, payload + 1, map, next_id); + }, + // pl_op with Bin extra: {lhs(Ref), rhs(Ref)} + c.AIR_INST_SELECT, + c.AIR_INST_MUL_ADD, + => { + canonExtraRef(extra, payload, map, next_id); + canonExtraRef(extra, payload + 1, map, next_id); + }, + // ty_pl with UnionInit extra: {field_index(u32), init(Ref)} + c.AIR_INST_UNION_INIT => { + canonExtraRef(extra, payload + 1, map, next_id); + }, + // ty_pl with VectorCmp extra: {lhs(Ref), rhs(Ref), op(u32)} + c.AIR_INST_CMP_VECTOR, + c.AIR_INST_CMP_VECTOR_OPTIMIZED, + => { + canonExtraRef(extra, payload, map, next_id); + canonExtraRef(extra, payload + 1, map, next_id); + }, + // ty_pl with Cmpxchg extra: {ptr(Ref), expected(Ref), new(Ref), flags(u32)} + c.AIR_INST_CMPXCHG_WEAK, + c.AIR_INST_CMPXCHG_STRONG, + => { + canonExtraRef(extra, payload, map, next_id); + canonExtraRef(extra, payload + 1, map, next_id); + canonExtraRef(extra, payload + 2, map, next_id); + }, + // pl_op with AtomicRmw extra: {operand(Ref), flags(u32)} + c.AIR_INST_ATOMIC_RMW => { + canonExtraRef(extra, payload, map, next_id); + }, + // ty_pl with TryPtr extra: {ptr(Ref), body_len(u32), body...} + c.AIR_INST_TRY_PTR, + c.AIR_INST_TRY_PTR_COLD, + => { + canonExtraRef(extra, payload, map, next_id); + }, + // ty_pl with FieldParentPtr extra: {field_ptr(Ref), field_index(u32)} + c.AIR_INST_FIELD_PARENT_PTR => { + canonExtraRef(extra, payload, map, next_id); + }, + // ty_pl with ShuffleOne extra: {mask(u32), operand(Ref)} + c.AIR_INST_SHUFFLE_ONE => { + canonExtraRef(extra, payload + 1, map, 
next_id); + }, + // ty_pl with ShuffleTwo extra: {mask(u32), operand_a(Ref), operand_b(Ref)} + c.AIR_INST_SHUFFLE_TWO => { + canonExtraRef(extra, payload + 1, map, next_id); + canonExtraRef(extra, payload + 2, map, next_id); + }, + // ty_pl with StructField extra: {struct_operand(Ref), field_index(u32)} + c.AIR_INST_STRUCT_FIELD_PTR, + c.AIR_INST_STRUCT_FIELD_VAL, + => { + canonExtraRef(extra, payload, map, next_id); + }, + // ty_pl with AGGREGATE_INIT: {ref[0], ref[1], ..., ref[N-1]} + // N is determined by the aggregate type — not stored in extra. + // Cannot canonicalize without type info; refs compared directly. + else => {}, + } +} + +/// Canonicalize a single Ref in the extra array at the given index. +fn canonExtraRef(extra: []u32, index: u32, map: *std.AutoHashMap(u32, u32), next_id: *u32) void { + if (index < extra.len) { + extra[index] = canonicalizeRef(extra[index], map, next_id); + } +} + /// Zero-pad bytes after the null terminator in a NullTerminatedString stored /// in the extra array. Zig's appendAirString leaves padding uninitialised; /// the C side zeroes it. Normalising both to zero allows comparison. @@ -894,13 +1014,23 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void } } } - // TODO: Add extra canonicalization for tags that store Refs - // in their extra payload (e.g., Bin, StructField, UnionInit, - // VectorCmp, Cmpxchg, AtomicRmw, TryPtr, FieldParentPtr, - // AggregateInit). Currently these Refs are compared as raw - // u32, which works when IP indices match but will cause false - // failures when they diverge. See Gap 3 in the comparison - // audit plan. + // Extra canonicalization for tags with Refs in extra payload. 
+ canonicalizeExtraRefs( + a.tags[j], + a.datas, + j, + a_extra_copy, + &a_ref_map, + &next_a_id, + ); + canonicalizeExtraRefs( + b.tags[j], + b.datas, + j, + b_extra_copy, + &b_ref_map, + &next_b_id, + ); } } if (!std.mem.eql(u32, a_extra_copy, b_extra_copy)) { @@ -924,7 +1054,7 @@ const corpus = @import("corpus.zig"); test "sema air: unit tests" { @setEvalBranchQuota(corpus.sema_unit_tests.len * 2); - inline for (corpus.sema_unit_tests) |path| { + inline for (corpus.sema_unit_tests[0..corpus.num_sema_passing]) |path| { const source: [:0]const u8 = @embedFile("../" ++ path); var result = try semaCheck(source); defer result.deinit();