commit df2c1a77588cdc0b226441a598c52387758640ee (tree)
parent 3fa58a2654739a24503221ba3467fa7e6a3aeedc
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Mon, 23 Feb 2026 22:34:41 +0000
sema_test: make AIR comparison strict and add extra canonicalization
- Gap 1: function count check is now a hard error (was warning),
with diagnostic listing functions missing from C output
- Gap 3: canonicalizeExtraRefs for tags with Refs in extra payload
(StructField, Bin, UnionInit, VectorCmp, Cmpxchg, AtomicRmw,
TryPtr, FieldParentPtr, ShuffleOne/Two)
- Gap 5: detect ambiguous name matches in precomputedFindByName
- Reduce num_passing 66→8 (addhf3.zig function count mismatch)
- Add num_sema_passing=78 (call_inside_runtime_conditional and
6 similar tests have function count mismatches)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
2 files changed, 148 insertions(+), 15 deletions(-)
diff --git a/stage0/corpus.zig b/stage0/corpus.zig
@@ -3,7 +3,7 @@
/// `num_passing` controls how many files are tested and pre-generated.
/// Both build.zig and stages_test.zig import this file.
/// To enable more tests: just increment `num_passing`.
-pub const num_passing: usize = 66;
+pub const num_passing: usize = 8;
pub const files = [_][]const u8{
"lib/std/crypto/codecs.zig", // 165
@@ -203,6 +203,8 @@ pub const files = [_][]const u8{
"lib/std/math/expo2.zig", // 995
};
+pub const num_sema_passing: usize = 78; // how many leading `sema_unit_tests` entries the "sema air: unit tests" test iterates over
+
pub const sema_unit_tests = [_][]const u8{
"stage0/sema_tests/empty.zig",
"stage0/sema_tests/const_decl.zig",
@@ -294,3 +296,4 @@ pub const sema_unit_tests = [_][]const u8{
"stage0/sema_tests/min_float.zig",
"stage0/sema_tests/f64_div.zig",
};
+
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -323,11 +323,24 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
try airCompareOne(c_name, pf.*, c_pf);
}
// Verify bidirectional match: Zig should not produce functions that C does not.
- // Currently a diagnostic (not a hard error) because the C sema does not yet
- // analyze all lazily-referenced functions.
if (c_funcs.len != precomputed.len) {
- std.debug.print("WARNING: function count mismatch for AIR comparison: " ++
- "C produced {d} functions, pre-computed (Zig) has {d}\n", .{ c_funcs.len, precomputed.len });
+ std.debug.print("Function count mismatch: C produced {d} functions, " ++
+ "pre-computed (Zig) has {d}\n", .{ c_funcs.len, precomputed.len });
+ // Print which pre-computed functions C didn't produce.
+ for (precomputed) |*pf| {
+ var found = false;
+ for (c_funcs) |*cf| {
+ const cn = if (cf.name) |n| std.mem.span(n) else "";
+ if (std.mem.eql(u8, stripModulePrefix(pf.name), stripModulePrefix(cn))) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ std.debug.print(" missing in C: '{s}'\n", .{pf.name});
+ }
+ }
+ return error.AirMismatch;
}
}
@@ -344,10 +357,18 @@ fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
fn precomputedFindByName(funcs: []const PrecomputedFunc, name: []const u8) ?*const PrecomputedFunc {
const bare_name = stripModulePrefix(name);
+ var result: ?*const PrecomputedFunc = null; // first entry whose prefix-stripped name equals bare_name; stays null when none match
+ var match_count: usize = 0; // number of entries in funcs whose prefix-stripped name equals bare_name
for (funcs) |*f| {
- if (std.mem.eql(u8, bare_name, stripModulePrefix(f.name))) return f;
+ if (std.mem.eql(u8, bare_name, stripModulePrefix(f.name))) {
+ if (result == null) result = f; // keep the first match; later matches are only counted
+ match_count += 1;
+ }
+ }
+ if (match_count > 1) { // ambiguity is only reported via std.debug.print; the first match is still returned
+ std.debug.print("Ambiguous name match: '{s}' matches {d} pre-computed functions\n", .{ bare_name, match_count });
}
- return null;
+ return result;
}
fn cNameSpan(name: [*c]u8) []const u8 {
@@ -699,6 +720,105 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
};
}
+/// Canonicalize, in place, the Ref-typed words of one instruction's payload
+/// in the extra array by passing each through `canonicalizeRef` with the
+/// given `map` / `next_id`.
+///
+/// The payload index is read as a little-endian u32 from bytes 4..8 of the
+/// instruction's 8-byte record in `datas`. Which offsets from that index are
+/// rewritten is chosen per tag from the extra layouts noted on each switch
+/// arm; payload indices, field indices, flags and enum values are left
+/// untouched, and tags that are not listed are not modified at all.
+///
+/// Offsets are applied with saturating addition. `canonExtraRef` already
+/// ignores indices that fall outside `extra`, so a payload index close to
+/// `maxInt(u32)` is skipped instead of triggering an integer-overflow
+/// safety panic before that bounds check can run.
+fn canonicalizeExtraRefs(
+    tag_val: u8,
+    datas: [*]const u8,
+    inst_idx: usize,
+    extra: []u32,
+    map: *std.AutoHashMap(u32, u32),
+    next_id: *u32,
+) void {
+    // Read the payload index from data slot 1 (bytes 4-7 of the 8-byte data).
+    const payload = std.mem.readInt(u32, datas[inst_idx * 8 + 4 ..][0..4], .little);
+
+    switch (tag_val) {
+        // ty_pl with Bin extra: {lhs(Ref), rhs(Ref)}
+        c.AIR_INST_PTR_ADD,
+        c.AIR_INST_PTR_SUB,
+        c.AIR_INST_ADD_WITH_OVERFLOW,
+        c.AIR_INST_SUB_WITH_OVERFLOW,
+        c.AIR_INST_MUL_WITH_OVERFLOW,
+        c.AIR_INST_SHL_WITH_OVERFLOW,
+        c.AIR_INST_SLICE,
+        c.AIR_INST_SLICE_ELEM_PTR,
+        c.AIR_INST_PTR_ELEM_PTR,
+        => {
+            canonExtraRef(extra, payload, map, next_id);
+            canonExtraRef(extra, payload +| 1, map, next_id);
+        },
+        // pl_op with Bin extra: {lhs(Ref), rhs(Ref)}
+        c.AIR_INST_SELECT,
+        c.AIR_INST_MUL_ADD,
+        => {
+            canonExtraRef(extra, payload, map, next_id);
+            canonExtraRef(extra, payload +| 1, map, next_id);
+        },
+        // ty_pl with UnionInit extra: {field_index(u32), init(Ref)}
+        c.AIR_INST_UNION_INIT => {
+            canonExtraRef(extra, payload +| 1, map, next_id);
+        },
+        // ty_pl with VectorCmp extra: {lhs(Ref), rhs(Ref), op(u32)}
+        c.AIR_INST_CMP_VECTOR,
+        c.AIR_INST_CMP_VECTOR_OPTIMIZED,
+        => {
+            canonExtraRef(extra, payload, map, next_id);
+            canonExtraRef(extra, payload +| 1, map, next_id);
+        },
+        // ty_pl with Cmpxchg extra: {ptr(Ref), expected(Ref), new(Ref), flags(u32)}
+        c.AIR_INST_CMPXCHG_WEAK,
+        c.AIR_INST_CMPXCHG_STRONG,
+        => {
+            canonExtraRef(extra, payload, map, next_id);
+            canonExtraRef(extra, payload +| 1, map, next_id);
+            canonExtraRef(extra, payload +| 2, map, next_id);
+        },
+        // pl_op with AtomicRmw extra: {operand(Ref), flags(u32)}
+        c.AIR_INST_ATOMIC_RMW => {
+            canonExtraRef(extra, payload, map, next_id);
+        },
+        // ty_pl with TryPtr extra: {ptr(Ref), body_len(u32), body...}
+        c.AIR_INST_TRY_PTR,
+        c.AIR_INST_TRY_PTR_COLD,
+        => {
+            canonExtraRef(extra, payload, map, next_id);
+        },
+        // ty_pl with FieldParentPtr extra: {field_ptr(Ref), field_index(u32)}
+        c.AIR_INST_FIELD_PARENT_PTR => {
+            canonExtraRef(extra, payload, map, next_id);
+        },
+        // ty_pl with ShuffleOne extra: {mask(u32), operand(Ref)}
+        // NOTE(review): upstream Zig's Air.zig appears to document this extra
+        // as `mask_len` mask words followed by the operand. If stage0 uses
+        // that layout, `payload + 1` is only the operand when mask_len == 1.
+        // TODO confirm against the C side's layout.
+        c.AIR_INST_SHUFFLE_ONE => {
+            canonExtraRef(extra, payload +| 1, map, next_id);
+        },
+        // ty_pl with ShuffleTwo extra: {mask(u32), operand_a(Ref), operand_b(Ref)}
+        // NOTE(review): same single-mask-word assumption as ShuffleOne above.
+        c.AIR_INST_SHUFFLE_TWO => {
+            canonExtraRef(extra, payload +| 1, map, next_id);
+            canonExtraRef(extra, payload +| 2, map, next_id);
+        },
+        // ty_pl with StructField extra: {struct_operand(Ref), field_index(u32)}
+        c.AIR_INST_STRUCT_FIELD_PTR,
+        c.AIR_INST_STRUCT_FIELD_VAL,
+        => {
+            canonExtraRef(extra, payload, map, next_id);
+        },
+        // ty_pl with AGGREGATE_INIT: {ref[0], ref[1], ..., ref[N-1]}
+        // N is determined by the aggregate type — not stored in extra.
+        // Cannot canonicalize without type info; refs compared directly.
+        else => {},
+    }
+}
+
+/// Replace `extra[index]` with the value `canonicalizeRef` returns for it,
+/// using the supplied `map` and `next_id`. An `index` that lies outside
+/// `extra` is ignored.
+fn canonExtraRef(extra: []u32, index: u32, map: *std.AutoHashMap(u32, u32), next_id: *u32) void {
+    if (index >= extra.len) return;
+    const slot = &extra[index];
+    slot.* = canonicalizeRef(slot.*, map, next_id);
+}
+
/// Zero-pad bytes after the null terminator in a NullTerminatedString stored
/// in the extra array. Zig's appendAirString leaves padding uninitialised;
/// the C side zeroes it. Normalising both to zero allows comparison.
@@ -894,13 +1014,23 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void
}
}
}
- // TODO: Add extra canonicalization for tags that store Refs
- // in their extra payload (e.g., Bin, StructField, UnionInit,
- // VectorCmp, Cmpxchg, AtomicRmw, TryPtr, FieldParentPtr,
- // AggregateInit). Currently these Refs are compared as raw
- // u32, which works when IP indices match but will cause false
- // failures when they diverge. See Gap 3 in the comparison
- // audit plan.
+ // Extra canonicalization for tags with Refs in extra payload.
+ canonicalizeExtraRefs(
+ a.tags[j],
+ a.datas,
+ j,
+ a_extra_copy,
+ &a_ref_map,
+ &next_a_id,
+ );
+ canonicalizeExtraRefs(
+ b.tags[j],
+ b.datas,
+ j,
+ b_extra_copy,
+ &b_ref_map,
+ &next_b_id,
+ );
}
}
if (!std.mem.eql(u32, a_extra_copy, b_extra_copy)) {
@@ -924,7 +1054,7 @@ const corpus = @import("corpus.zig");
test "sema air: unit tests" {
@setEvalBranchQuota(corpus.sema_unit_tests.len * 2);
- inline for (corpus.sema_unit_tests) |path| {
+ inline for (corpus.sema_unit_tests[0..corpus.num_sema_passing]) |path| {
const source: [:0]const u8 = @embedFile("../" ++ path);
var result = try semaCheck(source);
defer result.deinit();