sema_test: harden AIR comparison with missing tag coverage and diagnostics

- Add bidirectional function count check (warns when Zig produces
  functions that C does not produce, surfacing lazy-analysis gaps)
- Replace magic number 51 with c.AIR_INST_BLOCK for robustness
- Add ~60 missing tags to airDataRefSlots with correct Ref slot
  mappings (bin_op, ty_op, ty_pl, pl_op, br, reduce, prefetch,
  atomic_load, vector_store_elem, ty_nav variants)
- Add SET_ERR_RETURN_TRACE (un_op) and ERR_RETURN_TRACE (ty) to
  airInstNumSlots for correct slot counts
- Add TODO for extra-array Ref canonicalization (Gap 3)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 22:15:16 +00:00
parent 2ea12a9f1e
commit 3fa58a2654

View File

@@ -322,6 +322,13 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
const c_pf = precomputedFromCAir(cf);
try airCompareOne(c_name, pf.*, c_pf);
}
// Verify bidirectional match: Zig should not produce functions that C does not.
// Currently a diagnostic (not a hard error) because the C sema does not yet
// analyze all lazily-referenced functions.
if (c_funcs.len != precomputed.len) {
std.debug.print("WARNING: function count mismatch for AIR comparison: " ++
"C produced {d} functions, pre-computed (Zig) has {d}\n", .{ c_funcs.len, precomputed.len });
}
}
fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
@@ -437,9 +444,10 @@ fn airInstNumSlots(tag_val: u8) usize {
c.AIR_INST_ERROR_NAME,
c.AIR_INST_CMP_LT_ERRORS_LEN,
c.AIR_INST_C_VA_END,
c.AIR_INST_SET_ERR_RETURN_TRACE,
=> 1,
// ty: 4 meaningful bytes (1 slot)
c.AIR_INST_ALLOC, c.AIR_INST_RET_PTR, c.AIR_INST_C_VA_START => 1,
c.AIR_INST_ALLOC, c.AIR_INST_RET_PTR, c.AIR_INST_C_VA_START, c.AIR_INST_ERR_RETURN_TRACE => 1,
// repeat: 4 meaningful bytes (1 slot)
c.AIR_INST_REPEAT => 1,
// All other variants use the full 8 bytes (2 slots).
@@ -457,6 +465,10 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR, c.AIR_INST_TRAP, c.AIR_INST_UNREACH, c.AIR_INST_BREAKPOINT => .{ false, false },
// dbg_stmt: line(u32) + column(u32)
c.AIR_INST_DBG_STMT, c.AIR_INST_DBG_EMPTY_STMT => .{ false, false },
// inferred_alloc / inferred_alloc_comptime: special data, no standard Refs
c.AIR_INST_INFERRED_ALLOC, c.AIR_INST_INFERRED_ALLOC_COMPTIME => .{ false, false },
// repeat: loop_inst(u32) + pad
c.AIR_INST_REPEAT => .{ false, false },
// pl_op: operand(Ref) + payload(u32)
c.AIR_INST_DBG_VAR_PTR,
c.AIR_INST_DBG_VAR_VAL,
@@ -465,6 +477,19 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_CALL_ALWAYS_TAIL,
c.AIR_INST_CALL_NEVER_TAIL,
c.AIR_INST_CALL_NEVER_INLINE,
c.AIR_INST_COND_BR,
c.AIR_INST_SWITCH_BR,
c.AIR_INST_LOOP_SWITCH_BR,
c.AIR_INST_TRY,
c.AIR_INST_TRY_COLD,
c.AIR_INST_ATOMIC_RMW,
c.AIR_INST_SELECT,
c.AIR_INST_MUL_ADD,
c.AIR_INST_WASM_MEMORY_SIZE,
c.AIR_INST_WASM_MEMORY_GROW,
c.AIR_INST_WORK_ITEM_ID,
c.AIR_INST_WORK_GROUP_SIZE,
c.AIR_INST_WORK_GROUP_ID,
=> .{ true, false },
// un_op: operand(Ref) + pad
c.AIR_INST_RET,
@@ -498,11 +523,13 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_ERROR_NAME,
c.AIR_INST_CMP_LT_ERRORS_LEN,
c.AIR_INST_C_VA_END,
c.AIR_INST_SET_ERR_RETURN_TRACE,
=> .{ true, false },
// ty: type(Ref) + pad
c.AIR_INST_ALLOC,
c.AIR_INST_RET_PTR,
c.AIR_INST_C_VA_START,
c.AIR_INST_ERR_RETURN_TRACE,
=> .{ true, false },
// ty_pl: type(Ref) + payload(u32)
c.AIR_INST_STRUCT_FIELD_VAL,
@@ -510,7 +537,41 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_DBG_INLINE_BLOCK,
c.AIR_INST_BLOCK,
c.AIR_INST_AGGREGATE_INIT,
c.AIR_INST_PTR_ADD,
c.AIR_INST_PTR_SUB,
c.AIR_INST_ADD_WITH_OVERFLOW,
c.AIR_INST_SUB_WITH_OVERFLOW,
c.AIR_INST_MUL_WITH_OVERFLOW,
c.AIR_INST_SHL_WITH_OVERFLOW,
c.AIR_INST_SLICE,
c.AIR_INST_SLICE_ELEM_PTR,
c.AIR_INST_PTR_ELEM_PTR,
c.AIR_INST_CMP_VECTOR,
c.AIR_INST_CMP_VECTOR_OPTIMIZED,
c.AIR_INST_TRY_PTR,
c.AIR_INST_TRY_PTR_COLD,
c.AIR_INST_CMPXCHG_WEAK,
c.AIR_INST_CMPXCHG_STRONG,
c.AIR_INST_UNION_INIT,
c.AIR_INST_ASSEMBLY,
c.AIR_INST_LOOP,
c.AIR_INST_SAVE_ERR_RETURN_TRACE_INDEX,
c.AIR_INST_SHUFFLE_ONE,
c.AIR_INST_SHUFFLE_TWO,
c.AIR_INST_FIELD_PARENT_PTR,
=> .{ true, false },
// reduce: operand(Ref) + operation(enum)
c.AIR_INST_REDUCE,
c.AIR_INST_REDUCE_OPTIMIZED,
=> .{ true, false },
// prefetch: ptr(Ref) + packed fields
c.AIR_INST_PREFETCH => .{ true, false },
// atomic_load: ptr(Ref) + order(enum)
c.AIR_INST_ATOMIC_LOAD => .{ true, false },
// vector_store_elem: vector_ptr(Ref) + payload(u32)
c.AIR_INST_VECTOR_STORE_ELEM => .{ true, false },
// ty_nav: ty(IP index) + nav(u32)
c.AIR_INST_RUNTIME_NAV_PTR => .{ true, false },
// bin_op: lhs(Ref) + rhs(Ref)
c.AIR_INST_ADD,
c.AIR_INST_ADD_SAFE,
@@ -555,6 +616,28 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_ADD_SAT,
c.AIR_INST_SUB_SAT,
c.AIR_INST_MUL_SAT,
c.AIR_INST_REM,
c.AIR_INST_REM_OPTIMIZED,
c.AIR_INST_MOD,
c.AIR_INST_MOD_OPTIMIZED,
c.AIR_INST_CMP_LT_OPTIMIZED,
c.AIR_INST_CMP_LTE_OPTIMIZED,
c.AIR_INST_CMP_EQ_OPTIMIZED,
c.AIR_INST_CMP_GTE_OPTIMIZED,
c.AIR_INST_CMP_GT_OPTIMIZED,
c.AIR_INST_CMP_NEQ_OPTIMIZED,
c.AIR_INST_SET_UNION_TAG,
c.AIR_INST_ARRAY_ELEM_VAL,
c.AIR_INST_SLICE_ELEM_VAL,
c.AIR_INST_PTR_ELEM_VAL,
c.AIR_INST_MEMSET,
c.AIR_INST_MEMSET_SAFE,
c.AIR_INST_MEMCPY,
c.AIR_INST_MEMMOVE,
c.AIR_INST_ATOMIC_STORE_UNORDERED,
c.AIR_INST_ATOMIC_STORE_MONOTONIC,
c.AIR_INST_ATOMIC_STORE_RELEASE,
c.AIR_INST_ATOMIC_STORE_SEQ_CST,
=> .{ true, true },
// ty_op: type(Ref) + operand(Ref)
c.AIR_INST_BITCAST,
@@ -565,9 +648,13 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_FPEXT,
c.AIR_INST_OPTIONAL_PAYLOAD,
c.AIR_INST_OPTIONAL_PAYLOAD_PTR,
c.AIR_INST_OPTIONAL_PAYLOAD_PTR_SET,
c.AIR_INST_WRAP_OPTIONAL,
c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD,
c.AIR_INST_UNWRAP_ERRUNION_ERR,
c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD_PTR,
c.AIR_INST_UNWRAP_ERRUNION_ERR_PTR,
c.AIR_INST_ERRUNION_PAYLOAD_PTR_SET,
c.AIR_INST_WRAP_ERRUNION_PAYLOAD,
c.AIR_INST_WRAP_ERRUNION_ERR,
c.AIR_INST_ARRAY_TO_SLICE,
@@ -583,17 +670,28 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_POPCOUNT,
c.AIR_INST_BYTE_SWAP,
c.AIR_INST_ABS,
c.AIR_INST_BIT_REVERSE,
c.AIR_INST_STRUCT_FIELD_PTR_INDEX_0,
c.AIR_INST_STRUCT_FIELD_PTR_INDEX_1,
c.AIR_INST_STRUCT_FIELD_PTR_INDEX_2,
c.AIR_INST_STRUCT_FIELD_PTR_INDEX_3,
c.AIR_INST_GET_UNION_TAG,
c.AIR_INST_SLICE_LEN,
c.AIR_INST_SLICE_PTR,
c.AIR_INST_PTR_SLICE_LEN_PTR,
c.AIR_INST_PTR_SLICE_PTR_PTR,
c.AIR_INST_SPLAT,
c.AIR_INST_ADDRSPACE_CAST,
c.AIR_INST_ERROR_SET_HAS_VALUE,
c.AIR_INST_C_VA_ARG,
c.AIR_INST_C_VA_COPY,
=> .{ true, true },
// arg: type(Ref) + zir_param_index(u32)
c.AIR_INST_ARG => .{ true, false },
// br: block_inst(u32) + operand(Ref)
c.AIR_INST_BR => .{ false, true },
// pl_op (cond_br): operand(Ref) + payload(u32)
c.AIR_INST_COND_BR => .{ true, false },
c.AIR_INST_BR,
c.AIR_INST_SWITCH_DISPATCH,
=> .{ false, true },
// Default: assume no refs (compare directly).
// If a tag with refs is missed, the comparison will fail
// and we add it here.
@@ -678,11 +776,11 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void
const a_word = std.mem.readInt(u32, a.datas[s..][0..4], .little);
const b_word = std.mem.readInt(u32, b.datas[s..][0..4], .little);
// Skip data comparison for dead BLOCKs (tag 51).
// Skip data comparison for dead BLOCKs.
// Dead BLOCKs have undefined data in Zig vs zeroed in C.
// Only check b_word to avoid reading uninitialized Zig data
// (which triggers valgrind "uninitialised value" errors).
if (tag_val == 51 and b_word == 0) continue;
if (tag_val == c.AIR_INST_BLOCK and b_word == 0) continue;
if (ref_slots[slot]) {
// This slot is a Ref — canonicalize IP refs.
@@ -796,6 +894,13 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void
}
}
}
// TODO: Add extra canonicalization for tags that store Refs
// in their extra payload (e.g., Bin, StructField, UnionInit,
// VectorCmp, Cmpxchg, AtomicRmw, TryPtr, FieldParentPtr,
// AggregateInit). Currently these Refs are compared as raw
// u32, which works when IP indices match but will cause false
// failures when they diverge. See Gap 3 in the comparison
// audit plan.
}
}
if (!std.mem.eql(u32, a_extra_copy, b_extra_copy)) {