commit 3fa58a2654739a24503221ba3467fa7e6a3aeedc (tree)
parent 2ea12a9f1e34c11bf0fb8d35d9c95c4f529bdfb6
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Mon, 23 Feb 2026 22:15:16 +0000
sema_test: harden AIR comparison with missing tag coverage and diagnostics
- Add bidirectional function count check (warns when Zig produces
functions that C does not, surfacing lazy-analysis gaps)
- Replace magic number 51 with c.AIR_INST_BLOCK for robustness
- Add ~60 missing tags to airDataRefSlots with correct Ref slot
mappings (bin_op, ty_op, ty_pl, pl_op, br, reduce, prefetch,
atomic_load, vector_store_elem, ty_nav variants)
- Add SET_ERR_RETURN_TRACE (un_op) and ERR_RETURN_TRACE (ty) to
airInstNumSlots for correct slot counts
- Add TODO for extra-array Ref canonicalization (Gap 3)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
| M | stage0/sema_test.zig | | | 117 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
1 file changed, 111 insertions(+), 6 deletions(-)
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -322,6 +322,13 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
const c_pf = precomputedFromCAir(cf);
try airCompareOne(c_name, pf.*, c_pf);
}
+ // Verify bidirectional match: Zig should not produce functions that C does not.
+ // Currently a diagnostic (not a hard error) because the C sema does not yet
+ // analyze all lazily-referenced functions.
+ if (c_funcs.len != precomputed.len) {
+ std.debug.print("WARNING: function count mismatch for AIR comparison: " ++
+ "C produced {d} functions, pre-computed (Zig) has {d}\n", .{ c_funcs.len, precomputed.len });
+ }
}
fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
@@ -437,9 +444,10 @@ fn airInstNumSlots(tag_val: u8) usize {
c.AIR_INST_ERROR_NAME,
c.AIR_INST_CMP_LT_ERRORS_LEN,
c.AIR_INST_C_VA_END,
+ c.AIR_INST_SET_ERR_RETURN_TRACE,
=> 1,
// ty: 4 meaningful bytes (1 slot)
- c.AIR_INST_ALLOC, c.AIR_INST_RET_PTR, c.AIR_INST_C_VA_START => 1,
+ c.AIR_INST_ALLOC, c.AIR_INST_RET_PTR, c.AIR_INST_C_VA_START, c.AIR_INST_ERR_RETURN_TRACE => 1,
// repeat: 4 meaningful bytes (1 slot)
c.AIR_INST_REPEAT => 1,
// All other variants use the full 8 bytes (2 slots).
@@ -457,6 +465,10 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR, c.AIR_INST_TRAP, c.AIR_INST_UNREACH, c.AIR_INST_BREAKPOINT => .{ false, false },
// dbg_stmt: line(u32) + column(u32)
c.AIR_INST_DBG_STMT, c.AIR_INST_DBG_EMPTY_STMT => .{ false, false },
+ // inferred_alloc / inferred_alloc_comptime: special data, no standard Refs
+ c.AIR_INST_INFERRED_ALLOC, c.AIR_INST_INFERRED_ALLOC_COMPTIME => .{ false, false },
+ // repeat: loop_inst(u32) + pad
+ c.AIR_INST_REPEAT => .{ false, false },
// pl_op: operand(Ref) + payload(u32)
c.AIR_INST_DBG_VAR_PTR,
c.AIR_INST_DBG_VAR_VAL,
@@ -465,6 +477,19 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_CALL_ALWAYS_TAIL,
c.AIR_INST_CALL_NEVER_TAIL,
c.AIR_INST_CALL_NEVER_INLINE,
+ c.AIR_INST_COND_BR,
+ c.AIR_INST_SWITCH_BR,
+ c.AIR_INST_LOOP_SWITCH_BR,
+ c.AIR_INST_TRY,
+ c.AIR_INST_TRY_COLD,
+ c.AIR_INST_ATOMIC_RMW,
+ c.AIR_INST_SELECT,
+ c.AIR_INST_MUL_ADD,
+ c.AIR_INST_WASM_MEMORY_SIZE,
+ c.AIR_INST_WASM_MEMORY_GROW,
+ c.AIR_INST_WORK_ITEM_ID,
+ c.AIR_INST_WORK_GROUP_SIZE,
+ c.AIR_INST_WORK_GROUP_ID,
=> .{ true, false },
// un_op: operand(Ref) + pad
c.AIR_INST_RET,
@@ -498,11 +523,13 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_ERROR_NAME,
c.AIR_INST_CMP_LT_ERRORS_LEN,
c.AIR_INST_C_VA_END,
+ c.AIR_INST_SET_ERR_RETURN_TRACE,
=> .{ true, false },
// ty: type(Ref) + pad
c.AIR_INST_ALLOC,
c.AIR_INST_RET_PTR,
c.AIR_INST_C_VA_START,
+ c.AIR_INST_ERR_RETURN_TRACE,
=> .{ true, false },
// ty_pl: type(Ref) + payload(u32)
c.AIR_INST_STRUCT_FIELD_VAL,
@@ -510,7 +537,41 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_DBG_INLINE_BLOCK,
c.AIR_INST_BLOCK,
c.AIR_INST_AGGREGATE_INIT,
+ c.AIR_INST_PTR_ADD,
+ c.AIR_INST_PTR_SUB,
+ c.AIR_INST_ADD_WITH_OVERFLOW,
+ c.AIR_INST_SUB_WITH_OVERFLOW,
+ c.AIR_INST_MUL_WITH_OVERFLOW,
+ c.AIR_INST_SHL_WITH_OVERFLOW,
+ c.AIR_INST_SLICE,
+ c.AIR_INST_SLICE_ELEM_PTR,
+ c.AIR_INST_PTR_ELEM_PTR,
+ c.AIR_INST_CMP_VECTOR,
+ c.AIR_INST_CMP_VECTOR_OPTIMIZED,
+ c.AIR_INST_TRY_PTR,
+ c.AIR_INST_TRY_PTR_COLD,
+ c.AIR_INST_CMPXCHG_WEAK,
+ c.AIR_INST_CMPXCHG_STRONG,
+ c.AIR_INST_UNION_INIT,
+ c.AIR_INST_ASSEMBLY,
+ c.AIR_INST_LOOP,
+ c.AIR_INST_SAVE_ERR_RETURN_TRACE_INDEX,
+ c.AIR_INST_SHUFFLE_ONE,
+ c.AIR_INST_SHUFFLE_TWO,
+ c.AIR_INST_FIELD_PARENT_PTR,
+ => .{ true, false },
+ // reduce: operand(Ref) + operation(enum)
+ c.AIR_INST_REDUCE,
+ c.AIR_INST_REDUCE_OPTIMIZED,
=> .{ true, false },
+ // prefetch: ptr(Ref) + packed fields
+ c.AIR_INST_PREFETCH => .{ true, false },
+ // atomic_load: ptr(Ref) + order(enum)
+ c.AIR_INST_ATOMIC_LOAD => .{ true, false },
+ // vector_store_elem: vector_ptr(Ref) + payload(u32)
+ c.AIR_INST_VECTOR_STORE_ELEM => .{ true, false },
+ // ty_nav: ty(IP index) + nav(u32)
+ c.AIR_INST_RUNTIME_NAV_PTR => .{ true, false },
// bin_op: lhs(Ref) + rhs(Ref)
c.AIR_INST_ADD,
c.AIR_INST_ADD_SAFE,
@@ -555,6 +616,28 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_ADD_SAT,
c.AIR_INST_SUB_SAT,
c.AIR_INST_MUL_SAT,
+ c.AIR_INST_REM,
+ c.AIR_INST_REM_OPTIMIZED,
+ c.AIR_INST_MOD,
+ c.AIR_INST_MOD_OPTIMIZED,
+ c.AIR_INST_CMP_LT_OPTIMIZED,
+ c.AIR_INST_CMP_LTE_OPTIMIZED,
+ c.AIR_INST_CMP_EQ_OPTIMIZED,
+ c.AIR_INST_CMP_GTE_OPTIMIZED,
+ c.AIR_INST_CMP_GT_OPTIMIZED,
+ c.AIR_INST_CMP_NEQ_OPTIMIZED,
+ c.AIR_INST_SET_UNION_TAG,
+ c.AIR_INST_ARRAY_ELEM_VAL,
+ c.AIR_INST_SLICE_ELEM_VAL,
+ c.AIR_INST_PTR_ELEM_VAL,
+ c.AIR_INST_MEMSET,
+ c.AIR_INST_MEMSET_SAFE,
+ c.AIR_INST_MEMCPY,
+ c.AIR_INST_MEMMOVE,
+ c.AIR_INST_ATOMIC_STORE_UNORDERED,
+ c.AIR_INST_ATOMIC_STORE_MONOTONIC,
+ c.AIR_INST_ATOMIC_STORE_RELEASE,
+ c.AIR_INST_ATOMIC_STORE_SEQ_CST,
=> .{ true, true },
// ty_op: type(Ref) + operand(Ref)
c.AIR_INST_BITCAST,
@@ -565,9 +648,13 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_FPEXT,
c.AIR_INST_OPTIONAL_PAYLOAD,
c.AIR_INST_OPTIONAL_PAYLOAD_PTR,
+ c.AIR_INST_OPTIONAL_PAYLOAD_PTR_SET,
c.AIR_INST_WRAP_OPTIONAL,
c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD,
c.AIR_INST_UNWRAP_ERRUNION_ERR,
+ c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD_PTR,
+ c.AIR_INST_UNWRAP_ERRUNION_ERR_PTR,
+ c.AIR_INST_ERRUNION_PAYLOAD_PTR_SET,
c.AIR_INST_WRAP_ERRUNION_PAYLOAD,
c.AIR_INST_WRAP_ERRUNION_ERR,
c.AIR_INST_ARRAY_TO_SLICE,
@@ -583,17 +670,28 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_POPCOUNT,
c.AIR_INST_BYTE_SWAP,
c.AIR_INST_ABS,
+ c.AIR_INST_BIT_REVERSE,
c.AIR_INST_STRUCT_FIELD_PTR_INDEX_0,
c.AIR_INST_STRUCT_FIELD_PTR_INDEX_1,
c.AIR_INST_STRUCT_FIELD_PTR_INDEX_2,
c.AIR_INST_STRUCT_FIELD_PTR_INDEX_3,
+ c.AIR_INST_GET_UNION_TAG,
+ c.AIR_INST_SLICE_LEN,
+ c.AIR_INST_SLICE_PTR,
+ c.AIR_INST_PTR_SLICE_LEN_PTR,
+ c.AIR_INST_PTR_SLICE_PTR_PTR,
+ c.AIR_INST_SPLAT,
+ c.AIR_INST_ADDRSPACE_CAST,
+ c.AIR_INST_ERROR_SET_HAS_VALUE,
+ c.AIR_INST_C_VA_ARG,
+ c.AIR_INST_C_VA_COPY,
=> .{ true, true },
// arg: type(Ref) + zir_param_index(u32)
c.AIR_INST_ARG => .{ true, false },
// br: block_inst(u32) + operand(Ref)
- c.AIR_INST_BR => .{ false, true },
- // pl_op (cond_br): operand(Ref) + payload(u32)
- c.AIR_INST_COND_BR => .{ true, false },
+ c.AIR_INST_BR,
+ c.AIR_INST_SWITCH_DISPATCH,
+ => .{ false, true },
// Default: assume no refs (compare directly).
// If a tag with refs is missed, the comparison will fail
// and we add it here.
@@ -678,11 +776,11 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void
const a_word = std.mem.readInt(u32, a.datas[s..][0..4], .little);
const b_word = std.mem.readInt(u32, b.datas[s..][0..4], .little);
- // Skip data comparison for dead BLOCKs (tag 51).
+ // Skip data comparison for dead BLOCKs.
// Dead BLOCKs have undefined data in Zig vs zeroed in C.
// Only check b_word to avoid reading uninitialized Zig data
// (which triggers valgrind "uninitialised value" errors).
- if (tag_val == 51 and b_word == 0) continue;
+ if (tag_val == c.AIR_INST_BLOCK and b_word == 0) continue;
if (ref_slots[slot]) {
// This slot is a Ref — canonicalize IP refs.
@@ -796,6 +894,13 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void
}
}
}
+ // TODO: Canonicalize Refs stored in the extra array for tags
+ // whose extra payload holds them (e.g., Bin, StructField,
+ // UnionInit, VectorCmp, Cmpxchg, AtomicRmw, TryPtr,
+ // FieldParentPtr, AggregateInit). Currently these Refs are
+ // compared as raw u32, which works when IP indices match but
+ // causes false failures when they diverge. See Gap 3 in the
+ // comparison audit plan.
}
}
if (!std.mem.eql(u32, a_extra_copy, b_extra_copy)) {