commit 30e77a3e444aed719699a5bc727cf52971a54f34 (tree)
parent d14607caefaef26de5c2ec124ad20025bccd0e63
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Fri, 20 Feb 2026 10:19:52 +0000
sema_test: compare AIR data per-instruction using tag-aware sizes
Move padding awareness from collection (verbose_air.zig) to the test
comparison (sema_test.zig). Air.Inst.Data is an 8-byte union where
some variants (un_op, no_op, ty, repeat) use fewer bytes; the rest is
uninitialised padding. Instead of zeroing padding at collection time,
compare only the meaningful bytes per tag in the test harness.
This reverts the verbose_air.zig zeroing from fbfecf51da19 and
replaces the bulk std.mem.eql in airCompareOne with a per-instruction
loop that also gives better diagnostics on mismatch (instruction
index, tag, byte count).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
2 files changed, 48 insertions(+), 42 deletions(-)
diff --git a/src/verbose_air.zig b/src/verbose_air.zig
@@ -10,34 +10,6 @@ const Compilation = zig_internals.Compilation;
const Package = zig_internals.Package;
const Air = zig_internals.Air;
-/// Number of meaningful bytes in Air.Inst.Data for a given tag.
-/// Variants smaller than 8 bytes leave padding uninitialised; callers
-/// must only copy this many bytes and zero the rest.
-fn airInstDataSize(tag: Air.Inst.Tag) usize {
- return switch (tag) {
- // no_op: 0 meaningful bytes
- .ret_addr, .frame_addr => 0,
- // un_op: 4 meaningful bytes (single Ref / u32)
- .sqrt, .sin, .cos, .tan, .exp, .exp2,
- .log, .log2, .log10,
- .floor, .ceil, .round, .trunc_float,
- .neg, .neg_optimized,
- .is_null, .is_non_null, .is_null_ptr, .is_non_null_ptr,
- .is_err, .is_non_err, .is_err_ptr, .is_non_err_ptr,
- .ret, .ret_safe, .ret_load,
- .is_named_enum_value, .tag_name, .error_name,
- .cmp_lt_errors_len,
- .c_va_end,
- => 4,
- // ty: 4 meaningful bytes (single Type / u32)
- .alloc, .ret_ptr, .c_va_start => 4,
- // repeat: 4 meaningful bytes (single Index / u32)
- .repeat => 4,
- // All other variants use the full 8 bytes.
- else => 8,
- };
-}
-
/// Matches C `Air` struct layout (air.h).
const CAir = extern struct {
inst_len: u32,
@@ -92,20 +64,20 @@ const AirCollector = struct {
break :blk dst.ptr;
} else null;
- // Copy datas (8 bytes per instruction).
- // Air.Inst.Data is a union; variants smaller than 8 bytes
- // (un_op, no_op, ty, repeat) leave padding bytes uninitialised.
- // Zero the buffer first, then copy only the meaningful bytes
- // per instruction so that padding is deterministically zero.
+ // Copy datas (8 bytes per instruction)
const datas_byte_len = inst_len * 8;
const datas_copy: ?[*]u8 = if (inst_len > 0) blk: {
const dst = try gpa.alloc(u8, datas_byte_len);
- @memset(dst, 0);
const zig_datas = air.instructions.items(.data);
- for (0..inst_len) |i| {
- const src = @as(*const [8]u8, @ptrCast(&zig_datas[i]));
- const n = airInstDataSize(zig_tags[i]);
- @memcpy(dst[i * 8 ..][0..n], src[0..n]);
+ if (@sizeOf(Air.Inst.Data) == 8) {
+ const src = @as([*]const u8, @ptrCast(zig_datas.ptr))[0..datas_byte_len];
+ @memcpy(dst, src);
+ } else {
+ // Safety build: @sizeOf(Data) may be > 8, so copy only the first 8 bytes of each element.
+ for (zig_datas, 0..) |*d, i| {
+ const src = @as(*const [8]u8, @ptrCast(d));
+ @memcpy(dst[i * 8 ..][0..8], src);
+ }
}
break :blk dst.ptr;
} else null;
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -325,6 +325,38 @@ fn canonicalizeRef(
return gop.value_ptr.*;
}
+/// Returns how many leading 4-byte slots (0, 1 or 2) of an AirInstData
+/// are meaningful for `tag_val`, which is a c.AIR_INST_* tag value.
+/// Air.Inst.Data is an 8-byte union; variants smaller than 8 bytes (un_op,
+/// no_op, ty, repeat) leave padding uninitialised, so compare only these.
+fn airInstNumSlots(tag_val: u8) usize {
+ return switch (tag_val) {
+ // no_op: 0 meaningful bytes (0 slots)
+ c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR => 0,
+ // un_op: 4 meaningful bytes (1 slot)
+ c.AIR_INST_SQRT, c.AIR_INST_SIN, c.AIR_INST_COS, c.AIR_INST_TAN,
+ c.AIR_INST_EXP, c.AIR_INST_EXP2,
+ c.AIR_INST_LOG, c.AIR_INST_LOG2, c.AIR_INST_LOG10,
+ c.AIR_INST_FLOOR, c.AIR_INST_CEIL, c.AIR_INST_ROUND, c.AIR_INST_TRUNC_FLOAT,
+ c.AIR_INST_NEG, c.AIR_INST_NEG_OPTIMIZED,
+ c.AIR_INST_IS_NULL, c.AIR_INST_IS_NON_NULL,
+ c.AIR_INST_IS_NULL_PTR, c.AIR_INST_IS_NON_NULL_PTR,
+ c.AIR_INST_IS_ERR, c.AIR_INST_IS_NON_ERR,
+ c.AIR_INST_IS_ERR_PTR, c.AIR_INST_IS_NON_ERR_PTR,
+ c.AIR_INST_RET, c.AIR_INST_RET_SAFE, c.AIR_INST_RET_LOAD,
+ c.AIR_INST_IS_NAMED_ENUM_VALUE, c.AIR_INST_TAG_NAME, c.AIR_INST_ERROR_NAME,
+ c.AIR_INST_CMP_LT_ERRORS_LEN,
+ c.AIR_INST_C_VA_END,
+ => 1,
+ // ty: 4 meaningful bytes (1 slot)
+ c.AIR_INST_ALLOC, c.AIR_INST_RET_PTR, c.AIR_INST_C_VA_START => 1,
+ // repeat: 4 meaningful bytes (1 slot)
+ c.AIR_INST_REPEAT => 1,
+ // All other tags use the full 8 bytes (2 slots). NOTE(review): confirm no small variant is missing above.
+ else => 2,
+ };
+}
+
/// Return which of the two 4-byte slots in Air.Inst.Data are Refs
/// for a given AIR instruction tag. [0] = bytes [0:4], [1] = bytes
/// [4:8]. Non-ref slots (line/column, payload indices, padding)
@@ -414,11 +446,14 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
}
}
- // Datas (8 bytes per instruction, tag-aware comparison)
+ // Datas (8 bytes per instruction, tag-aware comparison).
// IP refs may differ between C and Zig InternPools, so we use
// canonical renumbering: each unique IP ref gets a sequential ID
// in order of first appearance. Inst refs (bit 31 set) and
// non-ref fields are compared directly.
+ // Air.Inst.Data is an 8-byte union; variants smaller than 8 bytes
+ // (un_op, no_op, ty, repeat) leave padding uninitialised — only
+ // compare the meaningful slots per tag via airInstNumSlots.
if (inst_len > 0) {
const zig_tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags) orelse unreachable;
const zig_datas: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas) orelse {
@@ -439,11 +474,10 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
for (0..inst_len) |j| {
const off = j * 8;
const tag_val = zig_tags[j];
- // Determine which 4-byte slots are refs based on the tag.
- // Slot 0 = bytes [0:4], Slot 1 = bytes [4:8].
const ref_slots = airDataRefSlots(tag_val);
+ const num_slots = airInstNumSlots(tag_val);
- for (0..2) |slot| {
+ for (0..num_slots) |slot| {
const s = off + slot * 4;
const zig_word = std.mem.readInt(u32, zig_datas[s..][0..4], .little);
const c_word = std.mem.readInt(u32, c_datas[s..][0..4], .little);