Files
zig/stage0/sema_test.zig
Motiejus 9cd2e0ff77 stage0: refactor globals into structs matching Zig architecture
Eliminates all 39 mutable static globals across sema.c (37) and
intern_pool.c (2). State is now held in properly-typed structs passed
as parameters, mirroring the Zig reference implementation layout.

New files matching Zig src/ layout:
- compilation.h: CompilationConfig + Compilation (matches Compilation.zig)
- zcu.h/zcu.c: ZcuFile, ZcuNamespace, Zcu, zcuInit/zcuDeinit (matches Zcu.zig)
- zcu_per_thread.h: forward declarations for PerThread-style functions

Key changes:
- InternPool gains navs[] (dynamically allocated) + nav_count/nav_cap;
  Nav functions now take InternPool* (was implicit via globals)
- Sema gains Zcu* zcu; semaInit now takes Zcu* instead of InternPool*
- All module-level state (files, namespaces, memoized state, config)
  moved from static globals into Zcu struct
- zig0.c creates Compilation + Zcu before semaInit
- Test files updated to use zcuInit/zcuDeinit API

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-03-02 16:44:27 +00:00

902 lines
34 KiB
Zig

const std = @import("std");
// Import C types including sema.h (which transitively includes air.h, intern_pool.h, etc.)
// Also include astgen.h so we have the full pipeline in one namespace.
pub const c = @cImport({
@cInclude("astgen.h");
@cInclude("sema.h");
@cInclude("zcu.h");
});
/// Reinterprets a C `#define` integer constant (seen by Zig as `c_int`) as a
/// `u32`, so it can be compared against `uint32_t` fields such as
/// InternPool indices. The bit pattern is preserved; no range check is done.
fn idx(val: c_int) u32 {
    const reinterpreted: u32 = @bitCast(val);
    return reinterpreted;
}
/// Passes a C enum value (`c_uint`) through unchanged. Its only job is to
/// give the expected side of a comparison a concrete `c_uint` type.
fn tag(val: c_uint) c_uint {
    return @as(c_uint, val);
}
// ---------------------------------------------------------------------------
// InternPool unit tests
// ---------------------------------------------------------------------------
test "intern_pool: init and pre-interned types" {
    const expectEqual = std.testing.expectEqual;

    var pool = c.ipInit();
    defer c.ipDeinit(&pool);

    // A freshly initialised pool holds exactly the pre-interned entries.
    try expectEqual(@as(u32, 124), pool.items_len);

    // void is a simple type.
    const key_void = c.ipIndexToKey(&pool, idx(c.IP_INDEX_VOID_TYPE));
    try expectEqual(tag(c.IP_KEY_SIMPLE_TYPE), key_void.tag);
    try expectEqual(tag(c.SIMPLE_TYPE_VOID), key_void.data.simple_type);

    // u32 is a 32-bit int type with signedness 0 (unsigned).
    const key_u32 = c.ipIndexToKey(&pool, idx(c.IP_INDEX_U32_TYPE));
    try expectEqual(tag(c.IP_KEY_INT_TYPE), key_u32.tag);
    try expectEqual(@as(u16, 32), key_u32.data.int_type.bits);
    try expectEqual(@as(u8, 0), key_u32.data.int_type.signedness);

    // i32 is a 32-bit int type with signedness 1 (signed).
    const key_i32 = c.ipIndexToKey(&pool, idx(c.IP_INDEX_I32_TYPE));
    try expectEqual(tag(c.IP_KEY_INT_TYPE), key_i32.tag);
    try expectEqual(@as(u16, 32), key_i32.data.int_type.bits);
    try expectEqual(@as(u8, 1), key_i32.data.int_type.signedness);

    // bool is a simple type.
    const key_bool = c.ipIndexToKey(&pool, idx(c.IP_INDEX_BOOL_TYPE));
    try expectEqual(tag(c.IP_KEY_SIMPLE_TYPE), key_bool.tag);
    try expectEqual(tag(c.SIMPLE_TYPE_BOOL), key_bool.data.simple_type);
}
test "intern_pool: pre-interned values" {
    const expectEqual = std.testing.expectEqual;

    var pool = c.ipInit();
    defer c.ipDeinit(&pool);

    // The void value is a simple value.
    const key_void = c.ipIndexToKey(&pool, idx(c.IP_INDEX_VOID_VALUE));
    try expectEqual(tag(c.IP_KEY_SIMPLE_VALUE), key_void.tag);
    try expectEqual(tag(c.SIMPLE_VALUE_VOID), key_void.data.simple_value);

    // Both boolean constants are simple values.
    const key_true = c.ipIndexToKey(&pool, idx(c.IP_INDEX_BOOL_TRUE));
    try expectEqual(tag(c.IP_KEY_SIMPLE_VALUE), key_true.tag);
    try expectEqual(tag(c.SIMPLE_VALUE_TRUE), key_true.data.simple_value);

    const key_false = c.ipIndexToKey(&pool, idx(c.IP_INDEX_BOOL_FALSE));
    try expectEqual(tag(c.IP_KEY_SIMPLE_VALUE), key_false.tag);
    try expectEqual(tag(c.SIMPLE_VALUE_FALSE), key_false.data.simple_value);

    // Zero is stored under the int key; only the key tag is checked here.
    const key_zero = c.ipIndexToKey(&pool, idx(c.IP_INDEX_ZERO));
    try expectEqual(tag(c.IP_KEY_INT), key_zero.tag);
}
test "intern_pool: ipTypeOf" {
    var pool = c.ipInit();
    defer c.ipDeinit(&pool);

    // Each row is { expected type index, index being queried }.
    // The first three rows are types (whose type is `type`); the last
    // three are values paired with their respective types.
    const cases = [_][2]u32{
        .{ idx(c.IP_INDEX_TYPE_TYPE), idx(c.IP_INDEX_VOID_TYPE) },
        .{ idx(c.IP_INDEX_TYPE_TYPE), idx(c.IP_INDEX_U32_TYPE) },
        .{ idx(c.IP_INDEX_TYPE_TYPE), idx(c.IP_INDEX_BOOL_TYPE) },
        .{ idx(c.IP_INDEX_VOID_TYPE), idx(c.IP_INDEX_VOID_VALUE) },
        .{ idx(c.IP_INDEX_BOOL_TYPE), idx(c.IP_INDEX_BOOL_TRUE) },
        .{ idx(c.IP_INDEX_BOOL_TYPE), idx(c.IP_INDEX_BOOL_FALSE) },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case[0], c.ipTypeOf(&pool, case[1]));
    }
}
test "intern_pool: ipIntern deduplication" {
    const expectEqual = std.testing.expectEqual;

    var pool = c.ipInit();
    defer c.ipDeinit(&pool);

    // Build a key equal to the pre-interned `void` type. Every byte is
    // zeroed first so no part of the key is left undefined.
    var key: c.InternPoolKey = undefined;
    @memset(std.mem.asBytes(&key), 0);
    key.tag = c.IP_KEY_SIMPLE_TYPE;
    key.data.simple_type = c.SIMPLE_TYPE_VOID;

    // Interning an already-present key yields the existing index...
    const interned = c.ipIntern(&pool, key);
    try expectEqual(idx(c.IP_INDEX_VOID_TYPE), interned);

    // ...and does not grow the pool.
    try expectEqual(@as(u32, 124), pool.items_len);
}
test "intern_pool: ipIntern new key" {
    const expect = std.testing.expect;
    const expectEqual = std.testing.expectEqual;

    var pool = c.ipInit();
    defer c.ipDeinit(&pool);

    // An array type ([10]u8, no sentinel) that is not pre-interned.
    var key: c.InternPoolKey = undefined;
    @memset(std.mem.asBytes(&key), 0);
    key.tag = c.IP_KEY_ARRAY_TYPE;
    key.data.array_type = .{
        .len = 10,
        .child = idx(c.IP_INDEX_U8_TYPE),
        .sentinel = c.IP_INDEX_NONE,
    };

    // First intern appends one entry after the pre-interned range.
    const first = c.ipIntern(&pool, key);
    try expect(first >= idx(c.IP_INDEX_PREINTERN_COUNT));
    try expectEqual(@as(u32, 125), pool.items_len);

    // Second intern of the same key is a lookup: same index, same size.
    const second = c.ipIntern(&pool, key);
    try expectEqual(first, second);
    try expectEqual(@as(u32, 125), pool.items_len);
}
test "intern_pool: vector types" {
    const expectEqual = std.testing.expectEqual;

    var pool = c.ipInit();
    defer c.ipDeinit(&pool);

    // Pre-interned @Vector(8, i8).
    const vec_8_i8 = c.ipIndexToKey(&pool, idx(c.IP_INDEX_VECTOR_8_I8_TYPE));
    try expectEqual(tag(c.IP_KEY_VECTOR_TYPE), vec_8_i8.tag);
    try expectEqual(@as(u32, 8), vec_8_i8.data.vector_type.len);
    try expectEqual(idx(c.IP_INDEX_I8_TYPE), vec_8_i8.data.vector_type.child);

    // Pre-interned @Vector(4, f32).
    const vec_4_f32 = c.ipIndexToKey(&pool, idx(c.IP_INDEX_VECTOR_4_F32_TYPE));
    try expectEqual(tag(c.IP_KEY_VECTOR_TYPE), vec_4_f32.tag);
    try expectEqual(@as(u32, 4), vec_4_f32.data.vector_type.len);
    try expectEqual(idx(c.IP_INDEX_F32_TYPE), vec_4_f32.data.vector_type.child);
}
test "intern_pool: pointer types" {
    const expect = std.testing.expect;
    const expectEqual = std.testing.expectEqual;

    var pool = c.ipInit();
    defer c.ipDeinit(&pool);

    // Pre-interned *usize.
    const single_ptr = c.ipIndexToKey(&pool, idx(c.IP_INDEX_PTR_USIZE_TYPE));
    try expectEqual(tag(c.IP_KEY_PTR_TYPE), single_ptr.tag);
    try expectEqual(idx(c.IP_INDEX_USIZE_TYPE), single_ptr.data.ptr_type.child);

    // Pre-interned [*]const u8: many-item size and the const flag set.
    const many_ptr = c.ipIndexToKey(&pool, idx(c.IP_INDEX_MANYPTR_CONST_U8_TYPE));
    try expectEqual(tag(c.IP_KEY_PTR_TYPE), many_ptr.tag);
    try expectEqual(idx(c.IP_INDEX_U8_TYPE), many_ptr.data.ptr_type.child);

    const flags = many_ptr.data.ptr_type.flags;
    const size_bits = flags & idx(c.PTR_FLAGS_SIZE_MASK);
    try expect(size_bits == idx(c.PTR_FLAGS_SIZE_MANY));
    const const_bit = flags & idx(c.PTR_FLAGS_IS_CONST);
    try expect(const_bit != 0);
}
// ---------------------------------------------------------------------------
// Sema smoke tests (using C sema pipeline directly)
// ---------------------------------------------------------------------------
/// Everything `semaCheck` hands back to a test: the Zcu it created, the
/// Sema that ran on it, and the per-function AIR list returned by
/// `semaAnalyze`. Call `deinit` once when done.
const SemaCheckResult = struct {
/// Zcu obtained from `c.zcuInit`; released by `deinit`.
c_zcu: *c.Zcu,
/// Sema state initialised by `c.semaInit`; released by `deinit`.
c_sema: c.Sema,
/// Result of `c.semaAnalyze`; released by `deinit`.
c_func_air_list: c.SemaFuncAirList,
/// Releases the three members in this order: the function AIR list,
/// then the Sema, then the Zcu.
fn deinit(self: *SemaCheckResult) void {
c.semaFuncAirListDeinit(&self.c_func_air_list);
c.semaDeinit(&self.c_sema);
c.zcuDeinit(self.c_zcu);
}
};
/// Runs the C pipeline (parse -> AstGen -> Sema) over `source` and returns
/// the resulting Zcu, Sema and function AIR list. The caller must call
/// `deinit` on the result. The AST and ZIR are released before returning.
///
/// NOTE(review): `comp` lives on this function's stack, yet the Zcu built
/// from `&comp` outlives it; likewise the ZIR handed to `semaInit` is freed
/// on return. If the C side retains either, the result holds dangling
/// pointers — confirm against zcu.c / sema.c.
fn semaCheck(source: [:0]const u8) !SemaCheckResult {
    var ast = c.astParse(source.ptr, @intCast(source.len));
    defer c.astDeinit(&ast);

    var zir = c.astGen(&ast);
    defer c.zirDeinit(&zir);

    // Fixed compilation settings used by every smoke test.
    var comp: c.Compilation = .{
        .config = .{
            .module_root = null,
            .target_cpu_arch = "wasm32",
            .target_cpu_model = "lime1",
            .object_format = "wasm",
            .link_mode = "static",
            .is_test = false,
        },
    };

    const zcu: *c.Zcu = c.zcuInit(&comp);

    var sema: c.Sema = undefined;
    c.semaInit(&sema, zcu, zir);
    const func_airs = c.semaAnalyze(&sema);

    return .{
        .c_zcu = zcu,
        .c_sema = sema,
        .c_func_air_list = func_airs,
    };
}
test "sema: empty source smoke test" {
    const expect = std.testing.expect;

    var checked = try semaCheck("");
    defer checked.deinit();

    // After semaAnalyze the Sema no longer holds any AIR arrays.
    const sema = &checked.c_sema;
    try expect(sema.air_inst_tags == null);
    try expect(sema.air_inst_datas == null);
    try expect(sema.air_extra == null);

    // An empty file declares no functions, so no function AIR is produced.
    try std.testing.expectEqual(@as(u32, 0), checked.c_func_air_list.len);
}
test "sema: const x = 0 smoke test" {
    var checked = try semaCheck("const x = 0;");
    defer checked.deinit();

    // A lone constant declares no functions: the AIR list stays empty.
    const produced = checked.c_func_air_list.len;
    try std.testing.expectEqual(@as(u32, 0), produced);
}
test "sema: function decl smoke test" {
    var checked = try semaCheck("fn foo() void {}");
    defer checked.deinit();

    // The single named function with a body yields exactly one AIR entry.
    const produced = checked.c_func_air_list.len;
    try std.testing.expectEqual(@as(u32, 1), produced);
}
// ---------------------------------------------------------------------------
// Air raw comparison: C vs pre-computed Zig AIR
// ---------------------------------------------------------------------------
const air_tag_names = @import("air_tag_names");
/// A parsed function from the pre-computed AIR binary data.
/// Fields are raw byte pointers into the binary data — no alignment
/// requirements, no copies. When inst_len == 0 or extra_len == 0 the
/// corresponding pointer is undefined and must not be dereferenced.
pub const PrecomputedFunc = struct {
/// Function name; used as the lookup key when matching C output
/// against pre-computed entries.
name: []const u8,
/// The `func_ip` word stored with the function. It is only echoed in
/// mismatch diagnostics here — presumably the function's InternPool
/// index; confirm against the generator.
func_ip: u32,
/// Number of AIR instructions.
inst_len: u32,
/// `inst_len` instruction tag bytes, one per instruction.
tags: [*]const u8,
/// `inst_len * 8` bytes of instruction data (8 bytes per instruction).
datas: [*]const u8,
/// Number of 32-bit words in `extra`.
extra_len: u32,
/// `extra_len * 4` bytes; words are read as little-endian u32.
extra: [*]const u8,
};
/// Parse pre-computed AIR from binary data (generated by air_gen).
/// Zero-copy: the returned entries point directly into `data`, so `data`
/// must outlive them. The returned slice is allocated with
/// `std.testing.allocator`; release it with `freePrecomputedAir`.
///
/// Returns `error.InvalidAirData` when `data` is truncated or a length
/// field cannot possibly fit in the remaining bytes, and propagates
/// allocation failure.
///
/// Binary format (all integers little-endian):
///   func_count: u32
///   Per function:
///     name_len:   u32
///     name:       [name_len]u8
///     func_ip:    u32
///     inst_len:   u32
///     inst_tags:  [inst_len]u8
///     inst_datas: [inst_len * 8]u8
///     extra_len:  u32
///     extra:      [extra_len * 4]u8
pub fn parsePrecomputedAir(data: []const u8) ![]PrecomputedFunc {
    var pos: usize = 0;
    const func_count = readU32(data, &pos) orelse return error.InvalidAirData;

    // Every record carries at least four u32 fields (name_len, func_ip,
    // inst_len, extra_len). Reject counts that cannot fit before
    // allocating, so a corrupt header cannot trigger a huge allocation.
    const min_record_bytes = 4 * @sizeOf(u32);
    if (func_count > (data.len - pos) / min_record_bytes) return error.InvalidAirData;

    const funcs = try std.testing.allocator.alloc(PrecomputedFunc, func_count);
    errdefer std.testing.allocator.free(funcs);

    // Invariant: pos <= data.len, so `data.len - pos` never underflows.
    // Lengths are compared against the remaining byte count rather than
    // computing `pos + len`, which could wrap on 32-bit hosts.
    for (funcs) |*f| {
        // name
        const name_len = readU32(data, &pos) orelse return error.InvalidAirData;
        if (name_len > data.len - pos) return error.InvalidAirData;
        f.name = data[pos..][0..name_len];
        pos += name_len;

        // func_ip
        f.func_ip = readU32(data, &pos) orelse return error.InvalidAirData;

        // inst_tags + inst_datas — point directly into data
        const inst_len = readU32(data, &pos) orelse return error.InvalidAirData;
        f.inst_len = inst_len;
        if (inst_len > 0) {
            const tags_byte_len: usize = inst_len;
            // Checked multiply in usize: the u32 product could overflow.
            const datas_byte_len = std.math.mul(usize, inst_len, 8) catch return error.InvalidAirData;
            if (tags_byte_len > data.len - pos) return error.InvalidAirData;
            f.tags = data[pos..].ptr;
            pos += tags_byte_len;
            if (datas_byte_len > data.len - pos) return error.InvalidAirData;
            f.datas = data[pos..].ptr;
            pos += datas_byte_len;
        } else {
            f.tags = undefined;
            f.datas = undefined;
        }

        // extra — point directly into data
        const extra_len = readU32(data, &pos) orelse return error.InvalidAirData;
        f.extra_len = extra_len;
        if (extra_len > 0) {
            // Checked multiply in usize: extra_len >= 2^30 used to overflow
            // the u32 product and panic instead of reporting bad data.
            const extra_byte_len = std.math.mul(usize, extra_len, 4) catch return error.InvalidAirData;
            if (extra_byte_len > data.len - pos) return error.InvalidAirData;
            f.extra = data[pos..].ptr;
            pos += extra_byte_len;
        } else {
            f.extra = undefined;
        }
    }
    return funcs;
}
/// Reads a little-endian u32 from `data` at `pos.*` and advances `pos.*`
/// by four. Returns null, leaving `pos.*` untouched, when fewer than four
/// bytes remain.
fn readU32(data: []const u8, pos: *usize) ?u32 {
    const start = pos.*;
    const end = start + 4;
    if (end > data.len) return null;
    pos.* = end;
    return std.mem.readInt(u32, data[start..end][0..4], .little);
}
/// Frees a slice returned by `parsePrecomputedAir`. Only the slice itself
/// is released, using `std.testing.allocator`.
pub fn freePrecomputedAir(funcs: []PrecomputedFunc) void {
    const allocator = std.testing.allocator;
    allocator.free(funcs);
}
/// Compare C sema output against pre-computed AIR data.
/// The check runs in both directions: every function the C sema produced
/// must have a same-named pre-computed entry with matching AIR, and every
/// pre-computed entry must have been produced by the C sema. The first
/// discrepancy is printed to stderr and reported as `error.AirMismatch`.
pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_list: c.SemaFuncAirList) !void {
    // View the C array as a Zig slice; a null `items` means "no functions".
    const raw_items: ?[*]const c.SemaFuncAir = @ptrCast(c_func_air_list.items);
    const c_funcs = if (raw_items) |items| items[0..c_func_air_list.len] else &[_]c.SemaFuncAir{};

    // C -> Zig: look up each C function by name and compare its AIR.
    for (c_funcs) |*cf| {
        const c_name = if (cf.name) |n| std.mem.span(n) else "";
        if (precomputedFindByName(precomputed, c_name)) |pf| {
            const c_pf = precomputedFromCAir(cf);
            try airCompareOne(c_name, pf.*, c_pf, pf.func_ip);
        } else {
            std.debug.print("C function '{s}' not found in pre-computed AIR\n", .{c_name});
            return error.AirMismatch;
        }
    }

    // Zig -> C: each pre-computed function must appear in the C output.
    for (precomputed) |*pf| {
        const produced = for (c_funcs) |*cf| {
            const c_name = if (cf.name) |n| std.mem.span(n) else "";
            if (std.mem.eql(u8, pf.name, c_name)) break true;
        } else false;
        if (produced) continue;
        std.debug.print("Zig function '{s}' not produced by C sema\n", .{pf.name});
        return error.AirMismatch;
    }
}
/// Wraps one C-side function AIR in a `PrecomputedFunc` view so it can be
/// compared with the same code as parsed entries. Nothing is copied. A
/// null C array becomes an undefined pointer, and a null name becomes "".
fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
    const air = cf.air;
    const name: []const u8 = if (cf.name) |n| std.mem.span(n) else "";
    const tags: [*]const u8 = if (cToOpt(u8, air.inst_tags)) |t| t else undefined;
    const datas: [*]const u8 = if (cToOpt(c.AirInstData, air.inst_datas)) |d| @ptrCast(d) else undefined;
    const extra: [*]const u8 = if (cToOpt(u32, air.extra)) |e| @ptrCast(e) else undefined;
    return .{
        .name = name,
        .func_ip = cf.func_ip,
        .inst_len = air.inst_len,
        .tags = tags,
        .datas = datas,
        .extra_len = air.extra_len,
        .extra = extra,
    };
}
/// Returns a pointer to the first entry of `funcs` whose name equals
/// `name` byte-for-byte, or null when there is none.
fn precomputedFindByName(funcs: []const PrecomputedFunc, name: []const u8) ?*const PrecomputedFunc {
    var i: usize = 0;
    while (i < funcs.len) : (i += 1) {
        const candidate = &funcs[i];
        if (std.mem.eql(u8, candidate.name, name)) return candidate;
    }
    return null;
}
/// Convert a repo-relative lib/std/ path to a null-terminated module prefix.
/// The leading "lib/std/" and trailing ".zig" are dropped and every '/'
/// becomes '.':
///   "lib/std/crypto/codecs.zig" -> "crypto.codecs"
///   "lib/std/zig/llvm.zig"      -> "zig.llvm"
/// Evaluated entirely at compile time. A path that does not start with
/// "lib/std/" or does not end with ".zig" is rejected with a compile
/// error instead of being sliced blindly.
pub fn pathToModulePrefix(comptime path: []const u8) [*:0]const u8 {
    return comptime blk: {
        const root = "lib/std/";
        const ext = ".zig";
        if (!std.mem.startsWith(u8, path, root)) {
            @compileError("pathToModulePrefix: path must start with '" ++ root ++ "', got '" ++ path ++ "'");
        }
        if (!std.mem.endsWith(u8, path, ext)) {
            @compileError("pathToModulePrefix: path must end with '" ++ ext ++ "', got '" ++ path ++ "'");
        }
        const stripped = path[root.len..];
        const no_ext = stripped[0 .. stripped.len - ext.len];
        var buf: [no_ext.len:0]u8 = undefined;
        for (no_ext, 0..) |ch, i| {
            buf[i] = if (ch == '/') '.' else ch;
        }
        // The sentinel slot is undefined too; set it explicitly.
        buf[no_ext.len] = 0;
        const result = buf;
        break :blk &result;
    };
}
/// Extract filename stem from a path as a null-terminated string.
/// The stem is whatever follows the last '/' (or the whole path when
/// there is none), with the trailing ".zig" removed:
///   "stage0/sema_tests/empty_void_function.zig" -> "empty_void_function"
/// Evaluated entirely at compile time. A path that does not end with
/// ".zig" is rejected with a compile error rather than silently losing
/// its last four characters.
pub fn pathStem(comptime path: []const u8) [*:0]const u8 {
    return comptime blk: {
        const ext = ".zig";
        if (!std.mem.endsWith(u8, path, ext)) {
            @compileError("pathStem: path must end with '" ++ ext ++ "', got '" ++ path ++ "'");
        }
        // Keep only the component after the last directory separator.
        const filename = if (std.mem.lastIndexOfScalar(u8, path, '/')) |slash|
            path[slash + 1 ..]
        else
            path;
        // Strip .zig extension
        const stem = filename[0 .. filename.len - ext.len];
        var buf: [stem.len:0]u8 = undefined;
        @memcpy(&buf, stem);
        // The sentinel slot is undefined too; set it explicitly.
        buf[stem.len] = 0;
        const result = buf;
        break :blk &result;
    };
}
/// Converts a C pointer to an optional many-item const pointer:
/// a null C pointer maps to `null`, anything else to the same address.
fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T {
    if (ptr == null) return null;
    return @ptrCast(ptr);
}
/// Reads the `index`-th little-endian u32 from a raw byte view of an
/// extra array. No bounds check is possible on a many-item pointer; the
/// caller must keep `index` within the array.
fn readExtraWord(extra: [*]const u8, index: usize) u32 {
    const byte_offset = index * 4;
    const word_bytes: *const [4]u8 = extra[byte_offset..][0..4];
    return std.mem.readInt(u32, word_bytes, .little);
}
/// Looks up the printable name of an AIR instruction tag in the
/// `air_tag_names.names` table.
fn airTagNameSlice(tag_val: u8) []const u8 {
    const index: usize = tag_val;
    return air_tag_names.names[index];
}
/// Classifies a raw 32-bit Ref for diagnostics: "none" for the all-ones
/// value, "inst" when the top bit is set, "ip" otherwise.
fn refKindStr(ref: u32) []const u8 {
    const none_ref: u32 = 0xFFFFFFFF;
    const inst_bit: u32 = 0x80000000;
    // The all-ones value also has the top bit set, so test it first.
    if (ref == none_ref) return "none";
    return if ((ref & inst_bit) != 0) "inst" else "ip";
}
/// How many of the two 4-byte slots of an instruction's 8-byte
/// Air.Inst.Data carry meaning for the given tag. Variants narrower than
/// 8 bytes (no_op, un_op, ty, repeat) leave the rest uninitialised, so a
/// comparison must look at no more than this many slots.
fn airInstNumSlots(tag_val: u8) usize {
    const no_slots: usize = 0;
    const one_slot: usize = 1;
    const both_slots: usize = 2;
    return switch (tag_val) {
        // no_op: nothing meaningful at all.
        c.AIR_INST_RET_ADDR,
        c.AIR_INST_FRAME_ADDR,
        c.AIR_INST_TRAP,
        c.AIR_INST_UNREACH,
        c.AIR_INST_BREAKPOINT,
        => no_slots,

        // un_op: a single operand in the first slot.
        c.AIR_INST_RET,
        c.AIR_INST_RET_SAFE,
        c.AIR_INST_RET_LOAD,
        c.AIR_INST_NEG,
        c.AIR_INST_NEG_OPTIMIZED,
        c.AIR_INST_IS_NULL,
        c.AIR_INST_IS_NON_NULL,
        c.AIR_INST_IS_NULL_PTR,
        c.AIR_INST_IS_NON_NULL_PTR,
        c.AIR_INST_IS_ERR,
        c.AIR_INST_IS_NON_ERR,
        c.AIR_INST_IS_ERR_PTR,
        c.AIR_INST_IS_NON_ERR_PTR,
        c.AIR_INST_SQRT,
        c.AIR_INST_SIN,
        c.AIR_INST_COS,
        c.AIR_INST_TAN,
        c.AIR_INST_EXP,
        c.AIR_INST_EXP2,
        c.AIR_INST_LOG,
        c.AIR_INST_LOG2,
        c.AIR_INST_LOG10,
        c.AIR_INST_FLOOR,
        c.AIR_INST_CEIL,
        c.AIR_INST_ROUND,
        c.AIR_INST_TRUNC_FLOAT,
        c.AIR_INST_IS_NAMED_ENUM_VALUE,
        c.AIR_INST_TAG_NAME,
        c.AIR_INST_ERROR_NAME,
        c.AIR_INST_CMP_LT_ERRORS_LEN,
        c.AIR_INST_C_VA_END,
        c.AIR_INST_SET_ERR_RETURN_TRACE,
        => one_slot,

        // ty: a single type in the first slot.
        c.AIR_INST_ALLOC,
        c.AIR_INST_RET_PTR,
        c.AIR_INST_C_VA_START,
        c.AIR_INST_ERR_RETURN_TRACE,
        => one_slot,

        // repeat: a single word in the first slot.
        c.AIR_INST_REPEAT => one_slot,

        // Every other variant fills the whole 8 bytes.
        else => both_slots,
    };
}
/// For a given AIR instruction tag, reports which of the two 4-byte slots
/// of Air.Inst.Data hold a Ref: element [0] describes bytes [0:4],
/// element [1] bytes [4:8]. Slots that are not Refs (line/column, payload
/// indices, padding) are reported as false. Tags not listed fall back to
/// "no refs".
fn airDataRefSlots(tag_val: u8) [2]bool {
    const no_refs: [2]bool = .{ false, false };
    const first_only: [2]bool = .{ true, false };
    const second_only: [2]bool = .{ false, true };
    const both_refs: [2]bool = .{ true, true };

    return switch (tag_val) {
        // no_op: no meaningful data
        c.AIR_INST_RET_ADDR,
        c.AIR_INST_FRAME_ADDR,
        c.AIR_INST_TRAP,
        c.AIR_INST_UNREACH,
        c.AIR_INST_BREAKPOINT,
        => no_refs,

        // dbg_stmt: line(u32) + column(u32)
        c.AIR_INST_DBG_STMT,
        c.AIR_INST_DBG_EMPTY_STMT,
        => no_refs,

        // inferred_alloc / inferred_alloc_comptime: special data, no standard Refs
        c.AIR_INST_INFERRED_ALLOC,
        c.AIR_INST_INFERRED_ALLOC_COMPTIME,
        => no_refs,

        // repeat: loop_inst(u32) + pad
        c.AIR_INST_REPEAT => no_refs,

        // pl_op: operand(Ref) + payload(u32)
        c.AIR_INST_DBG_VAR_PTR,
        c.AIR_INST_DBG_VAR_VAL,
        c.AIR_INST_DBG_ARG_INLINE,
        c.AIR_INST_CALL,
        c.AIR_INST_CALL_ALWAYS_TAIL,
        c.AIR_INST_CALL_NEVER_TAIL,
        c.AIR_INST_CALL_NEVER_INLINE,
        c.AIR_INST_COND_BR,
        c.AIR_INST_SWITCH_BR,
        c.AIR_INST_LOOP_SWITCH_BR,
        c.AIR_INST_TRY,
        c.AIR_INST_TRY_COLD,
        c.AIR_INST_ATOMIC_RMW,
        c.AIR_INST_SELECT,
        c.AIR_INST_MUL_ADD,
        c.AIR_INST_WASM_MEMORY_SIZE,
        c.AIR_INST_WASM_MEMORY_GROW,
        c.AIR_INST_WORK_ITEM_ID,
        c.AIR_INST_WORK_GROUP_SIZE,
        c.AIR_INST_WORK_GROUP_ID,
        => first_only,

        // un_op: operand(Ref) + pad
        c.AIR_INST_RET,
        c.AIR_INST_RET_SAFE,
        c.AIR_INST_RET_LOAD,
        c.AIR_INST_NEG,
        c.AIR_INST_NEG_OPTIMIZED,
        c.AIR_INST_IS_NULL,
        c.AIR_INST_IS_NON_NULL,
        c.AIR_INST_IS_NULL_PTR,
        c.AIR_INST_IS_NON_NULL_PTR,
        c.AIR_INST_IS_ERR,
        c.AIR_INST_IS_NON_ERR,
        c.AIR_INST_IS_ERR_PTR,
        c.AIR_INST_IS_NON_ERR_PTR,
        c.AIR_INST_SQRT,
        c.AIR_INST_SIN,
        c.AIR_INST_COS,
        c.AIR_INST_TAN,
        c.AIR_INST_EXP,
        c.AIR_INST_EXP2,
        c.AIR_INST_LOG,
        c.AIR_INST_LOG2,
        c.AIR_INST_LOG10,
        c.AIR_INST_FLOOR,
        c.AIR_INST_CEIL,
        c.AIR_INST_ROUND,
        c.AIR_INST_TRUNC_FLOAT,
        c.AIR_INST_IS_NAMED_ENUM_VALUE,
        c.AIR_INST_TAG_NAME,
        c.AIR_INST_ERROR_NAME,
        c.AIR_INST_CMP_LT_ERRORS_LEN,
        c.AIR_INST_C_VA_END,
        c.AIR_INST_SET_ERR_RETURN_TRACE,
        => first_only,

        // ty: type(Ref) + pad
        c.AIR_INST_ALLOC,
        c.AIR_INST_RET_PTR,
        c.AIR_INST_C_VA_START,
        c.AIR_INST_ERR_RETURN_TRACE,
        => first_only,

        // ty_pl: type(Ref) + payload(u32)
        c.AIR_INST_STRUCT_FIELD_VAL,
        c.AIR_INST_STRUCT_FIELD_PTR,
        c.AIR_INST_DBG_INLINE_BLOCK,
        c.AIR_INST_BLOCK,
        c.AIR_INST_AGGREGATE_INIT,
        c.AIR_INST_PTR_ADD,
        c.AIR_INST_PTR_SUB,
        c.AIR_INST_ADD_WITH_OVERFLOW,
        c.AIR_INST_SUB_WITH_OVERFLOW,
        c.AIR_INST_MUL_WITH_OVERFLOW,
        c.AIR_INST_SHL_WITH_OVERFLOW,
        c.AIR_INST_SLICE,
        c.AIR_INST_SLICE_ELEM_PTR,
        c.AIR_INST_PTR_ELEM_PTR,
        c.AIR_INST_CMP_VECTOR,
        c.AIR_INST_CMP_VECTOR_OPTIMIZED,
        c.AIR_INST_TRY_PTR,
        c.AIR_INST_TRY_PTR_COLD,
        c.AIR_INST_CMPXCHG_WEAK,
        c.AIR_INST_CMPXCHG_STRONG,
        c.AIR_INST_UNION_INIT,
        c.AIR_INST_ASSEMBLY,
        c.AIR_INST_LOOP,
        c.AIR_INST_SAVE_ERR_RETURN_TRACE_INDEX,
        c.AIR_INST_SHUFFLE_ONE,
        c.AIR_INST_SHUFFLE_TWO,
        c.AIR_INST_FIELD_PARENT_PTR,
        => first_only,

        // reduce: operand(Ref) + operation(enum)
        c.AIR_INST_REDUCE,
        c.AIR_INST_REDUCE_OPTIMIZED,
        => first_only,

        // prefetch: ptr(Ref) + packed fields
        c.AIR_INST_PREFETCH => first_only,

        // atomic_load: ptr(Ref) + order(enum)
        c.AIR_INST_ATOMIC_LOAD => first_only,

        // vector_store_elem: vector_ptr(Ref) + payload(u32)
        c.AIR_INST_VECTOR_STORE_ELEM => first_only,

        // ty_nav: ty(IP index) + nav(u32)
        c.AIR_INST_RUNTIME_NAV_PTR => first_only,

        // bin_op: lhs(Ref) + rhs(Ref)
        c.AIR_INST_ADD,
        c.AIR_INST_ADD_SAFE,
        c.AIR_INST_ADD_OPTIMIZED,
        c.AIR_INST_ADD_WRAP,
        c.AIR_INST_SUB,
        c.AIR_INST_SUB_SAFE,
        c.AIR_INST_SUB_OPTIMIZED,
        c.AIR_INST_SUB_WRAP,
        c.AIR_INST_MUL,
        c.AIR_INST_MUL_SAFE,
        c.AIR_INST_MUL_OPTIMIZED,
        c.AIR_INST_MUL_WRAP,
        c.AIR_INST_BOOL_AND,
        c.AIR_INST_BOOL_OR,
        c.AIR_INST_STORE,
        c.AIR_INST_STORE_SAFE,
        c.AIR_INST_BIT_AND,
        c.AIR_INST_BIT_OR,
        c.AIR_INST_XOR,
        c.AIR_INST_SHL,
        c.AIR_INST_SHL_EXACT,
        c.AIR_INST_SHL_SAT,
        c.AIR_INST_SHR,
        c.AIR_INST_SHR_EXACT,
        c.AIR_INST_CMP_LT,
        c.AIR_INST_CMP_LTE,
        c.AIR_INST_CMP_EQ,
        c.AIR_INST_CMP_GTE,
        c.AIR_INST_CMP_GT,
        c.AIR_INST_CMP_NEQ,
        c.AIR_INST_MAX,
        c.AIR_INST_MIN,
        c.AIR_INST_DIV_FLOAT,
        c.AIR_INST_DIV_FLOAT_OPTIMIZED,
        c.AIR_INST_DIV_TRUNC,
        c.AIR_INST_DIV_TRUNC_OPTIMIZED,
        c.AIR_INST_DIV_FLOOR,
        c.AIR_INST_DIV_FLOOR_OPTIMIZED,
        c.AIR_INST_DIV_EXACT,
        c.AIR_INST_DIV_EXACT_OPTIMIZED,
        c.AIR_INST_ADD_SAT,
        c.AIR_INST_SUB_SAT,
        c.AIR_INST_MUL_SAT,
        c.AIR_INST_REM,
        c.AIR_INST_REM_OPTIMIZED,
        c.AIR_INST_MOD,
        c.AIR_INST_MOD_OPTIMIZED,
        c.AIR_INST_CMP_LT_OPTIMIZED,
        c.AIR_INST_CMP_LTE_OPTIMIZED,
        c.AIR_INST_CMP_EQ_OPTIMIZED,
        c.AIR_INST_CMP_GTE_OPTIMIZED,
        c.AIR_INST_CMP_GT_OPTIMIZED,
        c.AIR_INST_CMP_NEQ_OPTIMIZED,
        c.AIR_INST_SET_UNION_TAG,
        c.AIR_INST_ARRAY_ELEM_VAL,
        c.AIR_INST_SLICE_ELEM_VAL,
        c.AIR_INST_PTR_ELEM_VAL,
        c.AIR_INST_MEMSET,
        c.AIR_INST_MEMSET_SAFE,
        c.AIR_INST_MEMCPY,
        c.AIR_INST_MEMMOVE,
        c.AIR_INST_ATOMIC_STORE_UNORDERED,
        c.AIR_INST_ATOMIC_STORE_MONOTONIC,
        c.AIR_INST_ATOMIC_STORE_RELEASE,
        c.AIR_INST_ATOMIC_STORE_SEQ_CST,
        => both_refs,

        // ty_op: type(Ref) + operand(Ref)
        c.AIR_INST_BITCAST,
        c.AIR_INST_INTCAST,
        c.AIR_INST_INTCAST_SAFE,
        c.AIR_INST_TRUNC,
        c.AIR_INST_FPTRUNC,
        c.AIR_INST_FPEXT,
        c.AIR_INST_OPTIONAL_PAYLOAD,
        c.AIR_INST_OPTIONAL_PAYLOAD_PTR,
        c.AIR_INST_OPTIONAL_PAYLOAD_PTR_SET,
        c.AIR_INST_WRAP_OPTIONAL,
        c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD,
        c.AIR_INST_UNWRAP_ERRUNION_ERR,
        c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD_PTR,
        c.AIR_INST_UNWRAP_ERRUNION_ERR_PTR,
        c.AIR_INST_ERRUNION_PAYLOAD_PTR_SET,
        c.AIR_INST_WRAP_ERRUNION_PAYLOAD,
        c.AIR_INST_WRAP_ERRUNION_ERR,
        c.AIR_INST_ARRAY_TO_SLICE,
        c.AIR_INST_LOAD,
        c.AIR_INST_NOT,
        c.AIR_INST_INT_FROM_FLOAT,
        c.AIR_INST_INT_FROM_FLOAT_OPTIMIZED,
        c.AIR_INST_INT_FROM_FLOAT_SAFE,
        c.AIR_INST_INT_FROM_FLOAT_OPTIMIZED_SAFE,
        c.AIR_INST_FLOAT_FROM_INT,
        c.AIR_INST_CLZ,
        c.AIR_INST_CTZ,
        c.AIR_INST_POPCOUNT,
        c.AIR_INST_BYTE_SWAP,
        c.AIR_INST_ABS,
        c.AIR_INST_BIT_REVERSE,
        c.AIR_INST_STRUCT_FIELD_PTR_INDEX_0,
        c.AIR_INST_STRUCT_FIELD_PTR_INDEX_1,
        c.AIR_INST_STRUCT_FIELD_PTR_INDEX_2,
        c.AIR_INST_STRUCT_FIELD_PTR_INDEX_3,
        c.AIR_INST_GET_UNION_TAG,
        c.AIR_INST_SLICE_LEN,
        c.AIR_INST_SLICE_PTR,
        c.AIR_INST_PTR_SLICE_LEN_PTR,
        c.AIR_INST_PTR_SLICE_PTR_PTR,
        c.AIR_INST_SPLAT,
        c.AIR_INST_ADDRSPACE_CAST,
        c.AIR_INST_ERROR_SET_HAS_VALUE,
        c.AIR_INST_C_VA_ARG,
        c.AIR_INST_C_VA_COPY,
        => both_refs,

        // arg: type(Ref) + zir_param_index(u32)
        c.AIR_INST_ARG => first_only,

        // br: block_inst(u32) + operand(Ref)
        c.AIR_INST_BR,
        c.AIR_INST_SWITCH_DISPATCH,
        => second_only,

        // Anything not listed is assumed to carry no Refs. If a tag that
        // does carry Refs is missing, add it to the matching group above.
        else => no_refs,
    };
}
/// Zero-pad bytes after the null terminator in a NullTerminatedString stored
/// in the extra array. Zig's appendAirString leaves padding uninitialised;
/// the C side zeroes it. Normalising both to zero allows comparison.
///
/// `nts_index` is the word index in `extra` where the string starts. The
/// call is a no-op when `nts_index` is 0, when it is out of range, or when
/// no terminator is found before the end of `extra`. Only the bytes
/// between the terminator and the next 4-byte boundary are written.
fn normalizeNtsPadding(extra: []u32, nts_index: u32) void {
    if (nts_index == 0 or nts_index >= extra.len) return;
    const bytes = std.mem.sliceAsBytes(extra);
    // Byte offsets are kept in usize so they cannot overflow u32.
    const byte_start = @as(usize, nts_index) * @sizeOf(u32);
    const terminator = std.mem.indexOfScalarPos(u8, bytes, byte_start, 0) orelse return;
    // Padding runs from the byte after the terminator up to the next word
    // boundary, clamped to the buffer end.
    const pad_begin = terminator + 1;
    const pad_end = @min(std.mem.alignForward(usize, pad_begin, @sizeOf(u32)), bytes.len);
    @memset(bytes[pad_begin..pad_end], 0);
}
/// Compares two function AIRs and fails with `error.AirMismatch` at the
/// first difference, after printing a diagnostic to stderr. `name` and
/// `zig_ip_base` are used only in those diagnostics.
///
/// Checks, in order: instruction count, tags, per-instruction data (only
/// the slots `airInstNumSlots` marks meaningful), extra length, and extra
/// contents (after normalising string padding on private copies). The
/// scratch copies come from `std.testing.allocator`.
fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc, zig_ip_base: u32) !void {
    const dbg = std.debug.print;

    // ---- instruction count ----
    if (a.inst_len != b.inst_len) {
        dbg("'{s}': inst_len mismatch: a={d} b={d}\n", .{ name, a.inst_len, b.inst_len });
        if (a.inst_len > 0) {
            dbg(" a tags:", .{});
            for (a.tags[0..a.inst_len]) |t| dbg(" {s}", .{airTagNameSlice(t)});
            dbg("\n", .{});
        }
        if (b.inst_len > 0) {
            dbg(" b tags:", .{});
            for (b.tags[0..b.inst_len]) |t| dbg(" {s}", .{airTagNameSlice(t)});
            dbg("\n", .{});
        }
        return error.AirMismatch;
    }
    const inst_count = a.inst_len;

    // ---- tags ----
    // Guarded: with zero instructions the tag pointers are undefined.
    if (inst_count > 0) {
        const a_tags = a.tags[0..inst_count];
        const b_tags = b.tags[0..inst_count];
        if (!std.mem.eql(u8, a_tags, b_tags)) {
            dbg("'{s}': tags mismatch (inst_len={d}):", .{ name, inst_count });
            for (a_tags, b_tags, 0..) |ta, tb, j| {
                dbg(" a[{d}]={d}({s}) b[{d}]={d}({s})", .{ j, ta, airTagNameSlice(ta), j, tb, airTagNameSlice(tb) });
            }
            dbg("\n", .{});
            return error.AirMismatch;
        }
    }

    // ---- datas ----
    // Each instruction owns 8 bytes = two 4-byte slots. Narrow variants
    // leave trailing slots uninitialised, so only the first
    // airInstNumSlots(tag) slots are examined.
    var inst: usize = 0;
    while (inst < inst_count) : (inst += 1) {
        const tag_val = a.tags[inst];
        const ref_slots = airDataRefSlots(tag_val);
        const num_slots = airInstNumSlots(tag_val);
        var slot: usize = 0;
        while (slot < num_slots) : (slot += 1) {
            const byte_off = inst * 8 + slot * 4;
            const a_word = std.mem.readInt(u32, a.datas[byte_off..][0..4], .little);
            const b_word = std.mem.readInt(u32, b.datas[byte_off..][0..4], .little);
            // Dead BLOCKs carry undefined data on the Zig side and zeroes
            // on the C side; the decision is made on b_word alone so no
            // branch depends on the possibly-uninitialised a_word.
            if (tag_val == c.AIR_INST_BLOCK and b_word == 0) continue;
            if (a_word == b_word) continue;
            // Refs and plain words are both required to match exactly;
            // the classification only picks the message.
            if (ref_slots[slot]) {
                dbg("'{s}': datas ref mismatch at inst[{d}] slot {d}: a=0x{x}[{s}] b=0x{x}[{s}] (tag={s}) [zig_ip_base={d}]\n", .{ name, inst, slot, a_word, refKindStr(a_word), b_word, refKindStr(b_word), airTagNameSlice(tag_val), zig_ip_base });
            } else {
                dbg("'{s}': datas mismatch at inst[{d}] slot {d}: a=0x{x} b=0x{x} (tag={s})\n", .{ name, inst, slot, a_word, b_word, airTagNameSlice(tag_val) });
            }
            return error.AirMismatch;
        }
    }

    // ---- extra length ----
    if (a.extra_len != b.extra_len) {
        dbg("'{s}': extra_len mismatch: a={d} b={d}\n", .{ name, a.extra_len, b.extra_len });
        const common = @min(a.extra_len, b.extra_len);
        if (common > 0) {
            // One pass: list up to 40 differing words and remember where
            // the first difference is.
            var printed: u32 = 0;
            var first_diff: usize = common;
            for (0..common) |ei| {
                const a_val = readExtraWord(a.extra, ei);
                const b_val = readExtraWord(b.extra, ei);
                if (a_val == b_val) continue;
                if (first_diff == common) first_diff = ei;
                if (printed < 40) {
                    dbg(" extra[{d}]: a={d} b={d}\n", .{ ei, a_val, b_val });
                    printed += 1;
                }
            }
            // Dump a window around the first difference from both sides.
            if (first_diff < common) {
                const start = first_diff -| 5;
                const end = @min(first_diff + 20, common);
                dbg(" a extra[{d}..{d}]:", .{ start, end });
                for (start..end) |ei| dbg(" {d}", .{readExtraWord(a.extra, ei)});
                dbg("\n b extra[{d}..{d}]:", .{ start, end });
                for (start..end) |ei| dbg(" {d}", .{readExtraWord(b.extra, ei)});
                dbg("\n", .{});
            }
        }
        return error.AirMismatch;
    }

    // ---- extra contents ----
    const extra_len = a.extra_len;
    if (extra_len == 0) return;

    // Work on private copies so string padding can be normalised: Zig
    // leaves the bytes after a string terminator uninitialised while the
    // C side zeroes them.
    const gpa = std.testing.allocator;
    const a_words = try gpa.alloc(u32, extra_len);
    defer gpa.free(a_words);
    @memcpy(std.mem.sliceAsBytes(a_words), a.extra[0 .. extra_len * 4]);
    const b_words = try gpa.alloc(u32, extra_len);
    defer gpa.free(b_words);
    @memcpy(std.mem.sliceAsBytes(b_words), b.extra[0 .. extra_len * 4]);

    for (0..inst_count) |j| {
        switch (a.tags[j]) {
            c.AIR_INST_DBG_VAR_VAL,
            c.AIR_INST_DBG_VAR_PTR,
            c.AIR_INST_DBG_ARG_INLINE,
            => {
                // pl_op: slot 0 = operand, slot 1 = payload, which for
                // these tags is the string's index into extra.
                const a_nts = std.mem.readInt(u32, a.datas[j * 8 + 4 ..][0..4], .little);
                const b_nts = std.mem.readInt(u32, b.datas[j * 8 + 4 ..][0..4], .little);
                normalizeNtsPadding(a_words, a_nts);
                normalizeNtsPadding(b_words, b_nts);
            },
            // No IP ref canonicalization: C and Zig InternPool indices
            // must match directly.
            else => {},
        }
    }

    if (std.mem.eql(u32, a_words, b_words)) return;

    dbg("'{s}': extra mismatch (extra_len={d})\n", .{ name, extra_len });
    dbg(" a extra:", .{});
    for (a_words) |w| dbg(" {d}", .{w});
    dbg("\n b extra:", .{});
    for (b_words) |w| dbg(" {d}", .{w});
    dbg("\n", .{});
    for (a_words, b_words, 0..) |aw, bw, ei| {
        if (aw != bw) {
            dbg(" extra[{d}]: a=0x{x} b=0x{x}\n", .{ ei, aw, bw });
        }
    }
    return error.AirMismatch;
}
// Sema unit tests are now in corpus.files and tested through stages_test.zig.