Move Air comparison from src/ to stage0/

src/verbose_air.zig now only collects Air data (zig_compile_air) instead
of comparing it (zig_compare_air). The comparison logic lives in
stage0/sema_test.zig, keeping testing infrastructure in stage0/.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 21:58:02 +00:00
parent 5ee105ffac
commit bdf753eaf8
4 changed files with 231 additions and 167 deletions

View File

@@ -1,6 +1,6 @@
// verbose_air.zig — Zig-side zero-copy Air comparer.
// Compiles source via the Zig compiler pipeline and compares Air arrays
// in-place against C-produced arrays (passed in via CSemaFuncAir).
// verbose_air.zig — Zig-side Air collector.
// Compiles source via the Zig compiler pipeline and collects Air arrays
// into C-compatible structs for comparison by stage0 tests.
// Exports C-compatible functions for use by stage0 tests.
const std = @import("std");
@@ -14,164 +14,129 @@ const Air = zig_internals.Air;
const CAir = extern struct {
inst_len: u32,
inst_cap: u32,
inst_tags: ?[*]const u8,
inst_datas: ?[*]const u8, // 8 bytes per inst
inst_tags: ?[*]u8,
inst_datas: ?[*]u8, // 8 bytes per inst
extra_len: u32,
extra_cap: u32,
extra: ?[*]const u32,
extra: ?[*]u32,
};
/// Matches C `SemaFuncAir` struct layout (sema.h).
const CSemaFuncAir = extern struct {
name: ?[*:0]const u8,
name: ?[*:0]u8,
air: CAir,
};
const CompareResult = extern struct {
matched_count: u32,
const CompileAirResult = extern struct {
items: ?[*]CSemaFuncAir,
len: u32,
error_msg: ?[*:0]u8, // NULL on success, caller frees
};
const AirComparer = struct {
c_funcs: ?[*]const CSemaFuncAir,
c_func_count: u32,
matched: u32 = 0,
const AirCollector = struct {
funcs: std.ArrayListUnmanaged(CSemaFuncAir) = .empty,
first_error: ?[*:0]u8 = null,
fn compareFunc(ctx: *anyopaque, name: []const u8, air: *const Air) void {
const self: *AirComparer = @ptrCast(@alignCast(ctx));
self.compareFuncInner(name, air) catch {};
fn collectFunc(ctx: *anyopaque, name: []const u8, air: *const Air) void {
const self: *AirCollector = @ptrCast(@alignCast(ctx));
self.collectFuncInner(name, air) catch {};
}
fn compareFuncInner(self: *AirComparer, name: []const u8, air: *const Air) !void {
// Already have an error, skip further comparisons.
fn collectFuncInner(self: *AirCollector, name: []const u8, air: *const Air) !void {
if (self.first_error != null) return;
// Find matching C function by name.
const c_items = if (self.c_funcs) |f| f[0..self.c_func_count] else {
self.setError("Zig function '{s}' not found in C output", .{name});
return;
};
for (c_items) |*cf| {
const c_name = std.mem.span(cf.name orelse continue);
if (std.mem.eql(u8, name, c_name)) {
self.compareAir(name, &cf.air, air);
return;
}
}
// Function not found in C output.
self.setError("Zig function '{s}' not found in C output", .{name});
}
fn compareAir(self: *AirComparer, name: []const u8, c_air: *const CAir, air: *const Air) void {
const gpa = std.heap.c_allocator;
const inst_len: u32 = @intCast(air.instructions.len);
if (inst_len != c_air.inst_len) {
self.setError("'{s}': inst_len mismatch: zig={d} c={d}", .{ name, inst_len, c_air.inst_len });
return;
}
// Tags
// Copy tags
const zig_tags = air.instructions.items(.tag);
const zig_tags_bytes = @as([*]const u8, @ptrCast(zig_tags.ptr))[0..inst_len];
const c_tags = (c_air.inst_tags orelse {
if (inst_len > 0) {
self.setError("'{s}': C inst_tags is null but inst_len={d}", .{ name, inst_len });
}
return;
})[0..inst_len];
if (!std.mem.eql(u8, zig_tags_bytes, c_tags)) {
self.setError("'{s}': tags mismatch (inst_len={d})", .{ name, inst_len });
return;
}
const tags_copy: ?[*]u8 = if (inst_len > 0) blk: {
const src = @as([*]const u8, @ptrCast(zig_tags.ptr))[0..inst_len];
const dst = gpa.alloc(u8, inst_len) catch return;
@memcpy(dst, src);
break :blk dst.ptr;
} else null;
// Datas (8 bytes per instruction)
const zig_datas = air.instructions.items(.data);
const c_datas = (c_air.inst_datas orelse {
if (inst_len > 0) {
self.setError("'{s}': C inst_datas is null but inst_len={d}", .{ name, inst_len });
}
return;
})[0 .. inst_len * 8];
if (@sizeOf(Air.Inst.Data) == 8) {
const zig_datas_bytes = @as([*]const u8, @ptrCast(zig_datas.ptr))[0 .. inst_len * 8];
if (!std.mem.eql(u8, zig_datas_bytes, c_datas)) {
self.setError("'{s}': datas mismatch (inst_len={d})", .{ name, inst_len });
return;
}
} else {
// Safety build: @sizeOf(Data) may be > 8, compare first 8 bytes per element
for (zig_datas, 0..) |*d, i| {
const zig_bytes = @as(*const [8]u8, @ptrCast(d));
if (!std.mem.eql(u8, zig_bytes, c_datas[i * 8 ..][0..8])) {
self.setError("'{s}': datas mismatch at inst {d}", .{ name, i });
return;
// Copy datas (8 bytes per instruction)
const datas_byte_len = inst_len * 8;
const datas_copy: ?[*]u8 = if (inst_len > 0) blk: {
const dst = gpa.alloc(u8, datas_byte_len) catch return;
const zig_datas = air.instructions.items(.data);
if (@sizeOf(Air.Inst.Data) == 8) {
const src = @as([*]const u8, @ptrCast(zig_datas.ptr))[0..datas_byte_len];
@memcpy(dst, src);
} else {
// Safety build: @sizeOf(Data) may be > 8, copy first 8 bytes per element
for (zig_datas, 0..) |*d, i| {
const src = @as(*const [8]u8, @ptrCast(d));
@memcpy(dst[i * 8 ..][0..8], src);
}
}
}
break :blk dst.ptr;
} else null;
// Extra
// Copy extra
const extra_len: u32 = @intCast(air.extra.items.len);
if (extra_len != c_air.extra_len) {
self.setError("'{s}': extra_len mismatch: zig={d} c={d}", .{ name, extra_len, c_air.extra_len });
return;
}
if (extra_len > 0) {
const c_extra = (c_air.extra orelse {
self.setError("'{s}': C extra is null but extra_len={d}", .{ name, extra_len });
return;
})[0..extra_len];
if (!std.mem.eql(u32, air.extra.items, c_extra)) {
self.setError("'{s}': extra mismatch (extra_len={d})", .{ name, extra_len });
return;
}
}
const extra_copy: ?[*]u32 = if (extra_len > 0) blk: {
const dst = gpa.alloc(u32, extra_len) catch return;
@memcpy(dst, air.extra.items);
break :blk dst.ptr;
} else null;
self.matched += 1;
}
// Copy name
const name_copy = gpa.dupeZ(u8, name) catch return;
fn setError(self: *AirComparer, comptime fmt: []const u8, args: anytype) void {
if (self.first_error != null) return;
const gpa = std.heap.c_allocator;
const msg = std.fmt.allocPrint(gpa, fmt, args) catch return;
const msg_z = gpa.dupeZ(u8, msg) catch return;
gpa.free(msg);
self.first_error = msg_z.ptr;
self.funcs.append(gpa, .{
.name = name_copy.ptr,
.air = .{
.inst_len = inst_len,
.inst_cap = inst_len,
.inst_tags = tags_copy,
.inst_datas = datas_copy,
.extra_len = extra_len,
.extra_cap = extra_len,
.extra = extra_copy,
},
}) catch return;
}
};
export fn zig_compare_air(
export fn zig_compile_air(
src_path_ptr: [*:0]const u8,
module_root_ptr: ?[*:0]const u8,
c_funcs_raw: ?*const anyopaque,
c_func_count: u32,
) CompareResult {
return zigCompareAirImpl(
) CompileAirResult {
return zigCompileAirImpl(
std.mem.span(src_path_ptr),
if (module_root_ptr) |p| std.mem.span(p) else null,
@ptrCast(@alignCast(c_funcs_raw)),
c_func_count,
) catch |err| {
return errResult(@errorName(err));
};
}
export fn zig_compare_result_free(result: *CompareResult) void {
if (result.error_msg) |e| std.heap.c_allocator.free(std.mem.span(e));
/// Releases every allocation reachable from `result`: the error message (if
/// any), each collected function's name, tag bytes, data bytes (8 per
/// instruction) and `extra` words, and finally the item array itself.
///
/// All memory is returned to `std.heap.c_allocator`. After the call the
/// struct's pointer fields are null and `len` is 0, so calling this function
/// a second time on the same struct is a harmless no-op instead of a
/// double free.
export fn zig_compile_air_free(result: *CompileAirResult) void {
    const gpa = std.heap.c_allocator;

    if (result.error_msg) |msg| gpa.free(std.mem.span(msg));
    result.error_msg = null;

    if (result.items) |items| {
        for (items[0..result.len]) |*f| {
            if (f.name) |n| gpa.free(std.mem.span(n));
            if (f.air.inst_tags) |t| gpa.free(t[0..f.air.inst_len]);
            if (f.air.inst_datas) |d| {
                // Widen before multiplying: `inst_len` is a u32 and
                // `inst_len * 8` could overflow in 32-bit arithmetic.
                const datas_byte_len = @as(usize, f.air.inst_len) * 8;
                gpa.free(d[0..datas_byte_len]);
            }
            if (f.air.extra) |e| gpa.free(e[0..f.air.extra_len]);
        }
        gpa.free(items[0..result.len]);
    }
    result.items = null;
    result.len = 0;
}
fn errResult(msg: []const u8) CompareResult {
fn errResult(msg: []const u8) CompileAirResult {
const duped = std.heap.c_allocator.dupeZ(u8, msg) catch
return .{ .matched_count = 0, .error_msg = null };
return .{ .matched_count = 0, .error_msg = duped.ptr };
return .{ .items = null, .len = 0, .error_msg = null };
return .{ .items = null, .len = 0, .error_msg = duped.ptr };
}
fn zigCompareAirImpl(
fn zigCompileAirImpl(
src_path: []const u8,
module_root_opt: ?[]const u8,
c_funcs: ?[*]const CSemaFuncAir,
c_func_count: u32,
) !CompareResult {
) !CompileAirResult {
const gpa = std.heap.c_allocator;
var arena_state = std.heap.ArenaAllocator.init(gpa);
@@ -253,10 +218,7 @@ fn zigCompareAirImpl(
gpa.destroy(thread_pool);
}
var comparer: AirComparer = .{
.c_funcs = c_funcs,
.c_func_count = c_func_count,
};
var collector: AirCollector = .{};
var create_diag: Compilation.CreateDiagnostic = undefined;
const comp = Compilation.create(gpa, arena, &create_diag, .{
@@ -268,8 +230,8 @@ fn zigCompareAirImpl(
.thread_pool = thread_pool,
.cache_mode = .whole,
.verbose_air_callback = .{
.context = @ptrCast(&comparer),
.call = @ptrCast(&AirComparer.compareFunc),
.context = @ptrCast(&collector),
.call = @ptrCast(&AirCollector.collectFunc),
},
}) catch |err| switch (err) {
error.CreateFail => {
@@ -290,8 +252,20 @@ fn zigCompareAirImpl(
return errResult(buf.written());
}
return .{
.matched_count = comparer.matched,
.error_msg = comparer.first_error,
};
if (collector.first_error) |e| {
return .{ .items = null, .len = 0, .error_msg = e };
}
const items = collector.funcs.items;
const len: u32 = @intCast(collector.funcs.items.len);
// Transfer ownership: caller frees via zig_compile_air_free.
// The ArrayListUnmanaged allocatedSlice includes capacity, but we only
// expose items[0..len]. Free the excess capacity now.
if (collector.funcs.capacity > len) {
// Shrink to exact size so free works with items[0..len].
const exact = gpa.realloc(items[0..collector.funcs.capacity], len) catch
items[0..collector.funcs.capacity]; // keep original on realloc failure
return .{ .items = exact.ptr, .len = len, .error_msg = null };
}
return .{ .items = items.ptr, .len = len, .error_msg = null };
}

View File

@@ -1,4 +1,4 @@
// dump.h — Shared result types for raw Air C/Zig comparison.
// dump.h — Shared result types for Zig Air compilation export.
#ifndef _ZIG0_DUMP_H__
#define _ZIG0_DUMP_H__
@@ -6,12 +6,12 @@
#include <stdint.h>
typedef struct {
uint32_t matched_count;
char* error_msg; // NULL on success, caller frees
} AirCompareResult;
void* items; // SemaFuncAir* (from sema.h), owned by Zig allocator
uint32_t len;
char* error_msg; // NULL on success, owned by Zig allocator
} ZigCompileAirResult;
// c_funcs: pointer to SemaFuncAir array (from sema.h). Passed as void* to avoid header dep.
extern AirCompareResult zig_compare_air(const char* src_path, const char* module_root, const void* c_funcs, uint32_t c_func_count);
extern void zig_compare_result_free(AirCompareResult* result);
extern ZigCompileAirResult zig_compile_air(const char* src_path, const char* module_root);
extern void zig_compile_air_free(ZigCompileAirResult* result);
#endif

View File

@@ -8,13 +8,6 @@ pub const c = @cImport({
@cInclude("dump.h");
});
const AirCompareResult = extern struct {
matched_count: u32,
error_msg: ?[*:0]u8,
};
extern fn zig_compare_air([*:0]const u8, ?[*:0]const u8, ?*const anyopaque, u32) AirCompareResult;
extern fn zig_compare_result_free(*AirCompareResult) void;
// Helper to convert C #define integer constants (c_int) to u32 for comparison
// with uint32_t fields (InternPoolIndex, etc.).
fn idx(val: c_int) u32 {
@@ -228,15 +221,18 @@ test "sema: function decl smoke test" {
}
// ---------------------------------------------------------------------------
// Air raw comparison: C vs Zig memcmp
// Air raw comparison: C vs Zig
// ---------------------------------------------------------------------------
fn semaAirRawCheck(source: [:0]const u8) !void {
// C pipeline: parse -> astgen -> sema
var result = try semaCheck(source);
defer result.deinit();
// Zig-side view of the `ZigCompileAirResult` struct declared in dump.h.
// Field order and types must stay in sync with that declaration; the only
// difference is that `items` is typed as a SemaFuncAir pointer here, where
// dump.h exposes it as `void*`.
const ZigCompileAirResult = extern struct {
// Array of per-function Air records; may be null.
items: ?[*]c.SemaFuncAir,
// Number of elements in `items`.
len: u32,
// Null on success (per dump.h); otherwise a NUL-terminated message.
error_msg: ?[*:0]u8,
};
// Compiles the file at the given path (second argument: optional module root)
// and returns the collected Air. The result must be released with
// zig_compile_air_free.
extern fn zig_compile_air([*:0]const u8, ?[*:0]const u8) ZigCompileAirResult;
// Frees everything owned by a result returned from zig_compile_air.
extern fn zig_compile_air_free(*ZigCompileAirResult) void;
// Zig pipeline: write source to temp file, compile, compare in-place
pub fn airCompareFromSource(source: [:0]const u8, c_func_air_list: c.SemaFuncAirList) !void {
const tmp_path = "/tmp/zig0_sema_test_tmp.zig";
{
const f = std.fs.cwd().createFile(tmp_path, .{}) catch return error.TmpFileCreate;
@@ -245,16 +241,126 @@ fn semaAirRawCheck(source: [:0]const u8) !void {
}
defer std.fs.cwd().deleteFile(tmp_path) catch {};
var cmp_result = zig_compare_air(tmp_path, null, @ptrCast(result.c_func_air_list.items), result.c_func_air_list.len);
defer zig_compare_result_free(&cmp_result);
if (cmp_result.error_msg) |e| {
std.debug.print("zig_compare_air error: {s}\n", .{std.mem.span(e)});
return airCompare(tmp_path, null, c_func_air_list);
}
pub fn airCompare(
src_path: [*:0]const u8,
module_root: ?[*:0]const u8,
c_func_air_list: c.SemaFuncAirList,
) !void {
var zig_result = zig_compile_air(src_path, module_root);
defer zig_compile_air_free(&zig_result);
if (zig_result.error_msg) |e| {
std.debug.print("zig_compile_air error: {s}\n", .{std.mem.span(e)});
return error.ZigCompileError;
}
const zig_funcs = if (zig_result.items) |items| items[0..zig_result.len] else &[_]c.SemaFuncAir{};
const c_funcs_ptr: ?[*]const c.SemaFuncAir = @ptrCast(c_func_air_list.items);
const c_funcs = if (c_funcs_ptr) |items| items[0..c_func_air_list.len] else &[_]c.SemaFuncAir{};
if (zig_funcs.len != c_funcs.len) {
std.debug.print("Air func count mismatch: zig={d}, c={d}\n", .{ zig_funcs.len, c_funcs.len });
return error.AirMismatch;
}
if (cmp_result.matched_count != result.c_func_air_list.len) {
std.debug.print("Air func count mismatch: zig matched {d}, c produced {d}\n", .{ cmp_result.matched_count, result.c_func_air_list.len });
for (zig_funcs) |*zf| {
const zig_name = if (zf.name) |n| std.mem.span(n) else "";
const cf = airFindByName(c_funcs, zig_name) orelse {
std.debug.print("Zig function '{s}' not found in C output\n", .{zig_name});
return error.AirMismatch;
};
try airCompareOne(zig_name, &zf.air, &cf.air);
}
}
/// Returns the bytes of a NUL-terminated C string as a slice (terminator
/// excluded). A null pointer yields the empty string.
fn cNameSpan(name: [*c]u8) []const u8 {
    const maybe_name: ?[*:0]const u8 = @ptrCast(name);
    const terminated = maybe_name orelse return "";
    return std.mem.span(terminated);
}
/// Linear search of `funcs` for the first entry whose name equals `name`
/// byte-for-byte. Entries with a null name are treated as having the empty
/// name. Returns a pointer into `funcs`, or null when nothing matches.
fn airFindByName(funcs: []const c.SemaFuncAir, name: []const u8) ?*const c.SemaFuncAir {
    var i: usize = 0;
    while (i < funcs.len) : (i += 1) {
        const candidate = &funcs[i];
        if (!std.mem.eql(u8, name, cNameSpan(candidate.name))) continue;
        return candidate;
    }
    return null;
}
/// Converts a C pointer into an optional many-item const pointer so that a
/// null C pointer can be handled with `orelse` / `if` captures.
fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T {
    if (ptr == null) return null;
    return @ptrCast(ptr);
}
/// Compares the Air of a single function as produced by the Zig compiler
/// (`zig_air`) against the one produced by the C implementation (`c_air`).
///
/// Checks, in order: instruction count, instruction tag bytes, instruction
/// data bytes (8 bytes per instruction), `extra` length, and `extra` words.
/// At the first difference a diagnostic mentioning `name` is printed via
/// `std.debug.print` and `error.AirMismatch` is returned. A null array on
/// either side while the corresponding length is non-zero is also reported
/// as a mismatch.
fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !void {
    if (zig_air.inst_len != c_air.inst_len) {
        std.debug.print("'{s}': inst_len mismatch: zig={d} c={d}\n", .{ name, zig_air.inst_len, c_air.inst_len });
        return error.AirMismatch;
    }
    const inst_len = zig_air.inst_len;

    if (inst_len > 0) {
        // Tags: one byte per instruction.
        const zig_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, zig_air.inst_tags) orelse {
            std.debug.print("'{s}': Zig inst_tags is null but inst_len={d}\n", .{ name, inst_len });
            return error.AirMismatch;
        });
        const c_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, c_air.inst_tags) orelse {
            std.debug.print("'{s}': C inst_tags is null but inst_len={d}\n", .{ name, inst_len });
            return error.AirMismatch;
        });
        if (!std.mem.eql(u8, zig_tags[0..inst_len], c_tags[0..inst_len])) {
            std.debug.print("'{s}': tags mismatch (inst_len={d})\n", .{ name, inst_len });
            return error.AirMismatch;
        }

        // Datas: 8 bytes per instruction. Widen to usize before multiplying
        // so the byte count cannot overflow 32-bit arithmetic.
        const byte_len = @as(usize, inst_len) * 8;
        const zig_datas: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas) orelse {
            std.debug.print("'{s}': Zig inst_datas is null but inst_len={d}\n", .{ name, inst_len });
            return error.AirMismatch;
        });
        const c_datas: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, c_air.inst_datas) orelse {
            std.debug.print("'{s}': C inst_datas is null but inst_len={d}\n", .{ name, inst_len });
            return error.AirMismatch;
        });
        if (!std.mem.eql(u8, zig_datas[0..byte_len], c_datas[0..byte_len])) {
            std.debug.print("'{s}': datas mismatch (inst_len={d})\n", .{ name, inst_len });
            return error.AirMismatch;
        }
    }

    // Extra: u32 words.
    if (zig_air.extra_len != c_air.extra_len) {
        std.debug.print("'{s}': extra_len mismatch: zig={d} c={d}\n", .{ name, zig_air.extra_len, c_air.extra_len });
        return error.AirMismatch;
    }
    const extra_len = zig_air.extra_len;
    if (extra_len > 0) {
        const zig_extra: [*]const u32 = cToOpt(u32, zig_air.extra) orelse {
            std.debug.print("'{s}': Zig extra is null but extra_len={d}\n", .{ name, extra_len });
            return error.AirMismatch;
        };
        const c_extra: [*]const u32 = cToOpt(u32, c_air.extra) orelse {
            std.debug.print("'{s}': C extra is null but extra_len={d}\n", .{ name, extra_len });
            return error.AirMismatch;
        };
        if (!std.mem.eql(u32, zig_extra[0..extra_len], c_extra[0..extra_len])) {
            std.debug.print("'{s}': extra mismatch (extra_len={d})\n", .{ name, extra_len });
            return error.AirMismatch;
        }
    }
}
/// Runs `source` through the C pipeline, then hands the resulting per-function
/// Air list to `airCompareFromSource` for comparison against the Zig
/// compiler's output. Any error from either step is propagated.
fn semaAirRawCheck(source: [:0]const u8) !void {
    // C side: semaCheck produces the result that owns the C Air list.
    var c_sema = try semaCheck(source);
    defer c_sema.deinit();

    // Zig side: compile the same source and compare the Air arrays.
    return airCompareFromSource(source, c_sema.c_func_air_list);
}
test "sema: Air raw C vs Zig comparison (empty)" {

View File

@@ -9,13 +9,6 @@ const sema_test = @import("sema_test.zig");
const c = parser_test.c;
const sc = sema_test.c;
const AirCompareResult = extern struct {
matched_count: u32,
error_msg: ?[*:0]u8,
};
extern fn zig_compare_air([*:0]const u8, ?[*:0]const u8, ?*const anyopaque, u32) AirCompareResult;
extern fn zig_compare_result_free(*AirCompareResult) void;
test "stages: corpus" {
@setEvalBranchQuota(corpus_files.len * 2);
const gpa = std.testing.allocator;
@@ -93,16 +86,7 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
const test_src: [:0]const u8 = symlink_path ++ "/" ++ repo_relative;
const module_root: [:0]const u8 = symlink_path;
var cmp_result = zig_compare_air(test_src.ptr, module_root.ptr, @ptrCast(c_func_air_list.items), c_func_air_list.len);
defer zig_compare_result_free(&cmp_result);
if (cmp_result.error_msg) |e| {
std.debug.print("zig_compare_air error: {s}\n", .{std.mem.span(e)});
return error.AirMismatch;
}
if (cmp_result.matched_count != c_func_air_list.len) {
std.debug.print("Air func count mismatch: zig matched {d}, c produced {d}\n", .{ cmp_result.matched_count, c_func_air_list.len });
return error.AirMismatch;
}
try sema_test.airCompare(test_src.ptr, module_root.ptr, c_func_air_list);
}
}