Replace copy-based Air export with zero-copy in-place comparison

Pass C-side SemaFuncAir arrays into zig_compare_air so the callback
can compare Air tags/datas/extra directly against the Zig compiler's
in-memory arrays, eliminating 4 heap allocations + 3 memcpys per
function.

Fix the early-return guard in PerThread.zig to also check
verbose_air_callback, so the callback fires even when
enable_debug_extensions is false (ReleaseFast).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 17:57:58 +00:00
parent 59b11a3be4
commit e275e9bb76
5 changed files with 179 additions and 199 deletions

View File

@@ -1734,7 +1734,7 @@ fn analyzeFuncBody(
const dump_air = build_options.enable_debug_extensions and comp.verbose_air;
const dump_llvm_ir = build_options.enable_debug_extensions and (comp.verbose_llvm_ir != null or comp.verbose_llvm_bc != null);
if (comp.bin_file == null and zcu.llvm_object == null and !dump_air and !dump_llvm_ir) {
if (comp.bin_file == null and zcu.llvm_object == null and !dump_air and !dump_llvm_ir and comp.verbose_air_callback == null) {
air.deinit(gpa);
return .{ .ies_outdated = ies_outdated };
}

View File

@@ -1,6 +1,6 @@
// verbose_air.zig — Zig-side raw Air array exporter.
// Compiles source via the Zig compiler pipeline and exports raw Air arrays
// (tags, datas, extra) for memcmp-based comparison with C-produced arrays.
// verbose_air.zig — Zig-side zero-copy Air comparer.
// Compiles source via the Zig compiler pipeline and compares Air arrays
// in-place against C-produced arrays (passed in via CSemaFuncAir).
// Exports C-compatible functions for use by stage0 tests.
const std = @import("std");
@@ -13,92 +13,165 @@ comptime {
_ = @import("verbose_intern_pool.zig");
}
const FuncAir = extern struct {
name: ?[*:0]u8, // c_allocator, caller frees
/// Matches C `Air` struct layout (air.h).
const CAir = extern struct {
inst_len: u32,
tags: ?[*]u8, // c_allocator, caller frees
datas: ?[*]u8, // 8 bytes per inst, c_allocator
inst_cap: u32,
inst_tags: ?[*]const u8,
inst_datas: ?[*]const u8, // 8 bytes per inst
extra_len: u32,
extra: ?[*]u32, // c_allocator, caller frees
extra_cap: u32,
extra: ?[*]const u32,
};
const AirResult = extern struct {
funcs: ?[*]FuncAir, // c_allocator array
func_count: u32,
error_msg: ?[*:0]u8, // NULL on success
/// Matches C `SemaFuncAir` struct layout (sema.h).
/// Declared `extern` so the fields keep the declaration order below; the
/// pointers are const because the Zig side only reads these entries.
const CSemaFuncAir = extern struct {
/// NUL-terminated function name used as the lookup key; entries whose name
/// is null are skipped when searching for a match.
name: ?[*:0]const u8,
/// The C-produced Air arrays (tags, datas, extra) for this function.
air: CAir,
};
const AirCollector = struct {
funcs: std.ArrayListUnmanaged(FuncAir) = .empty,
/// Result returned by value from `zig_compare_air`.
const CompareResult = extern struct {
/// Number of functions whose tags, datas and extra all compared equal.
matched_count: u32,
/// First error or mismatch message, NUL-terminated and allocated with
/// `std.heap.c_allocator`; release it with `zig_compare_result_free`.
error_msg: ?[*:0]u8, // NULL on success, caller frees
};
fn addFunc(ctx: *anyopaque, name: []const u8, air: *const Air) void {
const self: *AirCollector = @ptrCast(@alignCast(ctx));
self.addFuncInner(name, air) catch {};
const AirComparer = struct {
c_funcs: ?[*]const CSemaFuncAir,
c_func_count: u32,
matched: u32 = 0,
first_error: ?[*:0]u8 = null,
/// Type-erased entry point installed as the `verbose_air_callback`.
/// Recovers the `AirComparer` from the opaque context pointer and forwards
/// the function name and its Air to `compareFuncInner`.
fn compareFunc(ctx: *anyopaque, name: []const u8, air: *const Air) void {
    const comparer: *AirComparer = @ptrCast(@alignCast(ctx));
    // This callback returns void, so an error from the inner call cannot
    // propagate and is dropped here.
    comparer.compareFuncInner(name, air) catch {};
}
fn addFuncInner(self: *AirCollector, name: []const u8, air: *const Air) !void {
const gpa = std.heap.c_allocator;
fn compareFuncInner(self: *AirComparer, name: []const u8, air: *const Air) !void {
// Already have an error, skip further comparisons.
if (self.first_error != null) return;
const name_z = try gpa.dupeZ(u8, name);
const inst_len = air.instructions.len;
const src_tags = air.instructions.items(.tag);
const tags_buf = try gpa.alloc(u8, inst_len);
@memcpy(tags_buf, @as([*]const u8, @ptrCast(src_tags.ptr))[0..inst_len]);
const src_datas = air.instructions.items(.data);
const datas_buf = try gpa.alloc(u8, inst_len * 8);
if (@sizeOf(Air.Inst.Data) == 8) {
@memcpy(datas_buf, @as([*]const u8, @ptrCast(src_datas.ptr))[0 .. inst_len * 8]);
} else {
// Safety build: @sizeOf(Data) may be > 8, copy first 8 bytes per element
for (src_datas, 0..) |*d, i| {
@memcpy(datas_buf[i * 8 ..][0..8], @as(*const [8]u8, @ptrCast(d)));
// Find matching C function by name.
const c_items = if (self.c_funcs) |f| f[0..self.c_func_count] else {
self.setError("Zig function '{s}' not found in C output", .{name});
return;
};
for (c_items) |*cf| {
const c_name = std.mem.span(cf.name orelse continue);
if (std.mem.eql(u8, name, c_name)) {
self.compareAir(name, &cf.air, air);
return;
}
}
const extra_len: u32 = @intCast(air.extra.items.len);
const extra_buf = try gpa.alloc(u32, extra_len);
@memcpy(extra_buf, air.extra.items);
// Function not found in C output.
self.setError("Zig function '{s}' not found in C output", .{name});
}
try self.funcs.append(gpa, .{
.name = name_z.ptr,
.inst_len = @intCast(inst_len),
.tags = tags_buf.ptr,
.datas = datas_buf.ptr,
.extra_len = extra_len,
.extra = extra_buf.ptr,
});
/// Compares one function's Air from the Zig compiler against the C-produced
/// `c_air`, in place and without copying.
///
/// Checks, in order: instruction count, tag bytes, the first 8 bytes of every
/// data entry, extra length, and extra words. The first difference is recorded
/// through `setError` and ends the comparison. `matched` is incremented only
/// when every check passes.
fn compareAir(self: *AirComparer, name: []const u8, c_air: *const CAir, air: *const Air) void {
    const inst_len: u32 = @intCast(air.instructions.len);
    if (inst_len != c_air.inst_len) {
        self.setError("'{s}': inst_len mismatch: zig={d} c={d}", .{ name, inst_len, c_air.inst_len });
        return;
    }

    // Only touch the instruction arrays when there is something to compare.
    // With zero instructions a null C pointer is not an error, and the function
    // must still have its `extra` compared and be counted as matched instead of
    // being dropped silently.
    if (inst_len > 0) {
        // Tags (compared as one byte per instruction)
        const zig_tags = air.instructions.items(.tag);
        const zig_tags_bytes = @as([*]const u8, @ptrCast(zig_tags.ptr))[0..inst_len];
        const c_tags = (c_air.inst_tags orelse {
            self.setError("'{s}': C inst_tags is null but inst_len={d}", .{ name, inst_len });
            return;
        })[0..inst_len];
        if (!std.mem.eql(u8, zig_tags_bytes, c_tags)) {
            self.setError("'{s}': tags mismatch (inst_len={d})", .{ name, inst_len });
            return;
        }

        // Datas (8 bytes per instruction). Widen before multiplying so the
        // byte length cannot overflow u32.
        const datas_byte_len = @as(usize, inst_len) * 8;
        const zig_datas = air.instructions.items(.data);
        const c_datas = (c_air.inst_datas orelse {
            self.setError("'{s}': C inst_datas is null but inst_len={d}", .{ name, inst_len });
            return;
        })[0..datas_byte_len];
        if (@sizeOf(Air.Inst.Data) == 8) {
            const zig_datas_bytes = @as([*]const u8, @ptrCast(zig_datas.ptr))[0..datas_byte_len];
            if (!std.mem.eql(u8, zig_datas_bytes, c_datas)) {
                self.setError("'{s}': datas mismatch (inst_len={d})", .{ name, inst_len });
                return;
            }
        } else {
            // Safety build: @sizeOf(Data) may be > 8, compare first 8 bytes per element
            for (zig_datas, 0..) |*d, i| {
                const zig_bytes = @as(*const [8]u8, @ptrCast(d));
                if (!std.mem.eql(u8, zig_bytes, c_datas[i * 8 ..][0..8])) {
                    self.setError("'{s}': datas mismatch at inst {d}", .{ name, i });
                    return;
                }
            }
        }
    }

    // Extra
    const extra_len: u32 = @intCast(air.extra.items.len);
    if (extra_len != c_air.extra_len) {
        self.setError("'{s}': extra_len mismatch: zig={d} c={d}", .{ name, extra_len, c_air.extra_len });
        return;
    }
    if (extra_len > 0) {
        const c_extra = (c_air.extra orelse {
            self.setError("'{s}': C extra is null but extra_len={d}", .{ name, extra_len });
            return;
        })[0..extra_len];
        if (!std.mem.eql(u32, air.extra.items, c_extra)) {
            self.setError("'{s}': extra mismatch (extra_len={d})", .{ name, extra_len });
            return;
        }
    }

    self.matched += 1;
}
/// Records the first error message; once one is stored, later calls do nothing.
///
/// The message is formatted with `fmt`/`args`, copied into a NUL-terminated
/// buffer from `std.heap.c_allocator`, and stored in `first_error`.
/// If either allocation fails, `first_error` is left null and the message is lost.
fn setError(self: *AirComparer, comptime fmt: []const u8, args: anytype) void {
    if (self.first_error != null) return;
    const gpa = std.heap.c_allocator;
    const msg = std.fmt.allocPrint(gpa, fmt, args) catch return;
    // Free the unterminated buffer on every path, including when the
    // NUL-terminated copy below fails (previously that path leaked `msg`).
    defer gpa.free(msg);
    const msg_z = gpa.dupeZ(u8, msg) catch return;
    self.first_error = msg_z.ptr;
}
};
export fn zig_compile_air(src_path_ptr: [*:0]const u8) AirResult {
return zigCompileAirImpl(std.mem.span(src_path_ptr)) catch |err| {
/// C-callable entry point: compiles the file at `src_path_ptr` and compares
/// the resulting Air against the `c_func_count` entries behind `c_funcs_raw`.
/// `c_funcs_raw` is reinterpreted as a pointer to `CSemaFuncAir` entries.
/// Any Zig error from the implementation is converted to a result whose
/// message is the error's name.
export fn zig_compare_air(
    src_path_ptr: [*:0]const u8,
    c_funcs_raw: ?*const anyopaque,
    c_func_count: u32,
) CompareResult {
    const src_path = std.mem.span(src_path_ptr);
    const c_funcs: ?[*]const CSemaFuncAir = @ptrCast(@alignCast(c_funcs_raw));
    return zigCompareAirImpl(src_path, c_funcs, c_func_count) catch |err|
        errResult(@errorName(err));
}
export fn zig_air_result_free(result: *AirResult) void {
const gpa = std.heap.c_allocator;
if (result.funcs) |funcs| {
for (funcs[0..result.func_count]) |*f| {
if (f.name) |n| gpa.free(std.mem.span(n));
if (f.tags) |t| gpa.free(t[0..f.inst_len]);
if (f.datas) |d| gpa.free(d[0 .. f.inst_len * 8]);
if (f.extra) |e| gpa.free(e[0..f.extra_len]);
}
gpa.free(funcs[0..result.func_count]);
}
if (result.error_msg) |e| gpa.free(std.mem.span(e));
/// Frees the error message held by `result`, if there is one, using
/// `std.heap.c_allocator`. Does nothing when `error_msg` is null.
export fn zig_compare_result_free(result: *CompareResult) void {
    const msg = result.error_msg orelse return;
    std.heap.c_allocator.free(std.mem.span(msg));
}
fn errResult(msg: []const u8) AirResult {
fn errResult(msg: []const u8) CompareResult {
const duped = std.heap.c_allocator.dupeZ(u8, msg) catch
return .{ .funcs = null, .func_count = 0, .error_msg = null };
return .{ .funcs = null, .func_count = 0, .error_msg = duped.ptr };
return .{ .matched_count = 0, .error_msg = null };
return .{ .matched_count = 0, .error_msg = duped.ptr };
}
fn zigCompileAirImpl(src_path: []const u8) !AirResult {
fn zigCompareAirImpl(
src_path: []const u8,
c_funcs: ?[*]const CSemaFuncAir,
c_func_count: u32,
) !CompareResult {
const gpa = std.heap.c_allocator;
var arena_state = std.heap.ArenaAllocator.init(gpa);
@@ -169,7 +242,10 @@ fn zigCompileAirImpl(src_path: []const u8) !AirResult {
gpa.destroy(thread_pool);
}
var collector: AirCollector = .{};
var comparer: AirComparer = .{
.c_funcs = c_funcs,
.c_func_count = c_func_count,
};
var create_diag: Compilation.CreateDiagnostic = undefined;
const comp = Compilation.create(gpa, arena, &create_diag, .{
@@ -180,10 +256,9 @@ fn zigCompileAirImpl(src_path: []const u8) !AirResult {
.emit_bin = .no,
.thread_pool = thread_pool,
.cache_mode = .whole,
.verbose_air = true,
.verbose_air_callback = .{
.context = @ptrCast(&collector),
.call = @ptrCast(&AirCollector.addFunc),
.context = @ptrCast(&comparer),
.call = @ptrCast(&AirComparer.compareFunc),
},
}) catch |err| switch (err) {
error.CreateFail => {
@@ -205,8 +280,7 @@ fn zigCompileAirImpl(src_path: []const u8) !AirResult {
}
return .{
.funcs = if (collector.funcs.items.len > 0) collector.funcs.items.ptr else null,
.func_count = @intCast(collector.funcs.items.len),
.error_msg = null,
.matched_count = comparer.matched,
.error_msg = comparer.first_error,
};
}

View File

@@ -6,22 +6,13 @@
#include <stdint.h>
typedef struct {
char* name; // Caller frees
uint32_t inst_len;
uint8_t* tags; // Caller frees
uint8_t* datas; // 8 bytes per inst, caller frees
uint32_t extra_len;
uint32_t* extra; // Caller frees
} ZigFuncAir;
typedef struct {
ZigFuncAir* funcs; // Caller frees (and each element's arrays)
uint32_t func_count;
uint32_t matched_count;
char* error_msg; // NULL on success, caller frees
} ZigAirResult;
} AirCompareResult;
extern ZigAirResult zig_compile_air(const char* src_path);
extern void zig_air_result_free(ZigAirResult* result);
// c_funcs: pointer to SemaFuncAir array (from sema.h). Passed as void* to avoid header dep.
extern AirCompareResult zig_compare_air(const char* src_path, const void* c_funcs, uint32_t c_func_count);
extern void zig_compare_result_free(AirCompareResult* result);
// InternPool dumper (text-based, separate concern).
typedef struct {

View File

@@ -8,22 +8,12 @@ pub const c = @cImport({
@cInclude("dump.h");
});
// Zig-side raw Air types (linked from dumper_obj, not imported).
const ZigFuncAir = extern struct {
name: ?[*:0]u8,
inst_len: u32,
tags: ?[*]u8,
datas: ?[*]u8,
extra_len: u32,
extra: ?[*]u32,
};
const ZigAirResult = extern struct {
funcs: ?[*]ZigFuncAir,
func_count: u32,
/// Hand-written mirror of the result struct returned by `zig_compare_air`.
const AirCompareResult = extern struct {
/// Count of matching functions reported by the comparer.
matched_count: u32,
/// Null on success; otherwise a NUL-terminated message that is released
/// with `zig_compare_result_free`.
error_msg: ?[*:0]u8,
};
extern fn zig_compile_air([*:0]const u8) ZigAirResult;
extern fn zig_air_result_free(*ZigAirResult) void;
extern fn zig_compare_air([*:0]const u8, ?*const anyopaque, u32) AirCompareResult;
extern fn zig_compare_result_free(*AirCompareResult) void;
// Helper to convert C #define integer constants (c_int) to u32 for comparison
// with uint32_t fields (InternPoolIndex, etc.).
@@ -241,27 +231,12 @@ test "sema: function decl smoke test" {
// Air raw comparison: C vs Zig memcmp
// ---------------------------------------------------------------------------
fn compareAir(c_air: *const c.Air, zig: *const ZigFuncAir) !void {
try std.testing.expectEqual(zig.inst_len, c_air.inst_len);
// Tags
const n = c_air.inst_len;
try std.testing.expectEqualSlices(u8, zig.tags.?[0..n], @as([*]const u8, @ptrCast(c_air.inst_tags))[0..n]);
// Data (8 bytes per instruction)
try std.testing.expectEqualSlices(u8, zig.datas.?[0 .. n * 8], @as([*]const u8, @ptrCast(c_air.inst_datas))[0 .. n * 8]);
// Extra
try std.testing.expectEqual(zig.extra_len, c_air.extra_len);
try std.testing.expectEqualSlices(u32, zig.extra.?[0..zig.extra_len], c_air.extra[0..c_air.extra_len]);
}
fn semaAirRawCheck(source: [:0]const u8) !void {
// C pipeline: parse -> astgen -> sema
var result = try semaCheck(source);
defer result.deinit();
// Zig pipeline: write source to temp file, compile, get raw Air arrays
// Zig pipeline: write source to temp file, compile, compare in-place
const tmp_path = "/tmp/zig0_sema_test_tmp.zig";
{
const f = std.fs.cwd().createFile(tmp_path, .{}) catch return error.TmpFileCreate;
@@ -270,37 +245,12 @@ fn semaAirRawCheck(source: [:0]const u8) !void {
}
defer std.fs.cwd().deleteFile(tmp_path) catch {};
var zig_result = zig_compile_air(tmp_path);
defer zig_air_result_free(&zig_result);
if (zig_result.error_msg) |e| {
std.debug.print("zig_compile_air error: {s}\n", .{std.mem.span(e)});
return error.ZigCompileFailed;
var cmp_result = zig_compare_air(tmp_path, @ptrCast(result.c_func_air_list.items), result.c_func_air_list.len);
defer zig_compare_result_free(&cmp_result);
if (cmp_result.error_msg) |e| {
std.debug.print("zig_compare_air error: {s}\n", .{std.mem.span(e)});
return error.AirMismatch;
}
const c_airs = result.c_func_air_list;
const zig_funcs = if (zig_result.funcs) |f| f[0..zig_result.func_count] else &[_]ZigFuncAir{};
// Match functions by name
for (zig_funcs) |*zf| {
const zig_name = std.mem.span(zf.name.?);
var found = false;
const c_items = c_airs.items[0..c_airs.len];
for (c_items) |*cf| {
const c_name: []const u8 = std.mem.span(cf.name);
if (std.mem.eql(u8, zig_name, c_name)) {
try compareAir(&cf.air, zf);
found = true;
break;
}
}
if (!found) {
std.debug.print("Zig function '{s}' not found in C output\n", .{zig_name});
return error.FunctionNotFound;
}
}
// Check that C didn't produce extra functions
try std.testing.expectEqual(zig_result.func_count, c_airs.len);
}
test "sema: Air raw C vs Zig comparison (empty)" {

View File

@@ -9,46 +9,25 @@ const sema_test = @import("sema_test.zig");
const c = parser_test.c;
const sc = sema_test.c;
// Zig-side raw Air types (linked from dumper_obj, not imported).
const ZigFuncAir = extern struct {
name: ?[*:0]u8,
inst_len: u32,
tags: ?[*]u8,
datas: ?[*]u8,
extra_len: u32,
extra: ?[*]u32,
};
const ZigAirResult = extern struct {
funcs: ?[*]ZigFuncAir,
func_count: u32,
/// Hand-written mirror of the result struct returned by `zig_compare_air`.
const AirCompareResult = extern struct {
/// Count of matching functions reported by the comparer.
matched_count: u32,
/// Null on success; otherwise a NUL-terminated message that is released
/// with `zig_compare_result_free`.
error_msg: ?[*:0]u8,
};
extern fn zig_compile_air([*:0]const u8) ZigAirResult;
extern fn zig_air_result_free(*ZigAirResult) void;
fn compareAir(c_air: *const sc.Air, zig: *const ZigFuncAir) !void {
try std.testing.expectEqual(zig.inst_len, c_air.inst_len);
const n = c_air.inst_len;
try std.testing.expectEqualSlices(u8, zig.tags.?[0..n], @as([*]const u8, @ptrCast(c_air.inst_tags))[0..n]);
try std.testing.expectEqualSlices(u8, zig.datas.?[0 .. n * 8], @as([*]const u8, @ptrCast(c_air.inst_datas))[0 .. n * 8]);
try std.testing.expectEqual(zig.extra_len, c_air.extra_len);
try std.testing.expectEqualSlices(u32, zig.extra.?[0..zig.extra_len], c_air.extra[0..c_air.extra_len]);
}
extern fn zig_compare_air([*:0]const u8, ?*const anyopaque, u32) AirCompareResult;
extern fn zig_compare_result_free(*AirCompareResult) void;
test "stages: corpus" {
@setEvalBranchQuota(corpus_files.len * 2);
const gpa = std.testing.allocator;
inline for (corpus_files) |path| {
stagesCheck(gpa, @embedFile(path), path["../".len..]) catch {
stagesCheck(gpa, @embedFile(path)) catch {
std.debug.print("FAIL: {s}\n", .{path});
return error.TestFailed;
};
}
}
fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: [:0]const u8) !void {
fn stagesCheck(gpa: Allocator, source: [:0]const u8) !void {
// Parse once with C parser
var c_ast = c.astParse(source.ptr, @intCast(source.len));
defer c.astDeinit(&c_ast);
@@ -92,36 +71,22 @@ fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: [:0]const u8) !vo
var c_func_air_list = sc.semaAnalyze(&c_sema);
defer sc.semaFuncAirListDeinit(&c_func_air_list);
var zig_result = zig_compile_air(src_path.ptr);
defer zig_air_result_free(&zig_result);
if (zig_result.error_msg) |e| {
std.debug.print("zig_compile_air error: {s}\n", .{std.mem.span(e)});
return error.ZigCompileFailed;
// Write source to a temp file to avoid module path conflicts
// (source files inside lib/std/ conflict with the 'std' module).
const tmp_path = "/tmp/zig0_stages_test_tmp.zig";
{
const f = std.fs.cwd().createFile(tmp_path, .{}) catch return error.TmpFileCreate;
defer f.close();
f.writeAll(source) catch return error.TmpFileWrite;
}
defer std.fs.cwd().deleteFile(tmp_path) catch {};
const zig_funcs = if (zig_result.funcs) |f| f[0..zig_result.func_count] else &[_]ZigFuncAir{};
const c_items = c_func_air_list.items[0..c_func_air_list.len];
// Match functions by name
for (zig_funcs) |*zf| {
const zig_name = std.mem.span(zf.name.?);
var found = false;
for (c_items) |*cf| {
const c_name: []const u8 = std.mem.span(cf.name);
if (std.mem.eql(u8, zig_name, c_name)) {
try compareAir(&cf.air, zf);
found = true;
break;
}
}
if (!found) {
std.debug.print("Zig function '{s}' not found in C output\n", .{zig_name});
return error.FunctionNotFound;
}
var cmp_result = zig_compare_air(tmp_path, @ptrCast(c_func_air_list.items), c_func_air_list.len);
defer zig_compare_result_free(&cmp_result);
if (cmp_result.error_msg) |e| {
std.debug.print("zig_compare_air error: {s}\n", .{std.mem.span(e)});
return error.AirMismatch;
}
// Check that C didn't produce extra functions
try std.testing.expectEqual(zig_result.func_count, c_func_air_list.len);
}
}