commit 1ed1e8ddfcb8f532cf5d42d733423ea8ae1fddfd (tree)
parent 2516d3a46af9d4142e5d93f5c56c46b746344206
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Thu, 19 Feb 2026 17:57:58 +0000
Replace copy-based Air export with zero-copy in-place comparison
Pass C-side SemaFuncAir arrays into zig_compare_air so the callback
can compare Air tags/datas/extra directly against the Zig compiler's
in-memory arrays, eliminating 4 heap allocations + 3 memcpys per
function.
Fix the early-return guard in PerThread.zig to also check
verbose_air_callback, so the callback fires even when
enable_debug_extensions is false (ReleaseFast).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
5 files changed, 175 insertions(+), 195 deletions(-)
diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig
@@ -1734,7 +1734,7 @@ fn analyzeFuncBody(
const dump_air = build_options.enable_debug_extensions and comp.verbose_air;
const dump_llvm_ir = build_options.enable_debug_extensions and (comp.verbose_llvm_ir != null or comp.verbose_llvm_bc != null);
- if (comp.bin_file == null and zcu.llvm_object == null and !dump_air and !dump_llvm_ir) {
+ if (comp.bin_file == null and zcu.llvm_object == null and !dump_air and !dump_llvm_ir and comp.verbose_air_callback == null) {
air.deinit(gpa);
return .{ .ies_outdated = ies_outdated };
}
diff --git a/src/verbose_air.zig b/src/verbose_air.zig
@@ -1,6 +1,6 @@
-// verbose_air.zig — Zig-side raw Air array exporter.
-// Compiles source via the Zig compiler pipeline and exports raw Air arrays
-// (tags, datas, extra) for memcmp-based comparison with C-produced arrays.
+// verbose_air.zig — Zig-side zero-copy Air comparer.
+// Compiles source via the Zig compiler pipeline and compares Air arrays
+// in-place against C-produced arrays (passed in via CSemaFuncAir).
// Exports C-compatible functions for use by stage0 tests.
const std = @import("std");
@@ -13,92 +13,165 @@ comptime {
_ = @import("verbose_intern_pool.zig");
}
-const FuncAir = extern struct {
- name: ?[*:0]u8, // c_allocator, caller frees
+/// Matches C `Air` struct layout (air.h).
+const CAir = extern struct {
inst_len: u32,
- tags: ?[*]u8, // c_allocator, caller frees
- datas: ?[*]u8, // 8 bytes per inst, c_allocator
+ inst_cap: u32,
+ inst_tags: ?[*]const u8,
+ inst_datas: ?[*]const u8, // 8 bytes per inst
extra_len: u32,
- extra: ?[*]u32, // c_allocator, caller frees
+ extra_cap: u32,
+ extra: ?[*]const u32,
};
-const AirResult = extern struct {
- funcs: ?[*]FuncAir, // c_allocator array
- func_count: u32,
- error_msg: ?[*:0]u8, // NULL on success
+/// Matches C `SemaFuncAir` struct layout (sema.h).
+const CSemaFuncAir = extern struct {
+ name: ?[*:0]const u8,
+ air: CAir,
};
-const AirCollector = struct {
- funcs: std.ArrayListUnmanaged(FuncAir) = .empty,
+const CompareResult = extern struct {
+ matched_count: u32,
+ error_msg: ?[*:0]u8, // NULL on success, caller frees
+};
+
+const AirComparer = struct {
+ c_funcs: ?[*]const CSemaFuncAir,
+ c_func_count: u32,
+ matched: u32 = 0,
+ first_error: ?[*:0]u8 = null,
- fn addFunc(ctx: *anyopaque, name: []const u8, air: *const Air) void {
- const self: *AirCollector = @ptrCast(@alignCast(ctx));
- self.addFuncInner(name, air) catch {};
+ fn compareFunc(ctx: *anyopaque, name: []const u8, air: *const Air) void {
+ const self: *AirComparer = @ptrCast(@alignCast(ctx));
+ self.compareFuncInner(name, air) catch {};
}
- fn addFuncInner(self: *AirCollector, name: []const u8, air: *const Air) !void {
- const gpa = std.heap.c_allocator;
+ fn compareFuncInner(self: *AirComparer, name: []const u8, air: *const Air) !void {
+ // Already have an error, skip further comparisons.
+ if (self.first_error != null) return;
+
+ // Find matching C function by name.
+ const c_items = if (self.c_funcs) |f| f[0..self.c_func_count] else {
+ self.setError("Zig function '{s}' not found in C output", .{name});
+ return;
+ };
+ for (c_items) |*cf| {
+ const c_name = std.mem.span(cf.name orelse continue);
+ if (std.mem.eql(u8, name, c_name)) {
+ self.compareAir(name, &cf.air, air);
+ return;
+ }
+ }
- const name_z = try gpa.dupeZ(u8, name);
+ // Function not found in C output.
+ self.setError("Zig function '{s}' not found in C output", .{name});
+ }
+
+ fn compareAir(self: *AirComparer, name: []const u8, c_air: *const CAir, air: *const Air) void {
+ const inst_len: u32 = @intCast(air.instructions.len);
+ if (inst_len != c_air.inst_len) {
+ self.setError("'{s}': inst_len mismatch: zig={d} c={d}", .{ name, inst_len, c_air.inst_len });
+ return;
+ }
- const inst_len = air.instructions.len;
- const src_tags = air.instructions.items(.tag);
- const tags_buf = try gpa.alloc(u8, inst_len);
- @memcpy(tags_buf, @as([*]const u8, @ptrCast(src_tags.ptr))[0..inst_len]);
+ // Tags
+ const zig_tags = air.instructions.items(.tag);
+ const zig_tags_bytes = @as([*]const u8, @ptrCast(zig_tags.ptr))[0..inst_len];
+ const c_tags = (c_air.inst_tags orelse {
+ if (inst_len > 0) {
+ self.setError("'{s}': C inst_tags is null but inst_len={d}", .{ name, inst_len });
+ }
+ return;
+ })[0..inst_len];
+ if (!std.mem.eql(u8, zig_tags_bytes, c_tags)) {
+ self.setError("'{s}': tags mismatch (inst_len={d})", .{ name, inst_len });
+ return;
+ }
- const src_datas = air.instructions.items(.data);
- const datas_buf = try gpa.alloc(u8, inst_len * 8);
+ // Datas (8 bytes per instruction)
+ const zig_datas = air.instructions.items(.data);
+ const c_datas = (c_air.inst_datas orelse {
+ if (inst_len > 0) {
+ self.setError("'{s}': C inst_datas is null but inst_len={d}", .{ name, inst_len });
+ }
+ return;
+ })[0 .. inst_len * 8];
if (@sizeOf(Air.Inst.Data) == 8) {
- @memcpy(datas_buf, @as([*]const u8, @ptrCast(src_datas.ptr))[0 .. inst_len * 8]);
+ const zig_datas_bytes = @as([*]const u8, @ptrCast(zig_datas.ptr))[0 .. inst_len * 8];
+ if (!std.mem.eql(u8, zig_datas_bytes, c_datas)) {
+ self.setError("'{s}': datas mismatch (inst_len={d})", .{ name, inst_len });
+ return;
+ }
} else {
- // Safety build: @sizeOf(Data) may be > 8, copy first 8 bytes per element
- for (src_datas, 0..) |*d, i| {
- @memcpy(datas_buf[i * 8 ..][0..8], @as(*const [8]u8, @ptrCast(d)));
+ // Safety build: @sizeOf(Data) may be > 8, compare first 8 bytes per element
+ for (zig_datas, 0..) |*d, i| {
+ const zig_bytes = @as(*const [8]u8, @ptrCast(d));
+ if (!std.mem.eql(u8, zig_bytes, c_datas[i * 8 ..][0..8])) {
+ self.setError("'{s}': datas mismatch at inst {d}", .{ name, i });
+ return;
+ }
}
}
+ // Extra
const extra_len: u32 = @intCast(air.extra.items.len);
- const extra_buf = try gpa.alloc(u32, extra_len);
- @memcpy(extra_buf, air.extra.items);
-
- try self.funcs.append(gpa, .{
- .name = name_z.ptr,
- .inst_len = @intCast(inst_len),
- .tags = tags_buf.ptr,
- .datas = datas_buf.ptr,
- .extra_len = extra_len,
- .extra = extra_buf.ptr,
- });
+ if (extra_len != c_air.extra_len) {
+ self.setError("'{s}': extra_len mismatch: zig={d} c={d}", .{ name, extra_len, c_air.extra_len });
+ return;
+ }
+ if (extra_len > 0) {
+ const c_extra = (c_air.extra orelse {
+ self.setError("'{s}': C extra is null but extra_len={d}", .{ name, extra_len });
+ return;
+ })[0..extra_len];
+ if (!std.mem.eql(u32, air.extra.items, c_extra)) {
+ self.setError("'{s}': extra mismatch (extra_len={d})", .{ name, extra_len });
+ return;
+ }
+ }
+
+ self.matched += 1;
+ }
+
+    /// Records the first comparison failure as a formatted, NUL-terminated
+    /// message allocated with `std.heap.c_allocator`. Does nothing if an error
+    /// is already recorded; if allocation fails, `first_error` stays null.
+    fn setError(self: *AirComparer, comptime fmt: []const u8, args: anytype) void {
+        if (self.first_error != null) return;
+        const gpa = std.heap.c_allocator;
+        const msg = std.fmt.allocPrint(gpa, fmt, args) catch return;
+        // Release the unterminated buffer on every exit, including a failed dupeZ below.
+        defer gpa.free(msg);
+        const msg_z = gpa.dupeZ(u8, msg) catch return;
+        self.first_error = msg_z.ptr;
+    }
};
-export fn zig_compile_air(src_path_ptr: [*:0]const u8) AirResult {
- return zigCompileAirImpl(std.mem.span(src_path_ptr)) catch |err| {
+export fn zig_compare_air(
+ src_path_ptr: [*:0]const u8,
+ c_funcs_raw: ?*const anyopaque,
+ c_func_count: u32,
+) CompareResult {
+ return zigCompareAirImpl(
+ std.mem.span(src_path_ptr),
+ @ptrCast(@alignCast(c_funcs_raw)),
+ c_func_count,
+ ) catch |err| {
return errResult(@errorName(err));
};
}
-export fn zig_air_result_free(result: *AirResult) void {
- const gpa = std.heap.c_allocator;
- if (result.funcs) |funcs| {
- for (funcs[0..result.func_count]) |*f| {
- if (f.name) |n| gpa.free(std.mem.span(n));
- if (f.tags) |t| gpa.free(t[0..f.inst_len]);
- if (f.datas) |d| gpa.free(d[0 .. f.inst_len * 8]);
- if (f.extra) |e| gpa.free(e[0..f.extra_len]);
- }
- gpa.free(funcs[0..result.func_count]);
- }
- if (result.error_msg) |e| gpa.free(std.mem.span(e));
+export fn zig_compare_result_free(result: *CompareResult) void {
+ if (result.error_msg) |e| std.heap.c_allocator.free(std.mem.span(e));
}
-fn errResult(msg: []const u8) AirResult {
+fn errResult(msg: []const u8) CompareResult {
const duped = std.heap.c_allocator.dupeZ(u8, msg) catch
- return .{ .funcs = null, .func_count = 0, .error_msg = null };
- return .{ .funcs = null, .func_count = 0, .error_msg = duped.ptr };
+ return .{ .matched_count = 0, .error_msg = null };
+ return .{ .matched_count = 0, .error_msg = duped.ptr };
}
-fn zigCompileAirImpl(src_path: []const u8) !AirResult {
+fn zigCompareAirImpl(
+ src_path: []const u8,
+ c_funcs: ?[*]const CSemaFuncAir,
+ c_func_count: u32,
+) !CompareResult {
const gpa = std.heap.c_allocator;
var arena_state = std.heap.ArenaAllocator.init(gpa);
@@ -169,7 +242,10 @@ fn zigCompileAirImpl(src_path: []const u8) !AirResult {
gpa.destroy(thread_pool);
}
- var collector: AirCollector = .{};
+ var comparer: AirComparer = .{
+ .c_funcs = c_funcs,
+ .c_func_count = c_func_count,
+ };
var create_diag: Compilation.CreateDiagnostic = undefined;
const comp = Compilation.create(gpa, arena, &create_diag, .{
@@ -180,10 +256,9 @@ fn zigCompileAirImpl(src_path: []const u8) !AirResult {
.emit_bin = .no,
.thread_pool = thread_pool,
.cache_mode = .whole,
- .verbose_air = true,
.verbose_air_callback = .{
- .context = @ptrCast(&collector),
- .call = @ptrCast(&AirCollector.addFunc),
+ .context = @ptrCast(&comparer),
+ .call = @ptrCast(&AirComparer.compareFunc),
},
}) catch |err| switch (err) {
error.CreateFail => {
@@ -205,8 +280,7 @@ fn zigCompileAirImpl(src_path: []const u8) !AirResult {
}
return .{
- .funcs = if (collector.funcs.items.len > 0) collector.funcs.items.ptr else null,
- .func_count = @intCast(collector.funcs.items.len),
- .error_msg = null,
+ .matched_count = comparer.matched,
+ .error_msg = comparer.first_error,
};
}
diff --git a/stage0/dump.h b/stage0/dump.h
@@ -6,22 +6,13 @@
#include <stdint.h>
typedef struct {
- char* name; // Caller frees
- uint32_t inst_len;
- uint8_t* tags; // Caller frees
- uint8_t* datas; // 8 bytes per inst, caller frees
- uint32_t extra_len;
- uint32_t* extra; // Caller frees
-} ZigFuncAir;
-
-typedef struct {
- ZigFuncAir* funcs; // Caller frees (and each element's arrays)
- uint32_t func_count;
+ uint32_t matched_count;
char* error_msg; // NULL on success, caller frees
-} ZigAirResult;
+} AirCompareResult;
-extern ZigAirResult zig_compile_air(const char* src_path);
-extern void zig_air_result_free(ZigAirResult* result);
+// c_funcs: pointer to SemaFuncAir array (from sema.h). Passed as void* to avoid header dep.
+extern AirCompareResult zig_compare_air(const char* src_path, const void* c_funcs, uint32_t c_func_count);
+extern void zig_compare_result_free(AirCompareResult* result);
// InternPool dumper (text-based, separate concern).
typedef struct {
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -8,22 +8,12 @@ pub const c = @cImport({
@cInclude("dump.h");
});
-// Zig-side raw Air types (linked from dumper_obj, not imported).
-const ZigFuncAir = extern struct {
- name: ?[*:0]u8,
- inst_len: u32,
- tags: ?[*]u8,
- datas: ?[*]u8,
- extra_len: u32,
- extra: ?[*]u32,
-};
-const ZigAirResult = extern struct {
- funcs: ?[*]ZigFuncAir,
- func_count: u32,
+const AirCompareResult = extern struct {
+ matched_count: u32,
error_msg: ?[*:0]u8,
};
-extern fn zig_compile_air([*:0]const u8) ZigAirResult;
-extern fn zig_air_result_free(*ZigAirResult) void;
+extern fn zig_compare_air([*:0]const u8, ?*const anyopaque, u32) AirCompareResult;
+extern fn zig_compare_result_free(*AirCompareResult) void;
// Helper to convert C #define integer constants (c_int) to u32 for comparison
// with uint32_t fields (InternPoolIndex, etc.).
@@ -241,27 +231,12 @@ test "sema: function decl smoke test" {
// Air raw comparison: C vs Zig memcmp
// ---------------------------------------------------------------------------
-fn compareAir(c_air: *const c.Air, zig: *const ZigFuncAir) !void {
- try std.testing.expectEqual(zig.inst_len, c_air.inst_len);
-
- // Tags
- const n = c_air.inst_len;
- try std.testing.expectEqualSlices(u8, zig.tags.?[0..n], @as([*]const u8, @ptrCast(c_air.inst_tags))[0..n]);
-
- // Data (8 bytes per instruction)
- try std.testing.expectEqualSlices(u8, zig.datas.?[0 .. n * 8], @as([*]const u8, @ptrCast(c_air.inst_datas))[0 .. n * 8]);
-
- // Extra
- try std.testing.expectEqual(zig.extra_len, c_air.extra_len);
- try std.testing.expectEqualSlices(u32, zig.extra.?[0..zig.extra_len], c_air.extra[0..c_air.extra_len]);
-}
-
fn semaAirRawCheck(source: [:0]const u8) !void {
// C pipeline: parse -> astgen -> sema
var result = try semaCheck(source);
defer result.deinit();
- // Zig pipeline: write source to temp file, compile, get raw Air arrays
+ // Zig pipeline: write source to temp file, compile, compare in-place
const tmp_path = "/tmp/zig0_sema_test_tmp.zig";
{
const f = std.fs.cwd().createFile(tmp_path, .{}) catch return error.TmpFileCreate;
@@ -270,37 +245,12 @@ fn semaAirRawCheck(source: [:0]const u8) !void {
}
defer std.fs.cwd().deleteFile(tmp_path) catch {};
- var zig_result = zig_compile_air(tmp_path);
- defer zig_air_result_free(&zig_result);
- if (zig_result.error_msg) |e| {
- std.debug.print("zig_compile_air error: {s}\n", .{std.mem.span(e)});
- return error.ZigCompileFailed;
- }
-
- const c_airs = result.c_func_air_list;
- const zig_funcs = if (zig_result.funcs) |f| f[0..zig_result.func_count] else &[_]ZigFuncAir{};
-
- // Match functions by name
- for (zig_funcs) |*zf| {
- const zig_name = std.mem.span(zf.name.?);
- var found = false;
- const c_items = c_airs.items[0..c_airs.len];
- for (c_items) |*cf| {
- const c_name: []const u8 = std.mem.span(cf.name);
- if (std.mem.eql(u8, zig_name, c_name)) {
- try compareAir(&cf.air, zf);
- found = true;
- break;
- }
- }
- if (!found) {
- std.debug.print("Zig function '{s}' not found in C output\n", .{zig_name});
- return error.FunctionNotFound;
- }
+ var cmp_result = zig_compare_air(tmp_path, @ptrCast(result.c_func_air_list.items), result.c_func_air_list.len);
+ defer zig_compare_result_free(&cmp_result);
+ if (cmp_result.error_msg) |e| {
+ std.debug.print("zig_compare_air error: {s}\n", .{std.mem.span(e)});
+ return error.AirMismatch;
}
-
- // Check that C didn't produce extra functions
- try std.testing.expectEqual(zig_result.func_count, c_airs.len);
}
test "sema: Air raw C vs Zig comparison (empty)" {
diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig
@@ -9,46 +9,25 @@ const sema_test = @import("sema_test.zig");
const c = parser_test.c;
const sc = sema_test.c;
-// Zig-side raw Air types (linked from dumper_obj, not imported).
-const ZigFuncAir = extern struct {
- name: ?[*:0]u8,
- inst_len: u32,
- tags: ?[*]u8,
- datas: ?[*]u8,
- extra_len: u32,
- extra: ?[*]u32,
-};
-const ZigAirResult = extern struct {
- funcs: ?[*]ZigFuncAir,
- func_count: u32,
+const AirCompareResult = extern struct {
+ matched_count: u32,
error_msg: ?[*:0]u8,
};
-extern fn zig_compile_air([*:0]const u8) ZigAirResult;
-extern fn zig_air_result_free(*ZigAirResult) void;
-
-fn compareAir(c_air: *const sc.Air, zig: *const ZigFuncAir) !void {
- try std.testing.expectEqual(zig.inst_len, c_air.inst_len);
-
- const n = c_air.inst_len;
- try std.testing.expectEqualSlices(u8, zig.tags.?[0..n], @as([*]const u8, @ptrCast(c_air.inst_tags))[0..n]);
- try std.testing.expectEqualSlices(u8, zig.datas.?[0 .. n * 8], @as([*]const u8, @ptrCast(c_air.inst_datas))[0 .. n * 8]);
-
- try std.testing.expectEqual(zig.extra_len, c_air.extra_len);
- try std.testing.expectEqualSlices(u32, zig.extra.?[0..zig.extra_len], c_air.extra[0..c_air.extra_len]);
-}
+extern fn zig_compare_air([*:0]const u8, ?*const anyopaque, u32) AirCompareResult;
+extern fn zig_compare_result_free(*AirCompareResult) void;
test "stages: corpus" {
@setEvalBranchQuota(corpus_files.len * 2);
const gpa = std.testing.allocator;
inline for (corpus_files) |path| {
- stagesCheck(gpa, @embedFile(path), path["../".len..]) catch {
+ stagesCheck(gpa, @embedFile(path)) catch {
std.debug.print("FAIL: {s}\n", .{path});
return error.TestFailed;
};
}
}
-fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: [:0]const u8) !void {
+fn stagesCheck(gpa: Allocator, source: [:0]const u8) !void {
// Parse once with C parser
var c_ast = c.astParse(source.ptr, @intCast(source.len));
defer c.astDeinit(&c_ast);
@@ -92,36 +71,22 @@ fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: [:0]const u8) !vo
var c_func_air_list = sc.semaAnalyze(&c_sema);
defer sc.semaFuncAirListDeinit(&c_func_air_list);
- var zig_result = zig_compile_air(src_path.ptr);
- defer zig_air_result_free(&zig_result);
- if (zig_result.error_msg) |e| {
- std.debug.print("zig_compile_air error: {s}\n", .{std.mem.span(e)});
- return error.ZigCompileFailed;
+ // Write source to a temp file to avoid module path conflicts
+ // (source files inside lib/std/ conflict with the 'std' module).
+ const tmp_path = "/tmp/zig0_stages_test_tmp.zig";
+ {
+ const f = std.fs.cwd().createFile(tmp_path, .{}) catch return error.TmpFileCreate;
+ defer f.close();
+ f.writeAll(source) catch return error.TmpFileWrite;
}
+ defer std.fs.cwd().deleteFile(tmp_path) catch {};
- const zig_funcs = if (zig_result.funcs) |f| f[0..zig_result.func_count] else &[_]ZigFuncAir{};
- const c_items = c_func_air_list.items[0..c_func_air_list.len];
-
- // Match functions by name
- for (zig_funcs) |*zf| {
- const zig_name = std.mem.span(zf.name.?);
- var found = false;
- for (c_items) |*cf| {
- const c_name: []const u8 = std.mem.span(cf.name);
- if (std.mem.eql(u8, zig_name, c_name)) {
- try compareAir(&cf.air, zf);
- found = true;
- break;
- }
- }
- if (!found) {
- std.debug.print("Zig function '{s}' not found in C output\n", .{zig_name});
- return error.FunctionNotFound;
- }
+ var cmp_result = zig_compare_air(tmp_path, @ptrCast(c_func_air_list.items), c_func_air_list.len);
+ defer zig_compare_result_free(&cmp_result);
+ if (cmp_result.error_msg) |e| {
+ std.debug.print("zig_compare_air error: {s}\n", .{std.mem.span(e)});
+ return error.AirMismatch;
}
-
- // Check that C didn't produce extra functions
- try std.testing.expectEqual(zig_result.func_count, c_func_air_list.len);
}
}