From bdf753eaf8dadabd68e06af0247469bbf92cb9fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Thu, 19 Feb 2026 21:58:02 +0000 Subject: [PATCH] Move Air comparison from src/ to stage0/ src/verbose_air.zig now only collects Air data (zig_compile_air) instead of comparing it (zig_compare_air). The comparison logic lives in stage0/sema_test.zig, keeping testing infrastructure in stage0/. Co-Authored-By: Claude Opus 4.6 --- src/verbose_air.zig | 222 ++++++++++++++++++----------------------- stage0/dump.h | 14 +-- stage0/sema_test.zig | 144 ++++++++++++++++++++++---- stage0/stages_test.zig | 18 +--- 4 files changed, 231 insertions(+), 167 deletions(-) diff --git a/src/verbose_air.zig b/src/verbose_air.zig index c0b540443b..590065edb5 100644 --- a/src/verbose_air.zig +++ b/src/verbose_air.zig @@ -1,6 +1,6 @@ -// verbose_air.zig — Zig-side zero-copy Air comparer. -// Compiles source via the Zig compiler pipeline and compares Air arrays -// in-place against C-produced arrays (passed in via CSemaFuncAir). +// verbose_air.zig — Zig-side Air collector. +// Compiles source via the Zig compiler pipeline and collects Air arrays +// into C-compatible structs for comparison by stage0 tests. // Exports C-compatible functions for use by stage0 tests. const std = @import("std"); @@ -14,164 +14,129 @@ const Air = zig_internals.Air; const CAir = extern struct { inst_len: u32, inst_cap: u32, - inst_tags: ?[*]const u8, - inst_datas: ?[*]const u8, // 8 bytes per inst + inst_tags: ?[*]u8, + inst_datas: ?[*]u8, // 8 bytes per inst extra_len: u32, extra_cap: u32, - extra: ?[*]const u32, + extra: ?[*]u32, }; /// Matches C `SemaFuncAir` struct layout (sema.h). const CSemaFuncAir = extern struct { - name: ?[*:0]const u8, + name: ?[*:0]u8, air: CAir, }; -const CompareResult = extern struct { - matched_count: u32, +const CompileAirResult = extern struct { + items: ?[*]CSemaFuncAir, + len: u32, error_msg: ?[*:0]u8, // NULL on success, caller frees }; -const AirComparer = struct { - c_funcs: ?[*]const CSemaFuncAir, - c_func_count: u32, - matched: u32 = 0, +const AirCollector = struct { + funcs: std.ArrayListUnmanaged(CSemaFuncAir) = .empty, first_error: ?[*:0]u8 = null, - fn compareFunc(ctx: *anyopaque, name: []const u8, air: *const Air) void { - const self: *AirComparer = @ptrCast(@alignCast(ctx)); - self.compareFuncInner(name, air) catch {}; + fn collectFunc(ctx: *anyopaque, name: []const u8, air: *const Air) void { + const self: *AirCollector = @ptrCast(@alignCast(ctx)); + self.collectFuncInner(name, air) catch {}; } - fn compareFuncInner(self: *AirComparer, name: []const u8, air: *const Air) !void { - // Already have an error, skip further comparisons. + fn collectFuncInner(self: *AirCollector, name: []const u8, air: *const Air) !void { if (self.first_error != null) return; - // Find matching C function by name. - const c_items = if (self.c_funcs) |f| f[0..self.c_func_count] else { - self.setError("Zig function '{s}' not found in C output", .{name}); - return; - }; - for (c_items) |*cf| { - const c_name = std.mem.span(cf.name orelse continue); - if (std.mem.eql(u8, name, c_name)) { - self.compareAir(name, &cf.air, air); - return; - } - } - - // Function not found in C output. - self.setError("Zig function '{s}' not found in C output", .{name}); - } - - fn compareAir(self: *AirComparer, name: []const u8, c_air: *const CAir, air: *const Air) void { + const gpa = std.heap.c_allocator; const inst_len: u32 = @intCast(air.instructions.len); - if (inst_len != c_air.inst_len) { - self.setError("'{s}': inst_len mismatch: zig={d} c={d}", .{ name, inst_len, c_air.inst_len }); - return; - } - // Tags + // Copy tags const zig_tags = air.instructions.items(.tag); - const zig_tags_bytes = @as([*]const u8, @ptrCast(zig_tags.ptr))[0..inst_len]; - const c_tags = (c_air.inst_tags orelse { - if (inst_len > 0) { - self.setError("'{s}': C inst_tags is null but inst_len={d}", .{ name, inst_len }); - } - return; - })[0..inst_len]; - if (!std.mem.eql(u8, zig_tags_bytes, c_tags)) { - self.setError("'{s}': tags mismatch (inst_len={d})", .{ name, inst_len }); - return; - } + const tags_copy: ?[*]u8 = if (inst_len > 0) blk: { + const src = @as([*]const u8, @ptrCast(zig_tags.ptr))[0..inst_len]; + const dst = gpa.alloc(u8, inst_len) catch return; + @memcpy(dst, src); + break :blk dst.ptr; + } else null; - // Datas (8 bytes per instruction) - const zig_datas = air.instructions.items(.data); - const c_datas = (c_air.inst_datas orelse { - if (inst_len > 0) { - self.setError("'{s}': C inst_datas is null but inst_len={d}", .{ name, inst_len }); - } - return; - })[0 .. inst_len * 8]; - if (@sizeOf(Air.Inst.Data) == 8) { - const zig_datas_bytes = @as([*]const u8, @ptrCast(zig_datas.ptr))[0 .. inst_len * 8]; - if (!std.mem.eql(u8, zig_datas_bytes, c_datas)) { - self.setError("'{s}': datas mismatch (inst_len={d})", .{ name, inst_len }); - return; - } - } else { - // Safety build: @sizeOf(Data) may be > 8, compare first 8 bytes per element - for (zig_datas, 0..) |*d, i| { - const zig_bytes = @as(*const [8]u8, @ptrCast(d)); - if (!std.mem.eql(u8, zig_bytes, c_datas[i * 8 ..][0..8])) { - self.setError("'{s}': datas mismatch at inst {d}", .{ name, i }); - return; + // Copy datas (8 bytes per instruction) + const datas_byte_len = inst_len * 8; + const datas_copy: ?[*]u8 = if (inst_len > 0) blk: { + const dst = gpa.alloc(u8, datas_byte_len) catch return; + const zig_datas = air.instructions.items(.data); + if (@sizeOf(Air.Inst.Data) == 8) { + const src = @as([*]const u8, @ptrCast(zig_datas.ptr))[0..datas_byte_len]; + @memcpy(dst, src); + } else { + // Safety build: @sizeOf(Data) may be > 8, copy first 8 bytes per element + for (zig_datas, 0..) |*d, i| { + const src = @as(*const [8]u8, @ptrCast(d)); + @memcpy(dst[i * 8 ..][0..8], src); } } - } + break :blk dst.ptr; + } else null; - // Extra + // Copy extra const extra_len: u32 = @intCast(air.extra.items.len); - if (extra_len != c_air.extra_len) { - self.setError("'{s}': extra_len mismatch: zig={d} c={d}", .{ name, extra_len, c_air.extra_len }); - return; - } - if (extra_len > 0) { - const c_extra = (c_air.extra orelse { - self.setError("'{s}': C extra is null but extra_len={d}", .{ name, extra_len }); - return; - })[0..extra_len]; - if (!std.mem.eql(u32, air.extra.items, c_extra)) { - self.setError("'{s}': extra mismatch (extra_len={d})", .{ name, extra_len }); - return; - } - } + const extra_copy: ?[*]u32 = if (extra_len > 0) blk: { + const dst = gpa.alloc(u32, extra_len) catch return; + @memcpy(dst, air.extra.items); + break :blk dst.ptr; + } else null; - self.matched += 1; - } + // Copy name + const name_copy = gpa.dupeZ(u8, name) catch return; - fn setError(self: *AirComparer, comptime fmt: []const u8, args: anytype) void { - if (self.first_error != null) return; - const gpa = std.heap.c_allocator; - const msg = std.fmt.allocPrint(gpa, fmt, args) catch return; - const msg_z = gpa.dupeZ(u8, msg) catch return; - gpa.free(msg); - self.first_error = msg_z.ptr; + self.funcs.append(gpa, .{ + .name = name_copy.ptr, + .air = .{ + .inst_len = inst_len, + .inst_cap = inst_len, + .inst_tags = tags_copy, + .inst_datas = datas_copy, + .extra_len = extra_len, + .extra_cap = extra_len, + .extra = extra_copy, + }, + }) catch return; } }; -export fn zig_compare_air( +export fn zig_compile_air( src_path_ptr: [*:0]const u8, module_root_ptr: ?[*:0]const u8, - c_funcs_raw: ?*const anyopaque, - c_func_count: u32, -) CompareResult { - return zigCompareAirImpl( +) CompileAirResult { + return zigCompileAirImpl( std.mem.span(src_path_ptr), if (module_root_ptr) |p| std.mem.span(p) else null, - @ptrCast(@alignCast(c_funcs_raw)), - c_func_count, ) catch |err| { return errResult(@errorName(err)); }; } -export fn zig_compare_result_free(result: *CompareResult) void { - if (result.error_msg) |e| std.heap.c_allocator.free(std.mem.span(e)); +export fn zig_compile_air_free(result: *CompileAirResult) void { + const gpa = std.heap.c_allocator; + if (result.error_msg) |e| gpa.free(std.mem.span(e)); + if (result.items) |items| { + for (items[0..result.len]) |*f| { + if (f.name) |n| gpa.free(std.mem.span(n)); + if (f.air.inst_tags) |t| gpa.free(t[0..f.air.inst_len]); + if (f.air.inst_datas) |d| gpa.free(d[0 .. f.air.inst_len * 8]); + if (f.air.extra) |e| gpa.free(e[0..f.air.extra_len]); + } + gpa.free(items[0..result.len]); + } } -fn errResult(msg: []const u8) CompareResult { +fn errResult(msg: []const u8) CompileAirResult { const duped = std.heap.c_allocator.dupeZ(u8, msg) catch - return .{ .matched_count = 0, .error_msg = null }; - return .{ .matched_count = 0, .error_msg = duped.ptr }; + return .{ .items = null, .len = 0, .error_msg = null }; + return .{ .items = null, .len = 0, .error_msg = duped.ptr }; } -fn zigCompareAirImpl( +fn zigCompileAirImpl( src_path: []const u8, module_root_opt: ?[]const u8, - c_funcs: ?[*]const CSemaFuncAir, - c_func_count: u32, -) !CompareResult { +) !CompileAirResult { const gpa = std.heap.c_allocator; var arena_state = std.heap.ArenaAllocator.init(gpa); @@ -253,10 +218,7 @@ fn zigCompareAirImpl( gpa.destroy(thread_pool); } - var comparer: AirComparer = .{ - .c_funcs = c_funcs, - .c_func_count = c_func_count, - }; + var collector: AirCollector = .{}; var create_diag: Compilation.CreateDiagnostic = undefined; const comp = Compilation.create(gpa, arena, &create_diag, .{ @@ -268,8 +230,8 @@ fn zigCompareAirImpl( .thread_pool = thread_pool, .cache_mode = .whole, .verbose_air_callback = .{ - .context = @ptrCast(&comparer), - .call = @ptrCast(&AirComparer.compareFunc), + .context = @ptrCast(&collector), + .call = @ptrCast(&AirCollector.collectFunc), }, }) catch |err| switch (err) { error.CreateFail => { @@ -290,8 +252,20 @@ fn zigCompareAirImpl( return errResult(buf.written()); } - return .{ - .matched_count = comparer.matched, - .error_msg = comparer.first_error, - }; + if (collector.first_error) |e| { + return .{ .items = null, .len = 0, .error_msg = e }; + } + + const items = collector.funcs.items; + const len: u32 = @intCast(collector.funcs.items.len); + // Transfer ownership: caller frees via zig_compile_air_free. + // The ArrayListUnmanaged allocatedSlice includes capacity, but we only + // expose items[0..len]. Free the excess capacity now. + if (collector.funcs.capacity > len) { + // Shrink to exact size so free works with items[0..len]. + const exact = gpa.realloc(items[0..collector.funcs.capacity], len) catch + items[0..collector.funcs.capacity]; // keep original on realloc failure + return .{ .items = exact.ptr, .len = len, .error_msg = null }; + } + return .{ .items = items.ptr, .len = len, .error_msg = null }; } diff --git a/stage0/dump.h b/stage0/dump.h index 5ecee38c52..efea53d365 100644 --- a/stage0/dump.h +++ b/stage0/dump.h @@ -1,4 +1,4 @@ -// dump.h — Shared result types for raw Air C/Zig comparison. +// dump.h — Shared result types for Zig Air compilation export. #ifndef _ZIG0_DUMP_H__ #define _ZIG0_DUMP_H__ @@ -6,12 +6,12 @@ #include typedef struct { - uint32_t matched_count; - char* error_msg; // NULL on success, caller frees -} AirCompareResult; + void* items; // SemaFuncAir* (from sema.h), owned by Zig allocator + uint32_t len; + char* error_msg; // NULL on success, owned by Zig allocator +} ZigCompileAirResult; -// c_funcs: pointer to SemaFuncAir array (from sema.h). Passed as void* to avoid header dep. -extern AirCompareResult zig_compare_air(const char* src_path, const char* module_root, const void* c_funcs, uint32_t c_func_count); -extern void zig_compare_result_free(AirCompareResult* result); +extern ZigCompileAirResult zig_compile_air(const char* src_path, const char* module_root); +extern void zig_compile_air_free(ZigCompileAirResult* result); #endif diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig index dd6a23eb28..90c9dd7677 100644 --- a/stage0/sema_test.zig +++ b/stage0/sema_test.zig @@ -8,13 +8,6 @@ pub const c = @cImport({ @cInclude("dump.h"); }); -const AirCompareResult = extern struct { - matched_count: u32, - error_msg: ?[*:0]u8, -}; -extern fn zig_compare_air([*:0]const u8, ?[*:0]const u8, ?*const anyopaque, u32) AirCompareResult; -extern fn zig_compare_result_free(*AirCompareResult) void; - // Helper to convert C #define integer constants (c_int) to u32 for comparison // with uint32_t fields (InternPoolIndex, etc.). fn idx(val: c_int) u32 { @@ -228,15 +221,18 @@ test "sema: function decl smoke test" { } // --------------------------------------------------------------------------- -// Air raw comparison: C vs Zig memcmp +// Air raw comparison: C vs Zig // --------------------------------------------------------------------------- -fn semaAirRawCheck(source: [:0]const u8) !void { - // C pipeline: parse -> astgen -> sema - var result = try semaCheck(source); - defer result.deinit(); +const ZigCompileAirResult = extern struct { + items: ?[*]c.SemaFuncAir, + len: u32, + error_msg: ?[*:0]u8, +}; +extern fn zig_compile_air([*:0]const u8, ?[*:0]const u8) ZigCompileAirResult; +extern fn zig_compile_air_free(*ZigCompileAirResult) void; - // Zig pipeline: write source to temp file, compile, compare in-place +pub fn airCompareFromSource(source: [:0]const u8, c_func_air_list: c.SemaFuncAirList) !void { const tmp_path = "/tmp/zig0_sema_test_tmp.zig"; { const f = std.fs.cwd().createFile(tmp_path, .{}) catch return error.TmpFileCreate; @@ -245,16 +241,126 @@ fn semaAirRawCheck(source: [:0]const u8) !void { } defer std.fs.cwd().deleteFile(tmp_path) catch {}; - var cmp_result = zig_compare_air(tmp_path, null, @ptrCast(result.c_func_air_list.items), result.c_func_air_list.len); - defer zig_compare_result_free(&cmp_result); - if (cmp_result.error_msg) |e| { - std.debug.print("zig_compare_air error: {s}\n", .{std.mem.span(e)}); + return airCompare(tmp_path, null, c_func_air_list); +} + +pub fn airCompare( + src_path: [*:0]const u8, + module_root: ?[*:0]const u8, + c_func_air_list: c.SemaFuncAirList, +) !void { + var zig_result = zig_compile_air(src_path, module_root); + defer zig_compile_air_free(&zig_result); + + if (zig_result.error_msg) |e| { + std.debug.print("zig_compile_air error: {s}\n", .{std.mem.span(e)}); + return error.ZigCompileError; + } + + const zig_funcs = if (zig_result.items) |items| items[0..zig_result.len] else &[_]c.SemaFuncAir{}; + const c_funcs_ptr: ?[*]const c.SemaFuncAir = @ptrCast(c_func_air_list.items); + const c_funcs = if (c_funcs_ptr) |items| items[0..c_func_air_list.len] else &[_]c.SemaFuncAir{}; + + if (zig_funcs.len != c_funcs.len) { + std.debug.print("Air func count mismatch: zig={d}, c={d}\n", .{ zig_funcs.len, c_funcs.len }); return error.AirMismatch; } - if (cmp_result.matched_count != result.c_func_air_list.len) { - std.debug.print("Air func count mismatch: zig matched {d}, c produced {d}\n", .{ cmp_result.matched_count, result.c_func_air_list.len }); + + for (zig_funcs) |*zf| { + const zig_name = if (zf.name) |n| std.mem.span(n) else ""; + const cf = airFindByName(c_funcs, zig_name) orelse { + std.debug.print("Zig function '{s}' not found in C output\n", .{zig_name}); + return error.AirMismatch; + }; + try airCompareOne(zig_name, &zf.air, &cf.air); + } +} + +fn cNameSpan(name: [*c]u8) []const u8 { + const opt: ?[*:0]const u8 = @ptrCast(name); + return if (opt) |n| std.mem.span(n) else ""; +} + +fn airFindByName(funcs: []const c.SemaFuncAir, name: []const u8) ?*const c.SemaFuncAir { + for (funcs) |*f| { + if (std.mem.eql(u8, name, cNameSpan(f.name))) return f; + } + return null; +} + +fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T { + return if (ptr == null) null else @ptrCast(ptr); +} + +fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !void { + if (zig_air.inst_len != c_air.inst_len) { + std.debug.print("'{s}': inst_len mismatch: zig={d} c={d}\n", .{ name, zig_air.inst_len, c_air.inst_len }); return error.AirMismatch; } + const inst_len = zig_air.inst_len; + + // Tags + if (inst_len > 0) { + const zig_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, zig_air.inst_tags) orelse { + std.debug.print("'{s}': Zig inst_tags is null but inst_len={d}\n", .{ name, inst_len }); + return error.AirMismatch; + }); + const c_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, c_air.inst_tags) orelse { + std.debug.print("'{s}': C inst_tags is null but inst_len={d}\n", .{ name, inst_len }); + return error.AirMismatch; + }); + if (!std.mem.eql(u8, zig_tags[0..inst_len], c_tags[0..inst_len])) { + std.debug.print("'{s}': tags mismatch (inst_len={d})\n", .{ name, inst_len }); + return error.AirMismatch; + } + } + + // Datas (8 bytes per instruction) + if (inst_len > 0) { + const byte_len = inst_len * 8; + const zig_datas: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas) orelse { + std.debug.print("'{s}': Zig inst_datas is null but inst_len={d}\n", .{ name, inst_len }); + return error.AirMismatch; + }); + const c_datas: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, c_air.inst_datas) orelse { + std.debug.print("'{s}': C inst_datas is null but inst_len={d}\n", .{ name, inst_len }); + return error.AirMismatch; + }); + if (!std.mem.eql(u8, zig_datas[0..byte_len], c_datas[0..byte_len])) { + std.debug.print("'{s}': datas mismatch (inst_len={d})\n", .{ name, inst_len }); + return error.AirMismatch; + } + } + + // Extra + if (zig_air.extra_len != c_air.extra_len) { + std.debug.print("'{s}': extra_len mismatch: zig={d} c={d}\n", .{ name, zig_air.extra_len, c_air.extra_len }); + return error.AirMismatch; + } + const extra_len = zig_air.extra_len; + if (extra_len > 0) { + const zig_extra: [*]const u32 = cToOpt(u32, zig_air.extra) orelse { + std.debug.print("'{s}': Zig extra is null but extra_len={d}\n", .{ name, extra_len }); + return error.AirMismatch; + }; + const c_extra: [*]const u32 = cToOpt(u32, c_air.extra) orelse { + std.debug.print("'{s}': C extra is null but extra_len={d}\n", .{ name, extra_len }); + return error.AirMismatch; + }; + if (!std.mem.eql(u32, zig_extra[0..extra_len], c_extra[0..extra_len])) { + std.debug.print("'{s}': extra mismatch (extra_len={d})\n", .{ name, extra_len }); + return error.AirMismatch; + } + } +} + +fn semaAirRawCheck(source: [:0]const u8) !void { + // C pipeline: parse -> astgen -> sema + var result = try semaCheck(source); + defer result.deinit(); + + // Zig pipeline: compile source and compare Air arrays + try airCompareFromSource(source, result.c_func_air_list); } test "sema: Air raw C vs Zig comparison (empty)" { diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig index b3029295ff..2c436ce270 100644 --- a/stage0/stages_test.zig +++ b/stage0/stages_test.zig @@ -9,13 +9,6 @@ const sema_test = @import("sema_test.zig"); const c = parser_test.c; const sc = sema_test.c; -const AirCompareResult = extern struct { - matched_count: u32, - error_msg: ?[*:0]u8, -}; -extern fn zig_compare_air([*:0]const u8, ?[*:0]const u8, ?*const anyopaque, u32) AirCompareResult; -extern fn zig_compare_result_free(*AirCompareResult) void; - test "stages: corpus" { @setEvalBranchQuota(corpus_files.len * 2); const gpa = std.testing.allocator; @@ -93,16 +86,7 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8) const test_src: [:0]const u8 = symlink_path ++ "/" ++ repo_relative; const module_root: [:0]const u8 = symlink_path; - var cmp_result = zig_compare_air(test_src.ptr, module_root.ptr, @ptrCast(c_func_air_list.items), c_func_air_list.len); - defer zig_compare_result_free(&cmp_result); - if (cmp_result.error_msg) |e| { - std.debug.print("zig_compare_air error: {s}\n", .{std.mem.span(e)}); - return error.AirMismatch; - } - if (cmp_result.matched_count != c_func_air_list.len) { - std.debug.print("Air func count mismatch: zig matched {d}, c produced {d}\n", .{ cmp_result.matched_count, c_func_air_list.len }); - return error.AirMismatch; - } + try sema_test.airCompare(test_src.ptr, module_root.ptr, c_func_air_list); } }