commit 00a6cb4fd4ab1cee62eb29cdb8745acfbe05127d (tree)
parent 0d15ca3dd434fa4b84e827c0a7aa82e5bb3d62cf
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Thu, 19 Feb 2026 14:48:40 +0000
Replace structural Air/IP comparison with text-based dumpers
Remove all @import("zig_internals") from stage0/ so that test_obj
compilation is independent of the Zig compiler (~6min). The sema
comparison now uses text-based dumpers:
- Zig side (src/verbose_air.zig): compiles source through the full Zig
pipeline, captures verbose_air output, exports zig_dump_air() as a C
function. Compiled as a separate dumper_obj that is cached
independently.
- C side (stage0/verbose_air.c): formats C Air structs to text in the
same format as Zig's Air/print.zig.
Changing stage0 code no longer triggers Zig compiler recompilation:
C compile + cached test_obj + cached dumper + link = seconds.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
12 files changed, 773 insertions(+), 666 deletions(-)
diff --git a/build.zig b/build.zig
@@ -10,8 +10,8 @@ const assert = std.debug.assert;
const DevEnv = @import("src/dev.zig").Env;
const ValueInterpretMode = enum { direct, by_name };
-const zig0_headers = &[_][]const u8{ "common.h", "ast.h", "parser.h", "zir.h", "astgen.h", "intern_pool.h", "air.h", "type.h", "value.h", "sema.h" };
-const zig0_c_lib_files = &[_][]const u8{ "tokenizer.c", "ast.c", "zig0.c", "parser.c", "zir.c", "astgen.c", "intern_pool.c", "air.c", "type.c", "value.c", "sema.c" };
+const zig0_headers = &[_][]const u8{ "common.h", "ast.h", "parser.h", "zir.h", "astgen.h", "intern_pool.h", "air.h", "type.h", "value.h", "sema.h", "dump.h", "verbose_air.h", "verbose_intern_pool.h" };
+const zig0_c_lib_files = &[_][]const u8{ "tokenizer.c", "ast.c", "zig0.c", "parser.c", "zir.c", "astgen.c", "intern_pool.c", "air.c", "type.c", "value.c", "sema.c", "verbose_air.c", "verbose_intern_pool.c" };
const zig0_all_c_files = zig0_c_lib_files ++ &[_][]const u8{"main.c"};
const zig0_cflags = &[_][]const u8{
"-std=c11",
@@ -1618,6 +1618,7 @@ fn addZig0TestStep(
exe_options: *std.Build.Step.Options,
) void {
// Step 1: Compile Zig test code to .o (cached independently of C objects).
+ // NOTE: test_mod does NOT import zig_internals — stage0 tests are fast.
const test_mod = b.createModule(.{
.root_source_file = b.path("stage0_test_root.zig"),
.optimize = optimize,
@@ -1626,7 +1627,14 @@ fn addZig0TestStep(
test_mod.addIncludePath(b.path("stage0"));
test_mod.linkSystemLibrary("c", .{});
- // Re-export module rooted in src/ (can resolve compiler-internal imports)
+ const test_obj = b.addTest(.{
+ .root_module = test_mod,
+ .emit_object = true,
+ .use_llvm = false,
+ .use_lld = false,
+ });
+
+ // Step 1b: Compile Zig dumper module (depends on zig_internals, cached separately).
const zig_internals_mod = b.createModule(.{
.root_source_file = b.path("src/test_exports.zig"),
});
@@ -1640,21 +1648,27 @@ fn addZig0TestStep(
zig_internals_mod.addImport("aro", aro_mod);
zig_internals_mod.addImport("aro_translate_c", aro_translate_c_mod);
zig_internals_mod.addOptions("build_options", exe_options);
- test_mod.addImport("zig_internals", zig_internals_mod);
- const test_obj = b.addTest(.{
- .root_module = test_mod,
- .emit_object = true,
- .use_llvm = false,
- .use_lld = false,
+ const dumper_mod = b.createModule(.{
+ .root_source_file = b.path("src/verbose_air.zig"),
+ .target = target,
+ .optimize = optimize,
+ });
+ dumper_mod.addImport("zig_internals", zig_internals_mod);
+ dumper_mod.linkSystemLibrary("c", .{});
+
+ const dumper_obj = b.addObject(.{
+ .name = "verbose_dumper",
+ .root_module = dumper_mod,
});
- // Step 2: Link test_obj + C objects into final executable.
+ // Step 2: Link test_obj + dumper_obj + C objects into final executable.
const link_mod = b.createModule(.{
.target = target,
.optimize = optimize,
});
link_mod.addObject(test_obj);
+ link_mod.addObject(dumper_obj);
addZig0CSources(b, link_mod, cc, optimize);
link_mod.linkSystemLibrary("c", .{});
diff --git a/src/verbose_air.zig b/src/verbose_air.zig
@@ -0,0 +1,174 @@
+// verbose_air.zig — Zig-side dumper for Air text output.
+// Compiles source via the Zig compiler pipeline and captures verbose_air output.
+// Exports C-compatible functions for use by stage0 tests.
+
+const std = @import("std");
+const zig_internals = @import("zig_internals");
+const Compilation = zig_internals.Compilation;
+const Package = zig_internals.Package;
+
+comptime {
+ _ = @import("verbose_intern_pool.zig");
+}
+
+/// C-ABI result of a dump call; mirrors the `DumpResult` typedef in stage0/dump.h.
+/// Every string stored here is allocated with `std.heap.c_allocator` and is
+/// owned by the caller.
+const DumpResult = extern struct {
+ /// Dump text on success, null on failure.
+ text: ?[*:0]u8,
+ /// Failure description, null on success. May also be null on failure when
+ /// the message itself could not be allocated (see `errResult`).
+ error_msg: ?[*:0]u8,
+};
+
+/// C entry point: run the Zig pipeline on the file at `src_path_ptr`
+/// (path relative to cwd) and hand back its verbose_air text.
+/// `func_filter`: null dumps every function; otherwise only functions whose
+/// name contains the given substring are kept.
+/// Any Zig error raised by the implementation is reported through
+/// `error_msg` as the error's name.
+export fn zig_dump_air(
+    src_path_ptr: [*:0]const u8,
+    func_filter: ?[*:0]const u8,
+) DumpResult {
+    const src_path = std.mem.span(src_path_ptr);
+    if (zigDumpAirImpl(src_path, func_filter)) |dump| {
+        return dump;
+    } else |err| {
+        return errResult(@errorName(err));
+    }
+}
+
+/// Build a failure `DumpResult` whose `error_msg` is a NUL-terminated,
+/// c_allocator-owned copy of `msg`.
+/// If that copy cannot be allocated, both fields come back null, so callers
+/// must treat a result with no text and no message as a failure too.
+fn errResult(msg: []const u8) DumpResult {
+    if (std.heap.c_allocator.dupeZ(u8, msg)) |copy| {
+        return .{ .text = null, .error_msg = copy.ptr };
+    } else |_| {
+        return .{ .text = null, .error_msg = null };
+    }
+}
+
+/// Compile `src_path` through the Zig compiler with `verbose_air` enabled and
+/// return the captured Air text as a c_allocator-owned, NUL-terminated string.
+///
+/// `func_filter`: null returns the whole capture; otherwise only the
+/// per-function sections whose name contains the filter are returned.
+///
+/// Returns a failure `DumpResult` (via `errResult`) when `Compilation.create`
+/// reports `error.CreateFail` or when the compilation produced error
+/// messages; every other failure is returned as a Zig error for the caller
+/// to translate.
+fn zigDumpAirImpl(src_path: []const u8, func_filter: ?[*:0]const u8) !DumpResult {
+    const gpa = std.heap.c_allocator;
+
+    var arena_state = std.heap.ArenaAllocator.init(gpa);
+    defer arena_state.deinit();
+    const arena = arena_state.allocator();
+
+    var dirs: Compilation.Directories = .init(
+        arena,
+        "lib/",
+        null,
+        .search,
+        {},
+        "",
+    );
+    defer dirs.deinit();
+
+    // Hardcode x86_64-linux-musl target.
+    const resolved_target: Package.Module.ResolvedTarget = .{
+        .result = try std.zig.system.resolveTargetQuery(.{
+            .cpu_arch = .x86_64,
+            .os_tag = .linux,
+            .abi = .musl,
+        }),
+        .is_native_os = false,
+        .is_native_abi = false,
+        .is_explicit_dynamic_linker = false,
+    };
+
+    const config = try Compilation.Config.resolve(.{
+        .output_mode = .Obj,
+        .resolved_target = resolved_target,
+        .have_zcu = true,
+        .emit_bin = false,
+        .is_test = false,
+    });
+
+    // Split src_path into directory and filename for the Module.
+    // Use .root = .none with absolute path to avoid the source being
+    // associated with zig_lib (which would conflict with the std module
+    // when compiling files under lib/).
+    const src_dir = std.fs.path.dirname(src_path) orelse ".";
+    const src_basename = std.fs.path.basename(src_path);
+    const abs_src_dir = try std.fs.cwd().realpathAlloc(arena, src_dir);
+    const root_path: Compilation.Path = .{ .root = .none, .sub_path = abs_src_dir };
+
+    const root_mod = try Package.Module.create(arena, .{
+        .paths = .{
+            .root = root_path,
+            .root_src_path = src_basename,
+        },
+        .fully_qualified_name = "root",
+        .cc_argv = &.{},
+        .inherited = .{
+            .resolved_target = resolved_target,
+        },
+        .global = config,
+        .parent = null,
+    });
+
+    // Heap-allocate the thread pool so its address stays stable.
+    // The free is registered before init() so that a failing init() does not
+    // leak the allocation; deinit() is only registered once init() succeeded.
+    // Defers run in reverse order, so the pool is deinitialized before it is
+    // freed.
+    const thread_pool = try gpa.create(std.Thread.Pool);
+    defer gpa.destroy(thread_pool);
+    thread_pool.* = undefined;
+    try thread_pool.init(.{
+        .allocator = gpa,
+        .n_jobs = 1,
+        .track_ids = true,
+        .stack_size = 60 << 20,
+    });
+    defer thread_pool.deinit();
+
+    var create_diag: Compilation.CreateDiagnostic = undefined;
+    const comp = Compilation.create(gpa, arena, &create_diag, .{
+        .dirs = dirs,
+        .root_name = "test",
+        .config = config,
+        .root_mod = root_mod,
+        .emit_bin = .no,
+        .thread_pool = thread_pool,
+        .cache_mode = .whole,
+        .verbose_air = true,
+    }) catch |err| switch (err) {
+        error.CreateFail => {
+            return errResult("Compilation.create failed");
+        },
+        else => return err,
+    };
+    defer comp.destroy();
+
+    // Capture per-function Air text via the verbose_air_output mechanism.
+    var air_output: std.io.Writer.Allocating = .init(gpa);
+    defer air_output.deinit();
+    comp.verbose_air_output = &air_output.writer;
+
+    try comp.update(std.Progress.Node.none);
+
+    var error_bundle = try comp.getAllErrorsAlloc();
+    defer error_bundle.deinit(gpa);
+    if (error_bundle.errorMessageCount() > 0) {
+        return errResult("compilation produced errors");
+    }
+
+    // The capture buffer is freed on return, so the caller always receives
+    // its own NUL-terminated copy.
+    const full_text = air_output.written();
+    const selected = if (func_filter) |filter_ptr|
+        try filterAirSections(gpa, full_text, std.mem.span(filter_ptr))
+    else
+        try gpa.dupeZ(u8, full_text);
+    return .{ .text = selected.ptr, .error_msg = null };
+}
+
+/// Copy out of `full_text` every "# Begin Function AIR: <name>:" section whose
+/// <name> contains `filter`, and return the concatenation as a NUL-terminated
+/// string allocated with `gpa`. Caller owns the result.
+/// Scanning stops at the first section that lacks its ":\n" header
+/// terminator or its "# End Function AIR: " marker.
+fn filterAirSections(gpa: std.mem.Allocator, full_text: []const u8, filter: []const u8) ![:0]u8 {
+    const begin_marker = "# Begin Function AIR: ";
+    const end_marker = "# End Function AIR: ";
+
+    var filtered: std.ArrayListUnmanaged(u8) = .empty;
+    defer filtered.deinit(gpa);
+
+    var pos: usize = 0;
+    while (std.mem.indexOfPos(u8, full_text, pos, begin_marker)) |begin| {
+        const fqn_start = begin + begin_marker.len;
+        const fqn_end = std.mem.indexOfPos(u8, full_text, fqn_start, ":\n") orelse break;
+        const fqn = full_text[fqn_start..fqn_end];
+
+        // A section runs through the blank line that follows its end marker,
+        // or to the end of the text if no blank line follows.
+        const end_search = std.mem.indexOfPos(u8, full_text, fqn_end, end_marker) orelse break;
+        const line_end = std.mem.indexOfPos(u8, full_text, end_search, "\n\n") orelse full_text.len;
+        const section_end = @min(line_end + 2, full_text.len);
+
+        if (std.mem.indexOf(u8, fqn, filter) != null) {
+            try filtered.appendSlice(gpa, full_text[begin..section_end]);
+        }
+        pos = section_end;
+    }
+
+    return gpa.dupeZ(u8, filtered.items);
+}
diff --git a/src/verbose_intern_pool.zig b/src/verbose_intern_pool.zig
@@ -0,0 +1,23 @@
+// verbose_intern_pool.zig — Zig-side dumper for InternPool text output.
+// Compiles source via the Zig compiler pipeline and dumps the InternPool.
+// Exports a C-compatible function for use by stage0 tests.
+
+const std = @import("std");
+
+/// C-ABI result of a dump call; mirrors the `DumpResult` typedef in stage0/dump.h.
+const DumpResult = extern struct {
+ /// Dump text on success, null on failure. Caller frees.
+ text: ?[*:0]u8,
+ /// Failure description, null on success. Caller frees.
+ error_msg: ?[*:0]u8,
+};
+
+/// Placeholder for the InternPool dumper: the source bytes are ignored and an
+/// empty, c_allocator-owned string is returned as `text`.
+/// If even that allocation fails, both result fields are null.
+export fn zig_dump_intern_pool(
+    source_ptr: [*]const u8,
+    source_len: usize,
+) DumpResult {
+    // Stub: not yet implemented.
+    _ = source_ptr;
+    _ = source_len;
+    if (std.heap.c_allocator.dupeZ(u8, "")) |empty| {
+        return .{ .text = empty.ptr, .error_msg = null };
+    } else |_| {
+        return .{ .text = null, .error_msg = null };
+    }
+}
diff --git a/stage0/dump.h b/stage0/dump.h
@@ -0,0 +1,18 @@
+// dump.h — Shared result type for text-based C/Zig comparison dumpers.
+#ifndef _ZIG0_DUMP_H__
+#define _ZIG0_DUMP_H__
+
+#include <stddef.h>
+
+// Result of a dump call. The Zig side can return both fields as NULL when it
+// fails to allocate the result string or the error message, so callers should
+// treat "text == NULL" as failure even when error_msg is NULL.
+typedef struct {
+ char* text; // Heap-allocated, NULL on error. Caller frees.
+ char* error_msg; // Heap-allocated, NULL on success. Caller frees.
+} DumpResult;
+
+// Zig side: compile source file at src_path, run full Zig pipeline, dump text.
+// src_path: file path relative to cwd.
+// func_filter: NULL=all functions, "foo"=only functions whose name contains
+// the substring "foo".
+extern DumpResult zig_dump_air(const char* src_path, const char* func_filter);
+// Zig side: InternPool dump of the given source buffer (source, len).
+// Currently a stub that ignores its arguments and returns an empty string.
+extern DumpResult zig_dump_intern_pool(const char* source, size_t len);
+
+#endif
diff --git a/stage0/sema.zig b/stage0/sema.zig
@@ -1,151 +0,0 @@
-// sema.zig — Run the real Zig sema pipeline via Compilation.
-// Used by stages_test.zig to produce reference sema output.
-
-const std = @import("std");
-const Allocator = std.mem.Allocator;
-const zig_internals = @import("zig_internals");
-const Compilation = zig_internals.Compilation;
-const Package = zig_internals.Package;
-
-/// Result of running the real Zig sema pipeline via Compilation.
-/// Owns the Compilation, Directories, thread pool, arena, and captured Air text.
-pub const ZigSemaResult = struct {
- comp: *Compilation,
- dirs: Compilation.Directories,
- arena_state: std.heap.ArenaAllocator,
- thread_pool: *std.Thread.Pool,
- air_output: std.io.Writer.Allocating,
- gpa: Allocator,
-
- pub fn deinit(self: *ZigSemaResult) void {
- self.comp.destroy();
- self.dirs.deinit();
- self.thread_pool.deinit();
- self.gpa.destroy(self.thread_pool);
- self.air_output.deinit();
- self.arena_state.deinit();
- }
-};
-
-/// Run the real Zig sema pipeline on the source file at `src_path`.
-/// `src_path` is relative to cwd (the repo root), e.g. "lib/std/crypto/codecs.zig".
-pub fn zigSema(gpa: Allocator, src_path: []const u8) !ZigSemaResult {
- var arena_state = std.heap.ArenaAllocator.init(gpa);
- errdefer arena_state.deinit();
- const arena = arena_state.allocator();
-
- // Use the real Zig cache directories: ~/.cache/zig (global) and .zig-cache (local).
- // Point zig_lib at zig-out/lib/zig/ (the installed copy) rather than lib/ (the source
- // tree) to avoid "file exists in modules 'root' and 'std'" when compiling source files
- // that live under lib/.
- var dirs: Compilation.Directories = .init(
- arena,
- "zig-out/lib/zig",
- null,
- .search,
- {},
- "",
- );
- errdefer dirs.deinit();
-
- // Hardcode x86_64-linux-musl target.
- const resolved_target: Package.Module.ResolvedTarget = .{
- .result = try std.zig.system.resolveTargetQuery(.{
- .cpu_arch = .x86_64,
- .os_tag = .linux,
- .abi = .musl,
- }),
- .is_native_os = false,
- .is_native_abi = false,
- .is_explicit_dynamic_linker = false,
- };
-
- const config = try Compilation.Config.resolve(.{
- .output_mode = .Obj,
- .resolved_target = resolved_target,
- .have_zcu = true,
- .emit_bin = false,
- .is_test = false,
- });
-
- // Split src_path into directory and filename for the Module.
- // Use .root = .none with absolute path to avoid the source being
- // associated with zig_lib (which would conflict with the std module
- // when compiling files under lib/).
- const src_dir = std.fs.path.dirname(src_path) orelse ".";
- const src_basename = std.fs.path.basename(src_path);
- const abs_src_dir = try std.fs.cwd().realpathAlloc(arena, src_dir);
- const root_path: Compilation.Path = .{ .root = .none, .sub_path = abs_src_dir };
-
- const root_mod = try Package.Module.create(arena, .{
- .paths = .{
- .root = root_path,
- .root_src_path = src_basename,
- },
- .fully_qualified_name = "root",
- .cc_argv = &.{},
- .inherited = .{
- .resolved_target = resolved_target,
- },
- .global = config,
- .parent = null,
- });
-
- // Heap-allocate the thread pool so its address stays stable after zigSema
- // returns. The worker threads hold references to the Pool's internal
- // condvar/mutex; a by-value copy would leave them waiting on a stale address.
- const thread_pool = try gpa.create(std.Thread.Pool);
- thread_pool.* = undefined;
- try thread_pool.init(.{
- .allocator = gpa,
- .n_jobs = 1,
- .track_ids = true,
- .stack_size = 60 << 20,
- });
- errdefer {
- thread_pool.deinit();
- gpa.destroy(thread_pool);
- }
-
- var create_diag: Compilation.CreateDiagnostic = undefined;
- const comp = Compilation.create(gpa, arena, &create_diag, .{
- .dirs = dirs,
- .root_name = "test",
- .config = config,
- .root_mod = root_mod,
- .emit_bin = .no,
- .thread_pool = thread_pool,
- .cache_mode = .whole,
- .verbose_air = true,
- }) catch |err| switch (err) {
- error.CreateFail => {
- std.debug.print("Compilation.create failed: {any}\n", .{create_diag});
- return error.ZigSemaFailed;
- },
- else => return err,
- };
- errdefer comp.destroy();
-
- // Capture per-function Air text via the verbose_air_output mechanism.
- var air_output: std.io.Writer.Allocating = .init(gpa);
- errdefer air_output.deinit();
- comp.verbose_air_output = &air_output.writer;
-
- try comp.update(std.Progress.Node.none);
-
- var error_bundle = try comp.getAllErrorsAlloc();
- defer error_bundle.deinit(gpa);
- if (error_bundle.errorMessageCount() > 0) {
- error_bundle.renderToStdErr(.{ .ttyconf = .no_color });
- return error.ZigSemaFailed;
- }
-
- return .{
- .comp = comp,
- .dirs = dirs,
- .arena_state = arena_state,
- .thread_pool = thread_pool,
- .air_output = air_output,
- .gpa = gpa,
- };
-}
diff --git a/stage0/sema_c.zig b/stage0/sema_c.zig
@@ -1,196 +0,0 @@
-// sema_c.zig — Convert C Sema output (Air + InternPool) to Zig Air format.
-// Ported mechanically from C structures defined in air.h / sema.h.
-
-const std = @import("std");
-const Allocator = std.mem.Allocator;
-const Air = @import("zig_internals").Air;
-
-const sema_test = @import("sema_test.zig");
-pub const c = sema_test.c;
-
-pub const FuncAir = struct {
- name: []const u8,
- owned_air: OwnedAir,
-};
-
-/// Result of running C sema on a ZIR and converting to Zig Air.
-/// Owns the C InternPool, C Sema, per-function Airs, and the converted Zig Airs.
-/// Call deinit() to free everything.
-pub const SemaResult = struct {
- func_airs: []FuncAir,
- c_func_air_list: c.SemaFuncAirList,
- c_ip: c.InternPool,
- c_sema: c.Sema,
-
- pub fn deinit(self: *SemaResult, gpa: Allocator) void {
- for (self.func_airs) |*fa| {
- fa.owned_air.deinit(gpa);
- }
- gpa.free(self.func_airs);
- c.semaFuncAirListDeinit(&self.c_func_air_list);
- c.semaDeinit(&self.c_sema);
- c.ipDeinit(&self.c_ip);
- }
-};
-
-/// Run C sema on a C ZIR, then convert the resulting per-function C Airs to Zig Air.
-/// The caller retains ownership of c_zir (and any backing c_ast).
-pub fn cSema(gpa: Allocator, c_zir: c.Zir) !SemaResult {
- var c_ip = c.ipInit();
- errdefer c.ipDeinit(&c_ip);
-
- var c_sema = c.semaInit(&c_ip, c_zir);
- errdefer c.semaDeinit(&c_sema);
-
- var c_func_air_list = c.semaAnalyze(&c_sema);
- errdefer c.semaFuncAirListDeinit(&c_func_air_list);
-
- if (c_sema.has_compile_errors) {
- return error.SemaCompileError;
- }
-
- // Convert each C per-function Air to Zig Air
- const func_airs = try gpa.alloc(FuncAir, c_func_air_list.len);
- errdefer gpa.free(func_airs);
-
- for (0..c_func_air_list.len) |i| {
- const c_item = c_func_air_list.items[i];
- func_airs[i] = .{
- .name = std.mem.span(c_item.name),
- .owned_air = try zigAir(gpa, c_item.air),
- };
- }
-
- return .{
- .func_airs = func_airs,
- .c_func_air_list = c_func_air_list,
- .c_ip = c_ip,
- .c_sema = c_sema,
- };
-}
-
-/// Owning wrapper for a Zig Air struct built from C Air data.
-/// Owns the backing MultiArrayList and extra array; call deinit() to free.
-pub const OwnedAir = struct {
- instructions: std.MultiArrayList(Air.Inst),
- extra: std.ArrayListUnmanaged(u32),
-
- pub fn air(self: *const OwnedAir) Air {
- return .{
- .instructions = self.instructions.slice(),
- .extra = self.extra,
- };
- }
-
- pub fn deinit(self: *OwnedAir, gpa: Allocator) void {
- self.instructions.deinit(gpa);
- self.extra.deinit(gpa);
- }
-};
-
-/// Convert a C Air struct (from sema.h) to a Zig Air struct.
-/// The caller owns the returned OwnedAir and must call deinit().
-pub fn zigAir(gpa: Allocator, c_air: c.Air) !OwnedAir {
- const inst_len: usize = @intCast(c_air.inst_len);
-
- var instructions = std.MultiArrayList(Air.Inst){};
- try instructions.ensureTotalCapacity(gpa, inst_len);
-
- for (0..inst_len) |i| {
- const tag_u8: u8 = @intCast(c_air.inst_tags.?[i]);
- const zig_tag: Air.Inst.Tag = @enumFromInt(tag_u8);
- instructions.appendAssumeCapacity(.{
- .tag = zig_tag,
- .data = convertData(zig_tag, c_air.inst_datas.?[i]),
- });
- }
-
- const extra_len: usize = @intCast(c_air.extra_len);
- var extra = std.ArrayListUnmanaged(u32){};
- try extra.ensureTotalCapacity(gpa, extra_len);
- if (extra_len > 0) {
- extra.appendSliceAssumeCapacity(c_air.extra[0..extra_len]);
- }
-
- return .{
- .instructions = instructions,
- .extra = extra,
- };
-}
-
-/// Convert a C AirInstData union to a Zig Air.Inst.Data union,
-/// dispatching on the tag to construct the correct Zig variant.
-fn convertData(tag: Air.Inst.Tag, cd: c.AirInstData) Air.Inst.Data {
- // Read the raw two u32 words from the C union. Both the C union
- // (AirInstData) and [2]u32 are 8 bytes with no padding.
- const raw: [2]u32 = @bitCast(cd);
-
- return switch (tag) {
- // --- no_op: void data ---
- .trap,
- .breakpoint,
- .ret_addr,
- .frame_addr,
- .unreach,
- .dbg_empty_stmt,
- => .{ .no_op = {} },
-
- // --- dbg_stmt: line + column ---
- .dbg_stmt => .{ .dbg_stmt = .{
- .line = raw[0],
- .column = raw[1],
- } },
-
- // --- un_op: single operand ref ---
- .ret,
- .ret_safe,
- .ret_load,
- => .{ .un_op = @enumFromInt(raw[0]) },
-
- // --- bin_op: lhs + rhs refs ---
- .add,
- .add_safe,
- .add_optimized,
- .add_wrap,
- .add_sat,
- .sub,
- .sub_safe,
- .sub_optimized,
- .sub_wrap,
- .sub_sat,
- .mul,
- .mul_safe,
- .mul_optimized,
- .mul_wrap,
- .mul_sat,
- .bit_and,
- .bit_or,
- .xor,
- .bool_and,
- .bool_or,
- => .{ .bin_op = .{
- .lhs = @enumFromInt(raw[0]),
- .rhs = @enumFromInt(raw[1]),
- } },
-
- // --- ty_pl: type ref + payload index ---
- .block,
- .@"try",
- .try_cold,
- => .{ .ty_pl = .{
- .ty = @enumFromInt(raw[0]),
- .payload = raw[1],
- } },
-
- // --- br: block_inst + operand ---
- .br => .{ .br = .{
- .block_inst = @enumFromInt(raw[0]),
- .operand = @enumFromInt(raw[1]),
- } },
-
- else => std.debug.panic(
- "unhandled Air tag in C->Zig conversion: {}",
- .{tag},
- ),
- };
-}
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -1,16 +1,21 @@
const std = @import("std");
-const Allocator = std.mem.Allocator;
-
-const parser_test = @import("parser_test.zig");
-const sema_c = @import("sema_c.zig");
// Import C types including sema.h (which transitively includes air.h, intern_pool.h, etc.)
-// Also include astgen.h so we have astParse/astGen/astDeinit/zirDeinit in the same namespace.
+// Also include astgen.h and verbose_air.h so we have the full pipeline in one namespace.
pub const c = @cImport({
@cInclude("astgen.h");
@cInclude("sema.h");
+ @cInclude("verbose_air.h");
+ @cInclude("dump.h");
});
+// Zig-side dumper functions (linked from dumper_obj, not imported).
+// The layout below has to stay in sync with the DumpResult typedef in dump.h:
+// two pointers, text first, error_msg second.
+// NOTE(review): dump.h is also pulled in through the @cImport above, so the
+// same symbol is presumably reachable as c.zig_dump_air; this declaration
+// supplies sentinel-terminated Zig pointer types instead — confirm that the
+// duplication is intended.
+const DumpResult = extern struct {
+ text: ?[*:0]u8,
+ error_msg: ?[*:0]u8,
+};
+// Arguments: source path, then optional function-name filter (null = all).
+extern fn zig_dump_air([*:0]const u8, ?[*:0]const u8) DumpResult;
+
// Helper to convert C #define integer constants (c_int) to u32 for comparison
// with uint32_t fields (InternPoolIndex, etc.).
fn idx(val: c_int) u32 {
@@ -167,21 +172,36 @@ test "intern_pool: pointer types" {
}
// ---------------------------------------------------------------------------
-// Sema smoke tests (using cSema pipeline)
+// Sema smoke tests (using C sema pipeline directly)
// ---------------------------------------------------------------------------
-fn semaCheck(gpa: Allocator, source: [:0]const u8) !sema_c.SemaResult {
+// Bundle of the C-side state produced by semaCheck: the InternPool, the Sema
+// built on top of it, and the per-function Air list returned by semaAnalyze.
+// The holder must call deinit() exactly once.
+const SemaCheckResult = struct {
+ c_ip: c.InternPool,
+ c_sema: c.Sema,
+ c_func_air_list: c.SemaFuncAirList,
+
+ // Tears down in the reverse of the order semaCheck creates things:
+ // Air list first, then Sema, then the InternPool.
+ fn deinit(self: *SemaCheckResult) void {
+ c.semaFuncAirListDeinit(&self.c_func_air_list);
+ c.semaDeinit(&self.c_sema);
+ c.ipDeinit(&self.c_ip);
+ }
+};
+
+// Runs the C pipeline (astParse → astGen → semaInit → semaAnalyze) on `source`
+// and returns the resulting InternPool, Sema and per-function Air list.
+// The AST and ZIR are freed before returning; the caller must call deinit()
+// on the result.
+fn semaCheck(source: [:0]const u8) !SemaCheckResult {
var c_ast = c.astParse(source.ptr, @intCast(source.len));
defer c.astDeinit(&c_ast);
var c_zir = c.astGen(&c_ast);
defer c.zirDeinit(&c_zir);
- return sema_c.cSema(gpa, c_zir);
+ var result: SemaCheckResult = undefined;
+ result.c_ip = c.ipInit();
+ result.c_sema = c.semaInit(&result.c_ip, c_zir);
+ result.c_func_air_list = c.semaAnalyze(&result.c_sema);
+ // NOTE(review): semaInit receives the address of this local's c_ip and a
+ // copy of c_zir, but `result` is returned by value and c_zir is freed by
+ // the defer above. If c.Sema retains either pointer, the returned c_sema
+ // refers to stale memory — confirm against semaInit/semaDeinit in sema.c.
+ return result;
}
test "sema: empty source smoke test" {
- const gpa = std.testing.allocator;
- var result = try semaCheck(gpa, "");
- defer result.deinit(gpa);
+ var result = try semaCheck("");
+ defer result.deinit();
// semaAnalyze frees AIR arrays and nulls out sema's pointers.
try std.testing.expect(result.c_sema.air_inst_tags == null);
@@ -189,78 +209,64 @@ test "sema: empty source smoke test" {
try std.testing.expect(result.c_sema.air_extra == null);
// No functions analyzed yet, so func_airs should be empty.
- try std.testing.expectEqual(@as(usize, 0), result.func_airs.len);
+ try std.testing.expectEqual(@as(u32, 0), result.c_func_air_list.len);
}
test "sema: const x = 0 smoke test" {
- const gpa = std.testing.allocator;
- var result = try semaCheck(gpa, "const x = 0;");
- defer result.deinit(gpa);
+ var result = try semaCheck("const x = 0;");
+ defer result.deinit();
// No functions, so func_airs should be empty.
- try std.testing.expectEqual(@as(usize, 0), result.func_airs.len);
+ try std.testing.expectEqual(@as(u32, 0), result.c_func_air_list.len);
}
test "sema: function decl smoke test" {
- const gpa = std.testing.allocator;
- var result = try semaCheck(gpa, "fn foo() void {}");
- defer result.deinit(gpa);
+ var result = try semaCheck("fn foo() void {}");
+ defer result.deinit();
// zirFunc not yet ported, so func_airs should be empty.
- try std.testing.expectEqual(@as(usize, 0), result.func_airs.len);
+ try std.testing.expectEqual(@as(u32, 0), result.c_func_air_list.len);
}
// ---------------------------------------------------------------------------
-// C Air → Zig Air conversion tests (manual C Air construction)
+// Air dump: C vs Zig text comparison
// ---------------------------------------------------------------------------
-test "sema_c: convert C Air instructions to Zig Air" {
- const gpa = std.testing.allocator;
- const ZigAir = @import("zig_internals").Air;
-
- // Construct a C Air with three instructions covering different data
- // categories:
- // [0] dbg_stmt(line=42, column=7) — dbg_stmt data
- // [1] ret(operand=void_value) — un_op data
- // [2] unreach — no_op data
- var tags = [_]c_uint{
- @intCast(c.AIR_INST_DBG_STMT),
- @intCast(c.AIR_INST_RET),
- @intCast(c.AIR_INST_UNREACH),
- };
- var datas: [3]c.AirInstData = undefined;
- @memset(std.mem.asBytes(&datas), 0);
- datas[0].dbg_stmt.line = 42;
- datas[0].dbg_stmt.column = 7;
- datas[1].un_op.operand = idx(c.IP_INDEX_VOID_VALUE);
-
- var c_air: c.Air = undefined;
- @memset(std.mem.asBytes(&c_air), 0);
- c_air.inst_len = 3;
- c_air.inst_tags = &tags;
- c_air.inst_datas = &datas;
-
- var owned = try sema_c.zigAir(gpa, c_air);
- defer owned.deinit(gpa);
- const air = owned.air();
-
- try std.testing.expectEqual(@as(usize, 3), air.instructions.len);
-
- const zig_tags = air.instructions.items(.tag);
- const zig_datas = air.instructions.items(.data);
-
- // [0] dbg_stmt
- try std.testing.expectEqual(ZigAir.Inst.Tag.dbg_stmt, zig_tags[0]);
- try std.testing.expectEqual(@as(u32, 42), zig_datas[0].dbg_stmt.line);
- try std.testing.expectEqual(@as(u32, 7), zig_datas[0].dbg_stmt.column);
-
- // [1] ret with void_value operand
- try std.testing.expectEqual(ZigAir.Inst.Tag.ret, zig_tags[1]);
- try std.testing.expectEqual(
- @as(u32, idx(c.IP_INDEX_VOID_VALUE)),
- @intFromEnum(zig_datas[1].un_op),
- );
-
- // [2] unreach (no_op)
- try std.testing.expectEqual(ZigAir.Inst.Tag.unreach, zig_tags[2]);
+// Runs `source` through both pipelines and asserts that the Air text produced
+// by the C implementation equals the text produced by the Zig compiler.
+// `func_filter`: null compares all functions; otherwise it is passed to both
+// dumpers to restrict the dump to matching function names.
+// Fails with error.TmpFileCreate / error.TmpFileWrite if the scratch source
+// file cannot be written, and with error.ZigDumpFailed if the Zig dumper
+// reports an error or returns no text at all.
+fn semaAirDumpCheck(source: [:0]const u8, func_filter: ?[*:0]const u8) !void {
+    // C pipeline: parse → astgen → sema → c_dump_air
+    var result = try semaCheck(source);
+    defer result.deinit();
+    const c_text_ptr = c.c_dump_air(&result.c_func_air_list, &result.c_ip, func_filter);
+    defer std.c.free(c_text_ptr);
+    const c_text: []const u8 = if (c_text_ptr) |p| std.mem.span(p) else "";
+
+    // Zig pipeline: write source to a scratch file, compile, dump Air text.
+    // The file lives in a per-call std.testing.tmpDir rather than at a fixed
+    // path under /tmp, so concurrent test runs cannot overwrite or delete
+    // each other's input.
+    const gpa = std.testing.allocator;
+    const file_name = "zig0_sema_test_tmp.zig";
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+    {
+        const f = tmp.dir.createFile(file_name, .{}) catch return error.TmpFileCreate;
+        defer f.close();
+        f.writeAll(source) catch return error.TmpFileWrite;
+    }
+    // zig_dump_air takes a NUL-terminated path, so resolve the scratch file
+    // to an absolute path and copy it with a sentinel.
+    const abs_path = try tmp.dir.realpathAlloc(gpa, file_name);
+    defer gpa.free(abs_path);
+    const tmp_path = try gpa.dupeZ(u8, abs_path);
+    defer gpa.free(tmp_path);
+
+    const zig_result = zig_dump_air(tmp_path.ptr, func_filter);
+    defer {
+        if (zig_result.text) |t| std.c.free(t);
+        if (zig_result.error_msg) |e| std.c.free(e);
+    }
+    if (zig_result.error_msg) |e| {
+        std.debug.print("zig_dump_air error: {s}\n", .{std.mem.span(e)});
+        return error.ZigDumpFailed;
+    }
+    // The dumper returns neither text nor message when it could not allocate
+    // its error message; that is a failure, not an empty dump.
+    const zig_text: []const u8 = if (zig_result.text) |t| std.mem.span(t) else {
+        std.debug.print("zig_dump_air error: no text and no error message returned\n", .{});
+        return error.ZigDumpFailed;
+    };
+
+    try std.testing.expectEqualStrings(zig_text, c_text);
+}
+
+// An empty source file must yield identical Air text from the C and Zig
+// pipelines; no function filter is applied.
+test "sema: Air dump C vs Zig comparison (empty)" {
+ try semaAirDumpCheck("", null);
+}
+
+// A file holding only a constant declaration (no functions) must yield
+// identical Air text from the C and Zig pipelines; no function filter is
+// applied.
+test "sema: Air dump C vs Zig comparison (const)" {
+ try semaAirDumpCheck("const x = 0;", null);
+}
diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig
@@ -6,33 +6,28 @@ const AstGen = std.zig.AstGen;
const parser_test = @import("parser_test.zig");
const astgen_test = @import("astgen_test.zig");
const sema_test = @import("sema_test.zig");
-const sema_c = @import("sema_c.zig");
-const sema = @import("sema.zig");
const c = parser_test.c;
const sc = sema_test.c;
-const zig_internals = @import("zig_internals");
-const ZigIP = zig_internals.InternPool;
-const ZigAir = zig_internals.Air;
-const Stage = enum { parser, sema };
+// Zig-side dumper function (linked from dumper_obj, not imported).
+// The layout below has to stay in sync with the DumpResult typedef in
+// stage0/dump.h: two pointers, text first, error_msg second. Both strings are
+// owned by the caller once returned.
+const DumpResult = extern struct {
+ text: ?[*:0]u8,
+ error_msg: ?[*:0]u8,
+};
+// Arguments: source path, then optional function-name filter (null = all).
+extern fn zig_dump_air([*:0]const u8, ?[*:0]const u8) DumpResult;
+// Runs stagesCheck on every entry of corpus_files and stops at the first
+// failure after printing the offending path. Each file is embedded at compile
+// time; the leading "../" is sliced off before the path is handed on.
+// The inline loop is unrolled per entry, hence the raised branch quota.
test "stages: corpus" {
@setEvalBranchQuota(corpus_files.len * 2);
const gpa = std.testing.allocator;
- const check = Stage.sema;
inline for (corpus_files) |path| {
- stagesCheck(gpa, @embedFile(path), path["../".len..], check) catch {
+ stagesCheck(gpa, @embedFile(path), path["../".len..]) catch {
std.debug.print("FAIL: {s}\n", .{path});
return error.TestFailed;
};
-
- //if (std.mem.eql(u8, path, last_successful_corpus)) {
- // check = Stage.parser;
- //}
}
}
-fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: []const u8, check: Stage) !void {
+fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: [:0]const u8) !void {
// Parse once with C parser
var c_ast = c.astParse(source.ptr, @intCast(source.len));
defer c.astDeinit(&c_ast);
@@ -67,85 +62,32 @@ fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: []const u8, check
try astgen_test.expectEqualZir(gpa, ref_zir, c_zir);
- if (check == .sema) {
- var result = try sema_c.cSema(gpa, @bitCast(c_zir));
- defer result.deinit(gpa);
-
- var zig_result = try sema.zigSema(gpa, src_path);
- defer zig_result.deinit();
-
- // Parse Zig per-function Air sections from captured verbose_air output.
- const zig_air_text = zig_result.air_output.written();
- var zig_sections = try parseAirSections(gpa, zig_air_text);
- defer deinitAirSections(gpa, &zig_sections);
-
- // C and Zig must produce the same number of per-function Airs.
- try std.testing.expectEqual(zig_sections.count(), result.func_airs.len);
-
- // Compare per-function Air text.
- const Zcu = zig_internals.Zcu;
- const pt: Zcu.PerThread = .activate(zig_result.comp.zcu.?, .main);
- defer pt.deactivate();
+ // Stage 3: Sema — compare C and Zig Air text output
+ {
+ var c_ip = sc.ipInit();
+ defer sc.ipDeinit(&c_ip);
+ var c_sema = sc.semaInit(&c_ip, @bitCast(c_zir));
+ defer sc.semaDeinit(&c_sema);
+ var c_func_air_list = sc.semaAnalyze(&c_sema);
+ defer sc.semaFuncAirListDeinit(&c_func_air_list);
- for (result.func_airs) |func_air| {
- const zig_section = zig_sections.get(func_air.name) orelse {
- std.debug.print("C sema produced function '{s}' not found in Zig sema\n", .{func_air.name});
- return error.TestUnexpectedResult;
- };
+ const c_text_ptr = sc.c_dump_air(&c_func_air_list, &c_ip, null);
+ defer std.c.free(c_text_ptr);
+ const c_text: []const u8 = if (c_text_ptr) |p| std.mem.span(p) else "";
- // Render C Air as text using Zig's PerThread. InternPool indices
- // must match between C and Zig for the same source — if they don't,
- // the text will differ and the test catches the bug.
- var c_air_buf: std.io.Writer.Allocating = .init(gpa);
- defer c_air_buf.deinit();
- func_air.owned_air.air().write(&c_air_buf.writer, pt, null);
-
- try std.testing.expectEqualStrings(zig_section, c_air_buf.written());
+ const zig_result = zig_dump_air(src_path.ptr, null);
+ defer {
+ if (zig_result.text) |t| std.c.free(t);
+ if (zig_result.error_msg) |e| std.c.free(e);
}
- }
-}
-
-/// Parse verbose_air output into per-function sections keyed by FQN.
-/// Sections are delimited by "# Begin Function AIR: {fqn}:\n" and
-/// "# End Function AIR: {fqn}\n\n" markers.
-/// Returns owned keys and values that the caller must free.
-fn parseAirSections(gpa: Allocator, text: []const u8) !std.StringHashMapUnmanaged([]const u8) {
- const begin_marker = "# Begin Function AIR: ";
- const end_marker = "# End Function AIR: ";
- var map: std.StringHashMapUnmanaged([]const u8) = .empty;
- errdefer deinitAirSections(gpa, &map);
-
- var pos: usize = 0;
- while (std.mem.indexOfPos(u8, text, pos, begin_marker)) |begin| {
- const fqn_start = begin + begin_marker.len;
- // FQN ends at ":\n"
- const fqn_end = std.mem.indexOfPos(u8, text, fqn_start, ":\n") orelse break;
- const fqn = text[fqn_start..fqn_end];
-
- // Body starts after ":\n"
- const body_start = fqn_end + 2;
-
- // Find the matching end marker
- const end_pos = std.mem.indexOfPos(u8, text, body_start, end_marker) orelse break;
-
- const body = text[body_start..end_pos];
- const key = try gpa.dupe(u8, fqn);
- errdefer gpa.free(key);
- const val = try gpa.dupe(u8, body);
- try map.put(gpa, key, val);
-
- pos = end_pos + end_marker.len;
- }
- return map;
-}
+ if (zig_result.error_msg) |e| {
+ std.debug.print("zig_dump_air error: {s}\n", .{std.mem.span(e)});
+ return error.ZigDumpFailed;
+ }
+ const zig_text: []const u8 = if (zig_result.text) |t| std.mem.span(t) else "";
-fn deinitAirSections(gpa: Allocator, map: *std.StringHashMapUnmanaged([]const u8)) void {
- var it = map.iterator();
- while (it.next()) |entry| {
- gpa.free(entry.value_ptr.*);
- gpa.free(entry.key_ptr.*);
+ try std.testing.expectEqualStrings(zig_text, c_text);
}
- map.deinit(gpa);
}
const last_successful_corpus = "../lib/std/crypto/codecs.zig";
@@ -1269,155 +1211,3 @@ const corpus_files = .{
//"../src/arch/x86_64/CodeGen.zig", // 11086406
};
-// ---------------------------------------------------------------------------
-// InternPool cross-check: C vs Zig reference
-// ---------------------------------------------------------------------------
-
-test "sema: intern pool pre-interned comparison" {
- const gpa = std.testing.allocator;
-
- // Zig InternPool
- var zig_ip: ZigIP = ZigIP.empty;
- try zig_ip.init(gpa, 1);
- defer zig_ip.deinit(gpa);
-
- // C InternPool
- var c_ip = sc.ipInit();
- defer sc.ipDeinit(&c_ip);
-
- // Both should have 124 pre-interned entries
- try std.testing.expectEqual(@as(u32, 124), c_ip.items_len);
-
- // Compare each entry's key
- for (0..c_ip.items_len) |i| {
- const c_key = sc.ipIndexToKey(&c_ip, @intCast(i));
- const zig_key = zig_ip.indexToKey(@enumFromInt(i));
- try expectKeysEqual(c_key, zig_key, @intCast(i));
- }
-}
-
-fn expectKeysEqual(c_key: sc.InternPoolKey, zig_key: ZigIP.Key, index: u32) !void {
- switch (c_key.tag) {
- sc.IP_KEY_INT_TYPE => {
- const zig_int = zig_key.int_type;
- try std.testing.expectEqual(c_key.data.int_type.bits, zig_int.bits);
- // C: 0=unsigned, 1=signed; Zig: Signedness enum { signed=0, unsigned=1 }
- const c_is_signed = c_key.data.int_type.signedness != 0;
- try std.testing.expectEqual(c_is_signed, zig_int.signedness == .signed);
- },
- sc.IP_KEY_SIMPLE_TYPE => {
- // SimpleType enum values encode the Index directly in the Zig IP
- const zig_st = zig_key.simple_type;
- try std.testing.expectEqual(index, @intFromEnum(zig_st));
- },
- sc.IP_KEY_PTR_TYPE => {
- const zig_ptr = zig_key.ptr_type;
- const c_ptr = c_key.data.ptr_type;
- try std.testing.expectEqual(c_ptr.child, @intFromEnum(zig_ptr.child));
- try std.testing.expectEqual(c_ptr.sentinel, @intFromEnum(zig_ptr.sentinel));
- // Compare size
- const c_size: u2 = @intCast(c_ptr.flags & sc.PTR_FLAGS_SIZE_MASK);
- try std.testing.expectEqual(c_size, @intFromEnum(zig_ptr.flags.size));
- // Compare is_const
- const c_is_const = (c_ptr.flags & sc.PTR_FLAGS_IS_CONST) != 0;
- try std.testing.expectEqual(c_is_const, zig_ptr.flags.is_const);
- },
- sc.IP_KEY_VECTOR_TYPE => {
- const zig_vec = zig_key.vector_type;
- try std.testing.expectEqual(c_key.data.vector_type.len, zig_vec.len);
- try std.testing.expectEqual(c_key.data.vector_type.child, @intFromEnum(zig_vec.child));
- },
- sc.IP_KEY_OPT_TYPE => {
- try std.testing.expectEqual(c_key.data.opt_type, @intFromEnum(zig_key.opt_type));
- },
- sc.IP_KEY_ANYFRAME_TYPE => {
- try std.testing.expectEqual(c_key.data.anyframe_type, @intFromEnum(zig_key.anyframe_type));
- },
- sc.IP_KEY_ERROR_UNION_TYPE => {
- const zig_eu = zig_key.error_union_type;
- try std.testing.expectEqual(c_key.data.error_union_type.error_set, @intFromEnum(zig_eu.error_set_type));
- try std.testing.expectEqual(c_key.data.error_union_type.payload, @intFromEnum(zig_eu.payload_type));
- },
- sc.IP_KEY_TUPLE_TYPE => {
- // For the pre-interned set this is only empty_tuple_type
- const zig_tuple = zig_key.tuple_type;
- try std.testing.expectEqual(@as(u32, 0), zig_tuple.types.len);
- try std.testing.expectEqual(@as(u32, 0), zig_tuple.values.len);
- },
- sc.IP_KEY_UNDEF => {
- try std.testing.expectEqual(c_key.data.undef, @intFromEnum(zig_key.undef));
- },
- sc.IP_KEY_SIMPLE_VALUE => {
- const zig_sv = zig_key.simple_value;
- // Map C SimpleValue enum to Zig SimpleValue enum by comparing the
- // Index they encode (Zig SimpleValue values are the IP index itself).
- try std.testing.expectEqual(index, @intFromEnum(zig_sv));
- },
- sc.IP_KEY_INT => {
- const zig_int = zig_key.int;
- // Compare type
- try std.testing.expectEqual(c_key.data.int_val.ty, @intFromEnum(zig_int.ty));
- // Compare value
- switch (zig_int.storage) {
- .u64 => |v| {
- try std.testing.expect(!c_key.data.int_val.is_negative);
- try std.testing.expectEqual(c_key.data.int_val.value, v);
- },
- .i64 => |v| {
- if (v < 0) {
- try std.testing.expect(c_key.data.int_val.is_negative);
- const abs: u64 = @intCast(-v);
- try std.testing.expectEqual(c_key.data.int_val.value, abs);
- } else {
- try std.testing.expect(!c_key.data.int_val.is_negative);
- try std.testing.expectEqual(c_key.data.int_val.value, @as(u64, @intCast(v)));
- }
- },
- else => return error.TestUnexpectedResult,
- }
- },
- else => {
- std.debug.print("unhandled key tag {d} at index {d}\n", .{ c_key.tag, index });
- return error.TestUnexpectedResult;
- },
- }
-}
-
-test "sema: const x = 42 air + intern pool comparison" {
- const gpa = std.testing.allocator;
- const source: [:0]const u8 = "const x = 42;";
-
- // Run C pipeline: parse → astgen → sema → convert
- var c_ast = c.astParse(source.ptr, @intCast(source.len));
- defer c.astDeinit(&c_ast);
- var c_zir = c.astGen(&c_ast);
- defer c.zirDeinit(&c_zir);
-
- var result = try sema_c.cSema(gpa, @bitCast(c_zir));
- defer result.deinit(gpa);
-
- // For `const x = 42;`, sema produces no per-function Airs
- // (everything is resolved at comptime).
- try std.testing.expectEqual(@as(usize, 0), result.func_airs.len);
-
- // C IP should have grown beyond 124 pre-interned entries
- try std.testing.expect(result.c_ip.items_len > 124);
-
- // Init Zig reference IP and intern the same value
- var zig_ip: ZigIP = ZigIP.empty;
- try zig_ip.init(gpa, 1);
- defer zig_ip.deinit(gpa);
-
- const zig_idx = try zig_ip.get(gpa, .main, .{ .int = .{
- .ty = .comptime_int_type,
- .storage = .{ .u64 = 42 },
- } });
-
- // Both should have created the entry at the same index (124)
- try std.testing.expectEqual(@as(u32, 124), @intFromEnum(zig_idx));
-
- // Compare the key at index 124
- const c_key = sc.ipIndexToKey(&result.c_ip, 124);
- const zig_key = zig_ip.indexToKey(zig_idx);
- try expectKeysEqual(c_key, zig_key, 124);
-}
diff --git a/stage0/verbose_air.c b/stage0/verbose_air.c
@@ -0,0 +1,393 @@
+// verbose_air.c — C-side Air text dumper.
+// Formats per-function Air output matching the Zig verbose_air format
+// from src/Air/print.zig.
+
+#include "verbose_air.h"
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// --- Growable string buffer ---
+
+typedef struct {
+    char* data; // storage obtained via realloc() in sbGrow(); NULL right after sbInit()
+    size_t len; // bytes written so far, not counting the trailing '\0' the appenders store
+    size_t cap; // bytes currently allocated for `data`
+} StringBuf;
+
+// Put `sb` into the empty state: no storage, zero length, zero capacity.
+// Nothing is allocated here; the first append allocates.
+static void sbInit(StringBuf* sb) {
+    const StringBuf empty = { NULL, 0, 0 };
+    *sb = empty;
+}
+
+// Make sure at least `need` more bytes fit after sb->len.
+// Capacity grows geometrically: it starts at 256 bytes and doubles until
+// the request fits. Does nothing when the current capacity is sufficient.
+//
+// Aborts the process on allocation failure. Every caller writes into
+// sb->data immediately after this returns and has no error path, so
+// returning with a NULL buffer would only move the crash somewhere less
+// obvious.
+static void sbGrow(StringBuf* sb, size_t need) {
+    if (sb->len + need <= sb->cap) return;
+    size_t new_cap = sb->cap ? sb->cap * 2 : 256;
+    while (new_cap < sb->len + need) new_cap *= 2;
+    // Keep the old pointer until realloc succeeds: assigning the result
+    // straight to sb->data would lose (and leak) the buffer on failure.
+    char* grown = realloc(sb->data, new_cap);
+    if (grown == NULL) {
+        fprintf(stderr, "verbose_air: out of memory growing buffer to %zu bytes\n",
+            new_cap);
+        abort();
+    }
+    sb->data = grown;
+    sb->cap = new_cap;
+}
+
+// Append the NUL-terminated string `s` to the buffer.
+// Room for the terminator is reserved as well, so sb->data is always a
+// valid C string afterwards; sb->len does not count the terminator.
+static void sbAppend(StringBuf* sb, const char* s) {
+    const size_t n = strlen(s);
+    sbGrow(sb, n + 1);
+    char* dst = sb->data + sb->len;
+    memcpy(dst, s, n);
+    dst[n] = '\0';
+    sb->len += n;
+}
+
+// Append printf-style formatted text to the buffer.
+//
+// The output is measured first with vsnprintf(NULL, 0, ...) on a copy of
+// the argument list, the buffer is grown to fit, and the text is then
+// formatted directly into place. This avoids a fixed-size scratch buffer
+// and a temporary heap allocation, so no output is silently dropped when
+// a scratch allocation fails.
+//
+// If the format itself is rejected (vsnprintf returns a negative value)
+// nothing is appended.
+static void sbPrintf(StringBuf* sb, const char* fmt, ...) {
+    va_list ap;
+    va_list measure;
+    va_start(ap, fmt);
+    // A va_list may only be walked once; measure on a copy.
+    va_copy(measure, ap);
+    int n = vsnprintf(NULL, 0, fmt, measure);
+    va_end(measure);
+    if (n >= 0) {
+        // +1 for the terminator vsnprintf writes.
+        sbGrow(sb, (size_t)n + 1);
+        vsnprintf(sb->data + sb->len, (size_t)n + 1, fmt, ap);
+        sb->len += (size_t)n;
+        sb->data[sb->len] = '\0';
+    }
+    va_end(ap);
+}
+
+// --- Air tag name table ---
+// Maps AirInstTag enum values to snake_case names matching Zig's @tagName.
+
+// Return the enumerator spelling of `t`, i.e. the stringified identifier
+// that AIR_INST_FOREACH_TAG passes to the per-tag macro. Falls back to
+// "unknown" when no generated case matches.
+static const char* airTagName(AirInstTag t) {
+#define AIR_TAG_CASE(tag) case tag: return #tag;
+    switch (t) {
+        AIR_INST_FOREACH_TAG(AIR_TAG_CASE)
+    }
+#undef AIR_TAG_CASE
+    return "unknown";
+}
+
+// Produce the lowercase tag name: "AIR_INST_FOO_BAR" becomes "foo_bar".
+// The "AIR_INST_" prefix is dropped when present and ASCII uppercase
+// letters are lowered; every other character is copied unchanged.
+// The result lives in a function-local static buffer that the next call
+// overwrites (NOT reentrant); names that do not fit are truncated.
+static const char* airTagSnakeName(AirInstTag t) {
+    static char buf[128];
+    static const char prefix[] = "AIR_INST_";
+    const size_t prefix_len = sizeof(prefix) - 1;
+
+    const char* src = airTagName(t);
+    if (strncmp(src, prefix, prefix_len) == 0) src += prefix_len;
+
+    char* dst = buf;
+    char* const last = buf + sizeof(buf) - 1; // keep one byte for '\0'
+    while (*src != '\0' && dst < last) {
+        char ch = *src++;
+        if (ch >= 'A' && ch <= 'Z') ch = (char)(ch + ('a' - 'A'));
+        *dst++ = ch;
+    }
+    *dst = '\0';
+    return buf;
+}
+
+// --- Operand formatting ---
+
+// Append the textual form of an Air reference:
+//   ".none"  when ref is AIR_REF_NONE,
+//   "%N"     when AIR_REF_IS_INST(ref) holds (N = AIR_REF_TO_INST(ref)),
+//   "@N"     otherwise (N = AIR_REF_TO_IP(ref)).
+static void writeRef(StringBuf* sb, AirInstRef ref) {
+    if (ref == AIR_REF_NONE) {
+        sbAppend(sb, ".none");
+        return;
+    }
+    if (AIR_REF_IS_INST(ref)) {
+        sbPrintf(sb, "%%%u", AIR_REF_TO_INST(ref));
+        return;
+    }
+    sbPrintf(sb, "@%u", AIR_REF_TO_IP(ref));
+}
+
+// --- Format bytes as Zig's {Bi} format ---
+
+// Append a byte count with a binary-unit suffix: an exact integer with
+// "B" below 1 KiB, otherwise one decimal place in KiB (below 1 MiB) or
+// in MiB (everything larger).
+// NOTE(review): intended to mirror Zig's {Bi} formatting; whether Zig
+// prints exactly one decimal place cannot be confirmed from this file.
+static void writeBi(StringBuf* sb, size_t bytes) {
+    if (bytes >= 1024 * 1024) {
+        sbPrintf(sb, "%.1fMiB", (double)bytes / (1024.0 * 1024.0));
+        return;
+    }
+    if (bytes >= 1024) {
+        sbPrintf(sb, "%.1fKiB", (double)bytes / 1024.0);
+        return;
+    }
+    if (bytes == 0) {
+        sbAppend(sb, "0B");
+        return;
+    }
+    sbPrintf(sb, "%zuB", bytes);
+}
+
+// --- Instruction formatting ---
+// writeInst appends one text line for instruction `idx` of `air`: "%<idx> = <tag>(<operands>)" followed by a newline.
+static void writeInst(StringBuf* sb, const Air* air, uint32_t idx) {
+    AirInstTag t = air->inst_tags[idx];
+    AirInstData d = air->inst_datas[idx];
+    const char* name = airTagSnakeName(t); // points into a static buffer; consumed by the sbPrintf just below
+
+    sbPrintf(sb, " %%%u = %s(", idx, name);
+
+    switch (t) { // NOTE(review): each group picks which AirInstData member is read; confirm the grouping against air.h
+    // no_op: nothing is printed between the parentheses
+    case AIR_INST_TRAP:
+    case AIR_INST_BREAKPOINT:
+    case AIR_INST_RET_ADDR:
+    case AIR_INST_FRAME_ADDR:
+    case AIR_INST_UNREACH:
+    case AIR_INST_DBG_EMPTY_STMT:
+        break;
+
+    // dbg_stmt: "line:column"; the stored values are incremented by one for display
+    case AIR_INST_DBG_STMT:
+        sbPrintf(sb, "%u:%u", d.dbg_stmt.line + 1, d.dbg_stmt.column + 1);
+        break;
+
+    // un_op: a single operand reference, formatted by writeRef
+    case AIR_INST_RET:
+    case AIR_INST_RET_SAFE:
+    case AIR_INST_RET_LOAD:
+    case AIR_INST_NOT:
+    case AIR_INST_BITCAST:
+    case AIR_INST_LOAD:
+    case AIR_INST_IS_NULL:
+    case AIR_INST_IS_NON_NULL:
+    case AIR_INST_IS_NULL_PTR:
+    case AIR_INST_IS_NON_NULL_PTR:
+    case AIR_INST_IS_ERR:
+    case AIR_INST_IS_NON_ERR:
+    case AIR_INST_IS_ERR_PTR:
+    case AIR_INST_IS_NON_ERR_PTR:
+    case AIR_INST_OPTIONAL_PAYLOAD:
+    case AIR_INST_OPTIONAL_PAYLOAD_PTR:
+    case AIR_INST_OPTIONAL_PAYLOAD_PTR_SET:
+    case AIR_INST_WRAP_OPTIONAL:
+    case AIR_INST_UNWRAP_ERRUNION_PAYLOAD:
+    case AIR_INST_UNWRAP_ERRUNION_ERR:
+    case AIR_INST_UNWRAP_ERRUNION_PAYLOAD_PTR:
+    case AIR_INST_UNWRAP_ERRUNION_ERR_PTR:
+    case AIR_INST_ARRAY_TO_SLICE:
+    case AIR_INST_SPLAT:
+    case AIR_INST_CLZ:
+    case AIR_INST_CTZ:
+    case AIR_INST_POPCOUNT:
+    case AIR_INST_BYTE_SWAP:
+    case AIR_INST_BIT_REVERSE:
+    case AIR_INST_SQRT:
+    case AIR_INST_SIN:
+    case AIR_INST_COS:
+    case AIR_INST_TAN:
+    case AIR_INST_EXP:
+    case AIR_INST_EXP2:
+    case AIR_INST_LOG:
+    case AIR_INST_LOG2:
+    case AIR_INST_LOG10:
+    case AIR_INST_ABS:
+    case AIR_INST_FLOOR:
+    case AIR_INST_CEIL:
+    case AIR_INST_ROUND:
+    case AIR_INST_TRUNC_FLOAT:
+    case AIR_INST_NEG:
+    case AIR_INST_NEG_OPTIMIZED:
+    case AIR_INST_FPTRUNC:
+    case AIR_INST_FPEXT:
+    case AIR_INST_INTCAST:
+    case AIR_INST_INTCAST_SAFE:
+    case AIR_INST_TRUNC:
+    case AIR_INST_FLOAT_FROM_INT:
+    case AIR_INST_ERR_RETURN_TRACE:
+    case AIR_INST_SET_ERR_RETURN_TRACE:
+    case AIR_INST_ADDRSPACE_CAST:
+    case AIR_INST_C_VA_ARG:
+    case AIR_INST_C_VA_COPY:
+    case AIR_INST_C_VA_END:
+    case AIR_INST_C_VA_START:
+    case AIR_INST_GET_UNION_TAG:
+    case AIR_INST_TAG_NAME:
+    case AIR_INST_ERROR_NAME:
+    case AIR_INST_SLICE_LEN:
+    case AIR_INST_SLICE_PTR:
+    case AIR_INST_PTR_SLICE_LEN_PTR:
+    case AIR_INST_PTR_SLICE_PTR_PTR:
+        writeRef(sb, d.un_op.operand);
+        break;
+
+    // bin_op: "lhs, rhs", both formatted by writeRef
+    case AIR_INST_ADD:
+    case AIR_INST_ADD_SAFE:
+    case AIR_INST_ADD_OPTIMIZED:
+    case AIR_INST_ADD_WRAP:
+    case AIR_INST_ADD_SAT:
+    case AIR_INST_SUB:
+    case AIR_INST_SUB_SAFE:
+    case AIR_INST_SUB_OPTIMIZED:
+    case AIR_INST_SUB_WRAP:
+    case AIR_INST_SUB_SAT:
+    case AIR_INST_MUL:
+    case AIR_INST_MUL_SAFE:
+    case AIR_INST_MUL_OPTIMIZED:
+    case AIR_INST_MUL_WRAP:
+    case AIR_INST_MUL_SAT:
+    case AIR_INST_DIV_FLOAT:
+    case AIR_INST_DIV_FLOAT_OPTIMIZED:
+    case AIR_INST_DIV_TRUNC:
+    case AIR_INST_DIV_TRUNC_OPTIMIZED:
+    case AIR_INST_DIV_FLOOR:
+    case AIR_INST_DIV_FLOOR_OPTIMIZED:
+    case AIR_INST_DIV_EXACT:
+    case AIR_INST_DIV_EXACT_OPTIMIZED:
+    case AIR_INST_REM:
+    case AIR_INST_REM_OPTIMIZED:
+    case AIR_INST_MOD:
+    case AIR_INST_MOD_OPTIMIZED:
+    case AIR_INST_PTR_ADD:
+    case AIR_INST_PTR_SUB:
+    case AIR_INST_MAX:
+    case AIR_INST_MIN:
+    case AIR_INST_BIT_AND:
+    case AIR_INST_BIT_OR:
+    case AIR_INST_SHR:
+    case AIR_INST_SHR_EXACT:
+    case AIR_INST_SHL:
+    case AIR_INST_SHL_EXACT:
+    case AIR_INST_SHL_SAT:
+    case AIR_INST_XOR:
+    case AIR_INST_BOOL_AND:
+    case AIR_INST_BOOL_OR:
+    case AIR_INST_STORE:
+    case AIR_INST_STORE_SAFE:
+    case AIR_INST_CMP_LT:
+    case AIR_INST_CMP_LT_OPTIMIZED:
+    case AIR_INST_CMP_LTE:
+    case AIR_INST_CMP_LTE_OPTIMIZED:
+    case AIR_INST_CMP_EQ:
+    case AIR_INST_CMP_EQ_OPTIMIZED:
+    case AIR_INST_CMP_GTE:
+    case AIR_INST_CMP_GTE_OPTIMIZED:
+    case AIR_INST_CMP_GT:
+    case AIR_INST_CMP_GT_OPTIMIZED:
+    case AIR_INST_CMP_NEQ:
+    case AIR_INST_CMP_NEQ_OPTIMIZED:
+    case AIR_INST_SET_UNION_TAG:
+    case AIR_INST_SLICE:
+    case AIR_INST_ARRAY_ELEM_VAL:
+    case AIR_INST_SLICE_ELEM_VAL:
+    case AIR_INST_SLICE_ELEM_PTR:
+    case AIR_INST_PTR_ELEM_VAL:
+    case AIR_INST_PTR_ELEM_PTR:
+    case AIR_INST_MEMSET:
+    case AIR_INST_MEMSET_SAFE:
+    case AIR_INST_MEMCPY:
+    case AIR_INST_MEMMOVE:
+        writeRef(sb, d.bin_op.lhs);
+        sbAppend(sb, ", ");
+        writeRef(sb, d.bin_op.rhs);
+        break;
+
+    // ty_pl: type reference, then the raw payload value printed as a number
+    case AIR_INST_BLOCK:
+    case AIR_INST_TRY:
+    case AIR_INST_TRY_COLD:
+        writeRef(sb, d.ty_pl.ty_ref);
+        sbPrintf(sb, ", %u", d.ty_pl.payload);
+        break;
+
+    // br: target block as "%N", then the operand reference
+    case AIR_INST_BR:
+        sbPrintf(sb, "%%%u, ", d.br.block_inst);
+        writeRef(sb, d.br.operand);
+        break;
+
+    // arg: type reference, then zir_param_index as a number
+    case AIR_INST_ARG:
+        writeRef(sb, d.arg.ty_ref);
+        sbPrintf(sb, ", %u", d.arg.zir_param_index);
+        break;
+
+    // ty_op: type reference, then operand reference
+    case AIR_INST_ERRUNION_PAYLOAD_PTR_SET:
+    case AIR_INST_WRAP_ERRUNION_PAYLOAD:
+    case AIR_INST_WRAP_ERRUNION_ERR:
+    case AIR_INST_INT_FROM_FLOAT:
+    case AIR_INST_INT_FROM_FLOAT_OPTIMIZED:
+    case AIR_INST_INT_FROM_FLOAT_SAFE:
+    case AIR_INST_INT_FROM_FLOAT_OPTIMIZED_SAFE:
+        writeRef(sb, d.ty_op.ty_ref);
+        sbAppend(sb, ", ");
+        writeRef(sb, d.ty_op.operand);
+        break;
+
+    // ty: only the type reference
+    case AIR_INST_ALLOC:
+    case AIR_INST_RET_PTR:
+        writeRef(sb, d.ty.ty_ref);
+        break;
+
+    // repeat: the loop instruction being repeated, as "%N"
+    case AIR_INST_REPEAT:
+        sbPrintf(sb, "%%%u", d.repeat_data.loop_inst);
+        break;
+
+    default: // every tag not listed above has no formatter yet
+        sbAppend(sb, "<TODO>");
+        break;
+    }
+
+    sbAppend(sb, ")\n");
+}
+
+// --- Main dump function ---
+
+// Append the complete dump of one function: a "# Begin Function AIR"
+// line, the memory-statistics header, one line per instruction and the
+// closing "# End Function AIR" line followed by a blank line.
+// `ip` is accepted but not used. All Liveness figures are printed as
+// fixed zero values.
+static void dumpOneAir(StringBuf* sb, const char* name, const Air* air,
+    const InternPool* ip) {
+    (void)ip;
+
+    // Sizes for the statistics header (layout follows src/Air/print.zig).
+    // NOTE(review): the 8 is presumably the size of one instruction's data
+    // payload — confirm against the AirInstData definition.
+    const size_t per_inst = sizeof(AirInstTag) + 8;
+    const size_t inst_sz = (size_t)air->inst_len * per_inst;
+    const size_t extra_sz = (size_t)air->extra_len * sizeof(uint32_t);
+    const size_t total_sz = sizeof(Air) + inst_sz + extra_sz;
+
+    sbPrintf(sb, "# Begin Function AIR: %s:\n", name);
+
+    sbAppend(sb, "# Total AIR+Liveness bytes: ");
+    writeBi(sb, total_sz);
+
+    sbAppend(sb, "\n# AIR Instructions: ");
+    sbPrintf(sb, "%u (", air->inst_len);
+    writeBi(sb, inst_sz);
+
+    sbAppend(sb, ")\n# AIR Extra Data: ");
+    sbPrintf(sb, "%u (", air->extra_len);
+    writeBi(sb, extra_sz);
+
+    sbAppend(sb, ")\n# Liveness tomb_bits: 0B");
+    sbAppend(sb, "\n# Liveness Extra Data: 0 (0B)");
+    sbAppend(sb, "\n# Liveness special table: 0 (0B)");
+    sbAppend(sb, "\n");
+
+    uint32_t inst = 0;
+    while (inst < air->inst_len) {
+        writeInst(sb, air, inst);
+        inst++;
+    }
+
+    sbPrintf(sb, "# End Function AIR: %s\n\n", name);
+}
+
+// Dump every function in `airs` whose name passes the filter, in list
+// order, into one heap-allocated string.
+//   func_filter == NULL  -> every function is dumped
+//   otherwise            -> only names containing func_filter (strstr)
+// When nothing was written the result is a freshly allocated empty
+// string, or NULL if even that one-byte allocation fails.
+// The caller owns the returned pointer and releases it with free().
+char* c_dump_air(const SemaFuncAirList* airs, const InternPool* ip,
+    const char* func_filter) {
+    StringBuf out;
+    sbInit(&out);
+
+    for (uint32_t i = 0; i < airs->len; i++) {
+        const SemaFuncAir* fa = &airs->items[i];
+        int wanted = func_filter == NULL
+            || strstr(fa->name, func_filter) != NULL;
+        if (wanted) dumpOneAir(&out, fa->name, &fa->air, ip);
+    }
+
+    if (out.data != NULL) return out.data;
+
+    // Nothing was appended: return an empty string, not NULL.
+    char* empty = malloc(1);
+    if (empty != NULL) empty[0] = '\0';
+    return empty;
+}
diff --git a/stage0/verbose_air.h b/stage0/verbose_air.h
@@ -0,0 +1,12 @@
+// verbose_air.h — C-side Air text dumper (declaration of c_dump_air).
+#ifndef _ZIG0_VERBOSE_AIR_H__
+#define _ZIG0_VERBOSE_AIR_H__
+
+#include "sema.h"
+
+// Format the C Air of each function in `airs` as text intended to match the Zig verbose_air output.
+// func_filter: NULL=all functions, "foo"=only functions whose name contains "foo" (substring match).
+// Returns a heap-allocated NUL-terminated string (empty when nothing was dumped). Caller frees with free().
+char* c_dump_air(const SemaFuncAirList* airs, const InternPool* ip, const char* func_filter);
+
+#endif
diff --git a/stage0/verbose_intern_pool.c b/stage0/verbose_intern_pool.c
@@ -0,0 +1,13 @@
+// verbose_intern_pool.c — C-side InternPool text dumper.
+// Formats InternPool entries as text matching the Zig-side format.
+
+#include "verbose_intern_pool.h"
+#include <stdlib.h>
+
+// Placeholder InternPool dumper: `ip` is ignored and the result is a
+// freshly allocated empty string, or NULL if the allocation fails.
+// The caller owns the returned pointer and releases it with free().
+char* c_dump_intern_pool(const InternPool* ip) {
+    (void)ip;
+    // Stub: nothing is formatted yet. calloc zero-fills, so the single
+    // byte it returns is already the "" terminator.
+    return calloc(1, 1);
+}
diff --git a/stage0/verbose_intern_pool.h b/stage0/verbose_intern_pool.h
@@ -0,0 +1,11 @@
+// verbose_intern_pool.h — C-side InternPool text dumper (declaration of c_dump_intern_pool).
+#ifndef _ZIG0_VERBOSE_INTERN_POOL_H__
+#define _ZIG0_VERBOSE_INTERN_POOL_H__
+
+#include "intern_pool.h"
+
+// Format C InternPool as text matching the Zig-side format (the current implementation is a stub returning "").
+// Returns a heap-allocated NUL-terminated string. Caller frees with free().
+char* c_dump_intern_pool(const InternPool* ip);
+
+#endif