zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 09eac343fba91ce69647e6ba8f4b7cfb742ab2f2 (tree)
parent 40c55b93aa3f8a06947355f3d0cd162e062cc3f0
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Wed, 25 Feb 2026 18:43:03 +0000

sema: embed func_ip in .air format, add lazy module struct types

Embed the Zig compiler's IP items count (func_ip) in the .air binary
format so that test mismatch errors show [zig_ip_base=N], eliminating
the need for temporary debug prints in src/Zcu/PerThread.zig.

Add lazy module-level struct type creation in the C sema: each imported
module gets a type_struct IP entry when first loaded via
loadImportZirFromPath, matching the Zig compiler's demand-driven
ensureFileAnalyzed → createFileRootStruct sequence. The root module's
struct type is created at the start of semaAnalyze.

For neghf2.zig (num_passing=4), the IP index gap shrinks from 864 to
862 (root struct + common.zig struct created lazily during cross-module
call resolution).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M src/verbose_air.zig | 13 ++++++++++++-
M src/verbose_air_gen.zig | 4 ++++
M stage0/CLAUDE.md | 24 ++++++++++--------------
M stage0/sema.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M stage0/sema_test.zig | 12 +++++++++---
5 files changed, 115 insertions(+), 18 deletions(-)

diff --git a/src/verbose_air.zig b/src/verbose_air.zig @@ -38,6 +38,7 @@ const AirCollector = struct { funcs: std.ArrayListUnmanaged(CSemaFuncAir) = .empty, err_buf: *[err_buf_size]u8, callback_count: u32 = 0, + comp: ?*Compilation = null, fn hasError(self: *const AirCollector) bool { return self.err_buf[0] != 0; @@ -97,9 +98,18 @@ const AirCollector = struct { // Copy name const name_copy = try gpa.dupeZ(u8, name); + // Compute IP items count from the Compilation's intern pool. + var ip_count: u32 = 0; + if (self.comp) |comp| { + if (comp.zcu) |zcu| { + for (zcu.intern_pool.locals) |*local| + ip_count += @intCast(local.mutate.items.len); + } + } + try self.funcs.append(gpa, .{ .name = name_copy.ptr, - .func_ip = std.math.maxInt(u32), // IP_INDEX_NONE + .func_ip = ip_count, .air = .{ .inst_len = inst_len, .inst_cap = inst_len, @@ -273,6 +283,7 @@ pub fn zigCompileAirImpl( else => return err, }; defer comp.destroy(); + collector.comp = comp; try comp.update(std.Progress.Node.none); diff --git a/src/verbose_air_gen.zig b/src/verbose_air_gen.zig @@ -163,6 +163,7 @@ pub fn main() !void { /// Per function: /// name_len: u32 /// name: [name_len]u8 +/// func_ip: u32 /// inst_len: u32 /// inst_tags: [inst_len]u8 /// inst_datas: [inst_len * 8]u8 @@ -213,6 +214,9 @@ fn processSource(gpa: std.mem.Allocator, src_path: []const u8, module_root: []co try w.writeInt(u32, @intCast(name.len), .little); try w.writeAll(name); + // func_ip + try w.writeInt(u32, f.func_ip, .little); + // inst_tags + inst_datas const inst_len = f.air.inst_len; try w.writeInt(u32, inst_len, .little); diff --git a/stage0/CLAUDE.md b/stage0/CLAUDE.md @@ -60,23 +60,19 @@ entries so that IP indices in the function body AIR match. - The Zig compiler creates IP entries through `createFileRootStruct` (in `src/Zcu/PerThread.zig`) and `scanNamespace`. These must be ported to C. 
-- Source of truth for the IP entry sequence: add a temporary debug print - in `src/Zcu/PerThread.zig` at the `verbose_air_callback` call site - (around line 4478) to dump `ip.locals[0].shared.items` for the - function being debugged. Build with `zig build test-zig0` to compile - a new `air_gen`, then run it directly on the target file. **Always - revert** the debug print before committing. +- The IP items count at function analysis time is embedded in the + `.air` binary format as `func_ip`. When a test mismatch occurs, it + is displayed as `[zig_ip_base=N]` in the error output. ### The module-system porting loop -1. **Dump the Zig IP.** Temporarily add debug output in - `src/Zcu/PerThread.zig` at the `verbose_air_callback` site. - Rebuild, run the `air_gen` binary directly on the target corpus - file (e.g. `lib/compiler_rt/neghf2.zig`), capture the IP entries. - Revert the debug print. -2. **Compare.** Run `zig build test-zig0` with `num_passing` bumped. - Note the mismatch: `a=0x???[ip] b=0x???[ip]`. The gap `a − b` is - the number of missing IP entries. +1. **Read `func_ip` from mismatch output.** Run `zig build test-zig0` + with `num_passing` bumped. The mismatch message includes + `[zig_ip_base=N]` — this is the Zig compiler's IP items count at + function analysis time. The gap between `a` and `b` IP refs tells + you how many entries the C sema is missing. +2. **Compare.** Note the mismatch: `a=0x???[ip] b=0x???[ip]`. The gap + `a − b` is the number of missing IP entries. 3. **Port the next batch.** Identify what the Zig compiler creates for the next ~10 IP entries (struct types, ptr_nav, enum types, etc.). 
Port the corresponding logic from `src/Zcu/PerThread.zig` and diff --git a/stage0/sema.c b/stage0/sema.c @@ -34,6 +34,21 @@ static uint32_t simpleStringHash(const char* s) { #define MAX_EXPORTED_DECL_NAMES 16 static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES]; +// --- Module-level IP entry tracking --- +// Tracks loaded modules to create struct type IP entries matching the +// Zig compiler's module-level analysis. Each imported file module gets +// a type_struct IP entry, created lazily when the module is first +// accessed during semantic analysis. +#define MAX_LOADED_MODULES 256 +typedef struct { + char path[1024]; // canonical file path +} LoadedModule; + +static LoadedModule s_loaded_modules[MAX_LOADED_MODULES]; +static uint32_t s_num_loaded_modules; +static uint32_t s_next_struct_hash; // unique hash counter for struct types +static InternPool* s_module_ip; // IP for struct type creation + void semaInit(Sema* sema, InternPool* ip, Zir code) { memset(sema, 0, sizeof(*sema)); sema->ip = ip; @@ -1719,6 +1734,8 @@ static FuncZirInfo parseFuncZir(Sema* sema, uint32_t inst) { // Forward declaration (defined later, used by findDeclImportPath et al). static uint32_t findDeclInstByNameInZir(const Zir* zir, const char* decl_name); +// Forward declaration (defined after loadStdImportZir). +static InternPoolIndex ensureModuleStructType(const char* full_path); // findDeclImportPath: given a declaration name index, check if the // declaration's value body contains a ZIR_INST_IMPORT. If so, return @@ -1841,6 +1858,10 @@ static Zir loadImportZirFromPath(const char* full_path, Ast* out_ast) { fclose(f); src[read_len] = '\0'; + // Lazily create a struct type for this module (matches the Zig + // compiler's ensureFileAnalyzed → createFileRootStruct flow). + (void)ensureModuleStructType(full_path); + // Parse. 
*out_ast = astParse(src, (uint32_t)read_len); out_ast->owns_source = true; @@ -2145,6 +2166,49 @@ static Zir loadStdImportZir(const char* module_root, const char* source_dir, return zir; } +// --- Module-level struct type creation --- +// Creates struct type IP entries for imported modules, matching the Zig +// compiler's createFileRootStruct / ensureFileAnalyzed sequence. + +// Create a struct type IP entry for a new module. +// Each module gets a unique struct type in the InternPool. +static InternPoolIndex createModuleStructType(void) { + InternPoolKey key; + memset(&key, 0, sizeof(key)); + key.tag = IP_KEY_STRUCT_TYPE; + key.data.struct_type = s_next_struct_hash++; + return ipIntern(s_module_ip, key); +} + +// Lazily ensure a struct type IP entry exists for a module identified +// by its filesystem path. Called when an import is first resolved +// during semantic analysis, matching the Zig compiler's demand-driven +// ensureFileAnalyzed / createFileRootStruct sequence. +// Returns the IP index of the struct type, or IP_INDEX_NONE if the +// module table is full. +static InternPoolIndex ensureModuleStructType(const char* full_path) { + if (!s_module_ip) + return IP_INDEX_NONE; + // Check if already tracked. + for (uint32_t i = 0; i < s_num_loaded_modules; i++) { + if (strcmp(s_loaded_modules[i].path, full_path) == 0) + return IP_INDEX_NONE; // already created, no new entry + } + if (s_num_loaded_modules >= MAX_LOADED_MODULES) + return IP_INDEX_NONE; + // Record module and create struct type. + LoadedModule* mod = &s_loaded_modules[s_num_loaded_modules++]; + snprintf(mod->path, sizeof(mod->path), "%s", full_path); + return createModuleStructType(); +} + +// Reset module tracking state. +static void resetModuleTracking(void) { + s_num_loaded_modules = 0; + s_next_struct_hash = 0; + s_module_ip = NULL; +} + // populateDeclTableFromZir: populate sema's decl table from a ZIR module. // Scans the struct_decl at instruction 0 and records name→inst mappings. 
// Used during cross-module inline calls so that decl_val/decl_ref can @@ -8427,6 +8491,19 @@ SemaFuncAirList semaAnalyze(Sema* sema) { SemaBlock root_block; semaBlockInit(&root_block, sema, NULL); + // --- Module-level IP entry creation --- + // Initialize module tracking. Struct type IP entries are created + // lazily as imports are resolved during semantic analysis, matching + // the Zig compiler's demand-driven processing order. + resetModuleTracking(); + s_module_ip = ip; + + // Create struct type for the root module (matches Zig's + // createFileRootStruct called from semaFile). + if (sema->code.inst_len > 0 && sema->source_dir) { + (void)createModuleStructType(); + } + // If we have ZIR instructions, attempt to analyze the main struct // body. Instruction 0 is always struct_decl (extended). if (sema->code.inst_len > 0) { @@ -8441,6 +8518,9 @@ SemaFuncAirList semaAnalyze(Sema* sema) { semaBlockDeinit(&root_block); sema->func_air_list = NULL; + // Clean up module tracking. + resetModuleTracking(); + // Free the module-level Air arrays from sema (they're empty for // comptime-only modules but allocated). free(sema->air_inst_tags); diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig @@ -232,6 +232,7 @@ const air_tag_names = @import("air_tag_names"); /// corresponding pointer is undefined and must not be dereferenced. 
pub const PrecomputedFunc = struct { name: []const u8, + func_ip: u32, inst_len: u32, tags: [*]const u8, datas: [*]const u8, @@ -246,6 +247,7 @@ pub const PrecomputedFunc = struct { /// Per function: /// name_len: u32 /// name: [name_len]u8 +/// func_ip: u32 /// inst_len: u32 /// inst_tags: [inst_len]u8 /// inst_datas: [inst_len * 8]u8 @@ -265,6 +267,9 @@ pub fn parsePrecomputedAir(data: []const u8) ![]PrecomputedFunc { f.name = data[pos..][0..name_len]; pos += name_len; + // func_ip + f.func_ip = readU32(data, &pos) orelse return error.InvalidAirData; + // inst_tags + inst_datas — point directly into data const inst_len = readU32(data, &pos) orelse return error.InvalidAirData; f.inst_len = inst_len; @@ -320,7 +325,7 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li return error.AirMismatch; }; const c_pf = precomputedFromCAir(cf); - try airCompareOne(c_name, pf.*, c_pf); + try airCompareOne(c_name, pf.*, c_pf, pf.func_ip); } // Unidirectional comparison: every C function must exist in Zig AIR // and match. Zig can have extra functions (e.g. 
from test blocks or @@ -330,6 +335,7 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc { return .{ .name = if (cf.name) |n| std.mem.span(n) else "", + .func_ip = cf.func_ip, .inst_len = cf.air.inst_len, .tags = if (cToOpt(u8, cf.air.inst_tags)) |t| t else undefined, .datas = if (cToOpt(c.AirInstData, cf.air.inst_datas)) |d| @ptrCast(d) else undefined, @@ -722,7 +728,7 @@ fn normalizeNtsPadding(extra: []u32, nts_index: u32) void { } } -fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void { +fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc, zig_ip_base: u32) !void { if (a.inst_len != b.inst_len) { std.debug.print("'{s}': inst_len mismatch: a={d} b={d}\n", .{ name, a.inst_len, b.inst_len }); if (a.inst_len > 0) { @@ -778,7 +784,7 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void // This slot is a Ref — compare directly (C and Zig // IP indices must match). if (a_word != b_word) { - std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: a=0x{x}[{s}] b=0x{x}[{s}] (tag={s})\n", .{ name, j, slot, a_word, refKindStr(a_word), b_word, refKindStr(b_word), airTagNameSlice(tag_val) }); + std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: a=0x{x}[{s}] b=0x{x}[{s}] (tag={s}) [zig_ip_base={d}]\n", .{ name, j, slot, a_word, refKindStr(a_word), b_word, refKindStr(b_word), airTagNameSlice(tag_val), zig_ip_base }); return error.AirMismatch; } } else {