sema: embed func_ip in .air format, add lazy module struct types

Embed the Zig compiler's IP items count (func_ip) in the .air binary
format so that test mismatch errors show [zig_ip_base=N], eliminating
the need for temporary debug prints in src/Zcu/PerThread.zig.

Add lazy module-level struct type creation in the C sema: each imported
module gets a type_struct IP entry when first loaded via
loadImportZirFromPath, matching the Zig compiler's demand-driven
ensureFileAnalyzed → createFileRootStruct sequence. The root module's
struct type is created at the start of semaAnalyze.

For neghf2.zig (num_passing=4), the IP index gap shrinks from 864 to
862 (root struct + common.zig struct created lazily during cross-module
call resolution).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-02-25 18:43:03 +00:00
parent 40c55b93aa
commit 09eac343fb
5 changed files with 115 additions and 18 deletions

View File

@@ -38,6 +38,7 @@ const AirCollector = struct {
funcs: std.ArrayListUnmanaged(CSemaFuncAir) = .empty,
err_buf: *[err_buf_size]u8,
callback_count: u32 = 0,
comp: ?*Compilation = null,
fn hasError(self: *const AirCollector) bool {
return self.err_buf[0] != 0;
@@ -97,9 +98,18 @@ const AirCollector = struct {
// Copy name
const name_copy = try gpa.dupeZ(u8, name);
// Compute IP items count from the Compilation's intern pool.
var ip_count: u32 = 0;
if (self.comp) |comp| {
if (comp.zcu) |zcu| {
for (zcu.intern_pool.locals) |*local|
ip_count += @intCast(local.mutate.items.len);
}
}
try self.funcs.append(gpa, .{
.name = name_copy.ptr,
.func_ip = std.math.maxInt(u32), // IP_INDEX_NONE
.func_ip = ip_count,
.air = .{
.inst_len = inst_len,
.inst_cap = inst_len,
@@ -273,6 +283,7 @@ pub fn zigCompileAirImpl(
else => return err,
};
defer comp.destroy();
collector.comp = comp;
try comp.update(std.Progress.Node.none);

View File

@@ -163,6 +163,7 @@ pub fn main() !void {
/// Per function:
/// name_len: u32
/// name: [name_len]u8
/// func_ip: u32 (Zig InternPool items count when this function's AIR was collected)
/// inst_len: u32
/// inst_tags: [inst_len]u8
/// inst_datas: [inst_len * 8]u8
@@ -213,6 +214,9 @@ fn processSource(gpa: std.mem.Allocator, src_path: []const u8, module_root: []co
try w.writeInt(u32, @intCast(name.len), .little);
try w.writeAll(name);
// func_ip
try w.writeInt(u32, f.func_ip, .little);
// inst_tags + inst_datas
const inst_len = f.air.inst_len;
try w.writeInt(u32, inst_len, .little);

View File

@@ -60,23 +60,19 @@ entries so that IP indices in the function body AIR match.
- The Zig compiler creates IP entries through `createFileRootStruct`
(in `src/Zcu/PerThread.zig`) and `scanNamespace`. These must be
ported to C.
- Source of truth for the IP entry sequence: add a temporary debug print
in `src/Zcu/PerThread.zig` at the `verbose_air_callback` call site
(around line 4478) to dump `ip.locals[0].shared.items` for the
function being debugged. Build with `zig build test-zig0` to compile
a new `air_gen`, then run it directly on the target file. **Always
revert** the debug print before committing.
- The IP items count at function analysis time is embedded in the
`.air` binary format as `func_ip`. When a Ref mismatch occurs (the
`datas ref mismatch` message), it is displayed as `[zig_ip_base=N]`
in the error output.
### The module-system porting loop
1. **Dump the Zig IP.** Temporarily add debug output in
`src/Zcu/PerThread.zig` at the `verbose_air_callback` site.
Rebuild, run the `air_gen` binary directly on the target corpus
file (e.g. `lib/compiler_rt/neghf2.zig`), capture the IP entries.
Revert the debug print.
2. **Compare.** Run `zig build test-zig0` with `num_passing` bumped.
Note the mismatch: `a=0x???[ip] b=0x???[ip]`. The gap `a b` is
the number of missing IP entries.
1. **Read `func_ip` from mismatch output.** Run `zig build test-zig0`
with `num_passing` bumped. The mismatch message includes
`[zig_ip_base=N]` — this is the Zig compiler's IP items count at
function analysis time.
2. **Compare.** Note the mismatch: `a=0x???[ip] b=0x???[ip]`. The gap
`a - b` between the two IP refs is the number of IP entries the C sema
is missing.
3. **Port the next batch.** Identify what the Zig compiler creates for
the next ~10 IP entries (struct types, ptr_nav, enum types, etc.).
Port the corresponding logic from `src/Zcu/PerThread.zig` and

View File

@@ -34,6 +34,21 @@ static uint32_t simpleStringHash(const char* s) {
#define MAX_EXPORTED_DECL_NAMES 16
static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES];
// --- Module-level IP entry tracking ---
// Tracks loaded modules to create struct type IP entries matching the
// Zig compiler's module-level analysis. Each imported file module gets
// a type_struct IP entry, created lazily when the module is first
// accessed during semantic analysis.
//
// All of this state is file-scope and shared by every Sema instance.
// semaAnalyze resets it on entry, points s_module_ip at its InternPool,
// and resets it again before returning.
// NOTE(review): because the state is global, overlapping semaAnalyze
// calls would presumably interfere with each other — confirm that
// callers never run analyses concurrently or re-entrantly.
#define MAX_LOADED_MODULES 256 // capacity of s_loaded_modules
typedef struct {
char path[1024]; // module path as passed to ensureModuleStructType; copied verbatim (no canonicalization here) and truncated to 1023 bytes
} LoadedModule;
static LoadedModule s_loaded_modules[MAX_LOADED_MODULES]; // only the first s_num_loaded_modules entries are valid
static uint32_t s_num_loaded_modules; // number of modules recorded so far
static uint32_t s_next_struct_hash; // unique hash counter for struct types; post-incremented by createModuleStructType
static InternPool* s_module_ip; // IP for struct type creation; NULL disables ensureModuleStructType
void semaInit(Sema* sema, InternPool* ip, Zir code) {
memset(sema, 0, sizeof(*sema));
sema->ip = ip;
@@ -1719,6 +1734,8 @@ static FuncZirInfo parseFuncZir(Sema* sema, uint32_t inst) {
// Forward declaration (defined later, used by findDeclImportPath et al).
static uint32_t findDeclInstByNameInZir(const Zir* zir, const char* decl_name);
// Forward declaration (defined after loadStdImportZir).
static InternPoolIndex ensureModuleStructType(const char* full_path);
// findDeclImportPath: given a declaration name index, check if the
// declaration's value body contains a ZIR_INST_IMPORT. If so, return
@@ -1841,6 +1858,10 @@ static Zir loadImportZirFromPath(const char* full_path, Ast* out_ast) {
fclose(f);
src[read_len] = '\0';
// Lazily create a struct type for this module (matches the Zig
// compiler's ensureFileAnalyzed → createFileRootStruct flow).
(void)ensureModuleStructType(full_path);
// Parse.
*out_ast = astParse(src, (uint32_t)read_len);
out_ast->owns_source = true;
@@ -2145,6 +2166,49 @@ static Zir loadStdImportZir(const char* module_root, const char* source_dir,
return zir;
}
// --- Module-level struct type creation ---
// Creates struct type IP entries for imported modules, matching the Zig
// compiler's createFileRootStruct / ensureFileAnalyzed sequence.
// Intern one fresh struct-type key into s_module_ip and return the
// resulting InternPool index.
// The key carries the current value of s_next_struct_hash, which is
// then advanced, so each call since the last reset uses a distinct
// value. The key is zeroed with memset before the fields are filled in.
static InternPoolIndex createModuleStructType(void) {
    const uint32_t unique_id = s_next_struct_hash;
    s_next_struct_hash += 1;

    InternPoolKey struct_key;
    memset(&struct_key, 0, sizeof(struct_key));
    struct_key.tag = IP_KEY_STRUCT_TYPE;
    struct_key.data.struct_type = unique_id;

    return ipIntern(s_module_ip, struct_key);
}
// Lazily ensure a struct type IP entry exists for a module identified
// by its filesystem path. Called when an import is first resolved
// during semantic analysis, matching the Zig compiler's demand-driven
// ensureFileAnalyzed / createFileRootStruct sequence.
//
// Returns the IP index of the newly created struct type the first time
// a path is seen. Returns IP_INDEX_NONE in every other case:
//   - module tracking is inactive (s_module_ip is NULL),
//   - the path was already recorded (no new IP entry is created),
//   - the module table is full (no IP entry is created).
// The index of an already-recorded module is NOT returned; it is not
// stored anywhere in the table.
//
// Paths are stored truncated to the capacity of LoadedModule.path, so
// the lookup compares only that many bytes. Comparing the untruncated
// argument with strcmp would never match a truncated entry, and every
// reload of an over-long path would register a duplicate module and
// intern an extra struct type.
static InternPoolIndex ensureModuleStructType(const char* full_path) {
    if (!s_module_ip)
        return IP_INDEX_NONE;

    // Longest path a table entry can hold (excluding the terminator).
    const size_t max_stored_len = sizeof(s_loaded_modules[0].path) - 1;

    // Check if already tracked. For paths shorter than the capacity
    // this is equivalent to strcmp; for longer ones it matches the
    // truncated form that snprintf stored below.
    for (uint32_t i = 0; i < s_num_loaded_modules; i++) {
        if (strncmp(s_loaded_modules[i].path, full_path, max_stored_len) == 0)
            return IP_INDEX_NONE; // already created, no new entry
    }

    if (s_num_loaded_modules >= MAX_LOADED_MODULES)
        return IP_INDEX_NONE;

    // Record module and create struct type.
    LoadedModule* mod = &s_loaded_modules[s_num_loaded_modules++];
    snprintf(mod->path, sizeof(mod->path), "%s", full_path);
    return createModuleStructType();
}
// Return module tracking to its initial state: detach the InternPool,
// restart the struct-hash counter, and forget every recorded module.
// The entries of s_loaded_modules are not wiped; setting the count to
// zero is what takes them out of use.
static void resetModuleTracking(void) {
    s_module_ip = NULL;
    s_next_struct_hash = 0;
    s_num_loaded_modules = 0;
}
// populateDeclTableFromZir: populate sema's decl table from a ZIR module.
// Scans the struct_decl at instruction 0 and records name→inst mappings.
// Used during cross-module inline calls so that decl_val/decl_ref can
@@ -8427,6 +8491,19 @@ SemaFuncAirList semaAnalyze(Sema* sema) {
SemaBlock root_block;
semaBlockInit(&root_block, sema, NULL);
// --- Module-level IP entry creation ---
// Initialize module tracking. Struct type IP entries are created
// lazily as imports are resolved during semantic analysis, matching
// the Zig compiler's demand-driven processing order.
resetModuleTracking();
s_module_ip = ip;
// Create struct type for the root module (matches Zig's
// createFileRootStruct called from semaFile).
if (sema->code.inst_len > 0 && sema->source_dir) {
(void)createModuleStructType();
}
// If we have ZIR instructions, attempt to analyze the main struct
// body. Instruction 0 is always struct_decl (extended).
if (sema->code.inst_len > 0) {
@@ -8441,6 +8518,9 @@ SemaFuncAirList semaAnalyze(Sema* sema) {
semaBlockDeinit(&root_block);
sema->func_air_list = NULL;
// Clean up module tracking.
resetModuleTracking();
// Free the module-level Air arrays from sema (they're empty for
// comptime-only modules but allocated).
free(sema->air_inst_tags);

View File

@@ -232,6 +232,7 @@ const air_tag_names = @import("air_tag_names");
/// corresponding pointer is undefined and must not be dereferenced.
pub const PrecomputedFunc = struct {
name: []const u8,
func_ip: u32,
inst_len: u32,
tags: [*]const u8,
datas: [*]const u8,
@@ -246,6 +247,7 @@ pub const PrecomputedFunc = struct {
/// Per function:
/// name_len: u32
/// name: [name_len]u8
/// func_ip: u32 (Zig InternPool items count recorded when the function's AIR was generated)
/// inst_len: u32
/// inst_tags: [inst_len]u8
/// inst_datas: [inst_len * 8]u8
@@ -265,6 +267,9 @@ pub fn parsePrecomputedAir(data: []const u8) ![]PrecomputedFunc {
f.name = data[pos..][0..name_len];
pos += name_len;
// func_ip
f.func_ip = readU32(data, &pos) orelse return error.InvalidAirData;
// inst_tags + inst_datas — point directly into data
const inst_len = readU32(data, &pos) orelse return error.InvalidAirData;
f.inst_len = inst_len;
@@ -320,7 +325,7 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
return error.AirMismatch;
};
const c_pf = precomputedFromCAir(cf);
try airCompareOne(c_name, pf.*, c_pf);
try airCompareOne(c_name, pf.*, c_pf, pf.func_ip);
}
// Unidirectional comparison: every C function must exist in Zig AIR
// and match. Zig can have extra functions (e.g. from test blocks or
@@ -330,6 +335,7 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
return .{
.name = if (cf.name) |n| std.mem.span(n) else "",
.func_ip = cf.func_ip,
.inst_len = cf.air.inst_len,
.tags = if (cToOpt(u8, cf.air.inst_tags)) |t| t else undefined,
.datas = if (cToOpt(c.AirInstData, cf.air.inst_datas)) |d| @ptrCast(d) else undefined,
@@ -722,7 +728,7 @@ fn normalizeNtsPadding(extra: []u32, nts_index: u32) void {
}
}
fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void {
fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc, zig_ip_base: u32) !void {
if (a.inst_len != b.inst_len) {
std.debug.print("'{s}': inst_len mismatch: a={d} b={d}\n", .{ name, a.inst_len, b.inst_len });
if (a.inst_len > 0) {
@@ -778,7 +784,7 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void
// This slot is a Ref — compare directly (C and Zig
// IP indices must match).
if (a_word != b_word) {
std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: a=0x{x}[{s}] b=0x{x}[{s}] (tag={s})\n", .{ name, j, slot, a_word, refKindStr(a_word), b_word, refKindStr(b_word), airTagNameSlice(tag_val) });
std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: a=0x{x}[{s}] b=0x{x}[{s}] (tag={s}) [zig_ip_base={d}]\n", .{ name, j, slot, a_word, refKindStr(a_word), b_word, refKindStr(b_word), airTagNameSlice(tag_val), zig_ip_base });
return error.AirMismatch;
}
} else {