sema: embed func_ip in .air format, add lazy module struct types
Embed the Zig compiler's IP items count (func_ip) in the .air binary format so that test mismatch errors show [zig_ip_base=N], eliminating the need for temporary debug prints in src/Zcu/PerThread.zig. Add lazy module-level struct type creation in the C sema: each imported module gets a type_struct IP entry when first loaded via loadImportZirFromPath, matching the Zig compiler's demand-driven ensureFileAnalyzed → createFileRootStruct sequence. The root module's struct type is created at the start of semaAnalyze. For neghf2.zig (num_passing=4), the IP index gap shrinks from 864 to 862 (the root struct created at the start of semaAnalyze, plus the common.zig struct created lazily during cross-module call resolution). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -38,6 +38,7 @@ const AirCollector = struct {
|
||||
funcs: std.ArrayListUnmanaged(CSemaFuncAir) = .empty,
|
||||
err_buf: *[err_buf_size]u8,
|
||||
callback_count: u32 = 0,
|
||||
comp: ?*Compilation = null,
|
||||
|
||||
fn hasError(self: *const AirCollector) bool {
|
||||
return self.err_buf[0] != 0;
|
||||
@@ -97,9 +98,18 @@ const AirCollector = struct {
|
||||
// Copy name
|
||||
const name_copy = try gpa.dupeZ(u8, name);
|
||||
|
||||
// Compute IP items count from the Compilation's intern pool.
|
||||
var ip_count: u32 = 0;
|
||||
if (self.comp) |comp| {
|
||||
if (comp.zcu) |zcu| {
|
||||
for (zcu.intern_pool.locals) |*local|
|
||||
ip_count += @intCast(local.mutate.items.len);
|
||||
}
|
||||
}
|
||||
|
||||
try self.funcs.append(gpa, .{
|
||||
.name = name_copy.ptr,
|
||||
.func_ip = std.math.maxInt(u32), // IP_INDEX_NONE
|
||||
.func_ip = ip_count,
|
||||
.air = .{
|
||||
.inst_len = inst_len,
|
||||
.inst_cap = inst_len,
|
||||
@@ -273,6 +283,7 @@ pub fn zigCompileAirImpl(
|
||||
else => return err,
|
||||
};
|
||||
defer comp.destroy();
|
||||
collector.comp = comp;
|
||||
|
||||
try comp.update(std.Progress.Node.none);
|
||||
|
||||
|
||||
@@ -163,6 +163,7 @@ pub fn main() !void {
|
||||
/// Per function:
|
||||
/// name_len: u32
|
||||
/// name: [name_len]u8
|
||||
/// func_ip: u32
|
||||
/// inst_len: u32
|
||||
/// inst_tags: [inst_len]u8
|
||||
/// inst_datas: [inst_len * 8]u8
|
||||
@@ -213,6 +214,9 @@ fn processSource(gpa: std.mem.Allocator, src_path: []const u8, module_root: []co
|
||||
try w.writeInt(u32, @intCast(name.len), .little);
|
||||
try w.writeAll(name);
|
||||
|
||||
// func_ip
|
||||
try w.writeInt(u32, f.func_ip, .little);
|
||||
|
||||
// inst_tags + inst_datas
|
||||
const inst_len = f.air.inst_len;
|
||||
try w.writeInt(u32, inst_len, .little);
|
||||
|
||||
@@ -60,23 +60,19 @@ entries so that IP indices in the function body AIR match.
|
||||
- The Zig compiler creates IP entries through `createFileRootStruct`
|
||||
(in `src/Zcu/PerThread.zig`) and `scanNamespace`. These must be
|
||||
ported to C.
|
||||
- Source of truth for the IP entry sequence: add a temporary debug print
|
||||
in `src/Zcu/PerThread.zig` at the `verbose_air_callback` call site
|
||||
(around line 4478) to dump `ip.locals[0].shared.items` for the
|
||||
function being debugged. Build with `zig build test-zig0` to compile
|
||||
a new `air_gen`, then run it directly on the target file. **Always
|
||||
revert** the debug print before committing.
|
||||
- The IP items count at function analysis time is embedded in the
|
||||
`.air` binary format as `func_ip`. When a test mismatch occurs, it
|
||||
is displayed as `[zig_ip_base=N]` in the error output.
|
||||
|
||||
### The module-system porting loop
|
||||
|
||||
1. **Dump the Zig IP.** Temporarily add debug output in
|
||||
`src/Zcu/PerThread.zig` at the `verbose_air_callback` site.
|
||||
Rebuild, run the `air_gen` binary directly on the target corpus
|
||||
file (e.g. `lib/compiler_rt/neghf2.zig`), capture the IP entries.
|
||||
Revert the debug print.
|
||||
2. **Compare.** Run `zig build test-zig0` with `num_passing` bumped.
|
||||
Note the mismatch: `a=0x???[ip] b=0x???[ip]`. The gap `a − b` is
|
||||
the number of missing IP entries.
|
||||
1. **Read `func_ip` from mismatch output.** Run `zig build test-zig0`
|
||||
with `num_passing` bumped. The mismatch message includes
|
||||
`[zig_ip_base=N]` — this is the Zig compiler's IP items count at
|
||||
function analysis time. The gap between `a` and `b` IP refs tells
|
||||
you how many entries the C sema is missing.
|
||||
2. **Compare.** Note the mismatch: `a=0x???[ip] b=0x???[ip]`. The gap
|
||||
`a − b` is the number of missing IP entries.
|
||||
3. **Port the next batch.** Identify what the Zig compiler creates for
|
||||
the next ~10 IP entries (struct types, ptr_nav, enum types, etc.).
|
||||
Port the corresponding logic from `src/Zcu/PerThread.zig` and
|
||||
|
||||
@@ -34,6 +34,21 @@ static uint32_t simpleStringHash(const char* s) {
|
||||
#define MAX_EXPORTED_DECL_NAMES 16
|
||||
static uint32_t s_exported_decl_names[MAX_EXPORTED_DECL_NAMES];
|
||||
|
||||
// --- Module-level IP entry tracking ---
|
||||
// Tracks loaded modules to create struct type IP entries matching the
|
||||
// Zig compiler's module-level analysis. Each imported file module gets
|
||||
// a type_struct IP entry, created lazily when the module is first
|
||||
// accessed during semantic analysis.
|
||||
#define MAX_LOADED_MODULES 256
|
||||
typedef struct {
|
||||
char path[1024]; // canonical file path
|
||||
} LoadedModule;
|
||||
|
||||
static LoadedModule s_loaded_modules[MAX_LOADED_MODULES];
|
||||
static uint32_t s_num_loaded_modules;
|
||||
static uint32_t s_next_struct_hash; // unique hash counter for struct types
|
||||
static InternPool* s_module_ip; // IP for struct type creation
|
||||
|
||||
void semaInit(Sema* sema, InternPool* ip, Zir code) {
|
||||
memset(sema, 0, sizeof(*sema));
|
||||
sema->ip = ip;
|
||||
@@ -1719,6 +1734,8 @@ static FuncZirInfo parseFuncZir(Sema* sema, uint32_t inst) {
|
||||
|
||||
// Forward declaration (defined later, used by findDeclImportPath et al).
|
||||
static uint32_t findDeclInstByNameInZir(const Zir* zir, const char* decl_name);
|
||||
// Forward declaration (defined after loadStdImportZir).
|
||||
static InternPoolIndex ensureModuleStructType(const char* full_path);
|
||||
|
||||
// findDeclImportPath: given a declaration name index, check if the
|
||||
// declaration's value body contains a ZIR_INST_IMPORT. If so, return
|
||||
@@ -1841,6 +1858,10 @@ static Zir loadImportZirFromPath(const char* full_path, Ast* out_ast) {
|
||||
fclose(f);
|
||||
src[read_len] = '\0';
|
||||
|
||||
// Lazily create a struct type for this module (matches the Zig
|
||||
// compiler's ensureFileAnalyzed → createFileRootStruct flow).
|
||||
(void)ensureModuleStructType(full_path);
|
||||
|
||||
// Parse.
|
||||
*out_ast = astParse(src, (uint32_t)read_len);
|
||||
out_ast->owns_source = true;
|
||||
@@ -2145,6 +2166,49 @@ static Zir loadStdImportZir(const char* module_root, const char* source_dir,
|
||||
return zir;
|
||||
}
|
||||
|
||||
// --- Module-level struct type creation ---
|
||||
// Creates struct type IP entries for imported modules, matching the Zig
|
||||
// compiler's createFileRootStruct / ensureFileAnalyzed sequence.
|
||||
|
||||
// Create a struct type IP entry for a new module.
|
||||
// Each module gets a unique struct type in the InternPool.
|
||||
static InternPoolIndex createModuleStructType(void) {
|
||||
InternPoolKey key;
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.tag = IP_KEY_STRUCT_TYPE;
|
||||
key.data.struct_type = s_next_struct_hash++;
|
||||
return ipIntern(s_module_ip, key);
|
||||
}
|
||||
|
||||
// Lazily ensure a struct type IP entry exists for a module identified
|
||||
// by its filesystem path. Called when an import is first resolved
|
||||
// during semantic analysis, matching the Zig compiler's demand-driven
|
||||
// ensureFileAnalyzed / createFileRootStruct sequence.
|
||||
// Returns the IP index of the newly created struct type, or IP_INDEX_NONE
|
||||
// if the path is already tracked, the module table is full, or no IP is set.
|
||||
static InternPoolIndex ensureModuleStructType(const char* full_path) {
|
||||
if (!s_module_ip)
|
||||
return IP_INDEX_NONE;
|
||||
// Check if already tracked.
|
||||
for (uint32_t i = 0; i < s_num_loaded_modules; i++) {
|
||||
if (strcmp(s_loaded_modules[i].path, full_path) == 0)
|
||||
return IP_INDEX_NONE; // already created, no new entry
|
||||
}
|
||||
if (s_num_loaded_modules >= MAX_LOADED_MODULES)
|
||||
return IP_INDEX_NONE;
|
||||
// Record module and create struct type.
|
||||
LoadedModule* mod = &s_loaded_modules[s_num_loaded_modules++];
|
||||
snprintf(mod->path, sizeof(mod->path), "%s", full_path);
|
||||
return createModuleStructType();
|
||||
}
|
||||
|
||||
// Reset module tracking state.
|
||||
static void resetModuleTracking(void) {
|
||||
s_num_loaded_modules = 0;
|
||||
s_next_struct_hash = 0;
|
||||
s_module_ip = NULL;
|
||||
}
|
||||
|
||||
// populateDeclTableFromZir: populate sema's decl table from a ZIR module.
|
||||
// Scans the struct_decl at instruction 0 and records name→inst mappings.
|
||||
// Used during cross-module inline calls so that decl_val/decl_ref can
|
||||
@@ -8427,6 +8491,19 @@ SemaFuncAirList semaAnalyze(Sema* sema) {
|
||||
SemaBlock root_block;
|
||||
semaBlockInit(&root_block, sema, NULL);
|
||||
|
||||
// --- Module-level IP entry creation ---
|
||||
// Initialize module tracking. Struct type IP entries are created
|
||||
// lazily as imports are resolved during semantic analysis, matching
|
||||
// the Zig compiler's demand-driven processing order.
|
||||
resetModuleTracking();
|
||||
s_module_ip = ip;
|
||||
|
||||
// Create struct type for the root module (matches Zig's
|
||||
// createFileRootStruct called from semaFile).
|
||||
if (sema->code.inst_len > 0 && sema->source_dir) {
|
||||
(void)createModuleStructType();
|
||||
}
|
||||
|
||||
// If we have ZIR instructions, attempt to analyze the main struct
|
||||
// body. Instruction 0 is always struct_decl (extended).
|
||||
if (sema->code.inst_len > 0) {
|
||||
@@ -8441,6 +8518,9 @@ SemaFuncAirList semaAnalyze(Sema* sema) {
|
||||
semaBlockDeinit(&root_block);
|
||||
sema->func_air_list = NULL;
|
||||
|
||||
// Clean up module tracking.
|
||||
resetModuleTracking();
|
||||
|
||||
// Free the module-level Air arrays from sema (they're empty for
|
||||
// comptime-only modules but allocated).
|
||||
free(sema->air_inst_tags);
|
||||
|
||||
@@ -232,6 +232,7 @@ const air_tag_names = @import("air_tag_names");
|
||||
/// corresponding pointer is undefined and must not be dereferenced.
|
||||
pub const PrecomputedFunc = struct {
|
||||
name: []const u8,
|
||||
func_ip: u32,
|
||||
inst_len: u32,
|
||||
tags: [*]const u8,
|
||||
datas: [*]const u8,
|
||||
@@ -246,6 +247,7 @@ pub const PrecomputedFunc = struct {
|
||||
/// Per function:
|
||||
/// name_len: u32
|
||||
/// name: [name_len]u8
|
||||
/// func_ip: u32
|
||||
/// inst_len: u32
|
||||
/// inst_tags: [inst_len]u8
|
||||
/// inst_datas: [inst_len * 8]u8
|
||||
@@ -265,6 +267,9 @@ pub fn parsePrecomputedAir(data: []const u8) ![]PrecomputedFunc {
|
||||
f.name = data[pos..][0..name_len];
|
||||
pos += name_len;
|
||||
|
||||
// func_ip
|
||||
f.func_ip = readU32(data, &pos) orelse return error.InvalidAirData;
|
||||
|
||||
// inst_tags + inst_datas — point directly into data
|
||||
const inst_len = readU32(data, &pos) orelse return error.InvalidAirData;
|
||||
f.inst_len = inst_len;
|
||||
@@ -320,7 +325,7 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
|
||||
return error.AirMismatch;
|
||||
};
|
||||
const c_pf = precomputedFromCAir(cf);
|
||||
try airCompareOne(c_name, pf.*, c_pf);
|
||||
try airCompareOne(c_name, pf.*, c_pf, pf.func_ip);
|
||||
}
|
||||
// Unidirectional comparison: every C function must exist in Zig AIR
|
||||
// and match. Zig can have extra functions (e.g. from test blocks or
|
||||
@@ -330,6 +335,7 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
|
||||
fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
|
||||
return .{
|
||||
.name = if (cf.name) |n| std.mem.span(n) else "",
|
||||
.func_ip = cf.func_ip,
|
||||
.inst_len = cf.air.inst_len,
|
||||
.tags = if (cToOpt(u8, cf.air.inst_tags)) |t| t else undefined,
|
||||
.datas = if (cToOpt(c.AirInstData, cf.air.inst_datas)) |d| @ptrCast(d) else undefined,
|
||||
@@ -722,7 +728,7 @@ fn normalizeNtsPadding(extra: []u32, nts_index: u32) void {
|
||||
}
|
||||
}
|
||||
|
||||
fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void {
|
||||
fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc, zig_ip_base: u32) !void {
|
||||
if (a.inst_len != b.inst_len) {
|
||||
std.debug.print("'{s}': inst_len mismatch: a={d} b={d}\n", .{ name, a.inst_len, b.inst_len });
|
||||
if (a.inst_len > 0) {
|
||||
@@ -778,7 +784,7 @@ fn airCompareOne(name: []const u8, a: PrecomputedFunc, b: PrecomputedFunc) !void
|
||||
// This slot is a Ref — compare directly (C and Zig
|
||||
// IP indices must match).
|
||||
if (a_word != b_word) {
|
||||
std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: a=0x{x}[{s}] b=0x{x}[{s}] (tag={s})\n", .{ name, j, slot, a_word, refKindStr(a_word), b_word, refKindStr(b_word), airTagNameSlice(tag_val) });
|
||||
std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: a=0x{x}[{s}] b=0x{x}[{s}] (tag={s}) [zig_ip_base={d}]\n", .{ name, j, slot, a_word, refKindStr(a_word), b_word, refKindStr(b_word), airTagNameSlice(tag_val), zig_ip_base });
|
||||
return error.AirMismatch;
|
||||
}
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user