compile std as root + unidirectional AIR comparison + exact FQN matching

air_gen: replace per-file symlink workaround with two-pass compilation.
Pass 1 compiles lib/std/std.zig as root with use_root_as_std=true
(one compilation, all lib/std/ functions). Pass 2 compiles non-lib/std/
files standalone. Symlink workaround eliminated entirely.

build.zig: pass all corpus.files (not 0..num_passing) to air_gen,
skipping lib/std/ files. Bumping num_passing no longer invalidates
the air_gen cache.

air_data.zig: route lib/std/ paths to the combined std.zig.air file.

sema_test.zig: switch to unidirectional comparison (C→Zig only) and
exact FQN matching. Remove stripModulePrefix, bare-name fallback, and
unused cNameSpan. Add pathToModulePrefix and pathStem helpers.

sema.h/sema.c: add root_fqn, module_prefix, and is_test fields to
Sema struct. Function names use "{root_fqn}[.{prefix}].{name}" format
to match Zig's FQN convention.

stages_test.zig: set root_fqn and module_prefix on C sema so FQNs
match Zig's naming. Remove symlink workaround — C sema uses real
paths directly. Set is_test=false to match air_gen.

corpus.zig: remove lib/init/src/main.zig (template file with
@import(".NAME") that cannot compile standalone).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-02-25 07:38:35 +00:00
parent 9fa8232778
commit 7f58c766f8
8 changed files with 177 additions and 114 deletions

View File

@@ -1746,8 +1746,10 @@ fn addAirGen(
// Run generator: air_gen <output_dir> [<name> <resolved_path>]...
const gen_run = b.addRunArtifact(gen_exe);
const air_dir = gen_run.addOutputDirectoryArg("air");
// Add corpus files as name/path pairs.
for (corpus.files[0..corpus.num_passing]) |path| {
// Add non-lib/std/ corpus files as name/path pairs.
// lib/std/ files are compiled via std.zig root inside air_gen.
for (corpus.files) |path| {
if (std.mem.startsWith(u8, path, "lib/std/")) continue;
gen_run.addArg(path);
gen_run.addFileArg(b.path(path));
}

View File

@@ -126,6 +126,8 @@ export fn zig_compile_air(
std.mem.span(src_path_ptr),
if (module_root_ptr) |p| std.mem.span(p) else null,
err_buf,
false,
false,
) catch |err| {
setErr(err_buf, "{s}", .{@errorName(err)});
return .{ .items = null, .len = 0, .callback_count = 0 };
@@ -157,6 +159,8 @@ pub fn zigCompileAirImpl(
src_path: []const u8,
module_root_opt: ?[]const u8,
err_buf: *[err_buf_size]u8,
is_test: bool,
use_root_as_std: bool,
) !CompileAirResult {
const gpa = std.heap.c_allocator;
@@ -190,7 +194,7 @@ pub fn zigCompileAirImpl(
.resolved_target = resolved_target,
.have_zcu = true,
.emit_bin = true,
.is_test = false,
.is_test = is_test,
// Use the self-hosted wasm backend (not LLVM) so that error
// return tracing is disabled. This keeps the AIR minimal and
// avoids dependence on StackTrace type resolution, which the
@@ -253,6 +257,7 @@ pub fn zigCompileAirImpl(
.root_name = "test",
.config = config,
.root_mod = root_mod,
.std_mod = if (use_root_as_std) root_mod else null,
.emit_bin = .no,
.thread_pool = thread_pool,
.cache_mode = .incremental,

View File

@@ -47,49 +47,51 @@ pub fn main() !void {
break :blk std.fs.path.dirname(first_resolved) orelse ".";
};
// Create a symlink workaround so that files in lib/std/ are not seen as
// belonging to both 'root' and 'std' modules. We symlink repo_root into
// a temp directory and pass all paths through the symlink.
// The temp directory must be OUTSIDE the output_dir to avoid confusing the
// Zig build system's cache (it would follow the symlink back into the repo).
const tmp_dir_path = "/tmp/zig-air-gen";
// Pass 1: Compile lib/std/std.zig as root with use_root_as_std=true (is_test=false).
// This single compilation covers ALL lib/std/ files. The root_mod
// has root at lib/std/ with root_src_path="std.zig", and std_mod=root_mod
// so @import("std") == @This(). No symlink needed.
{
const std_root = try std.fmt.allocPrint(gpa, "{s}lib/std/", .{repo_root});
defer gpa.free(std_root);
const std_src = try std.fmt.allocPrint(gpa, "{s}lib/std/std.zig", .{repo_root});
defer gpa.free(std_src);
std.fs.makeDirAbsolute(tmp_dir_path) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => {
std.debug.print("error creating tmp directory: {s}\n", .{@errorName(err)});
const data = processSource(gpa, std_src, std_root, false, true) catch |err| {
std.debug.print("FAIL: lib/std/std.zig ({s}): {s}\n", .{ std_src, @errorName(err) });
return err;
},
};
var tmp_dir = std.fs.openDirAbsolute(tmp_dir_path, .{}) catch |err| {
std.debug.print("error opening tmp directory: {s}\n", .{@errorName(err)});
return err;
};
defer tmp_dir.close();
defer std.fs.deleteTreeAbsolute(tmp_dir_path) catch {};
};
defer gpa.free(data);
tmp_dir.symLink(repo_root, "root", .{ .is_directory = true }) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => {
std.debug.print("error creating symlink: {s}\n", .{@errorName(err)});
// Write the combined AIR data to lib/std/std.zig.air.
output_dir.makePath("lib/std") catch |err| {
std.debug.print("error creating directory 'lib/std': {s}\n", .{@errorName(err)});
return err;
},
};
};
var file = output_dir.createFile("lib/std/std.zig.air", .{}) catch |err| {
std.debug.print("error creating file 'lib/std/std.zig.air': {s}\n", .{@errorName(err)});
return err;
};
defer file.close();
file.writeAll(data) catch |err| {
std.debug.print("error writing file 'lib/std/std.zig.air': {s}\n", .{@errorName(err)});
return err;
};
}
// Pass 2: Compile non-lib/std/ files standalone (compiler_rt, sema_tests, etc.).
for (0..num_entries) |i| {
const name = pairs[i * 2];
const resolved = pairs[i * 2 + 1];
_ = resolved;
// Construct paths through the symlink to avoid the "file exists in
// modules 'root' and 'std'" error.
const src_path = try std.fmt.allocPrint(gpa, "{s}/root/{s}", .{ tmp_dir_path, name });
defer gpa.free(src_path);
const module_root = try std.fmt.allocPrint(gpa, "{s}/root", .{tmp_dir_path});
defer gpa.free(module_root);
// lib/std/ files are covered by the std.zig compilation above.
if (std.mem.startsWith(u8, name, "lib/std/")) continue;
const data = processSource(gpa, src_path, module_root) catch |err| {
std.debug.print("FAIL: {s} ({s}): {s}\n", .{ name, src_path, @errorName(err) });
const module_root = std.fs.path.dirname(resolved) orelse ".";
const data = processSource(gpa, resolved, module_root, false, false) catch |err| {
std.debug.print("FAIL: {s} ({s}): {s}\n", .{ name, resolved, @errorName(err) });
return err;
};
defer gpa.free(data);
@@ -143,7 +145,11 @@ pub fn main() !void {
defer file.close();
try file.writeAll(
\\// Generated by verbose_air_gen. Do not edit.
\\const std = @import("std");
\\pub fn getData(comptime path: []const u8) []const u8 {
\\ if (comptime std.mem.startsWith(u8, path, "lib/std/")) {
\\ return @embedFile("lib/std/std.zig.air");
\\ }
\\ return @embedFile(path ++ ".air");
\\}
\\
@@ -162,13 +168,15 @@ pub fn main() !void {
/// inst_datas: [inst_len * 8]u8
/// extra_len: u32
/// extra: [extra_len * 4]u8
fn processSource(gpa: std.mem.Allocator, src_path: []const u8, module_root: []const u8) ![]const u8 {
fn processSource(gpa: std.mem.Allocator, src_path: []const u8, module_root: []const u8, is_test: bool, use_root_as_std: bool) ![]const u8 {
var err_buf: [256]u8 = .{0} ** 256;
const result = verbose_air.zigCompileAirImpl(
src_path,
module_root,
&err_buf,
is_test,
use_root_as_std,
) catch |err| {
std.debug.print("zigCompileAirImpl error for {s}: {s} ({s})\n", .{
src_path,

View File

@@ -196,7 +196,7 @@ pub const files = [_][]const u8{
"lib/compiler_rt/extendhfsf2.zig", // 920
"lib/compiler_rt/memcmp.zig", // 931
"lib/compiler_rt/subvdi3.zig", // 932
"lib/init/src/main.zig", // 936
"lib/compiler_rt/gehf2.zig", // 960
"lib/compiler_rt/divsf3_test.zig", // 982
"lib/compiler_rt/paritysi2_test.zig", // 989

View File

@@ -5102,11 +5102,17 @@ static void analyzeFuncBodyAndRecord(Sema* sema, SemaBlock* block,
if (name_idx != 0) {
const char* name_str = (const char*)&sema->code.string_bytes[name_idx];
size_t name_len = strlen(name_str);
// Skip "{root_fqn}." or "{root_fqn}.{prefix}." to get the bare
// function name portion from previously recorded entries.
const char* root = sema->root_fqn ? sema->root_fqn : "root";
size_t skip = strlen(root) + 1; // "{root}."
if (sema->module_prefix)
skip += strlen(sema->module_prefix) + 1; // "{prefix}."
SemaFuncAirList* list = sema->func_air_list;
for (uint32_t i = 0; i < list->len; i++) {
if (!list->items[i].name)
continue;
const char* entry = list->items[i].name + 5; // skip "root."
const char* entry = list->items[i].name + skip;
// Exact match (non-generic).
if (strcmp(entry, name_str) == 0)
return;
@@ -5391,26 +5397,46 @@ static void analyzeFuncBodyAndRecord(Sema* sema, SemaBlock* block,
semaBlockDeinit(&fn_block);
// --- Build function name ---
// Format: "{root_fqn}[.{module_prefix}].{name}[__anon_{ip}]"
// root_fqn defaults to "root" if not set; matches Zig's module naming.
char* func_name = NULL;
if (name_idx != 0) {
const char* name_ptr = (const char*)&sema->code.string_bytes[name_idx];
const char* root = sema->root_fqn ? sema->root_fqn : "root";
const char* prefix = sema->module_prefix;
if (call_args) {
// Generic monomorphization: "root.{name}__anon_{ip_index}"
// Ported from InternPool.zig finishFuncInstance:
// "{f}__anon_{d}" with @intFromEnum(func_index).
size_t fqn_len = (size_t)snprintf(
NULL, 0, "root.%s__anon_%u", name_ptr, func_val_ip);
func_name = malloc(fqn_len + 1);
if (func_name)
snprintf(func_name, fqn_len + 1, "root.%s__anon_%u", name_ptr,
func_val_ip);
// Generic monomorphization: append "__anon_{ip_index}"
size_t fqn_len;
if (prefix) {
fqn_len = (size_t)snprintf(NULL, 0, "%s.%s.%s__anon_%u",
root, prefix, name_ptr, func_val_ip);
func_name = malloc(fqn_len + 1);
if (func_name)
snprintf(func_name, fqn_len + 1, "%s.%s.%s__anon_%u",
root, prefix, name_ptr, func_val_ip);
} else {
fqn_len = (size_t)snprintf(NULL, 0, "%s.%s__anon_%u",
root, name_ptr, func_val_ip);
func_name = malloc(fqn_len + 1);
if (func_name)
snprintf(func_name, fqn_len + 1, "%s.%s__anon_%u",
root, name_ptr, func_val_ip);
}
} else {
size_t name_len = strlen(name_ptr);
size_t fqn_len = 5 + name_len; // "root." + name
func_name = malloc(fqn_len + 1);
if (func_name) {
memcpy(func_name, "root.", 5);
memcpy(func_name + 5, name_ptr, name_len + 1);
if (prefix) {
size_t fqn_len = (size_t)snprintf(NULL, 0, "%s.%s.%s",
root, prefix, name_ptr);
func_name = malloc(fqn_len + 1);
if (func_name)
snprintf(func_name, fqn_len + 1, "%s.%s.%s",
root, prefix, name_ptr);
} else {
size_t fqn_len = (size_t)snprintf(NULL, 0, "%s.%s",
root, name_ptr);
func_name = malloc(fqn_len + 1);
if (func_name)
snprintf(func_name, fqn_len + 1, "%s.%s",
root, name_ptr);
}
}
}

View File

@@ -190,6 +190,16 @@ typedef struct Sema {
// (e.g. @import("std") → <module_root>/lib/std/std.zig).
// Set by the caller before semaAnalyze. NULL = no std resolution.
const char* module_root;
// Root FQN prefix for function name construction.
// Function names are "{root_fqn}.funcName" (e.g. "empty_void_function.f").
// Must match the Zig compiler's module naming (filename stem).
// NULL = use "root" as prefix.
const char* root_fqn;
// Module prefix for FQN construction. When set, function names are
// "{root_fqn}.{module_prefix}.funcName" instead of "{root_fqn}.funcName".
// Used for lib/std/ files compiled as part of std.zig root, so that
// C sema FQNs match Zig FQNs. NULL = no prefix.
const char* module_prefix;
// Comptime type-info tracker: maps IP indices returned by type_info
// and field_val to their semantic meaning.
// tag: 0=none, 1=type_info(type), 2=float_info(bits)
@@ -253,6 +263,9 @@ typedef struct Sema {
uint32_t seen_call_names[16];
uint32_t seen_call_nargs[16];
uint32_t num_seen_calls;
// When true, test declarations are analyzed (test blocks become
// analyzable functions). Set by the caller before semaAnalyze.
bool is_test;
} Sema;
#define SEMA_DEFAULT_BRANCH_QUOTA 1000

View File

@@ -322,26 +322,9 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li
const c_pf = precomputedFromCAir(cf);
try airCompareOne(c_name, pf.*, c_pf);
}
// Verify bidirectional match: Zig should not produce functions that C does not.
if (c_funcs.len != precomputed.len) {
std.debug.print("Function count mismatch: C produced {d} functions, " ++
"pre-computed (Zig) has {d}\n", .{ c_funcs.len, precomputed.len });
// Print which pre-computed functions C didn't produce.
for (precomputed) |*pf| {
var found = false;
for (c_funcs) |*cf| {
const cn = if (cf.name) |n| std.mem.span(n) else "";
if (std.mem.eql(u8, stripModulePrefix(pf.name), stripModulePrefix(cn))) {
found = true;
break;
}
}
if (!found) {
std.debug.print(" missing in C: '{s}'\n", .{pf.name});
}
}
return error.AirMismatch;
}
// Unidirectional comparison: every C function must exist in Zig AIR
// and match. Zig can have extra functions (e.g. from test blocks or
// generic instantiations not yet handled by C sema).
}
fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
@@ -356,36 +339,54 @@ fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc {
}
fn precomputedFindByName(funcs: []const PrecomputedFunc, name: []const u8) ?*const PrecomputedFunc {
const bare_name = stripModulePrefix(name);
var result: ?*const PrecomputedFunc = null;
var match_count: usize = 0;
for (funcs) |*f| {
if (std.mem.eql(u8, bare_name, stripModulePrefix(f.name))) {
if (result == null) result = f;
match_count += 1;
if (std.mem.eql(u8, name, f.name)) return f;
}
return null;
}
/// Convert a repo-relative lib/std/ path to a null-terminated module prefix.
/// "lib/std/crypto/codecs.zig" -> "crypto.codecs"
/// "lib/std/zig/llvm.zig" -> "zig.llvm"
pub fn pathToModulePrefix(comptime path: []const u8) [*:0]const u8 {
return comptime blk: {
const stripped = path["lib/std/".len..];
const no_ext = stripped[0 .. stripped.len - ".zig".len];
var buf: [no_ext.len:0]u8 = undefined;
for (no_ext, 0..) |ch, i| {
buf[i] = if (ch == '/') '.' else ch;
}
}
if (match_count > 1) {
std.debug.print("Ambiguous name match: '{s}' matches {d} pre-computed functions\n", .{ bare_name, match_count });
}
return result;
buf[no_ext.len] = 0;
const result = buf;
break :blk &result;
};
}
fn cNameSpan(name: [*c]u8) []const u8 {
const opt: ?[*:0]const u8 = @ptrCast(name);
return if (opt) |n| std.mem.span(n) else "";
}
/// Strip module prefix from FQN: "module.name" -> "name".
/// Returns the full string if no '.' is found.
fn stripModulePrefix(fqn: []const u8) []const u8 {
return if (std.mem.lastIndexOfScalar(u8, fqn, '.')) |dot|
fqn[dot + 1 ..]
else
fqn;
/// Extract the filename stem from a path as a null-terminated string.
/// "stage0/sema_tests/empty_void_function.zig" -> "empty_void_function"
///
/// `path` must end in ".zig". Any other path is rejected with a compile
/// error instead of silently dropping its last four bytes.
pub fn pathStem(comptime path: []const u8) [*:0]const u8 {
    return comptime blk: {
        if (!std.mem.endsWith(u8, path, ".zig")) {
            @compileError("pathStem: expected a '.zig' path, got '" ++ path ++ "'");
        }
        // Everything after the last '/', or the whole path if there is none.
        const filename = if (std.mem.lastIndexOfScalar(u8, path, '/')) |slash|
            path[slash + 1 ..]
        else
            path;
        // Strip the ".zig" extension (its presence is guaranteed by the check above).
        const stem = filename[0 .. filename.len - ".zig".len];
        // Assemble in a comptime-mutable buffer, then copy into a const so
        // the returned pointer refers to immutable comptime data.
        var buf: [stem.len:0]u8 = undefined;
        @memcpy(&buf, stem);
        buf[stem.len] = 0;
        const result = buf;
        break :blk &result;
    };
}
/// Convert a C pointer to an optional many-item const pointer:
/// a null C pointer becomes `null`, anything else is cast through unchanged.
fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T {
    if (ptr == null) return null;
    return @ptrCast(ptr);
}
@@ -876,13 +877,24 @@ test "sema air: unit tests" {
@setEvalBranchQuota(corpus.sema_unit_tests.len * 2);
inline for (corpus.sema_unit_tests[0..corpus.num_sema_passing]) |path| {
const source: [:0]const u8 = @embedFile("../" ++ path);
var result = try semaCheck(source);
defer result.deinit();
var c_ast = c.astParse(source.ptr, @intCast(source.len));
defer c.astDeinit(&c_ast);
var c_zir = c.astGen(&c_ast);
defer c.zirDeinit(&c_zir);
var c_ip = c.ipInit();
defer c.ipDeinit(&c_ip);
var c_sema: c.Sema = undefined;
c.semaInit(&c_sema, &c_ip, c_zir);
defer c.semaDeinit(&c_sema);
c_sema.root_fqn = comptime pathStem(path);
var c_func_air_list = c.semaAnalyze(&c_sema);
defer c.semaFuncAirListDeinit(&c_func_air_list);
const air_data = @import("air_data").getData(path);
const precomputed = try parsePrecomputedAir(air_data);
defer freePrecomputedAir(precomputed);
airComparePrecomputed(precomputed, result.c_func_air_list) catch {
airComparePrecomputed(precomputed, c_func_air_list) catch {
std.debug.print("FAIL: {s}\n", .{path});
return error.TestFailed;
};

View File

@@ -58,27 +58,15 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
// Stage 3: Sema — compare C sema vs pre-computed AIR
{
// Symlink to the repo root inside a tmpDir so relative imports
// resolve within the module root, and paths stay under .zig-cache/tmp/
// to avoid 'std' module conflicts with lib/std/.
const this_dir = comptime std.fs.path.dirname(@src().file) orelse ".";
var tmp = std.testing.tmpDir(.{});
defer tmp.cleanup();
const abs_repo_root = std.fs.cwd().realpathAlloc(gpa, comptime this_dir ++ "/..") catch return error.ResolvePath;
defer gpa.free(abs_repo_root);
tmp.dir.symLink(abs_repo_root, "root", .{ .is_directory = true }) catch return error.SymlinkCreate;
var tmp_abs_buf: [std.fs.max_path_bytes]u8 = undefined;
const tmp_abs = tmp.dir.realpathZ(".", &tmp_abs_buf) catch return error.ResolvePath;
const repo_dir = comptime std.fs.path.dirname(path) orelse ".";
var source_dir_buf: [std.fs.max_path_bytes:0]u8 = undefined;
const source_dir_path = std.fmt.bufPrintZ(&source_dir_buf, "{s}/root/{s}", .{ tmp_abs, repo_dir }) catch unreachable;
const source_dir_path = std.fmt.bufPrintZ(&source_dir_buf, "{s}/{s}", .{ abs_repo_root, repo_dir }) catch unreachable;
var module_root_buf: [std.fs.max_path_bytes:0]u8 = undefined;
const module_root_path = std.fmt.bufPrintZ(&module_root_buf, "{s}/root", .{tmp_abs}) catch unreachable;
const module_root_path = std.fmt.bufPrintZ(&module_root_buf, "{s}", .{abs_repo_root}) catch unreachable;
var c_ip = sc.ipInit();
defer sc.ipDeinit(&c_ip);
@@ -87,6 +75,15 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8)
defer sc.semaDeinit(&c_sema);
c_sema.source_dir = source_dir_path.ptr;
c_sema.module_root = module_root_path.ptr;
// Set root_fqn and module_prefix so C sema FQNs match Zig's.
// lib/std/ files: "std.{prefix}.func" (std.zig compiled as root)
// other files: "{stem}.func" (standalone compilation)
if (comptime std.mem.startsWith(u8, path, "lib/std/")) {
c_sema.root_fqn = "std";
c_sema.module_prefix = sema_test.pathToModulePrefix(path);
} else {
c_sema.root_fqn = comptime sema_test.pathStem(path);
}
var c_func_air_list = sc.semaAnalyze(&c_sema);
defer sc.semaFuncAirListDeinit(&c_func_air_list);