zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 7f58c766f82bd4fc4b21b2d81078d18fc65c77cd (tree)
parent 9fa82327784d0edd198276d8e5f4152036fcd369
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Wed, 25 Feb 2026 07:38:35 +0000

compile std as root + unidirectional AIR comparison + exact FQN matching

air_gen: replace per-file symlink workaround with two-pass compilation.
Pass 1 compiles lib/std/std.zig as root with use_root_as_std=true
(one compilation, all lib/std/ functions). Pass 2 compiles non-lib/std/
files standalone. Symlink workaround eliminated entirely.

build.zig: pass all corpus.files (not 0..num_passing) to air_gen,
skipping lib/std/ files. Bumping num_passing no longer invalidates
the air_gen cache.

air_data.zig: route lib/std/ paths to the combined std.zig.air file.

sema_test.zig: switch to unidirectional comparison (C→Zig only) and
exact FQN matching. Remove stripModulePrefix, bare-name fallback, and
unused cNameSpan. Add pathToModulePrefix and pathStem helpers.

sema.h/sema.c: add root_fqn, module_prefix, and is_test fields to
Sema struct. Function names use "{root_fqn}[.{module_prefix}].{name}"
format to match Zig's FQN convention.

stages_test.zig: set root_fqn and module_prefix on C sema so FQNs
match Zig's naming. Remove symlink workaround — C sema uses real
paths directly. Set is_test=false to match air_gen.

corpus.zig: remove lib/init/src/main.zig (template file with
@import(".NAME") that cannot compile standalone).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M build.zig | 6 ++++--
M src/verbose_air.zig | 7 ++++++-
M src/verbose_air_gen.zig | 78 +++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M stage0/corpus.zig | 2 +-
M stage0/sema.c | 58 ++++++++++++++++++++++++++++++++++++++++++----------------
M stage0/sema.h | 13 +++++++++++++
M stage0/sema_test.zig | 102 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M stage0/stages_test.zig | 25 +++++++++++--------------
8 files changed, 177 insertions(+), 114 deletions(-)

diff --git a/build.zig b/build.zig @@ -1746,8 +1746,10 @@ fn addAirGen( // Run generator: air_gen <output_dir> [<name> <resolved_path>]... const gen_run = b.addRunArtifact(gen_exe); const air_dir = gen_run.addOutputDirectoryArg("air"); - // Add corpus files as name/path pairs. - for (corpus.files[0..corpus.num_passing]) |path| { + // Add non-lib/std/ corpus files as name/path pairs. + // lib/std/ files are compiled via std.zig root inside air_gen. + for (corpus.files) |path| { + if (std.mem.startsWith(u8, path, "lib/std/")) continue; gen_run.addArg(path); gen_run.addFileArg(b.path(path)); } diff --git a/src/verbose_air.zig b/src/verbose_air.zig @@ -126,6 +126,8 @@ export fn zig_compile_air( std.mem.span(src_path_ptr), if (module_root_ptr) |p| std.mem.span(p) else null, err_buf, + false, + false, ) catch |err| { setErr(err_buf, "{s}", .{@errorName(err)}); return .{ .items = null, .len = 0, .callback_count = 0 }; @@ -157,6 +159,8 @@ pub fn zigCompileAirImpl( src_path: []const u8, module_root_opt: ?[]const u8, err_buf: *[err_buf_size]u8, + is_test: bool, + use_root_as_std: bool, ) !CompileAirResult { const gpa = std.heap.c_allocator; @@ -190,7 +194,7 @@ pub fn zigCompileAirImpl( .resolved_target = resolved_target, .have_zcu = true, .emit_bin = true, - .is_test = false, + .is_test = is_test, // Use the self-hosted wasm backend (not LLVM) so that error // return tracing is disabled. 
This keeps the AIR minimal and // avoids dependence on StackTrace type resolution, which the @@ -253,6 +257,7 @@ pub fn zigCompileAirImpl( .root_name = "test", .config = config, .root_mod = root_mod, + .std_mod = if (use_root_as_std) root_mod else null, .emit_bin = .no, .thread_pool = thread_pool, .cache_mode = .incremental, diff --git a/src/verbose_air_gen.zig b/src/verbose_air_gen.zig @@ -47,49 +47,51 @@ pub fn main() !void { break :blk std.fs.path.dirname(first_resolved) orelse "."; }; - // Create a symlink workaround so that files in lib/std/ are not seen as - // belonging to both 'root' and 'std' modules. We symlink repo_root into - // a temp directory and pass all paths through the symlink. - // The temp directory must be OUTSIDE the output_dir to avoid confusing the - // Zig build system's cache (it would follow the symlink back into the repo). - const tmp_dir_path = "/tmp/zig-air-gen"; - - std.fs.makeDirAbsolute(tmp_dir_path) catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => { - std.debug.print("error creating tmp directory: {s}\n", .{@errorName(err)}); + // Pass 1: Compile lib/std/std.zig as root with is_test=true. + // This single compilation covers ALL lib/std/ files. The root_mod + // has root at lib/std/ with root_src_path="std.zig", and std_mod=root_mod + // so @import("std") == @This(). No symlink needed. 
+ { + const std_root = try std.fmt.allocPrint(gpa, "{s}lib/std/", .{repo_root}); + defer gpa.free(std_root); + const std_src = try std.fmt.allocPrint(gpa, "{s}lib/std/std.zig", .{repo_root}); + defer gpa.free(std_src); + + const data = processSource(gpa, std_src, std_root, false, true) catch |err| { + std.debug.print("FAIL: lib/std/std.zig ({s}): {s}\n", .{ std_src, @errorName(err) }); return err; - }, - }; - var tmp_dir = std.fs.openDirAbsolute(tmp_dir_path, .{}) catch |err| { - std.debug.print("error opening tmp directory: {s}\n", .{@errorName(err)}); - return err; - }; - defer tmp_dir.close(); - defer std.fs.deleteTreeAbsolute(tmp_dir_path) catch {}; + }; + defer gpa.free(data); - tmp_dir.symLink(repo_root, "root", .{ .is_directory = true }) catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => { - std.debug.print("error creating symlink: {s}\n", .{@errorName(err)}); + // Write the combined AIR data to lib/std/std.zig.air. + output_dir.makePath("lib/std") catch |err| { + std.debug.print("error creating directory 'lib/std': {s}\n", .{@errorName(err)}); return err; - }, - }; + }; + var file = output_dir.createFile("lib/std/std.zig.air", .{}) catch |err| { + std.debug.print("error creating file 'lib/std/std.zig.air': {s}\n", .{@errorName(err)}); + return err; + }; + defer file.close(); + file.writeAll(data) catch |err| { + std.debug.print("error writing file 'lib/std/std.zig.air': {s}\n", .{@errorName(err)}); + return err; + }; + } + + // Pass 2: Compile non-lib/std/ files standalone (compiler_rt, sema_tests, etc.). for (0..num_entries) |i| { const name = pairs[i * 2]; const resolved = pairs[i * 2 + 1]; - _ = resolved; - // Construct paths through the symlink to avoid the "file exists in - // modules 'root' and 'std'" error. 
- const src_path = try std.fmt.allocPrint(gpa, "{s}/root/{s}", .{ tmp_dir_path, name }); - defer gpa.free(src_path); - const module_root = try std.fmt.allocPrint(gpa, "{s}/root", .{tmp_dir_path}); - defer gpa.free(module_root); + // lib/std/ files are covered by the std.zig compilation above. + if (std.mem.startsWith(u8, name, "lib/std/")) continue; + + const module_root = std.fs.path.dirname(resolved) orelse "."; - const data = processSource(gpa, src_path, module_root) catch |err| { - std.debug.print("FAIL: {s} ({s}): {s}\n", .{ name, src_path, @errorName(err) }); + const data = processSource(gpa, resolved, module_root, false, false) catch |err| { + std.debug.print("FAIL: {s} ({s}): {s}\n", .{ name, resolved, @errorName(err) }); return err; }; defer gpa.free(data); @@ -143,7 +145,11 @@ pub fn main() !void { defer file.close(); try file.writeAll( \\// Generated by verbose_air_gen. Do not edit. + \\const std = @import("std"); \\pub fn getData(comptime path: []const u8) []const u8 { + \\ if (comptime std.mem.startsWith(u8, path, "lib/std/")) { + \\ return @embedFile("lib/std/std.zig.air"); + \\ } \\ return @embedFile(path ++ ".air"); \\} \\ @@ -162,13 +168,15 @@ pub fn main() !void { /// inst_datas: [inst_len * 8]u8 /// extra_len: u32 /// extra: [extra_len * 4]u8 -fn processSource(gpa: std.mem.Allocator, src_path: []const u8, module_root: []const u8) ![]const u8 { +fn processSource(gpa: std.mem.Allocator, src_path: []const u8, module_root: []const u8, is_test: bool, use_root_as_std: bool) ![]const u8 { var err_buf: [256]u8 = .{0} ** 256; const result = verbose_air.zigCompileAirImpl( src_path, module_root, &err_buf, + is_test, + use_root_as_std, ) catch |err| { std.debug.print("zigCompileAirImpl error for {s}: {s} ({s})\n", .{ src_path, diff --git a/stage0/corpus.zig b/stage0/corpus.zig @@ -196,7 +196,7 @@ pub const files = [_][]const u8{ "lib/compiler_rt/extendhfsf2.zig", // 920 "lib/compiler_rt/memcmp.zig", // 931 "lib/compiler_rt/subvdi3.zig", // 932 - 
"lib/init/src/main.zig", // 936 + "lib/compiler_rt/gehf2.zig", // 960 "lib/compiler_rt/divsf3_test.zig", // 982 "lib/compiler_rt/paritysi2_test.zig", // 989 diff --git a/stage0/sema.c b/stage0/sema.c @@ -5102,11 +5102,17 @@ static void analyzeFuncBodyAndRecord(Sema* sema, SemaBlock* block, if (name_idx != 0) { const char* name_str = (const char*)&sema->code.string_bytes[name_idx]; size_t name_len = strlen(name_str); + // Skip "{root_fqn}." or "{root_fqn}.{prefix}." to get the bare + // function name portion from previously recorded entries. + const char* root = sema->root_fqn ? sema->root_fqn : "root"; + size_t skip = strlen(root) + 1; // "{root}." + if (sema->module_prefix) + skip += strlen(sema->module_prefix) + 1; // "{prefix}." SemaFuncAirList* list = sema->func_air_list; for (uint32_t i = 0; i < list->len; i++) { if (!list->items[i].name) continue; - const char* entry = list->items[i].name + 5; // skip "root." + const char* entry = list->items[i].name + skip; // Exact match (non-generic). if (strcmp(entry, name_str) == 0) return; @@ -5391,26 +5397,46 @@ static void analyzeFuncBodyAndRecord(Sema* sema, SemaBlock* block, semaBlockDeinit(&fn_block); // --- Build function name --- + // Format: "{root_fqn}[.{module_prefix}].{name}[__anon_{ip}]" + // root_fqn defaults to "root" if not set; matches Zig's module naming. char* func_name = NULL; if (name_idx != 0) { const char* name_ptr = (const char*)&sema->code.string_bytes[name_idx]; + const char* root = sema->root_fqn ? sema->root_fqn : "root"; + const char* prefix = sema->module_prefix; if (call_args) { - // Generic monomorphization: "root.{name}__anon_{ip_index}" - // Ported from InternPool.zig finishFuncInstance: - // "{f}__anon_{d}" with @intFromEnum(func_index). 
- size_t fqn_len = (size_t)snprintf( - NULL, 0, "root.%s__anon_%u", name_ptr, func_val_ip); - func_name = malloc(fqn_len + 1); - if (func_name) - snprintf(func_name, fqn_len + 1, "root.%s__anon_%u", name_ptr, - func_val_ip); + // Generic monomorphization: append "__anon_{ip_index}" + size_t fqn_len; + if (prefix) { + fqn_len = (size_t)snprintf(NULL, 0, "%s.%s.%s__anon_%u", + root, prefix, name_ptr, func_val_ip); + func_name = malloc(fqn_len + 1); + if (func_name) + snprintf(func_name, fqn_len + 1, "%s.%s.%s__anon_%u", + root, prefix, name_ptr, func_val_ip); + } else { + fqn_len = (size_t)snprintf(NULL, 0, "%s.%s__anon_%u", + root, name_ptr, func_val_ip); + func_name = malloc(fqn_len + 1); + if (func_name) + snprintf(func_name, fqn_len + 1, "%s.%s__anon_%u", + root, name_ptr, func_val_ip); + } } else { - size_t name_len = strlen(name_ptr); - size_t fqn_len = 5 + name_len; // "root." + name - func_name = malloc(fqn_len + 1); - if (func_name) { - memcpy(func_name, "root.", 5); - memcpy(func_name + 5, name_ptr, name_len + 1); + if (prefix) { + size_t fqn_len = (size_t)snprintf(NULL, 0, "%s.%s.%s", + root, prefix, name_ptr); + func_name = malloc(fqn_len + 1); + if (func_name) + snprintf(func_name, fqn_len + 1, "%s.%s.%s", + root, prefix, name_ptr); + } else { + size_t fqn_len = (size_t)snprintf(NULL, 0, "%s.%s", + root, name_ptr); + func_name = malloc(fqn_len + 1); + if (func_name) + snprintf(func_name, fqn_len + 1, "%s.%s", + root, name_ptr); } } } diff --git a/stage0/sema.h b/stage0/sema.h @@ -190,6 +190,16 @@ typedef struct Sema { // (e.g. @import("std") → <module_root>/lib/std/std.zig). // Set by the caller before semaAnalyze. NULL = no std resolution. const char* module_root; + // Root FQN prefix for function name construction. + // Function names are "{root_fqn}.funcName" (e.g. "empty_void_function.f"). + // Must match the Zig compiler's module naming (filename stem). + // NULL = use "root" as prefix. 
+ const char* root_fqn; + // Module prefix for FQN construction. When set, function names are + // "{root_fqn}.{module_prefix}.funcName" instead of "{root_fqn}.funcName". + // Used for lib/std/ files compiled as part of std.zig root, so that + // C sema FQNs match Zig FQNs. NULL = no prefix. + const char* module_prefix; // Comptime type-info tracker: maps IP indices returned by type_info // and field_val to their semantic meaning. // tag: 0=none, 1=type_info(type), 2=float_info(bits) @@ -253,6 +263,9 @@ typedef struct Sema { uint32_t seen_call_names[16]; uint32_t seen_call_nargs[16]; uint32_t num_seen_calls; + // When true, test declarations are analyzed (test blocks become + // analyzeable functions). Set by the caller before semaAnalyze. + bool is_test; } Sema; #define SEMA_DEFAULT_BRANCH_QUOTA 1000 diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig @@ -322,26 +322,9 @@ pub fn airComparePrecomputed(precomputed: []const PrecomputedFunc, c_func_air_li const c_pf = precomputedFromCAir(cf); try airCompareOne(c_name, pf.*, c_pf); } - // Verify bidirectional match: Zig should not produce functions that C does not. - if (c_funcs.len != precomputed.len) { - std.debug.print("Function count mismatch: C produced {d} functions, " ++ - "pre-computed (Zig) has {d}\n", .{ c_funcs.len, precomputed.len }); - // Print which pre-computed functions C didn't produce. - for (precomputed) |*pf| { - var found = false; - for (c_funcs) |*cf| { - const cn = if (cf.name) |n| std.mem.span(n) else ""; - if (std.mem.eql(u8, stripModulePrefix(pf.name), stripModulePrefix(cn))) { - found = true; - break; - } - } - if (!found) { - std.debug.print(" missing in C: '{s}'\n", .{pf.name}); - } - } - return error.AirMismatch; - } + // Unidirectional comparison: every C function must exist in Zig AIR + // and match. Zig can have extra functions (e.g. from test blocks or + // generic instantiations not yet handled by C sema). 
} fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc { @@ -356,35 +339,53 @@ fn precomputedFromCAir(cf: *const c.SemaFuncAir) PrecomputedFunc { } fn precomputedFindByName(funcs: []const PrecomputedFunc, name: []const u8) ?*const PrecomputedFunc { - const bare_name = stripModulePrefix(name); - var result: ?*const PrecomputedFunc = null; - var match_count: usize = 0; for (funcs) |*f| { - if (std.mem.eql(u8, bare_name, stripModulePrefix(f.name))) { - if (result == null) result = f; - match_count += 1; - } - } - if (match_count > 1) { - std.debug.print("Ambiguous name match: '{s}' matches {d} pre-computed functions\n", .{ bare_name, match_count }); + if (std.mem.eql(u8, name, f.name)) return f; } - return result; + return null; } -fn cNameSpan(name: [*c]u8) []const u8 { - const opt: ?[*:0]const u8 = @ptrCast(name); - return if (opt) |n| std.mem.span(n) else ""; +/// Convert a repo-relative lib/std/ path to a null-terminated module prefix. +/// "lib/std/crypto/codecs.zig" -> "crypto.codecs" +/// "lib/std/zig/llvm.zig" -> "zig.llvm" +pub fn pathToModulePrefix(comptime path: []const u8) [*:0]const u8 { + return comptime blk: { + const stripped = path["lib/std/".len..]; + const no_ext = stripped[0 .. stripped.len - ".zig".len]; + var buf: [no_ext.len:0]u8 = undefined; + for (no_ext, 0..) |ch, i| { + buf[i] = if (ch == '/') '.' else ch; + } + buf[no_ext.len] = 0; + const result = buf; + break :blk &result; + }; } -/// Strip module prefix from FQN: "module.name" -> "name". -/// Returns the full string if no '.' is found. -fn stripModulePrefix(fqn: []const u8) []const u8 { - return if (std.mem.lastIndexOfScalar(u8, fqn, '.')) |dot| - fqn[dot + 1 ..] - else - fqn; -} +/// Extract filename stem from a path as a null-terminated string. 
+/// "stage0/sema_tests/empty_void_function.zig" -> "empty_void_function" +pub fn pathStem(comptime path: []const u8) [*:0]const u8 { + return comptime blk: { + // Find last '/' to get the filename + var last_slash: usize = 0; + var found_slash = false; + for (path, 0..) |ch, i| { + if (ch == '/') { + last_slash = i; + found_slash = true; + } + } + const filename = if (found_slash) path[last_slash + 1 ..] else path; + // Strip .zig extension + const stem = filename[0 .. filename.len - ".zig".len]; + var buf: [stem.len:0]u8 = undefined; + @memcpy(&buf, stem); + buf[stem.len] = 0; + const result = buf; + break :blk &result; + }; +} fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T { return if (ptr == null) null else @ptrCast(ptr); @@ -876,13 +877,24 @@ test "sema air: unit tests" { @setEvalBranchQuota(corpus.sema_unit_tests.len * 2); inline for (corpus.sema_unit_tests[0..corpus.num_sema_passing]) |path| { const source: [:0]const u8 = @embedFile("../" ++ path); - var result = try semaCheck(source); - defer result.deinit(); + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + var c_ip = c.ipInit(); + defer c.ipDeinit(&c_ip); + var c_sema: c.Sema = undefined; + c.semaInit(&c_sema, &c_ip, c_zir); + defer c.semaDeinit(&c_sema); + c_sema.root_fqn = comptime pathStem(path); + var c_func_air_list = c.semaAnalyze(&c_sema); + defer c.semaFuncAirListDeinit(&c_func_air_list); const air_data = @import("air_data").getData(path); const precomputed = try parsePrecomputedAir(air_data); defer freePrecomputedAir(precomputed); - airComparePrecomputed(precomputed, result.c_func_air_list) catch { + airComparePrecomputed(precomputed, c_func_air_list) catch { std.debug.print("FAIL: {s}\n", .{path}); return error.TestFailed; }; diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig @@ -58,27 +58,15 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8) // 
Stage 3: Sema — compare C sema vs pre-computed AIR { - // Symlink to the repo root inside a tmpDir so relative imports - // resolve within the module root, and paths stay under .zig-cache/tmp/ - // to avoid 'std' module conflicts with lib/std/. const this_dir = comptime std.fs.path.dirname(@src().file) orelse "."; - - var tmp = std.testing.tmpDir(.{}); - defer tmp.cleanup(); - const abs_repo_root = std.fs.cwd().realpathAlloc(gpa, comptime this_dir ++ "/..") catch return error.ResolvePath; defer gpa.free(abs_repo_root); - tmp.dir.symLink(abs_repo_root, "root", .{ .is_directory = true }) catch return error.SymlinkCreate; - - var tmp_abs_buf: [std.fs.max_path_bytes]u8 = undefined; - const tmp_abs = tmp.dir.realpathZ(".", &tmp_abs_buf) catch return error.ResolvePath; - const repo_dir = comptime std.fs.path.dirname(path) orelse "."; var source_dir_buf: [std.fs.max_path_bytes:0]u8 = undefined; - const source_dir_path = std.fmt.bufPrintZ(&source_dir_buf, "{s}/root/{s}", .{ tmp_abs, repo_dir }) catch unreachable; + const source_dir_path = std.fmt.bufPrintZ(&source_dir_buf, "{s}/{s}", .{ abs_repo_root, repo_dir }) catch unreachable; var module_root_buf: [std.fs.max_path_bytes:0]u8 = undefined; - const module_root_path = std.fmt.bufPrintZ(&module_root_buf, "{s}/root", .{tmp_abs}) catch unreachable; + const module_root_path = std.fmt.bufPrintZ(&module_root_buf, "{s}", .{abs_repo_root}) catch unreachable; var c_ip = sc.ipInit(); defer sc.ipDeinit(&c_ip); @@ -87,6 +75,15 @@ fn stagesCheck(gpa: Allocator, comptime path: []const u8, source: [:0]const u8) defer sc.semaDeinit(&c_sema); c_sema.source_dir = source_dir_path.ptr; c_sema.module_root = module_root_path.ptr; + // Set root_fqn and module_prefix so C sema FQNs match Zig's. 
+ // lib/std/ files: "std.{prefix}.func" (std.zig compiled as root) + // other files: "{stem}.func" (standalone compilation) + if (comptime std.mem.startsWith(u8, path, "lib/std/")) { + c_sema.root_fqn = "std"; + c_sema.module_prefix = sema_test.pathToModulePrefix(path); + } else { + c_sema.root_fqn = comptime sema_test.pathStem(path); + } var c_func_air_list = sc.semaAnalyze(&c_sema); defer sc.semaFuncAirListDeinit(&c_func_air_list);