zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 33e3e5475d45061237d74cce3d995784dac4cf21 (tree)
parent 00370ba4dc9152d19345d0da166f020baeca0ed2
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Thu, 19 Feb 2026 09:14:28 +0000

WIP: wire up Zig Compilation for reference sema in stage0 tests

Add zigSema helper (stage0/sema.zig) that creates a Compilation,
points it at a source file, and runs the full Zig sema pipeline.
Export Compilation and Package from test_exports.zig. Wire zigSema into
stagesCheck so that Zig sema runs alongside C sema.

Not yet working: files under lib/ conflict with the auto-created
std module ("file exists in modules 'root' and 'std'"). The fix
(using .root = .none with absolute path) needs testing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M src/test_exports.zig | 2 ++
M stage0/.claude/sema-plan.md | 602 ++++++++++++++-----------------------------------------------------------------
A stage0/sema.zig | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M stage0/stages_test.zig | 12 ++++++------
4 files changed, 254 insertions(+), 504 deletions(-)

diff --git a/src/test_exports.zig b/src/test_exports.zig @@ -1,3 +1,5 @@ pub const InternPool = @import("InternPool.zig"); pub const Air = @import("Air.zig"); +pub const Compilation = @import("Compilation.zig"); +pub const Package = @import("Package.zig"); // Later: pub const Sema = @import("Sema.zig"); diff --git a/stage0/.claude/sema-plan.md b/stage0/.claude/sema-plan.md @@ -1,541 +1,147 @@ -# Sema Porting Plan for zig/stage0 +# Plan: Wire up Zig Compilation for reference sema in stage0 tests ## Context -The stage0 bootstrap tool currently has tokenizer, parser, and AstGen fully ported to C. -The next major milestone is Sema (semantic analysis), which transforms ZIR into AIR. -Before Sema functions can be ported, the data structures it depends on must exist in C. +`stagesCheck` in `stages_test.zig` runs C sema and converts the result to +Zig Air, but has no reference Zig sema output to compare against. We want +to run the real Zig sema pipeline on the same source and verify it succeeds. -This plan covers: -1. Identifying Sema's dependencies and their porting order -2. Defining the C data structures (header files) -3. Designing a test framework modeled on astgen_test.zig +## Current State (in progress) -Scope decisions: -- **InternPool**: Simplified single-threaded (no sharding/deps/incremental) -- **Sema**: Incremental subset-first approach, test-driven -- **Compilation context**: Minimal bootstrap `SemaCtx` (not full Zcu) +### Files changed so far: ---- +1. **`src/test_exports.zig`** — added `Compilation` and `Package` exports +2. **`stage0/sema.zig`** — NEW file with `ZigSemaResult` struct and `zigSema` helper +3. **`stage0/sema_c.zig`** — unchanged (zigSema was moved out to `sema.zig`) +4. **`stage0/stages_test.zig`** — imports `sema.zig`, passes `src_path` to `stagesCheck` -## 1. 
Dependency Graph and Porting Order +### Problem to solve: -``` - ┌──────────┐ - │ sema.h │ (Sema context, Block, InstMap) - └────┬─────┘ - │ depends on - ┌─────────────┼─────────────┐ - │ │ │ - ┌─────▼────┐ ┌─────▼────┐ ┌────▼─────┐ - │ air.h │ │ type.h │ │ value.h │ - └─────┬────┘ └─────┬────┘ └────┬─────┘ - │ │ │ - └─────────────┼─────────────┘ - │ all depend on - ┌───────▼────────┐ - │ intern_pool.h │ - └───────┬────────┘ - │ depends on - ┌───────▼────────┐ - │ common.h │ (already exists) - │ zir.h │ (already exists) - └────────────────┘ -``` - -**Porting order (bottom-up):** - -| Phase | File | Upstream Source | Estimated C lines | -|-------|------|---------------|-------------------| -| 1 | `intern_pool.h` | `src/InternPool.zig` | ~800 | -| 2 | `air.h` | `src/Air.zig` | ~600 | -| 3 | `type.h` / `value.h` | `src/Type.zig`, `src/Value.zig` | ~200 | -| 4 | `sema.h` | `src/Sema.zig` (struct defs only) | ~300 | -| 5 | `sema_test.zig` | new (modeled on `astgen_test.zig`) | ~400 | - ---- - -## 2. Data Structure Definitions - -### 2.1 `intern_pool.h` — Simplified Single-Threaded InternPool - -**Upstream**: `src/InternPool.zig` (~13K lines) -**Scope**: Type/value interning and lookup only. No threading, no dependency tracking, no incremental compilation. +`zigSema` takes a cwd-relative `src_path` (e.g. `"lib/std/crypto/codecs.zig"`) +and points a `Compilation` at it. However, files under `lib/` overlap with the +`zig_lib` directory, causing: ``` -Key types to define: - -InternPoolIndex (uint32_t) - - Pre-interned constants matching Zig's InternPool.Index enum: - IP_INDEX_U0_TYPE, IP_INDEX_U8_TYPE, IP_INDEX_I32_TYPE, ... - IP_INDEX_BOOL_TYPE, IP_INDEX_VOID_TYPE, IP_INDEX_TYPE_TYPE, ... - IP_INDEX_UNDEF, IP_INDEX_ZERO, IP_INDEX_ONE, IP_INDEX_BOOL_TRUE, ... 
- IP_INDEX_NONE = UINT32_MAX - - ~120 pre-interned type indices + ~20 pre-interned value indices - -SimpleType enum (matching InternPool.SimpleType): - f16, f32, f64, f80, f128, usize, isize, c_char, ..., - anyopaque, bool, void, type, anyerror, comptime_int, ... - -SimpleValue enum (matching InternPool.SimpleValue): - undefined, void_val, null_val, empty_tuple, true_val, false_val, unreachable_val - -IntType struct: - uint16_t bits; - uint8_t signedness; // 0=unsigned, 1=signed - -PtrType struct: - InternPoolIndex child; - InternPoolIndex sentinel; // IP_INDEX_NONE if absent - uint32_t flags; // packed: size(2), alignment(6), is_const(1), is_volatile(1), is_allowzero(1), address_space(5), vector_index(16) - uint32_t packed_offset; // packed: host_size(16), bit_offset(16) - -ArrayType struct: - uint64_t len; - InternPoolIndex child; - InternPoolIndex sentinel; // IP_INDEX_NONE if absent - -VectorType struct: - uint32_t len; - InternPoolIndex child; - -ErrorUnionType struct: - InternPoolIndex error_set; - InternPoolIndex payload; - -FuncType struct: - InternPoolIndex* param_types; - uint32_t param_count; - InternPoolIndex return_type; - uint32_t comptime_bits; - uint32_t noalias_bits; - uint8_t cc; // calling convention - bool is_var_args; - bool is_generic; - bool is_noinline; - -ErrorSetType struct: - uint32_t* names; // NullTerminatedString indices - uint32_t names_count; - -NamespaceType struct (for struct/union/enum/opaque): - uint32_t zir_index; // TrackedInst.Index equivalent - uint32_t type_hash; // for reified types - -InternPoolKeyTag enum: - IP_KEY_INT_TYPE, IP_KEY_PTR_TYPE, IP_KEY_ARRAY_TYPE, - IP_KEY_VECTOR_TYPE, IP_KEY_OPT_TYPE, IP_KEY_ANYFRAME_TYPE, - IP_KEY_ERROR_UNION_TYPE, IP_KEY_SIMPLE_TYPE, IP_KEY_STRUCT_TYPE, - IP_KEY_TUPLE_TYPE, IP_KEY_UNION_TYPE, IP_KEY_OPAQUE_TYPE, - IP_KEY_ENUM_TYPE, IP_KEY_FUNC_TYPE, IP_KEY_ERROR_SET_TYPE, - IP_KEY_INFERRED_ERROR_SET_TYPE, - IP_KEY_UNDEF, IP_KEY_SIMPLE_VALUE, IP_KEY_VARIABLE, - IP_KEY_EXTERN, IP_KEY_FUNC, 
IP_KEY_INT, IP_KEY_ERR, - IP_KEY_ERROR_UNION, IP_KEY_ENUM_LITERAL, IP_KEY_ENUM_TAG, - IP_KEY_FLOAT, IP_KEY_PTR, IP_KEY_SLICE, IP_KEY_OPT, - IP_KEY_AGGREGATE, IP_KEY_UNION_VALUE - -InternPoolKey struct (tagged union): - InternPoolKeyTag tag; - union { IntType int_type; PtrType ptr_type; ... } data; - -InternPool struct: - InternPoolKey* items; // indexed by InternPoolIndex - uint32_t items_len; - uint32_t items_cap; - // Hash table for deduplication: - uint32_t* hash_table; // maps hash → index - uint32_t hash_cap; - // String storage: - uint8_t* string_bytes; - uint32_t string_bytes_len; - uint32_t string_bytes_cap; - // Global error set: - uint32_t* error_names; - uint32_t error_names_len; - -Functions: - InternPool ipInit(void); - void ipDeinit(InternPool*); - InternPoolIndex ipIntern(InternPool*, InternPoolKey); - InternPoolKey ipIndexToKey(InternPool*, InternPoolIndex); - InternPoolIndex ipTypeOf(InternPool*, InternPoolIndex); +error: file exists in modules 'root' and 'std' ``` -### 2.2 `air.h` — Analyzed Intermediate Representation +The current attempted fix uses `.root = .none` with an absolute path to +dissociate the root module from zig_lib, but this hasn't been tested yet. -**Upstream**: `src/Air.zig` (~2,172 lines) +## Approach: file-path based -``` -Key types to define: - -AirInstTag enum (uint8_t, ~204 tags): - AIR_INST_ARG, AIR_INST_ADD, AIR_INST_ADD_SAFE, AIR_INST_ADD_OPTIMIZED, - AIR_INST_ADD_WRAP, AIR_INST_ADD_SAT, AIR_INST_SUB, ... - AIR_INST_BLOCK, AIR_INST_LOOP, AIR_INST_BR, AIR_INST_CALL, ... 
- (use X-macro pattern like ZIR_INST_FOREACH_TAG) - -AirInstRef (uint32_t): - Uses MSB (bit 31) as tag bit: - - Bit 31 = 0: value is an InternPool index (lower 31 bits) - - Bit 31 = 1: value is an AIR instruction index (lower 31 bits) - - AIR_REF_NONE = UINT32_MAX (special sentinel) - Helper macros: - #define AIR_REF_NONE UINT32_MAX - #define AIR_REF_IS_INST(r) (((r) >> 31) != 0 && (r) != AIR_REF_NONE) - #define AIR_REF_IS_IP(r) (((r) >> 31) == 0) - #define AIR_REF_TO_INST(r) ((r) & 0x7FFFFFFF) - #define AIR_REF_TO_IP(r) ((r) & 0x7FFFFFFF) - #define AIR_REF_FROM_INST(i) ((i) | 0x80000000) - #define AIR_REF_FROM_IP(i) (i) - -AirInstData union (8 bytes, matching Air.Inst.Data): - struct { AirInstRef operand; } un_op; - struct { AirInstRef lhs, rhs; } bin_op; - struct { AirInstRef ty; } ty; - struct { AirInstRef ty; uint32_t zir_param_index; } arg; - struct { AirInstRef ty; AirInstRef operand; } ty_op; - struct { AirInstRef ty; uint32_t payload; } ty_pl; - struct { uint32_t block_inst; AirInstRef operand; } br; - struct { AirInstRef operand; uint32_t payload; } pl_op; - struct { uint32_t line; uint32_t column; } dbg_stmt; - ... (all variants from Air.Inst.Data) - -Air struct: - uint32_t inst_len; - uint32_t inst_cap; - AirInstTag* inst_tags; - AirInstData* inst_datas; - uint32_t extra_len; - uint32_t extra_cap; - uint32_t* extra; - -Extra payload structs: - AirBlock, AirCall, AirCondBr, AirSwitchBr, AirTry, AirBin, etc. -``` +Point a `Compilation` at the source file directly (no temp-file copy of source). +The Compilation reads the file, parses, AstGens, and runs sema — exactly +like the real compiler. -### 2.3 `type.h` / `value.h` — Thin Wrappers +Still needs a temp dir for the Compilation cache. -**Upstream**: `src/Type.zig` (~4,179 lines), `src/Value.zig` (~3,288 lines) +Reference: `jitCmd` in `src/main.zig:5588-5654` shows the simplest +Compilation.create + update flow. -In C, Type and Value are just `InternPoolIndex` with helper functions. 
-These start minimal and grow as Sema functions need them. +## Changes needed -``` -type.h: - typedef InternPoolIndex TypeIndex; - #define TYPE_NONE IP_INDEX_NONE - - // Functions ported on-demand as Sema needs them: - uint32_t typeZigTypeTag(InternPool*, TypeIndex); - TypeIndex typeChildType(InternPool*, TypeIndex); - TypeIndex typeElemType(InternPool*, TypeIndex); - bool typeIsSlice(InternPool*, TypeIndex); - bool typeIsCPtr(InternPool*, TypeIndex); - uint64_t typeArrayLen(InternPool*, TypeIndex); - PtrType typePtrInfo(InternPool*, TypeIndex); - -value.h: - typedef InternPoolIndex ValueIndex; - #define VALUE_NONE IP_INDEX_NONE - - TypeIndex valueToType(ValueIndex); - ValueIndex valueFromInterned(InternPoolIndex); - uint64_t valueToUnsignedInt(InternPool*, ValueIndex); +### 1. `src/test_exports.zig` — export Compilation types (DONE) + +```zig +pub const Compilation = @import("Compilation.zig"); +pub const Package = @import("Package.zig"); ``` -### 2.4 `sema.h` — Sema Context and Block +### 2. 
`stage0/sema.zig` — zigSema helper (IN PROGRESS) -**Upstream**: `src/Sema.zig` (lines 41-500) +```zig +const Compilation = @import("zig_internals").Compilation; +const Package = @import("zig_internals").Package; + +pub const ZigSemaResult = struct { + comp: *Compilation, + dirs: Compilation.Directories, + tmp_dir: std.testing.TmpDir, + arena_state: std.heap.ArenaAllocator, + thread_pool: std.Thread.Pool, + + pub fn deinit(self: *ZigSemaResult) void { + self.comp.destroy(); + self.dirs.deinit(); + self.thread_pool.deinit(); + self.tmp_dir.cleanup(); + self.arena_state.deinit(); + } +}; -``` -RuntimeIndex (uint32_t): - #define RUNTIME_INDEX_ZERO 0 - #define RUNTIME_INDEX_COMPTIME_FIELD_PTR UINT32_MAX - -InstMap struct: - AirInstRef* items; - uint32_t start; // ZIR instruction index offset - uint32_t len; - -SemaBlock struct: - SemaBlock* parent; - Sema* sema; - uint32_t namespace; // NamespaceIndex - uint32_t* instructions; // AIR instruction indices - uint32_t instructions_len; - uint32_t instructions_cap; - SemaBlockLabel* label; - SemaBlockInlining* inlining; - int32_t runtime_cond; // LazySrcLoc or -1 - int32_t runtime_loop; // LazySrcLoc or -1 - uint32_t runtime_index; - uint32_t comptime_reason; // 0 = not comptime - bool is_typeof; - AirInstRef error_return_trace_index; - bool want_safety; // nullable via flag - bool want_safety_set; - uint8_t float_mode; // strict=0, optimized=1 - uint32_t src_base_inst; - uint32_t type_name_ctx; // NullTerminatedString - -SemaBlockLabel struct: - uint32_t zir_block; // Zir.Inst.Index - SemaBlockMerges merges; - -SemaBlockInlining struct: - SemaBlock* call_block; - int32_t call_src; - InternPoolIndex func; - bool is_generic_instantiation; - bool has_comptime_args; - AirInstRef comptime_result; - SemaBlockMerges merges; - -SemaBlockMerges struct: - uint32_t block_inst; // Air.Inst.Index - AirInstRef* results; - uint32_t results_len, results_cap; - uint32_t* br_list; - uint32_t br_list_len, br_list_cap; - -InferredErrorSet 
struct: - InternPoolIndex func; - uint32_t* error_names; // NullTerminatedString indices - uint32_t error_names_len, error_names_cap; - InternPoolIndex* inferred_sets; - uint32_t inferred_sets_len, inferred_sets_cap; - InternPoolIndex resolved; // IP_INDEX_NONE until resolved - -MaybeComptimeAlloc struct: - uint32_t runtime_index; - uint32_t* store_insts; - uint32_t stores_len, stores_cap; - -ComptimeAlloc struct: - InternPoolIndex val; - bool is_const; - int32_t src; // LazySrcLoc - uint8_t alignment; - uint32_t runtime_index; - -Sema struct: - InternPool* ip; // shared intern pool - Air air; // output AIR being built - Zir code; // input ZIR being analyzed - InstMap inst_map; // ZIR→AIR mapping - InternPoolIndex owner; // AnalUnit - InternPoolIndex func_index; - bool func_is_naked; - TypeIndex fn_ret_ty; - InferredErrorSet* fn_ret_ty_ies; // NULL if not inferred - uint32_t branch_quota; - uint32_t branch_count; - uint32_t comptime_break_inst; - // Hash maps (simplified as arrays for bootstrap): - ComptimeAlloc* comptime_allocs; - uint32_t comptime_allocs_len, comptime_allocs_cap; - bool allow_memoize; - uint8_t branch_hint; // 0=none - bool has_compile_errors; - char err_buf[ERR_BUF_SIZE]; - -Functions (stubs, filled incrementally): - Sema semaInit(InternPool*, Zir); - void semaDeinit(Sema*); - void semaAnalyzeFnBody(Sema*, SemaBlock*, uint32_t* body, uint32_t body_len); - AirInstRef semaResolveInst(Sema*, uint32_t zir_ref); +pub fn zigSema(gpa: Allocator, src_path: []const u8) !ZigSemaResult { ... } ``` ---- +Inside `zigSema`: -## 3. Test Framework: `sema_test.zig` +1. **Arena**: `var arena_state = std.heap.ArenaAllocator.init(gpa);` +2. **Temp dir**: `var tmp_dir = std.testing.tmpDir(.{});` (for cache only) +3. **Dirs**: Construct `Compilation.Directories` manually: + - `zig_lib` = open `lib/` dir with absolute path + - `global_cache` = `local_cache` = `.cache` subdir of tmp_dir +4. **Target**: hardcode x86_64-linux-musl +5. 
**Config**: `Compilation.Config.resolve(.{ .output_mode = .Obj, .have_zcu = true, .emit_bin = false, .is_test = false, .resolved_target = ... })` +6. **Root path**: Must use `.root = .none` with absolute path to the source + dir so it doesn't overlap with zig_lib (avoids "file exists in modules + 'root' and 'std'" error) +7. **Module**: `Package.Module.create(arena, .{ .paths = .{ .root = root_path, .root_src_path = basename }, ... })` +8. **Thread pool**: `std.Thread.Pool` with `n_jobs = 1, track_ids = true, stack_size = 60 << 20` +9. **Create**: `Compilation.create(gpa, arena, &diag, .{ ... .emit_bin = .no, .cache_mode = .whole })` +10. **Run**: `try comp.update(std.Progress.Node.none)` +11. **Errors**: `comp.getAllErrorsAlloc()` → if errors, print to stderr and return `error.ZigSemaFailed` +12. **Return**: ZigSemaResult owning comp, dirs, tmp_dir, arena, thread_pool -**Modeled on**: `astgen_test.zig` and `stages_test.zig` +### 3. `stage0/stages_test.zig` — use zigSema in stagesCheck (DONE) -### 3.1 Architecture - -``` -Source code ([:0]const u8) - │ - ├─── C path ───────────────────────────────────┐ - │ c.astParse() → C AST │ - │ c.astGen() → C ZIR │ - │ c.semaAnalyze() → C AIR │ - │ │ - ├─── Zig reference path ───────────────────────┤ - │ zigAst() → Zig AST │ - │ AstGen.generate() → Zig ZIR │ - │ setupSema() + analyzeFnBody() → Zig AIR │ - │ │ - └─── Compare ──────────────────────────────────┘ - expectEqualAir(zig_air, c_air) -``` - -### 3.2 Key Test Functions +The inline for passes `path["../".len..]` to strip the `"../"` prefix +(corpus paths are relative to stage0/, stripping gives cwd-relative paths): ```zig -// sema_test.zig - -const c = parser_test.c; - -/// Set up a minimal Zig Sema environment and analyze a module's ZIR. -/// Returns the AIR for the top-level declarations. -fn refAir(gpa: Allocator, source: [:0]const u8) !Air { - // 1. 
Parse and generate ZIR via Zig stdlib - var tree = try std.zig.Ast.parse(gpa, source, .zig); - defer tree.deinit(gpa); - var zir = try std.zig.AstGen.generate(gpa, tree); - defer zir.deinit(gpa); - // 2. Set up minimal compilation context - // 3. Run Sema - // 4. Return AIR +inline for (corpus_files) |path| { + stagesCheck(gpa, @embedFile(path), path["../".len..], check) catch { ... }; } -/// Compare two AIR outputs instruction-by-instruction. -fn expectEqualAir(gpa: Allocator, ref: Air, got_tags: []AirInstTag, - got_datas: []AirInstData, got_extra: []u32) !void { - // Step 1: Compare instruction count - // Step 2: Compare instruction tags - // Step 3: Compare instruction data field-by-field (like expectEqualData in astgen_test) - // Step 4: Compare extra data -} +fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: []const u8, check: Stage) !void { + ... + if (check == .sema) { + var c_result = try sema_c.cSema(gpa, @bitCast(c_zir)); + defer c_result.deinit(gpa); + try sema_c.expectEqualAir(c_result.air(), c_result.c_air); -/// Corpus test: run Sema on real Zig files -test "sema: corpus" { - inline for (corpus_files) |path| { - semaCheck(gpa, @embedFile(path)) catch { - std.debug.print("FAIL: {s}\n", .{path}); - return error.TestFailed; - }; + var zig_result = try sema.zigSema(gpa, src_path); + defer zig_result.deinit(); } } ``` -### 3.3 Bridge Extension +## Open issue: zig_lib overlap -The `zig0_bridge.zig` needs a new export for Sema: +When the source file is under `lib/` (all current corpus files are), the +Compilation's auto-created `std` module claims the same files. Fix is to +ensure the root module path uses `.root = .none` (absolute, not associated +with zig_lib). Current code does this but needs testing. -```zig -// New C API function exposed via zig0_bridge.zig: -pub export fn semaAnalyze(zir: *c.Zir) c.Air { ... 
} -``` - -And the corresponding C function in a new `sema.c`: - -```c -// sema.c — Sema entry point -Air semaAnalyze(const Zir* zir, const Ast* ast) { - Sema sema = semaInit(&global_ip, *zir); - // ... set up root block, analyze body ... - semaDeinit(&sema); // frees internal state, AIR is returned - return sema.air; -} -``` +## Key references -### 3.4 Test Progression Strategy +| What | Where | +|---|---| +| Simplest Compilation.create | `src/main.zig:5588` (jitCmd) | +| Config.resolve | `src/Compilation/Config.zig:152` | +| Package.Module.create | `src/Package/Module.zig:110` | +| Directories struct | `src/Compilation.zig:709` | +| Path.fromUnresolved | `src/Compilation.zig:510` | +| Cache.Directory | `lib/std/Build/Cache/Directory.zig:1` | +| ResolvedTarget | `src/Package/Module.zig:87` | +| resolveTargetQuery | `lib/std/zig/system.zig:183` | +| Progress.Node.none | used in `src/Compilation.zig:261` | +| ErrorBundle.renderToStdErr | `lib/std/zig/ErrorBundle.zig:165` | +| thread_stack_size = 60<<20 | `src/main.zig:42` | -Following the astgen porting pattern: +## Verification -1. **Start with trivial cases**: empty source, single const decl, simple expressions -2. **Use `SkipZigTest` / commented corpus entries** to gate unimplemented features -3. **Orchestrator skill** (`.claude/skills/port-sema/`) drives incremental enabling -4. **Each ZIR instruction handler** is added as tests require it - -Initial test cases (in order): -``` -"" // empty module -"const x = 0;" // const declaration, int literal -"const x = 1 + 2;" // binary arithmetic (comptime) -"fn foo() void {}" // empty function -"fn foo(x: u32) u32 { return x; }" // function with param and return -"const S = struct { x: u32 };" // struct declaration -"test \"t\" { _ = 1; }" // test declaration -``` - ---- - -## 4. 
Files to Create - -| File | Purpose | Upstream Reference | -|------|---------|-------------------| -| `stage0/intern_pool.h` | InternPool data structures | `src/InternPool.zig` | -| `stage0/intern_pool.c` | InternPool init/deinit/intern/lookup | `src/InternPool.zig` | -| `stage0/air.h` | AIR instruction tags, data, ref types | `src/Air.zig` | -| `stage0/air.c` | Air deinit | `src/Air.zig` | -| `stage0/type.h` | Type helper function declarations | `src/Type.zig` | -| `stage0/type.c` | Type helper implementations | `src/Type.zig` | -| `stage0/value.h` | Value helper function declarations | `src/Value.zig` | -| `stage0/value.c` | Value helper implementations | `src/Value.zig` | -| `stage0/sema.h` | Sema, Block, InstMap structures | `src/Sema.zig` | -| `stage0/sema.c` | Sema analysis functions | `src/Sema.zig` | -| `stage0/sema_test.zig` | Test framework | new (modeled on `astgen_test.zig`) | - -### Files to Modify - -| File | Change | -|------|--------| -| `stage0/zig0.c` | Add sema step after astgen | -| `stage0/zig0_bridge.zig` | Add sema C→Zig bridge functions | -| `stage0/common.h` | Possibly add new macros (hash table, etc.) | -| `stage0/build.zig` (parent) | Add sema.c to compilation, sema_test.zig to tests | - ---- - -## 5. 
Implementation Phases - -### Phase A: Data structures only (this plan) -- Create all `.h` files with struct/enum/union definitions -- Create minimal `.c` files (init/deinit only) -- No Sema logic yet - -### Phase B: InternPool core -- Implement `ipInit`, `ipDeinit`, `ipIntern`, `ipIndexToKey` -- Pre-populate with all ~140 pre-interned types/values -- Hash table for deduplication - -### Phase C: Test framework skeleton -- Create `sema_test.zig` with infrastructure -- First test: empty source through Sema (C path stubs → compare) -- Wire up build system - -### Phase D: First Sema instruction handlers (incremental) -- Start with declaration/block/return instructions -- Each handler follows the upstream Sema.zig mechanically -- Enable test cases one at a time (astgen porting pattern) - ---- - -## 6. Verification - -```bash -# After Phase A (data structures): -./zig-out/bin/zig build fmt-zig0 # C formatting check -./zig-out/bin/zig build test-zig0 -Dzig0-cc=tcc # Existing tests still pass - -# After Phase C (test framework): -./zig-out/bin/zig build test-zig0 # New sema tests run (trivial cases) - -# After Phase D (incremental handlers): -./zig-out/bin/zig build all-zig0 -Dvalgrind # Full suite including sema tests +```sh +cd ~/code/zig && ./zig-out/bin/zig build test-zig0 -Dzig0-cc=tcc 2>&1 | tail -5 ``` ---- - -## 7. Key Design Decisions - -1. **InternPool hash table**: Use open addressing with linear probing. Keys are hashed by their tag + fields. Simple and cache-friendly. - -2. **Memory management**: Follow existing pattern — `calloc`/`realloc` with `exit(1)` on failure. InternPool owns all interned data. - -3. **String interning**: Reuse the same `string_bytes` pattern from ZIR (packed bytes with length-prefixed or null-terminated strings). InternPool gets its own string table separate from ZIR's. - -4. **Type/Value as thin wrappers**: In C, `TypeIndex` and `ValueIndex` are just `typedef InternPoolIndex`. Helper functions take `InternPool*` as first arg. 
This avoids the method-on-struct pattern that's natural in Zig but awkward in C. - -5. **No Zcu**: Replace with `SemaCtx` embedded in `Sema` struct. Namespaces are simplified arrays. Error reporting reuses `SET_ERROR` macro. - -6. **AIR Ref encoding**: Different from ZIR. AIR uses the MSB (bit 31) as a tag: - - Bit 31 = 0 → InternPool index (lower 31 bits) - - Bit 31 = 1 → AIR instruction index (lower 31 bits) - - `NONE` = UINT32_MAX (all bits set) - This is cleaner than ZIR's offset-based encoding. +Should exit 0 with no output. diff --git a/stage0/sema.zig b/stage0/sema.zig @@ -0,0 +1,142 @@ +// sema.zig — Run the real Zig sema pipeline via Compilation. +// Used by stages_test.zig to produce reference sema output. + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const zig_internals = @import("zig_internals"); +const Compilation = zig_internals.Compilation; +const Package = zig_internals.Package; + +/// Result of running the real Zig sema pipeline via Compilation. +/// Owns the Compilation, Directories, thread pool, temp dir, and arena. +pub const ZigSemaResult = struct { + comp: *Compilation, + dirs: Compilation.Directories, + tmp_dir: std.testing.TmpDir, + arena_state: std.heap.ArenaAllocator, + thread_pool: std.Thread.Pool, + + pub fn deinit(self: *ZigSemaResult) void { + self.comp.destroy(); + self.dirs.deinit(); + self.thread_pool.deinit(); + self.tmp_dir.cleanup(); + self.arena_state.deinit(); + } +}; + +/// Run the real Zig sema pipeline on the source file at `src_path`. +/// `src_path` is relative to cwd (the repo root), e.g. "lib/std/crypto/codecs.zig". +pub fn zigSema(gpa: Allocator, src_path: []const u8) !ZigSemaResult { + var arena_state = std.heap.ArenaAllocator.init(gpa); + errdefer arena_state.deinit(); + const arena = arena_state.allocator(); + + // Set up temp dir for Compilation cache. + var tmp_dir = std.testing.tmpDir(.{}); + errdefer tmp_dir.cleanup(); + + // Resolve paths. 
+ const cwd_path = try std.fs.cwd().realpathAlloc(arena, "."); + const zig_lib_path = try std.fs.path.join(arena, &.{ cwd_path, "lib" }); + const zig_lib_handle = try std.fs.cwd().openDir("lib", .{}); + const tmp_path = try tmp_dir.dir.realpathAlloc(arena, "."); + const cache_path = try std.fmt.allocPrint(arena, "{s}/.cache", .{tmp_path}); + try tmp_dir.dir.makeDir(".cache"); + const cache_handle = try tmp_dir.dir.openDir(".cache", .{}); + + var dirs = Compilation.Directories{ + .cwd = cwd_path, + .zig_lib = .{ .path = zig_lib_path, .handle = zig_lib_handle }, + .global_cache = .{ .path = cache_path, .handle = cache_handle }, + .local_cache = .{ .path = cache_path, .handle = cache_handle }, + }; + errdefer dirs.deinit(); + + // Hardcode x86_64-linux-musl target. + const resolved_target: Package.Module.ResolvedTarget = .{ + .result = try std.zig.system.resolveTargetQuery(.{ + .cpu_arch = .x86_64, + .os_tag = .linux, + .abi = .musl, + }), + .is_native_os = false, + .is_native_abi = false, + .is_explicit_dynamic_linker = false, + }; + + const config = try Compilation.Config.resolve(.{ + .output_mode = .Obj, + .resolved_target = resolved_target, + .have_zcu = true, + .emit_bin = false, + .is_test = false, + }); + + // Split src_path into directory and filename for the Module. + // Use .root = .none with absolute path to avoid the source being + // associated with zig_lib (which would conflict with the std module + // when compiling files under lib/). 
+ const src_dir = std.fs.path.dirname(src_path) orelse "."; + const src_basename = std.fs.path.basename(src_path); + const abs_src_dir = try std.fs.cwd().realpathAlloc(arena, src_dir); + const root_path: Compilation.Path = .{ .root = .none, .sub_path = abs_src_dir }; + + const root_mod = try Package.Module.create(arena, .{ + .paths = .{ + .root = root_path, + .root_src_path = src_basename, + }, + .fully_qualified_name = "root", + .cc_argv = &.{}, + .inherited = .{ + .resolved_target = resolved_target, + }, + .global = config, + .parent = null, + }); + + var thread_pool: std.Thread.Pool = undefined; + try thread_pool.init(.{ + .allocator = gpa, + .n_jobs = 1, + .track_ids = true, + .stack_size = 60 << 20, + }); + errdefer thread_pool.deinit(); + + var create_diag: Compilation.CreateDiagnostic = undefined; + const comp = Compilation.create(gpa, arena, &create_diag, .{ + .dirs = dirs, + .root_name = "test", + .config = config, + .root_mod = root_mod, + .emit_bin = .no, + .thread_pool = &thread_pool, + .cache_mode = .whole, + }) catch |err| switch (err) { + error.CreateFail => { + std.debug.print("Compilation.create failed: {any}\n", .{create_diag}); + return error.ZigSemaFailed; + }, + else => return err, + }; + errdefer comp.destroy(); + + try comp.update(std.Progress.Node.none); + + var error_bundle = try comp.getAllErrorsAlloc(); + defer error_bundle.deinit(gpa); + if (error_bundle.errorMessageCount() > 0) { + error_bundle.renderToStdErr(.{ .ttyconf = .no_color }); + return error.ZigSemaFailed; + } + + return .{ + .comp = comp, + .dirs = dirs, + .tmp_dir = tmp_dir, + .arena_state = arena_state, + .thread_pool = thread_pool, + }; +} diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig @@ -7,6 +7,7 @@ const parser_test = @import("parser_test.zig"); const astgen_test = @import("astgen_test.zig"); const sema_test = @import("sema_test.zig"); const sema_c = @import("sema_c.zig"); +const sema = @import("sema.zig"); const c = parser_test.c; const sc = sema_test.c; 
const zig_internals = @import("zig_internals"); @@ -20,7 +21,7 @@ test "stages: corpus" { const gpa = std.testing.allocator; const check = Stage.sema; inline for (corpus_files) |path| { - stagesCheck(gpa, @embedFile(path), check) catch { + stagesCheck(gpa, @embedFile(path), path["../".len..], check) catch { std.debug.print("FAIL: {s}\n", .{path}); return error.TestFailed; }; @@ -31,7 +32,7 @@ test "stages: corpus" { } } -fn stagesCheck(gpa: Allocator, source: [:0]const u8, check: Stage) !void { +fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: []const u8, check: Stage) !void { // Parse once with C parser var c_ast = c.astParse(source.ptr, @intCast(source.len)); defer c.astDeinit(&c_ast); @@ -73,10 +74,9 @@ fn stagesCheck(gpa: Allocator, source: [:0]const u8, check: Stage) !void { // Verify C→Zig Air conversion is faithful (tags, data, extra). try sema_c.expectEqualAir(result.air(), result.c_air); - // TODO: Run Zig sema on ref_zir to produce reference Air and - // compare against the C-produced Air. This requires a full - // Compilation context (Zcu, InternPool, Package.Module, etc.) - // which is not yet set up for unit tests. + // Run Zig sema on the same source and verify it succeeds. + var zig_result = try sema.zigSema(gpa, src_path); + defer zig_result.deinit(); } }