commit 33e3e5475d45061237d74cce3d995784dac4cf21 (tree)
parent 00370ba4dc9152d19345d0da166f020baeca0ed2
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Thu, 19 Feb 2026 09:14:28 +0000
WIP: wire up Zig Compilation for reference sema in stage0 tests
Add zigSema helper (stage0/sema.zig) that creates a Compilation,
points it at a source file, and runs the full Zig sema pipeline.
Export Compilation and Package from test_exports.zig. Wire up in
stagesCheck to run Zig sema alongside C sema.
Not yet working: files under lib/ conflict with the auto-created
std module ("file exists in modules 'root' and 'std'"). The fix
(using .root = .none with absolute path) needs testing.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
4 files changed, 254 insertions(+), 504 deletions(-)
diff --git a/src/test_exports.zig b/src/test_exports.zig
@@ -1,3 +1,5 @@
pub const InternPool = @import("InternPool.zig");
pub const Air = @import("Air.zig");
+pub const Compilation = @import("Compilation.zig");
+pub const Package = @import("Package.zig");
// Later: pub const Sema = @import("Sema.zig");
diff --git a/stage0/.claude/sema-plan.md b/stage0/.claude/sema-plan.md
@@ -1,541 +1,147 @@
-# Sema Porting Plan for zig/stage0
+# Plan: Wire up Zig Compilation for reference sema in stage0 tests
## Context
-The stage0 bootstrap tool currently has tokenizer, parser, and AstGen fully ported to C.
-The next major milestone is Sema (semantic analysis), which transforms ZIR into AIR.
-Before Sema functions can be ported, the data structures it depends on must exist in C.
+`stagesCheck` in `stages_test.zig` runs C sema and converts the result to
+Zig Air, but has no reference Zig sema output to compare against. We want
+to run the real Zig sema pipeline on the same source and verify it succeeds.
-This plan covers:
-1. Identifying Sema's dependencies and their porting order
-2. Defining the C data structures (header files)
-3. Designing a test framework modeled on astgen_test.zig
+## Current State (in progress)
-Scope decisions:
-- **InternPool**: Simplified single-threaded (no sharding/deps/incremental)
-- **Sema**: Incremental subset-first approach, test-driven
-- **Compilation context**: Minimal bootstrap `SemaCtx` (not full Zcu)
+### Files changed so far:
----
+1. **`src/test_exports.zig`** — added `Compilation` and `Package` exports
+2. **`stage0/sema.zig`** — NEW file with `ZigSemaResult` struct and `zigSema` helper
+3. **`stage0/sema_c.zig`** — unchanged (zigSema was moved out to `sema.zig`)
+4. **`stage0/stages_test.zig`** — imports `sema.zig`, passes `src_path` to `stagesCheck`
-## 1. Dependency Graph and Porting Order
+### Problem to solve:
-```
- ┌──────────┐
- │ sema.h │ (Sema context, Block, InstMap)
- └────┬─────┘
- │ depends on
- ┌─────────────┼─────────────┐
- │ │ │
- ┌─────▼────┐ ┌─────▼────┐ ┌────▼─────┐
- │ air.h │ │ type.h │ │ value.h │
- └─────┬────┘ └─────┬────┘ └────┬─────┘
- │ │ │
- └─────────────┼─────────────┘
- │ all depend on
- ┌───────▼────────┐
- │ intern_pool.h │
- └───────┬────────┘
- │ depends on
- ┌───────▼────────┐
- │ common.h │ (already exists)
- │ zir.h │ (already exists)
- └────────────────┘
-```
-
-**Porting order (bottom-up):**
-
-| Phase | File | Upstream Source | Estimated C lines |
-|-------|------|---------------|-------------------|
-| 1 | `intern_pool.h` | `src/InternPool.zig` | ~800 |
-| 2 | `air.h` | `src/Air.zig` | ~600 |
-| 3 | `type.h` / `value.h` | `src/Type.zig`, `src/Value.zig` | ~200 |
-| 4 | `sema.h` | `src/Sema.zig` (struct defs only) | ~300 |
-| 5 | `sema_test.zig` | new (modeled on `astgen_test.zig`) | ~400 |
-
----
-
-## 2. Data Structure Definitions
-
-### 2.1 `intern_pool.h` — Simplified Single-Threaded InternPool
-
-**Upstream**: `src/InternPool.zig` (~13K lines)
-**Scope**: Type/value interning and lookup only. No threading, no dependency tracking, no incremental compilation.
+`zigSema` takes a cwd-relative `src_path` (e.g. `"lib/std/crypto/codecs.zig"`)
+and points a `Compilation` at it. However, files under `lib/` overlap with the
+`zig_lib` directory, causing:
```
-Key types to define:
-
-InternPoolIndex (uint32_t)
- - Pre-interned constants matching Zig's InternPool.Index enum:
- IP_INDEX_U0_TYPE, IP_INDEX_U8_TYPE, IP_INDEX_I32_TYPE, ...
- IP_INDEX_BOOL_TYPE, IP_INDEX_VOID_TYPE, IP_INDEX_TYPE_TYPE, ...
- IP_INDEX_UNDEF, IP_INDEX_ZERO, IP_INDEX_ONE, IP_INDEX_BOOL_TRUE, ...
- IP_INDEX_NONE = UINT32_MAX
- - ~120 pre-interned type indices + ~20 pre-interned value indices
-
-SimpleType enum (matching InternPool.SimpleType):
- f16, f32, f64, f80, f128, usize, isize, c_char, ...,
- anyopaque, bool, void, type, anyerror, comptime_int, ...
-
-SimpleValue enum (matching InternPool.SimpleValue):
- undefined, void_val, null_val, empty_tuple, true_val, false_val, unreachable_val
-
-IntType struct:
- uint16_t bits;
- uint8_t signedness; // 0=unsigned, 1=signed
-
-PtrType struct:
- InternPoolIndex child;
- InternPoolIndex sentinel; // IP_INDEX_NONE if absent
- uint32_t flags; // packed: size(2), alignment(6), is_const(1), is_volatile(1), is_allowzero(1), address_space(5), vector_index(16)
- uint32_t packed_offset; // packed: host_size(16), bit_offset(16)
-
-ArrayType struct:
- uint64_t len;
- InternPoolIndex child;
- InternPoolIndex sentinel; // IP_INDEX_NONE if absent
-
-VectorType struct:
- uint32_t len;
- InternPoolIndex child;
-
-ErrorUnionType struct:
- InternPoolIndex error_set;
- InternPoolIndex payload;
-
-FuncType struct:
- InternPoolIndex* param_types;
- uint32_t param_count;
- InternPoolIndex return_type;
- uint32_t comptime_bits;
- uint32_t noalias_bits;
- uint8_t cc; // calling convention
- bool is_var_args;
- bool is_generic;
- bool is_noinline;
-
-ErrorSetType struct:
- uint32_t* names; // NullTerminatedString indices
- uint32_t names_count;
-
-NamespaceType struct (for struct/union/enum/opaque):
- uint32_t zir_index; // TrackedInst.Index equivalent
- uint32_t type_hash; // for reified types
-
-InternPoolKeyTag enum:
- IP_KEY_INT_TYPE, IP_KEY_PTR_TYPE, IP_KEY_ARRAY_TYPE,
- IP_KEY_VECTOR_TYPE, IP_KEY_OPT_TYPE, IP_KEY_ANYFRAME_TYPE,
- IP_KEY_ERROR_UNION_TYPE, IP_KEY_SIMPLE_TYPE, IP_KEY_STRUCT_TYPE,
- IP_KEY_TUPLE_TYPE, IP_KEY_UNION_TYPE, IP_KEY_OPAQUE_TYPE,
- IP_KEY_ENUM_TYPE, IP_KEY_FUNC_TYPE, IP_KEY_ERROR_SET_TYPE,
- IP_KEY_INFERRED_ERROR_SET_TYPE,
- IP_KEY_UNDEF, IP_KEY_SIMPLE_VALUE, IP_KEY_VARIABLE,
- IP_KEY_EXTERN, IP_KEY_FUNC, IP_KEY_INT, IP_KEY_ERR,
- IP_KEY_ERROR_UNION, IP_KEY_ENUM_LITERAL, IP_KEY_ENUM_TAG,
- IP_KEY_FLOAT, IP_KEY_PTR, IP_KEY_SLICE, IP_KEY_OPT,
- IP_KEY_AGGREGATE, IP_KEY_UNION_VALUE
-
-InternPoolKey struct (tagged union):
- InternPoolKeyTag tag;
- union { IntType int_type; PtrType ptr_type; ... } data;
-
-InternPool struct:
- InternPoolKey* items; // indexed by InternPoolIndex
- uint32_t items_len;
- uint32_t items_cap;
- // Hash table for deduplication:
- uint32_t* hash_table; // maps hash → index
- uint32_t hash_cap;
- // String storage:
- uint8_t* string_bytes;
- uint32_t string_bytes_len;
- uint32_t string_bytes_cap;
- // Global error set:
- uint32_t* error_names;
- uint32_t error_names_len;
-
-Functions:
- InternPool ipInit(void);
- void ipDeinit(InternPool*);
- InternPoolIndex ipIntern(InternPool*, InternPoolKey);
- InternPoolKey ipIndexToKey(InternPool*, InternPoolIndex);
- InternPoolIndex ipTypeOf(InternPool*, InternPoolIndex);
+error: file exists in modules 'root' and 'std'
```
-### 2.2 `air.h` — Analyzed Intermediate Representation
+The current attempted fix uses `.root = .none` with an absolute path to
+dissociate the root module from zig_lib, but this hasn't been tested yet.
-**Upstream**: `src/Air.zig` (~2,172 lines)
+## Approach: file-path based
-```
-Key types to define:
-
-AirInstTag enum (uint8_t, ~204 tags):
- AIR_INST_ARG, AIR_INST_ADD, AIR_INST_ADD_SAFE, AIR_INST_ADD_OPTIMIZED,
- AIR_INST_ADD_WRAP, AIR_INST_ADD_SAT, AIR_INST_SUB, ...
- AIR_INST_BLOCK, AIR_INST_LOOP, AIR_INST_BR, AIR_INST_CALL, ...
- (use X-macro pattern like ZIR_INST_FOREACH_TAG)
-
-AirInstRef (uint32_t):
- Uses MSB (bit 31) as tag bit:
- - Bit 31 = 0: value is an InternPool index (lower 31 bits)
- - Bit 31 = 1: value is an AIR instruction index (lower 31 bits)
- - AIR_REF_NONE = UINT32_MAX (special sentinel)
- Helper macros:
- #define AIR_REF_NONE UINT32_MAX
- #define AIR_REF_IS_INST(r) (((r) >> 31) != 0 && (r) != AIR_REF_NONE)
- #define AIR_REF_IS_IP(r) (((r) >> 31) == 0)
- #define AIR_REF_TO_INST(r) ((r) & 0x7FFFFFFF)
- #define AIR_REF_TO_IP(r) ((r) & 0x7FFFFFFF)
- #define AIR_REF_FROM_INST(i) ((i) | 0x80000000)
- #define AIR_REF_FROM_IP(i) (i)
-
-AirInstData union (8 bytes, matching Air.Inst.Data):
- struct { AirInstRef operand; } un_op;
- struct { AirInstRef lhs, rhs; } bin_op;
- struct { AirInstRef ty; } ty;
- struct { AirInstRef ty; uint32_t zir_param_index; } arg;
- struct { AirInstRef ty; AirInstRef operand; } ty_op;
- struct { AirInstRef ty; uint32_t payload; } ty_pl;
- struct { uint32_t block_inst; AirInstRef operand; } br;
- struct { AirInstRef operand; uint32_t payload; } pl_op;
- struct { uint32_t line; uint32_t column; } dbg_stmt;
- ... (all variants from Air.Inst.Data)
-
-Air struct:
- uint32_t inst_len;
- uint32_t inst_cap;
- AirInstTag* inst_tags;
- AirInstData* inst_datas;
- uint32_t extra_len;
- uint32_t extra_cap;
- uint32_t* extra;
-
-Extra payload structs:
- AirBlock, AirCall, AirCondBr, AirSwitchBr, AirTry, AirBin, etc.
-```
+Point a `Compilation` at the source file directly (no temp-file copy of source).
+The Compilation reads the file, parses, AstGens, and runs sema — exactly
+like the real compiler.
-### 2.3 `type.h` / `value.h` — Thin Wrappers
+Still needs a temp dir for the Compilation cache.
-**Upstream**: `src/Type.zig` (~4,179 lines), `src/Value.zig` (~3,288 lines)
+Reference: `jitCmd` in `src/main.zig:5588-5654` shows the simplest
+Compilation.create + update flow.
-In C, Type and Value are just `InternPoolIndex` with helper functions.
-These start minimal and grow as Sema functions need them.
+## Changes needed
-```
-type.h:
- typedef InternPoolIndex TypeIndex;
- #define TYPE_NONE IP_INDEX_NONE
-
- // Functions ported on-demand as Sema needs them:
- uint32_t typeZigTypeTag(InternPool*, TypeIndex);
- TypeIndex typeChildType(InternPool*, TypeIndex);
- TypeIndex typeElemType(InternPool*, TypeIndex);
- bool typeIsSlice(InternPool*, TypeIndex);
- bool typeIsCPtr(InternPool*, TypeIndex);
- uint64_t typeArrayLen(InternPool*, TypeIndex);
- PtrType typePtrInfo(InternPool*, TypeIndex);
-
-value.h:
- typedef InternPoolIndex ValueIndex;
- #define VALUE_NONE IP_INDEX_NONE
-
- TypeIndex valueToType(ValueIndex);
- ValueIndex valueFromInterned(InternPoolIndex);
- uint64_t valueToUnsignedInt(InternPool*, ValueIndex);
+### 1. `src/test_exports.zig` — export Compilation types (DONE)
+
+```zig
+pub const Compilation = @import("Compilation.zig");
+pub const Package = @import("Package.zig");
```
-### 2.4 `sema.h` — Sema Context and Block
+### 2. `stage0/sema.zig` — zigSema helper (IN PROGRESS)
-**Upstream**: `src/Sema.zig` (lines 41-500)
+```zig
+const Compilation = @import("zig_internals").Compilation;
+const Package = @import("zig_internals").Package;
+
+pub const ZigSemaResult = struct {
+ comp: *Compilation,
+ dirs: Compilation.Directories,
+ tmp_dir: std.testing.TmpDir,
+ arena_state: std.heap.ArenaAllocator,
+ thread_pool: std.Thread.Pool,
+
+ pub fn deinit(self: *ZigSemaResult) void {
+ self.comp.destroy();
+ self.dirs.deinit();
+ self.thread_pool.deinit();
+ self.tmp_dir.cleanup();
+ self.arena_state.deinit();
+ }
+};
-```
-RuntimeIndex (uint32_t):
- #define RUNTIME_INDEX_ZERO 0
- #define RUNTIME_INDEX_COMPTIME_FIELD_PTR UINT32_MAX
-
-InstMap struct:
- AirInstRef* items;
- uint32_t start; // ZIR instruction index offset
- uint32_t len;
-
-SemaBlock struct:
- SemaBlock* parent;
- Sema* sema;
- uint32_t namespace; // NamespaceIndex
- uint32_t* instructions; // AIR instruction indices
- uint32_t instructions_len;
- uint32_t instructions_cap;
- SemaBlockLabel* label;
- SemaBlockInlining* inlining;
- int32_t runtime_cond; // LazySrcLoc or -1
- int32_t runtime_loop; // LazySrcLoc or -1
- uint32_t runtime_index;
- uint32_t comptime_reason; // 0 = not comptime
- bool is_typeof;
- AirInstRef error_return_trace_index;
- bool want_safety; // nullable via flag
- bool want_safety_set;
- uint8_t float_mode; // strict=0, optimized=1
- uint32_t src_base_inst;
- uint32_t type_name_ctx; // NullTerminatedString
-
-SemaBlockLabel struct:
- uint32_t zir_block; // Zir.Inst.Index
- SemaBlockMerges merges;
-
-SemaBlockInlining struct:
- SemaBlock* call_block;
- int32_t call_src;
- InternPoolIndex func;
- bool is_generic_instantiation;
- bool has_comptime_args;
- AirInstRef comptime_result;
- SemaBlockMerges merges;
-
-SemaBlockMerges struct:
- uint32_t block_inst; // Air.Inst.Index
- AirInstRef* results;
- uint32_t results_len, results_cap;
- uint32_t* br_list;
- uint32_t br_list_len, br_list_cap;
-
-InferredErrorSet struct:
- InternPoolIndex func;
- uint32_t* error_names; // NullTerminatedString indices
- uint32_t error_names_len, error_names_cap;
- InternPoolIndex* inferred_sets;
- uint32_t inferred_sets_len, inferred_sets_cap;
- InternPoolIndex resolved; // IP_INDEX_NONE until resolved
-
-MaybeComptimeAlloc struct:
- uint32_t runtime_index;
- uint32_t* store_insts;
- uint32_t stores_len, stores_cap;
-
-ComptimeAlloc struct:
- InternPoolIndex val;
- bool is_const;
- int32_t src; // LazySrcLoc
- uint8_t alignment;
- uint32_t runtime_index;
-
-Sema struct:
- InternPool* ip; // shared intern pool
- Air air; // output AIR being built
- Zir code; // input ZIR being analyzed
- InstMap inst_map; // ZIR→AIR mapping
- InternPoolIndex owner; // AnalUnit
- InternPoolIndex func_index;
- bool func_is_naked;
- TypeIndex fn_ret_ty;
- InferredErrorSet* fn_ret_ty_ies; // NULL if not inferred
- uint32_t branch_quota;
- uint32_t branch_count;
- uint32_t comptime_break_inst;
- // Hash maps (simplified as arrays for bootstrap):
- ComptimeAlloc* comptime_allocs;
- uint32_t comptime_allocs_len, comptime_allocs_cap;
- bool allow_memoize;
- uint8_t branch_hint; // 0=none
- bool has_compile_errors;
- char err_buf[ERR_BUF_SIZE];
-
-Functions (stubs, filled incrementally):
- Sema semaInit(InternPool*, Zir);
- void semaDeinit(Sema*);
- void semaAnalyzeFnBody(Sema*, SemaBlock*, uint32_t* body, uint32_t body_len);
- AirInstRef semaResolveInst(Sema*, uint32_t zir_ref);
+pub fn zigSema(gpa: Allocator, src_path: []const u8) !ZigSemaResult { ... }
```
----
+Inside `zigSema`:
-## 3. Test Framework: `sema_test.zig`
+1. **Arena**: `var arena_state = std.heap.ArenaAllocator.init(gpa);`
+2. **Temp dir**: `var tmp_dir = std.testing.tmpDir(.{});` (for cache only)
+3. **Dirs**: Construct `Compilation.Directories` manually:
+ - `zig_lib` = open `lib/` dir with absolute path
+ - `global_cache` = `local_cache` = `.cache` subdir of tmp_dir
+4. **Target**: hardcode x86_64-linux-musl
+5. **Config**: `Compilation.Config.resolve(.{ .output_mode = .Obj, .have_zcu = true, .emit_bin = false, .is_test = false, .resolved_target = ... })`
+6. **Root path**: Must use `.root = .none` with absolute path to the source
+ dir so it doesn't overlap with zig_lib (avoids "file exists in modules
+ 'root' and 'std'" error)
+7. **Module**: `Package.Module.create(arena, .{ .paths = .{ .root = root_path, .root_src_path = basename }, ... })`
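+8. **Thread pool**: `std.Thread.Pool` with `n_jobs = 1, track_ids = true, stack_size = 60 << 20`. The pool must live at a stable address (heap or arena, not a by-value field copied out of `zigSema`), because its worker threads and the Compilation both keep a pointer to it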
+9. **Create**: `Compilation.create(gpa, arena, &diag, .{ ... .emit_bin = .no, .cache_mode = .whole })`
+10. **Run**: `try comp.update(std.Progress.Node.none)`
+11. **Errors**: `comp.getAllErrorsAlloc()` → if errors, print to stderr and return `error.ZigSemaFailed`
+12. **Return**: ZigSemaResult owning comp, dirs, tmp_dir, arena, thread_pool
-**Modeled on**: `astgen_test.zig` and `stages_test.zig`
+### 3. `stage0/stages_test.zig` — use zigSema in stagesCheck (DONE)
-### 3.1 Architecture
-
-```
-Source code ([:0]const u8)
- │
- ├─── C path ───────────────────────────────────┐
- │ c.astParse() → C AST │
- │ c.astGen() → C ZIR │
- │ c.semaAnalyze() → C AIR │
- │ │
- ├─── Zig reference path ───────────────────────┤
- │ zigAst() → Zig AST │
- │ AstGen.generate() → Zig ZIR │
- │ setupSema() + analyzeFnBody() → Zig AIR │
- │ │
- └─── Compare ──────────────────────────────────┘
- expectEqualAir(zig_air, c_air)
-```
-
-### 3.2 Key Test Functions
+The inline for passes `path["../".len..]` to strip the `"../"` prefix
+(corpus paths are relative to stage0/, stripping gives cwd-relative paths):
```zig
-// sema_test.zig
-
-const c = parser_test.c;
-
-/// Set up a minimal Zig Sema environment and analyze a module's ZIR.
-/// Returns the AIR for the top-level declarations.
-fn refAir(gpa: Allocator, source: [:0]const u8) !Air {
- // 1. Parse and generate ZIR via Zig stdlib
- var tree = try std.zig.Ast.parse(gpa, source, .zig);
- defer tree.deinit(gpa);
- var zir = try std.zig.AstGen.generate(gpa, tree);
- defer zir.deinit(gpa);
- // 2. Set up minimal compilation context
- // 3. Run Sema
- // 4. Return AIR
+inline for (corpus_files) |path| {
+ stagesCheck(gpa, @embedFile(path), path["../".len..], check) catch { ... };
}
-/// Compare two AIR outputs instruction-by-instruction.
-fn expectEqualAir(gpa: Allocator, ref: Air, got_tags: []AirInstTag,
- got_datas: []AirInstData, got_extra: []u32) !void {
- // Step 1: Compare instruction count
- // Step 2: Compare instruction tags
- // Step 3: Compare instruction data field-by-field (like expectEqualData in astgen_test)
- // Step 4: Compare extra data
-}
+fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: []const u8, check: Stage) !void {
+ ...
+ if (check == .sema) {
+ var c_result = try sema_c.cSema(gpa, @bitCast(c_zir));
+ defer c_result.deinit(gpa);
+ try sema_c.expectEqualAir(c_result.air(), c_result.c_air);
-/// Corpus test: run Sema on real Zig files
-test "sema: corpus" {
- inline for (corpus_files) |path| {
- semaCheck(gpa, @embedFile(path)) catch {
- std.debug.print("FAIL: {s}\n", .{path});
- return error.TestFailed;
- };
+ var zig_result = try sema.zigSema(gpa, src_path);
+ defer zig_result.deinit();
}
}
```
-### 3.3 Bridge Extension
+## Open issue: zig_lib overlap
-The `zig0_bridge.zig` needs a new export for Sema:
+When the source file is under `lib/` (all current corpus files are), the
+Compilation's auto-created `std` module claims the same files. Fix is to
+ensure the root module path uses `.root = .none` (absolute, not associated
+with zig_lib). Current code does this but needs testing.
-```zig
-// New C API function exposed via zig0_bridge.zig:
-pub export fn semaAnalyze(zir: *c.Zir) c.Air { ... }
-```
-
-And the corresponding C function in a new `sema.c`:
-
-```c
-// sema.c — Sema entry point
-Air semaAnalyze(const Zir* zir, const Ast* ast) {
- Sema sema = semaInit(&global_ip, *zir);
- // ... set up root block, analyze body ...
- semaDeinit(&sema); // frees internal state, AIR is returned
- return sema.air;
-}
-```
+## Key references
-### 3.4 Test Progression Strategy
+| What | Where |
+|---|---|
+| Simplest Compilation.create | `src/main.zig:5588` (jitCmd) |
+| Config.resolve | `src/Compilation/Config.zig:152` |
+| Package.Module.create | `src/Package/Module.zig:110` |
+| Directories struct | `src/Compilation.zig:709` |
+| Path.fromUnresolved | `src/Compilation.zig:510` |
+| Cache.Directory | `lib/std/Build/Cache/Directory.zig:1` |
+| ResolvedTarget | `src/Package/Module.zig:87` |
+| resolveTargetQuery | `lib/std/zig/system.zig:183` |
+| Progress.Node.none | used in `src/Compilation.zig:261` |
+| ErrorBundle.renderToStdErr | `lib/std/zig/ErrorBundle.zig:165` |
+| thread_stack_size = 60<<20 | `src/main.zig:42` |
-Following the astgen porting pattern:
+## Verification
-1. **Start with trivial cases**: empty source, single const decl, simple expressions
-2. **Use `SkipZigTest` / commented corpus entries** to gate unimplemented features
-3. **Orchestrator skill** (`.claude/skills/port-sema/`) drives incremental enabling
-4. **Each ZIR instruction handler** is added as tests require it
-
-Initial test cases (in order):
-```
-"" // empty module
-"const x = 0;" // const declaration, int literal
-"const x = 1 + 2;" // binary arithmetic (comptime)
-"fn foo() void {}" // empty function
-"fn foo(x: u32) u32 { return x; }" // function with param and return
-"const S = struct { x: u32 };" // struct declaration
-"test \"t\" { _ = 1; }" // test declaration
-```
-
----
-
-## 4. Files to Create
-
-| File | Purpose | Upstream Reference |
-|------|---------|-------------------|
-| `stage0/intern_pool.h` | InternPool data structures | `src/InternPool.zig` |
-| `stage0/intern_pool.c` | InternPool init/deinit/intern/lookup | `src/InternPool.zig` |
-| `stage0/air.h` | AIR instruction tags, data, ref types | `src/Air.zig` |
-| `stage0/air.c` | Air deinit | `src/Air.zig` |
-| `stage0/type.h` | Type helper function declarations | `src/Type.zig` |
-| `stage0/type.c` | Type helper implementations | `src/Type.zig` |
-| `stage0/value.h` | Value helper function declarations | `src/Value.zig` |
-| `stage0/value.c` | Value helper implementations | `src/Value.zig` |
-| `stage0/sema.h` | Sema, Block, InstMap structures | `src/Sema.zig` |
-| `stage0/sema.c` | Sema analysis functions | `src/Sema.zig` |
-| `stage0/sema_test.zig` | Test framework | new (modeled on `astgen_test.zig`) |
-
-### Files to Modify
-
-| File | Change |
-|------|--------|
-| `stage0/zig0.c` | Add sema step after astgen |
-| `stage0/zig0_bridge.zig` | Add sema C→Zig bridge functions |
-| `stage0/common.h` | Possibly add new macros (hash table, etc.) |
-| `stage0/build.zig` (parent) | Add sema.c to compilation, sema_test.zig to tests |
-
----
-
-## 5. Implementation Phases
-
-### Phase A: Data structures only (this plan)
-- Create all `.h` files with struct/enum/union definitions
-- Create minimal `.c` files (init/deinit only)
-- No Sema logic yet
-
-### Phase B: InternPool core
-- Implement `ipInit`, `ipDeinit`, `ipIntern`, `ipIndexToKey`
-- Pre-populate with all ~140 pre-interned types/values
-- Hash table for deduplication
-
-### Phase C: Test framework skeleton
-- Create `sema_test.zig` with infrastructure
-- First test: empty source through Sema (C path stubs → compare)
-- Wire up build system
-
-### Phase D: First Sema instruction handlers (incremental)
-- Start with declaration/block/return instructions
-- Each handler follows the upstream Sema.zig mechanically
-- Enable test cases one at a time (astgen porting pattern)
-
----
-
-## 6. Verification
-
-```bash
-# After Phase A (data structures):
-./zig-out/bin/zig build fmt-zig0 # C formatting check
-./zig-out/bin/zig build test-zig0 -Dzig0-cc=tcc # Existing tests still pass
-
-# After Phase C (test framework):
-./zig-out/bin/zig build test-zig0 # New sema tests run (trivial cases)
-
-# After Phase D (incremental handlers):
-./zig-out/bin/zig build all-zig0 -Dvalgrind # Full suite including sema tests
+```sh
+cd ~/code/zig && ./zig-out/bin/zig build test-zig0 -Dzig0-cc=tcc 2>&1 | tail -5
```
----
-
-## 7. Key Design Decisions
-
-1. **InternPool hash table**: Use open addressing with linear probing. Keys are hashed by their tag + fields. Simple and cache-friendly.
-
-2. **Memory management**: Follow existing pattern — `calloc`/`realloc` with `exit(1)` on failure. InternPool owns all interned data.
-
-3. **String interning**: Reuse the same `string_bytes` pattern from ZIR (packed bytes with length-prefixed or null-terminated strings). InternPool gets its own string table separate from ZIR's.
-
-4. **Type/Value as thin wrappers**: In C, `TypeIndex` and `ValueIndex` are just `typedef InternPoolIndex`. Helper functions take `InternPool*` as first arg. This avoids the method-on-struct pattern that's natural in Zig but awkward in C.
-
-5. **No Zcu**: Replace with `SemaCtx` embedded in `Sema` struct. Namespaces are simplified arrays. Error reporting reuses `SET_ERROR` macro.
-
-6. **AIR Ref encoding**: Different from ZIR. AIR uses the MSB (bit 31) as a tag:
- - Bit 31 = 0 → InternPool index (lower 31 bits)
- - Bit 31 = 1 → AIR instruction index (lower 31 bits)
- - `NONE` = UINT32_MAX (all bits set)
- This is cleaner than ZIR's offset-based encoding.
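+Should produce no output. Note that the exit status of this pipeline is that of `tail`, not of the build, so judge success by the empty output (or drop the `| tail -5` to check the exit code).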
diff --git a/stage0/sema.zig b/stage0/sema.zig
@@ -0,0 +1,142 @@
+// sema.zig — Run the real Zig sema pipeline via Compilation.
+// Used by stages_test.zig to produce reference sema output.
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const zig_internals = @import("zig_internals");
+const Compilation = zig_internals.Compilation;
+const Package = zig_internals.Package;
+
+/// Result of running the real Zig sema pipeline via Compilation.
+/// Owns the Compilation, Directories, thread pool, temp dir, and arena;
+/// release all of them with `deinit`.
+///
+/// The arena state and the thread pool are held by pointer. The Compilation
+/// is created with `arena_state.allocator()` and with a pointer to the
+/// thread pool, and both refer to the address of the underlying object.
+/// Keeping them as by-value fields would leave those references pointing
+/// into `zigSema`'s stack frame after the result has been returned.
+pub const ZigSemaResult = struct {
+    comp: *Compilation,
+    dirs: Compilation.Directories,
+    tmp_dir: std.testing.TmpDir,
+    /// Created with the `gpa` passed to `zigSema`, which is also the arena's
+    /// `child_allocator`; `deinit` uses that to free the state itself.
+    arena_state: *std.heap.ArenaAllocator,
+    /// Lives inside `arena_state`, so its storage goes away with the arena.
+    thread_pool: *std.Thread.Pool,
+
+    /// Tear everything down. The Compilation goes first since it refers to
+    /// the directories, the thread pool and the arena; the arena goes last
+    /// because the thread pool is stored in it.
+    pub fn deinit(self: *ZigSemaResult) void {
+        self.comp.destroy();
+        self.dirs.deinit();
+        self.thread_pool.deinit();
+        self.tmp_dir.cleanup();
+        const gpa = self.arena_state.child_allocator;
+        self.arena_state.deinit();
+        gpa.destroy(self.arena_state);
+    }
+};
+
+/// Run the real Zig sema pipeline on the source file at `src_path`.
+/// `src_path` is relative to cwd (the repo root), e.g. "lib/std/crypto/codecs.zig".
+///
+/// On success the caller owns the returned `ZigSemaResult` and must call
+/// `deinit` on it. Returns `error.ZigSemaFailed` if `Compilation.create`
+/// reports `error.CreateFail` or if the compilation produced any error
+/// messages (these are printed to stderr first); other errors propagate.
+pub fn zigSema(gpa: Allocator, src_path: []const u8) !ZigSemaResult {
+    // The arena state is heap-allocated so that the Allocator derived from
+    // it (which is handed to the Compilation) stays valid after we return.
+    const arena_state = try gpa.create(std.heap.ArenaAllocator);
+    errdefer gpa.destroy(arena_state);
+    arena_state.* = std.heap.ArenaAllocator.init(gpa);
+    errdefer arena_state.deinit();
+    const arena = arena_state.allocator();
+
+    // Set up temp dir for Compilation cache.
+    var tmp_dir = std.testing.tmpDir(.{});
+    errdefer tmp_dir.cleanup();
+
+    // Resolve all path strings before opening any directory handle, so a
+    // failure here cannot leak a handle.
+    const cwd_path = try std.fs.cwd().realpathAlloc(arena, ".");
+    const zig_lib_path = try std.fs.path.join(arena, &.{ cwd_path, "lib" });
+    const tmp_path = try tmp_dir.dir.realpathAlloc(arena, ".");
+    const cache_path = try std.fmt.allocPrint(arena, "{s}/.cache", .{tmp_path});
+    try tmp_dir.dir.makeDir(".cache");
+
+    var dirs: Compilation.Directories = blk: {
+        var zig_lib_handle = try std.fs.cwd().openDir("lib", .{});
+        // Only fires if opening the cache dir below fails; once the struct
+        // is built, `dirs.deinit()` is responsible for the handles.
+        errdefer zig_lib_handle.close();
+        const cache_handle = try tmp_dir.dir.openDir(".cache", .{});
+        break :blk .{
+            .cwd = cwd_path,
+            .zig_lib = .{ .path = zig_lib_path, .handle = zig_lib_handle },
+            // Global and local cache deliberately share one directory.
+            .global_cache = .{ .path = cache_path, .handle = cache_handle },
+            .local_cache = .{ .path = cache_path, .handle = cache_handle },
+        };
+    };
+    errdefer dirs.deinit();
+
+    // Hardcode x86_64-linux-musl target.
+    const resolved_target: Package.Module.ResolvedTarget = .{
+        .result = try std.zig.system.resolveTargetQuery(.{
+            .cpu_arch = .x86_64,
+            .os_tag = .linux,
+            .abi = .musl,
+        }),
+        .is_native_os = false,
+        .is_native_abi = false,
+        .is_explicit_dynamic_linker = false,
+    };
+
+    const config = try Compilation.Config.resolve(.{
+        .output_mode = .Obj,
+        .resolved_target = resolved_target,
+        .have_zcu = true,
+        .emit_bin = false,
+        .is_test = false,
+    });
+
+    // Split src_path into directory and filename for the Module.
+    // Use .root = .none with absolute path to avoid the source being
+    // associated with zig_lib (which would conflict with the std module
+    // when compiling files under lib/).
+    const src_dir = std.fs.path.dirname(src_path) orelse ".";
+    const src_basename = std.fs.path.basename(src_path);
+    const abs_src_dir = try std.fs.cwd().realpathAlloc(arena, src_dir);
+    const root_path: Compilation.Path = .{ .root = .none, .sub_path = abs_src_dir };
+
+    const root_mod = try Package.Module.create(arena, .{
+        .paths = .{
+            .root = root_path,
+            .root_src_path = src_basename,
+        },
+        .fully_qualified_name = "root",
+        .cc_argv = &.{},
+        .inherited = .{
+            .resolved_target = resolved_target,
+        },
+        .global = config,
+        .parent = null,
+    });
+
+    // The pool is initialised in place and then referenced by address (it
+    // is passed to Compilation.create as a pointer), so it must never be
+    // moved. Put it in the arena to give it a stable address that outlives
+    // this function.
+    const thread_pool = try arena.create(std.Thread.Pool);
+    try thread_pool.init(.{
+        .allocator = gpa,
+        .n_jobs = 1,
+        .track_ids = true,
+        .stack_size = 60 << 20,
+    });
+    errdefer thread_pool.deinit();
+
+    var create_diag: Compilation.CreateDiagnostic = undefined;
+    const comp = Compilation.create(gpa, arena, &create_diag, .{
+        .dirs = dirs,
+        .root_name = "test",
+        .config = config,
+        .root_mod = root_mod,
+        .emit_bin = .no,
+        .thread_pool = thread_pool,
+        .cache_mode = .whole,
+    }) catch |err| switch (err) {
+        error.CreateFail => {
+            std.debug.print("Compilation.create failed: {any}\n", .{create_diag});
+            return error.ZigSemaFailed;
+        },
+        else => return err,
+    };
+    errdefer comp.destroy();
+
+    try comp.update(std.Progress.Node.none);
+
+    // Any reported error message counts as a sema failure.
+    var error_bundle = try comp.getAllErrorsAlloc();
+    defer error_bundle.deinit(gpa);
+    if (error_bundle.errorMessageCount() > 0) {
+        error_bundle.renderToStdErr(.{ .ttyconf = .no_color });
+        return error.ZigSemaFailed;
+    }
+
+    return .{
+        .comp = comp,
+        .dirs = dirs,
+        .tmp_dir = tmp_dir,
+        .arena_state = arena_state,
+        .thread_pool = thread_pool,
+    };
+}
diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig
@@ -7,6 +7,7 @@ const parser_test = @import("parser_test.zig");
const astgen_test = @import("astgen_test.zig");
const sema_test = @import("sema_test.zig");
const sema_c = @import("sema_c.zig");
+const sema = @import("sema.zig");
const c = parser_test.c;
const sc = sema_test.c;
const zig_internals = @import("zig_internals");
@@ -20,7 +21,7 @@ test "stages: corpus" {
const gpa = std.testing.allocator;
const check = Stage.sema;
inline for (corpus_files) |path| {
- stagesCheck(gpa, @embedFile(path), check) catch {
+ stagesCheck(gpa, @embedFile(path), path["../".len..], check) catch {
std.debug.print("FAIL: {s}\n", .{path});
return error.TestFailed;
};
@@ -31,7 +32,7 @@ test "stages: corpus" {
}
}
-fn stagesCheck(gpa: Allocator, source: [:0]const u8, check: Stage) !void {
+fn stagesCheck(gpa: Allocator, source: [:0]const u8, src_path: []const u8, check: Stage) !void {
// Parse once with C parser
var c_ast = c.astParse(source.ptr, @intCast(source.len));
defer c.astDeinit(&c_ast);
@@ -73,10 +74,9 @@ fn stagesCheck(gpa: Allocator, source: [:0]const u8, check: Stage) !void {
// Verify C→Zig Air conversion is faithful (tags, data, extra).
try sema_c.expectEqualAir(result.air(), result.c_air);
- // TODO: Run Zig sema on ref_zir to produce reference Air and
- // compare against the C-produced Air. This requires a full
- // Compilation context (Zcu, InternPool, Package.Module, etc.)
- // which is not yet set up for unit tests.
+ // Run Zig sema on the same source and verify it succeeds.
+ var zig_result = try sema.zigSema(gpa, src_path);
+ defer zig_result.deinit();
}
}