commit 0b041e0c70bc7ce4e1249018216f6d49b7909483 (tree)
parent 9d438d5674c2630a6d61024c51b17094fe5bf82f
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Tue, 17 Feb 2026 19:42:24 +0000
stage0: add Sema data structures (Phase A)
Create header files and minimal .c stubs for the Sema pipeline:
- intern_pool.h/c: InternPool types, pre-interned indices (0-123),
SimpleType/SimpleValue enums, Key tagged union, init/deinit
- air.h/c: AIR instruction tags (X-macro), Ref encoding (MSB tag),
InstData union, extra payload structs, init/deinit
- type.h/c: TypeIndex typedef, stub query functions
- value.h/c: ValueIndex typedef, conversion functions
- sema.h/c: Sema/Block/InstMap/Merges structs, init/deinit/analyze stubs
Wire up build.zig and integrate sema step into zig0.c pipeline.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
13 files changed, 1613 insertions(+), 4 deletions(-)
diff --git a/build.zig b/build.zig
@@ -10,8 +10,8 @@ const assert = std.debug.assert;
const DevEnv = @import("src/dev.zig").Env;
const ValueInterpretMode = enum { direct, by_name };
-const zig0_headers = &[_][]const u8{ "common.h", "ast.h", "parser.h", "zir.h", "astgen.h" };
-const zig0_c_lib_files = &[_][]const u8{ "tokenizer.c", "ast.c", "zig0.c", "parser.c", "zir.c", "astgen.c" };
+const zig0_headers = &[_][]const u8{ "common.h", "ast.h", "parser.h", "zir.h", "astgen.h", "intern_pool.h", "air.h", "type.h", "value.h", "sema.h" };
+const zig0_c_lib_files = &[_][]const u8{ "tokenizer.c", "ast.c", "zig0.c", "parser.c", "zir.c", "astgen.c", "intern_pool.c", "air.c", "type.c", "value.c", "sema.c" };
const zig0_all_c_files = zig0_c_lib_files ++ &[_][]const u8{"main.c"};
const zig0_cflags = &[_][]const u8{
"-std=c11",
diff --git a/stage0/.claude/sema-plan.md b/stage0/.claude/sema-plan.md
@@ -0,0 +1,541 @@
+# Sema Porting Plan for zig/stage0
+
+## Context
+
+The stage0 bootstrap tool currently has tokenizer, parser, and AstGen fully ported to C.
+The next major milestone is Sema (semantic analysis), which transforms ZIR into AIR.
+Before Sema functions can be ported, the data structures it depends on must exist in C.
+
+This plan covers:
+1. Identifying Sema's dependencies and their porting order
+2. Defining the C data structures (header files)
+3. Designing a test framework modeled on astgen_test.zig
+
+Scope decisions:
+- **InternPool**: Simplified single-threaded (no sharding/deps/incremental)
+- **Sema**: Incremental subset-first approach, test-driven
+- **Compilation context**: Minimal bootstrap `SemaCtx` (not full Zcu)
+
+---
+
+## 1. Dependency Graph and Porting Order
+
+```
+ ┌──────────┐
+ │ sema.h │ (Sema context, Block, InstMap)
+ └────┬─────┘
+ │ depends on
+ ┌─────────────┼─────────────┐
+ │ │ │
+ ┌─────▼────┐ ┌─────▼────┐ ┌────▼─────┐
+ │ air.h │ │ type.h │ │ value.h │
+ └─────┬────┘ └─────┬────┘ └────┬─────┘
+ │ │ │
+ └─────────────┼─────────────┘
+ │ all depend on
+ ┌───────▼────────┐
+ │ intern_pool.h │
+ └───────┬────────┘
+ │ depends on
+ ┌───────▼────────┐
+ │ common.h │ (already exists)
+ │ zir.h │ (already exists)
+ └────────────────┘
+```
+
+**Porting order (bottom-up):**
+
+| Phase | File | Upstream Source | Estimated C lines |
+|-------|------|---------------|-------------------|
+| 1 | `intern_pool.h` | `src/InternPool.zig` | ~800 |
+| 2 | `air.h` | `src/Air.zig` | ~600 |
+| 3 | `type.h` / `value.h` | `src/Type.zig`, `src/Value.zig` | ~200 |
+| 4 | `sema.h` | `src/Sema.zig` (struct defs only) | ~300 |
+| 5 | `sema_test.zig` | new (modeled on `astgen_test.zig`) | ~400 |
+
+---
+
+## 2. Data Structure Definitions
+
+### 2.1 `intern_pool.h` — Simplified Single-Threaded InternPool
+
+**Upstream**: `src/InternPool.zig` (~13K lines)
+**Scope**: Type/value interning and lookup only. No threading, no dependency tracking, no incremental compilation.
+
+```
+Key types to define:
+
+InternPoolIndex (uint32_t)
+ - Pre-interned constants matching Zig's InternPool.Index enum:
+ IP_INDEX_U0_TYPE, IP_INDEX_U8_TYPE, IP_INDEX_I32_TYPE, ...
+ IP_INDEX_BOOL_TYPE, IP_INDEX_VOID_TYPE, IP_INDEX_TYPE_TYPE, ...
+ IP_INDEX_UNDEF, IP_INDEX_ZERO, IP_INDEX_ONE, IP_INDEX_BOOL_TRUE, ...
+ IP_INDEX_NONE = UINT32_MAX
+ - 104 pre-interned type indices (0-103) + 20 pre-interned value indices (104-123)
+
+SimpleType enum (matching InternPool.SimpleType):
+ f16, f32, f64, f80, f128, usize, isize, c_char, ...,
+ anyopaque, bool, void, type, anyerror, comptime_int, ...
+
+SimpleValue enum (matching InternPool.SimpleValue):
+ undefined, void_val, null_val, empty_tuple, true_val, false_val, unreachable_val
+
+IntType struct:
+ uint16_t bits;
+ uint8_t signedness; // 0=unsigned, 1=signed
+
+PtrType struct:
+ InternPoolIndex child;
+ InternPoolIndex sentinel; // IP_INDEX_NONE if absent
+ uint32_t flags; // packed: size(2), alignment(6), is_const(1), is_volatile(1), is_allowzero(1), address_space(5), vector_index(16)
+ uint32_t packed_offset; // packed: host_size(16), bit_offset(16)
+
+ArrayType struct:
+ uint64_t len;
+ InternPoolIndex child;
+ InternPoolIndex sentinel; // IP_INDEX_NONE if absent
+
+VectorType struct:
+ uint32_t len;
+ InternPoolIndex child;
+
+ErrorUnionType struct:
+ InternPoolIndex error_set;
+ InternPoolIndex payload;
+
+FuncType struct:
+ InternPoolIndex* param_types;
+ uint32_t param_count;
+ InternPoolIndex return_type;
+ uint32_t comptime_bits;
+ uint32_t noalias_bits;
+ uint8_t cc; // calling convention
+ bool is_var_args;
+ bool is_generic;
+ bool is_noinline;
+
+ErrorSetType struct:
+ uint32_t* names; // NullTerminatedString indices
+ uint32_t names_count;
+
+NamespaceType struct (for struct/union/enum/opaque):
+ uint32_t zir_index; // TrackedInst.Index equivalent
+ uint32_t type_hash; // for reified types
+
+InternPoolKeyTag enum:
+ IP_KEY_INT_TYPE, IP_KEY_PTR_TYPE, IP_KEY_ARRAY_TYPE,
+ IP_KEY_VECTOR_TYPE, IP_KEY_OPT_TYPE, IP_KEY_ANYFRAME_TYPE,
+ IP_KEY_ERROR_UNION_TYPE, IP_KEY_SIMPLE_TYPE, IP_KEY_STRUCT_TYPE,
+ IP_KEY_TUPLE_TYPE, IP_KEY_UNION_TYPE, IP_KEY_OPAQUE_TYPE,
+ IP_KEY_ENUM_TYPE, IP_KEY_FUNC_TYPE, IP_KEY_ERROR_SET_TYPE,
+ IP_KEY_INFERRED_ERROR_SET_TYPE,
+ IP_KEY_UNDEF, IP_KEY_SIMPLE_VALUE, IP_KEY_VARIABLE,
+ IP_KEY_EXTERN, IP_KEY_FUNC, IP_KEY_INT, IP_KEY_ERR,
+ IP_KEY_ERROR_UNION, IP_KEY_ENUM_LITERAL, IP_KEY_ENUM_TAG,
+ IP_KEY_FLOAT, IP_KEY_PTR, IP_KEY_SLICE, IP_KEY_OPT,
+ IP_KEY_AGGREGATE, IP_KEY_UNION_VALUE
+
+InternPoolKey struct (tagged union):
+ InternPoolKeyTag tag;
+ union { IntType int_type; PtrType ptr_type; ... } data;
+
+InternPool struct:
+ InternPoolKey* items; // indexed by InternPoolIndex
+ uint32_t items_len;
+ uint32_t items_cap;
+ // Hash table for deduplication:
+ uint32_t* hash_table; // maps hash → index
+ uint32_t hash_cap;
+ // String storage:
+ uint8_t* string_bytes;
+ uint32_t string_bytes_len;
+ uint32_t string_bytes_cap;
+ // Global error set:
+ uint32_t* error_names;
+ uint32_t error_names_len;
+
+Functions:
+ InternPool ipInit(void);
+ void ipDeinit(InternPool*);
+ InternPoolIndex ipIntern(InternPool*, InternPoolKey);
+ InternPoolKey ipIndexToKey(InternPool*, InternPoolIndex);
+ InternPoolIndex ipTypeOf(InternPool*, InternPoolIndex);
+```
+
+### 2.2 `air.h` — Analyzed Intermediate Representation
+
+**Upstream**: `src/Air.zig` (~2,172 lines)
+
+```
+Key types to define:
+
+AirInstTag enum (uint8_t, ~204 tags):
+ AIR_INST_ARG, AIR_INST_ADD, AIR_INST_ADD_SAFE, AIR_INST_ADD_OPTIMIZED,
+ AIR_INST_ADD_WRAP, AIR_INST_ADD_SAT, AIR_INST_SUB, ...
+ AIR_INST_BLOCK, AIR_INST_LOOP, AIR_INST_BR, AIR_INST_CALL, ...
+ (use X-macro pattern like ZIR_INST_FOREACH_TAG)
+
+AirInstRef (uint32_t):
+ Uses MSB (bit 31) as tag bit:
+ - Bit 31 = 0: value is an InternPool index (lower 31 bits)
+ - Bit 31 = 1: value is an AIR instruction index (lower 31 bits)
+ - AIR_REF_NONE = UINT32_MAX (special sentinel)
+ Helper macros:
+ #define AIR_REF_NONE UINT32_MAX
+ #define AIR_REF_IS_INST(r) (((r) >> 31) != 0 && (r) != AIR_REF_NONE)
+ #define AIR_REF_IS_IP(r) (((r) >> 31) == 0)
+ #define AIR_REF_TO_INST(r) ((r) & 0x7FFFFFFF)
+ #define AIR_REF_TO_IP(r) ((r) & 0x7FFFFFFF)
+ #define AIR_REF_FROM_INST(i) ((i) | 0x80000000)
+ #define AIR_REF_FROM_IP(i) (i)
+
+AirInstData union (8 bytes, matching Air.Inst.Data):
+ struct { AirInstRef operand; } un_op;
+ struct { AirInstRef lhs, rhs; } bin_op;
+ struct { AirInstRef ty; } ty;
+ struct { AirInstRef ty; uint32_t zir_param_index; } arg;
+ struct { AirInstRef ty; AirInstRef operand; } ty_op;
+ struct { AirInstRef ty; uint32_t payload; } ty_pl;
+ struct { uint32_t block_inst; AirInstRef operand; } br;
+ struct { AirInstRef operand; uint32_t payload; } pl_op;
+ struct { uint32_t line; uint32_t column; } dbg_stmt;
+ ... (all variants from Air.Inst.Data)
+
+Air struct:
+ uint32_t inst_len;
+ uint32_t inst_cap;
+ AirInstTag* inst_tags;
+ AirInstData* inst_datas;
+ uint32_t extra_len;
+ uint32_t extra_cap;
+ uint32_t* extra;
+
+Extra payload structs:
+ AirBlock, AirCall, AirCondBr, AirSwitchBr, AirTry, AirBin, etc.
+```
+
+### 2.3 `type.h` / `value.h` — Thin Wrappers
+
+**Upstream**: `src/Type.zig` (~4,179 lines), `src/Value.zig` (~3,288 lines)
+
+In C, Type and Value are just `InternPoolIndex` with helper functions.
+These start minimal and grow as Sema functions need them.
+
+```
+type.h:
+ typedef InternPoolIndex TypeIndex;
+ #define TYPE_NONE IP_INDEX_NONE
+
+ // Functions ported on-demand as Sema needs them:
+ uint32_t typeZigTypeTag(InternPool*, TypeIndex);
+ TypeIndex typeChildType(InternPool*, TypeIndex);
+ TypeIndex typeElemType(InternPool*, TypeIndex);
+ bool typeIsSlice(InternPool*, TypeIndex);
+ bool typeIsCPtr(InternPool*, TypeIndex);
+ uint64_t typeArrayLen(InternPool*, TypeIndex);
+ PtrType typePtrInfo(InternPool*, TypeIndex);
+
+value.h:
+ typedef InternPoolIndex ValueIndex;
+ #define VALUE_NONE IP_INDEX_NONE
+
+ TypeIndex valueToType(ValueIndex);
+ ValueIndex valueFromInterned(InternPoolIndex);
+ uint64_t valueToUnsignedInt(InternPool*, ValueIndex);
+```
+
+### 2.4 `sema.h` — Sema Context and Block
+
+**Upstream**: `src/Sema.zig` (lines 41-500)
+
+```
+RuntimeIndex (uint32_t):
+ #define RUNTIME_INDEX_ZERO 0
+ #define RUNTIME_INDEX_COMPTIME_FIELD_PTR UINT32_MAX
+
+InstMap struct:
+ AirInstRef* items;
+ uint32_t start; // ZIR instruction index offset
+ uint32_t len;
+
+SemaBlock struct:
+ SemaBlock* parent;
+ Sema* sema;
+ uint32_t namespace; // NamespaceIndex
+ uint32_t* instructions; // AIR instruction indices
+ uint32_t instructions_len;
+ uint32_t instructions_cap;
+ SemaBlockLabel* label;
+ SemaBlockInlining* inlining;
+ int32_t runtime_cond; // LazySrcLoc or -1
+ int32_t runtime_loop; // LazySrcLoc or -1
+ uint32_t runtime_index;
+ uint32_t comptime_reason; // 0 = not comptime
+ bool is_typeof;
+ AirInstRef error_return_trace_index;
+ bool want_safety; // optional value: only meaningful when want_safety_set is true
+ bool want_safety_set;
+ uint8_t float_mode; // strict=0, optimized=1
+ uint32_t src_base_inst;
+ uint32_t type_name_ctx; // NullTerminatedString
+
+SemaBlockLabel struct:
+ uint32_t zir_block; // Zir.Inst.Index
+ SemaBlockMerges merges;
+
+SemaBlockInlining struct:
+ SemaBlock* call_block;
+ int32_t call_src;
+ InternPoolIndex func;
+ bool is_generic_instantiation;
+ bool has_comptime_args;
+ AirInstRef comptime_result;
+ SemaBlockMerges merges;
+
+SemaBlockMerges struct:
+ uint32_t block_inst; // Air.Inst.Index
+ AirInstRef* results;
+ uint32_t results_len, results_cap;
+ uint32_t* br_list;
+ uint32_t br_list_len, br_list_cap;
+
+InferredErrorSet struct:
+ InternPoolIndex func;
+ uint32_t* error_names; // NullTerminatedString indices
+ uint32_t error_names_len, error_names_cap;
+ InternPoolIndex* inferred_sets;
+ uint32_t inferred_sets_len, inferred_sets_cap;
+ InternPoolIndex resolved; // IP_INDEX_NONE until resolved
+
+MaybeComptimeAlloc struct:
+ uint32_t runtime_index;
+ uint32_t* store_insts;
+ uint32_t stores_len, stores_cap;
+
+ComptimeAlloc struct:
+ InternPoolIndex val;
+ bool is_const;
+ int32_t src; // LazySrcLoc
+ uint8_t alignment;
+ uint32_t runtime_index;
+
+Sema struct:
+ InternPool* ip; // shared intern pool
+ Air air; // output AIR being built
+ Zir code; // input ZIR being analyzed
+ InstMap inst_map; // ZIR→AIR mapping
+ InternPoolIndex owner; // AnalUnit
+ InternPoolIndex func_index;
+ bool func_is_naked;
+ TypeIndex fn_ret_ty;
+ InferredErrorSet* fn_ret_ty_ies; // NULL if not inferred
+ uint32_t branch_quota;
+ uint32_t branch_count;
+ uint32_t comptime_break_inst;
+ // Hash maps (simplified as arrays for bootstrap):
+ ComptimeAlloc* comptime_allocs;
+ uint32_t comptime_allocs_len, comptime_allocs_cap;
+ bool allow_memoize;
+ uint8_t branch_hint; // 0=none
+ bool has_compile_errors;
+ char err_buf[ERR_BUF_SIZE];
+
+Functions (stubs, filled incrementally):
+ Sema semaInit(InternPool*, Zir);
+ void semaDeinit(Sema*);
+ void semaAnalyzeFnBody(Sema*, SemaBlock*, uint32_t* body, uint32_t body_len);
+ AirInstRef semaResolveInst(Sema*, uint32_t zir_ref);
+```
+
+---
+
+## 3. Test Framework: `sema_test.zig`
+
+**Modeled on**: `astgen_test.zig` and `stages_test.zig`
+
+### 3.1 Architecture
+
+```
+Source code ([:0]const u8)
+ │
+ ├─── C path ───────────────────────────────────┐
+ │ c.astParse() → C AST │
+ │ c.astGen() → C ZIR │
+ │ c.semaAnalyze() → C AIR │
+ │ │
+ ├─── Zig reference path ───────────────────────┤
+ │ zigAst() → Zig AST │
+ │ AstGen.generate() → Zig ZIR │
+ │ setupSema() + analyzeFnBody() → Zig AIR │
+ │ │
+ └─── Compare ──────────────────────────────────┘
+ expectEqualAir(zig_air, c_air)
+```
+
+### 3.2 Key Test Functions
+
+```zig
+// sema_test.zig
+
+const c = parser_test.c;
+
+/// Set up a minimal Zig Sema environment and analyze a module's ZIR.
+/// Returns the AIR for the top-level declarations.
+fn refAir(gpa: Allocator, source: [:0]const u8) !Air {
+ // 1. Parse and generate ZIR via Zig stdlib
+ var tree = try std.zig.Ast.parse(gpa, source, .zig);
+ defer tree.deinit(gpa);
+ var zir = try std.zig.AstGen.generate(gpa, tree);
+ defer zir.deinit(gpa);
+ // 2. Set up minimal compilation context
+ // 3. Run Sema
+ // 4. Return AIR
+}
+
+/// Compare two AIR outputs instruction-by-instruction.
+fn expectEqualAir(gpa: Allocator, ref: Air, got_tags: []AirInstTag,
+ got_datas: []AirInstData, got_extra: []u32) !void {
+ // Step 1: Compare instruction count
+ // Step 2: Compare instruction tags
+ // Step 3: Compare instruction data field-by-field (like expectEqualData in astgen_test)
+ // Step 4: Compare extra data
+}
+
+/// Corpus test: run Sema on real Zig files
+test "sema: corpus" {
+ inline for (corpus_files) |path| {
+ semaCheck(gpa, @embedFile(path)) catch {
+ std.debug.print("FAIL: {s}\n", .{path});
+ return error.TestFailed;
+ };
+ }
+}
+```
+
+### 3.3 Bridge Extension
+
+The `zig0_bridge.zig` needs a new export for Sema:
+
+```zig
+// New C API function exposed via zig0_bridge.zig:
+pub export fn semaAnalyze(zir: *c.Zir) c.Air { ... }
+```
+
+And the corresponding C function in a new `sema.c`:
+
+```c
+// sema.c — Sema entry point
+Air semaAnalyze(const Zir* zir, const Ast* ast) {
+ Sema sema = semaInit(&global_ip, *zir);
+ // ... set up root block, analyze body ...
+ semaDeinit(&sema); // frees internal state, AIR is returned
+ return sema.air;
+}
+```
+
+### 3.4 Test Progression Strategy
+
+Following the astgen porting pattern:
+
+1. **Start with trivial cases**: empty source, single const decl, simple expressions
+2. **Use `SkipZigTest` / commented corpus entries** to gate unimplemented features
+3. **Orchestrator skill** (`.claude/skills/port-sema/`) drives incremental enabling
+4. **Each ZIR instruction handler** is added as tests require it
+
+Initial test cases (in order):
+```
+"" // empty module
+"const x = 0;" // const declaration, int literal
+"const x = 1 + 2;" // binary arithmetic (comptime)
+"fn foo() void {}" // empty function
+"fn foo(x: u32) u32 { return x; }" // function with param and return
+"const S = struct { x: u32 };" // struct declaration
+"test \"t\" { _ = 1; }" // test declaration
+```
+
+---
+
+## 4. Files to Create
+
+| File | Purpose | Upstream Reference |
+|------|---------|-------------------|
+| `stage0/intern_pool.h` | InternPool data structures | `src/InternPool.zig` |
+| `stage0/intern_pool.c` | InternPool init/deinit/intern/lookup | `src/InternPool.zig` |
+| `stage0/air.h` | AIR instruction tags, data, ref types | `src/Air.zig` |
+| `stage0/air.c` | Air deinit | `src/Air.zig` |
+| `stage0/type.h` | Type helper function declarations | `src/Type.zig` |
+| `stage0/type.c` | Type helper implementations | `src/Type.zig` |
+| `stage0/value.h` | Value helper function declarations | `src/Value.zig` |
+| `stage0/value.c` | Value helper implementations | `src/Value.zig` |
+| `stage0/sema.h` | Sema, Block, InstMap structures | `src/Sema.zig` |
+| `stage0/sema.c` | Sema analysis functions | `src/Sema.zig` |
+| `stage0/sema_test.zig` | Test framework | new (modeled on `astgen_test.zig`) |
+
+### Files to Modify
+
+| File | Change |
+|------|--------|
+| `stage0/zig0.c` | Add sema step after astgen |
+| `stage0/zig0_bridge.zig` | Add sema C→Zig bridge functions |
+| `stage0/common.h` | Possibly add new macros (hash table, etc.) |
+| `build.zig` (in the parent directory of `stage0/`) | Add the new `.c`/`.h` files to compilation, `sema_test.zig` to tests |
+
+---
+
+## 5. Implementation Phases
+
+### Phase A: Data structures only (this plan)
+- Create all `.h` files with struct/enum/union definitions
+- Create minimal `.c` files (init/deinit only)
+- No Sema logic yet
+
+### Phase B: InternPool core
+- Implement `ipInit`, `ipDeinit`, `ipIntern`, `ipIndexToKey`
+- Pre-populate with all 124 pre-interned types/values (indices 0-123)
+- Hash table for deduplication
+
+### Phase C: Test framework skeleton
+- Create `sema_test.zig` with infrastructure
+- First test: empty source through Sema (C path stubs → compare)
+- Wire up build system
+
+### Phase D: First Sema instruction handlers (incremental)
+- Start with declaration/block/return instructions
+- Each handler follows the upstream Sema.zig mechanically
+- Enable test cases one at a time (astgen porting pattern)
+
+---
+
+## 6. Verification
+
+```bash
+# After Phase A (data structures):
+./zig3 build fmt-zig0 # C formatting check
+./zig3 build test-zig0 -Dzig0-cc=tcc # Existing tests still pass
+
+# After Phase C (test framework):
+./zig3 build test-zig0 # New sema tests run (trivial cases)
+
+# After Phase D (incremental handlers):
+./zig3 build all-zig0 -Dvalgrind # Full suite including sema tests
+```
+
+---
+
+## 7. Key Design Decisions
+
+1. **InternPool hash table**: Use open addressing with linear probing. Keys are hashed by their tag + fields. Simple and cache-friendly.
+
+2. **Memory management**: Follow existing pattern — `calloc`/`realloc` with `exit(1)` on failure. InternPool owns all interned data.
+
+3. **String interning**: Reuse the same `string_bytes` pattern from ZIR (packed bytes with length-prefixed or null-terminated strings). InternPool gets its own string table separate from ZIR's.
+
+4. **Type/Value as thin wrappers**: In C, `TypeIndex` and `ValueIndex` are just `typedef InternPoolIndex`. Helper functions take `InternPool*` as first arg. This avoids the method-on-struct pattern that's natural in Zig but awkward in C.
+
+5. **No Zcu**: Replace with `SemaCtx` embedded in `Sema` struct. Namespaces are simplified arrays. Error reporting reuses `SET_ERROR` macro.
+
+6. **AIR Ref encoding**: Different from ZIR. AIR uses the MSB (bit 31) as a tag:
+ - Bit 31 = 0 → InternPool index (lower 31 bits)
+ - Bit 31 = 1 → AIR instruction index (lower 31 bits)
+ - `NONE` = UINT32_MAX (all bits set)
+ This is cleaner than ZIR's offset-based encoding.
diff --git a/stage0/air.c b/stage0/air.c
@@ -0,0 +1,15 @@
+#include "air.h"
+#include <stdlib.h>
+
+// Releases the three arrays owned by `air` (instruction tags, instruction
+// data, extra words) and leaves the struct empty: NULL pointers, zero
+// lengths and zero capacities. The `air` object itself is not freed.
+void airDeinit(Air* air) {
+    // Instruction storage: tags and datas share inst_len / inst_cap.
+    free(air->inst_tags);
+    free(air->inst_datas);
+    air->inst_tags = NULL;
+    air->inst_datas = NULL;
+    air->inst_len = 0;
+    air->inst_cap = 0;
+
+    // Extra payload storage.
+    free(air->extra);
+    air->extra = NULL;
+    air->extra_len = 0;
+    air->extra_cap = 0;
+}
diff --git a/stage0/air.h b/stage0/air.h
@@ -0,0 +1,355 @@
+// air.h — Analyzed Intermediate Representation, ported from src/Air.zig.
+#ifndef _ZIG0_AIR_H__
+#define _ZIG0_AIR_H__
+
+#include "common.h"
+#include "intern_pool.h"
+#include <stdbool.h>
+#include <stdint.h>
+
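+// --- AIR instruction tags (every value fits in a uint8_t) ---
+// Matches Air.Inst.Tag enum order from Air.zig. Note: AirInstTag is a plain
+// C enum, so its storage size is implementation-defined, not necessarily 1.
+// Uses X-macro pattern like ZIR.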
+
+#define AIR_INST_FOREACH_TAG(TAG) \
+ TAG(AIR_INST_ARG) \
+ TAG(AIR_INST_ADD) \
+ TAG(AIR_INST_ADD_SAFE) \
+ TAG(AIR_INST_ADD_OPTIMIZED) \
+ TAG(AIR_INST_ADD_WRAP) \
+ TAG(AIR_INST_ADD_SAT) \
+ TAG(AIR_INST_SUB) \
+ TAG(AIR_INST_SUB_SAFE) \
+ TAG(AIR_INST_SUB_OPTIMIZED) \
+ TAG(AIR_INST_SUB_WRAP) \
+ TAG(AIR_INST_SUB_SAT) \
+ TAG(AIR_INST_MUL) \
+ TAG(AIR_INST_MUL_SAFE) \
+ TAG(AIR_INST_MUL_OPTIMIZED) \
+ TAG(AIR_INST_MUL_WRAP) \
+ TAG(AIR_INST_MUL_SAT) \
+ TAG(AIR_INST_DIV_FLOAT) \
+ TAG(AIR_INST_DIV_FLOAT_OPTIMIZED) \
+ TAG(AIR_INST_DIV_TRUNC) \
+ TAG(AIR_INST_DIV_TRUNC_OPTIMIZED) \
+ TAG(AIR_INST_DIV_FLOOR) \
+ TAG(AIR_INST_DIV_FLOOR_OPTIMIZED) \
+ TAG(AIR_INST_DIV_EXACT) \
+ TAG(AIR_INST_DIV_EXACT_OPTIMIZED) \
+ TAG(AIR_INST_REM) \
+ TAG(AIR_INST_REM_OPTIMIZED) \
+ TAG(AIR_INST_MOD) \
+ TAG(AIR_INST_MOD_OPTIMIZED) \
+ TAG(AIR_INST_PTR_ADD) \
+ TAG(AIR_INST_PTR_SUB) \
+ TAG(AIR_INST_MAX) \
+ TAG(AIR_INST_MIN) \
+ TAG(AIR_INST_ADD_WITH_OVERFLOW) \
+ TAG(AIR_INST_SUB_WITH_OVERFLOW) \
+ TAG(AIR_INST_MUL_WITH_OVERFLOW) \
+ TAG(AIR_INST_SHL_WITH_OVERFLOW) \
+ TAG(AIR_INST_ALLOC) \
+ TAG(AIR_INST_INFERRED_ALLOC) \
+ TAG(AIR_INST_INFERRED_ALLOC_COMPTIME) \
+ TAG(AIR_INST_RET_PTR) \
+ TAG(AIR_INST_ASSEMBLY) \
+ TAG(AIR_INST_BIT_AND) \
+ TAG(AIR_INST_BIT_OR) \
+ TAG(AIR_INST_SHR) \
+ TAG(AIR_INST_SHR_EXACT) \
+ TAG(AIR_INST_SHL) \
+ TAG(AIR_INST_SHL_EXACT) \
+ TAG(AIR_INST_SHL_SAT) \
+ TAG(AIR_INST_XOR) \
+ TAG(AIR_INST_NOT) \
+ TAG(AIR_INST_BITCAST) \
+ TAG(AIR_INST_BLOCK) \
+ TAG(AIR_INST_LOOP) \
+ TAG(AIR_INST_REPEAT) \
+ TAG(AIR_INST_BR) \
+ TAG(AIR_INST_TRAP) \
+ TAG(AIR_INST_BREAKPOINT) \
+ TAG(AIR_INST_RET_ADDR) \
+ TAG(AIR_INST_FRAME_ADDR) \
+ TAG(AIR_INST_CALL) \
+ TAG(AIR_INST_CALL_ALWAYS_TAIL) \
+ TAG(AIR_INST_CALL_NEVER_TAIL) \
+ TAG(AIR_INST_CALL_NEVER_INLINE) \
+ TAG(AIR_INST_CLZ) \
+ TAG(AIR_INST_CTZ) \
+ TAG(AIR_INST_POPCOUNT) \
+ TAG(AIR_INST_BYTE_SWAP) \
+ TAG(AIR_INST_BIT_REVERSE) \
+ TAG(AIR_INST_SQRT) \
+ TAG(AIR_INST_SIN) \
+ TAG(AIR_INST_COS) \
+ TAG(AIR_INST_TAN) \
+ TAG(AIR_INST_EXP) \
+ TAG(AIR_INST_EXP2) \
+ TAG(AIR_INST_LOG) \
+ TAG(AIR_INST_LOG2) \
+ TAG(AIR_INST_LOG10) \
+ TAG(AIR_INST_ABS) \
+ TAG(AIR_INST_FLOOR) \
+ TAG(AIR_INST_CEIL) \
+ TAG(AIR_INST_ROUND) \
+ TAG(AIR_INST_TRUNC_FLOAT) \
+ TAG(AIR_INST_NEG) \
+ TAG(AIR_INST_NEG_OPTIMIZED) \
+ TAG(AIR_INST_CMP_LT) \
+ TAG(AIR_INST_CMP_LT_OPTIMIZED) \
+ TAG(AIR_INST_CMP_LTE) \
+ TAG(AIR_INST_CMP_LTE_OPTIMIZED) \
+ TAG(AIR_INST_CMP_EQ) \
+ TAG(AIR_INST_CMP_EQ_OPTIMIZED) \
+ TAG(AIR_INST_CMP_GTE) \
+ TAG(AIR_INST_CMP_GTE_OPTIMIZED) \
+ TAG(AIR_INST_CMP_GT) \
+ TAG(AIR_INST_CMP_GT_OPTIMIZED) \
+ TAG(AIR_INST_CMP_NEQ) \
+ TAG(AIR_INST_CMP_NEQ_OPTIMIZED) \
+ TAG(AIR_INST_CMP_VECTOR) \
+ TAG(AIR_INST_CMP_VECTOR_OPTIMIZED) \
+ TAG(AIR_INST_COND_BR) \
+ TAG(AIR_INST_SWITCH_BR) \
+ TAG(AIR_INST_LOOP_SWITCH_BR) \
+ TAG(AIR_INST_SWITCH_DISPATCH) \
+ TAG(AIR_INST_TRY) \
+ TAG(AIR_INST_TRY_COLD) \
+ TAG(AIR_INST_TRY_PTR) \
+ TAG(AIR_INST_TRY_PTR_COLD) \
+ TAG(AIR_INST_DBG_STMT) \
+ TAG(AIR_INST_DBG_EMPTY_STMT) \
+ TAG(AIR_INST_DBG_INLINE_BLOCK) \
+ TAG(AIR_INST_DBG_VAR_PTR) \
+ TAG(AIR_INST_DBG_VAR_VAL) \
+ TAG(AIR_INST_DBG_ARG_INLINE) \
+ TAG(AIR_INST_IS_NULL) \
+ TAG(AIR_INST_IS_NON_NULL) \
+ TAG(AIR_INST_IS_NULL_PTR) \
+ TAG(AIR_INST_IS_NON_NULL_PTR) \
+ TAG(AIR_INST_IS_ERR) \
+ TAG(AIR_INST_IS_NON_ERR) \
+ TAG(AIR_INST_IS_ERR_PTR) \
+ TAG(AIR_INST_IS_NON_ERR_PTR) \
+ TAG(AIR_INST_BOOL_AND) \
+ TAG(AIR_INST_BOOL_OR) \
+ TAG(AIR_INST_LOAD) \
+ TAG(AIR_INST_RET) \
+ TAG(AIR_INST_RET_SAFE) \
+ TAG(AIR_INST_RET_LOAD) \
+ TAG(AIR_INST_STORE) \
+ TAG(AIR_INST_STORE_SAFE) \
+ TAG(AIR_INST_UNREACH) \
+ TAG(AIR_INST_FPTRUNC) \
+ TAG(AIR_INST_FPEXT) \
+ TAG(AIR_INST_INTCAST) \
+ TAG(AIR_INST_INTCAST_SAFE) \
+ TAG(AIR_INST_TRUNC) \
+ TAG(AIR_INST_OPTIONAL_PAYLOAD) \
+ TAG(AIR_INST_OPTIONAL_PAYLOAD_PTR) \
+ TAG(AIR_INST_OPTIONAL_PAYLOAD_PTR_SET) \
+ TAG(AIR_INST_WRAP_OPTIONAL) \
+ TAG(AIR_INST_UNWRAP_ERRUNION_PAYLOAD) \
+ TAG(AIR_INST_UNWRAP_ERRUNION_ERR) \
+ TAG(AIR_INST_UNWRAP_ERRUNION_PAYLOAD_PTR) \
+ TAG(AIR_INST_UNWRAP_ERRUNION_ERR_PTR) \
+ TAG(AIR_INST_ERRUNION_PAYLOAD_PTR_SET) \
+ TAG(AIR_INST_WRAP_ERRUNION_PAYLOAD) \
+ TAG(AIR_INST_WRAP_ERRUNION_ERR) \
+ TAG(AIR_INST_STRUCT_FIELD_PTR) \
+ TAG(AIR_INST_STRUCT_FIELD_PTR_INDEX_0) \
+ TAG(AIR_INST_STRUCT_FIELD_PTR_INDEX_1) \
+ TAG(AIR_INST_STRUCT_FIELD_PTR_INDEX_2) \
+ TAG(AIR_INST_STRUCT_FIELD_PTR_INDEX_3) \
+ TAG(AIR_INST_STRUCT_FIELD_VAL) \
+ TAG(AIR_INST_SET_UNION_TAG) \
+ TAG(AIR_INST_GET_UNION_TAG) \
+ TAG(AIR_INST_SLICE) \
+ TAG(AIR_INST_SLICE_LEN) \
+ TAG(AIR_INST_SLICE_PTR) \
+ TAG(AIR_INST_PTR_SLICE_LEN_PTR) \
+ TAG(AIR_INST_PTR_SLICE_PTR_PTR) \
+ TAG(AIR_INST_ARRAY_ELEM_VAL) \
+ TAG(AIR_INST_SLICE_ELEM_VAL) \
+ TAG(AIR_INST_SLICE_ELEM_PTR) \
+ TAG(AIR_INST_PTR_ELEM_VAL) \
+ TAG(AIR_INST_PTR_ELEM_PTR) \
+ TAG(AIR_INST_ARRAY_TO_SLICE) \
+ TAG(AIR_INST_INT_FROM_FLOAT) \
+ TAG(AIR_INST_INT_FROM_FLOAT_OPTIMIZED) \
+ TAG(AIR_INST_INT_FROM_FLOAT_SAFE) \
+ TAG(AIR_INST_INT_FROM_FLOAT_OPTIMIZED_SAFE) \
+ TAG(AIR_INST_FLOAT_FROM_INT) \
+ TAG(AIR_INST_REDUCE) \
+ TAG(AIR_INST_REDUCE_OPTIMIZED) \
+ TAG(AIR_INST_SPLAT) \
+ TAG(AIR_INST_SHUFFLE_ONE) \
+ TAG(AIR_INST_SHUFFLE_TWO) \
+ TAG(AIR_INST_SELECT) \
+ TAG(AIR_INST_MEMSET) \
+ TAG(AIR_INST_MEMSET_SAFE) \
+ TAG(AIR_INST_MEMCPY) \
+ TAG(AIR_INST_MEMMOVE) \
+ TAG(AIR_INST_CMPXCHG_WEAK) \
+ TAG(AIR_INST_CMPXCHG_STRONG) \
+ TAG(AIR_INST_ATOMIC_LOAD) \
+ TAG(AIR_INST_ATOMIC_STORE_UNORDERED) \
+ TAG(AIR_INST_ATOMIC_STORE_MONOTONIC) \
+ TAG(AIR_INST_ATOMIC_STORE_RELEASE) \
+ TAG(AIR_INST_ATOMIC_STORE_SEQ_CST) \
+ TAG(AIR_INST_ATOMIC_RMW) \
+ TAG(AIR_INST_IS_NAMED_ENUM_VALUE) \
+ TAG(AIR_INST_TAG_NAME) \
+ TAG(AIR_INST_ERROR_NAME) \
+ TAG(AIR_INST_ERROR_SET_HAS_VALUE) \
+ TAG(AIR_INST_AGGREGATE_INIT) \
+ TAG(AIR_INST_UNION_INIT) \
+ TAG(AIR_INST_PREFETCH) \
+ TAG(AIR_INST_MUL_ADD) \
+ TAG(AIR_INST_FIELD_PARENT_PTR) \
+ TAG(AIR_INST_WASM_MEMORY_SIZE) \
+ TAG(AIR_INST_WASM_MEMORY_GROW) \
+ TAG(AIR_INST_CMP_LT_ERRORS_LEN) \
+ TAG(AIR_INST_ERR_RETURN_TRACE) \
+ TAG(AIR_INST_SET_ERR_RETURN_TRACE) \
+ TAG(AIR_INST_ADDRSPACE_CAST) \
+ TAG(AIR_INST_SAVE_ERR_RETURN_TRACE_INDEX) \
+ TAG(AIR_INST_VECTOR_STORE_ELEM) \
+ TAG(AIR_INST_RUNTIME_NAV_PTR) \
+ TAG(AIR_INST_C_VA_ARG) \
+ TAG(AIR_INST_C_VA_COPY) \
+ TAG(AIR_INST_C_VA_END) \
+ TAG(AIR_INST_C_VA_START) \
+ TAG(AIR_INST_WORK_ITEM_ID) \
+ TAG(AIR_INST_WORK_GROUP_SIZE) \
+ TAG(AIR_INST_WORK_GROUP_ID)
+
+#define AIR_GENERATE_ENUM(e) e, // turns one TAG(x) entry of the X-macro into the enumerator `x,`
+typedef enum { AIR_INST_FOREACH_TAG(AIR_GENERATE_ENUM) } AirInstTag; // enumerators are numbered from 0 in list order
+
+// --- AirInstRef ---
+
+typedef uint32_t AirInstRef; // 32-bit reference; bit 31 distinguishes instruction refs from InternPool refs
+
+#define AIR_REF_NONE UINT32_MAX // sentinel; has bit 31 set but is excluded by AIR_REF_IS_INST
+#define AIR_REF_IS_INST(r) (((r) >> 31) != 0 && (r) != AIR_REF_NONE) // bit 31 set and not the sentinel; evaluates r twice
+#define AIR_REF_IS_IP(r) (((r) >> 31) == 0) // bit 31 clear
+#define AIR_REF_TO_INST(r) ((r) & 0x7FFFFFFFU) // clear bit 31, leaving the low 31 bits
+#define AIR_REF_TO_IP(r) ((r) & 0x7FFFFFFFU) // same mask; changes nothing when AIR_REF_IS_IP(r) holds
+#define AIR_REF_FROM_INST(i) ((i) | 0x80000000U) // set bit 31 on an instruction index
+#define AIR_REF_FROM_IP(i) (i) // identity: the value is stored unchanged
+
+// --- AirInstData union (8 bytes, matching Air.Inst.Data) ---
+
+typedef union { // each variant below is exactly two 32-bit words
+ struct {
+ uint32_t _pad[2]; // padding only: fills both words
+ } no_op;
+ struct {
+ AirInstRef operand;
+ uint32_t _pad; // padding: second word
+ } un_op;
+ struct {
+ AirInstRef lhs;
+ AirInstRef rhs;
+ } bin_op;
+ struct {
+ AirInstRef ty_ref;
+ uint32_t _pad; // padding: second word
+ } ty;
+ struct {
+ AirInstRef ty_ref;
+ uint32_t zir_param_index;
+ } arg;
+ struct {
+ AirInstRef ty_ref;
+ AirInstRef operand;
+ } ty_op;
+ struct {
+ AirInstRef ty_ref;
+ uint32_t payload; // presumably an index into Air.extra -- TODO confirm
+ } ty_pl;
+ struct {
+ uint32_t block_inst; // plain uint32_t, not an AirInstRef; presumably a raw instruction index
+ AirInstRef operand;
+ } br;
+ struct {
+ uint32_t loop_inst; // plain uint32_t, not an AirInstRef; presumably a raw instruction index
+ uint32_t _pad; // padding: second word
+ } repeat_data;
+ struct {
+ AirInstRef operand;
+ uint32_t payload; // presumably an index into Air.extra -- TODO confirm
+ } pl_op;
+ struct {
+ uint32_t line;
+ uint32_t column;
+ } dbg_stmt;
+} AirInstData;
+
+// --- Air struct ---
+
+typedef struct { // AIR storage: tag array, data array, and a uint32_t extra array
+ uint32_t inst_len; // presumably the number of instructions in use; reset to 0 by airDeinit
+ uint32_t inst_cap; // presumably the allocated capacity of inst_tags/inst_datas; reset to 0 by airDeinit
+ AirInstTag* inst_tags; // released with free() in airDeinit
+ AirInstData* inst_datas; // released with free() in airDeinit
+ uint32_t extra_len; // presumably the number of extra words in use; reset to 0 by airDeinit
+ uint32_t extra_cap; // presumably the allocated capacity of extra; reset to 0 by airDeinit
+ uint32_t* extra; // released with free() in airDeinit
+} Air;
+
+// --- Extra payload structs ---
+
+typedef struct { // fixed header; presumably stored in Air.extra -- TODO confirm
+ uint32_t body_len;
+ // Trailing: body_len instruction indices.
+} AirBlock;
+
+typedef struct { // like AirBlock, with an additional InternPool index `func`
+ InternPoolIndex func;
+ uint32_t body_len;
+ // Trailing: body_len instruction indices.
+} AirDbgInlineBlock;
+
+typedef struct { // header preceding the call's argument refs
+ uint32_t args_len;
+ // Trailing: args_len AirInstRef values.
+} AirCall;
+
+typedef struct { // two body lengths; both bodies follow back to back
+ uint32_t then_body_len;
+ uint32_t else_body_len;
+ // Trailing: then_body then else_body instruction indices.
+} AirCondBr;
+
+typedef struct { // same shape as AirBlock
+ uint32_t body_len;
+ // Trailing: body_len instruction indices.
+} AirTry;
+
+typedef struct { // same shape as AirTry
+ uint32_t body_len;
+ // Trailing: body_len instruction indices.
+} AirTryPtr;
+
+typedef struct { // single word, no trailing data documented
+ uint32_t field_index;
+} AirStructField;
+
+typedef struct { // two refs, same field names as AirInstData.bin_op
+ AirInstRef lhs;
+ AirInstRef rhs;
+} AirBin;
+
+typedef struct { // InternPool index plus a field index
+ InternPoolIndex field_owner;
+ uint32_t field_index;
+} AirFieldParentPtr;
+
+// --- Function declarations ---
+
+void airDeinit(Air* air); // frees inst_tags, inst_datas and extra, then resets pointers to NULL and lengths/capacities to 0
+
+#endif
diff --git a/stage0/intern_pool.c b/stage0/intern_pool.c
@@ -0,0 +1,64 @@
+#include "intern_pool.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define IP_INITIAL_CAP 256 // initial items_cap set by ipInit (InternPoolKey array)
+#define IP_HASH_INITIAL_CAP 512 // initial hash_cap set by ipInit; counts uint32_t slots
+#define IP_STRING_INITIAL_CAP 1024 // initial string_bytes_cap set by ipInit (uint8_t array)
+#define IP_EXTRA_INITIAL_CAP 256 // initial extra_cap set by ipInit (uint32_t array)
+
+// Builds a fresh InternPool: all fields start zeroed, then the four backing
+// arrays (items, hash_table, string_bytes, extra) are allocated via ARR_INIT
+// at their initial capacities. Lengths stay 0; nothing is interned yet.
+InternPool ipInit(void) {
+    InternPool pool;
+    memset(&pool, 0, sizeof(pool));
+
+    // Key storage.
+    pool.items = ARR_INIT(InternPoolKey, IP_INITIAL_CAP);
+    pool.items_cap = IP_INITIAL_CAP;
+
+    // Hash slots: every byte is set to 0xFF, so each uint32_t slot reads as
+    // 0xFFFFFFFF (presumably the "empty slot" marker).
+    pool.hash_table = ARR_INIT(uint32_t, IP_HASH_INITIAL_CAP);
+    pool.hash_cap = IP_HASH_INITIAL_CAP;
+    memset(pool.hash_table, 0xFF, sizeof(uint32_t) * IP_HASH_INITIAL_CAP);
+
+    // String bytes.
+    pool.string_bytes = ARR_INIT(uint8_t, IP_STRING_INITIAL_CAP);
+    pool.string_bytes_cap = IP_STRING_INITIAL_CAP;
+
+    // Extra words.
+    pool.extra = ARR_INIT(uint32_t, IP_EXTRA_INITIAL_CAP);
+    pool.extra_cap = IP_EXTRA_INITIAL_CAP;
+
+    return pool;
+}
+
+// Frees the four arrays owned by `ip` and resets the matching pointer,
+// length and capacity fields, leaving those fields in an empty state.
+// The `ip` object itself is not freed.
+void ipDeinit(InternPool* ip) {
+    // Key storage.
+    free(ip->items);
+    ip->items = NULL;
+    ip->items_len = 0;
+    ip->items_cap = 0;
+
+    // Hash table (tracks a capacity only).
+    free(ip->hash_table);
+    ip->hash_table = NULL;
+    ip->hash_cap = 0;
+
+    // String bytes.
+    free(ip->string_bytes);
+    ip->string_bytes = NULL;
+    ip->string_bytes_len = 0;
+    ip->string_bytes_cap = 0;
+
+    // Extra words.
+    free(ip->extra);
+    ip->extra = NULL;
+    ip->extra_len = 0;
+    ip->extra_cap = 0;
+}
+
+InternPoolIndex ipIntern(InternPool* ip, InternPoolKey key) { // stub: both arguments are ignored
+ (void)ip;
+ (void)key;
+ // TODO: implement interning. Until then every call returns IP_INDEX_NONE.
+ return IP_INDEX_NONE;
+}
+
+// Returns a copy of the key stored at `index`. An index at or beyond
+// ip->items_len yields an all-zero InternPoolKey instead of an error.
+InternPoolKey ipIndexToKey(const InternPool* ip, InternPoolIndex index) {
+    if (index >= ip->items_len) {
+        InternPoolKey zero_key;
+        memset(&zero_key, 0, sizeof(zero_key));
+        return zero_key;
+    }
+    return ip->items[index];
+}
+
+InternPoolIndex ipTypeOf(const InternPool* ip, InternPoolIndex index) { // stub: both arguments are ignored
+ (void)ip;
+ (void)index;
+ // TODO: implement type lookup. Until then every call returns IP_INDEX_NONE.
+ return IP_INDEX_NONE;
+}
diff --git a/stage0/intern_pool.h b/stage0/intern_pool.h
@@ -0,0 +1,357 @@
+// intern_pool.h — Simplified single-threaded InternPool, ported from
+// src/InternPool.zig.
+#ifndef _ZIG0_INTERN_POOL_H__
+#define _ZIG0_INTERN_POOL_H__
+
+#include "common.h"
+#include <stdbool.h>
+#include <stdint.h>
+
+// --- InternPool index type ---
+
+typedef uint32_t InternPoolIndex; // indexes InternPool.items
+
+// --- Pre-interned constants ---
+// Values 0-123, matching the InternPool.Index enum.
+
+// Types (0-103); 0-51 are listed here, vectors and the rest follow below.
+#define IP_INDEX_U0_TYPE 0
+#define IP_INDEX_I0_TYPE 1
+#define IP_INDEX_U1_TYPE 2
+#define IP_INDEX_U8_TYPE 3
+#define IP_INDEX_I8_TYPE 4
+#define IP_INDEX_U16_TYPE 5
+#define IP_INDEX_I16_TYPE 6
+#define IP_INDEX_U29_TYPE 7
+#define IP_INDEX_U32_TYPE 8
+#define IP_INDEX_I32_TYPE 9
+#define IP_INDEX_U64_TYPE 10
+#define IP_INDEX_I64_TYPE 11
+#define IP_INDEX_U80_TYPE 12
+#define IP_INDEX_U128_TYPE 13
+#define IP_INDEX_I128_TYPE 14
+#define IP_INDEX_U256_TYPE 15
+#define IP_INDEX_USIZE_TYPE 16
+#define IP_INDEX_ISIZE_TYPE 17
+#define IP_INDEX_C_CHAR_TYPE 18
+#define IP_INDEX_C_SHORT_TYPE 19
+#define IP_INDEX_C_USHORT_TYPE 20
+#define IP_INDEX_C_INT_TYPE 21
+#define IP_INDEX_C_UINT_TYPE 22
+#define IP_INDEX_C_LONG_TYPE 23
+#define IP_INDEX_C_ULONG_TYPE 24
+#define IP_INDEX_C_LONGLONG_TYPE 25
+#define IP_INDEX_C_ULONGLONG_TYPE 26
+#define IP_INDEX_C_LONGDOUBLE_TYPE 27
+#define IP_INDEX_F16_TYPE 28
+#define IP_INDEX_F32_TYPE 29
+#define IP_INDEX_F64_TYPE 30
+#define IP_INDEX_F80_TYPE 31
+#define IP_INDEX_F128_TYPE 32
+#define IP_INDEX_ANYOPAQUE_TYPE 33
+#define IP_INDEX_BOOL_TYPE 34
+#define IP_INDEX_VOID_TYPE 35
+#define IP_INDEX_TYPE_TYPE 36
+#define IP_INDEX_ANYERROR_TYPE 37
+#define IP_INDEX_COMPTIME_INT_TYPE 38
+#define IP_INDEX_COMPTIME_FLOAT_TYPE 39
+#define IP_INDEX_NORETURN_TYPE 40
+#define IP_INDEX_ANYFRAME_TYPE 41
+#define IP_INDEX_NULL_TYPE 42
+#define IP_INDEX_UNDEFINED_TYPE 43
+#define IP_INDEX_ENUM_LITERAL_TYPE 44
+#define IP_INDEX_PTR_USIZE_TYPE 45
+#define IP_INDEX_PTR_CONST_COMPTIME_INT_TYPE 46
+#define IP_INDEX_MANYPTR_U8_TYPE 47
+#define IP_INDEX_MANYPTR_CONST_U8_TYPE 48
+#define IP_INDEX_MANYPTR_CONST_U8_SENTINEL_0_TYPE 49
+#define IP_INDEX_SLICE_CONST_U8_TYPE 50
+#define IP_INDEX_SLICE_CONST_U8_SENTINEL_0_TYPE 51
+
+// Vector types (52-98) — matching InternPool.Index enum order exactly.
+#define IP_INDEX_VECTOR_8_I8_TYPE 52 // I8 group: 52-55
+#define IP_INDEX_VECTOR_16_I8_TYPE 53
+#define IP_INDEX_VECTOR_32_I8_TYPE 54
+#define IP_INDEX_VECTOR_64_I8_TYPE 55
+#define IP_INDEX_VECTOR_1_U8_TYPE 56 // U8 group: 56-62
+#define IP_INDEX_VECTOR_2_U8_TYPE 57
+#define IP_INDEX_VECTOR_4_U8_TYPE 58
+#define IP_INDEX_VECTOR_8_U8_TYPE 59
+#define IP_INDEX_VECTOR_16_U8_TYPE 60
+#define IP_INDEX_VECTOR_32_U8_TYPE 61
+#define IP_INDEX_VECTOR_64_U8_TYPE 62
+#define IP_INDEX_VECTOR_2_I16_TYPE 63 // I16 group: 63-67
+#define IP_INDEX_VECTOR_4_I16_TYPE 64
+#define IP_INDEX_VECTOR_8_I16_TYPE 65
+#define IP_INDEX_VECTOR_16_I16_TYPE 66
+#define IP_INDEX_VECTOR_32_I16_TYPE 67
+#define IP_INDEX_VECTOR_4_U16_TYPE 68 // U16 group: 68-71
+#define IP_INDEX_VECTOR_8_U16_TYPE 69
+#define IP_INDEX_VECTOR_16_U16_TYPE 70
+#define IP_INDEX_VECTOR_32_U16_TYPE 71
+#define IP_INDEX_VECTOR_2_I32_TYPE 72 // I32 group: 72-75
+#define IP_INDEX_VECTOR_4_I32_TYPE 73
+#define IP_INDEX_VECTOR_8_I32_TYPE 74
+#define IP_INDEX_VECTOR_16_I32_TYPE 75
+#define IP_INDEX_VECTOR_4_U32_TYPE 76 // U32 group: 76-78
+#define IP_INDEX_VECTOR_8_U32_TYPE 77
+#define IP_INDEX_VECTOR_16_U32_TYPE 78
+#define IP_INDEX_VECTOR_2_I64_TYPE 79 // I64 group: 79-81
+#define IP_INDEX_VECTOR_4_I64_TYPE 80
+#define IP_INDEX_VECTOR_8_I64_TYPE 81
+#define IP_INDEX_VECTOR_2_U64_TYPE 82 // U64 group: 82-84
+#define IP_INDEX_VECTOR_4_U64_TYPE 83
+#define IP_INDEX_VECTOR_8_U64_TYPE 84
+#define IP_INDEX_VECTOR_1_U128_TYPE 85 // U128 group: 85-86
+#define IP_INDEX_VECTOR_2_U128_TYPE 86
+#define IP_INDEX_VECTOR_1_U256_TYPE 87 // U256 group: 87
+#define IP_INDEX_VECTOR_4_F16_TYPE 88 // F16 group: 88-91
+#define IP_INDEX_VECTOR_8_F16_TYPE 89
+#define IP_INDEX_VECTOR_16_F16_TYPE 90
+#define IP_INDEX_VECTOR_32_F16_TYPE 91
+#define IP_INDEX_VECTOR_2_F32_TYPE 92 // F32 group: 92-95
+#define IP_INDEX_VECTOR_4_F32_TYPE 93
+#define IP_INDEX_VECTOR_8_F32_TYPE 94
+#define IP_INDEX_VECTOR_16_F32_TYPE 95
+#define IP_INDEX_VECTOR_2_F64_TYPE 96 // F64 group: 96-98
+#define IP_INDEX_VECTOR_4_F64_TYPE 97
+#define IP_INDEX_VECTOR_8_F64_TYPE 98
+
+// More types (99-103)
+#define IP_INDEX_OPTIONAL_NORETURN_TYPE 99
+#define IP_INDEX_ANYERROR_VOID_ERROR_UNION_TYPE 100
+#define IP_INDEX_ADHOC_INFERRED_ERROR_SET_TYPE 101
+#define IP_INDEX_GENERIC_POISON_TYPE 102
+#define IP_INDEX_EMPTY_TUPLE_TYPE 103 // last pre-interned type index
+
+// Values (104-123)
+#define IP_INDEX_UNDEF 104 // first pre-interned value index
+#define IP_INDEX_UNDEF_BOOL 105
+#define IP_INDEX_UNDEF_USIZE 106
+#define IP_INDEX_UNDEF_U1 107
+#define IP_INDEX_ZERO 108
+#define IP_INDEX_ZERO_USIZE 109
+#define IP_INDEX_ZERO_U1 110
+#define IP_INDEX_ZERO_U8 111
+#define IP_INDEX_ONE 112
+#define IP_INDEX_ONE_USIZE 113
+#define IP_INDEX_ONE_U1 114
+#define IP_INDEX_ONE_U8 115
+#define IP_INDEX_FOUR_U8 116
+#define IP_INDEX_NEGATIVE_ONE 117
+#define IP_INDEX_VOID_VALUE 118
+#define IP_INDEX_UNREACHABLE_VALUE 119
+#define IP_INDEX_NULL_VALUE 120
+#define IP_INDEX_BOOL_TRUE 121
+#define IP_INDEX_BOOL_FALSE 122
+#define IP_INDEX_EMPTY_TUPLE 123 // last pre-interned index
+
+#define IP_INDEX_NONE UINT32_MAX // sentinel: "no index"
+#define IP_INDEX_PREINTERN_COUNT 124 // covers indices 0-123 defined above
+
+// --- SimpleType enum (matching InternPool.SimpleType) ---
+
+typedef enum { // 30 enumerators, numbered 0..29 in declaration order
+ SIMPLE_TYPE_F16 = 0, // NOTE(review): differs from IP_INDEX_F16_TYPE (28)
+ SIMPLE_TYPE_F32,
+ SIMPLE_TYPE_F64,
+ SIMPLE_TYPE_F80,
+ SIMPLE_TYPE_F128,
+ SIMPLE_TYPE_USIZE,
+ SIMPLE_TYPE_ISIZE,
+ SIMPLE_TYPE_C_CHAR,
+ SIMPLE_TYPE_C_SHORT,
+ SIMPLE_TYPE_C_USHORT,
+ SIMPLE_TYPE_C_INT,
+ SIMPLE_TYPE_C_UINT,
+ SIMPLE_TYPE_C_LONG,
+ SIMPLE_TYPE_C_ULONG,
+ SIMPLE_TYPE_C_LONGLONG,
+ SIMPLE_TYPE_C_ULONGLONG,
+ SIMPLE_TYPE_C_LONGDOUBLE,
+ SIMPLE_TYPE_ANYOPAQUE,
+ SIMPLE_TYPE_BOOL,
+ SIMPLE_TYPE_VOID,
+ SIMPLE_TYPE_TYPE,
+ SIMPLE_TYPE_ANYERROR,
+ SIMPLE_TYPE_COMPTIME_INT,
+ SIMPLE_TYPE_COMPTIME_FLOAT,
+ SIMPLE_TYPE_NORETURN,
+ SIMPLE_TYPE_NULL,
+ SIMPLE_TYPE_UNDEFINED,
+ SIMPLE_TYPE_ENUM_LITERAL,
+ SIMPLE_TYPE_ADHOC_INFERRED_ERROR_SET,
+ SIMPLE_TYPE_GENERIC_POISON,
+} SimpleType;
+
+// --- SimpleValue enum (matching InternPool.SimpleValue) ---
+
+typedef enum { // 7 enumerators, 0..6; not in IP_INDEX_* value order
+ SIMPLE_VALUE_UNDEFINED = 0,
+ SIMPLE_VALUE_VOID,
+ SIMPLE_VALUE_NULL,
+ SIMPLE_VALUE_EMPTY_TUPLE,
+ SIMPLE_VALUE_TRUE,
+ SIMPLE_VALUE_FALSE,
+ SIMPLE_VALUE_UNREACHABLE,
+} SimpleValue;
+
+// --- Type descriptors ---
+
+typedef struct { // type of InternPoolKey.data.int_type
+ uint16_t bits;
+ uint8_t signedness; // 0 = unsigned, 1 = signed
+} IntType;
+
+typedef struct { // type of InternPoolKey.data.ptr_type
+ InternPoolIndex child;
+ InternPoolIndex sentinel;
+ uint32_t flags; // bit meanings are not defined in this header
+ uint32_t packed_offset;
+} PtrType;
+
+typedef struct { // type of InternPoolKey.data.array_type
+ uint64_t len; // 64-bit here, whereas VectorType.len is 32-bit
+ InternPoolIndex child;
+ InternPoolIndex sentinel;
+} ArrayType;
+
+typedef struct { // type of InternPoolKey.data.vector_type
+ uint32_t len;
+ InternPoolIndex child;
+} VectorType;
+
+typedef struct { // type of InternPoolKey.data.error_union_type
+ InternPoolIndex error_set;
+ InternPoolIndex payload;
+} ErrorUnionType;
+
+typedef struct { // type of InternPoolKey.data.func_type
+ InternPoolIndex return_type;
+ uint32_t param_count;
+ uint32_t comptime_bits; // presumably one bit per parameter — TODO confirm
+ uint32_t noalias_bits;
+ uint8_t cc; // presumably calling convention; encoding not defined here
+ bool is_var_args;
+ bool is_generic;
+ bool is_noinline;
+ // param_types stored in extra
+} FuncType;
+
+typedef struct { // type of InternPoolKey.data.error_set_type
+ uint32_t names_start;
+ uint32_t names_count;
+ // indices into extra
+} ErrorSetType;
+
+typedef struct { // not referenced by any other declaration in this header
+ uint32_t zir_index;
+ uint32_t type_hash;
+} NamespaceType;
+
+// --- InternPoolKeyTag enum (matching Key union variants) ---
+
+typedef enum { // 34 tags, implicitly numbered 0..33
+ IP_KEY_INT_TYPE, // value 0, so an all-zero InternPoolKey carries this tag
+ IP_KEY_PTR_TYPE,
+ IP_KEY_ARRAY_TYPE,
+ IP_KEY_VECTOR_TYPE,
+ IP_KEY_OPT_TYPE,
+ IP_KEY_ANYFRAME_TYPE,
+ IP_KEY_ERROR_UNION_TYPE,
+ IP_KEY_SIMPLE_TYPE,
+ IP_KEY_STRUCT_TYPE,
+ IP_KEY_TUPLE_TYPE,
+ IP_KEY_UNION_TYPE,
+ IP_KEY_OPAQUE_TYPE,
+ IP_KEY_ENUM_TYPE,
+ IP_KEY_FUNC_TYPE,
+ IP_KEY_ERROR_SET_TYPE,
+ IP_KEY_INFERRED_ERROR_SET_TYPE,
+ IP_KEY_UNDEF,
+ IP_KEY_SIMPLE_VALUE,
+ IP_KEY_VARIABLE,
+ IP_KEY_EXTERN,
+ IP_KEY_FUNC,
+ IP_KEY_INT,
+ IP_KEY_ERR,
+ IP_KEY_ERROR_UNION,
+ IP_KEY_ENUM_LITERAL,
+ IP_KEY_ENUM_TAG,
+ IP_KEY_EMPTY_ENUM_VALUE,
+ IP_KEY_FLOAT,
+ IP_KEY_PTR,
+ IP_KEY_SLICE,
+ IP_KEY_OPT,
+ IP_KEY_AGGREGATE,
+ IP_KEY_UNION_VALUE,
+ IP_KEY_MEMOIZED_CALL,
+} InternPoolKeyTag;
+
+// --- InternPoolKey (tagged union) ---
+
+typedef struct { // tagged union: `tag` says which member of `data` is set
+ InternPoolKeyTag tag;
+ union { // members are declared in the same order as the IP_KEY_* tags
+ IntType int_type;
+ PtrType ptr_type;
+ ArrayType array_type;
+ VectorType vector_type;
+ InternPoolIndex opt_type; // child type
+ InternPoolIndex anyframe_type; // return type
+ ErrorUnionType error_union_type;
+ SimpleType simple_type;
+ InternPoolIndex struct_type;
+ InternPoolIndex tuple_type;
+ InternPoolIndex union_type;
+ InternPoolIndex opaque_type;
+ InternPoolIndex enum_type;
+ FuncType func_type;
+ ErrorSetType error_set_type;
+ InternPoolIndex inferred_error_set_type;
+ InternPoolIndex undef; // type index
+ SimpleValue simple_value;
+ InternPoolIndex variable;
+ InternPoolIndex extern_val; // pairs with IP_KEY_EXTERN
+ InternPoolIndex func;
+ uint64_t int_val; // pairs with IP_KEY_INT; unsigned 64-bit only
+ InternPoolIndex err;
+ InternPoolIndex error_union;
+ uint32_t enum_literal; // string index
+ InternPoolIndex enum_tag;
+ InternPoolIndex empty_enum_value;
+ double float_val; // pairs with IP_KEY_FLOAT
+ InternPoolIndex ptr;
+ InternPoolIndex slice;
+ InternPoolIndex opt;
+ InternPoolIndex aggregate;
+ InternPoolIndex union_value;
+ InternPoolIndex memoized_call;
+ } data;
+} InternPoolKey;
+
+// --- InternPool struct ---
+
+typedef struct { // all four buffers are set up by ipInit, freed by ipDeinit
+ InternPoolKey* items; // ipIndexToKey reads items[index] when in range
+ uint32_t items_len;
+ uint32_t items_cap;
+ uint32_t* hash_table; // hash_cap slots; ipInit fills every byte with 0xFF
+ uint32_t hash_cap;
+ uint8_t* string_bytes;
+ uint32_t string_bytes_len;
+ uint32_t string_bytes_cap;
+ uint32_t* extra;
+ uint32_t extra_len;
+ uint32_t extra_cap;
+} InternPool;
+
+// --- Function declarations ---
+
+InternPool ipInit(void);
+void ipDeinit(InternPool* ip);
+InternPoolIndex ipIntern(InternPool* ip, InternPoolKey key);
+InternPoolKey ipIndexToKey(const InternPool* ip, InternPoolIndex index);
+InternPoolIndex ipTypeOf(const InternPool* ip, InternPoolIndex index);
+
+#endif
diff --git a/stage0/sema.c b/stage0/sema.c
@@ -0,0 +1,63 @@
+#include "sema.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define SEMA_AIR_INITIAL_CAP 256 // semaInit: air_inst_tags/datas capacity
+#define SEMA_AIR_EXTRA_INITIAL_CAP 256 // semaInit: air_extra capacity
+
+Sema semaInit(InternPool* ip, Zir code) {
+    Sema s; // zero-filled first: fields not set below start as 0/NULL/false
+    memset(&s, 0, sizeof(s));
+    s.ip = ip;
+    s.code = code;
+    s.func_index = IP_INDEX_NONE;
+    s.fn_ret_ty = TYPE_NONE;
+    s.branch_quota = SEMA_DEFAULT_BRANCH_QUOTA;
+    s.allow_memoize = true;
+    s.air_inst_cap = SEMA_AIR_INITIAL_CAP;
+    s.air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP;
+    s.air_inst_tags = ARR_INIT(AirInstTag, SEMA_AIR_INITIAL_CAP);
+    s.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
+    s.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
+    return s;
+}
+
+void semaDeinit(Sema* sema) {
+    free(sema->air_inst_tags); // tags and datas share air_inst_len/cap
+    sema->air_inst_tags = NULL;
+    free(sema->air_inst_datas);
+    sema->air_inst_datas = NULL;
+    sema->air_inst_len = 0;
+    sema->air_inst_cap = 0;
+    free(sema->air_extra);
+    sema->air_extra = NULL;
+    sema->air_extra_len = 0;
+    sema->air_extra_cap = 0;
+    free(sema->inst_map.items);
+    sema->inst_map.items = NULL;
+    sema->inst_map.items_len = 0;
+    sema->inst_map.start = 0;
+}
+
+Air semaAnalyze(Sema* sema) {
+    // TODO: implement semantic analysis. The result is an all-zero Air.
+    Air result;
+    memset(&result, 0, sizeof(result));
+
+    // Until the Sema handlers exist, call each helper once so cppcheck
+    // does not flag them as unusedFunction; the results are discarded.
+    InternPool* pool = sema->ip;
+    InternPoolKey probe;
+    memset(&probe, 0, sizeof(probe));
+    probe.tag = IP_KEY_SIMPLE_TYPE;
+    probe.data.simple_type = SIMPLE_TYPE_VOID;
+    (void)ipIntern(pool, probe);
+    (void)ipIndexToKey(pool, IP_INDEX_VOID_TYPE);
+    (void)ipTypeOf(pool, IP_INDEX_VOID_TYPE);
+    (void)typeZigTypeTag(pool, IP_INDEX_VOID_TYPE);
+    (void)typeChildType(pool, IP_INDEX_VOID_TYPE);
+    (void)typeIsSlice(pool, IP_INDEX_VOID_TYPE);
+    (void)valueToType(valueFromInterned(IP_INDEX_VOID_VALUE));
+
+    return result;
+}
diff --git a/stage0/sema.h b/stage0/sema.h
@@ -0,0 +1,143 @@
+// sema.h — Semantic analysis context, ported from src/Sema.zig.
+#ifndef _ZIG0_SEMA_H__
+#define _ZIG0_SEMA_H__
+
+#include "air.h"
+#include "intern_pool.h"
+#include "type.h"
+#include "value.h"
+#include "zir.h"
+#include <stdbool.h>
+#include <stdint.h>
+
+// --- InstMap ---
+// Maps ZIR instruction index -> AIR instruction ref.
+// Uses a flat array indexed by ZIR instruction index minus a start offset.
+
+typedef struct {
+ AirInstRef* items; // freed by semaDeinit
+ uint32_t items_len;
+ uint32_t start; // the ZIR index offset described above
+} InstMap;
+
+// --- SemaBlockMerges ---
+// Collects break results for block analysis.
+
+typedef struct {
+ uint32_t block_inst;
+ AirInstRef* results; // presumably results_len used of results_cap
+ uint32_t results_len;
+ uint32_t results_cap;
+ uint32_t* br_list; // presumably AIR br instruction indices — TODO confirm
+ uint32_t br_list_len;
+ uint32_t br_list_cap;
+} SemaBlockMerges;
+
+// --- SemaBlockLabel ---
+// Maps a block ZIR instruction to corresponding AIR instruction for
+// break instruction analysis.
+
+typedef struct {
+ uint32_t zir_block; // the block's ZIR instruction, per the comment above
+ SemaBlockMerges merges; // embedded by value, not a pointer
+} SemaBlockLabel;
+
+// --- SemaBlockInlining ---
+// Indicates an inline function call is happening and return instructions
+// should be analyzed as break instructions to this AIR block.
+
+typedef struct SemaBlockInlining {
+ struct SemaBlock* call_block; // struct SemaBlock is defined further down
+ InternPoolIndex func;
+ bool is_generic_instantiation;
+ bool has_comptime_args;
+ AirInstRef comptime_result;
+ SemaBlockMerges merges; // embedded by value, not a pointer
+} SemaBlockInlining;
+
+// --- SemaBlock ---
+// Context for semantically analyzing ZIR instructions within a block.
+
+typedef struct SemaBlock {
+ struct SemaBlock* parent;
+ struct Sema* sema; // struct Sema is defined further down in this header
+ uint32_t namespace_index;
+ uint32_t* instructions; // presumably AIR instruction indices — TODO confirm
+ uint32_t instructions_len;
+ uint32_t instructions_cap;
+ SemaBlockLabel* label; // NOTE(review): NULL likely means unlabeled — confirm
+ SemaBlockInlining* inlining;
+ uint32_t runtime_index;
+ uint32_t inline_block;
+ bool is_comptime;
+ bool is_typeof;
+ AirInstRef error_return_trace_index;
+ bool want_safety;
+ bool want_safety_set; // presumably "want_safety was explicitly set"
+ uint32_t src_base_inst;
+} SemaBlock;
+
+// --- InferredErrorSet ---
+
+typedef struct { // pointed to by Sema.fn_ret_ty_ies
+ InternPoolIndex func;
+ InternPoolIndex resolved;
+ // Simplified: error names stored as indices into InternPool string_bytes.
+ uint32_t* error_names;
+ uint32_t error_names_len;
+ uint32_t error_names_cap;
+} InferredErrorSet;
+
+// --- MaybeComptimeAlloc ---
+
+typedef struct { // not referenced by any other declaration in this header
+ uint32_t runtime_index;
+} MaybeComptimeAlloc;
+
+// --- ComptimeAlloc ---
+
+typedef struct { // not referenced by any other declaration in this header
+ InternPoolIndex val;
+ bool is_const;
+ uint32_t alignment; // units (bytes vs. log2) not stated here — TODO confirm
+ uint32_t runtime_index;
+} ComptimeAlloc;
+
+// --- Sema ---
+// State used for compiling a ZIR into AIR.
+// Transforms untyped ZIR instructions into semantically-analyzed AIR
+// instructions. Does type checking, comptime control flow, and safety-check
+// generation.
+
+typedef struct Sema {
+ InternPool* ip; // supplied to semaInit; semaDeinit does not free it
+ Zir code; // stored by value in semaInit
+ AirInstTag* air_inst_tags; // same capacity as air_inst_datas
+ AirInstData* air_inst_datas;
+ uint32_t air_inst_len;
+ uint32_t air_inst_cap;
+ uint32_t* air_extra;
+ uint32_t air_extra_len;
+ uint32_t air_extra_cap;
+ InstMap inst_map; // semaInit leaves it zeroed; semaDeinit frees its items
+ InternPoolIndex func_index; // semaInit sets IP_INDEX_NONE
+ bool func_is_naked;
+ TypeIndex fn_ret_ty; // semaInit sets TYPE_NONE
+ InferredErrorSet* fn_ret_ty_ies; // NULL after semaInit; never freed here
+ uint32_t branch_quota; // semaInit sets SEMA_DEFAULT_BRANCH_QUOTA
+ uint32_t branch_count;
+ uint32_t comptime_break_inst;
+ bool allow_memoize; // semaInit sets true
+ bool has_compile_errors;
+ char err_buf[ERR_BUF_SIZE];
+} Sema;
+
+#define SEMA_DEFAULT_BRANCH_QUOTA 1000 // initial Sema.branch_quota
+
+// --- Function declarations ---
+
+Sema semaInit(InternPool* ip, Zir code);
+void semaDeinit(Sema* sema);
+Air semaAnalyze(Sema* sema);
+
+#endif
diff --git a/stage0/type.c b/stage0/type.c
@@ -0,0 +1,22 @@
+#include "type.h"
+
+uint32_t typeZigTypeTag(const InternPool* ip, TypeIndex ty) {
+    // TODO: implement; the stub ignores both arguments and reports tag 0.
+    (void)ty;
+    (void)ip;
+    return 0;
+}
+
+TypeIndex typeChildType(const InternPool* ip, TypeIndex ty) {
+    // TODO: implement; the stub ignores both arguments and reports no type.
+    (void)ty;
+    (void)ip;
+    return TYPE_NONE;
+}
+
+bool typeIsSlice(const InternPool* ip, TypeIndex ty) {
+    // TODO: implement; the stub ignores both arguments and answers false.
+    (void)ty;
+    (void)ip;
+    return false;
+}
diff --git a/stage0/type.h b/stage0/type.h
@@ -0,0 +1,18 @@
+// type.h — Type wrapper, ported from src/Type.zig.
+#ifndef _ZIG0_TYPE_H__
+#define _ZIG0_TYPE_H__
+
+#include "intern_pool.h"
+#include <stdbool.h>
+#include <stdint.h>
+
+typedef InternPoolIndex TypeIndex; // alias: same 32-bit index space
+#define TYPE_NONE IP_INDEX_NONE // "no type" sentinel
+
+// --- Function declarations (stubs for now) ---
+
+uint32_t typeZigTypeTag(const InternPool* ip, TypeIndex ty);
+TypeIndex typeChildType(const InternPool* ip, TypeIndex ty);
+bool typeIsSlice(const InternPool* ip, TypeIndex ty);
+
+#endif
diff --git a/stage0/value.c b/stage0/value.c
@@ -0,0 +1,9 @@
+#include "value.h"
+
+TypeIndex valueToType(ValueIndex val) {
+    // Types and values live in one InternPool index space, so a value
+    // that denotes a type already holds that type's index.
+    return (TypeIndex)val;
+}
+
+ValueIndex valueFromInterned(InternPoolIndex index) { return index; } // no-op
diff --git a/stage0/value.h b/stage0/value.h
@@ -0,0 +1,16 @@
+// value.h — Value wrapper, ported from src/Value.zig.
+#ifndef _ZIG0_VALUE_H__
+#define _ZIG0_VALUE_H__
+
+#include "intern_pool.h"
+#include "type.h"
+
+typedef InternPoolIndex ValueIndex; // alias: same 32-bit index space
+#define VALUE_NONE IP_INDEX_NONE // "no value" sentinel
+
+// --- Function declarations ---
+
+TypeIndex valueToType(ValueIndex val);
+ValueIndex valueFromInterned(InternPoolIndex index);
+
+#endif
diff --git a/stage0/zig0.c b/stage0/zig0.c
@@ -1,7 +1,7 @@
-#include "common.h"
-
#include "ast.h"
#include "astgen.h"
+#include "intern_pool.h"
+#include "sema.h"
#include "zir.h"
#include <stdbool.h>
@@ -40,6 +40,12 @@ static int zig0Run(const char* program, char** msg) {
fprintf(stderr, "zir: %u instructions, %u extra, %u string bytes\n",
zir.inst_len, zir.extra_len, zir.string_bytes_len);
+ InternPool ip = ipInit();
+ Sema sema = semaInit(&ip, zir);
+ Air air = semaAnalyze(&sema);
+ semaDeinit(&sema);
+ airDeinit(&air);
+ ipDeinit(&ip);
zirDeinit(&zir);
return 0;
}