zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 0b041e0c70bc7ce4e1249018216f6d49b7909483 (tree)
parent 9d438d5674c2630a6d61024c51b17094fe5bf82f
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Tue, 17 Feb 2026 19:42:24 +0000

stage0: add Sema data structures (Phase A)

Create header files and minimal .c stubs for the Sema pipeline:
- intern_pool.h/c: InternPool types, pre-interned indices (0-123),
  SimpleType/SimpleValue enums, Key tagged union, init/deinit
- air.h/c: AIR instruction tags (X-macro), Ref encoding (MSB tag),
  InstData union, extra payload structs, init/deinit
- type.h/c: TypeIndex typedef, stub query functions
- value.h/c: ValueIndex typedef, conversion functions
- sema.h/c: Sema/Block/InstMap/Merges structs, init/deinit/analyze stubs

Wire up build.zig and integrate sema step into zig0.c pipeline.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M build.zig | 4 ++--
A stage0/.claude/sema-plan.md | 541 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A stage0/air.c | 15 +++++++++++++++
A stage0/air.h | 355 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A stage0/intern_pool.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A stage0/intern_pool.h | 357 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A stage0/sema.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A stage0/sema.h | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A stage0/type.c | 22 ++++++++++++++++++++++
A stage0/type.h | 18 ++++++++++++++++++
A stage0/value.c | 9 +++++++++
A stage0/value.h | 16 ++++++++++++++++
M stage0/zig0.c | 10 ++++++++--
13 files changed, 1613 insertions(+), 4 deletions(-)

diff --git a/build.zig b/build.zig @@ -10,8 +10,8 @@ const assert = std.debug.assert; const DevEnv = @import("src/dev.zig").Env; const ValueInterpretMode = enum { direct, by_name }; -const zig0_headers = &[_][]const u8{ "common.h", "ast.h", "parser.h", "zir.h", "astgen.h" }; -const zig0_c_lib_files = &[_][]const u8{ "tokenizer.c", "ast.c", "zig0.c", "parser.c", "zir.c", "astgen.c" }; +const zig0_headers = &[_][]const u8{ "common.h", "ast.h", "parser.h", "zir.h", "astgen.h", "intern_pool.h", "air.h", "type.h", "value.h", "sema.h" }; +const zig0_c_lib_files = &[_][]const u8{ "tokenizer.c", "ast.c", "zig0.c", "parser.c", "zir.c", "astgen.c", "intern_pool.c", "air.c", "type.c", "value.c", "sema.c" }; const zig0_all_c_files = zig0_c_lib_files ++ &[_][]const u8{"main.c"}; const zig0_cflags = &[_][]const u8{ "-std=c11", diff --git a/stage0/.claude/sema-plan.md b/stage0/.claude/sema-plan.md @@ -0,0 +1,541 @@ +# Sema Porting Plan for zig/stage0 + +## Context + +The stage0 bootstrap tool currently has tokenizer, parser, and AstGen fully ported to C. +The next major milestone is Sema (semantic analysis), which transforms ZIR into AIR. +Before Sema functions can be ported, the data structures it depends on must exist in C. + +This plan covers: +1. Identifying Sema's dependencies and their porting order +2. Defining the C data structures (header files) +3. Designing a test framework modeled on astgen_test.zig + +Scope decisions: +- **InternPool**: Simplified single-threaded (no sharding/deps/incremental) +- **Sema**: Incremental subset-first approach, test-driven +- **Compilation context**: Minimal bootstrap `SemaCtx` (not full Zcu) + +--- + +## 1. 
Dependency Graph and Porting Order + +``` + ┌──────────┐ + │ sema.h │ (Sema context, Block, InstMap) + └────┬─────┘ + │ depends on + ┌─────────────┼─────────────┐ + │ │ │ + ┌─────▼────┐ ┌─────▼────┐ ┌────▼─────┐ + │ air.h │ │ type.h │ │ value.h │ + └─────┬────┘ └─────┬────┘ └────┬─────┘ + │ │ │ + └─────────────┼─────────────┘ + │ all depend on + ┌───────▼────────┐ + │ intern_pool.h │ + └───────┬────────┘ + │ depends on + ┌───────▼────────┐ + │ common.h │ (already exists) + │ zir.h │ (already exists) + └────────────────┘ +``` + +**Porting order (bottom-up):** + +| Phase | File | Upstream Source | Estimated C lines | +|-------|------|---------------|-------------------| +| 1 | `intern_pool.h` | `src/InternPool.zig` | ~800 | +| 2 | `air.h` | `src/Air.zig` | ~600 | +| 3 | `type.h` / `value.h` | `src/Type.zig`, `src/Value.zig` | ~200 | +| 4 | `sema.h` | `src/Sema.zig` (struct defs only) | ~300 | +| 5 | `sema_test.zig` | new (modeled on `astgen_test.zig`) | ~400 | + +--- + +## 2. Data Structure Definitions + +### 2.1 `intern_pool.h` — Simplified Single-Threaded InternPool + +**Upstream**: `src/InternPool.zig` (~13K lines) +**Scope**: Type/value interning and lookup only. No threading, no dependency tracking, no incremental compilation. + +``` +Key types to define: + +InternPoolIndex (uint32_t) + - Pre-interned constants matching Zig's InternPool.Index enum: + IP_INDEX_U0_TYPE, IP_INDEX_U8_TYPE, IP_INDEX_I32_TYPE, ... + IP_INDEX_BOOL_TYPE, IP_INDEX_VOID_TYPE, IP_INDEX_TYPE_TYPE, ... + IP_INDEX_UNDEF, IP_INDEX_ZERO, IP_INDEX_ONE, IP_INDEX_BOOL_TRUE, ... + IP_INDEX_NONE = UINT32_MAX + - 104 pre-interned type indices (0-103) + 20 pre-interned value indices (104-123) + +SimpleType enum (matching InternPool.SimpleType): + f16, f32, f64, f80, f128, usize, isize, c_char, ..., + anyopaque, bool, void, type, anyerror, comptime_int, ... 
+ +SimpleValue enum (matching InternPool.SimpleValue): + undefined, void_val, null_val, empty_tuple, true_val, false_val, unreachable_val + +IntType struct: + uint16_t bits; + uint8_t signedness; // 0=unsigned, 1=signed + +PtrType struct: + InternPoolIndex child; + InternPoolIndex sentinel; // IP_INDEX_NONE if absent + uint32_t flags; // packed: size(2), alignment(6), is_const(1), is_volatile(1), is_allowzero(1), address_space(5), vector_index(16) + uint32_t packed_offset; // packed: host_size(16), bit_offset(16) + +ArrayType struct: + uint64_t len; + InternPoolIndex child; + InternPoolIndex sentinel; // IP_INDEX_NONE if absent + +VectorType struct: + uint32_t len; + InternPoolIndex child; + +ErrorUnionType struct: + InternPoolIndex error_set; + InternPoolIndex payload; + +FuncType struct: + InternPoolIndex* param_types; + uint32_t param_count; + InternPoolIndex return_type; + uint32_t comptime_bits; + uint32_t noalias_bits; + uint8_t cc; // calling convention + bool is_var_args; + bool is_generic; + bool is_noinline; + +ErrorSetType struct: + uint32_t* names; // NullTerminatedString indices + uint32_t names_count; + +NamespaceType struct (for struct/union/enum/opaque): + uint32_t zir_index; // TrackedInst.Index equivalent + uint32_t type_hash; // for reified types + +InternPoolKeyTag enum: + IP_KEY_INT_TYPE, IP_KEY_PTR_TYPE, IP_KEY_ARRAY_TYPE, + IP_KEY_VECTOR_TYPE, IP_KEY_OPT_TYPE, IP_KEY_ANYFRAME_TYPE, + IP_KEY_ERROR_UNION_TYPE, IP_KEY_SIMPLE_TYPE, IP_KEY_STRUCT_TYPE, + IP_KEY_TUPLE_TYPE, IP_KEY_UNION_TYPE, IP_KEY_OPAQUE_TYPE, + IP_KEY_ENUM_TYPE, IP_KEY_FUNC_TYPE, IP_KEY_ERROR_SET_TYPE, + IP_KEY_INFERRED_ERROR_SET_TYPE, + IP_KEY_UNDEF, IP_KEY_SIMPLE_VALUE, IP_KEY_VARIABLE, + IP_KEY_EXTERN, IP_KEY_FUNC, IP_KEY_INT, IP_KEY_ERR, + IP_KEY_ERROR_UNION, IP_KEY_ENUM_LITERAL, IP_KEY_ENUM_TAG, + IP_KEY_FLOAT, IP_KEY_PTR, IP_KEY_SLICE, IP_KEY_OPT, + IP_KEY_AGGREGATE, IP_KEY_UNION_VALUE + +InternPoolKey struct (tagged union): + InternPoolKeyTag tag; + union { IntType 
int_type; PtrType ptr_type; ... } data; + +InternPool struct: + InternPoolKey* items; // indexed by InternPoolIndex + uint32_t items_len; + uint32_t items_cap; + // Hash table for deduplication: + uint32_t* hash_table; // maps hash → index + uint32_t hash_cap; + // String storage: + uint8_t* string_bytes; + uint32_t string_bytes_len; + uint32_t string_bytes_cap; + // Global error set: + uint32_t* error_names; + uint32_t error_names_len; + +Functions: + InternPool ipInit(void); + void ipDeinit(InternPool*); + InternPoolIndex ipIntern(InternPool*, InternPoolKey); + InternPoolKey ipIndexToKey(InternPool*, InternPoolIndex); + InternPoolIndex ipTypeOf(InternPool*, InternPoolIndex); +``` + +### 2.2 `air.h` — Analyzed Intermediate Representation + +**Upstream**: `src/Air.zig` (~2,172 lines) + +``` +Key types to define: + +AirInstTag enum (uint8_t, ~204 tags): + AIR_INST_ARG, AIR_INST_ADD, AIR_INST_ADD_SAFE, AIR_INST_ADD_OPTIMIZED, + AIR_INST_ADD_WRAP, AIR_INST_ADD_SAT, AIR_INST_SUB, ... + AIR_INST_BLOCK, AIR_INST_LOOP, AIR_INST_BR, AIR_INST_CALL, ... 
+ (use X-macro pattern like ZIR_INST_FOREACH_TAG) + +AirInstRef (uint32_t): + Uses MSB (bit 31) as tag bit: + - Bit 31 = 0: value is an InternPool index (lower 31 bits) + - Bit 31 = 1: value is an AIR instruction index (lower 31 bits) + - AIR_REF_NONE = UINT32_MAX (special sentinel) + Helper macros: + #define AIR_REF_NONE UINT32_MAX + #define AIR_REF_IS_INST(r) (((r) >> 31) != 0 && (r) != AIR_REF_NONE) + #define AIR_REF_IS_IP(r) (((r) >> 31) == 0) + #define AIR_REF_TO_INST(r) ((r) & 0x7FFFFFFF) + #define AIR_REF_TO_IP(r) ((r) & 0x7FFFFFFF) + #define AIR_REF_FROM_INST(i) ((i) | 0x80000000) + #define AIR_REF_FROM_IP(i) (i) + +AirInstData union (8 bytes, matching Air.Inst.Data): + struct { AirInstRef operand; } un_op; + struct { AirInstRef lhs, rhs; } bin_op; + struct { AirInstRef ty; } ty; + struct { AirInstRef ty; uint32_t zir_param_index; } arg; + struct { AirInstRef ty; AirInstRef operand; } ty_op; + struct { AirInstRef ty; uint32_t payload; } ty_pl; + struct { uint32_t block_inst; AirInstRef operand; } br; + struct { AirInstRef operand; uint32_t payload; } pl_op; + struct { uint32_t line; uint32_t column; } dbg_stmt; + ... (all variants from Air.Inst.Data) + +Air struct: + uint32_t inst_len; + uint32_t inst_cap; + AirInstTag* inst_tags; + AirInstData* inst_datas; + uint32_t extra_len; + uint32_t extra_cap; + uint32_t* extra; + +Extra payload structs: + AirBlock, AirCall, AirCondBr, AirSwitchBr, AirTry, AirBin, etc. +``` + +### 2.3 `type.h` / `value.h` — Thin Wrappers + +**Upstream**: `src/Type.zig` (~4,179 lines), `src/Value.zig` (~3,288 lines) + +In C, Type and Value are just `InternPoolIndex` with helper functions. +These start minimal and grow as Sema functions need them. 
+ +``` +type.h: + typedef InternPoolIndex TypeIndex; + #define TYPE_NONE IP_INDEX_NONE + + // Functions ported on-demand as Sema needs them: + uint32_t typeZigTypeTag(InternPool*, TypeIndex); + TypeIndex typeChildType(InternPool*, TypeIndex); + TypeIndex typeElemType(InternPool*, TypeIndex); + bool typeIsSlice(InternPool*, TypeIndex); + bool typeIsCPtr(InternPool*, TypeIndex); + uint64_t typeArrayLen(InternPool*, TypeIndex); + PtrType typePtrInfo(InternPool*, TypeIndex); + +value.h: + typedef InternPoolIndex ValueIndex; + #define VALUE_NONE IP_INDEX_NONE + + TypeIndex valueToType(ValueIndex); + ValueIndex valueFromInterned(InternPoolIndex); + uint64_t valueToUnsignedInt(InternPool*, ValueIndex); +``` + +### 2.4 `sema.h` — Sema Context and Block + +**Upstream**: `src/Sema.zig` (lines 41-500) + +``` +RuntimeIndex (uint32_t): + #define RUNTIME_INDEX_ZERO 0 + #define RUNTIME_INDEX_COMPTIME_FIELD_PTR UINT32_MAX + +InstMap struct: + AirInstRef* items; + uint32_t start; // ZIR instruction index offset + uint32_t len; + +SemaBlock struct: + SemaBlock* parent; + Sema* sema; + uint32_t namespace; // NamespaceIndex + uint32_t* instructions; // AIR instruction indices + uint32_t instructions_len; + uint32_t instructions_cap; + SemaBlockLabel* label; + SemaBlockInlining* inlining; + int32_t runtime_cond; // LazySrcLoc or -1 + int32_t runtime_loop; // LazySrcLoc or -1 + uint32_t runtime_index; + uint32_t comptime_reason; // 0 = not comptime + bool is_typeof; + AirInstRef error_return_trace_index; + bool want_safety; // nullable via flag + bool want_safety_set; + uint8_t float_mode; // strict=0, optimized=1 + uint32_t src_base_inst; + uint32_t type_name_ctx; // NullTerminatedString + +SemaBlockLabel struct: + uint32_t zir_block; // Zir.Inst.Index + SemaBlockMerges merges; + +SemaBlockInlining struct: + SemaBlock* call_block; + int32_t call_src; + InternPoolIndex func; + bool is_generic_instantiation; + bool has_comptime_args; + AirInstRef comptime_result; + SemaBlockMerges 
merges; + +SemaBlockMerges struct: + uint32_t block_inst; // Air.Inst.Index + AirInstRef* results; + uint32_t results_len, results_cap; + uint32_t* br_list; + uint32_t br_list_len, br_list_cap; + +InferredErrorSet struct: + InternPoolIndex func; + uint32_t* error_names; // NullTerminatedString indices + uint32_t error_names_len, error_names_cap; + InternPoolIndex* inferred_sets; + uint32_t inferred_sets_len, inferred_sets_cap; + InternPoolIndex resolved; // IP_INDEX_NONE until resolved + +MaybeComptimeAlloc struct: + uint32_t runtime_index; + uint32_t* store_insts; + uint32_t stores_len, stores_cap; + +ComptimeAlloc struct: + InternPoolIndex val; + bool is_const; + int32_t src; // LazySrcLoc + uint8_t alignment; + uint32_t runtime_index; + +Sema struct: + InternPool* ip; // shared intern pool + Air air; // output AIR being built + Zir code; // input ZIR being analyzed + InstMap inst_map; // ZIR→AIR mapping + InternPoolIndex owner; // AnalUnit + InternPoolIndex func_index; + bool func_is_naked; + TypeIndex fn_ret_ty; + InferredErrorSet* fn_ret_ty_ies; // NULL if not inferred + uint32_t branch_quota; + uint32_t branch_count; + uint32_t comptime_break_inst; + // Hash maps (simplified as arrays for bootstrap): + ComptimeAlloc* comptime_allocs; + uint32_t comptime_allocs_len, comptime_allocs_cap; + bool allow_memoize; + uint8_t branch_hint; // 0=none + bool has_compile_errors; + char err_buf[ERR_BUF_SIZE]; + +Functions (stubs, filled incrementally): + Sema semaInit(InternPool*, Zir); + void semaDeinit(Sema*); + void semaAnalyzeFnBody(Sema*, SemaBlock*, uint32_t* body, uint32_t body_len); + AirInstRef semaResolveInst(Sema*, uint32_t zir_ref); +``` + +--- + +## 3. 
Test Framework: `sema_test.zig` + +**Modeled on**: `astgen_test.zig` and `stages_test.zig` + +### 3.1 Architecture + +``` +Source code ([:0]const u8) + │ + ├─── C path ───────────────────────────────────┐ + │ c.astParse() → C AST │ + │ c.astGen() → C ZIR │ + │ c.semaAnalyze() → C AIR │ + │ │ + ├─── Zig reference path ───────────────────────┤ + │ zigAst() → Zig AST │ + │ AstGen.generate() → Zig ZIR │ + │ setupSema() + analyzeFnBody() → Zig AIR │ + │ │ + └─── Compare ──────────────────────────────────┘ + expectEqualAir(zig_air, c_air) +``` + +### 3.2 Key Test Functions + +```zig +// sema_test.zig + +const c = parser_test.c; + +/// Set up a minimal Zig Sema environment and analyze a module's ZIR. +/// Returns the AIR for the top-level declarations. +fn refAir(gpa: Allocator, source: [:0]const u8) !Air { + // 1. Parse and generate ZIR via Zig stdlib + var tree = try std.zig.Ast.parse(gpa, source, .zig); + defer tree.deinit(gpa); + var zir = try std.zig.AstGen.generate(gpa, tree); + defer zir.deinit(gpa); + // 2. Set up minimal compilation context + // 3. Run Sema + // 4. Return AIR +} + +/// Compare two AIR outputs instruction-by-instruction. +fn expectEqualAir(gpa: Allocator, ref: Air, got_tags: []AirInstTag, + got_datas: []AirInstData, got_extra: []u32) !void { + // Step 1: Compare instruction count + // Step 2: Compare instruction tags + // Step 3: Compare instruction data field-by-field (like expectEqualData in astgen_test) + // Step 4: Compare extra data +} + +/// Corpus test: run Sema on real Zig files +test "sema: corpus" { + inline for (corpus_files) |path| { + semaCheck(gpa, @embedFile(path)) catch { + std.debug.print("FAIL: {s}\n", .{path}); + return error.TestFailed; + }; + } +} +``` + +### 3.3 Bridge Extension + +The `zig0_bridge.zig` needs a new export for Sema: + +```zig +// New C API function exposed via zig0_bridge.zig: +pub export fn semaAnalyze(zir: *c.Zir) c.Air { ... 
} +``` + +And the corresponding C function in a new `sema.c`: + +```c +// sema.c — Sema entry point +Air semaAnalyze(const Zir* zir, const Ast* ast) { + Sema sema = semaInit(&global_ip, *zir); + // ... set up root block, analyze body ... + semaDeinit(&sema); // frees internal state, AIR is returned + return sema.air; +} +``` + +### 3.4 Test Progression Strategy + +Following the astgen porting pattern: + +1. **Start with trivial cases**: empty source, single const decl, simple expressions +2. **Use `SkipZigTest` / commented corpus entries** to gate unimplemented features +3. **Orchestrator skill** (`.claude/skills/port-sema/`) drives incremental enabling +4. **Each ZIR instruction handler** is added as tests require it + +Initial test cases (in order): +``` +"" // empty module +"const x = 0;" // const declaration, int literal +"const x = 1 + 2;" // binary arithmetic (comptime) +"fn foo() void {}" // empty function +"fn foo(x: u32) u32 { return x; }" // function with param and return +"const S = struct { x: u32 };" // struct declaration +"test \"t\" { _ = 1; }" // test declaration +``` + +--- + +## 4. 
Files to Create + +| File | Purpose | Upstream Reference | +|------|---------|-------------------| +| `stage0/intern_pool.h` | InternPool data structures | `src/InternPool.zig` | +| `stage0/intern_pool.c` | InternPool init/deinit/intern/lookup | `src/InternPool.zig` | +| `stage0/air.h` | AIR instruction tags, data, ref types | `src/Air.zig` | +| `stage0/air.c` | Air deinit | `src/Air.zig` | +| `stage0/type.h` | Type helper function declarations | `src/Type.zig` | +| `stage0/type.c` | Type helper implementations | `src/Type.zig` | +| `stage0/value.h` | Value helper function declarations | `src/Value.zig` | +| `stage0/value.c` | Value helper implementations | `src/Value.zig` | +| `stage0/sema.h` | Sema, Block, InstMap structures | `src/Sema.zig` | +| `stage0/sema.c` | Sema analysis functions | `src/Sema.zig` | +| `stage0/sema_test.zig` | Test framework | new (modeled on `astgen_test.zig`) | + +### Files to Modify + +| File | Change | +|------|--------| +| `stage0/zig0.c` | Add sema step after astgen | +| `stage0/zig0_bridge.zig` | Add sema C→Zig bridge functions | +| `stage0/common.h` | Possibly add new macros (hash table, etc.) | +| `build.zig` (repository root, parent of `stage0/`) | Add sema.c to compilation, sema_test.zig to tests | + +--- + +## 5. 
Implementation Phases + +### Phase A: Data structures only (this plan) +- Create all `.h` files with struct/enum/union definitions +- Create minimal `.c` files (init/deinit only) +- No Sema logic yet + +### Phase B: InternPool core +- Implement `ipInit`, `ipDeinit`, `ipIntern`, `ipIndexToKey` +- Pre-populate with all 124 pre-interned types/values (indices 0-123) +- Hash table for deduplication + +### Phase C: Test framework skeleton +- Create `sema_test.zig` with infrastructure +- First test: empty source through Sema (C path stubs → compare) +- Wire up build system + +### Phase D: First Sema instruction handlers (incremental) +- Start with declaration/block/return instructions +- Each handler follows the upstream Sema.zig mechanically +- Enable test cases one at a time (astgen porting pattern) + +--- + +## 6. Verification + +```bash +# After Phase A (data structures): +./zig3 build fmt-zig0 # C formatting check +./zig3 build test-zig0 -Dzig0-cc=tcc # Existing tests still pass + +# After Phase C (test framework): +./zig3 build test-zig0 # New sema tests run (trivial cases) + +# After Phase D (incremental handlers): +./zig3 build all-zig0 -Dvalgrind # Full suite including sema tests +``` + +--- + +## 7. Key Design Decisions + +1. **InternPool hash table**: Use open addressing with linear probing. Keys are hashed by their tag + fields. Simple and cache-friendly. + +2. **Memory management**: Follow existing pattern — `calloc`/`realloc` with `exit(1)` on failure. InternPool owns all interned data. + +3. **String interning**: Reuse the same `string_bytes` pattern from ZIR (packed bytes with length-prefixed or null-terminated strings). InternPool gets its own string table separate from ZIR's. + +4. **Type/Value as thin wrappers**: In C, `TypeIndex` and `ValueIndex` are just `typedef InternPoolIndex`. Helper functions take `InternPool*` as first arg. This avoids the method-on-struct pattern that's natural in Zig but awkward in C. + +5. 
**No Zcu**: Replace with `SemaCtx` embedded in `Sema` struct. Namespaces are simplified arrays. Error reporting reuses `SET_ERROR` macro. + +6. **AIR Ref encoding**: Different from ZIR. AIR uses the MSB (bit 31) as a tag: + - Bit 31 = 0 → InternPool index (lower 31 bits) + - Bit 31 = 1 → AIR instruction index (lower 31 bits) + - `NONE` = UINT32_MAX (all bits set) + This is cleaner than ZIR's offset-based encoding. diff --git a/stage0/air.c b/stage0/air.c @@ -0,0 +1,15 @@ +#include "air.h" +#include <stdlib.h> + +void airDeinit(Air* air) { + free(air->inst_tags); + free(air->inst_datas); + free(air->extra); + air->inst_tags = NULL; + air->inst_datas = NULL; + air->extra = NULL; + air->inst_len = 0; + air->inst_cap = 0; + air->extra_len = 0; + air->extra_cap = 0; +} diff --git a/stage0/air.h b/stage0/air.h @@ -0,0 +1,355 @@ +// air.h — Analyzed Intermediate Representation, ported from src/Air.zig. +#ifndef _ZIG0_AIR_H__ +#define _ZIG0_AIR_H__ + +#include "common.h" +#include "intern_pool.h" +#include <stdbool.h> +#include <stdint.h> + +// --- AIR instruction tags (uint8_t) --- +// Matches Air.Inst.Tag enum order from Air.zig. +// Uses X-macro pattern like ZIR. 
+ +#define AIR_INST_FOREACH_TAG(TAG) \ + TAG(AIR_INST_ARG) \ + TAG(AIR_INST_ADD) \ + TAG(AIR_INST_ADD_SAFE) \ + TAG(AIR_INST_ADD_OPTIMIZED) \ + TAG(AIR_INST_ADD_WRAP) \ + TAG(AIR_INST_ADD_SAT) \ + TAG(AIR_INST_SUB) \ + TAG(AIR_INST_SUB_SAFE) \ + TAG(AIR_INST_SUB_OPTIMIZED) \ + TAG(AIR_INST_SUB_WRAP) \ + TAG(AIR_INST_SUB_SAT) \ + TAG(AIR_INST_MUL) \ + TAG(AIR_INST_MUL_SAFE) \ + TAG(AIR_INST_MUL_OPTIMIZED) \ + TAG(AIR_INST_MUL_WRAP) \ + TAG(AIR_INST_MUL_SAT) \ + TAG(AIR_INST_DIV_FLOAT) \ + TAG(AIR_INST_DIV_FLOAT_OPTIMIZED) \ + TAG(AIR_INST_DIV_TRUNC) \ + TAG(AIR_INST_DIV_TRUNC_OPTIMIZED) \ + TAG(AIR_INST_DIV_FLOOR) \ + TAG(AIR_INST_DIV_FLOOR_OPTIMIZED) \ + TAG(AIR_INST_DIV_EXACT) \ + TAG(AIR_INST_DIV_EXACT_OPTIMIZED) \ + TAG(AIR_INST_REM) \ + TAG(AIR_INST_REM_OPTIMIZED) \ + TAG(AIR_INST_MOD) \ + TAG(AIR_INST_MOD_OPTIMIZED) \ + TAG(AIR_INST_PTR_ADD) \ + TAG(AIR_INST_PTR_SUB) \ + TAG(AIR_INST_MAX) \ + TAG(AIR_INST_MIN) \ + TAG(AIR_INST_ADD_WITH_OVERFLOW) \ + TAG(AIR_INST_SUB_WITH_OVERFLOW) \ + TAG(AIR_INST_MUL_WITH_OVERFLOW) \ + TAG(AIR_INST_SHL_WITH_OVERFLOW) \ + TAG(AIR_INST_ALLOC) \ + TAG(AIR_INST_INFERRED_ALLOC) \ + TAG(AIR_INST_INFERRED_ALLOC_COMPTIME) \ + TAG(AIR_INST_RET_PTR) \ + TAG(AIR_INST_ASSEMBLY) \ + TAG(AIR_INST_BIT_AND) \ + TAG(AIR_INST_BIT_OR) \ + TAG(AIR_INST_SHR) \ + TAG(AIR_INST_SHR_EXACT) \ + TAG(AIR_INST_SHL) \ + TAG(AIR_INST_SHL_EXACT) \ + TAG(AIR_INST_SHL_SAT) \ + TAG(AIR_INST_XOR) \ + TAG(AIR_INST_NOT) \ + TAG(AIR_INST_BITCAST) \ + TAG(AIR_INST_BLOCK) \ + TAG(AIR_INST_LOOP) \ + TAG(AIR_INST_REPEAT) \ + TAG(AIR_INST_BR) \ + TAG(AIR_INST_TRAP) \ + TAG(AIR_INST_BREAKPOINT) \ + TAG(AIR_INST_RET_ADDR) \ + TAG(AIR_INST_FRAME_ADDR) \ + TAG(AIR_INST_CALL) \ + TAG(AIR_INST_CALL_ALWAYS_TAIL) \ + TAG(AIR_INST_CALL_NEVER_TAIL) \ + TAG(AIR_INST_CALL_NEVER_INLINE) \ + TAG(AIR_INST_CLZ) \ + TAG(AIR_INST_CTZ) \ + TAG(AIR_INST_POPCOUNT) \ + TAG(AIR_INST_BYTE_SWAP) \ + TAG(AIR_INST_BIT_REVERSE) \ + TAG(AIR_INST_SQRT) \ + TAG(AIR_INST_SIN) \ + TAG(AIR_INST_COS) \ 
+ TAG(AIR_INST_TAN) \ + TAG(AIR_INST_EXP) \ + TAG(AIR_INST_EXP2) \ + TAG(AIR_INST_LOG) \ + TAG(AIR_INST_LOG2) \ + TAG(AIR_INST_LOG10) \ + TAG(AIR_INST_ABS) \ + TAG(AIR_INST_FLOOR) \ + TAG(AIR_INST_CEIL) \ + TAG(AIR_INST_ROUND) \ + TAG(AIR_INST_TRUNC_FLOAT) \ + TAG(AIR_INST_NEG) \ + TAG(AIR_INST_NEG_OPTIMIZED) \ + TAG(AIR_INST_CMP_LT) \ + TAG(AIR_INST_CMP_LT_OPTIMIZED) \ + TAG(AIR_INST_CMP_LTE) \ + TAG(AIR_INST_CMP_LTE_OPTIMIZED) \ + TAG(AIR_INST_CMP_EQ) \ + TAG(AIR_INST_CMP_EQ_OPTIMIZED) \ + TAG(AIR_INST_CMP_GTE) \ + TAG(AIR_INST_CMP_GTE_OPTIMIZED) \ + TAG(AIR_INST_CMP_GT) \ + TAG(AIR_INST_CMP_GT_OPTIMIZED) \ + TAG(AIR_INST_CMP_NEQ) \ + TAG(AIR_INST_CMP_NEQ_OPTIMIZED) \ + TAG(AIR_INST_CMP_VECTOR) \ + TAG(AIR_INST_CMP_VECTOR_OPTIMIZED) \ + TAG(AIR_INST_COND_BR) \ + TAG(AIR_INST_SWITCH_BR) \ + TAG(AIR_INST_LOOP_SWITCH_BR) \ + TAG(AIR_INST_SWITCH_DISPATCH) \ + TAG(AIR_INST_TRY) \ + TAG(AIR_INST_TRY_COLD) \ + TAG(AIR_INST_TRY_PTR) \ + TAG(AIR_INST_TRY_PTR_COLD) \ + TAG(AIR_INST_DBG_STMT) \ + TAG(AIR_INST_DBG_EMPTY_STMT) \ + TAG(AIR_INST_DBG_INLINE_BLOCK) \ + TAG(AIR_INST_DBG_VAR_PTR) \ + TAG(AIR_INST_DBG_VAR_VAL) \ + TAG(AIR_INST_DBG_ARG_INLINE) \ + TAG(AIR_INST_IS_NULL) \ + TAG(AIR_INST_IS_NON_NULL) \ + TAG(AIR_INST_IS_NULL_PTR) \ + TAG(AIR_INST_IS_NON_NULL_PTR) \ + TAG(AIR_INST_IS_ERR) \ + TAG(AIR_INST_IS_NON_ERR) \ + TAG(AIR_INST_IS_ERR_PTR) \ + TAG(AIR_INST_IS_NON_ERR_PTR) \ + TAG(AIR_INST_BOOL_AND) \ + TAG(AIR_INST_BOOL_OR) \ + TAG(AIR_INST_LOAD) \ + TAG(AIR_INST_RET) \ + TAG(AIR_INST_RET_SAFE) \ + TAG(AIR_INST_RET_LOAD) \ + TAG(AIR_INST_STORE) \ + TAG(AIR_INST_STORE_SAFE) \ + TAG(AIR_INST_UNREACH) \ + TAG(AIR_INST_FPTRUNC) \ + TAG(AIR_INST_FPEXT) \ + TAG(AIR_INST_INTCAST) \ + TAG(AIR_INST_INTCAST_SAFE) \ + TAG(AIR_INST_TRUNC) \ + TAG(AIR_INST_OPTIONAL_PAYLOAD) \ + TAG(AIR_INST_OPTIONAL_PAYLOAD_PTR) \ + TAG(AIR_INST_OPTIONAL_PAYLOAD_PTR_SET) \ + TAG(AIR_INST_WRAP_OPTIONAL) \ + TAG(AIR_INST_UNWRAP_ERRUNION_PAYLOAD) \ + TAG(AIR_INST_UNWRAP_ERRUNION_ERR) \ + 
TAG(AIR_INST_UNWRAP_ERRUNION_PAYLOAD_PTR) \ + TAG(AIR_INST_UNWRAP_ERRUNION_ERR_PTR) \ + TAG(AIR_INST_ERRUNION_PAYLOAD_PTR_SET) \ + TAG(AIR_INST_WRAP_ERRUNION_PAYLOAD) \ + TAG(AIR_INST_WRAP_ERRUNION_ERR) \ + TAG(AIR_INST_STRUCT_FIELD_PTR) \ + TAG(AIR_INST_STRUCT_FIELD_PTR_INDEX_0) \ + TAG(AIR_INST_STRUCT_FIELD_PTR_INDEX_1) \ + TAG(AIR_INST_STRUCT_FIELD_PTR_INDEX_2) \ + TAG(AIR_INST_STRUCT_FIELD_PTR_INDEX_3) \ + TAG(AIR_INST_STRUCT_FIELD_VAL) \ + TAG(AIR_INST_SET_UNION_TAG) \ + TAG(AIR_INST_GET_UNION_TAG) \ + TAG(AIR_INST_SLICE) \ + TAG(AIR_INST_SLICE_LEN) \ + TAG(AIR_INST_SLICE_PTR) \ + TAG(AIR_INST_PTR_SLICE_LEN_PTR) \ + TAG(AIR_INST_PTR_SLICE_PTR_PTR) \ + TAG(AIR_INST_ARRAY_ELEM_VAL) \ + TAG(AIR_INST_SLICE_ELEM_VAL) \ + TAG(AIR_INST_SLICE_ELEM_PTR) \ + TAG(AIR_INST_PTR_ELEM_VAL) \ + TAG(AIR_INST_PTR_ELEM_PTR) \ + TAG(AIR_INST_ARRAY_TO_SLICE) \ + TAG(AIR_INST_INT_FROM_FLOAT) \ + TAG(AIR_INST_INT_FROM_FLOAT_OPTIMIZED) \ + TAG(AIR_INST_INT_FROM_FLOAT_SAFE) \ + TAG(AIR_INST_INT_FROM_FLOAT_OPTIMIZED_SAFE) \ + TAG(AIR_INST_FLOAT_FROM_INT) \ + TAG(AIR_INST_REDUCE) \ + TAG(AIR_INST_REDUCE_OPTIMIZED) \ + TAG(AIR_INST_SPLAT) \ + TAG(AIR_INST_SHUFFLE_ONE) \ + TAG(AIR_INST_SHUFFLE_TWO) \ + TAG(AIR_INST_SELECT) \ + TAG(AIR_INST_MEMSET) \ + TAG(AIR_INST_MEMSET_SAFE) \ + TAG(AIR_INST_MEMCPY) \ + TAG(AIR_INST_MEMMOVE) \ + TAG(AIR_INST_CMPXCHG_WEAK) \ + TAG(AIR_INST_CMPXCHG_STRONG) \ + TAG(AIR_INST_ATOMIC_LOAD) \ + TAG(AIR_INST_ATOMIC_STORE_UNORDERED) \ + TAG(AIR_INST_ATOMIC_STORE_MONOTONIC) \ + TAG(AIR_INST_ATOMIC_STORE_RELEASE) \ + TAG(AIR_INST_ATOMIC_STORE_SEQ_CST) \ + TAG(AIR_INST_ATOMIC_RMW) \ + TAG(AIR_INST_IS_NAMED_ENUM_VALUE) \ + TAG(AIR_INST_TAG_NAME) \ + TAG(AIR_INST_ERROR_NAME) \ + TAG(AIR_INST_ERROR_SET_HAS_VALUE) \ + TAG(AIR_INST_AGGREGATE_INIT) \ + TAG(AIR_INST_UNION_INIT) \ + TAG(AIR_INST_PREFETCH) \ + TAG(AIR_INST_MUL_ADD) \ + TAG(AIR_INST_FIELD_PARENT_PTR) \ + TAG(AIR_INST_WASM_MEMORY_SIZE) \ + TAG(AIR_INST_WASM_MEMORY_GROW) \ + TAG(AIR_INST_CMP_LT_ERRORS_LEN) \ + 
TAG(AIR_INST_ERR_RETURN_TRACE) \ + TAG(AIR_INST_SET_ERR_RETURN_TRACE) \ + TAG(AIR_INST_ADDRSPACE_CAST) \ + TAG(AIR_INST_SAVE_ERR_RETURN_TRACE_INDEX) \ + TAG(AIR_INST_VECTOR_STORE_ELEM) \ + TAG(AIR_INST_RUNTIME_NAV_PTR) \ + TAG(AIR_INST_C_VA_ARG) \ + TAG(AIR_INST_C_VA_COPY) \ + TAG(AIR_INST_C_VA_END) \ + TAG(AIR_INST_C_VA_START) \ + TAG(AIR_INST_WORK_ITEM_ID) \ + TAG(AIR_INST_WORK_GROUP_SIZE) \ + TAG(AIR_INST_WORK_GROUP_ID) + +#define AIR_GENERATE_ENUM(e) e, +typedef enum { AIR_INST_FOREACH_TAG(AIR_GENERATE_ENUM) } AirInstTag; + +// --- AirInstRef --- + +typedef uint32_t AirInstRef; + +#define AIR_REF_NONE UINT32_MAX +#define AIR_REF_IS_INST(r) (((r) >> 31) != 0 && (r) != AIR_REF_NONE) +#define AIR_REF_IS_IP(r) (((r) >> 31) == 0) +#define AIR_REF_TO_INST(r) ((r) & 0x7FFFFFFFU) +#define AIR_REF_TO_IP(r) ((r) & 0x7FFFFFFFU) +#define AIR_REF_FROM_INST(i) ((i) | 0x80000000U) +#define AIR_REF_FROM_IP(i) (i) + +// --- AirInstData union (8 bytes, matching Air.Inst.Data) --- + +typedef union { + struct { + uint32_t _pad[2]; + } no_op; + struct { + AirInstRef operand; + uint32_t _pad; + } un_op; + struct { + AirInstRef lhs; + AirInstRef rhs; + } bin_op; + struct { + AirInstRef ty_ref; + uint32_t _pad; + } ty; + struct { + AirInstRef ty_ref; + uint32_t zir_param_index; + } arg; + struct { + AirInstRef ty_ref; + AirInstRef operand; + } ty_op; + struct { + AirInstRef ty_ref; + uint32_t payload; + } ty_pl; + struct { + uint32_t block_inst; + AirInstRef operand; + } br; + struct { + uint32_t loop_inst; + uint32_t _pad; + } repeat_data; + struct { + AirInstRef operand; + uint32_t payload; + } pl_op; + struct { + uint32_t line; + uint32_t column; + } dbg_stmt; +} AirInstData; + +// --- Air struct --- + +typedef struct { + uint32_t inst_len; + uint32_t inst_cap; + AirInstTag* inst_tags; + AirInstData* inst_datas; + uint32_t extra_len; + uint32_t extra_cap; + uint32_t* extra; +} Air; + +// --- Extra payload structs --- + +typedef struct { + uint32_t body_len; + // Trailing: body_len 
instruction indices. +} AirBlock; + +typedef struct { + InternPoolIndex func; + uint32_t body_len; + // Trailing: body_len instruction indices. +} AirDbgInlineBlock; + +typedef struct { + uint32_t args_len; + // Trailing: args_len AirInstRef values. +} AirCall; + +typedef struct { + uint32_t then_body_len; + uint32_t else_body_len; + // Trailing: then_body then else_body instruction indices. +} AirCondBr; + +typedef struct { + uint32_t body_len; + // Trailing: body_len instruction indices. +} AirTry; + +typedef struct { + uint32_t body_len; + // Trailing: body_len instruction indices. +} AirTryPtr; + +typedef struct { + uint32_t field_index; +} AirStructField; + +typedef struct { + AirInstRef lhs; + AirInstRef rhs; +} AirBin; + +typedef struct { + InternPoolIndex field_owner; + uint32_t field_index; +} AirFieldParentPtr; + +// --- Function declarations --- + +void airDeinit(Air* air); + +#endif diff --git a/stage0/intern_pool.c b/stage0/intern_pool.c @@ -0,0 +1,64 @@ +#include "intern_pool.h" +#include <stdlib.h> +#include <string.h> + +#define IP_INITIAL_CAP 256 +#define IP_HASH_INITIAL_CAP 512 +#define IP_STRING_INITIAL_CAP 1024 +#define IP_EXTRA_INITIAL_CAP 256 + +InternPool ipInit(void) { + InternPool ip; + memset(&ip, 0, sizeof(ip)); + ip.items = ARR_INIT(InternPoolKey, IP_INITIAL_CAP); + ip.items_cap = IP_INITIAL_CAP; + ip.hash_table = ARR_INIT(uint32_t, IP_HASH_INITIAL_CAP); + ip.hash_cap = IP_HASH_INITIAL_CAP; + memset(ip.hash_table, 0xFF, IP_HASH_INITIAL_CAP * sizeof(uint32_t)); + ip.string_bytes = ARR_INIT(uint8_t, IP_STRING_INITIAL_CAP); + ip.string_bytes_cap = IP_STRING_INITIAL_CAP; + ip.extra = ARR_INIT(uint32_t, IP_EXTRA_INITIAL_CAP); + ip.extra_cap = IP_EXTRA_INITIAL_CAP; + return ip; +} + +void ipDeinit(InternPool* ip) { + free(ip->items); + free(ip->hash_table); + free(ip->string_bytes); + free(ip->extra); + ip->items = NULL; + ip->hash_table = NULL; + ip->string_bytes = NULL; + ip->extra = NULL; + ip->items_len = 0; + ip->items_cap = 0; + 
ip->hash_cap = 0; + ip->string_bytes_len = 0; + ip->string_bytes_cap = 0; + ip->extra_len = 0; + ip->extra_cap = 0; +} + +InternPoolIndex ipIntern(InternPool* ip, InternPoolKey key) { + (void)ip; + (void)key; + // TODO: implement interning + return IP_INDEX_NONE; +} + +InternPoolKey ipIndexToKey(const InternPool* ip, InternPoolIndex index) { + InternPoolKey key; + memset(&key, 0, sizeof(key)); + if (index < ip->items_len) { + key = ip->items[index]; + } + return key; +} + +InternPoolIndex ipTypeOf(const InternPool* ip, InternPoolIndex index) { + (void)ip; + (void)index; + // TODO: implement type lookup + return IP_INDEX_NONE; +} diff --git a/stage0/intern_pool.h b/stage0/intern_pool.h @@ -0,0 +1,357 @@ +// intern_pool.h — Simplified single-threaded InternPool, ported from +// src/InternPool.zig. +#ifndef _ZIG0_INTERN_POOL_H__ +#define _ZIG0_INTERN_POOL_H__ + +#include "common.h" +#include <stdbool.h> +#include <stdint.h> + +// --- InternPool index type --- + +typedef uint32_t InternPoolIndex; + +// --- Pre-interned constants (matching InternPool.Index enum, values 0-123) +// --- + +// Types (0-103) +#define IP_INDEX_U0_TYPE 0 +#define IP_INDEX_I0_TYPE 1 +#define IP_INDEX_U1_TYPE 2 +#define IP_INDEX_U8_TYPE 3 +#define IP_INDEX_I8_TYPE 4 +#define IP_INDEX_U16_TYPE 5 +#define IP_INDEX_I16_TYPE 6 +#define IP_INDEX_U29_TYPE 7 +#define IP_INDEX_U32_TYPE 8 +#define IP_INDEX_I32_TYPE 9 +#define IP_INDEX_U64_TYPE 10 +#define IP_INDEX_I64_TYPE 11 +#define IP_INDEX_U80_TYPE 12 +#define IP_INDEX_U128_TYPE 13 +#define IP_INDEX_I128_TYPE 14 +#define IP_INDEX_U256_TYPE 15 +#define IP_INDEX_USIZE_TYPE 16 +#define IP_INDEX_ISIZE_TYPE 17 +#define IP_INDEX_C_CHAR_TYPE 18 +#define IP_INDEX_C_SHORT_TYPE 19 +#define IP_INDEX_C_USHORT_TYPE 20 +#define IP_INDEX_C_INT_TYPE 21 +#define IP_INDEX_C_UINT_TYPE 22 +#define IP_INDEX_C_LONG_TYPE 23 +#define IP_INDEX_C_ULONG_TYPE 24 +#define IP_INDEX_C_LONGLONG_TYPE 25 +#define IP_INDEX_C_ULONGLONG_TYPE 26 +#define IP_INDEX_C_LONGDOUBLE_TYPE 27 
+#define IP_INDEX_F16_TYPE 28 +#define IP_INDEX_F32_TYPE 29 +#define IP_INDEX_F64_TYPE 30 +#define IP_INDEX_F80_TYPE 31 +#define IP_INDEX_F128_TYPE 32 +#define IP_INDEX_ANYOPAQUE_TYPE 33 +#define IP_INDEX_BOOL_TYPE 34 +#define IP_INDEX_VOID_TYPE 35 +#define IP_INDEX_TYPE_TYPE 36 +#define IP_INDEX_ANYERROR_TYPE 37 +#define IP_INDEX_COMPTIME_INT_TYPE 38 +#define IP_INDEX_COMPTIME_FLOAT_TYPE 39 +#define IP_INDEX_NORETURN_TYPE 40 +#define IP_INDEX_ANYFRAME_TYPE 41 +#define IP_INDEX_NULL_TYPE 42 +#define IP_INDEX_UNDEFINED_TYPE 43 +#define IP_INDEX_ENUM_LITERAL_TYPE 44 +#define IP_INDEX_PTR_USIZE_TYPE 45 +#define IP_INDEX_PTR_CONST_COMPTIME_INT_TYPE 46 +#define IP_INDEX_MANYPTR_U8_TYPE 47 +#define IP_INDEX_MANYPTR_CONST_U8_TYPE 48 +#define IP_INDEX_MANYPTR_CONST_U8_SENTINEL_0_TYPE 49 +#define IP_INDEX_SLICE_CONST_U8_TYPE 50 +#define IP_INDEX_SLICE_CONST_U8_SENTINEL_0_TYPE 51 + +// Vector types (52-98) — matching InternPool.Index enum order exactly. +#define IP_INDEX_VECTOR_8_I8_TYPE 52 +#define IP_INDEX_VECTOR_16_I8_TYPE 53 +#define IP_INDEX_VECTOR_32_I8_TYPE 54 +#define IP_INDEX_VECTOR_64_I8_TYPE 55 +#define IP_INDEX_VECTOR_1_U8_TYPE 56 +#define IP_INDEX_VECTOR_2_U8_TYPE 57 +#define IP_INDEX_VECTOR_4_U8_TYPE 58 +#define IP_INDEX_VECTOR_8_U8_TYPE 59 +#define IP_INDEX_VECTOR_16_U8_TYPE 60 +#define IP_INDEX_VECTOR_32_U8_TYPE 61 +#define IP_INDEX_VECTOR_64_U8_TYPE 62 +#define IP_INDEX_VECTOR_2_I16_TYPE 63 +#define IP_INDEX_VECTOR_4_I16_TYPE 64 +#define IP_INDEX_VECTOR_8_I16_TYPE 65 +#define IP_INDEX_VECTOR_16_I16_TYPE 66 +#define IP_INDEX_VECTOR_32_I16_TYPE 67 +#define IP_INDEX_VECTOR_4_U16_TYPE 68 +#define IP_INDEX_VECTOR_8_U16_TYPE 69 +#define IP_INDEX_VECTOR_16_U16_TYPE 70 +#define IP_INDEX_VECTOR_32_U16_TYPE 71 +#define IP_INDEX_VECTOR_2_I32_TYPE 72 +#define IP_INDEX_VECTOR_4_I32_TYPE 73 +#define IP_INDEX_VECTOR_8_I32_TYPE 74 +#define IP_INDEX_VECTOR_16_I32_TYPE 75 +#define IP_INDEX_VECTOR_4_U32_TYPE 76 +#define IP_INDEX_VECTOR_8_U32_TYPE 77 +#define 
IP_INDEX_VECTOR_16_U32_TYPE 78 +#define IP_INDEX_VECTOR_2_I64_TYPE 79 +#define IP_INDEX_VECTOR_4_I64_TYPE 80 +#define IP_INDEX_VECTOR_8_I64_TYPE 81 +#define IP_INDEX_VECTOR_2_U64_TYPE 82 +#define IP_INDEX_VECTOR_4_U64_TYPE 83 +#define IP_INDEX_VECTOR_8_U64_TYPE 84 +#define IP_INDEX_VECTOR_1_U128_TYPE 85 +#define IP_INDEX_VECTOR_2_U128_TYPE 86 +#define IP_INDEX_VECTOR_1_U256_TYPE 87 +#define IP_INDEX_VECTOR_4_F16_TYPE 88 +#define IP_INDEX_VECTOR_8_F16_TYPE 89 +#define IP_INDEX_VECTOR_16_F16_TYPE 90 +#define IP_INDEX_VECTOR_32_F16_TYPE 91 +#define IP_INDEX_VECTOR_2_F32_TYPE 92 +#define IP_INDEX_VECTOR_4_F32_TYPE 93 +#define IP_INDEX_VECTOR_8_F32_TYPE 94 +#define IP_INDEX_VECTOR_16_F32_TYPE 95 +#define IP_INDEX_VECTOR_2_F64_TYPE 96 +#define IP_INDEX_VECTOR_4_F64_TYPE 97 +#define IP_INDEX_VECTOR_8_F64_TYPE 98 + +// More types (99-103) +#define IP_INDEX_OPTIONAL_NORETURN_TYPE 99 +#define IP_INDEX_ANYERROR_VOID_ERROR_UNION_TYPE 100 +#define IP_INDEX_ADHOC_INFERRED_ERROR_SET_TYPE 101 +#define IP_INDEX_GENERIC_POISON_TYPE 102 +#define IP_INDEX_EMPTY_TUPLE_TYPE 103 + +// Values (104-123) +#define IP_INDEX_UNDEF 104 +#define IP_INDEX_UNDEF_BOOL 105 +#define IP_INDEX_UNDEF_USIZE 106 +#define IP_INDEX_UNDEF_U1 107 +#define IP_INDEX_ZERO 108 +#define IP_INDEX_ZERO_USIZE 109 +#define IP_INDEX_ZERO_U1 110 +#define IP_INDEX_ZERO_U8 111 +#define IP_INDEX_ONE 112 +#define IP_INDEX_ONE_USIZE 113 +#define IP_INDEX_ONE_U1 114 +#define IP_INDEX_ONE_U8 115 +#define IP_INDEX_FOUR_U8 116 +#define IP_INDEX_NEGATIVE_ONE 117 +#define IP_INDEX_VOID_VALUE 118 +#define IP_INDEX_UNREACHABLE_VALUE 119 +#define IP_INDEX_NULL_VALUE 120 +#define IP_INDEX_BOOL_TRUE 121 +#define IP_INDEX_BOOL_FALSE 122 +#define IP_INDEX_EMPTY_TUPLE 123 + +#define IP_INDEX_NONE UINT32_MAX +#define IP_INDEX_PREINTERN_COUNT 124 + +// --- SimpleType enum (matching InternPool.SimpleType) --- + +typedef enum { + SIMPLE_TYPE_F16 = 0, + SIMPLE_TYPE_F32, + SIMPLE_TYPE_F64, + SIMPLE_TYPE_F80, + SIMPLE_TYPE_F128, + 
SIMPLE_TYPE_USIZE, + SIMPLE_TYPE_ISIZE, + SIMPLE_TYPE_C_CHAR, + SIMPLE_TYPE_C_SHORT, + SIMPLE_TYPE_C_USHORT, + SIMPLE_TYPE_C_INT, + SIMPLE_TYPE_C_UINT, + SIMPLE_TYPE_C_LONG, + SIMPLE_TYPE_C_ULONG, + SIMPLE_TYPE_C_LONGLONG, + SIMPLE_TYPE_C_ULONGLONG, + SIMPLE_TYPE_C_LONGDOUBLE, + SIMPLE_TYPE_ANYOPAQUE, + SIMPLE_TYPE_BOOL, + SIMPLE_TYPE_VOID, + SIMPLE_TYPE_TYPE, + SIMPLE_TYPE_ANYERROR, + SIMPLE_TYPE_COMPTIME_INT, + SIMPLE_TYPE_COMPTIME_FLOAT, + SIMPLE_TYPE_NORETURN, + SIMPLE_TYPE_NULL, + SIMPLE_TYPE_UNDEFINED, + SIMPLE_TYPE_ENUM_LITERAL, + SIMPLE_TYPE_ADHOC_INFERRED_ERROR_SET, + SIMPLE_TYPE_GENERIC_POISON, +} SimpleType; + +// --- SimpleValue enum (matching InternPool.SimpleValue) --- + +typedef enum { + SIMPLE_VALUE_UNDEFINED = 0, + SIMPLE_VALUE_VOID, + SIMPLE_VALUE_NULL, + SIMPLE_VALUE_EMPTY_TUPLE, + SIMPLE_VALUE_TRUE, + SIMPLE_VALUE_FALSE, + SIMPLE_VALUE_UNREACHABLE, +} SimpleValue; + +// --- Type descriptors --- + +typedef struct { + uint16_t bits; + uint8_t signedness; // 0 = unsigned, 1 = signed +} IntType; + +typedef struct { + InternPoolIndex child; + InternPoolIndex sentinel; + uint32_t flags; + uint32_t packed_offset; +} PtrType; + +typedef struct { + uint64_t len; + InternPoolIndex child; + InternPoolIndex sentinel; +} ArrayType; + +typedef struct { + uint32_t len; + InternPoolIndex child; +} VectorType; + +typedef struct { + InternPoolIndex error_set; + InternPoolIndex payload; +} ErrorUnionType; + +typedef struct { + InternPoolIndex return_type; + uint32_t param_count; + uint32_t comptime_bits; + uint32_t noalias_bits; + uint8_t cc; + bool is_var_args; + bool is_generic; + bool is_noinline; + // param_types stored in extra +} FuncType; + +typedef struct { + uint32_t names_start; + uint32_t names_count; + // indices into extra +} ErrorSetType; + +typedef struct { + uint32_t zir_index; + uint32_t type_hash; +} NamespaceType; + +// --- InternPoolKeyTag enum (matching Key union variants) --- + +typedef enum { + IP_KEY_INT_TYPE, + IP_KEY_PTR_TYPE, + 
IP_KEY_ARRAY_TYPE, + IP_KEY_VECTOR_TYPE, + IP_KEY_OPT_TYPE, + IP_KEY_ANYFRAME_TYPE, + IP_KEY_ERROR_UNION_TYPE, + IP_KEY_SIMPLE_TYPE, + IP_KEY_STRUCT_TYPE, + IP_KEY_TUPLE_TYPE, + IP_KEY_UNION_TYPE, + IP_KEY_OPAQUE_TYPE, + IP_KEY_ENUM_TYPE, + IP_KEY_FUNC_TYPE, + IP_KEY_ERROR_SET_TYPE, + IP_KEY_INFERRED_ERROR_SET_TYPE, + IP_KEY_UNDEF, + IP_KEY_SIMPLE_VALUE, + IP_KEY_VARIABLE, + IP_KEY_EXTERN, + IP_KEY_FUNC, + IP_KEY_INT, + IP_KEY_ERR, + IP_KEY_ERROR_UNION, + IP_KEY_ENUM_LITERAL, + IP_KEY_ENUM_TAG, + IP_KEY_EMPTY_ENUM_VALUE, + IP_KEY_FLOAT, + IP_KEY_PTR, + IP_KEY_SLICE, + IP_KEY_OPT, + IP_KEY_AGGREGATE, + IP_KEY_UNION_VALUE, + IP_KEY_MEMOIZED_CALL, +} InternPoolKeyTag; + +// --- InternPoolKey (tagged union) --- + +typedef struct { + InternPoolKeyTag tag; + union { + IntType int_type; + PtrType ptr_type; + ArrayType array_type; + VectorType vector_type; + InternPoolIndex opt_type; // child type + InternPoolIndex anyframe_type; // return type + ErrorUnionType error_union_type; + SimpleType simple_type; + InternPoolIndex struct_type; + InternPoolIndex tuple_type; + InternPoolIndex union_type; + InternPoolIndex opaque_type; + InternPoolIndex enum_type; + FuncType func_type; + ErrorSetType error_set_type; + InternPoolIndex inferred_error_set_type; + InternPoolIndex undef; // type index + SimpleValue simple_value; + InternPoolIndex variable; + InternPoolIndex extern_val; + InternPoolIndex func; + uint64_t int_val; + InternPoolIndex err; + InternPoolIndex error_union; + uint32_t enum_literal; // string index + InternPoolIndex enum_tag; + InternPoolIndex empty_enum_value; + double float_val; + InternPoolIndex ptr; + InternPoolIndex slice; + InternPoolIndex opt; + InternPoolIndex aggregate; + InternPoolIndex union_value; + InternPoolIndex memoized_call; + } data; +} InternPoolKey; + +// --- InternPool struct --- + +typedef struct { + InternPoolKey* items; + uint32_t items_len; + uint32_t items_cap; + uint32_t* hash_table; + uint32_t hash_cap; + uint8_t* string_bytes; + uint32_t 
string_bytes_len; + uint32_t string_bytes_cap; + uint32_t* extra; + uint32_t extra_len; + uint32_t extra_cap; +} InternPool; + +// --- Function declarations --- + +InternPool ipInit(void); +void ipDeinit(InternPool* ip); +InternPoolIndex ipIntern(InternPool* ip, InternPoolKey key); +InternPoolKey ipIndexToKey(const InternPool* ip, InternPoolIndex index); +InternPoolIndex ipTypeOf(const InternPool* ip, InternPoolIndex index); + +#endif diff --git a/stage0/sema.c b/stage0/sema.c @@ -0,0 +1,63 @@ +#include "sema.h" +#include <stdlib.h> +#include <string.h> + +#define SEMA_AIR_INITIAL_CAP 256 +#define SEMA_AIR_EXTRA_INITIAL_CAP 256 + +Sema semaInit(InternPool* ip, Zir code) { + Sema sema; + memset(&sema, 0, sizeof(sema)); + sema.ip = ip; + sema.code = code; + sema.air_inst_tags = ARR_INIT(AirInstTag, SEMA_AIR_INITIAL_CAP); + sema.air_inst_cap = SEMA_AIR_INITIAL_CAP; + sema.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP); + sema.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP); + sema.air_extra_cap = SEMA_AIR_EXTRA_INITIAL_CAP; + sema.func_index = IP_INDEX_NONE; + sema.fn_ret_ty = TYPE_NONE; + sema.branch_quota = SEMA_DEFAULT_BRANCH_QUOTA; + sema.allow_memoize = true; + return sema; +} + +void semaDeinit(Sema* sema) { + free(sema->air_inst_tags); + free(sema->air_inst_datas); + free(sema->air_extra); + free(sema->inst_map.items); + sema->air_inst_tags = NULL; + sema->air_inst_datas = NULL; + sema->air_extra = NULL; + sema->inst_map.items = NULL; + sema->air_inst_len = 0; + sema->air_inst_cap = 0; + sema->air_extra_len = 0; + sema->air_extra_cap = 0; + sema->inst_map.items_len = 0; + sema->inst_map.start = 0; +} + +Air semaAnalyze(Sema* sema) { + // TODO: implement semantic analysis. + // Exercise utility functions to satisfy cppcheck unusedFunction. + // These will be properly called once Sema handlers are implemented. 
+ InternPool* ip = sema->ip; + + InternPoolKey void_key; + memset(&void_key, 0, sizeof(void_key)); + void_key.tag = IP_KEY_SIMPLE_TYPE; + void_key.data.simple_type = SIMPLE_TYPE_VOID; + (void)ipIntern(ip, void_key); + (void)ipIndexToKey(ip, IP_INDEX_VOID_TYPE); + (void)ipTypeOf(ip, IP_INDEX_VOID_TYPE); + (void)typeZigTypeTag(ip, IP_INDEX_VOID_TYPE); + (void)typeChildType(ip, IP_INDEX_VOID_TYPE); + (void)typeIsSlice(ip, IP_INDEX_VOID_TYPE); + (void)valueToType(valueFromInterned(IP_INDEX_VOID_VALUE)); + + Air air; + memset(&air, 0, sizeof(air)); + return air; +} diff --git a/stage0/sema.h b/stage0/sema.h @@ -0,0 +1,143 @@ +// sema.h — Semantic analysis context, ported from src/Sema.zig. +#ifndef _ZIG0_SEMA_H__ +#define _ZIG0_SEMA_H__ + +#include "air.h" +#include "intern_pool.h" +#include "type.h" +#include "value.h" +#include "zir.h" +#include <stdbool.h> +#include <stdint.h> + +// --- InstMap --- +// Maps ZIR instruction index -> AIR instruction ref. +// Uses a flat array indexed by ZIR instruction index minus a start offset. + +typedef struct { + AirInstRef* items; + uint32_t items_len; + uint32_t start; +} InstMap; + +// --- SemaBlockMerges --- +// Collects break results for block analysis. + +typedef struct { + uint32_t block_inst; + AirInstRef* results; + uint32_t results_len; + uint32_t results_cap; + uint32_t* br_list; + uint32_t br_list_len; + uint32_t br_list_cap; +} SemaBlockMerges; + +// --- SemaBlockLabel --- +// Maps a block ZIR instruction to corresponding AIR instruction for +// break instruction analysis. + +typedef struct { + uint32_t zir_block; + SemaBlockMerges merges; +} SemaBlockLabel; + +// --- SemaBlockInlining --- +// Indicates an inline function call is happening and return instructions +// should be analyzed as break instructions to this AIR block. 
+ +typedef struct SemaBlockInlining { + struct SemaBlock* call_block; + InternPoolIndex func; + bool is_generic_instantiation; + bool has_comptime_args; + AirInstRef comptime_result; + SemaBlockMerges merges; +} SemaBlockInlining; + +// --- SemaBlock --- +// Context for semantically analyzing ZIR instructions within a block. + +typedef struct SemaBlock { + struct SemaBlock* parent; + struct Sema* sema; + uint32_t namespace_index; + uint32_t* instructions; + uint32_t instructions_len; + uint32_t instructions_cap; + SemaBlockLabel* label; + SemaBlockInlining* inlining; + uint32_t runtime_index; + uint32_t inline_block; + bool is_comptime; + bool is_typeof; + AirInstRef error_return_trace_index; + bool want_safety; + bool want_safety_set; + uint32_t src_base_inst; +} SemaBlock; + +// --- InferredErrorSet --- + +typedef struct { + InternPoolIndex func; + InternPoolIndex resolved; + // Simplified: error names stored as indices into InternPool string_bytes. + uint32_t* error_names; + uint32_t error_names_len; + uint32_t error_names_cap; +} InferredErrorSet; + +// --- MaybeComptimeAlloc --- + +typedef struct { + uint32_t runtime_index; +} MaybeComptimeAlloc; + +// --- ComptimeAlloc --- + +typedef struct { + InternPoolIndex val; + bool is_const; + uint32_t alignment; + uint32_t runtime_index; +} ComptimeAlloc; + +// --- Sema --- +// State used for compiling a ZIR into AIR. +// Transforms untyped ZIR instructions into semantically-analyzed AIR +// instructions. Does type checking, comptime control flow, and safety-check +// generation. 
+ +typedef struct Sema { + InternPool* ip; + Zir code; + AirInstTag* air_inst_tags; + AirInstData* air_inst_datas; + uint32_t air_inst_len; + uint32_t air_inst_cap; + uint32_t* air_extra; + uint32_t air_extra_len; + uint32_t air_extra_cap; + InstMap inst_map; + InternPoolIndex func_index; + bool func_is_naked; + TypeIndex fn_ret_ty; + InferredErrorSet* fn_ret_ty_ies; + uint32_t branch_quota; + uint32_t branch_count; + uint32_t comptime_break_inst; + bool allow_memoize; + bool has_compile_errors; + char err_buf[ERR_BUF_SIZE]; +} Sema; + +#define SEMA_DEFAULT_BRANCH_QUOTA 1000 + +// --- Function declarations --- + +Sema semaInit(InternPool* ip, Zir code); +void semaDeinit(Sema* sema); +Air semaAnalyze(Sema* sema); + +#endif diff --git a/stage0/type.c b/stage0/type.c @@ -0,0 +1,22 @@ +#include "type.h" + +uint32_t typeZigTypeTag(const InternPool* ip, TypeIndex ty) { + (void)ip; + (void)ty; + // TODO: implement + return 0; +} + +TypeIndex typeChildType(const InternPool* ip, TypeIndex ty) { + (void)ip; + (void)ty; + // TODO: implement + return TYPE_NONE; +} + +bool typeIsSlice(const InternPool* ip, TypeIndex ty) { + (void)ip; + (void)ty; + // TODO: implement + return false; +} diff --git a/stage0/type.h b/stage0/type.h @@ -0,0 +1,18 @@ +// type.h — Type wrapper, ported from src/Type.zig. +#ifndef _ZIG0_TYPE_H__ +#define _ZIG0_TYPE_H__ + +#include "intern_pool.h" +#include <stdbool.h> +#include <stdint.h> + +typedef InternPoolIndex TypeIndex; +#define TYPE_NONE IP_INDEX_NONE + +// --- Function declarations (stubs for now) --- + +uint32_t typeZigTypeTag(const InternPool* ip, TypeIndex ty); +TypeIndex typeChildType(const InternPool* ip, TypeIndex ty); +bool typeIsSlice(const InternPool* ip, TypeIndex ty); + +#endif diff --git a/stage0/value.c b/stage0/value.c @@ -0,0 +1,9 @@ +#include "value.h" + +TypeIndex valueToType(ValueIndex val) { + // In InternPool, type indices and value indices share the same namespace. 
+ // A value that represents a type simply contains the type index. + return val; +} + +ValueIndex valueFromInterned(InternPoolIndex index) { return index; } diff --git a/stage0/value.h b/stage0/value.h @@ -0,0 +1,16 @@ +// value.h — Value wrapper, ported from src/Value.zig. +#ifndef _ZIG0_VALUE_H__ +#define _ZIG0_VALUE_H__ + +#include "intern_pool.h" +#include "type.h" + +typedef InternPoolIndex ValueIndex; +#define VALUE_NONE IP_INDEX_NONE + +// --- Function declarations --- + +TypeIndex valueToType(ValueIndex val); +ValueIndex valueFromInterned(InternPoolIndex index); + +#endif diff --git a/stage0/zig0.c b/stage0/zig0.c @@ -1,7 +1,7 @@ -#include "common.h" - #include "ast.h" #include "astgen.h" +#include "intern_pool.h" +#include "sema.h" #include "zir.h" #include <stdbool.h> @@ -40,6 +40,12 @@ static int zig0Run(const char* program, char** msg) { fprintf(stderr, "zir: %u instructions, %u extra, %u string bytes\n", zir.inst_len, zir.extra_len, zir.string_bytes_len); + InternPool ip = ipInit(); + Sema sema = semaInit(&ip, zir); + Air air = semaAnalyze(&sema); + semaDeinit(&sema); + airDeinit(&air); + ipDeinit(&ip); zirDeinit(&zir); return 0; }