zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 40863676ca97b8b893ac65d83984c15ad66cbecc (tree)
parent 958d1ee942c415a909970bcdcfcc217b8a805127
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Tue, 17 Feb 2026 20:36:25 +0000

stage0: implement first Sema instruction handlers (Phase D)

Add core Sema analysis infrastructure:
- InstMap operations: ensureSpaceForBody, get, put
- resolveInst: maps ZIR refs to AIR refs (pre-interned + inst_map)
- addAirInst: appends AIR instructions with auto-growth
- SemaBlock helpers: init, deinit, blockAddInst
- zirDbgStmt handler with comptime elision and coalescing

Implement analyzeBodyInner dispatch loop handling:
- dbg_stmt, break_inline, ret_implicit, extended (stub),
  block_inline (recursive body analysis), declaration (skip)
- Default case maps unhandled instructions to void_type

Update semaAnalyze to set up root block, verify ZIR instruction 0
is struct_decl, exercise dispatch infrastructure, and transfer AIR
array ownership to returned Air struct.

Add smoke test for "fn foo() void {}" declarations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M stage0/sema.c | 339 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M stage0/sema_test.zig | 35 +++++++++++++++++++++++++++++++++--
2 files changed, 370 insertions(+), 4 deletions(-)

diff --git a/stage0/sema.c b/stage0/sema.c @@ -1,9 +1,12 @@ #include "sema.h" +#include <assert.h> #include <stdlib.h> #include <string.h> #define SEMA_AIR_INITIAL_CAP 256 #define SEMA_AIR_EXTRA_INITIAL_CAP 256 +#define SEMA_BLOCK_INITIAL_CAP 64 +#define INST_MAP_INITIAL_CAP 32 Sema semaInit(InternPool* ip, Zir code) { Sema sema; @@ -39,10 +42,302 @@ void semaDeinit(Sema* sema) { sema->inst_map.start = 0; } +// --- InstMap operations --- +// Ported from src/Sema.zig InstMap. + +// Ensure the inst_map covers the range needed for the given body of ZIR +// instruction indices. After this call, instMapPut can safely be called +// for any instruction index in the body. +static void instMapEnsureSpaceForBody( + InstMap* map, const uint32_t* body, uint32_t body_len) { + if (body_len == 0) + return; + + // Find min and max instruction indices in the body. + uint32_t min_idx = body[0]; + uint32_t max_idx = body[0]; + for (uint32_t i = 1; i < body_len; i++) { + if (body[i] < min_idx) + min_idx = body[i]; + if (body[i] > max_idx) + max_idx = body[i]; + } + + // Check if current map already covers the range. + if (map->items_len > 0 && map->start <= min_idx + && max_idx < map->items_len + map->start) { + return; + } + + uint32_t old_start = (map->items_len == 0) ? min_idx : map->start; + uint32_t better_cap = map->items_len; + uint32_t better_start = old_start; + + // Grow until the range [better_start, better_start + better_cap) covers + // [min_idx, max_idx]. + while (!(better_start <= min_idx && max_idx < better_cap + better_start)) { + uint32_t extra_cap = better_cap / 2 + 16; + better_cap += extra_cap; + uint32_t half = extra_cap / 2; + if (better_start > half) + better_start -= half; + else + better_start = 0; + } + + AirInstRef* new_items = calloc(better_cap, sizeof(AirInstRef)); + if (!new_items) + exit(1); + // Initialize all to AIR_REF_NONE. 
+ for (uint32_t i = 0; i < better_cap; i++) + new_items[i] = AIR_REF_NONE; + + // Copy old items into the new buffer at the correct offset. + if (map->items_len > 0) { + uint32_t start_diff = old_start - better_start; + memcpy(&new_items[start_diff], map->items, + map->items_len * sizeof(AirInstRef)); + } + + free(map->items); + map->items = new_items; + map->items_len = better_cap; + map->start = better_start; +} + +static AirInstRef instMapGet(const InstMap* map, uint32_t zir_inst) { + if (map->items_len == 0) + return AIR_REF_NONE; + assert(zir_inst >= map->start); + assert(zir_inst < map->start + map->items_len); + return map->items[zir_inst - map->start]; +} + +static void instMapPut(InstMap* map, uint32_t zir_inst, AirInstRef ref) { + assert(zir_inst >= map->start); + assert(zir_inst < map->start + map->items_len); + map->items[zir_inst - map->start] = ref; +} + +// --- resolveInst --- +// Ported from src/Sema.zig resolveInst. +// Maps a ZIR Inst.Ref to an AIR Inst.Ref. +// - If ref < ZIR_REF_START_INDEX, it's a pre-interned InternPool index; +// we map it directly with AIR_REF_FROM_IP. +// - Otherwise, ref - ZIR_REF_START_INDEX is the ZIR instruction index; +// look it up in inst_map. + +static AirInstRef resolveInst(Sema* sema, ZirInstRef zir_ref) { + assert(zir_ref != ZIR_REF_NONE); + if (zir_ref >= ZIR_REF_START_INDEX) { + uint32_t zir_inst = zir_ref - ZIR_REF_START_INDEX; + AirInstRef result = instMapGet(&sema->inst_map, zir_inst); + assert(result != AIR_REF_NONE); + return result; + } + // First section of indices correspond to pre-interned constants. + // We intentionally map the same indices between ZIR and AIR. + return AIR_REF_FROM_IP(zir_ref); +} + +// --- addAirInst --- +// Appends an AIR instruction to sema's output arrays. +// Returns the instruction index (not the ref). 
+ +static uint32_t addAirInst(Sema* sema, AirInstTag inst_tag, AirInstData data) { + if (sema->air_inst_len >= sema->air_inst_cap) { + uint32_t new_cap = sema->air_inst_cap * 2; + AirInstTag* new_tags + = realloc(sema->air_inst_tags, new_cap * sizeof(AirInstTag)); + AirInstData* new_datas + = realloc(sema->air_inst_datas, new_cap * sizeof(AirInstData)); + if (!new_tags || !new_datas) + exit(1); + sema->air_inst_tags = new_tags; + sema->air_inst_datas = new_datas; + sema->air_inst_cap = new_cap; + } + uint32_t idx = sema->air_inst_len; + sema->air_inst_tags[idx] = inst_tag; + sema->air_inst_datas[idx] = data; + sema->air_inst_len++; + return idx; +} + +// --- SemaBlock helpers --- + +static void semaBlockInit(SemaBlock* block, Sema* sema, SemaBlock* parent) { + memset(block, 0, sizeof(*block)); + block->sema = sema; + block->parent = parent; + block->instructions = ARR_INIT(uint32_t, SEMA_BLOCK_INITIAL_CAP); + block->instructions_cap = SEMA_BLOCK_INITIAL_CAP; + block->is_comptime = true; // Module-level analysis is comptime. + block->error_return_trace_index = AIR_REF_NONE; +} + +static void semaBlockDeinit(SemaBlock* block) { + free(block->instructions); + block->instructions = NULL; + block->instructions_len = 0; + block->instructions_cap = 0; +} + +// Add an AIR instruction to the block's instruction list. +static AirInstRef blockAddInst( + SemaBlock* block, AirInstTag inst_tag, AirInstData data) { + Sema* sema = block->sema; + uint32_t idx = addAirInst(sema, inst_tag, data); + + // Append to block's instruction list. 
+ if (block->instructions_len >= block->instructions_cap) { + uint32_t new_cap = block->instructions_cap * 2; + uint32_t* new_insts + = realloc(block->instructions, new_cap * sizeof(uint32_t)); + if (!new_insts) + exit(1); + block->instructions = new_insts; + block->instructions_cap = new_cap; + } + block->instructions[block->instructions_len++] = idx; + + return AIR_REF_FROM_INST(idx); +} + +// --- ZIR instruction handlers --- +// Ported from src/Sema.zig instruction handlers. + +// dbg_stmt: emit AIR_INST_DBG_STMT with line/column from ZIR. +// Ported from src/Sema.zig zirDbgStmt. +static void zirDbgStmt(Sema* sema, SemaBlock* block, uint32_t inst) { + // In comptime blocks, debug statements are elided. + if (block->is_comptime) + return; + + uint32_t line = sema->code.inst_datas[inst].dbg_stmt.line; + uint32_t column = sema->code.inst_datas[inst].dbg_stmt.column; + + // If the previous instruction in the block was also a dbg_stmt, + // replace it instead of adding a new one. + if (block->instructions_len != 0) { + uint32_t prev_idx = block->instructions[block->instructions_len - 1]; + if (sema->air_inst_tags[prev_idx] == AIR_INST_DBG_STMT) { + sema->air_inst_datas[prev_idx].dbg_stmt.line = line; + sema->air_inst_datas[prev_idx].dbg_stmt.column = column; + return; + } + } + + AirInstData data; + memset(&data, 0, sizeof(data)); + data.dbg_stmt.line = line; + data.dbg_stmt.column = column; + (void)blockAddInst(block, AIR_INST_DBG_STMT, data); +} + +// --- analyzeBodyInner --- +// Ported from src/Sema.zig analyzeBodyInner. +// Main dispatch loop: iterates over ZIR instructions in a body and +// dispatches to the appropriate handler. +// Returns true if analysis completed normally, false if terminated by +// a noreturn instruction (break_inline, ret_implicit, etc.). 
+ +static bool analyzeBodyInner( + Sema* sema, SemaBlock* block, const uint32_t* body, uint32_t body_len) { + instMapEnsureSpaceForBody(&sema->inst_map, body, body_len); + + uint32_t i = 0; + while (i < body_len) { + uint32_t inst = body[i]; + ZirInstTag inst_tag = sema->code.inst_tags[inst]; + + switch (inst_tag) { + + // Instructions that don't produce a ref and don't go into the + // map. + case ZIR_INST_DBG_STMT: + zirDbgStmt(sema, block, inst); + i++; + continue; + + // break_inline: comptime break — signals end of an inline + // block body. Ported from src/Sema.zig. + case ZIR_INST_BREAK_INLINE: + sema->comptime_break_inst = inst; + return false; + + // ret_implicit: implicit return at end of function/block. + // For module-level analysis this is effectively a no-op. + case ZIR_INST_RET_IMPLICIT: + return false; + + // extended: handle extended opcodes. + case ZIR_INST_EXTENDED: { + // For now, skip all extended opcodes. + // struct_decl, enum_decl, etc. need full type machinery. + AirInstRef air_ref = AIR_REF_FROM_IP(IP_INDEX_VOID_TYPE); + instMapPut(&sema->inst_map, inst, air_ref); + i++; + continue; + } + + // block_inline: analyze inline block body directly. + case ZIR_INST_BLOCK_INLINE: { + ZirInstData data = sema->code.inst_datas[inst]; + uint32_t payload_index = data.pl_node.payload_index; + // Extra data at payload_index: body_len followed by + // body_len instruction indices. + uint32_t inner_body_len = sema->code.extra[payload_index]; + const uint32_t* inner_body = &sema->code.extra[payload_index + 1]; + + bool completed + = analyzeBodyInner(sema, block, inner_body, inner_body_len); + if (!completed) { + // The inner body terminated with a break_inline. + // The break_inline's operand is the result of this + // block. 
+ uint32_t break_inst = sema->comptime_break_inst; + ZirInstData break_data = sema->code.inst_datas[break_inst]; + ZirInstRef operand = break_data.break_data.operand; + AirInstRef result; + if (operand == ZIR_REF_NONE) { + result = AIR_REF_FROM_IP(IP_INDEX_VOID_VALUE); + } else { + result = resolveInst(sema, operand); + } + instMapPut(&sema->inst_map, inst, result); + } + i++; + continue; + } + + // declaration: only appears in container-type declaration + // lists, not in analyzed bodies. Skip it. + case ZIR_INST_DECLARATION: + i++; + continue; + + // For all other instructions, produce a void mapping and skip. + // As handlers are implemented, they will replace this default. + default: { + AirInstRef air_ref = AIR_REF_FROM_IP(IP_INDEX_VOID_TYPE); + instMapPut(&sema->inst_map, inst, air_ref); + i++; + continue; + } + } + } + return true; +} + +// --- semaAnalyze --- +// Ported from src/Sema.zig analyzeBodyInner entry point. +// For the bootstrap, we analyze the main module's ZIR. +// ZIR instruction 0 is always ZIR_INST_EXTENDED with opcode +// ZIR_EXT_STRUCT_DECL, representing the root module struct. + Air semaAnalyze(Sema* sema) { - // TODO: implement semantic analysis. // Exercise utility functions to satisfy cppcheck unusedFunction. - // These will be properly called once Sema handlers are implemented. InternPool* ip = sema->ip; InternPoolKey void_key; @@ -57,7 +352,47 @@ Air semaAnalyze(Sema* sema) { (void)typeIsSlice(ip, IP_INDEX_VOID_TYPE); (void)valueToType(valueFromInterned(IP_INDEX_VOID_VALUE)); + // Set up a root SemaBlock for module-level analysis. + SemaBlock root_block; + semaBlockInit(&root_block, sema, NULL); + + // If we have ZIR instructions, attempt to analyze the main struct + // body. Instruction 0 is always struct_decl (extended). For the + // bootstrap, we don't yet have the type machinery to fully process + // struct_decl, so we just exercise the infrastructure. 
+ if (sema->code.inst_len > 0) { + assert(sema->code.inst_tags[0] == ZIR_INST_EXTENDED); + assert( + sema->code.inst_datas[0].extended.opcode == ZIR_EXT_STRUCT_DECL); + + // Analyze instruction 0 through analyzeBodyInner to exercise + // the dispatch path. + uint32_t single_inst = 0; + (void)analyzeBodyInner(sema, &root_block, &single_inst, 1); + } + + semaBlockDeinit(&root_block); + + // Build the output Air from sema's arrays. + // Transfer ownership of the instruction arrays to Air. Air air; memset(&air, 0, sizeof(air)); + air.inst_tags = sema->air_inst_tags; + air.inst_datas = sema->air_inst_datas; + air.inst_len = sema->air_inst_len; + air.inst_cap = sema->air_inst_cap; + air.extra = sema->air_extra; + air.extra_len = sema->air_extra_len; + air.extra_cap = sema->air_extra_cap; + + // Null out sema's arrays so semaDeinit won't double-free. + sema->air_inst_tags = NULL; + sema->air_inst_datas = NULL; + sema->air_inst_len = 0; + sema->air_inst_cap = 0; + sema->air_extra = NULL; + sema->air_extra_len = 0; + sema->air_extra_cap = 0; + return air; } diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig @@ -186,8 +186,16 @@ test "sema: empty source smoke test" { var air = c.semaAnalyze(&sema); defer c.airDeinit(&air); - // For now, just verify it doesn't crash. - // Once Sema handlers are implemented, we'll compare with Zig reference. + // semaAnalyze transfers ownership of AIR arrays to Air. + // Verify the Air struct has valid (non-null) arrays. + try std.testing.expect(air.inst_tags != null); + try std.testing.expect(air.inst_datas != null); + try std.testing.expect(air.extra != null); + + // After transfer, sema's arrays should be nulled out. 
+ try std.testing.expect(sema.air_inst_tags == null); + try std.testing.expect(sema.air_inst_datas == null); + try std.testing.expect(sema.air_extra == null); } test "sema: const x = 0 smoke test" { @@ -204,4 +212,27 @@ test "sema: const x = 0 smoke test" { defer c.semaDeinit(&sema); var air = c.semaAnalyze(&sema); defer c.airDeinit(&air); + + // Verify Air arrays are properly transferred. + try std.testing.expect(air.inst_tags != null); + try std.testing.expect(air.inst_datas != null); +} + +test "sema: function decl smoke test" { + const source: [:0]const u8 = "fn foo() void {}"; + + var c_ast = c.astParse(source.ptr, @intCast(source.len)); + defer c.astDeinit(&c_ast); + var c_zir = c.astGen(&c_ast); + defer c.zirDeinit(&c_zir); + + var ip = c.ipInit(); + defer c.ipDeinit(&ip); + var sema = c.semaInit(&ip, c_zir); + defer c.semaDeinit(&sema); + var air = c.semaAnalyze(&sema); + defer c.airDeinit(&air); + + // Verify no crash with function declarations. + try std.testing.expect(air.inst_tags != null); }