commit 40863676ca97b8b893ac65d83984c15ad66cbecc (tree)
parent 958d1ee942c415a909970bcdcfcc217b8a805127
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Tue, 17 Feb 2026 20:36:25 +0000
stage0: implement first Sema instruction handlers (Phase D)
Add core Sema analysis infrastructure:
- InstMap operations: ensureSpaceForBody, get, put
- resolveInst: maps ZIR refs to AIR refs (pre-interned + inst_map)
- addAirInst: appends AIR instructions with auto-growth
- SemaBlock helpers: init, deinit, blockAddInst
- zirDbgStmt handler with comptime elision and coalescing
Implement analyzeBodyInner dispatch loop handling:
- dbg_stmt, break_inline, ret_implicit, extended (stub),
block_inline (recursive body analysis), declaration (skip)
- Default case maps unhandled instructions to void_type
Update semaAnalyze to set up root block, verify ZIR instruction 0
is struct_decl, exercise dispatch infrastructure, and transfer AIR
array ownership to returned Air struct.
Add smoke test for "fn foo() void {}" declarations.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
2 files changed, 370 insertions(+), 4 deletions(-)
diff --git a/stage0/sema.c b/stage0/sema.c
@@ -1,9 +1,12 @@
#include "sema.h"
+#include <assert.h>
#include <stdlib.h>
#include <string.h>
#define SEMA_AIR_INITIAL_CAP 256
#define SEMA_AIR_EXTRA_INITIAL_CAP 256
+#define SEMA_BLOCK_INITIAL_CAP 64
+#define INST_MAP_INITIAL_CAP 32
Sema semaInit(InternPool* ip, Zir code) {
Sema sema;
@@ -39,10 +42,302 @@ void semaDeinit(Sema* sema) {
sema->inst_map.start = 0;
}
+// --- InstMap operations ---
+// Ported from src/Sema.zig InstMap.
+
+// Ensure the inst_map covers the range needed for the given body of ZIR
+// instruction indices. After this call, instMapPut can safely be called
+// for any instruction index in the body.
+static void instMapEnsureSpaceForBody(
+ InstMap* map, const uint32_t* body, uint32_t body_len) {
+ if (body_len == 0)
+ return;
+
+ // Find min and max instruction indices in the body.
+ uint32_t min_idx = body[0];
+ uint32_t max_idx = body[0];
+ for (uint32_t i = 1; i < body_len; i++) {
+ if (body[i] < min_idx)
+ min_idx = body[i];
+ if (body[i] > max_idx)
+ max_idx = body[i];
+ }
+
+ // Check if current map already covers the range.
+ if (map->items_len > 0 && map->start <= min_idx
+ && max_idx < map->items_len + map->start) {
+ return;
+ }
+
+ uint32_t old_start = (map->items_len == 0) ? min_idx : map->start;
+ uint32_t better_cap = map->items_len;
+ uint32_t better_start = old_start;
+
+ // Grow until the range [better_start, better_start + better_cap) covers
+ // [min_idx, max_idx].
+ while (!(better_start <= min_idx && max_idx < better_cap + better_start)) {
+ uint32_t extra_cap = better_cap / 2 + 16;
+ better_cap += extra_cap;
+ uint32_t half = extra_cap / 2;
+ if (better_start > half)
+ better_start -= half;
+ else
+ better_start = 0;
+ }
+
+ AirInstRef* new_items = calloc(better_cap, sizeof(AirInstRef));
+ if (!new_items)
+ exit(1);
+ // Initialize all to AIR_REF_NONE.
+ for (uint32_t i = 0; i < better_cap; i++)
+ new_items[i] = AIR_REF_NONE;
+
+ // Copy old items into the new buffer at the correct offset.
+ if (map->items_len > 0) {
+ uint32_t start_diff = old_start - better_start;
+ memcpy(&new_items[start_diff], map->items,
+ map->items_len * sizeof(AirInstRef));
+ }
+
+ free(map->items);
+ map->items = new_items;
+ map->items_len = better_cap;
+ map->start = better_start;
+}
+
+static AirInstRef instMapGet(const InstMap* map, uint32_t zir_inst) {
+ if (map->items_len == 0)
+ return AIR_REF_NONE;
+ assert(zir_inst >= map->start);
+ assert(zir_inst < map->start + map->items_len);
+ return map->items[zir_inst - map->start];
+}
+
+static void instMapPut(InstMap* map, uint32_t zir_inst, AirInstRef ref) {
+ assert(zir_inst >= map->start);
+ assert(zir_inst < map->start + map->items_len);
+ map->items[zir_inst - map->start] = ref;
+}
+
+// --- resolveInst ---
+// Ported from src/Sema.zig resolveInst.
+// Maps a ZIR Inst.Ref to an AIR Inst.Ref.
+// - If ref < ZIR_REF_START_INDEX, it's a pre-interned InternPool index;
+// we map it directly with AIR_REF_FROM_IP.
+// - Otherwise, ref - ZIR_REF_START_INDEX is the ZIR instruction index;
+// look it up in inst_map.
+
+static AirInstRef resolveInst(Sema* sema, ZirInstRef zir_ref) {
+ assert(zir_ref != ZIR_REF_NONE);
+ if (zir_ref >= ZIR_REF_START_INDEX) {
+ uint32_t zir_inst = zir_ref - ZIR_REF_START_INDEX;
+ AirInstRef result = instMapGet(&sema->inst_map, zir_inst);
+ assert(result != AIR_REF_NONE);
+ return result;
+ }
+ // First section of indices correspond to pre-interned constants.
+ // We intentionally map the same indices between ZIR and AIR.
+ return AIR_REF_FROM_IP(zir_ref);
+}
+
+// --- addAirInst ---
+// Appends an AIR instruction to sema's output arrays.
+// Returns the instruction index (not the ref).
+
+static uint32_t addAirInst(Sema* sema, AirInstTag inst_tag, AirInstData data) {
+ if (sema->air_inst_len >= sema->air_inst_cap) {
+ uint32_t new_cap = sema->air_inst_cap * 2;
+ AirInstTag* new_tags
+ = realloc(sema->air_inst_tags, new_cap * sizeof(AirInstTag));
+ AirInstData* new_datas
+ = realloc(sema->air_inst_datas, new_cap * sizeof(AirInstData));
+ if (!new_tags || !new_datas)
+ exit(1);
+ sema->air_inst_tags = new_tags;
+ sema->air_inst_datas = new_datas;
+ sema->air_inst_cap = new_cap;
+ }
+ uint32_t idx = sema->air_inst_len;
+ sema->air_inst_tags[idx] = inst_tag;
+ sema->air_inst_datas[idx] = data;
+ sema->air_inst_len++;
+ return idx;
+}
+
+// --- SemaBlock helpers ---
+
+static void semaBlockInit(SemaBlock* block, Sema* sema, SemaBlock* parent) {
+ memset(block, 0, sizeof(*block));
+ block->sema = sema;
+ block->parent = parent;
+ block->instructions = ARR_INIT(uint32_t, SEMA_BLOCK_INITIAL_CAP);
+ block->instructions_cap = SEMA_BLOCK_INITIAL_CAP;
+ block->is_comptime = true; // Module-level analysis is comptime.
+ block->error_return_trace_index = AIR_REF_NONE;
+}
+
+static void semaBlockDeinit(SemaBlock* block) {
+ free(block->instructions);
+ block->instructions = NULL;
+ block->instructions_len = 0;
+ block->instructions_cap = 0;
+}
+
+// Add an AIR instruction to the block's instruction list.
+static AirInstRef blockAddInst(
+ SemaBlock* block, AirInstTag inst_tag, AirInstData data) {
+ Sema* sema = block->sema;
+ uint32_t idx = addAirInst(sema, inst_tag, data);
+
+ // Append to block's instruction list.
+ if (block->instructions_len >= block->instructions_cap) {
+ uint32_t new_cap = block->instructions_cap * 2;
+ uint32_t* new_insts
+ = realloc(block->instructions, new_cap * sizeof(uint32_t));
+ if (!new_insts)
+ exit(1);
+ block->instructions = new_insts;
+ block->instructions_cap = new_cap;
+ }
+ block->instructions[block->instructions_len++] = idx;
+
+ return AIR_REF_FROM_INST(idx);
+}
+
+// --- ZIR instruction handlers ---
+// Ported from src/Sema.zig instruction handlers.
+
+// dbg_stmt: emit AIR_INST_DBG_STMT with line/column from ZIR.
+// Ported from src/Sema.zig zirDbgStmt.
+static void zirDbgStmt(Sema* sema, SemaBlock* block, uint32_t inst) {
+ // In comptime blocks, debug statements are elided.
+ if (block->is_comptime)
+ return;
+
+ uint32_t line = sema->code.inst_datas[inst].dbg_stmt.line;
+ uint32_t column = sema->code.inst_datas[inst].dbg_stmt.column;
+
+ // If the previous instruction in the block was also a dbg_stmt,
+ // replace it instead of adding a new one.
+ if (block->instructions_len != 0) {
+ uint32_t prev_idx = block->instructions[block->instructions_len - 1];
+ if (sema->air_inst_tags[prev_idx] == AIR_INST_DBG_STMT) {
+ sema->air_inst_datas[prev_idx].dbg_stmt.line = line;
+ sema->air_inst_datas[prev_idx].dbg_stmt.column = column;
+ return;
+ }
+ }
+
+ AirInstData data;
+ memset(&data, 0, sizeof(data));
+ data.dbg_stmt.line = line;
+ data.dbg_stmt.column = column;
+ (void)blockAddInst(block, AIR_INST_DBG_STMT, data);
+}
+
+// --- analyzeBodyInner ---
+// Ported from src/Sema.zig analyzeBodyInner.
+// Main dispatch loop: iterates over ZIR instructions in a body and
+// dispatches to the appropriate handler.
+// Returns true if analysis completed normally, false if terminated by
+// a noreturn instruction (break_inline, ret_implicit, etc.).
+
+static bool analyzeBodyInner(
+ Sema* sema, SemaBlock* block, const uint32_t* body, uint32_t body_len) {
+ instMapEnsureSpaceForBody(&sema->inst_map, body, body_len);
+
+ uint32_t i = 0;
+ while (i < body_len) {
+ uint32_t inst = body[i];
+ ZirInstTag inst_tag = sema->code.inst_tags[inst];
+
+ switch (inst_tag) {
+
+ // Instructions that don't produce a ref and don't go into the
+ // map.
+ case ZIR_INST_DBG_STMT:
+ zirDbgStmt(sema, block, inst);
+ i++;
+ continue;
+
+ // break_inline: comptime break — signals end of an inline
+ // block body. Ported from src/Sema.zig.
+ case ZIR_INST_BREAK_INLINE:
+ sema->comptime_break_inst = inst;
+ return false;
+
+ // ret_implicit: implicit return at end of function/block.
+ // For module-level analysis this is effectively a no-op.
+ case ZIR_INST_RET_IMPLICIT:
+ return false;
+
+ // extended: handle extended opcodes.
+ case ZIR_INST_EXTENDED: {
+ // For now, skip all extended opcodes.
+ // struct_decl, enum_decl, etc. need full type machinery.
+ AirInstRef air_ref = AIR_REF_FROM_IP(IP_INDEX_VOID_TYPE);
+ instMapPut(&sema->inst_map, inst, air_ref);
+ i++;
+ continue;
+ }
+
+ // block_inline: analyze inline block body directly.
+ case ZIR_INST_BLOCK_INLINE: {
+ ZirInstData data = sema->code.inst_datas[inst];
+ uint32_t payload_index = data.pl_node.payload_index;
+ // Extra data at payload_index: body_len followed by
+ // body_len instruction indices.
+ uint32_t inner_body_len = sema->code.extra[payload_index];
+ const uint32_t* inner_body = &sema->code.extra[payload_index + 1];
+
+ bool completed
+ = analyzeBodyInner(sema, block, inner_body, inner_body_len);
+ if (!completed) {
+ // The inner body terminated with a break_inline.
+ // The break_inline's operand is the result of this
+ // block.
+ uint32_t break_inst = sema->comptime_break_inst;
+ ZirInstData break_data = sema->code.inst_datas[break_inst];
+ ZirInstRef operand = break_data.break_data.operand;
+ AirInstRef result;
+ if (operand == ZIR_REF_NONE) {
+ result = AIR_REF_FROM_IP(IP_INDEX_VOID_VALUE);
+ } else {
+ result = resolveInst(sema, operand);
+ }
+ instMapPut(&sema->inst_map, inst, result);
+ }
+ i++;
+ continue;
+ }
+
+ // declaration: only appears in container-type declaration
+ // lists, not in analyzed bodies. Skip it.
+ case ZIR_INST_DECLARATION:
+ i++;
+ continue;
+
+ // For all other instructions, produce a void mapping and skip.
+ // As handlers are implemented, they will replace this default.
+ default: {
+ AirInstRef air_ref = AIR_REF_FROM_IP(IP_INDEX_VOID_TYPE);
+ instMapPut(&sema->inst_map, inst, air_ref);
+ i++;
+ continue;
+ }
+ }
+ }
+ return true;
+}
+
+// --- semaAnalyze ---
+// Ported from src/Sema.zig analyzeBodyInner entry point.
+// For the bootstrap, we analyze the main module's ZIR.
+// ZIR instruction 0 is always ZIR_INST_EXTENDED with opcode
+// ZIR_EXT_STRUCT_DECL, representing the root module struct.
+
Air semaAnalyze(Sema* sema) {
- // TODO: implement semantic analysis.
// Exercise utility functions to satisfy cppcheck unusedFunction.
- // These will be properly called once Sema handlers are implemented.
InternPool* ip = sema->ip;
InternPoolKey void_key;
@@ -57,7 +352,47 @@ Air semaAnalyze(Sema* sema) {
(void)typeIsSlice(ip, IP_INDEX_VOID_TYPE);
(void)valueToType(valueFromInterned(IP_INDEX_VOID_VALUE));
+ // Set up a root SemaBlock for module-level analysis.
+ SemaBlock root_block;
+ semaBlockInit(&root_block, sema, NULL);
+
+ // If we have ZIR instructions, attempt to analyze the main struct
+ // body. Instruction 0 is always struct_decl (extended). For the
+ // bootstrap, we don't yet have the type machinery to fully process
+ // struct_decl, so we just exercise the infrastructure.
+ if (sema->code.inst_len > 0) {
+ assert(sema->code.inst_tags[0] == ZIR_INST_EXTENDED);
+ assert(
+ sema->code.inst_datas[0].extended.opcode == ZIR_EXT_STRUCT_DECL);
+
+ // Analyze instruction 0 through analyzeBodyInner to exercise
+ // the dispatch path.
+ uint32_t single_inst = 0;
+ (void)analyzeBodyInner(sema, &root_block, &single_inst, 1);
+ }
+
+ semaBlockDeinit(&root_block);
+
+ // Build the output Air from sema's arrays.
+ // Transfer ownership of the instruction arrays to Air.
Air air;
memset(&air, 0, sizeof(air));
+ air.inst_tags = sema->air_inst_tags;
+ air.inst_datas = sema->air_inst_datas;
+ air.inst_len = sema->air_inst_len;
+ air.inst_cap = sema->air_inst_cap;
+ air.extra = sema->air_extra;
+ air.extra_len = sema->air_extra_len;
+ air.extra_cap = sema->air_extra_cap;
+
+ // Null out sema's arrays so semaDeinit won't double-free.
+ sema->air_inst_tags = NULL;
+ sema->air_inst_datas = NULL;
+ sema->air_inst_len = 0;
+ sema->air_inst_cap = 0;
+ sema->air_extra = NULL;
+ sema->air_extra_len = 0;
+ sema->air_extra_cap = 0;
+
return air;
}
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -186,8 +186,16 @@ test "sema: empty source smoke test" {
var air = c.semaAnalyze(&sema);
defer c.airDeinit(&air);
- // For now, just verify it doesn't crash.
- // Once Sema handlers are implemented, we'll compare with Zig reference.
+ // semaAnalyze transfers ownership of AIR arrays to Air.
+ // Verify the Air struct has valid (non-null) arrays.
+ try std.testing.expect(air.inst_tags != null);
+ try std.testing.expect(air.inst_datas != null);
+ try std.testing.expect(air.extra != null);
+
+ // After transfer, sema's arrays should be nulled out.
+ try std.testing.expect(sema.air_inst_tags == null);
+ try std.testing.expect(sema.air_inst_datas == null);
+ try std.testing.expect(sema.air_extra == null);
}
test "sema: const x = 0 smoke test" {
@@ -204,4 +212,27 @@ test "sema: const x = 0 smoke test" {
defer c.semaDeinit(&sema);
var air = c.semaAnalyze(&sema);
defer c.airDeinit(&air);
+
+ // Verify Air arrays are properly transferred.
+ try std.testing.expect(air.inst_tags != null);
+ try std.testing.expect(air.inst_datas != null);
+}
+
+test "sema: function decl smoke test" {
+ const source: [:0]const u8 = "fn foo() void {}";
+
+ var c_ast = c.astParse(source.ptr, @intCast(source.len));
+ defer c.astDeinit(&c_ast);
+ var c_zir = c.astGen(&c_ast);
+ defer c.zirDeinit(&c_zir);
+
+ var ip = c.ipInit();
+ defer c.ipDeinit(&ip);
+ var sema = c.semaInit(&ip, c_zir);
+ defer c.semaDeinit(&sema);
+ var air = c.semaAnalyze(&sema);
+ defer c.airDeinit(&air);
+
+ // Verify no crash with function declarations.
+ try std.testing.expect(air.inst_tags != null);
}