commit d14607caefaef26de5c2ec124ad20025bccd0e63 (tree)
parent fbfecf51da19f4f99df525a9cd1ed8dfc355f936
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Fri, 20 Feb 2026 09:48:45 +0000
sema: port ret_node and fix AIR tag storage type
Three issues fixed to enable the "return integer" test
(export fn f() u32 { return 42; }):
1. AIR tag storage: AirInstTag was a C enum (4 bytes) but the Zig-side
CAir expects u8 (matching Air.Inst.Tag = enum(u8)). The byte-for-byte
comparison read padding bytes instead of subsequent tags. Fixed by
storing tags as uint8_t in Air and Sema structs.
2. New ZIR_INST_RET_NODE handler: resolves the operand, coerces from
comptime_int to the function return type via coerceIntRef, and emits
AIR_INST_RET.
3. Return type resolution in zirFunc: reads the actual return type from
ZIR extra data (ret_ty_body_len == 1) instead of hardcoding void.
Also rewrites the AIR datas comparison to be tag-aware with canonical
ref renumbering, so structurally equivalent AIR compares equal even
when InternPool indices differ between C and Zig.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
4 files changed, 198 insertions(+), 22 deletions(-)
diff --git a/stage0/air.h b/stage0/air.h
@@ -293,7 +293,7 @@ typedef union {
typedef struct {
uint32_t inst_len;
uint32_t inst_cap;
- AirInstTag* inst_tags;
+ uint8_t* inst_tags; // AirInstTag stored as uint8_t (matches Zig Air.Inst.Tag = u8)
AirInstData* inst_datas;
uint32_t extra_len;
uint32_t extra_cap;
diff --git a/stage0/sema.c b/stage0/sema.c
@@ -13,7 +13,7 @@ Sema semaInit(InternPool* ip, Zir code) {
memset(&sema, 0, sizeof(sema));
sema.ip = ip;
sema.code = code;
- sema.air_inst_tags = ARR_INIT(AirInstTag, SEMA_AIR_INITIAL_CAP);
+ sema.air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
sema.air_inst_cap = SEMA_AIR_INITIAL_CAP;
sema.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
sema.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
@@ -147,8 +147,8 @@ static AirInstRef resolveInst(Sema* sema, ZirInstRef zir_ref) {
static uint32_t addAirInst(Sema* sema, AirInstTag inst_tag, AirInstData data) {
if (sema->air_inst_len >= sema->air_inst_cap) {
uint32_t new_cap = sema->air_inst_cap * 2;
- AirInstTag* new_tags
- = realloc(sema->air_inst_tags, new_cap * sizeof(AirInstTag));
+ uint8_t* new_tags
+ = realloc(sema->air_inst_tags, new_cap * sizeof(uint8_t));
AirInstData* new_datas
= realloc(sema->air_inst_datas, new_cap * sizeof(AirInstData));
if (!new_tags || !new_datas)
@@ -158,7 +158,7 @@ static uint32_t addAirInst(Sema* sema, AirInstTag inst_tag, AirInstData data) {
sema->air_inst_cap = new_cap;
}
uint32_t idx = sema->air_inst_len;
- sema->air_inst_tags[idx] = inst_tag;
+ sema->air_inst_tags[idx] = (uint8_t)inst_tag;
sema->air_inst_datas[idx] = data;
sema->air_inst_len++;
return idx;
@@ -313,6 +313,31 @@ static bool declIdIsExport(uint32_t id) {
static bool analyzeBodyInner(
Sema* sema, SemaBlock* block, const uint32_t* body, uint32_t body_len);
+// coerceIntRef: coerce an integer AIR ref to a target type.
+// If the ref points to a comptime_int IP entry and the target type
+// is a concrete integer type, creates a new IP entry with that type.
+// Ported from InternPool.getCoercedInts (src/InternPool.zig:10947).
+static AirInstRef coerceIntRef(
+ Sema* sema, AirInstRef air_ref, TypeIndex target_ty) {
+ // Extract IP index from the AIR ref.
+ uint32_t ip_idx = air_ref; // AIR_REF_FROM_IP is identity
+ InternPoolKey key = ipIndexToKey(sema->ip, ip_idx);
+ if (key.tag != IP_KEY_INT)
+ return air_ref; // not an int — no coercion needed
+ if (key.data.int_val.ty == target_ty)
+ return air_ref; // already the right type
+
+ // Create a new IP entry with the target type but same value.
+ InternPoolKey new_key;
+ memset(&new_key, 0, sizeof(new_key));
+ new_key.tag = IP_KEY_INT;
+ new_key.data.int_val.ty = target_ty;
+ new_key.data.int_val.value = key.data.int_val.value;
+ new_key.data.int_val.is_negative = key.data.int_val.is_negative;
+ uint32_t new_ip_idx = ipIntern(sema->ip, new_key);
+ return AIR_REF_FROM_IP(new_ip_idx);
+}
+
// zirInt: intern a comptime integer value.
// Ported from src/Sema.zig zirInt.
static void zirInt(Sema* sema, uint32_t inst) {
@@ -357,7 +382,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
return;
// --- Save the current AIR state ---
- AirInstTag* saved_tags = sema->air_inst_tags;
+ uint8_t* saved_tags = sema->air_inst_tags;
AirInstData* saved_datas = sema->air_inst_datas;
uint32_t saved_inst_len = sema->air_inst_len;
uint32_t saved_inst_cap = sema->air_inst_cap;
@@ -368,7 +393,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
TypeIndex saved_fn_ret_ty = sema->fn_ret_ty;
// --- Set up fresh AIR arrays for the function body ---
- sema->air_inst_tags = ARR_INIT(AirInstTag, SEMA_AIR_INITIAL_CAP);
+ sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
sema->air_inst_cap = SEMA_AIR_INITIAL_CAP;
sema->air_inst_len = 0;
@@ -380,9 +405,20 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
// Reserve extra[0] for main_block index (Air.ExtraIndex.main_block).
addAirExtra(sema, 0);
- // Set function return type to void (for ret_implicit).
- // ret_ty_body_len == 0 means void.
- sema->fn_ret_ty = IP_INDEX_VOID_TYPE;
+ // Resolve the function return type.
+ // Ported from src/Sema.zig zirFunc (lines 8869-8885).
+ if (ret_ty_body_len == 0) {
+ sema->fn_ret_ty = IP_INDEX_VOID_TYPE;
+ } else if (ret_ty_body_len == 1) {
+ // Single ref at payload_index + 3 (already read past by extra_index).
+ ZirInstRef ret_ty_ref = sema->code.extra[payload_index + 3];
+ // For pre-interned refs, the ZIR ref == IP index.
+ assert(ret_ty_ref < ZIR_REF_START_INDEX);
+ sema->fn_ret_ty = ret_ty_ref;
+ } else {
+ // Multi-instruction return type body — not yet supported.
+ sema->fn_ret_ty = IP_INDEX_VOID_TYPE;
+ }
// --- Set up block for function body ---
SemaBlock fn_block;
@@ -613,6 +649,22 @@ static bool analyzeBodyInner(
}
return false;
+ // ret_node: explicit `return <expr>;`.
+ // Ported from src/Sema.zig zirRetNode / analyzeRet.
+ case ZIR_INST_RET_NODE: {
+ ZirInstRef operand_ref
+ = sema->code.inst_datas[inst].un_node.operand;
+ AirInstRef operand = resolveInst(sema, operand_ref);
+ // Coerce the operand to the function return type.
+ operand = coerceIntRef(sema, operand, sema->fn_ret_ty);
+ AirInstData ret_data;
+ memset(&ret_data, 0, sizeof(ret_data));
+ ret_data.un_op.operand = operand;
+ // ReleaseFast: use ret, not ret_safe (no safety checks).
+ (void)blockAddInst(block, AIR_INST_RET, ret_data);
+ return false;
+ }
+
// func: function declaration.
// Ported from src/Sema.zig zirFunc.
case ZIR_INST_FUNC:
diff --git a/stage0/sema.h b/stage0/sema.h
@@ -126,7 +126,7 @@ typedef struct {
typedef struct Sema {
InternPool* ip;
Zir code;
- AirInstTag* air_inst_tags;
+ uint8_t* air_inst_tags; // AirInstTag stored as uint8_t (matches Zig u8)
AirInstData* air_inst_datas;
uint32_t air_inst_len;
uint32_t air_inst_cap;
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -303,6 +303,90 @@ fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T {
return if (ptr == null) null else @ptrCast(ptr);
}
+/// Canonicalize an AIR Ref for comparison. Inst refs (bit 31 set)
+/// and the special NONE sentinel are returned as-is. IP refs (bit 31
+/// clear) are assigned a sequential canonical ID via the map, in
+/// order of first appearance, so that two AIR streams that intern
+/// the same values in the same order produce identical canonical IDs
+/// even when the raw InternPool indices differ.
+fn canonicalizeRef(
+ ref: u32,
+ map: *std.AutoHashMap(u32, u32),
+ next_id: *u32,
+) u32 {
+ if (ref == 0xFFFFFFFF) return ref; // AIR_REF_NONE
+ if ((ref >> 31) != 0) return ref; // Inst ref — keep as-is
+ // IP ref — canonicalize.
+ const gop = map.getOrPut(ref) catch unreachable;
+ if (!gop.found_existing) {
+ gop.value_ptr.* = next_id.*;
+ next_id.* += 1;
+ }
+ return gop.value_ptr.*;
+}
+
+/// Return which of the two 4-byte slots in Air.Inst.Data are Refs
+/// for a given AIR instruction tag. [0] = bytes [0:4], [1] = bytes
+/// [4:8]. Non-ref slots (line/column, payload indices, padding)
+/// are compared directly.
+fn airDataRefSlots(tag_val: u8) [2]bool {
+ return switch (tag_val) {
+ // no_op: no meaningful data
+ c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR => .{ false, false },
+ // dbg_stmt: line(u32) + column(u32)
+ c.AIR_INST_DBG_STMT, c.AIR_INST_DBG_EMPTY_STMT => .{ false, false },
+ // un_op: operand(Ref) + pad
+ c.AIR_INST_RET, c.AIR_INST_RET_SAFE, c.AIR_INST_RET_LOAD,
+ c.AIR_INST_NOT, c.AIR_INST_BITCAST, c.AIR_INST_LOAD,
+ c.AIR_INST_NEG, c.AIR_INST_NEG_OPTIMIZED,
+ c.AIR_INST_IS_NULL, c.AIR_INST_IS_NON_NULL,
+ c.AIR_INST_IS_NULL_PTR, c.AIR_INST_IS_NON_NULL_PTR,
+ c.AIR_INST_IS_ERR, c.AIR_INST_IS_NON_ERR,
+ c.AIR_INST_IS_ERR_PTR, c.AIR_INST_IS_NON_ERR_PTR,
+ c.AIR_INST_SQRT, c.AIR_INST_SIN, c.AIR_INST_COS,
+ c.AIR_INST_TAN, c.AIR_INST_EXP, c.AIR_INST_EXP2,
+ c.AIR_INST_LOG, c.AIR_INST_LOG2, c.AIR_INST_LOG10,
+ c.AIR_INST_FLOOR, c.AIR_INST_CEIL, c.AIR_INST_ROUND,
+ c.AIR_INST_TRUNC_FLOAT,
+ c.AIR_INST_IS_NAMED_ENUM_VALUE, c.AIR_INST_TAG_NAME,
+ c.AIR_INST_ERROR_NAME, c.AIR_INST_CMP_LT_ERRORS_LEN,
+ c.AIR_INST_C_VA_END,
+ => .{ true, false },
+ // ty: type(Ref) + pad
+ c.AIR_INST_ALLOC, c.AIR_INST_RET_PTR,
+ c.AIR_INST_C_VA_START,
+ => .{ true, false },
+ // bin_op: lhs(Ref) + rhs(Ref)
+ c.AIR_INST_ADD, c.AIR_INST_ADD_SAFE, c.AIR_INST_ADD_OPTIMIZED,
+ c.AIR_INST_SUB, c.AIR_INST_SUB_SAFE, c.AIR_INST_SUB_OPTIMIZED,
+ c.AIR_INST_MUL, c.AIR_INST_MUL_SAFE, c.AIR_INST_MUL_OPTIMIZED,
+ c.AIR_INST_BOOL_AND, c.AIR_INST_BOOL_OR,
+ c.AIR_INST_STORE, c.AIR_INST_STORE_SAFE,
+ c.AIR_INST_BIT_AND, c.AIR_INST_BIT_OR, c.AIR_INST_XOR,
+ c.AIR_INST_CMP_LT, c.AIR_INST_CMP_LTE,
+ c.AIR_INST_CMP_EQ, c.AIR_INST_CMP_GTE,
+ c.AIR_INST_CMP_GT, c.AIR_INST_CMP_NEQ,
+ => .{ true, true },
+ // ty_op: type(Ref) + operand(Ref)
+ c.AIR_INST_INTCAST, c.AIR_INST_INTCAST_SAFE,
+ c.AIR_INST_TRUNC, c.AIR_INST_FPTRUNC, c.AIR_INST_FPEXT,
+ c.AIR_INST_OPTIONAL_PAYLOAD, c.AIR_INST_OPTIONAL_PAYLOAD_PTR,
+ c.AIR_INST_WRAP_OPTIONAL,
+ c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD,
+ c.AIR_INST_UNWRAP_ERRUNION_ERR,
+ c.AIR_INST_WRAP_ERRUNION_PAYLOAD,
+ c.AIR_INST_WRAP_ERRUNION_ERR,
+ c.AIR_INST_ARRAY_TO_SLICE,
+ => .{ true, true },
+ // arg: type(Ref) + zir_param_index(u32)
+ c.AIR_INST_ARG => .{ true, false },
+ // Default: assume no refs (compare directly).
+ // If a tag with refs is missed, the comparison will fail
+ // and we add it here.
+ else => .{ false, false },
+ };
+}
+
fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !void {
if (zig_air.inst_len != c_air.inst_len) {
std.debug.print("'{s}': inst_len mismatch: zig={d} c={d}\n", .{ name, zig_air.inst_len, c_air.inst_len });
@@ -312,23 +396,31 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
// Tags
if (inst_len > 0) {
- const zig_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, zig_air.inst_tags) orelse {
+ const zig_tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags) orelse {
std.debug.print("'{s}': Zig inst_tags is null but inst_len={d}\n", .{ name, inst_len });
return error.AirMismatch;
- });
- const c_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, c_air.inst_tags) orelse {
+ };
+ const c_tags: [*]const u8 = cToOpt(u8, c_air.inst_tags) orelse {
std.debug.print("'{s}': C inst_tags is null but inst_len={d}\n", .{ name, inst_len });
return error.AirMismatch;
- });
+ };
if (!std.mem.eql(u8, zig_tags[0..inst_len], c_tags[0..inst_len])) {
- std.debug.print("'{s}': tags mismatch (inst_len={d})\n", .{ name, inst_len });
+ std.debug.print("'{s}': tags mismatch (inst_len={d}):", .{ name, inst_len });
+ for (0..inst_len) |j| {
+ std.debug.print(" zig[{d}]={d} c[{d}]={d}", .{ j, zig_tags[j], j, c_tags[j] });
+ }
+ std.debug.print("\n", .{});
return error.AirMismatch;
}
}
- // Datas (8 bytes per instruction)
+ // Datas (8 bytes per instruction, tag-aware comparison)
+ // IP refs may differ between C and Zig InternPools, so we use
+ // canonical renumbering: each unique IP ref gets a sequential ID
+ // in order of first appearance. Inst refs (bit 31 set) and
+ // non-ref fields are compared directly.
if (inst_len > 0) {
- const byte_len = inst_len * 8;
+ const zig_tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags) orelse unreachable;
const zig_datas: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas) orelse {
std.debug.print("'{s}': Zig inst_datas is null but inst_len={d}\n", .{ name, inst_len });
return error.AirMismatch;
@@ -337,9 +429,41 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
std.debug.print("'{s}': C inst_datas is null but inst_len={d}\n", .{ name, inst_len });
return error.AirMismatch;
});
- if (!std.mem.eql(u8, zig_datas[0..byte_len], c_datas[0..byte_len])) {
- std.debug.print("'{s}': datas mismatch (inst_len={d})\n", .{ name, inst_len });
- return error.AirMismatch;
+ var zig_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
+ defer zig_ref_map.deinit();
+ var c_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
+ defer c_ref_map.deinit();
+ var next_zig_id: u32 = 0;
+ var next_c_id: u32 = 0;
+
+ for (0..inst_len) |j| {
+ const off = j * 8;
+ const tag_val = zig_tags[j];
+ // Determine which 4-byte slots are refs based on the tag.
+ // Slot 0 = bytes [0:4], Slot 1 = bytes [4:8].
+ const ref_slots = airDataRefSlots(tag_val);
+
+ for (0..2) |slot| {
+ const s = off + slot * 4;
+ const zig_word = std.mem.readInt(u32, zig_datas[s..][0..4], .little);
+ const c_word = std.mem.readInt(u32, c_datas[s..][0..4], .little);
+
+ if (ref_slots[slot]) {
+ // This slot is a Ref — canonicalize IP refs.
+ const zig_canon = canonicalizeRef(zig_word, &zig_ref_map, &next_zig_id);
+ const c_canon = canonicalizeRef(c_word, &c_ref_map, &next_c_id);
+ if (zig_canon != c_canon) {
+ std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: zig=0x{x} c=0x{x} (canon: zig={d} c={d})\n", .{ name, j, slot, zig_word, c_word, zig_canon, c_canon });
+ return error.AirMismatch;
+ }
+ } else {
+ // Non-ref field — compare directly.
+ if (zig_word != c_word) {
+ std.debug.print("'{s}': datas mismatch at inst[{d}] slot {d}: zig=0x{x} c=0x{x}\n", .{ name, j, slot, zig_word, c_word });
+ return error.AirMismatch;
+ }
+ }
+ }
}
}
@@ -391,7 +515,7 @@ test "sema air: empty void function" {
}
test "sema air: return integer" {
- if (true) return error.SkipZigTest;
+ //if (true) return error.SkipZigTest;
try semaAirRawCheck("export fn f() u32 { return 42; }");
}