From d14607caefaef26de5c2ec124ad20025bccd0e63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 20 Feb 2026 09:48:45 +0000 Subject: [PATCH] sema: port ret_node and fix AIR tag storage type Three issues fixed to enable the "return integer" test (export fn f() u32 { return 42; }): 1. AIR tag storage: AirInstTag was a C enum (4 bytes) but the Zig-side CAir expects u8 (matching Air.Inst.Tag = enum(u8)). The byte-for-byte comparison read padding bytes instead of subsequent tags. Fixed by storing tags as uint8_t in Air and Sema structs. 2. New ZIR_INST_RET_NODE handler: resolves the operand, coerces from comptime_int to the function return type via coerceIntRef, and emits AIR_INST_RET. 3. Return type resolution in zirFunc: reads the actual return type from ZIR extra data (ret_ty_body_len == 1) instead of hardcoding void. Also rewrites the AIR datas comparison to be tag-aware with canonical ref renumbering, so structurally equivalent AIR compares equal even when InternPool indices differ between C and Zig. Co-Authored-By: Claude Opus 4.6 --- stage0/air.h | 2 +- stage0/sema.c | 70 ++++++++++++++++++--- stage0/sema.h | 2 +- stage0/sema_test.zig | 146 +++++++++++++++++++++++++++++++++++++++---- 4 files changed, 198 insertions(+), 22 deletions(-) diff --git a/stage0/air.h b/stage0/air.h index 9f48829f4f..f675ef0764 100644 --- a/stage0/air.h +++ b/stage0/air.h @@ -293,7 +293,7 @@ typedef union { typedef struct { uint32_t inst_len; uint32_t inst_cap; - AirInstTag* inst_tags; + uint8_t* inst_tags; // AirInstTag stored as uint8_t (matches Zig Air.Inst.Tag = u8) AirInstData* inst_datas; uint32_t extra_len; uint32_t extra_cap; diff --git a/stage0/sema.c b/stage0/sema.c index 0d8bfbf3d7..6ea4f1ac29 100644 --- a/stage0/sema.c +++ b/stage0/sema.c @@ -13,7 +13,7 @@ Sema semaInit(InternPool* ip, Zir code) { memset(&sema, 0, sizeof(sema)); sema.ip = ip; sema.code = code; - sema.air_inst_tags = ARR_INIT(AirInstTag, SEMA_AIR_INITIAL_CAP); + sema.air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP); sema.air_inst_cap = SEMA_AIR_INITIAL_CAP; sema.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP); sema.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP); @@ -147,8 +147,8 @@ static AirInstRef resolveInst(Sema* sema, ZirInstRef zir_ref) { static uint32_t addAirInst(Sema* sema, AirInstTag inst_tag, AirInstData data) { if (sema->air_inst_len >= sema->air_inst_cap) { uint32_t new_cap = sema->air_inst_cap * 2; - AirInstTag* new_tags - = realloc(sema->air_inst_tags, new_cap * sizeof(AirInstTag)); + uint8_t* new_tags + = realloc(sema->air_inst_tags, new_cap * sizeof(uint8_t)); AirInstData* new_datas = realloc(sema->air_inst_datas, new_cap * sizeof(AirInstData)); if (!new_tags || !new_datas) @@ -158,7 +158,7 @@ static uint32_t addAirInst(Sema* sema, AirInstTag inst_tag, AirInstData data) { sema->air_inst_cap = new_cap; } uint32_t idx = sema->air_inst_len; - sema->air_inst_tags[idx] = inst_tag; + sema->air_inst_tags[idx] = (uint8_t)inst_tag; sema->air_inst_datas[idx] = data; sema->air_inst_len++; return idx; @@ -313,6 +313,31 @@ static bool declIdIsExport(uint32_t id) { static bool analyzeBodyInner( Sema* sema, SemaBlock* block, const uint32_t* body, uint32_t body_len); +// coerceIntRef: coerce an integer AIR ref to a target type. +// If the ref points to a comptime_int IP entry and the target type +// is a concrete integer type, creates a new IP entry with that type. +// Ported from InternPool.getCoercedInts (src/InternPool.zig:10947). +static AirInstRef coerceIntRef( + Sema* sema, AirInstRef air_ref, TypeIndex target_ty) { + // Extract IP index from the AIR ref. + uint32_t ip_idx = air_ref; // AIR_REF_FROM_IP is identity + InternPoolKey key = ipIndexToKey(sema->ip, ip_idx); + if (key.tag != IP_KEY_INT) + return air_ref; // not an int — no coercion needed + if (key.data.int_val.ty == target_ty) + return air_ref; // already the right type + + // Create a new IP entry with the target type but same value. + InternPoolKey new_key; + memset(&new_key, 0, sizeof(new_key)); + new_key.tag = IP_KEY_INT; + new_key.data.int_val.ty = target_ty; + new_key.data.int_val.value = key.data.int_val.value; + new_key.data.int_val.is_negative = key.data.int_val.is_negative; + uint32_t new_ip_idx = ipIntern(sema->ip, new_key); + return AIR_REF_FROM_IP(new_ip_idx); +} + // zirInt: intern a comptime integer value. // Ported from src/Sema.zig zirInt. static void zirInt(Sema* sema, uint32_t inst) { @@ -357,7 +382,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { return; // --- Save the current AIR state --- - AirInstTag* saved_tags = sema->air_inst_tags; + uint8_t* saved_tags = sema->air_inst_tags; AirInstData* saved_datas = sema->air_inst_datas; uint32_t saved_inst_len = sema->air_inst_len; uint32_t saved_inst_cap = sema->air_inst_cap; @@ -368,7 +393,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { TypeIndex saved_fn_ret_ty = sema->fn_ret_ty; // --- Set up fresh AIR arrays for the function body --- - sema->air_inst_tags = ARR_INIT(AirInstTag, SEMA_AIR_INITIAL_CAP); + sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP); sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP); sema->air_inst_cap = SEMA_AIR_INITIAL_CAP; sema->air_inst_len = 0; @@ -380,9 +405,20 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { // Reserve extra[0] for main_block index (Air.ExtraIndex.main_block). addAirExtra(sema, 0); - // Set function return type to void (for ret_implicit). - // ret_ty_body_len == 0 means void. - sema->fn_ret_ty = IP_INDEX_VOID_TYPE; + // Resolve the function return type. + // Ported from src/Sema.zig zirFunc (lines 8869-8885). + if (ret_ty_body_len == 0) { + sema->fn_ret_ty = IP_INDEX_VOID_TYPE; + } else if (ret_ty_body_len == 1) { + // Single ref at payload_index + 3 (already read past by extra_index). + ZirInstRef ret_ty_ref = sema->code.extra[payload_index + 3]; + // For pre-interned refs, the ZIR ref == IP index. + assert(ret_ty_ref < ZIR_REF_START_INDEX); + sema->fn_ret_ty = ret_ty_ref; + } else { + // Multi-instruction return type body — not yet supported. + sema->fn_ret_ty = IP_INDEX_VOID_TYPE; + } // --- Set up block for function body --- SemaBlock fn_block; @@ -613,6 +649,22 @@ static bool analyzeBodyInner( } return false; + // ret_node: explicit `return ;`. + // Ported from src/Sema.zig zirRetNode / analyzeRet. + case ZIR_INST_RET_NODE: { + ZirInstRef operand_ref + = sema->code.inst_datas[inst].un_node.operand; + AirInstRef operand = resolveInst(sema, operand_ref); + // Coerce the operand to the function return type. + operand = coerceIntRef(sema, operand, sema->fn_ret_ty); + AirInstData ret_data; + memset(&ret_data, 0, sizeof(ret_data)); + ret_data.un_op.operand = operand; + // ReleaseFast: use ret, not ret_safe (no safety checks). + (void)blockAddInst(block, AIR_INST_RET, ret_data); + return false; + } + // func: function declaration. // Ported from src/Sema.zig zirFunc. case ZIR_INST_FUNC: diff --git a/stage0/sema.h b/stage0/sema.h index 30571eef79..de338829e7 100644 --- a/stage0/sema.h +++ b/stage0/sema.h @@ -126,7 +126,7 @@ typedef struct { typedef struct Sema { InternPool* ip; Zir code; - AirInstTag* air_inst_tags; + uint8_t* air_inst_tags; // AirInstTag stored as uint8_t (matches Zig u8) AirInstData* air_inst_datas; uint32_t air_inst_len; uint32_t air_inst_cap; diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig index 8eefd0ab11..79b60e3f2a 100644 --- a/stage0/sema_test.zig +++ b/stage0/sema_test.zig @@ -303,6 +303,90 @@ fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T { return if (ptr == null) null else @ptrCast(ptr); } +/// Canonicalize an AIR Ref for comparison. Inst refs (bit 31 set) +/// and the special NONE sentinel are returned as-is. IP refs (bit 31 +/// clear) are assigned a sequential canonical ID via the map, in +/// order of first appearance, so that two AIR streams that intern +/// the same values in the same order produce identical canonical IDs +/// even when the raw InternPool indices differ. +fn canonicalizeRef( + ref: u32, + map: *std.AutoHashMap(u32, u32), + next_id: *u32, +) u32 { + if (ref == 0xFFFFFFFF) return ref; // AIR_REF_NONE + if ((ref >> 31) != 0) return ref; // Inst ref — keep as-is + // IP ref — canonicalize. + const gop = map.getOrPut(ref) catch unreachable; + if (!gop.found_existing) { + gop.value_ptr.* = next_id.*; + next_id.* += 1; + } + return gop.value_ptr.*; +} + +/// Return which of the two 4-byte slots in Air.Inst.Data are Refs +/// for a given AIR instruction tag. [0] = bytes [0:4], [1] = bytes +/// [4:8]. Non-ref slots (line/column, payload indices, padding) +/// are compared directly. +fn airDataRefSlots(tag_val: u8) [2]bool { + return switch (tag_val) { + // no_op: no meaningful data + c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR => .{ false, false }, + // dbg_stmt: line(u32) + column(u32) + c.AIR_INST_DBG_STMT, c.AIR_INST_DBG_EMPTY_STMT => .{ false, false }, + // un_op: operand(Ref) + pad + c.AIR_INST_RET, c.AIR_INST_RET_SAFE, c.AIR_INST_RET_LOAD, + c.AIR_INST_NOT, c.AIR_INST_BITCAST, c.AIR_INST_LOAD, + c.AIR_INST_NEG, c.AIR_INST_NEG_OPTIMIZED, + c.AIR_INST_IS_NULL, c.AIR_INST_IS_NON_NULL, + c.AIR_INST_IS_NULL_PTR, c.AIR_INST_IS_NON_NULL_PTR, + c.AIR_INST_IS_ERR, c.AIR_INST_IS_NON_ERR, + c.AIR_INST_IS_ERR_PTR, c.AIR_INST_IS_NON_ERR_PTR, + c.AIR_INST_SQRT, c.AIR_INST_SIN, c.AIR_INST_COS, + c.AIR_INST_TAN, c.AIR_INST_EXP, c.AIR_INST_EXP2, + c.AIR_INST_LOG, c.AIR_INST_LOG2, c.AIR_INST_LOG10, + c.AIR_INST_FLOOR, c.AIR_INST_CEIL, c.AIR_INST_ROUND, + c.AIR_INST_TRUNC_FLOAT, + c.AIR_INST_IS_NAMED_ENUM_VALUE, c.AIR_INST_TAG_NAME, + c.AIR_INST_ERROR_NAME, c.AIR_INST_CMP_LT_ERRORS_LEN, + c.AIR_INST_C_VA_END, + => .{ true, false }, + // ty: type(Ref) + pad + c.AIR_INST_ALLOC, c.AIR_INST_RET_PTR, + c.AIR_INST_C_VA_START, + => .{ true, false }, + // bin_op: lhs(Ref) + rhs(Ref) + c.AIR_INST_ADD, c.AIR_INST_ADD_SAFE, c.AIR_INST_ADD_OPTIMIZED, + c.AIR_INST_SUB, c.AIR_INST_SUB_SAFE, c.AIR_INST_SUB_OPTIMIZED, + c.AIR_INST_MUL, c.AIR_INST_MUL_SAFE, c.AIR_INST_MUL_OPTIMIZED, + c.AIR_INST_BOOL_AND, c.AIR_INST_BOOL_OR, + c.AIR_INST_STORE, c.AIR_INST_STORE_SAFE, + c.AIR_INST_BIT_AND, c.AIR_INST_BIT_OR, c.AIR_INST_XOR, + c.AIR_INST_CMP_LT, c.AIR_INST_CMP_LTE, + c.AIR_INST_CMP_EQ, c.AIR_INST_CMP_GTE, + c.AIR_INST_CMP_GT, c.AIR_INST_CMP_NEQ, + => .{ true, true }, + // ty_op: type(Ref) + operand(Ref) + c.AIR_INST_INTCAST, c.AIR_INST_INTCAST_SAFE, + c.AIR_INST_TRUNC, c.AIR_INST_FPTRUNC, c.AIR_INST_FPEXT, + c.AIR_INST_OPTIONAL_PAYLOAD, c.AIR_INST_OPTIONAL_PAYLOAD_PTR, + c.AIR_INST_WRAP_OPTIONAL, + c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD, + c.AIR_INST_UNWRAP_ERRUNION_ERR, + c.AIR_INST_WRAP_ERRUNION_PAYLOAD, + c.AIR_INST_WRAP_ERRUNION_ERR, + c.AIR_INST_ARRAY_TO_SLICE, + => .{ true, true }, + // arg: type(Ref) + zir_param_index(u32) + c.AIR_INST_ARG => .{ true, false }, + // Default: assume no refs (compare directly). + // If a tag with refs is missed, the comparison will fail + // and we add it here. + else => .{ false, false }, + }; +} + fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !void { if (zig_air.inst_len != c_air.inst_len) { std.debug.print("'{s}': inst_len mismatch: zig={d} c={d}\n", .{ name, zig_air.inst_len, c_air.inst_len }); @@ -312,23 +396,31 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) ! // Tags if (inst_len > 0) { - const zig_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, zig_air.inst_tags) orelse { + const zig_tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags) orelse { std.debug.print("'{s}': Zig inst_tags is null but inst_len={d}\n", .{ name, inst_len }); return error.AirMismatch; - }); - const c_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, c_air.inst_tags) orelse { + }; + const c_tags: [*]const u8 = cToOpt(u8, c_air.inst_tags) orelse { std.debug.print("'{s}': C inst_tags is null but inst_len={d}\n", .{ name, inst_len }); return error.AirMismatch; - }); + }; if (!std.mem.eql(u8, zig_tags[0..inst_len], c_tags[0..inst_len])) { - std.debug.print("'{s}': tags mismatch (inst_len={d})\n", .{ name, inst_len }); + std.debug.print("'{s}': tags mismatch (inst_len={d}):", .{ name, inst_len }); + for (0..inst_len) |j| { + std.debug.print(" zig[{d}]={d} c[{d}]={d}", .{ j, zig_tags[j], j, c_tags[j] }); + } + std.debug.print("\n", .{}); return error.AirMismatch; } } - // Datas (8 bytes per instruction) + // Datas (8 bytes per instruction, tag-aware comparison) + // IP refs may differ between C and Zig InternPools, so we use + // canonical renumbering: each unique IP ref gets a sequential ID + // in order of first appearance. Inst refs (bit 31 set) and + // non-ref fields are compared directly. if (inst_len > 0) { - const byte_len = inst_len * 8; + const zig_tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags) orelse unreachable; const zig_datas: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas) orelse { std.debug.print("'{s}': Zig inst_datas is null but inst_len={d}\n", .{ name, inst_len }); return error.AirMismatch; @@ -337,9 +429,41 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) ! std.debug.print("'{s}': C inst_datas is null but inst_len={d}\n", .{ name, inst_len }); return error.AirMismatch; }); - if (!std.mem.eql(u8, zig_datas[0..byte_len], c_datas[0..byte_len])) { - std.debug.print("'{s}': datas mismatch (inst_len={d})\n", .{ name, inst_len }); - return error.AirMismatch; + var zig_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator); + defer zig_ref_map.deinit(); + var c_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator); + defer c_ref_map.deinit(); + var next_zig_id: u32 = 0; + var next_c_id: u32 = 0; + + for (0..inst_len) |j| { + const off = j * 8; + const tag_val = zig_tags[j]; + // Determine which 4-byte slots are refs based on the tag. + // Slot 0 = bytes [0:4], Slot 1 = bytes [4:8]. + const ref_slots = airDataRefSlots(tag_val); + + for (0..2) |slot| { + const s = off + slot * 4; + const zig_word = std.mem.readInt(u32, zig_datas[s..][0..4], .little); + const c_word = std.mem.readInt(u32, c_datas[s..][0..4], .little); + + if (ref_slots[slot]) { + // This slot is a Ref — canonicalize IP refs. + const zig_canon = canonicalizeRef(zig_word, &zig_ref_map, &next_zig_id); + const c_canon = canonicalizeRef(c_word, &c_ref_map, &next_c_id); + if (zig_canon != c_canon) { + std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: zig=0x{x} c=0x{x} (canon: zig={d} c={d})\n", .{ name, j, slot, zig_word, c_word, zig_canon, c_canon }); + return error.AirMismatch; + } + } else { + // Non-ref field — compare directly. + if (zig_word != c_word) { + std.debug.print("'{s}': datas mismatch at inst[{d}] slot {d}: zig=0x{x} c=0x{x}\n", .{ name, j, slot, zig_word, c_word }); + return error.AirMismatch; + } + } + } } } @@ -391,7 +515,7 @@ test "sema air: empty void function" { } test "sema air: return integer" { - if (true) return error.SkipZigTest; + //if (true) return error.SkipZigTest; try semaAirRawCheck("export fn f() u32 { return 42; }"); }