sema: port ret_node and fix AIR tag storage type
Three issues fixed to enable the "return integer" test
(export fn f() u32 { return 42; }):
1. AIR tag storage: AirInstTag was a C enum (4 bytes) but the Zig-side
CAir expects u8 (matching Air.Inst.Tag = enum(u8)). The byte-for-byte
comparison read padding bytes instead of subsequent tags. Fixed by
storing tags as uint8_t in Air and Sema structs.
2. New ZIR_INST_RET_NODE handler: resolves the operand, coerces from
comptime_int to the function return type via coerceIntRef, and emits
AIR_INST_RET.
3. Return type resolution in zirFunc: reads the actual return type from
ZIR extra data (ret_ty_body_len == 1) instead of hardcoding void.
Also rewrites the AIR datas comparison to be tag-aware with canonical
ref renumbering, so structurally equivalent AIR compares equal even
when InternPool indices differ between C and Zig.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -293,7 +293,7 @@ typedef union {
|
||||
typedef struct {
|
||||
uint32_t inst_len;
|
||||
uint32_t inst_cap;
|
||||
AirInstTag* inst_tags;
|
||||
uint8_t* inst_tags; // AirInstTag stored as uint8_t (matches Zig Air.Inst.Tag = u8)
|
||||
AirInstData* inst_datas;
|
||||
uint32_t extra_len;
|
||||
uint32_t extra_cap;
|
||||
|
||||
@@ -13,7 +13,7 @@ Sema semaInit(InternPool* ip, Zir code) {
|
||||
memset(&sema, 0, sizeof(sema));
|
||||
sema.ip = ip;
|
||||
sema.code = code;
|
||||
sema.air_inst_tags = ARR_INIT(AirInstTag, SEMA_AIR_INITIAL_CAP);
|
||||
sema.air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
|
||||
sema.air_inst_cap = SEMA_AIR_INITIAL_CAP;
|
||||
sema.air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
|
||||
sema.air_extra = ARR_INIT(uint32_t, SEMA_AIR_EXTRA_INITIAL_CAP);
|
||||
@@ -147,8 +147,8 @@ static AirInstRef resolveInst(Sema* sema, ZirInstRef zir_ref) {
|
||||
static uint32_t addAirInst(Sema* sema, AirInstTag inst_tag, AirInstData data) {
|
||||
if (sema->air_inst_len >= sema->air_inst_cap) {
|
||||
uint32_t new_cap = sema->air_inst_cap * 2;
|
||||
AirInstTag* new_tags
|
||||
= realloc(sema->air_inst_tags, new_cap * sizeof(AirInstTag));
|
||||
uint8_t* new_tags
|
||||
= realloc(sema->air_inst_tags, new_cap * sizeof(uint8_t));
|
||||
AirInstData* new_datas
|
||||
= realloc(sema->air_inst_datas, new_cap * sizeof(AirInstData));
|
||||
if (!new_tags || !new_datas)
|
||||
@@ -158,7 +158,7 @@ static uint32_t addAirInst(Sema* sema, AirInstTag inst_tag, AirInstData data) {
|
||||
sema->air_inst_cap = new_cap;
|
||||
}
|
||||
uint32_t idx = sema->air_inst_len;
|
||||
sema->air_inst_tags[idx] = inst_tag;
|
||||
sema->air_inst_tags[idx] = (uint8_t)inst_tag;
|
||||
sema->air_inst_datas[idx] = data;
|
||||
sema->air_inst_len++;
|
||||
return idx;
|
||||
@@ -313,6 +313,31 @@ static bool declIdIsExport(uint32_t id) {
|
||||
static bool analyzeBodyInner(
|
||||
Sema* sema, SemaBlock* block, const uint32_t* body, uint32_t body_len);
|
||||
|
||||
// coerceIntRef: coerce an integer AIR ref to a target type.
|
||||
// If the ref points to a comptime_int IP entry and the target type
|
||||
// is a concrete integer type, creates a new IP entry with that type.
|
||||
// Ported from InternPool.getCoercedInts (src/InternPool.zig:10947).
|
||||
static AirInstRef coerceIntRef(
|
||||
Sema* sema, AirInstRef air_ref, TypeIndex target_ty) {
|
||||
// Extract IP index from the AIR ref.
|
||||
uint32_t ip_idx = air_ref; // AIR_REF_FROM_IP is identity
|
||||
InternPoolKey key = ipIndexToKey(sema->ip, ip_idx);
|
||||
if (key.tag != IP_KEY_INT)
|
||||
return air_ref; // not an int — no coercion needed
|
||||
if (key.data.int_val.ty == target_ty)
|
||||
return air_ref; // already the right type
|
||||
|
||||
// Create a new IP entry with the target type but same value.
|
||||
InternPoolKey new_key;
|
||||
memset(&new_key, 0, sizeof(new_key));
|
||||
new_key.tag = IP_KEY_INT;
|
||||
new_key.data.int_val.ty = target_ty;
|
||||
new_key.data.int_val.value = key.data.int_val.value;
|
||||
new_key.data.int_val.is_negative = key.data.int_val.is_negative;
|
||||
uint32_t new_ip_idx = ipIntern(sema->ip, new_key);
|
||||
return AIR_REF_FROM_IP(new_ip_idx);
|
||||
}
|
||||
|
||||
// zirInt: intern a comptime integer value.
|
||||
// Ported from src/Sema.zig zirInt.
|
||||
static void zirInt(Sema* sema, uint32_t inst) {
|
||||
@@ -357,7 +382,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
|
||||
return;
|
||||
|
||||
// --- Save the current AIR state ---
|
||||
AirInstTag* saved_tags = sema->air_inst_tags;
|
||||
uint8_t* saved_tags = sema->air_inst_tags;
|
||||
AirInstData* saved_datas = sema->air_inst_datas;
|
||||
uint32_t saved_inst_len = sema->air_inst_len;
|
||||
uint32_t saved_inst_cap = sema->air_inst_cap;
|
||||
@@ -368,7 +393,7 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
|
||||
TypeIndex saved_fn_ret_ty = sema->fn_ret_ty;
|
||||
|
||||
// --- Set up fresh AIR arrays for the function body ---
|
||||
sema->air_inst_tags = ARR_INIT(AirInstTag, SEMA_AIR_INITIAL_CAP);
|
||||
sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
|
||||
sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
|
||||
sema->air_inst_cap = SEMA_AIR_INITIAL_CAP;
|
||||
sema->air_inst_len = 0;
|
||||
@@ -380,9 +405,20 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
|
||||
// Reserve extra[0] for main_block index (Air.ExtraIndex.main_block).
|
||||
addAirExtra(sema, 0);
|
||||
|
||||
// Set function return type to void (for ret_implicit).
|
||||
// ret_ty_body_len == 0 means void.
|
||||
sema->fn_ret_ty = IP_INDEX_VOID_TYPE;
|
||||
// Resolve the function return type.
|
||||
// Ported from src/Sema.zig zirFunc (lines 8869-8885).
|
||||
if (ret_ty_body_len == 0) {
|
||||
sema->fn_ret_ty = IP_INDEX_VOID_TYPE;
|
||||
} else if (ret_ty_body_len == 1) {
|
||||
// Single ref at payload_index + 3 (already read past by extra_index).
|
||||
ZirInstRef ret_ty_ref = sema->code.extra[payload_index + 3];
|
||||
// For pre-interned refs, the ZIR ref == IP index.
|
||||
assert(ret_ty_ref < ZIR_REF_START_INDEX);
|
||||
sema->fn_ret_ty = ret_ty_ref;
|
||||
} else {
|
||||
// Multi-instruction return type body — not yet supported.
|
||||
sema->fn_ret_ty = IP_INDEX_VOID_TYPE;
|
||||
}
|
||||
|
||||
// --- Set up block for function body ---
|
||||
SemaBlock fn_block;
|
||||
@@ -613,6 +649,22 @@ static bool analyzeBodyInner(
|
||||
}
|
||||
return false;
|
||||
|
||||
// ret_node: explicit `return <expr>;`.
|
||||
// Ported from src/Sema.zig zirRetNode / analyzeRet.
|
||||
case ZIR_INST_RET_NODE: {
|
||||
ZirInstRef operand_ref
|
||||
= sema->code.inst_datas[inst].un_node.operand;
|
||||
AirInstRef operand = resolveInst(sema, operand_ref);
|
||||
// Coerce the operand to the function return type.
|
||||
operand = coerceIntRef(sema, operand, sema->fn_ret_ty);
|
||||
AirInstData ret_data;
|
||||
memset(&ret_data, 0, sizeof(ret_data));
|
||||
ret_data.un_op.operand = operand;
|
||||
// ReleaseFast: use ret, not ret_safe (no safety checks).
|
||||
(void)blockAddInst(block, AIR_INST_RET, ret_data);
|
||||
return false;
|
||||
}
|
||||
|
||||
// func: function declaration.
|
||||
// Ported from src/Sema.zig zirFunc.
|
||||
case ZIR_INST_FUNC:
|
||||
|
||||
@@ -126,7 +126,7 @@ typedef struct {
|
||||
typedef struct Sema {
|
||||
InternPool* ip;
|
||||
Zir code;
|
||||
AirInstTag* air_inst_tags;
|
||||
uint8_t* air_inst_tags; // AirInstTag stored as uint8_t (matches Zig u8)
|
||||
AirInstData* air_inst_datas;
|
||||
uint32_t air_inst_len;
|
||||
uint32_t air_inst_cap;
|
||||
|
||||
@@ -303,6 +303,90 @@ fn cToOpt(comptime T: type, ptr: [*c]T) ?[*]const T {
|
||||
return if (ptr == null) null else @ptrCast(ptr);
|
||||
}
|
||||
|
||||
/// Canonicalize an AIR Ref for comparison. Inst refs (bit 31 set)
|
||||
/// and the special NONE sentinel are returned as-is. IP refs (bit 31
|
||||
/// clear) are assigned a sequential canonical ID via the map, in
|
||||
/// order of first appearance, so that two AIR streams that intern
|
||||
/// the same values in the same order produce identical canonical IDs
|
||||
/// even when the raw InternPool indices differ.
|
||||
fn canonicalizeRef(
|
||||
ref: u32,
|
||||
map: *std.AutoHashMap(u32, u32),
|
||||
next_id: *u32,
|
||||
) u32 {
|
||||
if (ref == 0xFFFFFFFF) return ref; // AIR_REF_NONE
|
||||
if ((ref >> 31) != 0) return ref; // Inst ref — keep as-is
|
||||
// IP ref — canonicalize.
|
||||
const gop = map.getOrPut(ref) catch unreachable;
|
||||
if (!gop.found_existing) {
|
||||
gop.value_ptr.* = next_id.*;
|
||||
next_id.* += 1;
|
||||
}
|
||||
return gop.value_ptr.*;
|
||||
}
|
||||
|
||||
/// Return which of the two 4-byte slots in Air.Inst.Data are Refs
|
||||
/// for a given AIR instruction tag. [0] = bytes [0:4], [1] = bytes
|
||||
/// [4:8]. Non-ref slots (line/column, payload indices, padding)
|
||||
/// are compared directly.
|
||||
fn airDataRefSlots(tag_val: u8) [2]bool {
|
||||
return switch (tag_val) {
|
||||
// no_op: no meaningful data
|
||||
c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR => .{ false, false },
|
||||
// dbg_stmt: line(u32) + column(u32)
|
||||
c.AIR_INST_DBG_STMT, c.AIR_INST_DBG_EMPTY_STMT => .{ false, false },
|
||||
// un_op: operand(Ref) + pad
|
||||
c.AIR_INST_RET, c.AIR_INST_RET_SAFE, c.AIR_INST_RET_LOAD,
|
||||
c.AIR_INST_NOT, c.AIR_INST_BITCAST, c.AIR_INST_LOAD,
|
||||
c.AIR_INST_NEG, c.AIR_INST_NEG_OPTIMIZED,
|
||||
c.AIR_INST_IS_NULL, c.AIR_INST_IS_NON_NULL,
|
||||
c.AIR_INST_IS_NULL_PTR, c.AIR_INST_IS_NON_NULL_PTR,
|
||||
c.AIR_INST_IS_ERR, c.AIR_INST_IS_NON_ERR,
|
||||
c.AIR_INST_IS_ERR_PTR, c.AIR_INST_IS_NON_ERR_PTR,
|
||||
c.AIR_INST_SQRT, c.AIR_INST_SIN, c.AIR_INST_COS,
|
||||
c.AIR_INST_TAN, c.AIR_INST_EXP, c.AIR_INST_EXP2,
|
||||
c.AIR_INST_LOG, c.AIR_INST_LOG2, c.AIR_INST_LOG10,
|
||||
c.AIR_INST_FLOOR, c.AIR_INST_CEIL, c.AIR_INST_ROUND,
|
||||
c.AIR_INST_TRUNC_FLOAT,
|
||||
c.AIR_INST_IS_NAMED_ENUM_VALUE, c.AIR_INST_TAG_NAME,
|
||||
c.AIR_INST_ERROR_NAME, c.AIR_INST_CMP_LT_ERRORS_LEN,
|
||||
c.AIR_INST_C_VA_END,
|
||||
=> .{ true, false },
|
||||
// ty: type(Ref) + pad
|
||||
c.AIR_INST_ALLOC, c.AIR_INST_RET_PTR,
|
||||
c.AIR_INST_C_VA_START,
|
||||
=> .{ true, false },
|
||||
// bin_op: lhs(Ref) + rhs(Ref)
|
||||
c.AIR_INST_ADD, c.AIR_INST_ADD_SAFE, c.AIR_INST_ADD_OPTIMIZED,
|
||||
c.AIR_INST_SUB, c.AIR_INST_SUB_SAFE, c.AIR_INST_SUB_OPTIMIZED,
|
||||
c.AIR_INST_MUL, c.AIR_INST_MUL_SAFE, c.AIR_INST_MUL_OPTIMIZED,
|
||||
c.AIR_INST_BOOL_AND, c.AIR_INST_BOOL_OR,
|
||||
c.AIR_INST_STORE, c.AIR_INST_STORE_SAFE,
|
||||
c.AIR_INST_BIT_AND, c.AIR_INST_BIT_OR, c.AIR_INST_XOR,
|
||||
c.AIR_INST_CMP_LT, c.AIR_INST_CMP_LTE,
|
||||
c.AIR_INST_CMP_EQ, c.AIR_INST_CMP_GTE,
|
||||
c.AIR_INST_CMP_GT, c.AIR_INST_CMP_NEQ,
|
||||
=> .{ true, true },
|
||||
// ty_op: type(Ref) + operand(Ref)
|
||||
c.AIR_INST_INTCAST, c.AIR_INST_INTCAST_SAFE,
|
||||
c.AIR_INST_TRUNC, c.AIR_INST_FPTRUNC, c.AIR_INST_FPEXT,
|
||||
c.AIR_INST_OPTIONAL_PAYLOAD, c.AIR_INST_OPTIONAL_PAYLOAD_PTR,
|
||||
c.AIR_INST_WRAP_OPTIONAL,
|
||||
c.AIR_INST_UNWRAP_ERRUNION_PAYLOAD,
|
||||
c.AIR_INST_UNWRAP_ERRUNION_ERR,
|
||||
c.AIR_INST_WRAP_ERRUNION_PAYLOAD,
|
||||
c.AIR_INST_WRAP_ERRUNION_ERR,
|
||||
c.AIR_INST_ARRAY_TO_SLICE,
|
||||
=> .{ true, true },
|
||||
// arg: type(Ref) + zir_param_index(u32)
|
||||
c.AIR_INST_ARG => .{ true, false },
|
||||
// Default: assume no refs (compare directly).
|
||||
// If a tag with refs is missed, the comparison will fail
|
||||
// and we add it here.
|
||||
else => .{ false, false },
|
||||
};
|
||||
}
|
||||
|
||||
fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !void {
|
||||
if (zig_air.inst_len != c_air.inst_len) {
|
||||
std.debug.print("'{s}': inst_len mismatch: zig={d} c={d}\n", .{ name, zig_air.inst_len, c_air.inst_len });
|
||||
@@ -312,23 +396,31 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
|
||||
|
||||
// Tags
|
||||
if (inst_len > 0) {
|
||||
const zig_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, zig_air.inst_tags) orelse {
|
||||
const zig_tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags) orelse {
|
||||
std.debug.print("'{s}': Zig inst_tags is null but inst_len={d}\n", .{ name, inst_len });
|
||||
return error.AirMismatch;
|
||||
});
|
||||
const c_tags: [*]const u8 = @ptrCast(cToOpt(c.AirInstTag, c_air.inst_tags) orelse {
|
||||
};
|
||||
const c_tags: [*]const u8 = cToOpt(u8, c_air.inst_tags) orelse {
|
||||
std.debug.print("'{s}': C inst_tags is null but inst_len={d}\n", .{ name, inst_len });
|
||||
return error.AirMismatch;
|
||||
});
|
||||
};
|
||||
if (!std.mem.eql(u8, zig_tags[0..inst_len], c_tags[0..inst_len])) {
|
||||
std.debug.print("'{s}': tags mismatch (inst_len={d})\n", .{ name, inst_len });
|
||||
std.debug.print("'{s}': tags mismatch (inst_len={d}):", .{ name, inst_len });
|
||||
for (0..inst_len) |j| {
|
||||
std.debug.print(" zig[{d}]={d} c[{d}]={d}", .{ j, zig_tags[j], j, c_tags[j] });
|
||||
}
|
||||
std.debug.print("\n", .{});
|
||||
return error.AirMismatch;
|
||||
}
|
||||
}
|
||||
|
||||
// Datas (8 bytes per instruction)
|
||||
// Datas (8 bytes per instruction, tag-aware comparison)
|
||||
// IP refs may differ between C and Zig InternPools, so we use
|
||||
// canonical renumbering: each unique IP ref gets a sequential ID
|
||||
// in order of first appearance. Inst refs (bit 31 set) and
|
||||
// non-ref fields are compared directly.
|
||||
if (inst_len > 0) {
|
||||
const byte_len = inst_len * 8;
|
||||
const zig_tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags) orelse unreachable;
|
||||
const zig_datas: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas) orelse {
|
||||
std.debug.print("'{s}': Zig inst_datas is null but inst_len={d}\n", .{ name, inst_len });
|
||||
return error.AirMismatch;
|
||||
@@ -337,9 +429,41 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
|
||||
std.debug.print("'{s}': C inst_datas is null but inst_len={d}\n", .{ name, inst_len });
|
||||
return error.AirMismatch;
|
||||
});
|
||||
if (!std.mem.eql(u8, zig_datas[0..byte_len], c_datas[0..byte_len])) {
|
||||
std.debug.print("'{s}': datas mismatch (inst_len={d})\n", .{ name, inst_len });
|
||||
return error.AirMismatch;
|
||||
var zig_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
|
||||
defer zig_ref_map.deinit();
|
||||
var c_ref_map = std.AutoHashMap(u32, u32).init(std.testing.allocator);
|
||||
defer c_ref_map.deinit();
|
||||
var next_zig_id: u32 = 0;
|
||||
var next_c_id: u32 = 0;
|
||||
|
||||
for (0..inst_len) |j| {
|
||||
const off = j * 8;
|
||||
const tag_val = zig_tags[j];
|
||||
// Determine which 4-byte slots are refs based on the tag.
|
||||
// Slot 0 = bytes [0:4], Slot 1 = bytes [4:8].
|
||||
const ref_slots = airDataRefSlots(tag_val);
|
||||
|
||||
for (0..2) |slot| {
|
||||
const s = off + slot * 4;
|
||||
const zig_word = std.mem.readInt(u32, zig_datas[s..][0..4], .little);
|
||||
const c_word = std.mem.readInt(u32, c_datas[s..][0..4], .little);
|
||||
|
||||
if (ref_slots[slot]) {
|
||||
// This slot is a Ref — canonicalize IP refs.
|
||||
const zig_canon = canonicalizeRef(zig_word, &zig_ref_map, &next_zig_id);
|
||||
const c_canon = canonicalizeRef(c_word, &c_ref_map, &next_c_id);
|
||||
if (zig_canon != c_canon) {
|
||||
std.debug.print("'{s}': datas ref mismatch at inst[{d}] slot {d}: zig=0x{x} c=0x{x} (canon: zig={d} c={d})\n", .{ name, j, slot, zig_word, c_word, zig_canon, c_canon });
|
||||
return error.AirMismatch;
|
||||
}
|
||||
} else {
|
||||
// Non-ref field — compare directly.
|
||||
if (zig_word != c_word) {
|
||||
std.debug.print("'{s}': datas mismatch at inst[{d}] slot {d}: zig=0x{x} c=0x{x}\n", .{ name, j, slot, zig_word, c_word });
|
||||
return error.AirMismatch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -391,7 +515,7 @@ test "sema air: empty void function" {
|
||||
}
|
||||
|
||||
test "sema air: return integer" {
|
||||
if (true) return error.SkipZigTest;
|
||||
//if (true) return error.SkipZigTest;
|
||||
try semaAirRawCheck("export fn f() u32 { return 42; }");
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user