sema: add mul, intcast, truncate, shl/shr, dbg_var_val handlers

Add new ZIR instruction handlers for:
- MUL (dispatch to existing zirArithmetic)
- INT_CAST, TRUNCATE (new zirTyOpCast generic handler)
- SHL, SHR (new zirShl handler)
- TYPEOF_LOG2_INT_TYPE, AS_SHIFT_OPERAND (shift type infrastructure)
- DBG_VAR_VAL, DBG_VAR_PTR (debug variable annotations)

Infrastructure additions:
- semaAppendAirString: stores NullTerminatedString in AIR extra array
- zirTypeofLog2IntType: computes log2 integer type for shift amounts
- zirAsShiftOperand: coerces shift operand to correct type
- Extend semaTypeOf for MUL, SHL, SHR, INTCAST, TRUNC tags
- Normalize NTS padding in test comparison (Zig leaves trailing
  bytes uninitialised; the C side zeroes them)

New tests: dbg_var_val, mul, intcast, truncate, shl, shr, chain of
casts, mixed arithmetic+bitwise, shift+mask, neghf2-equivalent.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-20 13:51:40 +00:00
parent 456a50694d
commit ad7aead47d
3 changed files with 328 additions and 4 deletions

View File

@@ -258,6 +258,55 @@ static void zirDbgStmt(Sema* sema, SemaBlock* block, uint32_t inst) {
(void)blockAddInst(block, AIR_INST_DBG_STMT, data);
}
// semaAppendAirString: store a NullTerminatedString in the AIR extra array.
// Ported from src/Sema.zig appendAirString.
// The string bytes plus the terminating NUL are packed into whole u32 words
// appended at the current end of the extra array; bytes left over in the last
// word are zeroed so the stored words are fully deterministic.
// Returns the extra array index (in u32 words) where the string starts.
// Exits the process if the extra array cannot be grown.
static uint32_t semaAppendAirString(Sema* sema, const char* str) {
    uint32_t result = sema->air_extra_len;
    uint32_t len = (uint32_t)strlen(str);
    uint32_t num_words = (len + 1 + 3) / 4; // str + NUL, round up to u32
    uint32_t needed = sema->air_extra_len + num_words;

    if (needed > sema->air_extra_cap) {
        // Work out the final capacity first so we realloc only once.
        // Seed with 1 when the capacity is 0: doubling zero never grows and
        // would otherwise spin forever.
        uint32_t new_cap = sema->air_extra_cap ? sema->air_extra_cap : 1;
        while (new_cap < needed) {
            // Doubling past UINT32_MAX would wrap to a smaller value.
            if (new_cap > UINT32_MAX / 2)
                exit(1);
            new_cap *= 2;
        }
        uint32_t* new_extra
            = realloc(sema->air_extra, new_cap * sizeof(uint32_t));
        if (!new_extra)
            exit(1);
        sema->air_extra = new_extra;
        sema->air_extra_cap = new_cap;
    }

    // Zero-fill the destination words, then copy the string bytes. The
    // zero-fill supplies both the NUL terminator and the trailing padding.
    uint32_t* dst = &sema->air_extra[sema->air_extra_len];
    memset(dst, 0, num_words * sizeof(uint32_t));
    memcpy(dst, str, len);
    sema->air_extra_len = needed;
    return result;
}
// zirDbgVar: emit a debug variable annotation (dbg_var_val / dbg_var_ptr).
// Ported from src/Sema.zig zirDbgVar.
// Reads the str_op payload of `inst`, copies the variable name into the AIR
// extra array and appends a pl_op instruction tagged `air_tag` to `block`.
// Nothing is emitted when the block is comptime.
static void zirDbgVar(
    Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) {
    if (!block->is_comptime) {
        ZirInstRef var_ref = sema->code.inst_datas[inst].str_op.operand;
        uint32_t name_off = sema->code.inst_datas[inst].str_op.str;

        // Resolve the operand before touching the extra array, matching the
        // order of the side-effecting calls in the upstream port.
        AirInstRef var_air = resolveInst(sema, var_ref);
        uint32_t name_extra = semaAppendAirString(
            sema, (const char*)&sema->code.string_bytes[name_off]);

        AirInstData pl;
        memset(&pl, 0, sizeof(pl));
        pl.pl_op.payload = name_extra;
        pl.pl_op.operand = var_air;
        (void)blockAddInst(block, air_tag, pl);
    }
}
// --- Declaration.Flags.Id helpers ---
// Ported from lib/std/zig/Zir.zig Declaration.Flags.Id methods.
// The Id is a 5-bit enum packed in the upper bits of Declaration.flags_1.
@@ -410,13 +459,20 @@ static TypeIndex semaTypeOf(Sema* sema, AirInstRef ref) {
switch (inst_tag) {
case AIR_INST_ARG:
return AIR_REF_TO_IP(sema->air_inst_datas[inst_idx].arg.ty_ref);
// bin_op: type derived from LHS.
case AIR_INST_ADD:
case AIR_INST_SUB:
case AIR_INST_MUL:
case AIR_INST_BIT_AND:
case AIR_INST_BIT_OR:
case AIR_INST_XOR:
case AIR_INST_SHL:
case AIR_INST_SHR:
return semaTypeOf(sema, sema->air_inst_datas[inst_idx].bin_op.lhs);
// ty_op: type from ty_ref field.
case AIR_INST_BITCAST:
case AIR_INST_INTCAST:
case AIR_INST_TRUNC:
return AIR_REF_TO_IP(sema->air_inst_datas[inst_idx].ty_op.ty_ref);
default:
assert(0 && "semaTypeOf: unhandled AIR tag");
@@ -512,6 +568,91 @@ static AirInstRef zirBitcast(Sema* sema, SemaBlock* block, uint32_t inst) {
return blockAddInst(block, AIR_INST_BITCAST, data);
}
// zirTyOpCast: shared handler for cast-style ZIR instructions whose AIR form
// is a ty_op (used for intcast and truncate).
// Ported from src/Sema.zig zirIntCast / zirTruncate (simplified).
// The pl_node payload holds two words: the destination type ref followed by
// the operand ref. The destination type must be a pre-interned ref (asserted).
// Returns the ref of the emitted `air_tag` instruction, or the result of
// semaCoerce when the resolved operand is not a runtime instruction.
static AirInstRef zirTyOpCast(
    Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) {
    uint32_t extra_at = sema->code.inst_datas[inst].pl_node.payload_index;
    ZirInstRef ty_zir = sema->code.extra[extra_at];
    ZirInstRef src_zir = sema->code.extra[extra_at + 1];

    assert(ty_zir < ZIR_REF_START_INDEX);
    TypeIndex target_ty = ty_zir;

    AirInstRef src = resolveInst(sema, src_zir);
    if (AIR_REF_IS_INST(src)) {
        // Runtime operand: emit the actual cast instruction.
        AirInstData cast;
        memset(&cast, 0, sizeof(cast));
        cast.ty_op.operand = src;
        cast.ty_op.ty_ref = AIR_REF_FROM_IP(target_ty);
        return blockAddInst(block, air_tag, cast);
    }
    // Non-instruction operand (comptime value): coerce instead of casting.
    return semaCoerce(sema, block, target_ty, src);
}
// zirTypeofLog2IntType: derive the shift-amount type for an integer operand.
// Ported from src/Sema.zig zirTypeofLog2IntType.
// Given an operand whose type is an N-bit integer, interns the unsigned
// integer type with ceil(log2(N)) bits (e.g. u32 → u5, u16 → u4) and maps
// `inst` to that type in the instruction map.
static void zirTypeofLog2IntType(Sema* sema, uint32_t inst) {
    ZirInstRef src_zir = sema->code.inst_datas[inst].un_node.operand;
    TypeIndex src_ty = semaTypeOf(sema, resolveInst(sema, src_zir));
    assert(sema->ip->items[src_ty].tag == IP_KEY_INT_TYPE);

    uint16_t width = sema->ip->items[src_ty].data.int_type.bits;

    // Number of bits needed to hold the largest valid shift (width - 1).
    // A zero-width operand yields a zero-bit result.
    uint16_t max_shift = 0;
    if (width > 0)
        max_shift = (uint16_t)(width - 1);
    uint16_t log2_bits = 0;
    for (uint16_t rest = max_shift; rest != 0; rest >>= 1)
        log2_bits++;

    // Intern the unsigned integer type of that width.
    InternPoolKey shift_key;
    memset(&shift_key, 0, sizeof(shift_key));
    shift_key.tag = IP_KEY_INT_TYPE;
    shift_key.data.int_type.signedness = 0; // unsigned
    shift_key.data.int_type.bits = log2_bits;
    InternPoolIndex shift_ty = ipIntern(sema->ip, shift_key);

    instMapPut(&sema->inst_map, inst, AIR_REF_FROM_IP(shift_ty));
}
// zirAsShiftOperand: coerce a shift amount to its required type.
// Ported from src/Sema.zig zirAsShiftOperand.
// The pl_node payload has the same two-word layout as as_node: destination
// type ref, then operand ref. The coerced value is stored in the instruction
// map under `inst`.
static void zirAsShiftOperand(Sema* sema, uint32_t inst) {
    uint32_t extra_at = sema->code.inst_datas[inst].pl_node.payload_index;
    ZirInstRef ty_zir = sema->code.extra[extra_at];
    ZirInstRef amt_zir = sema->code.extra[extra_at + 1];

    // The destination type is produced by typeof_log2_int_type, which maps
    // its instruction to an interned type ref rather than an AIR instruction.
    AirInstRef ty_air = resolveInst(sema, ty_zir);
    assert(!AIR_REF_IS_INST(ty_air));
    TypeIndex shift_ty = (TypeIndex)ty_air;

    // The shift amount is typically a comptime int; no block is passed to
    // the coercion.
    AirInstRef amount = resolveInst(sema, amt_zir);
    instMapPut(
        &sema->inst_map, inst, semaCoerce(sema, NULL, shift_ty, amount));
}
// zirShl: handle the shift ZIR instructions (called for both shl and shr,
// with the AIR tag selected by the caller).
// Ported from src/Sema.zig zirShl (simplified, runtime path only).
// Reads the two-word pl_node payload (lhs ref, rhs ref), resolves both
// operands and appends a bin_op instruction tagged `air_tag` to `block`.
// Returns the ref of the new instruction.
static AirInstRef zirShl(
    Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) {
    uint32_t extra_at = sema->code.inst_datas[inst].pl_node.payload_index;
    ZirInstRef value_zir = sema->code.extra[extra_at];
    ZirInstRef amount_zir = sema->code.extra[extra_at + 1];

    AirInstData shift;
    memset(&shift, 0, sizeof(shift));
    // Resolve lhs before rhs, as the upstream port does.
    shift.bin_op.lhs = resolveInst(sema, value_zir);
    shift.bin_op.rhs = resolveInst(sema, amount_zir);
    return blockAddInst(block, air_tag, shift);
}
// zirAsNode: handle @as ZIR instruction.
// Ported from src/Sema.zig zirAs / zirAsNode.
static AirInstRef zirAsNode(
@@ -637,7 +778,6 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
uint32_t saved_extra_cap = sema->air_extra_cap;
InstMap saved_map = sema->inst_map;
TypeIndex saved_fn_ret_ty = sema->fn_ret_ty;
// --- Set up fresh AIR arrays for the function body ---
sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
@@ -1000,6 +1140,17 @@ static bool analyzeBodyInner(
i++;
continue;
// dbg_var_val / dbg_var_ptr: debug variable annotations.
// Ported from src/Sema.zig zirDbgVar.
case ZIR_INST_DBG_VAR_VAL:
zirDbgVar(sema, block, inst, AIR_INST_DBG_VAR_VAL);
i++;
continue;
case ZIR_INST_DBG_VAR_PTR:
zirDbgVar(sema, block, inst, AIR_INST_DBG_VAR_PTR);
i++;
continue;
// restore_err_ret_index_fn_entry: error return trace restore.
// In ReleaseFast (no safety), this is a no-op.
// Ported from src/Sema.zig zirRestoreErrRetIndex.
@@ -1094,7 +1245,7 @@ static bool analyzeBodyInner(
continue;
}
// Arithmetic: add, sub.
// Arithmetic: add, sub, mul.
case ZIR_INST_ADD:
instMapPut(&sema->inst_map, inst,
zirArithmetic(sema, block, inst, AIR_INST_ADD));
@@ -1105,6 +1256,11 @@ static bool analyzeBodyInner(
zirArithmetic(sema, block, inst, AIR_INST_SUB));
i++;
continue;
case ZIR_INST_MUL:
instMapPut(&sema->inst_map, inst,
zirArithmetic(sema, block, inst, AIR_INST_MUL));
i++;
continue;
// Bitwise: xor, bit_and, bit_or.
case ZIR_INST_XOR:
@@ -1130,6 +1286,44 @@ static bool analyzeBodyInner(
i++;
continue;
// @intCast.
case ZIR_INST_INT_CAST:
instMapPut(&sema->inst_map, inst,
zirTyOpCast(sema, block, inst, AIR_INST_INTCAST));
i++;
continue;
// @truncate.
case ZIR_INST_TRUNCATE:
instMapPut(&sema->inst_map, inst,
zirTyOpCast(sema, block, inst, AIR_INST_TRUNC));
i++;
continue;
// Shift operations.
case ZIR_INST_SHL:
instMapPut(&sema->inst_map, inst,
zirShl(sema, block, inst, AIR_INST_SHL));
i++;
continue;
case ZIR_INST_SHR:
instMapPut(&sema->inst_map, inst,
zirShl(sema, block, inst, AIR_INST_SHR));
i++;
continue;
// Shift type computation.
case ZIR_INST_TYPEOF_LOG2_INT_TYPE:
zirTypeofLog2IntType(sema, inst);
i++;
continue;
// Shift operand coercion.
case ZIR_INST_AS_SHIFT_OPERAND:
zirAsShiftOperand(sema, inst);
i++;
continue;
// @as.
case ZIR_INST_AS_NODE:
instMapPut(&sema->inst_map, inst,

View File

@@ -392,6 +392,8 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR => .{ false, false },
// dbg_stmt: line(u32) + column(u32)
c.AIR_INST_DBG_STMT, c.AIR_INST_DBG_EMPTY_STMT => .{ false, false },
// pl_op: operand(Ref) + payload(u32)
c.AIR_INST_DBG_VAR_PTR, c.AIR_INST_DBG_VAR_VAL, c.AIR_INST_DBG_ARG_INLINE => .{ true, false },
// un_op: operand(Ref) + pad
c.AIR_INST_RET,
c.AIR_INST_RET_SAFE,
@@ -449,6 +451,11 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_BIT_AND,
c.AIR_INST_BIT_OR,
c.AIR_INST_XOR,
c.AIR_INST_SHL,
c.AIR_INST_SHL_EXACT,
c.AIR_INST_SHL_SAT,
c.AIR_INST_SHR,
c.AIR_INST_SHR_EXACT,
c.AIR_INST_CMP_LT,
c.AIR_INST_CMP_LTE,
c.AIR_INST_CMP_EQ,
@@ -481,6 +488,26 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
};
}
/// Zero the padding bytes that follow the null terminator of a
/// NullTerminatedString stored in the extra array, up to the next u32
/// boundary. Zig's appendAirString leaves that padding uninitialised while
/// the C side zeroes it, so normalising both sides makes them comparable.
/// `nts_index` is a word index into `extra`; index 0 and out-of-range
/// indices are ignored.
fn normalizeNtsPadding(extra: []u32, nts_index: u32) void {
    const in_range = nts_index != 0 and nts_index < extra.len;
    if (!in_range) return;

    const raw = std.mem.sliceAsBytes(extra);

    // Walk forward from the first byte of the string to its terminator
    // (or to the end of the array if none is present).
    var pos = nts_index * 4;
    while (pos < raw.len and raw[pos] != 0) {
        pos += 1;
    }

    // Step past the terminator and clear everything up to the word boundary.
    pos += 1;
    const word_end = ((pos + 3) / 4) * 4;
    while (pos < word_end and pos < raw.len) {
        raw[pos] = 0;
        pos += 1;
    }
}
fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !void {
if (zig_air.inst_len != c_air.inst_len) {
std.debug.print("'{s}': inst_len mismatch: zig={d} c={d}\n", .{ name, zig_air.inst_len, c_air.inst_len });
@@ -578,8 +605,39 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
std.debug.print("'{s}': C extra is null but extra_len={d}\n", .{ name, extra_len });
return error.AirMismatch;
};
if (!std.mem.eql(u32, zig_extra[0..extra_len], c_extra[0..extra_len])) {
// Make mutable copies and normalize NullTerminatedString padding.
// Zig's appendAirString leaves trailing bytes uninitialised (0xaa
// in debug); the C side zeroes them. Normalise both to zero.
const zig_extra_copy = try std.testing.allocator.alloc(u32, extra_len);
defer std.testing.allocator.free(zig_extra_copy);
@memcpy(zig_extra_copy, zig_extra[0..extra_len]);
const c_extra_copy = try std.testing.allocator.alloc(u32, extra_len);
defer std.testing.allocator.free(c_extra_copy);
@memcpy(c_extra_copy, c_extra[0..extra_len]);
if (inst_len > 0) {
const tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags).?;
const zig_datas_raw: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas).?);
const c_datas_raw: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, c_air.inst_datas).?);
for (0..inst_len) |j| {
if (tags[j] == c.AIR_INST_DBG_VAR_VAL or
tags[j] == c.AIR_INST_DBG_VAR_PTR or
tags[j] == c.AIR_INST_DBG_ARG_INLINE)
{
// pl_op: slot 0 = operand, slot 1 = payload (NullTerminatedString)
const zig_nts = std.mem.readInt(u32, zig_datas_raw[j * 8 + 4 ..][0..4], .little);
const c_nts = std.mem.readInt(u32, c_datas_raw[j * 8 + 4 ..][0..4], .little);
normalizeNtsPadding(zig_extra_copy, zig_nts);
normalizeNtsPadding(c_extra_copy, c_nts);
}
}
}
if (!std.mem.eql(u32, zig_extra_copy, c_extra_copy)) {
std.debug.print("'{s}': extra mismatch (extra_len={d})\n", .{ name, extra_len });
for (0..extra_len) |ei| {
if (zig_extra_copy[ei] != c_extra_copy[ei]) {
std.debug.print(" extra[{d}]: zig=0x{x} c=0x{x}\n", .{ ei, zig_extra_copy[ei], c_extra_copy[ei] });
}
}
return error.AirMismatch;
}
}
@@ -646,6 +704,14 @@ test "sema air: as node" {
try semaAirRawCheck("export fn f(x: u32) u32 { return @as(u32, x); }");
}
// A local `const` initialised from `x + 1` and then returned.
test "sema air: local const binding" {
try semaAirRawCheck("export fn f(x: u32) u32 { const y = x + 1; return y; }");
}
// Sum of two parameters XORed with an integer literal.
test "sema air: multiple operations" {
try semaAirRawCheck("export fn f(x: u32, y: u32) u32 { return (x + y) ^ 0xFF; }");
}
test "sema air: neghf2 inline equivalent" {
try semaAirRawCheck(
\\export fn f(a: f16) f16 {
@@ -653,3 +719,67 @@ test "sema air: neghf2 inline equivalent" {
\\}
);
}
// Each case below hands a small Zig source snippet to semaAirRawCheck.
// NOTE(review): semaAirRawCheck is defined outside this view; presumably it
// compares the AIR produced by the C port against the Zig compiler's — confirm.

// Multiply with both operands being runtime parameters.
test "sema air: mul two args" {
try semaAirRawCheck("export fn f(x: u32, y: u32) u32 { return x * y; }");
}
// TODO: bool and/or require block merges and conditional analysis.
// test "sema air: bool and" {
// try semaAirRawCheck("export fn f(x: bool, y: bool) bool { return x and y; }");
// }
// Right shift of a parameter by an integer literal.
test "sema air: bit shift right" {
try semaAirRawCheck("export fn f(x: u32) u32 { return x >> 1; }");
}
// Multiply a parameter by an integer literal.
test "sema air: mul comptime int" {
try semaAirRawCheck("export fn f(x: u32) u32 { return x * 3; }");
}
// Two successive @intCast calls: u8 to a u16 local, then to the u32 result.
test "sema air: chain of casts" {
try semaAirRawCheck(
\\export fn f(x: u8) u32 {
\\ const wide: u16 = @intCast(x);
\\ return @intCast(wide);
\\}
);
}
// Add two parameters, then mask the sum with an integer literal.
test "sema air: mixed arithmetic and bitwise" {
try semaAirRawCheck(
\\export fn f(a: u32, b: u32) u32 {
\\ return (a + b) & 0xFF;
\\}
);
}
// Right shift by a literal, then mask with an integer literal.
test "sema air: shift and mask" {
try semaAirRawCheck(
\\export fn f(x: u32) u32 {
\\ return (x >> 8) & 0xFF;
\\}
);
}
// Left shift of a parameter by an integer literal.
test "sema air: bit shift left" {
try semaAirRawCheck("export fn f(x: u32) u32 { return x << 1; }");
}
// @intCast from a u16 parameter to the u32 return type.
test "sema air: intcast" {
try semaAirRawCheck("export fn f(x: u16) u32 { return @intCast(x); }");
}
// @truncate from a u32 parameter to the u16 return type.
test "sema air: truncate" {
try semaAirRawCheck("export fn f(x: u32) u16 { return @truncate(x); }");
}
// Two independent local `const` bindings combined with XOR.
test "sema air: two local bindings" {
try semaAirRawCheck(
\\export fn f(x: u32, y: u32) u32 {
\\ const a = x + 1;
\\ const b = y + 2;
\\ return a ^ b;
\\}
);
}

View File

@@ -97,7 +97,7 @@ const corpus_files = .{
"../lib/std/crypto/codecs.zig", // 165
"../lib/std/os/uefi/tables/table_header.zig", // 214
"../lib/std/zig/llvm.zig", // 247
//"../lib/compiler_rt/neghf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete
//"../lib/compiler_rt/neghf2.zig", // 265 -- needs decl_ref + field_call (inline fn from import)
//"../lib/compiler_rt/negxf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete
//"../lib/compiler_rt/absvdi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete
//"../lib/compiler_rt/absvsi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete