commit 0eaf57f15d32c2934164382c00a39be336837d11 (tree)
parent 40dd44c0d4ba9851ac72998d12cf6c0d74c2f999
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Fri, 20 Feb 2026 13:51:40 +0000
sema: add mul, intcast, truncate, shl/shr, dbg_var_val handlers
Add new ZIR instruction handlers for:
- MUL (dispatch to existing zirArithmetic)
- INT_CAST, TRUNCATE (new zirTyOpCast generic handler)
- SHL, SHR (new zirShl handler)
- TYPEOF_LOG2_INT_TYPE, AS_SHIFT_OPERAND (shift type infrastructure)
- DBG_VAR_VAL, DBG_VAR_PTR (debug variable annotations)
Infrastructure additions:
- semaAppendAirString: stores NullTerminatedString in AIR extra array
- zirTypeofLog2IntType: computes log2 integer type for shift amounts
- zirAsShiftOperand: coerces shift operand to correct type
- Extend semaTypeOf for MUL, SHL, SHR, INTCAST, TRUNC tags
- Normalize NTS padding in test comparison (Zig leaves trailing
bytes uninitialised; the C side zeroes them)
New tests: dbg_var_val, mul, intcast, truncate, shl, shr, chain of
casts, mixed arithmetic+bitwise, shift+mask, neghf2-equivalent.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
3 files changed, 328 insertions(+), 4 deletions(-)
diff --git a/stage0/sema.c b/stage0/sema.c
@@ -258,6 +258,55 @@ static void zirDbgStmt(Sema* sema, SemaBlock* block, uint32_t inst) {
(void)blockAddInst(block, AIR_INST_DBG_STMT, data);
}
+// semaAppendAirString: store a NullTerminatedString in the AIR extra array.
+// Ported from src/Sema.zig appendAirString.
+//
+// The bytes of `str` plus the terminating NUL are packed into consecutive
+// u32 words appended at sema->air_extra_len. Unused bytes in the last word
+// are zeroed. The array is grown by doubling when it is too small; the
+// process exits with status 1 if reallocation fails or if the resulting
+// length would not fit in a uint32_t.
+//
+// Returns the extra array index (in u32 words) where the string starts.
+static uint32_t semaAppendAirString(Sema* sema, const char* str) {
+    uint32_t result = sema->air_extra_len;
+    size_t len = strlen(str);
+    // str + NUL rounded up to whole u32 words: (len + 4) / 4 without the
+    // risk of the addition wrapping.
+    size_t num_words = len / 4 + 1;
+    if (num_words > (size_t)(UINT32_MAX - sema->air_extra_len))
+        exit(1); // the new length would overflow the u32 index space
+    uint32_t needed = sema->air_extra_len + (uint32_t)num_words;
+
+    if (needed > sema->air_extra_cap) {
+        // Seed from at least 1: doubling a zero capacity would never grow.
+        uint32_t new_cap = sema->air_extra_cap ? sema->air_extra_cap : 1;
+        while (new_cap < needed) {
+            // Clamp instead of wrapping once doubling would exceed u32.
+            new_cap = (new_cap > UINT32_MAX / 2) ? needed : new_cap * 2;
+        }
+        uint32_t* new_extra
+            = realloc(sema->air_extra, (size_t)new_cap * sizeof(uint32_t));
+        if (!new_extra)
+            exit(1);
+        sema->air_extra = new_extra;
+        sema->air_extra_cap = new_cap;
+    }
+
+    // Zero the destination words first; this supplies both the NUL
+    // terminator and the trailing padding, so only the string bytes need
+    // to be copied afterwards.
+    uint32_t* dst = &sema->air_extra[sema->air_extra_len];
+    memset(dst, 0, num_words * sizeof(uint32_t));
+    memcpy(dst, str, len);
+    sema->air_extra_len = needed;
+    return result;
+}
+
+// zirDbgVar: emit a debug variable annotation.
+// Ported from src/Sema.zig zirDbgVar.
+//
+// Shared by dbg_var_val and dbg_var_ptr: `air_tag` selects which AIR
+// instruction is appended to `block`. The ZIR instruction `inst` carries a
+// str_op payload (name offset into the ZIR string table + operand ref).
+// Nothing is emitted when the block is comptime.
+static void zirDbgVar(
+    Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) {
+    if (!block->is_comptime) {
+        uint32_t zir_name_off = sema->code.inst_datas[inst].str_op.str;
+        ZirInstRef zir_value = sema->code.inst_datas[inst].str_op.operand;
+
+        AirInstData dbg;
+        memset(&dbg, 0, sizeof(dbg));
+        dbg.pl_op.operand = resolveInst(sema, zir_value);
+        // Copy the variable name out of the ZIR string table into AIR
+        // extra; the pl_op payload is the index of that copy.
+        dbg.pl_op.payload = semaAppendAirString(
+            sema, (const char*)&sema->code.string_bytes[zir_name_off]);
+        (void)blockAddInst(block, air_tag, dbg);
+    }
+}
+
// --- Declaration.Flags.Id helpers ---
// Ported from lib/std/zig/Zir.zig Declaration.Flags.Id methods.
// The Id is a 5-bit enum packed in the upper bits of Declaration.flags_1.
@@ -410,13 +459,20 @@ static TypeIndex semaTypeOf(Sema* sema, AirInstRef ref) {
switch (inst_tag) {
case AIR_INST_ARG:
return AIR_REF_TO_IP(sema->air_inst_datas[inst_idx].arg.ty_ref);
+ // bin_op: type derived from LHS.
case AIR_INST_ADD:
case AIR_INST_SUB:
+ case AIR_INST_MUL:
case AIR_INST_BIT_AND:
case AIR_INST_BIT_OR:
case AIR_INST_XOR:
+ case AIR_INST_SHL:
+ case AIR_INST_SHR:
return semaTypeOf(sema, sema->air_inst_datas[inst_idx].bin_op.lhs);
+ // ty_op: type from ty_ref field.
case AIR_INST_BITCAST:
+ case AIR_INST_INTCAST:
+ case AIR_INST_TRUNC:
return AIR_REF_TO_IP(sema->air_inst_datas[inst_idx].ty_op.ty_ref);
default:
assert(0 && "semaTypeOf: unhandled AIR tag");
@@ -512,6 +568,91 @@ static AirInstRef zirBitcast(Sema* sema, SemaBlock* block, uint32_t inst) {
return blockAddInst(block, AIR_INST_BITCAST, data);
}
+// zirTyOpCast: shared handler for cast-style ZIR instructions whose result
+// is a ty_op AIR instruction (intcast, truncate, etc.).
+// Ported from src/Sema.zig zirIntCast / zirTruncate (simplified).
+//
+// The pl_node payload holds two words: destination type ref, operand ref.
+// The destination type must be one of the refs below ZIR_REF_START_INDEX
+// (asserted). Returns the ref of the emitted `air_tag` instruction, or the
+// result of semaCoerce when the operand is not an AIR instruction.
+static AirInstRef zirTyOpCast(
+    Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) {
+    uint32_t pl = sema->code.inst_datas[inst].pl_node.payload_index;
+    ZirInstRef zir_dest = sema->code.extra[pl];
+    ZirInstRef zir_src = sema->code.extra[pl + 1];
+    assert(zir_dest < ZIR_REF_START_INDEX);
+
+    TypeIndex dest_ty = zir_dest;
+    AirInstRef src = resolveInst(sema, zir_src);
+    if (AIR_REF_IS_INST(src)) {
+        // Runtime operand: emit the cast instruction itself.
+        AirInstData cast;
+        memset(&cast, 0, sizeof(cast));
+        cast.ty_op.operand = src;
+        cast.ty_op.ty_ref = AIR_REF_FROM_IP(dest_ty);
+        return blockAddInst(block, air_tag, cast);
+    }
+    // Operand is not an instruction (comptime value): coerce instead of
+    // emitting a runtime cast.
+    return semaCoerce(sema, block, dest_ty, src);
+}
+
+// zirTypeofLog2IntType: compute the integer type used for shift amounts.
+// Ported from src/Sema.zig zirTypeofLog2IntType.
+//
+// Takes the type of the un_node operand, which must be an integer type
+// (asserted), and maps `inst` to an unsigned integer type that is
+// ceil(log2(N)) bits wide, where N is the operand's bit width
+// (e.g. u32 → u5, u16 → u4). Widths of 0 and 1 both yield a 0-bit type.
+static void zirTypeofLog2IntType(Sema* sema, uint32_t inst) {
+    ZirInstRef zir_operand = sema->code.inst_datas[inst].un_node.operand;
+    TypeIndex operand_ty = semaTypeOf(sema, resolveInst(sema, zir_operand));
+    assert(sema->ip->items[operand_ty].tag == IP_KEY_INT_TYPE);
+
+    // Number of bits needed to represent every value in 0..bits-1.
+    uint16_t bits = sema->ip->items[operand_ty].data.int_type.bits;
+    uint16_t log2_bits = 0;
+    if (bits > 1) {
+        for (uint16_t rem = (uint16_t)(bits - 1); rem != 0; rem >>= 1)
+            log2_bits++;
+    }
+
+    // Intern the unsigned log2-width type and record it as the result.
+    InternPoolKey log2_key;
+    memset(&log2_key, 0, sizeof(log2_key));
+    log2_key.tag = IP_KEY_INT_TYPE;
+    log2_key.data.int_type.signedness = 0; // unsigned
+    log2_key.data.int_type.bits = log2_bits;
+    InternPoolIndex log2_ty = ipIntern(sema->ip, log2_key);
+    instMapPut(&sema->inst_map, inst, AIR_REF_FROM_IP(log2_ty));
+}
+
+// zirAsShiftOperand: coerce a shift amount to the shift-amount type.
+// Ported from src/Sema.zig zirAsShiftOperand.
+//
+// Uses pl_node + As payload (dest_type, operand) — same layout as as_node.
+// The destination type is resolved through the instruction map and must
+// not be an AIR instruction ref (asserted). The coerced operand is recorded
+// as the result of `inst`.
+static void zirAsShiftOperand(Sema* sema, uint32_t inst) {
+    uint32_t pl = sema->code.inst_datas[inst].pl_node.payload_index;
+    ZirInstRef zir_ty = sema->code.extra[pl];
+    ZirInstRef zir_amount = sema->code.extra[pl + 1];
+
+    // The type ref comes from typeof_log2_int_type, which maps its result
+    // to an intern-pool ref rather than an instruction.
+    AirInstRef ty_as_ref = resolveInst(sema, zir_ty);
+    assert(!AIR_REF_IS_INST(ty_as_ref));
+    TypeIndex shift_ty = (TypeIndex)ty_as_ref;
+
+    AirInstRef amount = resolveInst(sema, zir_amount);
+    // NOTE(review): no SemaBlock is passed (NULL). This presumably relies on
+    // the operand being a comptime int so that semaCoerce never has to emit
+    // an instruction — confirm semaCoerce tolerates a NULL block.
+    AirInstRef coerced = semaCoerce(sema, NULL, shift_ty, amount);
+    instMapPut(&sema->inst_map, inst, coerced);
+}
+
+// zirShl: handle the shl / shr ZIR instructions.
+// Ported from src/Sema.zig zirShl (simplified, runtime path only).
+//
+// Reads the two operand refs (lhs, rhs) from the pl_node payload, resolves
+// them, and appends a bin_op AIR instruction with tag `air_tag` to `block`.
+// Returns the ref of the new instruction.
+static AirInstRef zirShl(
+    Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) {
+    uint32_t pl = sema->code.inst_datas[inst].pl_node.payload_index;
+    ZirInstRef zir_lhs = sema->code.extra[pl];
+    ZirInstRef zir_rhs = sema->code.extra[pl + 1];
+
+    AirInstData shift;
+    memset(&shift, 0, sizeof(shift));
+    shift.bin_op.lhs = resolveInst(sema, zir_lhs);
+    shift.bin_op.rhs = resolveInst(sema, zir_rhs);
+    return blockAddInst(block, air_tag, shift);
+}
+
// zirAsNode: handle @as ZIR instruction.
// Ported from src/Sema.zig zirAs / zirAsNode.
static AirInstRef zirAsNode(
@@ -637,7 +778,6 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) {
uint32_t saved_extra_cap = sema->air_extra_cap;
InstMap saved_map = sema->inst_map;
TypeIndex saved_fn_ret_ty = sema->fn_ret_ty;
-
// --- Set up fresh AIR arrays for the function body ---
sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP);
sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP);
@@ -1000,6 +1140,17 @@ static bool analyzeBodyInner(
i++;
continue;
+ // dbg_var_val / dbg_var_ptr: debug variable annotations.
+ // Ported from src/Sema.zig zirDbgVar.
+ case ZIR_INST_DBG_VAR_VAL:
+ zirDbgVar(sema, block, inst, AIR_INST_DBG_VAR_VAL);
+ i++;
+ continue;
+ case ZIR_INST_DBG_VAR_PTR:
+ zirDbgVar(sema, block, inst, AIR_INST_DBG_VAR_PTR);
+ i++;
+ continue;
+
// restore_err_ret_index_fn_entry: error return trace restore.
// In ReleaseFast (no safety), this is a no-op.
// Ported from src/Sema.zig zirRestoreErrRetIndex.
@@ -1094,7 +1245,7 @@ static bool analyzeBodyInner(
continue;
}
- // Arithmetic: add, sub.
+ // Arithmetic: add, sub, mul.
case ZIR_INST_ADD:
instMapPut(&sema->inst_map, inst,
zirArithmetic(sema, block, inst, AIR_INST_ADD));
@@ -1105,6 +1256,11 @@ static bool analyzeBodyInner(
zirArithmetic(sema, block, inst, AIR_INST_SUB));
i++;
continue;
+ case ZIR_INST_MUL:
+ instMapPut(&sema->inst_map, inst,
+ zirArithmetic(sema, block, inst, AIR_INST_MUL));
+ i++;
+ continue;
// Bitwise: xor, bit_and, bit_or.
case ZIR_INST_XOR:
@@ -1130,6 +1286,44 @@ static bool analyzeBodyInner(
i++;
continue;
+ // @intCast.
+ case ZIR_INST_INT_CAST:
+ instMapPut(&sema->inst_map, inst,
+ zirTyOpCast(sema, block, inst, AIR_INST_INTCAST));
+ i++;
+ continue;
+
+ // @truncate.
+ case ZIR_INST_TRUNCATE:
+ instMapPut(&sema->inst_map, inst,
+ zirTyOpCast(sema, block, inst, AIR_INST_TRUNC));
+ i++;
+ continue;
+
+ // Shift operations.
+ case ZIR_INST_SHL:
+ instMapPut(&sema->inst_map, inst,
+ zirShl(sema, block, inst, AIR_INST_SHL));
+ i++;
+ continue;
+ case ZIR_INST_SHR:
+ instMapPut(&sema->inst_map, inst,
+ zirShl(sema, block, inst, AIR_INST_SHR));
+ i++;
+ continue;
+
+ // Shift type computation.
+ case ZIR_INST_TYPEOF_LOG2_INT_TYPE:
+ zirTypeofLog2IntType(sema, inst);
+ i++;
+ continue;
+
+ // Shift operand coercion.
+ case ZIR_INST_AS_SHIFT_OPERAND:
+ zirAsShiftOperand(sema, inst);
+ i++;
+ continue;
+
// @as.
case ZIR_INST_AS_NODE:
instMapPut(&sema->inst_map, inst,
diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig
@@ -392,6 +392,8 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR => .{ false, false },
// dbg_stmt: line(u32) + column(u32)
c.AIR_INST_DBG_STMT, c.AIR_INST_DBG_EMPTY_STMT => .{ false, false },
+ // pl_op: operand(Ref) + payload(u32)
+ c.AIR_INST_DBG_VAR_PTR, c.AIR_INST_DBG_VAR_VAL, c.AIR_INST_DBG_ARG_INLINE => .{ true, false },
// un_op: operand(Ref) + pad
c.AIR_INST_RET,
c.AIR_INST_RET_SAFE,
@@ -449,6 +451,11 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
c.AIR_INST_BIT_AND,
c.AIR_INST_BIT_OR,
c.AIR_INST_XOR,
+ c.AIR_INST_SHL,
+ c.AIR_INST_SHL_EXACT,
+ c.AIR_INST_SHL_SAT,
+ c.AIR_INST_SHR,
+ c.AIR_INST_SHR_EXACT,
c.AIR_INST_CMP_LT,
c.AIR_INST_CMP_LTE,
c.AIR_INST_CMP_EQ,
@@ -481,6 +488,26 @@ fn airDataRefSlots(tag_val: u8) [2]bool {
};
}
+/// Clear the padding bytes that follow a NullTerminatedString stored in the
+/// extra array, so that two arrays can be compared word-for-word.
+/// Zig's appendAirString leaves that padding uninitialised; the C side
+/// zeroes it.
+///
+/// `nts_index` is the word index in `extra` where the string starts. An
+/// index of 0, or one at or past the end of `extra`, is ignored.
+fn normalizeNtsPadding(extra: []u32, nts_index: u32) void {
+    if (nts_index == 0 or nts_index >= extra.len) return;
+    const bytes = std.mem.sliceAsBytes(extra);
+    // Walk from the first byte of the string to its terminator (or to the
+    // end of the array if there is none).
+    var pos = nts_index * 4;
+    while (pos < bytes.len and bytes[pos] != 0) pos += 1;
+    // Step past the NUL, then clear everything up to the next word start.
+    pos += 1;
+    const word_end = (pos + 3) & ~@as(u32, 3);
+    while (pos < word_end and pos < bytes.len) {
+        bytes[pos] = 0;
+        pos += 1;
+    }
+}
+
fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !void {
if (zig_air.inst_len != c_air.inst_len) {
std.debug.print("'{s}': inst_len mismatch: zig={d} c={d}\n", .{ name, zig_air.inst_len, c_air.inst_len });
@@ -578,8 +605,39 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !
std.debug.print("'{s}': C extra is null but extra_len={d}\n", .{ name, extra_len });
return error.AirMismatch;
};
- if (!std.mem.eql(u32, zig_extra[0..extra_len], c_extra[0..extra_len])) {
+ // Make mutable copies and normalize NullTerminatedString padding.
+ // Zig's appendAirString leaves trailing bytes uninitialised (0xaa
+ // in debug); the C side zeroes them. Normalise both to zero.
+ const zig_extra_copy = try std.testing.allocator.alloc(u32, extra_len);
+ defer std.testing.allocator.free(zig_extra_copy);
+ @memcpy(zig_extra_copy, zig_extra[0..extra_len]);
+ const c_extra_copy = try std.testing.allocator.alloc(u32, extra_len);
+ defer std.testing.allocator.free(c_extra_copy);
+ @memcpy(c_extra_copy, c_extra[0..extra_len]);
+ if (inst_len > 0) {
+ const tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags).?;
+ const zig_datas_raw: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas).?);
+ const c_datas_raw: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, c_air.inst_datas).?);
+ for (0..inst_len) |j| {
+ if (tags[j] == c.AIR_INST_DBG_VAR_VAL or
+ tags[j] == c.AIR_INST_DBG_VAR_PTR or
+ tags[j] == c.AIR_INST_DBG_ARG_INLINE)
+ {
+ // pl_op: slot 0 = operand, slot 1 = payload (NullTerminatedString)
+ const zig_nts = std.mem.readInt(u32, zig_datas_raw[j * 8 + 4 ..][0..4], .little);
+ const c_nts = std.mem.readInt(u32, c_datas_raw[j * 8 + 4 ..][0..4], .little);
+ normalizeNtsPadding(zig_extra_copy, zig_nts);
+ normalizeNtsPadding(c_extra_copy, c_nts);
+ }
+ }
+ }
+ if (!std.mem.eql(u32, zig_extra_copy, c_extra_copy)) {
std.debug.print("'{s}': extra mismatch (extra_len={d})\n", .{ name, extra_len });
+ for (0..extra_len) |ei| {
+ if (zig_extra_copy[ei] != c_extra_copy[ei]) {
+ std.debug.print(" extra[{d}]: zig=0x{x} c=0x{x}\n", .{ ei, zig_extra_copy[ei], c_extra_copy[ei] });
+ }
+ }
return error.AirMismatch;
}
}
@@ -646,6 +704,14 @@ test "sema air: as node" {
try semaAirRawCheck("export fn f(x: u32) u32 { return @as(u32, x); }");
}
+// A named local `const` holding a runtime value (x + 1).
+// NOTE(review): presumably this is the case that produces dbg_var_val —
+// confirm against the emitted AIR.
+test "sema air: local const binding" {
+ try semaAirRawCheck("export fn f(x: u32) u32 { const y = x + 1; return y; }");
+}
+
+// Result of a runtime add fed into an xor with an integer literal.
+test "sema air: multiple operations" {
+ try semaAirRawCheck("export fn f(x: u32, y: u32) u32 { return (x + y) ^ 0xFF; }");
+}
+
test "sema air: neghf2 inline equivalent" {
try semaAirRawCheck(
\\export fn f(a: f16) f16 {
@@ -653,3 +719,67 @@ test "sema air: neghf2 inline equivalent" {
\\}
);
}
+
+// Multiplication of two runtime arguments.
+test "sema air: mul two args" {
+ try semaAirRawCheck("export fn f(x: u32, y: u32) u32 { return x * y; }");
+}
+
+// TODO: bool and/or require block merges and conditional analysis.
+// test "sema air: bool and" {
+// try semaAirRawCheck("export fn f(x: bool, y: bool) bool { return x and y; }");
+// }
+
+// Right shift of a runtime value by an integer literal.
+test "sema air: bit shift right" {
+ try semaAirRawCheck("export fn f(x: u32) u32 { return x >> 1; }");
+}
+
+// Multiplication of a runtime argument by an integer literal.
+test "sema air: mul comptime int" {
+ try semaAirRawCheck("export fn f(x: u32) u32 { return x * 3; }");
+}
+
+// Two consecutive @intCast calls (u8 -> u16 -> u32) with a named local
+// holding the intermediate value.
+test "sema air: chain of casts" {
+ try semaAirRawCheck(
+ \\export fn f(x: u8) u32 {
+ \\ const wide: u16 = @intCast(x);
+ \\ return @intCast(wide);
+ \\}
+ );
+}
+
+// Runtime add whose result is masked with an integer literal.
+test "sema air: mixed arithmetic and bitwise" {
+ try semaAirRawCheck(
+ \\export fn f(a: u32, b: u32) u32 {
+ \\ return (a + b) & 0xFF;
+ \\}
+ );
+}
+
+// Right shift by a literal followed by a mask with a literal.
+test "sema air: shift and mask" {
+ try semaAirRawCheck(
+ \\export fn f(x: u32) u32 {
+ \\ return (x >> 8) & 0xFF;
+ \\}
+ );
+}
+
+// Left shift of a runtime value by an integer literal.
+test "sema air: bit shift left" {
+ try semaAirRawCheck("export fn f(x: u32) u32 { return x << 1; }");
+}
+
+// Single @intCast from u16 to the u32 return type.
+test "sema air: intcast" {
+ try semaAirRawCheck("export fn f(x: u16) u32 { return @intCast(x); }");
+}
+
+// Single @truncate from u32 to the u16 return type.
+test "sema air: truncate" {
+ try semaAirRawCheck("export fn f(x: u32) u16 { return @truncate(x); }");
+}
+
+// Two named locals, each a runtime add, combined with xor.
+test "sema air: two local bindings" {
+ try semaAirRawCheck(
+ \\export fn f(x: u32, y: u32) u32 {
+ \\ const a = x + 1;
+ \\ const b = y + 2;
+ \\ return a ^ b;
+ \\}
+ );
+}
diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig
@@ -97,7 +97,7 @@ const corpus_files = .{
"../lib/std/crypto/codecs.zig", // 165
"../lib/std/os/uefi/tables/table_header.zig", // 214
"../lib/std/zig/llvm.zig", // 247
- //"../lib/compiler_rt/neghf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete
+ //"../lib/compiler_rt/neghf2.zig", // 265 -- needs decl_ref + field_call (inline fn from import)
//"../lib/compiler_rt/negxf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete
//"../lib/compiler_rt/absvdi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete
//"../lib/compiler_rt/absvsi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete