From ad7aead47d301e60ffae1d84e775205cbc6a3fad Mon Sep 17 00:00:00 2001 From: Motiejus Date: Fri, 20 Feb 2026 13:51:40 +0000 Subject: [PATCH] sema: add mul, intcast, truncate, shl/shr, dbg_var_val handlers Add new ZIR instruction handlers for: - MUL (dispatch to existing zirArithmetic) - INT_CAST, TRUNCATE (new zirTyOpCast generic handler) - SHL, SHR (new zirShl handler) - TYPEOF_LOG2_INT_TYPE, AS_SHIFT_OPERAND (shift type infrastructure) - DBG_VAR_VAL, DBG_VAR_PTR (debug variable annotations) Infrastructure additions: - semaAppendAirString: stores NullTerminatedString in AIR extra array - zirTypeofLog2IntType: computes log2 integer type for shift amounts - zirAsShiftOperand: coerces shift operand to correct type - Extend semaTypeOf for MUL, SHL, SHR, INTCAST, TRUNC tags - Normalize NTS padding in test comparison (Zig leaves trailing bytes uninitialised; the C side zeroes them) New tests: dbg_var_val, mul, intcast, truncate, shl, shr, chain of casts, mixed arithmetic+bitwise, shift+mask, neghf2-equivalent. Co-Authored-By: Claude Opus 4.6 --- stage0/sema.c | 198 ++++++++++++++++++++++++++++++++++++++++- stage0/sema_test.zig | 132 ++++++++++++++++++++++++++- stage0/stages_test.zig | 2 +- 3 files changed, 328 insertions(+), 4 deletions(-) diff --git a/stage0/sema.c b/stage0/sema.c index 1892928449..279feaf222 100644 --- a/stage0/sema.c +++ b/stage0/sema.c @@ -258,6 +258,55 @@ static void zirDbgStmt(Sema* sema, SemaBlock* block, uint32_t inst) { (void)blockAddInst(block, AIR_INST_DBG_STMT, data); } +// semaAppendAirString: store a NullTerminatedString in the AIR extra array. +// Ported from src/Sema.zig appendAirString. +// The string (including NUL) is packed into u32 words in the extra array. +// Returns the extra array index where the string starts. 
+static uint32_t semaAppendAirString(Sema* sema, const char* str) { + uint32_t result = sema->air_extra_len; + uint32_t len = (uint32_t)strlen(str); + uint32_t num_words = (len + 1 + 3) / 4; // str + NUL, round up to u32 + // Grow extra by doubling until the string fits; an empty array is seeded with num_words so the loop always terminates. + while (sema->air_extra_len + num_words > sema->air_extra_cap) { + uint32_t new_cap = sema->air_extra_cap ? sema->air_extra_cap * 2 : num_words; + uint32_t* new_extra + = realloc(sema->air_extra, new_cap * sizeof(uint32_t)); + if (!new_extra) + exit(1); + sema->air_extra = new_extra; + sema->air_extra_cap = new_cap; + } + // Zero-fill first so the padding after the NUL is deterministic, then copy the string bytes. + memset(&sema->air_extra[sema->air_extra_len], 0, + num_words * sizeof(uint32_t)); + memcpy(&sema->air_extra[sema->air_extra_len], str, len); + ((uint8_t*)&sema->air_extra[sema->air_extra_len])[len] = 0; // NUL (already zero after the memset; kept explicit) + sema->air_extra_len += num_words; + return result; +} + +// zirDbgVar: emit the debug variable annotation given by air_tag as a pl_op (operand + index of the name copied into AIR extra); no-op in comptime blocks. +// Ported from src/Sema.zig zirDbgVar. +static void zirDbgVar( + Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) { + if (block->is_comptime) + return; + + uint32_t str_idx = sema->code.inst_datas[inst].str_op.str; + ZirInstRef operand_ref = sema->code.inst_datas[inst].str_op.operand; + AirInstRef operand = resolveInst(sema, operand_ref); + + const char* name + = (const char*)&sema->code.string_bytes[str_idx]; + uint32_t name_nts = semaAppendAirString(sema, name); + + AirInstData data; + memset(&data, 0, sizeof(data)); + data.pl_op.operand = operand; + data.pl_op.payload = name_nts; + (void)blockAddInst(block, air_tag, data); +} + // --- Declaration.Flags.Id helpers --- // Ported from lib/std/zig/Zir.zig Declaration.Flags.Id methods. // The Id is a 5-bit enum packed in the upper bits of Declaration.flags_1. @@ -410,13 +459,20 @@ static TypeIndex semaTypeOf(Sema* sema, AirInstRef ref) { switch (inst_tag) { case AIR_INST_ARG: return AIR_REF_TO_IP(sema->air_inst_datas[inst_idx].arg.ty_ref); + // bin_op: type derived from LHS. 
case AIR_INST_ADD: case AIR_INST_SUB: + case AIR_INST_MUL: case AIR_INST_BIT_AND: case AIR_INST_BIT_OR: case AIR_INST_XOR: + case AIR_INST_SHL: + case AIR_INST_SHR: return semaTypeOf(sema, sema->air_inst_datas[inst_idx].bin_op.lhs); + // ty_op: type from ty_ref field. case AIR_INST_BITCAST: + case AIR_INST_INTCAST: + case AIR_INST_TRUNC: return AIR_REF_TO_IP(sema->air_inst_datas[inst_idx].ty_op.ty_ref); default: assert(0 && "semaTypeOf: unhandled AIR tag"); @@ -512,6 +568,91 @@ static AirInstRef zirBitcast(Sema* sema, SemaBlock* block, uint32_t inst) { return blockAddInst(block, AIR_INST_BITCAST, data); } +// zirTyOpCast: generic handler for type-changing cast ZIR instructions (intcast, truncate, etc.) +// that emit the ty_op AIR instruction given by air_tag. The destination type ref must be below ZIR_REF_START_INDEX (asserted) and is used directly as the TypeIndex. +// Ported from src/Sema.zig zirIntCast / zirTruncate (simplified). +static AirInstRef zirTyOpCast( + Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) { + uint32_t payload_index + = sema->code.inst_datas[inst].pl_node.payload_index; + ZirInstRef dest_ty_ref = sema->code.extra[payload_index]; + ZirInstRef operand_ref = sema->code.extra[payload_index + 1]; + assert(dest_ty_ref < ZIR_REF_START_INDEX); + TypeIndex dest_ty = dest_ty_ref; + AirInstRef operand = resolveInst(sema, operand_ref); + // A non-instruction operand ref is treated as comptime: coerce via semaCoerce instead of emitting a runtime cast. + if (!AIR_REF_IS_INST(operand)) { + return semaCoerce(sema, block, dest_ty, operand); + } + AirInstData data; + memset(&data, 0, sizeof(data)); + data.ty_op.ty_ref = AIR_REF_FROM_IP(dest_ty); + data.ty_op.operand = operand; + return blockAddInst(block, air_tag, data); +} + +// zirTypeofLog2IntType: compute the log2 integer type for shift amounts. +// Ported from src/Sema.zig zirTypeofLog2IntType. +// For an operand whose integer type has N bits, maps inst to the unsigned integer type with +// ceil(log2(N)) bits (e.g. u32 → u5, u16 → u4). 
+static void zirTypeofLog2IntType(Sema* sema, uint32_t inst) { + ZirInstRef operand_ref = sema->code.inst_datas[inst].un_node.operand; + AirInstRef operand = resolveInst(sema, operand_ref); + TypeIndex operand_ty = semaTypeOf(sema, operand); + assert(sema->ip->items[operand_ty].tag == IP_KEY_INT_TYPE); + uint16_t bits = sema->ip->items[operand_ty].data.int_type.bits; + // Compute ceil(log2(bits)): count bits needed to represent 0..bits-1 (0 when bits <= 1). + uint16_t count = 0; + uint16_t s = (bits > 0) ? (uint16_t)(bits - 1) : 0; + while (s != 0) { + count++; + s >>= 1; + } + // Intern the unsigned count-bit integer type and map inst to it. + InternPoolKey key; + memset(&key, 0, sizeof(key)); + key.tag = IP_KEY_INT_TYPE; + key.data.int_type.bits = count; + key.data.int_type.signedness = 0; // unsigned + InternPoolIndex ty_idx = ipIntern(sema->ip, key); + instMapPut(&sema->inst_map, inst, AIR_REF_FROM_IP(ty_idx)); +} + +// zirAsShiftOperand: coerce a shift amount to the correct type and map inst to the result. +// Ported from src/Sema.zig zirAsShiftOperand. +// Uses pl_node + As payload (dest_type, operand) — same layout as as_node. +static void zirAsShiftOperand(Sema* sema, uint32_t inst) { + uint32_t payload_index + = sema->code.inst_datas[inst].pl_node.payload_index; + ZirInstRef dest_ty_ref = sema->code.extra[payload_index]; + ZirInstRef operand_ref = sema->code.extra[payload_index + 1]; + // dest_ty comes from typeof_log2_int_type, which was mapped to an IP ref. + AirInstRef dest_ty_air = resolveInst(sema, dest_ty_ref); + assert(!AIR_REF_IS_INST(dest_ty_air)); + TypeIndex dest_ty = AIR_REF_TO_IP(dest_ty_air); + AirInstRef operand = resolveInst(sema, operand_ref); + // Coerce the operand (typically a comptime int) to the shift type. NOTE(review): block is NULL here; confirm semaCoerce never needs to emit an instruction for this operand. + AirInstRef result = semaCoerce(sema, NULL, dest_ty, operand); + instMapPut(&sema->inst_map, inst, result); +} + +// zirShl: handle shl/shr ZIR instructions; air_tag selects the AIR bin_op that is emitted. +// Ported from src/Sema.zig zirShl (simplified, runtime path only). 
+static AirInstRef zirShl( + Sema* sema, SemaBlock* block, uint32_t inst, AirInstTag air_tag) { + uint32_t payload_index + = sema->code.inst_datas[inst].pl_node.payload_index; + ZirInstRef lhs_ref = sema->code.extra[payload_index]; + ZirInstRef rhs_ref = sema->code.extra[payload_index + 1]; + AirInstRef lhs = resolveInst(sema, lhs_ref); + AirInstRef rhs = resolveInst(sema, rhs_ref); + AirInstData data; + memset(&data, 0, sizeof(data)); + data.bin_op.lhs = lhs; + data.bin_op.rhs = rhs; + return blockAddInst(block, air_tag, data); +} + // zirAsNode: handle @as ZIR instruction. // Ported from src/Sema.zig zirAs / zirAsNode. static AirInstRef zirAsNode( @@ -637,7 +778,6 @@ static void zirFunc(Sema* sema, SemaBlock* block, uint32_t inst) { uint32_t saved_extra_cap = sema->air_extra_cap; InstMap saved_map = sema->inst_map; TypeIndex saved_fn_ret_ty = sema->fn_ret_ty; - // --- Set up fresh AIR arrays for the function body --- sema->air_inst_tags = ARR_INIT(uint8_t, SEMA_AIR_INITIAL_CAP); sema->air_inst_datas = ARR_INIT(AirInstData, SEMA_AIR_INITIAL_CAP); @@ -1000,6 +1140,17 @@ static bool analyzeBodyInner( i++; continue; + // dbg_var_val / dbg_var_ptr: debug variable annotations. + // Ported from src/Sema.zig zirDbgVar. + case ZIR_INST_DBG_VAR_VAL: + zirDbgVar(sema, block, inst, AIR_INST_DBG_VAR_VAL); + i++; + continue; + case ZIR_INST_DBG_VAR_PTR: + zirDbgVar(sema, block, inst, AIR_INST_DBG_VAR_PTR); + i++; + continue; + // restore_err_ret_index_fn_entry: error return trace restore. // In ReleaseFast (no safety), this is a no-op. // Ported from src/Sema.zig zirRestoreErrRetIndex. @@ -1094,7 +1245,7 @@ static bool analyzeBodyInner( continue; } - // Arithmetic: add, sub. + // Arithmetic: add, sub, mul. 
case ZIR_INST_ADD: instMapPut(&sema->inst_map, inst, zirArithmetic(sema, block, inst, AIR_INST_ADD)); @@ -1105,6 +1256,11 @@ static bool analyzeBodyInner( zirArithmetic(sema, block, inst, AIR_INST_SUB)); i++; continue; + case ZIR_INST_MUL: + instMapPut(&sema->inst_map, inst, + zirArithmetic(sema, block, inst, AIR_INST_MUL)); + i++; + continue; // Bitwise: xor, bit_and, bit_or. case ZIR_INST_XOR: @@ -1130,6 +1286,44 @@ static bool analyzeBodyInner( i++; continue; + // @intCast. + case ZIR_INST_INT_CAST: + instMapPut(&sema->inst_map, inst, + zirTyOpCast(sema, block, inst, AIR_INST_INTCAST)); + i++; + continue; + + // @truncate. + case ZIR_INST_TRUNCATE: + instMapPut(&sema->inst_map, inst, + zirTyOpCast(sema, block, inst, AIR_INST_TRUNC)); + i++; + continue; + + // Shift operations. + case ZIR_INST_SHL: + instMapPut(&sema->inst_map, inst, + zirShl(sema, block, inst, AIR_INST_SHL)); + i++; + continue; + case ZIR_INST_SHR: + instMapPut(&sema->inst_map, inst, + zirShl(sema, block, inst, AIR_INST_SHR)); + i++; + continue; + + // Shift type computation. + case ZIR_INST_TYPEOF_LOG2_INT_TYPE: + zirTypeofLog2IntType(sema, inst); + i++; + continue; + + // Shift operand coercion. + case ZIR_INST_AS_SHIFT_OPERAND: + zirAsShiftOperand(sema, inst); + i++; + continue; + // @as. 
case ZIR_INST_AS_NODE: instMapPut(&sema->inst_map, inst, diff --git a/stage0/sema_test.zig b/stage0/sema_test.zig index d5c3327085..2ff592d56b 100644 --- a/stage0/sema_test.zig +++ b/stage0/sema_test.zig @@ -392,6 +392,8 @@ fn airDataRefSlots(tag_val: u8) [2]bool { c.AIR_INST_RET_ADDR, c.AIR_INST_FRAME_ADDR => .{ false, false }, // dbg_stmt: line(u32) + column(u32) c.AIR_INST_DBG_STMT, c.AIR_INST_DBG_EMPTY_STMT => .{ false, false }, + // pl_op: operand(Ref) + payload(u32) + c.AIR_INST_DBG_VAR_PTR, c.AIR_INST_DBG_VAR_VAL, c.AIR_INST_DBG_ARG_INLINE => .{ true, false }, // un_op: operand(Ref) + pad c.AIR_INST_RET, c.AIR_INST_RET_SAFE, @@ -449,6 +451,11 @@ fn airDataRefSlots(tag_val: u8) [2]bool { c.AIR_INST_BIT_AND, c.AIR_INST_BIT_OR, c.AIR_INST_XOR, + c.AIR_INST_SHL, + c.AIR_INST_SHL_EXACT, + c.AIR_INST_SHL_SAT, + c.AIR_INST_SHR, + c.AIR_INST_SHR_EXACT, c.AIR_INST_CMP_LT, c.AIR_INST_CMP_LTE, c.AIR_INST_CMP_EQ, @@ -481,6 +488,26 @@ fn airDataRefSlots(tag_val: u8) [2]bool { }; } +/// Zero the padding bytes after the null terminator of the NullTerminatedString that starts at word +/// index `nts_index` in `extra` (index 0 or an out-of-range index is a no-op). Zig's appendAirString leaves that padding uninitialised; +/// the C side zeroes it. Normalising both to zero allows comparison. +fn normalizeNtsPadding(extra: []u32, nts_index: u32) void { + if (nts_index == 0 or nts_index >= extra.len) return; + const bytes = std.mem.sliceAsBytes(extra); + const byte_start = nts_index * 4; + // Find the null terminator (i ends at bytes.len if there is none). + var i = byte_start; + while (i < bytes.len) : (i += 1) { + if (bytes[i] == 0) break; + } + // Zero the bytes from null+1 up to the next u32 word boundary (bounded by bytes.len). 
+ i += 1; + const next_word_byte = ((i + 3) / 4) * 4; + while (i < next_word_byte and i < bytes.len) : (i += 1) { + bytes[i] = 0; + } +} + fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) !void { if (zig_air.inst_len != c_air.inst_len) { std.debug.print("'{s}': inst_len mismatch: zig={d} c={d}\n", .{ name, zig_air.inst_len, c_air.inst_len }); @@ -578,8 +605,39 @@ fn airCompareOne(name: []const u8, zig_air: *const c.Air, c_air: *const c.Air) ! std.debug.print("'{s}': C extra is null but extra_len={d}\n", .{ name, extra_len }); return error.AirMismatch; }; - if (!std.mem.eql(u32, zig_extra[0..extra_len], c_extra[0..extra_len])) { + // Make mutable copies and normalize NullTerminatedString padding. + // Zig's appendAirString leaves trailing bytes uninitialised (0xaa + // in debug); the C side zeroes them. Normalise both to zero. + const zig_extra_copy = try std.testing.allocator.alloc(u32, extra_len); + defer std.testing.allocator.free(zig_extra_copy); + @memcpy(zig_extra_copy, zig_extra[0..extra_len]); + const c_extra_copy = try std.testing.allocator.alloc(u32, extra_len); + defer std.testing.allocator.free(c_extra_copy); + @memcpy(c_extra_copy, c_extra[0..extra_len]); + if (inst_len > 0) { + const tags: [*]const u8 = cToOpt(u8, zig_air.inst_tags).?; + const zig_datas_raw: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, zig_air.inst_datas).?); + const c_datas_raw: [*]const u8 = @ptrCast(cToOpt(c.AirInstData, c_air.inst_datas).?); + for (0..inst_len) |j| { + if (tags[j] == c.AIR_INST_DBG_VAR_VAL or + tags[j] == c.AIR_INST_DBG_VAR_PTR or + tags[j] == c.AIR_INST_DBG_ARG_INLINE) + { + // pl_op: slot 0 = operand, slot 1 = payload (NullTerminatedString) + const zig_nts = std.mem.readInt(u32, zig_datas_raw[j * 8 + 4 ..][0..4], .little); + const c_nts = std.mem.readInt(u32, c_datas_raw[j * 8 + 4 ..][0..4], .little); + normalizeNtsPadding(zig_extra_copy, zig_nts); + normalizeNtsPadding(c_extra_copy, c_nts); + } + } + } + if (!std.mem.eql(u32, 
zig_extra_copy, c_extra_copy)) { std.debug.print("'{s}': extra mismatch (extra_len={d})\n", .{ name, extra_len }); + for (0..extra_len) |ei| { + if (zig_extra_copy[ei] != c_extra_copy[ei]) { + std.debug.print(" extra[{d}]: zig=0x{x} c=0x{x}\n", .{ ei, zig_extra_copy[ei], c_extra_copy[ei] }); + } + } return error.AirMismatch; } } @@ -646,6 +704,14 @@ test "sema air: as node" { try semaAirRawCheck("export fn f(x: u32) u32 { return @as(u32, x); }"); } +test "sema air: local const binding" { + try semaAirRawCheck("export fn f(x: u32) u32 { const y = x + 1; return y; }"); +} + +test "sema air: multiple operations" { + try semaAirRawCheck("export fn f(x: u32, y: u32) u32 { return (x + y) ^ 0xFF; }"); +} + test "sema air: neghf2 inline equivalent" { try semaAirRawCheck( \\export fn f(a: f16) f16 { @@ -653,3 +719,67 @@ test "sema air: neghf2 inline equivalent" { \\} ); } + +test "sema air: mul two args" { + try semaAirRawCheck("export fn f(x: u32, y: u32) u32 { return x * y; }"); +} + +// TODO: bool and/or require block merges and conditional analysis. 
+// test "sema air: bool and" { +// try semaAirRawCheck("export fn f(x: bool, y: bool) bool { return x and y; }"); +// } + +test "sema air: bit shift right" { + try semaAirRawCheck("export fn f(x: u32) u32 { return x >> 1; }"); +} + +test "sema air: mul comptime int" { + try semaAirRawCheck("export fn f(x: u32) u32 { return x * 3; }"); +} + +test "sema air: chain of casts" { + try semaAirRawCheck( + \\export fn f(x: u8) u32 { + \\ const wide: u16 = @intCast(x); + \\ return @intCast(wide); + \\} + ); +} + +test "sema air: mixed arithmetic and bitwise" { + try semaAirRawCheck( + \\export fn f(a: u32, b: u32) u32 { + \\ return (a + b) & 0xFF; + \\} + ); +} + +test "sema air: shift and mask" { + try semaAirRawCheck( + \\export fn f(x: u32) u32 { + \\ return (x >> 8) & 0xFF; + \\} + ); +} + +test "sema air: bit shift left" { + try semaAirRawCheck("export fn f(x: u32) u32 { return x << 1; }"); +} + +test "sema air: intcast" { + try semaAirRawCheck("export fn f(x: u16) u32 { return @intCast(x); }"); +} + +test "sema air: truncate" { + try semaAirRawCheck("export fn f(x: u32) u16 { return @truncate(x); }"); +} + +test "sema air: two local bindings" { + try semaAirRawCheck( + \\export fn f(x: u32, y: u32) u32 { + \\ const a = x + 1; + \\ const b = y + 2; + \\ return a ^ b; + \\} + ); +} diff --git a/stage0/stages_test.zig b/stage0/stages_test.zig index da175c9040..5410385cb2 100644 --- a/stage0/stages_test.zig +++ b/stage0/stages_test.zig @@ -97,7 +97,7 @@ const corpus_files = .{ "../lib/std/crypto/codecs.zig", // 165 "../lib/std/os/uefi/tables/table_header.zig", // 214 "../lib/std/zig/llvm.zig", // 247 - //"../lib/compiler_rt/neghf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete + //"../lib/compiler_rt/neghf2.zig", // 265 -- needs decl_ref + field_call (inline fn from import) //"../lib/compiler_rt/negxf2.zig", // 265 -- @export+func_fancy handled; body analysis incomplete //"../lib/compiler_rt/absvdi2.zig", // 311 -- @export+func_fancy handled; body 
analysis incomplete //"../lib/compiler_rt/absvsi2.zig", // 311 -- @export+func_fancy handled; body analysis incomplete