zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit f26cdb2771a4bb4d5f1d5acc446ec51c3e177f75 (tree)
parent b40b1178ef29ea9b013f73cc3cd4f9b976d0e120
Author: David Rubin <sinon@vortan.dev>
Date:   Tue, 16 Jun 2026 19:58:06 -0700

x86_64: better support for splatting bool vectors

Diffstat:
M src/codegen/x86_64/CodeGen.zig | 118 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M test/behavior/x86_64/unary.zig | 1 +
2 files changed, 67 insertions(+), 52 deletions(-)

diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig @@ -169090,12 +169090,56 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }; try res[0].finish(inst, &.{reduce.operand}, &ops, cg); }, - .splat => |air_tag| fallback: { + .splat => |air_tag| { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - if (cg.typeOf(ty_op.operand).toIntern() == .bool_type) break :fallback try cg.airSplat(inst); var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); var res: [1]Temp = undefined; cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{ + .dst_constraints = .{ .{ .bool_vec = .qword }, .any }, + .src_constraints = .{ .bool, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .bt, .src0d, .si(0), ._, ._ }, + .{ ._, ._, .sbb, .dst0q, .dst0q, ._, ._ }, + .{ ._, ._r, .sh, .dst0q, .uia(64, .dst0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .any_bool_vec, .any }, + .src_constraints = .{ .bool, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .none, .none } }, + }, + .dst_temps = .{ .mem, .unused }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .reg = .rdi } }, + .{ .type = .u8, .kind = .{ .reg = .rax } }, + .{ .type = .u32, .kind = .{ .reg = .rcx } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .bt, .src0d, .si(0), ._, ._ }, + .{ ._, ._, .sbb, .tmp1b, .tmp1b, ._, ._ }, + .{ ._, ._, .lea, .tmp0q, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .sia(1, .dst0, .add_bit_size_sub_1_div_8_down_1), ._, ._ }, + .{ ._, .@"rep _sb", .sto, ._, ._, ._, ._ }, + .{ ._, ._, .@"and", .memad(.dst0b, .add_bit_size_sub_1_div_8_down_1, 0), .ua(.dst0, .bit_size_last_byte_mask), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .sa(.dst0, 
.add_size_sub_bit_size_div_8_down_1_sub_1), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ ._, .@"rep _sb", .sto, ._, ._, ._, ._ }, + } }, + }, .{ .required_features = .{ .avx2, null, null, null }, .dst_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .byte } }, .any }, .src_constraints = .{ .{ .int = .byte }, .any, .any }, @@ -180803,56 +180847,6 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const vector_ty = self.typeOfIndex(inst); - const vector_len = vector_ty.vectorLen(zcu); - const scalar_ty = self.typeOf(ty_op.operand); - - const result: MCValue = result: { - if (scalar_ty.toIntern() != .bool_type) return self.fail("TODO implement airSplat for {f}", .{ - vector_ty.fmt(pt), - }); - const regs = - try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp); - const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - - try self.genSetReg(regs[1], vector_ty, .{ .immediate = 0 }, .{}); - try self.genSetReg( - regs[1], - vector_ty, - .{ .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - vector_len) }, - .{}, - ); - const src_mcv = try self.resolveInst(ty_op.operand); - const abi_size = @max(std.math.divCeil(u32, vector_len, 8) catch unreachable, 4); - try self.asmCmovccRegisterRegister( - switch (src_mcv) { - .eflags => |cc| cc, - .register => |src_reg| cc: { - try self.asmRegisterImmediate(.{ ._, .@"test" }, src_reg.to8(), .u(1)); - break :cc .nz; - }, - else => cc: { - try self.asmMemoryImmediate( - .{ ._, .@"test" }, - try src_mcv.mem(self, .{ .size = .byte }), - .u(1), - ); - break :cc .nz; - }, - }, - registerAlias(regs[0], abi_size), - 
registerAlias(regs[1], abi_size), - ); - break :result .{ .register = regs[0] }; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; @@ -191236,6 +191230,9 @@ const Select = struct { unaligned_size_add_elem_size, unaligned_size_sub_elem_size, unaligned_size_sub_2_elem_size, + size_sub_bit_size_div_8_down_1_sub_1, + bit_size_sub_1_div_8_down_1, + bit_size_last_byte_mask, bit_size, src0_bit_size, @"8_size_sub_bit_size", @@ -191288,6 +191285,9 @@ const Select = struct { const add_unaligned_size_add_elem_size: Adjust = .{ .sign = .pos, .lhs = .unaligned_size_add_elem_size, .op = .mul, .rhs = .@"1" }; const add_unaligned_size_sub_elem_size: Adjust = .{ .sign = .pos, .lhs = .unaligned_size_sub_elem_size, .op = .mul, .rhs = .@"1" }; const add_unaligned_size_sub_2_elem_size: Adjust = .{ .sign = .pos, .lhs = .unaligned_size_sub_2_elem_size, .op = .mul, .rhs = .@"1" }; + const add_size_sub_bit_size_div_8_down_1_sub_1: Adjust = .{ .sign = .pos, .lhs = .size_sub_bit_size_div_8_down_1_sub_1, .op = .mul, .rhs = .@"1" }; + const add_bit_size_sub_1_div_8_down_1: Adjust = .{ .sign = .pos, .lhs = .bit_size_sub_1_div_8_down_1, .op = .mul, .rhs = .@"1" }; + const bit_size_last_byte_mask: Adjust = .{ .sign = .pos, .lhs = .bit_size_last_byte_mask, .op = .mul, .rhs = .@"1" }; const add_2_bit_size: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .mul, .rhs = .@"2" }; const add_bit_size: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .mul, .rhs = .@"1" }; const add_bit_size_rem_8: Adjust = .{ .sign = .pos, .lhs = .bit_size, .op = .rem_8_mul, .rhs = .@"1" }; @@ -192224,6 +192224,20 @@ const Select = struct { const ty = op.flags.base.ref.typeOf(s); break :lhs @intCast(s.cg.unalignedSize(ty) - ty.scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) * 2); }, + .size_sub_bit_size_div_8_down_1_sub_1 => { + const ty = op.flags.base.ref.typeOf(s); + const size: SignedImm = 
@intCast(ty.abiSize(s.cg.pt.zcu)); + const bit_size: SignedImm = @intCast(s.cg.nonBoolScalarBitSize(ty)); + break :lhs size - @divFloor(bit_size - 1, 8) - 1; + }, + .bit_size_sub_1_div_8_down_1 => { + const bit_size: SignedImm = @intCast(s.cg.nonBoolScalarBitSize(op.flags.base.ref.typeOf(s))); + break :lhs @divFloor(bit_size - 1, 8); + }, + .bit_size_last_byte_mask => { + const bit_size = s.cg.nonBoolScalarBitSize(op.flags.base.ref.typeOf(s)); + break :lhs @as(u8, std.math.maxInt(u8)) >> @intCast(7 - (bit_size - 1) % 8); + }, .bit_size => @intCast(s.cg.nonBoolScalarBitSize(op.flags.base.ref.typeOf(s))), .src0_bit_size => @intCast(s.cg.nonBoolScalarBitSize(Select.Operand.Ref.src0.typeOf(s))), .@"8_size_sub_bit_size" => { diff --git a/test/behavior/x86_64/unary.zig b/test/behavior/x86_64/unary.zig @@ -5262,6 +5262,7 @@ inline fn splat(comptime Type: type, rhs: Type) Type { } test splat { const test_splat = unary(splat, .{}); + try test_splat.testBoolVectors(); try test_splat.testIntVectors(); try test_splat.testFloatVectors(); }