x86_64: fix avx2 @truncate
This commit is contained in:
@@ -3274,8 +3274,8 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
|
||||
try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
|
||||
break :dst dst_mcv;
|
||||
} else dst: {
|
||||
const dst_mcv = try self.allocRegOrMem(inst, true);
|
||||
try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
|
||||
const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true);
|
||||
try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
|
||||
break :dst dst_mcv;
|
||||
};
|
||||
|
||||
@@ -3333,22 +3333,40 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
|
||||
else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) },
|
||||
};
|
||||
|
||||
const dst_reg = registerAlias(dst_mcv.getReg().?, src_abi_size);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
const dst_alias = registerAlias(dst_reg, src_abi_size);
|
||||
if (self.hasFeature(.avx)) {
|
||||
try self.asmRegisterRegisterMemory(
|
||||
.{ .vp_, .@"and" },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
dst_alias,
|
||||
dst_alias,
|
||||
try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
|
||||
);
|
||||
try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg);
|
||||
if (src_abi_size > 16) {
|
||||
const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
|
||||
const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
|
||||
defer self.register_manager.unlockReg(temp_lock);
|
||||
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract },
|
||||
registerAlias(temp_reg, dst_abi_size),
|
||||
dst_alias,
|
||||
Immediate.u(1),
|
||||
);
|
||||
try self.asmRegisterRegisterRegister(
|
||||
mir_tag,
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(temp_reg, dst_abi_size),
|
||||
);
|
||||
} else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias);
|
||||
} else {
|
||||
try self.asmRegisterMemory(
|
||||
.{ .p_, .@"and" },
|
||||
dst_reg,
|
||||
dst_alias,
|
||||
try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
|
||||
);
|
||||
try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias);
|
||||
}
|
||||
break :result dst_mcv;
|
||||
}
|
||||
@@ -16404,7 +16422,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
|
||||
},
|
||||
65...128 => switch (vector_len) {
|
||||
else => null,
|
||||
1...2 => .{ .vp_i128, .broadcast },
|
||||
1...2 => .{ .v_i128, .broadcast },
|
||||
},
|
||||
}) orelse break :avx2;
|
||||
|
||||
@@ -16418,7 +16436,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
|
||||
registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
|
||||
try src_mcv.mem(self, self.memSize(scalar_ty)),
|
||||
) else {
|
||||
if (mir_tag[0] == .vp_i128) break :avx2;
|
||||
if (mir_tag[0] == .v_i128) break :avx2;
|
||||
try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
|
||||
try self.asmRegisterRegister(
|
||||
mir_tag,
|
||||
|
||||
@@ -415,7 +415,8 @@ pub const Mnemonic = enum {
|
||||
vfmadd132sd, vfmadd213sd, vfmadd231sd,
|
||||
vfmadd132ss, vfmadd213ss, vfmadd231ss,
|
||||
// AVX2
|
||||
vpbroadcastb, vpbroadcastd, vpbroadcasti128, vpbroadcastq, vpbroadcastw,
|
||||
vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw,
|
||||
vextracti128, vinserti128,
|
||||
// zig fmt: on
|
||||
};
|
||||
|
||||
|
||||
@@ -230,6 +230,8 @@ pub const Inst = struct {
|
||||
v_d,
|
||||
/// VEX-Encoded ___ QuadWord
|
||||
v_q,
|
||||
/// VEX-Encoded ___ Integer Data
|
||||
v_i128,
|
||||
/// VEX-Encoded Packed ___
|
||||
vp_,
|
||||
/// VEX-Encoded Packed ___ Byte
|
||||
@@ -242,8 +244,6 @@ pub const Inst = struct {
|
||||
vp_q,
|
||||
/// VEX-Encoded Packed ___ Double Quadword
|
||||
vp_dq,
|
||||
/// VEX-Encoded Packed ___ Integer Data
|
||||
vp_i128,
|
||||
/// VEX-Encoded ___ Scalar Single-Precision Values
|
||||
v_ss,
|
||||
/// VEX-Encoded ___ Packed Single-Precision Values
|
||||
@@ -654,6 +654,7 @@ pub const Inst = struct {
|
||||
/// Variable blend scalar double-precision floating-point values
|
||||
blendv,
|
||||
/// Extract packed floating-point values
|
||||
/// Extract packed integer values
|
||||
extract,
|
||||
/// Insert scalar single-precision floating-point value
|
||||
/// Insert packed floating-point values
|
||||
@@ -696,6 +697,7 @@ pub const Inst = struct {
|
||||
sha256rnds2,
|
||||
|
||||
/// Load with broadcast floating-point data
|
||||
/// Load integer and broadcast
|
||||
broadcast,
|
||||
|
||||
/// Convert 16-bit floating-point values to single-precision floating-point values
|
||||
|
||||
@@ -1769,6 +1769,10 @@ pub const table = [_]Entry{
|
||||
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
|
||||
|
||||
.{ .vextracti128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x39 }, 0, .vex_256_w0, .avx2 },
|
||||
|
||||
.{ .vinserti128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x38 }, 0, .vex_256_w0, .avx2 },
|
||||
|
||||
.{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 },
|
||||
@@ -1809,7 +1813,7 @@ pub const table = [_]Entry{
|
||||
.{ .vpbroadcastd, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x58 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vpbroadcastq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_128_w0, .avx2 },
|
||||
.{ .vpbroadcastq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vpbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 },
|
||||
.{ .vbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 },
|
||||
|
||||
.{ .vpcmpeqb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
Reference in New Issue
Block a user