x86_64: implement integer vector min/max
This commit is contained in:
@@ -6534,6 +6534,34 @@ fn genBinOp(
|
||||
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
|
||||
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
|
||||
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
|
||||
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
    // Signed byte minimum has no SSE2 form: the table lists pminsb as
    // an SSE4.1 instruction, so without AVX or SSE4.1 there is no
    // lowering here.
    .signed => if (self.hasFeature(.avx))
        .{ .vp_b, .mins }
    else if (self.hasFeature(.sse4_1))
        .{ .p_b, .mins }
    else
        null,
    // Unsigned byte minimum (pminub) is listed as an SSE2 instruction,
    // so the legacy form needs no SSE4.1 check. This mirrors the
    // unconditional `.p_` fallbacks of the bitwise arms.
    .unsigned => if (self.hasFeature(.avx))
        .{ .vp_b, .minu }
    else
        .{ .p_b, .minu },
},
|
||||
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
    // Signed byte maximum has no SSE2 form: the table lists pmaxsb as
    // an SSE4.1 instruction, so without AVX or SSE4.1 there is no
    // lowering here.
    .signed => if (self.hasFeature(.avx))
        .{ .vp_b, .maxs }
    else if (self.hasFeature(.sse4_1))
        .{ .p_b, .maxs }
    else
        null,
    // Unsigned byte maximum (pmaxub) is listed as an SSE2 instruction,
    // so the legacy form needs no SSE4.1 check. This mirrors the
    // unconditional `.p_` fallbacks of the bitwise arms.
    .unsigned => if (self.hasFeature(.avx))
        .{ .vp_b, .maxu }
    else
        .{ .p_b, .maxu },
},
|
||||
else => null,
|
||||
},
|
||||
17...32 => switch (air_tag) {
|
||||
@@ -6546,6 +6574,14 @@ fn genBinOp(
|
||||
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
|
||||
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
|
||||
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
|
||||
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
    // Both signednesses are gated on AVX2, matching the neighbouring
    // `.max` arm: the wide (ymm) packed-integer minimum is an AVX2
    // instruction, and plain AVX only provides the 128-bit form.
    .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
    .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .minu } else null,
},
|
||||
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
|
||||
.signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
|
||||
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
|
||||
},
|
||||
else => null,
|
||||
},
|
||||
else => null,
|
||||
@@ -6564,6 +6600,26 @@ fn genBinOp(
|
||||
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
|
||||
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
|
||||
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
|
||||
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
    // Signed word minimum (pminsw) is listed as an SSE2 instruction, so
    // the legacy form is used unconditionally when AVX is unavailable.
    .signed => if (self.hasFeature(.avx))
        .{ .vp_w, .mins }
    else
        .{ .p_w, .mins },
    // Unsigned word minimum (pminuw) is listed as an SSE4.1 instruction,
    // so it must not be selected on targets with neither AVX nor SSE4.1.
    .unsigned => if (self.hasFeature(.avx))
        .{ .vp_w, .minu }
    else if (self.hasFeature(.sse4_1))
        .{ .p_w, .minu }
    else
        null,
},
|
||||
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
    // Signed word maximum (pmaxsw) is listed as an SSE2 instruction, so
    // the legacy form is used unconditionally when AVX is unavailable.
    .signed => if (self.hasFeature(.avx))
        .{ .vp_w, .maxs }
    else
        .{ .p_w, .maxs },
    // Unsigned word maximum (pmaxuw) is listed as an SSE4.1 instruction,
    // so it must not be selected on targets with neither AVX nor SSE4.1.
    .unsigned => if (self.hasFeature(.avx))
        .{ .vp_w, .maxu }
    else if (self.hasFeature(.sse4_1))
        .{ .p_w, .maxu }
    else
        null,
},
|
||||
else => null,
|
||||
},
|
||||
9...16 => switch (air_tag) {
|
||||
@@ -6579,6 +6635,14 @@ fn genBinOp(
|
||||
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
|
||||
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
|
||||
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
|
||||
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
    // Both signednesses are gated on AVX2, matching the neighbouring
    // `.max` arm: the wide (ymm) packed-integer minimum is an AVX2
    // instruction, and plain AVX only provides the 128-bit form.
    .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
    .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .minu } else null,
},
|
||||
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
|
||||
.signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
|
||||
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
|
||||
},
|
||||
else => null,
|
||||
},
|
||||
else => null,
|
||||
@@ -6602,6 +6666,34 @@ fn genBinOp(
|
||||
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
|
||||
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
|
||||
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
|
||||
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
|
||||
.signed => if (self.hasFeature(.avx))
|
||||
.{ .vp_d, .mins }
|
||||
else if (self.hasFeature(.sse4_1))
|
||||
.{ .p_d, .mins }
|
||||
else
|
||||
null,
|
||||
.unsigned => if (self.hasFeature(.avx))
|
||||
.{ .vp_d, .minu }
|
||||
else if (self.hasFeature(.sse4_1))
|
||||
.{ .p_d, .minu }
|
||||
else
|
||||
null,
|
||||
},
|
||||
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
|
||||
.signed => if (self.hasFeature(.avx))
|
||||
.{ .vp_d, .maxs }
|
||||
else if (self.hasFeature(.sse4_1))
|
||||
.{ .p_d, .maxs }
|
||||
else
|
||||
null,
|
||||
.unsigned => if (self.hasFeature(.avx))
|
||||
.{ .vp_d, .maxu }
|
||||
else if (self.hasFeature(.sse4_1))
|
||||
.{ .p_d, .maxu }
|
||||
else
|
||||
null,
|
||||
},
|
||||
else => null,
|
||||
},
|
||||
5...8 => switch (air_tag) {
|
||||
@@ -6617,6 +6709,14 @@ fn genBinOp(
|
||||
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
|
||||
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
|
||||
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
|
||||
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
    // Both signednesses are gated on AVX2, matching the neighbouring
    // `.max` arm: the wide (ymm) packed-integer minimum is an AVX2
    // instruction, and plain AVX only provides the 128-bit form.
    .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
    .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .minu } else null,
},
|
||||
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
|
||||
.signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
|
||||
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
|
||||
},
|
||||
else => null,
|
||||
},
|
||||
else => null,
|
||||
|
||||
@@ -280,6 +280,7 @@ pub const Mnemonic = enum {
|
||||
mulps, mulss,
|
||||
orps,
|
||||
pextrw, pinsrw,
|
||||
pmaxsw, pmaxub, pminsw, pminub,
|
||||
shufps,
|
||||
sqrtps, sqrtss,
|
||||
subps, subss,
|
||||
@@ -318,6 +319,7 @@ pub const Mnemonic = enum {
|
||||
insertps,
|
||||
pextrb, pextrd, pextrq,
|
||||
pinsrb, pinsrd, pinsrq,
|
||||
pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
|
||||
pmulld,
|
||||
roundpd, roundps, roundsd, roundss,
|
||||
// AVX
|
||||
@@ -349,6 +351,8 @@ pub const Mnemonic = enum {
|
||||
vpand, vpandn,
|
||||
vpextrb, vpextrd, vpextrq, vpextrw,
|
||||
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
|
||||
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
|
||||
vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
|
||||
vpmulhw, vpmulld, vpmullw,
|
||||
vpor,
|
||||
vpshufhw, vpshuflw,
|
||||
|
||||
@@ -453,6 +453,14 @@ pub const Inst = struct {
|
||||
/// Bitwise logical and not of packed single-precision floating-point values
|
||||
/// Bitwise logical and not of packed double-precision floating-point values
|
||||
andn,
|
||||
/// Maximum of packed signed integers
|
||||
maxs,
|
||||
/// Maximum of packed unsigned integers
|
||||
maxu,
|
||||
/// Minimum of packed signed integers
|
||||
mins,
|
||||
/// Minimum of packed unsigned integers
|
||||
minu,
|
||||
/// Multiply packed signed integers and store low result
|
||||
mull,
|
||||
/// Multiply packed signed integers and store high result
|
||||
|
||||
@@ -1011,6 +1011,14 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pmaxsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pmaxub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pminsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pminub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
|
||||
@@ -1091,6 +1099,20 @@ pub const table = [_]Entry{
|
||||
.{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
|
||||
.{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
|
||||
|
||||
.{ .pmaxsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .none, .sse4_1 },
|
||||
.{ .pmaxsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pmaxuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pmaxud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pminsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .none, .sse4_1 },
|
||||
.{ .pminsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pminuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
|
||||
|
||||
.{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
|
||||
@@ -1318,6 +1340,24 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmaxub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpmaxuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmaxud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpminsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpminsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpminsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpminub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpminuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
|
||||
@@ -1449,6 +1489,24 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
// 256-bit (ymm) packed integer max/min. These VEX.256 integer forms are
// AVX2 instructions, so they carry the `.avx2` feature like the ymm
// `vpandn` entry; the 128-bit (xmm) forms are the ones tagged `.avx`.
.{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx2 },
.{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx2 },
.{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx2 },

.{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx2 },
.{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx2 },

.{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx2 },

.{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx2 },
.{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx2 },
.{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx2 },

.{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx2 },
.{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx2 },

.{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
Reference in New Issue
Block a user