x86_64: implement integer vector min/max

This commit is contained in:
Jacob Young
2023-05-15 02:55:41 -04:00
parent 40457a3696
commit cea9ac772a
4 changed files with 170 additions and 0 deletions

View File

@@ -6534,6 +6534,34 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx))
.{ .vp_b, .mins }
else if (self.hasFeature(.sse4_1))
.{ .p_b, .mins }
else
null,
.unsigned => if (self.hasFeature(.avx))
.{ .vp_b, .minu }
else if (self.hasFeature(.sse4_1))
.{ .p_b, .minu }
else
null,
},
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx))
.{ .vp_b, .maxs }
else if (self.hasFeature(.sse4_1))
.{ .p_b, .maxs }
else
null,
.unsigned => if (self.hasFeature(.avx))
.{ .vp_b, .maxu }
else if (self.hasFeature(.sse4_1))
.{ .p_b, .maxu }
else
null,
},
else => null,
},
17...32 => switch (air_tag) {
@@ -6546,6 +6574,14 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
.unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null,
},
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
},
else => null,
},
else => null,
@@ -6564,6 +6600,26 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx))
.{ .vp_w, .mins }
else
.{ .p_w, .mins },
.unsigned => if (self.hasFeature(.avx))
.{ .vp_w, .minu }
else
.{ .p_w, .minu },
},
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx))
.{ .vp_w, .maxs }
else
.{ .p_w, .maxs },
.unsigned => if (self.hasFeature(.avx))
.{ .vp_w, .maxu }
else
.{ .p_w, .maxu },
},
else => null,
},
9...16 => switch (air_tag) {
@@ -6579,6 +6635,14 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
.unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null,
},
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
},
else => null,
},
else => null,
@@ -6602,6 +6666,34 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx))
.{ .vp_d, .mins }
else if (self.hasFeature(.sse4_1))
.{ .p_d, .mins }
else
null,
.unsigned => if (self.hasFeature(.avx))
.{ .vp_d, .minu }
else if (self.hasFeature(.sse4_1))
.{ .p_d, .minu }
else
null,
},
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx))
.{ .vp_d, .maxs }
else if (self.hasFeature(.sse4_1))
.{ .p_d, .maxs }
else
null,
.unsigned => if (self.hasFeature(.avx))
.{ .vp_d, .maxu }
else if (self.hasFeature(.sse4_1))
.{ .p_d, .maxu }
else
null,
},
else => null,
},
5...8 => switch (air_tag) {
@@ -6617,6 +6709,14 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
.min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
.unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null,
},
.max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
.signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
},
else => null,
},
else => null,

View File

@@ -280,6 +280,7 @@ pub const Mnemonic = enum {
mulps, mulss,
orps,
pextrw, pinsrw,
pmaxsw, pmaxub, pminsw, pminub,
shufps,
sqrtps, sqrtss,
subps, subss,
@@ -318,6 +319,7 @@ pub const Mnemonic = enum {
insertps,
pextrb, pextrd, pextrq,
pinsrb, pinsrd, pinsrq,
pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
pmulld,
roundpd, roundps, roundsd, roundss,
// AVX
@@ -349,6 +351,8 @@ pub const Mnemonic = enum {
vpand, vpandn,
vpextrb, vpextrd, vpextrq, vpextrw,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
vpmulhw, vpmulld, vpmullw,
vpor,
vpshufhw, vpshuflw,

View File

@@ -453,6 +453,14 @@ pub const Inst = struct {
/// Bitwise logical and not of packed single-precision floating-point values
/// Bitwise logical and not of packed double-precision floating-point values
andn,
/// Maximum of packed signed integers
maxs,
/// Maximum of packed unsigned integers
maxu,
/// Minimum of packed signed integers
mins,
/// Minimum of packed unsigned integers
minu,
/// Multiply packed signed integers and store low result
mull,
/// Multiply packed signed integers and store high result

View File

@@ -1011,6 +1011,14 @@ pub const table = [_]Entry{
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
.{ .pmaxsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 },
.{ .pmaxub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 },
.{ .pminsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .none, .sse2 },
.{ .pminub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 },
.{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 },
.{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
@@ -1091,6 +1099,20 @@ pub const table = [_]Entry{
.{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
.{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
.{ .pmaxsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .none, .sse4_1 },
.{ .pmaxsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .none, .sse4_1 },
.{ .pmaxuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .none, .sse4_1 },
.{ .pmaxud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .none, .sse4_1 },
.{ .pminsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .none, .sse4_1 },
.{ .pminsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .none, .sse4_1 },
.{ .pminuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .none, .sse4_1 },
.{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 },
.{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
.{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
@@ -1318,6 +1340,24 @@ pub const table = [_]Entry{
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
.{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx },
.{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx },
.{ .vpmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_128_wig, .avx },
.{ .vpmaxub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_128_wig, .avx },
.{ .vpmaxuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_128_wig, .avx },
.{ .vpmaxud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_128_wig, .avx },
.{ .vpminsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_128_wig, .avx },
.{ .vpminsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_128_wig, .avx },
.{ .vpminsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_128_wig, .avx },
.{ .vpminub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_128_wig, .avx },
.{ .vpminuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_128_wig, .avx },
.{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx },
.{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
.{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
@@ -1449,6 +1489,24 @@ pub const table = [_]Entry{
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
.{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx },
.{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx },
.{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx },
.{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx },
.{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx },
.{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx },
.{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx },
.{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx },
.{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx },
.{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx },
.{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx },
.{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx },
.{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx },
.{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx },