x86_64: finish optimizing mir tag usage

Final tag count is 95.
This commit is contained in:
Jacob Young
2023-05-08 07:35:31 -04:00
parent ecb5feaf94
commit 1f5aa7747f
2 changed files with 266 additions and 466 deletions

View File

@@ -2443,7 +2443,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(src_ty, src_mcv);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
mat_src_reg.to128(),
Immediate.u(0b1_00),
@@ -2455,12 +2455,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
}
} else if (src_bits == 64 and dst_bits == 32) {
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
.{ ._, .vcvtsd2ss },
.{ .v_, .cvtsd2ss },
dst_reg,
dst_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegisterRegister(
.{ ._, .vcvtsd2ss },
.{ .v_, .cvtsd2ss },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -2506,22 +2506,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(src_ty, src_mcv);
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128());
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
switch (dst_bits) {
32 => {},
64 => try self.asmRegisterRegisterRegister(.{ ._, .vcvtss2sd }, dst_reg, dst_reg, dst_reg),
64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg),
else => return self.fail("TODO implement airFpext from {} to {}", .{
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
}),
}
} else if (src_bits == 32 and dst_bits == 64) {
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
.{ ._, .vcvtss2sd },
.{ .v_, .cvtss2sd },
dst_reg,
dst_reg,
src_mcv.mem(.dword),
) else try self.asmRegisterRegisterRegister(
.{ ._, .vcvtss2sd },
.{ .v_, .cvtss2sd },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -4678,8 +4678,8 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
try self.genBinOpMir(switch (ty_bits) {
// No point using an extra prefix byte for *pd which performs the same operation.
16, 32, 64, 128 => switch (tag) {
.neg => .{ ._, .xorps },
.fabs => .{ ._, .andnps },
.neg => .{ ._ps, .xor },
.fabs => .{ ._ps, .andn },
else => unreachable,
},
80 => return self.fail("TODO implement airFloatSign for {}", .{
@@ -4712,23 +4712,23 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4
const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
32 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss },
64 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd },
32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
32 => switch (ty.vectorLen()) {
1 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss },
2...4 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else .{ ._, .roundps },
5...8 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else null,
1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
else => null,
},
64 => switch (ty.vectorLen()) {
1 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd },
2 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else .{ ._, .roundpd },
3...4 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else null,
1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
else => null,
},
16, 80, 128 => null,
@@ -4743,8 +4743,8 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4
const abi_size = @intCast(u32, ty.abiSize(self.target.*));
const dst_alias = registerAlias(dst_reg, abi_size);
switch (mir_tag[1]) {
.vroundss, .vroundsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
switch (mir_tag[0]) {
.v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
mir_tag,
dst_alias,
dst_alias,
@@ -4799,18 +4799,18 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv);
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128());
try self.asmRegisterRegisterRegister(.{ ._, .vsqrtss }, dst_reg, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
);
break :result dst_mcv;
} else null,
32 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss },
64 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd },
32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
80, 128 => null,
else => unreachable,
},
@@ -4819,7 +4819,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) {
1 => {
try self.asmRegisterRegister(
.{ ._, .vcvtph2ps },
.{ .v_, .cvtph2ps },
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -4827,13 +4827,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
try self.asmRegisterRegisterRegister(
.{ ._, .vsqrtss },
.{ .v_ss, .sqrt },
dst_reg,
dst_reg,
dst_reg,
);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -4843,22 +4843,22 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
2...8 => {
const wide_reg = registerAlias(dst_reg, abi_size * 2);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .vcvtph2ps },
.{ .v_, .cvtph2ps },
wide_reg,
src_mcv.mem(Memory.PtrSize.fromSize(
@intCast(u32, @divExact(wide_reg.bitSize(), 16)),
)),
) else try self.asmRegisterRegister(
.{ ._, .vcvtph2ps },
.{ .v_, .cvtph2ps },
wide_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ ._, .vsqrtps }, wide_reg, wide_reg);
try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
wide_reg,
Immediate.u(0b1_00),
@@ -4868,15 +4868,15 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
else => null,
} else null,
32 => switch (ty.vectorLen()) {
1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss },
2...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else .{ ._, .sqrtps },
5...8 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else null,
1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
else => null,
},
64 => switch (ty.vectorLen()) {
1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd },
2 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else .{ ._, .sqrtpd },
3...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else null,
1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
else => null,
},
80, 128 => null,
@@ -4888,8 +4888,8 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
})) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{
ty.fmt(self.bin_file.options.module.?),
});
switch (mir_tag[1]) {
.vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
switch (mir_tag[0]) {
.v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
mir_tag,
dst_reg,
dst_reg,
@@ -6325,13 +6325,13 @@ fn genBinOp(
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.{ ._, .vpinsrw },
.{ .vp_w, .insr },
dst_reg,
dst_reg,
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ ._, .vpunpcklwd },
.{ .vp_, .unpcklwd },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -6339,15 +6339,15 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ ._, .vaddss },
.sub => .{ ._, .vsubss },
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss },
.max => .{ ._, .vmaxss },
.min => .{ ._, .vmaxss },
.add => .{ .v_ss, .add },
.sub => .{ .v_ss, .sub },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
.max => .{ .v_ss, .max },
.min => .{ .v_ss, .max },
else => unreachable,
},
dst_reg,
@@ -6355,7 +6355,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -6363,29 +6363,29 @@ fn genBinOp(
return dst_mcv;
} else null,
32 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss },
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss },
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss },
.add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
.sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
.mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
=> if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss },
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss },
.min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss },
=> if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
.max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
.min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
else => unreachable,
},
64 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd },
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd },
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd },
.add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
.sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
.mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
=> if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd },
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd },
.min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd },
=> if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
.max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
.min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
else => unreachable,
},
80, 128 => null,
@@ -6401,13 +6401,13 @@ fn genBinOp(
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.{ ._, .vpinsrw },
.{ .vp_w, .insr },
dst_reg,
dst_reg,
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ ._, .vpunpcklwd },
.{ .vp_, .unpcklwd },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -6415,15 +6415,15 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ ._, .vaddss },
.sub => .{ ._, .vsubss },
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss },
.max => .{ ._, .vmaxss },
.min => .{ ._, .vmaxss },
.add => .{ .v_ss, .add },
.sub => .{ .v_ss, .sub },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
.max => .{ .v_ss, .max },
.min => .{ .v_ss, .max },
else => unreachable,
},
dst_reg,
@@ -6431,7 +6431,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -6444,12 +6444,12 @@ fn genBinOp(
defer self.register_manager.unlockReg(tmp_lock);
if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
.{ ._, .vpinsrd },
.{ .vp_d, .insr },
dst_reg,
src_mcv.mem(.dword),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ ._, .vunpcklps },
.{ .v_ps, .unpckl },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -6457,20 +6457,20 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
try self.asmRegisterRegisterRegister(
.{ ._, .vmovhlps },
.{ .v_ps, .movhl },
tmp_reg,
dst_reg,
dst_reg,
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ ._, .vaddps },
.sub => .{ ._, .vsubps },
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
.max => .{ ._, .vmaxps },
.min => .{ ._, .vmaxps },
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg,
@@ -6478,7 +6478,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -6490,13 +6490,13 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .vcvtph2ps },
.{ .v_, .cvtph2ps },
tmp_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
.{ ._, .vcvtph2ps },
.{ .v_, .cvtph2ps },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -6505,11 +6505,11 @@ fn genBinOp(
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ ._, .vaddps },
.sub => .{ ._, .vsubps },
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
.max => .{ ._, .vmaxps },
.min => .{ ._, .vmaxps },
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg,
@@ -6517,7 +6517,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg,
Immediate.u(0b1_00),
@@ -6529,13 +6529,13 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg.to256(), dst_reg);
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
.{ ._, .vcvtph2ps },
.{ .v_, .cvtph2ps },
tmp_reg,
src_mcv.mem(.xword),
) else try self.asmRegisterRegister(
.{ ._, .vcvtph2ps },
.{ .v_, .cvtph2ps },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -6544,11 +6544,11 @@ fn genBinOp(
);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ ._, .vaddps },
.sub => .{ ._, .vsubps },
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
.max => .{ ._, .vmaxps },
.min => .{ ._, .vmaxps },
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .max },
else => unreachable,
},
dst_reg.to256(),
@@ -6556,7 +6556,7 @@ fn genBinOp(
tmp_reg,
);
try self.asmRegisterRegisterImmediate(
.{ ._, .vcvtps2ph },
.{ .v_, .cvtps2ph },
dst_reg,
dst_reg.to256(),
Immediate.u(0b1_00),
@@ -6567,76 +6567,76 @@ fn genBinOp(
} else null,
32 => switch (lhs_ty.vectorLen()) {
1 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss },
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss },
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss },
.add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
.sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
.mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
=> if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss },
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss },
.min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss },
=> if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
.max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
.min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
else => unreachable,
},
2...4 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .{ ._, .vaddps } else .{ ._, .addps },
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubps } else .{ ._, .subps },
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulps } else .{ ._, .mulps },
.add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add },
.sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub },
.mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
=> if (self.hasFeature(.avx)) .{ ._, .vdivps } else .{ ._, .divps },
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxps } else .{ ._, .maxps },
.min => if (self.hasFeature(.avx)) .{ ._, .vminps } else .{ ._, .minps },
=> if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
.max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
.min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
else => unreachable,
},
5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
.add => .{ ._, .vaddps },
.sub => .{ ._, .vsubps },
.mul => .{ ._, .vmulps },
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
.max => .{ ._, .vmaxps },
.min => .{ ._, .vminps },
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
.mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
.min => .{ .v_ps, .min },
else => unreachable,
} else null,
else => null,
},
64 => switch (lhs_ty.vectorLen()) {
1 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd },
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd },
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd },
.add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
.sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
.mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
=> if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd },
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd },
.min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd },
=> if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
.max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
.min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
else => unreachable,
},
2 => switch (air_tag) {
.add => if (self.hasFeature(.avx)) .{ ._, .vaddpd } else .{ ._, .addpd },
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubpd } else .{ ._, .subpd },
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulpd } else .{ ._, .mulpd },
.add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add },
.sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub },
.mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul },
.div_float,
.div_trunc,
.div_floor,
.div_exact,
=> if (self.hasFeature(.avx)) .{ ._, .vdivpd } else .{ ._, .divpd },
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxpd } else .{ ._, .maxpd },
.min => if (self.hasFeature(.avx)) .{ ._, .vminpd } else .{ ._, .minpd },
=> if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
.max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
.min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
else => unreachable,
},
3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
.add => .{ ._, .vaddpd },
.sub => .{ ._, .vsubpd },
.mul => .{ ._, .vmulpd },
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivpd },
.max => .{ ._, .vmaxpd },
.min => .{ ._, .vminpd },
.add => .{ .v_pd, .add },
.sub => .{ .v_pd, .sub },
.mul => .{ .v_pd, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
.max => .{ .v_pd, .max },
.min => .{ .v_pd, .min },
else => unreachable,
} else null,
else => null,
@@ -7563,13 +7563,13 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
defer self.register_manager.unlockReg(tmp2_lock);
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.{ ._, .vpinsrw },
.{ .vp_w, .insr },
tmp1_reg,
dst_reg.to128(),
src_mcv.mem(.word),
Immediate.u(1),
) else try self.asmRegisterRegisterRegister(
.{ ._, .vpunpcklwd },
.{ .vp_, .unpcklwd },
tmp1_reg,
dst_reg.to128(),
(if (src_mcv.isRegister())
@@ -7577,20 +7577,20 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
else
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, tmp1_reg, tmp1_reg);
try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp2_reg, tmp1_reg);
try self.genBinOpMir(.{ ._, .ucomiss }, ty, tmp1_mcv, tmp2_mcv);
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
} else return self.fail("TODO implement airCmp for {}", .{
ty.fmt(self.bin_file.options.module.?),
}),
32 => try self.genBinOpMir(
.{ ._, .ucomiss },
.{ ._ss, .ucomi },
ty,
.{ .register = dst_reg },
src_mcv,
),
64 => try self.genBinOpMir(
.{ ._, .ucomisd },
.{ ._sd, .ucomi },
ty,
.{ .register = dst_reg },
src_mcv,
@@ -8573,42 +8573,42 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
else => return .{ ._, .mov },
.Float => switch (ty.floatBits(self.target.*)) {
16 => unreachable, // needs special handling
32 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
64 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
128 => return if (self.hasFeature(.avx))
if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
else => {},
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
16 => switch (ty.vectorLen()) {
1 => unreachable, // needs special handling
2 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
3...4 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
5...8 => return if (self.hasFeature(.avx))
if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
9...16 => if (self.hasFeature(.avx))
return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
else => {},
},
32 => switch (ty.vectorLen()) {
1 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
2...4 => return if (self.hasFeature(.avx))
if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
5...8 => if (self.hasFeature(.avx))
return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
else => {},
},
64 => switch (ty.vectorLen()) {
1 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
2 => return if (self.hasFeature(.avx))
if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
3...4 => if (self.hasFeature(.avx))
return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
else => {},
},
else => {},
@@ -8724,11 +8724,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point))
switch (ty.zigTypeTag()) {
else => .{ ._, .mov },
.Float, .Vector => .{ ._, .movaps },
.Float, .Vector => .{ ._ps, .mova },
}
else switch (abi_size) {
2 => return try self.asmRegisterRegisterImmediate(
if (dst_reg.class() == .floating_point) .{ ._, .pinsrw } else .{ ._, .pextrw },
if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr },
registerAlias(dst_reg, 4),
registerAlias(src_reg, 4),
Immediate.u(0),
@@ -8761,7 +8761,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
});
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
try self.asmRegisterMemoryImmediate(
.{ ._, .pinsrw },
.{ .p_w, .insr },
registerAlias(dst_reg, abi_size),
src_mem,
Immediate.u(0),
@@ -8794,7 +8794,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
});
return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
self.asmRegisterMemoryImmediate(
.{ ._, .pinsrw },
.{ .p_w, .insr },
registerAlias(dst_reg, abi_size),
src_mem,
Immediate.u(0),
@@ -8838,7 +8838,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
});
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
try self.asmRegisterMemoryImmediate(
.{ ._, .pinsrw },
.{ .p_w, .insr },
registerAlias(dst_reg, abi_size),
src_mem,
Immediate.u(0),
@@ -8952,7 +8952,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
);
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
try self.asmMemoryRegisterImmediate(
.{ ._, .pextrw },
.{ .p_w, .extr },
dst_mem,
src_reg.to128(),
Immediate.u(0),
@@ -9069,7 +9069,7 @@ fn genInlineMemcpyRegisterRegister(
try self.asmMemoryRegister(
switch (src_reg.class()) {
.general_purpose, .segment => .{ ._, .mov },
.floating_point => .{ ._, .movss },
.floating_point => .{ ._ss, .mov },
},
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }),
registerAlias(src_reg, abi_size),
@@ -10197,21 +10197,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 }))
switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
32 => .{ ._, .vfmadd132ss },
64 => .{ ._, .vfmadd132sd },
32 => .{ .v_ss, .fmadd132 },
64 => .{ .v_sd, .fmadd132 },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
32 => switch (ty.vectorLen()) {
1 => .{ ._, .vfmadd132ss },
2...8 => .{ ._, .vfmadd132ps },
1 => .{ .v_ss, .fmadd132 },
2...8 => .{ .v_ps, .fmadd132 },
else => null,
},
64 => switch (ty.vectorLen()) {
1 => .{ ._, .vfmadd132sd },
2...4 => .{ ._, .vfmadd132pd },
1 => .{ .v_sd, .fmadd132 },
2...4 => .{ .v_pd, .fmadd132 },
else => null,
},
16, 80, 128 => null,
@@ -10224,21 +10224,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 }))
switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
32 => .{ ._, .vfmadd213ss },
64 => .{ ._, .vfmadd213sd },
32 => .{ .v_ss, .fmadd213 },
64 => .{ .v_sd, .fmadd213 },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
32 => switch (ty.vectorLen()) {
1 => .{ ._, .vfmadd213ss },
2...8 => .{ ._, .vfmadd213ps },
1 => .{ .v_ss, .fmadd213 },
2...8 => .{ .v_ps, .fmadd213 },
else => null,
},
64 => switch (ty.vectorLen()) {
1 => .{ ._, .vfmadd213sd },
2...4 => .{ ._, .vfmadd213pd },
1 => .{ .v_sd, .fmadd213 },
2...4 => .{ .v_pd, .fmadd213 },
else => null,
},
16, 80, 128 => null,
@@ -10251,21 +10251,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 }))
switch (ty.zigTypeTag()) {
.Float => switch (ty.floatBits(self.target.*)) {
32 => .{ ._, .vfmadd231ss },
64 => .{ ._, .vfmadd231sd },
32 => .{ .v_ss, .fmadd231 },
64 => .{ .v_sd, .fmadd231 },
16, 80, 128 => null,
else => unreachable,
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
32 => switch (ty.vectorLen()) {
1 => .{ ._, .vfmadd231ss },
2...8 => .{ ._, .vfmadd231ps },
1 => .{ .v_ss, .fmadd231 },
2...8 => .{ .v_ps, .fmadd231 },
else => null,
},
64 => switch (ty.vectorLen()) {
1 => .{ ._, .vfmadd231sd },
2...4 => .{ ._, .vfmadd231pd },
1 => .{ .v_sd, .fmadd231 },
2...4 => .{ .v_pd, .fmadd231 },
else => null,
},
16, 80, 128 => null,

View File

@@ -278,8 +278,14 @@ pub const Inst = struct {
/// Add with carry
adc,
/// Add
/// Add packed single-precision floating-point values
/// Add scalar single-precision floating-point values
/// Add packed double-precision floating-point values
/// Add scalar double-precision floating-point values
add,
/// Logical and
/// Bitwise logical and of packed single-precision floating-point values
/// Bitwise logical and of packed double-precision floating-point values
@"and",
/// Bit scan forward
bsf,
@@ -304,6 +310,8 @@ pub const Inst = struct {
cmov,
/// Logical compare
/// Compare string
/// Compare scalar single-precision floating-point values
/// Compare scalar double-precision floating-point values
cmp,
/// Compare and exchange
/// Compare and exchange bytes
@@ -316,6 +324,10 @@ pub const Inst = struct {
cwde,
/// Unsigned division
/// Signed division
/// Divide packed single-precision floating-point values
/// Divide scalar single-precision floating-point values
/// Divide packed double-precision floating-point values
/// Divide scalar double-precision floating-point values
div,
    /// Call to interrupt procedure (breakpoint trap)
int3,
@@ -339,6 +351,8 @@ pub const Inst = struct {
mfence,
/// Move
/// Move data from string to string
/// Move scalar single-precision floating-point value
/// Move scalar double-precision floating-point value
/// Move doubleword
/// Move quadword
mov,
@@ -350,6 +364,10 @@ pub const Inst = struct {
movzx,
/// Multiply
/// Signed multiplication
/// Multiply packed single-precision floating-point values
/// Multiply scalar single-precision floating-point values
/// Multiply packed double-precision floating-point values
/// Multiply scalar double-precision floating-point values
mul,
/// Two's complement negation
neg,
@@ -358,6 +376,8 @@ pub const Inst = struct {
/// One's complement negation
not,
/// Logical or
/// Bitwise logical or of packed single-precision floating-point values
/// Bitwise logical or of packed double-precision floating-point values
@"or",
/// Pop
pop,
@@ -390,6 +410,10 @@ pub const Inst = struct {
/// Double precision shift right
sh,
/// Subtract
/// Subtract packed single-precision floating-point values
/// Subtract scalar single-precision floating-point values
/// Subtract packed double-precision floating-point values
/// Subtract scalar double-precision floating-point values
sub,
/// Store string
sto,
@@ -406,145 +430,88 @@ pub const Inst = struct {
/// Exchange register/memory with register
xchg,
/// Logical exclusive-or
/// Bitwise logical xor of packed single-precision floating-point values
/// Bitwise logical xor of packed double-precision floating-point values
xor,
/// Add packed single-precision floating-point values
addps,
/// Add scalar single-precision floating-point values
addss,
    /// Bitwise logical and of packed single-precision floating-point values
andps,
/// Bitwise logical and not of packed single precision floating-point values
andnps,
/// Compare scalar single-precision floating-point values
cmpss,
/// Bitwise logical and not of packed single-precision floating-point values
/// Bitwise logical and not of packed double-precision floating-point values
andn,
/// Convert doubleword integer to scalar single-precision floating-point value
cvtsi2ss,
/// Divide packed single-precision floating-point values
divps,
/// Divide scalar single-precision floating-point values
divss,
/// Maximum of packed single-precision floating-point values
maxps,
/// Maximum of scalar single-precision floating-point values
maxss,
/// Maximum of packed double-precision floating-point values
/// Maximum of scalar double-precision floating-point values
max,
/// Minimum of packed single-precision floating-point values
minps,
/// Minimum of scalar single-precision floating-point values
minss,
/// Minimum of packed double-precision floating-point values
/// Minimum of scalar double-precision floating-point values
min,
/// Move aligned packed single-precision floating-point values
movaps,
/// Move aligned packed double-precision floating-point values
mova,
/// Move packed single-precision floating-point values high to low
movhlps,
/// Move scalar single-precision floating-point value
movss,
movhl,
/// Move unaligned packed single-precision floating-point values
movups,
/// Multiply packed single-precision floating-point values
mulps,
/// Multiply scalar single-precision floating-point values
mulss,
    /// Bitwise logical or of packed single-precision floating-point values
orps,
/// Move unaligned packed double-precision floating-point values
movu,
/// Extract byte
/// Extract word
pextrw,
/// Extract doubleword
/// Extract quadword
extr,
/// Insert byte
/// Insert word
pinsrw,
/// Insert doubleword
/// Insert quadword
insr,
/// Square root of packed single-precision floating-point values
sqrtps,
/// Square root of scalar single-precision floating-point value
sqrtss,
/// Subtract packed single-precision floating-point values
subps,
/// Subtract scalar single-precision floating-point values
subss,
/// Square root of packed double-precision floating-point values
/// Square root of scalar double-precision floating-point value
sqrt,
/// Unordered compare scalar single-precision floating-point values
ucomiss,
/// Unordered compare scalar double-precision floating-point values
ucomi,
/// Unpack and interleave high packed single-precision floating-point values
unpckhps,
/// Unpack and interleave high packed double-precision floating-point values
unpckh,
/// Unpack and interleave low packed single-precision floating-point values
unpcklps,
    /// Bitwise logical xor of packed single-precision floating-point values
xorps,
/// Unpack and interleave low packed double-precision floating-point values
unpckl,
/// Add packed double-precision floating-point values
addpd,
/// Add scalar double-precision floating-point values
addsd,
    /// Bitwise logical and not of packed double-precision floating-point values
andnpd,
    /// Bitwise logical and of packed double-precision floating-point values
andpd,
/// Compare scalar double-precision floating-point values
cmpsd,
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
cvtsd2ss,
/// Convert doubleword integer to scalar double-precision floating-point value
cvtsi2sd,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
cvtss2sd,
/// Divide packed double-precision floating-point values
divpd,
/// Divide scalar double-precision floating-point values
divsd,
/// Maximum of packed double-precision floating-point values
maxpd,
/// Maximum of scalar double-precision floating-point values
maxsd,
/// Minimum of packed double-precision floating-point values
minpd,
/// Minimum of scalar double-precision floating-point values
minsd,
/// Move scalar double-precision floating-point value
movsd,
/// Multiply packed double-precision floating-point values
mulpd,
/// Multiply scalar double-precision floating-point values
mulsd,
    /// Bitwise logical or of packed double-precision floating-point values
orpd,
/// Shuffle packed high words
pshufhw,
shufh,
/// Shuffle packed low words
pshuflw,
shufl,
/// Shift packed data right logical
psrld,
/// Shift packed data right logical
psrlq,
/// Shift packed data right logical
psrlw,
srl,
/// Unpack high data
punpckhbw,
unpckhbw,
/// Unpack high data
punpckhdq,
unpckhdq,
/// Unpack high data
punpckhqdq,
unpckhqdq,
/// Unpack high data
punpckhwd,
unpckhwd,
/// Unpack low data
punpcklbw,
unpcklbw,
/// Unpack low data
punpckldq,
unpckldq,
/// Unpack low data
punpcklqdq,
unpcklqdq,
/// Unpack low data
punpcklwd,
    /// Square root of packed double-precision floating-point values
sqrtpd,
    /// Square root of scalar double-precision floating-point value
sqrtsd,
/// Subtract packed double-precision floating-point values
subpd,
/// Subtract scalar double-precision floating-point values
subsd,
/// Unordered compare scalar double-precision floating-point values
ucomisd,
/// Unpack and interleave high packed double-precision floating-point values
unpckhpd,
/// Unpack and interleave low packed double-precision floating-point values
unpcklpd,
    /// Bitwise logical xor of packed double-precision floating-point values
xorpd,
unpcklwd,
/// Replicate double floating-point values
movddup,
@@ -553,199 +520,32 @@ pub const Inst = struct {
/// Replicate single floating-point values
movsldup,
/// Extract Byte
pextrb,
/// Extract Doubleword
pextrd,
/// Extract Quadword
pextrq,
/// Insert Byte
pinsrb,
/// Insert Doubleword
pinsrd,
/// Insert Quadword
pinsrq,
/// Round packed double-precision floating-point values
roundpd,
/// Round packed single-precision floating-point values
roundps,
/// Round scalar double-precision floating-point value
roundsd,
/// Round scalar single-precision floating-point value
roundss,
/// Add packed double-precision floating-point values
vaddpd,
/// Add packed single-precision floating-point values
vaddps,
/// Add scalar double-precision floating-point values
vaddsd,
/// Add scalar single-precision floating-point values
vaddss,
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
vcvtsd2ss,
/// Convert doubleword integer to scalar double-precision floating-point value
vcvtsi2sd,
/// Convert doubleword integer to scalar single-precision floating-point value
vcvtsi2ss,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
vcvtss2sd,
/// Divide packed double-precision floating-point values
vdivpd,
/// Divide packed single-precision floating-point values
vdivps,
/// Divide scalar double-precision floating-point values
vdivsd,
/// Divide scalar single-precision floating-point values
vdivss,
/// Maximum of packed double-precision floating-point values
vmaxpd,
/// Maximum of packed single-precision floating-point values
vmaxps,
/// Maximum of scalar double-precision floating-point values
vmaxsd,
/// Maximum of scalar single-precision floating-point values
vmaxss,
/// Minimum of packed double-precision floating-point values
vminpd,
/// Minimum of packed single-precision floating-point values
vminps,
/// Minimum of scalar double-precision floating-point values
vminsd,
/// Minimum of scalar single-precision floating-point values
vminss,
/// Move aligned packed double-precision floating-point values
vmovapd,
/// Move aligned packed single-precision floating-point values
vmovaps,
/// Move packed single-precision floating-point values high to low
vmovhlps,
/// Replicate double floating-point values
vmovddup,
/// Move or merge scalar double-precision floating-point value
vmovsd,
/// Replicate single floating-point values
vmovshdup,
/// Replicate single floating-point values
vmovsldup,
/// Move or merge scalar single-precision floating-point value
vmovss,
/// Move unaligned packed double-precision floating-point values
vmovupd,
/// Move unaligned packed single-precision floating-point values
vmovups,
/// Multiply packed double-precision floating-point values
vmulpd,
/// Multiply packed single-precision floating-point values
vmulps,
/// Multiply scalar double-precision floating-point values
vmulsd,
/// Multiply scalar single-precision floating-point values
vmulss,
/// Extract Byte
vpextrb,
/// Extract Doubleword
vpextrd,
/// Extract Quadword
vpextrq,
/// Extract word
vpextrw,
/// Insert Byte
vpinsrb,
/// Insert Doubleword
vpinsrd,
/// Insert Quadword
vpinsrq,
/// Insert word
vpinsrw,
/// Shuffle packed high words
vpshufhw,
/// Shuffle packed low words
vpshuflw,
/// Shift packed data right logical
vpsrld,
/// Shift packed data right logical
vpsrlq,
/// Shift packed data right logical
vpsrlw,
/// Unpack high data
vpunpckhbw,
/// Unpack high data
vpunpckhdq,
/// Unpack high data
vpunpckhqdq,
/// Unpack high data
vpunpckhwd,
/// Unpack low data
vpunpcklbw,
/// Unpack low data
vpunpckldq,
/// Unpack low data
vpunpcklqdq,
/// Unpack low data
vpunpcklwd,
/// Round packed double-precision floating-point values
vroundpd,
/// Round packed single-precision floating-point values
vroundps,
/// Round scalar double-precision floating-point value
vroundsd,
/// Round scalar single-precision floating-point value
vroundss,
/// Square root of packed double-precision floating-point value
vsqrtpd,
/// Square root of packed single-precision floating-point value
vsqrtps,
/// Square root of scalar double-precision floating-point value
vsqrtsd,
/// Square root of scalar single-precision floating-point value
vsqrtss,
/// Subtract packed double-precision floating-point values
vsubpd,
/// Subtract packed single-precision floating-point values
vsubps,
/// Subtract scalar double-precision floating-point values
vsubsd,
/// Subtract scalar single-precision floating-point values
vsubss,
/// Unpack and interleave high packed double-precision floating-point values
vunpckhpd,
/// Unpack and interleave high packed single-precision floating-point values
vunpckhps,
/// Unpack and interleave low packed double-precision floating-point values
vunpcklpd,
/// Unpack and interleave low packed single-precision floating-point values
vunpcklps,
round,
/// Convert 16-bit floating-point values to single-precision floating-point values
vcvtph2ps,
cvtph2ps,
/// Convert single-precision floating-point values to 16-bit floating-point values
vcvtps2ph,
cvtps2ph,
/// Fused multiply-add of packed double-precision floating-point values
vfmadd132pd,
/// Fused multiply-add of packed double-precision floating-point values
vfmadd213pd,
/// Fused multiply-add of packed double-precision floating-point values
vfmadd231pd,
/// Fused multiply-add of packed single-precision floating-point values
vfmadd132ps,
/// Fused multiply-add of scalar single-precision floating-point values
/// Fused multiply-add of packed double-precision floating-point values
/// Fused multiply-add of scalar double-precision floating-point values
fmadd132,
/// Fused multiply-add of packed single-precision floating-point values
vfmadd213ps,
/// Fused multiply-add of scalar single-precision floating-point values
/// Fused multiply-add of packed double-precision floating-point values
/// Fused multiply-add of scalar double-precision floating-point values
fmadd213,
/// Fused multiply-add of packed single-precision floating-point values
vfmadd231ps,
/// Fused multiply-add of scalar double-precision floating-point values
vfmadd132sd,
/// Fused multiply-add of scalar double-precision floating-point values
vfmadd213sd,
/// Fused multiply-add of scalar double-precision floating-point values
vfmadd231sd,
/// Fused multiply-add of scalar single-precision floating-point values
vfmadd132ss,
/// Fused multiply-add of scalar single-precision floating-point values
vfmadd213ss,
/// Fused multiply-add of scalar single-precision floating-point values
vfmadd231ss,
/// Fused multiply-add of packed double-precision floating-point values
/// Fused multiply-add of scalar double-precision floating-point values
fmadd231,
/// A pseudo instruction that requires special lowering.
/// This should be the only tag in this enum that doesn't