x86_64: finish optimizing mir tag usage
Final tag count is 95.
This commit is contained in:
@@ -2443,7 +2443,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
|
||||
else
|
||||
try self.copyToTmpRegister(src_ty, src_mcv);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
mat_src_reg.to128(),
|
||||
Immediate.u(0b1_00),
|
||||
@@ -2455,12 +2455,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
|
||||
}
|
||||
} else if (src_bits == 64 and dst_bits == 32) {
|
||||
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
.{ ._, .vcvtsd2ss },
|
||||
.{ .v_, .cvtsd2ss },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(.qword),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
.{ ._, .vcvtsd2ss },
|
||||
.{ .v_, .cvtsd2ss },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
@@ -2506,22 +2506,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(src_ty, src_mcv);
|
||||
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128());
|
||||
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
|
||||
switch (dst_bits) {
|
||||
32 => {},
|
||||
64 => try self.asmRegisterRegisterRegister(.{ ._, .vcvtss2sd }, dst_reg, dst_reg, dst_reg),
|
||||
64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg),
|
||||
else => return self.fail("TODO implement airFpext from {} to {}", .{
|
||||
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
}
|
||||
} else if (src_bits == 32 and dst_bits == 64) {
|
||||
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
.{ ._, .vcvtss2sd },
|
||||
.{ .v_, .cvtss2sd },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(.dword),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
.{ ._, .vcvtss2sd },
|
||||
.{ .v_, .cvtss2sd },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
@@ -4678,8 +4678,8 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
|
||||
try self.genBinOpMir(switch (ty_bits) {
|
||||
// No point using an extra prefix byte for *pd which performs the same operation.
|
||||
16, 32, 64, 128 => switch (tag) {
|
||||
.neg => .{ ._, .xorps },
|
||||
.fabs => .{ ._, .andnps },
|
||||
.neg => .{ ._ps, .xor },
|
||||
.fabs => .{ ._ps, .andn },
|
||||
else => unreachable,
|
||||
},
|
||||
80 => return self.fail("TODO implement airFloatSign for {}", .{
|
||||
@@ -4712,23 +4712,23 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4
|
||||
|
||||
const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) {
|
||||
.Float => switch (ty.floatBits(self.target.*)) {
|
||||
32 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss },
|
||||
64 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd },
|
||||
32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
|
||||
64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (ty.childType().zigTypeTag()) {
|
||||
.Float => switch (ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (ty.vectorLen()) {
|
||||
1 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss },
|
||||
2...4 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else .{ ._, .roundps },
|
||||
5...8 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else null,
|
||||
1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round },
|
||||
2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round },
|
||||
5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null,
|
||||
else => null,
|
||||
},
|
||||
64 => switch (ty.vectorLen()) {
|
||||
1 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd },
|
||||
2 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else .{ ._, .roundpd },
|
||||
3...4 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else null,
|
||||
1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round },
|
||||
2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round },
|
||||
3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null,
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
@@ -4743,8 +4743,8 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4
|
||||
|
||||
const abi_size = @intCast(u32, ty.abiSize(self.target.*));
|
||||
const dst_alias = registerAlias(dst_reg, abi_size);
|
||||
switch (mir_tag[1]) {
|
||||
.vroundss, .vroundsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
switch (mir_tag[0]) {
|
||||
.v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
dst_alias,
|
||||
@@ -4799,18 +4799,18 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv);
|
||||
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128());
|
||||
try self.asmRegisterRegisterRegister(.{ ._, .vsqrtss }, dst_reg, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
|
||||
try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
Immediate.u(0b1_00),
|
||||
);
|
||||
break :result dst_mcv;
|
||||
} else null,
|
||||
32 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss },
|
||||
64 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd },
|
||||
32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
|
||||
64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
|
||||
80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
@@ -4819,7 +4819,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) {
|
||||
1 => {
|
||||
try self.asmRegisterRegister(
|
||||
.{ ._, .vcvtph2ps },
|
||||
.{ .v_, .cvtph2ps },
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
@@ -4827,13 +4827,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
try self.copyToTmpRegister(ty, src_mcv)).to128(),
|
||||
);
|
||||
try self.asmRegisterRegisterRegister(
|
||||
.{ ._, .vsqrtss },
|
||||
.{ .v_ss, .sqrt },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
Immediate.u(0b1_00),
|
||||
@@ -4843,22 +4843,22 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
2...8 => {
|
||||
const wide_reg = registerAlias(dst_reg, abi_size * 2);
|
||||
if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
.{ ._, .vcvtph2ps },
|
||||
.{ .v_, .cvtph2ps },
|
||||
wide_reg,
|
||||
src_mcv.mem(Memory.PtrSize.fromSize(
|
||||
@intCast(u32, @divExact(wide_reg.bitSize(), 16)),
|
||||
)),
|
||||
) else try self.asmRegisterRegister(
|
||||
.{ ._, .vcvtph2ps },
|
||||
.{ .v_, .cvtph2ps },
|
||||
wide_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv)).to128(),
|
||||
);
|
||||
try self.asmRegisterRegister(.{ ._, .vsqrtps }, wide_reg, wide_reg);
|
||||
try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
wide_reg,
|
||||
Immediate.u(0b1_00),
|
||||
@@ -4868,15 +4868,15 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
else => null,
|
||||
} else null,
|
||||
32 => switch (ty.vectorLen()) {
|
||||
1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss },
|
||||
2...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else .{ ._, .sqrtps },
|
||||
5...8 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else null,
|
||||
1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt },
|
||||
2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt },
|
||||
5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null,
|
||||
else => null,
|
||||
},
|
||||
64 => switch (ty.vectorLen()) {
|
||||
1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd },
|
||||
2 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else .{ ._, .sqrtpd },
|
||||
3...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else null,
|
||||
1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt },
|
||||
2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt },
|
||||
3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null,
|
||||
else => null,
|
||||
},
|
||||
80, 128 => null,
|
||||
@@ -4888,8 +4888,8 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
|
||||
})) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{
|
||||
ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
switch (mir_tag[1]) {
|
||||
.vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
switch (mir_tag[0]) {
|
||||
.v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
|
||||
mir_tag,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
@@ -6325,13 +6325,13 @@ fn genBinOp(
|
||||
defer self.register_manager.unlockReg(tmp_lock);
|
||||
|
||||
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
.{ ._, .vpinsrw },
|
||||
.{ .vp_w, .insr },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(.word),
|
||||
Immediate.u(1),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
.{ ._, .vpunpcklwd },
|
||||
.{ .vp_, .unpcklwd },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
@@ -6339,15 +6339,15 @@ fn genBinOp(
|
||||
else
|
||||
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
|
||||
);
|
||||
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
|
||||
try self.asmRegisterRegisterRegister(
|
||||
switch (air_tag) {
|
||||
.add => .{ ._, .vaddss },
|
||||
.sub => .{ ._, .vsubss },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss },
|
||||
.max => .{ ._, .vmaxss },
|
||||
.min => .{ ._, .vmaxss },
|
||||
.add => .{ .v_ss, .add },
|
||||
.sub => .{ .v_ss, .sub },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
|
||||
.max => .{ .v_ss, .max },
|
||||
// `.min` must select the min instruction; it previously reused the `.max` tag (copy-paste slip).
.min => .{ .v_ss, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
dst_reg,
|
||||
@@ -6355,7 +6355,7 @@ fn genBinOp(
|
||||
tmp_reg,
|
||||
);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
Immediate.u(0b1_00),
|
||||
@@ -6363,29 +6363,29 @@ fn genBinOp(
|
||||
return dst_mcv;
|
||||
} else null,
|
||||
32 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss },
|
||||
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss },
|
||||
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss },
|
||||
.add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
|
||||
.sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
|
||||
.mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss },
|
||||
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss },
|
||||
.min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss },
|
||||
=> if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
|
||||
.max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
|
||||
.min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
64 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd },
|
||||
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd },
|
||||
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd },
|
||||
.add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
|
||||
.sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
|
||||
.mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd },
|
||||
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd },
|
||||
.min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd },
|
||||
=> if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
|
||||
.max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
|
||||
.min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
80, 128 => null,
|
||||
@@ -6401,13 +6401,13 @@ fn genBinOp(
|
||||
defer self.register_manager.unlockReg(tmp_lock);
|
||||
|
||||
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
.{ ._, .vpinsrw },
|
||||
.{ .vp_w, .insr },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(.word),
|
||||
Immediate.u(1),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
.{ ._, .vpunpcklwd },
|
||||
.{ .vp_, .unpcklwd },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
@@ -6415,15 +6415,15 @@ fn genBinOp(
|
||||
else
|
||||
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
|
||||
);
|
||||
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
|
||||
try self.asmRegisterRegisterRegister(
|
||||
switch (air_tag) {
|
||||
.add => .{ ._, .vaddss },
|
||||
.sub => .{ ._, .vsubss },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss },
|
||||
.max => .{ ._, .vmaxss },
|
||||
.min => .{ ._, .vmaxss },
|
||||
.add => .{ .v_ss, .add },
|
||||
.sub => .{ .v_ss, .sub },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
|
||||
.max => .{ .v_ss, .max },
|
||||
// `.min` must select the min instruction; it previously reused the `.max` tag (copy-paste slip).
.min => .{ .v_ss, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
dst_reg,
|
||||
@@ -6431,7 +6431,7 @@ fn genBinOp(
|
||||
tmp_reg,
|
||||
);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
Immediate.u(0b1_00),
|
||||
@@ -6444,12 +6444,12 @@ fn genBinOp(
|
||||
defer self.register_manager.unlockReg(tmp_lock);
|
||||
|
||||
if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
|
||||
.{ ._, .vpinsrd },
|
||||
.{ .vp_d, .insr },
|
||||
dst_reg,
|
||||
src_mcv.mem(.dword),
|
||||
Immediate.u(1),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
.{ ._, .vunpcklps },
|
||||
.{ .v_ps, .unpckl },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
@@ -6457,20 +6457,20 @@ fn genBinOp(
|
||||
else
|
||||
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
|
||||
);
|
||||
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegisterRegister(
|
||||
.{ ._, .vmovhlps },
|
||||
.{ .v_ps, .movhl },
|
||||
tmp_reg,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
);
|
||||
try self.asmRegisterRegisterRegister(
|
||||
switch (air_tag) {
|
||||
.add => .{ ._, .vaddps },
|
||||
.sub => .{ ._, .vsubps },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
|
||||
.max => .{ ._, .vmaxps },
|
||||
.min => .{ ._, .vmaxps },
|
||||
.add => .{ .v_ps, .add },
|
||||
.sub => .{ .v_ps, .sub },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
|
||||
.max => .{ .v_ps, .max },
|
||||
// `.min` must select the packed min instruction; it previously reused the `.max` tag (copy-paste slip).
.min => .{ .v_ps, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
dst_reg,
|
||||
@@ -6478,7 +6478,7 @@ fn genBinOp(
|
||||
tmp_reg,
|
||||
);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
Immediate.u(0b1_00),
|
||||
@@ -6490,13 +6490,13 @@ fn genBinOp(
|
||||
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
|
||||
defer self.register_manager.unlockReg(tmp_lock);
|
||||
|
||||
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
|
||||
if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
.{ ._, .vcvtph2ps },
|
||||
.{ .v_, .cvtph2ps },
|
||||
tmp_reg,
|
||||
src_mcv.mem(.qword),
|
||||
) else try self.asmRegisterRegister(
|
||||
.{ ._, .vcvtph2ps },
|
||||
.{ .v_, .cvtph2ps },
|
||||
tmp_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
@@ -6505,11 +6505,11 @@ fn genBinOp(
|
||||
);
|
||||
try self.asmRegisterRegisterRegister(
|
||||
switch (air_tag) {
|
||||
.add => .{ ._, .vaddps },
|
||||
.sub => .{ ._, .vsubps },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
|
||||
.max => .{ ._, .vmaxps },
|
||||
.min => .{ ._, .vmaxps },
|
||||
.add => .{ .v_ps, .add },
|
||||
.sub => .{ .v_ps, .sub },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
|
||||
.max => .{ .v_ps, .max },
|
||||
// `.min` must select the packed min instruction; it previously reused the `.max` tag (copy-paste slip).
.min => .{ .v_ps, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
dst_reg,
|
||||
@@ -6517,7 +6517,7 @@ fn genBinOp(
|
||||
tmp_reg,
|
||||
);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
Immediate.u(0b1_00),
|
||||
@@ -6529,13 +6529,13 @@ fn genBinOp(
|
||||
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
|
||||
defer self.register_manager.unlockReg(tmp_lock);
|
||||
|
||||
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg.to256(), dst_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg);
|
||||
if (src_mcv.isMemory()) try self.asmRegisterMemory(
|
||||
.{ ._, .vcvtph2ps },
|
||||
.{ .v_, .cvtph2ps },
|
||||
tmp_reg,
|
||||
src_mcv.mem(.xword),
|
||||
) else try self.asmRegisterRegister(
|
||||
.{ ._, .vcvtph2ps },
|
||||
.{ .v_, .cvtph2ps },
|
||||
tmp_reg,
|
||||
(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
@@ -6544,11 +6544,11 @@ fn genBinOp(
|
||||
);
|
||||
try self.asmRegisterRegisterRegister(
|
||||
switch (air_tag) {
|
||||
.add => .{ ._, .vaddps },
|
||||
.sub => .{ ._, .vsubps },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
|
||||
.max => .{ ._, .vmaxps },
|
||||
.min => .{ ._, .vmaxps },
|
||||
.add => .{ .v_ps, .add },
|
||||
.sub => .{ .v_ps, .sub },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
|
||||
.max => .{ .v_ps, .max },
|
||||
// `.min` must select the packed min instruction; it previously reused the `.max` tag (copy-paste slip).
.min => .{ .v_ps, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
dst_reg.to256(),
|
||||
@@ -6556,7 +6556,7 @@ fn genBinOp(
|
||||
tmp_reg,
|
||||
);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.{ ._, .vcvtps2ph },
|
||||
.{ .v_, .cvtps2ph },
|
||||
dst_reg,
|
||||
dst_reg.to256(),
|
||||
Immediate.u(0b1_00),
|
||||
@@ -6567,76 +6567,76 @@ fn genBinOp(
|
||||
} else null,
|
||||
32 => switch (lhs_ty.vectorLen()) {
|
||||
1 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss },
|
||||
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss },
|
||||
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss },
|
||||
.add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add },
|
||||
.sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub },
|
||||
.mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul },
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss },
|
||||
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss },
|
||||
.min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss },
|
||||
=> if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div },
|
||||
.max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max },
|
||||
.min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
2...4 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .{ ._, .vaddps } else .{ ._, .addps },
|
||||
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubps } else .{ ._, .subps },
|
||||
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulps } else .{ ._, .mulps },
|
||||
.add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add },
|
||||
.sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub },
|
||||
.mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul },
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .{ ._, .vdivps } else .{ ._, .divps },
|
||||
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxps } else .{ ._, .maxps },
|
||||
.min => if (self.hasFeature(.avx)) .{ ._, .vminps } else .{ ._, .minps },
|
||||
=> if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div },
|
||||
.max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max },
|
||||
.min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
5...8 => if (self.hasFeature(.avx)) switch (air_tag) {
|
||||
.add => .{ ._, .vaddps },
|
||||
.sub => .{ ._, .vsubps },
|
||||
.mul => .{ ._, .vmulps },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps },
|
||||
.max => .{ ._, .vmaxps },
|
||||
.min => .{ ._, .vminps },
|
||||
.add => .{ .v_ps, .add },
|
||||
.sub => .{ .v_ps, .sub },
|
||||
.mul => .{ .v_ps, .mul },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
|
||||
.max => .{ .v_ps, .max },
|
||||
.min => .{ .v_ps, .min },
|
||||
else => unreachable,
|
||||
} else null,
|
||||
else => null,
|
||||
},
|
||||
64 => switch (lhs_ty.vectorLen()) {
|
||||
1 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd },
|
||||
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd },
|
||||
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd },
|
||||
.add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add },
|
||||
.sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub },
|
||||
.mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul },
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd },
|
||||
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd },
|
||||
.min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd },
|
||||
=> if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div },
|
||||
.max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max },
|
||||
.min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
2 => switch (air_tag) {
|
||||
.add => if (self.hasFeature(.avx)) .{ ._, .vaddpd } else .{ ._, .addpd },
|
||||
.sub => if (self.hasFeature(.avx)) .{ ._, .vsubpd } else .{ ._, .subpd },
|
||||
.mul => if (self.hasFeature(.avx)) .{ ._, .vmulpd } else .{ ._, .mulpd },
|
||||
.add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add },
|
||||
.sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub },
|
||||
.mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul },
|
||||
.div_float,
|
||||
.div_trunc,
|
||||
.div_floor,
|
||||
.div_exact,
|
||||
=> if (self.hasFeature(.avx)) .{ ._, .vdivpd } else .{ ._, .divpd },
|
||||
.max => if (self.hasFeature(.avx)) .{ ._, .vmaxpd } else .{ ._, .maxpd },
|
||||
.min => if (self.hasFeature(.avx)) .{ ._, .vminpd } else .{ ._, .minpd },
|
||||
=> if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div },
|
||||
.max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max },
|
||||
.min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min },
|
||||
else => unreachable,
|
||||
},
|
||||
3...4 => if (self.hasFeature(.avx)) switch (air_tag) {
|
||||
.add => .{ ._, .vaddpd },
|
||||
.sub => .{ ._, .vsubpd },
|
||||
.mul => .{ ._, .vmulpd },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivpd },
|
||||
.max => .{ ._, .vmaxpd },
|
||||
.min => .{ ._, .vminpd },
|
||||
.add => .{ .v_pd, .add },
|
||||
.sub => .{ .v_pd, .sub },
|
||||
.mul => .{ .v_pd, .mul },
|
||||
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div },
|
||||
.max => .{ .v_pd, .max },
|
||||
.min => .{ .v_pd, .min },
|
||||
else => unreachable,
|
||||
} else null,
|
||||
else => null,
|
||||
@@ -7563,13 +7563,13 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
|
||||
defer self.register_manager.unlockReg(tmp2_lock);
|
||||
|
||||
if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
.{ ._, .vpinsrw },
|
||||
.{ .vp_w, .insr },
|
||||
tmp1_reg,
|
||||
dst_reg.to128(),
|
||||
src_mcv.mem(.word),
|
||||
Immediate.u(1),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
.{ ._, .vpunpcklwd },
|
||||
.{ .vp_, .unpcklwd },
|
||||
tmp1_reg,
|
||||
dst_reg.to128(),
|
||||
(if (src_mcv.isRegister())
|
||||
@@ -7577,20 +7577,20 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
|
||||
else
|
||||
try self.copyToTmpRegister(ty, src_mcv)).to128(),
|
||||
);
|
||||
try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, tmp1_reg, tmp1_reg);
|
||||
try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp2_reg, tmp1_reg);
|
||||
try self.genBinOpMir(.{ ._, .ucomiss }, ty, tmp1_mcv, tmp2_mcv);
|
||||
try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg);
|
||||
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
|
||||
try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
|
||||
} else return self.fail("TODO implement airCmp for {}", .{
|
||||
ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
32 => try self.genBinOpMir(
|
||||
.{ ._, .ucomiss },
|
||||
.{ ._ss, .ucomi },
|
||||
ty,
|
||||
.{ .register = dst_reg },
|
||||
src_mcv,
|
||||
),
|
||||
64 => try self.genBinOpMir(
|
||||
.{ ._, .ucomisd },
|
||||
.{ ._sd, .ucomi },
|
||||
ty,
|
||||
.{ .register = dst_reg },
|
||||
src_mcv,
|
||||
@@ -8573,42 +8573,42 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
|
||||
else => return .{ ._, .mov },
|
||||
.Float => switch (ty.floatBits(self.target.*)) {
|
||||
16 => unreachable, // needs special handling
|
||||
32 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
|
||||
64 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
|
||||
32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
|
||||
64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
|
||||
128 => return if (self.hasFeature(.avx))
|
||||
if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
|
||||
else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
|
||||
else => {},
|
||||
},
|
||||
.Vector => switch (ty.childType().zigTypeTag()) {
|
||||
.Float => switch (ty.childType().floatBits(self.target.*)) {
|
||||
16 => switch (ty.vectorLen()) {
|
||||
1 => unreachable, // needs special handling
|
||||
2 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
|
||||
3...4 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
|
||||
2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
|
||||
3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
|
||||
5...8 => return if (self.hasFeature(.avx))
|
||||
if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
|
||||
else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
|
||||
9...16 => if (self.hasFeature(.avx))
|
||||
return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
|
||||
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
|
||||
else => {},
|
||||
},
|
||||
32 => switch (ty.vectorLen()) {
|
||||
1 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss },
|
||||
1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
|
||||
2...4 => return if (self.hasFeature(.avx))
|
||||
if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
|
||||
else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
|
||||
5...8 => if (self.hasFeature(.avx))
|
||||
return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
|
||||
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
|
||||
else => {},
|
||||
},
|
||||
64 => switch (ty.vectorLen()) {
|
||||
1 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd },
|
||||
1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
|
||||
2 => return if (self.hasFeature(.avx))
|
||||
if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }
|
||||
else if (aligned) .{ ._, .movaps } else .{ ._, .movups },
|
||||
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
|
||||
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
|
||||
3...4 => if (self.hasFeature(.avx))
|
||||
return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups },
|
||||
return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
|
||||
else => {},
|
||||
},
|
||||
else => {},
|
||||
@@ -8724,11 +8724,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point))
|
||||
switch (ty.zigTypeTag()) {
|
||||
else => .{ ._, .mov },
|
||||
.Float, .Vector => .{ ._, .movaps },
|
||||
.Float, .Vector => .{ ._ps, .mova },
|
||||
}
|
||||
else switch (abi_size) {
|
||||
2 => return try self.asmRegisterRegisterImmediate(
|
||||
if (dst_reg.class() == .floating_point) .{ ._, .pinsrw } else .{ ._, .pextrw },
|
||||
if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr },
|
||||
registerAlias(dst_reg, 4),
|
||||
registerAlias(src_reg, 4),
|
||||
Immediate.u(0),
|
||||
@@ -8761,7 +8761,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
});
|
||||
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
|
||||
try self.asmRegisterMemoryImmediate(
|
||||
.{ ._, .pinsrw },
|
||||
.{ .p_w, .insr },
|
||||
registerAlias(dst_reg, abi_size),
|
||||
src_mem,
|
||||
Immediate.u(0),
|
||||
@@ -8794,7 +8794,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
});
|
||||
return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
|
||||
self.asmRegisterMemoryImmediate(
|
||||
.{ ._, .pinsrw },
|
||||
.{ .p_w, .insr },
|
||||
registerAlias(dst_reg, abi_size),
|
||||
src_mem,
|
||||
Immediate.u(0),
|
||||
@@ -8838,7 +8838,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
|
||||
});
|
||||
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
|
||||
try self.asmRegisterMemoryImmediate(
|
||||
.{ ._, .pinsrw },
|
||||
.{ .p_w, .insr },
|
||||
registerAlias(dst_reg, abi_size),
|
||||
src_mem,
|
||||
Immediate.u(0),
|
||||
@@ -8952,7 +8952,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
|
||||
);
|
||||
if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
|
||||
try self.asmMemoryRegisterImmediate(
|
||||
.{ ._, .pextrw },
|
||||
.{ .p_w, .extr },
|
||||
dst_mem,
|
||||
src_reg.to128(),
|
||||
Immediate.u(0),
|
||||
@@ -9069,7 +9069,7 @@ fn genInlineMemcpyRegisterRegister(
|
||||
try self.asmMemoryRegister(
|
||||
switch (src_reg.class()) {
|
||||
.general_purpose, .segment => .{ ._, .mov },
|
||||
.floating_point => .{ ._, .movss },
|
||||
.floating_point => .{ ._ss, .mov },
|
||||
},
|
||||
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }),
|
||||
registerAlias(src_reg, abi_size),
|
||||
@@ -10197,21 +10197,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
|
||||
if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 }))
|
||||
switch (ty.zigTypeTag()) {
|
||||
.Float => switch (ty.floatBits(self.target.*)) {
|
||||
32 => .{ ._, .vfmadd132ss },
|
||||
64 => .{ ._, .vfmadd132sd },
|
||||
32 => .{ .v_ss, .fmadd132 },
|
||||
64 => .{ .v_sd, .fmadd132 },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (ty.childType().zigTypeTag()) {
|
||||
.Float => switch (ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (ty.vectorLen()) {
|
||||
1 => .{ ._, .vfmadd132ss },
|
||||
2...8 => .{ ._, .vfmadd132ps },
|
||||
1 => .{ .v_ss, .fmadd132 },
|
||||
2...8 => .{ .v_ps, .fmadd132 },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (ty.vectorLen()) {
|
||||
1 => .{ ._, .vfmadd132sd },
|
||||
2...4 => .{ ._, .vfmadd132pd },
|
||||
1 => .{ .v_sd, .fmadd132 },
|
||||
2...4 => .{ .v_pd, .fmadd132 },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
@@ -10224,21 +10224,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
|
||||
else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 }))
|
||||
switch (ty.zigTypeTag()) {
|
||||
.Float => switch (ty.floatBits(self.target.*)) {
|
||||
32 => .{ ._, .vfmadd213ss },
|
||||
64 => .{ ._, .vfmadd213sd },
|
||||
32 => .{ .v_ss, .fmadd213 },
|
||||
64 => .{ .v_sd, .fmadd213 },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (ty.childType().zigTypeTag()) {
|
||||
.Float => switch (ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (ty.vectorLen()) {
|
||||
1 => .{ ._, .vfmadd213ss },
|
||||
2...8 => .{ ._, .vfmadd213ps },
|
||||
1 => .{ .v_ss, .fmadd213 },
|
||||
2...8 => .{ .v_ps, .fmadd213 },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (ty.vectorLen()) {
|
||||
1 => .{ ._, .vfmadd213sd },
|
||||
2...4 => .{ ._, .vfmadd213pd },
|
||||
1 => .{ .v_sd, .fmadd213 },
|
||||
2...4 => .{ .v_pd, .fmadd213 },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
@@ -10251,21 +10251,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void {
|
||||
else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 }))
|
||||
switch (ty.zigTypeTag()) {
|
||||
.Float => switch (ty.floatBits(self.target.*)) {
|
||||
32 => .{ ._, .vfmadd231ss },
|
||||
64 => .{ ._, .vfmadd231sd },
|
||||
32 => .{ .v_ss, .fmadd231 },
|
||||
64 => .{ .v_sd, .fmadd231 },
|
||||
16, 80, 128 => null,
|
||||
else => unreachable,
|
||||
},
|
||||
.Vector => switch (ty.childType().zigTypeTag()) {
|
||||
.Float => switch (ty.childType().floatBits(self.target.*)) {
|
||||
32 => switch (ty.vectorLen()) {
|
||||
1 => .{ ._, .vfmadd231ss },
|
||||
2...8 => .{ ._, .vfmadd231ps },
|
||||
1 => .{ .v_ss, .fmadd231 },
|
||||
2...8 => .{ .v_ps, .fmadd231 },
|
||||
else => null,
|
||||
},
|
||||
64 => switch (ty.vectorLen()) {
|
||||
1 => .{ ._, .vfmadd231sd },
|
||||
2...4 => .{ ._, .vfmadd231pd },
|
||||
1 => .{ .v_sd, .fmadd231 },
|
||||
2...4 => .{ .v_pd, .fmadd231 },
|
||||
else => null,
|
||||
},
|
||||
16, 80, 128 => null,
|
||||
|
||||
@@ -278,8 +278,14 @@ pub const Inst = struct {
|
||||
/// Add with carry
|
||||
adc,
|
||||
/// Add
|
||||
/// Add packed single-precision floating-point values
|
||||
/// Add scalar single-precision floating-point values
|
||||
/// Add packed double-precision floating-point values
|
||||
/// Add scalar double-precision floating-point values
|
||||
add,
|
||||
/// Logical and
|
||||
/// Bitwise logical and of packed single-precision floating-point values
|
||||
/// Bitwise logical and of packed double-precision floating-point values
|
||||
@"and",
|
||||
/// Bit scan forward
|
||||
bsf,
|
||||
@@ -304,6 +310,8 @@ pub const Inst = struct {
|
||||
cmov,
|
||||
/// Logical compare
|
||||
/// Compare string
|
||||
/// Compare scalar single-precision floating-point values
|
||||
/// Compare scalar double-precision floating-point values
|
||||
cmp,
|
||||
/// Compare and exchange
|
||||
/// Compare and exchange bytes
|
||||
@@ -316,6 +324,10 @@ pub const Inst = struct {
|
||||
cwde,
|
||||
/// Unsigned division
|
||||
/// Signed division
|
||||
/// Divide packed single-precision floating-point values
|
||||
/// Divide scalar single-precision floating-point values
|
||||
/// Divide packed double-precision floating-point values
|
||||
/// Divide scalar double-precision floating-point values
|
||||
div,
|
||||
///
|
||||
int3,
|
||||
@@ -339,6 +351,8 @@ pub const Inst = struct {
|
||||
mfence,
|
||||
/// Move
|
||||
/// Move data from string to string
|
||||
/// Move scalar single-precision floating-point value
|
||||
/// Move scalar double-precision floating-point value
|
||||
/// Move doubleword
|
||||
/// Move quadword
|
||||
mov,
|
||||
@@ -350,6 +364,10 @@ pub const Inst = struct {
|
||||
movzx,
|
||||
/// Multiply
|
||||
/// Signed multiplication
|
||||
/// Multiply packed single-precision floating-point values
|
||||
/// Multiply scalar single-precision floating-point values
|
||||
/// Multiply packed double-precision floating-point values
|
||||
/// Multiply scalar double-precision floating-point values
|
||||
mul,
|
||||
/// Two's complement negation
|
||||
neg,
|
||||
@@ -358,6 +376,8 @@ pub const Inst = struct {
|
||||
/// One's complement negation
|
||||
not,
|
||||
/// Logical or
|
||||
/// Bitwise logical or of packed single-precision floating-point values
|
||||
/// Bitwise logical or of packed double-precision floating-point values
|
||||
@"or",
|
||||
/// Pop
|
||||
pop,
|
||||
@@ -390,6 +410,10 @@ pub const Inst = struct {
|
||||
/// Double precision shift right
|
||||
sh,
|
||||
/// Subtract
|
||||
/// Subtract packed single-precision floating-point values
|
||||
/// Subtract scalar single-precision floating-point values
|
||||
/// Subtract packed double-precision floating-point values
|
||||
/// Subtract scalar double-precision floating-point values
|
||||
sub,
|
||||
/// Store string
|
||||
sto,
|
||||
@@ -406,145 +430,88 @@ pub const Inst = struct {
|
||||
/// Exchange register/memory with register
|
||||
xchg,
|
||||
/// Logical exclusive-or
|
||||
/// Bitwise logical xor of packed single-precision floating-point values
|
||||
/// Bitwise logical xor of packed double-precision floating-point values
|
||||
xor,
|
||||
|
||||
/// Add packed single-precision floating-point values
|
||||
addps,
|
||||
/// Add scalar single-precision floating-point values
|
||||
addss,
|
||||
/// Bitwise logical and of packed single precision floating-point values
|
||||
andps,
|
||||
/// Bitwise logical and not of packed single precision floating-point values
|
||||
andnps,
|
||||
/// Compare scalar single-precision floating-point values
|
||||
cmpss,
|
||||
/// Bitwise logical and not of packed single-precision floating-point values
|
||||
/// Bitwise logical and not of packed double-precision floating-point values
|
||||
andn,
|
||||
/// Convert doubleword integer to scalar single-precision floating-point value
|
||||
cvtsi2ss,
|
||||
/// Divide packed single-precision floating-point values
|
||||
divps,
|
||||
/// Divide scalar single-precision floating-point values
|
||||
divss,
|
||||
/// Maximum of packed single-precision floating-point values
|
||||
maxps,
|
||||
/// Maximum of scalar single-precision floating-point values
|
||||
maxss,
|
||||
/// Maximum of packed double-precision floating-point values
|
||||
/// Maximum of scalar double-precision floating-point values
|
||||
max,
|
||||
/// Minimum of packed single-precision floating-point values
|
||||
minps,
|
||||
/// Minimum of scalar single-precision floating-point values
|
||||
minss,
|
||||
/// Minimum of packed double-precision floating-point values
|
||||
/// Minimum of scalar double-precision floating-point values
|
||||
min,
|
||||
/// Move aligned packed single-precision floating-point values
|
||||
movaps,
|
||||
/// Move aligned packed double-precision floating-point values
|
||||
mova,
|
||||
/// Move packed single-precision floating-point values high to low
|
||||
movhlps,
|
||||
/// Move scalar single-precision floating-point value
|
||||
movss,
|
||||
movhl,
|
||||
/// Move unaligned packed single-precision floating-point values
|
||||
movups,
|
||||
/// Multiply packed single-precision floating-point values
|
||||
mulps,
|
||||
/// Multiply scalar single-precision floating-point values
|
||||
mulss,
|
||||
/// Bitwise logical or of packed single precision floating-point values
|
||||
orps,
|
||||
/// Move unaligned packed double-precision floating-point values
|
||||
movu,
|
||||
/// Extract byte
|
||||
/// Extract word
|
||||
pextrw,
|
||||
/// Extract doubleword
|
||||
/// Extract quadword
|
||||
extr,
|
||||
/// Insert byte
|
||||
/// Insert word
|
||||
pinsrw,
|
||||
/// Insert doubleword
|
||||
/// Insert quadword
|
||||
insr,
|
||||
/// Square root of packed single-precision floating-point values
|
||||
sqrtps,
|
||||
/// Square root of scalar single-precision floating-point value
|
||||
sqrtss,
|
||||
/// Subtract packed single-precision floating-point values
|
||||
subps,
|
||||
/// Subtract scalar single-precision floating-point values
|
||||
subss,
|
||||
/// Square root of packed double-precision floating-point values
|
||||
/// Square root of scalar double-precision floating-point value
|
||||
sqrt,
|
||||
/// Unordered compare scalar single-precision floating-point values
|
||||
ucomiss,
|
||||
/// Unordered compare scalar double-precision floating-point values
|
||||
ucomi,
|
||||
/// Unpack and interleave high packed single-precision floating-point values
|
||||
unpckhps,
|
||||
/// Unpack and interleave high packed double-precision floating-point values
|
||||
unpckh,
|
||||
/// Unpack and interleave low packed single-precision floating-point values
|
||||
unpcklps,
|
||||
/// Bitwise logical xor of packed single precision floating-point values
|
||||
xorps,
|
||||
/// Unpack and interleave low packed double-precision floating-point values
|
||||
unpckl,
|
||||
|
||||
/// Add packed double-precision floating-point values
|
||||
addpd,
|
||||
/// Add scalar double-precision floating-point values
|
||||
addsd,
|
||||
/// Bitwise logical and not of packed double precision floating-point values
|
||||
andnpd,
|
||||
/// Bitwise logical and of packed double precision floating-point values
|
||||
andpd,
|
||||
/// Compare scalar double-precision floating-point values
|
||||
cmpsd,
|
||||
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
|
||||
cvtsd2ss,
|
||||
/// Convert doubleword integer to scalar double-precision floating-point value
|
||||
cvtsi2sd,
|
||||
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
|
||||
cvtss2sd,
|
||||
/// Divide packed double-precision floating-point values
|
||||
divpd,
|
||||
/// Divide scalar double-precision floating-point values
|
||||
divsd,
|
||||
/// Maximum of packed double-precision floating-point values
|
||||
maxpd,
|
||||
/// Maximum of scalar double-precision floating-point values
|
||||
maxsd,
|
||||
/// Minimum of packed double-precision floating-point values
|
||||
minpd,
|
||||
/// Minimum of scalar double-precision floating-point values
|
||||
minsd,
|
||||
/// Move scalar double-precision floating-point value
|
||||
movsd,
|
||||
/// Multiply packed double-precision floating-point values
|
||||
mulpd,
|
||||
/// Multiply scalar double-precision floating-point values
|
||||
mulsd,
|
||||
/// Bitwise logical or of packed double precision floating-point values
|
||||
orpd,
|
||||
/// Shuffle packed high words
|
||||
pshufhw,
|
||||
shufh,
|
||||
/// Shuffle packed low words
|
||||
pshuflw,
|
||||
shufl,
|
||||
/// Shift packed data right logical
|
||||
psrld,
|
||||
/// Shift packed data right logical
|
||||
psrlq,
|
||||
/// Shift packed data right logical
|
||||
psrlw,
|
||||
srl,
|
||||
/// Unpack high data
|
||||
punpckhbw,
|
||||
unpckhbw,
|
||||
/// Unpack high data
|
||||
punpckhdq,
|
||||
unpckhdq,
|
||||
/// Unpack high data
|
||||
punpckhqdq,
|
||||
unpckhqdq,
|
||||
/// Unpack high data
|
||||
punpckhwd,
|
||||
unpckhwd,
|
||||
/// Unpack low data
|
||||
punpcklbw,
|
||||
unpcklbw,
|
||||
/// Unpack low data
|
||||
punpckldq,
|
||||
unpckldq,
|
||||
/// Unpack low data
|
||||
punpcklqdq,
|
||||
unpcklqdq,
|
||||
/// Unpack low data
|
||||
punpcklwd,
|
||||
/// Square root of double precision floating-point values
|
||||
sqrtpd,
|
||||
/// Square root of scalar double precision floating-point value
|
||||
sqrtsd,
|
||||
/// Subtract packed double-precision floating-point values
|
||||
subpd,
|
||||
/// Subtract scalar double-precision floating-point values
|
||||
subsd,
|
||||
/// Unordered compare scalar double-precision floating-point values
|
||||
ucomisd,
|
||||
/// Unpack and interleave high packed double-precision floating-point values
|
||||
unpckhpd,
|
||||
/// Unpack and interleave low packed double-precision floating-point values
|
||||
unpcklpd,
|
||||
/// Bitwise logical xor of packed double precision floating-point values
|
||||
xorpd,
|
||||
unpcklwd,
|
||||
|
||||
/// Replicate double floating-point values
|
||||
movddup,
|
||||
@@ -553,199 +520,32 @@ pub const Inst = struct {
|
||||
/// Replicate single floating-point values
|
||||
movsldup,
|
||||
|
||||
/// Extract Byte
|
||||
pextrb,
|
||||
/// Extract Doubleword
|
||||
pextrd,
|
||||
/// Extract Quadword
|
||||
pextrq,
|
||||
/// Insert Byte
|
||||
pinsrb,
|
||||
/// Insert Doubleword
|
||||
pinsrd,
|
||||
/// Insert Quadword
|
||||
pinsrq,
|
||||
/// Round packed double-precision floating-point values
|
||||
roundpd,
|
||||
/// Round packed single-precision floating-point values
|
||||
roundps,
|
||||
/// Round scalar double-precision floating-point value
|
||||
roundsd,
|
||||
/// Round scalar single-precision floating-point value
|
||||
roundss,
|
||||
|
||||
/// Add packed double-precision floating-point values
|
||||
vaddpd,
|
||||
/// Add packed single-precision floating-point values
|
||||
vaddps,
|
||||
/// Add scalar double-precision floating-point values
|
||||
vaddsd,
|
||||
/// Add scalar single-precision floating-point values
|
||||
vaddss,
|
||||
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
|
||||
vcvtsd2ss,
|
||||
/// Convert doubleword integer to scalar double-precision floating-point value
|
||||
vcvtsi2sd,
|
||||
/// Convert doubleword integer to scalar single-precision floating-point value
|
||||
vcvtsi2ss,
|
||||
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
|
||||
vcvtss2sd,
|
||||
/// Divide packed double-precision floating-point values
|
||||
vdivpd,
|
||||
/// Divide packed single-precision floating-point values
|
||||
vdivps,
|
||||
/// Divide scalar double-precision floating-point values
|
||||
vdivsd,
|
||||
/// Divide scalar single-precision floating-point values
|
||||
vdivss,
|
||||
/// Maximum of packed double-precision floating-point values
|
||||
vmaxpd,
|
||||
/// Maximum of packed single-precision floating-point values
|
||||
vmaxps,
|
||||
/// Maximum of scalar double-precision floating-point values
|
||||
vmaxsd,
|
||||
/// Maximum of scalar single-precision floating-point values
|
||||
vmaxss,
|
||||
/// Minimum of packed double-precision floating-point values
|
||||
vminpd,
|
||||
/// Minimum of packed single-precision floating-point values
|
||||
vminps,
|
||||
/// Minimum of scalar double-precision floating-point values
|
||||
vminsd,
|
||||
/// Minimum of scalar single-precision floating-point values
|
||||
vminss,
|
||||
/// Move aligned packed double-precision floating-point values
|
||||
vmovapd,
|
||||
/// Move aligned packed single-precision floating-point values
|
||||
vmovaps,
|
||||
/// Move packed single-precision floating-point values high to low
|
||||
vmovhlps,
|
||||
/// Replicate double floating-point values
|
||||
vmovddup,
|
||||
/// Move or merge scalar double-precision floating-point value
|
||||
vmovsd,
|
||||
/// Replicate single floating-point values
|
||||
vmovshdup,
|
||||
/// Replicate single floating-point values
|
||||
vmovsldup,
|
||||
/// Move or merge scalar single-precision floating-point value
|
||||
vmovss,
|
||||
/// Move unaligned packed double-precision floating-point values
|
||||
vmovupd,
|
||||
/// Move unaligned packed single-precision floating-point values
|
||||
vmovups,
|
||||
/// Multiply packed double-precision floating-point values
|
||||
vmulpd,
|
||||
/// Multiply packed single-precision floating-point values
|
||||
vmulps,
|
||||
/// Multiply scalar double-precision floating-point values
|
||||
vmulsd,
|
||||
/// Multiply scalar single-precision floating-point values
|
||||
vmulss,
|
||||
/// Extract Byte
|
||||
vpextrb,
|
||||
/// Extract Doubleword
|
||||
vpextrd,
|
||||
/// Extract Quadword
|
||||
vpextrq,
|
||||
/// Extract word
|
||||
vpextrw,
|
||||
/// Insert Byte
|
||||
vpinsrb,
|
||||
/// Insert Doubleword
|
||||
vpinsrd,
|
||||
/// Insert Quadword
|
||||
vpinsrq,
|
||||
/// Insert word
|
||||
vpinsrw,
|
||||
/// Shuffle packed high words
|
||||
vpshufhw,
|
||||
/// Shuffle packed low words
|
||||
vpshuflw,
|
||||
/// Shift packed data right logical
|
||||
vpsrld,
|
||||
/// Shift packed data right logical
|
||||
vpsrlq,
|
||||
/// Shift packed data right logical
|
||||
vpsrlw,
|
||||
/// Unpack high data
|
||||
vpunpckhbw,
|
||||
/// Unpack high data
|
||||
vpunpckhdq,
|
||||
/// Unpack high data
|
||||
vpunpckhqdq,
|
||||
/// Unpack high data
|
||||
vpunpckhwd,
|
||||
/// Unpack low data
|
||||
vpunpcklbw,
|
||||
/// Unpack low data
|
||||
vpunpckldq,
|
||||
/// Unpack low data
|
||||
vpunpcklqdq,
|
||||
/// Unpack low data
|
||||
vpunpcklwd,
|
||||
/// Round packed double-precision floating-point values
|
||||
vroundpd,
|
||||
/// Round packed single-precision floating-point values
|
||||
vroundps,
|
||||
/// Round scalar double-precision floating-point value
|
||||
vroundsd,
|
||||
/// Round scalar single-precision floating-point value
|
||||
vroundss,
|
||||
/// Square root of packed double-precision floating-point value
|
||||
vsqrtpd,
|
||||
/// Square root of packed single-precision floating-point value
|
||||
vsqrtps,
|
||||
/// Square root of scalar double-precision floating-point value
|
||||
vsqrtsd,
|
||||
/// Square root of scalar single-precision floating-point value
|
||||
vsqrtss,
|
||||
/// Subtract packed double-precision floating-point values
|
||||
vsubpd,
|
||||
/// Subtract packed single-precision floating-point values
|
||||
vsubps,
|
||||
/// Subtract scalar double-precision floating-point values
|
||||
vsubsd,
|
||||
/// Subtract scalar single-precision floating-point values
|
||||
vsubss,
|
||||
/// Unpack and interleave high packed double-precision floating-point values
|
||||
vunpckhpd,
|
||||
/// Unpack and interleave high packed single-precision floating-point values
|
||||
vunpckhps,
|
||||
/// Unpack and interleave low packed double-precision floating-point values
|
||||
vunpcklpd,
|
||||
/// Unpack and interleave low packed single-precision floating-point values
|
||||
vunpcklps,
|
||||
round,
|
||||
|
||||
/// Convert 16-bit floating-point values to single-precision floating-point values
|
||||
vcvtph2ps,
|
||||
cvtph2ps,
|
||||
/// Convert single-precision floating-point values to 16-bit floating-point values
|
||||
vcvtps2ph,
|
||||
cvtps2ph,
|
||||
|
||||
/// Fused multiply-add of packed double-precision floating-point values
|
||||
vfmadd132pd,
|
||||
/// Fused multiply-add of packed double-precision floating-point values
|
||||
vfmadd213pd,
|
||||
/// Fused multiply-add of packed double-precision floating-point values
|
||||
vfmadd231pd,
|
||||
/// Fused multiply-add of packed single-precision floating-point values
|
||||
vfmadd132ps,
|
||||
/// Fused multiply-add of scalar single-precision floating-point values
|
||||
/// Fused multiply-add of packed double-precision floating-point values
|
||||
/// Fused multiply-add of scalar double-precision floating-point values
|
||||
fmadd132,
|
||||
/// Fused multiply-add of packed single-precision floating-point values
|
||||
vfmadd213ps,
|
||||
/// Fused multiply-add of scalar single-precision floating-point values
|
||||
/// Fused multiply-add of packed double-precision floating-point values
|
||||
/// Fused multiply-add of scalar double-precision floating-point values
|
||||
fmadd213,
|
||||
/// Fused multiply-add of packed single-precision floating-point values
|
||||
vfmadd231ps,
|
||||
/// Fused multiply-add of scalar double-precision floating-point values
|
||||
vfmadd132sd,
|
||||
/// Fused multiply-add of scalar double-precision floating-point values
|
||||
vfmadd213sd,
|
||||
/// Fused multiply-add of scalar double-precision floating-point values
|
||||
vfmadd231sd,
|
||||
/// Fused multiply-add of scalar single-precision floating-point values
|
||||
vfmadd132ss,
|
||||
/// Fused multiply-add of scalar single-precision floating-point values
|
||||
vfmadd213ss,
|
||||
/// Fused multiply-add of scalar single-precision floating-point values
|
||||
vfmadd231ss,
|
||||
/// Fused multiply-add of packed double-precision floating-point values
|
||||
/// Fused multiply-add of scalar double-precision floating-point values
|
||||
fmadd231,
|
||||
|
||||
/// A pseudo instruction that requires special lowering.
|
||||
/// This should be the only tag in this enum that doesn't
|
||||
|
||||
Reference in New Issue
Block a user