x86_64: implement float cast from f16 to f64
This commit is contained in:
@@ -2287,26 +2287,46 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
|
||||
src_mcv
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
|
||||
const dst_lock = self.register_manager.lockReg(dst_mcv.register);
|
||||
const dst_reg = dst_mcv.getReg().?.to128();
|
||||
const dst_lock = self.register_manager.lockReg(dst_reg);
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
if (src_bits == 32 and dst_bits == 16 and self.hasFeature(.f16c))
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.vcvtps2ph,
|
||||
dst_mcv.register,
|
||||
if (src_mcv.isRegister()) src_mcv.getReg().? else src_reg: {
|
||||
const src_reg = dst_mcv.register;
|
||||
try self.genSetReg(src_reg, src_ty, src_mcv);
|
||||
break :src_reg src_reg;
|
||||
if (dst_bits == 16 and self.hasFeature(.f16c)) {
|
||||
switch (src_bits) {
|
||||
32 => {
|
||||
const mat_src_reg = if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(src_ty, src_mcv);
|
||||
try self.asmRegisterRegisterImmediate(
|
||||
.vcvtps2ph,
|
||||
dst_reg,
|
||||
mat_src_reg.to128(),
|
||||
Immediate.u(0b1_00),
|
||||
);
|
||||
},
|
||||
Immediate.u(0b1_00),
|
||||
)
|
||||
else if (src_bits == 64 and dst_bits == 32)
|
||||
try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv)
|
||||
else
|
||||
return self.fail("TODO implement airFptrunc from {} to {}", .{
|
||||
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
else => return self.fail("TODO implement airFptrunc from {} to {}", .{
|
||||
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
}
|
||||
} else if (src_bits == 64 and dst_bits == 32) {
|
||||
if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister(
|
||||
.vcvtsd2ss,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.getReg().?.to128(),
|
||||
) else try self.asmRegisterRegisterMemory(
|
||||
.vcvtsd2ss,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(.qword),
|
||||
) else if (src_mcv.isRegister())
|
||||
try self.asmRegisterRegister(.cvtsd2ss, dst_reg, src_mcv.getReg().?.to128())
|
||||
else
|
||||
try self.asmRegisterMemory(.cvtsd2ss, dst_reg, src_mcv.mem(.qword));
|
||||
} else return self.fail("TODO implement airFptrunc from {} to {}", .{
|
||||
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
|
||||
}
|
||||
|
||||
@@ -2322,22 +2342,41 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
|
||||
src_mcv
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
|
||||
const dst_lock = self.register_manager.lockReg(dst_mcv.register);
|
||||
const dst_reg = dst_mcv.getReg().?.to128();
|
||||
const dst_lock = self.register_manager.lockReg(dst_reg);
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
try self.genBinOpMir(
|
||||
if (src_bits == 16 and dst_bits == 32 and self.hasFeature(.f16c))
|
||||
.vcvtph2ps
|
||||
else if (src_bits == 32 and dst_bits == 64)
|
||||
.cvtss2sd
|
||||
if (src_bits == 16 and self.hasFeature(.f16c)) {
|
||||
const mat_src_reg = if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
return self.fail("TODO implement airFpext from {} to {}", .{
|
||||
try self.copyToTmpRegister(src_ty, src_mcv);
|
||||
try self.asmRegisterRegister(.vcvtph2ps, dst_reg, mat_src_reg.to128());
|
||||
switch (dst_bits) {
|
||||
32 => {},
|
||||
64 => try self.asmRegisterRegisterRegister(.vcvtss2sd, dst_reg, dst_reg, dst_reg),
|
||||
else => return self.fail("TODO implement airFpext from {} to {}", .{
|
||||
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
|
||||
}),
|
||||
src_ty,
|
||||
dst_mcv,
|
||||
src_mcv,
|
||||
);
|
||||
}
|
||||
} else if (src_bits == 32 and dst_bits == 64) {
|
||||
if (self.hasFeature(.avx)) if (src_mcv.isRegister()) try self.asmRegisterRegisterRegister(
|
||||
.vcvtss2sd,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.getReg().?.to128(),
|
||||
) else try self.asmRegisterRegisterMemory(
|
||||
.vcvtss2sd,
|
||||
dst_reg,
|
||||
dst_reg,
|
||||
src_mcv.mem(.dword),
|
||||
) else if (src_mcv.isRegister())
|
||||
try self.asmRegisterRegister(.cvtss2sd, dst_reg, src_mcv.getReg().?.to128())
|
||||
else
|
||||
try self.asmRegisterMemory(.cvtss2sd, dst_reg, src_mcv.mem(.dword));
|
||||
} else return self.fail("TODO implement airFpext from {} to {}", .{
|
||||
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
|
||||
});
|
||||
return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
|
||||
}
|
||||
|
||||
|
||||
@@ -89,30 +89,13 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct {
|
||||
if (modrm_ext) |ext| if (ext != data.modrm_ext) continue;
|
||||
if (!std.mem.eql(u8, opc, enc.opcode())) continue;
|
||||
if (prefixes.rex.w) {
|
||||
switch (data.mode) {
|
||||
.none, .short, .rex, .rex_short, .vex_128, .vex_256 => continue,
|
||||
.long, .vex_128_long, .vex_256_long => {},
|
||||
}
|
||||
if (!data.mode.isLong()) continue;
|
||||
} else if (prefixes.rex.present and !prefixes.rex.isSet()) {
|
||||
switch (data.mode) {
|
||||
.rex, .rex_short => {},
|
||||
else => continue,
|
||||
}
|
||||
if (!data.mode.isRex()) continue;
|
||||
} else if (prefixes.legacy.prefix_66) {
|
||||
switch (data.mode) {
|
||||
.short, .rex_short => {},
|
||||
.none, .rex, .vex_128, .vex_256 => continue,
|
||||
.long, .vex_128_long, .vex_256_long => continue,
|
||||
}
|
||||
if (!data.mode.isShort()) continue;
|
||||
} else {
|
||||
switch (data.mode) {
|
||||
.none => switch (data.mode) {
|
||||
.short, .rex_short => continue,
|
||||
.none, .rex, .vex_128, .vex_256 => {},
|
||||
.long, .vex_128_long, .vex_256_long => {},
|
||||
},
|
||||
else => continue,
|
||||
}
|
||||
if (data.mode.isShort()) continue;
|
||||
}
|
||||
return enc;
|
||||
};
|
||||
@@ -148,50 +131,39 @@ pub fn format(
|
||||
_ = fmt;
|
||||
|
||||
var opc = encoding.opcode();
|
||||
switch (encoding.data.mode) {
|
||||
else => {},
|
||||
.long => try writer.writeAll("REX.W + "),
|
||||
.vex_128, .vex_128_long, .vex_256, .vex_256_long => {
|
||||
try writer.writeAll("VEX.");
|
||||
if (encoding.data.mode.isVex()) {
|
||||
try writer.writeAll("VEX.");
|
||||
|
||||
switch (encoding.data.mode) {
|
||||
.vex_128, .vex_128_long => try writer.writeAll("128"),
|
||||
.vex_256, .vex_256_long => try writer.writeAll("256"),
|
||||
else => unreachable,
|
||||
}
|
||||
try writer.writeAll(switch (encoding.data.mode) {
|
||||
.vex_128_w0, .vex_128_w1, .vex_128_wig => "128",
|
||||
.vex_256_w0, .vex_256_w1, .vex_256_wig => "256",
|
||||
.vex_lig_w0, .vex_lig_w1, .vex_lig_wig => "LIG",
|
||||
.vex_lz_w0, .vex_lz_w1, .vex_lz_wig => "LZ",
|
||||
else => unreachable,
|
||||
});
|
||||
|
||||
switch (opc[0]) {
|
||||
else => {},
|
||||
0x66, 0xf3, 0xf2 => {
|
||||
try writer.print(".{X:0>2}", .{opc[0]});
|
||||
opc = opc[1..];
|
||||
},
|
||||
}
|
||||
switch (opc[0]) {
|
||||
else => {},
|
||||
0x66, 0xf3, 0xf2 => {
|
||||
try writer.print(".{X:0>2}", .{opc[0]});
|
||||
opc = opc[1..];
|
||||
},
|
||||
}
|
||||
|
||||
try writer.print(".{X:0>2}", .{opc[0]});
|
||||
opc = opc[1..];
|
||||
try writer.print(".{}", .{std.fmt.fmtSliceHexUpper(opc[0 .. opc.len - 1])});
|
||||
opc = opc[opc.len - 1 ..];
|
||||
|
||||
switch (opc[0]) {
|
||||
else => {},
|
||||
0x38, 0x3A => {
|
||||
try writer.print("{X:0>2}", .{opc[0]});
|
||||
opc = opc[1..];
|
||||
},
|
||||
}
|
||||
try writer.writeAll(".W");
|
||||
try writer.writeAll(switch (encoding.data.mode) {
|
||||
.vex_128_w0, .vex_256_w0, .vex_lig_w0, .vex_lz_w0 => "0",
|
||||
.vex_128_w1, .vex_256_w1, .vex_lig_w1, .vex_lz_w1 => "1",
|
||||
.vex_128_wig, .vex_256_wig, .vex_lig_wig, .vex_lz_wig => "IG",
|
||||
else => unreachable,
|
||||
});
|
||||
|
||||
try writer.writeByte('.');
|
||||
try writer.writeAll(switch (encoding.data.mode) {
|
||||
.vex_128, .vex_256 => "W0",
|
||||
.vex_128_long, .vex_256_long => "W1",
|
||||
else => unreachable,
|
||||
});
|
||||
try writer.writeByte(' ');
|
||||
},
|
||||
}
|
||||
|
||||
for (opc) |byte| {
|
||||
try writer.print("{x:0>2} ", .{byte});
|
||||
}
|
||||
try writer.writeByte(' ');
|
||||
} else if (encoding.data.mode.isLong()) try writer.writeAll("REX.W + ");
|
||||
for (opc) |byte| try writer.print("{x:0>2} ", .{byte});
|
||||
|
||||
switch (encoding.data.op_en) {
|
||||
.np, .fd, .td, .i, .zi, .d => {},
|
||||
@@ -332,6 +304,7 @@ pub const Mnemonic = enum {
|
||||
// SSE4.1
|
||||
roundsd, roundss,
|
||||
// AVX
|
||||
vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd,
|
||||
vmovapd, vmovaps,
|
||||
vmovddup,
|
||||
vmovsd,
|
||||
@@ -629,20 +602,74 @@ pub const Op = enum {
|
||||
};
|
||||
|
||||
pub const Mode = enum {
|
||||
// zig fmt: off
|
||||
none,
|
||||
short,
|
||||
long,
|
||||
rex,
|
||||
rex_short,
|
||||
vex_128,
|
||||
vex_128_long,
|
||||
vex_256,
|
||||
vex_256_long,
|
||||
short, long,
|
||||
rex, rex_short,
|
||||
vex_128_w0, vex_128_w1, vex_128_wig,
|
||||
vex_256_w0, vex_256_w1, vex_256_wig,
|
||||
vex_lig_w0, vex_lig_w1, vex_lig_wig,
|
||||
vex_lz_w0, vex_lz_w1, vex_lz_wig,
|
||||
// zig fmt: on
|
||||
|
||||
/// Returns `true` when `mode` is `.short` or `.rex_short`, and `false` for
/// every other mode.
pub fn isShort(mode: Mode) bool {
    return mode == .short or mode == .rex_short;
}
|
||||
|
||||
/// Returns `true` for `.long` and for each VEX mode whose tag ends in `_w1`;
/// every other mode yields `false`.
pub fn isLong(mode: Mode) bool {
    switch (mode) {
        // Non-VEX long mode.
        .long => return true,
        // VEX modes with the W bit set.
        .vex_128_w1, .vex_256_w1, .vex_lig_w1, .vex_lz_w1 => return true,
        else => return false,
    }
}
|
||||
|
||||
/// Returns `true` when `mode` is `.rex` or `.rex_short`, and `false` for
/// every other mode.
pub fn isRex(mode: Mode) bool {
    return mode == .rex or mode == .rex_short;
}
|
||||
|
||||
/// Returns `true` for every `vex_*` mode (128, 256, LIG and LZ lengths, each
/// with its `w0` / `w1` / `wig` variant) and `false` for all other modes.
pub fn isVex(mode: Mode) bool {
    // zig fmt: off
    const is_vex_mode = switch (mode) {
        .vex_128_w0, .vex_128_w1, .vex_128_wig,
        .vex_256_w0, .vex_256_w1, .vex_256_wig,
        .vex_lig_w0, .vex_lig_w1, .vex_lig_wig,
        .vex_lz_w0,  .vex_lz_w1,  .vex_lz_wig,
        => true,
        else => false,
    };
    // zig fmt: on
    return is_vex_mode;
}
|
||||
|
||||
/// Returns `true` for the `vex_256_*` modes and `false` for the `vex_128_*`,
/// `vex_lig_*` and `vex_lz_*` modes.
///
/// Must only be called with a VEX mode: any other mode reaches `unreachable`.
pub fn isVecLong(mode: Mode) bool {
    // zig fmt: off
    switch (mode) {
        // 256-bit vector length.
        .vex_256_w0, .vex_256_w1, .vex_256_wig => return true,
        // 128-bit, length-ignored and LZ encodings.
        .vex_128_w0, .vex_128_w1, .vex_128_wig,
        .vex_lig_w0, .vex_lig_w1, .vex_lig_wig,
        .vex_lz_w0,  .vex_lz_w1,  .vex_lz_wig,
        => return false,
        // Non-VEX modes are a caller error.
        else => unreachable,
    }
    // zig fmt: on
}
|
||||
};
|
||||
|
||||
pub const Feature = enum {
|
||||
none,
|
||||
avx,
|
||||
avx2,
|
||||
f16c,
|
||||
fma,
|
||||
sse,
|
||||
|
||||
@@ -184,6 +184,10 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
||||
.roundsd,
|
||||
.roundss,
|
||||
|
||||
.vcvtsd2ss,
|
||||
.vcvtsi2sd,
|
||||
.vcvtsi2ss,
|
||||
.vcvtss2sd,
|
||||
.vmovapd,
|
||||
.vmovaps,
|
||||
.vmovddup,
|
||||
|
||||
@@ -282,6 +282,14 @@ pub const Inst = struct {
|
||||
/// Round scalar single-precision floating-point values
|
||||
roundss,
|
||||
|
||||
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
|
||||
vcvtsd2ss,
|
||||
/// Convert doubleword integer to scalar double-precision floating-point value
|
||||
vcvtsi2sd,
|
||||
/// Convert doubleword integer to scalar single-precision floating-point value
|
||||
vcvtsi2ss,
|
||||
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
|
||||
vcvtss2sd,
|
||||
/// Move aligned packed double-precision floating-point values
|
||||
vmovapd,
|
||||
/// Move aligned packed single-precision floating-point values
|
||||
|
||||
@@ -206,18 +206,15 @@ pub const Instruction = struct {
|
||||
const enc = inst.encoding;
|
||||
const data = enc.data;
|
||||
|
||||
switch (data.mode) {
|
||||
.none, .short, .long, .rex, .rex_short => {
|
||||
try inst.encodeLegacyPrefixes(encoder);
|
||||
try inst.encodeMandatoryPrefix(encoder);
|
||||
try inst.encodeRexPrefix(encoder);
|
||||
try inst.encodeOpcode(encoder);
|
||||
},
|
||||
.vex_128, .vex_128_long, .vex_256, .vex_256_long => {
|
||||
try inst.encodeVexPrefix(encoder);
|
||||
const opc = inst.encoding.opcode();
|
||||
try encoder.opcode_1byte(opc[opc.len - 1]);
|
||||
},
|
||||
if (data.mode.isVex()) {
|
||||
try inst.encodeVexPrefix(encoder);
|
||||
const opc = inst.encoding.opcode();
|
||||
try encoder.opcode_1byte(opc[opc.len - 1]);
|
||||
} else {
|
||||
try inst.encodeLegacyPrefixes(encoder);
|
||||
try inst.encodeMandatoryPrefix(encoder);
|
||||
try inst.encodeRexPrefix(encoder);
|
||||
try inst.encodeOpcode(encoder);
|
||||
}
|
||||
|
||||
switch (data.op_en) {
|
||||
@@ -365,11 +362,7 @@ pub const Instruction = struct {
|
||||
|
||||
var vex = Vex{};
|
||||
|
||||
vex.w = switch (inst.encoding.data.mode) {
|
||||
.vex_128, .vex_256 => false,
|
||||
.vex_128_long, .vex_256_long => true,
|
||||
else => unreachable,
|
||||
};
|
||||
vex.w = inst.encoding.data.mode.isLong();
|
||||
|
||||
switch (op_en) {
|
||||
.np, .i, .zi, .fd, .td, .d => {},
|
||||
@@ -395,11 +388,7 @@ pub const Instruction = struct {
|
||||
},
|
||||
}
|
||||
|
||||
vex.l = switch (inst.encoding.data.mode) {
|
||||
.vex_128, .vex_128_long => false,
|
||||
.vex_256, .vex_256_long => true,
|
||||
else => unreachable,
|
||||
};
|
||||
vex.l = inst.encoding.data.mode.isVecLong();
|
||||
|
||||
vex.p = if (mand_pre) |mand| switch (mand) {
|
||||
0x66 => .@"66",
|
||||
|
||||
@@ -918,7 +918,6 @@ pub const table = [_]Entry{
|
||||
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 },
|
||||
.{ .pextrw, .rmi, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .long, .sse2 },
|
||||
|
||||
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
|
||||
|
||||
@@ -926,31 +925,23 @@ pub const table = [_]Entry{
|
||||
|
||||
.{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 },
|
||||
.{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 },
|
||||
.{ .psrld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .none, .sse2 },
|
||||
.{ .psrld, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .none, .sse2 },
|
||||
|
||||
.{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 },
|
||||
.{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 },
|
||||
|
||||
.{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 },
|
||||
.{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 },
|
||||
|
||||
.{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 },
|
||||
|
||||
.{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 },
|
||||
.{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 },
|
||||
.{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 },
|
||||
.{ .punpckhqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .none, .sse2 },
|
||||
|
||||
.{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .punpcklbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .punpcklbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 },
|
||||
.{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 },
|
||||
.{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
|
||||
.{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 },
|
||||
|
||||
.{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 },
|
||||
|
||||
.{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 },
|
||||
.{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 },
|
||||
|
||||
@@ -972,106 +963,128 @@ pub const table = [_]Entry{
|
||||
|
||||
// SSE4.1
|
||||
.{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 },
|
||||
.{ .pextrw, .mri, &.{ .r64_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .long, .sse4_1 },
|
||||
|
||||
.{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 },
|
||||
.{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 },
|
||||
|
||||
// AVX
|
||||
.{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128, .avx },
|
||||
.{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128, .avx },
|
||||
.{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256, .avx },
|
||||
.{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256, .avx },
|
||||
.{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128, .avx },
|
||||
.{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128, .avx },
|
||||
.{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256, .avx },
|
||||
.{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256, .avx },
|
||||
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
|
||||
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx },
|
||||
|
||||
.{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128, .avx },
|
||||
// VCVTSI2SS is encoded as VEX.LIG.F3.0F 2A /r (W0 for r/m32, W1 for r/m64).
// The F2 mandatory prefix with the same opcode is VCVTSI2SD, so using 0xf2
// here would emit the double-precision conversion instead.
.{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
.{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx },
|
||||
|
||||
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx },
|
||||
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_128, .avx },
|
||||
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx },
|
||||
.{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_128, .avx },
|
||||
// VCVTSS2SD is encoded as VEX.LIG.F3.0F.WIG 5A /r. The F2 mandatory prefix
// with the same opcode is VCVTSD2SS, so 0xf2 here would emit the narrowing
// conversion instead of the widening one.
.{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128, .avx },
|
||||
.{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
|
||||
.{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128, .avx },
|
||||
.{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
|
||||
.{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx },
|
||||
.{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_128, .avx },
|
||||
.{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx },
|
||||
.{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_128, .avx },
|
||||
.{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128, .avx },
|
||||
.{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128, .avx },
|
||||
.{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256, .avx },
|
||||
.{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256, .avx },
|
||||
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128, .avx },
|
||||
.{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128, .avx },
|
||||
.{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256, .avx },
|
||||
.{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256, .avx },
|
||||
.{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovshdup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vpextrw, .mri, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128, .avx },
|
||||
.{ .vpextrw, .mri, &.{ .r64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_long, .avx },
|
||||
.{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128, .avx },
|
||||
.{ .vpextrw, .mri, &.{ .r64_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_long, .avx },
|
||||
.{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovsldup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128, .avx },
|
||||
.{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
|
||||
.{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
|
||||
|
||||
.{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128, .avx },
|
||||
.{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128, .avx },
|
||||
.{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256_wig, .avx },
|
||||
.{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128, .avx },
|
||||
.{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128, .avx },
|
||||
.{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256_wig, .avx },
|
||||
.{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256_wig, .avx },
|
||||
|
||||
.{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128, .avx },
|
||||
.{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128, .avx },
|
||||
// Register-destination VPEXTRW (RMI form) lives in the two-byte 0F map at
// opcode C5, like the SSE2 `pextrw` entry; opcode 15 is only valid in the
// 0F 3A map used by the MRI form.
.{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128, .avx },
|
||||
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128, .avx },
|
||||
.{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx },
|
||||
.{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128_wig, .avx },
|
||||
.{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128, .avx },
|
||||
.{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128_wig, .avx },
|
||||
|
||||
.{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128, .avx },
|
||||
|
||||
.{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128, .avx },
|
||||
|
||||
.{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128, .avx },
|
||||
|
||||
.{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128, .avx },
|
||||
|
||||
.{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128, .avx },
|
||||
.{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
|
||||
.{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
|
||||
|
||||
// F16C
|
||||
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128, .f16c },
|
||||
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
|
||||
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
|
||||
|
||||
.{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128, .f16c },
|
||||
.{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128_w0, .f16c },
|
||||
.{ .vcvtps2ph, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_256_w0, .f16c },
|
||||
|
||||
// FMA
|
||||
.{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_long, .fma },
|
||||
.{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_long, .fma },
|
||||
.{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_long, .fma },
|
||||
.{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_long, .fma },
|
||||
.{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_long, .fma },
|
||||
.{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_long, .fma },
|
||||
.{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w1, .fma },
|
||||
.{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w1, .fma },
|
||||
.{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w1, .fma },
|
||||
.{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w1, .fma },
|
||||
.{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w1, .fma },
|
||||
.{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w1, .fma },
|
||||
|
||||
.{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128, .fma },
|
||||
.{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256, .fma },
|
||||
.{ .vfmadd213ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128, .fma },
|
||||
.{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256, .fma },
|
||||
.{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128, .fma },
|
||||
.{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256, .fma },
|
||||
.{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w0, .fma },
|
||||
.{ .vfmadd213ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w0, .fma },
|
||||
.{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w0, .fma },
|
||||
.{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w0, .fma },
|
||||
.{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w0, .fma },
|
||||
.{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w0, .fma },
|
||||
|
||||
.{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_128_long, .fma },
|
||||
.{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_128_long, .fma },
|
||||
.{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_128_long, .fma },
|
||||
.{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w1, .fma },
|
||||
.{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w1, .fma },
|
||||
.{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w1, .fma },
|
||||
|
||||
.{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_128, .fma },
|
||||
.{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_128, .fma },
|
||||
.{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_128, .fma },
|
||||
.{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w0, .fma },
|
||||
.{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w0, .fma },
|
||||
.{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma },
|
||||
|
||||
// AVX2
|
||||
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrld, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpunpckhbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpunpckhwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpunpckhdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpunpckhqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_256_wig, .avx2 },
|
||||
|
||||
.{ .vpunpcklbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 },
|
||||
.{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 },
|
||||
};
|
||||
// zig fmt: on
|
||||
|
||||
@@ -52,7 +52,8 @@ fn testFloatComparisons() !void {
|
||||
}
|
||||
|
||||
test "different sized float comparisons" {
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64 and
|
||||
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
|
||||
Reference in New Issue
Block a user