commit 2286c19c20897727825a6fd8dd63de1ada6724ee (tree)
parent de6cafa80fba3c1fb93bf82b76c1efffbc61bf98
Author: Andrew Kelley <andrew@ziglang.org>
Date: Mon, 15 May 2023 09:51:57 -0700
Merge pull request #15628 from jacobly0/x86_64-behavior
Diffstat:
27 files changed, 2115 insertions(+), 719 deletions(-)
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
@@ -1079,9 +1079,9 @@ fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void {
.fixes = Mir.Inst.Fixes.fromCondition(cc),
.r1 = reg,
} },
- .z_and_np, .nz_or_p => .{ .r_scratch = .{
+ .z_and_np, .nz_or_p => .{ .rr = .{
.r1 = reg,
- .scratch_reg = (try self.register_manager.allocReg(null, gp)).to8(),
+ .r2 = (try self.register_manager.allocReg(null, gp)).to8(),
} },
},
});
@@ -1120,8 +1120,8 @@ fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void {
.fixes = Mir.Inst.Fixes.fromCondition(cc),
.payload = payload,
} },
- .z_and_np, .nz_or_p => .{ .x_scratch = .{
- .scratch_reg = (try self.register_manager.allocReg(null, gp)).to8(),
+ .z_and_np, .nz_or_p => .{ .rx = .{
+ .r1 = (try self.register_manager.allocReg(null, gp)).to8(),
.payload = payload,
} },
},
@@ -1460,6 +1460,15 @@ fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Regist
}
fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void {
+ const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
+ .signed => |s| @bitCast(u32, s),
+ .unsigned => |u| @intCast(u32, u),
+ } });
+ assert(payload + 1 == switch (m) {
+ .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+ .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+ else => unreachable,
+ });
_ = try self.addInst(.{
.tag = tag[1],
.ops = switch (m) {
@@ -1475,17 +1484,9 @@ fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immed
},
.data = .{ .x = .{
.fixes = tag[0],
- .payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
- .signed => |s| @bitCast(u32, s),
- .unsigned => |u| @intCast(u32, u),
- } }),
+ .payload = payload,
} },
});
- _ = switch (m) {
- .sib => try self.addExtra(Mir.MemorySib.encode(m)),
- .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
- else => unreachable,
- };
}
fn asmMemoryRegisterRegister(
@@ -1549,7 +1550,9 @@ fn gen(self: *Self) InnerError!void {
const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
const backpatch_frame_align = try self.asmPlaceholder();
+ const backpatch_frame_align_extra = try self.asmPlaceholder();
const backpatch_stack_alloc = try self.asmPlaceholder();
+ const backpatch_stack_alloc_extra = try self.asmPlaceholder();
switch (self.ret_mcv.long) {
.none, .unreach => {},
@@ -1598,24 +1601,67 @@ fn gen(self: *Self) InnerError!void {
const need_stack_adjust = frame_layout.stack_adjust > 0;
const need_save_reg = frame_layout.save_reg_list.count() > 0;
if (need_frame_align) {
+ const page_align = @as(u32, math.maxInt(u32)) << 12;
self.mir_instructions.set(backpatch_frame_align, .{
.tag = .@"and",
.ops = .ri_s,
.data = .{ .ri = .{
.r1 = .rsp,
- .i = frame_layout.stack_mask,
+ .i = @max(frame_layout.stack_mask, page_align),
} },
});
+ if (frame_layout.stack_mask < page_align) {
+ self.mir_instructions.set(backpatch_frame_align_extra, .{
+ .tag = .pseudo,
+ .ops = .pseudo_probe_align_ri_s,
+ .data = .{ .ri = .{
+ .r1 = .rsp,
+ .i = ~frame_layout.stack_mask & page_align,
+ } },
+ });
+ }
}
if (need_stack_adjust) {
- self.mir_instructions.set(backpatch_stack_alloc, .{
- .tag = .sub,
- .ops = .ri_s,
- .data = .{ .ri = .{
- .r1 = .rsp,
- .i = frame_layout.stack_adjust,
- } },
- });
+ const page_size: u32 = 1 << 12;
+ if (frame_layout.stack_adjust <= page_size) {
+ self.mir_instructions.set(backpatch_stack_alloc, .{
+ .tag = .sub,
+ .ops = .ri_s,
+ .data = .{ .ri = .{
+ .r1 = .rsp,
+ .i = frame_layout.stack_adjust,
+ } },
+ });
+ } else if (frame_layout.stack_adjust <
+ page_size * Lower.pseudo_probe_adjust_unrolled_max_insts)
+ {
+ self.mir_instructions.set(backpatch_stack_alloc, .{
+ .tag = .pseudo,
+ .ops = .pseudo_probe_adjust_unrolled_ri_s,
+ .data = .{ .ri = .{
+ .r1 = .rsp,
+ .i = frame_layout.stack_adjust,
+ } },
+ });
+ } else {
+ self.mir_instructions.set(backpatch_stack_alloc, .{
+ .tag = .pseudo,
+ .ops = .pseudo_probe_adjust_setup_rri_s,
+ .data = .{ .rri = .{
+ .r1 = .rsp,
+ .r2 = .rax,
+ .i = frame_layout.stack_adjust,
+ } },
+ });
+ self.mir_instructions.set(backpatch_stack_alloc_extra, .{
+ .tag = .pseudo,
+ .ops = .pseudo_probe_adjust_loop_rr,
+ .data = .{ .rr = .{
+ .r1 = .rsp,
+ .r2 = .rax,
+ } },
+ });
+ }
}
if (need_frame_align or need_stack_adjust) {
self.mir_instructions.set(backpatch_stack_dealloc, .{
@@ -2215,11 +2261,11 @@ fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: b
},
.Vector => switch (ty.childType().zigTypeTag()) {
.Float => switch (ty.childType().floatBits(self.target.*)) {
- 16, 32, 64 => if (self.hasFeature(.avx)) 32 else 16,
- 80, 128 => break :need_mem,
+ 16, 32, 64, 128 => if (self.hasFeature(.avx)) 32 else 16,
+ 80 => break :need_mem,
else => unreachable,
},
- else => break :need_mem,
+ else => if (self.hasFeature(.avx)) 32 else 16,
},
else => 8,
})) {
@@ -2455,12 +2501,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
}
} else if (src_bits == 64 and dst_bits == 32) {
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
- .{ .v_, .cvtsd2ss },
+ .{ .v_ss, .cvtsd2 },
dst_reg,
dst_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegisterRegister(
- .{ .v_, .cvtsd2ss },
+ .{ .v_ss, .cvtsd2 },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -2468,11 +2514,11 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ ._, .cvtsd2ss },
+ .{ ._ss, .cvtsd2 },
dst_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
- .{ ._, .cvtsd2ss },
+ .{ ._ss, .cvtsd2 },
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -2506,22 +2552,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(src_ty, src_mcv);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
switch (dst_bits) {
32 => {},
- 64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg),
+ 64 => try self.asmRegisterRegisterRegister(.{ .v_sd, .cvtss2 }, dst_reg, dst_reg, dst_reg),
else => return self.fail("TODO implement airFpext from {} to {}", .{
src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
}),
}
} else if (src_bits == 32 and dst_bits == 64) {
if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
- .{ .v_, .cvtss2sd },
+ .{ .v_sd, .cvtss2 },
dst_reg,
dst_reg,
src_mcv.mem(.dword),
) else try self.asmRegisterRegisterRegister(
- .{ .v_, .cvtss2sd },
+ .{ .v_sd, .cvtss2 },
dst_reg,
dst_reg,
(if (src_mcv.isRegister())
@@ -2529,11 +2575,11 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
else
try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ ._, .cvtss2sd },
+ .{ ._sd, .cvtss2 },
dst_reg,
src_mcv.mem(.dword),
) else try self.asmRegisterRegister(
- .{ ._, .cvtss2sd },
+ .{ ._sd, .cvtss2 },
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -2754,8 +2800,10 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
const result = result: {
const tag = self.air.instructions.items(.tag)[inst];
const dst_ty = self.air.typeOfIndex(inst);
- if (dst_ty.zigTypeTag() == .Float)
- break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
+ switch (dst_ty.zigTypeTag()) {
+ .Float, .Vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs),
+ else => {},
+ }
const dst_info = dst_ty.intInfo(self.target.*);
var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) {
@@ -3421,14 +3469,17 @@ fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
}
- const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+ const dst_mcv: MCValue = if (src_mcv.isRegister() and
+ self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
src_mcv
+ else if (self.liveness.isUnused(inst))
+ .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) }
else
try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
const pl_ty = dst_ty.childType();
const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*));
- try self.genSetMem(.{ .reg = dst_mcv.register }, pl_abi_size, Type.bool, .{ .immediate = 1 });
+ try self.genSetMem(.{ .reg = dst_mcv.getReg().? }, pl_abi_size, Type.bool, .{ .immediate = 1 });
break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv;
};
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
@@ -4177,9 +4228,18 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
const src_bits = src_ty.bitSize(self.target.*);
if (self.hasFeature(.lzcnt)) {
- if (src_bits <= 64) {
+ if (src_bits <= 8) {
+ const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
+ try self.truncateRegister(src_ty, wide_reg);
+ try self.genBinOpMir(.{ ._, .lzcnt }, Type.u32, dst_mcv, .{ .register = wide_reg });
+ try self.genBinOpMir(
+ .{ ._, .sub },
+ dst_ty,
+ dst_mcv,
+ .{ .immediate = 8 + self.regExtraBits(src_ty) },
+ );
+ } else if (src_bits <= 64) {
try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv);
-
const extra_bits = self.regExtraBits(src_ty);
if (extra_bits > 0) {
try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits });
@@ -4218,7 +4278,17 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
const imm_reg = try self.copyToTmpRegister(dst_ty, .{
.immediate = src_bits ^ (src_bits - 1),
});
- try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv);
+ const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
+ defer self.register_manager.unlockReg(imm_lock);
+
+ if (src_bits <= 8) {
+ const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
+ const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
+ defer self.register_manager.unlockReg(wide_lock);
+
+ try self.truncateRegister(src_ty, wide_reg);
+ try self.genBinOpMir(.{ ._, .bsr }, Type.u16, dst_mcv, .{ .register = wide_reg });
+ } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv);
const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
try self.asmCmovccRegisterRegister(
@@ -4232,7 +4302,20 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
const imm_reg = try self.copyToTmpRegister(dst_ty, .{
.immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - self.regBitSize(dst_ty)),
});
- try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv);
+ const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg);
+ defer self.register_manager.unlockReg(imm_lock);
+
+ const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
+ const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
+ defer self.register_manager.unlockReg(wide_lock);
+
+ try self.truncateRegister(src_ty, wide_reg);
+ try self.genBinOpMir(
+ .{ ._, .bsr },
+ if (src_bits <= 8) Type.u16 else src_ty,
+ dst_mcv,
+ .{ .register = wide_reg },
+ );
const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
try self.asmCmovccRegisterRegister(
@@ -4274,24 +4357,25 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
if (self.hasFeature(.bmi)) {
if (src_bits <= 64) {
- const extra_bits = self.regExtraBits(src_ty);
+ const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0);
+ const wide_ty = if (src_bits <= 8) Type.u16 else src_ty;
const masked_mcv = if (extra_bits > 0) masked: {
const tmp_mcv = tmp: {
if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
break :tmp src_mcv;
- try self.genSetReg(dst_reg, src_ty, src_mcv);
+ try self.genSetReg(dst_reg, wide_ty, src_mcv);
break :tmp dst_mcv;
};
try self.genBinOpMir(
.{ ._, .@"or" },
- src_ty,
+ wide_ty,
tmp_mcv,
.{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) <<
@intCast(u6, src_bits) },
);
break :masked tmp_mcv;
} else mat_src_mcv;
- try self.genBinOpMir(.{ ._, .tzcnt }, src_ty, dst_mcv, masked_mcv);
+ try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv);
} else if (src_bits <= 128) {
const tmp_reg = try self.register_manager.allocReg(null, gp);
const tmp_mcv = MCValue{ .register = tmp_reg };
@@ -4320,7 +4404,17 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
- try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv);
+ const width_lock = self.register_manager.lockRegAssumeUnused(width_reg);
+ defer self.register_manager.unlockReg(width_lock);
+
+ if (src_bits <= 8 or !math.isPowerOfTwo(src_bits)) {
+ const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv);
+ const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg);
+ defer self.register_manager.unlockReg(wide_lock);
+
+ try self.truncateRegister(src_ty, wide_reg);
+ try self.genBinOpMir(.{ ._, .bsf }, Type.u16, dst_mcv, .{ .register = wide_reg });
+ } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv);
const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
try self.asmCmovccRegisterRegister(
@@ -4632,61 +4726,136 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
}
fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
+ const tag = self.air.instructions.items(.tag)[inst];
const un_op = self.air.instructions.items(.data)[inst].un_op;
const ty = self.air.typeOf(un_op);
- const ty_bits = ty.floatBits(self.target.*);
+ const abi_size: u32 = switch (ty.abiSize(self.target.*)) {
+ 1...16 => 16,
+ 17...32 => 32,
+ else => return self.fail("TODO implement airFloatSign for {}", .{
+ ty.fmt(self.bin_file.options.module.?),
+ }),
+ };
+ const scalar_bits = ty.scalarType().floatBits(self.target.*);
+
+ const src_mcv = try self.resolveInst(un_op);
+ const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
+ defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const dst_mcv: MCValue = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
+ src_mcv
+ else if (self.hasFeature(.avx))
+ .{ .register = try self.register_manager.allocReg(inst, sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_lock = self.register_manager.lockReg(dst_reg);
+ defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
var arena = std.heap.ArenaAllocator.init(self.gpa);
defer arena.deinit();
- const ExpectedContents = union {
- f16: Value.Payload.Float_16,
- f32: Value.Payload.Float_32,
- f64: Value.Payload.Float_64,
- f80: Value.Payload.Float_80,
- f128: Value.Payload.Float_128,
+ const ExpectedContents = struct {
+ scalar: union {
+ i64: Value.Payload.I64,
+ big: struct {
+ limbs: [
+ @max(
+ std.math.big.int.Managed.default_capacity,
+ std.math.big.int.calcTwosCompLimbCount(128),
+ )
+ ]std.math.big.Limb,
+ pl: Value.Payload.BigInt,
+ },
+ },
+ repeated: Value.Payload.SubValue,
};
var stack align(@alignOf(ExpectedContents)) =
std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator());
+ var int_pl = Type.Payload.Bits{
+ .base = .{ .tag = .int_signed },
+ .data = scalar_bits,
+ };
var vec_pl = Type.Payload.Array{
.base = .{ .tag = .vector },
.data = .{
- .len = @divExact(128, ty_bits),
- .elem_type = ty,
+ .len = @divExact(abi_size * 8, scalar_bits),
+ .elem_type = Type.initPayload(&int_pl.base),
},
};
const vec_ty = Type.initPayload(&vec_pl.base);
-
- var sign_pl = Value.Payload.SubValue{
- .base = .{ .tag = .repeated },
- .data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*),
+ const sign_val = switch (tag) {
+ .neg => try vec_ty.minInt(stack.get(), self.target.*),
+ .fabs => try vec_ty.maxInt(stack.get(), self.target.*),
+ else => unreachable,
};
- const sign_val = Value.initPayload(&sign_pl.base);
const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
-
- const src_mcv = try self.resolveInst(un_op);
- const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
- src_mcv
+ const sign_mem = if (sign_mcv.isMemory())
+ sign_mcv.mem(Memory.PtrSize.fromSize(abi_size))
else
- try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
- const dst_lock = self.register_manager.lockReg(dst_mcv.register);
- defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+ Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
+ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) },
+ });
- const tag = self.air.instructions.items(.tag)[inst];
- try self.genBinOpMir(switch (ty_bits) {
- // No point using an extra prefix byte for *pd which performs the same operation.
- 16, 32, 64, 128 => switch (tag) {
- .neg => .{ ._ps, .xor },
- .fabs => .{ ._ps, .andn },
+ if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
+ switch (scalar_bits) {
+ 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
+ .neg => .{ .vp_, .xor },
+ .fabs => .{ .vp_, .@"and" },
+ else => unreachable,
+ } else switch (tag) {
+ .neg => .{ .v_ps, .xor },
+ .fabs => .{ .v_ps, .@"and" },
+ else => unreachable,
+ },
+ 32 => switch (tag) {
+ .neg => .{ .v_ps, .xor },
+ .fabs => .{ .v_ps, .@"and" },
+ else => unreachable,
+ },
+ 64 => switch (tag) {
+ .neg => .{ .v_pd, .xor },
+ .fabs => .{ .v_pd, .@"and" },
+ else => unreachable,
+ },
+ 80 => return self.fail("TODO implement airFloatSign for {}", .{
+ ty.fmt(self.bin_file.options.module.?),
+ }),
else => unreachable,
},
- 80 => return self.fail("TODO implement airFloatSign for {}", .{
- ty.fmt(self.bin_file.options.module.?),
- }),
- else => unreachable,
- }, vec_ty, dst_mcv, sign_mcv);
+ registerAlias(dst_reg, abi_size),
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, src_mcv), abi_size),
+ sign_mem,
+ ) else try self.asmRegisterMemory(
+ switch (scalar_bits) {
+ 16, 128 => switch (tag) {
+ .neg => .{ .p_, .xor },
+ .fabs => .{ .p_, .@"and" },
+ else => unreachable,
+ },
+ 32 => switch (tag) {
+ .neg => .{ ._ps, .xor },
+ .fabs => .{ ._ps, .@"and" },
+ else => unreachable,
+ },
+ 64 => switch (tag) {
+ .neg => .{ ._pd, .xor },
+ .fabs => .{ ._pd, .@"and" },
+ else => unreachable,
+ },
+ 80 => return self.fail("TODO implement airFloatSign for {}", .{
+ ty.fmt(self.bin_file.options.module.?),
+ }),
+ else => unreachable,
+ },
+ registerAlias(dst_reg, abi_size),
+ sign_mem,
+ );
return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
}
@@ -4740,7 +4909,6 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4
})) |tag| tag else return self.fail("TODO implement genRound for {}", .{
ty.fmt(self.bin_file.options.module.?),
});
-
const abi_size = @intCast(u32, ty.abiSize(self.target.*));
const dst_alias = registerAlias(dst_reg, abi_size);
switch (mir_tag[0]) {
@@ -4799,7 +4967,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
src_mcv.getReg().?
else
try self.copyToTmpRegister(ty, src_mcv);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128());
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg);
try self.asmRegisterRegisterImmediate(
.{ .v_, .cvtps2ph },
@@ -4819,7 +4987,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) {
1 => {
try self.asmRegisterRegister(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
dst_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -4843,13 +5011,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void {
2...8 => {
const wide_reg = registerAlias(dst_reg, abi_size * 2);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
wide_reg,
src_mcv.mem(Memory.PtrSize.fromSize(
@intCast(u32, @divExact(wide_reg.bitSize(), 16)),
)),
) else try self.asmRegisterRegister(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
wide_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -5256,64 +5424,24 @@ fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void {
fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue {
const ptr_field_ty = self.air.typeOfIndex(inst);
- const mcv = try self.resolveInst(operand);
const ptr_container_ty = self.air.typeOf(operand);
const container_ty = ptr_container_ty.childType();
- const field_offset = switch (container_ty.containerLayout()) {
- .Auto, .Extern => @intCast(u32, container_ty.structFieldOffset(index, self.target.*)),
+ const field_offset = @intCast(i32, switch (container_ty.containerLayout()) {
+ .Auto, .Extern => container_ty.structFieldOffset(index, self.target.*),
.Packed => if (container_ty.zigTypeTag() == .Struct and
ptr_field_ty.ptrInfo().data.host_size == 0)
container_ty.packedStructFieldByteOffset(index, self.target.*)
else
0,
- };
-
- const result: MCValue = result: {
- switch (mcv) {
- .load_frame, .lea_tlv, .load_tlv => {
- const offset_reg = try self.copyToTmpRegister(Type.usize, .{
- .immediate = field_offset,
- });
- const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
- defer self.register_manager.unlockReg(offset_reg_lock);
-
- const dst_mcv = try self.copyToRegisterWithInstTracking(inst, Type.usize, switch (mcv) {
- .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
- else => mcv,
- });
- try self.genBinOpMir(.{ ._, .add }, Type.usize, dst_mcv, .{ .register = offset_reg });
- break :result dst_mcv;
- },
- .indirect => |reg_off| break :result .{ .indirect = .{
- .reg = reg_off.reg,
- .off = reg_off.off + @intCast(i32, field_offset),
- } },
- .lea_frame => |frame_addr| break :result .{ .lea_frame = .{
- .index = frame_addr.index,
- .off = frame_addr.off + @intCast(i32, field_offset),
- } },
- .register, .register_offset => {
- const src_reg = mcv.getReg().?;
- const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
- defer self.register_manager.unlockReg(src_lock);
+ });
- const dst_mcv: MCValue = if (self.reuseOperand(inst, operand, 0, mcv))
- mcv
- else
- .{ .register = try self.copyToTmpRegister(ptr_field_ty, mcv) };
- break :result .{ .register_offset = .{
- .reg = dst_mcv.getReg().?,
- .off = switch (dst_mcv) {
- .register => 0,
- .register_offset => |reg_off| reg_off.off,
- else => unreachable,
- } + @intCast(i32, field_offset),
- } };
- },
- else => return self.fail("TODO implement fieldPtr for {}", .{mcv}),
- }
- };
- return result;
+ const src_mcv = try self.resolveInst(operand);
+ const dst_mcv = if (switch (src_mcv) {
+ .immediate, .lea_frame => true,
+ .register, .register_offset => self.reuseOperand(inst, operand, 0, src_mcv),
+ else => false,
+ }) src_mcv else try self.copyToRegisterWithInstTracking(inst, ptr_field_ty, src_mcv);
+ return dst_mcv.offset(field_offset);
}
fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
@@ -5324,8 +5452,11 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
const index = extra.field_index;
const container_ty = self.air.typeOf(operand);
+ const container_rc = regClassForType(container_ty);
const field_ty = container_ty.structFieldType(index);
if (!field_ty.hasRuntimeBitsIgnoreComptime()) break :result .none;
+ const field_rc = regClassForType(field_ty);
+ const field_is_gp = field_rc.supersetOf(gp);
const src_mcv = try self.resolveInst(operand);
const field_off = switch (container_ty.containerLayout()) {
@@ -5358,7 +5489,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
return self.fail("TODO implement struct_field_val with large packed field", .{});
}
- const dst_reg = try self.register_manager.allocReg(inst, gp);
+ const dst_reg = try self.register_manager.allocReg(if (field_is_gp) inst else null, gp);
const field_extra_bits = self.regExtraBits(field_ty);
const load_abi_size =
if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2;
@@ -5409,30 +5540,23 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg);
const dst_mcv = MCValue{ .register = dst_reg };
- const dst_rc = regClassForType(field_ty);
- if (dst_rc.eql(gp)) break :result dst_mcv;
-
- const result_reg = try self.register_manager.allocReg(inst, dst_rc);
- try self.genSetReg(result_reg, field_ty, dst_mcv);
- break :result .{ .register = result_reg };
+ break :result if (field_is_gp)
+ dst_mcv
+ else
+ try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
},
.register => |reg| {
const reg_lock = self.register_manager.lockRegAssumeUnused(reg);
defer self.register_manager.unlockReg(reg_lock);
- const dst_mcv = if (self.reuseOperand(inst, operand, 0, src_mcv))
- src_mcv
+ const dst_reg = if (src_mcv.isRegister() and field_rc.supersetOf(container_rc) and
+ self.reuseOperand(inst, operand, 0, src_mcv))
+ src_mcv.getReg().?
else
- try self.copyToRegisterWithInstTracking(
- inst,
- Type.usize,
- .{ .register = reg.to64() },
- );
- const dst_mcv_lock: ?RegisterLock = switch (dst_mcv) {
- .register => |a_reg| self.register_manager.lockReg(a_reg),
- else => null,
- };
- defer if (dst_mcv_lock) |lock| self.register_manager.unlockReg(lock);
+ try self.copyToTmpRegister(Type.usize, .{ .register = reg.to64() });
+ const dst_mcv = MCValue{ .register = dst_reg };
+ const dst_lock = self.register_manager.lockReg(dst_reg);
+ defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
// Shift by struct_field_offset.
try self.genShiftBinOpMir(
@@ -5459,7 +5583,11 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
registerAlias(dst_mcv.register, field_byte_size),
);
}
- break :result dst_mcv;
+
+ break :result if (field_rc.supersetOf(gp))
+ dst_mcv
+ else
+ try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
},
.register_overflow => |ro| {
switch (index) {
@@ -6339,12 +6467,13 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ss, .add },
.sub => .{ .v_ss, .sub },
+ .mul => .{ .v_ss, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
.max => .{ .v_ss, .max },
.min => .{ .v_ss, .max },
@@ -6393,6 +6522,234 @@ fn genBinOp(
},
.Vector => switch (lhs_ty.childType().zigTypeTag()) {
else => null,
+ .Int => switch (lhs_ty.childType().intInfo(self.target.*).bits) {
+ 8 => switch (lhs_ty.vectorLen()) {
+ 1...16 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add },
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
+ .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
+ .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
+ .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_b, .mins }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_b, .mins }
+ else
+ null,
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_b, .minu }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_b, .minu }
+ else
+ null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_b, .maxs }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_b, .maxs }
+ else
+ null,
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_b, .maxu }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_b, .maxu }
+ else
+ null,
+ },
+ else => null,
+ },
+ 17...32 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null,
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null,
+ .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
+ .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
+ .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null,
+ .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
+ .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
+ },
+ else => null,
+ },
+ else => null,
+ },
+ 16 => switch (lhs_ty.vectorLen()) {
+ 1...8 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add },
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub },
+ .mul,
+ .mulwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull },
+ .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
+ .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
+ .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_w, .mins }
+ else
+ .{ .p_w, .mins },
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_w, .minu }
+ else
+ .{ .p_w, .minu },
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_w, .maxs }
+ else
+ .{ .p_w, .maxs },
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_w, .maxu }
+ else
+ .{ .p_w, .maxu },
+ },
+ else => null,
+ },
+ 9...16 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null,
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null,
+ .mul,
+ .mulwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null,
+ .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
+ .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
+ .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null,
+ .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
+ .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
+ },
+ else => null,
+ },
+ else => null,
+ },
+ 32 => switch (lhs_ty.vectorLen()) {
+ 1...4 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add },
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub },
+ .mul,
+ .mulwrap,
+ => if (self.hasFeature(.avx))
+ .{ .vp_d, .mull }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_d, .mull }
+ else
+ null,
+ .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
+ .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
+ .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_d, .mins }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_d, .mins }
+ else
+ null,
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_d, .minu }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_d, .minu }
+ else
+ null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_d, .maxs }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_d, .maxs }
+ else
+ null,
+ .unsigned => if (self.hasFeature(.avx))
+ .{ .vp_d, .maxu }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_d, .maxu }
+ else
+ null,
+ },
+ else => null,
+ },
+ 5...8 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null,
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null,
+ .mul,
+ .mulwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null,
+ .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
+ .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
+ .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+ .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null,
+ .unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null,
+ },
+ .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) {
+ .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
+ .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
+ },
+ else => null,
+ },
+ else => null,
+ },
+ 64 => switch (lhs_ty.vectorLen()) {
+ 1...2 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add },
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
+ .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
+ .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
+ .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+ else => null,
+ },
+ 3...4 => switch (air_tag) {
+ .add,
+ .addwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null,
+ .sub,
+ .subwrap,
+ => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null,
+ .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
+ .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
+ .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+ else => null,
+ },
+ else => null,
+ },
+ else => null,
+ },
.Float => switch (lhs_ty.childType().floatBits(self.target.*)) {
16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen()) {
1 => {
@@ -6415,12 +6772,13 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg);
try self.asmRegisterRegisterRegister(
switch (air_tag) {
.add => .{ .v_ss, .add },
.sub => .{ .v_ss, .sub },
+ .mul => .{ .v_ss, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div },
.max => .{ .v_ss, .max },
- .min => .{ .v_ss, .max },
+ .min => .{ .v_ss, .min },
@@ -6457,7 +6815,7 @@ fn genBinOp(
else
try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
try self.asmRegisterRegisterRegister(
.{ .v_ps, .movhl },
tmp_reg,
@@ -6468,6 +6826,7 @@ fn genBinOp(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
+ .mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
- .min => .{ .v_ps, .max },
+ .min => .{ .v_ps, .min },
@@ -6490,13 +6849,13 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
tmp_reg,
src_mcv.mem(.qword),
) else try self.asmRegisterRegister(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -6507,6 +6866,7 @@ fn genBinOp(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
+ .mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
- .min => .{ .v_ps, .max },
+ .min => .{ .v_ps, .min },
@@ -6529,13 +6889,13 @@ fn genBinOp(
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg);
if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
tmp_reg,
src_mcv.mem(.xword),
) else try self.asmRegisterRegister(
- .{ .v_, .cvtph2ps },
+ .{ .v_ps, .cvtph2 },
tmp_reg,
(if (src_mcv.isRegister())
src_mcv.getReg().?
@@ -6546,6 +6906,7 @@ fn genBinOp(
switch (air_tag) {
.add => .{ .v_ps, .add },
.sub => .{ .v_ps, .sub },
+ .mul => .{ .v_ps, .mul },
.div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div },
.max => .{ .v_ps, .max },
- .min => .{ .v_ps, .max },
+ .min => .{ .v_ps, .min },
@@ -6681,7 +7042,7 @@ fn genBinOp(
);
}
switch (air_tag) {
- .add, .sub, .mul, .div_float, .div_exact => {},
+ .add, .addwrap, .sub, .subwrap, .mul, .mulwrap, .div_float, .div_exact => {},
.div_trunc, .div_floor => try self.genRound(
lhs_ty,
dst_reg,
@@ -6692,6 +7053,7 @@ fn genBinOp(
else => unreachable,
},
),
+ .bit_and, .bit_or, .xor => {},
.max, .min => {}, // TODO: unordered select
else => unreachable,
}
@@ -6712,7 +7074,6 @@ fn genBinOpMir(
.dead,
.undef,
.immediate,
- .register_offset,
.eflags,
.register_overflow,
.lea_direct,
@@ -6721,7 +7082,9 @@ fn genBinOpMir(
.lea_frame,
.reserved_frame,
=> unreachable, // unmodifiable destination
- .register => |dst_reg| {
+ .register, .register_offset => {
+ assert(dst_mcv.isRegister());
+ const dst_reg = dst_mcv.getReg().?;
const dst_alias = registerAlias(dst_reg, abi_size);
switch (src_mcv) {
.none,
@@ -7185,13 +7548,13 @@ fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void {
switch (self.debug_output) {
.dwarf => |dw| {
const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
- .register => |reg| .{ .register = reg.dwarfLocOp() },
+ .register => |reg| .{ .register = reg.dwarfNum() },
// TODO use a frame index
.load_frame => return,
//.stack_offset => |off| .{
// .stack = .{
// // TODO handle -fomit-frame-pointer
- // .fp_register = Register.rbp.dwarfLocOpDeref(),
+ // .fp_register = Register.rbp.dwarfNum(),
// .offset = -off,
// },
//},
@@ -7223,11 +7586,11 @@ fn genVarDbgInfo(
switch (self.debug_output) {
.dwarf => |dw| {
const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
- .register => |reg| .{ .register = reg.dwarfLocOp() },
+ .register => |reg| .{ .register = reg.dwarfNum() },
// TODO use a frame index
.load_frame, .lea_frame => return,
//=> |off| .{ .stack = .{
- // .fp_register = Register.rbp.dwarfLocOpDeref(),
+ // .fp_register = Register.rbp.dwarfNum(),
// .offset = -off,
//} },
.memory => |address| .{ .memory = address },
@@ -7364,11 +7727,15 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
// on linking.
const mod = self.bin_file.options.module.?;
if (self.air.value(callee)) |func_value| {
- if (func_value.castTag(.function)) |func_payload| {
- const func = func_payload.data;
-
+ if (if (func_value.castTag(.function)) |func_payload|
+ func_payload.data.owner_decl
+ else if (func_value.castTag(.decl_ref)) |decl_ref_payload|
+ decl_ref_payload.data
+ else
+ null) |owner_decl|
+ {
if (self.bin_file.cast(link.File.Elf)) |elf_file| {
- const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl);
+ const atom_index = try elf_file.getOrCreateAtomForDecl(owner_decl);
const atom = elf_file.getAtom(atom_index);
_ = try atom.getOrCreateOffsetTableEntry(elf_file);
const got_addr = atom.getOffsetTableAddress(elf_file);
@@ -7377,17 +7744,17 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
.disp = @intCast(i32, got_addr),
}));
} else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
- const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl);
+ const atom = try coff_file.getOrCreateAtomForDecl(owner_decl);
const sym_index = coff_file.getAtom(atom).getSymbolIndex().?;
try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
try self.asmRegister(.{ ._, .call }, .rax);
} else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
- const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl);
+ const atom = try macho_file.getOrCreateAtomForDecl(owner_decl);
const sym_index = macho_file.getAtom(atom).getSymbolIndex().?;
try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
try self.asmRegister(.{ ._, .call }, .rax);
} else if (self.bin_file.cast(link.File.Plan9)) |p9| {
- const decl_block_index = try p9.seeDecl(func.owner_decl);
+ const decl_block_index = try p9.seeDecl(owner_decl);
const decl_block = p9.getDeclBlock(decl_block_index);
const ptr_bits = self.target.cpu.arch.ptrBitWidth();
const ptr_bytes: u64 = @divExact(ptr_bits, 8);
@@ -7577,7 +7944,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
else
try self.copyToTmpRegister(ty, src_mcv)).to128(),
);
- try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg);
+ try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg);
try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg);
try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv);
} else return self.fail("TODO implement airCmp for {}", .{
@@ -8568,47 +8935,190 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAirResult(inst, result);
}
-fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
+const MoveStrategy = union(enum) {
+ move: Mir.Inst.FixedTag,
+ insert_extract: InsertExtract,
+ vex_insert_extract: InsertExtract,
+
+ const InsertExtract = struct {
+ insert: Mir.Inst.FixedTag,
+ extract: Mir.Inst.FixedTag,
+ };
+};
+fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
switch (ty.zigTypeTag()) {
- else => return .{ ._, .mov },
+ else => return .{ .move = .{ ._, .mov } },
.Float => switch (ty.floatBits(self.target.*)) {
- 16 => unreachable, // needs special handling
- 32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
- 64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
- 128 => return if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+ 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
+ 32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } },
+ 64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } },
+ 128 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
else => {},
},
.Vector => switch (ty.childType().zigTypeTag()) {
+ .Int => switch (ty.childType().intInfo(self.target.*).bits) {
+ 8 => switch (ty.vectorLen()) {
+ 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
+ .insert = .{ .vp_b, .insr },
+ .extract = .{ .vp_b, .extr },
+ } } else if (self.hasFeature(.sse4_1)) return .{ .insert_extract = .{ // pinsrb/pextrb are SSE4.1 instructions
+ .insert = .{ .p_b, .insr },
+ .extract = .{ .p_b, .extr },
+ } },
+ 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_d, .mov }
+ else
+ .{ ._d, .mov } },
+ 5...8 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
+ 9...16 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 17...32 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ },
+ 16 => switch (ty.vectorLen()) {
+ 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_d, .mov }
+ else
+ .{ ._d, .mov } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
+ 5...8 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 9...16 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ },
+ 32 => switch (ty.vectorLen()) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_d, .mov }
+ else
+ .{ ._d, .mov } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 5...8 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ },
+ 64 => switch (ty.vectorLen()) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 3...4 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ },
+ 128 => switch (ty.vectorLen()) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 2 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ },
+ 256 => switch (ty.vectorLen()) {
+ 1 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+ else => {},
+ },
+ else => {},
+ },
.Float => switch (ty.childType().floatBits(self.target.*)) {
16 => switch (ty.vectorLen()) {
- 1 => unreachable, // needs special handling
- 2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
- 3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
- 5...8 => return if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+ 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+ .insert = .{ .vp_w, .insr },
+ .extract = .{ .vp_w, .extr },
+ } } else .{ .insert_extract = .{
+ .insert = .{ .p_w, .insr },
+ .extract = .{ .p_w, .extr },
+ } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_d, .mov }
+ else
+ .{ ._d, .mov } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_q, .mov }
+ else
+ .{ ._q, .mov } },
+ 5...8 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
9...16 => if (self.hasFeature(.avx))
- return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
32 => switch (ty.vectorLen()) {
- 1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov },
- 2...4 => return if (self.hasFeature(.avx))
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_ss, .mov }
+ else
+ .{ ._ss, .mov } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_sd, .mov }
+ else
+ .{ ._sd, .mov } },
+ 3...4 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+ else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
5...8 => if (self.hasFeature(.avx))
- return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
+ return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
else => {},
},
64 => switch (ty.vectorLen()) {
- 1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov },
- 2 => return if (self.hasFeature(.avx))
- if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
- else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu },
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ .{ .v_sd, .mov }
+ else
+ .{ ._sd, .mov } },
+ 2 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
+ else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
3...4 => if (self.hasFeature(.avx))
- return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu },
+ return .{ .move = if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } },
+ else => {},
+ },
+ 128 => switch (ty.vectorLen()) {
+ 1 => return .{ .move = if (self.hasFeature(.avx))
+ if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+ else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+ 2 => if (self.hasFeature(.avx))
+ return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
else => {},
},
else => {},
@@ -8616,15 +9126,11 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag {
else => {},
},
}
- return self.fail("TODO movMirTag for {}", .{ty.fmt(self.bin_file.options.module.?)});
+ return self.fail("TODO moveStrategy for {}", .{ty.fmt(self.bin_file.options.module.?)});
}
fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
- const src_lock = switch (src_mcv) {
- .register => |reg| self.register_manager.lockReg(reg),
- .register_overflow => |ro| self.register_manager.lockReg(ro.reg),
- else => null,
- };
+ const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
switch (dst_mcv) {
@@ -8720,34 +9226,94 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
);
}
},
- .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister(
- if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point))
- switch (ty.zigTypeTag()) {
- else => .{ ._, .mov },
- .Float, .Vector => .{ ._ps, .mova },
- }
- else switch (abi_size) {
- 2 => return try self.asmRegisterRegisterImmediate(
- if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr },
- registerAlias(dst_reg, 4),
- registerAlias(src_reg, 4),
- Immediate.u(0),
+ .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
+ .general_purpose => switch (src_reg.class()) {
+ .general_purpose => try self.asmRegisterRegister(
+ .{ ._, .mov },
+ registerAlias(dst_reg, abi_size),
+ registerAlias(src_reg, abi_size),
),
- 4 => .{ ._d, .mov },
- 8 => .{ ._q, .mov },
- else => return self.fail(
- "unsupported register copy from {s} to {s}",
- .{ @tagName(src_reg), @tagName(dst_reg) },
+ .segment => try self.asmRegisterRegister(
+ .{ ._, .mov },
+ registerAlias(dst_reg, abi_size),
+ src_reg,
),
+ .sse => try self.asmRegisterRegister(
+ switch (abi_size) {
+ 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+ else => unreachable,
+ },
+ registerAlias(dst_reg, @max(abi_size, 4)),
+ src_reg.to128(),
+ ),
+ .x87, .mmx => unreachable,
},
- registerAlias(dst_reg, abi_size),
- registerAlias(src_reg, abi_size),
- ),
+ .segment => try self.asmRegisterRegister(
+ .{ ._, .mov },
+ dst_reg,
+ switch (src_reg.class()) {
+ .general_purpose, .segment => registerAlias(src_reg, abi_size),
+ .sse => try self.copyToTmpRegister(ty, src_mcv),
+ .x87, .mmx => unreachable,
+ },
+ ),
+ .sse => switch (src_reg.class()) {
+ .general_purpose => try self.asmRegisterRegister(
+ switch (abi_size) {
+ 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+ else => unreachable,
+ },
+ dst_reg.to128(),
+ registerAlias(src_reg, @max(abi_size, 4)),
+ ),
+ .segment => try self.genSetReg(
+ dst_reg,
+ ty,
+ .{ .register = try self.copyToTmpRegister(ty, src_mcv) },
+ ),
+ .sse => try self.asmRegisterRegister(
+ if (@as(?Mir.Inst.FixedTag, switch (ty.scalarType().zigTypeTag()) {
+ else => switch (abi_size) {
+ 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+ 9...16 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
+ 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
+ else => null,
+ },
+ .Float => switch (ty.scalarType().floatBits(self.target.*)) { // query the element type: the enclosing switch is on ty.scalarType()
+ 16, 128 => switch (abi_size) {
+ 2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+ 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+ 9...16 => if (self.hasFeature(.avx))
+ .{ .v_, .movdqa }
+ else
+ .{ ._, .movdqa },
+ 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null,
+ else => null,
+ },
+ 32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
+ 64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
+ 80 => null,
+ else => unreachable,
+ },
+ })) |tag| tag else return self.fail("TODO implement genSetReg for {}", .{
+ ty.fmt(self.bin_file.options.module.?),
+ }),
+ registerAlias(dst_reg, abi_size),
+ registerAlias(src_reg, abi_size),
+ ),
+ .x87, .mmx => unreachable,
+ },
+ .x87, .mmx => unreachable,
+ },
.register_offset,
.indirect,
.load_frame,
.lea_frame,
=> {
+ const dst_alias = registerAlias(dst_reg, abi_size);
const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
.register_offset, .indirect => |reg_off| .{
.base = .{ .reg = reg_off.reg },
@@ -8759,71 +9325,81 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
},
else => unreachable,
});
- if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
- try self.asmRegisterMemoryImmediate(
- .{ .p_w, .insr },
- registerAlias(dst_reg, abi_size),
+ switch (@as(MoveStrategy, switch (src_mcv) {
+ .register_offset => |reg_off| switch (reg_off.off) {
+ 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
+ else => .{ .move = .{ ._, .lea } },
+ },
+ .indirect => try self.moveStrategy(ty, false),
+ .load_frame => |frame_addr| try self.moveStrategy(
+ ty,
+ self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*),
+ ),
+ .lea_frame => .{ .move = .{ ._, .lea } },
+ else => unreachable,
+ })) {
+ .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
+ .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
+ ie.insert,
+ dst_alias,
src_mem,
Immediate.u(0),
- )
- else
- try self.asmRegisterMemory(
- switch (src_mcv) {
- .register_offset => |reg_off| switch (reg_off.off) {
- 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
- else => .{ ._, .lea },
- },
- .indirect => try self.movMirTag(ty, false),
- .load_frame => |frame_addr| try self.movMirTag(
- ty,
- self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*),
- ),
- .lea_frame => .{ ._, .lea },
- else => unreachable,
- },
- registerAlias(dst_reg, abi_size),
+ ),
+ .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
+ ie.insert,
+ dst_alias,
+ dst_alias,
src_mem,
- );
+ Immediate.u(0),
+ ),
+ }
},
.memory, .load_direct, .load_got, .load_tlv => {
switch (src_mcv) {
.memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| {
+ const dst_alias = registerAlias(dst_reg, abi_size);
const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
.base = .{ .reg = .ds },
.disp = small_addr,
});
- return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
- self.asmRegisterMemoryImmediate(
- .{ .p_w, .insr },
- registerAlias(dst_reg, abi_size),
+ switch (try self.moveStrategy(ty, mem.isAlignedGeneric(
+ u32,
+ @bitCast(u32, small_addr),
+ ty.abiAlignment(self.target.*),
+ ))) {
+ .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
+ .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
+ ie.insert,
+ dst_alias,
src_mem,
Immediate.u(0),
- )
- else
- self.asmRegisterMemory(
- try self.movMirTag(ty, mem.isAlignedGeneric(
- u32,
- @bitCast(u32, small_addr),
- ty.abiAlignment(self.target.*),
- )),
- registerAlias(dst_reg, abi_size),
+ ),
+ .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
+ ie.insert,
+ dst_alias,
+ dst_alias,
src_mem,
- );
+ Immediate.u(0),
+ ),
+ }
},
- .load_direct => |sym_index| if (!ty.isRuntimeFloat()) {
- const atom_index = try self.owner.getSymbolIndex(self);
- _ = try self.addInst(.{
- .tag = .mov,
- .ops = .direct_reloc,
- .data = .{ .rx = .{
- .r1 = dst_reg.to64(),
- .payload = try self.addExtra(Mir.Reloc{
- .atom_index = atom_index,
- .sym_index = sym_index,
- }),
- } },
- });
- return;
+ .load_direct => |sym_index| switch (ty.zigTypeTag()) {
+ else => {
+ const atom_index = try self.owner.getSymbolIndex(self);
+ _ = try self.addInst(.{
+ .tag = .mov,
+ .ops = .direct_reloc,
+ .data = .{ .rx = .{
+ .r1 = dst_reg.to64(),
+ .payload = try self.addExtra(Mir.Reloc{
+ .atom_index = atom_index,
+ .sym_index = sym_index,
+ }),
+ } },
+ });
+ return;
+ },
+ .Float, .Vector => {},
},
.load_got, .load_tlv => {},
else => unreachable,
@@ -8833,22 +9409,26 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
defer self.register_manager.unlockReg(addr_lock);
+ const dst_alias = registerAlias(dst_reg, abi_size);
const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
.base = .{ .reg = addr_reg },
});
- if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
- try self.asmRegisterMemoryImmediate(
- .{ .p_w, .insr },
- registerAlias(dst_reg, abi_size),
+ switch (try self.moveStrategy(ty, false)) {
+ .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem),
+ .insert_extract => |ie| try self.asmRegisterMemoryImmediate(
+ ie.insert,
+ dst_alias,
src_mem,
Immediate.u(0),
- )
- else
- try self.asmRegisterMemory(
- try self.movMirTag(ty, false),
- registerAlias(dst_reg, abi_size),
+ ),
+ .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
+ ie.insert,
+ dst_alias,
+ dst_alias,
src_mem,
- );
+ Immediate.u(0),
+ ),
+ }
},
.lea_direct, .lea_got => |sym_index| {
const atom_index = try self.owner.getSymbolIndex(self);
@@ -8950,36 +9530,33 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
Memory.PtrSize.fromSize(abi_size),
.{ .base = base, .disp = disp },
);
- if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16)
- try self.asmMemoryRegisterImmediate(
- .{ .p_w, .extr },
+ const src_alias = registerAlias(src_reg, abi_size);
+ switch (try self.moveStrategy(ty, switch (base) {
+ .none => mem.isAlignedGeneric(
+ u32,
+ @bitCast(u32, disp),
+ ty.abiAlignment(self.target.*),
+ ),
+ .reg => |reg| switch (reg) {
+ .es, .cs, .ss, .ds => mem.isAlignedGeneric(
+ u32,
+ @bitCast(u32, disp),
+ ty.abiAlignment(self.target.*),
+ ),
+ else => false,
+ },
+ .frame => |frame_index| self.getFrameAddrAlignment(
+ .{ .index = frame_index, .off = disp },
+ ) >= ty.abiAlignment(self.target.*),
+ })) {
+ .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_alias),
+ .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate(
+ ie.extract,
dst_mem,
- src_reg.to128(),
+ src_alias,
Immediate.u(0),
- )
- else
- try self.asmMemoryRegister(
- try self.movMirTag(ty, switch (base) {
- .none => mem.isAlignedGeneric(
- u32,
- @bitCast(u32, disp),
- ty.abiAlignment(self.target.*),
- ),
- .reg => |reg| switch (reg) {
- .es, .cs, .ss, .ds => mem.isAlignedGeneric(
- u32,
- @bitCast(u32, disp),
- ty.abiAlignment(self.target.*),
- ),
- else => false,
- },
- .frame => |frame_index| self.getFrameAddrAlignment(
- .{ .index = frame_index, .off = disp },
- ) >= ty.abiAlignment(self.target.*),
- }),
- dst_mem,
- registerAlias(src_reg, abi_size),
- );
+ ),
+ }
},
.register_overflow => |ro| {
try self.genSetMem(
@@ -9069,7 +9646,7 @@ fn genInlineMemcpyRegisterRegister(
try self.asmMemoryRegister(
switch (src_reg.class()) {
.general_purpose, .segment => .{ ._, .mov },
- .floating_point => .{ ._ss, .mov },
+ .sse => .{ ._ss, .mov },
},
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }),
registerAlias(src_reg, abi_size),
@@ -9235,10 +9812,10 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void {
});
const src_mcv = try self.resolveInst(ty_op.operand);
- const src_reg = switch (src_mcv) {
- .register => |reg| reg,
- else => try self.copyToTmpRegister(src_ty, src_mcv),
- };
+ const src_reg = if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv);
const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
defer self.register_manager.unlockReg(src_lock);
@@ -9249,23 +9826,23 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void {
const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
defer self.register_manager.unlockReg(dst_lock);
- try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) {
- 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
- .{ ._, .cvtsi2ss }
- else
- return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{
- src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
- }),
- 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
- .{ ._, .cvtsi2sd }
- else
- return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{
- src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
- }),
- else => return self.fail("TODO implement airIntToFloat from {} to {}", .{
- src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
- }),
- }, dst_reg.to128(), registerAlias(src_reg, src_size));
+ const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag()) {
+ .Float => switch (dst_ty.floatBits(self.target.*)) {
+ 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 },
+ 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => null,
+ })) |tag| tag else return self.fail("TODO implement airIntToFloat from {} to {}", .{
+ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
+ });
+ const dst_alias = dst_reg.to128();
+ const src_alias = registerAlias(src_reg, src_size);
+ switch (mir_tag[0]) {
+ .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias),
+ else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias),
+ }
return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
@@ -9275,46 +9852,50 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void {
const src_ty = self.air.typeOf(ty_op.operand);
const dst_ty = self.air.typeOfIndex(inst);
- const operand = try self.resolveInst(ty_op.operand);
- const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
- const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
+ const dst_bits = @intCast(u32, dst_ty.bitSize(self.target.*));
+ const dst_signedness =
+ if (dst_ty.isAbiInt()) dst_ty.intInfo(self.target.*).signedness else .unsigned;
- switch (src_abi_size) {
- 4, 8 => {},
- else => |size| return self.fail("TODO load ST(0) with abiSize={}", .{size}),
- }
- if (dst_abi_size > 8) {
- return self.fail("TODO convert float with abiSize={}", .{dst_abi_size});
- }
+ const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) {
+ .signed => dst_bits,
+ .unsigned => dst_bits + 1,
+ }, 32), 8) catch unreachable;
+ if (dst_size > 8) return self.fail("TODO implement airFloatToInt from {} to {}", .{
+ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
+ });
- // move float src to ST(0)
- const frame_addr: FrameAddr = switch (operand) {
- .load_frame => |frame_addr| frame_addr,
- else => frame_addr: {
- const frame_index = try self.allocFrameIndex(FrameAlloc.initType(src_ty, self.target.*));
- try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, operand);
- break :frame_addr .{ .index = frame_index };
- },
- };
- try self.asmMemory(
- .{ .f_, .ld },
- Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{
- .base = .{ .frame = frame_addr.index },
- .disp = frame_addr.off,
- }),
- );
+ const src_mcv = try self.resolveInst(ty_op.operand);
+ const src_reg = if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv);
+ const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
+ defer self.register_manager.unlockReg(src_lock);
+
+ const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty));
+ const dst_mcv = MCValue{ .register = dst_reg };
+ const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+ defer self.register_manager.unlockReg(dst_lock);
- // convert
- const stack_dst = try self.allocRegOrMem(inst, false);
- try self.asmMemory(
- .{ .f_p, .istt },
- Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{
- .base = .{ .frame = stack_dst.load_frame.index },
- .disp = stack_dst.load_frame.off,
+ try self.asmRegisterRegister(
+ if (@as(?Mir.Inst.FixedTag, switch (src_ty.zigTypeTag()) {
+ .Float => switch (src_ty.floatBits(self.target.*)) {
+ 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si },
+ 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si },
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ else => null,
+ })) |tag| tag else return self.fail("TODO implement airFloatToInt from {} to {}", .{
+ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?),
}),
+ registerAlias(dst_reg, dst_size),
+ src_reg.to128(),
);
- return self.finishAir(inst, stack_dst, .{ ty_op.operand, .none, .none });
+ if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg);
+
+ return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
}
fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
@@ -9977,9 +10558,200 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
 const ty_op = self.air.instructions.items(.data)[inst].ty_op;
- _ = ty_op;
- return self.fail("TODO implement airSplat for x86_64", .{});
- //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+    const vector_ty = self.air.typeOfIndex(inst); // result type; its scalar type and length select the lowering below
+    const dst_rc = regClassForType(vector_ty);
+    const scalar_ty = vector_ty.scalarType();
+
+    const src_mcv = try self.resolveInst(ty_op.operand);
+    const result: MCValue = result: { // every supported case breaks out with its destination; anything else reaches the TODO failure at the end
+        switch (scalar_ty.zigTypeTag()) {
+            else => {},
+            .Float => switch (scalar_ty.floatBits(self.target.*)) {
+                32 => switch (vector_ty.vectorLen()) {
+                    1 => { // one lane: the scalar value already is the vector
+                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
+                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
+                        break :result .{ .register = dst_reg };
+                    },
+                    2...4 => { // result is used through its 128-bit alias
+                        if (self.hasFeature(.avx)) {
+                            const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+                            if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                                .{ .v_ss, .broadcast },
+                                dst_reg.to128(),
+                                src_mcv.mem(.dword),
+                            ) else {
+                                const src_reg = if (src_mcv.isRegister())
+                                    src_mcv.getReg().?
+                                else
+                                    try self.copyToTmpRegister(scalar_ty, src_mcv);
+                                try self.asmRegisterRegisterRegisterImmediate(
+                                    .{ .v_ps, .shuf },
+                                    dst_reg.to128(),
+                                    src_reg.to128(),
+                                    src_reg.to128(),
+                                    Immediate.u(0), // selector 0: every lane takes element 0
+                                );
+                            }
+                            break :result .{ .register = dst_reg };
+                        } else {
+                            const dst_mcv = if (src_mcv.isRegister() and
+                                self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+                                src_mcv
+                            else
+                                try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv);
+                            const dst_reg = dst_mcv.getReg().?;
+                            try self.asmRegisterRegisterImmediate(
+                                .{ ._ps, .shuf },
+                                dst_reg.to128(),
+                                dst_reg.to128(),
+                                Immediate.u(0),
+                            );
+                            break :result dst_mcv;
+                        }
+                    },
+                    5...8 => if (self.hasFeature(.avx)) { // result is used through its 256-bit alias; without AVX this falls to the TODO failure
+                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                            .{ .v_ss, .broadcast },
+                            dst_reg.to256(),
+                            src_mcv.mem(.dword),
+                        ) else {
+                            const src_reg = if (src_mcv.isRegister())
+                                src_mcv.getReg().?
+                            else
+                                try self.copyToTmpRegister(scalar_ty, src_mcv);
+                            if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
+                                .{ .v_ss, .broadcast },
+                                dst_reg.to256(),
+                                src_reg.to128(),
+                            ) else {
+                                try self.asmRegisterRegisterRegisterImmediate(
+                                    .{ .v_ps, .shuf },
+                                    dst_reg.to128(),
+                                    src_reg.to128(),
+                                    src_reg.to128(),
+                                    Immediate.u(0),
+                                );
+                                try self.asmRegisterRegisterRegisterImmediate(
+                                    .{ .v_f128, .insert },
+                                    dst_reg.to256(),
+                                    dst_reg.to256(),
+                                    dst_reg.to128(),
+                                    Immediate.u(1), // copy the splatted low half into the high half
+                                );
+                            }
+                        }
+                        break :result .{ .register = dst_reg };
+                    },
+                    else => {},
+                },
+                64 => switch (vector_ty.vectorLen()) {
+                    1 => {
+                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
+                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
+                        break :result .{ .register = dst_reg };
+                    },
+                    2 => {
+                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+                        if (self.hasFeature(.sse3)) {
+                            if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                                if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
+                                dst_reg.to128(),
+                                src_mcv.mem(.qword),
+                            ) else try self.asmRegisterRegister(
+                                if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup },
+                                dst_reg.to128(),
+                                (if (src_mcv.isRegister())
+                                    src_mcv.getReg().?
+                                else
+                                    try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(),
+                            );
+                        } else { // no SSE3: this path previously emitted movlhps and then fell through to the TODO failure
+                            try self.genSetReg(dst_reg, scalar_ty, src_mcv); // scalar into the low qword first, so both halves end up defined
+                            try self.asmRegisterRegister(
+                                .{ ._ps, .movlh },
+                                dst_reg.to128(),
+                                dst_reg.to128(), // movlhps dst, dst: low qword is copied into the high qword
+                            );
+                        }
+                        break :result .{ .register = dst_reg }; // shared by both feature paths
+                    },
+                    3...4 => if (self.hasFeature(.avx)) {
+                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                            .{ .v_sd, .broadcast },
+                            dst_reg.to256(),
+                            src_mcv.mem(.qword),
+                        ) else {
+                            const src_reg = if (src_mcv.isRegister())
+                                src_mcv.getReg().?
+                            else
+                                try self.copyToTmpRegister(scalar_ty, src_mcv);
+                            if (self.hasFeature(.avx2)) try self.asmRegisterRegister(
+                                .{ .v_sd, .broadcast },
+                                dst_reg.to256(),
+                                src_reg.to128(),
+                            ) else {
+                                try self.asmRegisterRegister(
+                                    .{ .v_, .movddup },
+                                    dst_reg.to128(),
+                                    src_reg.to128(),
+                                );
+                                try self.asmRegisterRegisterRegisterImmediate(
+                                    .{ .v_f128, .insert },
+                                    dst_reg.to256(),
+                                    dst_reg.to256(),
+                                    dst_reg.to128(),
+                                    Immediate.u(1),
+                                );
+                            }
+                        }
+                        break :result .{ .register = dst_reg };
+                    },
+                    else => {},
+                },
+                128 => switch (vector_ty.vectorLen()) {
+                    1 => {
+                        if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv;
+                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+                        try self.genSetReg(dst_reg, scalar_ty, src_mcv);
+                        break :result .{ .register = dst_reg };
+                    },
+                    2 => if (self.hasFeature(.avx)) {
+                        const dst_reg = try self.register_manager.allocReg(inst, dst_rc);
+                        if (src_mcv.isMemory()) try self.asmRegisterMemory(
+                            .{ .v_f128, .broadcast },
+                            dst_reg.to256(),
+                            src_mcv.mem(.xword),
+                        ) else {
+                            const src_reg = if (src_mcv.isRegister())
+                                src_mcv.getReg().?
+                            else
+                                try self.copyToTmpRegister(scalar_ty, src_mcv);
+                            try self.asmRegisterRegisterRegisterImmediate(
+                                .{ .v_f128, .insert },
+                                dst_reg.to256(),
+                                src_reg.to256(),
+                                src_reg.to128(),
+                                Immediate.u(1),
+                            );
+                        }
+                        break :result .{ .register = dst_reg };
+                    },
+                    else => {},
+                },
+                16, 80 => {},
+                else => unreachable,
+            },
+        }
+        return self.fail("TODO implement airSplat for {}", .{
+            vector_ty.fmt(self.bin_file.options.module.?),
+        });
+    };
+    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
@@ -10142,9 +10914,46 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void {
 const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
 const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data;
- _ = extra;
- return self.fail("TODO implement airUnionInit for x86_64", .{});
- //return self.finishAir(inst, result, .{ extra.init, .none, .none });
+ const result: MCValue = result: { // builds the union value from the single initialized field `extra.init`
+ const union_ty = self.air.typeOfIndex(inst);
+ const layout = union_ty.unionGetLayout(self.target.*);
+
+ const src_ty = self.air.typeOf(extra.init);
+ const src_mcv = try self.resolveInst(extra.init);
+ if (layout.tag_size == 0) { // no tag bytes in the layout: only the payload has to be produced
+ if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
+
+ const dst_mcv = try self.allocRegOrMem(inst, true);
+ try self.genCopy(union_ty, dst_mcv, src_mcv);
+ break :result dst_mcv;
+ }
+
+ const dst_mcv = try self.allocRegOrMem(inst, false); // NOTE(review): presumably `false` forces a memory result, which the address().offset() writes below rely on — confirm
+
+ const union_obj = union_ty.cast(Type.Payload.Union).?.data;
+ const field_name = union_obj.fields.keys()[extra.field_index];
+ const tag_ty = union_ty.unionTagTypeSafety().?;
+ const field_index = @intCast(u32, tag_ty.enumFieldIndex(field_name).?); // index of the same-named field inside the tag enum
+ var tag_pl = Value.Payload.U32{ .base = .{ .tag = .enum_field_index }, .data = field_index };
+ const tag_val = Value.initPayload(&tag_pl.base);
+ var tag_int_pl: Value.Payload.U64 = undefined; // scratch storage handed to enumToInt
+ const tag_int_val = tag_val.enumToInt(tag_ty, &tag_int_pl);
+ const tag_int = tag_int_val.toUnsignedInt(self.target.*); // integer stored as the tag
+ const tag_off = if (layout.tag_align < layout.payload_align) // less-aligned tag goes after the payload, otherwise it comes first
+ @intCast(i32, layout.payload_size)
+ else
+ 0;
+ try self.genCopy(tag_ty, dst_mcv.address().offset(tag_off).deref(), .{ .immediate = tag_int });
+
+ const pl_off = if (layout.tag_align < layout.payload_align) // mirror of tag_off: payload first, or right after the tag
+ 0
+ else
+ @intCast(i32, layout.tag_size);
+ try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv);
+
+ break :result dst_mcv;
+ };
+ return self.finishAir(inst, result, .{ extra.init, .none, .none });
}
fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void {
@@ -10616,13 +11425,21 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
reg.to64()
else
unreachable,
- .floating_point => if (size_bytes <= 16)
+ .segment => if (size_bytes <= 2)
+ reg
+ else
+ unreachable,
+ .x87 => unreachable,
+ .mmx => if (size_bytes <= 8)
+ reg
+ else
+ unreachable,
+ .sse => if (size_bytes <= 16)
reg.to128()
else if (size_bytes <= 32)
reg.to256()
else
unreachable,
- .segment => unreachable,
};
}
diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig
@@ -233,7 +233,6 @@ pub const Mnemonic = enum {
cmpxchg, cmpxchg8b, cmpxchg16b,
cqo, cwd, cwde,
div,
- fisttp, fld,
idiv, imul, int3,
ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe,
jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz,
@@ -259,21 +258,30 @@ pub const Mnemonic = enum {
@"test", tzcnt,
ud2,
xadd, xchg, xor,
+ // X87
+ fisttp, fld,
// MMX
- movd,
+ movd, movq,
+ paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw,
+ pand, pandn, por, pxor,
+ pmulhw, pmullw,
+ psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw,
// SSE
addps, addss,
andps,
andnps,
cmpss,
- cvtsi2ss,
+ cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si,
divps, divss,
maxps, maxss,
minps, minss,
- movaps, movhlps, movss, movups,
+ movaps, movhlps, movlhps,
+ movss, movups,
mulps, mulss,
orps,
pextrw, pinsrw,
+ pmaxsw, pmaxub, pminsw, pminub,
+ shufps,
sqrtps, sqrtss,
subps, subss,
ucomiss,
@@ -283,12 +291,15 @@ pub const Mnemonic = enum {
andpd,
andnpd,
//cmpsd,
- cvtsd2ss, cvtsi2sd, cvtss2sd,
+ cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd,
+ cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd,
+ cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si,
divpd, divsd,
maxpd, maxsd,
minpd, minsd,
movapd,
- movq, //movd, movsd,
+ movdqa, movdqu,
+ //movsd,
movupd,
mulpd, mulsd,
orpd,
@@ -296,6 +307,7 @@ pub const Mnemonic = enum {
psrld, psrlq, psrlw,
punpckhbw, punpckhdq, punpckhqdq, punpckhwd,
punpcklbw, punpckldq, punpcklqdq, punpcklwd,
+ shufpd,
sqrtpd, sqrtsd,
subpd, subsd,
ucomisd,
@@ -303,31 +315,57 @@ pub const Mnemonic = enum {
// SSE3
movddup, movshdup, movsldup,
// SSE4.1
+ extractps,
+ insertps,
pextrb, pextrd, pextrq,
pinsrb, pinsrd, pinsrq,
+ pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
+ pmulld,
roundpd, roundps, roundsd, roundss,
// AVX
vaddpd, vaddps, vaddsd, vaddss,
- vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd,
+ vandnpd, vandnps, vandpd, vandps,
+ vbroadcastf128, vbroadcastsd, vbroadcastss,
+ vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
+ vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
+ vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si,
+ vcvttpd2dq, vcvttps2dq, vcvttsd2si, vcvttss2si,
vdivpd, vdivps, vdivsd, vdivss,
+ vextractf128, vextractps,
+ vinsertf128, vinsertps,
vmaxpd, vmaxps, vmaxsd, vmaxss,
vminpd, vminps, vminsd, vminss,
vmovapd, vmovaps,
- vmovddup, vmovhlps,
+ vmovd,
+ vmovddup,
+ vmovdqa, vmovdqu,
+ vmovhlps, vmovlhps,
+ vmovq,
vmovsd,
vmovshdup, vmovsldup,
vmovss,
vmovupd, vmovups,
vmulpd, vmulps, vmulsd, vmulss,
+ vorpd, vorps,
+ vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
+ vpand, vpandn,
vpextrb, vpextrd, vpextrq, vpextrw,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
+ vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
+ vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
+ vpmulhw, vpmulld, vpmullw,
+ vpor,
vpshufhw, vpshuflw,
vpsrld, vpsrlq, vpsrlw,
+ vpsubb, vpsubd, vpsubq, vpsubsb, vpsubsw, vpsubusb, vpsubusw, vpsubw,
vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
+ vpxor,
vroundpd, vroundps, vroundsd, vroundss,
+ vshufpd, vshufps,
vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
vsubpd, vsubps, vsubsd, vsubss,
+ vxorpd, vxorps,
// F16C
vcvtph2ps, vcvtps2ph,
// FMA
@@ -368,80 +406,84 @@ pub const Op = enum {
m,
moffs,
sreg,
+ st, mm, mm_m64,
xmm, xmm_m32, xmm_m64, xmm_m128,
ymm, ymm_m256,
// zig fmt: on
 pub fn fromOperand(operand: Instruction.Operand) Op {
- switch (operand) {
- .none => return .none,
-
- .reg => |reg| {
- switch (reg.class()) {
- .segment => return .sreg,
- .floating_point => return switch (reg.bitSize()) {
- 128 => .xmm,
- 256 => .ymm,
+ return switch (operand) { // classifies one concrete operand as an Op kind, as a single switch expression
+ .none => .none,
+
+ .reg => |reg| switch (reg.class()) {
+ .general_purpose => if (reg.to64() == .rax) // aliases of rax get their own operand kinds
+ switch (reg) {
+ .al => .al,
+ .ax => .ax,
+ .eax => .eax,
+ .rax => .rax,
 else => unreachable,
- },
- .general_purpose => {
- if (reg.to64() == .rax) return switch (reg) {
- .al => .al,
- .ax => .ax,
- .eax => .eax,
- .rax => .rax,
- else => unreachable,
- };
- if (reg == .cl) return .cl;
- return switch (reg.bitSize()) {
- 8 => .r8,
- 16 => .r16,
- 32 => .r32,
- 64 => .r64,
- else => unreachable,
- };
- },
- }
+ }
+ else if (reg == .cl) // cl also has a dedicated kind
+ .cl
+ else switch (reg.bitSize()) { // any other general purpose register is classified by width only
+ 8 => .r8,
+ 16 => .r16,
+ 32 => .r32,
+ 64 => .r64,
+ else => unreachable,
+ },
+ .segment => .sreg,
+ .x87 => .st,
+ .mmx => .mm,
+ .sse => switch (reg.bitSize()) { // 128-bit and 256-bit aliases map to xmm and ymm
+ 128 => .xmm,
+ 256 => .ymm,
+ else => unreachable,
+ },
 },
 .mem => |mem| switch (mem) {
- .moffs => return .moffs,
- .sib, .rip => {
- const bit_size = mem.bitSize();
- return switch (bit_size) {
- 8 => .m8,
- 16 => .m16,
- 32 => .m32,
- 64 => .m64,
- 80 => .m80,
- 128 => .m128,
- 256 => .m256,
- else => unreachable,
- };
+ .moffs => .moffs,
+ .sib, .rip => switch (mem.bitSize()) { // memory operands are classified by access size in bits
+ 8 => .m8,
+ 16 => .m16,
+ 32 => .m32,
+ 64 => .m64,
+ 80 => .m80,
+ 128 => .m128,
+ 256 => .m256,
+ else => unreachable,
 },
 },
- .imm => |imm| {
- switch (imm) {
- .signed => |x| {
- if (x == 1) return .unity;
- if (math.cast(i8, x)) |_| return .imm8s;
- if (math.cast(i16, x)) |_| return .imm16s;
- return .imm32s;
- },
- .unsigned => |x| {
- if (x == 1) return .unity;
- if (math.cast(i8, x)) |_| return .imm8s;
- if (math.cast(u8, x)) |_| return .imm8;
- if (math.cast(i16, x)) |_| return .imm16s;
- if (math.cast(u16, x)) |_| return .imm16;
- if (math.cast(i32, x)) |_| return .imm32s;
- if (math.cast(u32, x)) |_| return .imm32;
- return .imm64;
- },
- }
+ .imm => |imm| switch (imm) {
+ .signed => |x| if (x == 1) // the value 1 is checked before any width
+ .unity
+ else if (math.cast(i8, x)) |_|
+ .imm8s
+ else if (math.cast(i16, x)) |_|
+ .imm16s
+ else
+ .imm32s,
+ .unsigned => |x| if (x == 1) // then narrowest to widest, signed kind before unsigned at each width
+ .unity
+ else if (math.cast(i8, x)) |_|
+ .imm8s
+ else if (math.cast(u8, x)) |_|
+ .imm8
+ else if (math.cast(i16, x)) |_|
+ .imm16s
+ else if (math.cast(u16, x)) |_|
+ .imm16
+ else if (math.cast(i32, x)) |_|
+ .imm32s
+ else if (math.cast(u32, x)) |_|
+ .imm32
+ else
+ .imm64,
 },
- }
+ };
 }
pub fn immBitSize(op: Op) u64 {
@@ -451,6 +493,7 @@ pub const Op = enum {
.ax, .r16, .rm16 => unreachable,
.eax, .r32, .rm32, .r32_m16 => unreachable,
.rax, .r64, .rm64, .r64_m16 => unreachable,
+ .st, .mm, .mm_m64 => unreachable,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable,
.ymm, .ymm_m256 => unreachable,
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
@@ -471,7 +514,8 @@ pub const Op = enum {
.al, .cl, .r8, .rm8 => 8,
.ax, .r16, .rm16 => 16,
.eax, .r32, .rm32, .r32_m8, .r32_m16 => 32,
- .rax, .r64, .rm64, .r64_m16 => 64,
+ .rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64,
+ .st => 80,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128,
.ymm, .ymm_m256 => 256,
};
@@ -482,11 +526,11 @@ pub const Op = enum {
.none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
.rel8, .rel16, .rel32 => unreachable,
- .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm, .ymm => unreachable,
+ .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .st, .mm, .xmm, .ymm => unreachable,
.m8, .rm8, .r32_m8 => 8,
.m16, .rm16, .r32_m16, .r64_m16 => 16,
.m32, .rm32, .xmm_m32 => 32,
- .m64, .rm64, .xmm_m64 => 64,
+ .m64, .rm64, .mm_m64, .xmm_m64 => 64,
.m80 => 80,
.m128, .xmm_m128 => 128,
.m256, .ymm_m256 => 256,
@@ -513,6 +557,7 @@ pub const Op = enum {
.r8, .r16, .r32, .r64,
.rm8, .rm16, .rm32, .rm64,
.r32_m8, .r32_m16, .r64_m16,
+ .st, .mm, .mm_m64,
.xmm, .xmm_m32, .xmm_m64, .xmm_m128,
.ymm, .ymm_m256,
=> true,
@@ -541,6 +586,7 @@ pub const Op = enum {
.r32_m8, .r32_m16, .r64_m16,
.m8, .m16, .m32, .m64, .m80, .m128, .m256,
.m,
+ .mm_m64,
.xmm_m32, .xmm_m64, .xmm_m128,
.ymm_m256,
=> true,
@@ -564,8 +610,10 @@ pub const Op = enum {
.rm8, .rm16, .rm32, .rm64 => .general_purpose,
.r32_m8, .r32_m16, .r64_m16 => .general_purpose,
.sreg => .segment,
- .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point,
- .ymm, .ymm_m256 => .floating_point,
+ .st => .x87,
+ .mm, .mm_m64 => .mmx,
+ .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse,
+ .ymm, .ymm_m256 => .sse,
};
}
@@ -682,8 +730,12 @@ pub const Feature = enum {
none,
avx,
avx2,
+ bmi,
f16c,
fma,
+ lzcnt,
+ movbe,
+ popcnt,
sse,
sse2,
sse3,
@@ -705,7 +757,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
}
const mnemonic_to_encodings_map = init: {
- @setEvalBranchQuota(20_000);
+ @setEvalBranchQuota(30_000);
const encodings = @import("encodings.zig");
var entries = encodings.table;
std.sort.sort(encodings.Entry, &entries, {}, struct {
diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig
@@ -9,19 +9,33 @@ result_insts_len: u8 = undefined,
result_relocs_len: u8 = undefined,
result_insts: [
 std.mem.max(usize, &.{
+ 1, // non-pseudo instructions
 2, // cmovcc: cmovcc \ cmovcc
 3, // setcc: setcc \ setcc \ logicop
 2, // jcc: jcc \ jcc
+ pseudo_probe_align_insts, // the four stack-probe pseudos; see the pseudo_probe_* constants
+ pseudo_probe_adjust_unrolled_max_insts,
+ pseudo_probe_adjust_setup_insts,
+ pseudo_probe_adjust_loop_insts,
 abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs
 abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs
 })
]Instruction = undefined,
result_relocs: [
 std.mem.max(usize, &.{
+ 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
 2, // jcc: jcc \ jcc
+ 2, // probe_align: test \ jcc \ lea \ probe \ jmp (the jcc and the jmp each take a reloc)
+ 1, // probe_adjust_loop: probe \ sub \ jcc
 })
]Reloc = undefined,
+pub const pseudo_probe_align_insts = 5; // test \ jcc \ lea \ probe \ jmp
+pub const pseudo_probe_adjust_unrolled_max_insts =
+ pseudo_probe_adjust_setup_insts + pseudo_probe_adjust_loop_insts; // upper bound that lowerMir asserts after emitting the unrolled probes and the final sub
+pub const pseudo_probe_adjust_setup_insts = 2; // mov \ sub
+pub const pseudo_probe_adjust_loop_insts = 3; // probe \ sub \ jcc
+
pub const Error = error{
OutOfMemory,
LowerFail,
@@ -62,6 +76,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
else => try lower.generic(inst),
.pseudo => switch (inst.ops) {
.pseudo_cmov_z_and_np_rr => {
+ assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rr.r2 },
.{ .reg = inst.data.rr.r1 },
@@ -72,6 +87,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_cmov_nz_or_p_rr => {
+ assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rr.r1 },
.{ .reg = inst.data.rr.r2 },
@@ -84,6 +100,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_cmov_nz_or_p_rm_sib,
.pseudo_cmov_nz_or_p_rm_rip,
=> {
+ assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rx.r1 },
.{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
@@ -94,58 +111,63 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_set_z_and_np_r => {
+ assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .setz, &.{
- .{ .reg = inst.data.r_scratch.r1 },
+ .{ .reg = inst.data.rr.r1 },
});
try lower.emit(.none, .setnp, &.{
- .{ .reg = inst.data.r_scratch.scratch_reg },
+ .{ .reg = inst.data.rr.r2 },
});
try lower.emit(.none, .@"and", &.{
- .{ .reg = inst.data.r_scratch.r1 },
- .{ .reg = inst.data.r_scratch.scratch_reg },
+ .{ .reg = inst.data.rr.r1 },
+ .{ .reg = inst.data.rr.r2 },
});
},
.pseudo_set_z_and_np_m_sib,
.pseudo_set_z_and_np_m_rip,
=> {
+ assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setz, &.{
- .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) },
+ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
});
try lower.emit(.none, .setnp, &.{
- .{ .reg = inst.data.x_scratch.scratch_reg },
+ .{ .reg = inst.data.rx.r1 },
});
try lower.emit(.none, .@"and", &.{
- .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) },
- .{ .reg = inst.data.x_scratch.scratch_reg },
+ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
+ .{ .reg = inst.data.rx.r1 },
});
},
.pseudo_set_nz_or_p_r => {
+ assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .setnz, &.{
- .{ .reg = inst.data.r_scratch.r1 },
+ .{ .reg = inst.data.rr.r1 },
});
try lower.emit(.none, .setp, &.{
- .{ .reg = inst.data.r_scratch.scratch_reg },
+ .{ .reg = inst.data.rr.r2 },
});
try lower.emit(.none, .@"or", &.{
- .{ .reg = inst.data.r_scratch.r1 },
- .{ .reg = inst.data.r_scratch.scratch_reg },
+ .{ .reg = inst.data.rr.r1 },
+ .{ .reg = inst.data.rr.r2 },
});
},
.pseudo_set_nz_or_p_m_sib,
.pseudo_set_nz_or_p_m_rip,
=> {
+ assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setnz, &.{
- .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) },
+ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
});
try lower.emit(.none, .setp, &.{
- .{ .reg = inst.data.x_scratch.scratch_reg },
+ .{ .reg = inst.data.rx.r1 },
});
try lower.emit(.none, .@"or", &.{
- .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) },
- .{ .reg = inst.data.x_scratch.scratch_reg },
+ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
+ .{ .reg = inst.data.rx.r1 },
});
},
.pseudo_j_z_and_np_inst => {
+ assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{
.{ .imm = lower.reloc(.{ .inst = index + 1 }) },
});
@@ -154,6 +176,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_j_nz_or_p_inst => {
+ assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) },
});
@@ -162,6 +185,78 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
+ .pseudo_probe_align_ri_s => {
+ try lower.emit(.none, .@"test", &.{
+ .{ .reg = inst.data.ri.r1 },
+ .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) },
+ });
+ try lower.emit(.none, .jz, &.{
+ .{ .imm = lower.reloc(.{ .inst = index + 1 }) },
+ });
+ try lower.emit(.none, .lea, &.{
+ .{ .reg = inst.data.ri.r1 },
+ .{ .mem = Memory.sib(.qword, .{
+ .base = .{ .reg = inst.data.ri.r1 },
+ .disp = -page_size,
+ }) },
+ });
+ try lower.emit(.none, .@"test", &.{
+ .{ .mem = Memory.sib(.dword, .{
+ .base = .{ .reg = inst.data.ri.r1 },
+ }) },
+ .{ .reg = inst.data.ri.r1.to32() },
+ });
+ try lower.emit(.none, .jmp, &.{
+ .{ .imm = lower.reloc(.{ .inst = index }) },
+ });
+ assert(lower.result_insts_len == pseudo_probe_align_insts);
+ },
+ .pseudo_probe_adjust_unrolled_ri_s => {
+ var offset = page_size;
+ while (offset < @bitCast(i32, inst.data.ri.i)) : (offset += page_size) {
+ try lower.emit(.none, .@"test", &.{
+ .{ .mem = Memory.sib(.dword, .{
+ .base = .{ .reg = inst.data.ri.r1 },
+ .disp = -offset,
+ }) },
+ .{ .reg = inst.data.ri.r1.to32() },
+ });
+ }
+ try lower.emit(.none, .sub, &.{
+ .{ .reg = inst.data.ri.r1 },
+ .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) },
+ });
+ assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts);
+ },
+ .pseudo_probe_adjust_setup_rri_s => {
+ try lower.emit(.none, .mov, &.{
+ .{ .reg = inst.data.rri.r2.to32() },
+ .{ .imm = Immediate.s(@bitCast(i32, inst.data.rri.i)) },
+ });
+ try lower.emit(.none, .sub, &.{
+ .{ .reg = inst.data.rri.r1 },
+ .{ .reg = inst.data.rri.r2 },
+ });
+ assert(lower.result_insts_len == pseudo_probe_adjust_setup_insts);
+ },
+ .pseudo_probe_adjust_loop_rr => {
+ try lower.emit(.none, .@"test", &.{
+ .{ .mem = Memory.sib(.dword, .{
+ .base = .{ .reg = inst.data.rr.r1 },
+ .scale_index = .{ .scale = 1, .index = inst.data.rr.r2 },
+ .disp = -page_size,
+ }) },
+ .{ .reg = inst.data.rr.r1.to32() },
+ });
+ try lower.emit(.none, .sub, &.{
+ .{ .reg = inst.data.rr.r2 },
+ .{ .imm = Immediate.s(page_size) },
+ });
+ try lower.emit(.none, .jae, &.{
+ .{ .imm = lower.reloc(.{ .inst = index }) },
+ });
+ assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts);
+ },
.pseudo_push_reg_list => try lower.pushPopRegList(.push, inst),
.pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst),
@@ -300,6 +395,8 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
else
.none,
}, mnemonic: {
+ @setEvalBranchQuota(2_000);
+
comptime var max_len = 0;
inline for (@typeInfo(Mnemonic).Enum.fields) |field| max_len = @max(field.name.len, max_len);
var buf: [max_len]u8 = undefined;
@@ -438,6 +535,8 @@ fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Er
}});
}
+const page_size: i32 = 1 << 12;
+
const abi = @import("abi.zig");
const assert = std.debug.assert;
const bits = @import("bits.zig");
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
@@ -236,6 +236,14 @@ pub const Inst = struct {
/// VEX-Encoded ___
v_,
+ /// VEX-Encoded ___ Byte
+ v_b,
+ /// VEX-Encoded ___ Word
+ v_w,
+ /// VEX-Encoded ___ Doubleword
+ v_d,
+ /// VEX-Encoded ___ Quadword
+ v_q,
/// VEX-Encoded Packed ___
vp_,
/// VEX-Encoded Packed ___ Byte
@@ -256,6 +264,8 @@ pub const Inst = struct {
v_sd,
/// VEX-Encoded ___ Packed Double-Precision Values
v_pd,
+ /// VEX-Encoded ___ 128-Bits Of Floating-Point Data
+ v_f128,
/// Mask ___ Byte
k_b,
@@ -278,6 +288,7 @@ pub const Inst = struct {
/// Add with carry
adc,
/// Add
+ /// Add packed integers
/// Add packed single-precision floating-point values
/// Add scalar single-precision floating-point values
/// Add packed double-precision floating-point values
@@ -410,6 +421,7 @@ pub const Inst = struct {
/// Double precision shift right
sh,
/// Subtract
+ /// Subtract packed integers
/// Subtract packed single-precision floating-point values
/// Subtract scalar single-precision floating-point values
/// Subtract packed double-precision floating-point values
@@ -434,11 +446,45 @@ pub const Inst = struct {
/// Bitwise logical xor of packed double-precision floating-point values
xor,
+ /// Add packed signed integers with signed saturation
+ adds,
+ /// Add packed unsigned integers with unsigned saturation
+ addus,
/// Bitwise logical and not of packed single-precision floating-point values
/// Bitwise logical and not of packed double-precision floating-point values
andn,
+ /// Maximum of packed signed integers
+ maxs,
+ /// Maximum of packed unsigned integers
+ maxu,
+ /// Minimum of packed signed integers
+ mins,
+ /// Minimum of packed unsigned integers
+ minu,
+ /// Multiply packed signed integers and store low result
+ mull,
+ /// Multiply packed signed integers and store high result
+ mulh,
+ /// Subtract packed signed integers with signed saturation
+ subs,
+ /// Subtract packed unsigned integers with unsigned saturation
+ subus,
+
+ /// Convert packed doubleword integers to packed single-precision floating-point values
+ /// Convert packed doubleword integers to packed double-precision floating-point values
+ cvtpi2,
+ /// Convert packed single-precision floating-point values to packed doubleword integers
+ cvtps2pi,
/// Convert doubleword integer to scalar single-precision floating-point value
- cvtsi2ss,
+ /// Convert doubleword integer to scalar double-precision floating-point value
+ cvtsi2,
+ /// Convert scalar single-precision floating-point value to doubleword integer
+ cvtss2si,
+ /// Convert with truncation packed single-precision floating-point values to packed doubleword integers
+ cvttps2pi,
+ /// Convert with truncation scalar single-precision floating-point value to doubleword integer
+ cvttss2si,
+
/// Maximum of packed single-precision floating-point values
/// Maximum of scalar single-precision floating-point values
/// Maximum of packed double-precision floating-point values
@@ -454,6 +500,8 @@ pub const Inst = struct {
mova,
/// Move packed single-precision floating-point values high to low
movhl,
+ /// Move packed single-precision floating-point values low to high
+ movlh,
/// Move unaligned packed single-precision floating-point values
/// Move unaligned packed double-precision floating-point values
movu,
@@ -482,12 +530,40 @@ pub const Inst = struct {
/// Unpack and interleave low packed double-precision floating-point values
unpckl,
+ /// Convert packed doubleword integers to packed single-precision floating-point values
+ /// Convert packed doubleword integers to packed double-precision floating-point values
+ cvtdq2,
+ /// Convert packed double-precision floating-point values to packed doubleword integers
+ cvtpd2dq,
+ /// Convert packed double-precision floating-point values to packed doubleword integers
+ cvtpd2pi,
+ /// Convert packed double-precision floating-point values to packed single-precision floating-point values
+ cvtpd2,
+ /// Convert packed single-precision floating-point values to packed doubleword integers
+ cvtps2dq,
+ /// Convert packed single-precision floating-point values to packed double-precision floating-point values
+ cvtps2,
+ /// Convert scalar double-precision floating-point value to doubleword integer
+ cvtsd2si,
/// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
- cvtsd2ss,
- /// Convert doubleword integer to scalar double-precision floating-point value
- cvtsi2sd,
+ cvtsd2,
/// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
- cvtss2sd,
+ cvtss2,
+ /// Convert with truncation packed double-precision floating-point values to packed doubleword integers
+ cvttpd2dq,
+ /// Convert with truncation packed double-precision floating-point values to packed doubleword integers
+ cvttpd2pi,
+ /// Convert with truncation packed single-precision floating-point values to packed doubleword integers
+ cvttps2dq,
+ /// Convert with truncation scalar double-precision floating-point value to doubleword integer
+ cvttsd2si,
+ /// Move aligned packed integer values
+ movdqa,
+ /// Move unaligned packed integer values
+ movdqu,
+ /// Packed interleave shuffle of quadruplets of single-precision floating-point values
+ /// Packed interleave shuffle of pairs of double-precision floating-point values
+ shuf,
/// Shuffle packed high words
shufh,
/// Shuffle packed low words
@@ -520,14 +596,22 @@ pub const Inst = struct {
/// Replicate single floating-point values
movsldup,
+ /// Extract packed floating-point values
+ extract,
+ /// Insert scalar single-precision floating-point value
+ /// Insert packed floating-point values
+ insert,
/// Round packed single-precision floating-point values
/// Round scalar single-precision floating-point value
/// Round packed double-precision floating-point values
/// Round scalar double-precision floating-point value
round,
+ /// Load with broadcast floating-point data
+ broadcast,
+
/// Convert 16-bit floating-point values to single-precision floating-point values
- cvtph2ps,
+ cvtph2,
/// Convert single-precision floating-point values to 16-bit floating-point values
cvtps2ph,
@@ -696,27 +780,27 @@ pub const Inst = struct {
pseudo_cmov_nz_or_p_rm_rip,
/// Set byte if zero flag set and parity flag not set
/// Requires a scratch register!
- /// Uses `r_scratch` payload.
+ /// Uses `rr` payload.
pseudo_set_z_and_np_r,
/// Set byte if zero flag set and parity flag not set
/// Requires a scratch register!
- /// Uses `x_scratch` payload.
+ /// Uses `rx` payload.
pseudo_set_z_and_np_m_sib,
/// Set byte if zero flag set and parity flag not set
/// Requires a scratch register!
- /// Uses `x_scratch` payload.
+ /// Uses `rx` payload.
pseudo_set_z_and_np_m_rip,
/// Set byte if zero flag not set or parity flag set
/// Requires a scratch register!
- /// Uses `r_scratch` payload.
+ /// Uses `rr` payload.
pseudo_set_nz_or_p_r,
/// Set byte if zero flag not set or parity flag set
/// Requires a scratch register!
- /// Uses `x_scratch` payload.
+ /// Uses `rx` payload.
pseudo_set_nz_or_p_m_sib,
/// Set byte if zero flag not set or parity flag set
/// Requires a scratch register!
- /// Uses `x_scratch` payload.
+ /// Uses `rx` payload.
pseudo_set_nz_or_p_m_rip,
/// Jump if zero flag set and parity flag not set
/// Uses `inst` payload.
@@ -725,6 +809,18 @@ pub const Inst = struct {
/// Uses `inst` payload.
pseudo_j_nz_or_p_inst,
+ /// Probe alignment
+ /// Uses `ri` payload.
+ pseudo_probe_align_ri_s,
+ /// Probe adjust unrolled
+ /// Uses `ri` payload.
+ pseudo_probe_adjust_unrolled_ri_s,
+ /// Probe adjust setup
+ /// Uses `rri` payload.
+ pseudo_probe_adjust_setup_rri_s,
+ /// Probe adjust loop
+ /// Uses `rr` payload.
+ pseudo_probe_adjust_loop_rr,
/// Push registers
/// Uses `reg_list` payload.
pseudo_push_reg_list,
@@ -821,18 +917,6 @@ pub const Inst = struct {
i: u8,
payload: u32,
},
- /// Register, scratch register
- r_scratch: struct {
- fixes: Fixes = ._,
- r1: Register,
- scratch_reg: Register,
- },
- /// Scratch register, followed by Custom payload found in extra.
- x_scratch: struct {
- fixes: Fixes = ._,
- scratch_reg: Register,
- payload: u32,
- },
/// Custom payload found in extra.
x: struct {
fixes: Fixes = ._,
diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig
@@ -165,34 +165,6 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class {
},
.Vector => {
const elem_ty = ty.childType();
- if (ctx == .arg) {
- const bit_size = ty.bitSize(target);
- if (bit_size > 128) {
- const has_avx512 = target.cpu.features.isEnabled(@enumToInt(std.Target.x86.Feature.avx512f));
- if (has_avx512 and bit_size <= 512) return .{
- .integer, .integer, .integer, .integer,
- .integer, .integer, .integer, .integer,
- };
- const has_avx = target.cpu.features.isEnabled(@enumToInt(std.Target.x86.Feature.avx));
- if (has_avx and bit_size <= 256) return .{
- .integer, .integer, .integer, .integer,
- .none, .none, .none, .none,
- };
- return memory_class;
- }
- if (bit_size > 80) return .{
- .integer, .integer, .none, .none,
- .none, .none, .none, .none,
- };
- if (bit_size > 64) return .{
- .x87, .none, .none, .none,
- .none, .none, .none, .none,
- };
- return .{
- .integer, .none, .none, .none,
- .none, .none, .none, .none,
- };
- }
const bits = elem_ty.bitSize(target) * ty.arrayLen();
if (bits <= 64) return .{
.sse, .none, .none, .none,
@@ -202,6 +174,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class {
.sse, .sseup, .none, .none,
.none, .none, .none, .none,
};
+ if (ctx == .arg and !std.Target.x86.featureSetHas(target.cpu.features, .avx)) return memory_class;
if (bits <= 192) return .{
.sse, .sseup, .sseup, .none,
.none, .none, .none, .none,
@@ -210,6 +183,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class {
.sse, .sseup, .sseup, .sseup,
.none, .none, .none, .none,
};
+ if (ctx == .arg and !std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return memory_class;
if (bits <= 320) return .{
.sse, .sseup, .sseup, .sseup,
.sseup, .none, .none, .none,
diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig
@@ -175,15 +175,21 @@ pub const Register = enum(u7) {
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+ mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7,
+
+ st0, st1, st2, st3, st4, st5, st6, st7,
+
es, cs, ss, ds, fs, gs,
none,
// zig fmt: on
- pub const Class = enum(u2) {
+ pub const Class = enum {
general_purpose,
- floating_point,
segment,
+ x87,
+ mmx,
+ sse,
};
pub fn class(reg: Register) Class {
@@ -195,8 +201,10 @@ pub const Register = enum(u7) {
@enumToInt(Register.al) ... @enumToInt(Register.r15b) => .general_purpose,
@enumToInt(Register.ah) ... @enumToInt(Register.bh) => .general_purpose,
- @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .floating_point,
- @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .floating_point,
+ @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .sse,
+ @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .sse,
+ @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => .mmx,
+ @enumToInt(Register.st0) ... @enumToInt(Register.st7) => .x87,
@enumToInt(Register.es) ... @enumToInt(Register.gs) => .segment,
@@ -216,8 +224,10 @@ pub const Register = enum(u7) {
@enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0) - 16,
@enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0) - 16,
+ @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0) - 32,
+ @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0) - 40,
- @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 32,
+ @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 48,
else => unreachable,
// zig fmt: on
@@ -236,6 +246,8 @@ pub const Register = enum(u7) {
@enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => 256,
@enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => 128,
+ @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => 64,
+ @enumToInt(Register.st0) ... @enumToInt(Register.st7) => 80,
@enumToInt(Register.es) ... @enumToInt(Register.gs) => 16,
@@ -271,6 +283,8 @@ pub const Register = enum(u7) {
@enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0),
@enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0),
+ @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0),
+ @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0),
@enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es),
@@ -326,8 +340,8 @@ pub const Register = enum(u7) {
return @intToEnum(Register, @enumToInt(reg) - reg.gpBase() + @enumToInt(Register.al));
}
- fn fpBase(reg: Register) u7 {
- assert(reg.class() == .floating_point);
+ fn sseBase(reg: Register) u7 {
+ assert(reg.class() == .sse);
return switch (@enumToInt(reg)) {
@enumToInt(Register.ymm0)...@enumToInt(Register.ymm15) => @enumToInt(Register.ymm0),
@enumToInt(Register.xmm0)...@enumToInt(Register.xmm15) => @enumToInt(Register.xmm0),
@@ -336,49 +350,24 @@ pub const Register = enum(u7) {
}
pub fn to256(reg: Register) Register {
- return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.ymm0));
+ return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.ymm0));
}
pub fn to128(reg: Register) Register {
- return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.xmm0));
- }
-
- pub fn dwarfLocOp(reg: Register) u8 {
- return switch (reg.class()) {
- .general_purpose => switch (reg.to64()) {
- .rax => DW.OP.reg0,
- .rdx => DW.OP.reg1,
- .rcx => DW.OP.reg2,
- .rbx => DW.OP.reg3,
- .rsi => DW.OP.reg4,
- .rdi => DW.OP.reg5,
- .rbp => DW.OP.reg6,
- .rsp => DW.OP.reg7,
- else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.reg0,
- },
- .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.reg17,
- else => unreachable,
- };
+ return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.xmm0));
}
- /// DWARF encodings that push a value onto the DWARF stack that is either
- /// the contents of a register or the result of adding the contents a given
- /// register to a given signed offset.
- pub fn dwarfLocOpDeref(reg: Register) u8 {
+ /// DWARF register encoding
+ pub fn dwarfNum(reg: Register) u6 {
return switch (reg.class()) {
- .general_purpose => switch (reg.to64()) {
- .rax => DW.OP.breg0,
- .rdx => DW.OP.breg1,
- .rcx => DW.OP.breg2,
- .rbx => DW.OP.breg3,
- .rsi => DW.OP.breg4,
- .rdi => DW.OP.breg5,
- .rbp => DW.OP.breg6,
- .rsp => DW.OP.breg7,
- else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.breg0,
- },
- .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.breg17,
- else => unreachable,
+ .general_purpose => if (reg.isExtended())
+ reg.enc()
+ else
+ @truncate(u3, @as(u24, 0o54673120) >> @as(u5, reg.enc()) * 3),
+ .sse => 17 + @as(u6, reg.enc()),
+ .x87 => 33 + @as(u6, reg.enc()),
+ .mmx => 41 + @as(u6, reg.enc()),
+ .segment => 50 + @as(u6, reg.enc()),
};
}
};
@@ -392,6 +381,8 @@ test "Register id - different classes" {
try expect(Register.ymm0.id() == 0b10000);
try expect(Register.ymm0.id() != Register.rax.id());
try expect(Register.xmm0.id() == Register.ymm0.id());
+ try expect(Register.xmm0.id() != Register.mm0.id());
+ try expect(Register.mm0.id() != Register.st0.id());
- try expect(Register.es.id() == 0b100000);
+ try expect(Register.es.id() == 0b110000);
}
@@ -407,7 +398,9 @@ test "Register enc - different classes" {
test "Register classes" {
try expect(Register.r11.class() == .general_purpose);
- try expect(Register.ymm11.class() == .floating_point);
+ try expect(Register.ymm11.class() == .sse);
+ try expect(Register.mm3.class() == .mmx);
+ try expect(Register.st3.class() == .x87);
try expect(Register.fs.class() == .segment);
}
diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig
@@ -272,14 +272,6 @@ pub const table = [_]Entry{
.{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none },
.{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none },
- .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 },
- .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 },
- .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 },
-
- .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 },
- .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 },
- .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 },
-
.{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none },
.{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none },
.{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none },
@@ -354,9 +346,9 @@ pub const table = [_]Entry{
.{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none, .none },
.{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long, .none },
- .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .none },
- .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .none },
- .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .none },
+ .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
+ .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt },
+ .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt },
.{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none },
@@ -395,12 +387,12 @@ pub const table = [_]Entry{
.{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none },
.{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none },
- .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .none },
- .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none },
- .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .none },
- .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .none },
- .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none },
- .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .none },
+ .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .movbe },
+ .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .movbe },
+ .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .movbe },
+ .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .movbe },
+ .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .movbe },
+ .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .movbe },
.{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none },
.{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none },
@@ -482,9 +474,9 @@ pub const table = [_]Entry{
.{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none },
.{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none },
- .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .none },
- .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .none },
- .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .none },
+ .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt },
+ .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt },
+ .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt },
.{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .short, .none },
.{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none, .none },
@@ -784,9 +776,9 @@ pub const table = [_]Entry{
.{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none, .none },
.{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long, .none },
- .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .none },
- .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .none },
- .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .none },
+ .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
+ .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi },
+ .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi },
.{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none },
@@ -836,6 +828,15 @@ pub const table = [_]Entry{
.{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none },
.{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none },
+ // X87
+ .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 },
+ .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 },
+ .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 },
+
+ .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 },
+ .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 },
+ .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 },
+
// SSE
.{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse },
@@ -847,9 +848,21 @@ pub const table = [_]Entry{
.{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse },
+ .{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse },
+
+ .{ .cvtps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2d }, 0, .none, .sse },
+
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse },
.{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse },
+ .{ .cvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .none, .sse },
+ .{ .cvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .long, .sse },
+
+ .{ .cvttps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2c }, 0, .none, .sse },
+
+ .{ .cvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .none, .sse },
+ .{ .cvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .long, .sse },
+
.{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse },
.{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse },
@@ -867,6 +880,8 @@ pub const table = [_]Entry{
.{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse },
+ .{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse },
+
.{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse },
.{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse },
@@ -879,14 +894,16 @@ pub const table = [_]Entry{
.{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse },
- .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
-
- .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
+ .{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse },
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },
.{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse },
+ .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse },
+
+ .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse },
+
.{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .none, .sse },
.{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse },
@@ -902,6 +919,25 @@ pub const table = [_]Entry{
.{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 },
+ .{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 },
+
+ .{ .cvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .none, .sse2 },
+
+ .{ .cvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .none, .sse2 },
+
+ .{ .cvtpd2pi, .rm, &.{ .mm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2d }, 0, .none, .sse2 },
+
+ .{ .cvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .none, .sse2 },
+
+ .{ .cvtpi2pd, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x66, 0x0f, 0x2a }, 0, .none, .sse2 },
+
+ .{ .cvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .none, .sse2 },
+
+ .{ .cvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .none, .sse2 },
+
+ .{ .cvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .none, .sse2 },
+ .{ .cvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .long, .sse2 },
+
.{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .none, .sse2 },
.{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .none, .sse2 },
@@ -909,6 +945,15 @@ pub const table = [_]Entry{
.{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 },
+ .{ .cvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .none, .sse2 },
+
+ .{ .cvttpd2pi, .rm, &.{ .mm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2c }, 0, .none, .sse2 },
+
+ .{ .cvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .none, .sse2 },
+
+ .{ .cvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .none, .sse2 },
+ .{ .cvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .long, .sse2 },
+
.{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 },
.{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 },
@@ -925,11 +970,16 @@ pub const table = [_]Entry{
.{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 },
.{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 },
- .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
-
.{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 },
+ .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
.{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 },
+ .{ .movdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .none, .sse2 },
+ .{ .movdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .none, .sse2 },
+
+ .{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 },
+ .{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 },
+
.{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 },
.{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 },
@@ -942,10 +992,39 @@ pub const table = [_]Entry{
.{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
+ .{ .paddb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfc }, 0, .none, .sse2 },
+ .{ .paddw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfd }, 0, .none, .sse2 },
+ .{ .paddd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfe }, 0, .none, .sse2 },
+ .{ .paddq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .none, .sse2 },
+
+ .{ .paddsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xec }, 0, .none, .sse2 },
+ .{ .paddsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xed }, 0, .none, .sse2 },
+
+ .{ .paddusb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdc }, 0, .none, .sse2 },
+ .{ .paddusw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdd }, 0, .none, .sse2 },
+
+ .{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 },
+
+ .{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 },
+
.{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 },
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
+ .{ .pmaxsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 },
+
+ .{ .pmaxub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 },
+
+ .{ .pminsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .none, .sse2 },
+
+ .{ .pminub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 },
+
+ .{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 },
+
+ .{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 },
+
+ .{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
+
.{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 },
.{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 },
@@ -957,6 +1036,18 @@ pub const table = [_]Entry{
.{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 },
.{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 },
+ .{ .psubb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .none, .sse2 },
+ .{ .psubw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .none, .sse2 },
+ .{ .psubd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfa }, 0, .none, .sse2 },
+
+ .{ .psubsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .none, .sse2 },
+ .{ .psubsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .none, .sse2 },
+
+ .{ .psubq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfb }, 0, .none, .sse2 },
+
+ .{ .psubusb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .none, .sse2 },
+ .{ .psubusw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .none, .sse2 },
+
.{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 },
.{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 },
.{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 },
@@ -967,6 +1058,10 @@ pub const table = [_]Entry{
.{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
.{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 },
+ .{ .pxor, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .none, .sse2 },
+
+ .{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 },
+
.{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 },
.{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 },
@@ -990,6 +1085,10 @@ pub const table = [_]Entry{
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
// SSE4.1
+ .{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 },
+
+ .{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 },
+
.{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 },
.{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 },
.{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 },
@@ -1000,6 +1099,22 @@ pub const table = [_]Entry{
.{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 },
.{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 },
+ .{ .pmaxsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .none, .sse4_1 },
+ .{ .pmaxsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .none, .sse4_1 },
+
+ .{ .pmaxuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .none, .sse4_1 },
+
+ .{ .pmaxud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .none, .sse4_1 },
+
+ .{ .pminsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .none, .sse4_1 },
+ .{ .pminsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .none, .sse4_1 },
+
+ .{ .pminuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .none, .sse4_1 },
+
+ .{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 },
+
+ .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
+
.{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
.{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 },
@@ -1019,15 +1134,68 @@ pub const table = [_]Entry{
.{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
+ .{ .vandnpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
+ .{ .vandnpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
+
+ .{ .vandnps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
+ .{ .vandnps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
+
+ .{ .vandpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
+ .{ .vandpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
+
+ .{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
+ .{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
+
+ .{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
+ .{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
+ .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
+ .{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx },
+
+ .{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
+ .{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .vex_128_wig, .avx },
+ .{ .vcvtdq2ps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x5b }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
+ .{ .vcvtpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_128_wig, .avx },
+ .{ .vcvtpd2ps, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_128_wig, .avx },
+ .{ .vcvtps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .vex_128_wig, .avx },
+ .{ .vcvtps2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x0f, 0x5a }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w0, .avx },
+ .{ .vcvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w1, .avx },
+
.{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
.{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx },
- .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
- .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx },
+ .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx },
+ .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx },
+
+ .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
+
+ .{ .vcvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w0, .avx },
+ .{ .vcvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w1, .avx },
+
+ .{ .vcvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx },
+ .{ .vcvttpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx },
- .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx },
+ .{ .vcvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_128_wig, .avx },
+ .{ .vcvttps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_256_wig, .avx },
+
+ .{ .vcvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx },
+ .{ .vcvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx },
+
+ .{ .vcvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx },
+ .{ .vcvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx },
.{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx },
.{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx },
@@ -1039,6 +1207,14 @@ pub const table = [_]Entry{
.{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx },
+ .{ .vextractf128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x19 }, 0, .vex_256_w0, .avx },
+
+ .{ .vextractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .vex_128_wig, .avx },
+
+ .{ .vinsertf128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x18 }, 0, .vex_256_w0, .avx },
+
+ .{ .vinsertps, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .vex_128_wig, .avx },
+
.{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx },
.{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx },
@@ -1069,11 +1245,31 @@ pub const table = [_]Entry{
.{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
.{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx },
+ .{ .vmovd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w0, .avx },
+ .{ .vmovq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w1, .avx },
+ .{ .vmovd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w0, .avx },
+ .{ .vmovq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w1, .avx },
+
.{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
.{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx },
+ .{ .vmovdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
+ .{ .vmovdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
+ .{ .vmovdqa, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
+ .{ .vmovdqa, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
+
+ .{ .vmovdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
+ .{ .vmovdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
+ .{ .vmovdqu, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
+ .{ .vmovdqu, .mr, &.{ .ymm_m256, .ymm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
+
.{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
+ .{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
+
+ .{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx },
+ .{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx },
+
.{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
.{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
@@ -1110,6 +1306,27 @@ pub const table = [_]Entry{
.{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
+ .{ .vorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
+ .{ .vorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
+
+ .{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
+ .{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
+
+ .{ .vpaddb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_128_wig, .avx },
+ .{ .vpaddw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_128_wig, .avx },
+ .{ .vpaddd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfe }, 0, .vex_128_wig, .avx },
+ .{ .vpaddq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpaddsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xec }, 0, .vex_128_wig, .avx },
+ .{ .vpaddsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xed }, 0, .vex_128_wig, .avx },
+
+ .{ .vpaddusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdc }, 0, .vex_128_wig, .avx },
+ .{ .vpaddusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdd }, 0, .vex_128_wig, .avx },
+
+ .{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx },
+
+ .{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx },
+
.{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
@@ -1123,6 +1340,32 @@ pub const table = [_]Entry{
.{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
+ .{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx },
+ .{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx },
+ .{ .vpmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmaxub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_128_wig, .avx },
+ .{ .vpmaxuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmaxud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_128_wig, .avx },
+
+ .{ .vpminsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_128_wig, .avx },
+ .{ .vpminsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_128_wig, .avx },
+ .{ .vpminsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpminub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_128_wig, .avx },
+ .{ .vpminuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_128_wig, .avx },
+
+ .{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmullw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
+
.{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
.{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx },
.{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx },
@@ -1130,6 +1373,18 @@ pub const table = [_]Entry{
.{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128_wig, .avx },
.{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128_wig, .avx },
+ .{ .vpsubb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfa }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfb }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_128_wig, .avx },
+
.{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx },
.{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx },
.{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx },
@@ -1140,6 +1395,8 @@ pub const table = [_]Entry{
.{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
.{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
+ .{ .vpxor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_128_wig, .avx },
+
.{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx },
.{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx },
@@ -1150,6 +1407,12 @@ pub const table = [_]Entry{
.{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx },
+ .{ .vshufpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_128_wig, .avx },
+ .{ .vshufpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vshufps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_128_wig, .avx },
+ .{ .vshufps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_256_wig, .avx },
+
.{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx },
.{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx },
@@ -1170,6 +1433,12 @@ pub const table = [_]Entry{
.{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
+ .{ .vxorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
+ .{ .vxorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+
+ .{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
+ .{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+
// F16C
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
@@ -1201,6 +1470,51 @@ pub const table = [_]Entry{
.{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma },
// AVX2
+ .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
+ .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
+ .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+
+ .{ .vpaddb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfe }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpaddsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xec }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xed }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpaddusb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdc }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddusw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdd }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpand, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
+
+ // The 256-bit (ymm) integer min/max/multiply encodings below require AVX2, like the other ymm integer rows in this section.
+ .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
+
.{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
.{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
.{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
@@ -1208,6 +1522,18 @@ pub const table = [_]Entry{
.{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 },
.{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 },
+ .{ .vpsubb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfa }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfb }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubusb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubusw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_256_wig, .avx2 },
+
.{ .vpunpckhbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpckhwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpckhdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_256_wig, .avx2 },
@@ -1217,5 +1543,7 @@ pub const table = [_]Entry{
.{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 },
.{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 },
};
// zig fmt: on
diff --git a/src/codegen.zig b/src/codegen.zig
@@ -380,7 +380,7 @@ pub fn generateSymbol(
return Result.ok;
},
- .field_ptr, .elem_ptr => return lowerParentPtr(
+ .field_ptr, .elem_ptr, .opt_payload_ptr => return lowerParentPtr(
bin_file,
src_loc,
typed_value,
@@ -812,7 +812,6 @@ fn lowerParentPtr(
reloc_info: RelocInfo,
) CodeGenError!Result {
const target = bin_file.options.target;
-
switch (parent_ptr.tag()) {
.field_ptr => {
const field_ptr = parent_ptr.castTag(.field_ptr).?.data;
@@ -858,6 +857,31 @@ fn lowerParentPtr(
reloc_info.offset(@intCast(u32, elem_ptr.index * elem_ptr.elem_ty.abiSize(target))),
);
},
+ .opt_payload_ptr => {
+ const opt_payload_ptr = parent_ptr.castTag(.opt_payload_ptr).?.data;
+ return lowerParentPtr(
+ bin_file,
+ src_loc,
+ typed_value,
+ opt_payload_ptr.container_ptr,
+ code,
+ debug_output,
+ reloc_info,
+ );
+ },
+ .eu_payload_ptr => {
+ const eu_payload_ptr = parent_ptr.castTag(.eu_payload_ptr).?.data;
+ const pl_ty = eu_payload_ptr.container_ty.errorUnionPayload();
+ return lowerParentPtr(
+ bin_file,
+ src_loc,
+ typed_value,
+ eu_payload_ptr.container_ptr,
+ code,
+ debug_output,
+ reloc_info.offset(@intCast(u32, errUnionPayloadOffset(pl_ty, target))),
+ );
+ },
.variable, .decl_ref, .decl_ref_mut => |tag| return lowerDeclRef(
bin_file,
src_loc,
@@ -1189,12 +1213,16 @@ pub fn genTypedValue(
.enum_simple => {
return GenResult.mcv(.{ .immediate = field_index.data });
},
- .enum_full, .enum_nonexhaustive => {
- const enum_full = typed_value.ty.cast(Type.Payload.EnumFull).?.data;
- if (enum_full.values.count() != 0) {
- const tag_val = enum_full.values.keys()[field_index.data];
+ .enum_numbered, .enum_full, .enum_nonexhaustive => {
+ const enum_values = if (typed_value.ty.castTag(.enum_numbered)) |pl|
+ pl.data.values
+ else
+ typed_value.ty.cast(Type.Payload.EnumFull).?.data.values;
+ if (enum_values.count() != 0) {
+ const tag_val = enum_values.keys()[field_index.data];
+ var buf: Type.Payload.Bits = undefined;
return genTypedValue(bin_file, src_loc, .{
- .ty = enum_full.tag_ty,
+ .ty = typed_value.ty.intTagType(&buf),
.val = tag_val,
}, owner_decl_index);
} else {
@@ -1258,9 +1286,10 @@ pub fn genTypedValue(
}
pub fn errUnionPayloadOffset(payload_ty: Type, target: std.Target) u64 {
+ if (!payload_ty.hasRuntimeBitsIgnoreComptime()) return 0;
const payload_align = payload_ty.abiAlignment(target);
const error_align = Type.anyerror.abiAlignment(target);
- if (payload_align >= error_align) {
+ if (payload_align >= error_align or !payload_ty.hasRuntimeBitsIgnoreComptime()) {
return 0;
} else {
return mem.alignForwardGeneric(u64, Type.anyerror.abiSize(target), payload_align);
@@ -1268,9 +1297,10 @@ pub fn errUnionPayloadOffset(payload_ty: Type, target: std.Target) u64 {
}
pub fn errUnionErrorOffset(payload_ty: Type, target: std.Target) u64 {
+ if (!payload_ty.hasRuntimeBitsIgnoreComptime()) return 0;
const payload_align = payload_ty.abiAlignment(target);
const error_align = Type.anyerror.abiAlignment(target);
- if (payload_align >= error_align) {
+ if (payload_align >= error_align and payload_ty.hasRuntimeBitsIgnoreComptime()) {
return mem.alignForwardGeneric(u64, payload_ty.abiSize(target), error_align);
} else {
return 0;
diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig
@@ -184,12 +184,14 @@ pub const DeclState = struct {
try dbg_info_buffer.append(@enumToInt(AbbrevKind.pad1));
},
.Bool => {
- try dbg_info_buffer.appendSlice(&[_]u8{
- @enumToInt(AbbrevKind.base_type),
- DW.ATE.boolean, // DW.AT.encoding , DW.FORM.data1
- 1, // DW.AT.byte_size, DW.FORM.data1
- 'b', 'o', 'o', 'l', 0, // DW.AT.name, DW.FORM.string
- });
+ try dbg_info_buffer.ensureUnusedCapacity(12);
+ dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.base_type));
+ // DW.AT.encoding, DW.FORM.data1
+ dbg_info_buffer.appendAssumeCapacity(DW.ATE.boolean);
+ // DW.AT.byte_size, DW.FORM.udata
+ try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target));
+ // DW.AT.name, DW.FORM.string
+ try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(module)});
},
.Int => {
const info = ty.intInfo(target);
@@ -200,9 +202,9 @@ pub const DeclState = struct {
.signed => DW.ATE.signed,
.unsigned => DW.ATE.unsigned,
});
- // DW.AT.byte_size, DW.FORM.data1
- dbg_info_buffer.appendAssumeCapacity(@intCast(u8, ty.abiSize(target)));
- // DW.AT.name, DW.FORM.string
+ // DW.AT.byte_size, DW.FORM.udata
+ try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target));
+ // DW.AT.name, DW.FORM.string
try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(module)});
},
.Optional => {
@@ -211,9 +213,9 @@ pub const DeclState = struct {
dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.base_type));
// DW.AT.encoding, DW.FORM.data1
dbg_info_buffer.appendAssumeCapacity(DW.ATE.address);
- // DW.AT.byte_size, DW.FORM.data1
- dbg_info_buffer.appendAssumeCapacity(@intCast(u8, ty.abiSize(target)));
- // DW.AT.name, DW.FORM.string
+ // DW.AT.byte_size, DW.FORM.udata
+ try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target));
+ // DW.AT.name, DW.FORM.string
try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(module)});
} else {
// Non-pointer optionals are structs: struct { .maybe = *, .val = * }
@@ -221,7 +223,7 @@ pub const DeclState = struct {
const payload_ty = ty.optionalChild(buf);
// DW.AT.structure_type
try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type));
- // DW.AT.byte_size, DW.FORM.sdata
+ // DW.AT.byte_size, DW.FORM.udata
const abi_size = ty.abiSize(target);
try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size);
// DW.AT.name, DW.FORM.string
@@ -236,7 +238,7 @@ pub const DeclState = struct {
var index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, Type.bool, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
try dbg_info_buffer.ensureUnusedCapacity(6);
dbg_info_buffer.appendAssumeCapacity(0);
// DW.AT.member
@@ -248,7 +250,7 @@ pub const DeclState = struct {
index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, payload_ty, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
const offset = abi_size - payload_ty.abiSize(target);
try leb128.writeULEB128(dbg_info_buffer.writer(), offset);
// DW.AT.structure_type delimit children
@@ -263,8 +265,8 @@ pub const DeclState = struct {
// DW.AT.structure_type
try dbg_info_buffer.ensureUnusedCapacity(2);
dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_type));
- // DW.AT.byte_size, DW.FORM.sdata
- dbg_info_buffer.appendAssumeCapacity(ptr_bytes * 2);
+ // DW.AT.byte_size, DW.FORM.udata
+ try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target));
// DW.AT.name, DW.FORM.string
try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(module)});
// DW.AT.member
@@ -279,7 +281,7 @@ pub const DeclState = struct {
var buf = try arena.create(Type.SlicePtrFieldTypeBuffer);
const ptr_ty = ty.slicePtrFieldType(buf);
try self.addTypeRelocGlobal(atom_index, ptr_ty, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
try dbg_info_buffer.ensureUnusedCapacity(6);
dbg_info_buffer.appendAssumeCapacity(0);
// DW.AT.member
@@ -291,7 +293,7 @@ pub const DeclState = struct {
index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, Type.usize, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
try dbg_info_buffer.ensureUnusedCapacity(2);
dbg_info_buffer.appendAssumeCapacity(ptr_bytes);
// DW.AT.structure_type delimit children
@@ -329,9 +331,8 @@ pub const DeclState = struct {
.Struct => blk: {
// DW.AT.structure_type
try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type));
- // DW.AT.byte_size, DW.FORM.sdata
- const abi_size = ty.abiSize(target);
- try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size);
+ // DW.AT.byte_size, DW.FORM.udata
+ try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target));
switch (ty.tag()) {
.tuple, .anon_struct => {
@@ -348,7 +349,7 @@ pub const DeclState = struct {
var index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, field, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
const field_off = ty.structFieldOffset(field_index, target);
try leb128.writeULEB128(dbg_info_buffer.writer(), field_off);
}
@@ -380,7 +381,7 @@ pub const DeclState = struct {
var index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, field.ty, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
const field_off = ty.structFieldOffset(field_index, target);
try leb128.writeULEB128(dbg_info_buffer.writer(), field_off);
}
@@ -393,9 +394,8 @@ pub const DeclState = struct {
.Enum => {
// DW.AT.enumeration_type
try dbg_info_buffer.append(@enumToInt(AbbrevKind.enum_type));
- // DW.AT.byte_size, DW.FORM.sdata
- const abi_size = ty.abiSize(target);
- try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size);
+ // DW.AT.byte_size, DW.FORM.udata
+ try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target));
// DW.AT.name, DW.FORM.string
const enum_name = try ty.nameAllocArena(arena, module);
try dbg_info_buffer.ensureUnusedCapacity(enum_name.len + 1);
@@ -446,7 +446,7 @@ pub const DeclState = struct {
if (is_tagged) {
// DW.AT.structure_type
try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type));
- // DW.AT.byte_size, DW.FORM.sdata
+ // DW.AT.byte_size, DW.FORM.udata
try leb128.writeULEB128(dbg_info_buffer.writer(), layout.abi_size);
// DW.AT.name, DW.FORM.string
try dbg_info_buffer.ensureUnusedCapacity(union_name.len + 1);
@@ -463,13 +463,13 @@ pub const DeclState = struct {
const inner_union_index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(inner_union_index + 4);
try self.addTypeRelocLocal(atom_index, @intCast(u32, inner_union_index), 5);
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
try leb128.writeULEB128(dbg_info_buffer.writer(), payload_offset);
}
// DW.AT.union_type
try dbg_info_buffer.append(@enumToInt(AbbrevKind.union_type));
- // DW.AT.byte_size, DW.FORM.sdata,
+ // DW.AT.byte_size, DW.FORM.udata,
try leb128.writeULEB128(dbg_info_buffer.writer(), layout.payload_size);
// DW.AT.name, DW.FORM.string
if (is_tagged) {
@@ -490,7 +490,7 @@ pub const DeclState = struct {
const index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, field.ty, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
try dbg_info_buffer.append(0);
}
// DW.AT.union_type delimit children
@@ -507,7 +507,7 @@ pub const DeclState = struct {
const index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, union_obj.tag_ty, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
try leb128.writeULEB128(dbg_info_buffer.writer(), tag_offset);
// DW.AT.structure_type delimit children
@@ -534,7 +534,7 @@ pub const DeclState = struct {
// DW.AT.structure_type
try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type));
- // DW.AT.byte_size, DW.FORM.sdata
+ // DW.AT.byte_size, DW.FORM.udata
try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size);
// DW.AT.name, DW.FORM.string
const name = try ty.nameAllocArena(arena, module);
@@ -551,7 +551,7 @@ pub const DeclState = struct {
const index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, payload_ty, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
try leb128.writeULEB128(dbg_info_buffer.writer(), payload_off);
}
@@ -566,7 +566,7 @@ pub const DeclState = struct {
const index = dbg_info_buffer.items.len;
try dbg_info_buffer.resize(index + 4);
try self.addTypeRelocGlobal(atom_index, error_ty, @intCast(u32, index));
- // DW.AT.data_member_location, DW.FORM.sdata
+ // DW.AT.data_member_location, DW.FORM.udata
try leb128.writeULEB128(dbg_info_buffer.writer(), error_off);
}
@@ -608,23 +608,44 @@ pub const DeclState = struct {
switch (loc) {
.register => |reg| {
- try dbg_info.ensureUnusedCapacity(3);
+ try dbg_info.ensureUnusedCapacity(4);
dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter));
- dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
- 1, // ULEB128 dwarf expression length
- reg,
- });
+ // DW.AT.location, DW.FORM.exprloc
+ var expr_len = std.io.countingWriter(std.io.null_writer);
+ if (reg < 32) {
+ expr_len.writer().writeByte(DW.OP.reg0 + reg) catch unreachable;
+ } else {
+ expr_len.writer().writeByte(DW.OP.regx) catch unreachable;
+ leb128.writeULEB128(expr_len.writer(), reg) catch unreachable;
+ }
+ leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable;
+ if (reg < 32) {
+ dbg_info.appendAssumeCapacity(DW.OP.reg0 + reg);
+ } else {
+ dbg_info.appendAssumeCapacity(DW.OP.regx);
+ leb128.writeULEB128(dbg_info.writer(), reg) catch unreachable;
+ }
},
.stack => |info| {
- try dbg_info.ensureUnusedCapacity(8);
+ try dbg_info.ensureUnusedCapacity(9);
dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter));
- const fixup = dbg_info.items.len;
- dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
- 1, // we will backpatch it after we encode the displacement in LEB128
- info.fp_register, // frame pointer
- });
+ // DW.AT.location, DW.FORM.exprloc
+ var expr_len = std.io.countingWriter(std.io.null_writer);
+ if (info.fp_register < 32) {
+ expr_len.writer().writeByte(DW.OP.breg0 + info.fp_register) catch unreachable;
+ } else {
+ expr_len.writer().writeByte(DW.OP.bregx) catch unreachable;
+ leb128.writeULEB128(expr_len.writer(), info.fp_register) catch unreachable;
+ }
+ leb128.writeILEB128(expr_len.writer(), info.offset) catch unreachable;
+ leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable;
+ if (info.fp_register < 32) {
+ dbg_info.appendAssumeCapacity(DW.OP.breg0 + info.fp_register);
+ } else {
+ dbg_info.appendAssumeCapacity(DW.OP.bregx);
+ leb128.writeULEB128(dbg_info.writer(), info.fp_register) catch unreachable;
+ }
leb128.writeILEB128(dbg_info.writer(), info.offset) catch unreachable;
- dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2);
},
.wasm_local => |value| {
const leb_size = link.File.Wasm.getULEB128Size(value);
@@ -647,8 +668,8 @@ pub const DeclState = struct {
try dbg_info.ensureUnusedCapacity(5 + name_with_null.len);
const index = dbg_info.items.len;
- try dbg_info.resize(index + 4); // dw.at.type, dw.form.ref4
- try self.addTypeRelocGlobal(atom_index, ty, @intCast(u32, index)); // DW.AT.type, DW.FORM.ref4
+ try dbg_info.resize(index + 4); // dw.at.type, dw.form.ref4
+ try self.addTypeRelocGlobal(atom_index, ty, @intCast(u32, index)); // DW.AT.type, DW.FORM.ref4
dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string
}
@@ -670,22 +691,45 @@ pub const DeclState = struct {
switch (loc) {
.register => |reg| {
- try dbg_info.ensureUnusedCapacity(2);
- dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
- 1, // ULEB128 dwarf expression length
- reg,
- });
+ try dbg_info.ensureUnusedCapacity(4);
+ dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter));
+ // DW.AT.location, DW.FORM.exprloc
+ var expr_len = std.io.countingWriter(std.io.null_writer);
+ if (reg < 32) {
+ expr_len.writer().writeByte(DW.OP.reg0 + reg) catch unreachable;
+ } else {
+ expr_len.writer().writeByte(DW.OP.regx) catch unreachable;
+ leb128.writeULEB128(expr_len.writer(), reg) catch unreachable;
+ }
+ leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable;
+ if (reg < 32) {
+ dbg_info.appendAssumeCapacity(DW.OP.reg0 + reg);
+ } else {
+ dbg_info.appendAssumeCapacity(DW.OP.regx);
+ leb128.writeULEB128(dbg_info.writer(), reg) catch unreachable;
+ }
},
.stack => |info| {
- try dbg_info.ensureUnusedCapacity(7);
- const fixup = dbg_info.items.len;
- dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc
- 1, // we will backpatch it after we encode the displacement in LEB128
- info.fp_register,
- });
+ try dbg_info.ensureUnusedCapacity(9);
+ dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter));
+ // DW.AT.location, DW.FORM.exprloc
+ var expr_len = std.io.countingWriter(std.io.null_writer);
+ if (info.fp_register < 32) {
+ expr_len.writer().writeByte(DW.OP.breg0 + info.fp_register) catch unreachable;
+ } else {
+ expr_len.writer().writeByte(DW.OP.bregx) catch unreachable;
+ leb128.writeULEB128(expr_len.writer(), info.fp_register) catch unreachable;
+ }
+ leb128.writeILEB128(expr_len.writer(), info.offset) catch unreachable;
+ leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable;
+ if (info.fp_register < 32) {
+ dbg_info.appendAssumeCapacity(DW.OP.breg0 + info.fp_register);
+ } else {
+ dbg_info.appendAssumeCapacity(DW.OP.bregx);
+ leb128.writeULEB128(dbg_info.writer(), info.fp_register) catch unreachable;
+ }
leb128.writeILEB128(dbg_info.writer(), info.offset) catch unreachable;
- dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2);
},
.wasm_local => |value| {
@@ -790,7 +834,7 @@ pub const DeclState = struct {
try dbg_info.ensureUnusedCapacity(5 + name_with_null.len);
const index = dbg_info.items.len;
- try dbg_info.resize(index + 4); // dw.at.type, dw.form.ref4
+ try dbg_info.resize(index + 4); // dw.at.type, dw.form.ref4
try self.addTypeRelocGlobal(atom_index, child_ty, @intCast(u32, index));
dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string
}
@@ -993,13 +1037,13 @@ pub fn initDeclState(self: *Dwarf, mod: *Module, decl_index: Module.Decl.Index)
// "relocations" and have to be in this fixed place so that functions can be
// moved in virtual address space.
assert(dbg_info_low_pc_reloc_index == dbg_info_buffer.items.len);
- dbg_info_buffer.items.len += ptr_width_bytes; // DW.AT.low_pc, DW.FORM.addr
+ dbg_info_buffer.items.len += ptr_width_bytes; // DW.AT.low_pc, DW.FORM.addr
assert(self.getRelocDbgInfoSubprogramHighPC() == dbg_info_buffer.items.len);
- dbg_info_buffer.items.len += 4; // DW.AT.high_pc, DW.FORM.data4
+ dbg_info_buffer.items.len += 4; // DW.AT.high_pc, DW.FORM.data4
//
if (fn_ret_has_bits) {
try decl_state.addTypeRelocGlobal(di_atom_index, fn_ret_type, @intCast(u32, dbg_info_buffer.items.len));
- dbg_info_buffer.items.len += 4; // DW.AT.type, DW.FORM.ref4
+ dbg_info_buffer.items.len += 4; // DW.AT.type, DW.FORM.ref4
}
dbg_info_buffer.appendSliceAssumeCapacity(decl_name_with_null); // DW.AT.name, DW.FORM.string
@@ -1619,7 +1663,7 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void {
DW.AT.encoding,
DW.FORM.data1,
DW.AT.byte_size,
- DW.FORM.data1,
+ DW.FORM.udata,
DW.AT.name,
DW.FORM.string,
0,
@@ -1635,7 +1679,7 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void {
DW.TAG.structure_type,
DW.CHILDREN.yes, // header
DW.AT.byte_size,
- DW.FORM.sdata,
+ DW.FORM.udata,
DW.AT.name,
DW.FORM.string,
0,
@@ -1648,14 +1692,14 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void {
DW.AT.type,
DW.FORM.ref4,
DW.AT.data_member_location,
- DW.FORM.sdata,
+ DW.FORM.udata,
0,
0, // table sentinel
@enumToInt(AbbrevKind.enum_type),
DW.TAG.enumeration_type,
DW.CHILDREN.yes, // header
DW.AT.byte_size,
- DW.FORM.sdata,
+ DW.FORM.udata,
DW.AT.name,
DW.FORM.string,
0,
@@ -1673,7 +1717,7 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void {
DW.TAG.union_type,
DW.CHILDREN.yes, // header
DW.AT.byte_size,
- DW.FORM.sdata,
+ DW.FORM.udata,
DW.AT.name,
DW.FORM.string,
0,
@@ -2628,7 +2672,7 @@ fn addDbgInfoErrorSet(
// DW.AT.enumeration_type
try dbg_info_buffer.append(@enumToInt(AbbrevKind.enum_type));
- // DW.AT.byte_size, DW.FORM.sdata
+ // DW.AT.byte_size, DW.FORM.udata
const abi_size = Type.anyerror.abiSize(target);
try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size);
// DW.AT.name, DW.FORM.string
diff --git a/src/type.zig b/src/type.zig
@@ -5433,8 +5433,18 @@ pub const Type = extern union {
}
}
+ // Works for integers and vectors of integers.
+ pub fn maxInt(ty: Type, arena: Allocator, target: Target) !Value {
+ const scalar = try maxIntScalar(ty.scalarType(), arena, target);
+ if (ty.zigTypeTag() == .Vector and scalar.tag() != .the_only_possible_value) {
+ return Value.Tag.repeated.create(arena, scalar);
+ } else {
+ return scalar;
+ }
+ }
+
/// Asserts that self.zigTypeTag() == .Int.
- pub fn maxInt(self: Type, arena: Allocator, target: Target) !Value {
+ pub fn maxIntScalar(self: Type, arena: Allocator, target: Target) !Value {
assert(self.zigTypeTag() == .Int);
const info = self.intInfo(target);
diff --git a/test/behavior/bugs/1277.zig b/test/behavior/bugs/1277.zig
@@ -14,7 +14,6 @@ fn f() i32 {
test "don't emit an LLVM global for a const function when it's in an optional in a struct" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
diff --git a/test/behavior/bugs/12801-2.zig b/test/behavior/bugs/12801-2.zig
@@ -16,7 +16,6 @@ const Auto = struct {
test {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
diff --git a/test/behavior/bugs/13366.zig b/test/behavior/bugs/13366.zig
@@ -14,7 +14,6 @@ const Block = struct {
test {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig
@@ -153,7 +153,6 @@ test "@intToFloat(f80)" {
test "@floatToInt" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
diff --git a/test/behavior/field_parent_ptr.zig b/test/behavior/field_parent_ptr.zig
@@ -11,7 +11,6 @@ test "@fieldParentPtr non-first field" {
}
test "@fieldParentPtr first field" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
@@ -51,7 +50,6 @@ fn testParentFieldPtrFirst(a: *const bool) !void {
}
test "@fieldParentPtr untagged union" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
@@ -79,7 +77,6 @@ fn testFieldParentPtrUnion(c: *const i32) !void {
}
test "@fieldParentPtr tagged union" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
@@ -107,7 +104,6 @@ fn testFieldParentPtrTaggedUnion(c: *const i32) !void {
}
test "@fieldParentPtr extern union" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig
@@ -184,7 +184,7 @@ test "more @sqrt f16 tests" {
test "another, possibly redundant @sqrt test" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+ if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
try testSqrtLegacy(f64, 12.0);
@@ -532,7 +532,6 @@ fn testFabs() !void {
test "@fabs with vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
diff --git a/test/behavior/fn.zig b/test/behavior/fn.zig
@@ -502,7 +502,6 @@ test "method call with optional pointer first param" {
}
test "using @ptrCast on function pointers" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
diff --git a/test/behavior/int_comparison_elision.zig b/test/behavior/int_comparison_elision.zig
@@ -15,7 +15,6 @@ test "int comparison elision" {
// TODO: support int types > 128 bits wide in other backends
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
diff --git a/test/behavior/math.zig b/test/behavior/math.zig
@@ -61,7 +61,6 @@ fn assertFalse(b: bool) !void {
}
test "@clz" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -142,7 +141,6 @@ fn expectVectorsEqual(a: anytype, b: anytype) !void {
}
test "@ctz" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1612,7 +1610,6 @@ test "absFloat" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
diff --git a/test/behavior/maximum_minimum.zig b/test/behavior/maximum_minimum.zig
@@ -146,7 +146,6 @@ test "@min/@max more than two arguments" {
test "@min/@max more than two vector arguments" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
diff --git a/test/behavior/memset.zig b/test/behavior/memset.zig
@@ -120,7 +120,6 @@ test "memset with large array element, runtime known" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest;
const A = [128]u64;
var buf: [5]A = undefined;
@@ -139,7 +138,6 @@ test "memset with large array element, comptime known" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest;
const A = [128]u64;
var buf: [5]A = undefined;
diff --git a/test/behavior/optional.zig b/test/behavior/optional.zig
@@ -74,7 +74,6 @@ test "optional with void type" {
test "address of unwrap optional" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
@@ -365,7 +364,6 @@ test "optional pointer to zero bit optional payload" {
}
test "optional pointer to zero bit error union payload" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
diff --git a/test/behavior/struct.zig b/test/behavior/struct.zig
@@ -1352,7 +1352,6 @@ test "struct field init value is size of the struct" {
}
test "under-aligned struct field" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
diff --git a/test/behavior/translate_c_macros.zig b/test/behavior/translate_c_macros.zig
@@ -65,7 +65,6 @@ test "cast negative integer to pointer" {
test "casting to union with a macro" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
diff --git a/test/behavior/tuple.zig b/test/behavior/tuple.zig
@@ -367,7 +367,6 @@ test "branching inside tuple literal" {
test "tuple initialized with a runtime known value" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
diff --git a/test/behavior/union.zig b/test/behavior/union.zig
@@ -362,7 +362,6 @@ const MultipleChoice = union(enum(u32)) {
D = 1000,
};
test "simple union(enum(u32))" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -641,7 +640,6 @@ test "tagged union with all void fields but a meaningful tag" {
}
test "union(enum(u32)) with specified and unspecified tag values" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -821,7 +819,6 @@ test "return union init with void payload" {
}
test "@unionInit stored to a const" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
@@ -994,7 +991,6 @@ test "function call result coerces from tagged union to the tag" {
}
test "cast from anonymous struct to union" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
@@ -1028,7 +1024,6 @@ test "cast from anonymous struct to union" {
}
test "cast from pointer to anonymous struct to pointer to union" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
@@ -1169,7 +1164,6 @@ test "union enum type gets a separate scope" {
}
test "global variable struct contains union initialized to non-most-aligned field" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1199,7 +1193,6 @@ test "global variable struct contains union initialized to non-most-aligned fiel
test "union with no result loc initiated with a runtime value" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
@@ -1217,7 +1210,6 @@ test "union with no result loc initiated with a runtime value" {
test "union with a large struct field" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
@@ -1288,7 +1280,6 @@ test "extern union most-aligned field is smaller" {
}
test "return an extern union from C calling convention" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1397,7 +1388,6 @@ test "union and enum field order doesn't match" {
}
test "@unionInit uses tag value instead of field index" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig
@@ -26,7 +26,8 @@ test "implicit cast vector to array - bool" {
test "vector wrap operators" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+ if (builtin.zig_backend == .stage2_x86_64 and
+ !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -119,7 +120,6 @@ test "vector float operators" {
test "vector bit operators" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -1129,7 +1129,6 @@ test "loading the second vector from a slice of vectors" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
@setRuntimeSafety(false);
var small_bases = [2]@Vector(2, u8){
@@ -1219,7 +1218,6 @@ test "zero multiplicand" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
const zeros = @Vector(2, u32){ 0.0, 0.0 };
var ones = @Vector(2, u32){ 1.0, 1.0 };
@@ -1324,7 +1322,6 @@ test "store to vector in slice" {
test "addition of vectors represented as strings" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
const V = @Vector(3, u8);