x86_64: implement clz, ctz, and popCount
This commit is contained in:
@@ -2597,28 +2597,143 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
|
||||
|
||||
fn airClz(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
|
||||
const result: MCValue = if (self.liveness.isUnused(inst))
|
||||
.dead
|
||||
else
|
||||
return self.fail("TODO implement airClz for {}", .{self.target.cpu.arch});
|
||||
const result = result: {
|
||||
if (self.liveness.isUnused(inst)) break :result .dead;
|
||||
|
||||
const dst_ty = self.air.typeOfIndex(inst);
|
||||
const src_ty = self.air.typeOf(ty_op.operand);
|
||||
const src_bits = src_ty.bitSize(self.target.*);
|
||||
|
||||
const src_mcv = try self.resolveInst(ty_op.operand);
|
||||
const mat_src_mcv = switch (src_mcv) {
|
||||
.immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
|
||||
else => src_mcv,
|
||||
};
|
||||
const mat_src_lock = switch (mat_src_mcv) {
|
||||
.register => |reg| self.register_manager.lockReg(reg),
|
||||
else => null,
|
||||
};
|
||||
defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const dst_reg = try self.register_manager.allocReg(inst, gp);
|
||||
const dst_mcv = MCValue{ .register = dst_reg };
|
||||
const dst_lock = self.register_manager.lockReg(dst_reg);
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
if (Target.x86.featureSetHas(self.target.cpu.features, .lzcnt)) {
|
||||
try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv);
|
||||
const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
|
||||
const extra_bits = registerAlias(dst_reg, src_abi_size).bitSize() - src_bits;
|
||||
if (extra_bits > 0) {
|
||||
try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits });
|
||||
}
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
|
||||
const width_mcv = MCValue{ .register = width_reg };
|
||||
try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv);
|
||||
|
||||
const dst_abi_size = @intCast(u32, @max(dst_ty.abiSize(self.target.*), 2));
|
||||
try self.asmCmovccRegisterRegister(
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(width_reg, dst_abi_size),
|
||||
.z,
|
||||
);
|
||||
|
||||
try self.genBinOpMir(.sub, dst_ty, width_mcv, dst_mcv);
|
||||
break :result width_mcv;
|
||||
};
|
||||
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
|
||||
}
|
||||
|
||||
fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
|
||||
const result: MCValue = if (self.liveness.isUnused(inst))
|
||||
.dead
|
||||
else
|
||||
return self.fail("TODO implement airCtz for {}", .{self.target.cpu.arch});
|
||||
const result = result: {
|
||||
if (self.liveness.isUnused(inst)) break :result .dead;
|
||||
|
||||
const dst_ty = self.air.typeOfIndex(inst);
|
||||
const src_ty = self.air.typeOf(ty_op.operand);
|
||||
const src_bits = src_ty.bitSize(self.target.*);
|
||||
|
||||
const src_mcv = try self.resolveInst(ty_op.operand);
|
||||
const mat_src_mcv = switch (src_mcv) {
|
||||
.immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) },
|
||||
else => src_mcv,
|
||||
};
|
||||
const mat_src_lock = switch (mat_src_mcv) {
|
||||
.register => |reg| self.register_manager.lockReg(reg),
|
||||
else => null,
|
||||
};
|
||||
defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const dst_reg = try self.register_manager.allocReg(inst, gp);
|
||||
const dst_mcv = MCValue{ .register = dst_reg };
|
||||
const dst_lock = self.register_manager.lockReg(dst_reg);
|
||||
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
if (Target.x86.featureSetHas(self.target.cpu.features, .bmi)) {
|
||||
const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*));
|
||||
const extra_bits = registerAlias(dst_reg, src_abi_size).bitSize() - src_bits;
|
||||
const masked_mcv = if (extra_bits > 0) masked: {
|
||||
const mask_mcv = MCValue{
|
||||
.immediate = ((@as(u64, 1) << @intCast(u6, extra_bits)) - 1) << @intCast(u6, src_bits),
|
||||
};
|
||||
const tmp_mcv = tmp: {
|
||||
if (src_mcv.isImmediate() or self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) {
|
||||
break :tmp src_mcv;
|
||||
}
|
||||
try self.genSetReg(src_ty, dst_reg, src_mcv);
|
||||
break :tmp dst_mcv;
|
||||
};
|
||||
try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv);
|
||||
break :masked tmp_mcv;
|
||||
} else mat_src_mcv;
|
||||
try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv);
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
|
||||
try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv);
|
||||
|
||||
const abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2);
|
||||
try self.asmCmovccRegisterRegister(
|
||||
registerAlias(dst_reg, abi_size),
|
||||
registerAlias(width_reg, abi_size),
|
||||
.z,
|
||||
);
|
||||
|
||||
break :result dst_mcv;
|
||||
};
|
||||
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
|
||||
}
|
||||
|
||||
fn airPopcount(self: *Self, inst: Air.Inst.Index) !void {
|
||||
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
|
||||
const result: MCValue = if (self.liveness.isUnused(inst))
|
||||
.dead
|
||||
else
|
||||
return self.fail("TODO implement airPopcount for {}", .{self.target.cpu.arch});
|
||||
const result = result: {
|
||||
if (self.liveness.isUnused(inst)) break :result .dead;
|
||||
|
||||
const op_ty = self.air.typeOf(ty_op.operand);
|
||||
|
||||
if (Target.x86.featureSetHas(self.target.cpu.features, .popcnt)) {
|
||||
const op_mcv = try self.resolveInst(ty_op.operand);
|
||||
const mat_op_mcv = switch (op_mcv) {
|
||||
.immediate => MCValue{ .register = try self.copyToTmpRegister(op_ty, op_mcv) },
|
||||
else => op_mcv,
|
||||
};
|
||||
const mat_op_lock = switch (mat_op_mcv) {
|
||||
.register => |reg| self.register_manager.lockReg(reg),
|
||||
else => null,
|
||||
};
|
||||
defer if (mat_op_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, gp) };
|
||||
try self.genBinOpMir(.popcnt, op_ty, dst_mcv, mat_op_mcv);
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
return self.fail("TODO implement airPopcount for {}", .{op_ty.fmt(self.bin_file.options.module.?)});
|
||||
};
|
||||
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
|
||||
}
|
||||
|
||||
@@ -3491,7 +3606,7 @@ fn genBinOp(
|
||||
if (lhs.isRegister() and self.reuseOperand(inst, lhs_air, 0, lhs)) {
|
||||
break :blk lhs;
|
||||
}
|
||||
if (rhs.isRegister() and is_commutative and self.reuseOperand(inst, rhs_air, 1, rhs)) {
|
||||
if (is_commutative and rhs.isRegister() and self.reuseOperand(inst, rhs_air, 1, rhs)) {
|
||||
flipped = true;
|
||||
break :blk rhs;
|
||||
}
|
||||
|
||||
@@ -73,6 +73,8 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
|
||||
.adc,
|
||||
.add,
|
||||
.@"and",
|
||||
.bsf,
|
||||
.bsr,
|
||||
.call,
|
||||
.cbw,
|
||||
.cwde,
|
||||
@@ -89,12 +91,14 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
|
||||
.int3,
|
||||
.jmp,
|
||||
.lea,
|
||||
.lzcnt,
|
||||
.mov,
|
||||
.movzx,
|
||||
.mul,
|
||||
.nop,
|
||||
.@"or",
|
||||
.pop,
|
||||
.popcnt,
|
||||
.push,
|
||||
.ret,
|
||||
.sal,
|
||||
@@ -105,6 +109,7 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
|
||||
.sub,
|
||||
.syscall,
|
||||
.@"test",
|
||||
.tzcnt,
|
||||
.ud2,
|
||||
.xor,
|
||||
|
||||
|
||||
@@ -307,6 +307,7 @@ pub const Mnemonic = enum {
|
||||
// zig fmt: off
|
||||
// General-purpose
|
||||
adc, add, @"and",
|
||||
bsf, bsr,
|
||||
call, cbw, cdq, cdqe,
|
||||
cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna,
|
||||
cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno,
|
||||
@@ -322,12 +323,13 @@ pub const Mnemonic = enum {
|
||||
jmp,
|
||||
lea,
|
||||
lods, lodsb, lodsd, lodsq, lodsw,
|
||||
lzcnt,
|
||||
mov,
|
||||
movs, movsb, movsd, movsq, movsw,
|
||||
movsx, movsxd, movzx, mul,
|
||||
nop,
|
||||
@"or",
|
||||
pop, push,
|
||||
pop, popcnt, push,
|
||||
ret,
|
||||
sal, sar, sbb,
|
||||
scas, scasb, scasd, scasq, scasw,
|
||||
@@ -336,7 +338,7 @@ pub const Mnemonic = enum {
|
||||
setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns,
|
||||
setnz, seto, setp, setpe, setpo, sets, setz,
|
||||
stos, stosb, stosd, stosq, stosw,
|
||||
@"test",
|
||||
@"test", tzcnt,
|
||||
ud2,
|
||||
xor,
|
||||
// SSE
|
||||
|
||||
@@ -38,6 +38,10 @@ pub const Inst = struct {
|
||||
add,
|
||||
/// Logical and
|
||||
@"and",
|
||||
/// Bit scan forward
|
||||
bsf,
|
||||
/// Bit scan reverse
|
||||
bsr,
|
||||
/// Call
|
||||
call,
|
||||
/// Convert byte to word
|
||||
@@ -70,6 +74,8 @@ pub const Inst = struct {
|
||||
jmp,
|
||||
/// Load effective address
|
||||
lea,
|
||||
/// Count the number of leading zero bits
|
||||
lzcnt,
|
||||
/// Move
|
||||
mov,
|
||||
/// Move with sign extension
|
||||
@@ -84,6 +90,8 @@ pub const Inst = struct {
|
||||
@"or",
|
||||
/// Pop
|
||||
pop,
|
||||
/// Return the count of number of bits set to 1
|
||||
popcnt,
|
||||
/// Push
|
||||
push,
|
||||
/// Return
|
||||
@@ -104,6 +112,8 @@ pub const Inst = struct {
|
||||
syscall,
|
||||
/// Test condition
|
||||
@"test",
|
||||
/// Count the number of trailing zero bits
|
||||
tzcnt,
|
||||
/// Undefined instruction
|
||||
ud2,
|
||||
/// Logical exclusive-or
|
||||
|
||||
@@ -81,6 +81,14 @@ pub const table = &[_]Entry{
|
||||
.{ .@"and", .rm, .r32, .rm32, .none, .none, &.{ 0x23 }, 0, .none },
|
||||
.{ .@"and", .rm, .r64, .rm64, .none, .none, &.{ 0x23 }, 0, .long },
|
||||
|
||||
.{ .bsf, .rm, .r16, .rm16, .none, .none, &.{ 0x0f, 0xbc }, 0, .none },
|
||||
.{ .bsf, .rm, .r32, .rm32, .none, .none, &.{ 0x0f, 0xbc }, 0, .none },
|
||||
.{ .bsf, .rm, .r64, .rm64, .none, .none, &.{ 0x0f, 0xbc }, 0, .long },
|
||||
|
||||
.{ .bsr, .rm, .r16, .rm16, .none, .none, &.{ 0x0f, 0xbd }, 0, .none },
|
||||
.{ .bsr, .rm, .r32, .rm32, .none, .none, &.{ 0x0f, 0xbd }, 0, .none },
|
||||
.{ .bsr, .rm, .r64, .rm64, .none, .none, &.{ 0x0f, 0xbd }, 0, .long },
|
||||
|
||||
// This is M encoding according to Intel, but D makes more sense here.
|
||||
.{ .call, .d, .rel32, .none, .none, .none, &.{ 0xe8 }, 0, .none },
|
||||
.{ .call, .m, .rm64, .none, .none, .none, &.{ 0xff }, 2, .none },
|
||||
@@ -301,6 +309,10 @@ pub const table = &[_]Entry{
|
||||
.{ .lodsd, .np, .none, .none, .none, .none, &.{ 0xad }, 0, .none },
|
||||
.{ .lodsq, .np, .none, .none, .none, .none, &.{ 0xad }, 0, .long },
|
||||
|
||||
.{ .lzcnt, .rm, .r16, .rm16, .none, .none, &.{ 0xf3, 0x0f, 0xbd }, 0, .none },
|
||||
.{ .lzcnt, .rm, .r32, .rm32, .none, .none, &.{ 0xf3, 0x0f, 0xbd }, 0, .none },
|
||||
.{ .lzcnt, .rm, .r64, .rm64, .none, .none, &.{ 0xf3, 0x0f, 0xbd }, 0, .long },
|
||||
|
||||
.{ .mov, .mr, .rm8, .r8, .none, .none, &.{ 0x88 }, 0, .none },
|
||||
.{ .mov, .mr, .rm8, .r8, .none, .none, &.{ 0x88 }, 0, .rex },
|
||||
.{ .mov, .mr, .rm16, .r16, .none, .none, &.{ 0x89 }, 0, .none },
|
||||
@@ -397,6 +409,10 @@ pub const table = &[_]Entry{
|
||||
.{ .pop, .m, .rm16, .none, .none, .none, &.{ 0x8f }, 0, .none },
|
||||
.{ .pop, .m, .rm64, .none, .none, .none, &.{ 0x8f }, 0, .none },
|
||||
|
||||
.{ .popcnt, .rm, .r16, .rm16, .none, .none, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none },
|
||||
.{ .popcnt, .rm, .r32, .rm32, .none, .none, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none },
|
||||
.{ .popcnt, .rm, .r64, .rm64, .none, .none, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long },
|
||||
|
||||
.{ .push, .o, .r16, .none, .none, .none, &.{ 0x50 }, 0, .none },
|
||||
.{ .push, .o, .r64, .none, .none, .none, &.{ 0x50 }, 0, .none },
|
||||
.{ .push, .m, .rm16, .none, .none, .none, &.{ 0xff }, 6, .none },
|
||||
@@ -596,8 +612,8 @@ pub const table = &[_]Entry{
|
||||
.{ .sub, .rm, .r32, .rm32, .none, .none, &.{ 0x2b }, 0, .none },
|
||||
.{ .sub, .rm, .r64, .rm64, .none, .none, &.{ 0x2b }, 0, .long },
|
||||
|
||||
.{ .syscall, .np, .none, .none, .none, .none, &.{ 0x0f, 0x05 }, 0, .none },
|
||||
|
||||
.{ .syscall, .np, .none, .none, .none, .none, &.{ 0x0f, 0x05 }, 0, .none }
|
||||
,
|
||||
.{ .@"test", .zi, .al, .imm8, .none, .none, &.{ 0xa8 }, 0, .none },
|
||||
.{ .@"test", .zi, .ax, .imm16, .none, .none, &.{ 0xa9 }, 0, .none },
|
||||
.{ .@"test", .zi, .eax, .imm32, .none, .none, &.{ 0xa9 }, 0, .none },
|
||||
@@ -613,6 +629,10 @@ pub const table = &[_]Entry{
|
||||
.{ .@"test", .mr, .rm32, .r32, .none, .none, &.{ 0x85 }, 0, .none },
|
||||
.{ .@"test", .mr, .rm64, .r64, .none, .none, &.{ 0x85 }, 0, .long },
|
||||
|
||||
.{ .tzcnt, .rm, .r16, .rm16, .none, .none, &.{ 0xf3, 0x0f, 0xbc }, 0, .none },
|
||||
.{ .tzcnt, .rm, .r32, .rm32, .none, .none, &.{ 0xf3, 0x0f, 0xbc }, 0, .none },
|
||||
.{ .tzcnt, .rm, .r64, .rm64, .none, .none, &.{ 0xf3, 0x0f, 0xbc }, 0, .long },
|
||||
|
||||
.{ .ud2, .np, .none, .none, .none, .none, &.{ 0x0f, 0x0b }, 0, .none },
|
||||
|
||||
.{ .xor, .zi, .al, .imm8, .none, .none, &.{ 0x34 }, 0, .none },
|
||||
|
||||
Reference in New Issue
Block a user