x86_64: implement @ctz and @clz for u128

This commit is contained in:
Jacob Young
2023-05-04 03:36:04 -04:00
parent 9bea854dc2
commit 1a261917ce
3 changed files with 74 additions and 25 deletions

View File

@@ -3798,19 +3798,38 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
const dst_reg = try self.register_manager.allocReg(inst, gp);
const dst_mcv = MCValue{ .register = dst_reg };
const dst_lock = self.register_manager.lockReg(dst_reg);
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
defer self.register_manager.unlockReg(dst_lock);
const src_bits = src_ty.bitSize(self.target.*);
if (Target.x86.featureSetHas(self.target.cpu.features, .lzcnt)) {
try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv);
const extra_bits = self.regExtraBits(src_ty);
if (extra_bits > 0) {
try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits });
}
if (src_bits <= 64) {
try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv);
const extra_bits = self.regExtraBits(src_ty);
if (extra_bits > 0) {
try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits });
}
} else if (src_bits <= 128) {
const tmp_reg = try self.register_manager.allocReg(null, gp);
const tmp_mcv = MCValue{ .register = tmp_reg };
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
try self.genBinOpMir(.lzcnt, Type.u64, dst_mcv, mat_src_mcv);
try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 });
try self.genBinOpMir(.lzcnt, Type.u64, tmp_mcv, mat_src_mcv.address().offset(8).deref());
try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc);
if (src_bits < 128) {
try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = 128 - src_bits });
}
} else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
break :result dst_mcv;
}
const src_bits = src_ty.bitSize(self.target.*);
if (src_bits > 64)
return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
if (math.isPowerOfTwo(src_bits)) {
const imm_reg = try self.copyToTmpRegister(dst_ty, .{
.immediate = src_bits ^ (src_bits - 1),
@@ -3870,24 +3889,52 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
if (Target.x86.featureSetHas(self.target.cpu.features, .bmi)) {
const extra_bits = self.regExtraBits(src_ty);
const masked_mcv = if (extra_bits > 0) masked: {
const mask_mcv = MCValue{
.immediate = ((@as(u64, 1) << @intCast(u6, extra_bits)) - 1) <<
@intCast(u6, src_bits),
};
const tmp_mcv = tmp: {
if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv;
try self.genSetReg(dst_reg, src_ty, src_mcv);
break :tmp dst_mcv;
};
try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv);
break :masked tmp_mcv;
} else mat_src_mcv;
try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv);
if (src_bits <= 64) {
const extra_bits = self.regExtraBits(src_ty);
const masked_mcv = if (extra_bits > 0) masked: {
const tmp_mcv = tmp: {
if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0))
break :tmp src_mcv;
try self.genSetReg(dst_reg, src_ty, src_mcv);
break :tmp dst_mcv;
};
try self.genBinOpMir(
.@"or",
src_ty,
tmp_mcv,
.{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) <<
@intCast(u6, src_bits) },
);
break :masked tmp_mcv;
} else mat_src_mcv;
try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv);
} else if (src_bits <= 128) {
const tmp_reg = try self.register_manager.allocReg(null, gp);
const tmp_mcv = MCValue{ .register = tmp_reg };
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
defer self.register_manager.unlockReg(tmp_lock);
const masked_mcv = if (src_bits < 128) masked: {
try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref());
try self.genBinOpMir(
.@"or",
Type.u64,
dst_mcv,
.{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(u6, src_bits - 64) },
);
break :masked dst_mcv;
} else mat_src_mcv.address().offset(8).deref();
try self.genBinOpMir(.tzcnt, Type.u64, dst_mcv, masked_mcv);
try self.genBinOpMir(.add, dst_ty, dst_mcv, .{ .immediate = 64 });
try self.genBinOpMir(.tzcnt, Type.u64, tmp_mcv, mat_src_mcv);
try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc);
} else return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
break :result dst_mcv;
}
if (src_bits > 64)
return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)});
const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits });
try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv);

View File

@@ -9,7 +9,8 @@ fn ctz(x: anytype) usize {
test "fixed" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64 and
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .bmi)) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO

View File

@@ -77,7 +77,8 @@ fn testClz() !void {
}
test "@clz big ints" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64 and
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .lzcnt)) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO