diff --git a/lib/std/math/big.zig b/lib/std/math/big.zig
index e7f8a7fb34..c7fc0b17f5 100644
--- a/lib/std/math/big.zig
+++ b/lib/std/math/big.zig
@@ -7,6 +7,7 @@ pub const Limb = usize;
 const limb_info = @typeInfo(Limb).Int;
 pub const SignedLimb = std.meta.Int(.signed, limb_info.bits);
 pub const DoubleLimb = std.meta.Int(.unsigned, 2 * limb_info.bits);
+pub const HalfLimb = std.meta.Int(.unsigned, limb_info.bits / 2);
 pub const SignedDoubleLimb = std.meta.Int(.signed, 2 * limb_info.bits);
 pub const Log2Limb = std.math.Log2Int(Limb);
 
diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index ec0143a3d7..87a62bf66c 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -2,6 +2,8 @@ const std = @import("../../std.zig");
 const math = std.math;
 const Limb = std.math.big.Limb;
 const limb_bits = @typeInfo(Limb).Int.bits;
+const HalfLimb = std.math.big.HalfLimb;
+const half_limb_bits = @typeInfo(HalfLimb).Int.bits;
 const DoubleLimb = std.math.big.DoubleLimb;
 const SignedDoubleLimb = std.math.big.SignedDoubleLimb;
 const Log2Limb = std.math.big.Log2Limb;
@@ -1335,7 +1337,16 @@ pub const Mutable = struct {
         const xy_trailing = math.min(x_trailing, y_trailing);
 
         if (y.len - xy_trailing == 1) {
-            lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], y.limbs[y.len - 1]);
+            const divisor = y.limbs[y.len - 1];
+
+            // Optimization for small divisor. By using a half limb we can avoid requiring DoubleLimb
+            // divisions in the hot code path. Those divisions often require compiler_rt software-emulation.
+            if (divisor <= maxInt(HalfLimb)) {
+                lldiv0p5(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], @intCast(HalfLimb, divisor));
+            } else {
+                lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], divisor);
+            }
+
             q.normalize(x.len - xy_trailing);
             q.positive = q_positive;
 
@@ -1939,7 +1950,8 @@ pub const Const = struct {
             }
         } else {
             // Non power-of-two: batch divisions per word size.
-            const digits_per_limb = math.log(Limb, base, maxInt(Limb));
+            // We use a HalfLimb here so the division uses the faster lldiv0p5 over lldiv1 codepath.
+            const digits_per_limb = math.log(HalfLimb, base, maxInt(HalfLimb));
             var limb_base: Limb = 1;
             var j: usize = 0;
             while (j < digits_per_limb) : (j += 1) {
@@ -3208,6 +3220,37 @@ fn lldiv1(quo: []Limb, rem: *Limb, a: []const Limb, b: Limb) void {
     }
 }
 
+/// Short division of the multi-limb integer `a` by a divisor `b` that fits in half a limb.
+/// Limbs are consumed from `a[a.len - 1]` down to `a[0]`; quotient limbs are stored in
+/// `quo[0..a.len]` and the final remainder in `rem`. `b` must be non-zero.
+///
+/// Every limb is split into two half-limb digits. The running remainder is always less
+/// than `b`, so `(rem << half_limb_bits) | digit` fits in a single `Limb` and no
+/// `DoubleLimb` division is needed.
+fn lldiv0p5(quo: []Limb, rem: *Limb, a: []const Limb, b: HalfLimb) void {
+    @setRuntimeSafety(debug_safety);
+    assert(a.len > 1 or a[0] >= b);
+    assert(quo.len >= a.len);
+
+    rem.* = 0;
+    for (a) |_, ri| {
+        const i = a.len - ri - 1;
+        const ai_high = a[i] >> half_limb_bits;
+        const ai_low = a[i] & ((1 << half_limb_bits) - 1);
+
+        // Split the division into two divisions acting on half a limb each. Carry remainder.
+        const ai_high_with_carry = (rem.* << half_limb_bits) | ai_high;
+        const ai_high_quo = ai_high_with_carry / b;
+        rem.* = ai_high_with_carry % b;
+
+        const ai_low_with_carry = (rem.* << half_limb_bits) | ai_low;
+        const ai_low_quo = ai_low_with_carry / b;
+        rem.* = ai_low_with_carry % b;
+
+        quo[i] = (ai_high_quo << half_limb_bits) | ai_low_quo;
+    }
+}
+
 fn llshl(r: []Limb, a: []const Limb, shift: usize) void {
     @setRuntimeSafety(debug_safety);
     assert(a.len >= 1);
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 4c1d12116e..70a9b97a38 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -1064,7 +1064,7 @@ test "big.int mulWrap large" {
     try testing.expect(b.eq(c));
 }
 
-test "big.int div single-single no rem" {
+test "big.int div single-half no rem" {
     var a = try Managed.initSet(testing.allocator, 50);
     defer a.deinit();
     var b = try Managed.initSet(testing.allocator, 5);
@@ -1080,7 +1080,7 @@ test "big.int div single-single no rem" {
     try testing.expect((try r.to(u32)) == 0);
 }
 
-test "big.int div single-single with rem" {
+test "big.int div single-half with rem" {
     var a = try Managed.initSet(testing.allocator, 49);
     defer a.deinit();
     var b = try Managed.initSet(testing.allocator, 5);
@@ -1096,6 +1096,39 @@ test "big.int div single-single with rem" {
     try testing.expect((try r.to(u32)) == 4);
 }
 
+test "big.int div single-single no rem" {
+    // Operands are single-limb (and the divisor wider than a HalfLimb) only when usize is 64 bits.
+    var a = try Managed.initSet(testing.allocator, 1 << 52);
+    defer a.deinit();
+    var b = try Managed.initSet(testing.allocator, 1 << 35);
+    defer b.deinit();
+
+    var q = try Managed.init(testing.allocator);
+    defer q.deinit();
+    var r = try Managed.init(testing.allocator);
+    defer r.deinit();
+    try Managed.divTrunc(&q, &r, a.toConst(), b.toConst());
+
+    try testing.expect((try q.to(u32)) == 131072);
+    try testing.expect((try r.to(u32)) == 0);
+}
+
+test "big.int div single-single with rem" {
+    var a = try Managed.initSet(testing.allocator, (1 << 52) | (1 << 33));
+    defer a.deinit();
+    var b = try Managed.initSet(testing.allocator, (1 << 35));
+    defer b.deinit();
+
+    var q = try Managed.init(testing.allocator);
+    defer q.deinit();
+    var r = try Managed.init(testing.allocator);
+    defer r.deinit();
+    try Managed.divTrunc(&q, &r, a.toConst(), b.toConst());
+
+    try testing.expect((try q.to(u64)) == 131072);
+    try testing.expect((try r.to(u64)) == 8589934592);
+}
+
 test "big.int div multi-single no rem" {
     const op1 = 0xffffeeeeddddcccc;
     const op2 = 34;