diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e8c66d374..cfadaf480c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -472,6 +472,7 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/truncxfhf2.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/truncxfsf2.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/udivmod.zig"
+    "${CMAKE_SOURCE_DIR}/lib/compiler_rt/udivmodei4.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/udivmodti4.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/udivti3.zig"
     "${CMAKE_SOURCE_DIR}/lib/compiler_rt/umodti3.zig"
diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig
index 5cce3daf29..96fdc0d4ab 100644
--- a/lib/compiler_rt.zig
+++ b/lib/compiler_rt.zig
@@ -112,6 +112,7 @@ comptime {
     _ = @import("compiler_rt/modti3.zig");
     _ = @import("compiler_rt/multi3.zig");
     _ = @import("compiler_rt/udivti3.zig");
+    _ = @import("compiler_rt/udivmodei4.zig");
     _ = @import("compiler_rt/udivmodti4.zig");
     _ = @import("compiler_rt/umodti3.zig");
 
diff --git a/lib/compiler_rt/udivmodei4.zig b/lib/compiler_rt/udivmodei4.zig
new file mode 100644
index 0000000000..3f541f00e5
--- /dev/null
+++ b/lib/compiler_rt/udivmodei4.zig
@@ -0,0 +1,196 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const common = @import("common.zig");
+const shr = std.math.shr;
+const shl = std.math.shl;
+
+const max_limbs = std.math.divCeil(usize, 65535, 32) catch unreachable; // max supported type is u65535
+
+comptime {
+    @export(__udivei4, .{ .name = "__udivei4", .linkage = common.linkage });
+    @export(__umodei4, .{ .name = "__umodei4", .linkage = common.linkage });
+}
+
+const endian = builtin.cpu.arch.endian();
+
+/// Get the value of limb `i` of `x`. Limb 0 is the least significant limb
+/// regardless of the target's endianness.
+inline fn limb(x: []const u32, i: usize) u32 {
+    return if (endian == .Little) x[i] else x[x.len - 1 - i];
+}
+
+/// Change the value of limb `i` of `x`. Limb 0 is the least significant limb
+/// regardless of the target's endianness.
+inline fn limb_set(x: []u32, i: usize, v: u32) void {
+    if (endian == .Little) {
+        x[i] = v;
+    } else {
+        x[x.len - 1 - i] = v;
+    }
+}
+
+/// Computes the quotient and/or remainder of `u / v` over 32-bit limbs using
+/// Knuth's Algorithm D (TAOCP vol. 2, 4.3.1, p. 272).
+///
+/// `q` and `r` are optional outputs; each one that is provided is zeroed first
+/// and then receives the quotient or the remainder respectively.
+/// Returns `error.DivisionByZero` if `v` is zero or either operand has no limbs.
+fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) error{DivisionByZero}!void {
+    if (q) |q_| std.mem.set(u32, q_[0..], 0);
+    if (r) |r_| std.mem.set(u32, r_[0..], 0);
+
+    if (u.len == 0 or v.len == 0) return error.DivisionByZero;
+
+    // Find the most significant non-zero limb of each operand.
+    var m = u.len - 1;
+    var n = v.len - 1;
+    while (limb(u, m) == 0) : (m -= 1) {
+        if (m == 0) return; // 0 / v: quotient and remainder are both zero.
+    }
+    while (limb(v, n) == 0) : (n -= 1) {
+        if (n == 0) return error.DivisionByZero;
+    }
+
+    // The divisor has more limbs than the dividend: q = 0 and r = u.
+    if (n > m) {
+        if (r) |r_| std.mem.copy(u32, r_[0..], u[0..]);
+        return;
+    }
+
+    // Normalize so that the top limb of the divisor has its high bit set.
+    // std.math.shl/shr are used because `32 - s` is a full 32-bit shift when s == 0.
+    const s = @clz(limb(v, n));
+
+    var vn: [max_limbs]u32 = undefined;
+    var i = n;
+    while (i > 0) : (i -= 1) {
+        limb_set(&vn, i, shl(u32, limb(v, i), s) | shr(u32, limb(v, i - 1), 32 - s));
+    }
+    limb_set(&vn, 0, shl(u32, limb(v, 0), s));
+
+    var un: [max_limbs + 1]u32 = undefined;
+    limb_set(&un, m + 1, shr(u32, limb(u, m), 32 - s));
+    i = m;
+    while (i > 0) : (i -= 1) {
+        limb_set(&un, i, shl(u32, limb(u, i), s) | shr(u32, limb(u, i - 1), 32 - s));
+    }
+    limb_set(&un, 0, shl(u32, limb(u, 0), s));
+
+    var j = m - n;
+    while (true) : (j -= 1) {
+        // Estimate the quotient limb from the top two limbs of the current remainder.
+        const uu = (@as(u64, limb(&un, j + n + 1)) << 32) + limb(&un, j + n);
+        var qhat = uu / limb(&vn, n);
+        var rhat = uu % limb(&vn, n);
+
+        while (true) {
+            if (qhat >= (1 << 32) or (n > 0 and qhat * limb(&vn, n - 1) > (rhat << 32) + limb(&un, j + n - 1))) {
+                qhat -= 1;
+                rhat += limb(&vn, n);
+                if (rhat < (1 << 32)) continue;
+            }
+            break;
+        }
+
+        // Multiply and subtract: un[j..j+n+1] -= qhat * vn[0..n]. The running
+        // borrow and the partial differences can be negative, so they must be
+        // signed; with unsigned types the `t < 0` check below can never fire.
+        var carry: i64 = 0;
+        i = 0;
+        while (i <= n) : (i += 1) {
+            const p = qhat * limb(&vn, i);
+            const t = @as(i64, limb(&un, i + j)) - carry - @as(i64, @truncate(u32, p));
+            limb_set(&un, i + j, @truncate(u32, @bitCast(u64, t)));
+            carry = @intCast(i64, p >> 32) - (t >> 32);
+        }
+        const t = @as(i64, limb(&un, j + n + 1)) - carry;
+        limb_set(&un, j + n + 1, @truncate(u32, @bitCast(u64, t)));
+        if (q) |q_| limb_set(q_, j, @truncate(u32, qhat));
+        if (t < 0) {
+            // qhat was one too large: decrement it and add the divisor back.
+            if (q) |q_| limb_set(q_, j, limb(q_, j) - 1);
+            var carry2: u64 = 0;
+            i = 0;
+            while (i <= n) : (i += 1) {
+                const t2 = @as(u64, limb(&un, i + j)) + @as(u64, limb(&vn, i)) + carry2;
+                limb_set(&un, i + j, @truncate(u32, t2));
+                carry2 = t2 >> 32;
+            }
+            limb_set(&un, j + n + 1, @truncate(u32, limb(&un, j + n + 1) + carry2));
+        }
+        if (j == 0) break;
+    }
+    if (r) |r_| {
+        // Undo the normalization shift to obtain the remainder.
+        i = 0;
+        while (i < n) : (i += 1) {
+            limb_set(r_, i, shr(u32, limb(&un, i), s) | shl(u32, limb(&un, i + 1), 32 - s));
+        }
+        limb_set(r_, n, shr(u32, limb(&un, n), s));
+    }
+}
+
+/// Stores the quotient of the `bits`-wide unsigned integers at `u_p` and `v_p`
+/// into `r_q`. Each buffer is accessed as `ceil(bits / 32)` 32-bit limbs.
+/// NOTE(review): assumes padding bits of a partial top limb are zero - confirm.
+pub fn __udivei4(r_q: [*c]u32, u_p: [*c]const u32, v_p: [*c]const u32, bits: usize) callconv(.C) void {
+    @setRuntimeSafety(builtin.is_test);
+    // Round up so that the partial top limb of a width that is not a multiple
+    // of 32 takes part in the division instead of being silently dropped.
+    const len = std.math.divCeil(usize, bits, 32) catch unreachable;
+    const u = u_p[0..len];
+    const v = v_p[0..len];
+    const q = r_q[0..len];
+    @call(.always_inline, divmod, .{ q, null, u, v }) catch unreachable;
+}
+
+/// Stores the remainder of the `bits`-wide unsigned integers at `u_p` and `v_p`
+/// into `r_p`. Each buffer is accessed as `ceil(bits / 32)` 32-bit limbs.
+/// NOTE(review): assumes padding bits of a partial top limb are zero - confirm.
+pub fn __umodei4(r_p: [*c]u32, u_p: [*c]const u32, v_p: [*c]const u32, bits: usize) callconv(.C) void {
+    @setRuntimeSafety(builtin.is_test);
+    // Round up so that the partial top limb of a width that is not a multiple
+    // of 32 takes part in the division instead of being silently dropped.
+    const len = std.math.divCeil(usize, bits, 32) catch unreachable;
+    const u = u_p[0..len];
+    const v = v_p[0..len];
+    const r = r_p[0..len];
+    @call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable;
+}
+
+test "__udivei4/__umodei4" {
+    const RndGen = std.rand.DefaultPrng;
+    var rnd = RndGen.init(42);
+    var i: usize = 10000;
+    while (i > 0) : (i -= 1) {
+        const u = rnd.random().int(u1000);
+        const v = 1 + rnd.random().int(u1200);
+        const q = u / v;
+        const r = u % v;
+        const z = q * v + r;
+        try std.testing.expect(z == u);
+    }
+}
+
+test "__udivei4/__umodei4 add-back step" {
+    // Operands for which the first quotient estimate (4) is one too large, so
+    // the multiply-and-subtract step goes negative and must be corrected.
+    var u = [_]u32{ 0, 0, 0 };
+    var v = [_]u32{ 0, 0, 0 };
+    limb_set(&u, 0, 0x00000003);
+    limb_set(&u, 2, 0x80000000);
+    limb_set(&v, 0, 0x00000001);
+    limb_set(&v, 2, 0x20000000);
+
+    var q: [3]u32 = undefined;
+    var r: [3]u32 = undefined;
+    __udivei4(&q, &u, &v, 96);
+    __umodei4(&r, &u, &v, 96);
+
+    try std.testing.expectEqual(@as(u32, 3), limb(&q, 0));
+    try std.testing.expectEqual(@as(u32, 0), limb(&q, 1));
+    try std.testing.expectEqual(@as(u32, 0), limb(&q, 2));
+    try std.testing.expectEqual(@as(u32, 0), limb(&r, 0));
+    try std.testing.expectEqual(@as(u32, 0), limb(&r, 1));
+    try std.testing.expectEqual(@as(u32, 0x20000000), limb(&r, 2));
+}
diff --git a/tools/gen_stubs.zig b/tools/gen_stubs.zig
index 2b21731224..83fd12e7bd 100644
--- a/tools/gen_stubs.zig
+++ b/tools/gen_stubs.zig
@@ -865,12 +865,14 @@ const blacklisted_symbols = [_][]const u8{
     "__ucmpsi2",
     "__ucmpti2",
     "__udivdi3",
+    "__udivei4",
     "__udivmoddi4",
     "__udivmodsi4",
     "__udivmodti4",
     "__udivsi3",
     "__udivti3",
     "__umoddi3",
+    "__umodei4",
     "__umodsi3",
     "__umodti3",
     "__unorddf2",