udivmodei4.zig (5345B) - Raw
1 const std = @import("std"); 2 const builtin = @import("builtin"); 3 const common = @import("common.zig"); 4 const shr = std.math.shr; 5 const shl = std.math.shl; 6 7 const max_limbs = std.math.divCeil(usize, 65535, 32) catch unreachable; // max supported type is u65535 8 9 comptime { 10 @export(&__udivei4, .{ .name = "__udivei4", .linkage = common.linkage, .visibility = common.visibility }); 11 @export(&__umodei4, .{ .name = "__umodei4", .linkage = common.linkage, .visibility = common.visibility }); 12 } 13 14 const endian = builtin.cpu.arch.endian(); 15 16 /// Get the value of a limb. 17 inline fn limb(x: []const u32, i: usize) u32 { 18 return if (endian == .little) x[i] else x[x.len - 1 - i]; 19 } 20 21 /// Change the value of a limb. 22 inline fn limb_set(x: []u32, i: usize, v: u32) void { 23 if (endian == .little) { 24 x[i] = v; 25 } else { 26 x[x.len - 1 - i] = v; 27 } 28 } 29 30 /// Uses Knuth's Algorithm D, 4.3.1, p. 272. 31 pub fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) !void { 32 if (q) |q_| @memset(q_[0..], 0); 33 if (r) |r_| @memset(r_[0..], 0); 34 35 if (u.len == 0 or v.len == 0) return error.DivisionByZero; 36 37 var m = u.len - 1; 38 var n = v.len - 1; 39 while (limb(u, m) == 0) : (m -= 1) { 40 if (m == 0) return; 41 } 42 while (limb(v, n) == 0) : (n -= 1) { 43 if (n == 0) return error.DivisionByZero; 44 } 45 46 if (n > m) { 47 if (r) |r_| @memcpy(r_[0..u.len], u); 48 return; 49 } 50 51 const s = @clz(limb(v, n)); 52 53 var vn: [max_limbs]u32 = undefined; 54 var i = n; 55 while (i > 0) : (i -= 1) { 56 limb_set(&vn, i, shl(u32, limb(v, i), s) | shr(u32, limb(v, i - 1), 32 - s)); 57 } 58 limb_set(&vn, 0, shl(u32, limb(v, 0), s)); 59 60 var un: [max_limbs + 1]u32 = undefined; 61 limb_set(&un, m + 1, shr(u32, limb(u, m), 32 - s)); 62 i = m; 63 while (i > 0) : (i -= 1) { 64 limb_set(&un, i, shl(u32, limb(u, i), s) | shr(u32, limb(u, i - 1), 32 - s)); 65 } 66 limb_set(&un, 0, shl(u32, limb(u, 0), s)); 67 68 var j = m - n; 69 while (true) : (j -= 1) { 70 const uu = (@as(u64, limb(&un, j + n + 1)) << 32) + limb(&un, j + n); 71 var qhat = uu / limb(&vn, n); 72 var rhat = uu % limb(&vn, n); 73 74 while (true) { 75 if (qhat >= (1 << 32) or (n > 0 and qhat * limb(&vn, n - 1) > (rhat << 32) + limb(&un, j + n - 1))) { 76 qhat -= 1; 77 rhat += limb(&vn, n); 78 if (rhat < (1 << 32)) continue; 79 } 80 break; 81 } 82 var carry: i64 = 0; 83 i = 0; 84 while (i <= n) : (i += 1) { 85 const p = qhat * limb(&vn, i); 86 const t = limb(&un, i + j) - carry - @as(u32, @truncate(p)); 87 limb_set(&un, i + j, @as(u32, @truncate(@as(u64, @bitCast(t))))); 88 carry = @as(i64, @intCast(p >> 32)) - @as(i64, @intCast(t >> 32)); 89 } 90 const t = limb(&un, j + n + 1) -% carry; 91 limb_set(&un, j + n + 1, @as(u32, @truncate(@as(u64, @bitCast(t))))); 92 if (q) |q_| limb_set(q_, j, @as(u32, @truncate(qhat))); 93 if (t < 0) { 94 if (q) |q_| limb_set(q_, j, limb(q_, j) - 1); 95 var carry2: u64 = 0; 96 i = 0; 97 while (i <= n) : (i += 1) { 98 const t2 = @as(u64, limb(&un, i + j)) + @as(u64, limb(&vn, i)) + carry2; 99 limb_set(&un, i + j, @as(u32, @truncate(t2))); 100 carry2 = t2 >> 32; 101 } 102 limb_set(&un, j + n + 1, @as(u32, @truncate(limb(&un, j + n + 1) + carry2))); 103 } 104 if (j == 0) break; 105 } 106 if (r) |r_| { 107 i = 0; 108 while (i <= n) : (i += 1) { 109 limb_set(r_, i, shr(u32, limb(&un, i), s) | shl(u32, limb(&un, i + 1), 32 - s)); 110 } 111 limb_set(r_, n, shr(u32, limb(&un, n), s)); 112 } 113 } 114 115 pub fn __udivei4(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void { 116 @setRuntimeSafety(common.test_safety); 117 const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); 118 const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size])); 119 const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); 120 const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size])); 121 @call(.always_inline, divmod, .{ q, null, u, v }) catch unreachable; 122 } 123 124 pub fn __umodei4(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void { 125 @setRuntimeSafety(common.test_safety); 126 const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); 127 const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size])); 128 const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); 129 const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size])); 130 @call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable; 131 } 132 133 test "__udivei4/__umodei4" { 134 if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; 135 if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; 136 if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; 137 138 const RndGen = std.Random.DefaultPrng; 139 var rnd = RndGen.init(42); 140 var i: usize = 10000; 141 while (i > 0) : (i -= 1) { 142 const u = rnd.random().int(u1000); 143 const v = 1 + rnd.random().int(u1200); 144 const q = u / v; 145 const r = u % v; 146 const z = q * v + r; 147 try std.testing.expect(z == u); 148 } 149 }