zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

udivmod.zig (4285B) - Raw


      1 const std = @import("std");
      2 const builtin = @import("builtin");
      3 const Log2Int = std.math.Log2Int;
      4 const common = @import("common.zig");
      5 const HalveInt = common.HalveInt;
      6 
      7 const lo = switch (builtin.cpu.arch.endian()) {
      8     .big => 1,
      9     .little => 0,
     10 };
     11 const hi = 1 - lo;
     12 
     13 // Let _u1 and _u0 be the high and low limbs of U respectively.
     14 // Returns U / v_ and sets r = U % v_.
     15 fn divwide_generic(comptime T: type, _u1: T, _u0: T, v_: T, r: *T) T {
     16     const HalfT = HalveInt(T, false).HalfT;
     17     @setRuntimeSafety(common.test_safety);
     18     var v = v_;
     19 
     20     const b = @as(T, 1) << (@bitSizeOf(T) / 2);
     21     var un64: T = undefined;
     22     var un10: T = undefined;
     23 
     24     const s: Log2Int(T) = @intCast(@clz(v));
     25     if (s > 0) {
     26         // Normalize divisor
     27         v <<= s;
     28         un64 = (_u1 << s) | (_u0 >> @intCast((@bitSizeOf(T) - @as(T, @intCast(s)))));
     29         un10 = _u0 << s;
     30     } else {
     31         // Avoid undefined behavior of (u0 >> @bitSizeOf(T))
     32         un64 = _u1;
     33         un10 = _u0;
     34     }
     35 
     36     // Break divisor up into two 32-bit digits
     37     const vn1 = v >> (@bitSizeOf(T) / 2);
     38     const vn0 = v & std.math.maxInt(HalfT);
     39 
     40     // Break right half of dividend into two digits
     41     const un1 = un10 >> (@bitSizeOf(T) / 2);
     42     const un0 = un10 & std.math.maxInt(HalfT);
     43 
     44     // Compute the first quotient digit, q1
     45     var q1 = un64 / vn1;
     46     var rhat = un64 -% q1 *% vn1;
     47 
     48     // q1 has at most error 2. No more than 2 iterations
     49     while (q1 >= b or q1 * vn0 > b * rhat + un1) {
     50         q1 -= 1;
     51         rhat += vn1;
     52         if (rhat >= b) break;
     53     }
     54 
     55     const un21 = un64 *% b +% un1 -% q1 *% v;
     56 
     57     // Compute the second quotient digit
     58     var q0 = un21 / vn1;
     59     rhat = un21 -% q0 *% vn1;
     60 
     61     // q0 has at most error 2. No more than 2 iterations.
     62     while (q0 >= b or q0 * vn0 > b * rhat + un0) {
     63         q0 -= 1;
     64         rhat += vn1;
     65         if (rhat >= b) break;
     66     }
     67 
     68     r.* = (un21 *% b +% un0 -% q0 *% v) >> s;
     69     return q1 *% b +% q0;
     70 }
     71 
     72 fn divwide(comptime T: type, _u1: T, _u0: T, v: T, r: *T) T {
     73     @setRuntimeSafety(common.test_safety);
     74     if (T == u64 and builtin.target.cpu.arch == .x86_64 and builtin.target.os.tag != .windows) {
     75         var rem: T = undefined;
     76         const quo = asm (
     77             \\divq %[v]
     78             : [_] "={rax}" (-> T),
     79               [_] "={rdx}" (rem),
     80             : [v] "r" (v),
     81               [_] "{rax}" (_u0),
     82               [_] "{rdx}" (_u1),
     83         );
     84         r.* = rem;
     85         return quo;
     86     } else {
     87         return divwide_generic(T, _u1, _u0, v, r);
     88     }
     89 }
     90 
     91 // Returns a_ / b_ and sets maybe_rem = a_ % b.
     92 pub fn udivmod(comptime T: type, a_: T, b_: T, maybe_rem: ?*T) T {
     93     @setRuntimeSafety(common.test_safety);
     94     const HalfT = HalveInt(T, false).HalfT;
     95     const SignedT = std.meta.Int(.signed, @bitSizeOf(T));
     96 
     97     if (b_ > a_) {
     98         if (maybe_rem) |rem| {
     99             rem.* = a_;
    100         }
    101         return 0;
    102     }
    103 
    104     const a: [2]HalfT = @bitCast(a_);
    105     const b: [2]HalfT = @bitCast(b_);
    106     var q: [2]HalfT = undefined;
    107     var r: [2]HalfT = undefined;
    108 
    109     // When the divisor fits in 64 bits, we can use an optimized path
    110     if (b[hi] == 0) {
    111         r[hi] = 0;
    112         if (a[hi] < b[lo]) {
    113             // The result fits in 64 bits
    114             q[hi] = 0;
    115             q[lo] = divwide(HalfT, a[hi], a[lo], b[lo], &r[lo]);
    116         } else {
    117             // First, divide with the high part to get the remainder. After that a_hi < b_lo.
    118             q[hi] = a[hi] / b[lo];
    119             q[lo] = divwide(HalfT, a[hi] % b[lo], a[lo], b[lo], &r[lo]);
    120         }
    121         if (maybe_rem) |rem| {
    122             rem.* = @bitCast(r);
    123         }
    124         return @bitCast(q);
    125     }
    126 
    127     // 0 <= shift <= 63
    128     const shift: Log2Int(T) = @clz(b[hi]) - @clz(a[hi]);
    129     var af: T = @bitCast(a);
    130     var bf = @as(T, @bitCast(b)) << shift;
    131     q = @bitCast(@as(T, 0));
    132 
    133     for (0..shift + 1) |_| {
    134         q[lo] <<= 1;
    135         // Branchless version of:
    136         // if (af >= bf) {
    137         //     af -= bf;
    138         //     q[lo] |= 1;
    139         // }
    140         const s = @as(SignedT, @bitCast(bf -% af -% 1)) >> (@bitSizeOf(T) - 1);
    141         q[lo] |= @intCast(s & 1);
    142         af -= bf & @as(T, @bitCast(s));
    143         bf >>= 1;
    144     }
    145     if (maybe_rem) |rem| {
    146         rem.* = @bitCast(af);
    147     }
    148     return @bitCast(q);
    149 }