divtf3.zig (9925B) - Raw
1 const std = @import("std"); 2 const builtin = @import("builtin"); 3 4 const common = @import("common.zig"); 5 const normalize = common.normalize; 6 const wideMultiply = common.wideMultiply; 7 8 pub const panic = common.panic; 9 10 comptime { 11 if (common.want_ppc_abi) { 12 @export(&__divtf3, .{ .name = "__divkf3", .linkage = common.linkage, .visibility = common.visibility }); 13 } else if (common.want_sparc_abi) { 14 @export(&_Qp_div, .{ .name = "_Qp_div", .linkage = common.linkage, .visibility = common.visibility }); 15 } 16 @export(&__divtf3, .{ .name = "__divtf3", .linkage = common.linkage, .visibility = common.visibility }); 17 } 18 19 pub fn __divtf3(a: f128, b: f128) callconv(.c) f128 { 20 return div(a, b); 21 } 22 23 fn _Qp_div(c: *f128, a: *const f128, b: *const f128) callconv(.c) void { 24 c.* = div(a.*, b.*); 25 } 26 27 inline fn div(a: f128, b: f128) f128 { 28 const Z = std.meta.Int(.unsigned, 128); 29 30 const significandBits = std.math.floatMantissaBits(f128); 31 const exponentBits = std.math.floatExponentBits(f128); 32 33 const signBit = (@as(Z, 1) << (significandBits + exponentBits)); 34 const maxExponent = ((1 << exponentBits) - 1); 35 const exponentBias = (maxExponent >> 1); 36 37 const implicitBit = (@as(Z, 1) << significandBits); 38 const quietBit = implicitBit >> 1; 39 const significandMask = implicitBit - 1; 40 41 const absMask = signBit - 1; 42 const exponentMask = absMask ^ significandMask; 43 const qnanRep = exponentMask | quietBit; 44 const infRep: Z = @bitCast(std.math.inf(f128)); 45 46 const aExponent: u32 = @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent); 47 const bExponent: u32 = @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent); 48 const quotientSign: Z = (@as(Z, @bitCast(a)) ^ @as(Z, @bitCast(b))) & signBit; 49 50 var aSignificand: Z = @as(Z, @bitCast(a)) & significandMask; 51 var bSignificand: Z = @as(Z, @bitCast(b)) & significandMask; 52 var scale: i32 = 0; 53 54 // Detect if a or b is zero, denormal, infinity, or NaN. 55 if (aExponent -% 1 >= maxExponent - 1 or bExponent -% 1 >= maxExponent - 1) { 56 const aAbs: Z = @as(Z, @bitCast(a)) & absMask; 57 const bAbs: Z = @as(Z, @bitCast(b)) & absMask; 58 59 // NaN / anything = qNaN 60 if (aAbs > infRep) return @bitCast(@as(Z, @bitCast(a)) | quietBit); 61 // anything / NaN = qNaN 62 if (bAbs > infRep) return @bitCast(@as(Z, @bitCast(b)) | quietBit); 63 64 if (aAbs == infRep) { 65 // infinity / infinity = NaN 66 if (bAbs == infRep) { 67 return @bitCast(qnanRep); 68 } 69 // infinity / anything else = +/- infinity 70 else { 71 return @bitCast(aAbs | quotientSign); 72 } 73 } 74 75 // anything else / infinity = +/- 0 76 if (bAbs == infRep) return @bitCast(quotientSign); 77 78 if (aAbs == 0) { 79 // zero / zero = NaN 80 if (bAbs == 0) { 81 return @bitCast(qnanRep); 82 } 83 // zero / anything else = +/- zero 84 else { 85 return @bitCast(quotientSign); 86 } 87 } 88 // anything else / zero = +/- infinity 89 if (bAbs == 0) return @bitCast(infRep | quotientSign); 90 91 // one or both of a or b is denormal, the other (if applicable) is a 92 // normal number. Renormalize one or both of a and b, and set scale to 93 // include the necessary exponent adjustment. 94 if (aAbs < implicitBit) scale +%= normalize(f128, &aSignificand); 95 if (bAbs < implicitBit) scale -%= normalize(f128, &bSignificand); 96 } 97 98 // Set the implicit significand bit. If we fell through from the 99 // denormal path it was already set by normalize( ), but setting it twice 100 // won't hurt anything. 101 aSignificand |= implicitBit; 102 bSignificand |= implicitBit; 103 var quotientExponent: i32 = @as(i32, @bitCast(aExponent -% bExponent)) +% scale; 104 105 // Align the significand of b as a Q63 fixed-point number in the range 106 // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax 107 // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This 108 // is accurate to about 3.5 binary digits. 109 const q63b: u64 = @truncate(bSignificand >> 49); 110 var recip64 = @as(u64, 0x7504f333F9DE6484) -% q63b; 111 // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) 112 113 // Now refine the reciprocal estimate using a Newton-Raphson iteration: 114 // 115 // x1 = x0 * (2 - x0 * b) 116 // 117 // This doubles the number of correct binary digits in the approximation 118 // with each iteration. 119 var correction64: u64 = undefined; 120 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1); 121 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63); 122 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1); 123 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63); 124 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1); 125 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63); 126 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1); 127 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63); 128 correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1); 129 recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63); 130 131 // The reciprocal may have overflowed to zero if the upper half of b is 132 // exactly 1.0. This would sabatoge the full-width final stage of the 133 // computation that follows, so we adjust the reciprocal down by one bit. 134 recip64 -%= 1; 135 136 // We need to perform one more iteration to get us to 112 binary digits; 137 // The last iteration needs to happen with extra precision. 138 const q127blo: u64 = @truncate(bSignificand << 15); 139 var correction: u128 = undefined; 140 var reciprocal: u128 = undefined; 141 142 // NOTE: This operation is equivalent to __multi3, which is not implemented 143 // in some architecture 144 var r64q63: u128 = undefined; 145 var r64q127: u128 = undefined; 146 var r64cH: u128 = undefined; 147 var r64cL: u128 = undefined; 148 var dummy: u128 = undefined; 149 wideMultiply(u128, recip64, q63b, &dummy, &r64q63); 150 wideMultiply(u128, recip64, q127blo, &dummy, &r64q127); 151 152 correction = -%(r64q63 + (r64q127 >> 64)); 153 154 const cHi: u64 = @truncate(correction >> 64); 155 const cLo: u64 = @truncate(correction); 156 157 wideMultiply(u128, recip64, cHi, &dummy, &r64cH); 158 wideMultiply(u128, recip64, cLo, &dummy, &r64cL); 159 160 reciprocal = r64cH + (r64cL >> 64); 161 162 // Adjust the final 128-bit reciprocal estimate downward to ensure that it 163 // is strictly smaller than the infinitely precise exact reciprocal. Because 164 // the computation of the Newton-Raphson step is truncating at every step, 165 // this adjustment is small; most of the work is already done. 166 reciprocal -%= 2; 167 168 // The numerical reciprocal is accurate to within 2^-112, lies in the 169 // interval [0.5, 1.0), and is strictly smaller than the true reciprocal 170 // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b 171 // in Q127 with the following properties: 172 // 173 // 1. q < a/b 174 // 2. q is in the interval [0.5, 2.0) 175 // 3. The error in q is bounded away from 2^-113 (actually, we have a 176 // couple of bits to spare, but this is all we need). 177 178 // We need a 128 x 128 multiply high to compute q. 179 var quotient: u128 = undefined; 180 var quotientLo: u128 = undefined; 181 wideMultiply(u128, aSignificand << 2, reciprocal, "ient, "ientLo); 182 183 // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). 184 // In either case, we are going to compute a residual of the form 185 // 186 // r = a - q*b 187 // 188 // We know from the construction of q that r satisfies: 189 // 190 // 0 <= r < ulp(q)*b 191 // 192 // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we 193 // already have the correct result. The exact halfway case cannot occur. 194 // We also take this time to right shift quotient if it falls in the [1,2) 195 // range and adjust the exponent accordingly. 196 var residual: u128 = undefined; 197 var qb: u128 = undefined; 198 199 if (quotient < (implicitBit << 1)) { 200 wideMultiply(u128, quotient, bSignificand, &dummy, &qb); 201 residual = (aSignificand << 113) -% qb; 202 quotientExponent -%= 1; 203 } else { 204 quotient >>= 1; 205 wideMultiply(u128, quotient, bSignificand, &dummy, &qb); 206 residual = (aSignificand << 112) -% qb; 207 } 208 209 const writtenExponent = quotientExponent +% exponentBias; 210 211 if (writtenExponent >= maxExponent) { 212 // If we have overflowed the exponent, return infinity. 213 return @bitCast(infRep | quotientSign); 214 } else if (writtenExponent < 1) { 215 if (writtenExponent == 0) { 216 // Check whether the rounded result is normal. 217 const round = @intFromBool((residual << 1) > bSignificand); 218 // Clear the implicit bit. 219 var absResult = quotient & significandMask; 220 // Round. 221 absResult += round; 222 if ((absResult & ~significandMask) > 0) { 223 // The rounded result is normal; return it. 224 return @bitCast(absResult | quotientSign); 225 } 226 } 227 // Flush denormals to zero. In the future, it would be nice to add 228 // code to round them correctly. 229 return @bitCast(quotientSign); 230 } else { 231 const round = @intFromBool((residual << 1) >= bSignificand); 232 // Clear the implicit bit 233 var absResult = quotient & significandMask; 234 // Insert the exponent 235 absResult |= @as(Z, @intCast(writtenExponent)) << significandBits; 236 // Round 237 absResult +%= round; 238 // Insert the sign and return 239 return @bitCast(absResult | quotientSign); 240 } 241 } 242 243 test { 244 _ = @import("divtf3_test.zig"); 245 }