sqrt.zig (8445B) - Raw
1 const std = @import("std"); 2 const builtin = @import("builtin"); 3 const arch = builtin.cpu.arch; 4 const math = std.math; 5 const common = @import("common.zig"); 6 7 pub const panic = common.panic; 8 9 comptime { 10 @export(&__sqrth, .{ .name = "__sqrth", .linkage = common.linkage, .visibility = common.visibility }); 11 @export(&sqrtf, .{ .name = "sqrtf", .linkage = common.linkage, .visibility = common.visibility }); 12 @export(&sqrt, .{ .name = "sqrt", .linkage = common.linkage, .visibility = common.visibility }); 13 @export(&__sqrtx, .{ .name = "__sqrtx", .linkage = common.linkage, .visibility = common.visibility }); 14 if (common.want_ppc_abi) { 15 @export(&sqrtq, .{ .name = "sqrtf128", .linkage = common.linkage, .visibility = common.visibility }); 16 } else if (common.want_sparc_abi) { 17 @export(&_Qp_sqrt, .{ .name = "_Qp_sqrt", .linkage = common.linkage, .visibility = common.visibility }); 18 } 19 @export(&sqrtq, .{ .name = "sqrtq", .linkage = common.linkage, .visibility = common.visibility }); 20 @export(&sqrtl, .{ .name = "sqrtl", .linkage = common.linkage, .visibility = common.visibility }); 21 } 22 23 pub fn __sqrth(x: f16) callconv(.c) f16 { 24 // TODO: more efficient implementation 25 return @floatCast(sqrtf(x)); 26 } 27 28 pub fn sqrtf(x: f32) callconv(.c) f32 { 29 const tiny: f32 = 1.0e-30; 30 const sign: i32 = @bitCast(@as(u32, 0x80000000)); 31 var ix: i32 = @bitCast(x); 32 33 if ((ix & 0x7F800000) == 0x7F800000) { 34 return x * x + x; // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan 35 } 36 37 // zero 38 if (ix <= 0) { 39 if (ix & ~sign == 0) { 40 return x; // sqrt (+-0) = +-0 41 } 42 if (ix < 0) { 43 return math.nan(f32); 44 } 45 } 46 47 // normalize 48 var m = ix >> 23; 49 if (m == 0) { 50 // subnormal 51 var i: i32 = 0; 52 while (ix & 0x00800000 == 0) : (i += 1) { 53 ix <<= 1; 54 } 55 m -= i - 1; 56 } 57 58 m -= 127; // unbias exponent 59 ix = (ix & 0x007FFFFF) | 0x00800000; 60 61 if (m & 1 != 0) { // odd m, double x to even 62 ix += ix; 63 } 64 65 m >>= 1; // m = [m / 2] 66 67 // sqrt(x) bit by bit 68 ix += ix; 69 var q: i32 = 0; // q = sqrt(x) 70 var s: i32 = 0; 71 var r: i32 = 0x01000000; // r = moving bit right -> left 72 73 while (r != 0) { 74 const t = s + r; 75 if (t <= ix) { 76 s = t + r; 77 ix -= t; 78 q += r; 79 } 80 ix += ix; 81 r >>= 1; 82 } 83 84 // floating add to find rounding direction 85 if (ix != 0) { 86 var z = 1.0 - tiny; // inexact 87 if (z >= 1.0) { 88 z = 1.0 + tiny; 89 if (z > 1.0) { 90 q += 2; 91 } else { 92 if (q & 1 != 0) { 93 q += 1; 94 } 95 } 96 } 97 } 98 99 ix = (q >> 1) + 0x3f000000; 100 ix += m << 23; 101 return @bitCast(ix); 102 } 103 104 /// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound 105 /// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are 106 /// potentially some edge cases remaining that are not handled in the same way. 107 pub fn sqrt(x: f64) callconv(.c) f64 { 108 const tiny: f64 = 1.0e-300; 109 const sign: u32 = 0x80000000; 110 const u: u64 = @bitCast(x); 111 112 var ix0: u32 = @intCast(u >> 32); 113 var ix1: u32 = @intCast(u & 0xFFFFFFFF); 114 115 // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan 116 if (ix0 & 0x7FF00000 == 0x7FF00000) { 117 return x * x + x; 118 } 119 120 // sqrt(+-0) = +-0 121 if (x == 0.0) { 122 return x; 123 } 124 // sqrt(-ve) = nan 125 if (ix0 & sign != 0) { 126 return math.nan(f64); 127 } 128 129 // normalize x 130 var m: i32 = @intCast(ix0 >> 20); 131 if (m == 0) { 132 // subnormal 133 while (ix0 == 0) { 134 m -= 21; 135 ix0 |= ix1 >> 11; 136 ix1 <<= 21; 137 } 138 139 // subnormal 140 var i: u32 = 0; 141 while (ix0 & 0x00100000 == 0) : (i += 1) { 142 ix0 <<= 1; 143 } 144 m -= @as(i32, @intCast(i)) - 1; 145 ix0 |= ix1 >> @intCast(32 - i); 146 ix1 <<= @intCast(i); 147 } 148 149 // unbias exponent 150 m -= 1023; 151 ix0 = (ix0 & 0x000FFFFF) | 0x00100000; 152 if (m & 1 != 0) { 153 ix0 += ix0 + (ix1 >> 31); 154 ix1 = ix1 +% ix1; 155 } 156 m >>= 1; 157 158 // sqrt(x) bit by bit 159 ix0 += ix0 + (ix1 >> 31); 160 ix1 = ix1 +% ix1; 161 162 var q: u32 = 0; 163 var q1: u32 = 0; 164 var s0: u32 = 0; 165 var s1: u32 = 0; 166 var r: u32 = 0x00200000; 167 var t: u32 = undefined; 168 var t1: u32 = undefined; 169 170 while (r != 0) { 171 t = s0 +% r; 172 if (t <= ix0) { 173 s0 = t + r; 174 ix0 -= t; 175 q += r; 176 } 177 ix0 = ix0 +% ix0 +% (ix1 >> 31); 178 ix1 = ix1 +% ix1; 179 r >>= 1; 180 } 181 182 r = sign; 183 while (r != 0) { 184 t1 = s1 +% r; 185 t = s0; 186 if (t < ix0 or (t == ix0 and t1 <= ix1)) { 187 s1 = t1 +% r; 188 if (t1 & sign == sign and s1 & sign == 0) { 189 s0 += 1; 190 } 191 ix0 -= t; 192 if (ix1 < t1) { 193 ix0 -= 1; 194 } 195 ix1 = ix1 -% t1; 196 q1 += r; 197 } 198 ix0 = ix0 +% ix0 +% (ix1 >> 31); 199 ix1 = ix1 +% ix1; 200 r >>= 1; 201 } 202 203 // rounding direction 204 if (ix0 | ix1 != 0) { 205 var z = 1.0 - tiny; // raise inexact 206 if (z >= 1.0) { 207 z = 1.0 + tiny; 208 if (q1 == 0xFFFFFFFF) { 209 q1 = 0; 210 q += 1; 211 } else if (z > 1.0) { 212 if (q1 == 0xFFFFFFFE) { 213 q += 1; 214 } 215 q1 += 2; 216 } else { 217 q1 += q1 & 1; 218 } 219 } 220 } 221 222 ix0 = (q >> 1) + 0x3FE00000; 223 ix1 = q1 >> 1; 224 if (q & 1 != 0) { 225 ix1 |= 0x80000000; 226 } 227 228 // NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same 229 // behaviour at least. 230 var iix0: i32 = @intCast(ix0); 231 iix0 = iix0 +% (m << 20); 232 233 const uz = (@as(u64, @intCast(iix0)) << 32) | ix1; 234 return @bitCast(uz); 235 } 236 237 pub fn __sqrtx(x: f80) callconv(.c) f80 { 238 // TODO: more efficient implementation 239 return @floatCast(sqrtq(x)); 240 } 241 242 pub fn sqrtq(x: f128) callconv(.c) f128 { 243 // TODO: more correct implementation 244 return sqrt(@floatCast(x)); 245 } 246 247 fn _Qp_sqrt(c: *f128, a: *f128) callconv(.c) void { 248 c.* = sqrt(@floatCast(a.*)); 249 } 250 251 pub fn sqrtl(x: c_longdouble) callconv(.c) c_longdouble { 252 switch (@typeInfo(c_longdouble).float.bits) { 253 16 => return __sqrth(x), 254 32 => return sqrtf(x), 255 64 => return sqrt(x), 256 80 => return __sqrtx(x), 257 128 => return sqrtq(x), 258 else => @compileError("unreachable"), 259 } 260 } 261 262 test "sqrtf" { 263 const V = [_]f32{ 264 0.0, 265 4.089288054930154, 266 7.538757127071935, 267 8.97780793672623, 268 5.304443821913729, 269 5.682408965311888, 270 0.5846878579110049, 271 3.650338664297043, 272 0.3178091951800732, 273 7.1505232436382835, 274 3.6589165881946464, 275 }; 276 277 // Note that @sqrt will either generate the sqrt opcode (if supported by the 278 // target ISA) or a call to `sqrtf` otherwise. 279 for (V) |val| 280 try std.testing.expectEqual(@sqrt(val), sqrtf(val)); 281 } 282 283 test "sqrtf special" { 284 try std.testing.expect(math.isPositiveInf(sqrtf(math.inf(f32)))); 285 try std.testing.expect(sqrtf(0.0) == 0.0); 286 try std.testing.expect(sqrtf(-0.0) == -0.0); 287 try std.testing.expect(math.isNan(sqrtf(-1.0))); 288 try std.testing.expect(math.isNan(sqrtf(math.nan(f32)))); 289 } 290 291 test "sqrt" { 292 const V = [_]f64{ 293 0.0, 294 4.089288054930154, 295 7.538757127071935, 296 8.97780793672623, 297 5.304443821913729, 298 5.682408965311888, 299 0.5846878579110049, 300 3.650338664297043, 301 0.3178091951800732, 302 7.1505232436382835, 303 3.6589165881946464, 304 }; 305 306 // Note that @sqrt will either generate the sqrt opcode (if supported by the 307 // target ISA) or a call to `sqrtf` otherwise. 308 for (V) |val| 309 try std.testing.expectEqual(@sqrt(val), sqrt(val)); 310 } 311 312 test "sqrt special" { 313 try std.testing.expect(math.isPositiveInf(sqrt(math.inf(f64)))); 314 try std.testing.expect(sqrt(0.0) == 0.0); 315 try std.testing.expect(sqrt(-0.0) == -0.0); 316 try std.testing.expect(math.isNan(sqrt(-1.0))); 317 try std.testing.expect(math.isNan(sqrt(math.nan(f64)))); 318 }