zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

sqrt.zig (8445B) - Raw


      1 const std = @import("std");
      2 const builtin = @import("builtin");
      3 const arch = builtin.cpu.arch;
      4 const math = std.math;
      5 const common = @import("common.zig");
      6 
      7 pub const panic = common.panic;
      8 
      9 comptime {
     10     @export(&__sqrth, .{ .name = "__sqrth", .linkage = common.linkage, .visibility = common.visibility });
     11     @export(&sqrtf, .{ .name = "sqrtf", .linkage = common.linkage, .visibility = common.visibility });
     12     @export(&sqrt, .{ .name = "sqrt", .linkage = common.linkage, .visibility = common.visibility });
     13     @export(&__sqrtx, .{ .name = "__sqrtx", .linkage = common.linkage, .visibility = common.visibility });
     14     if (common.want_ppc_abi) {
     15         @export(&sqrtq, .{ .name = "sqrtf128", .linkage = common.linkage, .visibility = common.visibility });
     16     } else if (common.want_sparc_abi) {
     17         @export(&_Qp_sqrt, .{ .name = "_Qp_sqrt", .linkage = common.linkage, .visibility = common.visibility });
     18     }
     19     @export(&sqrtq, .{ .name = "sqrtq", .linkage = common.linkage, .visibility = common.visibility });
     20     @export(&sqrtl, .{ .name = "sqrtl", .linkage = common.linkage, .visibility = common.visibility });
     21 }
     22 
     23 pub fn __sqrth(x: f16) callconv(.c) f16 {
     24     // TODO: more efficient implementation
     25     return @floatCast(sqrtf(x));
     26 }
     27 
     28 pub fn sqrtf(x: f32) callconv(.c) f32 {
     29     const tiny: f32 = 1.0e-30;
     30     const sign: i32 = @bitCast(@as(u32, 0x80000000));
     31     var ix: i32 = @bitCast(x);
     32 
     33     if ((ix & 0x7F800000) == 0x7F800000) {
     34         return x * x + x; // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
     35     }
     36 
     37     // zero
     38     if (ix <= 0) {
     39         if (ix & ~sign == 0) {
     40             return x; // sqrt (+-0) = +-0
     41         }
     42         if (ix < 0) {
     43             return math.nan(f32);
     44         }
     45     }
     46 
     47     // normalize
     48     var m = ix >> 23;
     49     if (m == 0) {
     50         // subnormal
     51         var i: i32 = 0;
     52         while (ix & 0x00800000 == 0) : (i += 1) {
     53             ix <<= 1;
     54         }
     55         m -= i - 1;
     56     }
     57 
     58     m -= 127; // unbias exponent
     59     ix = (ix & 0x007FFFFF) | 0x00800000;
     60 
     61     if (m & 1 != 0) { // odd m, double x to even
     62         ix += ix;
     63     }
     64 
     65     m >>= 1; // m = [m / 2]
     66 
     67     // sqrt(x) bit by bit
     68     ix += ix;
     69     var q: i32 = 0; // q = sqrt(x)
     70     var s: i32 = 0;
     71     var r: i32 = 0x01000000; // r = moving bit right -> left
     72 
     73     while (r != 0) {
     74         const t = s + r;
     75         if (t <= ix) {
     76             s = t + r;
     77             ix -= t;
     78             q += r;
     79         }
     80         ix += ix;
     81         r >>= 1;
     82     }
     83 
     84     // floating add to find rounding direction
     85     if (ix != 0) {
     86         var z = 1.0 - tiny; // inexact
     87         if (z >= 1.0) {
     88             z = 1.0 + tiny;
     89             if (z > 1.0) {
     90                 q += 2;
     91             } else {
     92                 if (q & 1 != 0) {
     93                     q += 1;
     94                 }
     95             }
     96         }
     97     }
     98 
     99     ix = (q >> 1) + 0x3f000000;
    100     ix += m << 23;
    101     return @bitCast(ix);
    102 }
    103 
    104 /// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound
    105 /// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
    106 /// potentially some edge cases remaining that are not handled in the same way.
    107 pub fn sqrt(x: f64) callconv(.c) f64 {
    108     const tiny: f64 = 1.0e-300;
    109     const sign: u32 = 0x80000000;
    110     const u: u64 = @bitCast(x);
    111 
    112     var ix0: u32 = @intCast(u >> 32);
    113     var ix1: u32 = @intCast(u & 0xFFFFFFFF);
    114 
    115     // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
    116     if (ix0 & 0x7FF00000 == 0x7FF00000) {
    117         return x * x + x;
    118     }
    119 
    120     // sqrt(+-0) = +-0
    121     if (x == 0.0) {
    122         return x;
    123     }
    124     // sqrt(-ve) = nan
    125     if (ix0 & sign != 0) {
    126         return math.nan(f64);
    127     }
    128 
    129     // normalize x
    130     var m: i32 = @intCast(ix0 >> 20);
    131     if (m == 0) {
    132         // subnormal
    133         while (ix0 == 0) {
    134             m -= 21;
    135             ix0 |= ix1 >> 11;
    136             ix1 <<= 21;
    137         }
    138 
    139         // subnormal
    140         var i: u32 = 0;
    141         while (ix0 & 0x00100000 == 0) : (i += 1) {
    142             ix0 <<= 1;
    143         }
    144         m -= @as(i32, @intCast(i)) - 1;
    145         ix0 |= ix1 >> @intCast(32 - i);
    146         ix1 <<= @intCast(i);
    147     }
    148 
    149     // unbias exponent
    150     m -= 1023;
    151     ix0 = (ix0 & 0x000FFFFF) | 0x00100000;
    152     if (m & 1 != 0) {
    153         ix0 += ix0 + (ix1 >> 31);
    154         ix1 = ix1 +% ix1;
    155     }
    156     m >>= 1;
    157 
    158     // sqrt(x) bit by bit
    159     ix0 += ix0 + (ix1 >> 31);
    160     ix1 = ix1 +% ix1;
    161 
    162     var q: u32 = 0;
    163     var q1: u32 = 0;
    164     var s0: u32 = 0;
    165     var s1: u32 = 0;
    166     var r: u32 = 0x00200000;
    167     var t: u32 = undefined;
    168     var t1: u32 = undefined;
    169 
    170     while (r != 0) {
    171         t = s0 +% r;
    172         if (t <= ix0) {
    173             s0 = t + r;
    174             ix0 -= t;
    175             q += r;
    176         }
    177         ix0 = ix0 +% ix0 +% (ix1 >> 31);
    178         ix1 = ix1 +% ix1;
    179         r >>= 1;
    180     }
    181 
    182     r = sign;
    183     while (r != 0) {
    184         t1 = s1 +% r;
    185         t = s0;
    186         if (t < ix0 or (t == ix0 and t1 <= ix1)) {
    187             s1 = t1 +% r;
    188             if (t1 & sign == sign and s1 & sign == 0) {
    189                 s0 += 1;
    190             }
    191             ix0 -= t;
    192             if (ix1 < t1) {
    193                 ix0 -= 1;
    194             }
    195             ix1 = ix1 -% t1;
    196             q1 += r;
    197         }
    198         ix0 = ix0 +% ix0 +% (ix1 >> 31);
    199         ix1 = ix1 +% ix1;
    200         r >>= 1;
    201     }
    202 
    203     // rounding direction
    204     if (ix0 | ix1 != 0) {
    205         var z = 1.0 - tiny; // raise inexact
    206         if (z >= 1.0) {
    207             z = 1.0 + tiny;
    208             if (q1 == 0xFFFFFFFF) {
    209                 q1 = 0;
    210                 q += 1;
    211             } else if (z > 1.0) {
    212                 if (q1 == 0xFFFFFFFE) {
    213                     q += 1;
    214                 }
    215                 q1 += 2;
    216             } else {
    217                 q1 += q1 & 1;
    218             }
    219         }
    220     }
    221 
    222     ix0 = (q >> 1) + 0x3FE00000;
    223     ix1 = q1 >> 1;
    224     if (q & 1 != 0) {
    225         ix1 |= 0x80000000;
    226     }
    227 
    228     // NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same
    229     // behaviour at least.
    230     var iix0: i32 = @intCast(ix0);
    231     iix0 = iix0 +% (m << 20);
    232 
    233     const uz = (@as(u64, @intCast(iix0)) << 32) | ix1;
    234     return @bitCast(uz);
    235 }
    236 
    237 pub fn __sqrtx(x: f80) callconv(.c) f80 {
    238     // TODO: more efficient implementation
    239     return @floatCast(sqrtq(x));
    240 }
    241 
    242 pub fn sqrtq(x: f128) callconv(.c) f128 {
    243     // TODO: more correct implementation
    244     return sqrt(@floatCast(x));
    245 }
    246 
    247 fn _Qp_sqrt(c: *f128, a: *f128) callconv(.c) void {
    248     c.* = sqrt(@floatCast(a.*));
    249 }
    250 
    251 pub fn sqrtl(x: c_longdouble) callconv(.c) c_longdouble {
    252     switch (@typeInfo(c_longdouble).float.bits) {
    253         16 => return __sqrth(x),
    254         32 => return sqrtf(x),
    255         64 => return sqrt(x),
    256         80 => return __sqrtx(x),
    257         128 => return sqrtq(x),
    258         else => @compileError("unreachable"),
    259     }
    260 }
    261 
    262 test "sqrtf" {
    263     const V = [_]f32{
    264         0.0,
    265         4.089288054930154,
    266         7.538757127071935,
    267         8.97780793672623,
    268         5.304443821913729,
    269         5.682408965311888,
    270         0.5846878579110049,
    271         3.650338664297043,
    272         0.3178091951800732,
    273         7.1505232436382835,
    274         3.6589165881946464,
    275     };
    276 
    277     // Note that @sqrt will either generate the sqrt opcode (if supported by the
    278     // target ISA) or a call to `sqrtf` otherwise.
    279     for (V) |val|
    280         try std.testing.expectEqual(@sqrt(val), sqrtf(val));
    281 }
    282 
    283 test "sqrtf special" {
    284     try std.testing.expect(math.isPositiveInf(sqrtf(math.inf(f32))));
    285     try std.testing.expect(sqrtf(0.0) == 0.0);
    286     try std.testing.expect(sqrtf(-0.0) == -0.0);
    287     try std.testing.expect(math.isNan(sqrtf(-1.0)));
    288     try std.testing.expect(math.isNan(sqrtf(math.nan(f32))));
    289 }
    290 
    291 test "sqrt" {
    292     const V = [_]f64{
    293         0.0,
    294         4.089288054930154,
    295         7.538757127071935,
    296         8.97780793672623,
    297         5.304443821913729,
    298         5.682408965311888,
    299         0.5846878579110049,
    300         3.650338664297043,
    301         0.3178091951800732,
    302         7.1505232436382835,
    303         3.6589165881946464,
    304     };
    305 
    306     // Note that @sqrt will either generate the sqrt opcode (if supported by the
    307     // target ISA) or a call to `sqrtf` otherwise.
    308     for (V) |val|
    309         try std.testing.expectEqual(@sqrt(val), sqrt(val));
    310 }
    311 
    312 test "sqrt special" {
    313     try std.testing.expect(math.isPositiveInf(sqrt(math.inf(f64))));
    314     try std.testing.expect(sqrt(0.0) == 0.0);
    315     try std.testing.expect(sqrt(-0.0) == -0.0);
    316     try std.testing.expect(math.isNan(sqrt(-1.0)));
    317     try std.testing.expect(math.isNan(sqrt(math.nan(f64))));
    318 }