zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob a4e96c76 (5737B) - Raw


      1 const math = @import("index.zig");
      2 const assert = @import("../debug.zig").assert;
      3 
      4 // TODO issue #393
      5 pub const log1p = log1p_workaround;
      6 
      7 pub fn log1p_workaround(x: var) -> @typeOf(x) {
      8     const T = @typeOf(x);
      9     switch (T) {
     10         f32 => @inlineCall(log1p_32, x),
     11         f64 => @inlineCall(log1p_64, x),
     12         else => @compileError("log1p not implemented for " ++ @typeName(T)),
     13     }
     14 }
     15 
     16 fn log1p_32(x: f32) -> f32 {
     17     const ln2_hi = 6.9313812256e-01;
     18     const ln2_lo = 9.0580006145e-06;
     19     const Lg1: f32 = 0xaaaaaa.0p-24;
     20     const Lg2: f32 = 0xccce13.0p-25;
     21     const Lg3: f32 = 0x91e9ee.0p-25;
     22     const Lg4: f32 = 0xf89e26.0p-26;
     23 
     24     const u = @bitCast(u32, x);
     25     var ix = u;
     26     var k: i32 = 1;
     27     var f: f32 = undefined;
     28     var c: f32 = undefined;
     29 
     30     // 1 + x < sqrt(2)+
     31     if (ix < 0x3ED413D0 or ix >> 31 != 0) {
     32         // x <= -1.0
     33         if (ix >= 0xBF800000) {
     34             // log1p(-1) = +inf
     35             if (x == -1) {
     36                 return x / 0.0;
     37             }
     38             // log1p(x < -1) = nan
     39             else {
     40                 return (x - x) / 0.0;
     41             }
     42         }
     43         // |x| < 2^(-24)
     44         if ((ix << 1) < (0x33800000 << 1)) {
     45             // underflow if subnormal
     46             if (ix & 0x7F800000 == 0) {
     47                 math.forceEval(x * x);
     48             }
     49             return x;
     50         }
     51         // sqrt(2) / 2- <= 1 + x < sqrt(2)+
     52         if (ix <= 0xBE95F619) {
     53             k = 0;
     54             c = 0;
     55             f = x;
     56         }
     57     } else if (ix >= 0x7F800000) {
     58         return x;
     59     }
     60 
     61     if (k != 0) {
     62         const uf = 1 + x;
     63         var iu = @bitCast(u32, uf);
     64         iu += 0x3F800000 - 0x3F3504F3;
     65         k = i32(iu >> 23) - 0x7F;
     66 
     67         // correction to avoid underflow in c / u
     68         if (k < 25) {
     69             c = if (k >= 2) 1 - (uf - x) else x - (uf - 1);
     70             c /= uf;
     71         } else {
     72             c = 0;
     73         }
     74 
     75         // u into [sqrt(2)/2, sqrt(2)]
     76         iu = (iu & 0x007FFFFF) + 0x3F3504F3;
     77         f = @bitCast(f32, iu) - 1;
     78     }
     79 
     80     const s = f / (2.0 + f);
     81     const z = s * s;
     82     const w = z * z;
     83     const t1 = w * (Lg2 + w * Lg4);
     84     const t2 = z * (Lg1 + w * Lg3);
     85     const R = t2 + t1;
     86     const hfsq = 0.5 * f * f;
     87     const dk = f32(k);
     88 
     89     s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi
     90 }
     91 
     92 fn log1p_64(x: f64) -> f64 {
     93     const ln2_hi: f64 = 6.93147180369123816490e-01;
     94     const ln2_lo: f64 = 1.90821492927058770002e-10;
     95     const Lg1: f64 = 6.666666666666735130e-01;
     96     const Lg2: f64 = 3.999999999940941908e-01;
     97     const Lg3: f64 = 2.857142874366239149e-01;
     98     const Lg4: f64 = 2.222219843214978396e-01;
     99     const Lg5: f64 = 1.818357216161805012e-01;
    100     const Lg6: f64 = 1.531383769920937332e-01;
    101     const Lg7: f64 = 1.479819860511658591e-01;
    102 
    103     var ix = @bitCast(u64, x);
    104     var hx = u32(ix >> 32);
    105     var k: i32 = 1;
    106     var c: f64 = undefined;
    107     var f: f64 = undefined;
    108 
    109     // 1 + x < sqrt(2)
    110     if (hx < 0x3FDA827A or hx >> 31 != 0) {
    111         // x <= -1.0
    112         if (hx >= 0xBFF00000) {
    113             // log1p(-1) = -inf
    114             if (x == 1) {
    115                 return x / 0.0;
    116             }
    117             // log1p(x < -1) = nan
    118             else {
    119                 return (x - x) / 0.0;
    120             }
    121         }
    122         // |x| < 2^(-53)
    123         if ((hx << 1) < (0x3CA00000 << 1)) {
    124             if ((hx & 0x7FF00000) == 0) {
    125                 math.raiseUnderflow();
    126             }
    127             return x;
    128         }
    129         // sqrt(2) / 2- <= 1 + x < sqrt(2)+
    130         if (hx <= 0xBFD2BEC4) {
    131             k = 0;
    132             c = 0;
    133             f = x;
    134         }
    135     }
    136     else if (hx >= 0x7FF00000) {
    137         return x;
    138     }
    139 
    140     if (k != 0) {
    141         const uf = 1 + x;
    142         const hu = @bitCast(u64, uf);
    143         var iu = u32(hu >> 32);
    144         iu += 0x3FF00000 - 0x3FE6A09E;
    145         k = i32(iu >> 20) - 0x3FF;
    146 
    147         // correction to avoid underflow in c / u
    148         if (k < 54) {
    149             c = if (k >= 2) 1 - (uf - x) else x - (uf - 1);
    150             c /= uf;
    151         } else {
    152             c = 0;
    153         }
    154 
    155         // u into [sqrt(2)/2, sqrt(2)]
    156         iu = (iu & 0x000FFFFF) + 0x3FE6A09E;
    157         const iq = (u64(iu) << 32) | (hu & 0xFFFFFFFF);
    158         f = @bitCast(f64, iq) - 1;
    159     }
    160 
    161     const hfsq = 0.5 * f * f;
    162     const s = f / (2.0 + f);
    163     const z = s * s;
    164     const w = z * z;
    165     const t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
    166     const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
    167     const R = t2 + t1;
    168     const dk = f64(k);
    169 
    170     s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi
    171 }
    172 
    173 test "math.log1p" {
    174     assert(log1p(f32(0.0)) == log1p_32(0.0));
    175     assert(log1p(f64(0.0)) == log1p_64(0.0));
    176 }
    177 
    178 test "math.log1p_32" {
    179     const epsilon = 0.000001;
    180 
    181     assert(math.approxEq(f32, log1p_32(0.0), 0.0, epsilon));
    182     assert(math.approxEq(f32, log1p_32(0.2), 0.182322, epsilon));
    183     assert(math.approxEq(f32, log1p_32(0.8923), 0.637793, epsilon));
    184     assert(math.approxEq(f32, log1p_32(1.5), 0.916291, epsilon));
    185     assert(math.approxEq(f32, log1p_32(37.45), 3.649359, epsilon));
    186     assert(math.approxEq(f32, log1p_32(89.123), 4.501175, epsilon));
    187     assert(math.approxEq(f32, log1p_32(123123.234375), 11.720949, epsilon));
    188 }
    189 
    190 test "math.log1p_64" {
    191     const epsilon = 0.000001;
    192 
    193     assert(math.approxEq(f64, log1p_64(0.0), 0.0, epsilon));
    194     assert(math.approxEq(f64, log1p_64(0.2), 0.182322, epsilon));
    195     assert(math.approxEq(f64, log1p_64(0.8923), 0.637793, epsilon));
    196     assert(math.approxEq(f64, log1p_64(1.5), 0.916291, epsilon));
    197     assert(math.approxEq(f64, log1p_64(37.45), 3.649359, epsilon));
    198     assert(math.approxEq(f64, log1p_64(89.123), 4.501175, epsilon));
    199     assert(math.approxEq(f64, log1p_64(123123.234375), 11.720949, epsilon));
    200 }