blob a4e96c76 (5737B) - Raw
1 const math = @import("index.zig"); 2 const assert = @import("../debug.zig").assert; 3 4 // TODO issue #393 5 pub const log1p = log1p_workaround; 6 7 pub fn log1p_workaround(x: var) -> @typeOf(x) { 8 const T = @typeOf(x); 9 switch (T) { 10 f32 => @inlineCall(log1p_32, x), 11 f64 => @inlineCall(log1p_64, x), 12 else => @compileError("log1p not implemented for " ++ @typeName(T)), 13 } 14 } 15 16 fn log1p_32(x: f32) -> f32 { 17 const ln2_hi = 6.9313812256e-01; 18 const ln2_lo = 9.0580006145e-06; 19 const Lg1: f32 = 0xaaaaaa.0p-24; 20 const Lg2: f32 = 0xccce13.0p-25; 21 const Lg3: f32 = 0x91e9ee.0p-25; 22 const Lg4: f32 = 0xf89e26.0p-26; 23 24 const u = @bitCast(u32, x); 25 var ix = u; 26 var k: i32 = 1; 27 var f: f32 = undefined; 28 var c: f32 = undefined; 29 30 // 1 + x < sqrt(2)+ 31 if (ix < 0x3ED413D0 or ix >> 31 != 0) { 32 // x <= -1.0 33 if (ix >= 0xBF800000) { 34 // log1p(-1) = +inf 35 if (x == -1) { 36 return x / 0.0; 37 } 38 // log1p(x < -1) = nan 39 else { 40 return (x - x) / 0.0; 41 } 42 } 43 // |x| < 2^(-24) 44 if ((ix << 1) < (0x33800000 << 1)) { 45 // underflow if subnormal 46 if (ix & 0x7F800000 == 0) { 47 math.forceEval(x * x); 48 } 49 return x; 50 } 51 // sqrt(2) / 2- <= 1 + x < sqrt(2)+ 52 if (ix <= 0xBE95F619) { 53 k = 0; 54 c = 0; 55 f = x; 56 } 57 } else if (ix >= 0x7F800000) { 58 return x; 59 } 60 61 if (k != 0) { 62 const uf = 1 + x; 63 var iu = @bitCast(u32, uf); 64 iu += 0x3F800000 - 0x3F3504F3; 65 k = i32(iu >> 23) - 0x7F; 66 67 // correction to avoid underflow in c / u 68 if (k < 25) { 69 c = if (k >= 2) 1 - (uf - x) else x - (uf - 1); 70 c /= uf; 71 } else { 72 c = 0; 73 } 74 75 // u into [sqrt(2)/2, sqrt(2)] 76 iu = (iu & 0x007FFFFF) + 0x3F3504F3; 77 f = @bitCast(f32, iu) - 1; 78 } 79 80 const s = f / (2.0 + f); 81 const z = s * s; 82 const w = z * z; 83 const t1 = w * (Lg2 + w * Lg4); 84 const t2 = z * (Lg1 + w * Lg3); 85 const R = t2 + t1; 86 const hfsq = 0.5 * f * f; 87 const dk = f32(k); 88 89 s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi 90 } 91 92 fn log1p_64(x: f64) -> f64 { 93 const ln2_hi: f64 = 6.93147180369123816490e-01; 94 const ln2_lo: f64 = 1.90821492927058770002e-10; 95 const Lg1: f64 = 6.666666666666735130e-01; 96 const Lg2: f64 = 3.999999999940941908e-01; 97 const Lg3: f64 = 2.857142874366239149e-01; 98 const Lg4: f64 = 2.222219843214978396e-01; 99 const Lg5: f64 = 1.818357216161805012e-01; 100 const Lg6: f64 = 1.531383769920937332e-01; 101 const Lg7: f64 = 1.479819860511658591e-01; 102 103 var ix = @bitCast(u64, x); 104 var hx = u32(ix >> 32); 105 var k: i32 = 1; 106 var c: f64 = undefined; 107 var f: f64 = undefined; 108 109 // 1 + x < sqrt(2) 110 if (hx < 0x3FDA827A or hx >> 31 != 0) { 111 // x <= -1.0 112 if (hx >= 0xBFF00000) { 113 // log1p(-1) = -inf 114 if (x == 1) { 115 return x / 0.0; 116 } 117 // log1p(x < -1) = nan 118 else { 119 return (x - x) / 0.0; 120 } 121 } 122 // |x| < 2^(-53) 123 if ((hx << 1) < (0x3CA00000 << 1)) { 124 if ((hx & 0x7FF00000) == 0) { 125 math.raiseUnderflow(); 126 } 127 return x; 128 } 129 // sqrt(2) / 2- <= 1 + x < sqrt(2)+ 130 if (hx <= 0xBFD2BEC4) { 131 k = 0; 132 c = 0; 133 f = x; 134 } 135 } 136 else if (hx >= 0x7FF00000) { 137 return x; 138 } 139 140 if (k != 0) { 141 const uf = 1 + x; 142 const hu = @bitCast(u64, uf); 143 var iu = u32(hu >> 32); 144 iu += 0x3FF00000 - 0x3FE6A09E; 145 k = i32(iu >> 20) - 0x3FF; 146 147 // correction to avoid underflow in c / u 148 if (k < 54) { 149 c = if (k >= 2) 1 - (uf - x) else x - (uf - 1); 150 c /= uf; 151 } else { 152 c = 0; 153 } 154 155 // u into [sqrt(2)/2, sqrt(2)] 156 iu = (iu & 0x000FFFFF) + 0x3FE6A09E; 157 const iq = (u64(iu) << 32) | (hu & 0xFFFFFFFF); 158 f = @bitCast(f64, iq) - 1; 159 } 160 161 const hfsq = 0.5 * f * f; 162 const s = f / (2.0 + f); 163 const z = s * s; 164 const w = z * z; 165 const t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); 166 const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); 167 const R = t2 + t1; 168 const dk = f64(k); 169 170 s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi 171 } 172 173 test "math.log1p" { 174 assert(log1p(f32(0.0)) == log1p_32(0.0)); 175 assert(log1p(f64(0.0)) == log1p_64(0.0)); 176 } 177 178 test "math.log1p_32" { 179 const epsilon = 0.000001; 180 181 assert(math.approxEq(f32, log1p_32(0.0), 0.0, epsilon)); 182 assert(math.approxEq(f32, log1p_32(0.2), 0.182322, epsilon)); 183 assert(math.approxEq(f32, log1p_32(0.8923), 0.637793, epsilon)); 184 assert(math.approxEq(f32, log1p_32(1.5), 0.916291, epsilon)); 185 assert(math.approxEq(f32, log1p_32(37.45), 3.649359, epsilon)); 186 assert(math.approxEq(f32, log1p_32(89.123), 4.501175, epsilon)); 187 assert(math.approxEq(f32, log1p_32(123123.234375), 11.720949, epsilon)); 188 } 189 190 test "math.log1p_64" { 191 const epsilon = 0.000001; 192 193 assert(math.approxEq(f64, log1p_64(0.0), 0.0, epsilon)); 194 assert(math.approxEq(f64, log1p_64(0.2), 0.182322, epsilon)); 195 assert(math.approxEq(f64, log1p_64(0.8923), 0.637793, epsilon)); 196 assert(math.approxEq(f64, log1p_64(1.5), 0.916291, epsilon)); 197 assert(math.approxEq(f64, log1p_64(37.45), 3.649359, epsilon)); 198 assert(math.approxEq(f64, log1p_64(89.123), 4.501175, epsilon)); 199 assert(math.approxEq(f64, log1p_64(123123.234375), 11.720949, epsilon)); 200 }