zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

trig.zig (11743B) - Raw


      1 // Ported from musl, which is licensed under the MIT license:
      2 // https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
      3 //
      4 // https://git.musl-libc.org/cgit/musl/tree/src/math/__cos.c
      5 // https://git.musl-libc.org/cgit/musl/tree/src/math/__cosdf.c
      6 // https://git.musl-libc.org/cgit/musl/tree/src/math/__sin.c
      7 // https://git.musl-libc.org/cgit/musl/tree/src/math/__sindf.c
      8 // https://git.musl-libc.org/cgit/musl/tree/src/math/__tan.c
      9 // https://git.musl-libc.org/cgit/musl/tree/src/math/__tandf.c
     10 
     11 /// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
     12 /// Input x is assumed to be bounded by ~pi/4 in magnitude.
     13 /// Input y is the tail of x.
     14 ///
     15 /// Algorithm
     16 ///      1. Since cos(-x) = cos(x), we need only to consider positive x.
     17 ///      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
     18 ///      3. cos(x) is approximated by a polynomial of degree 14 on
     19 ///         [0,pi/4]
     20 ///                                       4            14
     21 ///              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
     22 ///         where the remez error is
     23 ///
     24 ///      |              2     4     6     8     10    12     14 |     -58
     25 ///      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
     26 ///      |                                                      |
     27 ///
     28 ///                     4     6     8     10    12     14
     29 ///      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
     30 ///             cos(x) ~ 1 - x*x/2 + r
     31 ///         since cos(x+y) ~ cos(x) - sin(x)*y
     32 ///                        ~ cos(x) - x*y,
     33 ///         a correction term is necessary in cos(x) and hence
     34 ///              cos(x+y) = 1 - (x*x/2 - (r - x*y))
     35 ///         For better accuracy, rearrange to
     36 ///              cos(x+y) ~ w + (tmp + (r-x*y))
     37 ///         where w = 1 - x*x/2 and tmp is a tiny correction term
     38 ///         (1 - x*x/2 == w + tmp exactly in infinite precision).
     39 ///         The exactness of w + tmp in infinite precision depends on w
     40 ///         and tmp having the same precision as x.  If they have extra
     41 ///         precision due to compiler bugs, then the extra precision is
     42 ///         only good provided it is retained in all terms of the final
     43 ///         expression for cos().  Retention happens in all cases tested
     44 ///         under FreeBSD, so don't pessimize things by forcibly clipping
     45 ///         any extra precision in w.
     46 pub fn __cos(x: f64, y: f64) f64 {
     47     const C1 = 4.16666666666666019037e-02; // 0x3FA55555, 0x5555554C
     48     const C2 = -1.38888888888741095749e-03; // 0xBF56C16C, 0x16C15177
     49     const C3 = 2.48015872894767294178e-05; // 0x3EFA01A0, 0x19CB1590
     50     const C4 = -2.75573143513906633035e-07; // 0xBE927E4F, 0x809C52AD
     51     const C5 = 2.08757232129817482790e-09; // 0x3E21EE9E, 0xBDB4B1C4
     52     const C6 = -1.13596475577881948265e-11; // 0xBDA8FAE9, 0xBE8838D4
     53 
     54     const z = x * x;
     55     const zs = z * z;
     56     const r = z * (C1 + z * (C2 + z * C3)) + zs * zs * (C4 + z * (C5 + z * C6));
     57     const hz = 0.5 * z;
     58     const w = 1.0 - hz;
     59     return w + (((1.0 - w) - hz) + (z * r - x * y));
     60 }
     61 
     62 pub fn __cosdf(x: f64) f32 {
     63     // |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]).
     64     const C0 = -0x1ffffffd0c5e81.0p-54; // -0.499999997251031003120
     65     const C1 = 0x155553e1053a42.0p-57; //  0.0416666233237390631894
     66     const C2 = -0x16c087e80f1e27.0p-62; // -0.00138867637746099294692
     67     const C3 = 0x199342e0ee5069.0p-68; //  0.0000243904487962774090654
     68 
     69     // Try to optimize for parallel evaluation as in __tandf.c.
     70     const z = x * x;
     71     const w = z * z;
     72     const r = C2 + z * C3;
     73     return @floatCast(((1.0 + z * C0) + w * C1) + (w * z) * r);
     74 }
     75 
     76 /// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
     77 /// Input x is assumed to be bounded by ~pi/4 in magnitude.
     78 /// Input y is the tail of x.
     79 /// Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
     80 ///
     81 /// Algorithm
     82 ///      1. Since sin(-x) = -sin(x), we need only to consider positive x.
     83 ///      2. Callers must return sin(-0) = -0 without calling here since our
     84 ///         odd polynomial is not evaluated in a way that preserves -0.
     85 ///         Callers may do the optimization sin(x) ~ x for tiny x.
     86 ///      3. sin(x) is approximated by a polynomial of degree 13 on
     87 ///         [0,pi/4]
     88 ///                               3            13
     89 ///              sin(x) ~ x + S1*x + ... + S6*x
     90 ///         where
     91 ///
     92 ///      |sin(x)         2     4     6     8     10     12  |     -58
     93 ///      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
     94 ///      |  x                                               |
     95 ///
     96 ///      4. sin(x+y) = sin(x) + sin'(x')*y
     97 ///                  ~ sin(x) + (1-x*x/2)*y
     98 ///         For better accuracy, let
     99 ///                   3      2      2      2      2
    100 ///              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
    101 ///         then                   3    2
    102 ///              sin(x) = x + (S1*x + (x *(r-y/2)+y))
    103 pub fn __sin(x: f64, y: f64, iy: i32) f64 {
    104     const S1 = -1.66666666666666324348e-01; // 0xBFC55555, 0x55555549
    105     const S2 = 8.33333333332248946124e-03; // 0x3F811111, 0x1110F8A6
    106     const S3 = -1.98412698298579493134e-04; // 0xBF2A01A0, 0x19C161D5
    107     const S4 = 2.75573137070700676789e-06; // 0x3EC71DE3, 0x57B1FE7D
    108     const S5 = -2.50507602534068634195e-08; // 0xBE5AE5E6, 0x8A2B9CEB
    109     const S6 = 1.58969099521155010221e-10; // 0x3DE5D93A, 0x5ACFD57C
    110 
    111     const z = x * x;
    112     const w = z * z;
    113     const r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6);
    114     const v = z * x;
    115     if (iy == 0) {
    116         return x + v * (S1 + z * r);
    117     } else {
    118         return x - ((z * (0.5 * y - v * r) - y) - v * S1);
    119     }
    120 }
    121 
    122 pub fn __sindf(x: f64) f32 {
    123     // |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]).
    124     const S1 = -0x15555554cbac77.0p-55; // -0.166666666416265235595
    125     const S2 = 0x111110896efbb2.0p-59; //  0.0083333293858894631756
    126     const S3 = -0x1a00f9e2cae774.0p-65; // -0.000198393348360966317347
    127     const S4 = 0x16cd878c3b46a7.0p-71; //  0.0000027183114939898219064
    128 
    129     // Try to optimize for parallel evaluation as in __tandf.c.
    130     const z = x * x;
    131     const w = z * z;
    132     const r = S3 + z * S4;
    133     const s = z * x;
    134     return @floatCast((x + s * (S1 + z * S2)) + s * w * r);
    135 }
    136 
    137 /// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
    138 /// Input x is assumed to be bounded by ~pi/4 in magnitude.
    139 /// Input y is the tail of x.
    140 /// Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned.
    141 ///
    142 /// Algorithm
    143 ///      1. Since tan(-x) = -tan(x), we need only to consider positive x.
    144 ///      2. Callers must return tan(-0) = -0 without calling here since our
    145 ///         odd polynomial is not evaluated in a way that preserves -0.
    146 ///         Callers may do the optimization tan(x) ~ x for tiny x.
    147 ///      3. tan(x) is approximated by a odd polynomial of degree 27 on
    148 ///         [0,0.67434]
    149 ///                               3             27
    150 ///              tan(x) ~ x + T1*x + ... + T13*x
    151 ///         where
    152 ///
    153 ///              |tan(x)         2     4            26   |     -59.2
    154 ///              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
    155 ///              |  x                                    |
    156 ///
    157 ///         Note: tan(x+y) = tan(x) + tan'(x)*y
    158 ///                        ~ tan(x) + (1+x*x)*y
    159 ///         Therefore, for better accuracy in computing tan(x+y), let
    160 ///                   3      2      2       2       2
    161 ///              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
    162 ///         then
    163 ///                                  3    2
    164 ///              tan(x+y) = x + (T1*x + (x *(r+y)+y))
    165 ///
    166 ///      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
    167 ///              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
    168 ///                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
    169 pub fn __tan(x_: f64, y_: f64, odd: bool) f64 {
    170     var x = x_;
    171     var y = y_;
    172 
    173     const T = [_]f64{
    174         3.33333333333334091986e-01, // 3FD55555, 55555563
    175         1.33333333333201242699e-01, // 3FC11111, 1110FE7A
    176         5.39682539762260521377e-02, // 3FABA1BA, 1BB341FE
    177         2.18694882948595424599e-02, // 3F9664F4, 8406D637
    178         8.86323982359930005737e-03, // 3F8226E3, E96E8493
    179         3.59207910759131235356e-03, // 3F6D6D22, C9560328
    180         1.45620945432529025516e-03, // 3F57DBC8, FEE08315
    181         5.88041240820264096874e-04, // 3F4344D8, F2F26501
    182         2.46463134818469906812e-04, // 3F3026F7, 1A8D1068
    183         7.81794442939557092300e-05, // 3F147E88, A03792A6
    184         7.14072491382608190305e-05, // 3F12B80F, 32F0A7E9
    185         -1.85586374855275456654e-05, // BEF375CB, DB605373
    186         2.59073051863633712884e-05, // 3EFB2A70, 74BF7AD4
    187     };
    188     const pio4 = 7.85398163397448278999e-01; // 3FE921FB, 54442D18
    189     const pio4lo = 3.06161699786838301793e-17; // 3C81A626, 33145C07
    190 
    191     var z: f64 = undefined;
    192     var r: f64 = undefined;
    193     var v: f64 = undefined;
    194     var w: f64 = undefined;
    195     var s: f64 = undefined;
    196     var a: f64 = undefined;
    197     var w0: f64 = undefined;
    198     var a0: f64 = undefined;
    199     var hx: u32 = undefined;
    200     var sign: bool = undefined;
    201 
    202     hx = @intCast(@as(u64, @bitCast(x)) >> 32);
    203     const big = (hx & 0x7fffffff) >= 0x3FE59428; // |x| >= 0.6744
    204     if (big) {
    205         sign = hx >> 31 != 0;
    206         if (sign) {
    207             x = -x;
    208             y = -y;
    209         }
    210         x = (pio4 - x) + (pio4lo - y);
    211         y = 0.0;
    212     }
    213     z = x * x;
    214     w = z * z;
    215 
    216     // Break x^5*(T[1]+x^2*T[2]+...) into
    217     // x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
    218     // x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
    219     r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
    220     v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
    221     s = z * x;
    222     r = y + z * (s * (r + v) + y) + s * T[0];
    223     w = x + r;
    224     if (big) {
    225         s = @floatFromInt(1 - 2 * @as(i3, @intFromBool(odd)));
    226         v = s - 2.0 * (x + (r - w * w / (w + s)));
    227         return if (sign) -v else v;
    228     }
    229     if (!odd) {
    230         return w;
    231     }
    232     // -1.0/(x+r) has up to 2ulp error, so compute it accurately
    233     w0 = w;
    234     w0 = @bitCast(@as(u64, @bitCast(w0)) & 0xffffffff00000000);
    235     v = r - (w0 - x); // w0+v = r+x
    236     a = -1.0 / w;
    237     a0 = a;
    238     a0 = @bitCast(@as(u64, @bitCast(a0)) & 0xffffffff00000000);
    239     return a0 + a * (1.0 + a0 * w0 + a0 * v);
    240 }
    241 
    242 pub fn __tandf(x: f64, odd: bool) f32 {
    243     // |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]).
    244     const T = [_]f64{
    245         0x15554d3418c99f.0p-54, // 0.333331395030791399758
    246         0x1112fd38999f72.0p-55, // 0.133392002712976742718
    247         0x1b54c91d865afe.0p-57, // 0.0533812378445670393523
    248         0x191df3908c33ce.0p-58, // 0.0245283181166547278873
    249         0x185dadfcecf44e.0p-61, // 0.00297435743359967304927
    250         0x1362b9bf971bcd.0p-59, // 0.00946564784943673166728
    251     };
    252 
    253     const z = x * x;
    254     // Split up the polynomial into small independent terms to give
    255     // opportunities for parallel evaluation.  The chosen splitting is
    256     // micro-optimized for Athlons (XP, X64).  It costs 2 multiplications
    257     // relative to Horner's method on sequential machines.
    258     //
    259     // We add the small terms from lowest degree up for efficiency on
    260     // non-sequential machines (the lowest degree terms tend to be ready
    261     // earlier).  Apart from this, we don't care about order of
    262     // operations, and don't need to to care since we have precision to
    263     // spare.  However, the chosen splitting is good for accuracy too,
    264     // and would give results as accurate as Horner's method if the
    265     // small terms were added from highest degree down.
    266     const r = T[4] + z * T[5];
    267     const t = T[2] + z * T[3];
    268     const w = z * z;
    269     const s = z * x;
    270     const u = T[0] + z * T[1];
    271     const r0 = (x + s * u) + (s * w) * (t + w * r);
    272     return @floatCast(if (odd) -1.0 / r0 else r0);
    273 }