`compiler_rt`: Make `long double` trig impls non-generic - zig - fork of https://codeberg.org/ziglang/zig

commit 4ccac1de416bff8846352ef3bf2fb592c5a419a9 (tree)
parent ad10c7600793547393848528e7d4a608d9fde59e
Author: mihael <hi@mihaelm.com>
Date:   Thu,  2 Apr 2026 23:53:13 +0200

`compiler_rt`: Make `long double` trig impls non-generic

Diffstat:
M lib/compiler_rt/cos.zig  | 44 ++++++++++++++++++++++++++++++--------------
M lib/compiler_rt/sin.zig  | 47 +++++++++++++++++++++++++++++++++--------------
M lib/compiler_rt/sincos.zig  | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M lib/compiler_rt/tan.zig  | 39 ++++++++++++++++++++++++---------------
M lib/compiler_rt/trig.zig  | 371 +++++++++++++++++++++++++++++++++++++++++--------------------------------------

5 files changed, 336 insertions(+), 236 deletions(-)
diff --git a/lib/compiler_rt/cos.zig b/lib/compiler_rt/cos.zig
@@ -123,36 +123,52 @@ pub fn cos(x: f64) callconv(.c) f64 {
     };
 }
 
-fn coslGeneric(comptime T: type, x: T) T {
+pub fn cosx(x: f80) callconv(.c) f80 {
     const se = ld.signExponent(x) & 0x7fff;
     if (se == 0x7fff) {
         return x - x;
     }
 
     if (@abs(x) < trig.pi_4) {
-        if (se < 0x3fff - math.floatMantissaBits(T)) {
+        if (se < 0x3fff - math.floatMantissaBits(f80)) {
             // raise inexact if x!=0
             return 1.0 + x;
         }
-        return trig.cosl(T, x, 0.0);
+        return trig.cosx(x, 0.0);
     }
 
-    var y: [2]T = undefined;
-    const n = rem_pio2l(T, x, &y);
+    var y: [2]f80 = undefined;
+    const n = rem_pio2l(f80, x, &y);
     return switch (n & 3) {
-        0 => trig.cosl(T, y[0], y[1]),
-        1 => -trig.sinl(T, y[0], y[1], 1),
-        2 => -trig.cosl(T, y[0], y[1]),
-        else => trig.sinl(T, y[0], y[1], 1),
+        0 => trig.cosx(y[0], y[1]),
+        1 => -trig.sinx(y[0], y[1], 1),
+        2 => -trig.cosx(y[0], y[1]),
+        else => trig.sinx(y[0], y[1], 1),
     };
 }
 
-pub fn cosx(x: f80) callconv(.c) f80 {
-    return coslGeneric(f80, x);
-}
-
 pub fn cosq(x: f128) callconv(.c) f128 {
-    return coslGeneric(f128, x);
+    const se = ld.signExponent(x) & 0x7fff;
+    if (se == 0x7fff) {
+        return x - x;
+    }
+
+    if (@abs(x) < trig.pi_4) {
+        if (se < 0x3fff - math.floatMantissaBits(f128)) {
+            // raise inexact if x!=0
+            return 1.0 + x;
+        }
+        return trig.cosq(x, 0.0);
+    }
+
+    var y: [2]f128 = undefined;
+    const n = rem_pio2l(f128, x, &y);
+    return switch (n & 3) {
+        0 => trig.cosq(y[0], y[1]),
+        1 => -trig.sinq(y[0], y[1], 1),
+        2 => -trig.cosq(y[0], y[1]),
+        else => trig.sinq(y[0], y[1], 1),
+    };
 }
 
 pub fn cosl(x: c_longdouble) callconv(.c) c_longdouble {
diff --git a/lib/compiler_rt/sin.zig b/lib/compiler_rt/sin.zig
@@ -133,39 +133,58 @@ pub fn sin(x: f64) callconv(.c) f64 {
     };
 }
 
-fn sinlGeneric(comptime T: type, x: T) T {
+fn sinx(x: f80) callconv(.c) f80 {
     const se = ld.signExponent(x) & 0x7fff;
     if (se == 0x7fff) {
         return x - x;
     }
 
     if (@abs(x) < trig.pi_4) {
-        if (se < 0x3fff - (math.floatMantissaBits(T) / 2)) {
+        if (se < 0x3fff - (math.floatMantissaBits(f80) / 2)) {
             // raise inexact if x!=0 and underflow if subnormal
             if (compiler_rt.want_float_exceptions) {
                 mem.doNotOptimizeAway(if (se == 0) x * 0x1p-120 else x + 0x1p120);
             }
             return x;
         }
-        return trig.sinl(T, x, 0.0, 0);
+        return trig.sinx(x, 0.0, 0);
     }
 
-    var y: [2]T = undefined;
-    const n = rem_pio2l(T, x, &y);
+    var y: [2]f80 = undefined;
+    const n = rem_pio2l(f80, x, &y);
     return switch (n & 3) {
-        0 => trig.sinl(T, y[0], y[1], 1),
-        1 => trig.cosl(T, y[0], y[1]),
-        2 => -trig.sinl(T, y[0], y[1], 1),
-        else => -trig.cosl(T, y[0], y[1]),
+        0 => trig.sinx(y[0], y[1], 1),
+        1 => trig.cosx(y[0], y[1]),
+        2 => -trig.sinx(y[0], y[1], 1),
+        else => -trig.cosx(y[0], y[1]),
     };
 }
 
-pub fn sinx(x: f80) callconv(.c) f80 {
-    return sinlGeneric(f80, x);
-}
-
 pub fn sinq(x: f128) callconv(.c) f128 {
-    return sinlGeneric(f128, x);
+    const se = ld.signExponent(x) & 0x7fff;
+    if (se == 0x7fff) {
+        return x - x;
+    }
+
+    if (@abs(x) < trig.pi_4) {
+        if (se < 0x3fff - (math.floatMantissaBits(f128) / 2)) {
+            // raise inexact if x!=0 and underflow if subnormal
+            if (compiler_rt.want_float_exceptions) {
+                mem.doNotOptimizeAway(if (se == 0) x * 0x1p-120 else x + 0x1p120);
+            }
+            return x;
+        }
+        return trig.sinq(x, 0.0, 0);
+    }
+
+    var y: [2]f128 = undefined;
+    const n = rem_pio2l(f128, x, &y);
+    return switch (n & 3) {
+        0 => trig.sinq(y[0], y[1], 1),
+        1 => trig.cosq(y[0], y[1]),
+        2 => -trig.sinq(y[0], y[1], 1),
+        else => -trig.cosq(y[0], y[1]),
+    };
 }
 
 pub fn sinl(x: c_longdouble) callconv(.c) c_longdouble {
diff --git a/lib/compiler_rt/sincos.zig b/lib/compiler_rt/sincos.zig
@@ -192,11 +192,7 @@ pub fn sincos(x: f64, r_sin: *f64, r_cos: *f64) callconv(.c) void {
     }
 }
 
-fn sincoslGeneric(comptime T: type, x: T, r_sin: *T, r_cos: *T) void {
-    if (T != f80 and T != f128) {
-        @compileError("`sincoslGeneric` implemented only for `f80` and `f128`, got: " ++ @typeName(T));
-    }
-
+pub fn sincosx(x: f80, r_sin: *f80, r_cos: *f80) callconv(.c) void {
     const se = ld.signExponent(x) & 0x7fff;
     if (se == 0x7fff) {
         const result = x - x;
@@ -206,7 +202,7 @@ fn sincoslGeneric(comptime T: type, x: T, r_sin: *T, r_cos: *T) void {
     }
 
     if (@abs(x) < trig.pi_4) {
-        if (se < 0x3fff - math.floatMantissaBits(T)) {
+        if (se < 0x3fff - math.floatMantissaBits(f80)) {
             // raise underflow if subnormal
             if (compiler_rt.want_float_exceptions and se == 0) {
                 mem.doNotOptimizeAway(x * 0x1p-120);
@@ -216,15 +212,15 @@ fn sincoslGeneric(comptime T: type, x: T, r_sin: *T, r_cos: *T) void {
             r_cos.* = 1.0 + x;
             return;
         }
-        r_sin.* = trig.sinl(T, x, 0.0, 0);
-        r_cos.* = trig.cosl(T, x, 0.0);
+        r_sin.* = trig.sinx(x, 0.0, 0);
+        r_cos.* = trig.cosx(x, 0.0);
         return;
     }
 
-    var y: [2]T = undefined;
-    const n = rem_pio2l(T, x, &y);
-    const s = trig.sinl(T, y[0], y[1], 1);
-    const c = trig.cosl(T, y[0], y[1]);
+    var y: [2]f80 = undefined;
+    const n = rem_pio2l(f80, x, &y);
+    const s = trig.sinx(y[0], y[1], 1);
+    const c = trig.cosx(y[0], y[1]);
     switch (n & 3) {
         0 => {
             r_sin.* = s;
@@ -245,12 +241,53 @@ fn sincoslGeneric(comptime T: type, x: T, r_sin: *T, r_cos: *T) void {
     }
 }
 
-pub fn sincosx(x: f80, r_sin: *f80, r_cos: *f80) callconv(.c) void {
-    return sincoslGeneric(f80, x, r_sin, r_cos);
-}
-
 pub fn sincosq(x: f128, r_sin: *f128, r_cos: *f128) callconv(.c) void {
-    return sincoslGeneric(f128, x, r_sin, r_cos);
+    const se = ld.signExponent(x) & 0x7fff;
+    if (se == 0x7fff) {
+        const result = x - x;
+        r_sin.* = result;
+        r_cos.* = result;
+        return;
+    }
+
+    if (@abs(x) < trig.pi_4) {
+        if (se < 0x3fff - math.floatMantissaBits(f128)) {
+            // raise underflow if subnormal
+            if (compiler_rt.want_float_exceptions and se == 0) {
+                mem.doNotOptimizeAway(x * 0x1p-120);
+            }
+            r_sin.* = x;
+            // raise inexact if x!=0
+            r_cos.* = 1.0 + x;
+            return;
+        }
+        r_sin.* = trig.sinq(x, 0.0, 0);
+        r_cos.* = trig.cosq(x, 0.0);
+        return;
+    }
+
+    var y: [2]f128 = undefined;
+    const n = rem_pio2l(f128, x, &y);
+    const s = trig.sinq(y[0], y[1], 1);
+    const c = trig.cosq(y[0], y[1]);
+    switch (n & 3) {
+        0 => {
+            r_sin.* = s;
+            r_cos.* = c;
+        },
+        1 => {
+            r_sin.* = c;
+            r_cos.* = -s;
+        },
+        2 => {
+            r_sin.* = -s;
+            r_cos.* = -c;
+        },
+        else => {
+            r_sin.* = -c;
+            r_cos.* = s;
+        },
+    }
 }
 
 pub fn sincosl(x: c_longdouble, r_sin: *c_longdouble, r_cos: *c_longdouble) callconv(.c) void {
diff --git a/lib/compiler_rt/tan.zig b/lib/compiler_rt/tan.zig
@@ -120,37 +120,46 @@ pub fn tan(x: f64) callconv(.c) f64 {
     return kernel.tan(y[0], y[1], n & 1 != 0);
 }
 
-fn tanlGeneric(comptime T: type, x: T) T {
-    if (!(T == f80 or T == f128)) {
-        @compileError("`tanlGeneric` implemented only for `f80` and `f128`, got: " ++ T);
-    }
-
+pub fn tanx(x: f80) callconv(.c) f80 {
     const se = ld.signExponent(x) & 0x7fff;
     if (se == 0x7fff) {
         return x - x;
     }
 
     if (@abs(x) < kernel.pi_4) {
-        if (se < 0x3fff - math.floatMantissaBits(T) / 2) {
+        if (se < 0x3fff - math.floatMantissaBits(f80) / 2) {
             if (compiler_rt.want_float_exceptions) {
                 mem.doNotOptimizeAway(if (se == 0) x * 0x1p-120 else x + 0x1p120);
             }
             return x;
         }
-        return kernel.tanl(T, x, 0.0, 0);
+        return kernel.tanx(x, 0.0, 0);
     }
 
-    var y: [2]T = undefined;
-    const n = rem_pio2l(T, x, &y);
-    return kernel.tanl(T, y[0], y[1], n & 1);
-}
-
-pub fn tanx(x: f80) callconv(.c) f80 {
-    return tanlGeneric(f80, x);
+    var y: [2]f80 = undefined;
+    const n = rem_pio2l(f80, x, &y);
+    return kernel.tanx(y[0], y[1], n & 1);
 }
 
 pub fn tanq(x: f128) callconv(.c) f128 {
-    return tanlGeneric(f128, x);
+    const se = ld.signExponent(x) & 0x7fff;
+    if (se == 0x7fff) {
+        return x - x;
+    }
+
+    if (@abs(x) < kernel.pi_4) {
+        if (se < 0x3fff - math.floatMantissaBits(f128) / 2) {
+            if (compiler_rt.want_float_exceptions) {
+                mem.doNotOptimizeAway(if (se == 0) x * 0x1p-120 else x + 0x1p120);
+            }
+            return x;
+        }
+        return kernel.tanq(x, 0.0, 0);
+    }
+
+    var y: [2]f128 = undefined;
+    const n = rem_pio2l(f128, x, &y);
+    return kernel.tanq(y[0], y[1], n & 1);
 }
 
 pub fn tanl(x: c_longdouble) callconv(.c) c_longdouble {
diff --git a/lib/compiler_rt/trig.zig b/lib/compiler_rt/trig.zig
@@ -80,49 +80,43 @@ pub fn cosdf(x: f64) f32 {
     return @floatCast(((1.0 + z * C0) + w * C1) + (w * z) * r);
 }
 
-pub fn cosl(comptime T: type, x: T, y: T) T {
-    const impl = switch (T) {
-        f80 => struct {
-            const C1: T = 0.0416666666666666666136;
-
-            const C2: f64 = -0.0013888888888888874;
-            const C3: f64 = 0.000024801587301571716;
-            const C4: f64 = -0.00000027557319215507120;
-            const C5: f64 = 0.0000000020876754400407278;
-            const C6: f64 = -1.1470297442401303e-11;
-            const C7: f64 = 4.7383039476436467e-14;
-
-            inline fn poly(z: T) T {
-                return z * (C1 + z * (C2 + z * (C3 + z * (C4 +
-                    z * (C5 + z * (C6 + z * C7))))));
-            }
-        },
-        f128 => struct {
-            const C1: T = 0.04166666666666666666666666666666658424671;
-            const C2: T = -0.001388888888888888888888888888863490893732;
-            const C3: T = 0.00002480158730158730158730158600795304914210;
-            const C4: T = -0.2755731922398589065255474947078934284324e-6;
-            const C5: T = 0.2087675698786809897659225313136400793948e-8;
-            const C6: T = -0.1147074559772972315817149986812031204775e-10;
-            const C7: T = 0.4779477332386808976875457937252120293400e-13;
-
-            const C8: f64 = -0.1561920696721507929516718307820958119868e-15;
-            const C9: f64 = 0.4110317413744594971475941557607804508039e-18;
-            const C10: f64 = -0.8896592467191938803288521958313920156409e-21;
-            const C11: f64 = 0.1601061435794535138244346256065192782581e-23;
-
-            inline fn poly(z: T) T {
-                return z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * (C6 +
-                    z * (C7 + z * (C8 + z * (C9 + z * (C10 + z * C11))))))))));
-            }
-        },
-        else => @compileError("cosl supports only f80 and f128, got: " ++ @typeName(T)),
-    };
+pub fn cosx(x: f80, y: f80) f80 {
+    const C1: f80 = 0.0416666666666666666136;
+    const C2: f64 = -0.0013888888888888874;
+    const C3: f64 = 0.000024801587301571716;
+    const C4: f64 = -0.00000027557319215507120;
+    const C5: f64 = 0.0000000020876754400407278;
+    const C6: f64 = -1.1470297442401303e-11;
+    const C7: f64 = 4.7383039476436467e-14;
 
     const z = x * x;
-    const r = impl.poly(z);
+    const r = z * (C1 + z * (C2 + z * (C3 + z * (C4 +
+        z * (C5 + z * (C6 + z * C7))))));
     const hz = 0.5 * z;
     const w = 1.0 - hz;
+
+    return w + (((1.0 - w) - hz) + (z * r - x * y));
+}
+
+pub fn cosq(x: f128, y: f128) f128 {
+    const C1: f128 = 0.04166666666666666666666666666666658424671;
+    const C2: f128 = -0.001388888888888888888888888888863490893732;
+    const C3: f128 = 0.00002480158730158730158730158600795304914210;
+    const C4: f128 = -0.2755731922398589065255474947078934284324e-6;
+    const C5: f128 = 0.2087675698786809897659225313136400793948e-8;
+    const C6: f128 = -0.1147074559772972315817149986812031204775e-10;
+    const C7: f128 = 0.4779477332386808976875457937252120293400e-13;
+    const C8: f64 = -0.1561920696721507929516718307820958119868e-15;
+    const C9: f64 = 0.4110317413744594971475941557607804508039e-18;
+    const C10: f64 = -0.8896592467191938803288521958313920156409e-21;
+    const C11: f64 = 0.1601061435794535138244346256065192782581e-23;
+
+    const z = x * x;
+    const r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * (C6 +
+        z * (C7 + z * (C8 + z * (C9 + z * (C10 + z * C11))))))))));
+    const hz = 0.5 * z;
+    const w = 1.0 - hz;
+
     return w + (((1.0 - w) - hz) + (z * r - x * y));
 }
 
@@ -172,58 +166,6 @@ pub fn sin(x: f64, y: f64, iy: i32) f64 {
     }
 }
 
-pub fn sinl(comptime T: type, x: T, y: T, iy: i32) T {
-    const impl = switch (T) {
-        f80 => struct {
-            const S1: T = -0.166666666666666666671;
-
-            const S2: f64 = 0.0083333333333333332;
-            const S3: f64 = -0.00019841269841269427;
-            const S4: f64 = 0.0000027557319223597490;
-            const S5: f64 = -0.000000025052108218074604;
-            const S6: f64 = 1.6059006598854211e-10;
-            const S7: f64 = -7.6429779983024564e-13;
-            const S8: f64 = 2.6174587166648325e-15;
-
-            inline fn poly(z: T) T {
-                return S2 + z * (S3 + z * (S4 + z * (S5 +
-                    z * (S6 + z * (S7 + z * S8)))));
-            }
-        },
-        f128 => struct {
-            const S1: T = -0.16666666666666666666666666666666666606732416116558;
-            const S2: T = 0.0083333333333333333333333333333331135404851288270047;
-            const S3: T = -0.00019841269841269841269841269839935785325638310428717;
-            const S4: T = 0.27557319223985890652557316053039946268333231205686e-5;
-            const S5: T = -0.25052108385441718775048214826384312253862930064745e-7;
-            const S6: T = 0.16059043836821614596571832194524392581082444805729e-9;
-            const S7: T = -0.76471637318198151807063387954939213287488216303768e-12;
-            const S8: T = 0.28114572543451292625024967174638477283187397621303e-14;
-
-            const S9: f64 = -0.82206352458348947812512122163446202498005154296863e-17;
-            const S10: f64 = 0.19572940011906109418080609928334380560135358385256e-19;
-            const S11: f64 = -0.38680813379701966970673724299207480965452616911420e-22;
-            const S12: f64 = 0.64038150078671872796678569586315881020659912139412e-25;
-
-            inline fn poly(z: T) T {
-                return S2 + z * (S3 + z * (S4 + z * (S5 + z * (S6 + z * (S7 + z * (S8 +
-                    z * (S9 + z * (S10 + z * (S11 + z * S12)))))))));
-            }
-        },
-        else => @compileError("sinl supports only f80 and f128, got: " ++ @typeName(T)),
-    };
-
-    const z = x * x;
-    const v = z * x;
-    const r = impl.poly(z);
-
-    if (iy == 0) {
-        return x + v * (impl.S1 + z * r);
-    }
-
-    return x - ((z * (0.5 * y - v * r) - y) - v * impl.S1);
-}
-
 pub fn sindf(x: f64) f32 {
     // |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]).
     const S1 = -0x15555554cbac77.0p-55; // -0.166666666416265235595
@@ -239,6 +181,52 @@ pub fn sindf(x: f64) f32 {
     return @floatCast((x + s * (S1 + z * S2)) + s * w * r);
 }
 
+pub fn sinx(x: f80, y: f80, iy: i32) f80 {
+    const S1: f80 = -0.166666666666666666671;
+    const S2: f64 = 0.0083333333333333332;
+    const S3: f64 = -0.00019841269841269427;
+    const S4: f64 = 0.0000027557319223597490;
+    const S5: f64 = -0.000000025052108218074604;
+    const S6: f64 = 1.6059006598854211e-10;
+    const S7: f64 = -7.6429779983024564e-13;
+    const S8: f64 = 2.6174587166648325e-15;
+
+    const z = x * x;
+    const v = z * x;
+    const r = S2 + z * (S3 + z * (S4 + z * (S5 +
+        z * (S6 + z * (S7 + z * S8)))));
+
+    if (iy == 0)
+        return x + v * (S1 + z * r);
+
+    return x - ((z * (0.5 * y - v * r) - y) - v * S1);
+}
+
+pub fn sinq(x: f128, y: f128, iy: i32) f128 {
+    const S1: f128 = -0.16666666666666666666666666666666666606732416116558;
+    const S2: f128 = 0.0083333333333333333333333333333331135404851288270047;
+    const S3: f128 = -0.00019841269841269841269841269839935785325638310428717;
+    const S4: f128 = 0.27557319223985890652557316053039946268333231205686e-5;
+    const S5: f128 = -0.25052108385441718775048214826384312253862930064745e-7;
+    const S6: f128 = 0.16059043836821614596571832194524392581082444805729e-9;
+    const S7: f128 = -0.76471637318198151807063387954939213287488216303768e-12;
+    const S8: f128 = 0.28114572543451292625024967174638477283187397621303e-14;
+    const S9: f64 = -0.82206352458348947812512122163446202498005154296863e-17;
+    const S10: f64 = 0.19572940011906109418080609928334380560135358385256e-19;
+    const S11: f64 = -0.38680813379701966970673724299207480965452616911420e-22;
+    const S12: f64 = 0.64038150078671872796678569586315881020659912139412e-25;
+
+    const z = x * x;
+    const v = z * x;
+    const r = S2 + z * (S3 + z * (S4 + z * (S5 + z * (S6 + z * (S7 + z * (S8 +
+        z * (S9 + z * (S10 + z * (S11 + z * S12)))))))));
+
+    if (iy == 0)
+        return x + v * (S1 + z * r);
+
+    return x - ((z * (0.5 * y - v * r) - y) - v * S1);
+}
+
 /// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
 /// Input x is assumed to be bounded by ~pi/4 in magnitude.
 /// Input y is the tail of x.
@@ -377,88 +365,112 @@ pub fn tandf(x: f64, odd: bool) f32 {
     return @floatCast(if (odd) -1.0 / r0 else r0);
 }
 
-pub fn tanl(comptime T: type, x_: T, y_: T, odd: i32) T {
+pub fn tanx(x_: f80, y_: f80, odd: i32) f80 {
+    const pio4: f80 = 0.785398163397448309628;
+    const pio4lo: f80 = -1.25413940316708300586e-20;
+
+    const T3: f80 = 0.333333333333333333180;
+    const T5: f80 = 0.133333333333333372290;
+    const T7: f80 = 0.0539682539682504975744;
+    const T9: f64 = 0.021869488536312216;
+    const T11: f64 = 0.0088632355256619590;
+    const T13: f64 = 0.0035921281113786528;
+    const T15: f64 = 0.0014558334756312418;
+    const T17: f64 = 0.00059003538700862256;
+    const T19: f64 = 0.00023907843576635544;
+    const T21: f64 = 0.000097154625656538905;
+    const T23: f64 = 0.000038440165747303162;
+    const T25: f64 = 0.000018082171885432524;
+    const T27: f64 = 0.0000024196006108814377;
+    const T29: f64 = 0.0000078293456938132840;
+    const T31: f64 = -0.0000032609076735050182;
+    const T33: f64 = 0.0000023261313142559411;
+
+    var x = x_;
+    var y = y_;
+    const big = @abs(x) >= 0.67434;
+    var sign: i8 = 0;
+
+    if (big) {
+        if (x < 0) {
+            sign = -1;
+            x = -x;
+            y = -y;
+        }
+        x = (pio4 - x) + (pio4lo - y);
+        y = 0.0;
+    }
+
+    var z = x * x;
+    var w = z * z;
+
+    var r = T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
+        w * (T25 + w * (T29 + w * T33))))));
+
+    var v = z * (T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
+        w * (T27 + w * T31))))));
+
+    var s = z * x;
+    r = y + z * (s * (r + v) + y) + T3 * s;
+    w = x + r;
+
+    if (big) {
+        s = @as(f80, @floatFromInt(1 - 2 * odd));
+        v = s - 2.0 * (x + (r - w * w / (w + s)));
+        return if (sign == -1) -v else v;
+    }
+
+    if (odd == 0) {
+        return w;
+    }
+
+    // if allow error up to 2 ulp, simply return
+    // -1.0 / (x+r) here
+    //
+    // compute -1.0 / (x+r) accurately
+    z = w + 0x1p32 - 0x1p32;
+    v = r - (z - x);
+    const a = -1.0 / w;
+    const t = a + 0x1p32 - 0x1p32;
+    s = 1.0 + t * z;
+    return t + a * (s + t * v);
+}
+
+pub fn tanq(x_: f128, y_: f128, odd: i32) f128 {
+    const pio4: f128 = 0x1.921fb54442d18469898cc51701b8p-1;
+    const pio4lo: f128 = 0x1.cd129024e088a67cc74020bbea60p-116;
+
+    const T3: f128 = 0x1.5555555555555555555555555553p-2;
+    const T5: f128 = 0x1.1111111111111111111111111eb5p-3;
+    const T7: f128 = 0x1.ba1ba1ba1ba1ba1ba1ba1b694cd6p-5;
+    const T9: f128 = 0x1.664f4882c10f9f32d6bbe09d8bcdp-6;
+    const T11: f128 = 0x1.226e355e6c23c8f5b4f5762322eep-7;
+    const T13: f128 = 0x1.d6d3d0e157ddfb5fed8e84e27b37p-9;
+    const T15: f128 = 0x1.7da36452b75e2b5fce9ee7c2c92ep-10;
+    const T17: f128 = 0x1.355824803674477dfcf726649efep-11;
+    const T19: f128 = 0x1.f57d7734d1656e0aceb716f614c2p-13;
+    const T21: f128 = 0x1.967e18afcb180ed942dfdc518d6cp-14;
+    const T23: f128 = 0x1.497d8eea21e95bc7e2aa79b9f2cdp-15;
+    const T25: f128 = 0x1.0b132d39f055c81be49eff7afd50p-16;
+    const T27: f128 = 0x1.b0f72d33eff7bfa2fbc1059d90b6p-18;
+    const T29: f128 = 0x1.5ef2daf21d1113df38d0fbc00267p-19;
+    const T31: f128 = 0x1.1c77d6eac0234988cdaa04c96626p-20;
+    const T33: f128 = 0x1.cd2a5a292b180e0bdd701057dfe3p-22;
+    const T35: f128 = 0x1.75c7357d0298c01a31d0a6f7d518p-23;
+    const T37: f128 = 0x1.2f3190f4718a9a520f98f50081fcp-24;
+    const T39: f64 = 0.000000028443389121318352;
+    const T41: f64 = 0.000000011981013102001973;
+    const T43: f64 = 0.0000000038303578044958070;
+    const T45: f64 = 0.0000000034664378216909893;
+    const T47: f64 = -0.0000000015090641701997785;
+    const T49: f64 = 0.0000000029449552300483952;
+    const T51: f64 = -0.0000000022006995706097711;
+    const T53: f64 = 0.0000000015468200913196612;
+    const T55: f64 = -0.00000000061311613386849674;
+    const T57: f64 = 1.4912469681508012e-10;
+
     var x = x_;
     var y = y_;
-    const impl = switch (T) {
-        f80 => struct {
-            const pio4: T = 0.785398163397448309628;
-            const pio4lo: T = -1.25413940316708300586e-20;
-
-            const T3: T = 0.333333333333333333180;
-            const T5: T = 0.133333333333333372290;
-            const T7: T = 0.0539682539682504975744;
-            const T9: f64 = 0.021869488536312216;
-            const T11: f64 = 0.0088632355256619590;
-            const T13: f64 = 0.0035921281113786528;
-            const T15: f64 = 0.0014558334756312418;
-            const T17: f64 = 0.00059003538700862256;
-            const T19: f64 = 0.00023907843576635544;
-            const T21: f64 = 0.000097154625656538905;
-            const T23: f64 = 0.000038440165747303162;
-            const T25: f64 = 0.000018082171885432524;
-            const T27: f64 = 0.0000024196006108814377;
-            const T29: f64 = 0.0000078293456938132840;
-            const T31: f64 = -0.0000032609076735050182;
-            const T33: f64 = 0.0000023261313142559411;
-
-            inline fn rpoly(w: T) T {
-                return T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
-                    w * (T25 + w * (T29 + w * T33))))));
-            }
-
-            inline fn vpoly(w: T) T {
-                return T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
-                    w * (T27 + w * T31)))));
-            }
-        },
-        f128 => struct {
-            const pio4: T = 0x1.921fb54442d18469898cc51701b8p-1;
-            const pio4lo: T = 0x1.cd129024e088a67cc74020bbea60p-116;
-
-            const T3: T = 0x1.5555555555555555555555555553p-2;
-            const T5: T = 0x1.1111111111111111111111111eb5p-3;
-            const T7: T = 0x1.ba1ba1ba1ba1ba1ba1ba1b694cd6p-5;
-            const T9: T = 0x1.664f4882c10f9f32d6bbe09d8bcdp-6;
-            const T11: T = 0x1.226e355e6c23c8f5b4f5762322eep-7;
-            const T13: T = 0x1.d6d3d0e157ddfb5fed8e84e27b37p-9;
-            const T15: T = 0x1.7da36452b75e2b5fce9ee7c2c92ep-10;
-            const T17: T = 0x1.355824803674477dfcf726649efep-11;
-            const T19: T = 0x1.f57d7734d1656e0aceb716f614c2p-13;
-            const T21: T = 0x1.967e18afcb180ed942dfdc518d6cp-14;
-            const T23: T = 0x1.497d8eea21e95bc7e2aa79b9f2cdp-15;
-            const T25: T = 0x1.0b132d39f055c81be49eff7afd50p-16;
-            const T27: T = 0x1.b0f72d33eff7bfa2fbc1059d90b6p-18;
-            const T29: T = 0x1.5ef2daf21d1113df38d0fbc00267p-19;
-            const T31: T = 0x1.1c77d6eac0234988cdaa04c96626p-20;
-            const T33: T = 0x1.cd2a5a292b180e0bdd701057dfe3p-22;
-            const T35: T = 0x1.75c7357d0298c01a31d0a6f7d518p-23;
-            const T37: T = 0x1.2f3190f4718a9a520f98f50081fcp-24;
-            const T39: f64 = 0.000000028443389121318352;
-            const T41: f64 = 0.000000011981013102001973;
-            const T43: f64 = 0.0000000038303578044958070;
-            const T45: f64 = 0.0000000034664378216909893;
-            const T47: f64 = -0.0000000015090641701997785;
-            const T49: f64 = 0.0000000029449552300483952;
-            const T51: f64 = -0.0000000022006995706097711;
-            const T53: f64 = 0.0000000015468200913196612;
-            const T55: f64 = -0.00000000061311613386849674;
-            const T57: f64 = 1.4912469681508012e-10;
-
-            inline fn rpoly(w: T) T {
-                return T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
-                    w * (T25 + w * (T29 + w * (T33 + w * (T37 + w * (T41 +
-                        w * (T45 + w * (T49 + w * (T53 + w * T57))))))))))));
-            }
-
-            inline fn vpoly(w: T) T {
-                return T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
-                    w * (T27 + w * (T31 + w * (T35 + w * (T39 + w * (T43 +
-                        w * (T47 + w * (T51 + w * T55)))))))))));
-            }
-        },
-        else => @compileError("tanl supports only f80 and f128, got: " ++ @typeName(T)),
-    };
 
     const big = @abs(x) >= 0.67434;
     var sign: i8 = 0;
@@ -469,20 +481,27 @@ pub fn tanl(comptime T: type, x_: T, y_: T, odd: i32) T {
             x = -x;
             y = -y;
         }
-        x = (impl.pio4 - x) + (impl.pio4lo - y);
+        x = (pio4 - x) + (pio4lo - y);
         y = 0.0;
     }
 
     var z = x * x;
     var w = z * z;
-    var r = impl.rpoly(w);
-    var v = z * impl.vpoly(w);
+
+    var r = T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
+        w * (T25 + w * (T29 + w * (T33 + w * (T37 + w * (T41 +
+            w * (T45 + w * (T49 + w * (T53 + w * T57))))))))))));
+
+    var v = z * (T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
+        w * (T27 + w * (T31 + w * (T35 + w * (T39 + w * (T43 +
+            w * (T47 + w * (T51 + w * T55))))))))))));
+
     var s = z * x;
-    r = y + z * (s * (r + v) + y) + impl.T3 * s;
+    r = y + z * (s * (r + v) + y) + T3 * s;
     w = x + r;
 
     if (big) {
-        s = @as(T, @floatFromInt(1 - 2 * odd));
+        s = @as(f128, @floatFromInt(1 - 2 * odd));
         v = s - 2.0 * (x + (r - w * w / (w + s)));
         return if (sign == -1) -v else v;
     }

	zig fork of https://codeberg.org/ziglang/zig
	Log \| Files \| Refs \| README \| LICENSE

M	lib/compiler_rt/cos.zig	\|	44	++++++++++++++++++++++++++++++--------------
M	lib/compiler_rt/sin.zig	\|	47	+++++++++++++++++++++++++++++++++--------------
M	lib/compiler_rt/sincos.zig	\|	71	++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
M	lib/compiler_rt/tan.zig	\|	39	++++++++++++++++++++++++---------------
M	lib/compiler_rt/trig.zig	\|	371	+++++++++++++++++++++++++++++++++++++++++--------------------------------------