commit ffd6f6cc6e05e84a4e029b02ff13a3e84cbf1aa4 (tree)
parent c76644caf3051dd2f837a6c40987a64e01b09c54
Author: mihael <hi@mihaelm.com>
Date: Tue, 24 Mar 2026 01:11:56 +0100
`libzigc/math`: Implement more precise `tanl` in `compiler_rt`
The logic was more or less ported from `musl`, with small adjustments
where it was convenient. The 'internal' `__tanl` function was implemented
in the `trig.zig` module along with other 'internal' trigonometric functions.
Now, the `tanl` implementation is precise enough to pass all the relevant `libc-test` suite tests:
```
$ ./build/stage3/bin/zig build -p stage4 -Denable-llvm -Dno-lib
$ stage4/bin/zig build test-libc -Dlibc-test-path=<LIBC-TEST-PATH> -Dtest-filter=tanl -fqemu -fwasmtime --summary line
Build Summary: 553/553 steps succeeded
```
The unit tests were also extended to include cases for `f80` and `f128`, and they're passing.
Diffstat:
2 files changed, 223 insertions(+), 33 deletions(-)
diff --git a/lib/compiler_rt/tan.zig b/lib/compiler_rt/tan.zig
@@ -3,6 +3,7 @@
//!
//! https://git.musl-libc.org/cgit/musl/tree/src/math/tanf.c
//! https://git.musl-libc.org/cgit/musl/tree/src/math/tan.c
+//! https://git.musl-libc.org/cgit/musl/tree/src/math/tanl.c
//! https://golang.org/src/math/tan.go
const std = @import("std");
@@ -10,10 +11,13 @@ const builtin = @import("builtin");
const math = std.math;
const mem = std.mem;
const expect = std.testing.expect;
+const expectApproxEqAbs = std.testing.expectApproxEqAbs;
const kernel = @import("trig.zig");
const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;
+const rem_pio2l = @import("rem_pio2l.zig").rem_pio2l;
+const utils = @import("math_utils.zig");
const arch = builtin.cpu.arch;
const compiler_rt = @import("../compiler_rt.zig");
@@ -116,14 +120,38 @@ pub fn tan(x: f64) callconv(.c) f64 {
return kernel.__tan(y[0], y[1], n & 1 != 0);
}
+fn tanlGeneric(comptime T: type, x: T) T {
+ if (!(T == f80 or T == f128)) {
+ @compileError("`tanlGeneric` implemented only for `f80` and `f128`, got: " ++ T);
+ }
+
+ const se = utils.ldSignExponent(x) & 0x7fff;
+ if (se == 0x7fff) {
+ return x - x;
+ }
+
+ const pi_4 = 0.78539816339744830962;
+ if (@abs(x) < pi_4) {
+ if (se < 0x3fff - math.floatMantissaBits(T) / 2) {
+ if (compiler_rt.want_float_exceptions) {
+ mem.doNotOptimizeAway(if (se == 0) x * 0x1p-120 else x + 0x1p120);
+ }
+ return x;
+ }
+ return kernel.__tanl(T, x, 0.0, 0);
+ }
+
+ var y: [2]T = undefined;
+ const n = rem_pio2l(T, x, &y);
+ return kernel.__tanl(T, y[0], y[1], n & 1);
+}
+
pub fn __tanx(x: f80) callconv(.c) f80 {
- // TODO: more efficient implementation
- return @floatCast(tanq(x));
+ return tanlGeneric(f80, x);
}
pub fn tanq(x: f128) callconv(.c) f128 {
- // TODO: more correct implementation
- return tan(@floatCast(x));
+ return tanlGeneric(f128, x);
}
pub fn tanl(x: c_longdouble) callconv(.c) c_longdouble {
@@ -137,45 +165,80 @@ pub fn tanl(x: c_longdouble) callconv(.c) c_longdouble {
}
}
-test "tan" {
- try expect(tan(@as(f32, 0.0)) == tanf(0.0));
- try expect(tan(@as(f64, 0.0)) == tan(0.0));
+fn testTanNormal(comptime T: type) !void {
+ const f = switch (T) {
+ f32 => tanf,
+ f64 => tan,
+ else => @compileError("unimplemented"),
+ };
+ const epsilon = 0.00001;
+
+ try expectApproxEqAbs(@as(T, 0.0), f(0.0), epsilon);
+ try expectApproxEqAbs(@as(T, 0.202710), f(0.2), epsilon);
+ try expectApproxEqAbs(@as(T, 1.240422), f(0.8923), epsilon);
+ try expectApproxEqAbs(@as(T, 14.101420), f(1.5), epsilon);
+ try expectApproxEqAbs(@as(T, -0.254397), f(37.45), epsilon);
+ try expectApproxEqAbs(@as(T, 2.285837), f(89.123), epsilon);
}
-test "tan32" {
- const epsilon = 0.00001;
+fn testTanSpecial(comptime T: type) !void {
+ const f = switch (T) {
+ f32 => tanf,
+ f64 => tan,
+ f80 => __tanx,
+ f128 => tanq,
+ else => @compileError("unimplemented"),
+ };
+
+ try expect(math.isPositiveZero(f(0.0)));
+ try expect(math.isNegativeZero(f(-0.0)));
+ try expect(math.isNan(f(math.inf(f32))));
+ try expect(math.isNan(f(-math.inf(f32))));
+ try expect(math.isNan(f(math.nan(f32))));
+}
- try expect(math.approxEqAbs(f32, tanf(0.0), 0.0, epsilon));
- try expect(math.approxEqAbs(f32, tanf(0.2), 0.202710, epsilon));
- try expect(math.approxEqAbs(f32, tanf(0.8923), 1.240422, epsilon));
- try expect(math.approxEqAbs(f32, tanf(1.5), 14.101420, epsilon));
- try expect(math.approxEqAbs(f32, tanf(37.45), -0.254397, epsilon));
- try expect(math.approxEqAbs(f32, tanf(89.123), 2.285852, epsilon));
+test "tan32.normal" {
+ try testTanNormal(f32);
}
-test "tan64" {
- const epsilon = 0.000001;
+test "tan64.normal" {
+ try testTanNormal(f64);
+}
+
+test "tan80.normal" {
+ const epsilon = math.floatEps(f80);
- try expect(math.approxEqAbs(f64, tan(0.0), 0.0, epsilon));
- try expect(math.approxEqAbs(f64, tan(0.2), 0.202710, epsilon));
- try expect(math.approxEqAbs(f64, tan(0.8923), 1.240422, epsilon));
- try expect(math.approxEqAbs(f64, tan(1.5), 14.101420, epsilon));
- try expect(math.approxEqAbs(f64, tan(37.45), -0.254397, epsilon));
- try expect(math.approxEqAbs(f64, tan(89.123), 2.2858376, epsilon));
+ try expectApproxEqAbs(@as(f80, 0.0), __tanx(0.0), epsilon);
+ try expectApproxEqAbs(@as(f80, 0.2027100355086724833213582716475345), __tanx(0.2), epsilon);
+ try expectApproxEqAbs(@as(f80, 1.2404217445497097995561220131857544), __tanx(0.8923), epsilon);
+ try expectApproxEqAbs(@as(f80, 14.10141994717171938764), __tanx(1.5), epsilon);
+ try expectApproxEqAbs(@as(f80, -0.25439607116885656232), __tanx(37.45), epsilon);
+ try expectApproxEqAbs(@as(f80, 2.2858376251355320963), __tanx(89.123), epsilon);
+}
+
+test "tan128.normal" {
+ const epsilon = math.floatEps(f128);
+
+ try expectApproxEqAbs(@as(f128, 0.0), tanq(0.0), epsilon);
+ try expectApproxEqAbs(@as(f128, 0.2027100355086724833213582716475345), tanq(0.2), epsilon);
+ try expectApproxEqAbs(@as(f128, 1.2404217445497097995561220131857544), tanq(0.8923), epsilon);
+ try expectApproxEqAbs(@as(f128, 14.101419947171719387646083651987755), tanq(1.5), epsilon);
+ try expectApproxEqAbs(@as(f128, -0.2543960711688565630469573224504774), tanq(37.45), epsilon);
+ try expectApproxEqAbs(@as(f128, 2.2858376251355321074066028114094292), tanq(89.123), epsilon);
}
test "tan32.special" {
- try expect(tanf(0.0) == 0.0);
- try expect(tanf(-0.0) == -0.0);
- try expect(math.isNan(tanf(math.inf(f32))));
- try expect(math.isNan(tanf(-math.inf(f32))));
- try expect(math.isNan(tanf(math.nan(f32))));
+ try testTanSpecial(f32);
}
test "tan64.special" {
- try expect(tan(0.0) == 0.0);
- try expect(tan(-0.0) == -0.0);
- try expect(math.isNan(tan(math.inf(f64))));
- try expect(math.isNan(tan(-math.inf(f64))));
- try expect(math.isNan(tan(math.nan(f64))));
+ try testTanSpecial(f64);
+}
+
+test "tan80.special" {
+ try testTanSpecial(f80);
+}
+
+test "tan128.special" {
+ try testTanSpecial(f128);
}
diff --git a/lib/compiler_rt/trig.zig b/lib/compiler_rt/trig.zig
@@ -7,6 +7,7 @@
// https://git.musl-libc.org/cgit/musl/tree/src/math/__sindf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tand.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tandf.c
+// https://git.musl-libc.org/cgit/musl/tree/src/math/__tanl.c
/// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
/// Input x is assumed to be bounded by ~pi/4 in magnitude.
@@ -271,3 +272,129 @@ pub fn __tandf(x: f64, odd: bool) f32 {
const r0 = (x + s * u) + (s * w) * (t + w * r);
return @floatCast(if (odd) -1.0 / r0 else r0);
}
+
+pub fn __tanl(comptime T: type, x_: T, y_: T, odd: i32) T {
+ var x = x_;
+ var y = y_;
+ const impl = switch (T) {
+ f80 => struct {
+ const pio4: T = 0.785398163397448309628;
+ const pio4lo: T = -1.25413940316708300586e-20;
+
+ const T3: T = 0.333333333333333333180;
+ const T5: T = 0.133333333333333372290;
+ const T7: T = 0.0539682539682504975744;
+ const T9: f64 = 0.021869488536312216;
+ const T11: f64 = 0.0088632355256619590;
+ const T13: f64 = 0.0035921281113786528;
+ const T15: f64 = 0.0014558334756312418;
+ const T17: f64 = 0.00059003538700862256;
+ const T19: f64 = 0.00023907843576635544;
+ const T21: f64 = 0.000097154625656538905;
+ const T23: f64 = 0.000038440165747303162;
+ const T25: f64 = 0.000018082171885432524;
+ const T27: f64 = 0.0000024196006108814377;
+ const T29: f64 = 0.0000078293456938132840;
+ const T31: f64 = -0.0000032609076735050182;
+ const T33: f64 = 0.0000023261313142559411;
+
+ inline fn rpoly(w: T) T {
+ return T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
+ w * (T25 + w * (T29 + w * T33))))));
+ }
+
+ inline fn vpoly(w: T) T {
+ return T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
+ w * (T27 + w * T31)))));
+ }
+ },
+ f128 => struct {
+ const pio4: T = 0x1.921fb54442d18469898cc51701b8p-1;
+ const pio4lo: T = 0x1.cd129024e088a67cc74020bbea60p-116;
+
+ const T3: T = 0x1.5555555555555555555555555553p-2;
+ const T5: T = 0x1.1111111111111111111111111eb5p-3;
+ const T7: T = 0x1.ba1ba1ba1ba1ba1ba1ba1b694cd6p-5;
+ const T9: T = 0x1.664f4882c10f9f32d6bbe09d8bcdp-6;
+ const T11: T = 0x1.226e355e6c23c8f5b4f5762322eep-7;
+ const T13: T = 0x1.d6d3d0e157ddfb5fed8e84e27b37p-9;
+ const T15: T = 0x1.7da36452b75e2b5fce9ee7c2c92ep-10;
+ const T17: T = 0x1.355824803674477dfcf726649efep-11;
+ const T19: T = 0x1.f57d7734d1656e0aceb716f614c2p-13;
+ const T21: T = 0x1.967e18afcb180ed942dfdc518d6cp-14;
+ const T23: T = 0x1.497d8eea21e95bc7e2aa79b9f2cdp-15;
+ const T25: T = 0x1.0b132d39f055c81be49eff7afd50p-16;
+ const T27: T = 0x1.b0f72d33eff7bfa2fbc1059d90b6p-18;
+ const T29: T = 0x1.5ef2daf21d1113df38d0fbc00267p-19;
+ const T31: T = 0x1.1c77d6eac0234988cdaa04c96626p-20;
+ const T33: T = 0x1.cd2a5a292b180e0bdd701057dfe3p-22;
+ const T35: T = 0x1.75c7357d0298c01a31d0a6f7d518p-23;
+ const T37: T = 0x1.2f3190f4718a9a520f98f50081fcp-24;
+ const T39: f64 = 0.000000028443389121318352;
+ const T41: f64 = 0.000000011981013102001973;
+ const T43: f64 = 0.0000000038303578044958070;
+ const T45: f64 = 0.0000000034664378216909893;
+ const T47: f64 = -0.0000000015090641701997785;
+ const T49: f64 = 0.0000000029449552300483952;
+ const T51: f64 = -0.0000000022006995706097711;
+ const T53: f64 = 0.0000000015468200913196612;
+ const T55: f64 = -0.00000000061311613386849674;
+ const T57: f64 = 1.4912469681508012e-10;
+
+ inline fn rpoly(w: T) T {
+ return T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
+ w * (T25 + w * (T29 + w * (T33 + w * (T37 + w * (T41 +
+ w * (T45 + w * (T49 + w * (T53 + w * T57))))))))))));
+ }
+
+ inline fn vpoly(w: T) T {
+ return T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
+ w * (T27 + w * (T31 + w * (T35 + w * (T39 + w * (T43 +
+ w * (T47 + w * (T51 + w * T55)))))))))));
+ }
+ },
+ else => @compileError("__tanl supports only f80 and f128, got: " ++ @typeName(T)),
+ };
+
+ const big = @abs(x) >= 0.67434;
+ var sign: i8 = 0;
+
+ if (big) {
+ if (x < 0) {
+ sign = -1;
+ x = -x;
+ y = -y;
+ }
+ x = (impl.pio4 - x) + (impl.pio4lo - y);
+ y = 0.0;
+ }
+
+ var z = x * x;
+ var w = z * z;
+ var r = impl.rpoly(w);
+ var v = z * impl.vpoly(w);
+ var s = z * x;
+ r = y + z * (s * (r + v) + y) + impl.T3 * s;
+ w = x + r;
+
+ if (big) {
+ s = @as(T, @floatFromInt(1 - 2 * odd));
+ v = s - 2.0 * (x + (r - w * w / (w + s)));
+ return if (sign == -1) -v else v;
+ }
+
+ if (odd == 0) {
+ return w;
+ }
+
+ // if allow error up to 2 ulp, simply return
+ // -1.0 / (x+r) here
+ //
+ // compute -1.0 / (x+r) accurately
+ z = w + 0x1p32 - 0x1p32;
+ v = r - (z - x);
+ const a = -1.0 / w;
+ const t = a + 0x1p32 - 0x1p32;
+ s = 1.0 + t * z;
+ return t + a * (s + t * v);
+}