From f7f03c699d2dee4c3eec2108e159bbe300e24c6f Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 27 Apr 2022 22:37:07 -0700
Subject: [PATCH] compiler-rt: provide actual sincos implementations

---
Review notes (free-form commentary placed after the "---" separator):

* Line breaks restored; this patch had been collapsed onto a few long lines.
* cos.zig / sin.zig: the only change is renaming the `kernel` import alias
  to `trig`.
* sincos.zig: sincosf and sincos now perform a single argument reduction and
  evaluate both the sine and cosine kernels on the reduced value, instead of
  calling the separate sin and cos entry points. __sincosh goes through
  sincosf, __sincosx goes through sincosq, and sincosq still narrows its
  argument to f64 (see the TODO in its body).
* sincos_generic is not called yet; rem_pio2_generic is a @compileError
  placeholder.
* sincosf: the subnormal test in the tiny-argument path used 0x00100000,
  which is the threshold the f64 sin uses on the high word; it now uses
  0x00800000, matching sinf in sin.zig. Only which expression is forced
  for its floating-point side effects changes; returned values do not.
* sincosf: the Inf/NaN comment said "sin"; it now says "sincos".

 lib/std/special/compiler_rt/cos.zig    |  34 ++--
 lib/std/special/compiler_rt/sin.zig    |  34 ++--
 lib/std/special/compiler_rt/sincos.zig | 245 +++++++++++++++++++++++--
 3 files changed, 264 insertions(+), 49 deletions(-)

diff --git a/lib/std/special/compiler_rt/cos.zig b/lib/std/special/compiler_rt/cos.zig
index 295f6a47ea..957e5f9c91 100644
--- a/lib/std/special/compiler_rt/cos.zig
+++ b/lib/std/special/compiler_rt/cos.zig
@@ -2,7 +2,7 @@ const std = @import("std");
 const math = std.math;
 const expect = std.testing.expect;
 
-const kernel = @import("trig.zig");
+const trig = @import("trig.zig");
 const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
 const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;
 
@@ -28,27 +28,27 @@ pub fn cosf(x: f32) callconv(.C) f32 {
             math.doNotOptimizeAway(x + 0x1p120);
             return 1.0;
         }
-        return kernel.__cosdf(x);
+        return trig.__cosdf(x);
     }
     if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
         if (ix > 0x4016cbe3) { // |x| ~> 3*pi/4
-            return -kernel.__cosdf(if (sign) x + c2pio2 else x - c2pio2);
+            return -trig.__cosdf(if (sign) x + c2pio2 else x - c2pio2);
         } else {
             if (sign) {
-                return kernel.__sindf(x + c1pio2);
+                return trig.__sindf(x + c1pio2);
             } else {
-                return kernel.__sindf(c1pio2 - x);
+                return trig.__sindf(c1pio2 - x);
             }
         }
     }
     if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
         if (ix > 0x40afeddf) { // |x| ~> 7*pi/4
-            return kernel.__cosdf(if (sign) x + c4pio2 else x - c4pio2);
+            return trig.__cosdf(if (sign) x + c4pio2 else x - c4pio2);
         } else {
             if (sign) {
-                return kernel.__sindf(-x - c3pio2);
+                return trig.__sindf(-x - c3pio2);
             } else {
-                return kernel.__sindf(x - c3pio2);
+                return trig.__sindf(x - c3pio2);
             }
         }
     }
@@ -61,10 +61,10 @@ pub fn cosf(x: f32) callconv(.C) f32 {
     var y: f64 = undefined;
     const n = rem_pio2f(x, &y);
     return switch (n & 3) {
-        0 => kernel.__cosdf(y),
-        1 => kernel.__sindf(-y),
-        2 => -kernel.__cosdf(y),
-        else => kernel.__sindf(y),
+        0 => trig.__cosdf(y),
+        1 => trig.__sindf(-y),
+        2 => -trig.__cosdf(y),
+        else => trig.__sindf(y),
     };
 }
 
@@ -79,7 +79,7 @@ pub fn cos(x: f64) callconv(.C) f64 {
             math.doNotOptimizeAway(x + 0x1p120);
             return 1.0;
         }
-        return kernel.__cos(x, 0);
+        return trig.__cos(x, 0);
     }
 
     // cos(Inf or NaN) is NaN
@@ -90,10 +90,10 @@ pub fn cos(x: f64) callconv(.C) f64 {
     var y: [2]f64 = undefined;
     const n = rem_pio2(x, &y);
     return switch (n & 3) {
-        0 => kernel.__cos(y[0], y[1]),
-        1 => -kernel.__sin(y[0], y[1], 1),
-        2 => -kernel.__cos(y[0], y[1]),
-        else => kernel.__sin(y[0], y[1], 1),
+        0 => trig.__cos(y[0], y[1]),
+        1 => -trig.__sin(y[0], y[1], 1),
+        2 => -trig.__cos(y[0], y[1]),
+        else => trig.__sin(y[0], y[1], 1),
     };
 }
 
diff --git a/lib/std/special/compiler_rt/sin.zig b/lib/std/special/compiler_rt/sin.zig
index a0876d30a0..3d5572a59f 100644
--- a/lib/std/special/compiler_rt/sin.zig
+++ b/lib/std/special/compiler_rt/sin.zig
@@ -8,7 +8,7 @@ const std = @import("std");
 const math = std.math;
 const expect = std.testing.expect;
 
-const kernel = @import("trig.zig");
+const trig = @import("trig.zig");
 const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
 const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;
 
@@ -34,27 +34,27 @@ pub fn sinf(x: f32) callconv(.C) f32 {
             math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120);
             return x;
         }
-        return kernel.__sindf(x);
+        return trig.__sindf(x);
     }
     if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
         if (ix <= 0x4016cbe3) { // |x| ~<= 3pi/4
             if (sign) {
-                return -kernel.__cosdf(x + s1pio2);
+                return -trig.__cosdf(x + s1pio2);
             } else {
-                return kernel.__cosdf(x - s1pio2);
+                return trig.__cosdf(x - s1pio2);
             }
         }
-        return kernel.__sindf(if (sign) -(x + s2pio2) else -(x - s2pio2));
+        return trig.__sindf(if (sign) -(x + s2pio2) else -(x - s2pio2));
     }
     if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
         if (ix <= 0x40afeddf) { // |x| ~<= 7*pi/4
             if (sign) {
-                return kernel.__cosdf(x + s3pio2);
+                return trig.__cosdf(x + s3pio2);
             } else {
-                return -kernel.__cosdf(x - s3pio2);
+                return -trig.__cosdf(x - s3pio2);
             }
         }
-        return kernel.__sindf(if (sign) x + s4pio2 else x - s4pio2);
+        return trig.__sindf(if (sign) x + s4pio2 else x - s4pio2);
     }
 
     // sin(Inf or NaN) is NaN
@@ -65,10 +65,10 @@ pub fn sinf(x: f32) callconv(.C) f32 {
     var y: f64 = undefined;
     const n = rem_pio2f(x, &y);
     return switch (n & 3) {
-        0 => kernel.__sindf(y),
-        1 => kernel.__cosdf(y),
-        2 => kernel.__sindf(-y),
-        else => -kernel.__cosdf(y),
+        0 => trig.__sindf(y),
+        1 => trig.__cosdf(y),
+        2 => trig.__sindf(-y),
+        else => -trig.__cosdf(y),
     };
 }
 
@@ -83,7 +83,7 @@ pub fn sin(x: f64) callconv(.C) f64 {
             math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120);
             return x;
         }
-        return kernel.__sin(x, 0.0, 0);
+        return trig.__sin(x, 0.0, 0);
     }
 
     // sin(Inf or NaN) is NaN
@@ -94,10 +94,10 @@ pub fn sin(x: f64) callconv(.C) f64 {
     var y: [2]f64 = undefined;
     const n = rem_pio2(x, &y);
     return switch (n & 3) {
-        0 => kernel.__sin(y[0], y[1], 1),
-        1 => kernel.__cos(y[0], y[1]),
-        2 => -kernel.__sin(y[0], y[1], 1),
-        else => -kernel.__cos(y[0], y[1]),
+        0 => trig.__sin(y[0], y[1], 1),
+        1 => trig.__cos(y[0], y[1]),
+        2 => -trig.__sin(y[0], y[1], 1),
+        else => -trig.__cos(y[0], y[1]),
     };
 }
 
diff --git a/lib/std/special/compiler_rt/sincos.zig b/lib/std/special/compiler_rt/sincos.zig
index 0518e021ea..31ebd0d1d0 100644
--- a/lib/std/special/compiler_rt/sincos.zig
+++ b/lib/std/special/compiler_rt/sincos.zig
@@ -1,27 +1,242 @@
+const std = @import("std");
+const math = std.math;
 const sin = @import("sin.zig");
 const cos = @import("cos.zig");
+const trig = @import("trig.zig");
+const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
+const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;
 
-pub fn __sincosh(a: f16, r_sin: *f16, r_cos: *f16) callconv(.C) void {
-    r_sin.* = sin.__sinh(a);
-    r_cos.* = cos.__cosh(a);
+pub fn __sincosh(x: f16, r_sin: *f16, r_cos: *f16) callconv(.C) void {
+    // TODO: more efficient implementation
+    var big_sin: f32 = undefined;
+    var big_cos: f32 = undefined;
+    sincosf(x, &big_sin, &big_cos);
+    r_sin.* = @floatCast(f16, big_sin);
+    r_cos.* = @floatCast(f16, big_cos);
 }
 
-pub fn sincosf(a: f32, r_sin: *f32, r_cos: *f32) callconv(.C) void {
-    r_sin.* = sin.sinf(a);
-    r_cos.* = cos.cosf(a);
+pub fn sincosf(x: f32, r_sin: *f32, r_cos: *f32) callconv(.C) void {
+    const sc1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18
+    const sc2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18
+    const sc3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
+    const sc4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
+
+    const pre_ix = @bitCast(u32, x);
+    const sign = pre_ix >> 31 != 0;
+    const ix = pre_ix & 0x7fffffff;
+
+    // |x| ~<= pi/4
+    if (ix <= 0x3f490fda) {
+        // |x| < 2**-12
+        if (ix < 0x39800000) {
+            // raise inexact if x!=0 and underflow if subnormal (same f32 threshold as sinf: 0x00800000)
+            math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120);
+            r_sin.* = x;
+            r_cos.* = 1.0;
+            return;
+        }
+        r_sin.* = trig.__sindf(x);
+        r_cos.* = trig.__cosdf(x);
+        return;
+    }
+
+    // |x| ~<= 5*pi/4
+    if (ix <= 0x407b53d1) {
+        // |x| ~<= 3pi/4
+        if (ix <= 0x4016cbe3) {
+            if (sign) {
+                r_sin.* = -trig.__cosdf(x + sc1pio2);
+                r_cos.* = trig.__sindf(x + sc1pio2);
+            } else {
+                r_sin.* = trig.__cosdf(sc1pio2 - x);
+                r_cos.* = trig.__sindf(sc1pio2 - x);
+            }
+            return;
+        }
+        // -sin(x+c) is not correct if x+c could be 0: -0 vs +0
+        r_sin.* = -trig.__sindf(if (sign) x + sc2pio2 else x - sc2pio2);
+        r_cos.* = -trig.__cosdf(if (sign) x + sc2pio2 else x - sc2pio2);
+        return;
+    }
+
+    // |x| ~<= 9*pi/4
+    if (ix <= 0x40e231d5) {
+        // |x| ~<= 7*pi/4
+        if (ix <= 0x40afeddf) {
+            if (sign) {
+                r_sin.* = trig.__cosdf(x + sc3pio2);
+                r_cos.* = -trig.__sindf(x + sc3pio2);
+            } else {
+                r_sin.* = -trig.__cosdf(x - sc3pio2);
+                r_cos.* = trig.__sindf(x - sc3pio2);
+            }
+            return;
+        }
+        r_sin.* = trig.__sindf(if (sign) x + sc4pio2 else x - sc4pio2);
+        r_cos.* = trig.__cosdf(if (sign) x + sc4pio2 else x - sc4pio2);
+        return;
+    }
+
+    // sincos(Inf or NaN) is NaN
+    if (ix >= 0x7f800000) {
+        const result = x - x;
+        r_sin.* = result;
+        r_cos.* = result;
+        return;
+    }
+
+    // general argument reduction needed
+    var y: f64 = undefined;
+    const n = rem_pio2f(x, &y);
+    const s = trig.__sindf(y);
+    const c = trig.__cosdf(y);
+    switch (n & 3) {
+        0 => {
+            r_sin.* = s;
+            r_cos.* = c;
+        },
+        1 => {
+            r_sin.* = c;
+            r_cos.* = -s;
+        },
+        2 => {
+            r_sin.* = -s;
+            r_cos.* = -c;
+        },
+        else => {
+            r_sin.* = -c;
+            r_cos.* = s;
+        },
+    }
 }
 
-pub fn sincos(a: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void {
-    r_sin.* = sin.sin(a);
-    r_cos.* = cos.cos(a);
+pub fn sincos(x: f64, r_sin: *f64, r_cos: *f64) callconv(.C) void {
+    const ix = @truncate(u32, @bitCast(u64, x) >> 32) & 0x7fffffff;
+
+    // |x| ~< pi/4
+    if (ix <= 0x3fe921fb) {
+        // if |x| < 2**-27 * sqrt(2)
+        if (ix < 0x3e46a09e) {
+            // raise inexact if x != 0 and underflow if subnormal
+            math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120);
+            r_sin.* = x;
+            r_cos.* = 1.0;
+            return;
+        }
+        r_sin.* = trig.__sin(x, 0.0, 0);
+        r_cos.* = trig.__cos(x, 0.0);
+        return;
+    }
+
+    // sincos(Inf or NaN) is NaN
+    if (ix >= 0x7ff00000) {
+        const result = x - x;
+        r_sin.* = result;
+        r_cos.* = result;
+        return;
+    }
+
+    // argument reduction needed
+    var y: [2]f64 = undefined;
+    const n = rem_pio2(x, &y);
+    const s = trig.__sin(y[0], y[1], 1);
+    const c = trig.__cos(y[0], y[1]);
+    switch (n & 3) {
+        0 => {
+            r_sin.* = s;
+            r_cos.* = c;
+        },
+        1 => {
+            r_sin.* = c;
+            r_cos.* = -s;
+        },
+        2 => {
+            r_sin.* = -s;
+            r_cos.* = -c;
+        },
+        else => {
+            r_sin.* = -c;
+            r_cos.* = s;
+        },
+    }
 }
 
-pub fn __sincosx(a: f80, r_sin: *f80, r_cos: *f80) callconv(.C) void {
-    r_sin.* = sin.__sinx(a);
-    r_cos.* = cos.__cosx(a);
+pub fn __sincosx(x: f80, r_sin: *f80, r_cos: *f80) callconv(.C) void {
+    // TODO: more efficient implementation
+    //return sincos_generic(f80, x, r_sin, r_cos);
+    var big_sin: f128 = undefined;
+    var big_cos: f128 = undefined;
+    sincosq(x, &big_sin, &big_cos);
+    r_sin.* = @floatCast(f80, big_sin);
+    r_cos.* = @floatCast(f80, big_cos);
 }
 
-pub fn sincosq(a: f128, r_sin: *f128, r_cos: *f128) callconv(.C) void {
-    r_sin.* = sin.sinq(a);
-    r_cos.* = cos.cosq(a);
+pub fn sincosq(x: f128, r_sin: *f128, r_cos: *f128) callconv(.C) void {
+    // TODO: more correct implementation
+    //return sincos_generic(f128, x, r_sin, r_cos);
+    var small_sin: f64 = undefined;
+    var small_cos: f64 = undefined;
+    sincos(@floatCast(f64, x), &small_sin, &small_cos);
+    r_sin.* = small_sin;
+    r_cos.* = small_cos;
+}
+
+const rem_pio2_generic = @compileError("TODO");
+
+/// Ported from musl sincosl.c. Needs the following dependencies to be complete:
+/// * rem_pio2_generic ported from __rem_pio2l.c
+/// * trig.sin_generic ported from __sinl.c
+/// * trig.cos_generic ported from __cosl.c
+inline fn sincos_generic(comptime F: type, x: F, r_sin: *F, r_cos: *F) void {
+    const sc1pio4: F = 1.0 * math.pi / 4.0;
+    const bits = @typeInfo(F).Float.bits;
+    const I = std.meta.Int(.unsigned, bits);
+    const ix = @bitCast(I, x) & (math.maxInt(I) >> 1);
+    const se = @truncate(u16, ix >> (bits - 16));
+
+    if (se == 0x7fff) {
+        const result = x - x;
+        r_sin.* = result;
+        r_cos.* = result;
+        return;
+    }
+
+    if (@bitCast(F, ix) < sc1pio4) {
+        if (se < 0x3fff - math.floatFractionalBits(F) - 1) {
+            // raise underflow if subnormal
+            if (se == 0) {
+                math.doNotOptimizeAway(x * 0x1p-120);
+            }
+            r_sin.* = x;
+            // raise inexact if x!=0
+            r_cos.* = 1.0 + x;
+            return;
+        }
+        r_sin.* = trig.sin_generic(F, x, 0, 0);
+        r_cos.* = trig.cos_generic(F, x, 0);
+        return;
+    }
+
+    var y: [2]F = undefined;
+    const n = rem_pio2_generic(F, x, &y);
+    const s = trig.sin_generic(F, y[0], y[1], 1);
+    const c = trig.cos_generic(F, y[0], y[1]);
+    switch (n & 3) {
+        0 => {
+            r_sin.* = s;
+            r_cos.* = c;
+        },
+        1 => {
+            r_sin.* = c;
+            r_cos.* = -s;
+        },
+        2 => {
+            r_sin.* = -s;
+            r_cos.* = -c;
+        },
+        else => {
+            r_sin.* = -c;
+            r_cos.* = s;
+        },
+    }
 }