commit e262a32ad1d039ee575009c8ea770743b72e8160 (tree)
parent 07b08b96389a1dd60bc150a5ed72586ee57b2303
Author: Andrew Kelley <andrew@ziglang.org>
Date: Fri, 13 Feb 2026 12:26:11 -0800
Merge branch 'std.math.atan: Add @Vector(?,f32) and @Vector(?,f64) support'
Add SIMD support for atan (f32 and f64), based on the [ARM
impl](https://github.com/ARM-software/optimized-routines/blob/master/math/aarch64/advsimd/atanf.c).
To reduce branching, more polynomial approximation is used.
Reviewed-on: https://codeberg.org/ziglang/zig/pulls/31195
Diffstat:
| M | lib/std/math/atan.zig | | | 213 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
1 file changed, 205 insertions(+), 8 deletions(-)
diff --git a/lib/std/math/atan.zig b/lib/std/math/atan.zig
@@ -4,6 +4,12 @@
// https://git.musl-libc.org/cgit/musl/tree/src/math/atanf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/atan.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/atanl.c
+//
+// Ported from ARM-software, which is licensed under the MIT license:
+// https://github.com/ARM-software/optimized-routines/blob/master/LICENSE
+//
+// https://github.com/ARM-software/optimized-routines/blob/master/math/aarch64/advsimd/atanf.c
+// https://github.com/ARM-software/optimized-routines/blob/master/math/aarch64/advsimd/atan.c
const std = @import("../std.zig");
const math = std.math;
@@ -17,14 +23,22 @@ const testing = std.testing;
/// - atan(+-inf) = +-pi/2
pub fn atan(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
- return switch (T) {
- f16 => atanBinary16(x),
- f32 => atanBinary32(x),
- f64 => atanBinary64(x),
- f80 => atanExtended80(x),
- f128 => atanBinary128(x),
- else => @compileError("atan not implemented for " ++ @typeName(T)),
- };
+ switch (@typeInfo(T)) {
+ .float => |info| switch (info.bits) {
+ 16 => return atanBinary16(x),
+ 32 => return atanBinary32(x),
+ 64 => return atanBinary64(x),
+ 80 => return atanExtended80(x),
+ 128 => return atanBinary128(x),
+ else => comptime unreachable,
+ },
+ .vector => |info| switch (info.child) {
+ f32 => return atanBinary32Vec(info.len, x),
+ f64 => return atanBinary64Vec(info.len, x),
+ else => @compileError("unimplemented"),
+ },
+ else => comptime unreachable,
+ }
}
fn atanBinary16(x: f16) f16 {
@@ -579,3 +593,186 @@ test "atanBinary128" {
try testing.expectApproxEqAbs(atanBinary128(0x1.299d54ac7d6afc5154643b601519p1), 0x1.2a24e22d861debfd6f974500567fp0, math.floatEpsAt(f128, 0x1.2a24e22d861debfd6f974500567fp0));
try testing.expectApproxEqAbs(atanBinary128(-0x1.0264fb9f3d50e4f0f966f0686064p1), -0x1.1c617825f97512b7f38656ab12cdp0, math.floatEpsAt(f128, -0x1.1c617825f97512b7f38656ab12cdp0));
}
+
+fn atanBinary32Vec(comptime vec_len: comptime_int, x: @Vector(vec_len, f32)) @TypeOf(x) {
+ const sign_mask: @Vector(vec_len, u32) = @splat(0x80000000);
+ const neg_one: @Vector(vec_len, f32) = @splat(-1.0);
+ const pi_over_2: @Vector(vec_len, u32) = @splat(0x3fc90fdb);
+ const zero: @Vector(vec_len, u32) = @splat(0);
+ const c0: @Vector(vec_len, f32) = @splat(-0x1.5554dcp-2);
+ const c1: @Vector(vec_len, f32) = @splat(0x1.9978ecp-3);
+ const c2: @Vector(vec_len, f32) = @splat(-0x1.230a94p-3);
+ const c3: @Vector(vec_len, f32) = @splat(0x1.b4debp-4);
+ const c4: @Vector(vec_len, f32) = @splat(-0x1.3550dap-4);
+ const c5: @Vector(vec_len, f32) = @splat(0x1.61eebp-5);
+ const c6: @Vector(vec_len, f32) = @splat(-0x1.0c17d4p-6);
+ const c7: @Vector(vec_len, f32) = @splat(0x1.7ea694p-9);
+
+ const ix: @Vector(vec_len, u32) = @bitCast(x);
+ const sign = ix & sign_mask;
+ const pred = @abs(x) > @abs(neg_one);
+ const z = @select(f32, pred, neg_one / x, x);
+ const shift: @Vector(vec_len, f32) = @bitCast(@select(u32, pred, pi_over_2 ^ sign, zero));
+ const z2 = z * z;
+ const z3 = z * z2;
+ const z4 = z2 * z2;
+ const z8 = z4 * z4;
+ const p0_1 = @mulAdd(@Vector(vec_len, f32), z2, c1, c0);
+ const p2_3 = @mulAdd(@Vector(vec_len, f32), z2, c3, c2);
+ const p4_5 = @mulAdd(@Vector(vec_len, f32), z2, c5, c4);
+ const p6_7 = @mulAdd(@Vector(vec_len, f32), z2, c7, c6);
+ const p0_3 = @mulAdd(@Vector(vec_len, f32), z4, p2_3, p0_1);
+ const p4_7 = @mulAdd(@Vector(vec_len, f32), z4, p6_7, p4_5);
+ const p0_7 = @mulAdd(@Vector(vec_len, f32), z8, p4_7, p0_3);
+ return @mulAdd(@Vector(vec_len, f32), z3, p0_7, shift + z);
+}
+
+fn atanBinary64Vec(comptime vec_len: comptime_int, x: @Vector(vec_len, f64)) @TypeOf(x) {
+ const sign_mask: @Vector(vec_len, u64) = @splat(0x8000000000000000);
+ const neg_one: @Vector(vec_len, f64) = @splat(-1.0);
+ const pi_over_2: @Vector(vec_len, u64) = @splat(0x3ff921fb54442d18);
+ const zero: @Vector(vec_len, u64) = @splat(0);
+ const c0: @Vector(vec_len, f64) = @splat(-0x1.555555555552ap-2);
+ const c1: @Vector(vec_len, f64) = @splat(0x1.9999999995aebp-3);
+ const c2: @Vector(vec_len, f64) = @splat(-0x1.24924923923f6p-3);
+ const c3: @Vector(vec_len, f64) = @splat(0x1.c71c7184288a2p-4);
+ const c4: @Vector(vec_len, f64) = @splat(-0x1.745d11fb3d32bp-4);
+ const c5: @Vector(vec_len, f64) = @splat(0x1.3b136a18051b9p-4);
+ const c6: @Vector(vec_len, f64) = @splat(-0x1.110e6d985f496p-4);
+ const c7: @Vector(vec_len, f64) = @splat(0x1.e1bcf7f08801dp-5);
+ const c8: @Vector(vec_len, f64) = @splat(-0x1.ae644e28058c3p-5);
+ const c9: @Vector(vec_len, f64) = @splat(0x1.82eeb1fed85c6p-5);
+ const c10: @Vector(vec_len, f64) = @splat(-0x1.59d7f901566cbp-5);
+ const c11: @Vector(vec_len, f64) = @splat(0x1.2c982855ab069p-5);
+ const c12: @Vector(vec_len, f64) = @splat(-0x1.eb49592998177p-6);
+ const c13: @Vector(vec_len, f64) = @splat(0x1.69d8b396e3d38p-6);
+ const c14: @Vector(vec_len, f64) = @splat(-0x1.ca980345c4204p-7);
+ const c15: @Vector(vec_len, f64) = @splat(0x1.dc050eafde0b3p-8);
+ const c16: @Vector(vec_len, f64) = @splat(-0x1.7ea70755b8eccp-9);
+ const c17: @Vector(vec_len, f64) = @splat(0x1.ba3da3de903e8p-11);
+ const c18: @Vector(vec_len, f64) = @splat(-0x1.44a4b059b6f67p-13);
+ const c19: @Vector(vec_len, f64) = @splat(0x1.c4a45029e5a91p-17);
+
+ const ix: @Vector(vec_len, u64) = @bitCast(x);
+ const sign = ix & sign_mask;
+ const pred = @abs(x) > @abs(neg_one);
+ const shift: @Vector(vec_len, f64) = @bitCast(@select(u64, pred, pi_over_2 ^ sign, zero));
+ const z = @select(f64, pred, neg_one / x, x);
+ const z2 = z * z;
+ const z3 = z * z2;
+ const z4 = z2 * z2;
+ const z8 = z4 * z4;
+ const z16 = z8 * z8;
+ const p0_1 = @mulAdd(@Vector(vec_len, f64), z2, c1, c0);
+ const p2_3 = @mulAdd(@Vector(vec_len, f64), z2, c3, c2);
+ const p0_3 = @mulAdd(@Vector(vec_len, f64), z4, p2_3, p0_1);
+ const p4_5 = @mulAdd(@Vector(vec_len, f64), z2, c5, c4);
+ const p6_7 = @mulAdd(@Vector(vec_len, f64), z2, c7, c6);
+ const p4_7 = @mulAdd(@Vector(vec_len, f64), z4, p6_7, p4_5);
+ const p0_7 = @mulAdd(@Vector(vec_len, f64), z8, p4_7, p0_3);
+ const p8_9 = @mulAdd(@Vector(vec_len, f64), z2, c9, c8);
+ const p10_11 = @mulAdd(@Vector(vec_len, f64), z2, c11, c10);
+ const p8_11 = @mulAdd(@Vector(vec_len, f64), z4, p10_11, p8_9);
+ const p12_13 = @mulAdd(@Vector(vec_len, f64), z2, c13, c12);
+ const p14_15 = @mulAdd(@Vector(vec_len, f64), z2, c15, c14);
+ const p12_15 = @mulAdd(@Vector(vec_len, f64), z4, p14_15, p12_13);
+ const p16_17 = @mulAdd(@Vector(vec_len, f64), z2, c17, c16);
+ const p18_19 = @mulAdd(@Vector(vec_len, f64), z2, c19, c18);
+ const p16_19 = @mulAdd(@Vector(vec_len, f64), z4, p18_19, p16_17);
+ const p8_15 = @mulAdd(@Vector(vec_len, f64), z8, p12_15, p8_11);
+ const p8_19 = @mulAdd(@Vector(vec_len, f64), z16, p16_19, p8_15);
+ const p0_19 = @mulAdd(@Vector(vec_len, f64), p8_19, z16, p0_7);
+ return @mulAdd(@Vector(vec_len, f64), z3, p0_19, shift + z);
+}
+
+test "atanBinary32Vec.special" {
+ const input: @Vector(7, f32) = .{
+ 0x0p+0,
+ -0x0p+0,
+ 0x1p+0,
+ -0x1p+0,
+ math.inf(f32),
+ -math.inf(f32),
+ math.nan(f32),
+ };
+ const output = atanBinary32Vec(7, input);
+ try testing.expectEqual(output[0], 0x0p+0);
+ try testing.expectEqual(output[1], -0x0p+0);
+ try testing.expectApproxEqAbs(output[2], 0x1.921fb6p-1, math.floatEpsAt(f32, 0x1.921fb6p-1));
+ try testing.expectApproxEqAbs(output[3], -0x1.921fb6p-1, math.floatEpsAt(f32, -0x1.921fb6p-1));
+ try testing.expectApproxEqAbs(output[4], 0x1.921fb6p+0, math.floatEpsAt(f32, 0x1.921fb6p+0));
+ try testing.expectApproxEqAbs(output[5], -0x1.921fb6p+0, math.floatEpsAt(f32, -0x1.921fb6p+0));
+ try testing.expect(math.isNan(output[6]));
+}
+
+test "atanBinary32Vec" {
+ const input: @Vector(10, f32) = .{
+ -0x1.8629dp-2,
+ -0x1.59d42ep1,
+ -0x1.d2dbe2p0,
+ -0x1.5f314ep-1,
+ 0x1.5869bp1,
+ -0x1.b13a06p-2,
+ 0x1.3cb0f2p1,
+ -0x1.0ed746p-2,
+ 0x1.299d54p1,
+ -0x1.0264fcp1,
+ };
+ const output = atanBinary32Vec(10, input);
+ try testing.expectApproxEqAbs(output[0], -0x1.74c62p-2, math.floatEpsAt(f32, -0x1.74c62p-2));
+ try testing.expectApproxEqAbs(output[1], -0x1.375fd8p0, math.floatEpsAt(f32, -0x1.375fd8p0));
+ try testing.expectApproxEqAbs(output[2], -0x1.11b8aep0, math.floatEpsAt(f32, -0x1.11b8aep0));
+ try testing.expectApproxEqAbs(output[3], -0x1.33d28cp-1, math.floatEpsAt(f32, -0x1.33d28cp-1));
+ try testing.expectApproxEqAbs(output[4], 0x1.37082ep0, math.floatEpsAt(f32, 0x1.37082ep0));
+ try testing.expectApproxEqAbs(output[5], -0x1.99d7cap-2, math.floatEpsAt(f32, -0x1.99d7cap-2));
+ try testing.expectApproxEqAbs(output[6], 0x1.2fcb12p0, math.floatEpsAt(f32, 0x1.2fcb12p0));
+ try testing.expectApproxEqAbs(output[7], -0x1.08c71ap-2, math.floatEpsAt(f32, -0x1.08c71ap-2));
+ try testing.expectApproxEqAbs(output[8], 0x1.2a24e2p0, math.floatEpsAt(f32, 0x1.2a24e2p0));
+ try testing.expectApproxEqAbs(output[9], -0x1.1c6178p0, math.floatEpsAt(f32, -0x1.1c6178p0));
+}
+
+test "atanBinary64Vec.special" {
+ const input: @Vector(7, f64) = .{
+ 0x0p+0,
+ -0x0p+0,
+ 0x1p+0,
+ -0x1p+0,
+ math.inf(f64),
+ -math.inf(f64),
+ math.nan(f64),
+ };
+ const output = atanBinary64Vec(7, input);
+ try testing.expectEqual(output[0], 0x0p+0);
+ try testing.expectEqual(output[1], -0x0p+0);
+ try testing.expectApproxEqAbs(output[2], 0x1.921fb54442d18p-1, math.floatEpsAt(f64, 0x1.921fb54442d18p-1));
+ try testing.expectApproxEqAbs(output[3], -0x1.921fb54442d18p-1, math.floatEpsAt(f64, -0x1.921fb54442d18p-1));
+ try testing.expectApproxEqAbs(output[4], 0x1.921fb54442d18p+0, math.floatEpsAt(f64, 0x1.921fb54442d18p+0));
+ try testing.expectApproxEqAbs(output[5], -0x1.921fb54442d18p+0, math.floatEpsAt(f64, -0x1.921fb54442d18p+0));
+ try testing.expect(math.isNan(output[6]));
+}
+
+test "atanBinary64Vec" {
+ const input: @Vector(10, f64) = .{
+ -0x1.8629d0244cdccp-2,
+ -0x1.59d42d4659937p1,
+ -0x1.d2dbe23d04f06p0,
+ -0x1.5f314e72398e8p-1,
+ 0x1.5869af37b7d08p1,
+ -0x1.b13a05a662618p-2,
+ 0x1.3cb0f12f39d8ap1,
+ -0x1.0ed746b39cbb7p-2,
+ 0x1.299d54ac7d6bp1,
+ -0x1.0264fb9f3d50ep1,
+ };
+ const output = atanBinary64Vec(10, input);
+ try testing.expectApproxEqAbs(output[0], -0x1.74c61f4377016p-2, math.floatEpsAt(f64, -0x1.74c61f4377016p-2));
+ try testing.expectApproxEqAbs(output[1], -0x1.375fd7987cc2p0, math.floatEpsAt(f64, -0x1.375fd7987cc2p0));
+ try testing.expectApproxEqAbs(output[2], -0x1.11b8adeba5616p0, math.floatEpsAt(f64, -0x1.11b8adeba5616p0));
+ try testing.expectApproxEqAbs(output[3], -0x1.33d28ca762539p-1, math.floatEpsAt(f64, -0x1.33d28ca762539p-1));
+ try testing.expectApproxEqAbs(output[4], 0x1.37082ce2dd03p0, math.floatEpsAt(f64, 0x1.37082ce2dd03p0));
+ try testing.expectApproxEqAbs(output[5], -0x1.99d7cac66dd44p-2, math.floatEpsAt(f64, -0x1.99d7cac66dd44p-2));
+ try testing.expectApproxEqAbs(output[6], 0x1.2fcb120468e8ep0, math.floatEpsAt(f64, 0x1.2fcb120468e8ep0));
+ try testing.expectApproxEqAbs(output[7], -0x1.08c71aa0e509p-2, math.floatEpsAt(f64, -0x1.08c71aa0e509p-2));
+ try testing.expectApproxEqAbs(output[8], 0x1.2a24e22d861dfp0, math.floatEpsAt(f64, 0x1.2a24e22d861dfp0));
+ try testing.expectApproxEqAbs(output[9], -0x1.1c617825f9751p0, math.floatEpsAt(f64, -0x1.1c617825f9751p0));
+}