compiler-rt: math functions reorg

* unify the logic for exporting math functions from compiler-rt,
   with the appropriate suffixes and prefixes.
   - add all missing f128 and f80 exports. Functions with missing
     implementations call other functions and have TODO comments.
   - also add f16 functions
 * move math functions from freestanding libc to compiler-rt (#7265)
 * enable all the f128 and f80 code in the stage2 compiler and behavior
   tests (#11161).
 * update std lib to use builtins rather than `std.math`.
This commit is contained in:
Andrew Kelley
2022-04-26 10:13:55 -07:00
parent 6f4343b61a
commit 41dd2beaac
66 changed files with 2617 additions and 2869 deletions

View File

@@ -1,198 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2.c
const std = @import("../std.zig");
const __rem_pio2_large = @import("__rem_pio2_large.zig").__rem_pio2_large;
const math = std.math;
const toint = 1.5 / math.floatEps(f64);
// pi/4
const pio4 = 0x1.921fb54442d18p-1;
// invpio2: 53 bits of 2/pi
const invpio2 = 6.36619772367581382433e-01; // 0x3FE45F30, 0x6DC9C883
// pio2_1: first 33 bit of pi/2
const pio2_1 = 1.57079632673412561417e+00; // 0x3FF921FB, 0x54400000
// pio2_1t: pi/2 - pio2_1
const pio2_1t = 6.07710050650619224932e-11; // 0x3DD0B461, 0x1A626331
// pio2_2: second 33 bit of pi/2
const pio2_2 = 6.07710050630396597660e-11; // 0x3DD0B461, 0x1A600000
// pio2_2t: pi/2 - (pio2_1+pio2_2)
const pio2_2t = 2.02226624879595063154e-21; // 0x3BA3198A, 0x2E037073
// pio2_3: third 33 bit of pi/2
const pio2_3 = 2.02226624871116645580e-21; // 0x3BA3198A, 0x2E000000
// pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
const pio2_3t = 8.47842766036889956997e-32; // 0x397B839A, 0x252049C1
fn U(x: anytype) usize {
return @intCast(usize, x);
}
fn medium(ix: u32, x: f64, y: *[2]f64) i32 {
var w: f64 = undefined;
var t: f64 = undefined;
var r: f64 = undefined;
var @"fn": f64 = undefined;
var n: i32 = undefined;
var ex: i32 = undefined;
var ey: i32 = undefined;
var ui: u64 = undefined;
// rint(x/(pi/2))
@"fn" = x * invpio2 + toint - toint;
n = @floatToInt(i32, @"fn");
r = x - @"fn" * pio2_1;
w = @"fn" * pio2_1t; // 1st round, good to 85 bits
// Matters with directed rounding.
if (r - w < -pio4) {
n -= 1;
@"fn" -= 1;
r = x - @"fn" * pio2_1;
w = @"fn" * pio2_1t;
} else if (r - w > pio4) {
n += 1;
@"fn" += 1;
r = x - @"fn" * pio2_1;
w = @"fn" * pio2_1t;
}
y[0] = r - w;
ui = @bitCast(u64, y[0]);
ey = @intCast(i32, (ui >> 52) & 0x7ff);
ex = @intCast(i32, ix >> 20);
if (ex - ey > 16) { // 2nd round, good to 118 bits
t = r;
w = @"fn" * pio2_2;
r = t - w;
w = @"fn" * pio2_2t - ((t - r) - w);
y[0] = r - w;
ui = @bitCast(u64, y[0]);
ey = @intCast(i32, (ui >> 52) & 0x7ff);
if (ex - ey > 49) { // 3rd round, good to 151 bits, covers all cases
t = r;
w = @"fn" * pio2_3;
r = t - w;
w = @"fn" * pio2_3t - ((t - r) - w);
y[0] = r - w;
}
}
y[1] = (r - y[0]) - w;
return n;
}
// Returns the remainder of x rem pi/2 in y[0]+y[1]
//
// use __rem_pio2_large() for large x
//
// caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
pub fn __rem_pio2(x: f64, y: *[2]f64) i32 {
var z: f64 = undefined;
var tx: [3]f64 = undefined;
var ty: [2]f64 = undefined;
var n: i32 = undefined;
var ix: u32 = undefined;
var sign: bool = undefined;
var i: i32 = undefined;
var ui: u64 = undefined;
ui = @bitCast(u64, x);
sign = ui >> 63 != 0;
ix = @truncate(u32, (ui >> 32) & 0x7fffffff);
if (ix <= 0x400f6a7a) { // |x| ~<= 5pi/4
if ((ix & 0xfffff) == 0x921fb) { // |x| ~= pi/2 or 2pi/2
return medium(ix, x, y);
}
if (ix <= 0x4002d97c) { // |x| ~<= 3pi/4
if (!sign) {
z = x - pio2_1; // one round good to 85 bits
y[0] = z - pio2_1t;
y[1] = (z - y[0]) - pio2_1t;
return 1;
} else {
z = x + pio2_1;
y[0] = z + pio2_1t;
y[1] = (z - y[0]) + pio2_1t;
return -1;
}
} else {
if (!sign) {
z = x - 2 * pio2_1;
y[0] = z - 2 * pio2_1t;
y[1] = (z - y[0]) - 2 * pio2_1t;
return 2;
} else {
z = x + 2 * pio2_1;
y[0] = z + 2 * pio2_1t;
y[1] = (z - y[0]) + 2 * pio2_1t;
return -2;
}
}
}
if (ix <= 0x401c463b) { // |x| ~<= 9pi/4
if (ix <= 0x4015fdbc) { // |x| ~<= 7pi/4
if (ix == 0x4012d97c) { // |x| ~= 3pi/2
return medium(ix, x, y);
}
if (!sign) {
z = x - 3 * pio2_1;
y[0] = z - 3 * pio2_1t;
y[1] = (z - y[0]) - 3 * pio2_1t;
return 3;
} else {
z = x + 3 * pio2_1;
y[0] = z + 3 * pio2_1t;
y[1] = (z - y[0]) + 3 * pio2_1t;
return -3;
}
} else {
if (ix == 0x401921fb) { // |x| ~= 4pi/2 */
return medium(ix, x, y);
}
if (!sign) {
z = x - 4 * pio2_1;
y[0] = z - 4 * pio2_1t;
y[1] = (z - y[0]) - 4 * pio2_1t;
return 4;
} else {
z = x + 4 * pio2_1;
y[0] = z + 4 * pio2_1t;
y[1] = (z - y[0]) + 4 * pio2_1t;
return -4;
}
}
}
if (ix < 0x413921fb) { // |x| ~< 2^20*(pi/2), medium size
return medium(ix, x, y);
}
// all other (large) arguments
if (ix >= 0x7ff00000) { // x is inf or NaN
y[0] = x - x;
y[1] = y[0];
return 0;
}
// set z = scalbn(|x|,-ilogb(x)+23)
ui = @bitCast(u64, x);
ui &= std.math.maxInt(u64) >> 12;
ui |= @as(u64, 0x3ff + 23) << 52;
z = @bitCast(f64, ui);
i = 0;
while (i < 2) : (i += 1) {
tx[U(i)] = @intToFloat(f64, @floatToInt(i32, z));
z = (z - tx[U(i)]) * 0x1p24;
}
tx[U(i)] = z;
// skip zero terms, first term is non-zero
while (tx[U(i)] == 0.0) {
i -= 1;
}
n = __rem_pio2_large(tx[0..], ty[0..], @intCast(i32, (ix >> 20)) - (0x3ff + 23), i + 1, 1);
if (sign) {
y[0] = -ty[0];
y[1] = -ty[1];
return -n;
}
y[0] = ty[0];
y[1] = ty[1];
return n;
}

View File

@@ -1,510 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2_large.c
const std = @import("../std.zig");
const math = std.math;
const init_jk = [_]i32{ 3, 4, 4, 6 }; // initial value for jk
//
// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
//
// integer array, contains the (24*i)-th to (24*i+23)-th
// bit of 2/pi after binary point. The corresponding
// floating value is
//
// ipio2[i] * 2^(-24(i+1)).
//
// NB: This table must have at least (e0-3)/24 + jk terms.
// For quad precision (e0 <= 16360, jk = 6), this is 686.
///
const ipio2 = [_]i32{
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
//#if LDBL_MAX_EXP > 1024
0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2,
0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35,
0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C,
0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4,
0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7,
0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19,
0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16,
0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6,
0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48,
0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3,
0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55,
0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612,
0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC,
0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B,
0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4,
0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB,
0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C,
0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F,
0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437,
0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B,
0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD,
0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3,
0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717,
0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F,
0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB,
0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51,
0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C,
0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6,
0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED,
0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328,
0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0,
0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B,
0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3,
0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F,
0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B,
0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4,
0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31,
0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30,
0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E,
0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1,
0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4,
0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08,
0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9,
0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4,
0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C,
0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0,
0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0,
0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC,
0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893,
0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7,
0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F,
0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4,
0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B,
0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2,
0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E,
0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569,
0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9,
0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D,
0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855,
0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569,
0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE,
0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41,
0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F,
0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110,
0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365,
0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A,
0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5,
0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616,
0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901,
0x8071E0,
//#endif
};
const PIo2 = [_]f64{
1.57079625129699707031e+00, // 0x3FF921FB, 0x40000000
7.54978941586159635335e-08, // 0x3E74442D, 0x00000000
5.39030252995776476554e-15, // 0x3CF84698, 0x80000000
3.28200341580791294123e-22, // 0x3B78CC51, 0x60000000
1.27065575308067607349e-29, // 0x39F01B83, 0x80000000
1.22933308981111328932e-36, // 0x387A2520, 0x40000000
2.73370053816464559624e-44, // 0x36E38222, 0x80000000
2.16741683877804819444e-51, // 0x3569F31D, 0x00000000
};
fn U(x: anytype) usize {
return @intCast(usize, x);
}
// Returns the last three digits of N with y = x - N*pi/2 so that |y| < pi/2.
//
// The method is to compute the integer (mod 8) and fraction parts of
// (2/pi)*x without doing the full multiplication. In general we
// skip the part of the product that are known to be a huge integer (
// more accurately, = 0 mod 8 ). Thus the number of operations are
// independent of the exponent of the input.
//
// (2/pi) is represented by an array of 24-bit integers in ipio2[].
//
// Input parameters:
// x[] The input value (must be positive) is broken into nx
// pieces of 24-bit integers in double precision format.
// x[i] will be the i-th 24 bit of x. The scaled exponent
// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
// match x's up to 24 bits.
//
// Example of breaking a double positive z into x[0]+x[1]+x[2]:
// e0 = ilogb(z)-23
// z = scalbn(z,-e0)
// for i = 0,1,2
// x[i] = floor(z)
// z = (z-x[i])*2**24
//
//
// y[] ouput result in an array of double precision numbers.
// The dimension of y[] is:
// 24-bit precision 1
// 53-bit precision 2
// 64-bit precision 2
// 113-bit precision 3
// The actual value is the sum of them. Thus for 113-bit
// precison, one may have to do something like:
//
// long double t,w,r_head, r_tail;
// t = (long double)y[2] + (long double)y[1];
// w = (long double)y[0];
// r_head = t+w;
// r_tail = w - (r_head - t);
//
// e0 The exponent of x[0]. Must be <= 16360 or you need to
// expand the ipio2 table.
//
// nx dimension of x[]
//
// prec an integer indicating the precision:
// 0 24 bits (single)
// 1 53 bits (double)
// 2 64 bits (extended)
// 3 113 bits (quad)
//
// Here is the description of some local variables:
//
// jk jk+1 is the initial number of terms of ipio2[] needed
// in the computation. The minimum and recommended value
// for jk is 3,4,4,6 for single, double, extended, and quad.
// jk+1 must be 2 larger than you might expect so that our
// recomputation test works. (Up to 24 bits in the integer
// part (the 24 bits of it that we compute) and 23 bits in
// the fraction part may be lost to cancelation before we
// recompute.)
//
// jz local integer variable indicating the number of
// terms of ipio2[] used.
//
// jx nx - 1
//
// jv index for pointing to the suitable ipio2[] for the
// computation. In general, we want
// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
// is an integer. Thus
// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
// Hence jv = max(0,(e0-3)/24).
//
// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
//
// q[] double array with integral value, representing the
// 24-bits chunk of the product of x and 2/pi.
//
// q0 the corresponding exponent of q[0]. Note that the
// exponent for q[i] would be q0-24*i.
//
// PIo2[] double precision array, obtained by cutting pi/2
// into 24 bits chunks.
//
// f[] ipio2[] in floating point
//
// iq[] integer array by breaking up q[] in 24-bits chunk.
//
// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
//
// ih integer. If >0 it indicates q[] is >= 0.5, hence
// it also indicates the *sign* of the result.
//
///
//
// Constants:
// The hexadecimal values are the intended ones for the following
// constants. The decimal values may be used, provided that the
// compiler will convert from decimal to binary accurately enough
// to produce the hexadecimal values shown.
///
pub fn __rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 {
var jz: i32 = undefined;
var jx: i32 = undefined;
var jv: i32 = undefined;
var jp: i32 = undefined;
var jk: i32 = undefined;
var carry: i32 = undefined;
var n: i32 = undefined;
var iq: [20]i32 = undefined;
var i: i32 = undefined;
var j: i32 = undefined;
var k: i32 = undefined;
var m: i32 = undefined;
var q0: i32 = undefined;
var ih: i32 = undefined;
var z: f64 = undefined;
var fw: f64 = undefined;
var f: [20]f64 = undefined;
var fq: [20]f64 = undefined;
var q: [20]f64 = undefined;
// initialize jk
jk = init_jk[prec];
jp = jk;
// determine jx,jv,q0, note that 3>q0
jx = nx - 1;
jv = @divFloor(e0 - 3, 24);
if (jv < 0) jv = 0;
q0 = e0 - 24 * (jv + 1);
// set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk]
j = jv - jx;
m = jx + jk;
i = 0;
while (i <= m) : ({
i += 1;
j += 1;
}) {
f[U(i)] = if (j < 0) 0.0 else @intToFloat(f64, ipio2[U(j)]);
}
// compute q[0],q[1],...q[jk]
i = 0;
while (i <= jk) : (i += 1) {
j = 0;
fw = 0;
while (j <= jx) : (j += 1) {
fw += x[U(j)] * f[U(jx + i - j)];
}
q[U(i)] = fw;
}
jz = jk;
// This is to handle a non-trivial goto translation from C.
// An unconditional return statement is found at the end of this loop.
recompute: while (true) {
// distill q[] into iq[] reversingly
i = 0;
j = jz;
z = q[U(jz)];
while (j > 0) : ({
i += 1;
j -= 1;
}) {
fw = @intToFloat(f64, @floatToInt(i32, 0x1p-24 * z));
iq[U(i)] = @floatToInt(i32, z - 0x1p24 * fw);
z = q[U(j - 1)] + fw;
}
// compute n
z = math.scalbn(z, q0); // actual value of z
z -= 8.0 * math.floor(z * 0.125); // trim off integer >= 8
n = @floatToInt(i32, z);
z -= @intToFloat(f64, n);
ih = 0;
if (q0 > 0) { // need iq[jz-1] to determine n
i = iq[U(jz - 1)] >> @intCast(u5, 24 - q0);
n += i;
iq[U(jz - 1)] -= i << @intCast(u5, 24 - q0);
ih = iq[U(jz - 1)] >> @intCast(u5, 23 - q0);
} else if (q0 == 0) {
ih = iq[U(jz - 1)] >> 23;
} else if (z >= 0.5) {
ih = 2;
}
if (ih > 0) { // q > 0.5
n += 1;
carry = 0;
i = 0;
while (i < jz) : (i += 1) { // compute 1-q
j = iq[U(i)];
if (carry == 0) {
if (j != 0) {
carry = 1;
iq[U(i)] = 0x1000000 - j;
}
} else {
iq[U(i)] = 0xffffff - j;
}
}
if (q0 > 0) { // rare case: chance is 1 in 12
switch (q0) {
1 => iq[U(jz - 1)] &= 0x7fffff,
2 => iq[U(jz - 1)] &= 0x3fffff,
else => unreachable,
}
}
if (ih == 2) {
z = 1.0 - z;
if (carry != 0) {
z -= math.scalbn(@as(f64, 1.0), q0);
}
}
}
// check if recomputation is needed
if (z == 0.0) {
j = 0;
i = jz - 1;
while (i >= jk) : (i -= 1) {
j |= iq[U(i)];
}
if (j == 0) { // need recomputation
k = 1;
while (iq[U(jk - k)] == 0) : (k += 1) {
// k = no. of terms needed
}
i = jz + 1;
while (i <= jz + k) : (i += 1) { // add q[jz+1] to q[jz+k]
f[U(jx + i)] = @intToFloat(f64, ipio2[U(jv + i)]);
j = 0;
fw = 0;
while (j <= jx) : (j += 1) {
fw += x[U(j)] * f[U(jx + i - j)];
}
q[U(i)] = fw;
}
jz += k;
continue :recompute; // mimic goto recompute
}
}
// chop off zero terms
if (z == 0.0) {
jz -= 1;
q0 -= 24;
while (iq[U(jz)] == 0) {
jz -= 1;
q0 -= 24;
}
} else { // break z into 24-bit if necessary
z = math.scalbn(z, -q0);
if (z >= 0x1p24) {
fw = @intToFloat(f64, @floatToInt(i32, 0x1p-24 * z));
iq[U(jz)] = @floatToInt(i32, z - 0x1p24 * fw);
jz += 1;
q0 += 24;
iq[U(jz)] = @floatToInt(i32, fw);
} else {
iq[U(jz)] = @floatToInt(i32, z);
}
}
// convert integer "bit" chunk to floating-point value
fw = math.scalbn(@as(f64, 1.0), q0);
i = jz;
while (i >= 0) : (i -= 1) {
q[U(i)] = fw * @intToFloat(f64, iq[U(i)]);
fw *= 0x1p-24;
}
// compute PIo2[0,...,jp]*q[jz,...,0]
i = jz;
while (i >= 0) : (i -= 1) {
fw = 0;
k = 0;
while (k <= jp and k <= jz - i) : (k += 1) {
fw += PIo2[U(k)] * q[U(i + k)];
}
fq[U(jz - i)] = fw;
}
// compress fq[] into y[]
switch (prec) {
0 => {
fw = 0.0;
i = jz;
while (i >= 0) : (i -= 1) {
fw += fq[U(i)];
}
y[0] = if (ih == 0) fw else -fw;
},
1, 2 => {
fw = 0.0;
i = jz;
while (i >= 0) : (i -= 1) {
fw += fq[U(i)];
}
// TODO: drop excess precision here once double_t is used
fw = fw;
y[0] = if (ih == 0) fw else -fw;
fw = fq[0] - fw;
i = 1;
while (i <= jz) : (i += 1) {
fw += fq[U(i)];
}
y[1] = if (ih == 0) fw else -fw;
},
3 => { // painful
i = jz;
while (i > 0) : (i -= 1) {
fw = fq[U(i - 1)] + fq[U(i)];
fq[U(i)] += fq[U(i - 1)] - fw;
fq[U(i - 1)] = fw;
}
i = jz;
while (i > 1) : (i -= 1) {
fw = fq[U(i - 1)] + fq[U(i)];
fq[U(i)] += fq[U(i - 1)] - fw;
fq[U(i - 1)] = fw;
}
fw = 0;
i = jz;
while (i >= 2) : (i -= 1) {
fw += fq[U(i)];
}
if (ih == 0) {
y[0] = fq[0];
y[1] = fq[1];
y[2] = fw;
} else {
y[0] = -fq[0];
y[1] = -fq[1];
y[2] = -fw;
}
},
else => unreachable,
}
return n & 7;
}
}

View File

@@ -1,70 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/__rem_pio2f.c
const std = @import("../std.zig");
const __rem_pio2_large = @import("__rem_pio2_large.zig").__rem_pio2_large;
const math = std.math;
const toint = 1.5 / math.floatEps(f64);
// pi/4
const pio4 = 0x1.921fb6p-1;
// invpio2: 53 bits of 2/pi
const invpio2 = 6.36619772367581382433e-01; // 0x3FE45F30, 0x6DC9C883
// pio2_1: first 25 bits of pi/2
const pio2_1 = 1.57079631090164184570e+00; // 0x3FF921FB, 0x50000000
// pio2_1t: pi/2 - pio2_1
const pio2_1t = 1.58932547735281966916e-08; // 0x3E5110b4, 0x611A6263
// Returns the remainder of x rem pi/2 in *y
// use double precision for everything except passing x
// use __rem_pio2_large() for large x
pub fn __rem_pio2f(x: f32, y: *f64) i32 {
var tx: [1]f64 = undefined;
var ty: [1]f64 = undefined;
var @"fn": f64 = undefined;
var ix: u32 = undefined;
var n: i32 = undefined;
var sign: bool = undefined;
var e0: u32 = undefined;
var ui: u32 = undefined;
ui = @bitCast(u32, x);
ix = ui & 0x7fffffff;
// 25+53 bit pi is good enough for medium size
if (ix < 0x4dc90fdb) { // |x| ~< 2^28*(pi/2), medium size
// Use a specialized rint() to get fn.
@"fn" = @floatCast(f64, x) * invpio2 + toint - toint;
n = @floatToInt(i32, @"fn");
y.* = x - @"fn" * pio2_1 - @"fn" * pio2_1t;
// Matters with directed rounding.
if (y.* < -pio4) {
n -= 1;
@"fn" -= 1;
y.* = x - @"fn" * pio2_1 - @"fn" * pio2_1t;
} else if (y.* > pio4) {
n += 1;
@"fn" += 1;
y.* = x - @"fn" * pio2_1 - @"fn" * pio2_1t;
}
return n;
}
if (ix >= 0x7f800000) { // x is inf or NaN
y.* = x - x;
return 0;
}
// scale x into [2^23, 2^24-1]
sign = ui >> 31 != 0;
e0 = (ix >> 23) - (0x7f + 23); // e0 = ilogb(|x|)-23, positive
ui = ix - (e0 << 23);
tx[0] = @bitCast(f32, ui);
n = __rem_pio2_large(&tx, &ty, @intCast(i32, e0), 1, 0);
if (sign) {
y.* = -ty[0];
return -n;
}
y.* = ty[0];
return n;
}

View File

@@ -1,273 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/__cos.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__cosdf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__sin.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__sindf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tand.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tandf.c
// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
//
// Algorithm
// 1. Since cos(-x) = cos(x), we need only to consider positive x.
// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
// 3. cos(x) is approximated by a polynomial of degree 14 on
// [0,pi/4]
// 4 14
// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
// where the remez error is
//
// | 2 4 6 8 10 12 14 | -58
// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
// | |
//
// 4 6 8 10 12 14
// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
// cos(x) ~ 1 - x*x/2 + r
// since cos(x+y) ~ cos(x) - sin(x)*y
// ~ cos(x) - x*y,
// a correction term is necessary in cos(x) and hence
// cos(x+y) = 1 - (x*x/2 - (r - x*y))
// For better accuracy, rearrange to
// cos(x+y) ~ w + (tmp + (r-x*y))
// where w = 1 - x*x/2 and tmp is a tiny correction term
// (1 - x*x/2 == w + tmp exactly in infinite precision).
// The exactness of w + tmp in infinite precision depends on w
// and tmp having the same precision as x. If they have extra
// precision due to compiler bugs, then the extra precision is
// only good provided it is retained in all terms of the final
// expression for cos(). Retention happens in all cases tested
// under FreeBSD, so don't pessimize things by forcibly clipping
// any extra precision in w.
pub fn __cos(x: f64, y: f64) f64 {
const C1 = 4.16666666666666019037e-02; // 0x3FA55555, 0x5555554C
const C2 = -1.38888888888741095749e-03; // 0xBF56C16C, 0x16C15177
const C3 = 2.48015872894767294178e-05; // 0x3EFA01A0, 0x19CB1590
const C4 = -2.75573143513906633035e-07; // 0xBE927E4F, 0x809C52AD
const C5 = 2.08757232129817482790e-09; // 0x3E21EE9E, 0xBDB4B1C4
const C6 = -1.13596475577881948265e-11; // 0xBDA8FAE9, 0xBE8838D4
const z = x * x;
const zs = z * z;
const r = z * (C1 + z * (C2 + z * C3)) + zs * zs * (C4 + z * (C5 + z * C6));
const hz = 0.5 * z;
const w = 1.0 - hz;
return w + (((1.0 - w) - hz) + (z * r - x * y));
}
pub fn __cosdf(x: f64) f32 {
// |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]).
const C0 = -0x1ffffffd0c5e81.0p-54; // -0.499999997251031003120
const C1 = 0x155553e1053a42.0p-57; // 0.0416666233237390631894
const C2 = -0x16c087e80f1e27.0p-62; // -0.00138867637746099294692
const C3 = 0x199342e0ee5069.0p-68; // 0.0000243904487962774090654
// Try to optimize for parallel evaluation as in __tandf.c.
const z = x * x;
const w = z * z;
const r = C2 + z * C3;
return @floatCast(f32, ((1.0 + z * C0) + w * C1) + (w * z) * r);
}
// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
// Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
//
// Algorithm
// 1. Since sin(-x) = -sin(x), we need only to consider positive x.
// 2. Callers must return sin(-0) = -0 without calling here since our
// odd polynomial is not evaluated in a way that preserves -0.
// Callers may do the optimization sin(x) ~ x for tiny x.
// 3. sin(x) is approximated by a polynomial of degree 13 on
// [0,pi/4]
// 3 13
// sin(x) ~ x + S1*x + ... + S6*x
// where
//
// |sin(x) 2 4 6 8 10 12 | -58
// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
// | x |
//
// 4. sin(x+y) = sin(x) + sin'(x')*y
// ~ sin(x) + (1-x*x/2)*y
// For better accuracy, let
// 3 2 2 2 2
// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
// then 3 2
// sin(x) = x + (S1*x + (x *(r-y/2)+y))
pub fn __sin(x: f64, y: f64, iy: i32) f64 {
const S1 = -1.66666666666666324348e-01; // 0xBFC55555, 0x55555549
const S2 = 8.33333333332248946124e-03; // 0x3F811111, 0x1110F8A6
const S3 = -1.98412698298579493134e-04; // 0xBF2A01A0, 0x19C161D5
const S4 = 2.75573137070700676789e-06; // 0x3EC71DE3, 0x57B1FE7D
const S5 = -2.50507602534068634195e-08; // 0xBE5AE5E6, 0x8A2B9CEB
const S6 = 1.58969099521155010221e-10; // 0x3DE5D93A, 0x5ACFD57C
const z = x * x;
const w = z * z;
const r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6);
const v = z * x;
if (iy == 0) {
return x + v * (S1 + z * r);
} else {
return x - ((z * (0.5 * y - v * r) - y) - v * S1);
}
}
pub fn __sindf(x: f64) f32 {
// |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]).
const S1 = -0x15555554cbac77.0p-55; // -0.166666666416265235595
const S2 = 0x111110896efbb2.0p-59; // 0.0083333293858894631756
const S3 = -0x1a00f9e2cae774.0p-65; // -0.000198393348360966317347
const S4 = 0x16cd878c3b46a7.0p-71; // 0.0000027183114939898219064
// Try to optimize for parallel evaluation as in __tandf.c.
const z = x * x;
const w = z * z;
const r = S3 + z * S4;
const s = z * x;
return @floatCast(f32, (x + s * (S1 + z * S2)) + s * w * r);
}
// kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
// Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned.
//
// Algorithm
// 1. Since tan(-x) = -tan(x), we need only to consider positive x.
// 2. Callers must return tan(-0) = -0 without calling here since our
// odd polynomial is not evaluated in a way that preserves -0.
// Callers may do the optimization tan(x) ~ x for tiny x.
// 3. tan(x) is approximated by a odd polynomial of degree 27 on
// [0,0.67434]
// 3 27
// tan(x) ~ x + T1*x + ... + T13*x
// where
//
// |tan(x) 2 4 26 | -59.2
// |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
// | x |
//
// Note: tan(x+y) = tan(x) + tan'(x)*y
// ~ tan(x) + (1+x*x)*y
// Therefore, for better accuracy in computing tan(x+y), let
// 3 2 2 2 2
// r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
// then
// 3 2
// tan(x+y) = x + (T1*x + (x *(r+y)+y))
//
// 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
// tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
// = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
pub fn __tan(x_: f64, y_: f64, odd: bool) f64 {
var x = x_;
var y = y_;
const T = [_]f64{
3.33333333333334091986e-01, // 3FD55555, 55555563
1.33333333333201242699e-01, // 3FC11111, 1110FE7A
5.39682539762260521377e-02, // 3FABA1BA, 1BB341FE
2.18694882948595424599e-02, // 3F9664F4, 8406D637
8.86323982359930005737e-03, // 3F8226E3, E96E8493
3.59207910759131235356e-03, // 3F6D6D22, C9560328
1.45620945432529025516e-03, // 3F57DBC8, FEE08315
5.88041240820264096874e-04, // 3F4344D8, F2F26501
2.46463134818469906812e-04, // 3F3026F7, 1A8D1068
7.81794442939557092300e-05, // 3F147E88, A03792A6
7.14072491382608190305e-05, // 3F12B80F, 32F0A7E9
-1.85586374855275456654e-05, // BEF375CB, DB605373
2.59073051863633712884e-05, // 3EFB2A70, 74BF7AD4
};
const pio4 = 7.85398163397448278999e-01; // 3FE921FB, 54442D18
const pio4lo = 3.06161699786838301793e-17; // 3C81A626, 33145C07
var z: f64 = undefined;
var r: f64 = undefined;
var v: f64 = undefined;
var w: f64 = undefined;
var s: f64 = undefined;
var a: f64 = undefined;
var w0: f64 = undefined;
var a0: f64 = undefined;
var hx: u32 = undefined;
var sign: bool = undefined;
hx = @intCast(u32, @bitCast(u64, x) >> 32);
const big = (hx & 0x7fffffff) >= 0x3FE59428; // |x| >= 0.6744
if (big) {
sign = hx >> 31 != 0;
if (sign) {
x = -x;
y = -y;
}
x = (pio4 - x) + (pio4lo - y);
y = 0.0;
}
z = x * x;
w = z * z;
// Break x^5*(T[1]+x^2*T[2]+...) into
// x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
// x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
s = z * x;
r = y + z * (s * (r + v) + y) + s * T[0];
w = x + r;
if (big) {
s = 1 - 2 * @intToFloat(f64, @boolToInt(odd));
v = s - 2.0 * (x + (r - w * w / (w + s)));
return if (sign) -v else v;
}
if (!odd) {
return w;
}
// -1.0/(x+r) has up to 2ulp error, so compute it accurately
w0 = w;
w0 = @bitCast(f64, @bitCast(u64, w0) & 0xffffffff00000000);
v = r - (w0 - x); // w0+v = r+x
a = -1.0 / w;
a0 = a;
a0 = @bitCast(f64, @bitCast(u64, a0) & 0xffffffff00000000);
return a0 + a * (1.0 + a0 * w0 + a0 * v);
}
pub fn __tandf(x: f64, odd: bool) f32 {
// |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]).
const T = [_]f64{
0x15554d3418c99f.0p-54, // 0.333331395030791399758
0x1112fd38999f72.0p-55, // 0.133392002712976742718
0x1b54c91d865afe.0p-57, // 0.0533812378445670393523
0x191df3908c33ce.0p-58, // 0.0245283181166547278873
0x185dadfcecf44e.0p-61, // 0.00297435743359967304927
0x1362b9bf971bcd.0p-59, // 0.00946564784943673166728
};
const z = x * x;
// Split up the polynomial into small independent terms to give
// opportunities for parallel evaluation. The chosen splitting is
// micro-optimized for Athlons (XP, X64). It costs 2 multiplications
// relative to Horner's method on sequential machines.
//
// We add the small terms from lowest degree up for efficiency on
// non-sequential machines (the lowest degree terms tend to be ready
// earlier). Apart from this, we don't care about order of
// operations, and don't need to to care since we have precision to
// spare. However, the chosen splitting is good for accuracy too,
// and would give results as accurate as Horner's method if the
// small terms were added from highest degree down.
const r = T[4] + z * T[5];
const t = T[2] + z * T[3];
const w = z * z;
const s = z * x;
const u = T[0] + z * T[1];
const r0 = (x + s * u) + (s * w) * (t + w * r);
return @floatCast(f32, if (odd) -1.0 / r0 else r0);
}

View File

@@ -64,14 +64,14 @@ fn acos32(x: f32) f32 {
// x < -0.5
if (hx >> 31 != 0) {
const z = (1 + x) * 0.5;
const s = math.sqrt(z);
const s = @sqrt(z);
const w = r32(z) * s - pio2_lo;
return 2 * (pio2_hi - (s + w));
}
// x > 0.5
const z = (1.0 - x) * 0.5;
const s = math.sqrt(z);
const s = @sqrt(z);
const jx = @bitCast(u32, s);
const df = @bitCast(f32, jx & 0xFFFFF000);
const c = (z - df * df) / (s + df);
@@ -133,14 +133,14 @@ fn acos64(x: f64) f64 {
// x < -0.5
if (hx >> 31 != 0) {
const z = (1.0 + x) * 0.5;
const s = math.sqrt(z);
const s = @sqrt(z);
const w = r64(z) * s - pio2_lo;
return 2 * (pio2_hi - (s + w));
}
// x > 0.5
const z = (1.0 - x) * 0.5;
const s = math.sqrt(z);
const s = @sqrt(z);
const jx = @bitCast(u64, s);
const df = @bitCast(f64, jx & 0xFFFFFFFF00000000);
const c = (z - df * df) / (s + df);

View File

@@ -29,15 +29,15 @@ fn acosh32(x: f32) f32 {
// |x| < 2, invalid if x < 1 or nan
if (i < 0x3F800000 + (1 << 23)) {
return math.log1p(x - 1 + math.sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
return math.log1p(x - 1 + @sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
}
// |x| < 0x1p12
else if (i < 0x3F800000 + (12 << 23)) {
return math.ln(2 * x - 1 / (x + math.sqrt(x * x - 1)));
return @log(2 * x - 1 / (x + @sqrt(x * x - 1)));
}
// |x| >= 0x1p12
else {
return math.ln(x) + 0.693147180559945309417232121458176568;
return @log(x) + 0.693147180559945309417232121458176568;
}
}
@@ -47,15 +47,15 @@ fn acosh64(x: f64) f64 {
// |x| < 2, invalid if x < 1 or nan
if (e < 0x3FF + 1) {
return math.log1p(x - 1 + math.sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
return math.log1p(x - 1 + @sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
}
// |x| < 0x1p26
else if (e < 0x3FF + 26) {
return math.ln(2 * x - 1 / (x + math.sqrt(x * x - 1)));
return @log(2 * x - 1 / (x + @sqrt(x * x - 1)));
}
// |x| >= 0x1p26 or nan
else {
return math.ln(x) + 0.693147180559945309417232121458176568;
return @log(x) + 0.693147180559945309417232121458176568;
}
}

View File

@@ -60,8 +60,8 @@ fn asin32(x: f32) f32 {
}
// 1 > |x| >= 0.5
const z = (1 - math.fabs(x)) * 0.5;
const s = math.sqrt(z);
const z = (1 - @fabs(x)) * 0.5;
const s = @sqrt(z);
const fx = pio2 - 2 * (s + s * r32(z));
if (hx >> 31 != 0) {
@@ -119,8 +119,8 @@ fn asin64(x: f64) f64 {
}
// 1 > |x| >= 0.5
const z = (1 - math.fabs(x)) * 0.5;
const s = math.sqrt(z);
const z = (1 - @fabs(x)) * 0.5;
const s = @sqrt(z);
const r = r64(z);
var fx: f64 = undefined;

View File

@@ -39,15 +39,15 @@ fn asinh32(x: f32) f32 {
// |x| >= 0x1p12 or inf or nan
if (i >= 0x3F800000 + (12 << 23)) {
rx = math.ln(rx) + 0.69314718055994530941723212145817656;
rx = @log(rx) + 0.69314718055994530941723212145817656;
}
// |x| >= 2
else if (i >= 0x3F800000 + (1 << 23)) {
rx = math.ln(2 * x + 1 / (math.sqrt(x * x + 1) + x));
rx = @log(2 * x + 1 / (@sqrt(x * x + 1) + x));
}
// |x| >= 0x1p-12, up to 1.6ulp error
else if (i >= 0x3F800000 - (12 << 23)) {
rx = math.log1p(x + x * x / (math.sqrt(x * x + 1) + 1));
rx = math.log1p(x + x * x / (@sqrt(x * x + 1) + 1));
}
// |x| < 0x1p-12, inexact if x != 0
else {
@@ -70,15 +70,15 @@ fn asinh64(x: f64) f64 {
// |x| >= 0x1p26 or inf or nan
if (e >= 0x3FF + 26) {
rx = math.ln(rx) + 0.693147180559945309417232121458176568;
rx = @log(rx) + 0.693147180559945309417232121458176568;
}
// |x| >= 2
else if (e >= 0x3FF + 1) {
rx = math.ln(2 * x + 1 / (math.sqrt(x * x + 1) + x));
rx = @log(2 * x + 1 / (@sqrt(x * x + 1) + x));
}
// |x| >= 0x1p-12, up to 1.6ulp error
else if (e >= 0x3FF - 26) {
rx = math.log1p(x + x * x / (math.sqrt(x * x + 1) + 1));
rx = math.log1p(x + x * x / (@sqrt(x * x + 1) + 1));
}
// |x| < 0x1p-12, inexact if x != 0
else {

View File

@@ -73,7 +73,7 @@ fn atan32(x_: f32) f32 {
}
id = null;
} else {
x = math.fabs(x);
x = @fabs(x);
// |x| < 1.1875
if (ix < 0x3F980000) {
// 7/16 <= |x| < 11/16
@@ -171,7 +171,7 @@ fn atan64(x_: f64) f64 {
}
id = null;
} else {
x = math.fabs(x);
x = @fabs(x);
// |x| < 1.1875
if (ix < 0x3FF30000) {
// 7/16 <= |x| < 11/16

View File

@@ -108,7 +108,7 @@ fn atan2_32(y: f32, x: f32) f32 {
if ((m & 2) != 0 and iy + (26 << 23) < ix) {
break :z 0.0;
} else {
break :z math.atan(math.fabs(y / x));
break :z math.atan(@fabs(y / x));
}
};
@@ -198,7 +198,7 @@ fn atan2_64(y: f64, x: f64) f64 {
if ((m & 2) != 0 and iy +% (64 << 20) < ix) {
break :z 0.0;
} else {
break :z math.atan(math.fabs(y / x));
break :z math.atan(@fabs(y / x));
}
};

View File

@@ -1,170 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/ceilf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/ceil.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
/// Returns the least integer value greater than of equal to x.
///
/// Special Cases:
/// - ceil(+-0) = +-0
/// - ceil(+-inf) = +-inf
/// - ceil(nan) = nan
pub fn ceil(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => ceil32(x),
f64 => ceil64(x),
f128 => ceil128(x),
// TODO this is not correct for some targets
c_longdouble => @floatCast(c_longdouble, ceil128(x)),
else => @compileError("ceil not implemented for " ++ @typeName(T)),
};
}
fn ceil32(x: f32) f32 {
var u = @bitCast(u32, x);
var e = @intCast(i32, (u >> 23) & 0xFF) - 0x7F;
var m: u32 = undefined;
// TODO: Shouldn't need this explicit check.
if (x == 0.0) {
return x;
}
if (e >= 23) {
return x;
} else if (e >= 0) {
m = @as(u32, 0x007FFFFF) >> @intCast(u5, e);
if (u & m == 0) {
return x;
}
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 31 == 0) {
u += m;
}
u &= ~m;
return @bitCast(f32, u);
} else {
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 31 != 0) {
return -0.0;
} else {
return 1.0;
}
}
}
fn ceil64(x: f64) f64 {
const f64_toint = 1.0 / math.floatEps(f64);
const u = @bitCast(u64, x);
const e = (u >> 52) & 0x7FF;
var y: f64 = undefined;
if (e >= 0x3FF + 52 or x == 0) {
return x;
}
if (u >> 63 != 0) {
y = x - f64_toint + f64_toint - x;
} else {
y = x + f64_toint - f64_toint - x;
}
if (e <= 0x3FF - 1) {
math.doNotOptimizeAway(y);
if (u >> 63 != 0) {
return -0.0;
} else {
return 1.0;
}
} else if (y < 0) {
return x + y + 1;
} else {
return x + y;
}
}
fn ceil128(x: f128) f128 {
const f128_toint = 1.0 / math.floatEps(f128);
const u = @bitCast(u128, x);
const e = (u >> 112) & 0x7FFF;
var y: f128 = undefined;
if (e >= 0x3FFF + 112 or x == 0) return x;
if (u >> 127 != 0) {
y = x - f128_toint + f128_toint - x;
} else {
y = x + f128_toint - f128_toint - x;
}
if (e <= 0x3FFF - 1) {
math.doNotOptimizeAway(y);
if (u >> 127 != 0) {
return -0.0;
} else {
return 1.0;
}
} else if (y < 0) {
return x + y + 1;
} else {
return x + y;
}
}
test "math.ceil" {
try expect(ceil(@as(f32, 0.0)) == ceil32(0.0));
try expect(ceil(@as(f64, 0.0)) == ceil64(0.0));
try expect(ceil(@as(f128, 0.0)) == ceil128(0.0));
}
test "math.ceil32" {
try expect(ceil32(1.3) == 2.0);
try expect(ceil32(-1.3) == -1.0);
try expect(ceil32(0.2) == 1.0);
}
test "math.ceil64" {
try expect(ceil64(1.3) == 2.0);
try expect(ceil64(-1.3) == -1.0);
try expect(ceil64(0.2) == 1.0);
}
test "math.ceil128" {
try expect(ceil128(1.3) == 2.0);
try expect(ceil128(-1.3) == -1.0);
try expect(ceil128(0.2) == 1.0);
}
test "math.ceil32.special" {
try expect(ceil32(0.0) == 0.0);
try expect(ceil32(-0.0) == -0.0);
try expect(math.isPositiveInf(ceil32(math.inf(f32))));
try expect(math.isNegativeInf(ceil32(-math.inf(f32))));
try expect(math.isNan(ceil32(math.nan(f32))));
}
test "math.ceil64.special" {
try expect(ceil64(0.0) == 0.0);
try expect(ceil64(-0.0) == -0.0);
try expect(math.isPositiveInf(ceil64(math.inf(f64))));
try expect(math.isNegativeInf(ceil64(-math.inf(f64))));
try expect(math.isNan(ceil64(math.nan(f64))));
}
test "math.ceil128.special" {
try expect(ceil128(0.0) == 0.0);
try expect(ceil128(-0.0) == -0.0);
try expect(math.isPositiveInf(ceil128(math.inf(f128))));
try expect(math.isNegativeInf(ceil128(-math.inf(f128))));
try expect(math.isNan(ceil128(math.nan(f128))));
}

View File

@@ -115,7 +115,7 @@ pub fn Complex(comptime T: type) type {
/// Returns the magnitude of a complex number.
pub fn magnitude(self: Self) T {
return math.sqrt(self.re * self.re + self.im * self.im);
return @sqrt(self.re * self.re + self.im * self.im);
}
};
}

View File

@@ -66,7 +66,7 @@ fn atan32(z: Complex(f32)) Complex(f32) {
t = y + 1.0;
a = (x2 + (t * t)) / a;
return Complex(f32).init(w, 0.25 * math.ln(a));
return Complex(f32).init(w, 0.25 * @log(a));
}
fn redupif64(x: f64) f64 {
@@ -115,7 +115,7 @@ fn atan64(z: Complex(f64)) Complex(f64) {
t = y + 1.0;
a = (x2 + (t * t)) / a;
return Complex(f64).init(w, 0.25 * math.ln(a));
return Complex(f64).init(w, 0.25 * @log(a));
}
const epsilon = 0.0001;

View File

@@ -44,12 +44,12 @@ fn cosh32(z: Complex(f32)) Complex(f32) {
// |x|>= 9, so cosh(x) ~= exp(|x|)
if (ix < 0x42b17218) {
// x < 88.7: exp(|x|) won't overflow
const h = math.exp(math.fabs(x)) * 0.5;
const h = @exp(@fabs(x)) * 0.5;
return Complex(f32).init(math.copysign(f32, h, x) * math.cos(y), h * math.sin(y));
}
// x < 192.7: scale to avoid overflow
else if (ix < 0x4340b1e7) {
const v = Complex(f32).init(math.fabs(x), y);
const v = Complex(f32).init(@fabs(x), y);
const r = ldexp_cexp(v, -1);
return Complex(f32).init(r.re, r.im * math.copysign(f32, 1, x));
}
@@ -112,12 +112,12 @@ fn cosh64(z: Complex(f64)) Complex(f64) {
// |x|>= 22, so cosh(x) ~= exp(|x|)
if (ix < 0x40862e42) {
// x < 710: exp(|x|) won't overflow
const h = math.exp(math.fabs(x)) * 0.5;
const h = @exp(@fabs(x)) * 0.5;
return Complex(f64).init(h * math.cos(y), math.copysign(f64, h, x) * math.sin(y));
}
// x < 1455: scale to avoid overflow
else if (ix < 0x4096bbaa) {
const v = Complex(f64).init(math.fabs(x), y);
const v = Complex(f64).init(@fabs(x), y);
const r = ldexp_cexp(v, -1);
return Complex(f64).init(r.re, r.im * math.copysign(f64, 1, x));
}

View File

@@ -33,7 +33,7 @@ fn exp32(z: Complex(f32)) Complex(f32) {
const hy = @bitCast(u32, y) & 0x7fffffff;
// cexp(x + i0) = exp(x) + i0
if (hy == 0) {
return Complex(f32).init(math.exp(x), y);
return Complex(f32).init(@exp(x), y);
}
const hx = @bitCast(u32, x);
@@ -63,7 +63,7 @@ fn exp32(z: Complex(f32)) Complex(f32) {
// - x = +-inf
// - x = nan
else {
const exp_x = math.exp(x);
const exp_x = @exp(x);
return Complex(f32).init(exp_x * math.cos(y), exp_x * math.sin(y));
}
}
@@ -81,7 +81,7 @@ fn exp64(z: Complex(f64)) Complex(f64) {
// cexp(x + i0) = exp(x) + i0
if (hy | ly == 0) {
return Complex(f64).init(math.exp(x), y);
return Complex(f64).init(@exp(x), y);
}
const fx = @bitCast(u64, x);
@@ -114,13 +114,13 @@ fn exp64(z: Complex(f64)) Complex(f64) {
// - x = +-inf
// - x = nan
else {
const exp_x = math.exp(x);
const exp_x = @exp(x);
return Complex(f64).init(exp_x * math.cos(y), exp_x * math.sin(y));
}
}
test "complex.cexp32" {
const tolerance_f32 = math.sqrt(math.floatEps(f32));
const tolerance_f32 = @sqrt(math.floatEps(f32));
{
const a = Complex(f32).init(5, 3);
@@ -140,7 +140,7 @@ test "complex.cexp32" {
}
test "complex.cexp64" {
const tolerance_f64 = math.sqrt(math.floatEps(f64));
const tolerance_f64 = @sqrt(math.floatEps(f64));
{
const a = Complex(f64).init(5, 3);

View File

@@ -26,7 +26,7 @@ fn frexp_exp32(x: f32, expt: *i32) f32 {
const k = 235; // reduction constant
const kln2 = 162.88958740; // k * ln2
const exp_x = math.exp(x - kln2);
const exp_x = @exp(x - kln2);
const hx = @bitCast(u32, exp_x);
// TODO zig should allow this cast implicitly because it should know the value is in range
expt.* = @intCast(i32, hx >> 23) - (0x7f + 127) + k;
@@ -54,7 +54,7 @@ fn frexp_exp64(x: f64, expt: *i32) f64 {
const k = 1799; // reduction constant
const kln2 = 1246.97177782734161156; // k * ln2
const exp_x = math.exp(x - kln2);
const exp_x = @exp(x - kln2);
const fx = @bitCast(u64, exp_x);
const hx = @intCast(u32, fx >> 32);

View File

@@ -10,7 +10,7 @@ pub fn log(z: anytype) Complex(@TypeOf(z.re)) {
const r = cmath.abs(z);
const phi = cmath.arg(z);
return Complex(T).init(math.ln(r), phi);
return Complex(T).init(@log(r), phi);
}
const epsilon = 0.0001;

View File

@@ -44,12 +44,12 @@ fn sinh32(z: Complex(f32)) Complex(f32) {
// |x|>= 9, so cosh(x) ~= exp(|x|)
if (ix < 0x42b17218) {
// x < 88.7: exp(|x|) won't overflow
const h = math.exp(math.fabs(x)) * 0.5;
const h = @exp(@fabs(x)) * 0.5;
return Complex(f32).init(math.copysign(f32, h, x) * math.cos(y), h * math.sin(y));
}
// x < 192.7: scale to avoid overflow
else if (ix < 0x4340b1e7) {
const v = Complex(f32).init(math.fabs(x), y);
const v = Complex(f32).init(@fabs(x), y);
const r = ldexp_cexp(v, -1);
return Complex(f32).init(r.re * math.copysign(f32, 1, x), r.im);
}
@@ -111,12 +111,12 @@ fn sinh64(z: Complex(f64)) Complex(f64) {
// |x|>= 22, so cosh(x) ~= exp(|x|)
if (ix < 0x40862e42) {
// x < 710: exp(|x|) won't overflow
const h = math.exp(math.fabs(x)) * 0.5;
const h = @exp(@fabs(x)) * 0.5;
return Complex(f64).init(math.copysign(f64, h, x) * math.cos(y), h * math.sin(y));
}
// x < 1455: scale to avoid overflow
else if (ix < 0x4096bbaa) {
const v = Complex(f64).init(math.fabs(x), y);
const v = Complex(f64).init(@fabs(x), y);
const r = ldexp_cexp(v, -1);
return Complex(f64).init(r.re * math.copysign(f64, 1, x), r.im);
}

View File

@@ -43,7 +43,7 @@ fn sqrt32(z: Complex(f32)) Complex(f32) {
// sqrt(-inf + i nan) = nan +- inf i
// sqrt(-inf + iy) = 0 + inf i
if (math.signbit(x)) {
return Complex(f32).init(math.fabs(x - y), math.copysign(f32, x, y));
return Complex(f32).init(@fabs(x - y), math.copysign(f32, x, y));
} else {
return Complex(f32).init(x, math.copysign(f32, y - y, y));
}
@@ -56,15 +56,15 @@ fn sqrt32(z: Complex(f32)) Complex(f32) {
const dy = @as(f64, y);
if (dx >= 0) {
const t = math.sqrt((dx + math.hypot(f64, dx, dy)) * 0.5);
const t = @sqrt((dx + math.hypot(f64, dx, dy)) * 0.5);
return Complex(f32).init(
@floatCast(f32, t),
@floatCast(f32, dy / (2.0 * t)),
);
} else {
const t = math.sqrt((-dx + math.hypot(f64, dx, dy)) * 0.5);
const t = @sqrt((-dx + math.hypot(f64, dx, dy)) * 0.5);
return Complex(f32).init(
@floatCast(f32, math.fabs(y) / (2.0 * t)),
@floatCast(f32, @fabs(y) / (2.0 * t)),
@floatCast(f32, math.copysign(f64, t, y)),
);
}
@@ -94,7 +94,7 @@ fn sqrt64(z: Complex(f64)) Complex(f64) {
// sqrt(-inf + i nan) = nan +- inf i
// sqrt(-inf + iy) = 0 + inf i
if (math.signbit(x)) {
return Complex(f64).init(math.fabs(x - y), math.copysign(f64, x, y));
return Complex(f64).init(@fabs(x - y), math.copysign(f64, x, y));
} else {
return Complex(f64).init(x, math.copysign(f64, y - y, y));
}
@@ -104,7 +104,7 @@ fn sqrt64(z: Complex(f64)) Complex(f64) {
// scale to avoid overflow
var scale = false;
if (math.fabs(x) >= threshold or math.fabs(y) >= threshold) {
if (@fabs(x) >= threshold or @fabs(y) >= threshold) {
x *= 0.25;
y *= 0.25;
scale = true;
@@ -112,11 +112,11 @@ fn sqrt64(z: Complex(f64)) Complex(f64) {
var result: Complex(f64) = undefined;
if (x >= 0) {
const t = math.sqrt((x + math.hypot(f64, x, y)) * 0.5);
const t = @sqrt((x + math.hypot(f64, x, y)) * 0.5);
result = Complex(f64).init(t, y / (2.0 * t));
} else {
const t = math.sqrt((-x + math.hypot(f64, x, y)) * 0.5);
result = Complex(f64).init(math.fabs(y) / (2.0 * t), math.copysign(f64, t, y));
const t = @sqrt((-x + math.hypot(f64, x, y)) * 0.5);
result = Complex(f64).init(@fabs(y) / (2.0 * t), math.copysign(f64, t, y));
}
if (scale) {

View File

@@ -44,7 +44,7 @@ fn tanh32(z: Complex(f32)) Complex(f32) {
// x >= 11
if (ix >= 0x41300000) {
const exp_mx = math.exp(-math.fabs(x));
const exp_mx = @exp(-@fabs(x));
return Complex(f32).init(math.copysign(f32, 1, x), 4 * math.sin(y) * math.cos(y) * exp_mx * exp_mx);
}
@@ -52,7 +52,7 @@ fn tanh32(z: Complex(f32)) Complex(f32) {
const t = math.tan(y);
const beta = 1.0 + t * t;
const s = math.sinh(x);
const rho = math.sqrt(1 + s * s);
const rho = @sqrt(1 + s * s);
const den = 1 + beta * s * s;
return Complex(f32).init((beta * rho * s) / den, t / den);
@@ -87,7 +87,7 @@ fn tanh64(z: Complex(f64)) Complex(f64) {
// x >= 22
if (ix >= 0x40360000) {
const exp_mx = math.exp(-math.fabs(x));
const exp_mx = @exp(-@fabs(x));
return Complex(f64).init(math.copysign(f64, 1, x), 4 * math.sin(y) * math.cos(y) * exp_mx * exp_mx);
}
@@ -95,7 +95,7 @@ fn tanh64(z: Complex(f64)) Complex(f64) {
const t = math.tan(y);
const beta = 1.0 + t * t;
const s = math.sinh(x);
const rho = math.sqrt(1 + s * s);
const rho = @sqrt(1 + s * s);
const den = 1 + beta * s * s;
return Complex(f64).init((beta * rho * s) / den, t / den);

View File

@@ -1,154 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/cosf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/cos.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const kernel = @import("__trig.zig");
const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2;
const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f;
/// Returns the cosine of the radian value x.
///
/// Special Cases:
/// - cos(+-inf) = nan
/// - cos(nan) = nan
pub fn cos(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => cos32(x),
f64 => cos64(x),
else => @compileError("cos not implemented for " ++ @typeName(T)),
};
}
fn cos32(x: f32) f32 {
// Small multiples of pi/2 rounded to double precision.
const c1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18
const c2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18
const c3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
const c4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
var ix = @bitCast(u32, x);
const sign = ix >> 31 != 0;
ix &= 0x7fffffff;
if (ix <= 0x3f490fda) { // |x| ~<= pi/4
if (ix < 0x39800000) { // |x| < 2**-12
// raise inexact if x != 0
math.doNotOptimizeAway(x + 0x1p120);
return 1.0;
}
return kernel.__cosdf(x);
}
if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
if (ix > 0x4016cbe3) { // |x| ~> 3*pi/4
return -kernel.__cosdf(if (sign) x + c2pio2 else x - c2pio2);
} else {
if (sign) {
return kernel.__sindf(x + c1pio2);
} else {
return kernel.__sindf(c1pio2 - x);
}
}
}
if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
if (ix > 0x40afeddf) { // |x| ~> 7*pi/4
return kernel.__cosdf(if (sign) x + c4pio2 else x - c4pio2);
} else {
if (sign) {
return kernel.__sindf(-x - c3pio2);
} else {
return kernel.__sindf(x - c3pio2);
}
}
}
// cos(Inf or NaN) is NaN
if (ix >= 0x7f800000) {
return x - x;
}
var y: f64 = undefined;
const n = __rem_pio2f(x, &y);
return switch (n & 3) {
0 => kernel.__cosdf(y),
1 => kernel.__sindf(-y),
2 => -kernel.__cosdf(y),
else => kernel.__sindf(y),
};
}
fn cos64(x: f64) f64 {
var ix = @bitCast(u64, x) >> 32;
ix &= 0x7fffffff;
// |x| ~< pi/4
if (ix <= 0x3fe921fb) {
if (ix < 0x3e46a09e) { // |x| < 2**-27 * sqrt(2)
// raise inexact if x!=0
math.doNotOptimizeAway(x + 0x1p120);
return 1.0;
}
return kernel.__cos(x, 0);
}
// cos(Inf or NaN) is NaN
if (ix >= 0x7ff00000) {
return x - x;
}
var y: [2]f64 = undefined;
const n = __rem_pio2(x, &y);
return switch (n & 3) {
0 => kernel.__cos(y[0], y[1]),
1 => -kernel.__sin(y[0], y[1], 1),
2 => -kernel.__cos(y[0], y[1]),
else => kernel.__sin(y[0], y[1], 1),
};
}
test "math.cos" {
try expect(cos(@as(f32, 0.0)) == cos32(0.0));
try expect(cos(@as(f64, 0.0)) == cos64(0.0));
}
test "math.cos32" {
const epsilon = 0.00001;
try expect(math.approxEqAbs(f32, cos32(0.0), 1.0, epsilon));
try expect(math.approxEqAbs(f32, cos32(0.2), 0.980067, epsilon));
try expect(math.approxEqAbs(f32, cos32(0.8923), 0.627623, epsilon));
try expect(math.approxEqAbs(f32, cos32(1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f32, cos32(-1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f32, cos32(37.45), 0.969132, epsilon));
try expect(math.approxEqAbs(f32, cos32(89.123), 0.400798, epsilon));
}
test "math.cos64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, cos64(0.0), 1.0, epsilon));
try expect(math.approxEqAbs(f64, cos64(0.2), 0.980067, epsilon));
try expect(math.approxEqAbs(f64, cos64(0.8923), 0.627623, epsilon));
try expect(math.approxEqAbs(f64, cos64(1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f64, cos64(-1.5), 0.070737, epsilon));
try expect(math.approxEqAbs(f64, cos64(37.45), 0.969132, epsilon));
try expect(math.approxEqAbs(f64, cos64(89.123), 0.40080, epsilon));
}
test "math.cos32.special" {
try expect(math.isNan(cos32(math.inf(f32))));
try expect(math.isNan(cos32(-math.inf(f32))));
try expect(math.isNan(cos32(math.nan(f32))));
}
test "math.cos64.special" {
try expect(math.isNan(cos64(math.inf(f64))));
try expect(math.isNan(cos64(-math.inf(f64))));
try expect(math.isNan(cos64(math.nan(f64))));
}

View File

@@ -45,7 +45,7 @@ fn cosh32(x: f32) f32 {
// |x| < log(FLT_MAX)
if (ux < 0x42B17217) {
const t = math.exp(ax);
const t = @exp(ax);
return 0.5 * (t + 1 / t);
}
@@ -77,7 +77,7 @@ fn cosh64(x: f64) f64 {
// |x| < log(DBL_MAX)
if (w < 0x40862E42) {
const t = math.exp(ax);
const t = @exp(ax);
// NOTE: If x > log(0x1p26) then 1/t is not required.
return 0.5 * (t + 1 / t);
}

View File

@@ -1,217 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/expf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/exp.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
/// Returns e raised to the power of x (e^x).
///
/// Special Cases:
/// - exp(+inf) = +inf
/// - exp(nan) = nan
pub fn exp(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => exp32(x),
f64 => exp64(x),
else => @compileError("exp not implemented for " ++ @typeName(T)),
};
}
fn exp32(x_: f32) f32 {
const half = [_]f32{ 0.5, -0.5 };
const ln2hi = 6.9314575195e-1;
const ln2lo = 1.4286067653e-6;
const invln2 = 1.4426950216e+0;
const P1 = 1.6666625440e-1;
const P2 = -2.7667332906e-3;
var x = x_;
var hx = @bitCast(u32, x);
const sign = @intCast(i32, hx >> 31);
hx &= 0x7FFFFFFF;
if (math.isNan(x)) {
return x;
}
// |x| >= -87.33655 or nan
if (hx >= 0x42AEAC50) {
// nan
if (hx > 0x7F800000) {
return x;
}
// x >= 88.722839
if (hx >= 0x42b17218 and sign == 0) {
return x * 0x1.0p127;
}
if (sign != 0) {
math.doNotOptimizeAway(-0x1.0p-149 / x); // overflow
// x <= -103.972084
if (hx >= 0x42CFF1B5) {
return 0;
}
}
}
var k: i32 = undefined;
var hi: f32 = undefined;
var lo: f32 = undefined;
// |x| > 0.5 * ln2
if (hx > 0x3EB17218) {
// |x| > 1.5 * ln2
if (hx > 0x3F851592) {
k = @floatToInt(i32, invln2 * x + half[@intCast(usize, sign)]);
} else {
k = 1 - sign - sign;
}
const fk = @intToFloat(f32, k);
hi = x - fk * ln2hi;
lo = fk * ln2lo;
x = hi - lo;
}
// |x| > 2^(-14)
else if (hx > 0x39000000) {
k = 0;
hi = x;
lo = 0;
} else {
math.doNotOptimizeAway(0x1.0p127 + x); // inexact
return 1 + x;
}
const xx = x * x;
const c = x - xx * (P1 + xx * P2);
const y = 1 + (x * c / (2 - c) - lo + hi);
if (k == 0) {
return y;
} else {
return math.scalbn(y, k);
}
}
fn exp64(x_: f64) f64 {
const half = [_]f64{ 0.5, -0.5 };
const ln2hi: f64 = 6.93147180369123816490e-01;
const ln2lo: f64 = 1.90821492927058770002e-10;
const invln2: f64 = 1.44269504088896338700e+00;
const P1: f64 = 1.66666666666666019037e-01;
const P2: f64 = -2.77777777770155933842e-03;
const P3: f64 = 6.61375632143793436117e-05;
const P4: f64 = -1.65339022054652515390e-06;
const P5: f64 = 4.13813679705723846039e-08;
var x = x_;
var ux = @bitCast(u64, x);
var hx = ux >> 32;
const sign = @intCast(i32, hx >> 31);
hx &= 0x7FFFFFFF;
if (math.isNan(x)) {
return x;
}
// |x| >= 708.39 or nan
if (hx >= 0x4086232B) {
// nan
if (hx > 0x7FF00000) {
return x;
}
if (x > 709.782712893383973096) {
// overflow if x != inf
if (!math.isInf(x)) {
math.raiseOverflow();
}
return math.inf(f64);
}
if (x < -708.39641853226410622) {
// underflow if x != -inf
// math.doNotOptimizeAway(@as(f32, -0x1.0p-149 / x));
if (x < -745.13321910194110842) {
return 0;
}
}
}
// argument reduction
var k: i32 = undefined;
var hi: f64 = undefined;
var lo: f64 = undefined;
// |x| > 0.5 * ln2
if (hx > 0x3FD62E42) {
// |x| >= 1.5 * ln2
if (hx > 0x3FF0A2B2) {
k = @floatToInt(i32, invln2 * x + half[@intCast(usize, sign)]);
} else {
k = 1 - sign - sign;
}
const dk = @intToFloat(f64, k);
hi = x - dk * ln2hi;
lo = dk * ln2lo;
x = hi - lo;
}
// |x| > 2^(-28)
else if (hx > 0x3E300000) {
k = 0;
hi = x;
lo = 0;
} else {
// inexact if x != 0
// math.doNotOptimizeAway(0x1.0p1023 + x);
return 1 + x;
}
const xx = x * x;
const c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5))));
const y = 1 + (x * c / (2 - c) - lo + hi);
if (k == 0) {
return y;
} else {
return math.scalbn(y, k);
}
}
test "math.exp" {
try expect(exp(@as(f32, 0.0)) == exp32(0.0));
try expect(exp(@as(f64, 0.0)) == exp64(0.0));
}
test "math.exp32" {
const epsilon = 0.000001;
try expect(exp32(0.0) == 1.0);
try expect(math.approxEqAbs(f32, exp32(0.0), 1.0, epsilon));
try expect(math.approxEqAbs(f32, exp32(0.2), 1.221403, epsilon));
try expect(math.approxEqAbs(f32, exp32(0.8923), 2.440737, epsilon));
try expect(math.approxEqAbs(f32, exp32(1.5), 4.481689, epsilon));
}
test "math.exp64" {
const epsilon = 0.000001;
try expect(exp64(0.0) == 1.0);
try expect(math.approxEqAbs(f64, exp64(0.0), 1.0, epsilon));
try expect(math.approxEqAbs(f64, exp64(0.2), 1.221403, epsilon));
try expect(math.approxEqAbs(f64, exp64(0.8923), 2.440737, epsilon));
try expect(math.approxEqAbs(f64, exp64(1.5), 4.481689, epsilon));
}
test "math.exp32.special" {
try expect(math.isPositiveInf(exp32(math.inf(f32))));
try expect(math.isNan(exp32(math.nan(f32))));
}
test "math.exp64.special" {
try expect(math.isPositiveInf(exp64(math.inf(f64))));
try expect(math.isNan(exp64(math.nan(f64))));
}

View File

@@ -1,465 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/exp2f.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/exp2.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
/// Returns 2 raised to the power of x (2^x).
///
/// Special Cases:
/// - exp2(+inf) = +inf
/// - exp2(nan) = nan
pub fn exp2(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => exp2_32(x),
f64 => exp2_64(x),
else => @compileError("exp2 not implemented for " ++ @typeName(T)),
};
}
const exp2ft = [_]f64{
0x1.6a09e667f3bcdp-1,
0x1.7a11473eb0187p-1,
0x1.8ace5422aa0dbp-1,
0x1.9c49182a3f090p-1,
0x1.ae89f995ad3adp-1,
0x1.c199bdd85529cp-1,
0x1.d5818dcfba487p-1,
0x1.ea4afa2a490dap-1,
0x1.0000000000000p+0,
0x1.0b5586cf9890fp+0,
0x1.172b83c7d517bp+0,
0x1.2387a6e756238p+0,
0x1.306fe0a31b715p+0,
0x1.3dea64c123422p+0,
0x1.4bfdad5362a27p+0,
0x1.5ab07dd485429p+0,
};
fn exp2_32(x: f32) f32 {
const tblsiz = @intCast(u32, exp2ft.len);
const redux: f32 = 0x1.8p23 / @intToFloat(f32, tblsiz);
const P1: f32 = 0x1.62e430p-1;
const P2: f32 = 0x1.ebfbe0p-3;
const P3: f32 = 0x1.c6b348p-5;
const P4: f32 = 0x1.3b2c9cp-7;
var u = @bitCast(u32, x);
const ix = u & 0x7FFFFFFF;
// |x| > 126
if (ix > 0x42FC0000) {
// nan
if (ix > 0x7F800000) {
return x;
}
// x >= 128
if (u >= 0x43000000 and u < 0x80000000) {
return x * 0x1.0p127;
}
// x < -126
if (u >= 0x80000000) {
if (u >= 0xC3160000 or u & 0x000FFFF != 0) {
math.doNotOptimizeAway(-0x1.0p-149 / x);
}
// x <= -150
if (u >= 0x3160000) {
return 0;
}
}
}
// |x| <= 0x1p-25
else if (ix <= 0x33000000) {
return 1.0 + x;
}
// NOTE: musl relies on unsafe behaviours which are replicated below
// (addition/bit-shift overflow). Appears that this produces the
// intended result but should confirm how GCC/Clang handle this to ensure.
var uf = x + redux;
var i_0 = @bitCast(u32, uf);
i_0 +%= tblsiz / 2;
const k = i_0 / tblsiz;
const uk = @bitCast(f64, @as(u64, 0x3FF + k) << 52);
i_0 &= tblsiz - 1;
uf -= redux;
const z: f64 = x - uf;
var r: f64 = exp2ft[@intCast(usize, i_0)];
const t: f64 = r * z;
r = r + t * (P1 + z * P2) + t * (z * z) * (P3 + z * P4);
return @floatCast(f32, r * uk);
}
const exp2dt = [_]f64{
// exp2(z + eps) eps
0x1.6a09e667f3d5dp-1, 0x1.9880p-44,
0x1.6b052fa751744p-1, 0x1.8000p-50,
0x1.6c012750bd9fep-1, -0x1.8780p-45,
0x1.6cfdcddd476bfp-1, 0x1.ec00p-46,
0x1.6dfb23c651a29p-1, -0x1.8000p-50,
0x1.6ef9298593ae3p-1, -0x1.c000p-52,
0x1.6ff7df9519386p-1, -0x1.fd80p-45,
0x1.70f7466f42da3p-1, -0x1.c880p-45,
0x1.71f75e8ec5fc3p-1, 0x1.3c00p-46,
0x1.72f8286eacf05p-1, -0x1.8300p-44,
0x1.73f9a48a58152p-1, -0x1.0c00p-47,
0x1.74fbd35d7ccfcp-1, 0x1.f880p-45,
0x1.75feb564267f1p-1, 0x1.3e00p-47,
0x1.77024b1ab6d48p-1, -0x1.7d00p-45,
0x1.780694fde5d38p-1, -0x1.d000p-50,
0x1.790b938ac1d00p-1, 0x1.3000p-49,
0x1.7a11473eb0178p-1, -0x1.d000p-49,
0x1.7b17b0976d060p-1, 0x1.0400p-45,
0x1.7c1ed0130c133p-1, 0x1.0000p-53,
0x1.7d26a62ff8636p-1, -0x1.6900p-45,
0x1.7e2f336cf4e3bp-1, -0x1.2e00p-47,
0x1.7f3878491c3e8p-1, -0x1.4580p-45,
0x1.80427543e1b4ep-1, 0x1.3000p-44,
0x1.814d2add1071ap-1, 0x1.f000p-47,
0x1.82589994ccd7ep-1, -0x1.1c00p-45,
0x1.8364c1eb942d0p-1, 0x1.9d00p-45,
0x1.8471a4623cab5p-1, 0x1.7100p-43,
0x1.857f4179f5bbcp-1, 0x1.2600p-45,
0x1.868d99b4491afp-1, -0x1.2c40p-44,
0x1.879cad931a395p-1, -0x1.3000p-45,
0x1.88ac7d98a65b8p-1, -0x1.a800p-45,
0x1.89bd0a4785800p-1, -0x1.d000p-49,
0x1.8ace5422aa223p-1, 0x1.3280p-44,
0x1.8be05bad619fap-1, 0x1.2b40p-43,
0x1.8cf3216b54383p-1, -0x1.ed00p-45,
0x1.8e06a5e08664cp-1, -0x1.0500p-45,
0x1.8f1ae99157807p-1, 0x1.8280p-45,
0x1.902fed0282c0ep-1, -0x1.cb00p-46,
0x1.9145b0b91ff96p-1, -0x1.5e00p-47,
0x1.925c353aa2ff9p-1, 0x1.5400p-48,
0x1.93737b0cdc64ap-1, 0x1.7200p-46,
0x1.948b82b5f98aep-1, -0x1.9000p-47,
0x1.95a44cbc852cbp-1, 0x1.5680p-45,
0x1.96bdd9a766f21p-1, -0x1.6d00p-44,
0x1.97d829fde4e2ap-1, -0x1.1000p-47,
0x1.98f33e47a23a3p-1, 0x1.d000p-45,
0x1.9a0f170ca0604p-1, -0x1.8a40p-44,
0x1.9b2bb4d53ff89p-1, 0x1.55c0p-44,
0x1.9c49182a3f15bp-1, 0x1.6b80p-45,
0x1.9d674194bb8c5p-1, -0x1.c000p-49,
0x1.9e86319e3238ep-1, 0x1.7d00p-46,
0x1.9fa5e8d07f302p-1, 0x1.6400p-46,
0x1.a0c667b5de54dp-1, -0x1.5000p-48,
0x1.a1e7aed8eb8f6p-1, 0x1.9e00p-47,
0x1.a309bec4a2e27p-1, 0x1.ad80p-45,
0x1.a42c980460a5dp-1, -0x1.af00p-46,
0x1.a5503b23e259bp-1, 0x1.b600p-47,
0x1.a674a8af46213p-1, 0x1.8880p-44,
0x1.a799e1330b3a7p-1, 0x1.1200p-46,
0x1.a8bfe53c12e8dp-1, 0x1.6c00p-47,
0x1.a9e6b5579fcd2p-1, -0x1.9b80p-45,
0x1.ab0e521356fb8p-1, 0x1.b700p-45,
0x1.ac36bbfd3f381p-1, 0x1.9000p-50,
0x1.ad5ff3a3c2780p-1, 0x1.4000p-49,
0x1.ae89f995ad2a3p-1, -0x1.c900p-45,
0x1.afb4ce622f367p-1, 0x1.6500p-46,
0x1.b0e07298db790p-1, 0x1.fd40p-45,
0x1.b20ce6c9a89a9p-1, 0x1.2700p-46,
0x1.b33a2b84f1a4bp-1, 0x1.d470p-43,
0x1.b468415b747e7p-1, -0x1.8380p-44,
0x1.b59728de5593ap-1, 0x1.8000p-54,
0x1.b6c6e29f1c56ap-1, 0x1.ad00p-47,
0x1.b7f76f2fb5e50p-1, 0x1.e800p-50,
0x1.b928cf22749b2p-1, -0x1.4c00p-47,
0x1.ba5b030a10603p-1, -0x1.d700p-47,
0x1.bb8e0b79a6f66p-1, 0x1.d900p-47,
0x1.bcc1e904bc1ffp-1, 0x1.2a00p-47,
0x1.bdf69c3f3a16fp-1, -0x1.f780p-46,
0x1.bf2c25bd71db8p-1, -0x1.0a00p-46,
0x1.c06286141b2e9p-1, -0x1.1400p-46,
0x1.c199bdd8552e0p-1, 0x1.be00p-47,
0x1.c2d1cd9fa64eep-1, -0x1.9400p-47,
0x1.c40ab5fffd02fp-1, -0x1.ed00p-47,
0x1.c544778fafd15p-1, 0x1.9660p-44,
0x1.c67f12e57d0cbp-1, -0x1.a100p-46,
0x1.c7ba88988c1b6p-1, -0x1.8458p-42,
0x1.c8f6d9406e733p-1, -0x1.a480p-46,
0x1.ca3405751c4dfp-1, 0x1.b000p-51,
0x1.cb720dcef9094p-1, 0x1.1400p-47,
0x1.ccb0f2e6d1689p-1, 0x1.0200p-48,
0x1.cdf0b555dc412p-1, 0x1.3600p-48,
0x1.cf3155b5bab3bp-1, -0x1.6900p-47,
0x1.d072d4a0789bcp-1, 0x1.9a00p-47,
0x1.d1b532b08c8fap-1, -0x1.5e00p-46,
0x1.d2f87080d8a85p-1, 0x1.d280p-46,
0x1.d43c8eacaa203p-1, 0x1.1a00p-47,
0x1.d5818dcfba491p-1, 0x1.f000p-50,
0x1.d6c76e862e6a1p-1, -0x1.3a00p-47,
0x1.d80e316c9834ep-1, -0x1.cd80p-47,
0x1.d955d71ff6090p-1, 0x1.4c00p-48,
0x1.da9e603db32aep-1, 0x1.f900p-48,
0x1.dbe7cd63a8325p-1, 0x1.9800p-49,
0x1.dd321f301b445p-1, -0x1.5200p-48,
0x1.de7d5641c05bfp-1, -0x1.d700p-46,
0x1.dfc97337b9aecp-1, -0x1.6140p-46,
0x1.e11676b197d5ep-1, 0x1.b480p-47,
0x1.e264614f5a3e7p-1, 0x1.0ce0p-43,
0x1.e3b333b16ee5cp-1, 0x1.c680p-47,
0x1.e502ee78b3fb4p-1, -0x1.9300p-47,
0x1.e653924676d68p-1, -0x1.5000p-49,
0x1.e7a51fbc74c44p-1, -0x1.7f80p-47,
0x1.e8f7977cdb726p-1, -0x1.3700p-48,
0x1.ea4afa2a490e8p-1, 0x1.5d00p-49,
0x1.eb9f4867ccae4p-1, 0x1.61a0p-46,
0x1.ecf482d8e680dp-1, 0x1.5500p-48,
0x1.ee4aaa2188514p-1, 0x1.6400p-51,
0x1.efa1bee615a13p-1, -0x1.e800p-49,
0x1.f0f9c1cb64106p-1, -0x1.a880p-48,
0x1.f252b376bb963p-1, -0x1.c900p-45,
0x1.f3ac948dd7275p-1, 0x1.a000p-53,
0x1.f50765b6e4524p-1, -0x1.4f00p-48,
0x1.f6632798844fdp-1, 0x1.a800p-51,
0x1.f7bfdad9cbe38p-1, 0x1.abc0p-48,
0x1.f91d802243c82p-1, -0x1.4600p-50,
0x1.fa7c1819e908ep-1, -0x1.b0c0p-47,
0x1.fbdba3692d511p-1, -0x1.0e00p-51,
0x1.fd3c22b8f7194p-1, -0x1.0de8p-46,
0x1.fe9d96b2a23eep-1, 0x1.e430p-49,
0x1.0000000000000p+0, 0x0.0000p+0,
0x1.00b1afa5abcbep+0, -0x1.3400p-52,
0x1.0163da9fb3303p+0, -0x1.2170p-46,
0x1.02168143b0282p+0, 0x1.a400p-52,
0x1.02c9a3e77806cp+0, 0x1.f980p-49,
0x1.037d42e11bbcap+0, -0x1.7400p-51,
0x1.04315e86e7f89p+0, 0x1.8300p-50,
0x1.04e5f72f65467p+0, -0x1.a3f0p-46,
0x1.059b0d315855ap+0, -0x1.2840p-47,
0x1.0650a0e3c1f95p+0, 0x1.1600p-48,
0x1.0706b29ddf71ap+0, 0x1.5240p-46,
0x1.07bd42b72a82dp+0, -0x1.9a00p-49,
0x1.0874518759bd0p+0, 0x1.6400p-49,
0x1.092bdf66607c8p+0, -0x1.0780p-47,
0x1.09e3ecac6f383p+0, -0x1.8000p-54,
0x1.0a9c79b1f3930p+0, 0x1.fa00p-48,
0x1.0b5586cf988fcp+0, -0x1.ac80p-48,
0x1.0c0f145e46c8ap+0, 0x1.9c00p-50,
0x1.0cc922b724816p+0, 0x1.5200p-47,
0x1.0d83b23395dd8p+0, -0x1.ad00p-48,
0x1.0e3ec32d3d1f3p+0, 0x1.bac0p-46,
0x1.0efa55fdfa9a6p+0, -0x1.4e80p-47,
0x1.0fb66affed2f0p+0, -0x1.d300p-47,
0x1.1073028d7234bp+0, 0x1.1500p-48,
0x1.11301d0125b5bp+0, 0x1.c000p-49,
0x1.11edbab5e2af9p+0, 0x1.6bc0p-46,
0x1.12abdc06c31d5p+0, 0x1.8400p-49,
0x1.136a814f2047dp+0, -0x1.ed00p-47,
0x1.1429aaea92de9p+0, 0x1.8e00p-49,
0x1.14e95934f3138p+0, 0x1.b400p-49,
0x1.15a98c8a58e71p+0, 0x1.5300p-47,
0x1.166a45471c3dfp+0, 0x1.3380p-47,
0x1.172b83c7d5211p+0, 0x1.8d40p-45,
0x1.17ed48695bb9fp+0, -0x1.5d00p-47,
0x1.18af9388c8d93p+0, -0x1.c880p-46,
0x1.1972658375d66p+0, 0x1.1f00p-46,
0x1.1a35beb6fcba7p+0, 0x1.0480p-46,
0x1.1af99f81387e3p+0, -0x1.7390p-43,
0x1.1bbe084045d54p+0, 0x1.4e40p-45,
0x1.1c82f95281c43p+0, -0x1.a200p-47,
0x1.1d4873168b9b2p+0, 0x1.3800p-49,
0x1.1e0e75eb44031p+0, 0x1.ac00p-49,
0x1.1ed5022fcd938p+0, 0x1.1900p-47,
0x1.1f9c18438cdf7p+0, -0x1.b780p-46,
0x1.2063b88628d8fp+0, 0x1.d940p-45,
0x1.212be3578a81ep+0, 0x1.8000p-50,
0x1.21f49917ddd41p+0, 0x1.b340p-45,
0x1.22bdda2791323p+0, 0x1.9f80p-46,
0x1.2387a6e7561e7p+0, -0x1.9c80p-46,
0x1.2451ffb821427p+0, 0x1.2300p-47,
0x1.251ce4fb2a602p+0, -0x1.3480p-46,
0x1.25e85711eceb0p+0, 0x1.2700p-46,
0x1.26b4565e27d16p+0, 0x1.1d00p-46,
0x1.2780e341de00fp+0, 0x1.1ee0p-44,
0x1.284dfe1f5633ep+0, -0x1.4c00p-46,
0x1.291ba7591bb30p+0, -0x1.3d80p-46,
0x1.29e9df51fdf09p+0, 0x1.8b00p-47,
0x1.2ab8a66d10e9bp+0, -0x1.27c0p-45,
0x1.2b87fd0dada3ap+0, 0x1.a340p-45,
0x1.2c57e39771af9p+0, -0x1.0800p-46,
0x1.2d285a6e402d9p+0, -0x1.ed00p-47,
0x1.2df961f641579p+0, -0x1.4200p-48,
0x1.2ecafa93e2ecfp+0, -0x1.4980p-45,
0x1.2f9d24abd8822p+0, -0x1.6300p-46,
0x1.306fe0a31b625p+0, -0x1.2360p-44,
0x1.31432edeea50bp+0, -0x1.0df8p-40,
0x1.32170fc4cd7b8p+0, -0x1.2480p-45,
0x1.32eb83ba8e9a2p+0, -0x1.5980p-45,
0x1.33c08b2641766p+0, 0x1.ed00p-46,
0x1.3496266e3fa27p+0, -0x1.c000p-50,
0x1.356c55f929f0fp+0, -0x1.0d80p-44,
0x1.36431a2de88b9p+0, 0x1.2c80p-45,
0x1.371a7373aaa39p+0, 0x1.0600p-45,
0x1.37f26231e74fep+0, -0x1.6600p-46,
0x1.38cae6d05d838p+0, -0x1.ae00p-47,
0x1.39a401b713ec3p+0, -0x1.4720p-43,
0x1.3a7db34e5a020p+0, 0x1.8200p-47,
0x1.3b57fbfec6e95p+0, 0x1.e800p-44,
0x1.3c32dc313a8f2p+0, 0x1.f800p-49,
0x1.3d0e544ede122p+0, -0x1.7a00p-46,
0x1.3dea64c1234bbp+0, 0x1.6300p-45,
0x1.3ec70df1c4eccp+0, -0x1.8a60p-43,
0x1.3fa4504ac7e8cp+0, -0x1.cdc0p-44,
0x1.40822c367a0bbp+0, 0x1.5b80p-45,
0x1.4160a21f72e95p+0, 0x1.ec00p-46,
0x1.423fb27094646p+0, -0x1.3600p-46,
0x1.431f5d950a920p+0, 0x1.3980p-45,
0x1.43ffa3f84b9ebp+0, 0x1.a000p-48,
0x1.44e0860618919p+0, -0x1.6c00p-48,
0x1.45c2042a7d201p+0, -0x1.bc00p-47,
0x1.46a41ed1d0016p+0, -0x1.2800p-46,
0x1.4786d668b3326p+0, 0x1.0e00p-44,
0x1.486a2b5c13c00p+0, -0x1.d400p-45,
0x1.494e1e192af04p+0, 0x1.c200p-47,
0x1.4a32af0d7d372p+0, -0x1.e500p-46,
0x1.4b17dea6db801p+0, 0x1.7800p-47,
0x1.4bfdad53629e1p+0, -0x1.3800p-46,
0x1.4ce41b817c132p+0, 0x1.0800p-47,
0x1.4dcb299fddddbp+0, 0x1.c700p-45,
0x1.4eb2d81d8ab96p+0, -0x1.ce00p-46,
0x1.4f9b2769d2d02p+0, 0x1.9200p-46,
0x1.508417f4531c1p+0, -0x1.8c00p-47,
0x1.516daa2cf662ap+0, -0x1.a000p-48,
0x1.5257de83f51eap+0, 0x1.a080p-43,
0x1.5342b569d4edap+0, -0x1.6d80p-45,
0x1.542e2f4f6ac1ap+0, -0x1.2440p-44,
0x1.551a4ca5d94dbp+0, 0x1.83c0p-43,
0x1.56070dde9116bp+0, 0x1.4b00p-45,
0x1.56f4736b529dep+0, 0x1.15a0p-43,
0x1.57e27dbe2c40ep+0, -0x1.9e00p-45,
0x1.58d12d497c76fp+0, -0x1.3080p-45,
0x1.59c0827ff0b4cp+0, 0x1.dec0p-43,
0x1.5ab07dd485427p+0, -0x1.4000p-51,
0x1.5ba11fba87af4p+0, 0x1.0080p-44,
0x1.5c9268a59460bp+0, -0x1.6c80p-45,
0x1.5d84590998e3fp+0, 0x1.69a0p-43,
0x1.5e76f15ad20e1p+0, -0x1.b400p-46,
0x1.5f6a320dcebcap+0, 0x1.7700p-46,
0x1.605e1b976dcb8p+0, 0x1.6f80p-45,
0x1.6152ae6cdf715p+0, 0x1.1000p-47,
0x1.6247eb03a5531p+0, -0x1.5d00p-46,
0x1.633dd1d1929b5p+0, -0x1.2d00p-46,
0x1.6434634ccc313p+0, -0x1.a800p-49,
0x1.652b9febc8efap+0, -0x1.8600p-45,
0x1.6623882553397p+0, 0x1.1fe0p-40,
0x1.671c1c708328ep+0, -0x1.7200p-44,
0x1.68155d44ca97ep+0, 0x1.6800p-49,
0x1.690f4b19e9471p+0, -0x1.9780p-45,
};
fn exp2_64(x: f64) f64 {
const tblsiz: u32 = @intCast(u32, exp2dt.len / 2);
const redux: f64 = 0x1.8p52 / @intToFloat(f64, tblsiz);
const P1: f64 = 0x1.62e42fefa39efp-1;
const P2: f64 = 0x1.ebfbdff82c575p-3;
const P3: f64 = 0x1.c6b08d704a0a6p-5;
const P4: f64 = 0x1.3b2ab88f70400p-7;
const P5: f64 = 0x1.5d88003875c74p-10;
const ux = @bitCast(u64, x);
const ix = @intCast(u32, ux >> 32) & 0x7FFFFFFF;
// TODO: This should be handled beneath.
if (math.isNan(x)) {
return math.nan(f64);
}
// |x| >= 1022 or nan
if (ix >= 0x408FF000) {
// x >= 1024 or nan
if (ix >= 0x40900000 and ux >> 63 == 0) {
math.raiseOverflow();
return math.inf(f64);
}
// -inf or -nan
if (ix >= 0x7FF00000) {
return -1 / x;
}
// x <= -1022
if (ux >> 63 != 0) {
// underflow
if (x <= -1075 or x - 0x1.0p52 + 0x1.0p52 != x) {
math.doNotOptimizeAway(@floatCast(f32, -0x1.0p-149 / x));
}
if (x <= -1075) {
return 0;
}
}
}
// |x| < 0x1p-54
else if (ix < 0x3C900000) {
return 1.0 + x;
}
// NOTE: musl relies on unsafe behaviours which are replicated below
// (addition overflow, division truncation, casting). Appears that this
// produces the intended result but should confirm how GCC/Clang handle this
// to ensure.
// reduce x
var uf: f64 = x + redux;
// NOTE: musl performs an implicit 64-bit to 32-bit u32 truncation here
var i_0: u32 = @truncate(u32, @bitCast(u64, uf));
i_0 +%= tblsiz / 2;
const k: u32 = i_0 / tblsiz * tblsiz;
const ik: i32 = @divTrunc(@bitCast(i32, k), tblsiz);
i_0 %= tblsiz;
uf -= redux;
// r = exp2(y) = exp2t[i_0] * p(z - eps[i])
var z: f64 = x - uf;
const t: f64 = exp2dt[@intCast(usize, 2 * i_0)];
z -= exp2dt[@intCast(usize, 2 * i_0 + 1)];
const r: f64 = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5))));
return math.scalbn(r, ik);
}
test "math.exp2" {
try expect(exp2(@as(f32, 0.8923)) == exp2_32(0.8923));
try expect(exp2(@as(f64, 0.8923)) == exp2_64(0.8923));
}
test "math.exp2_32" {
const epsilon = 0.000001;
try expect(exp2_32(0.0) == 1.0);
try expect(math.approxEqAbs(f32, exp2_32(0.2), 1.148698, epsilon));
try expect(math.approxEqAbs(f32, exp2_32(0.8923), 1.856133, epsilon));
try expect(math.approxEqAbs(f32, exp2_32(1.5), 2.828427, epsilon));
try expect(math.approxEqAbs(f32, exp2_32(37.45), 187747237888, epsilon));
try expect(math.approxEqAbs(f32, exp2_32(-1), 0.5, epsilon));
}
test "math.exp2_64" {
const epsilon = 0.000001;
try expect(exp2_64(0.0) == 1.0);
try expect(math.approxEqAbs(f64, exp2_64(0.2), 1.148698, epsilon));
try expect(math.approxEqAbs(f64, exp2_64(0.8923), 1.856133, epsilon));
try expect(math.approxEqAbs(f64, exp2_64(1.5), 2.828427, epsilon));
try expect(math.approxEqAbs(f64, exp2_64(-1), 0.5, epsilon));
try expect(math.approxEqAbs(f64, exp2_64(-0x1.a05cc754481d1p-2), 0x1.824056efc687cp-1, epsilon));
}
test "math.exp2_32.special" {
try expect(math.isPositiveInf(exp2_32(math.inf(f32))));
try expect(math.isNan(exp2_32(math.nan(f32))));
}
test "math.exp2_64.special" {
try expect(math.isPositiveInf(exp2_64(math.inf(f64))));
try expect(math.isNan(exp2_64(math.nan(f64))));
}

View File

@@ -22,7 +22,7 @@ fn expo2f(x: f32) f32 {
const u = (0x7F + k / 2) << 23;
const scale = @bitCast(f32, u);
return math.exp(x - kln2) * scale * scale;
return @exp(x - kln2) * scale * scale;
}
fn expo2d(x: f64) f64 {
@@ -31,5 +31,5 @@ fn expo2d(x: f64) f64 {
const u = (0x3FF + k / 2) << 20;
const scale = @bitCast(f64, @as(u64, u) << 32);
return math.exp(x - kln2) * scale * scale;
return @exp(x - kln2) * scale * scale;
}

View File

@@ -1,45 +0,0 @@
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
/// Returns the absolute value of x.
///
/// Special Cases:
/// - fabs(+-inf) = +inf
/// - fabs(nan) = nan
pub fn fabs(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
const TBits = std.meta.Int(.unsigned, @bitSizeOf(T));
if (@typeInfo(T) != .Float) {
@compileError("fabs not implemented for " ++ @typeName(T));
}
const float_bits = @bitCast(TBits, x);
const remove_sign = ~@as(TBits, 0) >> 1;
return @bitCast(T, float_bits & remove_sign);
}
test "math.fabs" {
// TODO add support for c_longdouble here
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
// normals
try expect(fabs(@as(T, 1.0)) == 1.0);
try expect(fabs(@as(T, -1.0)) == 1.0);
try expect(fabs(math.floatMin(T)) == math.floatMin(T));
try expect(fabs(-math.floatMin(T)) == math.floatMin(T));
try expect(fabs(math.floatMax(T)) == math.floatMax(T));
try expect(fabs(-math.floatMax(T)) == math.floatMax(T));
// subnormals
try expect(fabs(@as(T, 0.0)) == 0.0);
try expect(fabs(@as(T, -0.0)) == 0.0);
try expect(fabs(math.floatTrueMin(T)) == math.floatTrueMin(T));
try expect(fabs(-math.floatTrueMin(T)) == math.floatTrueMin(T));
// non-finite numbers
try expect(math.isPositiveInf(fabs(math.inf(T))));
try expect(math.isPositiveInf(fabs(-math.inf(T))));
try expect(math.isNan(fabs(math.nan(T))));
}
}

View File

@@ -1,221 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/floorf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/floor.c
const expect = std.testing.expect;
const std = @import("../std.zig");
const math = std.math;
/// Returns the greatest integer value less than or equal to x.
///
/// Special Cases:
/// - floor(+-0) = +-0
/// - floor(+-inf) = +-inf
/// - floor(nan) = nan
pub fn floor(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f16 => floor16(x),
f32 => floor32(x),
f64 => floor64(x),
f128 => floor128(x),
// TODO this is not correct for some targets
c_longdouble => @floatCast(c_longdouble, floor128(x)),
else => @compileError("floor not implemented for " ++ @typeName(T)),
};
}
fn floor16(x: f16) f16 {
var u = @bitCast(u16, x);
const e = @intCast(i16, (u >> 10) & 31) - 15;
var m: u16 = undefined;
// TODO: Shouldn't need this explicit check.
if (x == 0.0) {
return x;
}
if (e >= 10) {
return x;
}
if (e >= 0) {
m = @as(u16, 1023) >> @intCast(u4, e);
if (u & m == 0) {
return x;
}
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 15 != 0) {
u += m;
}
return @bitCast(f16, u & ~m);
} else {
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 15 == 0) {
return 0.0;
} else {
return -1.0;
}
}
}
fn floor32(x: f32) f32 {
var u = @bitCast(u32, x);
const e = @intCast(i32, (u >> 23) & 0xFF) - 0x7F;
var m: u32 = undefined;
// TODO: Shouldn't need this explicit check.
if (x == 0.0) {
return x;
}
if (e >= 23) {
return x;
}
if (e >= 0) {
m = @as(u32, 0x007FFFFF) >> @intCast(u5, e);
if (u & m == 0) {
return x;
}
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 31 != 0) {
u += m;
}
return @bitCast(f32, u & ~m);
} else {
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 31 == 0) {
return 0.0;
} else {
return -1.0;
}
}
}
fn floor64(x: f64) f64 {
const f64_toint = 1.0 / math.floatEps(f64);
const u = @bitCast(u64, x);
const e = (u >> 52) & 0x7FF;
var y: f64 = undefined;
if (e >= 0x3FF + 52 or x == 0) {
return x;
}
if (u >> 63 != 0) {
y = x - f64_toint + f64_toint - x;
} else {
y = x + f64_toint - f64_toint - x;
}
if (e <= 0x3FF - 1) {
math.doNotOptimizeAway(y);
if (u >> 63 != 0) {
return -1.0;
} else {
return 0.0;
}
} else if (y > 0) {
return x + y - 1;
} else {
return x + y;
}
}
fn floor128(x: f128) f128 {
const f128_toint = 1.0 / math.floatEps(f128);
const u = @bitCast(u128, x);
const e = (u >> 112) & 0x7FFF;
var y: f128 = undefined;
if (e >= 0x3FFF + 112 or x == 0) return x;
if (u >> 127 != 0) {
y = x - f128_toint + f128_toint - x;
} else {
y = x + f128_toint - f128_toint - x;
}
if (e <= 0x3FFF - 1) {
math.doNotOptimizeAway(y);
if (u >> 127 != 0) {
return -1.0;
} else {
return 0.0;
}
} else if (y > 0) {
return x + y - 1;
} else {
return x + y;
}
}
test "math.floor" {
try expect(floor(@as(f16, 1.3)) == floor16(1.3));
try expect(floor(@as(f32, 1.3)) == floor32(1.3));
try expect(floor(@as(f64, 1.3)) == floor64(1.3));
try expect(floor(@as(f128, 1.3)) == floor128(1.3));
}
test "math.floor16" {
try expect(floor16(1.3) == 1.0);
try expect(floor16(-1.3) == -2.0);
try expect(floor16(0.2) == 0.0);
}
test "math.floor32" {
try expect(floor32(1.3) == 1.0);
try expect(floor32(-1.3) == -2.0);
try expect(floor32(0.2) == 0.0);
}
test "math.floor64" {
try expect(floor64(1.3) == 1.0);
try expect(floor64(-1.3) == -2.0);
try expect(floor64(0.2) == 0.0);
}
test "math.floor128" {
try expect(floor128(1.3) == 1.0);
try expect(floor128(-1.3) == -2.0);
try expect(floor128(0.2) == 0.0);
}
test "math.floor16.special" {
try expect(floor16(0.0) == 0.0);
try expect(floor16(-0.0) == -0.0);
try expect(math.isPositiveInf(floor16(math.inf(f16))));
try expect(math.isNegativeInf(floor16(-math.inf(f16))));
try expect(math.isNan(floor16(math.nan(f16))));
}
test "math.floor32.special" {
try expect(floor32(0.0) == 0.0);
try expect(floor32(-0.0) == -0.0);
try expect(math.isPositiveInf(floor32(math.inf(f32))));
try expect(math.isNegativeInf(floor32(-math.inf(f32))));
try expect(math.isNan(floor32(math.nan(f32))));
}
test "math.floor64.special" {
try expect(floor64(0.0) == 0.0);
try expect(floor64(-0.0) == -0.0);
try expect(math.isPositiveInf(floor64(math.inf(f64))));
try expect(math.isNegativeInf(floor64(-math.inf(f64))));
try expect(math.isNan(floor64(math.nan(f64))));
}
test "math.floor128.special" {
try expect(floor128(0.0) == 0.0);
try expect(floor128(-0.0) == -0.0);
try expect(math.isPositiveInf(floor128(math.inf(f128))));
try expect(math.isNegativeInf(floor128(-math.inf(f128))));
try expect(math.isNan(floor128(math.nan(f128))));
}

View File

@@ -1,339 +0,0 @@
// Ported from musl, which is MIT licensed:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/fmal.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/fmaf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/fma.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
/// Returns x * y + z with a single rounding error.
pub fn fma(comptime T: type, x: T, y: T, z: T) T {
return switch (T) {
f32 => fma32(x, y, z),
f64 => fma64(x, y, z),
f128 => fma128(x, y, z),
// TODO this is not correct for some targets
c_longdouble => @floatCast(c_longdouble, fma128(x, y, z)),
f80 => @floatCast(f80, fma128(x, y, z)),
else => @compileError("fma not implemented for " ++ @typeName(T)),
};
}
fn fma32(x: f32, y: f32, z: f32) f32 {
const xy = @as(f64, x) * y;
const xy_z = xy + z;
const u = @bitCast(u64, xy_z);
const e = (u >> 52) & 0x7FF;
if ((u & 0x1FFFFFFF) != 0x10000000 or e == 0x7FF or (xy_z - xy == z and xy_z - z == xy)) {
return @floatCast(f32, xy_z);
} else {
// TODO: Handle inexact case with double-rounding
return @floatCast(f32, xy_z);
}
}
// NOTE: Upstream fma.c has been rewritten completely to raise fp exceptions more accurately.
fn fma64(x: f64, y: f64, z: f64) f64 {
if (!math.isFinite(x) or !math.isFinite(y)) {
return x * y + z;
}
if (!math.isFinite(z)) {
return z;
}
if (x == 0.0 or y == 0.0) {
return x * y + z;
}
if (z == 0.0) {
return x * y;
}
const x1 = math.frexp(x);
var ex = x1.exponent;
var xs = x1.significand;
const x2 = math.frexp(y);
var ey = x2.exponent;
var ys = x2.significand;
const x3 = math.frexp(z);
var ez = x3.exponent;
var zs = x3.significand;
var spread = ex + ey - ez;
if (spread <= 53 * 2) {
zs = math.scalbn(zs, -spread);
} else {
zs = math.copysign(f64, math.floatMin(f64), zs);
}
const xy = dd_mul(xs, ys);
const r = dd_add(xy.hi, zs);
spread = ex + ey;
if (r.hi == 0.0) {
return xy.hi + zs + math.scalbn(xy.lo, spread);
}
const adj = add_adjusted(r.lo, xy.lo);
if (spread + math.ilogb(r.hi) > -1023) {
return math.scalbn(r.hi + adj, spread);
} else {
return add_and_denorm(r.hi, adj, spread);
}
}
const dd = struct {
hi: f64,
lo: f64,
};
fn dd_add(a: f64, b: f64) dd {
var ret: dd = undefined;
ret.hi = a + b;
const s = ret.hi - a;
ret.lo = (a - (ret.hi - s)) + (b - s);
return ret;
}
fn dd_mul(a: f64, b: f64) dd {
var ret: dd = undefined;
const split: f64 = 0x1.0p27 + 1.0;
var p = a * split;
var ha = a - p;
ha += p;
var la = a - ha;
p = b * split;
var hb = b - p;
hb += p;
var lb = b - hb;
p = ha * hb;
var q = ha * lb + la * hb;
ret.hi = p + q;
ret.lo = p - ret.hi + q + la * lb;
return ret;
}
fn add_adjusted(a: f64, b: f64) f64 {
var sum = dd_add(a, b);
if (sum.lo != 0) {
var uhii = @bitCast(u64, sum.hi);
if (uhii & 1 == 0) {
// hibits += copysign(1.0, sum.hi, sum.lo)
const uloi = @bitCast(u64, sum.lo);
uhii += 1 - ((uhii ^ uloi) >> 62);
sum.hi = @bitCast(f64, uhii);
}
}
return sum.hi;
}
fn add_and_denorm(a: f64, b: f64, scale: i32) f64 {
var sum = dd_add(a, b);
if (sum.lo != 0) {
var uhii = @bitCast(u64, sum.hi);
const bits_lost = -@intCast(i32, (uhii >> 52) & 0x7FF) - scale + 1;
if ((bits_lost != 1) == (uhii & 1 != 0)) {
const uloi = @bitCast(u64, sum.lo);
uhii += 1 - (((uhii ^ uloi) >> 62) & 2);
sum.hi = @bitCast(f64, uhii);
}
}
return math.scalbn(sum.hi, scale);
}
/// A struct that represents a floating-point number with twice the precision
/// of f128. We maintain the invariant that "hi" stores the high-order
/// bits of the result.
const dd128 = struct {
hi: f128,
lo: f128,
};
/// Compute a+b exactly, returning the exact result in a struct dd. We assume
/// that both a and b are finite, but make no assumptions about their relative
/// magnitudes.
fn dd_add128(a: f128, b: f128) dd128 {
var ret: dd128 = undefined;
ret.hi = a + b;
const s = ret.hi - a;
ret.lo = (a - (ret.hi - s)) + (b - s);
return ret;
}
/// Compute a+b, with a small tweak: The least significant bit of the
/// result is adjusted into a sticky bit summarizing all the bits that
/// were lost to rounding. This adjustment negates the effects of double
/// rounding when the result is added to another number with a higher
/// exponent. For an explanation of round and sticky bits, see any reference
/// on FPU design, e.g.,
///
/// J. Coonen. An Implementation Guide to a Proposed Standard for
/// Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
fn add_adjusted128(a: f128, b: f128) f128 {
var sum = dd_add128(a, b);
if (sum.lo != 0) {
var uhii = @bitCast(u128, sum.hi);
if (uhii & 1 == 0) {
// hibits += copysign(1.0, sum.hi, sum.lo)
const uloi = @bitCast(u128, sum.lo);
uhii += 1 - ((uhii ^ uloi) >> 126);
sum.hi = @bitCast(f128, uhii);
}
}
return sum.hi;
}
/// Compute ldexp(a+b, scale) with a single rounding error. It is assumed
/// that the result will be subnormal, and care is taken to ensure that
/// double rounding does not occur.
fn add_and_denorm128(a: f128, b: f128, scale: i32) f128 {
var sum = dd_add128(a, b);
// If we are losing at least two bits of accuracy to denormalization,
// then the first lost bit becomes a round bit, and we adjust the
// lowest bit of sum.hi to make it a sticky bit summarizing all the
// bits in sum.lo. With the sticky bit adjusted, the hardware will
// break any ties in the correct direction.
//
// If we are losing only one bit to denormalization, however, we must
// break the ties manually.
if (sum.lo != 0) {
var uhii = @bitCast(u128, sum.hi);
const bits_lost = -@intCast(i32, (uhii >> 112) & 0x7FFF) - scale + 1;
if ((bits_lost != 1) == (uhii & 1 != 0)) {
const uloi = @bitCast(u128, sum.lo);
uhii += 1 - (((uhii ^ uloi) >> 126) & 2);
sum.hi = @bitCast(f128, uhii);
}
}
return math.scalbn(sum.hi, scale);
}
/// Compute a*b exactly, returning the exact result in a struct dd. We assume
/// that both a and b are normalized, so no underflow or overflow will occur.
/// The current rounding mode must be round-to-nearest.
fn dd_mul128(a: f128, b: f128) dd128 {
var ret: dd128 = undefined;
const split: f128 = 0x1.0p57 + 1.0;
var p = a * split;
var ha = a - p;
ha += p;
var la = a - ha;
p = b * split;
var hb = b - p;
hb += p;
var lb = b - hb;
p = ha * hb;
var q = ha * lb + la * hb;
ret.hi = p + q;
ret.lo = p - ret.hi + q + la * lb;
return ret;
}
/// Fused multiply-add: Compute x * y + z with a single rounding error.
///
/// We use scaling to avoid overflow/underflow, along with the
/// canonical precision-doubling technique adapted from:
///
/// Dekker, T. A Floating-Point Technique for Extending the
/// Available Precision. Numer. Math. 18, 224-242 (1971).
fn fma128(x: f128, y: f128, z: f128) f128 {
if (!math.isFinite(x) or !math.isFinite(y)) {
return x * y + z;
}
if (!math.isFinite(z)) {
return z;
}
if (x == 0.0 or y == 0.0) {
return x * y + z;
}
if (z == 0.0) {
return x * y;
}
const x1 = math.frexp(x);
var ex = x1.exponent;
var xs = x1.significand;
const x2 = math.frexp(y);
var ey = x2.exponent;
var ys = x2.significand;
const x3 = math.frexp(z);
var ez = x3.exponent;
var zs = x3.significand;
var spread = ex + ey - ez;
if (spread <= 113 * 2) {
zs = math.scalbn(zs, -spread);
} else {
zs = math.copysign(f128, math.floatMin(f128), zs);
}
const xy = dd_mul128(xs, ys);
const r = dd_add128(xy.hi, zs);
spread = ex + ey;
if (r.hi == 0.0) {
return xy.hi + zs + math.scalbn(xy.lo, spread);
}
const adj = add_adjusted128(r.lo, xy.lo);
if (spread + math.ilogb(r.hi) > -16383) {
return math.scalbn(r.hi + adj, spread);
} else {
return add_and_denorm128(r.hi, adj, spread);
}
}
test "type dispatch" {
try expect(fma(f32, 0.0, 1.0, 1.0) == fma32(0.0, 1.0, 1.0));
try expect(fma(f64, 0.0, 1.0, 1.0) == fma64(0.0, 1.0, 1.0));
try expect(fma(f128, 0.0, 1.0, 1.0) == fma128(0.0, 1.0, 1.0));
}
test "32" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f32, fma32(0.0, 5.0, 9.124), 9.124, epsilon));
try expect(math.approxEqAbs(f32, fma32(0.2, 5.0, 9.124), 10.124, epsilon));
try expect(math.approxEqAbs(f32, fma32(0.8923, 5.0, 9.124), 13.5855, epsilon));
try expect(math.approxEqAbs(f32, fma32(1.5, 5.0, 9.124), 16.624, epsilon));
try expect(math.approxEqAbs(f32, fma32(37.45, 5.0, 9.124), 196.374004, epsilon));
try expect(math.approxEqAbs(f32, fma32(89.123, 5.0, 9.124), 454.739005, epsilon));
try expect(math.approxEqAbs(f32, fma32(123123.234375, 5.0, 9.124), 615625.295875, epsilon));
}
test "64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, fma64(0.0, 5.0, 9.124), 9.124, epsilon));
try expect(math.approxEqAbs(f64, fma64(0.2, 5.0, 9.124), 10.124, epsilon));
try expect(math.approxEqAbs(f64, fma64(0.8923, 5.0, 9.124), 13.5855, epsilon));
try expect(math.approxEqAbs(f64, fma64(1.5, 5.0, 9.124), 16.624, epsilon));
try expect(math.approxEqAbs(f64, fma64(37.45, 5.0, 9.124), 196.374, epsilon));
try expect(math.approxEqAbs(f64, fma64(89.123, 5.0, 9.124), 454.739, epsilon));
try expect(math.approxEqAbs(f64, fma64(123123.234375, 5.0, 9.124), 615625.295875, epsilon));
}
test "128" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f128, fma128(0.0, 5.0, 9.124), 9.124, epsilon));
try expect(math.approxEqAbs(f128, fma128(0.2, 5.0, 9.124), 10.124, epsilon));
try expect(math.approxEqAbs(f128, fma128(0.8923, 5.0, 9.124), 13.5855, epsilon));
try expect(math.approxEqAbs(f128, fma128(1.5, 5.0, 9.124), 16.624, epsilon));
try expect(math.approxEqAbs(f128, fma128(37.45, 5.0, 9.124), 196.374, epsilon));
try expect(math.approxEqAbs(f128, fma128(89.123, 5.0, 9.124), 454.739, epsilon));
try expect(math.approxEqAbs(f128, fma128(123123.234375, 5.0, 9.124), 615625.295875, epsilon));
}

View File

@@ -56,7 +56,7 @@ fn hypot32(x: f32, y: f32) f32 {
yy *= 0x1.0p-90;
}
return z * math.sqrt(@floatCast(f32, @as(f64, x) * x + @as(f64, y) * y));
return z * @sqrt(@floatCast(f32, @as(f64, x) * x + @as(f64, y) * y));
}
fn sq(hi: *f64, lo: *f64, x: f64) void {
@@ -117,7 +117,7 @@ fn hypot64(x: f64, y: f64) f64 {
sq(&hx, &lx, x);
sq(&hy, &ly, y);
return z * math.sqrt(ly + lx + hy + hx);
return z * @sqrt(ly + lx + hy + hx);
}
test "math.hypot" {

View File

@@ -1,12 +1,6 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/lnf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/ln.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const testing = std.testing;
/// Returns the natural logarithm of x.
///
@@ -15,175 +9,26 @@ const expect = std.testing.expect;
/// - ln(0) = -inf
/// - ln(x) = nan if x < 0
/// - ln(nan) = nan
/// TODO remove this in favor of `@log`.
pub fn ln(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
switch (@typeInfo(T)) {
.ComptimeFloat => {
return @as(comptime_float, ln_64(x));
},
.Float => {
return switch (T) {
f32 => ln_32(x),
f64 => ln_64(x),
else => @compileError("ln not implemented for " ++ @typeName(T)),
};
return @as(comptime_float, @log(x));
},
.Float => return @log(x),
.ComptimeInt => {
return @as(comptime_int, math.floor(ln_64(@as(f64, x))));
return @as(comptime_int, @floor(@log(@as(f64, x))));
},
.Int => |IntType| switch (IntType.signedness) {
.signed => @compileError("ln not implemented for signed integers"),
.unsigned => return @as(T, math.floor(ln_64(@as(f64, x)))),
.unsigned => return @as(T, @floor(@log(@as(f64, x)))),
},
else => @compileError("ln not implemented for " ++ @typeName(T)),
}
}
pub fn ln_32(x_: f32) f32 {
const ln2_hi: f32 = 6.9313812256e-01;
const ln2_lo: f32 = 9.0580006145e-06;
const Lg1: f32 = 0xaaaaaa.0p-24;
const Lg2: f32 = 0xccce13.0p-25;
const Lg3: f32 = 0x91e9ee.0p-25;
const Lg4: f32 = 0xf89e26.0p-26;
var x = x_;
var ix = @bitCast(u32, x);
var k: i32 = 0;
// x < 2^(-126)
if (ix < 0x00800000 or ix >> 31 != 0) {
// log(+-0) = -inf
if (ix << 1 == 0) {
return -math.inf(f32);
}
// log(-#) = nan
if (ix >> 31 != 0) {
return math.nan(f32);
}
// subnormal, scale x
k -= 25;
x *= 0x1.0p25;
ix = @bitCast(u32, x);
} else if (ix >= 0x7F800000) {
return x;
} else if (ix == 0x3F800000) {
return 0;
}
// x into [sqrt(2) / 2, sqrt(2)]
ix += 0x3F800000 - 0x3F3504F3;
k += @intCast(i32, ix >> 23) - 0x7F;
ix = (ix & 0x007FFFFF) + 0x3F3504F3;
x = @bitCast(f32, ix);
const f = x - 1.0;
const s = f / (2.0 + f);
const z = s * s;
const w = z * z;
const t1 = w * (Lg2 + w * Lg4);
const t2 = z * (Lg1 + w * Lg3);
const R = t2 + t1;
const hfsq = 0.5 * f * f;
const dk = @intToFloat(f32, k);
return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi;
}
pub fn ln_64(x_: f64) f64 {
const ln2_hi: f64 = 6.93147180369123816490e-01;
const ln2_lo: f64 = 1.90821492927058770002e-10;
const Lg1: f64 = 6.666666666666735130e-01;
const Lg2: f64 = 3.999999999940941908e-01;
const Lg3: f64 = 2.857142874366239149e-01;
const Lg4: f64 = 2.222219843214978396e-01;
const Lg5: f64 = 1.818357216161805012e-01;
const Lg6: f64 = 1.531383769920937332e-01;
const Lg7: f64 = 1.479819860511658591e-01;
var x = x_;
var ix = @bitCast(u64, x);
var hx = @intCast(u32, ix >> 32);
var k: i32 = 0;
if (hx < 0x00100000 or hx >> 31 != 0) {
// log(+-0) = -inf
if (ix << 1 == 0) {
return -math.inf(f64);
}
// log(-#) = nan
if (hx >> 31 != 0) {
return math.nan(f64);
}
// subnormal, scale x
k -= 54;
x *= 0x1.0p54;
hx = @intCast(u32, @bitCast(u64, ix) >> 32);
} else if (hx >= 0x7FF00000) {
return x;
} else if (hx == 0x3FF00000 and ix << 32 == 0) {
return 0;
}
// x into [sqrt(2) / 2, sqrt(2)]
hx += 0x3FF00000 - 0x3FE6A09E;
k += @intCast(i32, hx >> 20) - 0x3FF;
hx = (hx & 0x000FFFFF) + 0x3FE6A09E;
ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF);
x = @bitCast(f64, ix);
const f = x - 1.0;
const hfsq = 0.5 * f * f;
const s = f / (2.0 + f);
const z = s * s;
const w = z * z;
const t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
const R = t2 + t1;
const dk = @intToFloat(f64, k);
return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi;
}
test "math.ln" {
try expect(ln(@as(f32, 0.2)) == ln_32(0.2));
try expect(ln(@as(f64, 0.2)) == ln_64(0.2));
}
test "math.ln32" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f32, ln_32(0.2), -1.609438, epsilon));
try expect(math.approxEqAbs(f32, ln_32(0.8923), -0.113953, epsilon));
try expect(math.approxEqAbs(f32, ln_32(1.5), 0.405465, epsilon));
try expect(math.approxEqAbs(f32, ln_32(37.45), 3.623007, epsilon));
try expect(math.approxEqAbs(f32, ln_32(89.123), 4.490017, epsilon));
try expect(math.approxEqAbs(f32, ln_32(123123.234375), 11.720941, epsilon));
}
test "math.ln64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, ln_64(0.2), -1.609438, epsilon));
try expect(math.approxEqAbs(f64, ln_64(0.8923), -0.113953, epsilon));
try expect(math.approxEqAbs(f64, ln_64(1.5), 0.405465, epsilon));
try expect(math.approxEqAbs(f64, ln_64(37.45), 3.623007, epsilon));
try expect(math.approxEqAbs(f64, ln_64(89.123), 4.490017, epsilon));
try expect(math.approxEqAbs(f64, ln_64(123123.234375), 11.720941, epsilon));
}
test "math.ln32.special" {
try expect(math.isPositiveInf(ln_32(math.inf(f32))));
try expect(math.isNegativeInf(ln_32(0.0)));
try expect(math.isNan(ln_32(-1.0)));
try expect(math.isNan(ln_32(math.nan(f32))));
}
test "math.ln64.special" {
try expect(math.isPositiveInf(ln_64(math.inf(f64))));
try expect(math.isNegativeInf(ln_64(0.0)));
try expect(math.isNan(ln_64(-1.0)));
try expect(math.isNan(ln_64(math.nan(f64))));
try testing.expect(ln(@as(f32, 0.2)) == @log(0.2));
try testing.expect(ln(@as(f64, 0.2)) == @log(0.2));
}

View File

@@ -15,28 +15,28 @@ pub fn log(comptime T: type, base: T, x: T) T {
} else if (base == 10) {
return math.log10(x);
} else if ((@typeInfo(T) == .Float or @typeInfo(T) == .ComptimeFloat) and base == math.e) {
return math.ln(x);
return @log(x);
}
const float_base = math.lossyCast(f64, base);
switch (@typeInfo(T)) {
.ComptimeFloat => {
return @as(comptime_float, math.ln(@as(f64, x)) / math.ln(float_base));
return @as(comptime_float, @log(@as(f64, x)) / @log(float_base));
},
.ComptimeInt => {
return @as(comptime_int, math.floor(math.ln(@as(f64, x)) / math.ln(float_base)));
return @as(comptime_int, @floor(@log(@as(f64, x)) / @log(float_base)));
},
// TODO implement integer log without using float math
.Int => |IntType| switch (IntType.signedness) {
.signed => @compileError("log not implemented for signed integers"),
.unsigned => return @floatToInt(T, math.floor(math.ln(@intToFloat(f64, x)) / math.ln(float_base))),
.unsigned => return @floatToInt(T, @floor(@log(@intToFloat(f64, x)) / @log(float_base))),
},
.Float => {
switch (T) {
f32 => return @floatCast(f32, math.ln(@as(f64, x)) / math.ln(float_base)),
f64 => return math.ln(x) / math.ln(float_base),
f32 => return @floatCast(f32, @log(@as(f64, x)) / @log(float_base)),
f64 => return @log(x) / @log(float_base),
else => @compileError("log not implemented for " ++ @typeName(T)),
}
},

View File

@@ -1,9 +1,3 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/log10f.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/log10.c
const std = @import("../std.zig");
const math = std.math;
const testing = std.testing;
@@ -20,198 +14,16 @@ pub fn log10(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
switch (@typeInfo(T)) {
.ComptimeFloat => {
return @as(comptime_float, log10_64(x));
},
.Float => {
return switch (T) {
f32 => log10_32(x),
f64 => log10_64(x),
else => @compileError("log10 not implemented for " ++ @typeName(T)),
};
return @as(comptime_float, @log10(x));
},
.Float => return @log10(x),
.ComptimeInt => {
return @as(comptime_int, math.floor(log10_64(@as(f64, x))));
return @as(comptime_int, @floor(@log10(@as(f64, x))));
},
.Int => |IntType| switch (IntType.signedness) {
.signed => @compileError("log10 not implemented for signed integers"),
.unsigned => return @floatToInt(T, math.floor(log10_64(@intToFloat(f64, x)))),
.unsigned => return @floatToInt(T, @floor(@log10(@intToFloat(f64, x)))),
},
else => @compileError("log10 not implemented for " ++ @typeName(T)),
}
}
pub fn log10_32(x_: f32) f32 {
const ivln10hi: f32 = 4.3432617188e-01;
const ivln10lo: f32 = -3.1689971365e-05;
const log10_2hi: f32 = 3.0102920532e-01;
const log10_2lo: f32 = 7.9034151668e-07;
const Lg1: f32 = 0xaaaaaa.0p-24;
const Lg2: f32 = 0xccce13.0p-25;
const Lg3: f32 = 0x91e9ee.0p-25;
const Lg4: f32 = 0xf89e26.0p-26;
var x = x_;
var u = @bitCast(u32, x);
var ix = u;
var k: i32 = 0;
// x < 2^(-126)
if (ix < 0x00800000 or ix >> 31 != 0) {
// log(+-0) = -inf
if (ix << 1 == 0) {
return -math.inf(f32);
}
// log(-#) = nan
if (ix >> 31 != 0) {
return math.nan(f32);
}
k -= 25;
x *= 0x1.0p25;
ix = @bitCast(u32, x);
} else if (ix >= 0x7F800000) {
return x;
} else if (ix == 0x3F800000) {
return 0;
}
// x into [sqrt(2) / 2, sqrt(2)]
ix += 0x3F800000 - 0x3F3504F3;
k += @intCast(i32, ix >> 23) - 0x7F;
ix = (ix & 0x007FFFFF) + 0x3F3504F3;
x = @bitCast(f32, ix);
const f = x - 1.0;
const s = f / (2.0 + f);
const z = s * s;
const w = z * z;
const t1 = w * (Lg2 + w * Lg4);
const t2 = z * (Lg1 + w * Lg3);
const R = t2 + t1;
const hfsq = 0.5 * f * f;
var hi = f - hfsq;
u = @bitCast(u32, hi);
u &= 0xFFFFF000;
hi = @bitCast(f32, u);
const lo = f - hi - hfsq + s * (hfsq + R);
const dk = @intToFloat(f32, k);
return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
}
pub fn log10_64(x_: f64) f64 {
const ivln10hi: f64 = 4.34294481878168880939e-01;
const ivln10lo: f64 = 2.50829467116452752298e-11;
const log10_2hi: f64 = 3.01029995663611771306e-01;
const log10_2lo: f64 = 3.69423907715893078616e-13;
const Lg1: f64 = 6.666666666666735130e-01;
const Lg2: f64 = 3.999999999940941908e-01;
const Lg3: f64 = 2.857142874366239149e-01;
const Lg4: f64 = 2.222219843214978396e-01;
const Lg5: f64 = 1.818357216161805012e-01;
const Lg6: f64 = 1.531383769920937332e-01;
const Lg7: f64 = 1.479819860511658591e-01;
var x = x_;
var ix = @bitCast(u64, x);
var hx = @intCast(u32, ix >> 32);
var k: i32 = 0;
if (hx < 0x00100000 or hx >> 31 != 0) {
// log(+-0) = -inf
if (ix << 1 == 0) {
return -math.inf(f32);
}
// log(-#) = nan
if (hx >> 31 != 0) {
return math.nan(f32);
}
// subnormal, scale x
k -= 54;
x *= 0x1.0p54;
hx = @intCast(u32, @bitCast(u64, x) >> 32);
} else if (hx >= 0x7FF00000) {
return x;
} else if (hx == 0x3FF00000 and ix << 32 == 0) {
return 0;
}
// x into [sqrt(2) / 2, sqrt(2)]
hx += 0x3FF00000 - 0x3FE6A09E;
k += @intCast(i32, hx >> 20) - 0x3FF;
hx = (hx & 0x000FFFFF) + 0x3FE6A09E;
ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF);
x = @bitCast(f64, ix);
const f = x - 1.0;
const hfsq = 0.5 * f * f;
const s = f / (2.0 + f);
const z = s * s;
const w = z * z;
const t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
const R = t2 + t1;
// hi + lo = f - hfsq + s * (hfsq + R) ~ log(1 + f)
var hi = f - hfsq;
var hii = @bitCast(u64, hi);
hii &= @as(u64, maxInt(u64)) << 32;
hi = @bitCast(f64, hii);
const lo = f - hi - hfsq + s * (hfsq + R);
// val_hi + val_lo ~ log10(1 + f) + k * log10(2)
var val_hi = hi * ivln10hi;
const dk = @intToFloat(f64, k);
const y = dk * log10_2hi;
var val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
// Extra precision multiplication
const ww = y + val_hi;
val_lo += (y - ww) + val_hi;
val_hi = ww;
return val_lo + val_hi;
}
test "math.log10" {
try testing.expect(log10(@as(f32, 0.2)) == log10_32(0.2));
try testing.expect(log10(@as(f64, 0.2)) == log10_64(0.2));
}
test "math.log10_32" {
const epsilon = 0.000001;
try testing.expect(math.approxEqAbs(f32, log10_32(0.2), -0.698970, epsilon));
try testing.expect(math.approxEqAbs(f32, log10_32(0.8923), -0.049489, epsilon));
try testing.expect(math.approxEqAbs(f32, log10_32(1.5), 0.176091, epsilon));
try testing.expect(math.approxEqAbs(f32, log10_32(37.45), 1.573452, epsilon));
try testing.expect(math.approxEqAbs(f32, log10_32(89.123), 1.94999, epsilon));
try testing.expect(math.approxEqAbs(f32, log10_32(123123.234375), 5.09034, epsilon));
}
test "math.log10_64" {
const epsilon = 0.000001;
try testing.expect(math.approxEqAbs(f64, log10_64(0.2), -0.698970, epsilon));
try testing.expect(math.approxEqAbs(f64, log10_64(0.8923), -0.049489, epsilon));
try testing.expect(math.approxEqAbs(f64, log10_64(1.5), 0.176091, epsilon));
try testing.expect(math.approxEqAbs(f64, log10_64(37.45), 1.573452, epsilon));
try testing.expect(math.approxEqAbs(f64, log10_64(89.123), 1.94999, epsilon));
try testing.expect(math.approxEqAbs(f64, log10_64(123123.234375), 5.09034, epsilon));
}
test "math.log10_32.special" {
try testing.expect(math.isPositiveInf(log10_32(math.inf(f32))));
try testing.expect(math.isNegativeInf(log10_32(0.0)));
try testing.expect(math.isNan(log10_32(-1.0)));
try testing.expect(math.isNan(log10_32(math.nan(f32))));
}
test "math.log10_64.special" {
try testing.expect(math.isPositiveInf(log10_64(math.inf(f64))));
try testing.expect(math.isNegativeInf(log10_64(0.0)));
try testing.expect(math.isNan(log10_64(-1.0)));
try testing.expect(math.isNan(log10_64(math.nan(f64))));
}

View File

@@ -1,13 +1,6 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/log2f.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/log2.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const maxInt = std.math.maxInt;
/// Returns the base-2 logarithm of x.
///
@@ -20,15 +13,9 @@ pub fn log2(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
switch (@typeInfo(T)) {
.ComptimeFloat => {
return @as(comptime_float, log2_64(x));
},
.Float => {
return switch (T) {
f32 => log2_32(x),
f64 => log2_64(x),
else => @compileError("log2 not implemented for " ++ @typeName(T)),
};
return @as(comptime_float, @log2(x));
},
.Float => return @log2(x),
.ComptimeInt => comptime {
var result = 0;
var x_shifted = x;
@@ -46,168 +33,7 @@ pub fn log2(x: anytype) @TypeOf(x) {
}
}
pub fn log2_32(x_: f32) f32 {
const ivln2hi: f32 = 1.4428710938e+00;
const ivln2lo: f32 = -1.7605285393e-04;
const Lg1: f32 = 0xaaaaaa.0p-24;
const Lg2: f32 = 0xccce13.0p-25;
const Lg3: f32 = 0x91e9ee.0p-25;
const Lg4: f32 = 0xf89e26.0p-26;
var x = x_;
var u = @bitCast(u32, x);
var ix = u;
var k: i32 = 0;
// x < 2^(-126)
if (ix < 0x00800000 or ix >> 31 != 0) {
// log(+-0) = -inf
if (ix << 1 == 0) {
return -math.inf(f32);
}
// log(-#) = nan
if (ix >> 31 != 0) {
return math.nan(f32);
}
k -= 25;
x *= 0x1.0p25;
ix = @bitCast(u32, x);
} else if (ix >= 0x7F800000) {
return x;
} else if (ix == 0x3F800000) {
return 0;
}
// x into [sqrt(2) / 2, sqrt(2)]
ix += 0x3F800000 - 0x3F3504F3;
k += @intCast(i32, ix >> 23) - 0x7F;
ix = (ix & 0x007FFFFF) + 0x3F3504F3;
x = @bitCast(f32, ix);
const f = x - 1.0;
const s = f / (2.0 + f);
const z = s * s;
const w = z * z;
const t1 = w * (Lg2 + w * Lg4);
const t2 = z * (Lg1 + w * Lg3);
const R = t2 + t1;
const hfsq = 0.5 * f * f;
var hi = f - hfsq;
u = @bitCast(u32, hi);
u &= 0xFFFFF000;
hi = @bitCast(f32, u);
const lo = f - hi - hfsq + s * (hfsq + R);
return (lo + hi) * ivln2lo + lo * ivln2hi + hi * ivln2hi + @intToFloat(f32, k);
}
pub fn log2_64(x_: f64) f64 {
const ivln2hi: f64 = 1.44269504072144627571e+00;
const ivln2lo: f64 = 1.67517131648865118353e-10;
const Lg1: f64 = 6.666666666666735130e-01;
const Lg2: f64 = 3.999999999940941908e-01;
const Lg3: f64 = 2.857142874366239149e-01;
const Lg4: f64 = 2.222219843214978396e-01;
const Lg5: f64 = 1.818357216161805012e-01;
const Lg6: f64 = 1.531383769920937332e-01;
const Lg7: f64 = 1.479819860511658591e-01;
var x = x_;
var ix = @bitCast(u64, x);
var hx = @intCast(u32, ix >> 32);
var k: i32 = 0;
if (hx < 0x00100000 or hx >> 31 != 0) {
// log(+-0) = -inf
if (ix << 1 == 0) {
return -math.inf(f64);
}
// log(-#) = nan
if (hx >> 31 != 0) {
return math.nan(f64);
}
// subnormal, scale x
k -= 54;
x *= 0x1.0p54;
hx = @intCast(u32, @bitCast(u64, x) >> 32);
} else if (hx >= 0x7FF00000) {
return x;
} else if (hx == 0x3FF00000 and ix << 32 == 0) {
return 0;
}
// x into [sqrt(2) / 2, sqrt(2)]
hx += 0x3FF00000 - 0x3FE6A09E;
k += @intCast(i32, hx >> 20) - 0x3FF;
hx = (hx & 0x000FFFFF) + 0x3FE6A09E;
ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF);
x = @bitCast(f64, ix);
const f = x - 1.0;
const hfsq = 0.5 * f * f;
const s = f / (2.0 + f);
const z = s * s;
const w = z * z;
const t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
const R = t2 + t1;
// hi + lo = f - hfsq + s * (hfsq + R) ~ log(1 + f)
var hi = f - hfsq;
var hii = @bitCast(u64, hi);
hii &= @as(u64, maxInt(u64)) << 32;
hi = @bitCast(f64, hii);
const lo = f - hi - hfsq + s * (hfsq + R);
var val_hi = hi * ivln2hi;
var val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
// spadd(val_hi, val_lo, y)
const y = @intToFloat(f64, k);
const ww = y + val_hi;
val_lo += (y - ww) + val_hi;
val_hi = ww;
return val_lo + val_hi;
}
test "math.log2" {
try expect(log2(@as(f32, 0.2)) == log2_32(0.2));
try expect(log2(@as(f64, 0.2)) == log2_64(0.2));
}
test "math.log2_32" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f32, log2_32(0.2), -2.321928, epsilon));
try expect(math.approxEqAbs(f32, log2_32(0.8923), -0.164399, epsilon));
try expect(math.approxEqAbs(f32, log2_32(1.5), 0.584962, epsilon));
try expect(math.approxEqAbs(f32, log2_32(37.45), 5.226894, epsilon));
try expect(math.approxEqAbs(f32, log2_32(123123.234375), 16.909744, epsilon));
}
test "math.log2_64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, log2_64(0.2), -2.321928, epsilon));
try expect(math.approxEqAbs(f64, log2_64(0.8923), -0.164399, epsilon));
try expect(math.approxEqAbs(f64, log2_64(1.5), 0.584962, epsilon));
try expect(math.approxEqAbs(f64, log2_64(37.45), 5.226894, epsilon));
try expect(math.approxEqAbs(f64, log2_64(123123.234375), 16.909744, epsilon));
}
test "math.log2_32.special" {
try expect(math.isPositiveInf(log2_32(math.inf(f32))));
try expect(math.isNegativeInf(log2_32(0.0)));
try expect(math.isNan(log2_32(-1.0)));
try expect(math.isNan(log2_32(math.nan(f32))));
}
test "math.log2_64.special" {
try expect(math.isPositiveInf(log2_64(math.inf(f64))));
try expect(math.isNegativeInf(log2_64(0.0)));
try expect(math.isNan(log2_64(-1.0)));
try expect(math.isNan(log2_64(math.nan(f64))));
test "log2" {
try expect(log2(@as(f32, 0.2)) == @log2(0.2));
try expect(log2(@as(f64, 0.2)) == @log2(0.2));
}

View File

@@ -2,13 +2,13 @@ const math = @import("../math.zig");
/// Returns the nan representation for type T.
pub fn nan(comptime T: type) T {
return switch (T) {
f16 => math.nan_f16,
f32 => math.nan_f32,
f64 => math.nan_f64,
f80 => math.nan_f80,
f128 => math.nan_f128,
else => @compileError("nan not implemented for " ++ @typeName(T)),
return switch (@typeInfo(T).Float.bits) {
16 => math.nan_f16,
32 => math.nan_f32,
64 => math.nan_f64,
80 => math.nan_f80,
128 => math.nan_f128,
else => @compileError("unreachable"),
};
}
@@ -16,12 +16,12 @@ pub fn nan(comptime T: type) T {
pub fn snan(comptime T: type) T {
// Note: A signalling nan is identical to a standard right now by may have a different bit
// representation in the future when required.
return switch (T) {
f16 => @bitCast(f16, math.nan_u16),
f32 => @bitCast(f32, math.nan_u32),
f64 => @bitCast(f64, math.nan_u64),
f80 => @bitCast(f80, math.nan_u80),
f128 => @bitCast(f128, math.nan_u128),
else => @compileError("snan not implemented for " ++ @typeName(T)),
return switch (@typeInfo(T).Float.bits) {
16 => math.nan_u16,
32 => math.nan_u32,
64 => math.nan_u64,
80 => math.nan_u80,
128 => math.nan_u128,
else => @compileError("unreachable"),
};
}

View File

@@ -82,7 +82,7 @@ pub fn pow(comptime T: type, x: T, y: T) T {
}
// pow(x, +inf) = +0 for |x| < 1
// pow(x, -inf) = +0 for |x| > 1
else if ((math.fabs(x) < 1) == math.isPositiveInf(y)) {
else if ((@fabs(x) < 1) == math.isPositiveInf(y)) {
return 0;
}
// pow(x, -inf) = +inf for |x| < 1
@@ -108,14 +108,14 @@ pub fn pow(comptime T: type, x: T, y: T) T {
// special case sqrt
if (y == 0.5) {
return math.sqrt(x);
return @sqrt(x);
}
if (y == -0.5) {
return 1 / math.sqrt(x);
return 1 / @sqrt(x);
}
const r1 = math.modf(math.fabs(y));
const r1 = math.modf(@fabs(y));
var yi = r1.ipart;
var yf = r1.fpart;
@@ -123,7 +123,7 @@ pub fn pow(comptime T: type, x: T, y: T) T {
return math.nan(T);
}
if (yi >= 1 << (@typeInfo(T).Float.bits - 1)) {
return math.exp(y * math.ln(x));
return @exp(y * @log(x));
}
// a = a1 * 2^ae
@@ -136,7 +136,7 @@ pub fn pow(comptime T: type, x: T, y: T) T {
yf -= 1;
yi += 1;
}
a1 = math.exp(yf * math.ln(x));
a1 = @exp(yf * @log(x));
}
// a *= x^yi

View File

@@ -1,185 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/roundf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/round.c
const expect = std.testing.expect;
const std = @import("../std.zig");
const math = std.math;
/// Returns x rounded to the nearest integer, rounding half away from zero.
///
/// Special Cases:
/// - round(+-0) = +-0
/// - round(+-inf) = +-inf
/// - round(nan) = nan
pub fn round(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => round32(x),
f64 => round64(x),
f128 => round128(x),
// TODO this is not correct for some targets
c_longdouble => @floatCast(c_longdouble, round128(x)),
else => @compileError("round not implemented for " ++ @typeName(T)),
};
}
fn round32(x_: f32) f32 {
const f32_toint = 1.0 / math.floatEps(f32);
var x = x_;
const u = @bitCast(u32, x);
const e = (u >> 23) & 0xFF;
var y: f32 = undefined;
if (e >= 0x7F + 23) {
return x;
}
if (u >> 31 != 0) {
x = -x;
}
if (e < 0x7F - 1) {
math.doNotOptimizeAway(x + f32_toint);
return 0 * @bitCast(f32, u);
}
y = x + f32_toint - f32_toint - x;
if (y > 0.5) {
y = y + x - 1;
} else if (y <= -0.5) {
y = y + x + 1;
} else {
y = y + x;
}
if (u >> 31 != 0) {
return -y;
} else {
return y;
}
}
fn round64(x_: f64) f64 {
const f64_toint = 1.0 / math.floatEps(f64);
var x = x_;
const u = @bitCast(u64, x);
const e = (u >> 52) & 0x7FF;
var y: f64 = undefined;
if (e >= 0x3FF + 52) {
return x;
}
if (u >> 63 != 0) {
x = -x;
}
if (e < 0x3ff - 1) {
math.doNotOptimizeAway(x + f64_toint);
return 0 * @bitCast(f64, u);
}
y = x + f64_toint - f64_toint - x;
if (y > 0.5) {
y = y + x - 1;
} else if (y <= -0.5) {
y = y + x + 1;
} else {
y = y + x;
}
if (u >> 63 != 0) {
return -y;
} else {
return y;
}
}
fn round128(x_: f128) f128 {
const f128_toint = 1.0 / math.floatEps(f128);
var x = x_;
const u = @bitCast(u128, x);
const e = (u >> 112) & 0x7FFF;
var y: f128 = undefined;
if (e >= 0x3FFF + 112) {
return x;
}
if (u >> 127 != 0) {
x = -x;
}
if (e < 0x3FFF - 1) {
math.doNotOptimizeAway(x + f128_toint);
return 0 * @bitCast(f128, u);
}
y = x + f128_toint - f128_toint - x;
if (y > 0.5) {
y = y + x - 1;
} else if (y <= -0.5) {
y = y + x + 1;
} else {
y = y + x;
}
if (u >> 127 != 0) {
return -y;
} else {
return y;
}
}
test "math.round" {
try expect(round(@as(f32, 1.3)) == round32(1.3));
try expect(round(@as(f64, 1.3)) == round64(1.3));
try expect(round(@as(f128, 1.3)) == round128(1.3));
}
test "math.round32" {
try expect(round32(1.3) == 1.0);
try expect(round32(-1.3) == -1.0);
try expect(round32(0.2) == 0.0);
try expect(round32(1.8) == 2.0);
}
test "math.round64" {
try expect(round64(1.3) == 1.0);
try expect(round64(-1.3) == -1.0);
try expect(round64(0.2) == 0.0);
try expect(round64(1.8) == 2.0);
}
test "math.round128" {
try expect(round128(1.3) == 1.0);
try expect(round128(-1.3) == -1.0);
try expect(round128(0.2) == 0.0);
try expect(round128(1.8) == 2.0);
}
test "math.round32.special" {
try expect(round32(0.0) == 0.0);
try expect(round32(-0.0) == -0.0);
try expect(math.isPositiveInf(round32(math.inf(f32))));
try expect(math.isNegativeInf(round32(-math.inf(f32))));
try expect(math.isNan(round32(math.nan(f32))));
}
test "math.round64.special" {
try expect(round64(0.0) == 0.0);
try expect(round64(-0.0) == -0.0);
try expect(math.isPositiveInf(round64(math.inf(f64))));
try expect(math.isNegativeInf(round64(-math.inf(f64))));
try expect(math.isNan(round64(math.nan(f64))));
}
test "math.round128.special" {
try expect(round128(0.0) == 0.0);
try expect(round128(-0.0) == -0.0);
try expect(math.isPositiveInf(round128(math.inf(f128))));
try expect(math.isNegativeInf(round128(-math.inf(f128))));
try expect(math.isNan(round128(math.nan(f128))));
}

View File

@@ -1,168 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/sinf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/sin.c
//
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const kernel = @import("__trig.zig");
const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2;
const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f;
/// Returns the sine of the radian value x.
///
/// Special Cases:
/// - sin(+-0) = +-0
/// - sin(+-inf) = nan
/// - sin(nan) = nan
pub fn sin(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => sin32(x),
f64 => sin64(x),
else => @compileError("sin not implemented for " ++ @typeName(T)),
};
}
fn sin32(x: f32) f32 {
// Small multiples of pi/2 rounded to double precision.
const s1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18
const s2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18
const s3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
const s4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
var ix = @bitCast(u32, x);
const sign = ix >> 31 != 0;
ix &= 0x7fffffff;
if (ix <= 0x3f490fda) { // |x| ~<= pi/4
if (ix < 0x39800000) { // |x| < 2**-12
// raise inexact if x!=0 and underflow if subnormal
math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120);
return x;
}
return kernel.__sindf(x);
}
if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
if (ix <= 0x4016cbe3) { // |x| ~<= 3pi/4
if (sign) {
return -kernel.__cosdf(x + s1pio2);
} else {
return kernel.__cosdf(x - s1pio2);
}
}
return kernel.__sindf(if (sign) -(x + s2pio2) else -(x - s2pio2));
}
if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
if (ix <= 0x40afeddf) { // |x| ~<= 7*pi/4
if (sign) {
return kernel.__cosdf(x + s3pio2);
} else {
return -kernel.__cosdf(x - s3pio2);
}
}
return kernel.__sindf(if (sign) x + s4pio2 else x - s4pio2);
}
// sin(Inf or NaN) is NaN
if (ix >= 0x7f800000) {
return x - x;
}
var y: f64 = undefined;
const n = __rem_pio2f(x, &y);
return switch (n & 3) {
0 => kernel.__sindf(y),
1 => kernel.__cosdf(y),
2 => kernel.__sindf(-y),
else => -kernel.__cosdf(y),
};
}
fn sin64(x: f64) f64 {
var ix = @bitCast(u64, x) >> 32;
ix &= 0x7fffffff;
// |x| ~< pi/4
if (ix <= 0x3fe921fb) {
if (ix < 0x3e500000) { // |x| < 2**-26
// raise inexact if x != 0 and underflow if subnormal
math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120);
return x;
}
return kernel.__sin(x, 0.0, 0);
}
// sin(Inf or NaN) is NaN
if (ix >= 0x7ff00000) {
return x - x;
}
var y: [2]f64 = undefined;
const n = __rem_pio2(x, &y);
return switch (n & 3) {
0 => kernel.__sin(y[0], y[1], 1),
1 => kernel.__cos(y[0], y[1]),
2 => -kernel.__sin(y[0], y[1], 1),
else => -kernel.__cos(y[0], y[1]),
};
}
test "math.sin" {
try expect(sin(@as(f32, 0.0)) == sin32(0.0));
try expect(sin(@as(f64, 0.0)) == sin64(0.0));
try expect(comptime (math.sin(@as(f64, 2))) == math.sin(@as(f64, 2)));
}
test "math.sin32" {
const epsilon = 0.00001;
try expect(math.approxEqAbs(f32, sin32(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f32, sin32(0.2), 0.198669, epsilon));
try expect(math.approxEqAbs(f32, sin32(0.8923), 0.778517, epsilon));
try expect(math.approxEqAbs(f32, sin32(1.5), 0.997495, epsilon));
try expect(math.approxEqAbs(f32, sin32(-1.5), -0.997495, epsilon));
try expect(math.approxEqAbs(f32, sin32(37.45), -0.246544, epsilon));
try expect(math.approxEqAbs(f32, sin32(89.123), 0.916166, epsilon));
}
test "math.sin64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, sin64(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f64, sin64(0.2), 0.198669, epsilon));
try expect(math.approxEqAbs(f64, sin64(0.8923), 0.778517, epsilon));
try expect(math.approxEqAbs(f64, sin64(1.5), 0.997495, epsilon));
try expect(math.approxEqAbs(f64, sin64(-1.5), -0.997495, epsilon));
try expect(math.approxEqAbs(f64, sin64(37.45), -0.246543, epsilon));
try expect(math.approxEqAbs(f64, sin64(89.123), 0.916166, epsilon));
}
test "math.sin32.special" {
try expect(sin32(0.0) == 0.0);
try expect(sin32(-0.0) == -0.0);
try expect(math.isNan(sin32(math.inf(f32))));
try expect(math.isNan(sin32(-math.inf(f32))));
try expect(math.isNan(sin32(math.nan(f32))));
}
test "math.sin64.special" {
try expect(sin64(0.0) == 0.0);
try expect(sin64(-0.0) == -0.0);
try expect(math.isNan(sin64(math.inf(f64))));
try expect(math.isNan(sin64(-math.inf(f64))));
try expect(math.isNan(sin64(math.nan(f64))));
}
test "math.sin32 #9901" {
const float = @bitCast(f32, @as(u32, 0b11100011111111110000000000000000));
_ = std.math.sin(float);
}
test "math.sin64 #9901" {
const float = @bitCast(f64, @as(u64, 0b1111111101000001000000001111110111111111100000000000000000000001));
_ = std.math.sin(float);
}

View File

@@ -1,140 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/tanf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/tan.c
// https://golang.org/src/math/tan.go
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const kernel = @import("__trig.zig");
const __rem_pio2 = @import("__rem_pio2.zig").__rem_pio2;
const __rem_pio2f = @import("__rem_pio2f.zig").__rem_pio2f;
/// Returns the tangent of the radian value x.
///
/// Special Cases:
/// - tan(+-0) = +-0
/// - tan(+-inf) = nan
/// - tan(nan) = nan
pub fn tan(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => tan32(x),
f64 => tan64(x),
else => @compileError("tan not implemented for " ++ @typeName(T)),
};
}
fn tan32(x: f32) f32 {
// Small multiples of pi/2 rounded to double precision.
const t1pio2: f64 = 1.0 * math.pi / 2.0; // 0x3FF921FB, 0x54442D18
const t2pio2: f64 = 2.0 * math.pi / 2.0; // 0x400921FB, 0x54442D18
const t3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
const t4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
var ix = @bitCast(u32, x);
const sign = ix >> 31 != 0;
ix &= 0x7fffffff;
if (ix <= 0x3f490fda) { // |x| ~<= pi/4
if (ix < 0x39800000) { // |x| < 2**-12
// raise inexact if x!=0 and underflow if subnormal
math.doNotOptimizeAway(if (ix < 0x00800000) x / 0x1p120 else x + 0x1p120);
return x;
}
return kernel.__tandf(x, false);
}
if (ix <= 0x407b53d1) { // |x| ~<= 5*pi/4
if (ix <= 0x4016cbe3) { // |x| ~<= 3pi/4
return kernel.__tandf((if (sign) x + t1pio2 else x - t1pio2), true);
} else {
return kernel.__tandf((if (sign) x + t2pio2 else x - t2pio2), false);
}
}
if (ix <= 0x40e231d5) { // |x| ~<= 9*pi/4
if (ix <= 0x40afeddf) { // |x| ~<= 7*pi/4
return kernel.__tandf((if (sign) x + t3pio2 else x - t3pio2), true);
} else {
return kernel.__tandf((if (sign) x + t4pio2 else x - t4pio2), false);
}
}
// tan(Inf or NaN) is NaN
if (ix >= 0x7f800000) {
return x - x;
}
var y: f64 = undefined;
const n = __rem_pio2f(x, &y);
return kernel.__tandf(y, n & 1 != 0);
}
fn tan64(x: f64) f64 {
var ix = @bitCast(u64, x) >> 32;
ix &= 0x7fffffff;
// |x| ~< pi/4
if (ix <= 0x3fe921fb) {
if (ix < 0x3e400000) { // |x| < 2**-27
// raise inexact if x!=0 and underflow if subnormal
math.doNotOptimizeAway(if (ix < 0x00100000) x / 0x1p120 else x + 0x1p120);
return x;
}
return kernel.__tan(x, 0.0, false);
}
// tan(Inf or NaN) is NaN
if (ix >= 0x7ff00000) {
return x - x;
}
var y: [2]f64 = undefined;
const n = __rem_pio2(x, &y);
return kernel.__tan(y[0], y[1], n & 1 != 0);
}
test "math.tan" {
try expect(tan(@as(f32, 0.0)) == tan32(0.0));
try expect(tan(@as(f64, 0.0)) == tan64(0.0));
}
test "math.tan32" {
const epsilon = 0.00001;
try expect(math.approxEqAbs(f32, tan32(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f32, tan32(0.2), 0.202710, epsilon));
try expect(math.approxEqAbs(f32, tan32(0.8923), 1.240422, epsilon));
try expect(math.approxEqAbs(f32, tan32(1.5), 14.101420, epsilon));
try expect(math.approxEqAbs(f32, tan32(37.45), -0.254397, epsilon));
try expect(math.approxEqAbs(f32, tan32(89.123), 2.285852, epsilon));
}
test "math.tan64" {
const epsilon = 0.000001;
try expect(math.approxEqAbs(f64, tan64(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f64, tan64(0.2), 0.202710, epsilon));
try expect(math.approxEqAbs(f64, tan64(0.8923), 1.240422, epsilon));
try expect(math.approxEqAbs(f64, tan64(1.5), 14.101420, epsilon));
try expect(math.approxEqAbs(f64, tan64(37.45), -0.254397, epsilon));
try expect(math.approxEqAbs(f64, tan64(89.123), 2.2858376, epsilon));
}
test "math.tan32.special" {
try expect(tan32(0.0) == 0.0);
try expect(tan32(-0.0) == -0.0);
try expect(math.isNan(tan32(math.inf(f32))));
try expect(math.isNan(tan32(-math.inf(f32))));
try expect(math.isNan(tan32(math.nan(f32))));
}
test "math.tan64.special" {
try expect(tan64(0.0) == 0.0);
try expect(tan64(-0.0) == -0.0);
try expect(math.isNan(tan64(math.inf(f64))));
try expect(math.isNan(tan64(-math.inf(f64))));
try expect(math.isNan(tan64(math.nan(f64))));
}

View File

@@ -1,141 +0,0 @@
// Ported from musl, which is licensed under the MIT license:
// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
//
// https://git.musl-libc.org/cgit/musl/tree/src/math/truncf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/trunc.c
const std = @import("../std.zig");
const math = std.math;
const expect = std.testing.expect;
const maxInt = std.math.maxInt;
/// Returns the integer value of x.
///
/// Special Cases:
/// - trunc(+-0) = +-0
/// - trunc(+-inf) = +-inf
/// - trunc(nan) = nan
pub fn trunc(x: anytype) @TypeOf(x) {
const T = @TypeOf(x);
return switch (T) {
f32 => trunc32(x),
f64 => trunc64(x),
f128 => trunc128(x),
// TODO this is not correct for some targets
c_longdouble => @floatCast(c_longdouble, trunc128(x)),
else => @compileError("trunc not implemented for " ++ @typeName(T)),
};
}
fn trunc32(x: f32) f32 {
const u = @bitCast(u32, x);
var e = @intCast(i32, ((u >> 23) & 0xFF)) - 0x7F + 9;
var m: u32 = undefined;
if (e >= 23 + 9) {
return x;
}
if (e < 9) {
e = 1;
}
m = @as(u32, maxInt(u32)) >> @intCast(u5, e);
if (u & m == 0) {
return x;
} else {
math.doNotOptimizeAway(x + 0x1p120);
return @bitCast(f32, u & ~m);
}
}
fn trunc64(x: f64) f64 {
const u = @bitCast(u64, x);
var e = @intCast(i32, ((u >> 52) & 0x7FF)) - 0x3FF + 12;
var m: u64 = undefined;
if (e >= 52 + 12) {
return x;
}
if (e < 12) {
e = 1;
}
m = @as(u64, maxInt(u64)) >> @intCast(u6, e);
if (u & m == 0) {
return x;
} else {
math.doNotOptimizeAway(x + 0x1p120);
return @bitCast(f64, u & ~m);
}
}
fn trunc128(x: f128) f128 {
const u = @bitCast(u128, x);
var e = @intCast(i32, ((u >> 112) & 0x7FFF)) - 0x3FFF + 16;
var m: u128 = undefined;
if (e >= 112 + 16) {
return x;
}
if (e < 16) {
e = 1;
}
m = @as(u128, maxInt(u128)) >> @intCast(u7, e);
if (u & m == 0) {
return x;
} else {
math.doNotOptimizeAway(x + 0x1p120);
return @bitCast(f128, u & ~m);
}
}
test "math.trunc" {
try expect(trunc(@as(f32, 1.3)) == trunc32(1.3));
try expect(trunc(@as(f64, 1.3)) == trunc64(1.3));
try expect(trunc(@as(f128, 1.3)) == trunc128(1.3));
}
test "math.trunc32" {
try expect(trunc32(1.3) == 1.0);
try expect(trunc32(-1.3) == -1.0);
try expect(trunc32(0.2) == 0.0);
}
test "math.trunc64" {
try expect(trunc64(1.3) == 1.0);
try expect(trunc64(-1.3) == -1.0);
try expect(trunc64(0.2) == 0.0);
}
test "math.trunc128" {
try expect(trunc128(1.3) == 1.0);
try expect(trunc128(-1.3) == -1.0);
try expect(trunc128(0.2) == 0.0);
}
test "math.trunc32.special" {
try expect(trunc32(0.0) == 0.0); // 0x3F800000
try expect(trunc32(-0.0) == -0.0);
try expect(math.isPositiveInf(trunc32(math.inf(f32))));
try expect(math.isNegativeInf(trunc32(-math.inf(f32))));
try expect(math.isNan(trunc32(math.nan(f32))));
}
test "math.trunc64.special" {
try expect(trunc64(0.0) == 0.0);
try expect(trunc64(-0.0) == -0.0);
try expect(math.isPositiveInf(trunc64(math.inf(f64))));
try expect(math.isNegativeInf(trunc64(-math.inf(f64))));
try expect(math.isNan(trunc64(math.nan(f64))));
}
test "math.trunc128.special" {
try expect(trunc128(0.0) == 0.0);
try expect(trunc128(-0.0) == -0.0);
try expect(math.isPositiveInf(trunc128(math.inf(f128))));
try expect(math.isNegativeInf(trunc128(-math.inf(f128))));
try expect(math.isNan(trunc128(math.nan(f128))));
}