compiler_rt: Implement floatXiYf/fixXfYi, incl f80

This change: - Adds generic implementation of the float -> integer conversion functions floatXiYf, including support for f80 - Updates the existing implementation of integer -> float conversion fixXiYf to support f16 and f80 - Fixes the handling of the explicit integer bit in `__trunctfxf2` - Combines the test cases for fixXfYi/floatXiYf into a single file - Renames `fmodl` to `fmodq`, since it operates on 128-bit floats The new implementation for floatXiYf has been benchmarked, and generally provides equal or better performance versus the current implementations: Throughput (MiB/s) - Before | u32 | i32 | u64 | i64 | u128 | i128 | -----|----------|----------|----------|----------|----------|----------| f16 | none | none | none | none | none | none | f32 | 2231.67 | 2001.19 | 1745.66 | 1405.77 | 2173.99 | 1874.63 | f64 | 1407.17 | 1055.83 | 2911.68 | 2437.21 | 1676.05 | 1476.67 | f80 | none | none | none | none | none | none | f128 | 327.56 | 321.25 | 645.92 | 654.52 | 1153.56 | 1096.27 | Throughput (MiB/s) - After | u32 | i32 | u64 | i64 | u128 | i128 | -----|----------|----------|----------|----------|----------|----------| f16 | 1407.61 | 1637.25 | 3555.03 | 2594.56 | 3680.60 | 3063.34 | f32 | 2101.36 | 2122.62 | 3225.46 | 3123.86 | 2860.05 | 1985.21 | f64 | 1395.57 | 1314.87 | 2409.24 | 2196.30 | 2384.95 | 1908.15 | f80 | 475.53 | 457.92 | 884.50 | 812.12 | 1475.27 | 1382.16 | f128 | 359.60 | 350.91 | 723.08 | 706.80 | 1296.42 | 1198.87 |
2022-04-08 18:54:53 -07:00
parent 17631cb2d3
commit b5d5685a4e
76 changed files with 2458 additions and 3115 deletions
--- a/lib/std/special/compiler_rt/floatfmodq.zig
+++ b/lib/std/special/compiler_rt/floatfmodq.zig
@@ -0,0 +1,126 @@
+const builtin = @import("builtin");
+const std = @import("std");
+
+// fmodq - floating modulo large, returns the remainder of division for f128 types
+// Logic and flow heavily inspired by MUSL fmodl for 113 mantissa digits
+pub fn fmodq(a: f128, b: f128) callconv(.C) f128 {
+    @setRuntimeSafety(builtin.is_test);
+    var amod = a;
+    var bmod = b;
+    const aPtr_u64 = @ptrCast([*]u64, &amod);
+    const bPtr_u64 = @ptrCast([*]u64, &bmod);
+    const aPtr_u16 = @ptrCast([*]u16, &amod);
+    const bPtr_u16 = @ptrCast([*]u16, &bmod);
+
+    const exp_and_sign_index = comptime switch (builtin.target.cpu.arch.endian()) {
+        .Little => 7,
+        .Big => 0,
+    };
+    const low_index = comptime switch (builtin.target.cpu.arch.endian()) {
+        .Little => 0,
+        .Big => 1,
+    };
+    const high_index = comptime switch (builtin.target.cpu.arch.endian()) {
+        .Little => 1,
+        .Big => 0,
+    };
+
+    const signA = aPtr_u16[exp_and_sign_index] & 0x8000;
+    var expA = @intCast(i32, (aPtr_u16[exp_and_sign_index] & 0x7fff));
+    var expB = bPtr_u16[exp_and_sign_index] & 0x7fff;
+
+    // There are 3 cases where the answer is undefined, check for:
+    //   - fmodq(val, 0)
+    //   - fmodq(val, NaN)
+    //   - fmodq(inf, val)
+    // The sign on checked values does not matter.
+    // Doing (a * b) / (a * b) procudes undefined results
+    // because the three cases always produce undefined calculations:
+    //   - 0 / 0
+    //   - val * NaN
+    //   - inf / inf
+    if (b == 0 or std.math.isNan(b) or expA == 0x7fff) {
+        return (a * b) / (a * b);
+    }
+
+    // Remove the sign from both
+    aPtr_u16[exp_and_sign_index] = @bitCast(u16, @intCast(i16, expA));
+    bPtr_u16[exp_and_sign_index] = @bitCast(u16, @intCast(i16, expB));
+    if (amod <= bmod) {
+        if (amod == bmod) {
+            return 0 * a;
+        }
+        return a;
+    }
+
+    if (expA == 0) {
+        amod *= 0x1p120;
+        expA = aPtr_u16[exp_and_sign_index] -% 120;
+    }
+
+    if (expB == 0) {
+        bmod *= 0x1p120;
+        expB = bPtr_u16[exp_and_sign_index] -% 120;
+    }
+
+    // OR in extra non-stored mantissa digit
+    var highA: u64 = (aPtr_u64[high_index] & (std.math.maxInt(u64) >> 16)) | 1 << 48;
+    var highB: u64 = (bPtr_u64[high_index] & (std.math.maxInt(u64) >> 16)) | 1 << 48;
+    var lowA: u64 = aPtr_u64[low_index];
+    var lowB: u64 = bPtr_u64[low_index];
+
+    while (expA > expB) : (expA -= 1) {
+        var high = highA -% highB;
+        var low = lowA -% lowB;
+        if (lowA < lowB) {
+            high = highA -% 1;
+        }
+        if (high >> 63 == 0) {
+            if ((high | low) == 0) {
+                return 0 * a;
+            }
+            highA = 2 *% high + (low >> 63);
+            lowA = 2 *% low;
+        } else {
+            highA = 2 *% highA + (lowA >> 63);
+            lowA = 2 *% lowA;
+        }
+    }
+
+    var high = highA -% highB;
+    var low = lowA -% lowB;
+    if (lowA < lowB) {
+        high -= 1;
+    }
+    if (high >> 63 == 0) {
+        if ((high | low) == 0) {
+            return 0 * a;
+        }
+        highA = high;
+        lowA = low;
+    }
+
+    while (highA >> 48 == 0) {
+        highA = 2 *% highA + (lowA >> 63);
+        lowA = 2 *% lowA;
+        expA = expA - 1;
+    }
+
+    // Overwrite the current amod with the values in highA and lowA
+    aPtr_u64[high_index] = highA;
+    aPtr_u64[low_index] = lowA;
+
+    // Combine the exponent with the sign, normalize if happend to be denormalized
+    if (expA <= 0) {
+        aPtr_u16[exp_and_sign_index] = @truncate(u16, @bitCast(u32, (expA +% 120))) | signA;
+        amod *= 0x1p-120;
+    } else {
+        aPtr_u16[exp_and_sign_index] = @truncate(u16, @bitCast(u32, expA)) | signA;
+    }
+
+    return amod;
+}
+
+test {
+    _ = @import("floatfmodq_test.zig");
+}