commit 5e8a9ed9e87f9152e40c7493b57b0622ceb20f29 (tree)
parent 9ed970090c1fe6b7d2d2f0eef1a59343e7b9a67d
Author: Christophe Delage <christ@ophe.net>
Date: Fri, 29 May 2026 13:11:42 +0200
Scaffolding for the code sharing between logq, log2q and log10q
Diffstat:
2 files changed, 167 insertions(+), 64 deletions(-)
diff --git a/lib/compiler_rt/log.zig b/lib/compiler_rt/log.zig
@@ -450,8 +450,7 @@ pub fn __logx(a: f80) callconv(.c) f80 {
/// Accuracy on 10 million random numbers near x = 1 (testing the proc2 case):
/// <= 0.5 ulp: 99.96%, worst case <= 0.528 ulp
pub fn logq(x: f128) callconv(.c) f128 {
- const bitsize = 7;
- const size = 1 << bitsize;
+ const impl = @import("log_f128.zig");
if (!math.isFinite(x)) {
if (math.isNan(x)) {
@@ -469,74 +468,42 @@ pub fn logq(x: f128) callconv(.c) f128 {
return math.nan(f128);
}
- // exp(-1 / 16) rounded down
- const proc2_lo: f128 = 0.939413062813475786119710824622305;
- // exp(1 / 16) rounded up
- const proc2_hi: f128 = 1.0644944589178594295633905946428897;
- if (proc2_lo < x and x < proc2_hi) {
- const f = x - 1.0;
-
- const g = 1 / (2 + f);
- const u = 2 * f * g;
- const v = u * u;
- const uv = u * v;
- const v64: f64 = @floatCast(v);
-
+ if (impl.Proc2.lo < x and x < impl.Proc2.hi) {
// Polynomial approximation of log((1 + u / 2) / (1 - u / 2))
// in [2 * a / (2 + a), 2 * b / (2 + b)]
// where a = exp(-1 / 16) - 1 and b = exp(1 / 16) - 1
- const p19 = 2.0165671588771827537210411918018159e-7;
- const p17 = 8.97568550755477160981619052649713e-7 + v64 * p19;
- const p15 = 4.069010449774280288178309893970754e-6 + v64 * p17;
- const p13 = 1.8780048076832339308077858301484127e-5 + v * p15;
- const p11 = 8.87784090909092440759545734146088e-5 + v * p13;
- const p9 = 4.340277777777777776216500817402857e-4 + v * p11;
- const p7 = 2.2321428571428571428572328745789477e-3 + v * p9;
- const p5 = 1.249999999999999999999999997455655e-2 + v * p7;
- const p3 = 8.333333333333333333333333333333581e-2;
-
- const q_hi = uv * p3;
- const q_lo = uv * v * p5;
- const q = q_hi + q_lo;
-
- const fa: f128 = @as(f64, @floatCast(f));
- const ua: f128 = @as(f64, @floatCast(u));
-
- const fb: f128 = f - fa;
- const ub: f128 = ((2 * (f - ua) - ua * fa) - ua * fb) * g;
-
- return ua + (ub + q);
+ const poly: impl.Proc2.Poly = .{
+ .b1_hi = 1.0,
+ .b1_lo = 0.0,
+ .b3 = 8.333333333333333333333333333333581e-2,
+ .b5 = 1.249999999999999999999999997455655e-2,
+ .b7 = 2.2321428571428571428572328745789477e-3,
+ .b9 = 4.340277777777777776216500817402857e-4,
+ .b11 = 8.87784090909092440759545734146088e-5,
+ .b13 = 1.8780048076832339308077858301484127e-5,
+ .b15 = 4.069010449774280288178309893970754e-6,
+ .b17 = 8.97568550755477160981619052649713e-7,
+ .b19 = 2.0165671588771827537210411918018159e-7,
+ };
+ return impl.proc2(.{ .poly = poly }, x);
}
- const ym = frexp2(x);
- const y = ym.significand;
- const m = ym.exponent;
-
- const F0 = @round(math.ldexp(y, bitsize));
- const j0: usize = @intFromFloat(F0);
- const j = j0 - size;
- const F = math.ldexp(F0, -bitsize);
- const f = y - F;
-
- const u = (f + f) / (y + F);
- const v = u * u;
- const v64: f64 = @floatCast(v);
-
// Polynomial approximation of log(1 + 2 * u / (2 - u))
// in [-(2 * fmax) / (2 + fmax), (2 * fmax) / (2 - fmax)]
// where fmax = 0.5 / size
- const p11 = 8.877925718782769769445565656611838e-5;
- const p9 = 4.340277777635300605611118803507141e-4 + v64 * p11;
- const p7 = 2.2321428571428572515097318595359542e-3 + v * p9;
- const p5 = 1.249999999999999999999963839743372e-2 + v * p7;
- const p3 = 8.333333333333333333333333333372414e-2 + v * p5;
-
- const q = u * v * p3;
+ const poly: impl.Proc1.Poly = .{
+ .a1 = 1.0,
+ .a3 = 8.333333333333333333333333333372414e-2,
+ .a5 = 1.249999999999999999999963839743372e-2,
+ .a7 = 2.2321428571428572515097318595359542e-3,
+ .a9 = 4.340277777635300605611118803507141e-4,
+ .a11 = 8.877925718782769769445565656611838e-5,
+ };
- // log1p_tab[j].hi = 2^-n * round-to-integer(2^n * l)
- // log1p_tab[j].lo = round-to-nearest-f128(l - log1p_tab[j].hi)
+ // tab[j].hi = 2^-n * round-to-integer(2^n * l)
+ // tab[j].lo = round-to-nearest-f128(l - tab[j].hi)
// where n = 97 and l = log(1 + j / size)
- const log1p_tab = [size + 1]struct { hi: f128, lo: f128 }{
+ const tab = [impl.size + 1]impl.Proc1.HiLo{
.{ .hi = 0, .lo = 0 },
.{ .hi = 0x1.fe02a6b106788fc3769039p-8, .lo = 0x1.dc282d2b3db2c3ef9a073a876702p-100 },
.{ .hi = 0x1.fc0a8b0fc03e3cf9eda74d4p-7, .lo = -0x1.0a8552414fc416fc223acca2ebfp-100 },
@@ -667,11 +634,8 @@ pub fn logq(x: f128) callconv(.c) f128 {
.{ .hi = 0x1.60e32f44788d8ca7c895a0b5p-1, .lo = -0x1.3557995d063914a66aa81ead3fdbp-101 },
.{ .hi = 0x1.62e42fefa39ef35793c7673p-1, .lo = 0x1.f97b57a079a193394c5b16c5068cp-103 },
};
- const xm: f128 = @floatFromInt(m);
- const l_hi = xm * log1p_tab[128].hi + log1p_tab[j].hi;
- const l_lo = xm * log1p_tab[128].lo + log1p_tab[j].lo;
- return l_hi + (u + (q + l_lo));
+ return impl.proc1(.{ .poly = poly, .tab = tab }, x);
}
pub fn logl(x: c_longdouble) callconv(.c) c_longdouble {
diff --git a/lib/compiler_rt/log_f128.zig b/lib/compiler_rt/log_f128.zig
@@ -0,0 +1,139 @@
+const std = @import("std");
+const math = std.math;
+
+pub const log2size = 7;
+pub const size = 1 << log2size;
+
+pub const Proc1 = struct {
+ pub const Poly = struct {
+ a1: f128,
+ a3: f128,
+ a5: f128,
+ a7: f128,
+ a9: f64,
+ a11: f64,
+ };
+ pub const HiLo = struct { hi: f128, lo: f128 };
+ poly: Poly,
+ tab: [size + 1]HiLo,
+};
+
+pub fn proc1(comptime p: Proc1, x: f128) f128 {
+ const ym = frexp2(x);
+ const y = ym.significand;
+ const m = ym.exponent;
+
+ const F0 = @round(math.ldexp(y, log2size));
+ const j0: usize = @intFromFloat(F0);
+ const j = j0 - size;
+ const F = math.ldexp(F0, -log2size);
+ const f = y - F;
+
+ const u = (f + f) / (y + F);
+ const v = u * u;
+ const v64: f64 = @floatCast(v);
+
+ const p9 = p.poly.a9 + v64 * p.poly.a11;
+ const p7 = p.poly.a7 + v * p9;
+ const p5 = p.poly.a5 + v * p7;
+ const p3 = p.poly.a3 + v * p5;
+
+ const q = u * v * p3;
+
+ const xm: f128 = @floatFromInt(m);
+ const l_hi = xm * p.tab[128].hi + p.tab[j].hi;
+ const l_lo = xm * p.tab[128].lo + p.tab[j].lo;
+
+ if (comptime p.poly.a1 == 1.0)
+ return l_hi + (u + (q + l_lo))
+ else
+ return l_hi + (u * p.poly.a1 + (q + l_lo));
+}
+
+pub const Proc2 = struct {
+ // exp(-1 / 16) rounded down
+ pub const lo: f128 = 0.939413062813475786119710824622305;
+ // exp(1 / 16) rounded up
+ pub const hi: f128 = 1.0644944589178594295633905946428897;
+
+ pub const Poly = struct {
+ b1_hi: f128,
+ b1_lo: f128,
+ b3: f128,
+ b5: f128,
+ b7: f128,
+ b9: f128,
+ b11: f128,
+ b13: f128,
+ b15: f64,
+ b17: f64,
+ b19: f64,
+ };
+
+ poly: Poly,
+};
+
+pub fn proc2(comptime p: Proc2, x: f128) f128 {
+ std.debug.assert(Proc2.lo < x and x < Proc2.hi);
+
+ const f = x - 1.0;
+ const g = 1 / (2 + f);
+ const u = 2 * f * g;
+ const v = u * u;
+ const uv = u * v;
+ const v64: f64 = @floatCast(v);
+
+ const p17 = p.poly.b17 + v64 * p.poly.b19;
+ const p15 = p.poly.b15 + v64 * p17;
+ const p13 = p.poly.b13 + v * p15;
+ const p11 = p.poly.b11 + v * p13;
+ const p9 = p.poly.b9 + v * p11;
+ const p7 = p.poly.b7 + v * p9;
+ const p5 = p.poly.b5 + v * p7;
+
+ const q_hi = uv * p.poly.b3;
+ const q_lo = uv * v * p5;
+
+ const f_hi: f128 = @as(f64, @floatCast(f));
+ const f_lo = f - f_hi;
+
+ const u_hi: f128 = @as(f64, @floatCast(u));
+ const u_lo = ((2 * (f - u_hi) - u_hi * f_hi) - u_hi * f_lo) * g;
+
+ if (comptime p.poly.b1_hi == 1.0 and p.poly.b1_lo == 0.0)
+ return u_hi + (u_lo + (q_hi + q_lo));
+
+ // t = u * p.poly.b1
+ const t_hi = u_hi * p.poly.b1_hi;
+ const t_lo = u_lo * p.poly.b1_hi + u * p.poly.b1_lo;
+
+ // y = t + q
+ const y_hi = t_hi + q_hi;
+ const y_lo = t_lo + (t_hi - y_hi + q_hi) + q_lo;
+
+ return y_hi + y_lo;
+}
+
+/// Returns (f, k) such that x = f * 2^k and f in [1,2).
+/// Asserts that x is finite and positive.
+pub fn frexp2(x: f128) math.Frexp(f128) {
+ std.debug.assert(math.isFinite(x));
+ std.debug.assert(x > 0.0);
+
+ const bits: u128 = @bitCast(x);
+ const uexp: i32 = @intCast(bits >> 112);
+
+ std.debug.assert(uexp >= 0);
+
+ if (uexp == 0) {
+ const shift: u7 = @intCast(@clz(bits) - 15);
+
+ const exp = -@as(i32, shift) - 0x3ffe;
+ const frac: f128 = @bitCast((bits << shift) | (0x3fff << 112));
+ return .{ .significand = frac, .exponent = exp };
+ }
+
+ const exp = uexp - 0x3fff;
+ const frac: f128 = @bitCast((0x3fff << 112) | ((bits << 16) >> 16));
+ return .{ .significand = frac, .exponent = exp };
+}