std.{fmt, math}: derive float constants from std
This also addresses a nit from #10133 where IntT might be a confusing name because it could imply a signed integer (iX rather than uX). We settled on TBits for math/float.zig, so I've applied that change here too. When I originally wrote ldexp(), I copied the name from parse_hex_float.
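For context (my own illustration, not part of this commit): std.meta.Int(.unsigned, ...) always resolves to the unsigned integer type that holds a float's bit pattern, which is why the uX-flavored name TBits reads better than IntT. A minimal sketch of what it resolves to:

const std = @import("std");

// Illustrative only: the unsigned bit-pattern type for each float width.
comptime {
    std.debug.assert(std.meta.Int(.unsigned, @bitSizeOf(f16)) == u16);
    std.debug.assert(std.meta.Int(.unsigned, @bitSizeOf(f32)) == u32);
    std.debug.assert(std.meta.Int(.unsigned, @bitSizeOf(f64)) == u64);
    std.debug.assert(std.meta.Int(.unsigned, @bitSizeOf(f128)) == u128);
}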
@@ -12,17 +12,16 @@ const assert = std.debug.assert;
 pub fn parseHexFloat(comptime T: type, s: []const u8) !T {
     assert(@typeInfo(T) == .Float);
 
-    const IntT = std.meta.Int(.unsigned, @typeInfo(T).Float.bits);
+    const TBits = std.meta.Int(.unsigned, @typeInfo(T).Float.bits);
 
     const mantissa_bits = math.floatMantissaBits(T);
     const exponent_bits = math.floatExponentBits(T);
+    const exponent_min = math.floatExponentMin(T);
+    const exponent_max = math.floatExponentMax(T);
+
+    const exponent_bias = exponent_max;
     const sign_shift = mantissa_bits + exponent_bits;
 
-    const exponent_bias = (1 << (exponent_bits - 1)) - 1;
-    const exponent_min = 1 - exponent_bias;
-    const exponent_max = exponent_bias;
-
     if (s.len == 0)
         return error.InvalidCharacter;
 
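A quick sanity check (my own working, not from the commit) that the std helpers produce the same values the removed hand-rolled formulas did, using f32 (exponent_bits = 8) as the example:

const std = @import("std");
const math = std.math;

comptime {
    const exponent_bits = math.floatExponentBits(f32); // 8
    const exponent_bias = (1 << (exponent_bits - 1)) - 1; // 127, the old inline formula
    std.debug.assert(math.floatExponentMax(f32) == exponent_bias); // derived: bias == max == 127
    std.debug.assert(math.floatExponentMin(f32) == 1 - exponent_bias); // derived: min == -126
}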
@@ -233,10 +232,10 @@ pub fn parseHexFloat(comptime T: type, s: []const u8) !T {
     // Remove the implicit bit.
     mantissa &= @as(u128, (1 << mantissa_bits) - 1);
 
-    const raw: IntT =
-        (if (negative) @as(IntT, 1) << sign_shift else 0) |
-        @as(IntT, @bitCast(u16, exponent + exponent_bias)) << mantissa_bits |
-        @truncate(IntT, mantissa);
+    const raw: TBits =
+        (if (negative) @as(TBits, 1) << sign_shift else 0) |
+        @as(TBits, @bitCast(u16, exponent + exponent_bias)) << mantissa_bits |
+        @truncate(TBits, mantissa);
 
     return @bitCast(T, raw);
 }
 
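For orientation (illustrative sketch, not part of the commit): with f32, sign_shift is 23 + 8 = 31, the biased exponent sits in bits 23..30, and the mantissa fills the low 23 bits. Packing sign 0, exponent 0 + 127, and mantissa 0 this way reproduces the bit pattern of 1.0:

const std = @import("std");

test "illustrative f32 bit packing (not from this commit)" {
    const mantissa_bits = 23;
    const sign_shift = 31; // mantissa_bits + exponent_bits
    const exponent_bias = 127;
    const raw: u32 = (@as(u32, 0) << sign_shift) |
        (@as(u32, 0 + exponent_bias) << mantissa_bits) |
        0; // zero mantissa
    try std.testing.expect(@bitCast(f32, raw) == 1.0); // 0x3f800000
}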
@@ -15,22 +15,22 @@ pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
     var shift = n;
 
     const T = @TypeOf(base);
-    const IntT = std.meta.Int(.unsigned, @bitSizeOf(T));
+    const TBits = std.meta.Int(.unsigned, @bitSizeOf(T));
     if (@typeInfo(T) != .Float) {
         @compileError("ldexp not implemented for " ++ @typeName(T));
     }
 
     const mantissa_bits = math.floatMantissaBits(T);
     const exponent_bits = math.floatExponentBits(T);
-    const exponent_bias = (1 << (exponent_bits - 1)) - 1;
-    const exponent_min = 1 - exponent_bias;
-    const exponent_max = exponent_bias;
+    const exponent_min = math.floatExponentMin(T);
+    const exponent_max = math.floatExponentMax(T);
+
+    const exponent_bias = exponent_max;
 
     // fix double rounding errors in subnormal ranges
     // https://git.musl-libc.org/cgit/musl/commit/src/math/ldexp.c?id=8c44a060243f04283ca68dad199aab90336141db
     const scale_min_expo = exponent_min + mantissa_bits + 1;
-    const scale_min = @bitCast(T, @as(IntT, scale_min_expo + exponent_bias) << mantissa_bits);
-    const scale_max = @bitCast(T, @intCast(IntT, exponent_max + exponent_bias) << mantissa_bits);
+    const scale_min = @bitCast(T, @as(TBits, scale_min_expo + exponent_bias) << mantissa_bits);
+    const scale_max = @bitCast(T, @intCast(TBits, exponent_max + exponent_bias) << mantissa_bits);
 
     // scale `shift` within floating point limits, if possible
     // second pass is possible due to subnormal range
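To make the derived numbers concrete (my own working, not from the commit): for f64, exponent_min is -1022 and mantissa_bits is 52, so scale_min_expo = -1022 + 52 + 1 = -969 and scale_min is 2^-969, which appears to match the pre-scale used in the linked musl fix to keep results in the subnormal range to a single rounding step; scale_max is 2^1023. A tiny check:

const std = @import("std");
const math = std.math;

comptime {
    // f64: exponent_min = -1022, mantissa_bits = 52
    const scale_min_expo = math.floatExponentMin(f64) + math.floatMantissaBits(f64) + 1;
    std.debug.assert(scale_min_expo == -969); // scale_min == 2^-969
}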
@@ -53,7 +53,7 @@ pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
         }
     }
 
-    return base * @bitCast(T, @intCast(IntT, shift + exponent_bias) << mantissa_bits);
+    return base * @bitCast(T, @intCast(TBits, shift + exponent_bias) << mantissa_bits);
 }
 
 test "math.ldexp" {
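As a usage sketch (not from this diff): the final multiply builds 2^shift by placing shift + exponent_bias directly in the exponent field, so ldexp simply scales its argument by a power of two:

const std = @import("std");
const math = std.math;
const expect = std.testing.expect;

test "ldexp usage sketch (illustrative)" {
    try expect(math.ldexp(@as(f32, 1.5), 2) == 6.0); // 1.5 * 2^2
    try expect(math.ldexp(@as(f64, 1.0), -1) == 0.5); // 1.0 * 2^-1
}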