std: add f80 bits
This commit is contained in:
committed by
Andrew Kelley
parent
a31a749c42
commit
67d04a988a
@@ -737,6 +737,11 @@ pub fn main() void {
|
||||
<td><code class="c">double</code></td>
|
||||
<td>64-bit floating point (52-bit mantissa) IEEE-754-2008 binary64</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th scope="row">{#syntax#}f80{#endsyntax#}</th>
|
||||
<td><code class="c">long double</code></td>
|
||||
<td>80-bit floating point (64-bit mantissa) IEEE-754-2008 80-bit extended precision</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th scope="row">{#syntax#}f128{#endsyntax#}</th>
|
||||
<td><code class="c">_Float128</code></td>
|
||||
@@ -1500,6 +1505,7 @@ fn divide(a: i32, b: i32) i32 {
|
||||
<li>{#syntax#}f16{#endsyntax#} - IEEE-754-2008 binary16</li>
|
||||
<li>{#syntax#}f32{#endsyntax#} - IEEE-754-2008 binary32</li>
|
||||
<li>{#syntax#}f64{#endsyntax#} - IEEE-754-2008 binary64</li>
|
||||
<li>{#syntax#}f80{#endsyntax#} - IEEE-754-2008 80-bit extended precision</li>
|
||||
<li>{#syntax#}f128{#endsyntax#} - IEEE-754-2008 binary128</li>
|
||||
<li>{#syntax#}c_longdouble{#endsyntax#} - matches <code class="c">long double</code> for the target C ABI</li>
|
||||
</ul>
|
||||
|
||||
@@ -43,7 +43,21 @@ pub const f128_max = @bitCast(f128, @as(u128, 0x7FFEFFFFFFFFFFFFFFFFFFFFFFFFFFFF
|
||||
// The u128 bit pattern 0x3F8F0000000000000000000000000000 reinterpreted as an f128.
pub const f128_epsilon = @bitCast(f128, @as(u128, 0x3F8F0000000000000000000000000000));
|
||||
// Reciprocal of f128_epsilon.
pub const f128_toint = 1.0 / f128_epsilon;
|
||||
|
||||
// File-private description of an f80 as two integer fields, so that f80
// constants can be spelled out as bit patterns and then reinterpreted via a
// pointer cast. The field order is chosen at compile time from the target
// CPU's endianness; `extern struct` keeps the fields in declared order.
// NOTE(review): the struct is read back as an f80 through @ptrCast. Confirm
// that its size and alignment match f80 on every target, and that the
// big-endian variant (exp first, followed by padding before fraction) really
// matches the in-memory f80 layout there.
const F80Repr = if (@import("builtin").cpu.arch.endian() == .Little) extern struct {
|
||||
// Little-endian layout: the 64-bit fraction field comes first.
fraction: u64,
|
||||
// 16-bit field placed after the fraction.
exp: u16,
|
||||
} else extern struct {
|
||||
// Any other endianness: the 16-bit field comes first.
exp: u16,
|
||||
||||
fraction: u64,
|
||||
};
|
||||
|
||||
// float.h details
// Each f80 constant below is built by filling in an F80Repr and reading the
// same memory back as an f80 through a pointer cast.
|
||||
// exp field 0 with only the lowest fraction bit set.
pub const f80_true_min = @ptrCast(*const f80, &F80Repr{ .fraction = 1, .exp = 0 }).*;
|
||||
// exp field 1 with only the top fraction bit (bit 63) set.
pub const f80_min = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 1 }).*;
|
||||
// exp field 0x7FFE (one below all-ones) with every fraction bit set.
pub const f80_max = @ptrCast(*const f80, &F80Repr{ .fraction = 0xFFFFFFFFFFFFFFFF, .exp = 0x7FFE }).*;
|
||||
// exp field 0x3FC0 with only the top fraction bit set.
// NOTE(review): 0x3FC0 = 0x3FFF - 63, so presumably 2^-63 assuming the usual
// 0x3FFF exponent bias for this format; confirm.
pub const f80_epsilon = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 0x3FC0 }).*;
|
||||
// Reciprocal of f80_epsilon.
pub const f80_toint = 1.0 / f80_epsilon;
|
||||
|
||||
// f64 limits, written directly as decimal floating-point literals rather
// than as bit patterns.
pub const f64_true_min = 4.94065645841246544177e-324;
|
||||
pub const f64_min = 2.2250738585072014e-308;
|
||||
pub const f64_max = 1.79769313486231570815e+308;
|
||||
@@ -91,6 +105,10 @@ pub const qnan_f64 = @bitCast(f64, qnan_u64);
|
||||
// 0x7FF shifted left by 52: bits 52..62 set, all lower bits zero.
pub const inf_u64 = @as(u64, 0x7FF << 52);
|
||||
// The same 64 bits reinterpreted as an f64.
pub const inf_f64 = @bitCast(f64, inf_u64);
|
||||
|
||||
// The three f80 constants below share the all-ones exp field 0x7fff and
// differ only in the top fraction bits:
//   inf_f80:  0x8... -> bit 63 only
//   nan_f80:  0xA... -> bits 63 and 61
//   qnan_f80: 0xC... -> bits 63 and 62
pub const inf_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 0x7fff }).*;
|
||||
pub const nan_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0xA000000000000000, .exp = 0x7fff }).*;
|
||||
pub const qnan_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0xC000000000000000, .exp = 0x7fff }).*;
|
||||
|
||||
// 0x7fff in the top 16 bits with only the lowest bit set below them.
pub const nan_u128 = @as(u128, 0x7fff0000000000000000000000000001);
|
||||
// The same 128 bits reinterpreted as an f128.
pub const nan_f128 = @bitCast(f128, nan_u128);
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ pub fn epsilon(comptime T: type) T {
|
||||
f16 => math.f16_epsilon,
|
||||
f32 => math.f32_epsilon,
|
||||
f64 => math.f64_epsilon,
|
||||
f80 => math.f80_epsilon,
|
||||
f128 => math.f128_epsilon,
|
||||
else => @compileError("epsilon not implemented for " ++ @typeName(T)),
|
||||
};
|
||||
|
||||
@@ -7,6 +7,7 @@ pub fn inf(comptime T: type) T {
|
||||
f16 => math.inf_f16,
|
||||
f32 => math.inf_f32,
|
||||
f64 => math.inf_f64,
|
||||
f80 => math.inf_f80,
|
||||
f128 => math.inf_f128,
|
||||
else => @compileError("inf not implemented for " ++ @typeName(T)),
|
||||
};
|
||||
|
||||
@@ -6,6 +6,7 @@ pub fn nan(comptime T: type) T {
|
||||
f16 => math.nan_f16,
|
||||
f32 => math.nan_f32,
|
||||
f64 => math.nan_f64,
|
||||
f80 => math.nan_f80,
|
||||
f128 => math.nan_f128,
|
||||
else => @compileError("nan not implemented for " ++ @typeName(T)),
|
||||
};
|
||||
@@ -19,6 +20,8 @@ pub fn snan(comptime T: type) T {
|
||||
f16 => @bitCast(f16, math.nan_u16),
|
||||
f32 => @bitCast(f32, math.nan_u32),
|
||||
f64 => @bitCast(f64, math.nan_u64),
|
||||
f80 => @bitCast(f80, math.nan_u80),
|
||||
f128 => @bitCast(f128, math.nan_u128),
|
||||
else => @compileError("snan not implemented for " ++ @typeName(T)),
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user