std: add f80 bits

This commit is contained in:
Veikka Tuominen
2022-01-19 18:47:51 +02:00
committed by Andrew Kelley
parent a31a749c42
commit 67d04a988a
5 changed files with 29 additions and 0 deletions

View File

@@ -737,6 +737,11 @@ pub fn main() void {
<td><code class="c">double</code></td>
<td>64-bit floating point (52-bit mantissa) IEEE-754-2008 binary64</td>
</tr>
<tr>
<th scope="row">{#syntax#}f80{#endsyntax#}</th>
<td><code class="c">long double</code></td>
<td>80-bit floating point (64-bit mantissa) IEEE-754-2008 80-bit extended precision</td>
</tr>
<tr>
<th scope="row">{#syntax#}f128{#endsyntax#}</th>
<td><code class="c">_Float128</code></td>
@@ -1500,6 +1505,7 @@ fn divide(a: i32, b: i32) i32 {
<li>{#syntax#}f16{#endsyntax#} - IEEE-754-2008 binary16</li>
<li>{#syntax#}f32{#endsyntax#} - IEEE-754-2008 binary32</li>
<li>{#syntax#}f64{#endsyntax#} - IEEE-754-2008 binary64</li>
<li>{#syntax#}f80{#endsyntax#} - IEEE-754-2008 80-bit extended precision</li>
<li>{#syntax#}f128{#endsyntax#} - IEEE-754-2008 binary128</li>
<li>{#syntax#}c_longdouble{#endsyntax#} - matches <code class="c">long double</code> for the target C ABI</li>
</ul>

View File

@@ -43,7 +43,21 @@ pub const f128_max = @bitCast(f128, @as(u128, 0x7FFEFFFFFFFFFFFFFFFFFFFFFFFFFFFF
// f128 epsilon, specified by its raw 128-bit pattern and reinterpreted as f128.
pub const f128_epsilon = @bitCast(f128, @as(u128, 0x3F8F0000000000000000000000000000));
// Reciprocal of f128_epsilon.
pub const f128_toint = 1.0 / f128_epsilon;
/// Field-wise view of an f80 used to spell out constants bit-for-bit:
/// a 64-bit `fraction` word and a 16-bit `exp` word (presumably sign bit plus
/// biased exponent, as in the x87 extended layout).
/// The field order follows the target's byte order so that a pointer to this
/// struct can be reinterpreted as a pointer to f80.
const F80Repr = switch (@import("builtin").cpu.arch.endian()) {
    .Little => extern struct {
        fraction: u64,
        exp: u16,
    },
    .Big => extern struct {
        exp: u16,
        fraction: u64,
    },
};
// float.h details
//
// The f80 limits below are assembled field-by-field through F80Repr and then
// reinterpreted as f80 through a pointer cast, instead of being written as
// float literals.

// exp = 0, fraction = 1: lowest non-zero bit pattern (presumably the smallest
// positive subnormal, matching the *_true_min naming of the other widths).
pub const f80_true_min = @ptrCast(*const f80, &F80Repr{ .fraction = 1, .exp = 0 }).*;
// exp = 1 with only the top fraction bit set (presumably the smallest positive normal value).
pub const f80_min = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 1 }).*;
// exp = 0x7FFE with every fraction bit set; exp 0x7fff is used for inf/NaN elsewhere in this file.
pub const f80_max = @ptrCast(*const f80, &F80Repr{ .fraction = 0xFFFFFFFFFFFFFFFF, .exp = 0x7FFE }).*;
// exp = 0x3FC0 with only the top fraction bit set; assuming the usual 0x3FFF
// exponent bias this is 2^-63 — confirm against the extended-precision format.
pub const f80_epsilon = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 0x3FC0 }).*;
// Reciprocal of f80_epsilon.
pub const f80_toint = 1.0 / f80_epsilon;
// f64 limits written as decimal literals (values look like C's DBL_TRUE_MIN,
// DBL_MIN and DBL_MAX respectively).
pub const f64_true_min = 4.94065645841246544177e-324;
pub const f64_min = 2.2250738585072014e-308;
pub const f64_max = 1.79769313486231570815e+308;
@@ -91,6 +105,10 @@ pub const qnan_f64 = @bitCast(f64, qnan_u64);
// f64 infinity: 0x7FF placed at bit 52 with all lower bits zero, then
// reinterpreted as f64.
pub const inf_u64 = @as(u64, 0x7FF << 52);
pub const inf_f64 = @bitCast(f64, inf_u64);
// f80 special values. All three use exp = 0x7fff and differ only in the
// fraction word; they are built through F80Repr and reinterpreted as f80 via
// a pointer cast.
// Infinity: only the top fraction bit (bit 63) is set.
pub const inf_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 0x7fff }).*;
// NaN: fraction bits 63 and 61 set, bit 62 clear.
pub const nan_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0xA000000000000000, .exp = 0x7fff }).*;
// Quiet NaN (per the name): fraction bits 63 and 62 set.
pub const qnan_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0xC000000000000000, .exp = 0x7fff }).*;
// f128 NaN: raw 128-bit pattern with 0x7fff in the top 16 bits and the lowest
// bit set, then reinterpreted as f128.
pub const nan_u128 = @as(u128, 0x7fff0000000000000000000000000001);
pub const nan_f128 = @bitCast(f128, nan_u128);

View File

@@ -8,6 +8,7 @@ pub fn epsilon(comptime T: type) T {
f16 => math.f16_epsilon,
f32 => math.f32_epsilon,
f64 => math.f64_epsilon,
f80 => math.f80_epsilon,
f128 => math.f128_epsilon,
else => @compileError("epsilon not implemented for " ++ @typeName(T)),
};

View File

@@ -7,6 +7,7 @@ pub fn inf(comptime T: type) T {
f16 => math.inf_f16,
f32 => math.inf_f32,
f64 => math.inf_f64,
f80 => math.inf_f80,
f128 => math.inf_f128,
else => @compileError("inf not implemented for " ++ @typeName(T)),
};

View File

@@ -6,6 +6,7 @@ pub fn nan(comptime T: type) T {
f16 => math.nan_f16,
f32 => math.nan_f32,
f64 => math.nan_f64,
f80 => math.nan_f80,
f128 => math.nan_f128,
else => @compileError("nan not implemented for " ++ @typeName(T)),
};
@@ -19,6 +20,8 @@ pub fn snan(comptime T: type) T {
f16 => @bitCast(f16, math.nan_u16),
f32 => @bitCast(f32, math.nan_u32),
f64 => @bitCast(f64, math.nan_u64),
// No `math.nan_u80` integer pattern is defined alongside the other f80
// constants (they are built through F80Repr instead), so use the ready-made
// f80 NaN. This keeps snan(f80) bit-identical to nan(f80), just as the
// other widths reuse the same nan_uN pattern that backs nan_fN.
f80 => math.nan_f80,
f128 => @bitCast(f128, math.nan_u128),
else => @compileError("snan not implemented for " ++ @typeName(T)),
};
}