std: add f80 bits

2022-01-19 18:47:51 +02:00
parent a31a749c42
commit 67d04a988a
5 changed files with 29 additions and 0 deletions
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -737,6 +737,11 @@ pub fn main() void {
          <td><code class="c">double</code></td>
          <td>64-bit floating point (52-bit mantissa) IEEE-754-2008 binary64</td>
        </tr>
+        <tr>
+            <th scope="row">{#syntax#}f80{#endsyntax#}</th>
+          <td><code class="c">long double</code></td>
+          <td>80-bit floating point (64-bit mantissa) IEEE-754-2008 80-bit extended precision</td>
+        </tr>
        <tr>
            <th scope="row">{#syntax#}f128{#endsyntax#}</th>
            <td><code class="c">_Float128</code></td>
@@ -1500,6 +1505,7 @@ fn divide(a: i32, b: i32) i32 {
          <li>{#syntax#}f16{#endsyntax#} - IEEE-754-2008 binary16</li>
          <li>{#syntax#}f32{#endsyntax#} - IEEE-754-2008 binary32</li>
          <li>{#syntax#}f64{#endsyntax#} - IEEE-754-2008 binary64</li>
+          <li>{#syntax#}f80{#endsyntax#} - IEEE-754-2008 80-bit extended precision</li>
          <li>{#syntax#}f128{#endsyntax#} - IEEE-754-2008 binary128</li>
          <li>{#syntax#}c_longdouble{#endsyntax#} - matches <code class="c">long double</code> for the target C ABI</li>
      </ul>
--- a/lib/std/math.zig
+++ b/lib/std/math.zig
@@ -43,7 +43,21 @@ pub const f128_max = @bitCast(f128, @as(u128, 0x7FFEFFFFFFFFFFFFFFFFFFFFFFFFFFFF
 pub const f128_epsilon = @bitCast(f128, @as(u128, 0x3F8F0000000000000000000000000000));
 pub const f128_toint = 1.0 / f128_epsilon;

+const F80Repr = if (@import("builtin").cpu.arch.endian() == .Little) extern struct { // layout used to build the f80 constants below; field order follows target endianness
+    fraction: u64, // 64 significand bits; presumably the top bit is the explicit integer bit (verify)
+    exp: u16, // exponent field; constants in this file only use values up to 0x7fff
+} else extern struct { // big-endian targets: same fields, exponent first
+    exp: u16,
+    fraction: u64,
+};
+
 // float.h details
+pub const f80_true_min = @ptrCast(*const f80, &F80Repr{ .fraction = 1, .exp = 0 }).*; // exponent field 0, lowest fraction bit only
+pub const f80_min = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 1 }).*; // exponent field 1, only the top fraction bit set
+pub const f80_max = @ptrCast(*const f80, &F80Repr{ .fraction = 0xFFFFFFFFFFFFFFFF, .exp = 0x7FFE }).*; // exponent field 0x7FFE, all fraction bits set
+pub const f80_epsilon = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 0x3FC0 }).*; // presumably 2^-63: 0x3FC0 = 0x3FFF - 63 (verify bias)
+pub const f80_toint = 1.0 / f80_epsilon; // reciprocal of f80_epsilon, same pattern as f128_toint
+
 pub const f64_true_min = 4.94065645841246544177e-324;
 pub const f64_min = 2.2250738585072014e-308;
 pub const f64_max = 1.79769313486231570815e+308;
@@ -91,6 +105,10 @@ pub const qnan_f64 = @bitCast(f64, qnan_u64);
 pub const inf_u64 = @as(u64, 0x7FF << 52);
 pub const inf_f64 = @bitCast(f64, inf_u64);

+pub const inf_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0x8000000000000000, .exp = 0x7fff }).*; // exponent field 0x7fff, only the top fraction bit set
+pub const nan_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0xA000000000000000, .exp = 0x7fff }).*; // exponent field 0x7fff, fraction bits 63 and 61 set
+pub const qnan_f80 = @ptrCast(*const f80, &F80Repr{ .fraction = 0xC000000000000000, .exp = 0x7fff }).*; // exponent field 0x7fff, fraction bits 63 and 62 set (bit 62 presumably the quiet bit)
+
 pub const nan_u128 = @as(u128, 0x7fff0000000000000000000000000001);
 pub const nan_f128 = @bitCast(f128, nan_u128);

--- a/lib/std/math/epsilon.zig
+++ b/lib/std/math/epsilon.zig
@@ -8,6 +8,7 @@ pub fn epsilon(comptime T: type) T {
        f16 => math.f16_epsilon,
        f32 => math.f32_epsilon,
        f64 => math.f64_epsilon,
+        f80 => math.f80_epsilon,
        f128 => math.f128_epsilon,
        else => @compileError("epsilon not implemented for " ++ @typeName(T)),
    };
--- a/lib/std/math/inf.zig
+++ b/lib/std/math/inf.zig
@@ -7,6 +7,7 @@ pub fn inf(comptime T: type) T {
        f16 => math.inf_f16,
        f32 => math.inf_f32,
        f64 => math.inf_f64,
+        f80 => math.inf_f80,
        f128 => math.inf_f128,
        else => @compileError("inf not implemented for " ++ @typeName(T)),
    };
--- a/lib/std/math/nan.zig
+++ b/lib/std/math/nan.zig
@@ -6,6 +6,7 @@ pub fn nan(comptime T: type) T {
        f16 => math.nan_f16,
        f32 => math.nan_f32,
        f64 => math.nan_f64,
+        f80 => math.nan_f80,
        f128 => math.nan_f128,
        else => @compileError("nan not implemented for " ++ @typeName(T)),
    };
@@ -19,6 +20,8 @@ pub fn snan(comptime T: type) T {
        f16 => @bitCast(f16, math.nan_u16),
        f32 => @bitCast(f32, math.nan_u32),
        f64 => @bitCast(f64, math.nan_u64),
+        f80 => math.nan_f80, // f80 constants are built via F80Repr, not from a u80 bit pattern
+        f128 => @bitCast(f128, math.nan_u128),
        else => @compileError("snan not implemented for " ++ @typeName(T)),
    };
 }