zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

truncf.zig (8121B) - Raw


      1 const std = @import("std");
      2 
      3 pub inline fn truncf(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
      4     const src_rep_t = std.meta.Int(.unsigned, @typeInfo(src_t).float.bits);
      5     const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).float.bits);
      6     const srcSigBits = std.math.floatMantissaBits(src_t);
      7     const dstSigBits = std.math.floatMantissaBits(dst_t);
      8 
      9     // Various constants whose values follow from the type parameters.
     10     // Any reasonable optimizer will fold and propagate all of these.
     11     const srcBits = @typeInfo(src_t).float.bits;
     12     const srcExpBits = srcBits - srcSigBits - 1;
     13     const srcInfExp = (1 << srcExpBits) - 1;
     14     const srcExpBias = srcInfExp >> 1;
     15 
     16     const srcMinNormal = 1 << srcSigBits;
     17     const srcSignificandMask = srcMinNormal - 1;
     18     const srcInfinity = srcInfExp << srcSigBits;
     19     const srcSignMask = 1 << (srcSigBits + srcExpBits);
     20     const srcAbsMask = srcSignMask - 1;
     21     const roundMask = (1 << (srcSigBits - dstSigBits)) - 1;
     22     const halfway = 1 << (srcSigBits - dstSigBits - 1);
     23     const srcQNaN = 1 << (srcSigBits - 1);
     24     const srcNaNCode = srcQNaN - 1;
     25 
     26     const dstBits = @typeInfo(dst_t).float.bits;
     27     const dstExpBits = dstBits - dstSigBits - 1;
     28     const dstInfExp = (1 << dstExpBits) - 1;
     29     const dstExpBias = dstInfExp >> 1;
     30 
     31     const underflowExponent = srcExpBias + 1 - dstExpBias;
     32     const overflowExponent = srcExpBias + dstInfExp - dstExpBias;
     33     const underflow = underflowExponent << srcSigBits;
     34     const overflow = overflowExponent << srcSigBits;
     35 
     36     const dstQNaN = 1 << (dstSigBits - 1);
     37     const dstNaNCode = dstQNaN - 1;
     38 
     39     // Break a into a sign and representation of the absolute value
     40     const aRep: src_rep_t = @bitCast(a);
     41     const aAbs: src_rep_t = aRep & srcAbsMask;
     42     const sign: src_rep_t = aRep & srcSignMask;
     43     var absResult: dst_rep_t = undefined;
     44 
     45     if (aAbs -% underflow < aAbs -% overflow) {
     46         // The exponent of a is within the range of normal numbers in the
     47         // destination format.  We can convert by simply right-shifting with
     48         // rounding and adjusting the exponent.
     49         absResult = @truncate(aAbs >> (srcSigBits - dstSigBits));
     50         absResult -%= @as(dst_rep_t, srcExpBias - dstExpBias) << dstSigBits;
     51 
     52         const roundBits: src_rep_t = aAbs & roundMask;
     53         if (roundBits > halfway) {
     54             // Round to nearest
     55             absResult += 1;
     56         } else if (roundBits == halfway) {
     57             // Ties to even
     58             absResult += absResult & 1;
     59         }
     60     } else if (aAbs > srcInfinity) {
     61         // a is NaN.
     62         // Conjure the result by beginning with infinity, setting the qNaN
     63         // bit and inserting the (truncated) trailing NaN field.
     64         absResult = @as(dst_rep_t, @intCast(dstInfExp)) << dstSigBits;
     65         absResult |= dstQNaN;
     66         absResult |= @intCast(((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode);
     67     } else if (aAbs >= overflow) {
     68         // a overflows to infinity.
     69         absResult = @as(dst_rep_t, @intCast(dstInfExp)) << dstSigBits;
     70     } else {
     71         // a underflows on conversion to the destination type or is an exact
     72         // zero.  The result may be a denormal or zero.  Extract the exponent
     73         // to get the shift amount for the denormalization.
     74         const aExp: u32 = @intCast(aAbs >> srcSigBits);
     75         const shift: u32 = @intCast(srcExpBias - dstExpBias - aExp + 1);
     76 
     77         const significand: src_rep_t = (aRep & srcSignificandMask) | srcMinNormal;
     78 
     79         // Right shift by the denormalization amount with sticky.
     80         if (shift > srcSigBits) {
     81             absResult = 0;
     82         } else {
     83             const sticky: src_rep_t = @intFromBool(significand << @intCast(srcBits - shift) != 0);
     84             const denormalizedSignificand: src_rep_t = significand >> @intCast(shift) | sticky;
     85             absResult = @intCast(denormalizedSignificand >> (srcSigBits - dstSigBits));
     86             const roundBits: src_rep_t = denormalizedSignificand & roundMask;
     87             if (roundBits > halfway) {
     88                 // Round to nearest
     89                 absResult += 1;
     90             } else if (roundBits == halfway) {
     91                 // Ties to even
     92                 absResult += absResult & 1;
     93             }
     94         }
     95     }
     96 
     97     const result: dst_rep_t align(@alignOf(dst_t)) = absResult |
     98         @as(dst_rep_t, @truncate(sign >> @intCast(srcBits - dstBits)));
     99     return @bitCast(result);
    100 }
    101 
    102 pub inline fn trunc_f80(comptime dst_t: type, a: f80) dst_t {
    103     const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).float.bits);
    104     const src_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
    105     const dst_sig_bits = std.math.floatMantissaBits(dst_t);
    106 
    107     const src_exp_bias = 16383;
    108 
    109     const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1;
    110     const halfway = 1 << (src_sig_bits - dst_sig_bits - 1);
    111 
    112     const dst_bits = @typeInfo(dst_t).float.bits;
    113     const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    114     const dst_inf_exp = (1 << dst_exp_bits) - 1;
    115     const dst_exp_bias = dst_inf_exp >> 1;
    116 
    117     const underflow = src_exp_bias + 1 - dst_exp_bias;
    118     const overflow = src_exp_bias + dst_inf_exp - dst_exp_bias;
    119 
    120     const dst_qnan = 1 << (dst_sig_bits - 1);
    121     const dst_nan_mask = dst_qnan - 1;
    122 
    123     // Break a into a sign and representation of the absolute value
    124     var a_rep = std.math.F80.fromFloat(a);
    125     const sign = a_rep.exp & 0x8000;
    126     a_rep.exp &= 0x7FFF;
    127     a_rep.fraction &= 0x7FFFFFFFFFFFFFFF;
    128     var abs_result: dst_rep_t = undefined;
    129 
    130     if (a_rep.exp -% underflow < a_rep.exp -% overflow) {
    131         // The exponent of a is within the range of normal numbers in the
    132         // destination format.  We can convert by simply right-shifting with
    133         // rounding and adjusting the exponent.
    134         abs_result = @as(dst_rep_t, a_rep.exp) << dst_sig_bits;
    135         abs_result |= @truncate(a_rep.fraction >> (src_sig_bits - dst_sig_bits));
    136         abs_result -%= @as(dst_rep_t, src_exp_bias - dst_exp_bias) << dst_sig_bits;
    137 
    138         const round_bits = a_rep.fraction & round_mask;
    139         if (round_bits > halfway) {
    140             // Round to nearest
    141             abs_result += 1;
    142         } else if (round_bits == halfway) {
    143             // Ties to even
    144             abs_result += abs_result & 1;
    145         }
    146     } else if (a_rep.exp == 0x7FFF and a_rep.fraction != 0) {
    147         // a is NaN.
    148         // Conjure the result by beginning with infinity, setting the qNaN
    149         // bit and inserting the (truncated) trailing NaN field.
    150         abs_result = @as(dst_rep_t, @intCast(dst_inf_exp)) << dst_sig_bits;
    151         abs_result |= dst_qnan;
    152         abs_result |= @intCast((a_rep.fraction >> (src_sig_bits - dst_sig_bits)) & dst_nan_mask);
    153     } else if (a_rep.exp >= overflow) {
    154         // a overflows to infinity.
    155         abs_result = @as(dst_rep_t, @intCast(dst_inf_exp)) << dst_sig_bits;
    156     } else {
    157         // a underflows on conversion to the destination type or is an exact
    158         // zero.  The result may be a denormal or zero.  Extract the exponent
    159         // to get the shift amount for the denormalization.
    160         const shift = src_exp_bias - dst_exp_bias - a_rep.exp;
    161 
    162         // Right shift by the denormalization amount with sticky.
    163         if (shift > src_sig_bits) {
    164             abs_result = 0;
    165         } else {
    166             const sticky = @intFromBool(a_rep.fraction << @intCast(shift) != 0);
    167             const denormalized_significand = a_rep.fraction >> @intCast(shift) | sticky;
    168             abs_result = @intCast(denormalized_significand >> (src_sig_bits - dst_sig_bits));
    169             const round_bits = denormalized_significand & round_mask;
    170             if (round_bits > halfway) {
    171                 // Round to nearest
    172                 abs_result += 1;
    173             } else if (round_bits == halfway) {
    174                 // Ties to even
    175                 abs_result += abs_result & 1;
    176             }
    177         }
    178     }
    179 
    180     const result align(@alignOf(dst_t)) = abs_result | @as(dst_rep_t, sign) << dst_bits - 16;
    181     return @bitCast(result);
    182 }
    183 
// Reference the sibling test file from this file's test block so that it is
// analyzed when this file is built in test mode.
test {
    _ = @import("truncf_test.zig");
}