// truncf.zig
const std = @import("std");

/// Converts `a` from the binary floating-point type `src_t` to the narrower
/// binary floating-point type `dst_t`, rounding to nearest with ties to even.
///
/// The conversion is done purely with integer operations on the bit pattern:
///   * values whose exponent is in the normal range of `dst_t` are shifted,
///     re-biased and rounded;
///   * NaNs become quiet NaNs carrying the (truncated) source payload;
///   * values too large for `dst_t` become infinity;
///   * values too small for a normal `dst_t` become a denormal or zero.
/// The sign of `a` is always carried over to the result.
///
/// `src_t` must have more mantissa bits than `dst_t`; the shift amounts below
/// are computed as `src_sig_bits - dst_sig_bits` at compile time.
pub inline fn truncf(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t {
    const src_rep_t = std.meta.Int(.unsigned, @typeInfo(src_t).float.bits);
    const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).float.bits);
    const src_sig_bits = std.math.floatMantissaBits(src_t);
    const dst_sig_bits = std.math.floatMantissaBits(dst_t);

    // Various constants whose values follow from the type parameters.
    // Any reasonable optimizer will fold and propagate all of these.
    const src_bits = @typeInfo(src_t).float.bits;
    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = (1 << src_exp_bits) - 1;
    const src_exp_bias = src_inf_exp >> 1;

    const src_min_normal = 1 << src_sig_bits;
    const src_significand_mask = src_min_normal - 1;
    const src_infinity = src_inf_exp << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_abs_mask = src_sign_mask - 1;
    // Bits of the source significand that do not fit in the destination.
    const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1;
    // Value of the discarded bits that lies exactly between two results.
    const halfway = 1 << (src_sig_bits - dst_sig_bits - 1);
    const src_qnan = 1 << (src_sig_bits - 1);
    const src_nan_code = src_qnan - 1;

    const dst_bits = @typeInfo(dst_t).float.bits;
    const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    const dst_inf_exp = (1 << dst_exp_bits) - 1;
    const dst_exp_bias = dst_inf_exp >> 1;

    // Smallest / one-past-largest source exponents (and the corresponding
    // bit patterns) that map to a normal number in the destination format.
    const underflow_exponent = src_exp_bias + 1 - dst_exp_bias;
    const overflow_exponent = src_exp_bias + dst_inf_exp - dst_exp_bias;
    const underflow = underflow_exponent << src_sig_bits;
    const overflow = overflow_exponent << src_sig_bits;

    const dst_qnan = 1 << (dst_sig_bits - 1);
    const dst_nan_code = dst_qnan - 1;

    // Break a into a sign and representation of the absolute value
    const a_rep: src_rep_t = @bitCast(a);
    const a_abs: src_rep_t = a_rep & src_abs_mask;
    const sign: src_rep_t = a_rep & src_sign_mask;
    var abs_result: dst_rep_t = undefined;

    // The wrapping subtractions turn the two-sided range check
    // `underflow <= a_abs < overflow` into a single unsigned comparison.
    if (a_abs -% underflow < a_abs -% overflow) {
        // The exponent of a is within the range of normal numbers in the
        // destination format. We can convert by simply right-shifting with
        // rounding and adjusting the exponent.
        abs_result = @truncate(a_abs >> (src_sig_bits - dst_sig_bits));
        abs_result -%= @as(dst_rep_t, src_exp_bias - dst_exp_bias) << dst_sig_bits;

        const round_bits: src_rep_t = a_abs & round_mask;
        if (round_bits > halfway) {
            // Round to nearest
            abs_result += 1;
        } else if (round_bits == halfway) {
            // Ties to even
            abs_result += abs_result & 1;
        }
    } else if (a_abs > src_infinity) {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.
        abs_result = @as(dst_rep_t, @intCast(dst_inf_exp)) << dst_sig_bits;
        abs_result |= dst_qnan;
        abs_result |= @intCast(((a_abs & src_nan_code) >> (src_sig_bits - dst_sig_bits)) & dst_nan_code);
    } else if (a_abs >= overflow) {
        // a overflows to infinity.
        abs_result = @as(dst_rep_t, @intCast(dst_inf_exp)) << dst_sig_bits;
    } else {
        // a underflows on conversion to the destination type or is an exact
        // zero. The result may be a denormal or zero. Extract the exponent
        // to get the shift amount for the denormalization.
        const a_exp: u32 = @intCast(a_abs >> src_sig_bits);
        const shift: u32 = @intCast(src_exp_bias - dst_exp_bias - a_exp + 1);

        // Significand with the implicit leading one made explicit.
        const significand: src_rep_t = (a_rep & src_significand_mask) | src_min_normal;

        // Right shift by the denormalization amount with sticky.
        if (shift > src_sig_bits) {
            abs_result = 0;
        } else {
            // sticky is set when any of the `shift` bits about to be shifted
            // out is non-zero, so that rounding still sees them.
            const sticky: src_rep_t = @intFromBool(significand << @intCast(src_bits - shift) != 0);
            const denormalized_significand: src_rep_t = significand >> @intCast(shift) | sticky;
            abs_result = @intCast(denormalized_significand >> (src_sig_bits - dst_sig_bits));
            const round_bits: src_rep_t = denormalized_significand & round_mask;
            if (round_bits > halfway) {
                // Round to nearest
                abs_result += 1;
            } else if (round_bits == halfway) {
                // Ties to even
                abs_result += abs_result & 1;
            }
        }
    }

    // Move the sign bit from the top of the source to the top of the result.
    const result: dst_rep_t align(@alignOf(dst_t)) = abs_result |
        @as(dst_rep_t, @truncate(sign >> @intCast(src_bits - dst_bits)));
    return @bitCast(result);
}

/// Converts the 80-bit extended-precision value `a` to the narrower binary
/// floating-point type `dst_t`, rounding to nearest with ties to even.
///
/// This mirrors `truncf`, but works on the `std.math.F80` representation
/// (16-bit sign+exponent word, 64-bit significand with an explicit integer
/// bit) instead of a single integer bit pattern.
///
/// NaNs become quiet NaNs carrying the (truncated) payload, values too large
/// for `dst_t` become infinity, and values too small for a normal `dst_t`
/// become a denormal or zero. The sign of `a` is carried over to the result.
pub inline fn trunc_f80(comptime dst_t: type, a: f80) dst_t {
    const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).float.bits);
    const src_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
    const dst_sig_bits = std.math.floatMantissaBits(dst_t);

    const src_exp_bias = 16383;
    // The explicit integer (leading) bit of the 64-bit significand.
    const integer_bit: u64 = 1 << src_sig_bits;

    const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1;
    const halfway = 1 << (src_sig_bits - dst_sig_bits - 1);

    const dst_bits = @typeInfo(dst_t).float.bits;
    const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    const dst_inf_exp = (1 << dst_exp_bits) - 1;
    const dst_exp_bias = dst_inf_exp >> 1;

    // Smallest / one-past-largest source exponents that map to a normal
    // number in the destination format.
    const underflow = src_exp_bias + 1 - dst_exp_bias;
    const overflow = src_exp_bias + dst_inf_exp - dst_exp_bias;

    const dst_qnan = 1 << (dst_sig_bits - 1);
    const dst_nan_mask = dst_qnan - 1;

    // Break a into a sign and representation of the absolute value
    var a_rep = std.math.F80.fromFloat(a);
    const sign = a_rep.exp & 0x8000;
    a_rep.exp &= 0x7FFF;
    // Drop the explicit integer bit so `fraction` only holds the trailing
    // significand, like the implicit-bit formats handled by `truncf`.
    a_rep.fraction &= 0x7FFFFFFFFFFFFFFF;
    var abs_result: dst_rep_t = undefined;

    // The wrapping subtractions turn the two-sided range check
    // `underflow <= exp < overflow` into a single unsigned comparison.
    if (a_rep.exp -% underflow < a_rep.exp -% overflow) {
        // The exponent of a is within the range of normal numbers in the
        // destination format. We can convert by simply right-shifting with
        // rounding and adjusting the exponent.
        abs_result = @as(dst_rep_t, a_rep.exp) << dst_sig_bits;
        abs_result |= @truncate(a_rep.fraction >> (src_sig_bits - dst_sig_bits));
        abs_result -%= @as(dst_rep_t, src_exp_bias - dst_exp_bias) << dst_sig_bits;

        const round_bits = a_rep.fraction & round_mask;
        if (round_bits > halfway) {
            // Round to nearest
            abs_result += 1;
        } else if (round_bits == halfway) {
            // Ties to even
            abs_result += abs_result & 1;
        }
    } else if (a_rep.exp == 0x7FFF and a_rep.fraction != 0) {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.
        abs_result = @as(dst_rep_t, @intCast(dst_inf_exp)) << dst_sig_bits;
        abs_result |= dst_qnan;
        abs_result |= @intCast((a_rep.fraction >> (src_sig_bits - dst_sig_bits)) & dst_nan_mask);
    } else if (a_rep.exp >= overflow) {
        // a overflows to infinity.
        abs_result = @as(dst_rep_t, @intCast(dst_inf_exp)) << dst_sig_bits;
    } else {
        // a underflows on conversion to the destination type or is an exact
        // zero. The result may be a denormal or zero. Extract the exponent
        // to get the shift amount for the denormalization.
        //
        // In this branch `a_rep.exp < underflow`, so the subtraction cannot
        // wrap and `shift >= 1`. The `+ 1` (folded into the constant) accounts
        // for the leading one moving from the integer position into the
        // trailing significand of a denormal, exactly as in `truncf`.
        const shift: u16 = (src_exp_bias - dst_exp_bias + 1) - a_rep.exp;

        // Right shift by the denormalization amount with sticky.
        if (shift > src_sig_bits) {
            abs_result = 0;
        } else {
            // Restore the leading one that was masked off above; without it
            // every denormal result would lose its most significant bit.
            // Inputs reaching this point have a non-zero exponent (a zero
            // exponent always gives shift > src_sig_bits), and are treated as
            // normalized, consistent with the normal-range branch.
            const significand: u64 = a_rep.fraction | integer_bit;
            // sticky is set when any of the `shift` bits about to be shifted
            // out is non-zero, so that rounding still sees them.
            const sticky = @intFromBool(significand << @intCast(64 - shift) != 0);
            const denormalized_significand = significand >> @intCast(shift) | sticky;
            abs_result = @intCast(denormalized_significand >> (src_sig_bits - dst_sig_bits));
            const round_bits = denormalized_significand & round_mask;
            if (round_bits > halfway) {
                // Round to nearest
                abs_result += 1;
            } else if (round_bits == halfway) {
                // Ties to even
                abs_result += abs_result & 1;
            }
        }
    }

    // `sign` sits in bit 15 of the 16-bit exponent word; move it to the top
    // bit of the destination representation.
    const result align(@alignOf(dst_t)) = abs_result | (@as(dst_rep_t, sign) << (dst_bits - 16));
    return @bitCast(result);
}

test {
    _ = @import("truncf_test.zig");
}