extendf.zig (6009B) - Raw
const std = @import("std");

/// Widens a binary floating-point value to `dst_t` using integer operations only.
///
/// `a` is the raw bit pattern of the input, laid out as a `src_t`. The result
/// is produced by building the destination bit pattern and bit-casting it to
/// `dst_t`. The shift amounts used below are `dst - src` differences, so
/// `dst_t` must have at least as many significand bits and total bits as
/// `src_t`.
pub inline fn extendf(
    comptime dst_t: type,
    comptime src_t: type,
    a: std.meta.Int(.unsigned, @typeInfo(src_t).float.bits),
) dst_t {
    const SrcRep = std.meta.Int(.unsigned, @typeInfo(src_t).float.bits);
    const DstRep = std.meta.Int(.unsigned, @typeInfo(dst_t).float.bits);

    // Layout of the source format. Everything here is comptime-known.
    const src_bits = @bitSizeOf(src_t);
    const src_sig_bits = std.math.floatMantissaBits(src_t);
    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = (1 << src_exp_bits) - 1;
    const src_exp_bias = src_inf_exp >> 1;
    const src_min_normal = 1 << src_sig_bits;
    const src_sig_mask = src_min_normal - 1; // quiet bit plus the rest of the NaN payload
    const src_inf = src_inf_exp << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_abs_mask = src_sign_mask - 1;

    // Layout of the destination format.
    const dst_bits = @bitSizeOf(dst_t);
    const dst_sig_bits = std.math.floatMantissaBits(dst_t);
    const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    const dst_inf_exp = (1 << dst_exp_bits) - 1;
    const dst_exp_bias = dst_inf_exp >> 1;
    const dst_min_normal: DstRep = @as(DstRep, 1) << dst_sig_bits;

    // Differences between the two layouts.
    const sig_shift = dst_sig_bits - src_sig_bits;
    const bias_delta = dst_exp_bias - src_exp_bias;

    // Split the input into its sign bit and the bits of |a|.
    const magnitude: SrcRep = a & src_abs_mask;
    const sign: SrcRep = a & src_sign_mask;

    // The wrapping subtraction makes zero and denormals wrap around to large
    // values, so a single unsigned compare selects exactly the normal range.
    const widened: DstRep = if (magnitude -% src_min_normal < src_inf - src_min_normal) normal: {
        // Normal number: move exponent and significand into their new
        // position, then rebias the exponent.
        const repositioned = @as(DstRep, magnitude) << sig_shift;
        break :normal repositioned + (bias_delta << dst_sig_bits);
    } else if (magnitude >= src_inf) special: {
        // Infinity or NaN: start from destination infinity and carry the
        // source significand field (quiet bit and payload) over, left-aligned.
        const payload = @as(DstRep, magnitude & src_sig_mask) << sig_shift;
        break :special @as(DstRep, dst_inf_exp << dst_sig_bits) | payload;
    } else if (magnitude != 0) denormal: {
        // Denormal: renormalize so the leading set bit lands on the implicit
        // bit position, clear that bit, and insert the adjusted exponent.
        const scale: u32 = @clz(magnitude) - @clz(@as(SrcRep, src_min_normal));
        const renormalized = @as(DstRep, magnitude) << @intCast(sig_shift + scale);
        const exponent: u32 = bias_delta - scale + 1;
        break :denormal (renormalized ^ dst_min_normal) | (@as(DstRep, @intCast(exponent)) << dst_sig_bits);
    } else 0; // Zero stays zero; the sign is re-applied below.

    // Move the sign bit to the top of the destination representation.
    const sign_in_dst = @as(DstRep, sign) << (dst_bits - src_bits);
    const result: DstRep align(@alignOf(dst_t)) = widened | sign_in_dst;
    return @bitCast(result);
}

/// Widens a binary floating-point value to `f80` using integer operations only.
///
/// `a` is the raw bit pattern of the input, laid out as a `src_t`. The result
/// is assembled as a `std.math.F80` (16-bit sign/exponent word plus a 64-bit
/// fraction that includes the explicit integer bit) and converted with
/// `toFloat`.
pub inline fn extend_f80(comptime src_t: type, a: std.meta.Int(.unsigned, @typeInfo(src_t).float.bits)) f80 {
    const F80 = std.math.F80;
    const SrcRep = std.meta.Int(.unsigned, @typeInfo(src_t).float.bits);

    // Destination (f80) layout.
    const dst_int_bit = 0x8000000000000000; // explicit integer bit, top bit of `fraction`
    const dst_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
    const dst_exp_bias = 16383;

    // Source layout. Everything here is comptime-known.
    const src_bits = @bitSizeOf(src_t);
    const src_sig_bits = std.math.floatMantissaBits(src_t);
    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = (1 << src_exp_bits) - 1;
    const src_exp_bias = src_inf_exp >> 1;
    const src_min_normal = 1 << src_sig_bits;
    const src_sig_mask = src_min_normal - 1; // quiet bit plus the rest of the NaN payload
    const src_inf = src_inf_exp << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_abs_mask = src_sign_mask - 1;

    const sig_shift = dst_sig_bits - src_sig_bits;
    const bias_delta = dst_exp_bias - src_exp_bias;

    // Split the input into its sign and the bits of |a|.
    const magnitude = a & src_abs_mask;
    const sign: u16 = if ((a & src_sign_mask) == 0) 0 else 0x8000;

    const unsigned_parts: F80 = if (magnitude -% src_min_normal < src_inf - src_min_normal) normal: {
        // Normal number: rebias the exponent and left-align the significand.
        // The source exponent bits that get shifted past the top of the u64
        // are discarded; the integer bit is then set explicitly.
        const src_exp: u16 = @intCast(magnitude >> src_sig_bits);
        break :normal F80{
            .exp = src_exp + bias_delta,
            .fraction = (@as(u64, magnitude) << sig_shift) | dst_int_bit,
        };
    } else if (magnitude >= src_inf) special: {
        // Infinity or NaN: maximum exponent, integer bit set, and the source
        // significand field (quiet bit and payload) carried over left-aligned.
        const payload = @as(u64, magnitude & src_sig_mask) << sig_shift;
        break :special F80{
            .exp = 0x7fff,
            .fraction = dst_int_bit | payload,
        };
    } else if (magnitude != 0) denormal: {
        // Denormal: renormalize so the leading set bit becomes the integer
        // bit, then insert the adjusted exponent.
        const scale: u16 = @clz(magnitude) - @clz(@as(SrcRep, src_min_normal));
        // The leading set bit sits at position `src_sig_bits - scale`, so this
        // shift leaves exactly that bit; the xor below clears it again before
        // the exponent is or-ed in.
        const leading: u16 = @truncate(magnitude >> @intCast(src_sig_bits - scale));
        break :denormal F80{
            .exp = (leading ^ 1) | (bias_delta - scale + 1),
            .fraction = (@as(u64, magnitude) << @intCast(sig_shift + scale)) | dst_int_bit,
        };
    } else F80{ .exp = 0, .fraction = 0 }; // Zero; the sign is re-applied below.

    // The sign occupies the top bit of the 16-bit exponent word.
    const result = F80{
        .exp = unsigned_parts.exp | sign,
        .fraction = unsigned_parts.fraction,
    };
    return result.toFloat();
}

// Pull in the companion test file so its tests run together with this one.
test {
    _ = @import("extendf_test.zig");
}