blob 2bb40fc2 (6133B) - Raw
const std = @import("std");

/// Widens a binary floating-point value from format `src_t` to format `dst_t`.
///
/// `a` is the raw bit pattern of the `src_t` value, passed as an unsigned
/// integer of the same bit width. Normal numbers, subnormals, zeros, and
/// infinities/NaNs are each handled by a dedicated branch; the sign bit is
/// carried over unchanged.
///
/// The shifts by `dst_sig_bits - src_sig_bits` and `dst_bits - src_bits` are
/// comptime-known and require `dst_t` to be at least as wide as `src_t`.
/// This routine never sets an explicit integer bit; `extend_f80` exists for
/// the `f80` destination.
pub inline fn extendf(
    comptime dst_t: type,
    comptime src_t: type,
    a: std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits),
) dst_t {
    const src_rep_t = std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits);
    const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).Float.bits);
    const DstShift = std.math.Log2Int(dst_rep_t);

    // Format constants that follow from the type parameters. They are all
    // comptime integers, so none of this costs anything at runtime.
    const src_bits = @bitSizeOf(src_t);
    const src_sig_bits = std.math.floatMantissaBits(src_t);
    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = (1 << src_exp_bits) - 1;
    const src_exp_bias = src_inf_exp >> 1;

    const src_min_normal = 1 << src_sig_bits;
    const src_inf = src_inf_exp << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_abs_mask = src_sign_mask - 1;
    const src_qnan = 1 << (src_sig_bits - 1);
    const src_nan_code = src_qnan - 1;

    const dst_bits = @bitSizeOf(dst_t);
    const dst_sig_bits = std.math.floatMantissaBits(dst_t);
    const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    const dst_inf_exp = (1 << dst_exp_bits) - 1;
    const dst_exp_bias = dst_inf_exp >> 1;

    const dst_min_normal: dst_rep_t = @as(dst_rep_t, 1) << dst_sig_bits;

    // Split `a` into its sign bit and the representation of |a|.
    const a_abs: src_rep_t = a & src_abs_mask;
    const sign: src_rep_t = a & src_sign_mask;
    var abs_result: dst_rep_t = undefined;

    // The wrapping subtraction maps zero and subnormals to huge values, so a
    // single unsigned comparison selects exactly the normal range.
    if (a_abs -% src_min_normal < src_inf - src_min_normal) {
        // a is a normal number.
        // Shift the significand and exponent into their destination position,
        // then rebias the exponent.
        abs_result = @as(dst_rep_t, a_abs) << (dst_sig_bits - src_sig_bits);
        abs_result += (dst_exp_bias - src_exp_bias) << dst_sig_bits;
    } else if (a_abs >= src_inf) {
        // a is NaN or infinity.
        // Start from destination infinity, then copy the quiet bit and the
        // remaining NaN payload bits, both shifted left by the difference in
        // significand width.
        abs_result = dst_inf_exp << dst_sig_bits;
        abs_result |= @as(dst_rep_t, a_abs & src_qnan) << (dst_sig_bits - src_sig_bits);
        abs_result |= @as(dst_rep_t, a_abs & src_nan_code) << (dst_sig_bits - src_sig_bits);
    } else if (a_abs != 0) {
        // a is subnormal.
        // `scale` is how far the leading set bit sits below the implicit-bit
        // position of the source format.
        const scale: u32 = @clz(a_abs) -
            @clz(@as(src_rep_t, src_min_normal));
        // Renormalize so the leading bit lands on the destination's implicit
        // bit, clear that bit, then insert the adjusted exponent.
        abs_result = @as(dst_rep_t, a_abs) << @intCast(DstShift, dst_sig_bits - src_sig_bits + scale);
        abs_result ^= dst_min_normal;
        const result_exponent: u32 = dst_exp_bias - src_exp_bias - scale + 1;
        abs_result |= @intCast(dst_rep_t, result_exponent) << dst_sig_bits;
    } else {
        // a is zero.
        abs_result = 0;
    }

    // Move the sign bit to the top of the destination representation.
    const result: dst_rep_t align(@alignOf(dst_t)) = abs_result | (@as(dst_rep_t, sign) << (dst_bits - src_bits));
    return @bitCast(dst_t, result);
}

/// Widens a binary floating-point value from format `src_t` to `f80`.
///
/// `a` is the raw bit pattern of the `src_t` value, passed as an unsigned
/// integer of the same bit width. The result is assembled as a
/// `std.math.F80` (16-bit sign/exponent word plus 64-bit fraction with an
/// explicit integer bit) and converted with `std.math.make_f80`.
pub inline fn extend_f80(comptime src_t: type, a: std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits)) f80 {
    const src_rep_t = std.meta.Int(.unsigned, @typeInfo(src_t).Float.bits);
    // Shift-amount type for `src_rep_t`; wide enough for every bit position
    // of the source representation.
    const SrcShift = std.math.Log2Int(src_rep_t);
    const src_sig_bits = std.math.floatMantissaBits(src_t);
    const dst_int_bit = 0x8000000000000000;
    const dst_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit

    const dst_exp_bias = 16383;

    const src_bits = @bitSizeOf(src_t);
    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = (1 << src_exp_bits) - 1;
    const src_exp_bias = src_inf_exp >> 1;

    const src_min_normal = 1 << src_sig_bits;
    const src_inf = src_inf_exp << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_abs_mask = src_sign_mask - 1;
    const src_qnan = 1 << (src_sig_bits - 1);
    const src_nan_code = src_qnan - 1;

    var dst: std.math.F80 = undefined;

    // Split `a` into its sign and the representation of |a|.
    const a_abs = a & src_abs_mask;
    const sign: u16 = if (a & src_sign_mask != 0) 0x8000 else 0;

    // The wrapping subtraction maps zero and subnormals to huge values, so a
    // single unsigned comparison selects exactly the normal range.
    if (a_abs -% src_min_normal < src_inf - src_min_normal) {
        // a is a normal number.
        // Rebias the exponent and shift the significand into position. Source
        // exponent bits shifted past bit 63 are discarded by `<<`.
        dst.exp = @intCast(u16, a_abs >> src_sig_bits);
        dst.exp += dst_exp_bias - src_exp_bias;
        dst.fraction = @as(u64, a_abs) << (dst_sig_bits - src_sig_bits);
        dst.fraction |= dst_int_bit; // explicit integer bit is set for normal numbers
    } else if (a_abs >= src_inf) {
        // a is NaN or infinity.
        // Start from infinity (maximum exponent, integer bit set), then copy
        // the quiet bit and the remaining NaN payload bits, both shifted left
        // by the difference in significand width.
        dst.exp = 0x7fff;
        dst.fraction = dst_int_bit;
        dst.fraction |= @as(u64, a_abs & src_qnan) << (dst_sig_bits - src_sig_bits);
        dst.fraction |= @as(u64, a_abs & src_nan_code) << (dst_sig_bits - src_sig_bits);
    } else if (a_abs != 0) {
        // a is subnormal.
        // `scale` is how far the leading set bit sits below the implicit-bit
        // position of the source format.
        const scale: u16 = @clz(a_abs) -
            @clz(@as(src_rep_t, src_min_normal));

        // Renormalize so the leading bit lands on the explicit integer bit.
        dst.fraction = @as(u64, a_abs) << @intCast(u6, dst_sig_bits - src_sig_bits + scale);
        dst.fraction |= dst_int_bit; // explicit integer bit is set for normal numbers
        // `src_sig_bits - scale` is the position of the leading set bit and
        // can be as large as `src_sig_bits - 1` (22 for f32, 51 for f64), so
        // the cast must target the full shift type of `src_rep_t`; a `u4`
        // only covers f16. The shifted value is that leading bit (1), which
        // the XOR clears before the adjusted exponent is OR-ed in.
        dst.exp = @truncate(u16, a_abs >> @intCast(SrcShift, src_sig_bits - scale));
        dst.exp ^= 1;
        dst.exp |= dst_exp_bias - src_exp_bias - scale + 1;
    } else {
        // a is zero.
        dst.exp = 0;
        dst.fraction = 0;
    }

    dst.exp |= sign;
    return std.math.make_f80(dst);
}

test {
    _ = @import("extendf_test.zig");
}