// mulXi3.zig
const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;
const common = @import("common.zig");
const native_endian = builtin.cpu.arch.endian();

pub const panic = common.panic;

// Symbol exports. Exactly one 64-bit multiply symbol and one "__multi3"
// implementation is exported, selected by the flags in `common`.
comptime {
    @export(&__mulsi3, .{ .name = "__mulsi3", .linkage = common.linkage, .visibility = common.visibility });
    if (common.want_aeabi) {
        @export(&__aeabi_lmul, .{ .name = "__aeabi_lmul", .linkage = common.linkage, .visibility = common.visibility });
    } else {
        @export(&__muldi3, .{ .name = "__muldi3", .linkage = common.linkage, .visibility = common.visibility });
    }
    if (common.want_windows_v2u64_abi) {
        @export(&__multi3_windows_x86_64, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility });
    } else {
        @export(&__multi3, .{ .name = "__multi3", .linkage = common.linkage, .visibility = common.visibility });
    }
}

/// Multiplies two 32-bit integers using shift-and-add, without relying on a
/// multiply instruction. The result wraps on overflow.
pub fn __mulsi3(a: i32, b: i32) callconv(.c) i32 {
    // Work on the unsigned bit patterns; the low 32 bits of the product are
    // the same for signed and unsigned operands.
    var ua: u32 = @bitCast(a);
    var ub: u32 = @bitCast(b);
    var r: u32 = 0;

    // For every set bit of `ua`, add the correspondingly shifted `ub`.
    while (ua > 0) {
        if ((ua & 1) != 0) r +%= ub;
        ua >>= 1;
        ub <<= 1;
    }

    return @bitCast(r);
}

/// Multiplies two 64-bit integers; the result wraps on overflow.
pub fn __muldi3(a: i64, b: i64) callconv(.c) i64 {
    return mulX(i64, a, b);
}

/// Same operation as `__muldi3`, declared with the ARM AAPCS calling
/// convention. Exported as "__aeabi_lmul" when `common.want_aeabi` is set.
fn __aeabi_lmul(a: i64, b: i64) callconv(.{ .arm_aapcs = .{} }) i64 {
    return mulX(i64, a, b);
}

/// Wrapping multiplication of two `T` values (`i64` or `i128`) built from
/// half-width pieces.
///
/// The operands are split into `low`/`high` halves through
/// `common.HalveInt(T, false)`.
/// NOTE(review): the `false` argument presumably selects unsigned halves;
/// confirm against common.zig.
inline fn mulX(comptime T: type, a: T, b: T) T {
    const word_t = common.HalveInt(T, false);
    const x = word_t{ .all = a };
    const y = word_t{ .all = b };
    // Full double-width product of the two low halves.
    var r = switch (T) {
        i64, i128 => word_t{ .all = muldXi(word_t.HalfT, x.s.low, y.s.low) },
        else => @compileError("mulX: unsupported integer type " ++ @typeName(T)),
    };
    // The cross terms only affect the high half. The high*high term is not
    // computed because it lies entirely above the width of T.
    r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high;
    return r.all;
}

/// Maps a 32- or 64-bit integer type to the signed integer type of twice its
/// width. Any other type is rejected at compile time.
fn DoubleInt(comptime T: type) type {
    return switch (T) {
        u32 => i64,
        u64 => i128,
        i32 => i64,
        i64 => i128,
        else => @compileError("DoubleInt: unsupported integer type " ++ @typeName(T)),
    };
}

/// Computes the full double-width product of `a` and `b`.
///
/// Each operand is split into two half-words of `bits_in_word_2` bits and the
/// four partial products are accumulated into the low/high parts of the
/// result, carrying from the low part into the high part as needed.
/// NOTE(review): the masking and shifting assume `T` is unsigned, as it is
/// when called from `mulX` if `HalfT` is unsigned; confirm against common.zig.
fn muldXi(comptime T: type, a: T, b: T) DoubleInt(T) {
    const DT = DoubleInt(T);
    const word_t = common.HalveInt(DT, false);
    const bits_in_word_2 = @sizeOf(T) * 8 / 2;
    const lower_mask = (~@as(T, 0)) >> bits_in_word_2;

    var r: word_t = undefined;
    // low(a) * low(b)
    r.s.low = (a & lower_mask) *% (b & lower_mask);
    var t: T = r.s.low >> bits_in_word_2;
    r.s.low &= lower_mask;
    // high(a) * low(b), plus the carry out of the previous partial product.
    // Checked `+=`: for unsigned T with h = bits_in_word_2 the sum is at most
    // (2^h - 1) + (2^h - 1)^2 = 2^(2h) - 2^h, so it always fits in T.
    t += (a >> bits_in_word_2) *% (b & lower_mask);
    r.s.low +%= (t & lower_mask) << bits_in_word_2;
    r.s.high = t >> bits_in_word_2;
    t = r.s.low >> bits_in_word_2;
    r.s.low &= lower_mask;
    // low(a) * high(b)
    t +%= (b >> bits_in_word_2) *% (a & lower_mask);
    r.s.low +%= (t & lower_mask) << bits_in_word_2;
    r.s.high +%= t >> bits_in_word_2;
    // high(a) * high(b) contributes only to the high part.
    r.s.high +%= (a >> bits_in_word_2) *% (b >> bits_in_word_2);
    return r.all;
}

/// Multiplies two 128-bit integers; the result wraps on overflow.
pub fn __multi3(a: i128, b: i128) callconv(.c) i128 {
    return mulX(i128, a, b);
}

const v2u64 = @Vector(2, u64);

/// Variant of `__multi3` that takes and returns the 128-bit values as
/// two-element `u64` vectors. Exported as "__multi3" when
/// `common.want_windows_v2u64_abi` is set.
fn __multi3_windows_x86_64(a: v2u64, b: v2u64) callconv(.c) v2u64 {
    return @bitCast(mulX(i128, @as(i128, @bitCast(a)), @as(i128, @bitCast(b))));
}

// Pull in the tests that live in the companion file.
test {
    _ = @import("mulXi3_test.zig");
}