// mulo.zig (2643B) - Raw
const std = @import("std");
const builtin = @import("builtin");
const math = std.math;
const common = @import("common.zig");

pub const panic = common.panic;

comptime {
    @export(&__mulosi4, .{ .name = "__mulosi4", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__mulodi4, .{ .name = "__mulodi4", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__muloti4, .{ .name = "__muloti4", .linkage = common.linkage, .visibility = common.visibility });
}

// mulo - signed multiplication with overflow detection
// * returns the wrapping product a *% b
// * stores 1 in `overflow` if a * b overflows, otherwise 0
// - muloXi4_genericSmall is the default implementation
// - muloXi4_genericFast is used when 2 * bitsize(ST) <= bitsize(usize)

/// Returns `a *% b` and stores 1 in `overflow.*` if the exact product does not
/// fit in `ST`, otherwise 0. Uses only arithmetic of width `ST`: the wrapped
/// product is divided by `a` and the quotient is compared against `b`.
inline fn muloXi4_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    const min = math.minInt(ST);
    const res: ST = a *% b;
    // Hacker's Delight, section Overflow, subsection Multiplication.
    // The division check below must never see res == min together with
    // a == -1, because min / -1 itself overflows. With wrapping arithmetic
    // that combination only arises for a == -1 and b == min, so the case
    // (a < 0 and b == min) is tested first; every such product overflows.
    // The `or` short-circuits, so @divTrunc is not evaluated in that case.
    const overflowed = (a < 0 and b == min) or (a != 0 and @divTrunc(res, a) != b);
    overflow.* = @intFromBool(overflowed);
    return res;
}

/// Returns `a *% b` and stores 1 in `overflow.*` if the exact product does not
/// fit in `ST`, otherwise 0. Computes the exact product in an integer type
/// twice as wide as `ST` and range-checks it against the limits of `ST`.
/// Only `i32`, `i64` and `i128` are supported; any other `ST` is rejected at
/// compile time.
inline fn muloXi4_genericFast(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    const EST = switch (ST) {
        i32 => i64,
        i64 => i128,
        i128 => i256,
        else => @compileError("muloXi4_genericFast: unsupported integer type " ++ @typeName(ST)),
    };
    const min = math.minInt(ST);
    const max = math.maxInt(ST);
    const res: EST = @as(EST, a) * @as(EST, b);
    // invariant: -2^{bitwidth(EST)-1} < res < 2^{bitwidth(EST)-1}
    // (|a|, |b| <= 2^{bitwidth(ST)-1} and EST has twice the bits of ST,
    // so the widened multiplication itself cannot overflow).
    overflow.* = @intFromBool(res < min or max < res);
    return @truncate(res);
}

/// Selects the implementation for `ST`: the widening variant when a
/// double-width integer is no wider than `usize`, the division-based variant
/// otherwise. The condition is comptime-known, so only the chosen variant is
/// instantiated.
inline fn muloXi4(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    return if (2 * @bitSizeOf(ST) <= @bitSizeOf(usize))
        muloXi4_genericFast(ST, a, b, overflow)
    else
        muloXi4_genericSmall(ST, a, b, overflow);
}

/// 32-bit signed multiply: returns `a *% b`; `overflow.*` is set to 1 on
/// overflow and to 0 otherwise.
pub fn __mulosi4(a: i32, b: i32, overflow: *c_int) callconv(.c) i32 {
    return muloXi4(i32, a, b, overflow);
}

/// 64-bit signed multiply: returns `a *% b`; `overflow.*` is set to 1 on
/// overflow and to 0 otherwise.
pub fn __mulodi4(a: i64, b: i64, overflow: *c_int) callconv(.c) i64 {
    return muloXi4(i64, a, b, overflow);
}

/// 128-bit signed multiply: returns `a *% b`; `overflow.*` is set to 1 on
/// overflow and to 0 otherwise.
pub fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.c) i128 {
    return muloXi4(i128, a, b, overflow);
}

test {
    _ = @import("mulosi4_test.zig");
    _ = @import("mulodi4_test.zig");
    _ = @import("muloti4_test.zig");
}