commit 0ebd270d907a3fb7e2e600891fe2f455b7b2d4ad (tree)
parent 58944586beca0a771ed03a9e6b1f281d9e23cc57
Author: Pavel Verigo <paul.verigo@gmail.com>
Date: Tue, 7 Apr 2026 00:08:17 +0200
stage2-wasm: bigint mulo
Diffstat:
5 files changed, 193 insertions(+), 66 deletions(-)
diff --git a/lib/compiler_rt/limb64.zig b/lib/compiler_rt/limb64.zig
@@ -839,3 +839,141 @@ test __byteswap_limb64 {
try test__byteswap_limb64(i128, 1 << 56, 1 << 64);
try test__byteswap_limb64(i248, minInt(i248), 128);
}
+
+comptime { // Evaluated at compile time: registers the big-integer multiply-with-overflow routine under its symbol name.
+ symbol(&__mulo_limb64, "__mulo_limb64"); // `symbol` is defined outside this hunk; presumably it exports the function under the given name — confirm.
+}
+
+inline fn add3(x: *[3]u64, start: usize, v0: u64) void { // Adds `v0` into limb `start` of the 3-limb accumulator `x`, rippling any carry into the higher limbs.
+ var i = start;
+ var v = v0; // Amount still to add at limb `i`; becomes 1 once only a carry is being propagated.
+ while (i < 3) : (i += 1) {
+ const s = @addWithOverflow(x[i], v); // s[0] is the wrapped sum, s[1] the carry-out bit.
+ x[i] = s[0];
+ if (s[1] == 0) break; // No carry-out, so the higher limbs stay untouched.
+ v = 1;
+ }
+} // A carry out of x[2] is dropped when the loop runs off the end.
+
+fn mulwide(a: u64, b: u64) [2]u64 { // Multiplies two u64 values with `muldXi` and returns the double-width result reinterpreted as two u64 words.
+ const muldXi = @import("mulXi3.zig").muldXi; // Function-local import of the helper from mulXi3.zig.
+ return @bitCast(muldXi(u64, a, b)); // NOTE(review): the caller adds element 0 at the low limb and element 1 one limb higher, so [0] must be the low word — this depends on the layout of DoubleInt(u64); confirm for big-endian targets.
+}
+
+fn __mulo_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) bool { // Multiplies two `bits`-wide integers stored as u64 limbs, writes the wrapped product to `out_ptr`, and returns true when the product overflowed.
+ const limb_cnt = limbCount(bits); // Number of u64 limbs per operand; `limbCount` is defined outside this hunk.
+
+ const out = out_ptr[0..limb_cnt];
+ const a = a_ptr[0..limb_cnt];
+ const b = b_ptr[0..limb_cnt];
+
+ @memset(out, 0);
+
+ const all_ones = ~@as(u64, 0);
+ const a_neg = is_signed and ((limbGet(a, limb_cnt - 1) >> 63) != 0); // Sign is read from bit 63 of the last limb; assumes a partial top limb arrives sign-extended — TODO confirm.
+ const b_neg = is_signed and ((limbGet(b, limb_cnt - 1) >> 63) != 0);
+
+ var carry: [3]u64 = @splat(0); // Carry handed from one column to the next, up to three limbs wide.
+ var hi_zero = true; // Stays true while every high-half limb seen so far is 0.
+ var hi_ones = true; // Stays true while every high-half limb seen so far is all ones.
+ var hi_borrow: u1 = 0; // Borrow carried between high-half limbs during the signed correction.
+ var raw_last: u64 = 0; // Top result limb before it is wrapped to `bits`.
+
+ var k: usize = 0;
+ while (k < 2 * limb_cnt) : (k += 1) { // Column k sums every a[i] * b[j] with i + j == k across the full double-width product.
+ var acc = carry;
+
+ var i: usize = if (k < limb_cnt) 0 else k - (limb_cnt - 1); // Smallest i for which j = k - i stays below limb_cnt.
+ while (i < limb_cnt and i <= k) : (i += 1) {
+ const j = k - i;
+ if (j >= limb_cnt) continue; // Never taken given the starting i above; acts only as a guard.
+
+ const p = mulwide(limbGet(a, i), limbGet(b, j)); // Partial product as two u64 words.
+ add3(&acc, 0, p[0]);
+ add3(&acc, 1, p[1]);
+ }
+
+ var limb = acc[0]; // Lowest accumulator limb is the finished limb k of the product.
+ if (k < limb_cnt) { // Low half: this limb is part of the returned result.
+ limbSet(out, k, limb);
+ if (k == limb_cnt - 1) raw_last = limb;
+ } else { // High half: only inspected to decide whether the product overflowed.
+ if (is_signed) { // The columns were multiplied as unsigned limbs; subtract b from the high half when a is negative, and a when b is negative.
+ const h = k - limb_cnt;
+
+ const s0 = @subWithOverflow(limb, if (a_neg) limbGet(b, h) else 0);
+ const s1 = @subWithOverflow(s0[0], if (b_neg) limbGet(a, h) else 0);
+ const s2 = @subWithOverflow(s1[0], hi_borrow);
+
+ limb = s2[0];
+ hi_borrow = @intFromBool(s0[1] != 0 or s1[1] != 0 or s2[1] != 0); // NOTE(review): borrows are OR-ed into one bit, so two borrows in the same limb count as one; the high limbs only feed the overflow flag — confirm this can never change it.
+ }
+
+ hi_zero = hi_zero and limb == 0;
+ hi_ones = hi_ones and limb == all_ones;
+ }
+
+ carry = .{ acc[1], acc[2], 0 }; // Shift the accumulator down one limb for the next column.
+ }
+
+ const last = if (bits % 64 == 0) raw_last else limbWrap(raw_last, is_signed, bits); // Top limb wrapped to `bits`; `limbWrap` is defined outside this hunk (presumably truncates or sign-extends — confirm).
+ if (bits % 64 != 0) {
+ limbSet(out, limb_cnt - 1, last);
+ }
+
+ if (!is_signed) {
+ return !hi_zero or raw_last != last; // Unsigned overflow: a non-zero high limb, or wrapping changed the top limb.
+ }
+
+ const sign_extend: u64 = if ((last >> 63) == 1) all_ones else 0; // Value every high limb must hold for the product to fit, chosen by the result's sign bit.
+ return (raw_last != last) or if (sign_extend == 0) !hi_zero else !hi_ones;
+}
+
+fn test__mulo_limb64(comptime T: type, a: T, b: T, expected: struct { T, bool }) !void { // Test helper: runs __mulo_limb64 on `a` and `b` and checks the product limbs and the overflow flag against `expected`.
+ const int_info = @typeInfo(T).int;
+ const is_signed = int_info.signedness == .signed;
+
+ var a_limbs = asLimbs(a); // `asLimbs` and `Limbs` are defined outside this hunk.
+ var b_limbs = asLimbs(b);
+ var out: Limbs(T) = undefined; // Overwritten by the call below, which zeroes the output before accumulating.
+ const overflow = __mulo_limb64(&out, &a_limbs, &b_limbs, is_signed, int_info.bits);
+
+ const expected_limbs = asLimbs(expected[0]);
+ try testing.expectEqual(expected_limbs, out);
+ try testing.expectEqual(expected[1], overflow);
+}
+
+test __mulo_limb64 { // Each case lists: integer type, a, b, then { wrapped product, overflow flag }.
+ try test__mulo_limb64(u64, 3, 5, .{ 15, false }); // Unsigned cases at widths 64, 65, 128 and 255.
+ try test__mulo_limb64(u64, maxInt(u64), 2, .{ maxInt(u64) - 1, true });
+ try test__mulo_limb64(u65, 1 << 32, 1 << 32, .{ 1 << 64, false });
+ try test__mulo_limb64(u65, 1 << 64, 2, .{ 0, true });
+ try test__mulo_limb64(u128, 1 << 80, 1 << 40, .{ 1 << 120, false });
+ try test__mulo_limb64(u128, 1 << 100, 1 << 40, .{ 0, true });
+ try test__mulo_limb64(u255, 7, 9, .{ 63, false });
+ try test__mulo_limb64(u255, maxInt(u255), 2, .{ maxInt(u255) - 1, true });
+
+ try test__mulo_limb64(i64, -3, 2, .{ -6, false }); // Signed cases at the same widths.
+ try test__mulo_limb64(i64, maxInt(i64), 2, .{ -2, true });
+ try test__mulo_limb64(i65, 1 << 63, 2, .{ minInt(i65), true }); // 2^64 does not fit in i65 and wraps to the minimum value.
+ try test__mulo_limb64(i65, -1 << 32, 1 << 16, .{ -1 << 48, false });
+ try test__mulo_limb64(i128, 1 << 100, 1 << 27, .{ minInt(i128), true });
+ try test__mulo_limb64(i128, -1 << 80, 1 << 40, .{ -1 << 120, false });
+ try test__mulo_limb64(i255, -3, 2, .{ -6, false });
+ try test__mulo_limb64(i255, maxInt(i255), 2, .{ -2, true });
+
+ try test__mulo_limb64(u200, 0, maxInt(u200), .{ 0, false }); // 200-bit unsigned: four limbs with a partially used top limb.
+ try test__mulo_limb64(u200, 1, maxInt(u200), .{ maxInt(u200), false });
+ try test__mulo_limb64(u200, 1 << 100, 1 << 99, .{ 1 << 199, false });
+ try test__mulo_limb64(u200, 1 << 100, 1 << 100, .{ 0, true });
+ try test__mulo_limb64(u200, maxInt(u200), maxInt(u200), .{ 1, true });
+
+ try test__mulo_limb64(i200, 0, -1, .{ 0, false }); // 200-bit signed, including both-negative operands.
+ try test__mulo_limb64(i200, -1, -1, .{ 1, false });
+ try test__mulo_limb64(i200, -1, minInt(i200), .{ minInt(i200), true });
+ try test__mulo_limb64(i200, maxInt(i200), 2, .{ -2, true });
+ try test__mulo_limb64(i200, 1 << 100, 1 << 98, .{ 1 << 198, false });
+ try test__mulo_limb64(i200, 1 << 100, 1 << 99, .{ minInt(i200), true });
+ try test__mulo_limb64(i200, maxInt(i200), maxInt(i200), .{ 1, true });
+ try test__mulo_limb64(i200, minInt(i200), minInt(i200), .{ 0, true });
+}
diff --git a/lib/compiler_rt/mulXi3.zig b/lib/compiler_rt/mulXi3.zig
@@ -63,7 +63,7 @@ fn DoubleInt(comptime T: type) type {
};
}
-fn muldXi(comptime T: type, a: T, b: T) DoubleInt(T) {
+pub fn muldXi(comptime T: type, a: T, b: T) DoubleInt(T) {
const DT = DoubleInt(T);
const word_t = compiler_rt.HalveInt(DT, false);
const bits_in_word_2 = @sizeOf(T) * 8 / 2;
diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig
@@ -2480,7 +2480,19 @@ fn intMul(cg: *CodeGen, ty: IntType, lhs: WValue, rhs: WValue) InnerError!WValue
return .stack;
},
65...128 => return cg.callIntrinsic(.__multi3, &.{ .i128_type, .i128_type }, Type.i128, &.{ lhs, rhs }),
- else => return cg.fail("TODO: Support intMul for integer bitsize: {d}", .{ty.bits}),
+ else => {
+ const result = try cg.allocInt(ty);
+
+ try cg.lowerToStack(result);
+ try cg.lowerToStack(lhs);
+ try cg.lowerToStack(rhs);
+ try cg.addImm32(@intFromBool(ty.is_signed));
+ try cg.addImm32(ty.bits);
+ try cg.addCallIntrinsic(.__mulo_limb64);
+ try cg.addTag(.drop);
+
+ return result;
+ },
}
}
@@ -3680,68 +3692,6 @@ fn intMulOverflow(cg: *CodeGen, int_ty: IntType, lhs: WValue, rhs: WValue) Inner
_ = try cg.intCmp(new_ty, .neq, res_upcast, bin_op);
try cg.addLocal(.local_set, overflow_bit.local.value);
break :blk res_tmp;
- } else if (int_ty.bits == 128 and !int_ty.is_signed) blk: {
- var lhs_lsb = try (try cg.load(lhs, Type.u64, 0)).toLocal(cg, Type.u64);
- defer lhs_lsb.free(cg);
- var lhs_msb = try (try cg.load(lhs, Type.u64, 8)).toLocal(cg, Type.u64);
- defer lhs_msb.free(cg);
- var rhs_lsb = try (try cg.load(rhs, Type.u64, 0)).toLocal(cg, Type.u64);
- defer rhs_lsb.free(cg);
- var rhs_msb = try (try cg.load(rhs, Type.u64, 8)).toLocal(cg, Type.u64);
- defer rhs_msb.free(cg);
-
- const zero: WValue = .{ .imm64 = 0 };
-
- const cross_1 = try cg.callIntrinsic(
- .__multi3,
- &[_]InternPool.Index{.i64_type} ** 4,
- Type.i128,
- &.{ lhs_msb, zero, rhs_lsb, zero },
- );
- const cross_2 = try cg.callIntrinsic(
- .__multi3,
- &[_]InternPool.Index{.i64_type} ** 4,
- Type.i128,
- &.{ rhs_msb, zero, lhs_lsb, zero },
- );
- const mul_lsb = try cg.callIntrinsic(
- .__multi3,
- &[_]InternPool.Index{.i64_type} ** 4,
- Type.i128,
- &.{ rhs_lsb, zero, lhs_lsb, zero },
- );
-
- const rhs_msb_not_zero = try cg.intCmp(.u64, .neq, rhs_msb, zero);
- const lhs_msb_not_zero = try cg.intCmp(.u64, .neq, lhs_msb, zero);
- const both_msb_not_zero = try cg.intAnd(.u32, rhs_msb_not_zero, lhs_msb_not_zero);
-
- const cross_1_msb = try cg.load(cross_1, .u64, 8);
- const cross_1_msb_not_zero = try cg.intCmp(.u64, .neq, cross_1_msb, zero);
- const cond_1 = try cg.intOr(.u32, both_msb_not_zero, cross_1_msb_not_zero);
-
- const cross_2_msb = try cg.load(cross_2, Type.u64, 8);
- const cross_2_msb_not_zero = try cg.intCmp(.u64, .neq, cross_2_msb, zero);
- const cond_2 = try cg.intOr(.u32, cond_1, cross_2_msb_not_zero);
-
- const cross_1_lsb = try cg.load(cross_1, Type.u64, 0);
- const cross_2_lsb = try cg.load(cross_2, Type.u64, 0);
- const cross_add = try cg.intAdd(.u64, cross_1_lsb, cross_2_lsb);
-
- var mul_lsb_msb = try (try cg.load(mul_lsb, Type.u64, 8)).toLocal(cg, Type.u64);
- defer mul_lsb_msb.free(cg);
- var all_add = try (try cg.intAdd(.u64, cross_add, mul_lsb_msb)).toLocal(cg, Type.u64);
- defer all_add.free(cg);
- const add_overflow = try cg.intCmp(.u64, .lt, all_add, mul_lsb_msb);
-
- _ = try cg.intOr(.u32, cond_2, add_overflow);
- try cg.addLocal(.local_set, overflow_bit.local.value);
-
- const tmp_result = try cg.allocStack(Type.u128);
- try cg.emitWValue(tmp_result);
- const mul_lsb_lsb = try cg.load(mul_lsb, Type.u64, 0);
- try cg.store(.stack, mul_lsb_lsb, Type.u64, tmp_result.offset());
- try cg.store(tmp_result, all_add, Type.u64, 8);
- break :blk tmp_result;
} else if (int_ty.bits == 128 and int_ty.is_signed) blk: {
const overflow_ret = try cg.allocStack(Type.i32);
const res = try cg.callIntrinsic(
@@ -3753,7 +3703,18 @@ fn intMulOverflow(cg: *CodeGen, int_ty: IntType, lhs: WValue, rhs: WValue) Inner
_ = try cg.load(overflow_ret, Type.i32, 0);
try cg.addLocal(.local_set, overflow_bit.local.value);
break :blk res;
- } else return cg.fail("TODO: intMulOverflow for bitsize {d}", .{int_ty.bits});
+ } else {
+ const result = try cg.allocInt(int_ty);
+
+ try cg.lowerToStack(result);
+ try cg.lowerToStack(lhs);
+ try cg.lowerToStack(rhs);
+ try cg.addImm32(@intFromBool(int_ty.is_signed));
+ try cg.addImm32(int_ty.bits);
+ try cg.addCallIntrinsic(.__mulo_limb64);
+
+ return .{ .result = result, .ov = .stack };
+ };
return .{ .result = result_val, .ov = .{ .local = overflow_bit.local } };
}
diff --git a/src/codegen/wasm/Mir.zig b/src/codegen/wasm/Mir.zig
@@ -1018,4 +1018,5 @@ pub const Intrinsic = enum(u32) {
__popcount_limb64,
__bitreverse_limb64,
__byteswap_limb64,
+ __mulo_limb64,
};
diff --git a/test/behavior/math.zig b/test/behavior/math.zig
@@ -1100,10 +1100,37 @@ test "@mulWithOverflow bitsize 128 bits" {
try testMulWithOverflow(i128, -1 << 63, -1 << 64, -1 << 127, 1);
}
+test "@mulWithOverflow > 128 bits" { // Behavior test for multiply-with-overflow on 140-bit and 200-bit integers; arguments are presumably type, a, b, expected product, expected overflow bit (helper is outside this hunk).
+ if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // NOTE(review): the 256-bit test that follows also skips stage2_c, stage2_spirv and stage2_riscv64 — confirm those backends handle these widths.
+
+ try testMulWithOverflow(u140, 0, maxInt(u140), 0, 0); // Unsigned 140-bit cases.
+ try testMulWithOverflow(u140, 1, maxInt(u140), maxInt(u140), 0);
+ try testMulWithOverflow(u140, 1 << 70, 1 << 69, 1 << 139, 0);
+ try testMulWithOverflow(u140, 1 << 70, 1 << 70, 0, 1);
+
+ try testMulWithOverflow(u200, 1 << 100, 1 << 99, 1 << 199, 0); // Unsigned 200-bit cases.
+ try testMulWithOverflow(u200, 1 << 100, 1 << 100, 0, 1);
+ try testMulWithOverflow(u200, maxInt(u200), maxInt(u200), 1, 1);
+ try testMulWithOverflow(u200, maxInt(u200) - 1, 2, maxInt(u200) - 3, 1);
+
+ try testMulWithOverflow(i140, 0, -1, 0, 0); // Signed 140-bit cases.
+ try testMulWithOverflow(i140, -1, -1, 1, 0);
+ try testMulWithOverflow(i140, 1 << 69, 1 << 69, 1 << 138, 0);
+ try testMulWithOverflow(i140, 1 << 69, 1 << 70, minInt(i140), 1);
+ try testMulWithOverflow(i140, -1 << 70, 1 << 20, -1 << 90, 0);
+ try testMulWithOverflow(i140, minInt(i140), -1, minInt(i140), 1); // Negating the minimum value overflows and wraps back to itself.
+
+ try testMulWithOverflow(i200, 1 << 100, 1 << 98, 1 << 198, 0); // Signed 200-bit cases.
+ try testMulWithOverflow(i200, 1 << 100, 1 << 99, minInt(i200), 1);
+ try testMulWithOverflow(i200, -1 << 120, 1 << 30, -1 << 150, 0);
+ try testMulWithOverflow(i200, minInt(i200), minInt(i200), 0, 1);
+ try testMulWithOverflow(i200, maxInt(i200), 2, -2, 1);
+ try testMulWithOverflow(i200, maxInt(i200), maxInt(i200), 1, 1);
+}
+
test "@mulWithOverflow bitsize 256 bits" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;