aullrem.zig (2618B) - Raw
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const os = builtin.os.tag;
const abi = builtin.abi;
const common = @import("common.zig");

pub const panic = common.panic;

comptime {
    // The helpers are only exported for 32-bit x86 targets that want the
    // MSVC/Itanium ABI and are not linking libc.
    if (arch == .x86 and common.want_windows_msvc_or_itanium_abi and !builtin.link_libc) {
        // Don't let LLVM apply the stdcall name mangling on those MSVC builtins
        @export(&_allrem, .{ .name = "\x01__allrem", .linkage = common.linkage, .visibility = common.visibility });
        @export(&_aullrem, .{ .name = "\x01__aullrem", .linkage = common.linkage, .visibility = common.visibility });
    }
}

/// Signed 64-bit remainder: returns `a rem b` using truncated division, so the
/// result is either zero or carries the sign of the dividend `a`; the sign of
/// the divisor `b` has no influence on the result.
///
/// `b` must be non-zero: the unsigned `%` below is evaluated with a zero
/// divisor otherwise.
pub fn _allrem(a: i64, b: i64) callconv(.{ .x86_stdcall = .{} }) i64 {
    // Arithmetic shift by 63 yields 0 for non-negative values and -1 (all ones)
    // for negative values.
    const s_a = a >> (64 - 1);
    const s_b = b >> (64 - 1);

    // Branch-free absolute value: (x ^ s) - s negates x when s == -1.
    // Wrapping subtraction keeps minInt(i64) well-defined; its bit pattern,
    // reinterpreted as u64 below, is the correct magnitude 2^63.
    const an = (a ^ s_a) -% s_a;
    const bn = (b ^ s_b) -% s_b;

    const r = @as(u64, @bitCast(an)) % @as(u64, @bitCast(bn));

    // Re-apply the sign of the dividend only. Using `s_a ^ s_b` here (the rule
    // for a quotient) would give 7 rem -2 == -1 and -7 rem -2 == 1.
    return (@as(i64, @bitCast(r)) ^ s_a) -% s_a;
}

/// Unsigned 64-bit remainder, implemented entirely in inline assembly.
///
/// Both operands are read from the stack (layout below). The remainder is left
/// in EDX:EAX and `ret $0x10` removes the 16 bytes of arguments on return.
/// EBX is saved on entry and restored before returning.
pub fn _aullrem() callconv(.naked) void {
    @setRuntimeSafety(false);

    // The stack layout on entry is:
    // ESP+16 divisor (hi)
    // ESP+12 divisor (low)
    // ESP+8 dividend (hi)
    // ESP+4 dividend (low)
    // ESP return address
    //
    // The code starts with `push %ebx`, so every offset used in the assembly
    // is 4 bytes larger than in the table above (e.g. divisor hi is 0x14).
    //
    // Outline of the assembly:
    // * Divisor high dword == 0: two chained 32-bit `div`s by the low dword
    //   (high dividend dword first, then the low dword with the previous
    //   remainder in EDX). The final remainder is moved to EAX and EDX is
    //   cleared.
    // * Otherwise (label 1): divisor and dividend are shifted right together
    //   (label 2) until the divisor's high dword is zero, then one `div`
    //   produces a quotient estimate. The estimate is multiplied by the full
    //   64-bit divisor; if that product carries out of 64 bits or is larger
    //   than the dividend, the divisor is subtracted from it once (label 3).
    //   Label 4 computes product - dividend and negates the 64-bit value,
    //   giving dividend - product, i.e. the remainder.
    // * Label 6: common exit.

    asm volatile (
        \\  push   %%ebx
        \\  mov    0x14(%%esp),%%eax
        \\  or     %%eax,%%eax
        \\  jne    1f
        \\  mov    0x10(%%esp),%%ecx
        \\  mov    0xc(%%esp),%%eax
        \\  xor    %%edx,%%edx
        \\  div    %%ecx
        \\  mov    0x8(%%esp),%%eax
        \\  div    %%ecx
        \\  mov    %%edx,%%eax
        \\  xor    %%edx,%%edx
        \\  jmp    6f
        \\ 1:
        \\  mov    %%eax,%%ecx
        \\  mov    0x10(%%esp),%%ebx
        \\  mov    0xc(%%esp),%%edx
        \\  mov    0x8(%%esp),%%eax
        \\ 2:
        \\  shr    %%ecx
        \\  rcr    %%ebx
        \\  shr    %%edx
        \\  rcr    %%eax
        \\  or     %%ecx,%%ecx
        \\  jne    2b
        \\  div    %%ebx
        \\  mov    %%eax,%%ecx
        \\  mull   0x14(%%esp)
        \\  xchg   %%eax,%%ecx
        \\  mull   0x10(%%esp)
        \\  add    %%ecx,%%edx
        \\  jb     3f
        \\  cmp    0xc(%%esp),%%edx
        \\  ja     3f
        \\  jb     4f
        \\  cmp    0x8(%%esp),%%eax
        \\  jbe    4f
        \\ 3:
        \\  sub    0x10(%%esp),%%eax
        \\  sbb    0x14(%%esp),%%edx
        \\ 4:
        \\  sub    0x8(%%esp),%%eax
        \\  sbb    0xc(%%esp),%%edx
        \\  neg    %%edx
        \\  neg    %%eax
        \\  sbb    $0x0,%%edx
        \\ 6:
        \\  pop    %%ebx
        \\  ret    $0x10
    );
}