aulldiv.zig (2563B) - Raw
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const os = builtin.os.tag;
const abi = builtin.abi;
const common = @import("common.zig");

pub const panic = common.panic;

// 64-bit division helpers under their MSVC names. They are only exported for
// 32-bit x86 when the MSVC/Itanium ABI is wanted and libc is not linked.
comptime {
    if (arch == .x86 and common.want_windows_msvc_or_itanium_abi and !builtin.link_libc) {
        // The leading "\x01" keeps LLVM from applying the stdcall name
        // mangling to these MSVC builtins.
        @export(&_alldiv, .{
            .name = "\x01__alldiv",
            .linkage = common.linkage,
            .visibility = common.visibility,
        });
        @export(&_aulldiv, .{
            .name = "\x01__aulldiv",
            .linkage = common.linkage,
            .visibility = common.visibility,
        });
    }
}

/// Signed 64-bit division, truncating toward zero.
///
/// Both operands are reduced to their magnitudes, divided as unsigned 64-bit
/// values, and the sign of the quotient is applied afterwards. All sign
/// adjustments use wrapping arithmetic, so `minInt(i64)` operands do not trap.
pub fn _alldiv(a: i64, b: i64) callconv(.{ .x86_stdcall = .{} }) i64 {
    const sign_shift = @bitSizeOf(i64) - 1;

    // An arithmetic shift by 63 replicates the sign bit: 0 for non-negative
    // values, -1 (all ones) for negative values.
    const sign_a = a >> sign_shift;
    const sign_b = b >> sign_shift;

    // `(x ^ mask) -% mask` negates x when mask is -1 and is a no-op when mask is 0.
    const magnitude_a: u64 = @bitCast((a ^ sign_a) -% sign_a);
    const magnitude_b: u64 = @bitCast((b ^ sign_b) -% sign_b);

    // The quotient is negative exactly when the operand signs differ.
    const sign_q = sign_a ^ sign_b;
    const unsigned_q: i64 = @bitCast(magnitude_a / magnitude_b);
    return (unsigned_q ^ sign_q) -% sign_q;
}

/// Unsigned 64-bit division written directly in x86 assembly.
///
/// Arguments are read from the stack, the quotient is left in EDX:EAX, and
/// `ret $0x10` removes the 16 bytes of arguments before returning.
pub fn _aulldiv() callconv(.naked) void {
    @setRuntimeSafety(false);

    // Stack on entry:
    //   ESP+16  divisor  (high 32 bits)
    //   ESP+12  divisor  (low 32 bits)
    //   ESP+8   dividend (high 32 bits)
    //   ESP+4   dividend (low 32 bits)
    //   ESP     return address
    //
    // EBX and ESI are pushed first, so inside the body every argument sits
    // 8 bytes higher: 0x0c/0x10 = dividend low/high, 0x14/0x18 = divisor low/high.
    //
    // Divisor high word == 0 (falls through the first `jne`):
    //   Two chained 32-bit `div`s by the divisor low word. The first divides
    //   the dividend high word and yields the quotient high word (parked in
    //   EBX); its remainder stays in EDX and feeds the second `div`, which
    //   yields the quotient low word. Jumps to 5 with the result in EDX:EAX.
    //
    // Label 1: divisor high word != 0.
    //   Loads the divisor into ECX:EBX and the dividend into EDX:EAX.
    // Label 2: shifts both values right one bit at a time until the divisor
    //   high word (ECX) is zero, then a single `div` gives a quotient estimate
    //   (kept in ESI). The estimate is multiplied by the original 64-bit
    //   divisor and the product is compared with the original dividend.
    // Label 3: reached when that product overflowed 64 bits or is greater
    //   than the dividend; the estimate is decremented by one.
    // Label 4: the quotient fits in 32 bits here, so EDX is cleared and ESI
    //   is moved to EAX.
    // Label 5: common exit; restores ESI/EBX and returns.

    asm volatile (
        \\ push %%ebx
        \\ push %%esi
        \\ mov 0x18(%%esp),%%eax
        \\ or %%eax,%%eax
        \\ jne 1f
        \\ mov 0x14(%%esp),%%ecx
        \\ mov 0x10(%%esp),%%eax
        \\ xor %%edx,%%edx
        \\ div %%ecx
        \\ mov %%eax,%%ebx
        \\ mov 0xc(%%esp),%%eax
        \\ div %%ecx
        \\ mov %%ebx,%%edx
        \\ jmp 5f
        \\ 1:
        \\ mov %%eax,%%ecx
        \\ mov 0x14(%%esp),%%ebx
        \\ mov 0x10(%%esp),%%edx
        \\ mov 0xc(%%esp),%%eax
        \\ 2:
        \\ shr %%ecx
        \\ rcr %%ebx
        \\ shr %%edx
        \\ rcr %%eax
        \\ or %%ecx,%%ecx
        \\ jne 2b
        \\ div %%ebx
        \\ mov %%eax,%%esi
        \\ mull 0x18(%%esp)
        \\ mov %%eax,%%ecx
        \\ mov 0x14(%%esp),%%eax
        \\ mul %%esi
        \\ add %%ecx,%%edx
        \\ jb 3f
        \\ cmp 0x10(%%esp),%%edx
        \\ ja 3f
        \\ jb 4f
        \\ cmp 0xc(%%esp),%%eax
        \\ jbe 4f
        \\ 3:
        \\ dec %%esi
        \\ 4:
        \\ xor %%edx,%%edx
        \\ mov %%esi,%%eax
        \\ 5:
        \\ pop %%esi
        \\ pop %%ebx
        \\ ret $0x10
    );
}