zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

aulldiv.zig (2563B) - Raw


      1 const std = @import("std");
      2 const builtin = @import("builtin");
      3 const arch = builtin.cpu.arch;
      4 const os = builtin.os.tag;
      5 const abi = builtin.abi;
      6 const common = @import("common.zig");
      7 
      8 pub const panic = common.panic;
      9 
     10 comptime {
     11     if (arch == .x86 and common.want_windows_msvc_or_itanium_abi and !builtin.link_libc) {
     12         // Don't let LLVM apply the stdcall name mangling on those MSVC builtins
     13         @export(&_alldiv, .{ .name = "\x01__alldiv", .linkage = common.linkage, .visibility = common.visibility });
     14         @export(&_aulldiv, .{ .name = "\x01__aulldiv", .linkage = common.linkage, .visibility = common.visibility });
     15     }
     16 }
     17 
     18 pub fn _alldiv(a: i64, b: i64) callconv(.{ .x86_stdcall = .{} }) i64 {
     19     const s_a = a >> (64 - 1);
     20     const s_b = b >> (64 - 1);
     21 
     22     const an = (a ^ s_a) -% s_a;
     23     const bn = (b ^ s_b) -% s_b;
     24 
     25     const r = @as(u64, @bitCast(an)) / @as(u64, @bitCast(bn));
     26     const s = s_a ^ s_b;
     27     return (@as(i64, @bitCast(r)) ^ s) -% s;
     28 }
     29 
/// Unsigned 64-bit division routine written entirely in 32-bit x86 assembly.
///
/// Both operands are read from the stack (see the layout inside the body),
/// the quotient is left in EDX:EAX, and the final `ret $0x10` removes the
/// 16 bytes of arguments from the stack when returning to the caller.
///
/// The function is `naked`, so the assembly saves and restores the registers
/// it uses as scratch (EBX and ESI) by itself.
///
/// NOTE(review): there is no explicit zero-divisor check; a zero divisor takes
/// the "high word is zero" path and reaches `div` with ECX == 0 — confirm that
/// this is the intended behavior.
pub fn _aulldiv() callconv(.naked) void {
    @setRuntimeSafety(false);

    // The stack layout is:
    // ESP+16 divisor (hi)
    // ESP+12 divisor (low)
    // ESP+8 dividend (hi)
    // ESP+4 dividend (low)
    // ESP   return address
    //
    // The two `push` instructions at the top of the assembly lower ESP by 8,
    // so inside the assembly the operands are addressed as:
    //   0x18(%esp) divisor (hi)    0x14(%esp) divisor (low)
    //   0x10(%esp) dividend (hi)   0x0c(%esp) dividend (low)
    //
    // Outline of the assembly:
    // * Divisor high word == 0 (fall-through before label 1): two chained
    //   32-bit `div`s by the divisor low word. The first divides the dividend
    //   high word (with EDX cleared) and its quotient is kept in EBX; the
    //   second divides remainder:dividend-low. The result is returned as
    //   EDX:EAX = EBX:EAX.
    // * Divisor high word != 0 (label 1): load divisor into ECX:EBX and
    //   dividend into EDX:EAX, then (label 2) shift both right one bit at a
    //   time until ECX is zero. A single `div` by EBX then gives a quotient
    //   estimate, saved in ESI.
    // * The estimate is verified by multiplying it with the original divisor:
    //   estimate * divisor-hi (low 32 bits kept in ECX) is added to the high
    //   half of estimate * divisor-low. If that addition carries, or the
    //   product in EDX:EAX is greater than the original dividend, the
    //   estimate is decremented by one (label 3).
    // * Label 4 returns the estimate with a zero high word (EDX = 0,
    //   EAX = ESI); label 5 is the shared epilogue that restores ESI and EBX.

    asm volatile (
        \\  push   %%ebx
        \\  push   %%esi
        \\  mov    0x18(%%esp),%%eax
        \\  or     %%eax,%%eax
        \\  jne    1f
        \\  mov    0x14(%%esp),%%ecx
        \\  mov    0x10(%%esp),%%eax
        \\  xor    %%edx,%%edx
        \\  div    %%ecx
        \\  mov    %%eax,%%ebx
        \\  mov    0xc(%%esp),%%eax
        \\  div    %%ecx
        \\  mov    %%ebx,%%edx
        \\  jmp    5f
        \\ 1:
        \\  mov    %%eax,%%ecx
        \\  mov    0x14(%%esp),%%ebx
        \\  mov    0x10(%%esp),%%edx
        \\  mov    0xc(%%esp),%%eax
        \\ 2:
        \\  shr    %%ecx
        \\  rcr    %%ebx
        \\  shr    %%edx
        \\  rcr    %%eax
        \\  or     %%ecx,%%ecx
        \\  jne    2b
        \\  div    %%ebx
        \\  mov    %%eax,%%esi
        \\  mull   0x18(%%esp)
        \\  mov    %%eax,%%ecx
        \\  mov    0x14(%%esp),%%eax
        \\  mul    %%esi
        \\  add    %%ecx,%%edx
        \\  jb     3f
        \\  cmp    0x10(%%esp),%%edx
        \\  ja     3f
        \\  jb     4f
        \\  cmp    0xc(%%esp),%%eax
        \\  jbe    4f
        \\ 3:
        \\  dec    %%esi
        \\ 4:
        \\  xor    %%edx,%%edx
        \\  mov    %%esi,%%eax
        \\ 5:
        \\  pop    %%esi
        \\  pop    %%ebx
        \\  ret    $0x10
    );
}