zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

aullrem.zig (2618B) - Raw


      1 const std = @import("std");
      2 const builtin = @import("builtin");
      3 const arch = builtin.cpu.arch;
      4 const os = builtin.os.tag;
      5 const abi = builtin.abi;
      6 const common = @import("common.zig");
      7 
      8 pub const panic = common.panic;
      9 
     comptime {
         // The helpers are exported only for x86 builds where
         // `common.want_windows_msvc_or_itanium_abi` holds and libc is not linked.
         const provide_msvc_helpers = arch == .x86 and
             common.want_windows_msvc_or_itanium_abi and
             !builtin.link_libc;

         if (provide_msvc_helpers) {
             // The leading "\x01" keeps LLVM from applying the stdcall name
             // mangling to these MSVC builtins.
             @export(&_allrem, .{
                 .name = "\x01__allrem",
                 .linkage = common.linkage,
                 .visibility = common.visibility,
             });
             @export(&_aullrem, .{
                 .name = "\x01__aullrem",
                 .linkage = common.linkage,
                 .visibility = common.visibility,
             });
         }
     }
     17 
     /// Signed 64-bit remainder helper (exported as `__allrem` by the comptime
     /// block in this file).
     ///
     /// Returns the remainder of `a / b` where the quotient truncates toward zero,
     /// so a non-zero result always carries the sign of the dividend `a`; the sign
     /// of `b` has no influence. `b == 0` is not a valid input (the unsigned `%`
     /// below divides by zero).
     pub fn _allrem(a: i64, b: i64) callconv(.{ .x86_stdcall = .{} }) i64 {
         // Arithmetic shift smears the sign bit: 0 for non-negative, -1 for negative.
         const s_a = a >> (64 - 1);
         const s_b = b >> (64 - 1);

         // Branch-free absolute values. The wrapping subtraction keeps the most
         // negative i64 well-defined: its bit pattern is the correct magnitude
         // once reinterpreted as u64.
         const an = (a ^ s_a) -% s_a;
         const bn = (b ^ s_b) -% s_b;

         const r = @as(u64, @bitCast(an)) % @as(u64, @bitCast(bn));

         // Re-apply the dividend's sign only. Combining both signs is the rule
         // for a quotient and would yield e.g. 7 rem -2 == -1 instead of 1.
         return (@as(i64, @bitCast(r)) ^ s_a) -% s_a;
     }
     29 
     30 pub fn _aullrem() callconv(.naked) void {
            // Unsigned 64-bit remainder routine, exported as `__aullrem` by the
            // comptime block in this file. It is a naked function: the assembly
            // below reads both 64-bit operands directly from the stack, leaves
            // the remainder in EDX:EAX and pops the 16 bytes of arguments itself
            // (`ret $0x10`).
     31     @setRuntimeSafety(false);
     32 
     33     // The stack layout on entry (before `push %ebx`) is:
     34     // ESP+16 divisor (hi)
     35     // ESP+12 divisor (low)
     36     // ESP+8 dividend (hi)
     37     // ESP+4 dividend (low)
     38     // ESP   return address
            //
            // After `push %ebx` every slot is 4 bytes further from ESP, so the
            // assembly addresses the dividend at 0x8/0xc(%esp) and the divisor at
            // 0x10/0x14(%esp).
     39 
            // Outline of the assembly:
            //   entry  Save EBX. If the divisor's high word is zero, do a two-step
            //          32-bit long division by the divisor's low word: first the
            //          dividend's high word, then its low word with the previous
            //          remainder still in EDX. The final EDX is the remainder; it
            //          is moved to EAX and EDX is cleared.
            //   1:     Otherwise load ECX:EBX = divisor and EDX:EAX = dividend.
            //   2:     Shift both pairs right one bit at a time until ECX is zero,
            //          then `div %ebx` leaves a quotient estimate in EAX.
            //          The estimate is multiplied by the original divisor (low 64
            //          bits of the product end up in EDX:EAX). A carry out of the
            //          addition, or a product above the dividend, jumps to 3
            //          (estimate one too large); otherwise control goes to 4.
            //   3:     Subtract the divisor from the product once.
            //   4:     Compute product - dividend and negate the 64-bit value,
            //          which gives dividend - product, i.e. the remainder.
            //   6:     Restore EBX and return, removing the arguments.
     40     asm volatile (
     41         \\  push   %%ebx
     42         \\  mov    0x14(%%esp),%%eax
     43         \\  or     %%eax,%%eax
     44         \\  jne    1f
     45         \\  mov    0x10(%%esp),%%ecx
     46         \\  mov    0xc(%%esp),%%eax
     47         \\  xor    %%edx,%%edx
     48         \\  div    %%ecx
     49         \\  mov    0x8(%%esp),%%eax
     50         \\  div    %%ecx
     51         \\  mov    %%edx,%%eax
     52         \\  xor    %%edx,%%edx
     53         \\  jmp    6f
     54         \\ 1:
     55         \\  mov    %%eax,%%ecx
     56         \\  mov    0x10(%%esp),%%ebx
     57         \\  mov    0xc(%%esp),%%edx
     58         \\  mov    0x8(%%esp),%%eax
     59         \\ 2:
     60         \\  shr    %%ecx
     61         \\  rcr    %%ebx
     62         \\  shr    %%edx
     63         \\  rcr    %%eax
     64         \\  or     %%ecx,%%ecx
     65         \\  jne    2b
     66         \\  div    %%ebx
     67         \\  mov    %%eax,%%ecx
     68         \\  mull   0x14(%%esp)
     69         \\  xchg   %%eax,%%ecx
     70         \\  mull   0x10(%%esp)
     71         \\  add    %%ecx,%%edx
     72         \\  jb     3f
     73         \\  cmp    0xc(%%esp),%%edx
     74         \\  ja     3f
     75         \\  jb     4f
     76         \\  cmp    0x8(%%esp),%%eax
     77         \\  jbe    4f
     78         \\ 3:
     79         \\  sub    0x10(%%esp),%%eax
     80         \\  sbb    0x14(%%esp),%%edx
     81         \\ 4:
     82         \\  sub    0x8(%%esp),%%eax
     83         \\  sbb    0xc(%%esp),%%edx
     84         \\  neg    %%edx
     85         \\  neg    %%eax
     86         \\  sbb    $0x0,%%edx
     87         \\ 6:
     88         \\  pop    %%ebx
     89         \\  ret    $0x10
     90     );
     91 }