zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

count0bits.zig (7394B) - Raw


      1 const std = @import("std");
      2 const builtin = @import("builtin");
      3 const common = @import("common.zig");
      4 
      5 pub const panic = common.panic;
      6 
      7 comptime {
      8     @export(&__clzsi2, .{ .name = "__clzsi2", .linkage = common.linkage, .visibility = common.visibility });
      9     @export(&__clzdi2, .{ .name = "__clzdi2", .linkage = common.linkage, .visibility = common.visibility });
     10     @export(&__clzti2, .{ .name = "__clzti2", .linkage = common.linkage, .visibility = common.visibility });
     11     @export(&__ctzsi2, .{ .name = "__ctzsi2", .linkage = common.linkage, .visibility = common.visibility });
     12     @export(&__ctzdi2, .{ .name = "__ctzdi2", .linkage = common.linkage, .visibility = common.visibility });
     13     @export(&__ctzti2, .{ .name = "__ctzti2", .linkage = common.linkage, .visibility = common.visibility });
     14     @export(&__ffssi2, .{ .name = "__ffssi2", .linkage = common.linkage, .visibility = common.visibility });
     15     @export(&__ffsdi2, .{ .name = "__ffsdi2", .linkage = common.linkage, .visibility = common.visibility });
     16     @export(&__ffsti2, .{ .name = "__ffsti2", .linkage = common.linkage, .visibility = common.visibility });
     17 }
     18 
     19 // clz - count leading zeroes
     20 // - clzXi2 for unoptimized little and big endian
     21 // - __clzsi2_thumb1: assume a != 0
     22 // - __clzsi2_arm32: assume a != 0
     23 
     24 // ctz - count trailing zeroes
     25 // - ctzXi2 for unoptimized little and big endian
     26 
     27 // ffs - find first set
     28 // * ffs = (a == 0) => 0, (a != 0) => ctz + 1
     29 // * dont pay for `if (x == 0) return shift;` inside ctz
     30 // - ffsXi2 for unoptimized little and big endian
     31 
     32 inline fn clzXi2(comptime T: type, a: T) i32 {
     33     var x = switch (@bitSizeOf(T)) {
     34         32 => @as(u32, @bitCast(a)),
     35         64 => @as(u64, @bitCast(a)),
     36         128 => @as(u128, @bitCast(a)),
     37         else => unreachable,
     38     };
     39     var n: T = @bitSizeOf(T);
     40     // Count first bit set using binary search, from Hacker's Delight
     41     var y: @TypeOf(x) = 0;
     42     comptime var shift: u8 = @bitSizeOf(T);
     43     inline while (shift > 0) {
     44         shift = shift >> 1;
     45         y = x >> shift;
     46         if (y != 0) {
     47             n = n - shift;
     48             x = y;
     49         }
     50     }
     51     return @intCast(n - @as(T, @bitCast(x)));
     52 }
     53 
     54 fn __clzsi2_thumb1() callconv(.naked) void {
     55     @setRuntimeSafety(false);
     56 
     57     // Similar to the generic version with the last two rounds replaced by a LUT
     58     asm volatile (
     59         \\ movs r1, #32
     60         \\ lsrs r2, r0, #16
     61         \\ beq 1f
     62         \\ subs r1, #16
     63         \\ movs r0, r2
     64         \\ 1:
     65         \\ lsrs r2, r0, #8
     66         \\ beq 1f
     67         \\ subs r1, #8
     68         \\ movs r0, r2
     69         \\ 1:
     70         \\ lsrs r2, r0, #4
     71         \\ beq 1f
     72         \\ subs r1, #4
     73         \\ movs r0, r2
     74         \\ 1:
     75         \\ adr r3, .lut
     76         \\ ldrb r0, [r3, r0]
     77         \\ subs r0, r1, r0
     78         \\ bx lr
     79         \\ .p2align 2
     80         \\ // Number of bits set in the 0-15 range
     81         \\ .lut:
     82         \\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4
     83     );
     84 
     85     unreachable;
     86 }
     87 
     88 fn __clzsi2_arm32() callconv(.naked) void {
     89     @setRuntimeSafety(false);
     90 
     91     asm volatile (
     92         \\ // Assumption: n != 0
     93         \\ // r0: n
     94         \\ // r1: count of leading zeros in n + 1
     95         \\ // r2: scratch register for shifted r0
     96         \\ mov r1, #1
     97         \\
     98         \\ // Basic block:
     99         \\ // if ((r0 >> SHIFT) == 0)
    100         \\ //   r1 += SHIFT;
    101         \\ // else
    102         \\ //   r0 >>= SHIFT;
    103         \\ // for descending powers of two as SHIFT.
    104         \\ lsrs r2, r0, #16
    105         \\ movne r0, r2
    106         \\ addeq r1, #16
    107         \\
    108         \\ lsrs r2, r0, #8
    109         \\ movne r0, r2
    110         \\ addeq r1, #8
    111         \\
    112         \\ lsrs r2, r0, #4
    113         \\ movne r0, r2
    114         \\ addeq r1, #4
    115         \\
    116         \\ lsrs r2, r0, #2
    117         \\ movne r0, r2
    118         \\ addeq r1, #2
    119         \\
    120         \\ // The basic block invariants at this point are (r0 >> 2) == 0 and
    121         \\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
    122         \\ //
    123         \\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)f
    124         \\ // ---+----------------+----------------+------------+--------------
    125         \\ // 1  | 1              | 0              | 0          | 1
    126         \\ // 2  | 0              | 1              | -1         | 0
    127         \\ // 3  | 0              | 1              | -1         | 0
    128         \\ //
    129         \\ // The r1's initial value of 1 compensates for the 1 here.
    130         \\ sub r0, r1, r0, lsr #1
    131         \\ bx lr
    132     );
    133 
    134     unreachable;
    135 }
    136 
    137 fn clzsi2_generic(a: i32) callconv(.c) i32 {
    138     return clzXi2(i32, a);
    139 }
    140 
    141 pub const __clzsi2 = switch (builtin.cpu.arch) {
    142     .arm, .armeb, .thumb, .thumbeb => impl: {
    143         const use_thumb1 =
    144             (builtin.cpu.arch.isThumb() or builtin.cpu.has(.arm, .noarm)) and !builtin.cpu.has(.arm, .thumb2);
    145 
    146         if (use_thumb1) {
    147             break :impl __clzsi2_thumb1;
    148         }
    149         // From here on we're either targeting Thumb2 or ARM.
    150         else if (!builtin.cpu.arch.isThumb()) {
    151             break :impl __clzsi2_arm32;
    152         }
    153         // Use the generic implementation otherwise.
    154         else break :impl clzsi2_generic;
    155     },
    156     else => clzsi2_generic,
    157 };
    158 
    159 pub fn __clzdi2(a: i64) callconv(.c) i32 {
    160     return clzXi2(i64, a);
    161 }
    162 
    163 pub fn __clzti2(a: i128) callconv(.c) i32 {
    164     return clzXi2(i128, a);
    165 }
    166 
    167 inline fn ctzXi2(comptime T: type, a: T) i32 {
    168     var x = switch (@bitSizeOf(T)) {
    169         32 => @as(u32, @bitCast(a)),
    170         64 => @as(u64, @bitCast(a)),
    171         128 => @as(u128, @bitCast(a)),
    172         else => unreachable,
    173     };
    174     var n: T = 1;
    175     // Number of trailing zeroes as binary search, from Hacker's Delight
    176     var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
    177     comptime var shift = @bitSizeOf(T);
    178     if (x == 0) return shift;
    179     inline while (shift > 1) {
    180         shift = shift >> 1;
    181         mask = mask >> shift;
    182         if ((x & mask) == 0) {
    183             n = n + shift;
    184             x = x >> shift;
    185         }
    186     }
    187     return @intCast(n - @as(T, @bitCast((x & 1))));
    188 }
    189 
    190 pub fn __ctzsi2(a: i32) callconv(.c) i32 {
    191     return ctzXi2(i32, a);
    192 }
    193 
    194 pub fn __ctzdi2(a: i64) callconv(.c) i32 {
    195     return ctzXi2(i64, a);
    196 }
    197 
    198 pub fn __ctzti2(a: i128) callconv(.c) i32 {
    199     return ctzXi2(i128, a);
    200 }
    201 
    202 inline fn ffsXi2(comptime T: type, a: T) i32 {
    203     var x: std.meta.Int(.unsigned, @typeInfo(T).int.bits) = @bitCast(a);
    204     var n: T = 1;
    205     // adapted from Number of trailing zeroes (see ctzXi2)
    206     var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
    207     comptime var shift = @bitSizeOf(T);
    208     // In contrast to ctz return 0
    209     if (x == 0) return 0;
    210     inline while (shift > 1) {
    211         shift = shift >> 1;
    212         mask = mask >> shift;
    213         if ((x & mask) == 0) {
    214             n = n + shift;
    215             x = x >> shift;
    216         }
    217     }
    218     // return ctz + 1
    219     return @as(i32, @intCast(n - @as(T, @bitCast((x & 1))))) + 1;
    220 }
    221 
    222 pub fn __ffssi2(a: i32) callconv(.c) i32 {
    223     return ffsXi2(i32, a);
    224 }
    225 
    226 pub fn __ffsdi2(a: i64) callconv(.c) i32 {
    227     return ffsXi2(i64, a);
    228 }
    229 
    230 pub fn __ffsti2(a: i128) callconv(.c) i32 {
    231     return ffsXi2(i128, a);
    232 }
    233 
    234 test {
    235     _ = @import("clzsi2_test.zig");
    236     _ = @import("clzdi2_test.zig");
    237     _ = @import("clzti2_test.zig");
    238 
    239     _ = @import("ctzsi2_test.zig");
    240     _ = @import("ctzdi2_test.zig");
    241     _ = @import("ctzti2_test.zig");
    242 
    243     _ = @import("ffssi2_test.zig");
    244     _ = @import("ffsdi2_test.zig");
    245     _ = @import("ffsti2_test.zig");
    246 }