count0bits.zig (7394B) - Raw
1 const std = @import("std"); 2 const builtin = @import("builtin"); 3 const common = @import("common.zig"); 4 5 pub const panic = common.panic; 6 7 comptime { 8 @export(&__clzsi2, .{ .name = "__clzsi2", .linkage = common.linkage, .visibility = common.visibility }); 9 @export(&__clzdi2, .{ .name = "__clzdi2", .linkage = common.linkage, .visibility = common.visibility }); 10 @export(&__clzti2, .{ .name = "__clzti2", .linkage = common.linkage, .visibility = common.visibility }); 11 @export(&__ctzsi2, .{ .name = "__ctzsi2", .linkage = common.linkage, .visibility = common.visibility }); 12 @export(&__ctzdi2, .{ .name = "__ctzdi2", .linkage = common.linkage, .visibility = common.visibility }); 13 @export(&__ctzti2, .{ .name = "__ctzti2", .linkage = common.linkage, .visibility = common.visibility }); 14 @export(&__ffssi2, .{ .name = "__ffssi2", .linkage = common.linkage, .visibility = common.visibility }); 15 @export(&__ffsdi2, .{ .name = "__ffsdi2", .linkage = common.linkage, .visibility = common.visibility }); 16 @export(&__ffsti2, .{ .name = "__ffsti2", .linkage = common.linkage, .visibility = common.visibility }); 17 } 18 19 // clz - count leading zeroes 20 // - clzXi2 for unoptimized little and big endian 21 // - __clzsi2_thumb1: assume a != 0 22 // - __clzsi2_arm32: assume a != 0 23 24 // ctz - count trailing zeroes 25 // - ctzXi2 for unoptimized little and big endian 26 27 // ffs - find first set 28 // * ffs = (a == 0) => 0, (a != 0) => ctz + 1 29 // * dont pay for `if (x == 0) return shift;` inside ctz 30 // - ffsXi2 for unoptimized little and big endian 31 32 inline fn clzXi2(comptime T: type, a: T) i32 { 33 var x = switch (@bitSizeOf(T)) { 34 32 => @as(u32, @bitCast(a)), 35 64 => @as(u64, @bitCast(a)), 36 128 => @as(u128, @bitCast(a)), 37 else => unreachable, 38 }; 39 var n: T = @bitSizeOf(T); 40 // Count first bit set using binary search, from Hacker's Delight 41 var y: @TypeOf(x) = 0; 42 comptime var shift: u8 = @bitSizeOf(T); 43 inline while (shift > 0) { 44 shift = shift >> 1; 45 y = x >> shift; 46 if (y != 0) { 47 n = n - shift; 48 x = y; 49 } 50 } 51 return @intCast(n - @as(T, @bitCast(x))); 52 } 53 54 fn __clzsi2_thumb1() callconv(.naked) void { 55 @setRuntimeSafety(false); 56 57 // Similar to the generic version with the last two rounds replaced by a LUT 58 asm volatile ( 59 \\ movs r1, #32 60 \\ lsrs r2, r0, #16 61 \\ beq 1f 62 \\ subs r1, #16 63 \\ movs r0, r2 64 \\ 1: 65 \\ lsrs r2, r0, #8 66 \\ beq 1f 67 \\ subs r1, #8 68 \\ movs r0, r2 69 \\ 1: 70 \\ lsrs r2, r0, #4 71 \\ beq 1f 72 \\ subs r1, #4 73 \\ movs r0, r2 74 \\ 1: 75 \\ adr r3, .lut 76 \\ ldrb r0, [r3, r0] 77 \\ subs r0, r1, r0 78 \\ bx lr 79 \\ .p2align 2 80 \\ // Number of bits set in the 0-15 range 81 \\ .lut: 82 \\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 83 ); 84 85 unreachable; 86 } 87 88 fn __clzsi2_arm32() callconv(.naked) void { 89 @setRuntimeSafety(false); 90 91 asm volatile ( 92 \\ // Assumption: n != 0 93 \\ // r0: n 94 \\ // r1: count of leading zeros in n + 1 95 \\ // r2: scratch register for shifted r0 96 \\ mov r1, #1 97 \\ 98 \\ // Basic block: 99 \\ // if ((r0 >> SHIFT) == 0) 100 \\ // r1 += SHIFT; 101 \\ // else 102 \\ // r0 >>= SHIFT; 103 \\ // for descending powers of two as SHIFT. 104 \\ lsrs r2, r0, #16 105 \\ movne r0, r2 106 \\ addeq r1, #16 107 \\ 108 \\ lsrs r2, r0, #8 109 \\ movne r0, r2 110 \\ addeq r1, #8 111 \\ 112 \\ lsrs r2, r0, #4 113 \\ movne r0, r2 114 \\ addeq r1, #4 115 \\ 116 \\ lsrs r2, r0, #2 117 \\ movne r0, r2 118 \\ addeq r1, #2 119 \\ 120 \\ // The basic block invariants at this point are (r0 >> 2) == 0 and 121 \\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. 122 \\ // 123 \\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)f 124 \\ // ---+----------------+----------------+------------+-------------- 125 \\ // 1 | 1 | 0 | 0 | 1 126 \\ // 2 | 0 | 1 | -1 | 0 127 \\ // 3 | 0 | 1 | -1 | 0 128 \\ // 129 \\ // The r1's initial value of 1 compensates for the 1 here. 130 \\ sub r0, r1, r0, lsr #1 131 \\ bx lr 132 ); 133 134 unreachable; 135 } 136 137 fn clzsi2_generic(a: i32) callconv(.c) i32 { 138 return clzXi2(i32, a); 139 } 140 141 pub const __clzsi2 = switch (builtin.cpu.arch) { 142 .arm, .armeb, .thumb, .thumbeb => impl: { 143 const use_thumb1 = 144 (builtin.cpu.arch.isThumb() or builtin.cpu.has(.arm, .noarm)) and !builtin.cpu.has(.arm, .thumb2); 145 146 if (use_thumb1) { 147 break :impl __clzsi2_thumb1; 148 } 149 // From here on we're either targeting Thumb2 or ARM. 150 else if (!builtin.cpu.arch.isThumb()) { 151 break :impl __clzsi2_arm32; 152 } 153 // Use the generic implementation otherwise. 154 else break :impl clzsi2_generic; 155 }, 156 else => clzsi2_generic, 157 }; 158 159 pub fn __clzdi2(a: i64) callconv(.c) i32 { 160 return clzXi2(i64, a); 161 } 162 163 pub fn __clzti2(a: i128) callconv(.c) i32 { 164 return clzXi2(i128, a); 165 } 166 167 inline fn ctzXi2(comptime T: type, a: T) i32 { 168 var x = switch (@bitSizeOf(T)) { 169 32 => @as(u32, @bitCast(a)), 170 64 => @as(u64, @bitCast(a)), 171 128 => @as(u128, @bitCast(a)), 172 else => unreachable, 173 }; 174 var n: T = 1; 175 // Number of trailing zeroes as binary search, from Hacker's Delight 176 var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x)); 177 comptime var shift = @bitSizeOf(T); 178 if (x == 0) return shift; 179 inline while (shift > 1) { 180 shift = shift >> 1; 181 mask = mask >> shift; 182 if ((x & mask) == 0) { 183 n = n + shift; 184 x = x >> shift; 185 } 186 } 187 return @intCast(n - @as(T, @bitCast((x & 1)))); 188 } 189 190 pub fn __ctzsi2(a: i32) callconv(.c) i32 { 191 return ctzXi2(i32, a); 192 } 193 194 pub fn __ctzdi2(a: i64) callconv(.c) i32 { 195 return ctzXi2(i64, a); 196 } 197 198 pub fn __ctzti2(a: i128) callconv(.c) i32 { 199 return ctzXi2(i128, a); 200 } 201 202 inline fn ffsXi2(comptime T: type, a: T) i32 { 203 var x: std.meta.Int(.unsigned, @typeInfo(T).int.bits) = @bitCast(a); 204 var n: T = 1; 205 // adapted from Number of trailing zeroes (see ctzXi2) 206 var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x)); 207 comptime var shift = @bitSizeOf(T); 208 // In contrast to ctz return 0 209 if (x == 0) return 0; 210 inline while (shift > 1) { 211 shift = shift >> 1; 212 mask = mask >> shift; 213 if ((x & mask) == 0) { 214 n = n + shift; 215 x = x >> shift; 216 } 217 } 218 // return ctz + 1 219 return @as(i32, @intCast(n - @as(T, @bitCast((x & 1))))) + 1; 220 } 221 222 pub fn __ffssi2(a: i32) callconv(.c) i32 { 223 return ffsXi2(i32, a); 224 } 225 226 pub fn __ffsdi2(a: i64) callconv(.c) i32 { 227 return ffsXi2(i64, a); 228 } 229 230 pub fn __ffsti2(a: i128) callconv(.c) i32 { 231 return ffsXi2(i128, a); 232 } 233 234 test { 235 _ = @import("clzsi2_test.zig"); 236 _ = @import("clzdi2_test.zig"); 237 _ = @import("clzti2_test.zig"); 238 239 _ = @import("ctzsi2_test.zig"); 240 _ = @import("ctzdi2_test.zig"); 241 _ = @import("ctzti2_test.zig"); 242 243 _ = @import("ffssi2_test.zig"); 244 _ = @import("ffsdi2_test.zig"); 245 _ = @import("ffsti2_test.zig"); 246 }