Optimized std.mem.eql with SIMD (#18389)
* optimized memeql * add `sched_setaffinity` to `std.os.linux` Co-authored-by: Protty <45520026+kprotty@users.noreply.github.com> Co-authored-by: Ryan Liptak <squeek502@hotmail.com>
This commit is contained in:
@@ -634,14 +634,77 @@ test "lessThan" {
|
||||
|
||||
/// Compares two slices and returns whether they are equal.
|
||||
pub fn eql(comptime T: type, a: []const T, b: []const T) bool {
|
||||
if (@sizeOf(T) == 0) return true;
|
||||
if (!@inComptime() and std.meta.hasUniqueRepresentation(T)) return eqlBytes(sliceAsBytes(a), sliceAsBytes(b));
|
||||
|
||||
if (a.len != b.len) return false;
|
||||
if (a.ptr == b.ptr) return true;
|
||||
if (a.len == 0 or a.ptr == b.ptr) return true;
|
||||
|
||||
for (a, b) |a_elem, b_elem| {
|
||||
if (a_elem != b_elem) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// std.mem.eql heavily optimized for slices of bytes.
|
||||
fn eqlBytes(a: []const u8, b: []const u8) bool {
|
||||
if (a.len != b.len) return false;
|
||||
if (a.len == 0 or a.ptr == b.ptr) return true;
|
||||
|
||||
if (a.len <= 16) {
|
||||
if (a.len < 4) {
|
||||
const x = (a[0] ^ b[0]) | (a[a.len - 1] ^ b[a.len - 1]) | (a[a.len / 2] ^ b[a.len / 2]);
|
||||
return x == 0;
|
||||
}
|
||||
var x: u32 = 0;
|
||||
for ([_]usize{ 0, a.len - 4, (a.len / 8) * 4, a.len - 4 - ((a.len / 8) * 4) }) |n| {
|
||||
x |= @as(u32, @bitCast(a[n..][0..4].*)) ^ @as(u32, @bitCast(b[n..][0..4].*));
|
||||
}
|
||||
return x == 0;
|
||||
}
|
||||
|
||||
// Figure out the fastest way to scan through the input in chunks.
|
||||
// Uses vectors when supported and falls back to usize/words when not.
|
||||
const Scan = if (std.simd.suggestVectorSize(u8)) |vec_size|
|
||||
struct {
|
||||
pub const size = vec_size;
|
||||
pub const Chunk = @Vector(size, u8);
|
||||
pub inline fn isNotEqual(chunk_a: Chunk, chunk_b: Chunk) bool {
|
||||
return @reduce(.Or, chunk_a != chunk_b);
|
||||
}
|
||||
}
|
||||
else
|
||||
struct {
|
||||
pub const size = @sizeOf(usize);
|
||||
pub const Chunk = usize;
|
||||
pub inline fn isNotEqual(chunk_a: Chunk, chunk_b: Chunk) bool {
|
||||
return chunk_a != chunk_b;
|
||||
}
|
||||
};
|
||||
|
||||
inline for (1..6) |s| {
|
||||
const n = 16 << s;
|
||||
if (n <= Scan.size and a.len <= n) {
|
||||
const V = @Vector(n / 2, u8);
|
||||
var x = @as(V, a[0 .. n / 2].*) ^ @as(V, b[0 .. n / 2].*);
|
||||
x |= @as(V, a[a.len - n / 2 ..][0 .. n / 2].*) ^ @as(V, b[a.len - n / 2 ..][0 .. n / 2].*);
|
||||
const zero: V = @splat(0);
|
||||
return !@reduce(.Or, x != zero);
|
||||
}
|
||||
}
|
||||
// Compare inputs in chunks at a time (excluding the last chunk).
|
||||
for (0..(a.len - 1) / Scan.size) |i| {
|
||||
const a_chunk: Scan.Chunk = @bitCast(a[i * Scan.size ..][0..Scan.size].*);
|
||||
const b_chunk: Scan.Chunk = @bitCast(b[i * Scan.size ..][0..Scan.size].*);
|
||||
if (Scan.isNotEqual(a_chunk, b_chunk)) return false;
|
||||
}
|
||||
|
||||
// Compare the last chunk using an overlapping read (similar to the previous size strategies).
|
||||
const last_a_chunk: Scan.Chunk = @bitCast(a[a.len - Scan.size ..][0..Scan.size].*);
|
||||
const last_b_chunk: Scan.Chunk = @bitCast(b[a.len - Scan.size ..][0..Scan.size].*);
|
||||
return !Scan.isNotEqual(last_a_chunk, last_b_chunk);
|
||||
}
|
||||
|
||||
/// Compares two slices and returns the index of the first inequality.
|
||||
/// Returns null if the slices are equal.
|
||||
pub fn indexOfDiff(comptime T: type, a: []const T, b: []const T) ?usize {
|
||||
|
||||
@@ -1553,6 +1553,16 @@ pub fn sched_getaffinity(pid: pid_t, size: usize, set: *cpu_set_t) usize {
|
||||
return 0;
|
||||
}
|
||||
|
||||
pub fn sched_setaffinity(pid: pid_t, set: *const cpu_set_t) !void {
|
||||
const size = @sizeOf(cpu_set_t);
|
||||
const rc = syscall3(.sched_setaffinity, @as(usize, @bitCast(@as(isize, pid))), size, @intFromPtr(set));
|
||||
|
||||
switch (std.os.errno(rc)) {
|
||||
.SUCCESS => return,
|
||||
else => |err| return std.os.unexpectedErrno(err),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn epoll_create() usize {
|
||||
return epoll_create1(0);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user