commit 9cf28d1e9bc940b13cd47efb778fd42a4b3b88de (tree)
parent f3227598ebe9ac7e330fea0259d4290ee31e96b9
Author: Matthew Lugg <mlugg@mlugg.co.uk>
Date: Fri, 8 Mar 2024 21:59:07 +0000
Merge pull request #19214 from mlugg/fuck-usingnamespace
std: fuck usingnamespace
Diffstat:
10 files changed, 5565 insertions(+), 5661 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -291,7 +291,8 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/lib/std/os/linux/errno/generic.zig"
"${CMAKE_SOURCE_DIR}/lib/std/os/linux/x86_64.zig"
"${CMAKE_SOURCE_DIR}/lib/std/os/linux.zig"
- "${CMAKE_SOURCE_DIR}/lib/std/os/linux/io_uring.zig"
+ "${CMAKE_SOURCE_DIR}/lib/std/os/linux/IoUring.zig"
+ "${CMAKE_SOURCE_DIR}/lib/std/os/linux/io_uring_sqe.zig"
"${CMAKE_SOURCE_DIR}/lib/std/os/linux/x86_64.zig"
"${CMAKE_SOURCE_DIR}/lib/std/os/windows.zig"
"${CMAKE_SOURCE_DIR}/lib/std/os/windows/ntstatus.zig"
diff --git a/lib/std/c.zig b/lib/std/c.zig
@@ -1494,38 +1494,33 @@ pub const speed_t = switch (native_os) {
pub const whence_t = if (native_os == .wasi) std.os.wasi.whence_t else c_int;
// Unix-like systems
-pub usingnamespace switch (native_os) {
- .netbsd, .windows => struct {},
- else => struct {
- pub const DIR = opaque {};
- pub extern "c" fn opendir(pathname: [*:0]const u8) ?*DIR;
- pub extern "c" fn fdopendir(fd: c_int) ?*DIR;
- pub extern "c" fn rewinddir(dp: *DIR) void;
- pub extern "c" fn closedir(dp: *DIR) c_int;
- pub extern "c" fn telldir(dp: *DIR) c_long;
- pub extern "c" fn seekdir(dp: *DIR, loc: c_long) void;
+pub const DIR = opaque {};
+pub extern "c" fn opendir(pathname: [*:0]const u8) ?*DIR;
+pub extern "c" fn fdopendir(fd: c_int) ?*DIR;
+pub extern "c" fn rewinddir(dp: *DIR) void;
+pub extern "c" fn closedir(dp: *DIR) c_int;
+pub extern "c" fn telldir(dp: *DIR) c_long;
+pub extern "c" fn seekdir(dp: *DIR, loc: c_long) void;
- pub extern "c" fn clock_gettime(clk_id: c_int, tp: *c.timespec) c_int;
- pub extern "c" fn clock_getres(clk_id: c_int, tp: *c.timespec) c_int;
- pub extern "c" fn gettimeofday(noalias tv: ?*c.timeval, noalias tz: ?*c.timezone) c_int;
- pub extern "c" fn nanosleep(rqtp: *const c.timespec, rmtp: ?*c.timespec) c_int;
+pub extern "c" fn clock_gettime(clk_id: c_int, tp: *c.timespec) c_int;
+pub extern "c" fn clock_getres(clk_id: c_int, tp: *c.timespec) c_int;
+pub extern "c" fn gettimeofday(noalias tv: ?*c.timeval, noalias tz: ?*c.timezone) c_int;
+pub extern "c" fn nanosleep(rqtp: *const c.timespec, rmtp: ?*c.timespec) c_int;
- pub extern "c" fn getrusage(who: c_int, usage: *c.rusage) c_int;
+pub extern "c" fn getrusage(who: c_int, usage: *c.rusage) c_int;
- pub extern "c" fn sched_yield() c_int;
+pub extern "c" fn sched_yield() c_int;
- pub extern "c" fn sigaction(sig: c_int, noalias act: ?*const c.Sigaction, noalias oact: ?*c.Sigaction) c_int;
- pub extern "c" fn sigprocmask(how: c_int, noalias set: ?*const c.sigset_t, noalias oset: ?*c.sigset_t) c_int;
- pub extern "c" fn sigfillset(set: ?*c.sigset_t) void;
- pub extern "c" fn sigwait(set: ?*c.sigset_t, sig: ?*c_int) c_int;
+pub extern "c" fn sigaction(sig: c_int, noalias act: ?*const c.Sigaction, noalias oact: ?*c.Sigaction) c_int;
+pub extern "c" fn sigprocmask(how: c_int, noalias set: ?*const c.sigset_t, noalias oset: ?*c.sigset_t) c_int;
+pub extern "c" fn sigfillset(set: ?*c.sigset_t) void;
+pub extern "c" fn sigwait(set: ?*c.sigset_t, sig: ?*c_int) c_int;
- pub extern "c" fn socket(domain: c_uint, sock_type: c_uint, protocol: c_uint) c_int;
+pub extern "c" fn socket(domain: c_uint, sock_type: c_uint, protocol: c_uint) c_int;
- pub extern "c" fn alarm(seconds: c_uint) c_uint;
+pub extern "c" fn alarm(seconds: c_uint) c_uint;
- pub extern "c" fn msync(addr: *align(page_size) const anyopaque, len: usize, flags: c_int) c_int;
- },
-};
+pub extern "c" fn msync(addr: *align(page_size) const anyopaque, len: usize, flags: c_int) c_int;
pub const fstat = switch (native_os) {
.macos => switch (native_arch) {
@@ -1863,16 +1858,14 @@ pub extern "c" fn setlogmask(maskpri: c_int) c_int;
pub extern "c" fn if_nametoindex([*:0]const u8) c_int;
-pub usingnamespace if (builtin.target.isAndroid()) struct {
- // android bionic libc does not implement getcontext,
- // and std.os.linux.getcontext also cannot be built for
- // bionic libc currently.
-} else if (native_os == .linux and builtin.target.isMusl()) struct {
- // musl does not implement getcontext
- pub const getcontext = std.os.linux.getcontext;
-} else struct {
- pub extern "c" fn getcontext(ucp: *std.os.ucontext_t) c_int;
-};
+/// `getcontext` as available for the current target. Referencing it is a
+/// compile error on Android; on Linux with musl it resolves to
+/// `std.os.linux.getcontext`; on every other target it resolves to an
+/// extern declaration taking a `*std.os.ucontext_t` and returning `c_int`.
+pub const getcontext = if (builtin.target.isAndroid())
+ @compileError("android bionic libc does not implement getcontext")
+else if (native_os == .linux and builtin.target.isMusl())
+ // musl does not implement getcontext
+ std.os.linux.getcontext
+else
+ struct {
+ extern fn getcontext(ucp: *std.os.ucontext_t) c_int;
+ }.getcontext;
pub const max_align_t = if (native_abi == .msvc)
f64
diff --git a/lib/std/c/openbsd.zig b/lib/std/c/openbsd.zig
@@ -894,11 +894,6 @@ comptime {
std.debug.assert(@sizeOf(siginfo_t) == 136);
}
-pub usingnamespace switch (builtin.cpu.arch) {
- .x86_64 => struct {},
- else => struct {},
-};
-
pub const ucontext_t = switch (builtin.cpu.arch) {
.x86_64 => extern struct {
sc_rdi: c_long,
diff --git a/lib/std/enums.zig b/lib/std/enums.zig
@@ -241,957 +241,794 @@ test nameCast {
/// to dense indices. This type does no dynamic allocation and
/// can be copied by value.
pub fn EnumSet(comptime E: type) type {
- const mixin = struct {
- fn EnumSetExt(comptime Self: type) type {
- const Indexer = Self.Indexer;
- return struct {
- /// Initializes the set using a struct of bools
- pub fn init(init_values: EnumFieldStruct(E, bool, false)) Self {
- var result = Self{};
- comptime var i: usize = 0;
- inline while (i < Self.len) : (i += 1) {
- const key = comptime Indexer.keyForIndex(i);
- const tag = comptime @tagName(key);
- if (@field(init_values, tag)) {
- result.bits.set(i);
- }
- }
- return result;
- }
- };
- }
- };
- return IndexedSet(EnumIndexer(E), mixin.EnumSetExt);
-}
+ return struct {
+ const Self = @This();
-/// A map keyed by an enum, backed by a bitfield and a dense array.
-/// If the enum is not dense, a mapping will be constructed from
-/// enum values to dense indices. This type does no dynamic
-/// allocation and can be copied by value.
-pub fn EnumMap(comptime E: type, comptime V: type) type {
- const mixin = struct {
- fn EnumMapExt(comptime Self: type) type {
- const Indexer = Self.Indexer;
- return struct {
- /// Initializes the map using a sparse struct of optionals
- pub fn init(init_values: EnumFieldStruct(E, ?V, @as(?V, null))) Self {
- var result = Self{};
- comptime var i: usize = 0;
- inline while (i < Self.len) : (i += 1) {
- const key = comptime Indexer.keyForIndex(i);
- const tag = comptime @tagName(key);
- if (@field(init_values, tag)) |*v| {
- result.bits.set(i);
- result.values[i] = v.*;
- }
- }
- return result;
- }
- /// Initializes a full mapping with all keys set to value.
- /// Consider using EnumArray instead if the map will remain full.
- pub fn initFull(value: V) Self {
- var result = Self{
- .bits = Self.BitSet.initFull(),
- .values = undefined,
- };
- @memset(&result.values, value);
- return result;
- }
- /// Initializes a full mapping with supplied values.
- /// Consider using EnumArray instead if the map will remain full.
- pub fn initFullWith(init_values: EnumFieldStruct(E, V, @as(?V, null))) Self {
- return initFullWithDefault(@as(?V, null), init_values);
- }
- /// Initializes a full mapping with a provided default.
- /// Consider using EnumArray instead if the map will remain full.
- pub fn initFullWithDefault(comptime default: ?V, init_values: EnumFieldStruct(E, V, default)) Self {
- var result = Self{
- .bits = Self.BitSet.initFull(),
- .values = undefined,
- };
- comptime var i: usize = 0;
- inline while (i < Self.len) : (i += 1) {
- const key = comptime Indexer.keyForIndex(i);
- const tag = comptime @tagName(key);
- result.values[i] = @field(init_values, tag);
- }
- return result;
- }
- };
- }
- };
- return IndexedMap(EnumIndexer(E), V, mixin.EnumMapExt);
-}
+ /// The indexing rules for converting between keys and indices.
+ pub const Indexer = EnumIndexer(E);
+ /// The element type for this set.
+ pub const Key = Indexer.Key;
-/// A multiset of enum elements up to a count of usize. Backed
-/// by an EnumArray. This type does no dynamic allocation and can
-/// be copied by value.
-pub fn EnumMultiset(comptime E: type) type {
- return BoundedEnumMultiset(E, usize);
-}
+ const BitSet = std.StaticBitSet(Indexer.count);
-/// A multiset of enum elements up to CountSize. Backed by an
-/// EnumArray. This type does no dynamic allocation and can be
-/// copied by value.
-pub fn BoundedEnumMultiset(comptime E: type, comptime CountSize: type) type {
- return struct {
- const Self = @This();
+ /// The maximum number of items in this set.
+ pub const len = Indexer.count;
- counts: EnumArray(E, CountSize),
+ bits: BitSet = BitSet.initEmpty(),
- /// Initializes the multiset using a struct of counts.
- pub fn init(init_counts: EnumFieldStruct(E, CountSize, 0)) Self {
- var self = initWithCount(0);
- inline for (@typeInfo(E).Enum.fields) |field| {
- const c = @field(init_counts, field.name);
- const key = @as(E, @enumFromInt(field.value));
- self.counts.set(key, c);
+ /// Initializes the set using a struct of bools
+ pub fn init(init_values: EnumFieldStruct(E, bool, false)) Self {
+ var result: Self = .{};
+ inline for (0..Self.len) |i| {
+ const key = comptime Indexer.keyForIndex(i);
+ const tag = @tagName(key);
+ if (@field(init_values, tag)) {
+ result.bits.set(i);
+ }
}
- return self;
+ return result;
}
- /// Initializes the multiset with a count of zero.
+ /// Returns a set containing no keys.
pub fn initEmpty() Self {
- return initWithCount(0);
+ return .{ .bits = BitSet.initEmpty() };
}
- /// Initializes the multiset with all keys at the
- /// same count.
- pub fn initWithCount(comptime c: CountSize) Self {
- return .{
- .counts = EnumArray(E, CountSize).initDefault(c, .{}),
- };
+ /// Returns a set containing all possible keys.
+ pub fn initFull() Self {
+ return .{ .bits = BitSet.initFull() };
}
- /// Returns the total number of key counts in the multiset.
- pub fn count(self: Self) usize {
- var sum: usize = 0;
- for (self.counts.values) |c| {
- sum += c;
- }
- return sum;
+ /// Returns a set containing multiple keys.
+ pub fn initMany(keys: []const Key) Self {
+ var set = initEmpty();
+ for (keys) |key| set.insert(key);
+ return set;
}
- /// Checks if at least one key in multiset.
- pub fn contains(self: Self, key: E) bool {
- return self.counts.get(key) > 0;
+ /// Returns a set containing a single key.
+ pub fn initOne(key: Key) Self {
+ return initMany(&[_]Key{key});
}
- /// Removes all instance of a key from multiset. Same as
- /// setCount(key, 0).
- pub fn removeAll(self: *Self, key: E) void {
- return self.counts.set(key, 0);
+ /// Returns the number of keys in the set.
+ pub fn count(self: Self) usize {
+ return self.bits.count();
}
- /// Increases the key count by given amount. Caller asserts
- /// operation will not overflow.
- pub fn addAssertSafe(self: *Self, key: E, c: CountSize) void {
- self.counts.getPtr(key).* += c;
+ /// Checks if a key is in the set.
+ pub fn contains(self: Self, key: Key) bool {
+ return self.bits.isSet(Indexer.indexOf(key));
}
- /// Increases the key count by given amount.
- pub fn add(self: *Self, key: E, c: CountSize) error{Overflow}!void {
- self.counts.set(key, try std.math.add(CountSize, self.counts.get(key), c));
+ /// Puts a key in the set.
+ pub fn insert(self: *Self, key: Key) void {
+ self.bits.set(Indexer.indexOf(key));
}
- /// Decreases the key count by given amount. If amount is
- /// greater than the number of keys in multset, then key count
- /// will be set to zero.
- pub fn remove(self: *Self, key: E, c: CountSize) void {
- self.counts.getPtr(key).* -= @min(self.getCount(key), c);
+ /// Removes a key from the set.
+ pub fn remove(self: *Self, key: Key) void {
+ self.bits.unset(Indexer.indexOf(key));
}
- /// Returns the count for a key.
- pub fn getCount(self: Self, key: E) CountSize {
- return self.counts.get(key);
+ /// Changes the presence of a key in the set to match the passed bool.
+ pub fn setPresent(self: *Self, key: Key, present: bool) void {
+ self.bits.setValue(Indexer.indexOf(key), present);
}
- /// Set the count for a key.
- pub fn setCount(self: *Self, key: E, c: CountSize) void {
- self.counts.set(key, c);
+ /// Toggles the presence of a key in the set. If the key is in
+ /// the set, removes it. Otherwise adds it.
+ pub fn toggle(self: *Self, key: Key) void {
+ self.bits.toggle(Indexer.indexOf(key));
}
- /// Increases the all key counts by given multiset. Caller
- /// asserts operation will not overflow any key.
- pub fn addSetAssertSafe(self: *Self, other: Self) void {
- inline for (@typeInfo(E).Enum.fields) |field| {
- const key = @as(E, @enumFromInt(field.value));
- self.addAssertSafe(key, other.getCount(key));
- }
+ /// Toggles the presence of all keys in the passed set.
+ pub fn toggleSet(self: *Self, other: Self) void {
+ self.bits.toggleSet(other.bits);
}
- /// Increases the all key counts by given multiset.
- pub fn addSet(self: *Self, other: Self) error{Overflow}!void {
- inline for (@typeInfo(E).Enum.fields) |field| {
- const key = @as(E, @enumFromInt(field.value));
- try self.add(key, other.getCount(key));
- }
+ /// Toggles all possible keys in the set.
+ pub fn toggleAll(self: *Self) void {
+ self.bits.toggleAll();
}
- /// Decreases the all key counts by given multiset. If
- /// the given multiset has more key counts than this,
- /// then that key will have a key count of zero.
- pub fn removeSet(self: *Self, other: Self) void {
- inline for (@typeInfo(E).Enum.fields) |field| {
- const key = @as(E, @enumFromInt(field.value));
- self.remove(key, other.getCount(key));
- }
+ /// Adds all keys in the passed set to this set.
+ pub fn setUnion(self: *Self, other: Self) void {
+ self.bits.setUnion(other.bits);
}
- /// Returns true iff all key counts are the same as
- /// given multiset.
+ /// Removes all keys which are not in the passed set.
+ pub fn setIntersection(self: *Self, other: Self) void {
+ self.bits.setIntersection(other.bits);
+ }
+
+ /// Returns true iff both sets have the same keys.
pub fn eql(self: Self, other: Self) bool {
- inline for (@typeInfo(E).Enum.fields) |field| {
- const key = @as(E, @enumFromInt(field.value));
- if (self.getCount(key) != other.getCount(key)) {
- return false;
- }
- }
- return true;
+ return self.bits.eql(other.bits);
}
- /// Returns true iff all key counts less than or
- /// equal to the given multiset.
+ /// Returns true iff all the keys in this set are
+ /// in the other set. The other set may have keys
+ /// not found in this set.
pub fn subsetOf(self: Self, other: Self) bool {
- inline for (@typeInfo(E).Enum.fields) |field| {
- const key = @as(E, @enumFromInt(field.value));
- if (self.getCount(key) > other.getCount(key)) {
- return false;
- }
- }
- return true;
+ return self.bits.subsetOf(other.bits);
}
- /// Returns true iff all key counts greater than or
- /// equal to the given multiset.
+ /// Returns true iff this set contains all the keys
+ /// in the other set. This set may have keys not
+ /// found in the other set.
pub fn supersetOf(self: Self, other: Self) bool {
- inline for (@typeInfo(E).Enum.fields) |field| {
- const key = @as(E, @enumFromInt(field.value));
- if (self.getCount(key) < other.getCount(key)) {
- return false;
- }
- }
- return true;
+ return self.bits.supersetOf(other.bits);
}
- /// Returns a multiset with the total key count of this
- /// multiset and the other multiset. Caller asserts
- /// operation will not overflow any key.
- pub fn plusAssertSafe(self: Self, other: Self) Self {
- var result = self;
- result.addSetAssertSafe(other);
- return result;
+ /// Returns a set with all the keys not in this set.
+ pub fn complement(self: Self) Self {
+ return .{ .bits = self.bits.complement() };
}
- /// Returns a multiset with the total key count of this
- /// multiset and the other multiset.
- pub fn plus(self: Self, other: Self) error{Overflow}!Self {
- var result = self;
- try result.addSet(other);
- return result;
+ /// Returns a set with keys that are in either this
+ /// set or the other set.
+ pub fn unionWith(self: Self, other: Self) Self {
+ return .{ .bits = self.bits.unionWith(other.bits) };
}
- /// Returns a multiset with the key count of this
- /// multiset minus the corresponding key count in the
- /// other multiset. If the other multiset contains
- /// more key count than this set, that key will have
- /// a count of zero.
- pub fn minus(self: Self, other: Self) Self {
- var result = self;
- result.removeSet(other);
- return result;
+ /// Returns a set with keys that are in both this
+ /// set and the other set.
+ pub fn intersectWith(self: Self, other: Self) Self {
+ return .{ .bits = self.bits.intersectWith(other.bits) };
}
- pub const Entry = EnumArray(E, CountSize).Entry;
- pub const Iterator = EnumArray(E, CountSize).Iterator;
-
- /// Returns an iterator over this multiset. Keys with zero
- /// counts are included. Modifications to the set during
- /// iteration may or may not be observed by the iterator,
- /// but will not invalidate it.
- pub fn iterator(self: *Self) Iterator {
- return self.counts.iterator();
+ /// Returns a set with keys that are in either this
+ /// set or the other set, but not both.
+ pub fn xorWith(self: Self, other: Self) Self {
+ return .{ .bits = self.bits.xorWith(other.bits) };
}
- };
-}
-test EnumMultiset {
- const Ball = enum { red, green, blue };
+ /// Returns a set with keys that are in this set
+ /// except for keys in the other set.
+ pub fn differenceWith(self: Self, other: Self) Self {
+ return .{ .bits = self.bits.differenceWith(other.bits) };
+ }
- const empty = EnumMultiset(Ball).initEmpty();
- const r0_g1_b2 = EnumMultiset(Ball).init(.{
- .red = 0,
- .green = 1,
- .blue = 2,
- });
- const ten_of_each = EnumMultiset(Ball).initWithCount(10);
-
- try testing.expectEqual(empty.count(), 0);
- try testing.expectEqual(r0_g1_b2.count(), 3);
- try testing.expectEqual(ten_of_each.count(), 30);
-
- try testing.expect(!empty.contains(.red));
- try testing.expect(!empty.contains(.green));
- try testing.expect(!empty.contains(.blue));
-
- try testing.expect(!r0_g1_b2.contains(.red));
- try testing.expect(r0_g1_b2.contains(.green));
- try testing.expect(r0_g1_b2.contains(.blue));
-
- try testing.expect(ten_of_each.contains(.red));
- try testing.expect(ten_of_each.contains(.green));
- try testing.expect(ten_of_each.contains(.blue));
-
- {
- var copy = ten_of_each;
- copy.removeAll(.red);
- try testing.expect(!copy.contains(.red));
-
- // removeAll second time does nothing
- copy.removeAll(.red);
- try testing.expect(!copy.contains(.red));
- }
+ /// Returns an iterator over this set, which iterates in
+ /// index order. Modifications to the set during iteration
+ /// may or may not be observed by the iterator, but will
+ /// not invalidate it.
+ pub fn iterator(self: *const Self) Iterator {
+ return .{ .inner = self.bits.iterator(.{}) };
+ }
- {
- var copy = ten_of_each;
- copy.addAssertSafe(.red, 6);
- try testing.expectEqual(copy.getCount(.red), 16);
- }
+ pub const Iterator = struct {
+ inner: BitSet.Iterator(.{}),
- {
- var copy = ten_of_each;
- try copy.add(.red, 6);
- try testing.expectEqual(copy.getCount(.red), 16);
+ /// Advances the wrapped bit set iterator and returns the key that
+ /// `Indexer.keyForIndex` maps the yielded index to, or null once
+ /// the wrapped iterator yields nothing more.
+ pub fn next(self: *Iterator) ?Key {
+ return if (self.inner.next()) |index|
+ Indexer.keyForIndex(index)
+ else
+ null;
+ }
+ };
+ };
+}
- try testing.expectError(error.Overflow, copy.add(.red, std.math.maxInt(usize)));
- }
+/// A map keyed by an enum, backed by a bitfield and a dense array.
+/// If the enum is not dense, a mapping will be constructed from
+/// enum values to dense indices. This type does no dynamic
+/// allocation and can be copied by value.
+pub fn EnumMap(comptime E: type, comptime V: type) type {
+ return struct {
+ const Self = @This();
- {
- var copy = ten_of_each;
- copy.remove(.red, 4);
- try testing.expectEqual(copy.getCount(.red), 6);
+ /// The index mapping for this map
+ pub const Indexer = EnumIndexer(E);
+ /// The key type used to index this map
+ pub const Key = Indexer.Key;
+ /// The value type stored in this map
+ pub const Value = V;
+ /// The number of possible keys in the map
+ pub const len = Indexer.count;
- // subtracting more it contains does not underflow
- copy.remove(.green, 14);
- try testing.expectEqual(copy.getCount(.green), 0);
- }
+ const BitSet = std.StaticBitSet(Indexer.count);
- try testing.expectEqual(empty.getCount(.green), 0);
- try testing.expectEqual(r0_g1_b2.getCount(.green), 1);
- try testing.expectEqual(ten_of_each.getCount(.green), 10);
+ /// Bits determining whether items are in the map
+ bits: BitSet = BitSet.initEmpty(),
+ /// Values of items in the map. If the associated
+ /// bit is zero, the value is undefined.
+ values: [Indexer.count]Value = undefined,
- {
- var copy = empty;
- copy.setCount(.red, 6);
- try testing.expectEqual(copy.getCount(.red), 6);
- }
+ /// Initializes the map using a sparse struct of optionals.
+ /// Each field of `init_values` that holds a value becomes an entry in
+ /// the map (its presence bit is set and the value is copied); fields
+ /// that are null are not inserted.
+ pub fn init(init_values: EnumFieldStruct(E, ?Value, null)) Self {
+ var result: Self = .{};
+ inline for (0..Self.len) |i| {
+ const key = comptime Indexer.keyForIndex(i);
+ const tag = @tagName(key);
+ if (@field(init_values, tag)) |*v| {
+ result.bits.set(i);
+ result.values[i] = v.*;
+ }
+ }
+ // Hand the populated map back to the caller; the function is
+ // declared to return `Self`.
+ return result;
+ }
- {
- var copy = r0_g1_b2;
- copy.addSetAssertSafe(ten_of_each);
- try testing.expectEqual(copy.getCount(.red), 10);
- try testing.expectEqual(copy.getCount(.green), 11);
- try testing.expectEqual(copy.getCount(.blue), 12);
- }
+ /// Initializes a full mapping with all keys set to value.
+ /// Consider using EnumArray instead if the map will remain full.
+ pub fn initFull(value: Value) Self {
+ var result: Self = .{
+ .bits = Self.BitSet.initFull(),
+ .values = undefined,
+ };
+ @memset(&result.values, value);
+ return result;
+ }
- {
- var copy = r0_g1_b2;
- try copy.addSet(ten_of_each);
- try testing.expectEqual(copy.getCount(.red), 10);
- try testing.expectEqual(copy.getCount(.green), 11);
- try testing.expectEqual(copy.getCount(.blue), 12);
+ /// Initializes a full mapping with supplied values.
+ /// Consider using EnumArray instead if the map will remain full.
+ pub fn initFullWith(init_values: EnumFieldStruct(E, Value, null)) Self {
+ return initFullWithDefault(null, init_values);
+ }
- const full = EnumMultiset(Ball).initWithCount(std.math.maxInt(usize));
- try testing.expectError(error.Overflow, copy.addSet(full));
- }
+ /// Initializes a full mapping with a provided default.
+ /// Consider using EnumArray instead if the map will remain full.
+ pub fn initFullWithDefault(comptime default: ?Value, init_values: EnumFieldStruct(E, Value, default)) Self {
+ var result: Self = .{
+ .bits = Self.BitSet.initFull(),
+ .values = undefined,
+ };
+ inline for (0..Self.len) |i| {
+ const key = comptime Indexer.keyForIndex(i);
+ const tag = @tagName(key);
+ result.values[i] = @field(init_values, tag);
+ }
+ return result;
+ }
- {
- var copy = ten_of_each;
- copy.removeSet(r0_g1_b2);
- try testing.expectEqual(copy.getCount(.red), 10);
- try testing.expectEqual(copy.getCount(.green), 9);
- try testing.expectEqual(copy.getCount(.blue), 8);
+ /// The number of items in the map.
+ pub fn count(self: Self) usize {
+ return self.bits.count();
+ }
- copy.removeSet(ten_of_each);
- try testing.expectEqual(copy.getCount(.red), 0);
- try testing.expectEqual(copy.getCount(.green), 0);
- try testing.expectEqual(copy.getCount(.blue), 0);
- }
+ /// Checks if the map contains an item.
+ pub fn contains(self: Self, key: Key) bool {
+ return self.bits.isSet(Indexer.indexOf(key));
+ }
- try testing.expect(empty.eql(empty));
- try testing.expect(r0_g1_b2.eql(r0_g1_b2));
- try testing.expect(ten_of_each.eql(ten_of_each));
- try testing.expect(!empty.eql(r0_g1_b2));
- try testing.expect(!r0_g1_b2.eql(ten_of_each));
- try testing.expect(!ten_of_each.eql(empty));
+ /// Gets the value associated with a key.
+ /// If the key is not in the map, returns null.
+ pub fn get(self: Self, key: Key) ?Value {
+ const index = Indexer.indexOf(key);
+ return if (self.bits.isSet(index)) self.values[index] else null;
+ }
- try testing.expect(empty.subsetOf(empty));
- try testing.expect(r0_g1_b2.subsetOf(r0_g1_b2));
- try testing.expect(empty.subsetOf(r0_g1_b2));
- try testing.expect(r0_g1_b2.subsetOf(ten_of_each));
- try testing.expect(!ten_of_each.subsetOf(r0_g1_b2));
- try testing.expect(!r0_g1_b2.subsetOf(empty));
+ /// Gets the value associated with a key, which must
+ /// exist in the map.
+ pub fn getAssertContains(self: Self, key: Key) Value {
+ const index = Indexer.indexOf(key);
+ assert(self.bits.isSet(index));
+ return self.values[index];
+ }
- try testing.expect(empty.supersetOf(empty));
- try testing.expect(r0_g1_b2.supersetOf(r0_g1_b2));
- try testing.expect(r0_g1_b2.supersetOf(empty));
- try testing.expect(ten_of_each.supersetOf(r0_g1_b2));
- try testing.expect(!r0_g1_b2.supersetOf(ten_of_each));
- try testing.expect(!empty.supersetOf(r0_g1_b2));
+ /// Gets the address of the value associated with a key.
+ /// If the key is not in the map, returns null.
+ pub fn getPtr(self: *Self, key: Key) ?*Value {
+ const index = Indexer.indexOf(key);
+ return if (self.bits.isSet(index)) &self.values[index] else null;
+ }
- {
- // with multisets it could be the case where two
- // multisets are neither subset nor superset of each
- // other.
+ /// Gets the address of the const value associated with a key.
+ /// If the key is not in the map, returns null.
+ pub fn getPtrConst(self: *const Self, key: Key) ?*const Value {
+ const index = Indexer.indexOf(key);
+ return if (self.bits.isSet(index)) &self.values[index] else null;
+ }
- const r10 = EnumMultiset(Ball).init(.{
- .red = 10,
- });
- const b10 = EnumMultiset(Ball).init(.{
- .blue = 10,
- });
+ /// Gets the address of the value associated with a key.
+ /// The key must be present in the map.
+ pub fn getPtrAssertContains(self: *Self, key: Key) *Value {
+ const index = Indexer.indexOf(key);
+ assert(self.bits.isSet(index));
+ return &self.values[index];
+ }
- try testing.expect(!r10.subsetOf(b10));
- try testing.expect(!b10.subsetOf(r10));
- try testing.expect(!r10.supersetOf(b10));
- try testing.expect(!b10.supersetOf(r10));
- }
+ /// Gets the address of the const value associated with a key.
+ /// The key must be present in the map.
+ pub fn getPtrConstAssertContains(self: *const Self, key: Key) *const Value {
+ const index = Indexer.indexOf(key);
+ assert(self.bits.isSet(index));
+ return &self.values[index];
+ }
- {
- const result = r0_g1_b2.plusAssertSafe(ten_of_each);
- try testing.expectEqual(result.getCount(.red), 10);
- try testing.expectEqual(result.getCount(.green), 11);
- try testing.expectEqual(result.getCount(.blue), 12);
- }
+ /// Adds the key to the map with the supplied value.
+ /// If the key is already in the map, overwrites the value.
+ pub fn put(self: *Self, key: Key, value: Value) void {
+ const index = Indexer.indexOf(key);
+ self.bits.set(index);
+ self.values[index] = value;
+ }
- {
- const result = try r0_g1_b2.plus(ten_of_each);
- try testing.expectEqual(result.getCount(.red), 10);
- try testing.expectEqual(result.getCount(.green), 11);
- try testing.expectEqual(result.getCount(.blue), 12);
+ /// Adds the key to the map with an undefined value.
+ /// If the key is already in the map, the value becomes undefined.
+ /// A pointer to the value is returned, which should be
+ /// used to initialize the value.
+ pub fn putUninitialized(self: *Self, key: Key) *Value {
+ const index = Indexer.indexOf(key);
+ self.bits.set(index);
+ self.values[index] = undefined;
+ return &self.values[index];
+ }
- const full = EnumMultiset(Ball).initWithCount(std.math.maxInt(usize));
- try testing.expectError(error.Overflow, result.plus(full));
- }
+ /// Sets the value associated with the key in the map,
+ /// and returns the old value. If the key was not in
+ /// the map, returns null.
+ pub fn fetchPut(self: *Self, key: Key, value: Value) ?Value {
+ const index = Indexer.indexOf(key);
+ const result: ?Value = if (self.bits.isSet(index)) self.values[index] else null;
+ self.bits.set(index);
+ self.values[index] = value;
+ return result;
+ }
- {
- const result = ten_of_each.minus(r0_g1_b2);
- try testing.expectEqual(result.getCount(.red), 10);
- try testing.expectEqual(result.getCount(.green), 9);
- try testing.expectEqual(result.getCount(.blue), 8);
- }
+ /// Removes a key from the map. If the key was not in the map,
+ /// does nothing.
+ pub fn remove(self: *Self, key: Key) void {
+ const index = Indexer.indexOf(key);
+ self.bits.unset(index);
+ self.values[index] = undefined;
+ }
- {
- const result = ten_of_each.minus(r0_g1_b2).minus(ten_of_each);
- try testing.expectEqual(result.getCount(.red), 0);
- try testing.expectEqual(result.getCount(.green), 0);
- try testing.expectEqual(result.getCount(.blue), 0);
- }
+ /// Removes a key from the map, and returns the old value.
+ /// If the key was not in the map, returns null.
+ pub fn fetchRemove(self: *Self, key: Key) ?Value {
+ const index = Indexer.indexOf(key);
+ const result: ?Value = if (self.bits.isSet(index)) self.values[index] else null;
+ self.bits.unset(index);
+ self.values[index] = undefined;
+ return result;
+ }
- {
- var copy = empty;
- var it = copy.iterator();
- var entry = it.next().?;
- try testing.expectEqual(entry.key, .red);
- try testing.expectEqual(entry.value.*, 0);
- entry = it.next().?;
- try testing.expectEqual(entry.key, .green);
- try testing.expectEqual(entry.value.*, 0);
- entry = it.next().?;
- try testing.expectEqual(entry.key, .blue);
- try testing.expectEqual(entry.value.*, 0);
- try testing.expectEqual(it.next(), null);
- }
+ /// Returns an iterator over the map, which visits items in index order.
+ /// Modifications to the underlying map may or may not be observed by
+ /// the iterator, but will not invalidate it.
+ pub fn iterator(self: *Self) Iterator {
+ return .{
+ .inner = self.bits.iterator(.{}),
+ .values = &self.values,
+ };
+ }
- {
- var copy = r0_g1_b2;
- var it = copy.iterator();
- var entry = it.next().?;
- try testing.expectEqual(entry.key, .red);
- try testing.expectEqual(entry.value.*, 0);
- entry = it.next().?;
- try testing.expectEqual(entry.key, .green);
- try testing.expectEqual(entry.value.*, 1);
- entry = it.next().?;
- try testing.expectEqual(entry.key, .blue);
- try testing.expectEqual(entry.value.*, 2);
- try testing.expectEqual(it.next(), null);
- }
-}
+ /// An entry in the map.
+ pub const Entry = struct {
+ /// The key associated with this entry.
+ /// Modifying this key will not change the map.
+ key: Key,
-/// An array keyed by an enum, backed by a dense array.
-/// If the enum is not dense, a mapping will be constructed from
-/// enum values to dense indices. This type does no dynamic
-/// allocation and can be copied by value.
-pub fn EnumArray(comptime E: type, comptime V: type) type {
- const mixin = struct {
- fn EnumArrayExt(comptime Self: type) type {
- const Indexer = Self.Indexer;
- return struct {
- /// Initializes all values in the enum array
- pub fn init(init_values: EnumFieldStruct(E, V, @as(?V, null))) Self {
- return initDefault(@as(?V, null), init_values);
- }
+ /// A pointer to the value in the map associated
+ /// with this key. Modifications through this
+ /// pointer will modify the underlying data.
+ value: *Value,
+ };
+
+ pub const Iterator = struct {
+ inner: BitSet.Iterator(.{}),
+ values: *[Indexer.count]Value,
- /// Initializes values in the enum array, with the specified default.
- pub fn initDefault(comptime default: ?V, init_values: EnumFieldStruct(E, V, default)) Self {
- var result = Self{ .values = undefined };
- comptime var i: usize = 0;
- inline while (i < Self.len) : (i += 1) {
- const key = comptime Indexer.keyForIndex(i);
- const tag = @tagName(key);
- result.values[i] = @field(init_values, tag);
+ /// Advances the wrapped bit set iterator and returns an `Entry`
+ /// for the yielded index: the key from `Indexer.keyForIndex` and a
+ /// pointer into `values` at that index. Returns null once the
+ /// wrapped iterator yields nothing more.
+ pub fn next(self: *Iterator) ?Entry {
+ return if (self.inner.next()) |index|
+ Entry{
+ .key = Indexer.keyForIndex(index),
+ .value = &self.values[index],
}
- return result;
- }
- };
- }
+ else
+ null;
+ }
+ };
};
- return IndexedArray(EnumIndexer(E), V, mixin.EnumArrayExt);
}
-fn NoExtension(comptime Self: type) type {
- _ = Self;
- return NoExt;
+/// A multiset of enum elements up to a count of usize. Backed
+/// by an EnumArray. This type does no dynamic allocation and can
+/// be copied by value.
+pub fn EnumMultiset(comptime E: type) type {
+ return BoundedEnumMultiset(E, usize);
}
-const NoExt = struct {};
-/// A set type with an Indexer mapping from keys to indices.
-/// Presence or absence is stored as a dense bitfield. This
-/// type does no allocation and can be copied by value.
-pub fn IndexedSet(comptime I: type, comptime Ext: ?fn (type) type) type {
- comptime ensureIndexer(I);
+/// A multiset of enum elements up to CountSize. Backed by an
+/// EnumArray. This type does no dynamic allocation and can be
+/// copied by value.
+pub fn BoundedEnumMultiset(comptime E: type, comptime CountSize: type) type {
return struct {
const Self = @This();
- pub usingnamespace (Ext orelse NoExtension)(Self);
-
- /// The indexing rules for converting between keys and indices.
- pub const Indexer = I;
- /// The element type for this set.
- pub const Key = Indexer.Key;
-
- const BitSet = std.StaticBitSet(Indexer.count);
-
- /// The maximum number of items in this set.
- pub const len = Indexer.count;
-
- bits: BitSet = BitSet.initEmpty(),
+ counts: EnumArray(E, CountSize),
- /// Returns a set containing no keys.
- pub fn initEmpty() Self {
- return .{ .bits = BitSet.initEmpty() };
+ /// Initializes the multiset using a struct of counts.
+ pub fn init(init_counts: EnumFieldStruct(E, CountSize, 0)) Self {
+ var self = initWithCount(0);
+ inline for (@typeInfo(E).Enum.fields) |field| {
+ const c = @field(init_counts, field.name);
+ const key = @as(E, @enumFromInt(field.value));
+ self.counts.set(key, c);
+ }
+ return self;
}
- /// Returns a set containing all possible keys.
- pub fn initFull() Self {
- return .{ .bits = BitSet.initFull() };
+ /// Initializes the multiset with a count of zero.
+ pub fn initEmpty() Self {
+ return initWithCount(0);
}
- /// Returns a set containing multiple keys.
- pub fn initMany(keys: []const Key) Self {
- var set = initEmpty();
- for (keys) |key| set.insert(key);
- return set;
+ /// Initializes the multiset with all keys at the
+ /// same count.
+ pub fn initWithCount(comptime c: CountSize) Self {
+ return .{
+ .counts = EnumArray(E, CountSize).initDefault(c, .{}),
+ };
}
- /// Returns a set containing a single key.
- pub fn initOne(key: Key) Self {
- return initMany(&[_]Key{key});
+ /// Returns the total number of key counts in the multiset.
+ pub fn count(self: Self) usize {
+ var sum: usize = 0;
+ for (self.counts.values) |c| {
+ sum += c;
+ }
+ return sum;
}
- /// Returns the number of keys in the set.
- pub fn count(self: Self) usize {
- return self.bits.count();
+ /// Checks if the multiset contains at least one instance of the key.
+ pub fn contains(self: Self, key: E) bool {
+ return self.counts.get(key) > 0;
}
- /// Checks if a key is in the set.
- pub fn contains(self: Self, key: Key) bool {
- return self.bits.isSet(Indexer.indexOf(key));
+ /// Removes all instances of a key from the multiset. Same as
+ /// setCount(key, 0).
+ pub fn removeAll(self: *Self, key: E) void {
+ return self.counts.set(key, 0);
}
- /// Puts a key in the set.
- pub fn insert(self: *Self, key: Key) void {
- self.bits.set(Indexer.indexOf(key));
+ /// Increases the key count by given amount. Caller asserts
+ /// operation will not overflow.
+ pub fn addAssertSafe(self: *Self, key: E, c: CountSize) void {
+ self.counts.getPtr(key).* += c;
}
- /// Removes a key from the set.
- pub fn remove(self: *Self, key: Key) void {
- self.bits.unset(Indexer.indexOf(key));
+ /// Increases the key count by given amount.
+ pub fn add(self: *Self, key: E, c: CountSize) error{Overflow}!void {
+ self.counts.set(key, try std.math.add(CountSize, self.counts.get(key), c));
}
- /// Changes the presence of a key in the set to match the passed bool.
- pub fn setPresent(self: *Self, key: Key, present: bool) void {
- self.bits.setValue(Indexer.indexOf(key), present);
+ /// Decreases the key count by the given amount. If the amount
+ /// is greater than the key's current count, the key count
+ /// will be set to zero.
+ pub fn remove(self: *Self, key: E, c: CountSize) void {
+ self.counts.getPtr(key).* -= @min(self.getCount(key), c);
}
- /// Toggles the presence of a key in the set. If the key is in
- /// the set, removes it. Otherwise adds it.
- pub fn toggle(self: *Self, key: Key) void {
- self.bits.toggle(Indexer.indexOf(key));
+ /// Returns the count for a key.
+ pub fn getCount(self: Self, key: E) CountSize {
+ return self.counts.get(key);
}
- /// Toggles the presence of all keys in the passed set.
- pub fn toggleSet(self: *Self, other: Self) void {
- self.bits.toggleSet(other.bits);
+ /// Set the count for a key.
+ pub fn setCount(self: *Self, key: E, c: CountSize) void {
+ self.counts.set(key, c);
}
- /// Toggles all possible keys in the set.
- pub fn toggleAll(self: *Self) void {
- self.bits.toggleAll();
+ /// Increases all key counts by the given multiset. Caller
+ /// asserts operation will not overflow any key.
+ pub fn addSetAssertSafe(self: *Self, other: Self) void {
+ inline for (@typeInfo(E).Enum.fields) |field| {
+ const key = @as(E, @enumFromInt(field.value));
+ self.addAssertSafe(key, other.getCount(key));
+ }
}
- /// Adds all keys in the passed set to this set.
- pub fn setUnion(self: *Self, other: Self) void {
- self.bits.setUnion(other.bits);
+ /// Increases all key counts by the given multiset.
+ pub fn addSet(self: *Self, other: Self) error{Overflow}!void {
+ inline for (@typeInfo(E).Enum.fields) |field| {
+ const key = @as(E, @enumFromInt(field.value));
+ try self.add(key, other.getCount(key));
+ }
}
- /// Removes all keys which are not in the passed set.
- pub fn setIntersection(self: *Self, other: Self) void {
- self.bits.setIntersection(other.bits);
+ /// Decreases all key counts by the given multiset. If
+ /// the given multiset has more key counts than this,
+ /// then that key will have a key count of zero.
+ pub fn removeSet(self: *Self, other: Self) void {
+ inline for (@typeInfo(E).Enum.fields) |field| {
+ const key = @as(E, @enumFromInt(field.value));
+ self.remove(key, other.getCount(key));
+ }
}
- /// Returns true iff both sets have the same keys.
+ /// Returns true iff all key counts are the same as
+ /// given multiset.
pub fn eql(self: Self, other: Self) bool {
- return self.bits.eql(other.bits);
+ inline for (@typeInfo(E).Enum.fields) |field| {
+ const key = @as(E, @enumFromInt(field.value));
+ if (self.getCount(key) != other.getCount(key)) {
+ return false;
+ }
+ }
+ return true;
}
- /// Returns true iff all the keys in this set are
- /// in the other set. The other set may have keys
- /// not found in this set.
+ /// Returns true iff all key counts are less than or
+ /// equal to the given multiset.
pub fn subsetOf(self: Self, other: Self) bool {
- return self.bits.subsetOf(other.bits);
+ inline for (@typeInfo(E).Enum.fields) |field| {
+ const key = @as(E, @enumFromInt(field.value));
+ if (self.getCount(key) > other.getCount(key)) {
+ return false;
+ }
+ }
+ return true;
}
- /// Returns true iff this set contains all the keys
- /// in the other set. This set may have keys not
- /// found in the other set.
+ /// Returns true iff all key counts are greater than or
+ /// equal to the given multiset.
pub fn supersetOf(self: Self, other: Self) bool {
- return self.bits.supersetOf(other.bits);
- }
-
- /// Returns a set with all the keys not in this set.
- pub fn complement(self: Self) Self {
- return .{ .bits = self.bits.complement() };
- }
-
- /// Returns a set with keys that are in either this
- /// set or the other set.
- pub fn unionWith(self: Self, other: Self) Self {
- return .{ .bits = self.bits.unionWith(other.bits) };
- }
-
- /// Returns a set with keys that are in both this
- /// set and the other set.
- pub fn intersectWith(self: Self, other: Self) Self {
- return .{ .bits = self.bits.intersectWith(other.bits) };
+ inline for (@typeInfo(E).Enum.fields) |field| {
+ const key = @as(E, @enumFromInt(field.value));
+ if (self.getCount(key) < other.getCount(key)) {
+ return false;
+ }
+ }
+ return true;
}
- /// Returns a set with keys that are in either this
- /// set or the other set, but not both.
- pub fn xorWith(self: Self, other: Self) Self {
- return .{ .bits = self.bits.xorWith(other.bits) };
+ /// Returns a multiset with the total key count of this
+ /// multiset and the other multiset. Caller asserts
+ /// operation will not overflow any key.
+ pub fn plusAssertSafe(self: Self, other: Self) Self {
+ var result = self;
+ result.addSetAssertSafe(other);
+ return result;
}
- /// Returns a set with keys that are in this set
- /// except for keys in the other set.
- pub fn differenceWith(self: Self, other: Self) Self {
- return .{ .bits = self.bits.differenceWith(other.bits) };
+ /// Returns a multiset with the total key count of this
+ /// multiset and the other multiset.
+ pub fn plus(self: Self, other: Self) error{Overflow}!Self {
+ var result = self;
+ try result.addSet(other);
+ return result;
}
- /// Returns an iterator over this set, which iterates in
- /// index order. Modifications to the set during iteration
- /// may or may not be observed by the iterator, but will
- /// not invalidate it.
- pub fn iterator(self: *const Self) Iterator {
- return .{ .inner = self.bits.iterator(.{}) };
+ /// Returns a multiset with the key count of this
+ /// multiset minus the corresponding key count in the
+ /// other multiset. If the other multiset contains
+ /// more key count than this set, that key will have
+ /// a count of zero.
+ pub fn minus(self: Self, other: Self) Self {
+ var result = self;
+ result.removeSet(other);
+ return result;
}
- pub const Iterator = struct {
- inner: BitSet.Iterator(.{}),
+ pub const Entry = EnumArray(E, CountSize).Entry;
+ pub const Iterator = EnumArray(E, CountSize).Iterator;
- pub fn next(self: *Iterator) ?Key {
- return if (self.inner.next()) |index|
- Indexer.keyForIndex(index)
- else
- null;
- }
- };
+ /// Returns an iterator over this multiset. Keys with zero
+ /// counts are included. Modifications to the set during
+ /// iteration may or may not be observed by the iterator,
+ /// but will not invalidate it.
+ pub fn iterator(self: *Self) Iterator {
+ return self.counts.iterator();
+ }
};
}
-test "pure EnumSet fns" {
- const Suit = enum { spades, hearts, clubs, diamonds };
-
- const empty = EnumSet(Suit).initEmpty();
- const full = EnumSet(Suit).initFull();
- const black = EnumSet(Suit).initMany(&[_]Suit{ .spades, .clubs });
- const red = EnumSet(Suit).initMany(&[_]Suit{ .hearts, .diamonds });
-
- try testing.expect(empty.eql(empty));
- try testing.expect(full.eql(full));
- try testing.expect(!empty.eql(full));
- try testing.expect(!full.eql(empty));
- try testing.expect(!empty.eql(black));
- try testing.expect(!full.eql(red));
- try testing.expect(!red.eql(empty));
- try testing.expect(!black.eql(full));
-
- try testing.expect(empty.subsetOf(empty));
- try testing.expect(empty.subsetOf(full));
- try testing.expect(full.subsetOf(full));
- try testing.expect(!black.subsetOf(red));
- try testing.expect(!red.subsetOf(black));
-
- try testing.expect(full.supersetOf(full));
- try testing.expect(full.supersetOf(empty));
- try testing.expect(empty.supersetOf(empty));
- try testing.expect(!black.supersetOf(red));
- try testing.expect(!red.supersetOf(black));
-
- try testing.expect(empty.complement().eql(full));
- try testing.expect(full.complement().eql(empty));
- try testing.expect(black.complement().eql(red));
- try testing.expect(red.complement().eql(black));
+test EnumMultiset {
+ const Ball = enum { red, green, blue };
- try testing.expect(empty.unionWith(empty).eql(empty));
- try testing.expect(empty.unionWith(full).eql(full));
- try testing.expect(full.unionWith(full).eql(full));
- try testing.expect(full.unionWith(empty).eql(full));
- try testing.expect(black.unionWith(red).eql(full));
- try testing.expect(red.unionWith(black).eql(full));
+ const empty = EnumMultiset(Ball).initEmpty();
+ const r0_g1_b2 = EnumMultiset(Ball).init(.{
+ .red = 0,
+ .green = 1,
+ .blue = 2,
+ });
+ const ten_of_each = EnumMultiset(Ball).initWithCount(10);
- try testing.expect(empty.intersectWith(empty).eql(empty));
- try testing.expect(empty.intersectWith(full).eql(empty));
- try testing.expect(full.intersectWith(full).eql(full));
- try testing.expect(full.intersectWith(empty).eql(empty));
- try testing.expect(black.intersectWith(red).eql(empty));
- try testing.expect(red.intersectWith(black).eql(empty));
+ try testing.expectEqual(empty.count(), 0);
+ try testing.expectEqual(r0_g1_b2.count(), 3);
+ try testing.expectEqual(ten_of_each.count(), 30);
- try testing.expect(empty.xorWith(empty).eql(empty));
- try testing.expect(empty.xorWith(full).eql(full));
- try testing.expect(full.xorWith(full).eql(empty));
- try testing.expect(full.xorWith(empty).eql(full));
- try testing.expect(black.xorWith(red).eql(full));
- try testing.expect(red.xorWith(black).eql(full));
+ try testing.expect(!empty.contains(.red));
+ try testing.expect(!empty.contains(.green));
+ try testing.expect(!empty.contains(.blue));
- try testing.expect(empty.differenceWith(empty).eql(empty));
- try testing.expect(empty.differenceWith(full).eql(empty));
- try testing.expect(full.differenceWith(full).eql(empty));
- try testing.expect(full.differenceWith(empty).eql(full));
- try testing.expect(full.differenceWith(red).eql(black));
- try testing.expect(full.differenceWith(black).eql(red));
-}
+ try testing.expect(!r0_g1_b2.contains(.red));
+ try testing.expect(r0_g1_b2.contains(.green));
+ try testing.expect(r0_g1_b2.contains(.blue));
-test "EnumSet empty" {
- const E = enum {};
- const empty = EnumSet(E).initEmpty();
- const full = EnumSet(E).initFull();
+ try testing.expect(ten_of_each.contains(.red));
+ try testing.expect(ten_of_each.contains(.green));
+ try testing.expect(ten_of_each.contains(.blue));
- try std.testing.expect(empty.eql(full));
- try std.testing.expect(empty.complement().eql(full));
- try std.testing.expect(empty.complement().eql(full.complement()));
- try std.testing.expect(empty.eql(full.complement()));
-}
+ {
+ var copy = ten_of_each;
+ copy.removeAll(.red);
+ try testing.expect(!copy.contains(.red));
-test "EnumSet const iterator" {
- const Direction = enum { up, down, left, right };
- const diag_move = init: {
- var move = EnumSet(Direction).initEmpty();
- move.insert(.right);
- move.insert(.up);
- break :init move;
- };
+ // removeAll second time does nothing
+ copy.removeAll(.red);
+ try testing.expect(!copy.contains(.red));
+ }
- var result = EnumSet(Direction).initEmpty();
- var it = diag_move.iterator();
- while (it.next()) |dir| {
- result.insert(dir);
+ {
+ var copy = ten_of_each;
+ copy.addAssertSafe(.red, 6);
+ try testing.expectEqual(copy.getCount(.red), 16);
}
- try testing.expect(result.eql(diag_move));
-}
+ {
+ var copy = ten_of_each;
+ try copy.add(.red, 6);
+ try testing.expectEqual(copy.getCount(.red), 16);
-/// A map from keys to values, using an index lookup. Uses a
-/// bitfield to track presence and a dense array of values.
-/// This type does no allocation and can be copied by value.
-pub fn IndexedMap(comptime I: type, comptime V: type, comptime Ext: ?fn (type) type) type {
- comptime ensureIndexer(I);
- return struct {
- const Self = @This();
+ try testing.expectError(error.Overflow, copy.add(.red, std.math.maxInt(usize)));
+ }
- pub usingnamespace (Ext orelse NoExtension)(Self);
+ {
+ var copy = ten_of_each;
+ copy.remove(.red, 4);
+ try testing.expectEqual(copy.getCount(.red), 6);
- /// The index mapping for this map
- pub const Indexer = I;
- /// The key type used to index this map
- pub const Key = Indexer.Key;
- /// The value type stored in this map
- pub const Value = V;
- /// The number of possible keys in the map
- pub const len = Indexer.count;
+ // subtracting more than it contains does not underflow
+ copy.remove(.green, 14);
+ try testing.expectEqual(copy.getCount(.green), 0);
+ }
- const BitSet = std.StaticBitSet(Indexer.count);
+ try testing.expectEqual(empty.getCount(.green), 0);
+ try testing.expectEqual(r0_g1_b2.getCount(.green), 1);
+ try testing.expectEqual(ten_of_each.getCount(.green), 10);
- /// Bits determining whether items are in the map
- bits: BitSet = BitSet.initEmpty(),
- /// Values of items in the map. If the associated
- /// bit is zero, the value is undefined.
- values: [Indexer.count]Value = undefined,
+ {
+ var copy = empty;
+ copy.setCount(.red, 6);
+ try testing.expectEqual(copy.getCount(.red), 6);
+ }
- /// The number of items in the map.
- pub fn count(self: Self) usize {
- return self.bits.count();
- }
+ {
+ var copy = r0_g1_b2;
+ copy.addSetAssertSafe(ten_of_each);
+ try testing.expectEqual(copy.getCount(.red), 10);
+ try testing.expectEqual(copy.getCount(.green), 11);
+ try testing.expectEqual(copy.getCount(.blue), 12);
+ }
- /// Checks if the map contains an item.
- pub fn contains(self: Self, key: Key) bool {
- return self.bits.isSet(Indexer.indexOf(key));
- }
+ {
+ var copy = r0_g1_b2;
+ try copy.addSet(ten_of_each);
+ try testing.expectEqual(copy.getCount(.red), 10);
+ try testing.expectEqual(copy.getCount(.green), 11);
+ try testing.expectEqual(copy.getCount(.blue), 12);
- /// Gets the value associated with a key.
- /// If the key is not in the map, returns null.
- pub fn get(self: Self, key: Key) ?Value {
- const index = Indexer.indexOf(key);
- return if (self.bits.isSet(index)) self.values[index] else null;
- }
+ const full = EnumMultiset(Ball).initWithCount(std.math.maxInt(usize));
+ try testing.expectError(error.Overflow, copy.addSet(full));
+ }
- /// Gets the value associated with a key, which must
- /// exist in the map.
- pub fn getAssertContains(self: Self, key: Key) Value {
- const index = Indexer.indexOf(key);
- assert(self.bits.isSet(index));
- return self.values[index];
- }
+ {
+ var copy = ten_of_each;
+ copy.removeSet(r0_g1_b2);
+ try testing.expectEqual(copy.getCount(.red), 10);
+ try testing.expectEqual(copy.getCount(.green), 9);
+ try testing.expectEqual(copy.getCount(.blue), 8);
- /// Gets the address of the value associated with a key.
- /// If the key is not in the map, returns null.
- pub fn getPtr(self: *Self, key: Key) ?*Value {
- const index = Indexer.indexOf(key);
- return if (self.bits.isSet(index)) &self.values[index] else null;
- }
+ copy.removeSet(ten_of_each);
+ try testing.expectEqual(copy.getCount(.red), 0);
+ try testing.expectEqual(copy.getCount(.green), 0);
+ try testing.expectEqual(copy.getCount(.blue), 0);
+ }
- /// Gets the address of the const value associated with a key.
- /// If the key is not in the map, returns null.
- pub fn getPtrConst(self: *const Self, key: Key) ?*const Value {
- const index = Indexer.indexOf(key);
- return if (self.bits.isSet(index)) &self.values[index] else null;
- }
+ try testing.expect(empty.eql(empty));
+ try testing.expect(r0_g1_b2.eql(r0_g1_b2));
+ try testing.expect(ten_of_each.eql(ten_of_each));
+ try testing.expect(!empty.eql(r0_g1_b2));
+ try testing.expect(!r0_g1_b2.eql(ten_of_each));
+ try testing.expect(!ten_of_each.eql(empty));
- /// Gets the address of the value associated with a key.
- /// The key must be present in the map.
- pub fn getPtrAssertContains(self: *Self, key: Key) *Value {
- const index = Indexer.indexOf(key);
- assert(self.bits.isSet(index));
- return &self.values[index];
- }
+ try testing.expect(empty.subsetOf(empty));
+ try testing.expect(r0_g1_b2.subsetOf(r0_g1_b2));
+ try testing.expect(empty.subsetOf(r0_g1_b2));
+ try testing.expect(r0_g1_b2.subsetOf(ten_of_each));
+ try testing.expect(!ten_of_each.subsetOf(r0_g1_b2));
+ try testing.expect(!r0_g1_b2.subsetOf(empty));
- /// Gets the address of the const value associated with a key.
- /// The key must be present in the map.
- pub fn getPtrConstAssertContains(self: *const Self, key: Key) *const Value {
- const index = Indexer.indexOf(key);
- assert(self.bits.isSet(index));
- return &self.values[index];
- }
+ try testing.expect(empty.supersetOf(empty));
+ try testing.expect(r0_g1_b2.supersetOf(r0_g1_b2));
+ try testing.expect(r0_g1_b2.supersetOf(empty));
+ try testing.expect(ten_of_each.supersetOf(r0_g1_b2));
+ try testing.expect(!r0_g1_b2.supersetOf(ten_of_each));
+ try testing.expect(!empty.supersetOf(r0_g1_b2));
- /// Adds the key to the map with the supplied value.
- /// If the key is already in the map, overwrites the value.
- pub fn put(self: *Self, key: Key, value: Value) void {
- const index = Indexer.indexOf(key);
- self.bits.set(index);
- self.values[index] = value;
- }
+ {
+ // with multisets it could be the case where two
+ // multisets are neither subset nor superset of each
+ // other.
- /// Adds the key to the map with an undefined value.
- /// If the key is already in the map, the value becomes undefined.
- /// A pointer to the value is returned, which should be
- /// used to initialize the value.
- pub fn putUninitialized(self: *Self, key: Key) *Value {
- const index = Indexer.indexOf(key);
- self.bits.set(index);
- self.values[index] = undefined;
- return &self.values[index];
- }
+ const r10 = EnumMultiset(Ball).init(.{
+ .red = 10,
+ });
+ const b10 = EnumMultiset(Ball).init(.{
+ .blue = 10,
+ });
- /// Sets the value associated with the key in the map,
- /// and returns the old value. If the key was not in
- /// the map, returns null.
- pub fn fetchPut(self: *Self, key: Key, value: Value) ?Value {
- const index = Indexer.indexOf(key);
- const result: ?Value = if (self.bits.isSet(index)) self.values[index] else null;
- self.bits.set(index);
- self.values[index] = value;
- return result;
- }
+ try testing.expect(!r10.subsetOf(b10));
+ try testing.expect(!b10.subsetOf(r10));
+ try testing.expect(!r10.supersetOf(b10));
+ try testing.expect(!b10.supersetOf(r10));
+ }
- /// Removes a key from the map. If the key was not in the map,
- /// does nothing.
- pub fn remove(self: *Self, key: Key) void {
- const index = Indexer.indexOf(key);
- self.bits.unset(index);
- self.values[index] = undefined;
- }
+ {
+ const result = r0_g1_b2.plusAssertSafe(ten_of_each);
+ try testing.expectEqual(result.getCount(.red), 10);
+ try testing.expectEqual(result.getCount(.green), 11);
+ try testing.expectEqual(result.getCount(.blue), 12);
+ }
- /// Removes a key from the map, and returns the old value.
- /// If the key was not in the map, returns null.
- pub fn fetchRemove(self: *Self, key: Key) ?Value {
- const index = Indexer.indexOf(key);
- const result: ?Value = if (self.bits.isSet(index)) self.values[index] else null;
- self.bits.unset(index);
- self.values[index] = undefined;
- return result;
- }
+ {
+ const result = try r0_g1_b2.plus(ten_of_each);
+ try testing.expectEqual(result.getCount(.red), 10);
+ try testing.expectEqual(result.getCount(.green), 11);
+ try testing.expectEqual(result.getCount(.blue), 12);
- /// Returns an iterator over the map, which visits items in index order.
- /// Modifications to the underlying map may or may not be observed by
- /// the iterator, but will not invalidate it.
- pub fn iterator(self: *Self) Iterator {
- return .{
- .inner = self.bits.iterator(.{}),
- .values = &self.values,
- };
- }
+ const full = EnumMultiset(Ball).initWithCount(std.math.maxInt(usize));
+ try testing.expectError(error.Overflow, result.plus(full));
+ }
- /// An entry in the map.
- pub const Entry = struct {
- /// The key associated with this entry.
- /// Modifying this key will not change the map.
- key: Key,
+ {
+ const result = ten_of_each.minus(r0_g1_b2);
+ try testing.expectEqual(result.getCount(.red), 10);
+ try testing.expectEqual(result.getCount(.green), 9);
+ try testing.expectEqual(result.getCount(.blue), 8);
+ }
- /// A pointer to the value in the map associated
- /// with this key. Modifications through this
- /// pointer will modify the underlying data.
- value: *Value,
- };
+ {
+ const result = ten_of_each.minus(r0_g1_b2).minus(ten_of_each);
+ try testing.expectEqual(result.getCount(.red), 0);
+ try testing.expectEqual(result.getCount(.green), 0);
+ try testing.expectEqual(result.getCount(.blue), 0);
+ }
- pub const Iterator = struct {
- inner: BitSet.Iterator(.{}),
- values: *[Indexer.count]Value,
+ {
+ var copy = empty;
+ var it = copy.iterator();
+ var entry = it.next().?;
+ try testing.expectEqual(entry.key, .red);
+ try testing.expectEqual(entry.value.*, 0);
+ entry = it.next().?;
+ try testing.expectEqual(entry.key, .green);
+ try testing.expectEqual(entry.value.*, 0);
+ entry = it.next().?;
+ try testing.expectEqual(entry.key, .blue);
+ try testing.expectEqual(entry.value.*, 0);
+ try testing.expectEqual(it.next(), null);
+ }
- pub fn next(self: *Iterator) ?Entry {
- return if (self.inner.next()) |index|
- Entry{
- .key = Indexer.keyForIndex(index),
- .value = &self.values[index],
- }
- else
- null;
- }
- };
- };
+ {
+ var copy = r0_g1_b2;
+ var it = copy.iterator();
+ var entry = it.next().?;
+ try testing.expectEqual(entry.key, .red);
+ try testing.expectEqual(entry.value.*, 0);
+ entry = it.next().?;
+ try testing.expectEqual(entry.key, .green);
+ try testing.expectEqual(entry.value.*, 1);
+ entry = it.next().?;
+ try testing.expectEqual(entry.key, .blue);
+ try testing.expectEqual(entry.value.*, 2);
+ try testing.expectEqual(it.next(), null);
+ }
}
-/// A dense array of values, using an indexed lookup.
-/// This type does no allocation and can be copied by value.
-pub fn IndexedArray(comptime I: type, comptime V: type, comptime Ext: ?fn (type) type) type {
- comptime ensureIndexer(I);
+/// An array keyed by an enum, backed by a dense array.
+/// If the enum is not dense, a mapping will be constructed from
+/// enum values to dense indices. This type does no dynamic
+/// allocation and can be copied by value.
+pub fn EnumArray(comptime E: type, comptime V: type) type {
return struct {
const Self = @This();
- pub usingnamespace (Ext orelse NoExtension)(Self);
-
/// The index mapping for this map
- pub const Indexer = I;
+ pub const Indexer = EnumIndexer(E);
/// The key type used to index this map
pub const Key = Indexer.Key;
/// The value type stored in this map
@@ -1201,6 +1038,21 @@ pub fn IndexedArray(comptime I: type, comptime V: type, comptime Ext: ?fn (type)
values: [Indexer.count]Value,
+ pub fn init(init_values: EnumFieldStruct(E, Value, null)) Self {
+ return initDefault(null, init_values);
+ }
+
+ /// Initializes values in the enum array, with the specified default.
+ pub fn initDefault(comptime default: ?Value, init_values: EnumFieldStruct(E, Value, default)) Self {
+ var result: Self = .{ .values = undefined };
+ inline for (0..Self.len) |i| {
+ const key = comptime Indexer.keyForIndex(i);
+ const tag = @tagName(key);
+ result.values[i] = @field(init_values, tag);
+ }
+ return result;
+ }
+
pub fn initUndefined() Self {
return Self{ .values = undefined };
}
@@ -1269,46 +1121,96 @@ pub fn IndexedArray(comptime I: type, comptime V: type, comptime Ext: ?fn (type)
};
}
-/// Verifies that a type is a valid Indexer, providing a helpful
-/// compile error if not. An Indexer maps a comptime-known set
-/// of keys to a dense set of zero-based indices.
-/// The indexer interface must look like this:
-/// ```
-/// struct {
-/// /// The key type which this indexer converts to indices
-/// pub const Key: type,
-/// /// The number of indexes in the dense mapping
-/// pub const count: comptime_int,
-/// /// Converts from a key to an index
-/// pub fn indexOf(Key) usize;
-/// /// Converts from an index to a key
-/// pub fn keyForIndex(usize) Key;
-/// }
-/// ```
-pub fn ensureIndexer(comptime T: type) void {
- comptime {
- if (!@hasDecl(T, "Key")) @compileError("Indexer must have decl Key: type.");
- if (@TypeOf(T.Key) != type) @compileError("Indexer.Key must be a type.");
- if (!@hasDecl(T, "count")) @compileError("Indexer must have decl count: comptime_int.");
- if (@TypeOf(T.count) != comptime_int) @compileError("Indexer.count must be a comptime_int.");
- if (!@hasDecl(T, "indexOf")) @compileError("Indexer.indexOf must be a fn (Key) usize.");
- if (@TypeOf(T.indexOf) != fn (T.Key) usize) @compileError("Indexer must have decl indexOf: fn (Key) usize.");
- if (!@hasDecl(T, "keyForIndex")) @compileError("Indexer must have decl keyForIndex: fn (usize) Key.");
- if (@TypeOf(T.keyForIndex) != fn (usize) T.Key) @compileError("Indexer.keyForIndex must be a fn (usize) Key.");
- }
+test "pure EnumSet fns" {
+ const Suit = enum { spades, hearts, clubs, diamonds };
+
+ const empty = EnumSet(Suit).initEmpty();
+ const full = EnumSet(Suit).initFull();
+ const black = EnumSet(Suit).initMany(&[_]Suit{ .spades, .clubs });
+ const red = EnumSet(Suit).initMany(&[_]Suit{ .hearts, .diamonds });
+
+ try testing.expect(empty.eql(empty));
+ try testing.expect(full.eql(full));
+ try testing.expect(!empty.eql(full));
+ try testing.expect(!full.eql(empty));
+ try testing.expect(!empty.eql(black));
+ try testing.expect(!full.eql(red));
+ try testing.expect(!red.eql(empty));
+ try testing.expect(!black.eql(full));
+
+ try testing.expect(empty.subsetOf(empty));
+ try testing.expect(empty.subsetOf(full));
+ try testing.expect(full.subsetOf(full));
+ try testing.expect(!black.subsetOf(red));
+ try testing.expect(!red.subsetOf(black));
+
+ try testing.expect(full.supersetOf(full));
+ try testing.expect(full.supersetOf(empty));
+ try testing.expect(empty.supersetOf(empty));
+ try testing.expect(!black.supersetOf(red));
+ try testing.expect(!red.supersetOf(black));
+
+ try testing.expect(empty.complement().eql(full));
+ try testing.expect(full.complement().eql(empty));
+ try testing.expect(black.complement().eql(red));
+ try testing.expect(red.complement().eql(black));
+
+ try testing.expect(empty.unionWith(empty).eql(empty));
+ try testing.expect(empty.unionWith(full).eql(full));
+ try testing.expect(full.unionWith(full).eql(full));
+ try testing.expect(full.unionWith(empty).eql(full));
+ try testing.expect(black.unionWith(red).eql(full));
+ try testing.expect(red.unionWith(black).eql(full));
+
+ try testing.expect(empty.intersectWith(empty).eql(empty));
+ try testing.expect(empty.intersectWith(full).eql(empty));
+ try testing.expect(full.intersectWith(full).eql(full));
+ try testing.expect(full.intersectWith(empty).eql(empty));
+ try testing.expect(black.intersectWith(red).eql(empty));
+ try testing.expect(red.intersectWith(black).eql(empty));
+
+ try testing.expect(empty.xorWith(empty).eql(empty));
+ try testing.expect(empty.xorWith(full).eql(full));
+ try testing.expect(full.xorWith(full).eql(empty));
+ try testing.expect(full.xorWith(empty).eql(full));
+ try testing.expect(black.xorWith(red).eql(full));
+ try testing.expect(red.xorWith(black).eql(full));
+
+ try testing.expect(empty.differenceWith(empty).eql(empty));
+ try testing.expect(empty.differenceWith(full).eql(empty));
+ try testing.expect(full.differenceWith(full).eql(empty));
+ try testing.expect(full.differenceWith(empty).eql(full));
+ try testing.expect(full.differenceWith(red).eql(black));
+ try testing.expect(full.differenceWith(black).eql(red));
}
-test ensureIndexer {
- ensureIndexer(struct {
- pub const Key = u32;
- pub const count: comptime_int = 8;
- pub fn indexOf(k: Key) usize {
- return @as(usize, @intCast(k));
- }
- pub fn keyForIndex(index: usize) Key {
- return @as(Key, @intCast(index));
- }
- });
+test "EnumSet empty" {
+ const E = enum {};
+ const empty = EnumSet(E).initEmpty();
+ const full = EnumSet(E).initFull();
+
+ try std.testing.expect(empty.eql(full));
+ try std.testing.expect(empty.complement().eql(full));
+ try std.testing.expect(empty.complement().eql(full.complement()));
+ try std.testing.expect(empty.eql(full.complement()));
+}
+
+test "EnumSet const iterator" {
+ const Direction = enum { up, down, left, right };
+ const diag_move = init: {
+ var move = EnumSet(Direction).initEmpty();
+ move.insert(.right);
+ move.insert(.up);
+ break :init move;
+ };
+
+ var result = EnumSet(Direction).initEmpty();
+ var it = diag_move.iterator();
+ while (it.next()) |dir| {
+ result.insert(dir);
+ }
+
+ try testing.expect(result.eql(diag_move));
}
pub fn EnumIndexer(comptime E: type) type {
@@ -1438,7 +1340,6 @@ test "EnumIndexer non-exhaustive" {
_,
};
const Indexer = EnumIndexer(E);
- ensureIndexer(Indexer);
const min_tag: E = @enumFromInt(std.math.minInt(BackingInt));
const max_tag: E = @enumFromInt(std.math.maxInt(BackingInt));
@@ -1466,7 +1367,6 @@ test "EnumIndexer non-exhaustive" {
test "EnumIndexer dense zeroed" {
const E = enum(u2) { b = 1, a = 0, c = 2 };
const Indexer = EnumIndexer(E);
- ensureIndexer(Indexer);
try testing.expectEqual(E, Indexer.Key);
try testing.expectEqual(3, Indexer.count);
@@ -1482,7 +1382,6 @@ test "EnumIndexer dense zeroed" {
test "EnumIndexer dense positive" {
const E = enum(u4) { c = 6, a = 4, b = 5 };
const Indexer = EnumIndexer(E);
- ensureIndexer(Indexer);
try testing.expectEqual(E, Indexer.Key);
try testing.expectEqual(3, Indexer.count);
@@ -1498,7 +1397,6 @@ test "EnumIndexer dense positive" {
test "EnumIndexer dense negative" {
const E = enum(i4) { a = -6, c = -4, b = -5 };
const Indexer = EnumIndexer(E);
- ensureIndexer(Indexer);
try testing.expectEqual(E, Indexer.Key);
try testing.expectEqual(3, Indexer.count);
@@ -1514,7 +1412,6 @@ test "EnumIndexer dense negative" {
test "EnumIndexer sparse" {
const E = enum(i4) { a = -2, c = 6, b = 4 };
const Indexer = EnumIndexer(E);
- ensureIndexer(Indexer);
try testing.expectEqual(E, Indexer.Key);
try testing.expectEqual(3, Indexer.count);
@@ -1530,7 +1427,6 @@ test "EnumIndexer sparse" {
test "EnumIndexer empty" {
const E = enum {};
const Indexer = EnumIndexer(E);
- ensureIndexer(Indexer);
try testing.expectEqual(E, Indexer.Key);
try testing.expectEqual(0, Indexer.count);
}
diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig
@@ -383,25 +383,24 @@ pub const O = switch (native_arch) {
else => @compileError("missing std.os.linux.O constants for this architecture"),
};
-pub usingnamespace @import("linux/io_uring.zig");
-
/// Set by startup code, used by `getauxval`.
pub var elf_aux_maybe: ?[*]std.elf.Auxv = null;
-pub usingnamespace if (switch (builtin.zig_backend) {
+const extern_getauxval = switch (builtin.zig_backend) {
// Calling extern functions is not yet supported with these backends
.stage2_aarch64, .stage2_arm, .stage2_riscv64, .stage2_sparc64 => false,
else => !builtin.link_libc,
-}) struct {
- /// See `std.elf` for the constants.
- /// This matches the libc getauxval function.
- pub extern fn getauxval(index: usize) usize;
- comptime {
+};
+
+comptime {
+ if (extern_getauxval) {
@export(getauxvalImpl, .{ .name = "getauxval", .linkage = .Weak });
}
-} else struct {
- pub const getauxval = getauxvalImpl;
-};
+}
+
+pub const getauxval = if (extern_getauxval) struct {
+ extern fn getauxval(index: usize) usize;
+}.getauxval else getauxvalImpl;
fn getauxvalImpl(index: usize) callconv(.C) usize {
const auxv = elf_aux_maybe orelse return 0;
@@ -2823,284 +2822,282 @@ pub const AF = struct {
pub const MAX = PF.MAX;
};
-pub const SO = struct {
- pub usingnamespace if (is_mips) struct {
- pub const DEBUG = 1;
- pub const REUSEADDR = 0x0004;
- pub const KEEPALIVE = 0x0008;
- pub const DONTROUTE = 0x0010;
- pub const BROADCAST = 0x0020;
- pub const LINGER = 0x0080;
- pub const OOBINLINE = 0x0100;
- pub const REUSEPORT = 0x0200;
- pub const SNDBUF = 0x1001;
- pub const RCVBUF = 0x1002;
- pub const SNDLOWAT = 0x1003;
- pub const RCVLOWAT = 0x1004;
- pub const RCVTIMEO = 0x1006;
- pub const SNDTIMEO = 0x1005;
- pub const ERROR = 0x1007;
- pub const TYPE = 0x1008;
- pub const ACCEPTCONN = 0x1009;
- pub const PROTOCOL = 0x1028;
- pub const DOMAIN = 0x1029;
- pub const NO_CHECK = 11;
- pub const PRIORITY = 12;
- pub const BSDCOMPAT = 14;
- pub const PASSCRED = 17;
- pub const PEERCRED = 18;
- pub const PEERSEC = 30;
- pub const SNDBUFFORCE = 31;
- pub const RCVBUFFORCE = 33;
- pub const SECURITY_AUTHENTICATION = 22;
- pub const SECURITY_ENCRYPTION_TRANSPORT = 23;
- pub const SECURITY_ENCRYPTION_NETWORK = 24;
- pub const BINDTODEVICE = 25;
- pub const ATTACH_FILTER = 26;
- pub const DETACH_FILTER = 27;
- pub const GET_FILTER = ATTACH_FILTER;
- pub const PEERNAME = 28;
- pub const TIMESTAMP_OLD = 29;
- pub const PASSSEC = 34;
- pub const TIMESTAMPNS_OLD = 35;
- pub const MARK = 36;
- pub const TIMESTAMPING_OLD = 37;
- pub const RXQ_OVFL = 40;
- pub const WIFI_STATUS = 41;
- pub const PEEK_OFF = 42;
- pub const NOFCS = 43;
- pub const LOCK_FILTER = 44;
- pub const SELECT_ERR_QUEUE = 45;
- pub const BUSY_POLL = 46;
- pub const MAX_PACING_RATE = 47;
- pub const BPF_EXTENSIONS = 48;
- pub const INCOMING_CPU = 49;
- pub const ATTACH_BPF = 50;
- pub const DETACH_BPF = DETACH_FILTER;
- pub const ATTACH_REUSEPORT_CBPF = 51;
- pub const ATTACH_REUSEPORT_EBPF = 52;
- pub const CNX_ADVICE = 53;
- pub const MEMINFO = 55;
- pub const INCOMING_NAPI_ID = 56;
- pub const COOKIE = 57;
- pub const PEERGROUPS = 59;
- pub const ZEROCOPY = 60;
- pub const TXTIME = 61;
- pub const BINDTOIFINDEX = 62;
- pub const TIMESTAMP_NEW = 63;
- pub const TIMESTAMPNS_NEW = 64;
- pub const TIMESTAMPING_NEW = 65;
- pub const RCVTIMEO_NEW = 66;
- pub const SNDTIMEO_NEW = 67;
- pub const DETACH_REUSEPORT_BPF = 68;
- } else if (is_ppc or is_ppc64) struct {
- pub const DEBUG = 1;
- pub const REUSEADDR = 2;
- pub const TYPE = 3;
- pub const ERROR = 4;
- pub const DONTROUTE = 5;
- pub const BROADCAST = 6;
- pub const SNDBUF = 7;
- pub const RCVBUF = 8;
- pub const KEEPALIVE = 9;
- pub const OOBINLINE = 10;
- pub const NO_CHECK = 11;
- pub const PRIORITY = 12;
- pub const LINGER = 13;
- pub const BSDCOMPAT = 14;
- pub const REUSEPORT = 15;
- pub const RCVLOWAT = 16;
- pub const SNDLOWAT = 17;
- pub const RCVTIMEO = 18;
- pub const SNDTIMEO = 19;
- pub const PASSCRED = 20;
- pub const PEERCRED = 21;
- pub const ACCEPTCONN = 30;
- pub const PEERSEC = 31;
- pub const SNDBUFFORCE = 32;
- pub const RCVBUFFORCE = 33;
- pub const PROTOCOL = 38;
- pub const DOMAIN = 39;
- pub const SECURITY_AUTHENTICATION = 22;
- pub const SECURITY_ENCRYPTION_TRANSPORT = 23;
- pub const SECURITY_ENCRYPTION_NETWORK = 24;
- pub const BINDTODEVICE = 25;
- pub const ATTACH_FILTER = 26;
- pub const DETACH_FILTER = 27;
- pub const GET_FILTER = ATTACH_FILTER;
- pub const PEERNAME = 28;
- pub const TIMESTAMP_OLD = 29;
- pub const PASSSEC = 34;
- pub const TIMESTAMPNS_OLD = 35;
- pub const MARK = 36;
- pub const TIMESTAMPING_OLD = 37;
- pub const RXQ_OVFL = 40;
- pub const WIFI_STATUS = 41;
- pub const PEEK_OFF = 42;
- pub const NOFCS = 43;
- pub const LOCK_FILTER = 44;
- pub const SELECT_ERR_QUEUE = 45;
- pub const BUSY_POLL = 46;
- pub const MAX_PACING_RATE = 47;
- pub const BPF_EXTENSIONS = 48;
- pub const INCOMING_CPU = 49;
- pub const ATTACH_BPF = 50;
- pub const DETACH_BPF = DETACH_FILTER;
- pub const ATTACH_REUSEPORT_CBPF = 51;
- pub const ATTACH_REUSEPORT_EBPF = 52;
- pub const CNX_ADVICE = 53;
- pub const MEMINFO = 55;
- pub const INCOMING_NAPI_ID = 56;
- pub const COOKIE = 57;
- pub const PEERGROUPS = 59;
- pub const ZEROCOPY = 60;
- pub const TXTIME = 61;
- pub const BINDTOIFINDEX = 62;
- pub const TIMESTAMP_NEW = 63;
- pub const TIMESTAMPNS_NEW = 64;
- pub const TIMESTAMPING_NEW = 65;
- pub const RCVTIMEO_NEW = 66;
- pub const SNDTIMEO_NEW = 67;
- pub const DETACH_REUSEPORT_BPF = 68;
- } else if (is_sparc) struct {
- pub const DEBUG = 1;
- pub const REUSEADDR = 4;
- pub const TYPE = 4104;
- pub const ERROR = 4103;
- pub const DONTROUTE = 16;
- pub const BROADCAST = 32;
- pub const SNDBUF = 4097;
- pub const RCVBUF = 4098;
- pub const KEEPALIVE = 8;
- pub const OOBINLINE = 256;
- pub const NO_CHECK = 11;
- pub const PRIORITY = 12;
- pub const LINGER = 128;
- pub const BSDCOMPAT = 1024;
- pub const REUSEPORT = 512;
- pub const PASSCRED = 2;
- pub const PEERCRED = 64;
- pub const RCVLOWAT = 2048;
- pub const SNDLOWAT = 4096;
- pub const RCVTIMEO = 8192;
- pub const SNDTIMEO = 16384;
- pub const ACCEPTCONN = 32768;
- pub const PEERSEC = 30;
- pub const SNDBUFFORCE = 4106;
- pub const RCVBUFFORCE = 4107;
- pub const PROTOCOL = 4136;
- pub const DOMAIN = 4137;
- pub const SECURITY_AUTHENTICATION = 20481;
- pub const SECURITY_ENCRYPTION_TRANSPORT = 20482;
- pub const SECURITY_ENCRYPTION_NETWORK = 20484;
- pub const BINDTODEVICE = 13;
- pub const ATTACH_FILTER = 26;
- pub const DETACH_FILTER = 27;
- pub const GET_FILTER = 26;
- pub const PEERNAME = 28;
- pub const TIMESTAMP_OLD = 29;
- pub const PASSSEC = 31;
- pub const TIMESTAMPNS_OLD = 33;
- pub const MARK = 34;
- pub const TIMESTAMPING_OLD = 35;
- pub const RXQ_OVFL = 36;
- pub const WIFI_STATUS = 37;
- pub const PEEK_OFF = 38;
- pub const NOFCS = 39;
- pub const LOCK_FILTER = 40;
- pub const SELECT_ERR_QUEUE = 41;
- pub const BUSY_POLL = 48;
- pub const MAX_PACING_RATE = 49;
- pub const BPF_EXTENSIONS = 50;
- pub const INCOMING_CPU = 51;
- pub const ATTACH_BPF = 52;
- pub const DETACH_BPF = 27;
- pub const ATTACH_REUSEPORT_CBPF = 53;
- pub const ATTACH_REUSEPORT_EBPF = 54;
- pub const CNX_ADVICE = 55;
- pub const MEMINFO = 57;
- pub const INCOMING_NAPI_ID = 58;
- pub const COOKIE = 59;
- pub const PEERGROUPS = 61;
- pub const ZEROCOPY = 62;
- pub const TXTIME = 63;
- pub const BINDTOIFINDEX = 65;
- pub const TIMESTAMP_NEW = 70;
- pub const TIMESTAMPNS_NEW = 66;
- pub const TIMESTAMPING_NEW = 67;
- pub const RCVTIMEO_NEW = 68;
- pub const SNDTIMEO_NEW = 69;
- pub const DETACH_REUSEPORT_BPF = 71;
- } else struct {
- pub const DEBUG = 1;
- pub const REUSEADDR = 2;
- pub const TYPE = 3;
- pub const ERROR = 4;
- pub const DONTROUTE = 5;
- pub const BROADCAST = 6;
- pub const SNDBUF = 7;
- pub const RCVBUF = 8;
- pub const KEEPALIVE = 9;
- pub const OOBINLINE = 10;
- pub const NO_CHECK = 11;
- pub const PRIORITY = 12;
- pub const LINGER = 13;
- pub const BSDCOMPAT = 14;
- pub const REUSEPORT = 15;
- pub const PASSCRED = 16;
- pub const PEERCRED = 17;
- pub const RCVLOWAT = 18;
- pub const SNDLOWAT = 19;
- pub const RCVTIMEO = 20;
- pub const SNDTIMEO = 21;
- pub const ACCEPTCONN = 30;
- pub const PEERSEC = 31;
- pub const SNDBUFFORCE = 32;
- pub const RCVBUFFORCE = 33;
- pub const PROTOCOL = 38;
- pub const DOMAIN = 39;
- pub const SECURITY_AUTHENTICATION = 22;
- pub const SECURITY_ENCRYPTION_TRANSPORT = 23;
- pub const SECURITY_ENCRYPTION_NETWORK = 24;
- pub const BINDTODEVICE = 25;
- pub const ATTACH_FILTER = 26;
- pub const DETACH_FILTER = 27;
- pub const GET_FILTER = ATTACH_FILTER;
- pub const PEERNAME = 28;
- pub const TIMESTAMP_OLD = 29;
- pub const PASSSEC = 34;
- pub const TIMESTAMPNS_OLD = 35;
- pub const MARK = 36;
- pub const TIMESTAMPING_OLD = 37;
- pub const RXQ_OVFL = 40;
- pub const WIFI_STATUS = 41;
- pub const PEEK_OFF = 42;
- pub const NOFCS = 43;
- pub const LOCK_FILTER = 44;
- pub const SELECT_ERR_QUEUE = 45;
- pub const BUSY_POLL = 46;
- pub const MAX_PACING_RATE = 47;
- pub const BPF_EXTENSIONS = 48;
- pub const INCOMING_CPU = 49;
- pub const ATTACH_BPF = 50;
- pub const DETACH_BPF = DETACH_FILTER;
- pub const ATTACH_REUSEPORT_CBPF = 51;
- pub const ATTACH_REUSEPORT_EBPF = 52;
- pub const CNX_ADVICE = 53;
- pub const MEMINFO = 55;
- pub const INCOMING_NAPI_ID = 56;
- pub const COOKIE = 57;
- pub const PEERGROUPS = 59;
- pub const ZEROCOPY = 60;
- pub const TXTIME = 61;
- pub const BINDTOIFINDEX = 62;
- pub const TIMESTAMP_NEW = 63;
- pub const TIMESTAMPNS_NEW = 64;
- pub const TIMESTAMPING_NEW = 65;
- pub const RCVTIMEO_NEW = 66;
- pub const SNDTIMEO_NEW = 67;
- pub const DETACH_REUSEPORT_BPF = 68;
- };
+pub const SO = if (is_mips) struct {
+ pub const DEBUG = 1;
+ pub const REUSEADDR = 0x0004;
+ pub const KEEPALIVE = 0x0008;
+ pub const DONTROUTE = 0x0010;
+ pub const BROADCAST = 0x0020;
+ pub const LINGER = 0x0080;
+ pub const OOBINLINE = 0x0100;
+ pub const REUSEPORT = 0x0200;
+ pub const SNDBUF = 0x1001;
+ pub const RCVBUF = 0x1002;
+ pub const SNDLOWAT = 0x1003;
+ pub const RCVLOWAT = 0x1004;
+ pub const RCVTIMEO = 0x1006;
+ pub const SNDTIMEO = 0x1005;
+ pub const ERROR = 0x1007;
+ pub const TYPE = 0x1008;
+ pub const ACCEPTCONN = 0x1009;
+ pub const PROTOCOL = 0x1028;
+ pub const DOMAIN = 0x1029;
+ pub const NO_CHECK = 11;
+ pub const PRIORITY = 12;
+ pub const BSDCOMPAT = 14;
+ pub const PASSCRED = 17;
+ pub const PEERCRED = 18;
+ pub const PEERSEC = 30;
+ pub const SNDBUFFORCE = 31;
+ pub const RCVBUFFORCE = 33;
+ pub const SECURITY_AUTHENTICATION = 22;
+ pub const SECURITY_ENCRYPTION_TRANSPORT = 23;
+ pub const SECURITY_ENCRYPTION_NETWORK = 24;
+ pub const BINDTODEVICE = 25;
+ pub const ATTACH_FILTER = 26;
+ pub const DETACH_FILTER = 27;
+ pub const GET_FILTER = ATTACH_FILTER;
+ pub const PEERNAME = 28;
+ pub const TIMESTAMP_OLD = 29;
+ pub const PASSSEC = 34;
+ pub const TIMESTAMPNS_OLD = 35;
+ pub const MARK = 36;
+ pub const TIMESTAMPING_OLD = 37;
+ pub const RXQ_OVFL = 40;
+ pub const WIFI_STATUS = 41;
+ pub const PEEK_OFF = 42;
+ pub const NOFCS = 43;
+ pub const LOCK_FILTER = 44;
+ pub const SELECT_ERR_QUEUE = 45;
+ pub const BUSY_POLL = 46;
+ pub const MAX_PACING_RATE = 47;
+ pub const BPF_EXTENSIONS = 48;
+ pub const INCOMING_CPU = 49;
+ pub const ATTACH_BPF = 50;
+ pub const DETACH_BPF = DETACH_FILTER;
+ pub const ATTACH_REUSEPORT_CBPF = 51;
+ pub const ATTACH_REUSEPORT_EBPF = 52;
+ pub const CNX_ADVICE = 53;
+ pub const MEMINFO = 55;
+ pub const INCOMING_NAPI_ID = 56;
+ pub const COOKIE = 57;
+ pub const PEERGROUPS = 59;
+ pub const ZEROCOPY = 60;
+ pub const TXTIME = 61;
+ pub const BINDTOIFINDEX = 62;
+ pub const TIMESTAMP_NEW = 63;
+ pub const TIMESTAMPNS_NEW = 64;
+ pub const TIMESTAMPING_NEW = 65;
+ pub const RCVTIMEO_NEW = 66;
+ pub const SNDTIMEO_NEW = 67;
+ pub const DETACH_REUSEPORT_BPF = 68;
+} else if (is_ppc or is_ppc64) struct {
+ pub const DEBUG = 1;
+ pub const REUSEADDR = 2;
+ pub const TYPE = 3;
+ pub const ERROR = 4;
+ pub const DONTROUTE = 5;
+ pub const BROADCAST = 6;
+ pub const SNDBUF = 7;
+ pub const RCVBUF = 8;
+ pub const KEEPALIVE = 9;
+ pub const OOBINLINE = 10;
+ pub const NO_CHECK = 11;
+ pub const PRIORITY = 12;
+ pub const LINGER = 13;
+ pub const BSDCOMPAT = 14;
+ pub const REUSEPORT = 15;
+ pub const RCVLOWAT = 16;
+ pub const SNDLOWAT = 17;
+ pub const RCVTIMEO = 18;
+ pub const SNDTIMEO = 19;
+ pub const PASSCRED = 20;
+ pub const PEERCRED = 21;
+ pub const ACCEPTCONN = 30;
+ pub const PEERSEC = 31;
+ pub const SNDBUFFORCE = 32;
+ pub const RCVBUFFORCE = 33;
+ pub const PROTOCOL = 38;
+ pub const DOMAIN = 39;
+ pub const SECURITY_AUTHENTICATION = 22;
+ pub const SECURITY_ENCRYPTION_TRANSPORT = 23;
+ pub const SECURITY_ENCRYPTION_NETWORK = 24;
+ pub const BINDTODEVICE = 25;
+ pub const ATTACH_FILTER = 26;
+ pub const DETACH_FILTER = 27;
+ pub const GET_FILTER = ATTACH_FILTER;
+ pub const PEERNAME = 28;
+ pub const TIMESTAMP_OLD = 29;
+ pub const PASSSEC = 34;
+ pub const TIMESTAMPNS_OLD = 35;
+ pub const MARK = 36;
+ pub const TIMESTAMPING_OLD = 37;
+ pub const RXQ_OVFL = 40;
+ pub const WIFI_STATUS = 41;
+ pub const PEEK_OFF = 42;
+ pub const NOFCS = 43;
+ pub const LOCK_FILTER = 44;
+ pub const SELECT_ERR_QUEUE = 45;
+ pub const BUSY_POLL = 46;
+ pub const MAX_PACING_RATE = 47;
+ pub const BPF_EXTENSIONS = 48;
+ pub const INCOMING_CPU = 49;
+ pub const ATTACH_BPF = 50;
+ pub const DETACH_BPF = DETACH_FILTER;
+ pub const ATTACH_REUSEPORT_CBPF = 51;
+ pub const ATTACH_REUSEPORT_EBPF = 52;
+ pub const CNX_ADVICE = 53;
+ pub const MEMINFO = 55;
+ pub const INCOMING_NAPI_ID = 56;
+ pub const COOKIE = 57;
+ pub const PEERGROUPS = 59;
+ pub const ZEROCOPY = 60;
+ pub const TXTIME = 61;
+ pub const BINDTOIFINDEX = 62;
+ pub const TIMESTAMP_NEW = 63;
+ pub const TIMESTAMPNS_NEW = 64;
+ pub const TIMESTAMPING_NEW = 65;
+ pub const RCVTIMEO_NEW = 66;
+ pub const SNDTIMEO_NEW = 67;
+ pub const DETACH_REUSEPORT_BPF = 68;
+} else if (is_sparc) struct {
+ pub const DEBUG = 1;
+ pub const REUSEADDR = 4;
+ pub const TYPE = 4104;
+ pub const ERROR = 4103;
+ pub const DONTROUTE = 16;
+ pub const BROADCAST = 32;
+ pub const SNDBUF = 4097;
+ pub const RCVBUF = 4098;
+ pub const KEEPALIVE = 8;
+ pub const OOBINLINE = 256;
+ pub const NO_CHECK = 11;
+ pub const PRIORITY = 12;
+ pub const LINGER = 128;
+ pub const BSDCOMPAT = 1024;
+ pub const REUSEPORT = 512;
+ pub const PASSCRED = 2;
+ pub const PEERCRED = 64;
+ pub const RCVLOWAT = 2048;
+ pub const SNDLOWAT = 4096;
+ pub const RCVTIMEO = 8192;
+ pub const SNDTIMEO = 16384;
+ pub const ACCEPTCONN = 32768;
+ pub const PEERSEC = 30;
+ pub const SNDBUFFORCE = 4106;
+ pub const RCVBUFFORCE = 4107;
+ pub const PROTOCOL = 4136;
+ pub const DOMAIN = 4137;
+ pub const SECURITY_AUTHENTICATION = 20481;
+ pub const SECURITY_ENCRYPTION_TRANSPORT = 20482;
+ pub const SECURITY_ENCRYPTION_NETWORK = 20484;
+ pub const BINDTODEVICE = 13;
+ pub const ATTACH_FILTER = 26;
+ pub const DETACH_FILTER = 27;
+ pub const GET_FILTER = 26;
+ pub const PEERNAME = 28;
+ pub const TIMESTAMP_OLD = 29;
+ pub const PASSSEC = 31;
+ pub const TIMESTAMPNS_OLD = 33;
+ pub const MARK = 34;
+ pub const TIMESTAMPING_OLD = 35;
+ pub const RXQ_OVFL = 36;
+ pub const WIFI_STATUS = 37;
+ pub const PEEK_OFF = 38;
+ pub const NOFCS = 39;
+ pub const LOCK_FILTER = 40;
+ pub const SELECT_ERR_QUEUE = 41;
+ pub const BUSY_POLL = 48;
+ pub const MAX_PACING_RATE = 49;
+ pub const BPF_EXTENSIONS = 50;
+ pub const INCOMING_CPU = 51;
+ pub const ATTACH_BPF = 52;
+ pub const DETACH_BPF = 27;
+ pub const ATTACH_REUSEPORT_CBPF = 53;
+ pub const ATTACH_REUSEPORT_EBPF = 54;
+ pub const CNX_ADVICE = 55;
+ pub const MEMINFO = 57;
+ pub const INCOMING_NAPI_ID = 58;
+ pub const COOKIE = 59;
+ pub const PEERGROUPS = 61;
+ pub const ZEROCOPY = 62;
+ pub const TXTIME = 63;
+ pub const BINDTOIFINDEX = 65;
+ pub const TIMESTAMP_NEW = 70;
+ pub const TIMESTAMPNS_NEW = 66;
+ pub const TIMESTAMPING_NEW = 67;
+ pub const RCVTIMEO_NEW = 68;
+ pub const SNDTIMEO_NEW = 69;
+ pub const DETACH_REUSEPORT_BPF = 71;
+} else struct {
+ pub const DEBUG = 1;
+ pub const REUSEADDR = 2;
+ pub const TYPE = 3;
+ pub const ERROR = 4;
+ pub const DONTROUTE = 5;
+ pub const BROADCAST = 6;
+ pub const SNDBUF = 7;
+ pub const RCVBUF = 8;
+ pub const KEEPALIVE = 9;
+ pub const OOBINLINE = 10;
+ pub const NO_CHECK = 11;
+ pub const PRIORITY = 12;
+ pub const LINGER = 13;
+ pub const BSDCOMPAT = 14;
+ pub const REUSEPORT = 15;
+ pub const PASSCRED = 16;
+ pub const PEERCRED = 17;
+ pub const RCVLOWAT = 18;
+ pub const SNDLOWAT = 19;
+ pub const RCVTIMEO = 20;
+ pub const SNDTIMEO = 21;
+ pub const ACCEPTCONN = 30;
+ pub const PEERSEC = 31;
+ pub const SNDBUFFORCE = 32;
+ pub const RCVBUFFORCE = 33;
+ pub const PROTOCOL = 38;
+ pub const DOMAIN = 39;
+ pub const SECURITY_AUTHENTICATION = 22;
+ pub const SECURITY_ENCRYPTION_TRANSPORT = 23;
+ pub const SECURITY_ENCRYPTION_NETWORK = 24;
+ pub const BINDTODEVICE = 25;
+ pub const ATTACH_FILTER = 26;
+ pub const DETACH_FILTER = 27;
+ pub const GET_FILTER = ATTACH_FILTER;
+ pub const PEERNAME = 28;
+ pub const TIMESTAMP_OLD = 29;
+ pub const PASSSEC = 34;
+ pub const TIMESTAMPNS_OLD = 35;
+ pub const MARK = 36;
+ pub const TIMESTAMPING_OLD = 37;
+ pub const RXQ_OVFL = 40;
+ pub const WIFI_STATUS = 41;
+ pub const PEEK_OFF = 42;
+ pub const NOFCS = 43;
+ pub const LOCK_FILTER = 44;
+ pub const SELECT_ERR_QUEUE = 45;
+ pub const BUSY_POLL = 46;
+ pub const MAX_PACING_RATE = 47;
+ pub const BPF_EXTENSIONS = 48;
+ pub const INCOMING_CPU = 49;
+ pub const ATTACH_BPF = 50;
+ pub const DETACH_BPF = DETACH_FILTER;
+ pub const ATTACH_REUSEPORT_CBPF = 51;
+ pub const ATTACH_REUSEPORT_EBPF = 52;
+ pub const CNX_ADVICE = 53;
+ pub const MEMINFO = 55;
+ pub const INCOMING_NAPI_ID = 56;
+ pub const COOKIE = 57;
+ pub const PEERGROUPS = 59;
+ pub const ZEROCOPY = 60;
+ pub const TXTIME = 61;
+ pub const BINDTOIFINDEX = 62;
+ pub const TIMESTAMP_NEW = 63;
+ pub const TIMESTAMPNS_NEW = 64;
+ pub const TIMESTAMPING_NEW = 65;
+ pub const RCVTIMEO_NEW = 66;
+ pub const SNDTIMEO_NEW = 67;
+ pub const DETACH_REUSEPORT_BPF = 68;
};
pub const SCM = struct {
@@ -4189,22 +4186,9 @@ pub const IORING_SETUP_SINGLE_ISSUER = 1 << 12;
pub const IORING_SETUP_DEFER_TASKRUN = 1 << 13;
/// IO submission data structure (Submission Queue Entry)
-pub const io_uring_sqe = extern struct {
- opcode: IORING_OP,
- flags: u8,
- ioprio: u16,
- fd: i32,
- off: u64,
- addr: u64,
- len: u32,
- rw_flags: u32,
- user_data: u64,
- buf_index: u16,
- personality: u16,
- splice_fd_in: i32,
- addr3: u64,
- resv: u64,
-};
+pub const io_uring_sqe = @import("linux/io_uring_sqe.zig").io_uring_sqe;
+
+pub const IoUring = @import("linux/IoUring.zig");
/// If sqe->file_index is set to this for opcodes that instantiate a new
/// direct descriptor (like openat/openat2/accept), then io_uring will allocate
diff --git a/lib/std/os/linux/IoUring.zig b/lib/std/os/linux/IoUring.zig
@@ -0,0 +1,3670 @@
+const IoUring = @This();
+const std = @import("../../std.zig");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const mem = std.mem;
+const net = std.net;
+const os = std.os;
+const posix = std.posix;
+const linux = os.linux;
+const testing = std.testing;
+
+fd: os.fd_t = -1,
+sq: SubmissionQueue,
+cq: CompletionQueue,
+flags: u32,
+features: u32,
+
+/// A friendly way to setup an io_uring, with default linux.io_uring_params.
+/// `entries` must be a power of two between 1 and 32768, although the kernel will make the final
+/// call on how many entries the submission and completion queues will ultimately have,
+/// see https://github.com/torvalds/linux/blob/v5.8/fs/io_uring.c#L8027-L8050.
+/// Matches the interface of io_uring_queue_init() in liburing.
+pub fn init(entries: u16, flags: u32) !IoUring {
+    var params = mem.zeroInit(linux.io_uring_params, .{
+        .flags = flags,
+        .sq_thread_idle = 1000,
+    });
+    return try IoUring.init_params(entries, &params);
+}
+
+/// A powerful way to setup an io_uring, if you want to tweak linux.io_uring_params such as submission
+/// queue thread cpu affinity or thread idle timeout (the kernel and our default is 1 second).
+/// `p` is passed by reference because the kernel needs to modify the parameters.
+/// Matches the interface of io_uring_queue_init_params() in liburing.
+pub fn init_params(entries: u16, p: *linux.io_uring_params) !IoUring {
+ if (entries == 0) return error.EntriesZero;
+ if (!std.math.isPowerOfTwo(entries)) return error.EntriesNotPowerOfTwo;
+
+ assert(p.sq_entries == 0);
+ assert(p.cq_entries == 0 or p.flags & linux.IORING_SETUP_CQSIZE != 0);
+ assert(p.features == 0);
+ assert(p.wq_fd == 0 or p.flags & linux.IORING_SETUP_ATTACH_WQ != 0);
+ assert(p.resv[0] == 0);
+ assert(p.resv[1] == 0);
+ assert(p.resv[2] == 0);
+
+ const res = linux.io_uring_setup(entries, p);
+ switch (linux.getErrno(res)) {
+ .SUCCESS => {},
+ .FAULT => return error.ParamsOutsideAccessibleAddressSpace,
+ // The resv array contains non-zero data, p.flags contains an unsupported flag,
+ // entries out of bounds, IORING_SETUP_SQ_AFF was specified without IORING_SETUP_SQPOLL,
+ // or IORING_SETUP_CQSIZE was specified but linux.io_uring_params.cq_entries was invalid:
+ .INVAL => return error.ArgumentsInvalid,
+ .MFILE => return error.ProcessFdQuotaExceeded,
+ .NFILE => return error.SystemFdQuotaExceeded,
+ .NOMEM => return error.SystemResources,
+ // IORING_SETUP_SQPOLL was specified but effective user ID lacks sufficient privileges,
+ // or a container seccomp policy prohibits io_uring syscalls:
+ .PERM => return error.PermissionDenied,
+ .NOSYS => return error.SystemOutdated,
+ else => |errno| return os.unexpectedErrno(errno),
+ }
+ const fd = @as(os.fd_t, @intCast(res));
+ assert(fd >= 0);
+ errdefer os.close(fd);
+
+ // Kernel versions 5.4 and up use only one mmap() for the submission and completion queues.
+ // This is not an optional feature for us... if the kernel does it, we have to do it.
+ // The thinking on this by the kernel developers was that both the submission and the
+ // completion queue rings have sizes just over a power of two, but the submission queue ring
+ // is significantly smaller with u32 slots. By bundling both in a single mmap, the kernel
+ // gets the submission queue ring for free.
+ // See https://patchwork.kernel.org/patch/11115257 for the kernel patch.
+ // We do not support the double mmap() done before 5.4, because we want to keep the
+ // init/deinit mmap paths simple and because io_uring has had many bug fixes even since 5.4.
+ if ((p.features & linux.IORING_FEAT_SINGLE_MMAP) == 0) {
+ return error.SystemOutdated;
+ }
+
+ // Check that the kernel has actually set params and that "impossible is nothing".
+ assert(p.sq_entries != 0);
+ assert(p.cq_entries != 0);
+ assert(p.cq_entries >= p.sq_entries);
+
+ // From here on, we only need to read from params, so pass `p` by value as immutable.
+ // The completion queue shares the mmap with the submission queue, so pass `sq` there too.
+ var sq = try SubmissionQueue.init(fd, p.*);
+ errdefer sq.deinit();
+ var cq = try CompletionQueue.init(fd, p.*, sq);
+ errdefer cq.deinit();
+
+ // Check that our starting state is as we expect.
+ assert(sq.head.* == 0);
+ assert(sq.tail.* == 0);
+ assert(sq.mask == p.sq_entries - 1);
+ // Allow flags.* to be non-zero, since the kernel may set IORING_SQ_NEED_WAKEUP at any time.
+ assert(sq.dropped.* == 0);
+ assert(sq.array.len == p.sq_entries);
+ assert(sq.sqes.len == p.sq_entries);
+ assert(sq.sqe_head == 0);
+ assert(sq.sqe_tail == 0);
+
+ assert(cq.head.* == 0);
+ assert(cq.tail.* == 0);
+ assert(cq.mask == p.cq_entries - 1);
+ assert(cq.overflow.* == 0);
+ assert(cq.cqes.len == p.cq_entries);
+
+ return IoUring{
+ .fd = fd,
+ .sq = sq,
+ .cq = cq,
+ .flags = p.flags,
+ .features = p.features,
+ };
+}
+
+pub fn deinit(self: *IoUring) void {
+ assert(self.fd >= 0);
+ // The mmaps depend on the fd, so the order of these calls is important:
+ self.cq.deinit();
+ self.sq.deinit();
+ os.close(self.fd);
+ self.fd = -1;
+}
+
+/// Returns a pointer to a vacant SQE, or an error if the submission queue is full.
+/// We follow the implementation (and atomics) of liburing's `io_uring_get_sqe()` exactly.
+/// However, instead of a null we return an error to force safe handling.
+/// Any situation where the submission queue is full tends more towards a control flow error,
+/// and the null return in liburing is more a C idiom than anything else, for lack of a better
+/// alternative. In Zig, we have first-class error handling... so let's use it.
+/// Matches the implementation of io_uring_get_sqe() in liburing.
+pub fn get_sqe(self: *IoUring) !*linux.io_uring_sqe {
+ const head = @atomicLoad(u32, self.sq.head, .Acquire);
+ // Remember that these head and tail offsets wrap around every four billion operations.
+ // We must therefore use wrapping addition and subtraction to avoid a runtime crash.
+ const next = self.sq.sqe_tail +% 1;
+ if (next -% head > self.sq.sqes.len) return error.SubmissionQueueFull;
+ const sqe = &self.sq.sqes[self.sq.sqe_tail & self.sq.mask];
+ self.sq.sqe_tail = next;
+ return sqe;
+}
+
+/// Submits the SQEs acquired via get_sqe() to the kernel. You can call this once after you have
+/// called get_sqe() multiple times to setup multiple I/O requests.
+/// Returns the number of SQEs submitted, if not used alongside IORING_SETUP_SQPOLL.
+/// If the io_uring instance uses IORING_SETUP_SQPOLL, the value returned on success is not
+/// guaranteed to match the amount of actually submitted sqes during this call. A value higher
+/// or lower, including 0, may be returned.
+/// Matches the implementation of io_uring_submit() in liburing.
+pub fn submit(self: *IoUring) !u32 {
+ return self.submit_and_wait(0);
+}
+
+/// Like submit(), but allows waiting for events as well.
+/// Returns the number of SQEs submitted.
+/// Matches the implementation of io_uring_submit_and_wait() in liburing.
+pub fn submit_and_wait(self: *IoUring, wait_nr: u32) !u32 {
+ const submitted = self.flush_sq();
+ var flags: u32 = 0;
+ if (self.sq_ring_needs_enter(&flags) or wait_nr > 0) {
+ if (wait_nr > 0 or (self.flags & linux.IORING_SETUP_IOPOLL) != 0) {
+ flags |= linux.IORING_ENTER_GETEVENTS;
+ }
+ return try self.enter(submitted, wait_nr, flags);
+ }
+ return submitted;
+}
+
+/// Tell the kernel we have submitted SQEs and/or want to wait for CQEs.
+/// Returns the number of SQEs submitted.
+pub fn enter(self: *IoUring, to_submit: u32, min_complete: u32, flags: u32) !u32 {
+ assert(self.fd >= 0);
+ const res = linux.io_uring_enter(self.fd, to_submit, min_complete, flags, null);
+ switch (linux.getErrno(res)) {
+ .SUCCESS => {},
+ // The kernel was unable to allocate memory or ran out of resources for the request.
+ // The application should wait for some completions and try again:
+ .AGAIN => return error.SystemResources,
+ // The SQE `fd` is invalid, or IOSQE_FIXED_FILE was set but no files were registered:
+ .BADF => return error.FileDescriptorInvalid,
+ // The file descriptor is valid, but the ring is not in the right state.
+ // See io_uring_register(2) for how to enable the ring.
+ .BADFD => return error.FileDescriptorInBadState,
+ // The application attempted to overcommit the number of requests it can have pending.
+ // The application should wait for some completions and try again:
+ .BUSY => return error.CompletionQueueOvercommitted,
+ // The SQE is invalid, or valid but the ring was setup with IORING_SETUP_IOPOLL:
+ .INVAL => return error.SubmissionQueueEntryInvalid,
+ // The buffer is outside the process' accessible address space, or IORING_OP_READ_FIXED
+ // or IORING_OP_WRITE_FIXED was specified but no buffers were registered, or the range
+ // described by `addr` and `len` is not within the buffer registered at `buf_index`:
+ .FAULT => return error.BufferInvalid,
+ .NXIO => return error.RingShuttingDown,
+ // The kernel believes our `self.fd` does not refer to an io_uring instance,
+ // or the opcode is valid but not supported by this kernel (more likely):
+ .OPNOTSUPP => return error.OpcodeNotSupported,
+ // The operation was interrupted by a delivery of a signal before it could complete.
+ // This can happen while waiting for events with IORING_ENTER_GETEVENTS:
+ .INTR => return error.SignalInterrupt,
+ else => |errno| return os.unexpectedErrno(errno),
+ }
+ return @as(u32, @intCast(res));
+}
+
+/// Sync internal state with kernel ring state on the SQ side.
+/// Returns the number of all pending events in the SQ ring, for the shared ring.
+/// This return value includes previously flushed SQEs, as per liburing.
+/// The rationale is to suggest that an io_uring_enter() call is needed rather than not.
+/// Matches the implementation of __io_uring_flush_sq() in liburing.
+pub fn flush_sq(self: *IoUring) u32 {
+ if (self.sq.sqe_head != self.sq.sqe_tail) {
+ // Fill in SQEs that we have queued up, adding them to the kernel ring.
+ const to_submit = self.sq.sqe_tail -% self.sq.sqe_head;
+ var tail = self.sq.tail.*;
+ var i: usize = 0;
+ while (i < to_submit) : (i += 1) {
+ self.sq.array[tail & self.sq.mask] = self.sq.sqe_head & self.sq.mask;
+ tail +%= 1;
+ self.sq.sqe_head +%= 1;
+ }
+ // Ensure that the kernel can actually see the SQE updates when it sees the tail update.
+ @atomicStore(u32, self.sq.tail, tail, .Release);
+ }
+ return self.sq_ready();
+}
+
+/// Returns true if we are not using an SQ thread (thus nobody submits but us),
+/// or if IORING_SQ_NEED_WAKEUP is set and the SQ thread must be explicitly awakened.
+/// For the latter case, we set the SQ thread wakeup flag.
+/// Matches the implementation of sq_ring_needs_enter() in liburing.
+pub fn sq_ring_needs_enter(self: *IoUring, flags: *u32) bool {
+ assert(flags.* == 0);
+ if ((self.flags & linux.IORING_SETUP_SQPOLL) == 0) return true;
+ if ((@atomicLoad(u32, self.sq.flags, .Unordered) & linux.IORING_SQ_NEED_WAKEUP) != 0) {
+ flags.* |= linux.IORING_ENTER_SQ_WAKEUP;
+ return true;
+ }
+ return false;
+}
+
+/// Returns the number of flushed and unflushed SQEs pending in the submission queue.
+/// In other words, this is the number of SQEs in the submission queue, i.e. its length.
+/// These are SQEs that the kernel is yet to consume.
+/// Matches the implementation of io_uring_sq_ready in liburing.
+pub fn sq_ready(self: *IoUring) u32 {
+ // Always use the shared ring state (i.e. head and not sqe_head) to avoid going out of sync,
+ // see https://github.com/axboe/liburing/issues/92.
+ return self.sq.sqe_tail -% @atomicLoad(u32, self.sq.head, .Acquire);
+}
+
+/// Returns the number of CQEs in the completion queue, i.e. its length.
+/// These are CQEs that the application is yet to consume.
+/// Matches the implementation of io_uring_cq_ready in liburing.
+pub fn cq_ready(self: *IoUring) u32 {
+ return @atomicLoad(u32, self.cq.tail, .Acquire) -% self.cq.head.*;
+}
+
+/// Copies as many ready CQEs as fit into the destination `cqes` slice and advances the CQ ring.
+/// If none are ready, enters the kernel to wait for at most `wait_nr` CQEs (the kernel is also
+/// entered when the CQ ring needs flushing), then copies whatever became ready.
+/// Returns the number of CQEs copied.
+/// This single method covers all the wait/peek variants found in liburing, with batching.
+/// CQEs are copied rather than handed out as pointers because a pointer is 8 bytes while a
+/// CQE is only 16 bytes, and copying gives a safer and faster interface:
+/// * Safer, because there is no cqe_seen() to call, which avoids idempotency bugs.
+/// * Faster, because the atomic store release to `cq.head` is amortized across the batch.
+/// See https://github.com/axboe/liburing/issues/103#issuecomment-686665007.
+/// Matches the implementation of io_uring_peek_batch_cqe() in liburing, but supports waiting.
+pub fn copy_cqes(self: *IoUring, cqes: []linux.io_uring_cqe, wait_nr: u32) !u32 {
+    const already_ready = self.copy_cqes_ready(cqes);
+    if (already_ready > 0) return already_ready;
+
+    const must_enter = self.cq_ring_needs_flush() or wait_nr > 0;
+    if (!must_enter) return 0;
+
+    _ = try self.enter(0, wait_nr, linux.IORING_ENTER_GETEVENTS);
+    return self.copy_cqes_ready(cqes);
+}
+
+/// Copies the CQEs that are currently ready into `cqes` without entering the kernel, then
+/// advances the CQ ring by the number copied.
+/// Returns the number of CQEs copied, which is the smaller of `cqes.len` and `cq_ready()`.
+fn copy_cqes_ready(self: *IoUring, cqes: []linux.io_uring_cqe) u32 {
+    const ready = self.cq_ready();
+    const count = @min(cqes.len, ready);
+    const head = self.cq.head.* & self.cq.mask;
+
+    // First run: from `head` up to the end of the ring buffer (or fewer if `count` is smaller).
+    // Lengths are computed directly rather than from a masked tail index: when `count` equals
+    // the ring size the masked tail equals `head`, which is indistinguishable from "nothing
+    // to copy" and previously produced a @memcpy length mismatch.
+    const n = @min(self.cq.cqes.len - head, count);
+    @memcpy(cqes[0..n], self.cq.cqes[head..][0..n]);
+
+    if (count > n) {
+        // Second run: the remainder wrapped around to the start of the ring buffer.
+        const w = count - n;
+        @memcpy(cqes[n..][0..w], self.cq.cqes[0..w]);
+    }
+
+    self.cq_advance(count);
+    return count;
+}
+
+/// Returns a copy of one I/O completion, waiting for it if necessary, and advancing the CQ ring.
+/// Convenience wrapper around `copy_cqes()` for when you don't need to batch or peek; it keeps
+/// calling `copy_cqes()` with `wait_nr` of 1 until a CQE has been copied.
+pub fn copy_cqe(ring: *IoUring) !linux.io_uring_cqe {
+    var single: [1]linux.io_uring_cqe = undefined;
+    while ((try ring.copy_cqes(&single, 1)) == 0) {}
+    return single[0];
+}
+
+/// Returns true when `IORING_SQ_CQ_OVERFLOW` is set in the shared SQ ring flags.
+/// Matches the implementation of cq_ring_needs_flush() in liburing.
+pub fn cq_ring_needs_flush(self: *IoUring) bool {
+    const sq_flags = @atomicLoad(u32, self.sq.flags, .Unordered);
+    return (sq_flags & linux.IORING_SQ_CQ_OVERFLOW) != 0;
+}
+
+/// Marks a single zero-copy CQE as consumed by advancing the CQ ring by one.
+/// For advanced use cases only that implement custom completion queue methods.
+/// If you use copy_cqes() or copy_cqe() you must not call cqe_seen() or cq_advance().
+/// Must be called exactly once after a zero-copy CQE has been processed by your application.
+/// Not idempotent, calling more than once will result in other CQEs being lost.
+/// The `cqe` argument itself is not inspected.
+/// Matches the implementation of cqe_seen() in liburing.
+pub fn cqe_seen(self: *IoUring, cqe: *linux.io_uring_cqe) void {
+    self.cq_advance(1);
+    _ = cqe;
+}
+
+/// Advances the CQ ring head by `count` entries; a `count` of zero is a no-op.
+/// For advanced use cases only that implement custom completion queue methods.
+/// Matches the implementation of cq_advance() in liburing.
+pub fn cq_advance(self: *IoUring, count: u32) void {
+    if (count == 0) return;
+    const new_head = self.cq.head.* +% count;
+    // Release ordering: the kernel must only see the new head after the CQEs have been read.
+    @atomicStore(u32, self.cq.head, new_head, .Release);
+}
+
+/// Queues (but does not submit) an SQE to perform an `fsync(2)`.
+/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
+/// For example, for `fdatasync()` you can set `IORING_FSYNC_DATASYNC` in the SQE's `rw_flags`.
+///
+/// N.B. SQEs are initiated in the order in which they appear in the submission queue, but
+/// operations execute in parallel and completions are unordered. An application that queues
+/// a write followed by an fsync therefore cannot expect the fsync to apply to the write,
+/// since the fsync may complete before the write is issued to the disk.
+/// Preferably use `link_with_next_sqe()` on the write's SQE to link it with the fsync,
+/// or else insert a full write barrier using `drain_previous_sqes()` when queueing the fsync.
+pub fn fsync(self: *IoUring, user_data: u64, fd: os.fd_t, flags: u32) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_fsync(fd, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a no-op.
+/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
+/// A no-op is more useful than it may appear at first glance: for example, calling
+/// `drain_previous_sqes()` on the returned SQE lets you use the no-op to learn when the ring
+/// is idle before acting on a kill signal.
+pub fn nop(self: *IoUring, user_data: u64) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_nop();
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Selects where the data of a `read` operation is placed.
+/// Exactly one variant is active; `read` switches on it to decide how the SQE is prepared.
+pub const ReadBuffer = union(enum) {
+ /// io_uring will read directly into this buffer.
+ buffer: []u8,
+
+ /// io_uring will read directly into these buffers using readv.
+ iovecs: []const os.iovec,
+
+ /// io_uring will select a buffer that has previously been provided with `provide_buffers`.
+ /// The buffer group referenced by `group_id` must contain at least one buffer for the read to work.
+ /// `len` controls the number of bytes to read into the selected buffer.
+ buffer_selection: struct {
+ group_id: u16,
+ len: usize,
+ },
+};
+
+/// Queues (but does not submit) an SQE to perform a `read(2)` or `preadv(2)`, depending on
+/// the active `ReadBuffer` variant:
+/// * `ReadBuffer.buffer` uses `read(2)`
+/// * `ReadBuffer.iovecs` uses `preadv(2)`
+/// * `ReadBuffer.buffer_selection` prepares a `.READ` without a buffer address, sets
+///   `IOSQE_BUFFER_SELECT` in the SQE flags and stores the group id in `buf_index`
+/// If you want to do a `preadv2(2)` then set `rw_flags` on the returned SQE.
+/// See https://man7.org/linux/man-pages/man2/preadv2.2.html
+///
+/// Returns a pointer to the SQE.
+pub fn read(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    buffer: ReadBuffer,
+    offset: u64,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    switch (buffer) {
+        .buffer_selection => |pick| {
+            entry.prep_rw(.READ, fd, 0, pick.len, offset);
+            entry.flags |= linux.IOSQE_BUFFER_SELECT;
+            entry.buf_index = pick.group_id;
+        },
+        .iovecs => |vecs| entry.prep_readv(fd, vecs, offset),
+        .buffer => |dest| entry.prep_read(fd, dest, offset),
+    }
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `write(2)` of `buffer` to `fd` at `offset`.
+/// Returns a pointer to the SQE.
+pub fn write(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    buffer: []const u8,
+    offset: u64,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_write(fd, buffer, offset);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `splice(2)`.
+/// Either `fd_in` or `fd_out` must be a pipe.
+/// * If `fd_in` refers to a pipe, `off_in` is ignored and must be set to std.math.maxInt(u64).
+/// * If `fd_in` does not refer to a pipe and `off_in` is maxInt(u64), then `len` bytes are read
+///   from `fd_in` starting from the file offset, which is incremented by the number of bytes read.
+/// * If `fd_in` does not refer to a pipe and `off_in` is not maxInt(u64), then the starting
+///   offset of `fd_in` will be `off_in`.
+///
+/// This splice operation can be used to implement sendfile by splicing to an intermediate pipe
+/// first, then splicing to the final destination. In fact, the implementation of sendfile in
+/// the kernel uses splice internally.
+///
+/// NOTE that even if fd_in or fd_out refers to a pipe, the splice operation can still fail with
+/// EINVAL if one of the fds doesn't explicitly support the splice operation, e.g. reading from
+/// a terminal is unsupported from kernel 5.7 to 5.11.
+/// See https://github.com/axboe/liburing/issues/291
+///
+/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
+pub fn splice(self: *IoUring, user_data: u64, fd_in: os.fd_t, off_in: u64, fd_out: os.fd_t, off_out: u64, len: usize) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_splice(fd_in, off_in, fd_out, off_out, len);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an IORING_OP_READ_FIXED.
+/// The `buffer` provided must first be registered with the kernel by calling `register_buffers`,
+/// and `buffer_index` must be the same as its index in the array given to `register_buffers`.
+///
+/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
+pub fn read_fixed(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    buffer: *os.iovec,
+    offset: u64,
+    buffer_index: u16,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_read_fixed(fd, buffer, offset, buffer_index);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `pwritev()` of `iovecs` to `fd` at `offset`.
+/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
+/// For example, if you want to do a `pwritev2()` then set `rw_flags` on the returned SQE.
+/// See https://linux.die.net/man/2/pwritev.
+pub fn writev(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    iovecs: []const os.iovec_const,
+    offset: u64,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_writev(fd, iovecs, offset);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an IORING_OP_WRITE_FIXED.
+/// The `buffer` provided must first be registered with the kernel by calling `register_buffers`,
+/// and `buffer_index` must be the same as its index in the array given to `register_buffers`.
+///
+/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
+pub fn write_fixed(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    buffer: *os.iovec,
+    offset: u64,
+    buffer_index: u16,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_write_fixed(fd, buffer, offset, buffer_index);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an `accept4(2)` on the socket `fd`.
+/// `addr` and `addrlen` are optional and are forwarded as given.
+/// Returns a pointer to the SQE.
+/// Available since 5.5
+pub fn accept(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    addr: ?*os.sockaddr,
+    addrlen: ?*os.socklen_t,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_accept(fd, addr, addrlen, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) a multishot accept on a socket.
+///
+/// The multishot variant allows an application to issue a single accept request,
+/// which will repeatedly trigger a CQE when a connection request comes in.
+/// While the IORING_CQE_F_MORE flag is set in the CQE flags, accept will generate
+/// further CQEs.
+///
+/// Returns a pointer to the SQE.
+/// Available since 5.19
+pub fn accept_multishot(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    addr: ?*os.sockaddr,
+    addrlen: ?*os.socklen_t,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_multishot_accept(fd, addr, addrlen, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an accept using direct (registered) file descriptors.
+///
+/// To use an accept direct variant, the application must first have registered
+/// a file table (with register_files). This function always passes
+/// `IORING_FILE_INDEX_ALLOC`, so an unused table index will be dynamically chosen
+/// and returned in the CQE res field.
+///
+/// After creation, direct descriptors can be used by setting IOSQE_FIXED_FILE in the
+/// SQE flags member, and setting the SQE fd field to the direct descriptor value
+/// rather than the regular file descriptor.
+///
+/// Returns a pointer to the SQE.
+/// Available since 5.19
+pub fn accept_direct(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    addr: ?*os.sockaddr,
+    addrlen: ?*os.socklen_t,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_accept_direct(fd, addr, addrlen, flags, linux.IORING_FILE_INDEX_ALLOC);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) a multishot accept using direct (registered) file descriptors.
+/// Returns a pointer to the SQE.
+/// Available since 5.19
+pub fn accept_multishot_direct(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    addr: ?*os.sockaddr,
+    addrlen: ?*os.socklen_t,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_multishot_accept_direct(fd, addr, addrlen, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `connect(2)` on the socket `fd`.
+/// Returns a pointer to the SQE.
+pub fn connect(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    addr: *const os.sockaddr,
+    addrlen: os.socklen_t,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_connect(fd, addr, addrlen);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an `epoll_ctl(2)`.
+/// `epfd`, `fd`, `op` and `ev` are forwarded unchanged to `prep_epoll_ctl`.
+/// Returns a pointer to the SQE.
+pub fn epoll_ctl(
+    self: *IoUring,
+    user_data: u64,
+    epfd: os.fd_t,
+    fd: os.fd_t,
+    op: u32,
+    ev: ?*linux.epoll_event,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_epoll_ctl(epfd, fd, op, ev);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Selects where the data of a `recv` operation is placed.
+/// Exactly one variant is active; `recv` switches on it to decide how the SQE is prepared.
+pub const RecvBuffer = union(enum) {
+ /// io_uring will recv directly into this buffer.
+ buffer: []u8,
+
+ /// io_uring will select a buffer that has previously been provided with `provide_buffers`.
+ /// The buffer group referenced by `group_id` must contain at least one buffer for the recv call to work.
+ /// `len` controls the number of bytes to read into the selected buffer.
+ buffer_selection: struct {
+ group_id: u16,
+ len: usize,
+ },
+};
+
+/// Queues (but does not submit) an SQE to perform a `recv(2)`.
+/// * `RecvBuffer.buffer` receives directly into the given slice.
+/// * `RecvBuffer.buffer_selection` prepares a `.RECV` without a buffer address, stores `flags`
+///   in `rw_flags`, sets `IOSQE_BUFFER_SELECT` and stores the group id in `buf_index`.
+/// Returns a pointer to the SQE.
+/// Available since 5.6
+pub fn recv(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    buffer: RecvBuffer,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    switch (buffer) {
+        .buffer_selection => |pick| {
+            entry.prep_rw(.RECV, fd, 0, pick.len, 0);
+            entry.rw_flags = flags;
+            entry.flags |= linux.IOSQE_BUFFER_SELECT;
+            entry.buf_index = pick.group_id;
+        },
+        .buffer => |dest| entry.prep_recv(fd, dest, flags),
+    }
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `send(2)` of `buffer` on `fd`.
+/// Returns a pointer to the SQE.
+/// Available since 5.6
+pub fn send(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    buffer: []const u8,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_send(fd, buffer, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an async zerocopy `send(2)`.
+///
+/// This operation will most likely produce two CQEs:
+/// * The first CQE follows the usual rules, so its res field will contain the number of
+///   bytes sent or a negative error code. Its flags field will likely contain
+///   IORING_CQE_F_MORE, which means that a second CQE with the same user_data will follow.
+/// * The second CQE is the notification: its res field will be set to zero and its flags
+///   field will contain IORING_CQE_F_NOTIF.
+///
+/// The user must not modify the data buffer until the notification is posted.
+/// The two step model is needed because the kernel may hold on to buffers for a long
+/// time, e.g. waiting for a TCP ACK. Notifications are responsible for controlling
+/// the lifetime of the buffers. Even errored requests may generate a notification.
+///
+/// Returns a pointer to the SQE.
+/// Available since 6.0
+pub fn send_zc(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    buffer: []const u8,
+    send_flags: u32,
+    zc_flags: u16,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_send_zc(fd, buffer, send_flags, zc_flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an async zerocopy `send(2)`, additionally
+/// forwarding `buf_index` to `prep_send_zc_fixed`.
+/// NOTE(review): `buf_index` presumably names a buffer registered with `register_buffers` — confirm.
+/// Returns a pointer to the SQE.
+/// Available since 6.0
+pub fn send_zc_fixed(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    buffer: []const u8,
+    send_flags: u32,
+    zc_flags: u16,
+    buf_index: u16,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_send_zc_fixed(fd, buffer, send_flags, zc_flags, buf_index);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `recvmsg(2)` on `fd` into `msg`.
+/// Returns a pointer to the SQE.
+/// Available since 5.3
+pub fn recvmsg(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    msg: *os.msghdr,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_recvmsg(fd, msg, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `sendmsg(2)` of `msg` on `fd`.
+/// Returns a pointer to the SQE.
+/// Available since 5.3
+pub fn sendmsg(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    msg: *const os.msghdr_const,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_sendmsg(fd, msg, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an async zerocopy `sendmsg(2)` of `msg` on `fd`.
+/// Returns a pointer to the SQE.
+/// Available since 6.1
+pub fn sendmsg_zc(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    msg: *const os.msghdr_const,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_sendmsg_zc(fd, msg, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an `openat(2)` of `path` relative to `fd`.
+/// Returns a pointer to the SQE.
+/// Available since 5.6.
+pub fn openat(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    path: [*:0]const u8,
+    flags: linux.O,
+    mode: os.mode_t,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_openat(fd, path, flags, mode);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an openat using direct (registered) file descriptors.
+///
+/// To use an openat direct variant, the application must first have registered
+/// a file table (with register_files). `file_index` is forwarded unchanged to
+/// `prep_openat_direct`.
+/// NOTE(review): presumably passing `linux.IORING_FILE_INDEX_ALLOC` as `file_index` makes the
+/// kernel choose an unused table index and return it in the CQE res field — confirm.
+///
+/// After creation, direct descriptors can be used by setting IOSQE_FIXED_FILE in the
+/// SQE flags member, and setting the SQE fd field to the direct descriptor value
+/// rather than the regular file descriptor.
+///
+/// Returns a pointer to the SQE.
+/// Available since 5.15
+pub fn openat_direct(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    path: [*:0]const u8,
+    flags: linux.O,
+    mode: os.mode_t,
+    file_index: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_openat_direct(fd, path, flags, mode, file_index);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `close(2)` of `fd`.
+/// Returns a pointer to the SQE.
+/// Available since 5.6.
+pub fn close(self: *IoUring, user_data: u64, fd: os.fd_t) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_close(fd);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) a close of the registered file descriptor at `file_index`.
+/// Returns a pointer to the SQE.
+/// Available since 5.15
+pub fn close_direct(self: *IoUring, user_data: u64, file_index: u32) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_close_direct(file_index);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to register a timeout operation.
+/// Returns a pointer to the SQE.
+///
+/// The timeout completes when either the timeout expires, or after the specified number of
+/// events complete (if `count` is greater than `0`).
+///
+/// `flags` may be `0` for a relative timeout, or `IORING_TIMEOUT_ABS` for an absolute timeout.
+///
+/// The completion event result will be:
+/// * `-ETIME` if the timeout completed through expiration,
+/// * `0` if the timeout completed after the specified number of events, or
+/// * `-ECANCELED` if the timeout was removed before it expired.
+///
+/// io_uring timeouts use the `CLOCK.MONOTONIC` clock source.
+pub fn timeout(
+    self: *IoUring,
+    user_data: u64,
+    ts: *const os.linux.kernel_timespec,
+    count: u32,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_timeout(ts, count, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to remove an existing timeout operation.
+/// Returns a pointer to the SQE.
+///
+/// The timeout to remove is identified by `timeout_user_data`, i.e. the `user_data` it was
+/// queued with.
+///
+/// The completion event result will be:
+/// * `0` if the timeout was found and cancelled successfully,
+/// * `-EBUSY` if the timeout was found but expiration was already in progress, or
+/// * `-ENOENT` if the timeout was not found.
+pub fn timeout_remove(
+    self: *IoUring,
+    user_data: u64,
+    timeout_user_data: u64,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_timeout_remove(timeout_user_data, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to add a link timeout operation.
+/// Returns a pointer to the SQE.
+///
+/// You need to set linux.IOSQE_IO_LINK in the flags of the target operation
+/// and then call this method right after queueing the target operation.
+/// See https://lwn.net/Articles/803932/ for detail.
+///
+/// If the dependent request finishes before the linked timeout, the timeout
+/// is canceled. If the timeout finishes before the dependent request, the
+/// dependent request will be canceled.
+///
+/// The completion event result of the link_timeout will be:
+/// * `-ETIME` if the timeout finishes before the dependent request
+///   (in this case, the completion event result of the dependent request will
+///   be `-ECANCELED`), or
+/// * `-EALREADY` if the dependent request finishes before the linked timeout.
+pub fn link_timeout(
+    self: *IoUring,
+    user_data: u64,
+    ts: *const os.linux.kernel_timespec,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_link_timeout(ts, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `poll(2)` on `fd` for the events
+/// in `poll_mask`.
+/// Returns a pointer to the SQE.
+pub fn poll_add(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    poll_mask: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_poll_add(fd, poll_mask);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to remove an existing poll operation, which is
+/// referred to by `target_user_data`.
+/// Returns a pointer to the SQE.
+pub fn poll_remove(
+    self: *IoUring,
+    user_data: u64,
+    target_user_data: u64,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_poll_remove(target_user_data);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to update the user data of an existing poll
+/// operation. `old_user_data`, `new_user_data`, `poll_mask` and `flags` are forwarded
+/// unchanged to `prep_poll_update`.
+/// Returns a pointer to the SQE.
+pub fn poll_update(
+    self: *IoUring,
+    user_data: u64,
+    old_user_data: u64,
+    new_user_data: u64,
+    poll_mask: u32,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_poll_update(old_user_data, new_user_data, poll_mask, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `fallocate(2)` on `fd` with the given
+/// `mode`, `offset` and `len`.
+/// Returns a pointer to the SQE.
+pub fn fallocate(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    mode: i32,
+    offset: u64,
+    len: u64,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_fallocate(fd, mode, offset, len);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `statx(2)` of `path` relative to `fd`.
+/// `buf` is the `linux.Statx` destination forwarded to `prep_statx`.
+/// Returns a pointer to the SQE.
+pub fn statx(
+    self: *IoUring,
+    user_data: u64,
+    fd: os.fd_t,
+    path: [:0]const u8,
+    flags: u32,
+    mask: u32,
+    buf: *linux.Statx,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_statx(fd, path, flags, mask, buf);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to remove an existing operation.
+/// Returns a pointer to the SQE.
+///
+/// The operation to cancel is identified by `cancel_user_data`, i.e. the `user_data` it was
+/// queued with.
+///
+/// The completion event result will be:
+/// * `0` if the operation was found and cancelled successfully,
+/// * `-EALREADY` if the operation was found but was already in progress, or
+/// * `-ENOENT` if the operation was not found.
+pub fn cancel(
+    self: *IoUring,
+    user_data: u64,
+    cancel_user_data: u64,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_cancel(cancel_user_data, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `shutdown(2)` on the socket `sockfd`.
+/// `how` is forwarded unchanged to `prep_shutdown`.
+/// Returns a pointer to the SQE.
+pub fn shutdown(
+    self: *IoUring,
+    user_data: u64,
+    sockfd: os.socket_t,
+    how: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_shutdown(sockfd, how);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `renameat2(2)` from `old_path`
+/// (relative to `old_dir_fd`) to `new_path` (relative to `new_dir_fd`).
+/// Returns a pointer to the SQE.
+pub fn renameat(
+    self: *IoUring,
+    user_data: u64,
+    old_dir_fd: os.fd_t,
+    old_path: [*:0]const u8,
+    new_dir_fd: os.fd_t,
+    new_path: [*:0]const u8,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_renameat(old_dir_fd, old_path, new_dir_fd, new_path, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform an `unlinkat(2)` of `path` relative
+/// to `dir_fd`.
+/// Returns a pointer to the SQE.
+pub fn unlinkat(
+    self: *IoUring,
+    user_data: u64,
+    dir_fd: os.fd_t,
+    path: [*:0]const u8,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_unlinkat(dir_fd, path, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `mkdirat(2)` of `path` relative
+/// to `dir_fd`, with the given `mode`.
+/// Returns a pointer to the SQE.
+pub fn mkdirat(
+    self: *IoUring,
+    user_data: u64,
+    dir_fd: os.fd_t,
+    path: [*:0]const u8,
+    mode: os.mode_t,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_mkdirat(dir_fd, path, mode);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `symlinkat(2)`: `link_path` (relative
+/// to `new_dir_fd`) is created pointing at `target`.
+/// Returns a pointer to the SQE.
+pub fn symlinkat(
+    self: *IoUring,
+    user_data: u64,
+    target: [*:0]const u8,
+    new_dir_fd: os.fd_t,
+    link_path: [*:0]const u8,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_symlinkat(target, new_dir_fd, link_path);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `linkat(2)` from `old_path`
+/// (relative to `old_dir_fd`) to `new_path` (relative to `new_dir_fd`).
+/// Returns a pointer to the SQE.
+pub fn linkat(
+    self: *IoUring,
+    user_data: u64,
+    old_dir_fd: os.fd_t,
+    old_path: [*:0]const u8,
+    new_dir_fd: os.fd_t,
+    new_path: [*:0]const u8,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_linkat(old_dir_fd, old_path, new_dir_fd, new_path, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to provide a group of buffers used for commands that
+/// read/receive data.
+/// Returns a pointer to the SQE.
+///
+/// Provided buffers can be used in `read`, `recv` or `recvmsg` commands via .buffer_selection.
+///
+/// The kernel expects `buffers` to point at a contiguous block of memory of size
+/// (buffers_count * buffer_size).
+pub fn provide_buffers(
+    self: *IoUring,
+    user_data: u64,
+    buffers: [*]u8,
+    buffer_size: usize,
+    buffers_count: usize,
+    group_id: usize,
+    buffer_id: usize,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_provide_buffers(buffers, buffer_size, buffers_count, group_id, buffer_id);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to remove `buffers_count` provided buffers from the
+/// group `group_id`.
+/// Returns a pointer to the SQE.
+pub fn remove_buffers(
+    self: *IoUring,
+    user_data: u64,
+    buffers_count: usize,
+    group_id: usize,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_remove_buffers(buffers_count, group_id);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues (but does not submit) an SQE to perform a `waitid(2)`.
+/// `id_type`, `id`, `infop`, `options` and `flags` are forwarded unchanged to `prep_waitid`.
+/// Returns a pointer to the SQE.
+pub fn waitid(
+    self: *IoUring,
+    user_data: u64,
+    id_type: linux.P,
+    id: i32,
+    infop: *linux.siginfo_t,
+    options: u32,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_waitid(id_type, id, infop, options, flags);
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Registers an array of file descriptors with the ring.
+///
+/// Every time a file descriptor is put in an SQE and submitted to the kernel, the kernel must
+/// retrieve a reference to the file, and once I/O has completed the file reference must be
+/// dropped. The atomic nature of this file reference can be a slowdown for high IOPS workloads.
+/// This slowdown can be avoided by pre-registering file descriptors.
+///
+/// To refer to a registered file descriptor, IOSQE_FIXED_FILE must be set in the SQE's flags,
+/// and the SQE's fd must be set to the index of the file descriptor in the registered array.
+///
+/// Registering file descriptors will wait for the ring to idle.
+/// Files are automatically unregistered by the kernel when the ring is torn down.
+/// An application need unregister only if it wants to register a new array of file descriptors.
+pub fn register_files(self: *IoUring, fds: []const os.fd_t) !void {
+    assert(self.fd >= 0);
+    const fds_ptr: *const anyopaque = @ptrCast(fds.ptr);
+    const fds_len: u32 = @intCast(fds.len);
+    const res = linux.io_uring_register(self.fd, .REGISTER_FILES, fds_ptr, fds_len);
+    try handle_registration_result(res);
+}
+
+/// Updates registered file descriptors.
+///
+/// Updates are applied starting at the provided `offset` in the original file descriptors slice.
+/// There are three kinds of updates:
+/// * turning a sparse entry (where the fd is -1) into a real one
+/// * removing an existing entry (set the fd to -1)
+/// * replacing an existing entry with a new fd
+/// Adding new file descriptors must be done with `register_files`.
+pub fn register_files_update(self: *IoUring, offset: u32, fds: []const os.fd_t) !void {
+    assert(self.fd >= 0);
+
+    // Argument block handed to the kernel: start offset, a reserved word, and the address
+    // of the fd array stored as a 64-bit integer.
+    const FilesUpdate = extern struct {
+        offset: u32,
+        resv: u32,
+        fds: u64 align(8),
+    };
+    var update: FilesUpdate = .{
+        .offset = offset,
+        .resv = 0,
+        .fds = @intFromPtr(fds.ptr),
+    };
+
+    const update_ptr: *const anyopaque = @ptrCast(&update);
+    const fds_len: u32 = @intCast(fds.len);
+    const res = linux.io_uring_register(self.fd, .REGISTER_FILES_UPDATE, update_ptr, fds_len);
+    try handle_registration_result(res);
+}
+
+/// Registers the file descriptor for an eventfd that will be notified of completion events on
+/// an io_uring instance.
+/// Only a single eventfd can be registered at any given point in time.
+pub fn register_eventfd(self: *IoUring, fd: os.fd_t) !void {
+    assert(self.fd >= 0);
+    const fd_ptr: *const anyopaque = @ptrCast(&fd);
+    const res = linux.io_uring_register(self.fd, .REGISTER_EVENTFD, fd_ptr, 1);
+    try handle_registration_result(res);
+}
+
+/// Registers the file descriptor for an eventfd that will be notified of completion events on
+/// an io_uring instance. Notifications are only posted for events that complete in an async
+/// manner, which means that events that complete inline while being submitted do not trigger
+/// a notification event.
+/// Only a single eventfd can be registered at any given point in time.
+pub fn register_eventfd_async(self: *IoUring, fd: os.fd_t) !void {
+    assert(self.fd >= 0);
+    const fd_ptr: *const anyopaque = @ptrCast(&fd);
+    const res = linux.io_uring_register(self.fd, .REGISTER_EVENTFD_ASYNC, fd_ptr, 1);
+    try handle_registration_result(res);
+}
+
+/// Unregisters the registered eventfd file descriptor.
+pub fn unregister_eventfd(self: *IoUring) !void {
+    assert(self.fd >= 0);
+    const res = linux.io_uring_register(self.fd, .UNREGISTER_EVENTFD, null, 0);
+    try handle_registration_result(res);
+}
+
+/// Registers an array of buffers for use with `read_fixed` and `write_fixed`.
+/// A buffer's position in `buffers` is the `buffer_index` those functions expect.
+pub fn register_buffers(self: *IoUring, buffers: []const os.iovec) !void {
+    assert(self.fd >= 0);
+    const buffers_len: u32 = @intCast(buffers.len);
+    const res = linux.io_uring_register(self.fd, .REGISTER_BUFFERS, buffers.ptr, buffers_len);
+    try handle_registration_result(res);
+}
+
+/// Releases the buffers previously registered with the ring.
+/// Fails with `error.BuffersNotRegistered` when the kernel reports `NXIO`.
+pub fn unregister_buffers(self: *IoUring) !void {
+    assert(self.fd >= 0);
+    const res = linux.io_uring_register(self.fd, .UNREGISTER_BUFFERS, null, 0);
+    const errno = linux.getErrno(res);
+    if (errno == .SUCCESS) return;
+    if (errno == .NXIO) return error.BuffersNotRegistered;
+    return os.unexpectedErrno(errno);
+}
+
+/// Translates the raw return value of an `io_uring_register` call into a Zig error.
+/// Returns normally when the call succeeded; any errno without a dedicated mapping below
+/// is reported through `os.unexpectedErrno`.
+fn handle_registration_result(res: usize) !void {
+    const errno = linux.getErrno(res);
+    if (errno == .SUCCESS) return;
+    switch (errno) {
+        // One or more fds in the array are invalid, or the kernel does not support sparse sets.
+        .BADF => return error.FileDescriptorInvalid,
+        .BUSY => return error.FilesAlreadyRegistered,
+        .INVAL => return error.FilesEmpty,
+        // Adding `nr_args` file references would exceed the per-user RLIMIT_NOFILE resource
+        // limit while the CAP_SYS_RESOURCE capability is not set, or `nr_args` exceeds the
+        // maximum allowed for a fixed file set (older kernels have a limit of 1024 files vs
+        // 64K files).
+        .MFILE => return error.UserFdQuotaExceeded,
+        // Insufficient kernel resources, or the caller had a non-zero RLIMIT_MEMLOCK soft
+        // resource limit but tried to lock more memory than the limit permitted (not enforced
+        // when the process is privileged with CAP_IPC_LOCK).
+        .NOMEM => return error.SystemResources,
+        // Attempt to register files on a ring already registering files or being torn down.
+        .NXIO => return error.RingShuttingDownOrAlreadyRegisteringFiles,
+        else => return os.unexpectedErrno(errno),
+    }
+}
+
+/// Drops every file descriptor previously registered with the ring.
+/// Fails with `error.FilesNotRegistered` when the kernel reports `NXIO`.
+pub fn unregister_files(self: *IoUring) !void {
+    assert(self.fd >= 0);
+    const res = linux.io_uring_register(self.fd, .UNREGISTER_FILES, null, 0);
+    const errno = linux.getErrno(res);
+    if (errno == .SUCCESS) return;
+    if (errno == .NXIO) return error.FilesNotRegistered;
+    return os.unexpectedErrno(errno);
+}
+
+/// Queues a socket creation request and returns the SQE that was prepared.
+/// The new socket fd will be returned in the completion result.
+/// Available since 5.19
+pub fn socket(
+    self: *IoUring,
+    user_data: u64,
+    domain: u32,
+    socket_type: u32,
+    protocol: u32,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_socket(domain, socket_type, protocol, flags);
+    // Tag the entry only after it has been prepared.
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues a socket creation request targeting the registered file slot `file_index`
+/// and returns the SQE that was prepared.
+/// Available since 5.19
+pub fn socket_direct(
+    self: *IoUring,
+    user_data: u64,
+    domain: u32,
+    socket_type: u32,
+    protocol: u32,
+    flags: u32,
+    file_index: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_socket_direct(domain, socket_type, protocol, flags, file_index);
+    // Tag the entry only after it has been prepared.
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// Queues a socket creation request for a registered file whose index is chosen by the
+/// kernel (file index alloc), and returns the SQE that was prepared.
+/// The file index will be returned in the CQE res field.
+/// Available since 5.19
+pub fn socket_direct_alloc(
+    self: *IoUring,
+    user_data: u64,
+    domain: u32,
+    socket_type: u32,
+    protocol: u32,
+    flags: u32,
+) !*linux.io_uring_sqe {
+    const entry = try self.get_sqe();
+    entry.prep_socket_direct_alloc(domain, socket_type, protocol, flags);
+    // Tag the entry only after it has been prepared.
+    entry.user_data = user_data;
+    return entry;
+}
+
+/// View over the submission side of an io_uring instance.
+/// Owns two memory mappings of the ring fd: `mmap` (the ring itself) and `mmap_sqes`
+/// (the SQE array); both are released by `deinit`.
+pub const SubmissionQueue = struct {
+    // The pointer fields below point into `mmap` at the offsets reported in `p.sq_off`.
+    head: *u32,
+    tail: *u32,
+    // Copied out of the mapping once, at `init` time.
+    mask: u32,
+    flags: *u32,
+    dropped: *u32,
+    // `p.sq_entries` u32 slots located at `p.sq_off.array` within `mmap`.
+    array: []u32,
+    // `p.sq_entries` submission queue entries backed by `mmap_sqes`.
+    sqes: []linux.io_uring_sqe,
+    mmap: []align(mem.page_size) u8,
+    mmap_sqes: []align(mem.page_size) u8,
+
+    // We use `sqe_head` and `sqe_tail` in the same way as liburing:
+    // We increment `sqe_tail` (but not `tail`) for each call to `get_sqe()`.
+    // We then set `tail` to `sqe_tail` once, only when these events are actually submitted.
+    // This allows us to amortize the cost of the @atomicStore to `tail` across multiple SQEs.
+    sqe_head: u32 = 0,
+    sqe_tail: u32 = 0,
+
+    /// Maps the ring and the SQE array of the io_uring file descriptor `fd`, using the
+    /// offsets and entry counts in `p`.
+    /// Asserts that `fd` is non-negative and that `p.features` includes
+    /// `IORING_FEAT_SINGLE_MMAP`. Any error from `os.mmap` is returned to the caller;
+    /// if the second mapping fails, the first one is unmapped again.
+    pub fn init(fd: os.fd_t, p: linux.io_uring_params, sq_unused_doc_anchor: void) !SubmissionQueue {
+    }
+};
+
+/// View over the completion side of an io_uring instance.
+/// It holds no mapping of its own: every pointer refers into the mapping owned by the
+/// `SubmissionQueue` passed to `init`.
+pub const CompletionQueue = struct {
+    head: *u32,
+    tail: *u32,
+    mask: u32,
+    overflow: *u32,
+    cqes: []linux.io_uring_cqe,
+
+    /// Builds the completion queue view on top of `sq.mmap`, using the offsets in `p.cq_off`.
+    /// Asserts that `fd` is non-negative and that `p.features` includes
+    /// `IORING_FEAT_SINGLE_MMAP`.
+    pub fn init(fd: os.fd_t, p: linux.io_uring_params, sq: SubmissionQueue) !CompletionQueue {
+        assert(fd >= 0);
+        assert((p.features & linux.IORING_FEAT_SINGLE_MMAP) != 0);
+        const ring = sq.mmap;
+        const offsets = p.cq_off;
+        const entries: [*]linux.io_uring_cqe = @ptrCast(@alignCast(&ring[offsets.cqes]));
+        // The entry count stored in the ring must agree with the one reported in `p`.
+        const ring_entries: *u32 = @ptrCast(@alignCast(&ring[offsets.ring_entries]));
+        assert(p.cq_entries == ring_entries.*);
+        return .{
+            .head = @ptrCast(@alignCast(&ring[offsets.head])),
+            .tail = @ptrCast(@alignCast(&ring[offsets.tail])),
+            .mask = @as(*u32, @ptrCast(@alignCast(&ring[offsets.ring_mask]))).*,
+            .overflow = @ptrCast(@alignCast(&ring[offsets.overflow])),
+            .cqes = entries[0..p.cq_entries],
+        };
+    }
+
+    /// Does nothing: the mapping is shared with, and released by, the submission queue.
+    /// Kept for symmetry with `SubmissionQueue` and for any future feature support.
+    pub fn deinit(self: *CompletionQueue) void {
+        _ = self;
+    }
+};
+
+test "structs/offsets/entries" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    // Sizes of the structures exchanged with the kernel.
+    const params_size: usize = @sizeOf(linux.io_uring_params);
+    const sqe_size: usize = @sizeOf(linux.io_uring_sqe);
+    const cqe_size: usize = @sizeOf(linux.io_uring_cqe);
+    try testing.expectEqual(@as(usize, 120), params_size);
+    try testing.expectEqual(@as(usize, 64), sqe_size);
+    try testing.expectEqual(@as(usize, 16), cqe_size);
+
+    // mmap offsets selecting the individual ring regions.
+    try testing.expectEqual(0, linux.IORING_OFF_SQ_RING);
+    try testing.expectEqual(0x8000000, linux.IORING_OFF_CQ_RING);
+    try testing.expectEqual(0x10000000, linux.IORING_OFF_SQES);
+
+    // Entry counts that are zero or not a power of two are rejected.
+    try testing.expectError(error.EntriesZero, IoUring.init(0, 0));
+    try testing.expectError(error.EntriesNotPowerOfTwo, IoUring.init(3, 0));
+}
+
+// Submits NOP requests and checks the queue bookkeeping before and after each submit.
+test "nop" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer {
+        ring.deinit();
+        // After `deinit` the ring's fd is expected to read back as -1.
+        testing.expectEqual(@as(os.fd_t, -1), ring.fd) catch @panic("test failed");
+    }
+
+    const sqe = try ring.nop(0xaaaaaaaa);
+    // Apart from the opcode and user data, every field of the prepared SQE is zero.
+    try testing.expectEqual(linux.io_uring_sqe{
+        .opcode = .NOP,
+        .flags = 0,
+        .ioprio = 0,
+        .fd = 0,
+        .off = 0,
+        .addr = 0,
+        .len = 0,
+        .rw_flags = 0,
+        .user_data = 0xaaaaaaaa,
+        .buf_index = 0,
+        .personality = 0,
+        .splice_fd_in = 0,
+        .addr3 = 0,
+        .resv = 0,
+    }, sqe.*);
+
+    // Before submitting: only the local `sqe_tail` has advanced, the shared `tail` has not.
+    try testing.expectEqual(@as(u32, 0), ring.sq.sqe_head);
+    try testing.expectEqual(@as(u32, 1), ring.sq.sqe_tail);
+    try testing.expectEqual(@as(u32, 0), ring.sq.tail.*);
+    try testing.expectEqual(@as(u32, 0), ring.cq.head.*);
+    try testing.expectEqual(@as(u32, 1), ring.sq_ready());
+    try testing.expectEqual(@as(u32, 0), ring.cq_ready());
+
+    // After submitting: `sqe_head` and the shared `tail` have caught up with `sqe_tail`.
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(@as(u32, 1), ring.sq.sqe_head);
+    try testing.expectEqual(@as(u32, 1), ring.sq.sqe_tail);
+    try testing.expectEqual(@as(u32, 1), ring.sq.tail.*);
+    try testing.expectEqual(@as(u32, 0), ring.cq.head.*);
+    try testing.expectEqual(@as(u32, 0), ring.sq_ready());
+
+    // Copying the CQE out advances the completion queue head.
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xaaaaaaaa,
+        .res = 0,
+        .flags = 0,
+    }, try ring.copy_cqe());
+    try testing.expectEqual(@as(u32, 1), ring.cq.head.*);
+    try testing.expectEqual(@as(u32, 0), ring.cq_ready());
+
+    // A second NOP, this time with the IOSQE_IO_DRAIN flag set on the SQE.
+    const sqe_barrier = try ring.nop(0xbbbbbbbb);
+    sqe_barrier.flags |= linux.IOSQE_IO_DRAIN;
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xbbbbbbbb,
+        .res = 0,
+        .flags = 0,
+    }, try ring.copy_cqe());
+    try testing.expectEqual(@as(u32, 2), ring.sq.sqe_head);
+    try testing.expectEqual(@as(u32, 2), ring.sq.sqe_tail);
+    try testing.expectEqual(@as(u32, 2), ring.sq.tail.*);
+    try testing.expectEqual(@as(u32, 2), ring.cq.head.*);
+}
+
+// Reads from /dev/zero through a registered (fixed) file using a vectored read.
+test "readv" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
+    defer os.close(fd);
+
+    // Linux Kernel 5.4 supports IORING_REGISTER_FILES but not sparse fd sets (i.e. an fd of -1).
+    // Linux Kernel 5.5 adds support for sparse fd sets.
+    // Compare:
+    // https://github.com/torvalds/linux/blob/v5.4/fs/io_uring.c#L3119-L3124 vs
+    // https://github.com/torvalds/linux/blob/v5.8/fs/io_uring.c#L6687-L6691
+    // We therefore avoid stressing sparse fd sets here:
+    var registered_fds = [_]os.fd_t{0} ** 1;
+    const fd_index = 0;
+    registered_fds[fd_index] = fd;
+    try ring.register_files(registered_fds[0..]);
+
+    // The buffer starts out filled with 42 so that the zeroes read back are observable.
+    var buffer = [_]u8{42} ** 128;
+    var iovecs = [_]os.iovec{os.iovec{ .iov_base = &buffer, .iov_len = buffer.len }};
+    // `fd_index` is passed in place of an fd; IOSQE_FIXED_FILE is set on the SQE below.
+    const sqe = try ring.read(0xcccccccc, fd_index, .{ .iovecs = iovecs[0..] }, 0);
+    try testing.expectEqual(linux.IORING_OP.READV, sqe.opcode);
+    sqe.flags |= linux.IOSQE_FIXED_FILE;
+
+    // The ring was created with a single entry, so a second SQE cannot be acquired.
+    try testing.expectError(error.SubmissionQueueFull, ring.nop(0));
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xcccccccc,
+        .res = buffer.len,
+        .flags = 0,
+    }, try ring.copy_cqe());
+    try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]);
+
+    try ring.unregister_files();
+}
+
+// Links a vectored write, an fsync and a vectored read on one file and checks that the
+// three completions arrive in that order and that the data round-trips.
+test "writev/fsync/readv" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(4, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const path = "test_io_uring_writev_fsync_readv";
+    const file = try tmp.dir.createFile(path, .{ .read = true, .truncate = true });
+    defer file.close();
+    const fd = file.handle;
+
+    const buffer_write = [_]u8{42} ** 128;
+    const iovecs_write = [_]os.iovec_const{
+        os.iovec_const{ .iov_base = &buffer_write, .iov_len = buffer_write.len },
+    };
+    var buffer_read = [_]u8{0} ** 128;
+    var iovecs_read = [_]os.iovec{
+        os.iovec{ .iov_base = &buffer_read, .iov_len = buffer_read.len },
+    };
+
+    // Write at file offset 17; IOSQE_IO_LINK is set on this SQE.
+    const sqe_writev = try ring.writev(0xdddddddd, fd, iovecs_write[0..], 17);
+    try testing.expectEqual(linux.IORING_OP.WRITEV, sqe_writev.opcode);
+    try testing.expectEqual(@as(u64, 17), sqe_writev.off);
+    sqe_writev.flags |= linux.IOSQE_IO_LINK;
+
+    // The fsync SQE also gets IOSQE_IO_LINK set.
+    const sqe_fsync = try ring.fsync(0xeeeeeeee, fd, 0);
+    try testing.expectEqual(linux.IORING_OP.FSYNC, sqe_fsync.opcode);
+    try testing.expectEqual(fd, sqe_fsync.fd);
+    sqe_fsync.flags |= linux.IOSQE_IO_LINK;
+
+    // Read back from the same file offset 17.
+    const sqe_readv = try ring.read(0xffffffff, fd, .{ .iovecs = iovecs_read[0..] }, 17);
+    try testing.expectEqual(linux.IORING_OP.READV, sqe_readv.opcode);
+    try testing.expectEqual(@as(u64, 17), sqe_readv.off);
+
+    // Submit all three SQEs and wait for all three completions.
+    try testing.expectEqual(@as(u32, 3), ring.sq_ready());
+    try testing.expectEqual(@as(u32, 3), try ring.submit_and_wait(3));
+    try testing.expectEqual(@as(u32, 0), ring.sq_ready());
+    try testing.expectEqual(@as(u32, 3), ring.cq_ready());
+
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xdddddddd,
+        .res = buffer_write.len,
+        .flags = 0,
+    }, try ring.copy_cqe());
+    try testing.expectEqual(@as(u32, 2), ring.cq_ready());
+
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xeeeeeeee,
+        .res = 0,
+        .flags = 0,
+    }, try ring.copy_cqe());
+    try testing.expectEqual(@as(u32, 1), ring.cq_ready());
+
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xffffffff,
+        .res = buffer_read.len,
+        .flags = 0,
+    }, try ring.copy_cqe());
+    try testing.expectEqual(@as(u32, 0), ring.cq_ready());
+
+    try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]);
+}
+
+test "write/read" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => |e| return e,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+    const file = try tmp.dir.createFile("test_io_uring_write_read", .{ .read = true, .truncate = true });
+    defer file.close();
+    const fd = file.handle;
+
+    // Both requests operate on the same file offset.
+    const file_offset: u64 = 10;
+    const buffer_write = [_]u8{97} ** 20;
+    var buffer_read = [_]u8{98} ** 20;
+
+    // Queue the write with IOSQE_IO_LINK set, followed by the read.
+    const sqe_write = try ring.write(0x11111111, fd, buffer_write[0..], file_offset);
+    try testing.expectEqual(linux.IORING_OP.WRITE, sqe_write.opcode);
+    try testing.expectEqual(file_offset, sqe_write.off);
+    sqe_write.flags |= linux.IOSQE_IO_LINK;
+
+    const sqe_read = try ring.read(0x22222222, fd, .{ .buffer = buffer_read[0..] }, file_offset);
+    try testing.expectEqual(linux.IORING_OP.READ, sqe_read.opcode);
+    try testing.expectEqual(file_offset, sqe_read.off);
+
+    try testing.expectEqual(@as(u32, 2), try ring.submit());
+
+    const cqe_write = try ring.copy_cqe();
+    const cqe_read = try ring.copy_cqe();
+    // Prior to Linux Kernel 5.6 this is the only way to test for read/write support:
+    // https://lwn.net/Articles/809820/
+    if (cqe_write.err() == .INVAL) return error.SkipZigTest;
+    if (cqe_read.err() == .INVAL) return error.SkipZigTest;
+
+    const expected_write = linux.io_uring_cqe{
+        .user_data = 0x11111111,
+        .res = buffer_write.len,
+        .flags = 0,
+    };
+    const expected_read = linux.io_uring_cqe{
+        .user_data = 0x22222222,
+        .res = buffer_read.len,
+        .flags = 0,
+    };
+    try testing.expectEqual(expected_write, cqe_write);
+    try testing.expectEqual(expected_read, cqe_read);
+    try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]);
+}
+
+// Splices data from a source file into a pipe, from the pipe into a destination file,
+// and finally reads the destination file back, all as one chain of linked requests.
+test "splice/read" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(4, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    // Remove the temporary directory on every exit path, as the other tests in this file do.
+    defer tmp.cleanup();
+    const path_src = "test_io_uring_splice_src";
+    const file_src = try tmp.dir.createFile(path_src, .{ .read = true, .truncate = true });
+    defer file_src.close();
+    const fd_src = file_src.handle;
+
+    const path_dst = "test_io_uring_splice_dst";
+    const file_dst = try tmp.dir.createFile(path_dst, .{ .read = true, .truncate = true });
+    defer file_dst.close();
+    const fd_dst = file_dst.handle;
+
+    const buffer_write = [_]u8{97} ** 20;
+    var buffer_read = [_]u8{98} ** 20;
+    _ = try file_src.write(&buffer_write);
+
+    const fds = try os.pipe();
+    // Close both pipe ends when the test finishes so that no descriptors are leaked.
+    defer {
+        os.close(fds[0]);
+        os.close(fds[1]);
+    }
+    // Offset passed for the pipe side of each splice.
+    // NOTE(review): presumably the "no offset" sentinel required for pipes - confirm.
+    const pipe_offset: u64 = std.math.maxInt(u64);
+
+    // Source file (offset 0) -> write end of the pipe.
+    const sqe_splice_to_pipe = try ring.splice(0x11111111, fd_src, 0, fds[1], pipe_offset, buffer_write.len);
+    try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_to_pipe.opcode);
+    try testing.expectEqual(@as(u64, 0), sqe_splice_to_pipe.addr);
+    try testing.expectEqual(pipe_offset, sqe_splice_to_pipe.off);
+    sqe_splice_to_pipe.flags |= linux.IOSQE_IO_LINK;
+
+    // Read end of the pipe -> destination file at offset 10.
+    const sqe_splice_from_pipe = try ring.splice(0x22222222, fds[0], pipe_offset, fd_dst, 10, buffer_write.len);
+    try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_from_pipe.opcode);
+    try testing.expectEqual(pipe_offset, sqe_splice_from_pipe.addr);
+    try testing.expectEqual(@as(u64, 10), sqe_splice_from_pipe.off);
+    sqe_splice_from_pipe.flags |= linux.IOSQE_IO_LINK;
+
+    // Read the destination file back from offset 10.
+    const sqe_read = try ring.read(0x33333333, fd_dst, .{ .buffer = buffer_read[0..] }, 10);
+    try testing.expectEqual(linux.IORING_OP.READ, sqe_read.opcode);
+    try testing.expectEqual(@as(u64, 10), sqe_read.off);
+    try testing.expectEqual(@as(u32, 3), try ring.submit());
+
+    const cqe_splice_to_pipe = try ring.copy_cqe();
+    const cqe_splice_from_pipe = try ring.copy_cqe();
+    const cqe_read = try ring.copy_cqe();
+    // Prior to Linux Kernel 5.6 this is the only way to test for splice/read support:
+    // https://lwn.net/Articles/809820/
+    if (cqe_splice_to_pipe.err() == .INVAL) return error.SkipZigTest;
+    if (cqe_splice_from_pipe.err() == .INVAL) return error.SkipZigTest;
+    if (cqe_read.err() == .INVAL) return error.SkipZigTest;
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x11111111,
+        .res = buffer_write.len,
+        .flags = 0,
+    }, cqe_splice_to_pipe);
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x22222222,
+        .res = buffer_write.len,
+        .flags = 0,
+    }, cqe_splice_from_pipe);
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x33333333,
+        .res = buffer_read.len,
+        .flags = 0,
+    }, cqe_read);
+    try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]);
+}
+
+// Writes one registered buffer to a file and reads the file back into a second
+// registered buffer, using the fixed-buffer variants of write and read.
+test "write_fixed/read_fixed" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const path = "test_io_uring_write_read_fixed";
+    const file = try tmp.dir.createFile(path, .{ .read = true, .truncate = true });
+    defer file.close();
+    const fd = file.handle;
+
+    var raw_buffers: [2][11]u8 = undefined;
+    // First buffer will be written to the file.
+    @memset(&raw_buffers[0], 'z');
+    raw_buffers[0][0.."foobar".len].* = "foobar".*;
+
+    var buffers = [2]os.iovec{
+        .{ .iov_base = &raw_buffers[0], .iov_len = raw_buffers[0].len },
+        .{ .iov_base = &raw_buffers[1], .iov_len = raw_buffers[1].len },
+    };
+    ring.register_buffers(&buffers) catch |err| switch (err) {
+        error.SystemResources => {
+            // See https://github.com/ziglang/zig/issues/15362
+            return error.SkipZigTest;
+        },
+        else => |e| return e,
+    };
+
+    // Write buffer 0 at file offset 3; IOSQE_IO_LINK is set on this SQE.
+    const sqe_write = try ring.write_fixed(0x45454545, fd, &buffers[0], 3, 0);
+    try testing.expectEqual(linux.IORING_OP.WRITE_FIXED, sqe_write.opcode);
+    try testing.expectEqual(@as(u64, 3), sqe_write.off);
+    sqe_write.flags |= linux.IOSQE_IO_LINK;
+
+    // Read from file offset 0 into buffer 1.
+    const sqe_read = try ring.read_fixed(0x12121212, fd, &buffers[1], 0, 1);
+    try testing.expectEqual(linux.IORING_OP.READ_FIXED, sqe_read.opcode);
+    try testing.expectEqual(@as(u64, 0), sqe_read.off);
+
+    try testing.expectEqual(@as(u32, 2), try ring.submit());
+
+    const cqe_write = try ring.copy_cqe();
+    const cqe_read = try ring.copy_cqe();
+
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x45454545,
+        .res = @as(i32, @intCast(buffers[0].iov_len)),
+        .flags = 0,
+    }, cqe_write);
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x12121212,
+        .res = @as(i32, @intCast(buffers[1].iov_len)),
+        .flags = 0,
+    }, cqe_read);
+
+    // Expected contents of buffer 1: three zero bytes (the unwritten file range before
+    // offset 3), then the first eight bytes of buffer 0.
+    try testing.expectEqualSlices(u8, "\x00\x00\x00", buffers[1].iov_base[0..3]);
+    try testing.expectEqualSlices(u8, "foobar", buffers[1].iov_base[3..9]);
+    try testing.expectEqualSlices(u8, "zz", buffers[1].iov_base[9..11]);
+}
+
+// Opens (creating it) a file relative to a temporary directory through the ring and
+// checks both the prepared SQE and the resulting completion.
+test "openat" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const path = "test_io_uring_openat";
+
+    // Workaround for LLVM bug: https://github.com/ziglang/zig/issues/12014
+    const path_addr = if (builtin.zig_backend == .stage2_llvm) p: {
+        var workaround = path;
+        _ = &workaround;
+        break :p @intFromPtr(workaround);
+    } else @intFromPtr(path);
+
+    const flags: linux.O = .{ .CLOEXEC = true, .ACCMODE = .RDWR, .CREAT = true };
+    const mode: os.mode_t = 0o666;
+    const sqe_openat = try ring.openat(0x33333333, tmp.dir.fd, path, flags, mode);
+    // The SQE carries the path address in `addr`, the mode in `len` and the open flags
+    // in `rw_flags`.
+    try testing.expectEqual(linux.io_uring_sqe{
+        .opcode = .OPENAT,
+        .flags = 0,
+        .ioprio = 0,
+        .fd = tmp.dir.fd,
+        .off = 0,
+        .addr = path_addr,
+        .len = mode,
+        .rw_flags = @bitCast(flags),
+        .user_data = 0x33333333,
+        .buf_index = 0,
+        .personality = 0,
+        .splice_fd_in = 0,
+        .addr3 = 0,
+        .resv = 0,
+    }, sqe_openat.*);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe_openat = try ring.copy_cqe();
+    try testing.expectEqual(@as(u64, 0x33333333), cqe_openat.user_data);
+    // An INVAL or BADF completion skips the test instead of failing it.
+    if (cqe_openat.err() == .INVAL) return error.SkipZigTest;
+    if (cqe_openat.err() == .BADF) return error.SkipZigTest;
+    // Print the unexpected result before the assertion below fails.
+    if (cqe_openat.res <= 0) std.debug.print("\ncqe_openat.res={}\n", .{cqe_openat.res});
+    try testing.expect(cqe_openat.res > 0);
+    try testing.expectEqual(@as(u32, 0), cqe_openat.flags);
+
+    // The completion result is passed to `os.close` as the newly opened fd.
+    os.close(cqe_openat.res);
+}
+
+test "close" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => |e| return e,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const file = try tmp.dir.createFile("test_io_uring_close", .{});
+    // The file is closed here only when the test exits with an error.
+    errdefer file.close();
+
+    const sqe_close = try ring.close(0x44444444, file.handle);
+    try testing.expectEqual(linux.IORING_OP.CLOSE, sqe_close.opcode);
+    try testing.expectEqual(file.handle, sqe_close.fd);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe_close = try ring.copy_cqe();
+    if (cqe_close.err() == .INVAL) return error.SkipZigTest;
+
+    const expected = linux.io_uring_cqe{
+        .user_data = 0x44444444,
+        .res = 0,
+        .flags = 0,
+    };
+    try testing.expectEqual(expected, cqe_close);
+}
+
+test "accept/connect/send/recv" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => |e| return e,
+    };
+    defer ring.deinit();
+
+    const harness = try createSocketTestHarness(&ring);
+    defer harness.close();
+
+    const buffer_send = [_]u8{ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 };
+    var buffer_recv = [_]u8{ 0, 1, 0, 1, 0 };
+
+    // Queue the send with IOSQE_IO_LINK set, followed by the recv on the server side.
+    const sqe_send = try ring.send(0xeeeeeeee, harness.client, buffer_send[0..], 0);
+    sqe_send.flags |= linux.IOSQE_IO_LINK;
+    _ = try ring.recv(0xffffffff, harness.server, .{ .buffer = buffer_recv[0..] }, 0);
+    try testing.expectEqual(@as(u32, 2), try ring.submit());
+
+    const cqe_send = try ring.copy_cqe();
+    if (cqe_send.err() == .INVAL) return error.SkipZigTest;
+    const expected_send = linux.io_uring_cqe{
+        .user_data = 0xeeeeeeee,
+        .res = buffer_send.len,
+        .flags = 0,
+    };
+    try testing.expectEqual(expected_send, cqe_send);
+
+    const cqe_recv = try ring.copy_cqe();
+    if (cqe_recv.err() == .INVAL) return error.SkipZigTest;
+    const expected_recv = linux.io_uring_cqe{
+        .user_data = 0xffffffff,
+        .res = buffer_recv.len,
+        // ignore IORING_CQE_F_SOCK_NONEMPTY since it is only set on some systems
+        .flags = cqe_recv.flags & linux.IORING_CQE_F_SOCK_NONEMPTY,
+    };
+    try testing.expectEqual(expected_recv, cqe_recv);
+
+    // Only the first `buffer_recv.len` bytes that were sent are compared.
+    try testing.expectEqualSlices(u8, buffer_send[0..buffer_recv.len], buffer_recv[0..]);
+}
+
+// Sends a datagram from a client socket to a server socket bound on 127.0.0.1 using
+// linked sendmsg and recvmsg requests, then checks both completions and the payload.
+test "sendmsg/recvmsg" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    // Port 0 is requested here; the actual address is read back via getsockname below.
+    var address_server = try net.Address.parseIp4("127.0.0.1", 0);
+
+    const server = try os.socket(address_server.any.family, os.SOCK.DGRAM, 0);
+    defer os.close(server);
+    try os.setsockopt(server, os.SOL.SOCKET, os.SO.REUSEPORT, &mem.toBytes(@as(c_int, 1)));
+    try os.setsockopt(server, os.SOL.SOCKET, os.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1)));
+    try os.bind(server, &address_server.any, address_server.getOsSockLen());
+
+    // set address_server to the OS-chosen IP/port.
+    var slen: os.socklen_t = address_server.getOsSockLen();
+    try os.getsockname(server, &address_server.any, &slen);
+
+    const client = try os.socket(address_server.any.family, os.SOCK.DGRAM, 0);
+    defer os.close(client);
+
+    // Outgoing message: a single 128-byte iovec addressed to the server.
+    const buffer_send = [_]u8{42} ** 128;
+    const iovecs_send = [_]os.iovec_const{
+        os.iovec_const{ .iov_base = &buffer_send, .iov_len = buffer_send.len },
+    };
+    const msg_send = os.msghdr_const{
+        .name = &address_server.any,
+        .namelen = address_server.getOsSockLen(),
+        .iov = &iovecs_send,
+        .iovlen = 1,
+        .control = null,
+        .controllen = 0,
+        .flags = 0,
+    };
+    const sqe_sendmsg = try ring.sendmsg(0x11111111, client, &msg_send, 0);
+    sqe_sendmsg.flags |= linux.IOSQE_IO_LINK;
+    try testing.expectEqual(linux.IORING_OP.SENDMSG, sqe_sendmsg.opcode);
+    try testing.expectEqual(client, sqe_sendmsg.fd);
+
+    // Incoming message: a single 128-byte iovec plus room for the sender's address.
+    var buffer_recv = [_]u8{0} ** 128;
+    var iovecs_recv = [_]os.iovec{
+        os.iovec{ .iov_base = &buffer_recv, .iov_len = buffer_recv.len },
+    };
+    const addr = [_]u8{0} ** 4;
+    var address_recv = net.Address.initIp4(addr, 0);
+    var msg_recv: os.msghdr = os.msghdr{
+        .name = &address_recv.any,
+        .namelen = address_recv.getOsSockLen(),
+        .iov = &iovecs_recv,
+        .iovlen = 1,
+        .control = null,
+        .controllen = 0,
+        .flags = 0,
+    };
+    const sqe_recvmsg = try ring.recvmsg(0x22222222, server, &msg_recv, 0);
+    try testing.expectEqual(linux.IORING_OP.RECVMSG, sqe_recvmsg.opcode);
+    try testing.expectEqual(server, sqe_recvmsg.fd);
+
+    // Submit both SQEs and wait for both completions.
+    try testing.expectEqual(@as(u32, 2), ring.sq_ready());
+    try testing.expectEqual(@as(u32, 2), try ring.submit_and_wait(2));
+    try testing.expectEqual(@as(u32, 0), ring.sq_ready());
+    try testing.expectEqual(@as(u32, 2), ring.cq_ready());
+
+    const cqe_sendmsg = try ring.copy_cqe();
+    // A result of -EINVAL skips the test instead of failing it.
+    if (cqe_sendmsg.res == -@as(i32, @intFromEnum(linux.E.INVAL))) return error.SkipZigTest;
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x11111111,
+        .res = buffer_send.len,
+        .flags = 0,
+    }, cqe_sendmsg);
+
+    const cqe_recvmsg = try ring.copy_cqe();
+    if (cqe_recvmsg.res == -@as(i32, @intFromEnum(linux.E.INVAL))) return error.SkipZigTest;
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x22222222,
+        .res = buffer_recv.len,
+        // ignore IORING_CQE_F_SOCK_NONEMPTY since it is set non-deterministically
+        .flags = cqe_recvmsg.flags & linux.IORING_CQE_F_SOCK_NONEMPTY,
+    }, cqe_recvmsg);
+
+    try testing.expectEqualSlices(u8, buffer_send[0..buffer_recv.len], buffer_recv[0..]);
+}
+
+test "timeout (after a relative time)" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => |e| return e,
+    };
+    defer ring.deinit();
+
+    // Requested timeout and accepted measurement error, both in milliseconds.
+    const ms = 10;
+    const margin = 5;
+    const ts = os.linux.kernel_timespec{ .tv_sec = 0, .tv_nsec = ms * std.time.ns_per_ms };
+
+    const started = std.time.milliTimestamp();
+    const sqe = try ring.timeout(0x55555555, &ts, 0, 0);
+    try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe.opcode);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    const cqe = try ring.copy_cqe();
+    const stopped = std.time.milliTimestamp();
+
+    // The timeout is expected to complete with -ETIME.
+    const expected = linux.io_uring_cqe{
+        .user_data = 0x55555555,
+        .res = -@as(i32, @intFromEnum(linux.E.TIME)),
+        .flags = 0,
+    };
+    try testing.expectEqual(expected, cqe);
+
+    // Tests should not depend on timings: skip test if outside margin.
+    const elapsed_ms: f64 = @floatFromInt(stopped - started);
+    const within_margin = std.math.approxEqAbs(f64, ms, elapsed_ms, margin);
+    if (!within_margin) return error.SkipZigTest;
+}
+
+test "timeout (after a number of completions)" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => |e| return e,
+    };
+    defer ring.deinit();
+
+    // A three second timeout that is also given a completion count of one.
+    const ts = os.linux.kernel_timespec{ .tv_sec = 3, .tv_nsec = 0 };
+    const count_completions: u64 = 1;
+    const sqe_timeout = try ring.timeout(0x66666666, &ts, count_completions, 0);
+    try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe_timeout.opcode);
+    try testing.expectEqual(count_completions, sqe_timeout.off);
+
+    // The NOP provides the single completion the timeout is counting.
+    _ = try ring.nop(0x77777777);
+    try testing.expectEqual(@as(u32, 2), try ring.submit());
+
+    const expected_nop = linux.io_uring_cqe{
+        .user_data = 0x77777777,
+        .res = 0,
+        .flags = 0,
+    };
+    const cqe_nop = try ring.copy_cqe();
+    try testing.expectEqual(expected_nop, cqe_nop);
+
+    const expected_timeout = linux.io_uring_cqe{
+        .user_data = 0x66666666,
+        .res = 0,
+        .flags = 0,
+    };
+    const cqe_timeout = try ring.copy_cqe();
+    try testing.expectEqual(expected_timeout, cqe_timeout);
+}
+
+// Queues a timeout and a request removing that timeout, then checks both completions
+// regardless of the order in which they arrive.
+test "timeout_remove" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const ts = os.linux.kernel_timespec{ .tv_sec = 3, .tv_nsec = 0 };
+    const sqe_timeout = try ring.timeout(0x88888888, &ts, 0, 0);
+    try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe_timeout.opcode);
+    try testing.expectEqual(@as(u64, 0x88888888), sqe_timeout.user_data);
+
+    // The timeout to remove is identified by its user data, which ends up in `addr`.
+    const sqe_timeout_remove = try ring.timeout_remove(0x99999999, 0x88888888, 0);
+    try testing.expectEqual(linux.IORING_OP.TIMEOUT_REMOVE, sqe_timeout_remove.opcode);
+    try testing.expectEqual(@as(u64, 0x88888888), sqe_timeout_remove.addr);
+    try testing.expectEqual(@as(u64, 0x99999999), sqe_timeout_remove.user_data);
+
+    try testing.expectEqual(@as(u32, 2), try ring.submit());
+
+    // The order in which the CQE arrive is not clearly documented and it changed with kernel 5.18:
+    // * kernel 5.10 gives user data 0x88888888 first, 0x99999999 second
+    // * kernel 5.18 gives user data 0x99999999 first, 0x88888888 second
+
+    var cqes: [2]os.linux.io_uring_cqe = undefined;
+    cqes[0] = try ring.copy_cqe();
+    cqes[1] = try ring.copy_cqe();
+
+    for (cqes) |cqe| {
+        // IORING_OP_TIMEOUT_REMOVE is not supported by this kernel version:
+        // Timeout remove operations set the fd to -1, which results in EBADF before EINVAL.
+        // We use IORING_FEAT_RW_CUR_POS as a safety check here to make sure we are at least pre-5.6.
+        // We don't want to skip this test for newer kernels.
+        if (cqe.user_data == 0x99999999 and
+            cqe.err() == .BADF and
+            (ring.features & linux.IORING_FEAT_RW_CUR_POS) == 0)
+        {
+            return error.SkipZigTest;
+        }
+
+        try testing.expect(cqe.user_data == 0x88888888 or cqe.user_data == 0x99999999);
+
+        if (cqe.user_data == 0x88888888) {
+            // The removed timeout is expected to complete with -ECANCELED.
+            try testing.expectEqual(linux.io_uring_cqe{
+                .user_data = 0x88888888,
+                .res = -@as(i32, @intFromEnum(linux.E.CANCELED)),
+                .flags = 0,
+            }, cqe);
+        } else if (cqe.user_data == 0x99999999) {
+            // The removal request itself is expected to succeed.
+            try testing.expectEqual(linux.io_uring_cqe{
+                .user_data = 0x99999999,
+                .res = 0,
+                .flags = 0,
+            }, cqe);
+        }
+    }
+}
+
+test "accept/connect/recv/link_timeout" {
+    // Links a recv that has nothing to read to a 1ms LINK_TIMEOUT and checks the
+    // result codes of both completions.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const harness = try createSocketTestHarness(&ring);
+    defer harness.close();
+
+    var recv_buf = [_]u8{ 0, 1, 0, 1, 0 };
+
+    // IOSQE_IO_LINK ties the recv to the next SQE, which is the link timeout.
+    const recv_sqe = try ring.recv(0xffffffff, harness.server, .{ .buffer = recv_buf[0..] }, 0);
+    recv_sqe.flags |= linux.IOSQE_IO_LINK;
+
+    const one_millisecond = os.linux.kernel_timespec{ .tv_sec = 0, .tv_nsec = 1000000 };
+    _ = try ring.link_timeout(0x22222222, &one_millisecond, 0);
+
+    const submitted = try ring.submit();
+    try testing.expectEqual(@as(u32, 2), submitted);
+
+    var remaining = submitted;
+    while (remaining > 0) : (remaining -= 1) {
+        const cqe = try ring.copy_cqe();
+        const acceptable = switch (cqe.user_data) {
+            // The recv is accepted with either EINTR or ECANCELED.
+            0xffffffff => cqe.res == -@as(i32, @intFromEnum(linux.E.INTR)) or
+                cqe.res == -@as(i32, @intFromEnum(linux.E.CANCELED)),
+            // The link timeout is accepted with either EALREADY or ETIME.
+            0x22222222 => cqe.res == -@as(i32, @intFromEnum(linux.E.ALREADY)) or
+                cqe.res == -@as(i32, @intFromEnum(linux.E.TIME)),
+            else => @panic("should not happen"),
+        };
+        if (!acceptable) {
+            std.debug.print("Req 0x{x} got {d}\n", .{ cqe.user_data, cqe.res });
+            try testing.expect(false);
+        }
+    }
+}
+
+test "fallocate" {
+    // Grows an empty temporary file to 64 KiB through IORING_OP_FALLOCATE and
+    // verifies the new size with a regular stat.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const file = try tmp.dir.createFile("test_io_uring_fallocate", .{ .truncate = true, .mode = 0o666 });
+    defer file.close();
+
+    // The freshly created file must start out empty.
+    const stat_before = try file.stat();
+    try testing.expectEqual(@as(u64, 0), stat_before.size);
+
+    const alloc_len: u64 = 65536;
+    const sqe = try ring.fallocate(0xaaaaaaaa, file.handle, 0, 0, alloc_len);
+    try testing.expectEqual(linux.IORING_OP.FALLOCATE, sqe.opcode);
+    try testing.expectEqual(file.handle, sqe.fd);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe = try ring.copy_cqe();
+    switch (cqe.err()) {
+        .SUCCESS => {},
+        // INVAL: this kernel's io_uring does not yet implement fallocate().
+        // NOSYS: this kernel does not implement fallocate().
+        // OPNOTSUPP: the filesystem containing the file referred to by fd does not support
+        // this operation, or the mode is not supported by that filesystem.
+        .INVAL, .NOSYS, .OPNOTSUPP => return error.SkipZigTest,
+        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+    }
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xaaaaaaaa,
+        .res = 0,
+        .flags = 0,
+    }, cqe);
+
+    const stat_after = try file.stat();
+    try testing.expectEqual(alloc_len, stat_after.size);
+}
+
+test "statx" {
+    // Writes six bytes to a temporary file and reads its size back through
+    // IORING_OP_STATX.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const file_name = "test_io_uring_statx";
+    const file = try tmp.dir.createFile(file_name, .{ .truncate = true, .mode = 0o666 });
+    defer file.close();
+
+    const stat_before = try file.stat();
+    try testing.expectEqual(@as(u64, 0), stat_before.size);
+
+    try file.writeAll("foobar");
+
+    // Filled in by the kernel once the request completes.
+    var statx_result: linux.Statx = undefined;
+    const sqe = try ring.statx(0xaaaaaaaa, tmp.dir.fd, file_name, 0, linux.STATX_SIZE, &statx_result);
+    try testing.expectEqual(linux.IORING_OP.STATX, sqe.opcode);
+    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe = try ring.copy_cqe();
+    switch (cqe.err()) {
+        .SUCCESS => {},
+        // INVAL: this kernel's io_uring does not yet implement statx().
+        // NOSYS: this kernel does not implement statx().
+        // OPNOTSUPP: the filesystem containing the file referred to by fd does not support
+        // this operation, or the mode is not supported by that filesystem.
+        // BADF: not supported on older kernels (5.4).
+        .INVAL, .NOSYS, .OPNOTSUPP, .BADF => return error.SkipZigTest,
+        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+    }
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xaaaaaaaa,
+        .res = 0,
+        .flags = 0,
+    }, cqe);
+
+    // The size must be flagged as valid in the mask and match the six bytes written.
+    try testing.expect(statx_result.mask & os.linux.STATX_SIZE == os.linux.STATX_SIZE);
+    try testing.expectEqual(@as(u64, 6), statx_result.size);
+}
+
+test "accept/connect/recv/cancel" {
+    // Submits a recv that has nothing to read, then cancels it with ASYNC_CANCEL
+    // and checks both completions.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const harness = try createSocketTestHarness(&ring);
+    defer harness.close();
+
+    var recv_buf = [_]u8{ 0, 1, 0, 1, 0 };
+
+    _ = try ring.recv(0xffffffff, harness.server, .{ .buffer = recv_buf[0..] }, 0);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    // The cancel request names its target by the target's user data, carried in `addr`.
+    const cancel_sqe = try ring.cancel(0x99999999, 0xffffffff, 0);
+    try testing.expectEqual(linux.IORING_OP.ASYNC_CANCEL, cancel_sqe.opcode);
+    try testing.expectEqual(@as(u64, 0xffffffff), cancel_sqe.addr);
+    try testing.expectEqual(@as(u64, 0x99999999), cancel_sqe.user_data);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    var recv_cqe = try ring.copy_cqe();
+    if (recv_cqe.err() == .INVAL) return error.SkipZigTest;
+    var cancel_cqe = try ring.copy_cqe();
+    if (cancel_cqe.err() == .INVAL) return error.SkipZigTest;
+
+    // The recv/cancel CQEs may arrive in any order, the recv CQE will sometimes come first:
+    const arrived_swapped = recv_cqe.user_data == 0x99999999 and cancel_cqe.user_data == 0xffffffff;
+    if (arrived_swapped) {
+        const held = recv_cqe;
+        recv_cqe = cancel_cqe;
+        cancel_cqe = held;
+    }
+
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xffffffff,
+        .res = -@as(i32, @intFromEnum(linux.E.CANCELED)),
+        .flags = 0,
+    }, recv_cqe);
+
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x99999999,
+        .res = 0,
+        .flags = 0,
+    }, cancel_cqe);
+}
+
+test "register_files_update" {
+    // Registers a two-slot fixed file table (one real fd, one sparse -1 entry),
+    // then exercises register_files_update() at offset 0 and at offset 1 and checks
+    // through fixed-file reads which slots are usable after each step.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
+    defer os.close(fd);
+
+    // Slot 0 holds the open fd, slot 1 is left sparse (-1).
+    var registered_fds = [_]os.fd_t{0} ** 2;
+    const fd_index = 0;
+    const fd_index2 = 1;
+    registered_fds[fd_index] = fd;
+    registered_fds[fd_index2] = -1;
+
+    ring.register_files(registered_fds[0..]) catch |err| switch (err) {
+        // Happens when the kernel doesn't support sparse entry (-1) in the file descriptors array.
+        error.FileDescriptorInvalid => return error.SkipZigTest,
+        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+    };
+
+    // Test IORING_REGISTER_FILES_UPDATE
+    // Only available since Linux 5.5
+
+    const fd2 = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
+    defer os.close(fd2);
+
+    // Replace slot 0 with the second fd; slot 1 stays sparse.
+    registered_fds[fd_index] = fd2;
+    registered_fds[fd_index2] = -1;
+    try ring.register_files_update(0, registered_fds[0..]);
+
+    // Pre-filled with 42 so the zeroes read from /dev/zero are distinguishable.
+    var buffer = [_]u8{42} ** 128;
+    {
+        // With IOSQE_FIXED_FILE set, the fd argument is an index into the registered table.
+        const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
+        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
+        sqe.flags |= linux.IOSQE_FIXED_FILE;
+
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(linux.io_uring_cqe{
+            .user_data = 0xcccccccc,
+            .res = buffer.len,
+            .flags = 0,
+        }, try ring.copy_cqe());
+        try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]);
+    }
+
+    // Test with a non-zero offset
+
+    // Both local entries become -1, but only the slice starting at index 1 is
+    // submitted, at table offset 1.
+    registered_fds[fd_index] = -1;
+    registered_fds[fd_index2] = -1;
+    try ring.register_files_update(1, registered_fds[1..]);
+
+    {
+        // Next read should still work since fd_index in the registered file descriptors hasn't been updated yet.
+        const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
+        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
+        sqe.flags |= linux.IOSQE_FIXED_FILE;
+
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(linux.io_uring_cqe{
+            .user_data = 0xcccccccc,
+            .res = buffer.len,
+            .flags = 0,
+        }, try ring.copy_cqe());
+        try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]);
+    }
+
+    // Now submit the whole array from offset 0, which also replaces slot 0 with -1.
+    try ring.register_files_update(0, registered_fds[0..]);
+
+    {
+        // Now this should fail since both fds are sparse (-1)
+        const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
+        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
+        sqe.flags |= linux.IOSQE_FIXED_FILE;
+
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        const cqe = try ring.copy_cqe();
+        try testing.expectEqual(os.linux.E.BADF, cqe.err());
+    }
+
+    try ring.unregister_files();
+}
+
+test "shutdown" {
+    // Exercises IORING_OP_SHUTDOWN twice: on a bound, listening socket (expects
+    // success) and on a fresh, unbound socket (expects ENOTCONN).
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var address = try net.Address.parseIp4("127.0.0.1", 0);
+
+    // Socket bound, expect shutdown to work
+    {
+        const bound_socket = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+        defer os.close(bound_socket);
+        try os.setsockopt(bound_socket, os.SOL.SOCKET, os.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1)));
+        try os.bind(bound_socket, &address.any, address.getOsSockLen());
+        try os.listen(bound_socket, 1);
+
+        // set address to the OS-chosen IP/port.
+        var addr_len: os.socklen_t = address.getOsSockLen();
+        try os.getsockname(bound_socket, &address.any, &addr_len);
+
+        const sqe = try ring.shutdown(0x445445445, bound_socket, os.linux.SHUT.RD);
+        try testing.expectEqual(linux.IORING_OP.SHUTDOWN, sqe.opcode);
+        try testing.expectEqual(@as(i32, bound_socket), sqe.fd);
+
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            .SUCCESS => {},
+            // This kernel's io_uring does not yet implement shutdown (kernel version < 5.11)
+            .INVAL => return error.SkipZigTest,
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+
+        try testing.expectEqual(linux.io_uring_cqe{
+            .user_data = 0x445445445,
+            .res = 0,
+            .flags = 0,
+        }, cqe);
+    }
+
+    // Socket not bound, expect to fail with ENOTCONN
+    {
+        const unbound_socket = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+        defer os.close(unbound_socket);
+
+        // Any failure to queue the SQE is treated as fatal here.
+        const sqe = ring.shutdown(0x445445445, unbound_socket, os.linux.SHUT.RD) catch |errno|
+            std.debug.panic("unhandled errno: {}", .{errno});
+        try testing.expectEqual(linux.IORING_OP.SHUTDOWN, sqe.opcode);
+        try testing.expectEqual(@as(i32, unbound_socket), sqe.fd);
+
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        try testing.expectEqual(@as(u64, 0x445445445), cqe.user_data);
+        try testing.expectEqual(os.linux.E.NOTCONN, cqe.err());
+    }
+}
+
+test "renameat" {
+    // Renames a file inside a temporary directory through IORING_OP_RENAMEAT and
+    // verifies that the old name is gone and the new name carries the content.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const old_path = "test_io_uring_renameat_old";
+    const new_path = "test_io_uring_renameat_new";
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    // Write old file with data
+
+    const old_file = try tmp.dir.createFile(old_path, .{ .truncate = true, .mode = 0o666 });
+    defer old_file.close();
+    try old_file.writeAll("hello");
+
+    // Submit renameat
+
+    const sqe = try ring.renameat(
+        0x12121212,
+        tmp.dir.fd,
+        old_path,
+        tmp.dir.fd,
+        new_path,
+        0,
+    );
+    try testing.expectEqual(linux.IORING_OP.RENAMEAT, sqe.opcode);
+    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
+    // The new directory fd is carried in the `len` field of the SQE.
+    try testing.expectEqual(@as(i32, tmp.dir.fd), @as(i32, @bitCast(sqe.len)));
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe = try ring.copy_cqe();
+    switch (cqe.err()) {
+        .SUCCESS => {},
+        // This kernel's io_uring does not yet implement renameat (kernel version < 5.11)
+        .BADF, .INVAL => return error.SkipZigTest,
+        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+    }
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x12121212,
+        .res = 0,
+        .flags = 0,
+    }, cqe);
+
+    // Validate that the old file doesn't exist anymore.
+    // expectError fails the test if the open unexpectedly succeeds, instead of
+    // silently discarding (and leaking) the opened file.
+    try testing.expectError(error.FileNotFound, tmp.dir.openFile(old_path, .{}));
+
+    // Validate that the new file exists with the proper content
+    {
+        const new_file = try tmp.dir.openFile(new_path, .{});
+        defer new_file.close();
+
+        var new_file_data: [16]u8 = undefined;
+        const bytes_read = try new_file.readAll(&new_file_data);
+        try testing.expectEqualStrings("hello", new_file_data[0..bytes_read]);
+    }
+}
+
+test "unlinkat" {
+    // Removes a file inside a temporary directory through IORING_OP_UNLINKAT and
+    // verifies that it can no longer be opened.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const path = "test_io_uring_unlinkat";
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    // Create the file that will be unlinked
+
+    const file = try tmp.dir.createFile(path, .{ .truncate = true, .mode = 0o666 });
+    defer file.close();
+
+    // Submit unlinkat
+
+    const sqe = try ring.unlinkat(
+        0x12121212,
+        tmp.dir.fd,
+        path,
+        0,
+    );
+    try testing.expectEqual(linux.IORING_OP.UNLINKAT, sqe.opcode);
+    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe = try ring.copy_cqe();
+    switch (cqe.err()) {
+        .SUCCESS => {},
+        // This kernel's io_uring does not yet implement unlinkat (kernel version < 5.11)
+        .BADF, .INVAL => return error.SkipZigTest,
+        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+    }
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x12121212,
+        .res = 0,
+        .flags = 0,
+    }, cqe);
+
+    // Validate that the file doesn't exist anymore.
+    // expectError fails the test if the open unexpectedly succeeds, instead of
+    // silently discarding (and leaking) the opened file.
+    try testing.expectError(error.FileNotFound, tmp.dir.openFile(path, .{}));
+}
+
+test "mkdirat" {
+    // Creates a directory inside a temporary directory through IORING_OP_MKDIRAT
+    // and verifies that it can be opened afterwards.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const path = "test_io_uring_mkdirat";
+
+    // Submit mkdirat
+
+    const sqe = try ring.mkdirat(
+        0x12121212,
+        tmp.dir.fd,
+        path,
+        0o0755,
+    );
+    try testing.expectEqual(linux.IORING_OP.MKDIRAT, sqe.opcode);
+    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe = try ring.copy_cqe();
+    switch (cqe.err()) {
+        .SUCCESS => {},
+        // This kernel's io_uring does not yet implement mkdirat (kernel version < 5.15)
+        .BADF, .INVAL => return error.SkipZigTest,
+        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+    }
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x12121212,
+        .res = 0,
+        .flags = 0,
+    }, cqe);
+
+    // Validate that the directory exists. The handle is closed right away so the
+    // check does not leak a file descriptor.
+    var created_dir = try tmp.dir.openDir(path, .{});
+    created_dir.close();
+}
+
+test "symlinkat" {
+    // Creates a symlink to a temporary file through IORING_OP_SYMLINKAT and
+    // verifies that the link can be opened afterwards.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const path = "test_io_uring_symlinkat";
+    const link_path = "test_io_uring_symlinkat_link";
+
+    const file = try tmp.dir.createFile(path, .{ .truncate = true, .mode = 0o666 });
+    defer file.close();
+
+    // Submit symlinkat
+
+    const sqe = try ring.symlinkat(
+        0x12121212,
+        path,
+        tmp.dir.fd,
+        link_path,
+    );
+    try testing.expectEqual(linux.IORING_OP.SYMLINKAT, sqe.opcode);
+    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe = try ring.copy_cqe();
+    switch (cqe.err()) {
+        .SUCCESS => {},
+        // This kernel's io_uring does not yet implement symlinkat (kernel version < 5.15)
+        .BADF, .INVAL => return error.SkipZigTest,
+        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+    }
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x12121212,
+        .res = 0,
+        .flags = 0,
+    }, cqe);
+
+    // Validate that the symlink exists. The handle is closed right away so the
+    // check does not leak a file descriptor.
+    const linked_file = try tmp.dir.openFile(link_path, .{});
+    linked_file.close();
+}
+
+test "linkat" {
+    // Hard-links a temporary file under a second name through IORING_OP_LINKAT
+    // and verifies that the second name yields the same content.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const source_name = "test_io_uring_linkat_first";
+    const link_name = "test_io_uring_linkat_second";
+
+    // Write file with data
+
+    const source_file = try tmp.dir.createFile(source_name, .{ .truncate = true, .mode = 0o666 });
+    defer source_file.close();
+    try source_file.writeAll("hello");
+
+    // Submit linkat
+
+    const dir_fd = tmp.dir.fd;
+    const sqe = try ring.linkat(0x12121212, dir_fd, source_name, dir_fd, link_name, 0);
+    try testing.expectEqual(linux.IORING_OP.LINKAT, sqe.opcode);
+    try testing.expectEqual(@as(i32, dir_fd), sqe.fd);
+    // The new directory fd is carried in the `len` field of the SQE.
+    try testing.expectEqual(@as(i32, dir_fd), @as(i32, @bitCast(sqe.len)));
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    const cqe = try ring.copy_cqe();
+    switch (cqe.err()) {
+        .SUCCESS => {},
+        // This kernel's io_uring does not yet implement linkat (kernel version < 5.15)
+        .BADF, .INVAL => return error.SkipZigTest,
+        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+    }
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0x12121212,
+        .res = 0,
+        .flags = 0,
+    }, cqe);
+
+    // Validate the second file
+    const linked_file = try tmp.dir.openFile(link_name, .{});
+    defer linked_file.close();
+
+    var contents: [16]u8 = undefined;
+    const contents_len = try linked_file.readAll(&contents);
+    try testing.expectEqualStrings("hello", contents[0..contents_len]);
+}
+
+test "provide_buffers: read" {
+    // Provides a group of 4 buffers to the kernel, drains the group with
+    // buffer-selecting reads from /dev/zero, checks that a further read fails
+    // with ENOBUFS, then re-provides a single buffer and reads once more.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
+    defer os.close(fd);
+
+    const group_id = 1337;
+    const buffer_id = 0;
+
+    const buffer_len = 128;
+
+    var buffers: [4][buffer_len]u8 = undefined;
+
+    // Provide 4 buffers
+
+    {
+        const sqe = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
+        try testing.expectEqual(linux.IORING_OP.PROVIDE_BUFFERS, sqe.opcode);
+        // The number of buffers is carried in the `fd` field of the SQE.
+        try testing.expectEqual(@as(i32, buffers.len), sqe.fd);
+        try testing.expectEqual(@as(u32, buffers[0].len), sqe.len);
+        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            // Happens when the kernel is < 5.7
+            .INVAL => return error.SkipZigTest,
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+        try testing.expectEqual(@as(u64, 0xcccccccc), cqe.user_data);
+    }
+
+    // Do 4 reads which should consume all buffers
+
+    var i: usize = 0;
+    while (i < buffers.len) : (i += 1) {
+        const sqe = try ring.read(0xdededede, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
+        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
+        try testing.expectEqual(@as(i32, fd), sqe.fd);
+        try testing.expectEqual(@as(u64, 0), sqe.addr);
+        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
+        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+
+        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
+        // The id of the selected buffer is reported in the upper 16 bits of the CQE flags.
+        const used_buffer_id = cqe.flags >> 16;
+        // The id is unsigned, so only the upper bound needs checking; it also
+        // guards the `buffers[used_buffer_id]` access below.
+        try testing.expect(used_buffer_id < buffers.len);
+        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
+
+        try testing.expectEqual(@as(u64, 0xdededede), cqe.user_data);
+        try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))]);
+    }
+
+    // This read should fail
+
+    {
+        const sqe = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
+        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
+        try testing.expectEqual(@as(i32, fd), sqe.fd);
+        try testing.expectEqual(@as(u64, 0), sqe.addr);
+        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
+        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            // Expected
+            .NOBUFS => {},
+            .SUCCESS => std.debug.panic("unexpected success", .{}),
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
+    }
+
+    // Provide 1 buffer again
+
+    // Deliberately put something we don't expect in the buffers
+    @memset(mem.sliceAsBytes(&buffers), 42);
+
+    const reprovided_buffer_id = 2;
+
+    {
+        _ = try ring.provide_buffers(0xabababab, @as([*]u8, @ptrCast(&buffers[reprovided_buffer_id])), buffer_len, 1, group_id, reprovided_buffer_id);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+    }
+
+    // Final read which should work
+
+    {
+        const sqe = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
+        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
+        try testing.expectEqual(@as(i32, fd), sqe.fd);
+        try testing.expectEqual(@as(u64, 0), sqe.addr);
+        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
+        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+
+        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
+        const used_buffer_id = cqe.flags >> 16;
+        // Expected value first, so a failure reports expected/actual the right way round.
+        try testing.expectEqual(@as(u32, reprovided_buffer_id), used_buffer_id);
+        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
+        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
+        try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))]);
+    }
+}
+
+test "remove_buffers" {
+    // Provides a group of 4 buffers, removes 3 of them again, and checks that
+    // exactly one buffer-selecting read succeeds before the group runs dry.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const zero_fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
+    defer os.close(zero_fd);
+
+    const group_id = 1337;
+    const buffer_id = 0;
+
+    const buffer_len = 128;
+
+    var buffers: [4][buffer_len]u8 = undefined;
+
+    // Provide 4 buffers
+
+    {
+        _ = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const provide_cqe = try ring.copy_cqe();
+        switch (provide_cqe.err()) {
+            .SUCCESS => {},
+            .INVAL => return error.SkipZigTest,
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+        try testing.expectEqual(@as(u64, 0xcccccccc), provide_cqe.user_data);
+    }
+
+    // Remove 3 buffers
+
+    {
+        const remove_sqe = try ring.remove_buffers(0xbababababa, 3, group_id);
+        try testing.expectEqual(linux.IORING_OP.REMOVE_BUFFERS, remove_sqe.opcode);
+        // The number of buffers to remove is carried in the `fd` field of the SQE.
+        try testing.expectEqual(@as(i32, 3), remove_sqe.fd);
+        try testing.expectEqual(@as(u64, 0), remove_sqe.addr);
+        try testing.expectEqual(@as(u16, group_id), remove_sqe.buf_index);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const remove_cqe = try ring.copy_cqe();
+        switch (remove_cqe.err()) {
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+        try testing.expectEqual(@as(u64, 0xbababababa), remove_cqe.user_data);
+    }
+
+    // This read should work
+
+    {
+        _ = try ring.read(0xdfdfdfdf, zero_fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const read_cqe = try ring.copy_cqe();
+        switch (read_cqe.err()) {
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+
+        try testing.expect(read_cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
+        // The id of the selected buffer is reported in the upper 16 bits of the CQE flags.
+        const used_buffer_id = read_cqe.flags >> 16;
+        try testing.expect(used_buffer_id < 4);
+        try testing.expectEqual(@as(i32, buffer_len), read_cqe.res);
+        try testing.expectEqual(@as(u64, 0xdfdfdfdf), read_cqe.user_data);
+
+        const bytes_read = @as(usize, @intCast(read_cqe.res));
+        try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..bytes_read]);
+    }
+
+    // Final read should _not_ work
+
+    {
+        _ = try ring.read(0xdfdfdfdf, zero_fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const read_cqe = try ring.copy_cqe();
+        switch (read_cqe.err()) {
+            // Expected
+            .NOBUFS => {},
+            .SUCCESS => std.debug.panic("unexpected success", .{}),
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+    }
+}
+
+test "provide_buffers: accept/connect/send/recv" {
+    // Provides a group of 4 buffers, sends 4 messages over a loopback
+    // connection and receives them with buffer-selecting recvs, checks that a
+    // further recv fails with ENOBUFS, then re-provides a single buffer and
+    // receives one more message into it.
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const group_id = 1337;
+    const buffer_id = 0;
+
+    const buffer_len = 128;
+    var buffers: [4][buffer_len]u8 = undefined;
+
+    // Provide 4 buffers
+
+    {
+        const sqe = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
+        try testing.expectEqual(linux.IORING_OP.PROVIDE_BUFFERS, sqe.opcode);
+        // The number of buffers is carried in the `fd` field of the SQE.
+        try testing.expectEqual(@as(i32, buffers.len), sqe.fd);
+        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
+        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            // Happens when the kernel is < 5.7
+            .INVAL => return error.SkipZigTest,
+            // Happens on the kernel 5.4
+            .BADF => return error.SkipZigTest,
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+        try testing.expectEqual(@as(u64, 0xcccccccc), cqe.user_data);
+    }
+
+    const socket_test_harness = try createSocketTestHarness(&ring);
+    defer socket_test_harness.close();
+
+    // Do 4 send on the socket
+
+    {
+        var i: usize = 0;
+        while (i < buffers.len) : (i += 1) {
+            _ = try ring.send(0xdeaddead, socket_test_harness.server, &([_]u8{'z'} ** buffer_len), 0);
+            try testing.expectEqual(@as(u32, 1), try ring.submit());
+        }
+
+        // Reap all 4 send completions before starting to receive.
+        var cqes: [4]linux.io_uring_cqe = undefined;
+        try testing.expectEqual(@as(u32, 4), try ring.copy_cqes(&cqes, 4));
+    }
+
+    // Do 4 recv which should consume all buffers
+
+    // Deliberately put something we don't expect in the buffers
+    @memset(mem.sliceAsBytes(&buffers), 1);
+
+    var i: usize = 0;
+    while (i < buffers.len) : (i += 1) {
+        const sqe = try ring.recv(0xdededede, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
+        try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
+        try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
+        try testing.expectEqual(@as(u64, 0), sqe.addr);
+        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
+        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
+        try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
+        try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+
+        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
+        // The id of the selected buffer is reported in the upper 16 bits of the CQE flags.
+        const used_buffer_id = cqe.flags >> 16;
+        // The id is unsigned, so only the upper bound needs checking; it also
+        // guards the `buffers[used_buffer_id]` access below.
+        try testing.expect(used_buffer_id < buffers.len);
+        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
+
+        try testing.expectEqual(@as(u64, 0xdededede), cqe.user_data);
+        const buffer = buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))];
+        try testing.expectEqualSlices(u8, &([_]u8{'z'} ** buffer_len), buffer);
+    }
+
+    // This recv should fail
+
+    {
+        const sqe = try ring.recv(0xdfdfdfdf, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
+        try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
+        try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
+        try testing.expectEqual(@as(u64, 0), sqe.addr);
+        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
+        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
+        try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
+        try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            // Expected
+            .NOBUFS => {},
+            .SUCCESS => std.debug.panic("unexpected success", .{}),
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
+    }
+
+    // Provide 1 buffer again
+
+    const reprovided_buffer_id = 2;
+
+    {
+        _ = try ring.provide_buffers(0xabababab, @as([*]u8, @ptrCast(&buffers[reprovided_buffer_id])), buffer_len, 1, group_id, reprovided_buffer_id);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+    }
+
+    // Redo 1 send on the server socket
+
+    {
+        _ = try ring.send(0xdeaddead, socket_test_harness.server, &([_]u8{'w'} ** buffer_len), 0);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        _ = try ring.copy_cqe();
+    }
+
+    // Final recv which should work
+
+    // Deliberately put something we don't expect in the buffers
+    @memset(mem.sliceAsBytes(&buffers), 1);
+
+    {
+        const sqe = try ring.recv(0xdfdfdfdf, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
+        try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
+        try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
+        try testing.expectEqual(@as(u64, 0), sqe.addr);
+        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
+        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
+        try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
+        try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        const cqe = try ring.copy_cqe();
+        switch (cqe.err()) {
+            .SUCCESS => {},
+            else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
+        }
+
+        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
+        const used_buffer_id = cqe.flags >> 16;
+        // Expected value first, so a failure reports expected/actual the right way round.
+        try testing.expectEqual(@as(u32, reprovided_buffer_id), used_buffer_id);
+        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
+        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
+        const buffer = buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))];
+        try testing.expectEqualSlices(u8, &([_]u8{'w'} ** buffer_len), buffer);
+    }
+}
+
+/// Used for testing server/client interactions: the listening socket plus both ends of one connection.
+const SocketTestHarness = struct {
+    listener: os.socket_t,
+    server: os.socket_t,
+    client: os.socket_t,
+
+    fn close(self: SocketTestHarness) void {
+        // `server` (the accepted end) is a descriptor of its own and has to be released as well.
+        for ([_]os.socket_t{ self.client, self.server, self.listener }) |fd| posix.close(fd);
+    }
+};
+
+fn createSocketTestHarness(ring: *IoUring) !SocketTestHarness {
+    // Create a TCP server socket (port 0: the listener helper writes the bound address back)
+    var address = try net.Address.parseIp4("127.0.0.1", 0);
+    const listener_socket = try createListenerSocket(&address);
+    errdefer posix.close(listener_socket);
+
+    // Submit 1 accept
+    var accept_addr: os.sockaddr = undefined;
+    var accept_addr_len: os.socklen_t = @sizeOf(@TypeOf(accept_addr));
+    _ = try ring.accept(0xaaaaaaaa, listener_socket, &accept_addr, &accept_addr_len, 0);
+
+    // Create a TCP client socket and queue its connect; both SQEs go out in one submit
+    const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+    errdefer posix.close(client);
+    _ = try ring.connect(0xcccccccc, client, &address.any, address.getOsSockLen());
+
+    try testing.expectEqual(@as(u32, 2), try ring.submit());
+
+    var cqe_accept = try ring.copy_cqe();
+    if (cqe_accept.err() == .INVAL) return error.SkipZigTest;
+    var cqe_connect = try ring.copy_cqe();
+    if (cqe_connect.err() == .INVAL) return error.SkipZigTest;
+
+    // The accept/connect CQEs may arrive in any order, the connect CQE will sometimes come first:
+    if (cqe_accept.user_data == 0xcccccccc and cqe_connect.user_data == 0xaaaaaaaa) {
+        const a = cqe_accept;
+        const b = cqe_connect;
+        cqe_accept = b;
+        cqe_connect = a;
+    }
+
+    try testing.expectEqual(@as(u64, 0xaaaaaaaa), cqe_accept.user_data);
+    if (cqe_accept.res <= 0) std.debug.print("\ncqe_accept.res={}\n", .{cqe_accept.res});
+    try testing.expect(cqe_accept.res > 0);
+    errdefer posix.close(cqe_accept.res); // the accepted socket is ours now; don't leak it if a check below fails
+    try testing.expectEqual(@as(u32, 0), cqe_accept.flags);
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xcccccccc,
+        .res = 0,
+        .flags = 0,
+    }, cqe_connect);
+
+    // All good: the caller now owns all three sockets
+    return SocketTestHarness{
+        .listener = listener_socket,
+        .server = cqe_accept.res,
+        .client = client,
+    };
+}
+
+fn createListenerSocket(address: *net.Address) !os.socket_t {
+    const kernel_backlog = 1; // backlog value handed to listen() below
+    const listener_socket = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+    errdefer posix.close(listener_socket);
+
+    try os.setsockopt(listener_socket, os.SOL.SOCKET, os.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1)));
+    try os.bind(listener_socket, &address.any, address.getOsSockLen());
+    try os.listen(listener_socket, kernel_backlog);
+
+    // Write the bound address back into `address`, so the caller learns the OS-chosen IP/port.
+    var slen: os.socklen_t = address.getOsSockLen();
+    try os.getsockname(listener_socket, &address.any, &slen);
+
+    return listener_socket; // on success the caller is responsible for closing it
+}
+
+test "accept multishot" {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var address = try net.Address.parseIp4("127.0.0.1", 0);
+    const listener_socket = try createListenerSocket(&address);
+    defer posix.close(listener_socket);
+
+    // submit multishot accept operation: a single SQE that is expected to yield one CQE per connection
+    var addr: os.sockaddr = undefined;
+    var addr_len: os.socklen_t = @sizeOf(@TypeOf(addr));
+    const userdata: u64 = 0xaaaaaaaa;
+    _ = try ring.accept_multishot(userdata, listener_socket, &addr, &addr_len, 0);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    var nr: usize = 4; // number of clients to connect
+    while (nr > 0) : (nr -= 1) {
+        // connect client
+        const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+        defer posix.close(client);
+        try os.connect(client, &address.any, address.getOsSockLen());
+
+        // test accept completion
+        const cqe = try ring.copy_cqe();
+        if (cqe.err() == .INVAL) return error.SkipZigTest;
+        try testing.expect(cqe.res > 0);
+        // cqe.res is the accepted connection's descriptor: close it, or every iteration leaks one
+        defer posix.close(cqe.res);
+        try testing.expect(cqe.user_data == userdata);
+        try testing.expect(cqe.flags & linux.IORING_CQE_F_MORE > 0); // more flag is set
+    }
+}
+
+test "accept/connect/send_zc/recv" {
+    try skipKernelLessThan(.{ .major = 6, .minor = 0, .patch = 0 });
+
+    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const socket_test_harness = try createSocketTestHarness(&ring);
+    defer socket_test_harness.close();
+
+    const buffer_send = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe };
+    var buffer_recv = [_]u8{0} ** 10;
+
+    // zero-copy send, linked (IOSQE_IO_LINK) to the recv that is queued right after it
+    const sqe_send = try ring.send_zc(0xeeeeeeee, socket_test_harness.client, buffer_send[0..], 0, 0);
+    sqe_send.flags |= linux.IOSQE_IO_LINK;
+    _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, 0);
+    try testing.expectEqual(@as(u32, 2), try ring.submit());
+
+    // First completion of zero-copy send.
+    // IORING_CQE_F_MORE means that there
+    // will be a second completion event / notification for the
+    // request, with the user_data field set to the same value.
+    // buffer_send must be kept alive until the second cqe.
+    var cqe_send = try ring.copy_cqe();
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xeeeeeeee,
+        .res = buffer_send.len,
+        .flags = linux.IORING_CQE_F_MORE,
+    }, cqe_send);
+
+    const cqe_recv = try ring.copy_cqe();
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xffffffff,
+        .res = buffer_recv.len,
+        .flags = cqe_recv.flags & linux.IORING_CQE_F_SOCK_NONEMPTY, // accept SOCK_NONEMPTY whether set or not
+    }, cqe_recv);
+
+    try testing.expectEqualSlices(u8, buffer_send[0..buffer_recv.len], buffer_recv[0..]);
+
+    // Second completion of zero-copy send.
+    // IORING_CQE_F_NOTIF in flags signals that the kernel is done with buffer_send
+    cqe_send = try ring.copy_cqe();
+    try testing.expectEqual(linux.io_uring_cqe{
+        .user_data = 0xeeeeeeee,
+        .res = 0,
+        .flags = linux.IORING_CQE_F_NOTIF,
+    }, cqe_send);
+}
+
+test "accept_direct" {
+    try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+    var address = try net.Address.parseIp4("127.0.0.1", 0);
+
+    // register direct file descriptors (both slots start out as -1)
+    var registered_fds = [_]os.fd_t{-1} ** 2;
+    try ring.register_files(registered_fds[0..]);
+
+    const listener_socket = try createListenerSocket(&address);
+    defer posix.close(listener_socket);
+
+    const accept_userdata: u64 = 0xaaaaaaaa;
+    const read_userdata: u64 = 0xbbbbbbbb;
+    const data = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe };
+
+    for (0..2) |_| {
+        for (registered_fds, 0..) |_, i| {
+            var buffer_recv = [_]u8{0} ** 16;
+            const buffer_send: []const u8 = data[0 .. data.len - i]; // make it different at each loop
+
+            // submit accept, will choose registered fd and return index in cqe
+            _ = try ring.accept_direct(accept_userdata, listener_socket, null, null, 0);
+            try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+            // connect (the defer is registered first so a failed connect cannot leak the socket)
+            const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+            defer posix.close(client);
+            try os.connect(client, &address.any, address.getOsSockLen());
+
+            // accept completion
+            const cqe_accept = try ring.copy_cqe();
+            try testing.expectEqual(os.E.SUCCESS, cqe_accept.err());
+            const fd_index = cqe_accept.res;
+            try testing.expect(fd_index < registered_fds.len);
+            try testing.expect(cqe_accept.user_data == accept_userdata);
+
+            // send data
+            _ = try os.send(client, buffer_send, 0);
+
+            // Example of how to use registered fd:
+            // Submit receive to fixed file returned by accept (fd_index).
+            // Fd field is set to registered file index, returned by accept.
+            // Flag linux.IOSQE_FIXED_FILE must be set.
+            const recv_sqe = try ring.recv(read_userdata, fd_index, .{ .buffer = &buffer_recv }, 0);
+            recv_sqe.flags |= linux.IOSQE_FIXED_FILE;
+            try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+            // receive completion
+            const recv_cqe = try ring.copy_cqe();
+            try testing.expect(recv_cqe.user_data == read_userdata);
+            try testing.expect(recv_cqe.res == buffer_send.len);
+            try testing.expectEqualSlices(u8, buffer_send, buffer_recv[0..buffer_send.len]);
+        }
+        // no more available fds, accept will get NFILE error
+        {
+            // submit accept
+            _ = try ring.accept_direct(accept_userdata, listener_socket, null, null, 0);
+            try testing.expectEqual(@as(u32, 1), try ring.submit());
+            // connect
+            const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+            defer posix.close(client);
+            try os.connect(client, &address.any, address.getOsSockLen());
+            // completion with error
+            const cqe_accept = try ring.copy_cqe();
+            try testing.expect(cqe_accept.user_data == accept_userdata);
+            try testing.expectEqual(os.E.NFILE, cqe_accept.err());
+        }
+        // return file descriptors to kernel
+        try ring.register_files_update(0, registered_fds[0..]);
+    }
+    try ring.unregister_files();
+}
+
+test "accept_multishot_direct" {
+    try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var address = try net.Address.parseIp4("127.0.0.1", 0);
+
+    var registered_fds = [_]os.fd_t{-1} ** 2;
+    try ring.register_files(registered_fds[0..]);
+
+    const listener_socket = try createListenerSocket(&address);
+    defer posix.close(listener_socket);
+
+    const accept_userdata: u64 = 0xaaaaaaaa;
+
+    for (0..2) |_| {
+        // submit multishot accept
+        // Will choose registered fd and return index of the selected registered file in cqe.
+        _ = try ring.accept_multishot_direct(accept_userdata, listener_socket, null, null, 0);
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+        for (registered_fds) |_| {
+            // connect (the defer is registered first so a failed connect cannot leak the socket)
+            const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+            defer posix.close(client);
+            try os.connect(client, &address.any, address.getOsSockLen());
+
+            // accept completion
+            const cqe_accept = try ring.copy_cqe();
+            const fd_index = cqe_accept.res;
+            try testing.expect(fd_index < registered_fds.len);
+            try testing.expect(cqe_accept.user_data == accept_userdata);
+            try testing.expect(cqe_accept.flags & linux.IORING_CQE_F_MORE > 0); // has more is set
+        }
+        // No more available fds, accept will get NFILE error.
+        // Multishot is terminated (more flag is not set).
+        {
+            // connect
+            const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
+            defer posix.close(client);
+            try os.connect(client, &address.any, address.getOsSockLen());
+            // completion with error
+            const cqe_accept = try ring.copy_cqe();
+            try testing.expect(cqe_accept.user_data == accept_userdata);
+            try testing.expectEqual(os.E.NFILE, cqe_accept.err());
+            try testing.expect(cqe_accept.flags & linux.IORING_CQE_F_MORE == 0); // has more is not set
+        }
+        // return file descriptors to kernel
+        try ring.register_files_update(0, registered_fds[0..]);
+    }
+    try ring.unregister_files();
+}
+
+test "socket" {
+    try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
+
+    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    // prepare, submit socket operation
+    _ = try ring.socket(0, linux.AF.INET, os.SOCK.STREAM, 0, 0);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+
+    // test completion: on success the CQE result is used as the new socket's descriptor
+    const cqe = try ring.copy_cqe();
+    try testing.expectEqual(os.E.SUCCESS, cqe.err());
+    const fd: os.fd_t = @intCast(cqe.res);
+    // Registered before the last check so the socket is closed even when that check fails.
+    defer posix.close(fd);
+    try testing.expect(fd > 2);
+}
+
+test "socket_direct/socket_direct_alloc/close_direct" {
+    try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
+
+    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var registered_fds = [_]os.fd_t{-1} ** 3;
+    try ring.register_files(registered_fds[0..]);
+
+    // create socket in registered file descriptor at index 0 (last param)
+    _ = try ring.socket_direct(0, linux.AF.INET, os.SOCK.STREAM, 0, 0, 0);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    var cqe_socket = try ring.copy_cqe();
+    try testing.expectEqual(os.E.SUCCESS, cqe_socket.err());
+    try testing.expect(cqe_socket.res == 0);
+
+    // create socket in registered file descriptor at index 1 (last param)
+    _ = try ring.socket_direct(0, linux.AF.INET, os.SOCK.STREAM, 0, 0, 1);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    cqe_socket = try ring.copy_cqe();
+    try testing.expectEqual(os.E.SUCCESS, cqe_socket.err());
+    try testing.expect(cqe_socket.res == 0); // res is 0 when index is specified
+
+    // create socket in kernel chosen file descriptor index (_alloc version)
+    // completion res has index from registered files
+    _ = try ring.socket_direct_alloc(0, linux.AF.INET, os.SOCK.STREAM, 0, 0);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    cqe_socket = try ring.copy_cqe();
+    try testing.expectEqual(os.E.SUCCESS, cqe_socket.err());
+    try testing.expect(cqe_socket.res == 2); // returns registered file index
+
+    // use sockets from registered_fds in connect operation
+    var address = try net.Address.parseIp4("127.0.0.1", 0);
+    const listener_socket = try createListenerSocket(&address);
+    defer posix.close(listener_socket);
+    const accept_userdata: u64 = 0xaaaaaaaa;
+    const connect_userdata: u64 = 0xbbbbbbbb;
+    const close_userdata: u64 = 0xcccccccc;
+    for (registered_fds, 0..) |_, fd_index| {
+        // prepare accept
+        _ = try ring.accept(accept_userdata, listener_socket, null, null, 0);
+        // prepare connect with fixed socket
+        const connect_sqe = try ring.connect(connect_userdata, @intCast(fd_index), &address.any, address.getOsSockLen());
+        connect_sqe.flags |= linux.IOSQE_FIXED_FILE; // fd is fixed file index
+        // submit both
+        try testing.expectEqual(@as(u32, 2), try ring.submit());
+        // get completions
+        var cqe_connect = try ring.copy_cqe();
+        var cqe_accept = try ring.copy_cqe();
+        // ignore order
+        if (cqe_connect.user_data == accept_userdata and cqe_accept.user_data == connect_userdata) {
+            const a = cqe_accept;
+            const b = cqe_connect;
+            cqe_accept = b;
+            cqe_connect = a;
+        }
+        // test connect completion
+        try testing.expect(cqe_connect.user_data == connect_userdata);
+        try testing.expectEqual(os.E.SUCCESS, cqe_connect.err());
+        // test accept completion
+        try testing.expect(cqe_accept.user_data == accept_userdata);
+        try testing.expectEqual(os.E.SUCCESS, cqe_accept.err());
+        defer posix.close(cqe_accept.res); // plain accept yields an ordinary descriptor; close it each iteration
+        // submit and test close_direct
+        _ = try ring.close_direct(close_userdata, @intCast(fd_index));
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        const cqe_close = try ring.copy_cqe();
+        try testing.expect(cqe_close.user_data == close_userdata);
+        try testing.expectEqual(os.E.SUCCESS, cqe_close.err());
+    }
+
+    try ring.unregister_files();
+}
+
+test "openat_direct/close_direct" {
+    try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
+
+    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    var registered_fds = [_]os.fd_t{-1} ** 3;
+    try ring.register_files(registered_fds[0..]);
+
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+    const path = "test_io_uring_close_direct";
+    const flags: linux.O = .{ .ACCMODE = .RDWR, .CREAT = true };
+    const mode: os.mode_t = 0o666;
+    const user_data: u64 = 0;
+
+    // use registered file at index 0 (last param)
+    _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, 0);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    var cqe = try ring.copy_cqe();
+    try testing.expectEqual(os.E.SUCCESS, cqe.err());
+    try testing.expect(cqe.res == 0);
+
+    // use registered file at index 1
+    _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, 1);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    cqe = try ring.copy_cqe();
+    try testing.expectEqual(os.E.SUCCESS, cqe.err());
+    try testing.expect(cqe.res == 0); // res is 0 when we specify index
+
+    // let kernel choose registered file index
+    _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, linux.IORING_FILE_INDEX_ALLOC);
+    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    cqe = try ring.copy_cqe();
+    try testing.expectEqual(os.E.SUCCESS, cqe.err());
+    try testing.expect(cqe.res == 2); // chosen index is in res
+
+    // close all registered file slots (indices 0..2), one close_direct per slot
+    for (registered_fds, 0..) |_, fd_index| {
+        _ = try ring.close_direct(user_data, @intCast(fd_index));
+        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        const cqe_close = try ring.copy_cqe();
+        try testing.expectEqual(os.E.SUCCESS, cqe_close.err());
+    }
+    try ring.unregister_files();
+}
+
+test "waitid" {
+    try skipKernelLessThan(.{ .major = 6, .minor = 7, .patch = 0 });
+
+    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+        error.SystemOutdated => return error.SkipZigTest,
+        error.PermissionDenied => return error.SkipZigTest,
+        else => return err,
+    };
+    defer ring.deinit();
+
+    const pid = try os.fork();
+    if (pid == 0) { // child process
+        os.exit(7); // exit status that the parent asserts on below
+    }
+
+    var siginfo: os.siginfo_t = undefined; // handed to the waitid operation, inspected after completion
+    _ = try ring.waitid(0, .PID, pid, &siginfo, os.W.EXITED, 0);
+
+    try testing.expectEqual(1, try ring.submit());
+
+    const cqe_waitid = try ring.copy_cqe();
+    try testing.expectEqual(0, cqe_waitid.res);
+    try testing.expectEqual(pid, siginfo.fields.common.first.piduid.pid);
+    try testing.expectEqual(7, siginfo.fields.common.second.sigchld.status);
+}
+
+/// For use in tests. Returns error.SkipZigTest when not on Linux or when the running kernel is older than `required`.
+inline fn skipKernelLessThan(required: std.SemanticVersion) !void {
+    if (builtin.os.tag != .linux) return error.SkipZigTest;
+
+    var uts: linux.utsname = undefined;
+    switch (linux.getErrno(linux.uname(&uts))) {
+        .SUCCESS => {},
+        else => |errno| return os.unexpectedErrno(errno),
+    }
+
+    // Release strings are often not valid semver ("6.8.9-300.fc40.x86_64"): parse only up to the first '-' or '+'.
+    const release = mem.sliceTo(&uts.release, 0);
+    const version_len = mem.indexOfAny(u8, release, "-+") orelse release.len;
+    const current = try std.SemanticVersion.parse(release[0..version_len]); // no pre/build part left to compare
+    if (required.order(current) == .gt) return error.SkipZigTest;
+}
diff --git a/lib/std/os/linux/io_uring.zig b/lib/std/os/linux/io_uring.zig
@@ -1,4228 +0,0 @@
-const std = @import("../../std.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const mem = std.mem;
-const net = std.net;
-const os = std.os;
-const posix = std.posix;
-const linux = os.linux;
-const testing = std.testing;
-
-pub const IO_Uring = struct {
- fd: os.fd_t = -1,
- sq: SubmissionQueue,
- cq: CompletionQueue,
- flags: u32,
- features: u32,
-
- /// A friendly way to setup an io_uring, with default linux.io_uring_params.
- /// `entries` must be a power of two between 1 and 32768, although the kernel will make the final
- /// call on how many entries the submission and completion queues will ultimately have,
- /// see https://github.com/torvalds/linux/blob/v5.8/fs/io_uring.c#L8027-L8050.
- /// Matches the interface of io_uring_queue_init() in liburing.
- pub fn init(entries: u16, flags: u32) !IO_Uring {
- var params = mem.zeroInit(linux.io_uring_params, .{
- .flags = flags,
- .sq_thread_idle = 1000,
- });
-        return try IO_Uring.init_params(entries, &params);
- }
-
- /// A powerful way to setup an io_uring, if you want to tweak linux.io_uring_params such as submission
- /// queue thread cpu affinity or thread idle timeout (the kernel and our default is 1 second).
- /// `params` is passed by reference because the kernel needs to modify the parameters.
- /// Matches the interface of io_uring_queue_init_params() in liburing.
- pub fn init_params(entries: u16, p: *linux.io_uring_params) !IO_Uring {
- if (entries == 0) return error.EntriesZero;
- if (!std.math.isPowerOfTwo(entries)) return error.EntriesNotPowerOfTwo;
-
- assert(p.sq_entries == 0);
- assert(p.cq_entries == 0 or p.flags & linux.IORING_SETUP_CQSIZE != 0);
- assert(p.features == 0);
- assert(p.wq_fd == 0 or p.flags & linux.IORING_SETUP_ATTACH_WQ != 0);
- assert(p.resv[0] == 0);
- assert(p.resv[1] == 0);
- assert(p.resv[2] == 0);
-
- const res = linux.io_uring_setup(entries, p);
- switch (linux.getErrno(res)) {
- .SUCCESS => {},
- .FAULT => return error.ParamsOutsideAccessibleAddressSpace,
- // The resv array contains non-zero data, p.flags contains an unsupported flag,
- // entries out of bounds, IORING_SETUP_SQ_AFF was specified without IORING_SETUP_SQPOLL,
- // or IORING_SETUP_CQSIZE was specified but linux.io_uring_params.cq_entries was invalid:
- .INVAL => return error.ArgumentsInvalid,
- .MFILE => return error.ProcessFdQuotaExceeded,
- .NFILE => return error.SystemFdQuotaExceeded,
- .NOMEM => return error.SystemResources,
- // IORING_SETUP_SQPOLL was specified but effective user ID lacks sufficient privileges,
- // or a container seccomp policy prohibits io_uring syscalls:
- .PERM => return error.PermissionDenied,
- .NOSYS => return error.SystemOutdated,
- else => |errno| return os.unexpectedErrno(errno),
- }
- const fd = @as(os.fd_t, @intCast(res));
- assert(fd >= 0);
- errdefer os.close(fd);
-
- // Kernel versions 5.4 and up use only one mmap() for the submission and completion queues.
- // This is not an optional feature for us... if the kernel does it, we have to do it.
- // The thinking on this by the kernel developers was that both the submission and the
- // completion queue rings have sizes just over a power of two, but the submission queue ring
- // is significantly smaller with u32 slots. By bundling both in a single mmap, the kernel
- // gets the submission queue ring for free.
- // See https://patchwork.kernel.org/patch/11115257 for the kernel patch.
- // We do not support the double mmap() done before 5.4, because we want to keep the
- // init/deinit mmap paths simple and because io_uring has had many bug fixes even since 5.4.
- if ((p.features & linux.IORING_FEAT_SINGLE_MMAP) == 0) {
- return error.SystemOutdated;
- }
-
- // Check that the kernel has actually set params and that "impossible is nothing".
- assert(p.sq_entries != 0);
- assert(p.cq_entries != 0);
- assert(p.cq_entries >= p.sq_entries);
-
- // From here on, we only need to read from params, so pass `p` by value as immutable.
- // The completion queue shares the mmap with the submission queue, so pass `sq` there too.
- var sq = try SubmissionQueue.init(fd, p.*);
- errdefer sq.deinit();
- var cq = try CompletionQueue.init(fd, p.*, sq);
- errdefer cq.deinit();
-
- // Check that our starting state is as we expect.
- assert(sq.head.* == 0);
- assert(sq.tail.* == 0);
- assert(sq.mask == p.sq_entries - 1);
- // Allow flags.* to be non-zero, since the kernel may set IORING_SQ_NEED_WAKEUP at any time.
- assert(sq.dropped.* == 0);
- assert(sq.array.len == p.sq_entries);
- assert(sq.sqes.len == p.sq_entries);
- assert(sq.sqe_head == 0);
- assert(sq.sqe_tail == 0);
-
- assert(cq.head.* == 0);
- assert(cq.tail.* == 0);
- assert(cq.mask == p.cq_entries - 1);
- assert(cq.overflow.* == 0);
- assert(cq.cqes.len == p.cq_entries);
-
- return IO_Uring{
- .fd = fd,
- .sq = sq,
- .cq = cq,
- .flags = p.flags,
- .features = p.features,
- };
- }
-
- pub fn deinit(self: *IO_Uring) void {
- assert(self.fd >= 0);
- // The mmaps depend on the fd, so the order of these calls is important:
- self.cq.deinit();
- self.sq.deinit();
- os.close(self.fd);
- self.fd = -1;
- }
-
- /// Returns a pointer to a vacant SQE, or an error if the submission queue is full.
- /// We follow the implementation (and atomics) of liburing's `io_uring_get_sqe()` exactly.
- /// However, instead of a null we return an error to force safe handling.
- /// Any situation where the submission queue is full tends more towards a control flow error,
- /// and the null return in liburing is more a C idiom than anything else, for lack of a better
- /// alternative. In Zig, we have first-class error handling... so let's use it.
- /// Matches the implementation of io_uring_get_sqe() in liburing.
- pub fn get_sqe(self: *IO_Uring) !*linux.io_uring_sqe {
- const head = @atomicLoad(u32, self.sq.head, .Acquire);
- // Remember that these head and tail offsets wrap around every four billion operations.
- // We must therefore use wrapping addition and subtraction to avoid a runtime crash.
- const next = self.sq.sqe_tail +% 1;
- if (next -% head > self.sq.sqes.len) return error.SubmissionQueueFull;
- const sqe = &self.sq.sqes[self.sq.sqe_tail & self.sq.mask];
- self.sq.sqe_tail = next;
- return sqe;
- }
-
- /// Submits the SQEs acquired via get_sqe() to the kernel. You can call this once after you have
- /// called get_sqe() multiple times to setup multiple I/O requests.
- /// Returns the number of SQEs submitted, if not used alongside IORING_SETUP_SQPOLL.
- /// If the io_uring instance is uses IORING_SETUP_SQPOLL, the value returned on success is not
- /// guaranteed to match the amount of actually submitted sqes during this call. A value higher
- /// or lower, including 0, may be returned.
- /// Matches the implementation of io_uring_submit() in liburing.
- pub fn submit(self: *IO_Uring) !u32 {
- return self.submit_and_wait(0);
- }
-
- /// Like submit(), but allows waiting for events as well.
- /// Returns the number of SQEs submitted.
- /// Matches the implementation of io_uring_submit_and_wait() in liburing.
- pub fn submit_and_wait(self: *IO_Uring, wait_nr: u32) !u32 {
- const submitted = self.flush_sq();
- var flags: u32 = 0;
- if (self.sq_ring_needs_enter(&flags) or wait_nr > 0) {
- if (wait_nr > 0 or (self.flags & linux.IORING_SETUP_IOPOLL) != 0) {
- flags |= linux.IORING_ENTER_GETEVENTS;
- }
- return try self.enter(submitted, wait_nr, flags);
- }
- return submitted;
- }
-
- /// Tell the kernel we have submitted SQEs and/or want to wait for CQEs.
- /// Returns the number of SQEs submitted.
- pub fn enter(self: *IO_Uring, to_submit: u32, min_complete: u32, flags: u32) !u32 {
- assert(self.fd >= 0);
- const res = linux.io_uring_enter(self.fd, to_submit, min_complete, flags, null);
- switch (linux.getErrno(res)) {
- .SUCCESS => {},
- // The kernel was unable to allocate memory or ran out of resources for the request.
- // The application should wait for some completions and try again:
- .AGAIN => return error.SystemResources,
- // The SQE `fd` is invalid, or IOSQE_FIXED_FILE was set but no files were registered:
- .BADF => return error.FileDescriptorInvalid,
- // The file descriptor is valid, but the ring is not in the right state.
- // See io_uring_register(2) for how to enable the ring.
- .BADFD => return error.FileDescriptorInBadState,
- // The application attempted to overcommit the number of requests it can have pending.
- // The application should wait for some completions and try again:
- .BUSY => return error.CompletionQueueOvercommitted,
- // The SQE is invalid, or valid but the ring was setup with IORING_SETUP_IOPOLL:
- .INVAL => return error.SubmissionQueueEntryInvalid,
- // The buffer is outside the process' accessible address space, or IORING_OP_READ_FIXED
- // or IORING_OP_WRITE_FIXED was specified but no buffers were registered, or the range
- // described by `addr` and `len` is not within the buffer registered at `buf_index`:
- .FAULT => return error.BufferInvalid,
- .NXIO => return error.RingShuttingDown,
- // The kernel believes our `self.fd` does not refer to an io_uring instance,
- // or the opcode is valid but not supported by this kernel (more likely):
- .OPNOTSUPP => return error.OpcodeNotSupported,
- // The operation was interrupted by a delivery of a signal before it could complete.
- // This can happen while waiting for events with IORING_ENTER_GETEVENTS:
- .INTR => return error.SignalInterrupt,
- else => |errno| return os.unexpectedErrno(errno),
- }
- return @as(u32, @intCast(res));
- }
-
- /// Sync internal state with kernel ring state on the SQ side.
- /// Returns the number of all pending events in the SQ ring, for the shared ring.
- /// This return value includes previously flushed SQEs, as per liburing.
- /// The rationale is to suggest that an io_uring_enter() call is needed rather than not.
- /// Matches the implementation of __io_uring_flush_sq() in liburing.
- pub fn flush_sq(self: *IO_Uring) u32 {
- if (self.sq.sqe_head != self.sq.sqe_tail) {
- // Fill in SQEs that we have queued up, adding them to the kernel ring.
- const to_submit = self.sq.sqe_tail -% self.sq.sqe_head;
- var tail = self.sq.tail.*;
- var i: usize = 0;
- while (i < to_submit) : (i += 1) {
- self.sq.array[tail & self.sq.mask] = self.sq.sqe_head & self.sq.mask;
- tail +%= 1;
- self.sq.sqe_head +%= 1;
- }
- // Ensure that the kernel can actually see the SQE updates when it sees the tail update.
- @atomicStore(u32, self.sq.tail, tail, .Release);
- }
- return self.sq_ready();
- }
-
- /// Returns true if we are not using an SQ thread (thus nobody submits but us),
- /// or if IORING_SQ_NEED_WAKEUP is set and the SQ thread must be explicitly awakened.
- /// For the latter case, we set the SQ thread wakeup flag.
- /// Matches the implementation of sq_ring_needs_enter() in liburing.
- pub fn sq_ring_needs_enter(self: *IO_Uring, flags: *u32) bool {
- assert(flags.* == 0);
- if ((self.flags & linux.IORING_SETUP_SQPOLL) == 0) return true;
- if ((@atomicLoad(u32, self.sq.flags, .Unordered) & linux.IORING_SQ_NEED_WAKEUP) != 0) {
- flags.* |= linux.IORING_ENTER_SQ_WAKEUP;
- return true;
- }
- return false;
- }
-
- /// Returns the number of flushed and unflushed SQEs pending in the submission queue.
- /// In other words, this is the number of SQEs in the submission queue, i.e. its length.
- /// These are SQEs that the kernel is yet to consume.
- /// Matches the implementation of io_uring_sq_ready in liburing.
- pub fn sq_ready(self: *IO_Uring) u32 {
- // Always use the shared ring state (i.e. head and not sqe_head) to avoid going out of sync,
- // see https://github.com/axboe/liburing/issues/92.
- return self.sq.sqe_tail -% @atomicLoad(u32, self.sq.head, .Acquire);
- }
-
- /// Returns the number of CQEs in the completion queue, i.e. its length.
- /// These are CQEs that the application is yet to consume.
- /// Matches the implementation of io_uring_cq_ready in liburing.
- pub fn cq_ready(self: *IO_Uring) u32 {
- return @atomicLoad(u32, self.cq.tail, .Acquire) -% self.cq.head.*;
- }
-
- /// Copies as many CQEs as are ready, and that can fit into the destination `cqes` slice.
- /// If none are available, enters into the kernel to wait for at most `wait_nr` CQEs.
- /// Returns the number of CQEs copied, advancing the CQ ring.
- /// Provides all the wait/peek methods found in liburing, but with batching and a single method.
- /// The rationale for copying CQEs rather than copying pointers is that pointers are 8 bytes
- /// whereas CQEs are not much more at only 16 bytes, and this provides a safer faster interface.
- /// Safer, because you no longer need to call cqe_seen(), avoiding idempotency bugs.
- /// Faster, because we can now amortize the atomic store release to `cq.head` across the batch.
- /// See https://github.com/axboe/liburing/issues/103#issuecomment-686665007.
- /// Matches the implementation of io_uring_peek_batch_cqe() in liburing, but supports waiting.
- pub fn copy_cqes(self: *IO_Uring, cqes: []linux.io_uring_cqe, wait_nr: u32) !u32 {
- const count = self.copy_cqes_ready(cqes);
- if (count > 0) return count;
- if (self.cq_ring_needs_flush() or wait_nr > 0) {
- _ = try self.enter(0, wait_nr, linux.IORING_ENTER_GETEVENTS);
- return self.copy_cqes_ready(cqes);
- }
- return 0;
- }
-
- fn copy_cqes_ready(self: *IO_Uring, cqes: []linux.io_uring_cqe) u32 {
- const ready = self.cq_ready();
- const count = @min(cqes.len, ready);
- const head = self.cq.head.* & self.cq.mask;
- const tail = (self.cq.head.* +% count) & self.cq.mask;
-
- if (head <= tail) {
- // head behind tail -> no wrapping
- @memcpy(cqes[0..count], self.cq.cqes[head..tail]);
- } else {
- // head in front of tail -> buffer wraps
- const two_copies_required: bool = self.cq.cqes.len - head < count;
- const amount_to_copy_in_first = if (two_copies_required) self.cq.cqes.len - head else count;
- @memcpy(cqes[0..amount_to_copy_in_first], self.cq.cqes[head .. head + amount_to_copy_in_first]);
- if (two_copies_required) {
- @memcpy(cqes[amount_to_copy_in_first..count], self.cq.cqes[0..tail]);
- }
- }
-
- self.cq_advance(count);
- return count;
- }
-
- /// Returns a copy of an I/O completion, waiting for it if necessary, and advancing the CQ ring.
- /// A convenience method for `copy_cqes()` for when you don't need to batch or peek.
- pub fn copy_cqe(ring: *IO_Uring) !linux.io_uring_cqe {
- var cqes: [1]linux.io_uring_cqe = undefined;
- while (true) {
- const count = try ring.copy_cqes(&cqes, 1);
- if (count > 0) return cqes[0];
- }
- }
-
- /// Matches the implementation of cq_ring_needs_flush() in liburing.
- pub fn cq_ring_needs_flush(self: *IO_Uring) bool {
- return (@atomicLoad(u32, self.sq.flags, .Unordered) & linux.IORING_SQ_CQ_OVERFLOW) != 0;
- }
-
- /// For advanced use cases only that implement custom completion queue methods.
- /// If you use copy_cqes() or copy_cqe() you must not call cqe_seen() or cq_advance().
- /// Must be called exactly once after a zero-copy CQE has been processed by your application.
- /// Not idempotent, calling more than once will result in other CQEs being lost.
- /// Matches the implementation of cqe_seen() in liburing.
- pub fn cqe_seen(self: *IO_Uring, cqe: *linux.io_uring_cqe) void {
- _ = cqe;
- self.cq_advance(1);
- }
-
- /// For advanced use cases only that implement custom completion queue methods.
- /// Matches the implementation of cq_advance() in liburing.
- pub fn cq_advance(self: *IO_Uring, count: u32) void {
- if (count > 0) {
- // Ensure the kernel only sees the new head value after the CQEs have been read.
- @atomicStore(u32, self.cq.head, self.cq.head.* +% count, .Release);
- }
- }
-
- /// Queues (but does not submit) an SQE to perform an `fsync(2)`.
- /// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
- /// For example, for `fdatasync()` you can set `IORING_FSYNC_DATASYNC` in the SQE's `rw_flags`.
- /// N.B. While SQEs are initiated in the order in which they appear in the submission queue,
- /// operations execute in parallel and completions are unordered. Therefore, an application that
- /// submits a write followed by an fsync in the submission queue cannot expect the fsync to
- /// apply to the write, since the fsync may complete before the write is issued to the disk.
- /// You should preferably use `link_with_next_sqe()` on a write's SQE to link it with an fsync,
- /// or else insert a full write barrier using `drain_previous_sqes()` when queueing an fsync.
- pub fn fsync(self: *IO_Uring, user_data: u64, fd: os.fd_t, flags: u32) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_fsync(sqe, fd, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a no-op.
- /// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
- /// A no-op is more useful than may appear at first glance.
- /// For example, you could call `drain_previous_sqes()` on the returned SQE, to use the no-op to
- /// know when the ring is idle before acting on a kill signal.
- pub fn nop(self: *IO_Uring, user_data: u64) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_nop(sqe);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Used to select how the read should be handled.
- pub const ReadBuffer = union(enum) {
- /// io_uring will read directly into this buffer
- buffer: []u8,
-
- /// io_uring will read directly into these buffers using readv.
- iovecs: []const os.iovec,
-
- /// io_uring will select a buffer that has previously been provided with `provide_buffers`.
- /// The buffer group reference by `group_id` must contain at least one buffer for the read to work.
- /// `len` controls the number of bytes to read into the selected buffer.
- buffer_selection: struct {
- group_id: u16,
- len: usize,
- },
- };
-
- /// Queues (but does not submit) an SQE to perform a `read(2)` or `preadv(2)` depending on the buffer type.
- /// * Reading into a `ReadBuffer.buffer` uses `read(2)`
- /// * Reading into a `ReadBuffer.iovecs` uses `preadv(2)`
- /// If you want to do a `preadv2(2)` then set `rw_flags` on the returned SQE. See https://man7.org/linux/man-pages/man2/preadv2.2.html
- ///
- /// Returns a pointer to the SQE.
- pub fn read(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- buffer: ReadBuffer,
- offset: u64,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- switch (buffer) {
- .buffer => |slice| io_uring_prep_read(sqe, fd, slice, offset),
- .iovecs => |vecs| io_uring_prep_readv(sqe, fd, vecs, offset),
- .buffer_selection => |selection| {
- io_uring_prep_rw(.READ, sqe, fd, 0, selection.len, offset);
- sqe.flags |= linux.IOSQE_BUFFER_SELECT;
- sqe.buf_index = selection.group_id;
- },
- }
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `write(2)`.
- /// Returns a pointer to the SQE.
- pub fn write(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- buffer: []const u8,
- offset: u64,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_write(sqe, fd, buffer, offset);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `splice(2)`
- /// Either `fd_in` or `fd_out` must be a pipe.
- /// If `fd_in` refers to a pipe, `off_in` is ignored and must be set to std.math.maxInt(u64).
- /// If `fd_in` does not refer to a pipe and `off_in` is maxInt(u64), then `len` are read
- /// from `fd_in` starting from the file offset, which is incremented by the number of bytes read.
- /// If `fd_in` does not refer to a pipe and `off_in` is not maxInt(u64), then the starting offset of `fd_in` will be `off_in`.
- /// This splice operation can be used to implement sendfile by splicing to an intermediate pipe first,
- /// then splice to the final destination. In fact, the implementation of sendfile in kernel uses splice internally.
- ///
- /// NOTE that even if fd_in or fd_out refers to a pipe, the splice operation can still fail with EINVAL if one of the
- /// fd doesn't explicitly support splice peration, e.g. reading from terminal is unsupported from kernel 5.7 to 5.11.
- /// See https://github.com/axboe/liburing/issues/291
- ///
- /// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
- pub fn splice(self: *IO_Uring, user_data: u64, fd_in: os.fd_t, off_in: u64, fd_out: os.fd_t, off_out: u64, len: usize) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_splice(sqe, fd_in, off_in, fd_out, off_out, len);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a IORING_OP_READ_FIXED.
- /// The `buffer` provided must be registered with the kernel by calling `register_buffers` first.
- /// The `buffer_index` must be the same as its index in the array provided to `register_buffers`.
- ///
- /// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
- pub fn read_fixed(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- buffer: *os.iovec,
- offset: u64,
- buffer_index: u16,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_read_fixed(sqe, fd, buffer, offset, buffer_index);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `pwritev()`.
- /// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
- /// For example, if you want to do a `pwritev2()` then set `rw_flags` on the returned SQE.
- /// See https://linux.die.net/man/2/pwritev.
- pub fn writev(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- iovecs: []const os.iovec_const,
- offset: u64,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_writev(sqe, fd, iovecs, offset);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a IORING_OP_WRITE_FIXED.
- /// The `buffer` provided must be registered with the kernel by calling `register_buffers` first.
- /// The `buffer_index` must be the same as its index in the array provided to `register_buffers`.
- ///
- /// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
- pub fn write_fixed(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- buffer: *os.iovec,
- offset: u64,
- buffer_index: u16,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_write_fixed(sqe, fd, buffer, offset, buffer_index);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform an `accept4(2)` on a socket.
- /// Returns a pointer to the SQE.
- /// Available since 5.5
- pub fn accept(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- addr: ?*os.sockaddr,
- addrlen: ?*os.socklen_t,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues an multishot accept on a socket.
- ///
- /// Multishot variant allows an application to issue a single accept request,
- /// which will repeatedly trigger a CQE when a connection request comes in.
- /// While IORING_CQE_F_MORE flag is set in CQE flags accept will generate
- /// further CQEs.
- ///
- /// Available since 5.19
- pub fn accept_multishot(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- addr: ?*os.sockaddr,
- addrlen: ?*os.socklen_t,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_multishot_accept(sqe, fd, addr, addrlen, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues an accept using direct (registered) file descriptors.
- ///
- /// To use an accept direct variant, the application must first have registered
- /// a file table (with register_files). An unused table index will be
- /// dynamically chosen and returned in the CQE res field.
- ///
- /// After creation, they can be used by setting IOSQE_FIXED_FILE in the SQE
- /// flags member, and setting the SQE fd field to the direct descriptor value
- /// rather than the regular file descriptor.
- ///
- /// Available since 5.19
- pub fn accept_direct(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- addr: ?*os.sockaddr,
- addrlen: ?*os.socklen_t,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_accept_direct(sqe, fd, addr, addrlen, flags, linux.IORING_FILE_INDEX_ALLOC);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues an multishot accept using direct (registered) file descriptors.
- /// Available since 5.19
- pub fn accept_multishot_direct(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- addr: ?*os.sockaddr,
- addrlen: ?*os.socklen_t,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_multishot_accept_direct(sqe, fd, addr, addrlen, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queue (but does not submit) an SQE to perform a `connect(2)` on a socket.
- /// Returns a pointer to the SQE.
- pub fn connect(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- addr: *const os.sockaddr,
- addrlen: os.socklen_t,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_connect(sqe, fd, addr, addrlen);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `epoll_ctl(2)`.
- /// Returns a pointer to the SQE.
- pub fn epoll_ctl(
- self: *IO_Uring,
- user_data: u64,
- epfd: os.fd_t,
- fd: os.fd_t,
- op: u32,
- ev: ?*linux.epoll_event,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_epoll_ctl(sqe, epfd, fd, op, ev);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Used to select how the recv call should be handled.
- pub const RecvBuffer = union(enum) {
- /// io_uring will recv directly into this buffer
- buffer: []u8,
-
- /// io_uring will select a buffer that has previously been provided with `provide_buffers`.
- /// The buffer group referenced by `group_id` must contain at least one buffer for the recv call to work.
- /// `len` controls the number of bytes to read into the selected buffer.
- buffer_selection: struct {
- group_id: u16,
- len: usize,
- },
- };
-
- /// Queues (but does not submit) an SQE to perform a `recv(2)`.
- /// Returns a pointer to the SQE.
- /// Available since 5.6
- pub fn recv(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- buffer: RecvBuffer,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- switch (buffer) {
- .buffer => |slice| io_uring_prep_recv(sqe, fd, slice, flags),
- .buffer_selection => |selection| {
- io_uring_prep_rw(.RECV, sqe, fd, 0, selection.len, 0);
- sqe.rw_flags = flags;
- sqe.flags |= linux.IOSQE_BUFFER_SELECT;
- sqe.buf_index = selection.group_id;
- },
- }
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `send(2)`.
- /// Returns a pointer to the SQE.
- /// Available since 5.6
- pub fn send(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- buffer: []const u8,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_send(sqe, fd, buffer, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform an async zerocopy `send(2)`.
- ///
- /// This operation will most likely produce two CQEs. The flags field of the
- /// first cqe may likely contain IORING_CQE_F_MORE, which means that there will
- /// be a second cqe with the user_data field set to the same value. The user
- /// must not modify the data buffer until the notification is posted. The first
- /// cqe follows the usual rules and so its res field will contain the number of
- /// bytes sent or a negative error code. The notification's res field will be
- /// set to zero and the flags field will contain IORING_CQE_F_NOTIF. The two
- /// step model is needed because the kernel may hold on to buffers for a long
- /// time, e.g. waiting for a TCP ACK. Notifications responsible for controlling
- /// the lifetime of the buffers. Even errored requests may generate a
- /// notification.
- ///
- /// Available since 6.0
- pub fn send_zc(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- buffer: []const u8,
- send_flags: u32,
- zc_flags: u16,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_send_zc(sqe, fd, buffer, send_flags, zc_flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform an async zerocopy `send(2)`.
- /// Returns a pointer to the SQE.
- /// Available since 6.0
- pub fn send_zc_fixed(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- buffer: []const u8,
- send_flags: u32,
- zc_flags: u16,
- buf_index: u16,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_send_zc_fixed(sqe, fd, buffer, send_flags, zc_flags, buf_index);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `recvmsg(2)`.
- /// Returns a pointer to the SQE.
- /// Available since 5.3
- pub fn recvmsg(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- msg: *os.msghdr,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_recvmsg(sqe, fd, msg, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `sendmsg(2)`.
- /// Returns a pointer to the SQE.
- /// Available since 5.3
- pub fn sendmsg(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- msg: *const os.msghdr_const,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_sendmsg(sqe, fd, msg, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform an async zerocopy `sendmsg(2)`.
- /// Returns a pointer to the SQE.
- /// Available since 6.1
- pub fn sendmsg_zc(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- msg: *const os.msghdr_const,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_sendmsg_zc(sqe, fd, msg, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform an `openat(2)`.
- /// Returns a pointer to the SQE.
- /// Available since 5.6.
- pub fn openat(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- path: [*:0]const u8,
- flags: linux.O,
- mode: os.mode_t,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_openat(sqe, fd, path, flags, mode);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues an openat using direct (registered) file descriptors.
- ///
- /// To use an accept direct variant, the application must first have registered
- /// a file table (with register_files). An unused table index will be
- /// dynamically chosen and returned in the CQE res field.
- ///
- /// After creation, they can be used by setting IOSQE_FIXED_FILE in the SQE
- /// flags member, and setting the SQE fd field to the direct descriptor value
- /// rather than the regular file descriptor.
- ///
- /// Available since 5.15
- pub fn openat_direct(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- path: [*:0]const u8,
- flags: linux.O,
- mode: os.mode_t,
- file_index: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_openat_direct(sqe, fd, path, flags, mode, file_index);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `close(2)`.
- /// Returns a pointer to the SQE.
- /// Available since 5.6.
- pub fn close(self: *IO_Uring, user_data: u64, fd: os.fd_t) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_close(sqe, fd);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues close of registered file descriptor.
- /// Available since 5.15
- pub fn close_direct(self: *IO_Uring, user_data: u64, file_index: u32) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_close_direct(sqe, file_index);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to register a timeout operation.
- /// Returns a pointer to the SQE.
- ///
- /// The timeout will complete when either the timeout expires, or after the specified number of
- /// events complete (if `count` is greater than `0`).
- ///
- /// `flags` may be `0` for a relative timeout, or `IORING_TIMEOUT_ABS` for an absolute timeout.
- ///
- /// The completion event result will be `-ETIME` if the timeout completed through expiration,
- /// `0` if the timeout completed after the specified number of events, or `-ECANCELED` if the
- /// timeout was removed before it expired.
- ///
- /// io_uring timeouts use the `CLOCK.MONOTONIC` clock source.
- pub fn timeout(
- self: *IO_Uring,
- user_data: u64,
- ts: *const os.linux.kernel_timespec,
- count: u32,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_timeout(sqe, ts, count, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to remove an existing timeout operation.
- /// Returns a pointer to the SQE.
- ///
- /// The timeout is identified by its `user_data`.
- ///
- /// The completion event result will be `0` if the timeout was found and cancelled successfully,
- /// `-EBUSY` if the timeout was found but expiration was already in progress, or
- /// `-ENOENT` if the timeout was not found.
- pub fn timeout_remove(
- self: *IO_Uring,
- user_data: u64,
- timeout_user_data: u64,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_timeout_remove(sqe, timeout_user_data, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to add a link timeout operation.
- /// Returns a pointer to the SQE.
- ///
- /// You need to set linux.IOSQE_IO_LINK to flags of the target operation
- /// and then call this method right after the target operation.
- /// See https://lwn.net/Articles/803932/ for detail.
- ///
- /// If the dependent request finishes before the linked timeout, the timeout
- /// is canceled. If the timeout finishes before the dependent request, the
- /// dependent request will be canceled.
- ///
- /// The completion event result of the link_timeout will be
- /// `-ETIME` if the timeout finishes before the dependent request
- /// (in this case, the completion event result of the dependent request will
- /// be `-ECANCELED`), or
- /// `-EALREADY` if the dependent request finishes before the linked timeout.
- pub fn link_timeout(
- self: *IO_Uring,
- user_data: u64,
- ts: *const os.linux.kernel_timespec,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_link_timeout(sqe, ts, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `poll(2)`.
- /// Returns a pointer to the SQE.
- pub fn poll_add(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- poll_mask: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_poll_add(sqe, fd, poll_mask);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to remove an existing poll operation.
- /// Returns a pointer to the SQE.
- pub fn poll_remove(
- self: *IO_Uring,
- user_data: u64,
- target_user_data: u64,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_poll_remove(sqe, target_user_data);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to update the user data of an existing poll
- /// operation. Returns a pointer to the SQE.
- pub fn poll_update(
- self: *IO_Uring,
- user_data: u64,
- old_user_data: u64,
- new_user_data: u64,
- poll_mask: u32,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_poll_update(sqe, old_user_data, new_user_data, poll_mask, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform an `fallocate(2)`.
- /// Returns a pointer to the SQE.
- pub fn fallocate(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- mode: i32,
- offset: u64,
- len: u64,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_fallocate(sqe, fd, mode, offset, len);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform an `statx(2)`.
- /// Returns a pointer to the SQE.
- pub fn statx(
- self: *IO_Uring,
- user_data: u64,
- fd: os.fd_t,
- path: [:0]const u8,
- flags: u32,
- mask: u32,
- buf: *linux.Statx,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_statx(sqe, fd, path, flags, mask, buf);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to remove an existing operation.
- /// Returns a pointer to the SQE.
- ///
- /// The operation is identified by its `user_data`.
- ///
- /// The completion event result will be `0` if the operation was found and cancelled successfully,
- /// `-EALREADY` if the operation was found but was already in progress, or
- /// `-ENOENT` if the operation was not found.
- pub fn cancel(
- self: *IO_Uring,
- user_data: u64,
- cancel_user_data: u64,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_cancel(sqe, cancel_user_data, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `shutdown(2)`.
- /// Returns a pointer to the SQE.
- ///
- /// The operation is identified by its `user_data`.
- pub fn shutdown(
- self: *IO_Uring,
- user_data: u64,
- sockfd: os.socket_t,
- how: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_shutdown(sqe, sockfd, how);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `renameat2(2)`.
- /// Returns a pointer to the SQE.
- pub fn renameat(
- self: *IO_Uring,
- user_data: u64,
- old_dir_fd: os.fd_t,
- old_path: [*:0]const u8,
- new_dir_fd: os.fd_t,
- new_path: [*:0]const u8,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_renameat(sqe, old_dir_fd, old_path, new_dir_fd, new_path, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `unlinkat(2)`.
- /// Returns a pointer to the SQE.
- pub fn unlinkat(
- self: *IO_Uring,
- user_data: u64,
- dir_fd: os.fd_t,
- path: [*:0]const u8,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_unlinkat(sqe, dir_fd, path, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `mkdirat(2)`.
- /// Returns a pointer to the SQE.
- pub fn mkdirat(
- self: *IO_Uring,
- user_data: u64,
- dir_fd: os.fd_t,
- path: [*:0]const u8,
- mode: os.mode_t,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_mkdirat(sqe, dir_fd, path, mode);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `symlinkat(2)`.
- /// Returns a pointer to the SQE.
- pub fn symlinkat(
- self: *IO_Uring,
- user_data: u64,
- target: [*:0]const u8,
- new_dir_fd: os.fd_t,
- link_path: [*:0]const u8,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_symlinkat(sqe, target, new_dir_fd, link_path);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `linkat(2)`.
- /// Returns a pointer to the SQE.
- pub fn linkat(
- self: *IO_Uring,
- user_data: u64,
- old_dir_fd: os.fd_t,
- old_path: [*:0]const u8,
- new_dir_fd: os.fd_t,
- new_path: [*:0]const u8,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_linkat(sqe, old_dir_fd, old_path, new_dir_fd, new_path, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to provide a group of buffers used for commands that read/receive data.
- /// Returns a pointer to the SQE.
- ///
- /// Provided buffers can be used in `read`, `recv` or `recvmsg` commands via .buffer_selection.
- ///
- /// The kernel expects a contiguous block of memory of size (buffers_count * buffer_size).
- pub fn provide_buffers(
- self: *IO_Uring,
- user_data: u64,
- buffers: [*]u8,
- buffer_size: usize,
- buffers_count: usize,
- group_id: usize,
- buffer_id: usize,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_provide_buffers(sqe, buffers, buffer_size, buffers_count, group_id, buffer_id);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to remove a group of provided buffers.
- /// Returns a pointer to the SQE.
- pub fn remove_buffers(
- self: *IO_Uring,
- user_data: u64,
- buffers_count: usize,
- group_id: usize,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_remove_buffers(sqe, buffers_count, group_id);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Queues (but does not submit) an SQE to perform a `waitid(2)`.
- /// Returns a pointer to the SQE.
- pub fn waitid(
- self: *IO_Uring,
- user_data: u64,
- id_type: linux.P,
- id: i32,
- infop: *linux.siginfo_t,
- options: u32,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_waitid(sqe, id_type, id, infop, options, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Registers an array of file descriptors.
- /// Every time a file descriptor is put in an SQE and submitted to the kernel, the kernel must
- /// retrieve a reference to the file, and once I/O has completed the file reference must be
- /// dropped. The atomic nature of this file reference can be a slowdown for high IOPS workloads.
- /// This slowdown can be avoided by pre-registering file descriptors.
- /// To refer to a registered file descriptor, IOSQE_FIXED_FILE must be set in the SQE's flags,
- /// and the SQE's fd must be set to the index of the file descriptor in the registered array.
- /// Registering file descriptors will wait for the ring to idle.
- /// Files are automatically unregistered by the kernel when the ring is torn down.
- /// An application need unregister only if it wants to register a new array of file descriptors.
- pub fn register_files(self: *IO_Uring, fds: []const os.fd_t) !void {
- assert(self.fd >= 0);
- const res = linux.io_uring_register(
- self.fd,
- .REGISTER_FILES,
- @as(*const anyopaque, @ptrCast(fds.ptr)),
- @as(u32, @intCast(fds.len)),
- );
- try handle_registration_result(res);
- }
-
- /// Updates registered file descriptors.
- ///
- /// Updates are applied starting at the provided offset in the original file descriptors slice.
- /// There are three kind of updates:
- /// * turning a sparse entry (where the fd is -1) into a real one
- /// * removing an existing entry (set the fd to -1)
- /// * replacing an existing entry with a new fd
- /// Adding new file descriptors must be done with `register_files`.
- pub fn register_files_update(self: *IO_Uring, offset: u32, fds: []const os.fd_t) !void {
- assert(self.fd >= 0);
-
- const FilesUpdate = extern struct {
- offset: u32,
- resv: u32,
- fds: u64 align(8),
- };
- var update = FilesUpdate{
- .offset = offset,
- .resv = @as(u32, 0),
- .fds = @as(u64, @intFromPtr(fds.ptr)),
- };
-
- const res = linux.io_uring_register(
- self.fd,
- .REGISTER_FILES_UPDATE,
- @as(*const anyopaque, @ptrCast(&update)),
- @as(u32, @intCast(fds.len)),
- );
- try handle_registration_result(res);
- }
-
- /// Registers the file descriptor for an eventfd that will be notified of completion events on
- /// an io_uring instance.
- /// Only a single a eventfd can be registered at any given point in time.
- pub fn register_eventfd(self: *IO_Uring, fd: os.fd_t) !void {
- assert(self.fd >= 0);
- const res = linux.io_uring_register(
- self.fd,
- .REGISTER_EVENTFD,
- @as(*const anyopaque, @ptrCast(&fd)),
- 1,
- );
- try handle_registration_result(res);
- }
-
- /// Registers the file descriptor for an eventfd that will be notified of completion events on
- /// an io_uring instance. Notifications are only posted for events that complete in an async manner.
- /// This means that events that complete inline while being submitted do not trigger a notification event.
- /// Only a single eventfd can be registered at any given point in time.
- pub fn register_eventfd_async(self: *IO_Uring, fd: os.fd_t) !void {
- assert(self.fd >= 0);
- const res = linux.io_uring_register(
- self.fd,
- .REGISTER_EVENTFD_ASYNC,
- @as(*const anyopaque, @ptrCast(&fd)),
- 1,
- );
- try handle_registration_result(res);
- }
-
- /// Unregister the registered eventfd file descriptor.
- pub fn unregister_eventfd(self: *IO_Uring) !void {
- assert(self.fd >= 0);
- const res = linux.io_uring_register(
- self.fd,
- .UNREGISTER_EVENTFD,
- null,
- 0,
- );
- try handle_registration_result(res);
- }
-
- /// Registers an array of buffers for use with `read_fixed` and `write_fixed`.
- pub fn register_buffers(self: *IO_Uring, buffers: []const os.iovec) !void {
- assert(self.fd >= 0);
- const res = linux.io_uring_register(
- self.fd,
- .REGISTER_BUFFERS,
- buffers.ptr,
- @as(u32, @intCast(buffers.len)),
- );
- try handle_registration_result(res);
- }
-
- /// Unregister the registered buffers.
- pub fn unregister_buffers(self: *IO_Uring) !void {
- assert(self.fd >= 0);
- const res = linux.io_uring_register(self.fd, .UNREGISTER_BUFFERS, null, 0);
- switch (linux.getErrno(res)) {
- .SUCCESS => {},
- .NXIO => return error.BuffersNotRegistered,
- else => |errno| return os.unexpectedErrno(errno),
- }
- }
-
- fn handle_registration_result(res: usize) !void {
- switch (linux.getErrno(res)) {
- .SUCCESS => {},
- // One or more fds in the array are invalid, or the kernel does not support sparse sets:
- .BADF => return error.FileDescriptorInvalid,
- .BUSY => return error.FilesAlreadyRegistered,
- .INVAL => return error.FilesEmpty,
- // Adding `nr_args` file references would exceed the maximum allowed number of files the
- // user is allowed to have according to the per-user RLIMIT_NOFILE resource limit and
- // the CAP_SYS_RESOURCE capability is not set, or `nr_args` exceeds the maximum allowed
- // for a fixed file set (older kernels have a limit of 1024 files vs 64K files):
- .MFILE => return error.UserFdQuotaExceeded,
- // Insufficient kernel resources, or the caller had a non-zero RLIMIT_MEMLOCK soft
- // resource limit but tried to lock more memory than the limit permitted (not enforced
- // when the process is privileged with CAP_IPC_LOCK):
- .NOMEM => return error.SystemResources,
- // Attempt to register files on a ring already registering files or being torn down:
- .NXIO => return error.RingShuttingDownOrAlreadyRegisteringFiles,
- else => |errno| return os.unexpectedErrno(errno),
- }
- }
-
- /// Unregisters all registered file descriptors previously associated with the ring.
- pub fn unregister_files(self: *IO_Uring) !void {
- assert(self.fd >= 0);
- const res = linux.io_uring_register(self.fd, .UNREGISTER_FILES, null, 0);
- switch (linux.getErrno(res)) {
- .SUCCESS => {},
- .NXIO => return error.FilesNotRegistered,
- else => |errno| return os.unexpectedErrno(errno),
- }
- }
-
- /// Prepares a socket creation request.
- /// New socket fd will be returned in completion result.
- /// Available since 5.19
- pub fn socket(
- self: *IO_Uring,
- user_data: u64,
- domain: u32,
- socket_type: u32,
- protocol: u32,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_socket(sqe, domain, socket_type, protocol, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Prepares a socket creation request for registered file at index `file_index`.
- /// Available since 5.19
- pub fn socket_direct(
- self: *IO_Uring,
- user_data: u64,
- domain: u32,
- socket_type: u32,
- protocol: u32,
- flags: u32,
- file_index: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_socket_direct(sqe, domain, socket_type, protocol, flags, file_index);
- sqe.user_data = user_data;
- return sqe;
- }
-
- /// Prepares a socket creation request for registered file, index chosen by kernel (file index alloc).
- /// File index will be returned in CQE res field.
- /// Available since 5.19
- pub fn socket_direct_alloc(
- self: *IO_Uring,
- user_data: u64,
- domain: u32,
- socket_type: u32,
- protocol: u32,
- flags: u32,
- ) !*linux.io_uring_sqe {
- const sqe = try self.get_sqe();
- io_uring_prep_socket_direct_alloc(sqe, domain, socket_type, protocol, flags);
- sqe.user_data = user_data;
- return sqe;
- }
-};
-
-pub const SubmissionQueue = struct {
- head: *u32,
- tail: *u32,
- mask: u32,
- flags: *u32,
- dropped: *u32,
- array: []u32,
- sqes: []linux.io_uring_sqe,
- mmap: []align(mem.page_size) u8,
- mmap_sqes: []align(mem.page_size) u8,
-
- // We use `sqe_head` and `sqe_tail` in the same way as liburing:
- // We increment `sqe_tail` (but not `tail`) for each call to `get_sqe()`.
- // We then set `tail` to `sqe_tail` once, only when these events are actually submitted.
- // This allows us to amortize the cost of the @atomicStore to `tail` across multiple SQEs.
- sqe_head: u32 = 0,
- sqe_tail: u32 = 0,
-
- pub fn init(fd: os.fd_t, p: linux.io_uring_params) !SubmissionQueue {
- assert(fd >= 0);
- assert((p.features & linux.IORING_FEAT_SINGLE_MMAP) != 0);
- const size = @max(
- p.sq_off.array + p.sq_entries * @sizeOf(u32),
- p.cq_off.cqes + p.cq_entries * @sizeOf(linux.io_uring_cqe),
- );
- const mmap = try os.mmap(
- null,
- size,
- os.PROT.READ | os.PROT.WRITE,
- .{ .TYPE = .SHARED, .POPULATE = true },
- fd,
- linux.IORING_OFF_SQ_RING,
- );
- errdefer os.munmap(mmap);
- assert(mmap.len == size);
-
- // The motivation for the `sqes` and `array` indirection is to make it possible for the
- // application to preallocate static linux.io_uring_sqe entries and then replay them when needed.
- const size_sqes = p.sq_entries * @sizeOf(linux.io_uring_sqe);
- const mmap_sqes = try os.mmap(
- null,
- size_sqes,
- os.PROT.READ | os.PROT.WRITE,
- .{ .TYPE = .SHARED, .POPULATE = true },
- fd,
- linux.IORING_OFF_SQES,
- );
- errdefer os.munmap(mmap_sqes);
- assert(mmap_sqes.len == size_sqes);
-
- const array: [*]u32 = @ptrCast(@alignCast(&mmap[p.sq_off.array]));
- const sqes: [*]linux.io_uring_sqe = @ptrCast(@alignCast(&mmap_sqes[0]));
- // We expect the kernel copies p.sq_entries to the u32 pointed to by p.sq_off.ring_entries,
- // see https://github.com/torvalds/linux/blob/v5.8/fs/io_uring.c#L7843-L7844.
- assert(p.sq_entries == @as(*u32, @ptrCast(@alignCast(&mmap[p.sq_off.ring_entries]))).*);
- return SubmissionQueue{
- .head = @ptrCast(@alignCast(&mmap[p.sq_off.head])),
- .tail = @ptrCast(@alignCast(&mmap[p.sq_off.tail])),
- .mask = @as(*u32, @ptrCast(@alignCast(&mmap[p.sq_off.ring_mask]))).*,
- .flags = @ptrCast(@alignCast(&mmap[p.sq_off.flags])),
- .dropped = @ptrCast(@alignCast(&mmap[p.sq_off.dropped])),
- .array = array[0..p.sq_entries],
- .sqes = sqes[0..p.sq_entries],
- .mmap = mmap,
- .mmap_sqes = mmap_sqes,
- };
- }
-
- pub fn deinit(self: *SubmissionQueue) void {
- os.munmap(self.mmap_sqes);
- os.munmap(self.mmap);
- }
-};
-
-pub const CompletionQueue = struct {
- head: *u32,
- tail: *u32,
- mask: u32,
- overflow: *u32,
- cqes: []linux.io_uring_cqe,
-
- pub fn init(fd: os.fd_t, p: linux.io_uring_params, sq: SubmissionQueue) !CompletionQueue {
- assert(fd >= 0);
- assert((p.features & linux.IORING_FEAT_SINGLE_MMAP) != 0);
- const mmap = sq.mmap;
- const cqes: [*]linux.io_uring_cqe = @ptrCast(@alignCast(&mmap[p.cq_off.cqes]));
- assert(p.cq_entries == @as(*u32, @ptrCast(@alignCast(&mmap[p.cq_off.ring_entries]))).*);
- return CompletionQueue{
- .head = @ptrCast(@alignCast(&mmap[p.cq_off.head])),
- .tail = @ptrCast(@alignCast(&mmap[p.cq_off.tail])),
- .mask = @as(*u32, @ptrCast(@alignCast(&mmap[p.cq_off.ring_mask]))).*,
- .overflow = @ptrCast(@alignCast(&mmap[p.cq_off.overflow])),
- .cqes = cqes[0..p.cq_entries],
- };
- }
-
- pub fn deinit(self: *CompletionQueue) void {
- _ = self;
- // A no-op since we now share the mmap with the submission queue.
- // Here for symmetry with the submission queue, and for any future feature support.
- }
-};
-
-pub fn io_uring_prep_nop(sqe: *linux.io_uring_sqe) void {
- sqe.* = .{
- .opcode = .NOP,
- .flags = 0,
- .ioprio = 0,
- .fd = 0,
- .off = 0,
- .addr = 0,
- .len = 0,
- .rw_flags = 0,
- .user_data = 0,
- .buf_index = 0,
- .personality = 0,
- .splice_fd_in = 0,
- .addr3 = 0,
- .resv = 0,
- };
-}
-
-pub fn io_uring_prep_fsync(sqe: *linux.io_uring_sqe, fd: os.fd_t, flags: u32) void {
- sqe.* = .{
- .opcode = .FSYNC,
- .flags = 0,
- .ioprio = 0,
- .fd = fd,
- .off = 0,
- .addr = 0,
- .len = 0,
- .rw_flags = flags,
- .user_data = 0,
- .buf_index = 0,
- .personality = 0,
- .splice_fd_in = 0,
- .addr3 = 0,
- .resv = 0,
- };
-}
-
-pub fn io_uring_prep_rw(
- op: linux.IORING_OP,
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- addr: u64,
- len: usize,
- offset: u64,
-) void {
- sqe.* = .{
- .opcode = op,
- .flags = 0,
- .ioprio = 0,
- .fd = fd,
- .off = offset,
- .addr = addr,
- .len = @as(u32, @intCast(len)),
- .rw_flags = 0,
- .user_data = 0,
- .buf_index = 0,
- .personality = 0,
- .splice_fd_in = 0,
- .addr3 = 0,
- .resv = 0,
- };
-}
-
-pub fn io_uring_prep_read(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []u8, offset: u64) void {
- io_uring_prep_rw(.READ, sqe, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
-}
-
-pub fn io_uring_prep_write(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []const u8, offset: u64) void {
- io_uring_prep_rw(.WRITE, sqe, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
-}
-
-pub fn io_uring_prep_splice(sqe: *linux.io_uring_sqe, fd_in: os.fd_t, off_in: u64, fd_out: os.fd_t, off_out: u64, len: usize) void {
- io_uring_prep_rw(.SPLICE, sqe, fd_out, undefined, len, off_out);
- sqe.addr = off_in;
- sqe.splice_fd_in = fd_in;
-}
-
-pub fn io_uring_prep_readv(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- iovecs: []const os.iovec,
- offset: u64,
-) void {
- io_uring_prep_rw(.READV, sqe, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset);
-}
-
-pub fn io_uring_prep_writev(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- iovecs: []const os.iovec_const,
- offset: u64,
-) void {
- io_uring_prep_rw(.WRITEV, sqe, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset);
-}
-
-pub fn io_uring_prep_read_fixed(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: *os.iovec, offset: u64, buffer_index: u16) void {
- io_uring_prep_rw(.READ_FIXED, sqe, fd, @intFromPtr(buffer.iov_base), buffer.iov_len, offset);
- sqe.buf_index = buffer_index;
-}
-
-pub fn io_uring_prep_write_fixed(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: *os.iovec, offset: u64, buffer_index: u16) void {
- io_uring_prep_rw(.WRITE_FIXED, sqe, fd, @intFromPtr(buffer.iov_base), buffer.iov_len, offset);
- sqe.buf_index = buffer_index;
-}
-
-/// Poll masks previously used to comprise of 16 bits in the flags union of
-/// a SQE, but were then extended to comprise of 32 bits in order to make
-/// room for additional option flags. To ensure that the correct bits of
-/// poll masks are consistently and properly read across multiple kernel
-/// versions, poll masks are enforced to be little-endian.
-/// https://www.spinics.net/lists/io-uring/msg02848.html
-pub inline fn __io_uring_prep_poll_mask(poll_mask: u32) u32 {
- return std.mem.nativeToLittle(u32, poll_mask);
-}
-
-pub fn io_uring_prep_accept(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- addr: ?*os.sockaddr,
- addrlen: ?*os.socklen_t,
- flags: u32,
-) void {
- // `addr` holds a pointer to `sockaddr`, and `addr2` holds a pointer to socklen_t`.
- // `addr2` maps to `sqe.off` (u64) instead of `sqe.len` (which is only a u32).
- io_uring_prep_rw(.ACCEPT, sqe, fd, @intFromPtr(addr), 0, @intFromPtr(addrlen));
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_accept_direct(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- addr: ?*os.sockaddr,
- addrlen: ?*os.socklen_t,
- flags: u32,
- file_index: u32,
-) void {
- io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
- __io_uring_set_target_fixed_file(sqe, file_index);
-}
-
-pub fn io_uring_prep_multishot_accept_direct(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- addr: ?*os.sockaddr,
- addrlen: ?*os.socklen_t,
- flags: u32,
-) void {
- io_uring_prep_multishot_accept(sqe, fd, addr, addrlen, flags);
- __io_uring_set_target_fixed_file(sqe, linux.IORING_FILE_INDEX_ALLOC);
-}
-
-fn __io_uring_set_target_fixed_file(sqe: *linux.io_uring_sqe, file_index: u32) void {
- const sqe_file_index: u32 = if (file_index == linux.IORING_FILE_INDEX_ALLOC)
- linux.IORING_FILE_INDEX_ALLOC
- else
- // 0 means no fixed files, indexes should be encoded as "index + 1"
- file_index + 1;
- // This filed is overloaded in liburing:
- // splice_fd_in: i32
- // sqe_file_index: u32
- sqe.splice_fd_in = @bitCast(sqe_file_index);
-}
-
-pub fn io_uring_prep_connect(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- addr: *const os.sockaddr,
- addrlen: os.socklen_t,
-) void {
- // `addrlen` maps to `sqe.off` (u64) instead of `sqe.len` (which is only a u32).
- io_uring_prep_rw(.CONNECT, sqe, fd, @intFromPtr(addr), 0, addrlen);
-}
-
-pub fn io_uring_prep_epoll_ctl(
- sqe: *linux.io_uring_sqe,
- epfd: os.fd_t,
- fd: os.fd_t,
- op: u32,
- ev: ?*linux.epoll_event,
-) void {
- io_uring_prep_rw(.EPOLL_CTL, sqe, epfd, @intFromPtr(ev), op, @as(u64, @intCast(fd)));
-}
-
-pub fn io_uring_prep_recv(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []u8, flags: u32) void {
- io_uring_prep_rw(.RECV, sqe, fd, @intFromPtr(buffer.ptr), buffer.len, 0);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_send(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []const u8, flags: u32) void {
- io_uring_prep_rw(.SEND, sqe, fd, @intFromPtr(buffer.ptr), buffer.len, 0);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_send_zc(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []const u8, flags: u32, zc_flags: u16) void {
- io_uring_prep_rw(.SEND_ZC, sqe, fd, @intFromPtr(buffer.ptr), buffer.len, 0);
- sqe.rw_flags = flags;
- sqe.ioprio = zc_flags;
-}
-
-pub fn io_uring_prep_send_zc_fixed(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []const u8, flags: u32, zc_flags: u16, buf_index: u16) void {
- io_uring_prep_send_zc(sqe, fd, buffer, flags, zc_flags);
- sqe.ioprio |= linux.IORING_RECVSEND_FIXED_BUF;
- sqe.buf_index = buf_index;
-}
-
-pub fn io_uring_prep_sendmsg_zc(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- msg: *const os.msghdr_const,
- flags: u32,
-) void {
- io_uring_prep_sendmsg(sqe, fd, msg, flags);
- sqe.opcode = .SENDMSG_ZC;
-}
-
-pub fn io_uring_prep_recvmsg(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- msg: *os.msghdr,
- flags: u32,
-) void {
- linux.io_uring_prep_rw(.RECVMSG, sqe, fd, @intFromPtr(msg), 1, 0);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_sendmsg(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- msg: *const os.msghdr_const,
- flags: u32,
-) void {
- linux.io_uring_prep_rw(.SENDMSG, sqe, fd, @intFromPtr(msg), 1, 0);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_openat(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- path: [*:0]const u8,
- flags: linux.O,
- mode: os.mode_t,
-) void {
- io_uring_prep_rw(.OPENAT, sqe, fd, @intFromPtr(path), mode, 0);
- sqe.rw_flags = @bitCast(flags);
-}
-
-pub fn io_uring_prep_openat_direct(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- path: [*:0]const u8,
- flags: linux.O,
- mode: os.mode_t,
- file_index: u32,
-) void {
- io_uring_prep_openat(sqe, fd, path, flags, mode);
- __io_uring_set_target_fixed_file(sqe, file_index);
-}
-
-pub fn io_uring_prep_close(sqe: *linux.io_uring_sqe, fd: os.fd_t) void {
- sqe.* = .{
- .opcode = .CLOSE,
- .flags = 0,
- .ioprio = 0,
- .fd = fd,
- .off = 0,
- .addr = 0,
- .len = 0,
- .rw_flags = 0,
- .user_data = 0,
- .buf_index = 0,
- .personality = 0,
- .splice_fd_in = 0,
- .addr3 = 0,
- .resv = 0,
- };
-}
-
-pub fn io_uring_prep_close_direct(sqe: *linux.io_uring_sqe, file_index: u32) void {
- io_uring_prep_close(sqe, 0);
- __io_uring_set_target_fixed_file(sqe, file_index);
-}
-
-pub fn io_uring_prep_timeout(
- sqe: *linux.io_uring_sqe,
- ts: *const os.linux.kernel_timespec,
- count: u32,
- flags: u32,
-) void {
- io_uring_prep_rw(.TIMEOUT, sqe, -1, @intFromPtr(ts), 1, count);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_timeout_remove(sqe: *linux.io_uring_sqe, timeout_user_data: u64, flags: u32) void {
- sqe.* = .{
- .opcode = .TIMEOUT_REMOVE,
- .flags = 0,
- .ioprio = 0,
- .fd = -1,
- .off = 0,
- .addr = timeout_user_data,
- .len = 0,
- .rw_flags = flags,
- .user_data = 0,
- .buf_index = 0,
- .personality = 0,
- .splice_fd_in = 0,
- .addr3 = 0,
- .resv = 0,
- };
-}
-
-pub fn io_uring_prep_link_timeout(
- sqe: *linux.io_uring_sqe,
- ts: *const os.linux.kernel_timespec,
- flags: u32,
-) void {
- linux.io_uring_prep_rw(.LINK_TIMEOUT, sqe, -1, @intFromPtr(ts), 1, 0);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_poll_add(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- poll_mask: u32,
-) void {
- io_uring_prep_rw(.POLL_ADD, sqe, fd, @intFromPtr(@as(?*anyopaque, null)), 0, 0);
- sqe.rw_flags = __io_uring_prep_poll_mask(poll_mask);
-}
-
-pub fn io_uring_prep_poll_remove(
- sqe: *linux.io_uring_sqe,
- target_user_data: u64,
-) void {
- io_uring_prep_rw(.POLL_REMOVE, sqe, -1, target_user_data, 0, 0);
-}
-
-pub fn io_uring_prep_poll_update(
- sqe: *linux.io_uring_sqe,
- old_user_data: u64,
- new_user_data: u64,
- poll_mask: u32,
- flags: u32,
-) void {
- io_uring_prep_rw(.POLL_REMOVE, sqe, -1, old_user_data, flags, new_user_data);
- sqe.rw_flags = __io_uring_prep_poll_mask(poll_mask);
-}
-
-pub fn io_uring_prep_fallocate(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- mode: i32,
- offset: u64,
- len: u64,
-) void {
- sqe.* = .{
- .opcode = .FALLOCATE,
- .flags = 0,
- .ioprio = 0,
- .fd = fd,
- .off = offset,
- .addr = len,
- .len = @as(u32, @intCast(mode)),
- .rw_flags = 0,
- .user_data = 0,
- .buf_index = 0,
- .personality = 0,
- .splice_fd_in = 0,
- .addr3 = 0,
- .resv = 0,
- };
-}
-
-pub fn io_uring_prep_statx(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- path: [*:0]const u8,
- flags: u32,
- mask: u32,
- buf: *linux.Statx,
-) void {
- io_uring_prep_rw(.STATX, sqe, fd, @intFromPtr(path), mask, @intFromPtr(buf));
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_cancel(
- sqe: *linux.io_uring_sqe,
- cancel_user_data: u64,
- flags: u32,
-) void {
- io_uring_prep_rw(.ASYNC_CANCEL, sqe, -1, cancel_user_data, 0, 0);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_shutdown(
- sqe: *linux.io_uring_sqe,
- sockfd: os.socket_t,
- how: u32,
-) void {
- io_uring_prep_rw(.SHUTDOWN, sqe, sockfd, 0, how, 0);
-}
-
-pub fn io_uring_prep_renameat(
- sqe: *linux.io_uring_sqe,
- old_dir_fd: os.fd_t,
- old_path: [*:0]const u8,
- new_dir_fd: os.fd_t,
- new_path: [*:0]const u8,
- flags: u32,
-) void {
- io_uring_prep_rw(
- .RENAMEAT,
- sqe,
- old_dir_fd,
- @intFromPtr(old_path),
- 0,
- @intFromPtr(new_path),
- );
- sqe.len = @bitCast(new_dir_fd);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_unlinkat(
- sqe: *linux.io_uring_sqe,
- dir_fd: os.fd_t,
- path: [*:0]const u8,
- flags: u32,
-) void {
- io_uring_prep_rw(.UNLINKAT, sqe, dir_fd, @intFromPtr(path), 0, 0);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_mkdirat(
- sqe: *linux.io_uring_sqe,
- dir_fd: os.fd_t,
- path: [*:0]const u8,
- mode: os.mode_t,
-) void {
- io_uring_prep_rw(.MKDIRAT, sqe, dir_fd, @intFromPtr(path), mode, 0);
-}
-
-pub fn io_uring_prep_symlinkat(
- sqe: *linux.io_uring_sqe,
- target: [*:0]const u8,
- new_dir_fd: os.fd_t,
- link_path: [*:0]const u8,
-) void {
- io_uring_prep_rw(
- .SYMLINKAT,
- sqe,
- new_dir_fd,
- @intFromPtr(target),
- 0,
- @intFromPtr(link_path),
- );
-}
-
-pub fn io_uring_prep_linkat(
- sqe: *linux.io_uring_sqe,
- old_dir_fd: os.fd_t,
- old_path: [*:0]const u8,
- new_dir_fd: os.fd_t,
- new_path: [*:0]const u8,
- flags: u32,
-) void {
- io_uring_prep_rw(
- .LINKAT,
- sqe,
- old_dir_fd,
- @intFromPtr(old_path),
- 0,
- @intFromPtr(new_path),
- );
- sqe.len = @bitCast(new_dir_fd);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_provide_buffers(
- sqe: *linux.io_uring_sqe,
- buffers: [*]u8,
- buffer_len: usize,
- num: usize,
- group_id: usize,
- buffer_id: usize,
-) void {
- const ptr = @intFromPtr(buffers);
- io_uring_prep_rw(.PROVIDE_BUFFERS, sqe, @as(i32, @intCast(num)), ptr, buffer_len, buffer_id);
- sqe.buf_index = @intCast(group_id);
-}
-
-pub fn io_uring_prep_remove_buffers(
- sqe: *linux.io_uring_sqe,
- num: usize,
- group_id: usize,
-) void {
- io_uring_prep_rw(.REMOVE_BUFFERS, sqe, @as(i32, @intCast(num)), 0, 0, 0);
- sqe.buf_index = @intCast(group_id);
-}
-
-pub fn io_uring_prep_multishot_accept(
- sqe: *linux.io_uring_sqe,
- fd: os.fd_t,
- addr: ?*os.sockaddr,
- addrlen: ?*os.socklen_t,
- flags: u32,
-) void {
- io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
- sqe.ioprio |= linux.IORING_ACCEPT_MULTISHOT;
-}
-
-pub fn io_uring_prep_socket(
- sqe: *linux.io_uring_sqe,
- domain: u32,
- socket_type: u32,
- protocol: u32,
- flags: u32,
-) void {
- io_uring_prep_rw(.SOCKET, sqe, @intCast(domain), 0, protocol, socket_type);
- sqe.rw_flags = flags;
-}
-
-pub fn io_uring_prep_socket_direct(
- sqe: *linux.io_uring_sqe,
- domain: u32,
- socket_type: u32,
- protocol: u32,
- flags: u32,
- file_index: u32,
-) void {
- io_uring_prep_socket(sqe, domain, socket_type, protocol, flags);
- __io_uring_set_target_fixed_file(sqe, file_index);
-}
-
-pub fn io_uring_prep_socket_direct_alloc(
- sqe: *linux.io_uring_sqe,
- domain: u32,
- socket_type: u32,
- protocol: u32,
- flags: u32,
-) void {
- io_uring_prep_socket(sqe, domain, socket_type, protocol, flags);
- __io_uring_set_target_fixed_file(sqe, linux.IORING_FILE_INDEX_ALLOC);
-}
-
-pub fn io_uring_prep_waitid(
- sqe: *linux.io_uring_sqe,
- id_type: linux.P,
- id: i32,
- infop: *linux.siginfo_t,
- options: u32,
- flags: u32,
-) void {
- io_uring_prep_rw(.WAITID, sqe, id, 0, @intFromEnum(id_type), @intFromPtr(infop));
- sqe.rw_flags = flags;
- sqe.splice_fd_in = @bitCast(options);
-}
-
-test "structs/offsets/entries" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- try testing.expectEqual(@as(usize, 120), @sizeOf(linux.io_uring_params));
- try testing.expectEqual(@as(usize, 64), @sizeOf(linux.io_uring_sqe));
- try testing.expectEqual(@as(usize, 16), @sizeOf(linux.io_uring_cqe));
-
- try testing.expectEqual(0, linux.IORING_OFF_SQ_RING);
- try testing.expectEqual(0x8000000, linux.IORING_OFF_CQ_RING);
- try testing.expectEqual(0x10000000, linux.IORING_OFF_SQES);
-
- try testing.expectError(error.EntriesZero, IO_Uring.init(0, 0));
- try testing.expectError(error.EntriesNotPowerOfTwo, IO_Uring.init(3, 0));
-}
-
-test "nop" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer {
- ring.deinit();
- testing.expectEqual(@as(os.fd_t, -1), ring.fd) catch @panic("test failed");
- }
-
- const sqe = try ring.nop(0xaaaaaaaa);
- try testing.expectEqual(linux.io_uring_sqe{
- .opcode = .NOP,
- .flags = 0,
- .ioprio = 0,
- .fd = 0,
- .off = 0,
- .addr = 0,
- .len = 0,
- .rw_flags = 0,
- .user_data = 0xaaaaaaaa,
- .buf_index = 0,
- .personality = 0,
- .splice_fd_in = 0,
- .addr3 = 0,
- .resv = 0,
- }, sqe.*);
-
- try testing.expectEqual(@as(u32, 0), ring.sq.sqe_head);
- try testing.expectEqual(@as(u32, 1), ring.sq.sqe_tail);
- try testing.expectEqual(@as(u32, 0), ring.sq.tail.*);
- try testing.expectEqual(@as(u32, 0), ring.cq.head.*);
- try testing.expectEqual(@as(u32, 1), ring.sq_ready());
- try testing.expectEqual(@as(u32, 0), ring.cq_ready());
-
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- try testing.expectEqual(@as(u32, 1), ring.sq.sqe_head);
- try testing.expectEqual(@as(u32, 1), ring.sq.sqe_tail);
- try testing.expectEqual(@as(u32, 1), ring.sq.tail.*);
- try testing.expectEqual(@as(u32, 0), ring.cq.head.*);
- try testing.expectEqual(@as(u32, 0), ring.sq_ready());
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xaaaaaaaa,
- .res = 0,
- .flags = 0,
- }, try ring.copy_cqe());
- try testing.expectEqual(@as(u32, 1), ring.cq.head.*);
- try testing.expectEqual(@as(u32, 0), ring.cq_ready());
-
- const sqe_barrier = try ring.nop(0xbbbbbbbb);
- sqe_barrier.flags |= linux.IOSQE_IO_DRAIN;
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xbbbbbbbb,
- .res = 0,
- .flags = 0,
- }, try ring.copy_cqe());
- try testing.expectEqual(@as(u32, 2), ring.sq.sqe_head);
- try testing.expectEqual(@as(u32, 2), ring.sq.sqe_tail);
- try testing.expectEqual(@as(u32, 2), ring.sq.tail.*);
- try testing.expectEqual(@as(u32, 2), ring.cq.head.*);
-}
-
-test "readv" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
- defer os.close(fd);
-
- // Linux Kernel 5.4 supports IORING_REGISTER_FILES but not sparse fd sets (i.e. an fd of -1).
- // Linux Kernel 5.5 adds support for sparse fd sets.
- // Compare:
- // https://github.com/torvalds/linux/blob/v5.4/fs/io_uring.c#L3119-L3124 vs
- // https://github.com/torvalds/linux/blob/v5.8/fs/io_uring.c#L6687-L6691
- // We therefore avoid stressing sparse fd sets here:
- var registered_fds = [_]os.fd_t{0} ** 1;
- const fd_index = 0;
- registered_fds[fd_index] = fd;
- try ring.register_files(registered_fds[0..]);
-
- var buffer = [_]u8{42} ** 128;
- var iovecs = [_]os.iovec{os.iovec{ .iov_base = &buffer, .iov_len = buffer.len }};
- const sqe = try ring.read(0xcccccccc, fd_index, .{ .iovecs = iovecs[0..] }, 0);
- try testing.expectEqual(linux.IORING_OP.READV, sqe.opcode);
- sqe.flags |= linux.IOSQE_FIXED_FILE;
-
- try testing.expectError(error.SubmissionQueueFull, ring.nop(0));
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xcccccccc,
- .res = buffer.len,
- .flags = 0,
- }, try ring.copy_cqe());
- try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]);
-
- try ring.unregister_files();
-}
-
-test "writev/fsync/readv" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(4, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
-
- const path = "test_io_uring_writev_fsync_readv";
- const file = try tmp.dir.createFile(path, .{ .read = true, .truncate = true });
- defer file.close();
- const fd = file.handle;
-
- const buffer_write = [_]u8{42} ** 128;
- const iovecs_write = [_]os.iovec_const{
- os.iovec_const{ .iov_base = &buffer_write, .iov_len = buffer_write.len },
- };
- var buffer_read = [_]u8{0} ** 128;
- var iovecs_read = [_]os.iovec{
- os.iovec{ .iov_base = &buffer_read, .iov_len = buffer_read.len },
- };
-
- const sqe_writev = try ring.writev(0xdddddddd, fd, iovecs_write[0..], 17);
- try testing.expectEqual(linux.IORING_OP.WRITEV, sqe_writev.opcode);
- try testing.expectEqual(@as(u64, 17), sqe_writev.off);
- sqe_writev.flags |= linux.IOSQE_IO_LINK;
-
- const sqe_fsync = try ring.fsync(0xeeeeeeee, fd, 0);
- try testing.expectEqual(linux.IORING_OP.FSYNC, sqe_fsync.opcode);
- try testing.expectEqual(fd, sqe_fsync.fd);
- sqe_fsync.flags |= linux.IOSQE_IO_LINK;
-
- const sqe_readv = try ring.read(0xffffffff, fd, .{ .iovecs = iovecs_read[0..] }, 17);
- try testing.expectEqual(linux.IORING_OP.READV, sqe_readv.opcode);
- try testing.expectEqual(@as(u64, 17), sqe_readv.off);
-
- try testing.expectEqual(@as(u32, 3), ring.sq_ready());
- try testing.expectEqual(@as(u32, 3), try ring.submit_and_wait(3));
- try testing.expectEqual(@as(u32, 0), ring.sq_ready());
- try testing.expectEqual(@as(u32, 3), ring.cq_ready());
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xdddddddd,
- .res = buffer_write.len,
- .flags = 0,
- }, try ring.copy_cqe());
- try testing.expectEqual(@as(u32, 2), ring.cq_ready());
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xeeeeeeee,
- .res = 0,
- .flags = 0,
- }, try ring.copy_cqe());
- try testing.expectEqual(@as(u32, 1), ring.cq_ready());
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xffffffff,
- .res = buffer_read.len,
- .flags = 0,
- }, try ring.copy_cqe());
- try testing.expectEqual(@as(u32, 0), ring.cq_ready());
-
- try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]);
-}
-
-test "write/read" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(2, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
- const path = "test_io_uring_write_read";
- const file = try tmp.dir.createFile(path, .{ .read = true, .truncate = true });
- defer file.close();
- const fd = file.handle;
-
- const buffer_write = [_]u8{97} ** 20;
- var buffer_read = [_]u8{98} ** 20;
- const sqe_write = try ring.write(0x11111111, fd, buffer_write[0..], 10);
- try testing.expectEqual(linux.IORING_OP.WRITE, sqe_write.opcode);
- try testing.expectEqual(@as(u64, 10), sqe_write.off);
- sqe_write.flags |= linux.IOSQE_IO_LINK;
- const sqe_read = try ring.read(0x22222222, fd, .{ .buffer = buffer_read[0..] }, 10);
- try testing.expectEqual(linux.IORING_OP.READ, sqe_read.opcode);
- try testing.expectEqual(@as(u64, 10), sqe_read.off);
- try testing.expectEqual(@as(u32, 2), try ring.submit());
-
- const cqe_write = try ring.copy_cqe();
- const cqe_read = try ring.copy_cqe();
- // Prior to Linux Kernel 5.6 this is the only way to test for read/write support:
- // https://lwn.net/Articles/809820/
- if (cqe_write.err() == .INVAL) return error.SkipZigTest;
- if (cqe_read.err() == .INVAL) return error.SkipZigTest;
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x11111111,
- .res = buffer_write.len,
- .flags = 0,
- }, cqe_write);
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x22222222,
- .res = buffer_read.len,
- .flags = 0,
- }, cqe_read);
- try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]);
-}
-
-test "splice/read" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(4, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var tmp = std.testing.tmpDir(.{});
- const path_src = "test_io_uring_splice_src";
- const file_src = try tmp.dir.createFile(path_src, .{ .read = true, .truncate = true });
- defer file_src.close();
- const fd_src = file_src.handle;
-
- const path_dst = "test_io_uring_splice_dst";
- const file_dst = try tmp.dir.createFile(path_dst, .{ .read = true, .truncate = true });
- defer file_dst.close();
- const fd_dst = file_dst.handle;
-
- const buffer_write = [_]u8{97} ** 20;
- var buffer_read = [_]u8{98} ** 20;
- _ = try file_src.write(&buffer_write);
-
- const fds = try os.pipe();
- const pipe_offset: u64 = std.math.maxInt(u64);
-
- const sqe_splice_to_pipe = try ring.splice(0x11111111, fd_src, 0, fds[1], pipe_offset, buffer_write.len);
- try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_to_pipe.opcode);
- try testing.expectEqual(@as(u64, 0), sqe_splice_to_pipe.addr);
- try testing.expectEqual(pipe_offset, sqe_splice_to_pipe.off);
- sqe_splice_to_pipe.flags |= linux.IOSQE_IO_LINK;
-
- const sqe_splice_from_pipe = try ring.splice(0x22222222, fds[0], pipe_offset, fd_dst, 10, buffer_write.len);
- try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_from_pipe.opcode);
- try testing.expectEqual(pipe_offset, sqe_splice_from_pipe.addr);
- try testing.expectEqual(@as(u64, 10), sqe_splice_from_pipe.off);
- sqe_splice_from_pipe.flags |= linux.IOSQE_IO_LINK;
-
- const sqe_read = try ring.read(0x33333333, fd_dst, .{ .buffer = buffer_read[0..] }, 10);
- try testing.expectEqual(linux.IORING_OP.READ, sqe_read.opcode);
- try testing.expectEqual(@as(u64, 10), sqe_read.off);
- try testing.expectEqual(@as(u32, 3), try ring.submit());
-
- const cqe_splice_to_pipe = try ring.copy_cqe();
- const cqe_splice_from_pipe = try ring.copy_cqe();
- const cqe_read = try ring.copy_cqe();
- // Prior to Linux Kernel 5.6 this is the only way to test for splice/read support:
- // https://lwn.net/Articles/809820/
- if (cqe_splice_to_pipe.err() == .INVAL) return error.SkipZigTest;
- if (cqe_splice_from_pipe.err() == .INVAL) return error.SkipZigTest;
- if (cqe_read.err() == .INVAL) return error.SkipZigTest;
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x11111111,
- .res = buffer_write.len,
- .flags = 0,
- }, cqe_splice_to_pipe);
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x22222222,
- .res = buffer_write.len,
- .flags = 0,
- }, cqe_splice_from_pipe);
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x33333333,
- .res = buffer_read.len,
- .flags = 0,
- }, cqe_read);
- try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]);
-}
-
-test "write_fixed/read_fixed" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(2, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
-
- const path = "test_io_uring_write_read_fixed";
- const file = try tmp.dir.createFile(path, .{ .read = true, .truncate = true });
- defer file.close();
- const fd = file.handle;
-
- var raw_buffers: [2][11]u8 = undefined;
- // First buffer will be written to the file.
- @memset(&raw_buffers[0], 'z');
- raw_buffers[0][0.."foobar".len].* = "foobar".*;
-
- var buffers = [2]os.iovec{
- .{ .iov_base = &raw_buffers[0], .iov_len = raw_buffers[0].len },
- .{ .iov_base = &raw_buffers[1], .iov_len = raw_buffers[1].len },
- };
- ring.register_buffers(&buffers) catch |err| switch (err) {
- error.SystemResources => {
- // See https://github.com/ziglang/zig/issues/15362
- return error.SkipZigTest;
- },
- else => |e| return e,
- };
-
- const sqe_write = try ring.write_fixed(0x45454545, fd, &buffers[0], 3, 0);
- try testing.expectEqual(linux.IORING_OP.WRITE_FIXED, sqe_write.opcode);
- try testing.expectEqual(@as(u64, 3), sqe_write.off);
- sqe_write.flags |= linux.IOSQE_IO_LINK;
-
- const sqe_read = try ring.read_fixed(0x12121212, fd, &buffers[1], 0, 1);
- try testing.expectEqual(linux.IORING_OP.READ_FIXED, sqe_read.opcode);
- try testing.expectEqual(@as(u64, 0), sqe_read.off);
-
- try testing.expectEqual(@as(u32, 2), try ring.submit());
-
- const cqe_write = try ring.copy_cqe();
- const cqe_read = try ring.copy_cqe();
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x45454545,
- .res = @as(i32, @intCast(buffers[0].iov_len)),
- .flags = 0,
- }, cqe_write);
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x12121212,
- .res = @as(i32, @intCast(buffers[1].iov_len)),
- .flags = 0,
- }, cqe_read);
-
- try testing.expectEqualSlices(u8, "\x00\x00\x00", buffers[1].iov_base[0..3]);
- try testing.expectEqualSlices(u8, "foobar", buffers[1].iov_base[3..9]);
- try testing.expectEqualSlices(u8, "zz", buffers[1].iov_base[9..11]);
-}
-
-test "openat" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
-
- const path = "test_io_uring_openat";
-
- // Workaround for LLVM bug: https://github.com/ziglang/zig/issues/12014
- const path_addr = if (builtin.zig_backend == .stage2_llvm) p: {
- var workaround = path;
- _ = &workaround;
- break :p @intFromPtr(workaround);
- } else @intFromPtr(path);
-
- const flags: linux.O = .{ .CLOEXEC = true, .ACCMODE = .RDWR, .CREAT = true };
- const mode: os.mode_t = 0o666;
- const sqe_openat = try ring.openat(0x33333333, tmp.dir.fd, path, flags, mode);
- try testing.expectEqual(linux.io_uring_sqe{
- .opcode = .OPENAT,
- .flags = 0,
- .ioprio = 0,
- .fd = tmp.dir.fd,
- .off = 0,
- .addr = path_addr,
- .len = mode,
- .rw_flags = @bitCast(flags),
- .user_data = 0x33333333,
- .buf_index = 0,
- .personality = 0,
- .splice_fd_in = 0,
- .addr3 = 0,
- .resv = 0,
- }, sqe_openat.*);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe_openat = try ring.copy_cqe();
- try testing.expectEqual(@as(u64, 0x33333333), cqe_openat.user_data);
- if (cqe_openat.err() == .INVAL) return error.SkipZigTest;
- if (cqe_openat.err() == .BADF) return error.SkipZigTest;
- if (cqe_openat.res <= 0) std.debug.print("\ncqe_openat.res={}\n", .{cqe_openat.res});
- try testing.expect(cqe_openat.res > 0);
- try testing.expectEqual(@as(u32, 0), cqe_openat.flags);
-
- os.close(cqe_openat.res);
-}
-
-test "close" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
-
- const path = "test_io_uring_close";
- const file = try tmp.dir.createFile(path, .{});
- errdefer file.close();
-
- const sqe_close = try ring.close(0x44444444, file.handle);
- try testing.expectEqual(linux.IORING_OP.CLOSE, sqe_close.opcode);
- try testing.expectEqual(file.handle, sqe_close.fd);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe_close = try ring.copy_cqe();
- if (cqe_close.err() == .INVAL) return error.SkipZigTest;
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x44444444,
- .res = 0,
- .flags = 0,
- }, cqe_close);
-}
-
-test "accept/connect/send/recv" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const socket_test_harness = try createSocketTestHarness(&ring);
- defer socket_test_harness.close();
-
- const buffer_send = [_]u8{ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 };
- var buffer_recv = [_]u8{ 0, 1, 0, 1, 0 };
-
- const send = try ring.send(0xeeeeeeee, socket_test_harness.client, buffer_send[0..], 0);
- send.flags |= linux.IOSQE_IO_LINK;
- _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, 0);
- try testing.expectEqual(@as(u32, 2), try ring.submit());
-
- const cqe_send = try ring.copy_cqe();
- if (cqe_send.err() == .INVAL) return error.SkipZigTest;
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xeeeeeeee,
- .res = buffer_send.len,
- .flags = 0,
- }, cqe_send);
-
- const cqe_recv = try ring.copy_cqe();
- if (cqe_recv.err() == .INVAL) return error.SkipZigTest;
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xffffffff,
- .res = buffer_recv.len,
- // ignore IORING_CQE_F_SOCK_NONEMPTY since it is only set on some systems
- .flags = cqe_recv.flags & linux.IORING_CQE_F_SOCK_NONEMPTY,
- }, cqe_recv);
-
- try testing.expectEqualSlices(u8, buffer_send[0..buffer_recv.len], buffer_recv[0..]);
-}
-
-test "sendmsg/recvmsg" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(2, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var address_server = try net.Address.parseIp4("127.0.0.1", 0);
-
- const server = try os.socket(address_server.any.family, os.SOCK.DGRAM, 0);
- defer os.close(server);
- try os.setsockopt(server, os.SOL.SOCKET, os.SO.REUSEPORT, &mem.toBytes(@as(c_int, 1)));
- try os.setsockopt(server, os.SOL.SOCKET, os.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1)));
- try os.bind(server, &address_server.any, address_server.getOsSockLen());
-
- // set address_server to the OS-chosen IP/port.
- var slen: os.socklen_t = address_server.getOsSockLen();
- try os.getsockname(server, &address_server.any, &slen);
-
- const client = try os.socket(address_server.any.family, os.SOCK.DGRAM, 0);
- defer os.close(client);
-
- const buffer_send = [_]u8{42} ** 128;
- const iovecs_send = [_]os.iovec_const{
- os.iovec_const{ .iov_base = &buffer_send, .iov_len = buffer_send.len },
- };
- const msg_send = os.msghdr_const{
- .name = &address_server.any,
- .namelen = address_server.getOsSockLen(),
- .iov = &iovecs_send,
- .iovlen = 1,
- .control = null,
- .controllen = 0,
- .flags = 0,
- };
- const sqe_sendmsg = try ring.sendmsg(0x11111111, client, &msg_send, 0);
- sqe_sendmsg.flags |= linux.IOSQE_IO_LINK;
- try testing.expectEqual(linux.IORING_OP.SENDMSG, sqe_sendmsg.opcode);
- try testing.expectEqual(client, sqe_sendmsg.fd);
-
- var buffer_recv = [_]u8{0} ** 128;
- var iovecs_recv = [_]os.iovec{
- os.iovec{ .iov_base = &buffer_recv, .iov_len = buffer_recv.len },
- };
- const addr = [_]u8{0} ** 4;
- var address_recv = net.Address.initIp4(addr, 0);
- var msg_recv: os.msghdr = os.msghdr{
- .name = &address_recv.any,
- .namelen = address_recv.getOsSockLen(),
- .iov = &iovecs_recv,
- .iovlen = 1,
- .control = null,
- .controllen = 0,
- .flags = 0,
- };
- const sqe_recvmsg = try ring.recvmsg(0x22222222, server, &msg_recv, 0);
- try testing.expectEqual(linux.IORING_OP.RECVMSG, sqe_recvmsg.opcode);
- try testing.expectEqual(server, sqe_recvmsg.fd);
-
- try testing.expectEqual(@as(u32, 2), ring.sq_ready());
- try testing.expectEqual(@as(u32, 2), try ring.submit_and_wait(2));
- try testing.expectEqual(@as(u32, 0), ring.sq_ready());
- try testing.expectEqual(@as(u32, 2), ring.cq_ready());
-
- const cqe_sendmsg = try ring.copy_cqe();
- if (cqe_sendmsg.res == -@as(i32, @intFromEnum(linux.E.INVAL))) return error.SkipZigTest;
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x11111111,
- .res = buffer_send.len,
- .flags = 0,
- }, cqe_sendmsg);
-
- const cqe_recvmsg = try ring.copy_cqe();
- if (cqe_recvmsg.res == -@as(i32, @intFromEnum(linux.E.INVAL))) return error.SkipZigTest;
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x22222222,
- .res = buffer_recv.len,
- // ignore IORING_CQE_F_SOCK_NONEMPTY since it is set non-deterministically
- .flags = cqe_recvmsg.flags & linux.IORING_CQE_F_SOCK_NONEMPTY,
- }, cqe_recvmsg);
-
- try testing.expectEqualSlices(u8, buffer_send[0..buffer_recv.len], buffer_recv[0..]);
-}
-
-test "timeout (after a relative time)" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const ms = 10;
- const margin = 5;
- const ts = os.linux.kernel_timespec{ .tv_sec = 0, .tv_nsec = ms * 1000000 };
-
- const started = std.time.milliTimestamp();
- const sqe = try ring.timeout(0x55555555, &ts, 0, 0);
- try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe.opcode);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- const cqe = try ring.copy_cqe();
- const stopped = std.time.milliTimestamp();
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x55555555,
- .res = -@as(i32, @intFromEnum(linux.E.TIME)),
- .flags = 0,
- }, cqe);
-
- // Tests should not depend on timings: skip test if outside margin.
- if (!std.math.approxEqAbs(f64, ms, @as(f64, @floatFromInt(stopped - started)), margin)) return error.SkipZigTest;
-}
-
-test "timeout (after a number of completions)" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(2, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const ts = os.linux.kernel_timespec{ .tv_sec = 3, .tv_nsec = 0 };
- const count_completions: u64 = 1;
- const sqe_timeout = try ring.timeout(0x66666666, &ts, count_completions, 0);
- try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe_timeout.opcode);
- try testing.expectEqual(count_completions, sqe_timeout.off);
- _ = try ring.nop(0x77777777);
- try testing.expectEqual(@as(u32, 2), try ring.submit());
-
- const cqe_nop = try ring.copy_cqe();
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x77777777,
- .res = 0,
- .flags = 0,
- }, cqe_nop);
-
- const cqe_timeout = try ring.copy_cqe();
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x66666666,
- .res = 0,
- .flags = 0,
- }, cqe_timeout);
-}
-
-test "timeout_remove" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(2, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const ts = os.linux.kernel_timespec{ .tv_sec = 3, .tv_nsec = 0 };
- const sqe_timeout = try ring.timeout(0x88888888, &ts, 0, 0);
- try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe_timeout.opcode);
- try testing.expectEqual(@as(u64, 0x88888888), sqe_timeout.user_data);
-
- const sqe_timeout_remove = try ring.timeout_remove(0x99999999, 0x88888888, 0);
- try testing.expectEqual(linux.IORING_OP.TIMEOUT_REMOVE, sqe_timeout_remove.opcode);
- try testing.expectEqual(@as(u64, 0x88888888), sqe_timeout_remove.addr);
- try testing.expectEqual(@as(u64, 0x99999999), sqe_timeout_remove.user_data);
-
- try testing.expectEqual(@as(u32, 2), try ring.submit());
-
- // The order in which the CQE arrive is not clearly documented and it changed with kernel 5.18:
- // * kernel 5.10 gives user data 0x88888888 first, 0x99999999 second
- // * kernel 5.18 gives user data 0x99999999 first, 0x88888888 second
-
- var cqes: [2]os.linux.io_uring_cqe = undefined;
- cqes[0] = try ring.copy_cqe();
- cqes[1] = try ring.copy_cqe();
-
- for (cqes) |cqe| {
- // IORING_OP_TIMEOUT_REMOVE is not supported by this kernel version:
- // Timeout remove operations set the fd to -1, which results in EBADF before EINVAL.
- // We use IORING_FEAT_RW_CUR_POS as a safety check here to make sure we are at least pre-5.6.
- // We don't want to skip this test for newer kernels.
- if (cqe.user_data == 0x99999999 and
- cqe.err() == .BADF and
- (ring.features & linux.IORING_FEAT_RW_CUR_POS) == 0)
- {
- return error.SkipZigTest;
- }
-
- try testing.expect(cqe.user_data == 0x88888888 or cqe.user_data == 0x99999999);
-
- if (cqe.user_data == 0x88888888) {
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x88888888,
- .res = -@as(i32, @intFromEnum(linux.E.CANCELED)),
- .flags = 0,
- }, cqe);
- } else if (cqe.user_data == 0x99999999) {
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x99999999,
- .res = 0,
- .flags = 0,
- }, cqe);
- }
- }
-}
-
-test "accept/connect/recv/link_timeout" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const socket_test_harness = try createSocketTestHarness(&ring);
- defer socket_test_harness.close();
-
- var buffer_recv = [_]u8{ 0, 1, 0, 1, 0 };
-
- const sqe_recv = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, 0);
- sqe_recv.flags |= linux.IOSQE_IO_LINK;
-
- const ts = os.linux.kernel_timespec{ .tv_sec = 0, .tv_nsec = 1000000 };
- _ = try ring.link_timeout(0x22222222, &ts, 0);
-
- const nr_wait = try ring.submit();
- try testing.expectEqual(@as(u32, 2), nr_wait);
-
- var i: usize = 0;
- while (i < nr_wait) : (i += 1) {
- const cqe = try ring.copy_cqe();
- switch (cqe.user_data) {
- 0xffffffff => {
- if (cqe.res != -@as(i32, @intFromEnum(linux.E.INTR)) and
- cqe.res != -@as(i32, @intFromEnum(linux.E.CANCELED)))
- {
- std.debug.print("Req 0x{x} got {d}\n", .{ cqe.user_data, cqe.res });
- try testing.expect(false);
- }
- },
- 0x22222222 => {
- if (cqe.res != -@as(i32, @intFromEnum(linux.E.ALREADY)) and
- cqe.res != -@as(i32, @intFromEnum(linux.E.TIME)))
- {
- std.debug.print("Req 0x{x} got {d}\n", .{ cqe.user_data, cqe.res });
- try testing.expect(false);
- }
- },
- else => @panic("should not happen"),
- }
- }
-}
-
-test "fallocate" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
-
- const path = "test_io_uring_fallocate";
- const file = try tmp.dir.createFile(path, .{ .truncate = true, .mode = 0o666 });
- defer file.close();
-
- try testing.expectEqual(@as(u64, 0), (try file.stat()).size);
-
- const len: u64 = 65536;
- const sqe = try ring.fallocate(0xaaaaaaaa, file.handle, 0, 0, len);
- try testing.expectEqual(linux.IORING_OP.FALLOCATE, sqe.opcode);
- try testing.expectEqual(file.handle, sqe.fd);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- // This kernel's io_uring does not yet implement fallocate():
- .INVAL => return error.SkipZigTest,
- // This kernel does not implement fallocate():
- .NOSYS => return error.SkipZigTest,
- // The filesystem containing the file referred to by fd does not support this operation;
- // or the mode is not supported by the filesystem containing the file referred to by fd:
- .OPNOTSUPP => return error.SkipZigTest,
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xaaaaaaaa,
- .res = 0,
- .flags = 0,
- }, cqe);
-
- try testing.expectEqual(len, (try file.stat()).size);
-}
-
-test "statx" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
- const path = "test_io_uring_statx";
- const file = try tmp.dir.createFile(path, .{ .truncate = true, .mode = 0o666 });
- defer file.close();
-
- try testing.expectEqual(@as(u64, 0), (try file.stat()).size);
-
- try file.writeAll("foobar");
-
- var buf: linux.Statx = undefined;
- const sqe = try ring.statx(
- 0xaaaaaaaa,
- tmp.dir.fd,
- path,
- 0,
- linux.STATX_SIZE,
- &buf,
- );
- try testing.expectEqual(linux.IORING_OP.STATX, sqe.opcode);
- try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- // This kernel's io_uring does not yet implement statx():
- .INVAL => return error.SkipZigTest,
- // This kernel does not implement statx():
- .NOSYS => return error.SkipZigTest,
- // The filesystem containing the file referred to by fd does not support this operation;
- // or the mode is not supported by the filesystem containing the file referred to by fd:
- .OPNOTSUPP => return error.SkipZigTest,
- // not supported on older kernels (5.4)
- .BADF => return error.SkipZigTest,
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xaaaaaaaa,
- .res = 0,
- .flags = 0,
- }, cqe);
-
- try testing.expect(buf.mask & os.linux.STATX_SIZE == os.linux.STATX_SIZE);
- try testing.expectEqual(@as(u64, 6), buf.size);
-}
-
-test "accept/connect/recv/cancel" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const socket_test_harness = try createSocketTestHarness(&ring);
- defer socket_test_harness.close();
-
- var buffer_recv = [_]u8{ 0, 1, 0, 1, 0 };
-
- _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, 0);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const sqe_cancel = try ring.cancel(0x99999999, 0xffffffff, 0);
- try testing.expectEqual(linux.IORING_OP.ASYNC_CANCEL, sqe_cancel.opcode);
- try testing.expectEqual(@as(u64, 0xffffffff), sqe_cancel.addr);
- try testing.expectEqual(@as(u64, 0x99999999), sqe_cancel.user_data);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- var cqe_recv = try ring.copy_cqe();
- if (cqe_recv.err() == .INVAL) return error.SkipZigTest;
- var cqe_cancel = try ring.copy_cqe();
- if (cqe_cancel.err() == .INVAL) return error.SkipZigTest;
-
- // The recv/cancel CQEs may arrive in any order, the recv CQE will sometimes come first:
- if (cqe_recv.user_data == 0x99999999 and cqe_cancel.user_data == 0xffffffff) {
- const a = cqe_recv;
- const b = cqe_cancel;
- cqe_recv = b;
- cqe_cancel = a;
- }
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xffffffff,
- .res = -@as(i32, @intFromEnum(linux.E.CANCELED)),
- .flags = 0,
- }, cqe_recv);
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x99999999,
- .res = 0,
- .flags = 0,
- }, cqe_cancel);
-}
-
-test "register_files_update" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
- defer os.close(fd);
-
- var registered_fds = [_]os.fd_t{0} ** 2;
- const fd_index = 0;
- const fd_index2 = 1;
- registered_fds[fd_index] = fd;
- registered_fds[fd_index2] = -1;
-
- ring.register_files(registered_fds[0..]) catch |err| switch (err) {
- // Happens when the kernel doesn't support sparse entry (-1) in the file descriptors array.
- error.FileDescriptorInvalid => return error.SkipZigTest,
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- };
-
- // Test IORING_REGISTER_FILES_UPDATE
- // Only available since Linux 5.5
-
- const fd2 = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
- defer os.close(fd2);
-
- registered_fds[fd_index] = fd2;
- registered_fds[fd_index2] = -1;
- try ring.register_files_update(0, registered_fds[0..]);
-
- var buffer = [_]u8{42} ** 128;
- {
- const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
- try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
- sqe.flags |= linux.IOSQE_FIXED_FILE;
-
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xcccccccc,
- .res = buffer.len,
- .flags = 0,
- }, try ring.copy_cqe());
- try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]);
- }
-
- // Test with a non-zero offset
-
- registered_fds[fd_index] = -1;
- registered_fds[fd_index2] = -1;
- try ring.register_files_update(1, registered_fds[1..]);
-
- {
- // Next read should still work since fd_index in the registered file descriptors hasn't been updated yet.
- const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
- try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
- sqe.flags |= linux.IOSQE_FIXED_FILE;
-
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0xcccccccc,
- .res = buffer.len,
- .flags = 0,
- }, try ring.copy_cqe());
- try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]);
- }
-
- try ring.register_files_update(0, registered_fds[0..]);
-
- {
- // Now this should fail since both fds are sparse (-1)
- const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
- try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
- sqe.flags |= linux.IOSQE_FIXED_FILE;
-
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- const cqe = try ring.copy_cqe();
- try testing.expectEqual(os.linux.E.BADF, cqe.err());
- }
-
- try ring.unregister_files();
-}
-
-test "shutdown" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var address = try net.Address.parseIp4("127.0.0.1", 0);
-
- // Socket bound, expect shutdown to work
- {
- const server = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
- defer os.close(server);
- try os.setsockopt(server, os.SOL.SOCKET, os.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1)));
- try os.bind(server, &address.any, address.getOsSockLen());
- try os.listen(server, 1);
-
- // set address to the OS-chosen IP/port.
- var slen: os.socklen_t = address.getOsSockLen();
- try os.getsockname(server, &address.any, &slen);
-
- const shutdown_sqe = try ring.shutdown(0x445445445, server, os.linux.SHUT.RD);
- try testing.expectEqual(linux.IORING_OP.SHUTDOWN, shutdown_sqe.opcode);
- try testing.expectEqual(@as(i32, server), shutdown_sqe.fd);
-
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- // This kernel's io_uring does not yet implement shutdown (kernel version < 5.11)
- .INVAL => return error.SkipZigTest,
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
-
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x445445445,
- .res = 0,
- .flags = 0,
- }, cqe);
- }
-
- // Socket not bound, expect to fail with ENOTCONN
- {
- const server = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
- defer os.close(server);
-
- const shutdown_sqe = ring.shutdown(0x445445445, server, os.linux.SHUT.RD) catch |err| switch (err) {
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- };
- try testing.expectEqual(linux.IORING_OP.SHUTDOWN, shutdown_sqe.opcode);
- try testing.expectEqual(@as(i32, server), shutdown_sqe.fd);
-
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- try testing.expectEqual(@as(u64, 0x445445445), cqe.user_data);
- try testing.expectEqual(os.linux.E.NOTCONN, cqe.err());
- }
-}
-
-test "renameat" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const old_path = "test_io_uring_renameat_old";
- const new_path = "test_io_uring_renameat_new";
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
-
- // Write old file with data
-
- const old_file = try tmp.dir.createFile(old_path, .{ .truncate = true, .mode = 0o666 });
- defer old_file.close();
- try old_file.writeAll("hello");
-
- // Submit renameat
-
- const sqe = try ring.renameat(
- 0x12121212,
- tmp.dir.fd,
- old_path,
- tmp.dir.fd,
- new_path,
- 0,
- );
- try testing.expectEqual(linux.IORING_OP.RENAMEAT, sqe.opcode);
- try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
- try testing.expectEqual(@as(i32, tmp.dir.fd), @as(i32, @bitCast(sqe.len)));
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- // This kernel's io_uring does not yet implement renameat (kernel version < 5.11)
- .BADF, .INVAL => return error.SkipZigTest,
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x12121212,
- .res = 0,
- .flags = 0,
- }, cqe);
-
- // Validate that the old file doesn't exist anymore
- {
- _ = tmp.dir.openFile(old_path, .{}) catch |err| switch (err) {
- error.FileNotFound => {},
- else => std.debug.panic("unexpected error: {}", .{err}),
- };
- }
-
- // Validate that the new file exists with the proper content
- {
- const new_file = try tmp.dir.openFile(new_path, .{});
- defer new_file.close();
-
- var new_file_data: [16]u8 = undefined;
- const read = try new_file.readAll(&new_file_data);
- try testing.expectEqualStrings("hello", new_file_data[0..read]);
- }
-}
-
-test "unlinkat" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const path = "test_io_uring_unlinkat";
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
-
- // Write old file with data
-
- const file = try tmp.dir.createFile(path, .{ .truncate = true, .mode = 0o666 });
- defer file.close();
-
- // Submit unlinkat
-
- const sqe = try ring.unlinkat(
- 0x12121212,
- tmp.dir.fd,
- path,
- 0,
- );
- try testing.expectEqual(linux.IORING_OP.UNLINKAT, sqe.opcode);
- try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- // This kernel's io_uring does not yet implement unlinkat (kernel version < 5.11)
- .BADF, .INVAL => return error.SkipZigTest,
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(linux.io_uring_cqe{
- .user_data = 0x12121212,
- .res = 0,
- .flags = 0,
- }, cqe);
-
- // Validate that the file doesn't exist anymore
- _ = tmp.dir.openFile(path, .{}) catch |err| switch (err) {
- error.FileNotFound => {},
- else => std.debug.panic("unexpected error: {}", .{err}),
- };
-}
-
-// Queues a MKDIRAT request relative to a temporary directory, checks the
-// prepared SQE and the resulting CQE, then confirms the directory can be opened.
-test "mkdirat" {
-    if (builtin.os.tag != .linux) return error.SkipZigTest;
-
-    var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
-        error.SystemOutdated => return error.SkipZigTest,
-        error.PermissionDenied => return error.SkipZigTest,
-        else => return err,
-    };
-    defer ring.deinit();
-
-    var tmp = std.testing.tmpDir(.{});
-    defer tmp.cleanup();
-
-    const path = "test_io_uring_mkdirat";
-
-    // Submit mkdirat
-
-    const sqe = try ring.mkdirat(
-        0x12121212,
-        tmp.dir.fd,
-        path,
-        0o0755,
-    );
-    try testing.expectEqual(linux.IORING_OP.MKDIRAT, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
-
-    const cqe = try ring.copy_cqe();
-    switch (cqe.err()) {
-        .SUCCESS => {},
-        // This kernel's io_uring does not yet implement mkdirat (kernel version < 5.15)
-        .BADF, .INVAL => return error.SkipZigTest,
-        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
-    }
-    try testing.expectEqual(linux.io_uring_cqe{
-        .user_data = 0x12121212,
-        .res = 0,
-        .flags = 0,
-    }, cqe);
-
-    // Validate that the directory exists. The handle is kept and closed so the
-    // test does not leak a directory descriptor.
-    var created_dir = try tmp.dir.openDir(path, .{});
-    defer created_dir.close();
-}
-
-// Queues a SYMLINKAT request that links `link_path` to a freshly created file,
-// checks the prepared SQE and the resulting CQE, then opens the file through
-// the new link.
-test "symlinkat" {
-    if (builtin.os.tag != .linux) return error.SkipZigTest;
-
-    var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
-        error.SystemOutdated => return error.SkipZigTest,
-        error.PermissionDenied => return error.SkipZigTest,
-        else => return err,
-    };
-    defer ring.deinit();
-
-    var tmp = std.testing.tmpDir(.{});
-    defer tmp.cleanup();
-
-    const path = "test_io_uring_symlinkat";
-    const link_path = "test_io_uring_symlinkat_link";
-
-    // The link target must exist so it can be opened through the link below.
-    const file = try tmp.dir.createFile(path, .{ .truncate = true, .mode = 0o666 });
-    defer file.close();
-
-    // Submit symlinkat
-
-    const sqe = try ring.symlinkat(
-        0x12121212,
-        path,
-        tmp.dir.fd,
-        link_path,
-    );
-    try testing.expectEqual(linux.IORING_OP.SYMLINKAT, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
-
-    const cqe = try ring.copy_cqe();
-    switch (cqe.err()) {
-        .SUCCESS => {},
-        // This kernel's io_uring does not yet implement symlinkat (kernel version < 5.15)
-        .BADF, .INVAL => return error.SkipZigTest,
-        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
-    }
-    try testing.expectEqual(linux.io_uring_cqe{
-        .user_data = 0x12121212,
-        .res = 0,
-        .flags = 0,
-    }, cqe);
-
-    // Validate that the symlink exists. The handle is closed again so the test
-    // does not leak a file descriptor.
-    const linked_file = try tmp.dir.openFile(link_path, .{});
-    defer linked_file.close();
-}
-
-// Hard-links a freshly written file through LINKAT and checks that the new
-// name exposes the same contents.
-test "linkat" {
-    if (builtin.os.tag != .linux) return error.SkipZigTest;
-
-    var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
-        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
-        else => return err,
-    };
-    defer ring.deinit();
-
-    var tmp = std.testing.tmpDir(.{});
-    defer tmp.cleanup();
-
-    const tag: u64 = 0x12121212;
-    const contents = "hello";
-    const original_name = "test_io_uring_linkat_first";
-    const link_name = "test_io_uring_linkat_second";
-
-    // Create the link source and give it recognizable contents.
-    const original = try tmp.dir.createFile(original_name, .{ .truncate = true, .mode = 0o666 });
-    defer original.close();
-    try original.writeAll(contents);
-
-    // Queue the linkat request; both names are given relative to the temp dir.
-    const sqe = try ring.linkat(tag, tmp.dir.fd, original_name, tmp.dir.fd, link_name, 0);
-    try testing.expectEqual(linux.IORING_OP.LINKAT, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    // The second directory fd is expected in the SQE `len` field.
-    try testing.expectEqual(@as(i32, tmp.dir.fd), @as(i32, @bitCast(sqe.len)));
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
-
-    const cqe = try ring.copy_cqe();
-    switch (cqe.err()) {
-        .SUCCESS => {},
-        // This kernel's io_uring does not yet implement linkat (kernel version < 5.15)
-        .BADF, .INVAL => return error.SkipZigTest,
-        else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
-    }
-    try testing.expectEqual(linux.io_uring_cqe{ .user_data = tag, .res = 0, .flags = 0 }, cqe);
-
-    // Read the data back through the new name.
-    const linked = try tmp.dir.openFile(link_name, .{});
-    defer linked.close();
-
-    var readback: [16]u8 = undefined;
-    const readback_len = try linked.readAll(&readback);
-    try testing.expectEqualStrings(contents, readback[0..readback_len]);
-}
-
-// Provides four buffers under one buffer group, consumes them with
-// buffer-select reads from /dev/zero, checks that a fifth read fails with
-// NOBUFS, then re-provides a single buffer and verifies that the next read
-// lands in exactly that buffer.
-test "provide_buffers: read" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
- defer os.close(fd);
-
- const group_id = 1337;
- const buffer_id = 0;
-
- const buffer_len = 128;
-
- var buffers: [4][buffer_len]u8 = undefined;
-
- // Provide 4 buffers
-
- {
- const sqe = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
- try testing.expectEqual(linux.IORING_OP.PROVIDE_BUFFERS, sqe.opcode);
- // The assertions below pin where the request parameters are stored in the SQE:
- // buffer count in `fd`, per-buffer length in `len`, group id in `buf_index`.
- try testing.expectEqual(@as(i32, buffers.len), sqe.fd);
- try testing.expectEqual(@as(u32, buffers[0].len), sqe.len);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- // Happens when the kernel is < 5.7
- .INVAL => return error.SkipZigTest,
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(@as(u64, 0xcccccccc), cqe.user_data);
- }
-
- // Do 4 reads which should consume all buffers
-
- var i: usize = 0;
- while (i < buffers.len) : (i += 1) {
- // No destination pointer is passed (addr stays 0); only the group id and length are given.
- const sqe = try ring.read(0xdededede, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
- try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
- try testing.expectEqual(@as(i32, fd), sqe.fd);
- try testing.expectEqual(@as(u64, 0), sqe.addr);
- try testing.expectEqual(@as(u32, buffer_len), sqe.len);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
-
- try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
- // The id of the buffer that was used is read from the bits above bit 15 of the CQE flags.
- const used_buffer_id = cqe.flags >> 16;
- try testing.expect(used_buffer_id >= 0 and used_buffer_id <= 3);
- try testing.expectEqual(@as(i32, buffer_len), cqe.res);
-
- try testing.expectEqual(@as(u64, 0xdededede), cqe.user_data);
- // The source is /dev/zero, so the selected buffer must now contain only zero bytes.
- try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))]);
- }
-
- // This read should fail
-
- {
- const sqe = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
- try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
- try testing.expectEqual(@as(i32, fd), sqe.fd);
- try testing.expectEqual(@as(u64, 0), sqe.addr);
- try testing.expectEqual(@as(u32, buffer_len), sqe.len);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- // Expected
- .NOBUFS => {},
- .SUCCESS => std.debug.panic("unexpected success", .{}),
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
- }
-
- // Provide 1 buffer again
-
- // Deliberately put something we don't expect in the buffers
- @memset(mem.sliceAsBytes(&buffers), 42);
-
- const reprovided_buffer_id = 2;
-
- {
- _ = try ring.provide_buffers(0xabababab, @as([*]u8, @ptrCast(&buffers[reprovided_buffer_id])), buffer_len, 1, group_id, reprovided_buffer_id);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- }
-
- // Final read which should work
-
- {
- const sqe = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
- try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
- try testing.expectEqual(@as(i32, fd), sqe.fd);
- try testing.expectEqual(@as(u64, 0), sqe.addr);
- try testing.expectEqual(@as(u32, buffer_len), sqe.len);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
-
- try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
- const used_buffer_id = cqe.flags >> 16;
- // Only one buffer is available at this point, so it must be the one selected.
- try testing.expectEqual(used_buffer_id, reprovided_buffer_id);
- try testing.expectEqual(@as(i32, buffer_len), cqe.res);
- try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
- // The 42 fill written above must have been overwritten with zeros from /dev/zero.
- try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))]);
- }
-}
-
-// Provides four buffers, removes three of them with REMOVE_BUFFERS, and then
-// checks that exactly one buffer-select read succeeds before the next one
-// fails with NOBUFS.
-test "remove_buffers" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
- defer os.close(fd);
-
- const group_id = 1337;
- const buffer_id = 0;
-
- const buffer_len = 128;
-
- var buffers: [4][buffer_len]u8 = undefined;
-
- // Provide 4 buffers
-
- {
- _ = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- // Treated as "operation not supported by this kernel": skip instead of failing.
- .INVAL => return error.SkipZigTest,
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(@as(u64, 0xcccccccc), cqe.user_data);
- }
-
- // Remove 3 buffers
-
- {
- const sqe = try ring.remove_buffers(0xbababababa, 3, group_id);
- try testing.expectEqual(linux.IORING_OP.REMOVE_BUFFERS, sqe.opcode);
- // The number of buffers to remove is expected in the SQE `fd` field.
- try testing.expectEqual(@as(i32, 3), sqe.fd);
- try testing.expectEqual(@as(u64, 0), sqe.addr);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(@as(u64, 0xbababababa), cqe.user_data);
- }
-
- // This read should work
-
- {
- _ = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
-
- try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
- // Which of the four buffers survived the removal is not asserted, only that the id is in range.
- const used_buffer_id = cqe.flags >> 16;
- try testing.expect(used_buffer_id >= 0 and used_buffer_id < 4);
- try testing.expectEqual(@as(i32, buffer_len), cqe.res);
- try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
- try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))]);
- }
-
- // Final read should _not_ work
-
- {
- _ = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- // Expected
- .NOBUFS => {},
- .SUCCESS => std.debug.panic("unexpected success", .{}),
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- }
-}
-
-// Same scenario as the buffer-select read test, but over a connected socket
-// pair: four sends from the server side are received on the client side into
-// provided buffers, a fifth recv fails with NOBUFS, and after re-providing one
-// buffer a further send/recv round trip must land in that buffer.
-test "provide_buffers: accept/connect/send/recv" {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const group_id = 1337;
- const buffer_id = 0;
-
- const buffer_len = 128;
- var buffers: [4][buffer_len]u8 = undefined;
-
- // Provide 4 buffers
-
- {
- const sqe = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
- try testing.expectEqual(linux.IORING_OP.PROVIDE_BUFFERS, sqe.opcode);
- try testing.expectEqual(@as(i32, buffers.len), sqe.fd);
- try testing.expectEqual(@as(u32, buffer_len), sqe.len);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- // Happens when the kernel is < 5.7
- .INVAL => return error.SkipZigTest,
- // Happens on the kernel 5.4
- .BADF => return error.SkipZigTest,
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(@as(u64, 0xcccccccc), cqe.user_data);
- }
-
- const socket_test_harness = try createSocketTestHarness(&ring);
- defer socket_test_harness.close();
-
- // Do 4 send on the socket
-
- {
- var i: usize = 0;
- while (i < buffers.len) : (i += 1) {
- _ = try ring.send(0xdeaddead, socket_test_harness.server, &([_]u8{'z'} ** buffer_len), 0);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- }
-
- // Collect all four send completions before any recv is queued.
- var cqes: [4]linux.io_uring_cqe = undefined;
- try testing.expectEqual(@as(u32, 4), try ring.copy_cqes(&cqes, 4));
- }
-
- // Do 4 recv which should consume all buffers
-
- // Deliberately put something we don't expect in the buffers
- @memset(mem.sliceAsBytes(&buffers), 1);
-
- var i: usize = 0;
- while (i < buffers.len) : (i += 1) {
- const sqe = try ring.recv(0xdededede, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
- try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
- try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
- try testing.expectEqual(@as(u64, 0), sqe.addr);
- try testing.expectEqual(@as(u32, buffer_len), sqe.len);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
- try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
-
- try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
- // The id of the buffer that was used is read from the bits above bit 15 of the CQE flags.
- const used_buffer_id = cqe.flags >> 16;
- try testing.expect(used_buffer_id >= 0 and used_buffer_id <= 3);
- try testing.expectEqual(@as(i32, buffer_len), cqe.res);
-
- try testing.expectEqual(@as(u64, 0xdededede), cqe.user_data);
- // The 1-fill written above must have been replaced by the 'z' payload.
- const buffer = buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))];
- try testing.expectEqualSlices(u8, &([_]u8{'z'} ** buffer_len), buffer);
- }
-
- // This recv should fail
-
- {
- const sqe = try ring.recv(0xdfdfdfdf, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
- try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
- try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
- try testing.expectEqual(@as(u64, 0), sqe.addr);
- try testing.expectEqual(@as(u32, buffer_len), sqe.len);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
- try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- // Expected
- .NOBUFS => {},
- .SUCCESS => std.debug.panic("unexpected success", .{}),
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
- }
-
- // Provide 1 buffer again
-
- const reprovided_buffer_id = 2;
-
- {
- _ = try ring.provide_buffers(0xabababab, @as([*]u8, @ptrCast(&buffers[reprovided_buffer_id])), buffer_len, 1, group_id, reprovided_buffer_id);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
- }
-
- // Redo 1 send on the server socket
-
- {
- _ = try ring.send(0xdeaddead, socket_test_harness.server, &([_]u8{'w'} ** buffer_len), 0);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- // The send completion is consumed but its result is not inspected.
- _ = try ring.copy_cqe();
- }
-
- // Final recv which should work
-
- // Deliberately put something we don't expect in the buffers
- @memset(mem.sliceAsBytes(&buffers), 1);
-
- {
- const sqe = try ring.recv(0xdfdfdfdf, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
- try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
- try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
- try testing.expectEqual(@as(u64, 0), sqe.addr);
- try testing.expectEqual(@as(u32, buffer_len), sqe.len);
- try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
- try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
- try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
-
- const cqe = try ring.copy_cqe();
- switch (cqe.err()) {
- .SUCCESS => {},
- else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
- }
-
- try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
- const used_buffer_id = cqe.flags >> 16;
- // Only the re-provided buffer is available, so it must be the one selected.
- try testing.expectEqual(used_buffer_id, reprovided_buffer_id);
- try testing.expectEqual(@as(i32, buffer_len), cqe.res);
- try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
- const buffer = buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))];
- try testing.expectEqualSlices(u8, &([_]u8{'w'} ** buffer_len), buffer);
- }
-}
-
-/// Used for testing server/client interactions.
-/// Holds the three sockets that make up one accepted loopback connection.
-const SocketTestHarness = struct {
-    /// Listening socket the connection was accepted on.
-    listener: os.socket_t,
-    /// Accepted, server-side end of the connection.
-    server: os.socket_t,
-    /// Connecting, client-side end of the connection.
-    client: os.socket_t,
-
-    /// Closes all three sockets. The accepted `server` socket is closed as
-    /// well; leaving it open would leak one descriptor per harness.
-    fn close(self: SocketTestHarness) void {
-        posix.close(self.client);
-        posix.close(self.server);
-        posix.close(self.listener);
-    }
-};
-
-/// Builds a connected client/server socket pair on 127.0.0.1 by queueing one
-/// accept and one connect on `ring` and waiting for both completions.
-/// Returns `error.SkipZigTest` when either completion reports `INVAL`.
-/// The caller is responsible for closing the returned harness.
-fn createSocketTestHarness(ring: *IO_Uring) !SocketTestHarness {
-    const accept_tag: u64 = 0xaaaaaaaa;
-    const connect_tag: u64 = 0xcccccccc;
-
-    // Listener with port 0 requested; `address` is passed by pointer so the
-    // helper can fill in the address that was actually bound.
-    var address = try net.Address.parseIp4("127.0.0.1", 0);
-    const listener = try createListenerSocket(&address);
-    errdefer posix.close(listener);
-
-    // Queue the accept on the listener.
-    var peer_addr: os.sockaddr = undefined;
-    var peer_addr_len: os.socklen_t = @sizeOf(@TypeOf(peer_addr));
-    _ = try ring.accept(accept_tag, listener, &peer_addr, &peer_addr_len, 0);
-
-    // Queue the connect from a fresh client socket.
-    const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
-    errdefer posix.close(client);
-    _ = try ring.connect(connect_tag, client, &address.any, address.getOsSockLen());
-
-    try testing.expectEqual(@as(u32, 2), try ring.submit());
-
-    var cqe_accept = try ring.copy_cqe();
-    if (cqe_accept.err() == .INVAL) return error.SkipZigTest;
-    var cqe_connect = try ring.copy_cqe();
-    if (cqe_connect.err() == .INVAL) return error.SkipZigTest;
-
-    // The two completions may arrive in either order; put them in the right slots.
-    if (cqe_accept.user_data == connect_tag and cqe_connect.user_data == accept_tag) {
-        mem.swap(linux.io_uring_cqe, &cqe_accept, &cqe_connect);
-    }
-
-    try testing.expectEqual(accept_tag, cqe_accept.user_data);
-    if (cqe_accept.res <= 0) std.debug.print("\ncqe_accept.res={}\n", .{cqe_accept.res});
-    try testing.expect(cqe_accept.res > 0);
-    try testing.expectEqual(@as(u32, 0), cqe_accept.flags);
-    try testing.expectEqual(linux.io_uring_cqe{ .user_data = connect_tag, .res = 0, .flags = 0 }, cqe_connect);
-
-    // The accept result is the descriptor of the server-side socket.
-    return SocketTestHarness{
-        .listener = listener,
-        .server = cqe_accept.res,
-        .client = client,
-    };
-}
-
-/// Creates a close-on-exec stream socket for the family of `address`, enables
-/// SO_REUSEADDR, binds it to `address` and starts listening with a backlog of 1.
-/// On success `address` is overwritten with the result of `getsockname`, i.e.
-/// the OS-chosen IP/port. The caller owns the returned socket and must close it.
-fn createListenerSocket(address: *net.Address) !os.socket_t {
-    const socket_type = os.SOCK.STREAM | os.SOCK.CLOEXEC;
-    const fd = try os.socket(address.any.family, socket_type, 0);
-    errdefer posix.close(fd);
-
-    const enable: c_int = 1;
-    try os.setsockopt(fd, os.SOL.SOCKET, os.SO.REUSEADDR, &mem.toBytes(enable));
-    try os.bind(fd, &address.any, address.getOsSockLen());
-    try os.listen(fd, 1);
-
-    var address_len: os.socklen_t = address.getOsSockLen();
-    try os.getsockname(fd, &address.any, &address_len);
-
-    return fd;
-}
-
-// Submits a single multishot accept and connects four clients in turn,
-// expecting one completion per client with the MORE flag still set.
-test "accept multishot" {
-    if (builtin.os.tag != .linux) return error.SkipZigTest;
-
-    var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
-        error.SystemOutdated => return error.SkipZigTest,
-        error.PermissionDenied => return error.SkipZigTest,
-        else => return err,
-    };
-    defer ring.deinit();
-
-    var address = try net.Address.parseIp4("127.0.0.1", 0);
-    const listener_socket = try createListenerSocket(&address);
-    defer posix.close(listener_socket);
-
-    // submit multishot accept operation
-    var addr: os.sockaddr = undefined;
-    var addr_len: os.socklen_t = @sizeOf(@TypeOf(addr));
-    const userdata: u64 = 0xaaaaaaaa;
-    _ = try ring.accept_multishot(userdata, listener_socket, &addr, &addr_len, 0);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
-
-    var nr: usize = 4; // number of clients to connect
-    while (nr > 0) : (nr -= 1) {
-        // connect client; closed at the end of every iteration, on success and on failure
-        const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
-        defer posix.close(client);
-        try os.connect(client, &address.any, address.getOsSockLen());
-
-        // test accept completion
-        const cqe = try ring.copy_cqe();
-        if (cqe.err() == .INVAL) return error.SkipZigTest;
-        try testing.expect(cqe.res > 0);
-        // A positive result is the accepted socket; close it so the test does
-        // not leak one descriptor per connected client.
-        defer posix.close(cqe.res);
-        try testing.expect(cqe.user_data == userdata);
-        try testing.expect(cqe.flags & linux.IORING_CQE_F_MORE > 0); // more flag is set
-    }
-}
-
-// Links a zero-copy send to a recv on a connected socket pair and checks the
-// three completions: the send result, the recv result, and the final
-// notification that ends the zero-copy send.
-test "accept/connect/send_zc/recv" {
-    try skipKernelLessThan(.{ .major = 6, .minor = 0, .patch = 0 });
-
-    var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
-        error.SystemOutdated, error.PermissionDenied => return error.SkipZigTest,
-        else => return err,
-    };
-    defer ring.deinit();
-
-    const harness = try createSocketTestHarness(&ring);
-    defer harness.close();
-
-    const send_tag: u64 = 0xeeeeeeee;
-    const recv_tag: u64 = 0xffffffff;
-
-    // The receive buffer is deliberately shorter than the payload.
-    const payload = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe };
-    var received = [_]u8{0} ** 10;
-
-    // Zero-copy send from the client, linked to a recv on the server side.
-    const send_sqe = try ring.send_zc(send_tag, harness.client, payload[0..], 0, 0);
-    send_sqe.flags |= linux.IOSQE_IO_LINK;
-    _ = try ring.recv(recv_tag, harness.server, .{ .buffer = received[0..] }, 0);
-    try testing.expectEqual(@as(u32, 2), try ring.submit());
-
-    // First completion of the zero-copy send. IORING_CQE_F_MORE announces a
-    // second completion (the notification) carrying the same user_data;
-    // `payload` has to stay alive until that one has arrived.
-    const send_cqe = try ring.copy_cqe();
-    try testing.expectEqual(linux.io_uring_cqe{
-        .user_data = send_tag,
-        .res = payload.len,
-        .flags = linux.IORING_CQE_F_MORE,
-    }, send_cqe);
-
-    // Recv completion. The SOCK_NONEMPTY bit is taken from the actual flags,
-    // so it is accepted whether set or not; any other flag bit fails the check.
-    const recv_cqe = try ring.copy_cqe();
-    try testing.expectEqual(linux.io_uring_cqe{
-        .user_data = recv_tag,
-        .res = received.len,
-        .flags = recv_cqe.flags & linux.IORING_CQE_F_SOCK_NONEMPTY,
-    }, recv_cqe);
-
-    try testing.expectEqualSlices(u8, payload[0..received.len], received[0..]);
-
-    // Second completion of the zero-copy send: IORING_CQE_F_NOTIF signals that
-    // the kernel is done with the send buffer.
-    const notif_cqe = try ring.copy_cqe();
-    try testing.expectEqual(linux.io_uring_cqe{
-        .user_data = send_tag,
-        .res = 0,
-        .flags = linux.IORING_CQE_F_NOTIF,
-    }, notif_cqe);
-}
-
-// Accepts connections straight into registered (direct) file slots, receives
-// data through the returned slot index, checks that accept fails with NFILE
-// once both slots are taken, and repeats after handing the slots back.
-test "accept_direct" {
-    try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
-
-    var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
-        error.SystemOutdated => return error.SkipZigTest,
-        error.PermissionDenied => return error.SkipZigTest,
-        else => return err,
-    };
-    defer ring.deinit();
-    var address = try net.Address.parseIp4("127.0.0.1", 0);
-
-    // register direct file descriptors
-    var registered_fds = [_]os.fd_t{-1} ** 2;
-    try ring.register_files(registered_fds[0..]);
-
-    const listener_socket = try createListenerSocket(&address);
-    defer posix.close(listener_socket);
-
-    const accept_userdata: u64 = 0xaaaaaaaa;
-    const read_userdata: u64 = 0xbbbbbbbb;
-    const data = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe };
-
-    for (0..2) |_| {
-        for (registered_fds, 0..) |_, i| {
-            var buffer_recv = [_]u8{0} ** 16;
-            const buffer_send: []const u8 = data[0 .. data.len - i]; // make it different at each loop
-
-            // submit accept, will chose registered fd and return index in cqe
-            _ = try ring.accept_direct(accept_userdata, listener_socket, null, null, 0);
-            try testing.expectEqual(@as(u32, 1), try ring.submit());
-
-            // connect; the close is registered before connecting so a failed
-            // connect does not leak the client socket
-            const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
-            defer posix.close(client);
-            try os.connect(client, &address.any, address.getOsSockLen());
-
-            // accept completion
-            const cqe_accept = try ring.copy_cqe();
-            try testing.expectEqual(os.E.SUCCESS, cqe_accept.err());
-            const fd_index = cqe_accept.res;
-            try testing.expect(fd_index < registered_fds.len);
-            try testing.expect(cqe_accept.user_data == accept_userdata);
-
-            // send data
-            _ = try os.send(client, buffer_send, 0);
-
-            // Example of how to use registered fd:
-            // Submit receive to fixed file returned by accept (fd_index).
-            // Fd field is set to registered file index, returned by accept.
-            // Flag linux.IOSQE_FIXED_FILE must be set.
-            const recv_sqe = try ring.recv(read_userdata, fd_index, .{ .buffer = &buffer_recv }, 0);
-            recv_sqe.flags |= linux.IOSQE_FIXED_FILE;
-            try testing.expectEqual(@as(u32, 1), try ring.submit());
-
-            // accept receive
-            const recv_cqe = try ring.copy_cqe();
-            try testing.expect(recv_cqe.user_data == read_userdata);
-            try testing.expect(recv_cqe.res == buffer_send.len);
-            try testing.expectEqualSlices(u8, buffer_send, buffer_recv[0..buffer_send.len]);
-        }
-        // no more available fds, accept will get NFILE error
-        {
-            // submit accept
-            _ = try ring.accept_direct(accept_userdata, listener_socket, null, null, 0);
-            try testing.expectEqual(@as(u32, 1), try ring.submit());
-            // connect
-            const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
-            defer posix.close(client);
-            try os.connect(client, &address.any, address.getOsSockLen());
-            // completion with error
-            const cqe_accept = try ring.copy_cqe();
-            try testing.expect(cqe_accept.user_data == accept_userdata);
-            try testing.expectEqual(os.E.NFILE, cqe_accept.err());
-        }
-        // return file descriptors to kernel
-        try ring.register_files_update(0, registered_fds[0..]);
-    }
-    try ring.unregister_files();
-}
-
-// Submits a multishot accept that places connections into registered (direct)
-// file slots, checks one completion per slot with the MORE flag set, then
-// checks that the next connection ends the multishot with NFILE.
-test "accept_multishot_direct" {
-    try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
-
-    var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
-        error.SystemOutdated => return error.SkipZigTest,
-        error.PermissionDenied => return error.SkipZigTest,
-        else => return err,
-    };
-    defer ring.deinit();
-
-    var address = try net.Address.parseIp4("127.0.0.1", 0);
-
-    var registered_fds = [_]os.fd_t{-1} ** 2;
-    try ring.register_files(registered_fds[0..]);
-
-    const listener_socket = try createListenerSocket(&address);
-    defer posix.close(listener_socket);
-
-    const accept_userdata: u64 = 0xaaaaaaaa;
-
-    for (0..2) |_| {
-        // submit multishot accept
-        // Will chose registered fd and return index of the selected registered file in cqe.
-        _ = try ring.accept_multishot_direct(accept_userdata, listener_socket, null, null, 0);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
-
-        for (registered_fds) |_| {
-            // connect; the close is registered before connecting so a failed
-            // connect does not leak the client socket
-            const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
-            defer posix.close(client);
-            try os.connect(client, &address.any, address.getOsSockLen());
-
-            // accept completion
-            const cqe_accept = try ring.copy_cqe();
-            const fd_index = cqe_accept.res;
-            try testing.expect(fd_index < registered_fds.len);
-            try testing.expect(cqe_accept.user_data == accept_userdata);
-            try testing.expect(cqe_accept.flags & linux.IORING_CQE_F_MORE > 0); // has more is set
-        }
-        // No more available fds, accept will get NFILE error.
-        // Multishot is terminated (more flag is not set).
-        {
-            // connect
-            const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
-            defer posix.close(client);
-            try os.connect(client, &address.any, address.getOsSockLen());
-            // completion with error
-            const cqe_accept = try ring.copy_cqe();
-            try testing.expect(cqe_accept.user_data == accept_userdata);
-            try testing.expectEqual(os.E.NFILE, cqe_accept.err());
-            try testing.expect(cqe_accept.flags & linux.IORING_CQE_F_MORE == 0); // has more is not set
-        }
-        // return file descriptors to kernel
-        try ring.register_files_update(0, registered_fds[0..]);
-    }
-    try ring.unregister_files();
-}
-
-// Creates a socket through the ring and checks that the completion carries a
-// usable file descriptor.
-test "socket" {
-    try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
-
-    var ring = IO_Uring.init(1, 0) catch |err| switch (err) {
-        error.SystemOutdated => return error.SkipZigTest,
-        error.PermissionDenied => return error.SkipZigTest,
-        else => return err,
-    };
-    defer ring.deinit();
-
-    // prepare, submit socket operation
-    _ = try ring.socket(0, linux.AF.INET, os.SOCK.STREAM, 0, 0);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
-
-    // test completion
-    const cqe = try ring.copy_cqe();
-    try testing.expectEqual(os.E.SUCCESS, cqe.err());
-    const fd: os.fd_t = @intCast(cqe.res);
-    // Registered right away so the descriptor is closed even when the
-    // assertion below fails.
-    defer os.close(fd);
-    try testing.expect(fd > 2);
-}
-
-// Creates three sockets directly in registered file slots (two at explicit
-// indexes, one at a kernel-chosen index), connects each of them to a local
-// listener using the slot index as a fixed file, and closes each slot with
-// close_direct.
-test "socket_direct/socket_direct_alloc/close_direct" {
- try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
-
- var ring = IO_Uring.init(2, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- // Three registered slots, all initialised to -1.
- var registered_fds = [_]os.fd_t{-1} ** 3;
- try ring.register_files(registered_fds[0..]);
-
- // create socket in registered file descriptor at index 0 (last param)
- _ = try ring.socket_direct(0, linux.AF.INET, os.SOCK.STREAM, 0, 0, 0);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- var cqe_socket = try ring.copy_cqe();
- try testing.expectEqual(os.E.SUCCESS, cqe_socket.err());
- try testing.expect(cqe_socket.res == 0);
-
- // create socket in registered file descriptor at index 1 (last param)
- _ = try ring.socket_direct(0, linux.AF.INET, os.SOCK.STREAM, 0, 0, 1);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- cqe_socket = try ring.copy_cqe();
- try testing.expectEqual(os.E.SUCCESS, cqe_socket.err());
- try testing.expect(cqe_socket.res == 0); // res is 0 when index is specified
-
- // create socket in kernel chosen file descriptor index (_alloc version)
- // completion res has index from registered files
- _ = try ring.socket_direct_alloc(0, linux.AF.INET, os.SOCK.STREAM, 0, 0);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- cqe_socket = try ring.copy_cqe();
- try testing.expectEqual(os.E.SUCCESS, cqe_socket.err());
- // Slots 0 and 1 were filled above, so index 2 is the only one left.
- try testing.expect(cqe_socket.res == 2); // returns registered file index
-
- // use sockets from registered_fds in connect operation
- var address = try net.Address.parseIp4("127.0.0.1", 0);
- const listener_socket = try createListenerSocket(&address);
- defer posix.close(listener_socket);
- const accept_userdata: u64 = 0xaaaaaaaa;
- const connect_userdata: u64 = 0xbbbbbbbb;
- const close_userdata: u64 = 0xcccccccc;
- for (registered_fds, 0..) |_, fd_index| {
- // prepare accept
- _ = try ring.accept(accept_userdata, listener_socket, null, null, 0);
- // prepare connect with fixed socket
- const connect_sqe = try ring.connect(connect_userdata, @intCast(fd_index), &address.any, address.getOsSockLen());
- connect_sqe.flags |= linux.IOSQE_FIXED_FILE; // fd is fixed file index
- // submit both
- try testing.expectEqual(@as(u32, 2), try ring.submit());
- // get completions
- var cqe_connect = try ring.copy_cqe();
- var cqe_accept = try ring.copy_cqe();
- // ignore order
- if (cqe_connect.user_data == accept_userdata and cqe_accept.user_data == connect_userdata) {
- const a = cqe_accept;
- const b = cqe_connect;
- cqe_accept = b;
- cqe_connect = a;
- }
- // test connect completion
- try testing.expect(cqe_connect.user_data == connect_userdata);
- try testing.expectEqual(os.E.SUCCESS, cqe_connect.err());
- // test accept completion
- try testing.expect(cqe_accept.user_data == accept_userdata);
- try testing.expectEqual(os.E.SUCCESS, cqe_accept.err());
- // NOTE(review): the accepted socket returned in cqe_accept.res is not closed here — confirm whether that is intentional.
-
- // submit and test close_direct
- _ = try ring.close_direct(close_userdata, @intCast(fd_index));
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- var cqe_close = try ring.copy_cqe();
- try testing.expect(cqe_close.user_data == close_userdata);
- try testing.expectEqual(os.E.SUCCESS, cqe_close.err());
- }
-
- try ring.unregister_files();
-}
-
-test "openat_direct/close_direct" {
- try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
-
- var ring = IO_Uring.init(2, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- var registered_fds = [_]os.fd_t{-1} ** 3;
- try ring.register_files(registered_fds[0..]);
-
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
- const path = "test_io_uring_close_direct";
- const flags: linux.O = .{ .ACCMODE = .RDWR, .CREAT = true };
- const mode: os.mode_t = 0o666;
- const user_data: u64 = 0;
-
- // use registered file at index 0 (last param)
- _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, 0);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- var cqe = try ring.copy_cqe();
- try testing.expectEqual(os.E.SUCCESS, cqe.err());
- try testing.expect(cqe.res == 0);
-
- // use registered file at index 1
- _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, 1);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- cqe = try ring.copy_cqe();
- try testing.expectEqual(os.E.SUCCESS, cqe.err());
- try testing.expect(cqe.res == 0); // res is 0 when we specify index
-
- // let kernel choose registered file index
- _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, linux.IORING_FILE_INDEX_ALLOC);
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- cqe = try ring.copy_cqe();
- try testing.expectEqual(os.E.SUCCESS, cqe.err());
- try testing.expect(cqe.res == 2); // chosen index is in res
-
- // close all open file descriptors
- for (registered_fds, 0..) |_, fd_index| {
- _ = try ring.close_direct(user_data, @intCast(fd_index));
- try testing.expectEqual(@as(u32, 1), try ring.submit());
- var cqe_close = try ring.copy_cqe();
- try testing.expectEqual(os.E.SUCCESS, cqe_close.err());
- }
- try ring.unregister_files();
-}
-
-test "waitid" {
- try skipKernelLessThan(.{ .major = 6, .minor = 7, .patch = 0 });
-
- var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
- error.SystemOutdated => return error.SkipZigTest,
- error.PermissionDenied => return error.SkipZigTest,
- else => return err,
- };
- defer ring.deinit();
-
- const pid = try os.fork();
- if (pid == 0) {
- os.exit(7);
- }
-
- var siginfo: os.siginfo_t = undefined;
- _ = try ring.waitid(0, .PID, pid, &siginfo, os.W.EXITED, 0);
-
- try testing.expectEqual(1, try ring.submit());
-
- const cqe_waitid = try ring.copy_cqe();
- try testing.expectEqual(0, cqe_waitid.res);
- try testing.expectEqual(pid, siginfo.fields.common.first.piduid.pid);
- try testing.expectEqual(7, siginfo.fields.common.second.sigchld.status);
-}
-
-/// For use in tests. Returns SkipZigTest is kernel version is less than required.
-inline fn skipKernelLessThan(required: std.SemanticVersion) !void {
- if (builtin.os.tag != .linux) return error.SkipZigTest;
-
- var uts: linux.utsname = undefined;
- const res = linux.uname(&uts);
- switch (linux.getErrno(res)) {
- .SUCCESS => {},
- else => |errno| return os.unexpectedErrno(errno),
- }
-
- const release = mem.sliceTo(&uts.release, 0);
- var current = try std.SemanticVersion.parse(release);
- current.pre = null; // don't check pre field
- if (required.order(current) == .gt) return error.SkipZigTest;
-}
diff --git a/lib/std/os/linux/io_uring_sqe.zig b/lib/std/os/linux/io_uring_sqe.zig
@@ -0,0 +1,579 @@
+//! Contains only the definition of `io_uring_sqe`.
+//! Split into its own file to compartmentalize the initialization methods.
+
+const std = @import("../../std.zig");
+const os = std.os;
+const linux = os.linux;
+
+pub const io_uring_sqe = extern struct {
+ opcode: linux.IORING_OP,
+ flags: u8,
+ ioprio: u16,
+ fd: i32,
+ off: u64,
+ addr: u64,
+ len: u32,
+ rw_flags: u32,
+ user_data: u64,
+ buf_index: u16,
+ personality: u16,
+ splice_fd_in: i32,
+ addr3: u64,
+ resv: u64,
+
+ pub fn prep_nop(sqe: *linux.io_uring_sqe) void {
+ sqe.* = .{
+ .opcode = .NOP,
+ .flags = 0,
+ .ioprio = 0,
+ .fd = 0,
+ .off = 0,
+ .addr = 0,
+ .len = 0,
+ .rw_flags = 0,
+ .user_data = 0,
+ .buf_index = 0,
+ .personality = 0,
+ .splice_fd_in = 0,
+ .addr3 = 0,
+ .resv = 0,
+ };
+ }
+
+ pub fn prep_fsync(sqe: *linux.io_uring_sqe, fd: os.fd_t, flags: u32) void {
+ sqe.* = .{
+ .opcode = .FSYNC,
+ .flags = 0,
+ .ioprio = 0,
+ .fd = fd,
+ .off = 0,
+ .addr = 0,
+ .len = 0,
+ .rw_flags = flags,
+ .user_data = 0,
+ .buf_index = 0,
+ .personality = 0,
+ .splice_fd_in = 0,
+ .addr3 = 0,
+ .resv = 0,
+ };
+ }
+
+ pub fn prep_rw(
+ sqe: *linux.io_uring_sqe,
+ op: linux.IORING_OP,
+ fd: os.fd_t,
+ addr: u64,
+ len: usize,
+ offset: u64,
+ ) void {
+ sqe.* = .{
+ .opcode = op,
+ .flags = 0,
+ .ioprio = 0,
+ .fd = fd,
+ .off = offset,
+ .addr = addr,
+ .len = @intCast(len),
+ .rw_flags = 0,
+ .user_data = 0,
+ .buf_index = 0,
+ .personality = 0,
+ .splice_fd_in = 0,
+ .addr3 = 0,
+ .resv = 0,
+ };
+ }
+
+ pub fn prep_read(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []u8, offset: u64) void {
+ sqe.prep_rw(.READ, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
+ }
+
+ pub fn prep_write(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []const u8, offset: u64) void {
+ sqe.prep_rw(.WRITE, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
+ }
+
+ pub fn prep_splice(sqe: *linux.io_uring_sqe, fd_in: os.fd_t, off_in: u64, fd_out: os.fd_t, off_out: u64, len: usize) void {
+ sqe.prep_rw(.SPLICE, fd_out, undefined, len, off_out);
+ sqe.addr = off_in;
+ sqe.splice_fd_in = fd_in;
+ }
+
+ pub fn prep_readv(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ iovecs: []const os.iovec,
+ offset: u64,
+ ) void {
+ sqe.prep_rw(.READV, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset);
+ }
+
+ pub fn prep_writev(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ iovecs: []const os.iovec_const,
+ offset: u64,
+ ) void {
+ sqe.prep_rw(.WRITEV, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset);
+ }
+
+ pub fn prep_read_fixed(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: *os.iovec, offset: u64, buffer_index: u16) void {
+ sqe.prep_rw(.READ_FIXED, fd, @intFromPtr(buffer.iov_base), buffer.iov_len, offset);
+ sqe.buf_index = buffer_index;
+ }
+
+ pub fn prep_write_fixed(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: *os.iovec, offset: u64, buffer_index: u16) void {
+ sqe.prep_rw(.WRITE_FIXED, fd, @intFromPtr(buffer.iov_base), buffer.iov_len, offset);
+ sqe.buf_index = buffer_index;
+ }
+
+ pub fn prep_accept(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ addr: ?*os.sockaddr,
+ addrlen: ?*os.socklen_t,
+ flags: u32,
+ ) void {
+        // `addr` holds a pointer to `sockaddr`, and `addr2` holds a pointer to `socklen_t`.
+ // `addr2` maps to `sqe.off` (u64) instead of `sqe.len` (which is only a u32).
+ sqe.prep_rw(.ACCEPT, fd, @intFromPtr(addr), 0, @intFromPtr(addrlen));
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_accept_direct(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ addr: ?*os.sockaddr,
+ addrlen: ?*os.socklen_t,
+ flags: u32,
+ file_index: u32,
+ ) void {
+ prep_accept(sqe, fd, addr, addrlen, flags);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+ }
+
+ pub fn prep_multishot_accept_direct(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ addr: ?*os.sockaddr,
+ addrlen: ?*os.socklen_t,
+ flags: u32,
+ ) void {
+ prep_multishot_accept(sqe, fd, addr, addrlen, flags);
+ __io_uring_set_target_fixed_file(sqe, linux.IORING_FILE_INDEX_ALLOC);
+ }
+
+ fn __io_uring_set_target_fixed_file(sqe: *linux.io_uring_sqe, file_index: u32) void {
+ const sqe_file_index: u32 = if (file_index == linux.IORING_FILE_INDEX_ALLOC)
+ linux.IORING_FILE_INDEX_ALLOC
+ else
+ // 0 means no fixed files, indexes should be encoded as "index + 1"
+ file_index + 1;
+        // This field is overloaded in liburing:
+ // splice_fd_in: i32
+ // sqe_file_index: u32
+ sqe.splice_fd_in = @bitCast(sqe_file_index);
+ }
+
+ pub fn prep_connect(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ addr: *const os.sockaddr,
+ addrlen: os.socklen_t,
+ ) void {
+ // `addrlen` maps to `sqe.off` (u64) instead of `sqe.len` (which is only a u32).
+ sqe.prep_rw(.CONNECT, fd, @intFromPtr(addr), 0, addrlen);
+ }
+
+ pub fn prep_epoll_ctl(
+ sqe: *linux.io_uring_sqe,
+ epfd: os.fd_t,
+ fd: os.fd_t,
+ op: u32,
+ ev: ?*linux.epoll_event,
+ ) void {
+ sqe.prep_rw(.EPOLL_CTL, epfd, @intFromPtr(ev), op, @intCast(fd));
+ }
+
+ pub fn prep_recv(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []u8, flags: u32) void {
+ sqe.prep_rw(.RECV, fd, @intFromPtr(buffer.ptr), buffer.len, 0);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_send(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []const u8, flags: u32) void {
+ sqe.prep_rw(.SEND, fd, @intFromPtr(buffer.ptr), buffer.len, 0);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_send_zc(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []const u8, flags: u32, zc_flags: u16) void {
+ sqe.prep_rw(.SEND_ZC, fd, @intFromPtr(buffer.ptr), buffer.len, 0);
+ sqe.rw_flags = flags;
+ sqe.ioprio = zc_flags;
+ }
+
+ pub fn prep_send_zc_fixed(sqe: *linux.io_uring_sqe, fd: os.fd_t, buffer: []const u8, flags: u32, zc_flags: u16, buf_index: u16) void {
+ prep_send_zc(sqe, fd, buffer, flags, zc_flags);
+ sqe.ioprio |= linux.IORING_RECVSEND_FIXED_BUF;
+ sqe.buf_index = buf_index;
+ }
+
+ pub fn prep_sendmsg_zc(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ msg: *const os.msghdr_const,
+ flags: u32,
+ ) void {
+ prep_sendmsg(sqe, fd, msg, flags);
+ sqe.opcode = .SENDMSG_ZC;
+ }
+
+ pub fn prep_recvmsg(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ msg: *os.msghdr,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.RECVMSG, fd, @intFromPtr(msg), 1, 0);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_sendmsg(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ msg: *const os.msghdr_const,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.SENDMSG, fd, @intFromPtr(msg), 1, 0);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_openat(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ path: [*:0]const u8,
+ flags: linux.O,
+ mode: os.mode_t,
+ ) void {
+ sqe.prep_rw(.OPENAT, fd, @intFromPtr(path), mode, 0);
+ sqe.rw_flags = @bitCast(flags);
+ }
+
+ pub fn prep_openat_direct(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ path: [*:0]const u8,
+ flags: linux.O,
+ mode: os.mode_t,
+ file_index: u32,
+ ) void {
+ prep_openat(sqe, fd, path, flags, mode);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+ }
+
+ pub fn prep_close(sqe: *linux.io_uring_sqe, fd: os.fd_t) void {
+ sqe.* = .{
+ .opcode = .CLOSE,
+ .flags = 0,
+ .ioprio = 0,
+ .fd = fd,
+ .off = 0,
+ .addr = 0,
+ .len = 0,
+ .rw_flags = 0,
+ .user_data = 0,
+ .buf_index = 0,
+ .personality = 0,
+ .splice_fd_in = 0,
+ .addr3 = 0,
+ .resv = 0,
+ };
+ }
+
+ pub fn prep_close_direct(sqe: *linux.io_uring_sqe, file_index: u32) void {
+ prep_close(sqe, 0);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+ }
+
+ pub fn prep_timeout(
+ sqe: *linux.io_uring_sqe,
+ ts: *const os.linux.kernel_timespec,
+ count: u32,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.TIMEOUT, -1, @intFromPtr(ts), 1, count);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_timeout_remove(sqe: *linux.io_uring_sqe, timeout_user_data: u64, flags: u32) void {
+ sqe.* = .{
+ .opcode = .TIMEOUT_REMOVE,
+ .flags = 0,
+ .ioprio = 0,
+ .fd = -1,
+ .off = 0,
+ .addr = timeout_user_data,
+ .len = 0,
+ .rw_flags = flags,
+ .user_data = 0,
+ .buf_index = 0,
+ .personality = 0,
+ .splice_fd_in = 0,
+ .addr3 = 0,
+ .resv = 0,
+ };
+ }
+
+ pub fn prep_link_timeout(
+ sqe: *linux.io_uring_sqe,
+ ts: *const os.linux.kernel_timespec,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.LINK_TIMEOUT, -1, @intFromPtr(ts), 1, 0);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_poll_add(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ poll_mask: u32,
+ ) void {
+ sqe.prep_rw(.POLL_ADD, fd, @intFromPtr(@as(?*anyopaque, null)), 0, 0);
+        // Poll masks previously comprised 16 bits in the flags union of
+        // a SQE, but were then extended to comprise 32 bits in order to make
+ // room for additional option flags. To ensure that the correct bits of
+ // poll masks are consistently and properly read across multiple kernel
+ // versions, poll masks are enforced to be little-endian.
+ // https://www.spinics.net/lists/io-uring/msg02848.html
+ sqe.rw_flags = std.mem.nativeToLittle(u32, poll_mask);
+ }
+
+ pub fn prep_poll_remove(
+ sqe: *linux.io_uring_sqe,
+ target_user_data: u64,
+ ) void {
+ sqe.prep_rw(.POLL_REMOVE, -1, target_user_data, 0, 0);
+ }
+
+ pub fn prep_poll_update(
+ sqe: *linux.io_uring_sqe,
+ old_user_data: u64,
+ new_user_data: u64,
+ poll_mask: u32,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.POLL_REMOVE, -1, old_user_data, flags, new_user_data);
+        // Poll masks previously comprised 16 bits in the flags union of
+        // a SQE, but were then extended to comprise 32 bits in order to make
+ // room for additional option flags. To ensure that the correct bits of
+ // poll masks are consistently and properly read across multiple kernel
+ // versions, poll masks are enforced to be little-endian.
+ // https://www.spinics.net/lists/io-uring/msg02848.html
+ sqe.rw_flags = std.mem.nativeToLittle(u32, poll_mask);
+ }
+
+ pub fn prep_fallocate(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ mode: i32,
+ offset: u64,
+ len: u64,
+ ) void {
+ sqe.* = .{
+ .opcode = .FALLOCATE,
+ .flags = 0,
+ .ioprio = 0,
+ .fd = fd,
+ .off = offset,
+ .addr = len,
+ .len = @intCast(mode),
+ .rw_flags = 0,
+ .user_data = 0,
+ .buf_index = 0,
+ .personality = 0,
+ .splice_fd_in = 0,
+ .addr3 = 0,
+ .resv = 0,
+ };
+ }
+
+ pub fn prep_statx(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ path: [*:0]const u8,
+ flags: u32,
+ mask: u32,
+ buf: *linux.Statx,
+ ) void {
+ sqe.prep_rw(.STATX, fd, @intFromPtr(path), mask, @intFromPtr(buf));
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_cancel(
+ sqe: *linux.io_uring_sqe,
+ cancel_user_data: u64,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.ASYNC_CANCEL, -1, cancel_user_data, 0, 0);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_shutdown(
+ sqe: *linux.io_uring_sqe,
+ sockfd: os.socket_t,
+ how: u32,
+ ) void {
+ sqe.prep_rw(.SHUTDOWN, sockfd, 0, how, 0);
+ }
+
+ pub fn prep_renameat(
+ sqe: *linux.io_uring_sqe,
+ old_dir_fd: os.fd_t,
+ old_path: [*:0]const u8,
+ new_dir_fd: os.fd_t,
+ new_path: [*:0]const u8,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(
+ .RENAMEAT,
+ old_dir_fd,
+ @intFromPtr(old_path),
+ 0,
+ @intFromPtr(new_path),
+ );
+ sqe.len = @bitCast(new_dir_fd);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_unlinkat(
+ sqe: *linux.io_uring_sqe,
+ dir_fd: os.fd_t,
+ path: [*:0]const u8,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.UNLINKAT, dir_fd, @intFromPtr(path), 0, 0);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_mkdirat(
+ sqe: *linux.io_uring_sqe,
+ dir_fd: os.fd_t,
+ path: [*:0]const u8,
+ mode: os.mode_t,
+ ) void {
+ sqe.prep_rw(.MKDIRAT, dir_fd, @intFromPtr(path), mode, 0);
+ }
+
+ pub fn prep_symlinkat(
+ sqe: *linux.io_uring_sqe,
+ target: [*:0]const u8,
+ new_dir_fd: os.fd_t,
+ link_path: [*:0]const u8,
+ ) void {
+ sqe.prep_rw(
+ .SYMLINKAT,
+ new_dir_fd,
+ @intFromPtr(target),
+ 0,
+ @intFromPtr(link_path),
+ );
+ }
+
+ pub fn prep_linkat(
+ sqe: *linux.io_uring_sqe,
+ old_dir_fd: os.fd_t,
+ old_path: [*:0]const u8,
+ new_dir_fd: os.fd_t,
+ new_path: [*:0]const u8,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(
+ .LINKAT,
+ old_dir_fd,
+ @intFromPtr(old_path),
+ 0,
+ @intFromPtr(new_path),
+ );
+ sqe.len = @bitCast(new_dir_fd);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_provide_buffers(
+ sqe: *linux.io_uring_sqe,
+ buffers: [*]u8,
+ buffer_len: usize,
+ num: usize,
+ group_id: usize,
+ buffer_id: usize,
+ ) void {
+ const ptr = @intFromPtr(buffers);
+ sqe.prep_rw(.PROVIDE_BUFFERS, @intCast(num), ptr, buffer_len, buffer_id);
+ sqe.buf_index = @intCast(group_id);
+ }
+
+ pub fn prep_remove_buffers(
+ sqe: *linux.io_uring_sqe,
+ num: usize,
+ group_id: usize,
+ ) void {
+ sqe.prep_rw(.REMOVE_BUFFERS, @intCast(num), 0, 0, 0);
+ sqe.buf_index = @intCast(group_id);
+ }
+
+ pub fn prep_multishot_accept(
+ sqe: *linux.io_uring_sqe,
+ fd: os.fd_t,
+ addr: ?*os.sockaddr,
+ addrlen: ?*os.socklen_t,
+ flags: u32,
+ ) void {
+ prep_accept(sqe, fd, addr, addrlen, flags);
+ sqe.ioprio |= linux.IORING_ACCEPT_MULTISHOT;
+ }
+
+ pub fn prep_socket(
+ sqe: *linux.io_uring_sqe,
+ domain: u32,
+ socket_type: u32,
+ protocol: u32,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.SOCKET, @intCast(domain), 0, protocol, socket_type);
+ sqe.rw_flags = flags;
+ }
+
+ pub fn prep_socket_direct(
+ sqe: *linux.io_uring_sqe,
+ domain: u32,
+ socket_type: u32,
+ protocol: u32,
+ flags: u32,
+ file_index: u32,
+ ) void {
+ prep_socket(sqe, domain, socket_type, protocol, flags);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+ }
+
+ pub fn prep_socket_direct_alloc(
+ sqe: *linux.io_uring_sqe,
+ domain: u32,
+ socket_type: u32,
+ protocol: u32,
+ flags: u32,
+ ) void {
+ prep_socket(sqe, domain, socket_type, protocol, flags);
+ __io_uring_set_target_fixed_file(sqe, linux.IORING_FILE_INDEX_ALLOC);
+ }
+
+ pub fn prep_waitid(
+ sqe: *linux.io_uring_sqe,
+ id_type: linux.P,
+ id: i32,
+ infop: *linux.siginfo_t,
+ options: u32,
+ flags: u32,
+ ) void {
+ sqe.prep_rw(.WAITID, id, 0, @intFromEnum(id_type), @intFromPtr(infop));
+ sqe.rw_flags = flags;
+ sqe.splice_fd_in = @bitCast(options);
+ }
+};
diff --git a/lib/std/os/linux/test.zig b/lib/std/os/linux/test.zig
@@ -120,3 +120,7 @@ test "fadvise" {
const ret = linux.fadvise(file.handle, 0, 0, linux.POSIX_FADV.SEQUENTIAL);
try expectEqual(@as(usize, 0), ret);
}
+
+test {
+ _ = linux.IoUring;
+}
diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig
@@ -3796,290 +3796,300 @@ pub const EXCEPTION_RECORD = extern struct {
ExceptionInformation: [15]usize,
};
-pub usingnamespace switch (native_arch) {
- .x86 => struct {
- pub const FLOATING_SAVE_AREA = extern struct {
- ControlWord: DWORD,
- StatusWord: DWORD,
- TagWord: DWORD,
- ErrorOffset: DWORD,
- ErrorSelector: DWORD,
- DataOffset: DWORD,
- DataSelector: DWORD,
- RegisterArea: [80]BYTE,
- Cr0NpxState: DWORD,
- };
+pub const FLOATING_SAVE_AREA = switch (native_arch) {
+ .x86 => extern struct {
+ ControlWord: DWORD,
+ StatusWord: DWORD,
+ TagWord: DWORD,
+ ErrorOffset: DWORD,
+ ErrorSelector: DWORD,
+ DataOffset: DWORD,
+ DataSelector: DWORD,
+ RegisterArea: [80]BYTE,
+ Cr0NpxState: DWORD,
+ },
+ else => @compileError("FLOATING_SAVE_AREA only defined on x86"),
+};
- pub const CONTEXT = extern struct {
- ContextFlags: DWORD,
- Dr0: DWORD,
- Dr1: DWORD,
- Dr2: DWORD,
- Dr3: DWORD,
- Dr6: DWORD,
- Dr7: DWORD,
- FloatSave: FLOATING_SAVE_AREA,
- SegGs: DWORD,
- SegFs: DWORD,
- SegEs: DWORD,
- SegDs: DWORD,
- Edi: DWORD,
- Esi: DWORD,
- Ebx: DWORD,
- Edx: DWORD,
- Ecx: DWORD,
- Eax: DWORD,
- Ebp: DWORD,
- Eip: DWORD,
- SegCs: DWORD,
- EFlags: DWORD,
- Esp: DWORD,
- SegSs: DWORD,
- ExtendedRegisters: [512]BYTE,
-
- pub fn getRegs(ctx: *const CONTEXT) struct { bp: usize, ip: usize } {
- return .{ .bp = ctx.Ebp, .ip = ctx.Eip };
- }
- };
+pub const M128A = switch (native_arch) {
+ .x86_64 => extern struct {
+ Low: ULONGLONG,
+ High: LONGLONG,
+ },
+ else => @compileError("M128A only defined on x86_64"),
+};
+
+pub const XMM_SAVE_AREA32 = switch (native_arch) {
+ .x86_64 => extern struct {
+ ControlWord: WORD,
+ StatusWord: WORD,
+ TagWord: BYTE,
+ Reserved1: BYTE,
+ ErrorOpcode: WORD,
+ ErrorOffset: DWORD,
+ ErrorSelector: WORD,
+ Reserved2: WORD,
+ DataOffset: DWORD,
+ DataSelector: WORD,
+ Reserved3: WORD,
+ MxCsr: DWORD,
+ MxCsr_Mask: DWORD,
+ FloatRegisters: [8]M128A,
+ XmmRegisters: [16]M128A,
+ Reserved4: [96]BYTE,
},
- .x86_64 => struct {
- pub const M128A = extern struct {
+ else => @compileError("XMM_SAVE_AREA32 only defined on x86_64"),
+};
+
+pub const NEON128 = switch (native_arch) {
+ .aarch64 => extern union {
+ DUMMYSTRUCTNAME: extern struct {
Low: ULONGLONG,
High: LONGLONG,
- };
-
- pub const XMM_SAVE_AREA32 = extern struct {
- ControlWord: WORD,
- StatusWord: WORD,
- TagWord: BYTE,
- Reserved1: BYTE,
- ErrorOpcode: WORD,
- ErrorOffset: DWORD,
- ErrorSelector: WORD,
- Reserved2: WORD,
- DataOffset: DWORD,
- DataSelector: WORD,
- Reserved3: WORD,
- MxCsr: DWORD,
- MxCsr_Mask: DWORD,
- FloatRegisters: [8]M128A,
- XmmRegisters: [16]M128A,
- Reserved4: [96]BYTE,
- };
-
- pub const CONTEXT = extern struct {
- P1Home: DWORD64 align(16),
- P2Home: DWORD64,
- P3Home: DWORD64,
- P4Home: DWORD64,
- P5Home: DWORD64,
- P6Home: DWORD64,
- ContextFlags: DWORD,
- MxCsr: DWORD,
- SegCs: WORD,
- SegDs: WORD,
- SegEs: WORD,
- SegFs: WORD,
- SegGs: WORD,
- SegSs: WORD,
- EFlags: DWORD,
- Dr0: DWORD64,
- Dr1: DWORD64,
- Dr2: DWORD64,
- Dr3: DWORD64,
- Dr6: DWORD64,
- Dr7: DWORD64,
- Rax: DWORD64,
- Rcx: DWORD64,
- Rdx: DWORD64,
- Rbx: DWORD64,
- Rsp: DWORD64,
- Rbp: DWORD64,
- Rsi: DWORD64,
- Rdi: DWORD64,
- R8: DWORD64,
- R9: DWORD64,
- R10: DWORD64,
- R11: DWORD64,
- R12: DWORD64,
- R13: DWORD64,
- R14: DWORD64,
- R15: DWORD64,
- Rip: DWORD64,
- DUMMYUNIONNAME: extern union {
- FltSave: XMM_SAVE_AREA32,
- FloatSave: XMM_SAVE_AREA32,
- DUMMYSTRUCTNAME: extern struct {
- Header: [2]M128A,
- Legacy: [8]M128A,
- Xmm0: M128A,
- Xmm1: M128A,
- Xmm2: M128A,
- Xmm3: M128A,
- Xmm4: M128A,
- Xmm5: M128A,
- Xmm6: M128A,
- Xmm7: M128A,
- Xmm8: M128A,
- Xmm9: M128A,
- Xmm10: M128A,
- Xmm11: M128A,
- Xmm12: M128A,
- Xmm13: M128A,
- Xmm14: M128A,
- Xmm15: M128A,
- },
+ },
+ D: [2]f64,
+ S: [4]f32,
+ H: [8]WORD,
+ B: [16]BYTE,
+ },
+ else => @compileError("NEON128 only defined on aarch64"),
+};
+
+pub const CONTEXT = switch (native_arch) {
+ .x86 => extern struct {
+ ContextFlags: DWORD,
+ Dr0: DWORD,
+ Dr1: DWORD,
+ Dr2: DWORD,
+ Dr3: DWORD,
+ Dr6: DWORD,
+ Dr7: DWORD,
+ FloatSave: FLOATING_SAVE_AREA,
+ SegGs: DWORD,
+ SegFs: DWORD,
+ SegEs: DWORD,
+ SegDs: DWORD,
+ Edi: DWORD,
+ Esi: DWORD,
+ Ebx: DWORD,
+ Edx: DWORD,
+ Ecx: DWORD,
+ Eax: DWORD,
+ Ebp: DWORD,
+ Eip: DWORD,
+ SegCs: DWORD,
+ EFlags: DWORD,
+ Esp: DWORD,
+ SegSs: DWORD,
+ ExtendedRegisters: [512]BYTE,
+
+ pub fn getRegs(ctx: *const CONTEXT) struct { bp: usize, ip: usize } {
+ return .{ .bp = ctx.Ebp, .ip = ctx.Eip };
+ }
+ },
+ .x86_64 => extern struct {
+ P1Home: DWORD64 align(16),
+ P2Home: DWORD64,
+ P3Home: DWORD64,
+ P4Home: DWORD64,
+ P5Home: DWORD64,
+ P6Home: DWORD64,
+ ContextFlags: DWORD,
+ MxCsr: DWORD,
+ SegCs: WORD,
+ SegDs: WORD,
+ SegEs: WORD,
+ SegFs: WORD,
+ SegGs: WORD,
+ SegSs: WORD,
+ EFlags: DWORD,
+ Dr0: DWORD64,
+ Dr1: DWORD64,
+ Dr2: DWORD64,
+ Dr3: DWORD64,
+ Dr6: DWORD64,
+ Dr7: DWORD64,
+ Rax: DWORD64,
+ Rcx: DWORD64,
+ Rdx: DWORD64,
+ Rbx: DWORD64,
+ Rsp: DWORD64,
+ Rbp: DWORD64,
+ Rsi: DWORD64,
+ Rdi: DWORD64,
+ R8: DWORD64,
+ R9: DWORD64,
+ R10: DWORD64,
+ R11: DWORD64,
+ R12: DWORD64,
+ R13: DWORD64,
+ R14: DWORD64,
+ R15: DWORD64,
+ Rip: DWORD64,
+ DUMMYUNIONNAME: extern union {
+ FltSave: XMM_SAVE_AREA32,
+ FloatSave: XMM_SAVE_AREA32,
+ DUMMYSTRUCTNAME: extern struct {
+ Header: [2]M128A,
+ Legacy: [8]M128A,
+ Xmm0: M128A,
+ Xmm1: M128A,
+ Xmm2: M128A,
+ Xmm3: M128A,
+ Xmm4: M128A,
+ Xmm5: M128A,
+ Xmm6: M128A,
+ Xmm7: M128A,
+ Xmm8: M128A,
+ Xmm9: M128A,
+ Xmm10: M128A,
+ Xmm11: M128A,
+ Xmm12: M128A,
+ Xmm13: M128A,
+ Xmm14: M128A,
+ Xmm15: M128A,
},
- VectorRegister: [26]M128A,
- VectorControl: DWORD64,
- DebugControl: DWORD64,
- LastBranchToRip: DWORD64,
- LastBranchFromRip: DWORD64,
- LastExceptionToRip: DWORD64,
- LastExceptionFromRip: DWORD64,
-
- pub fn getRegs(ctx: *const CONTEXT) struct { bp: usize, ip: usize, sp: usize } {
- return .{ .bp = ctx.Rbp, .ip = ctx.Rip, .sp = ctx.Rsp };
- }
-
- pub fn setIp(ctx: *CONTEXT, ip: usize) void {
- ctx.Rip = ip;
- }
-
- pub fn setSp(ctx: *CONTEXT, sp: usize) void {
- ctx.Rsp = sp;
- }
- };
+ },
+ VectorRegister: [26]M128A,
+ VectorControl: DWORD64,
+ DebugControl: DWORD64,
+ LastBranchToRip: DWORD64,
+ LastBranchFromRip: DWORD64,
+ LastExceptionToRip: DWORD64,
+ LastExceptionFromRip: DWORD64,
+
+ pub fn getRegs(ctx: *const CONTEXT) struct { bp: usize, ip: usize, sp: usize } {
+ return .{ .bp = ctx.Rbp, .ip = ctx.Rip, .sp = ctx.Rsp };
+ }
- pub const RUNTIME_FUNCTION = extern struct {
- BeginAddress: DWORD,
- EndAddress: DWORD,
- UnwindData: DWORD,
- };
+ pub fn setIp(ctx: *CONTEXT, ip: usize) void {
+ ctx.Rip = ip;
+ }
- pub const KNONVOLATILE_CONTEXT_POINTERS = extern struct {
- FloatingContext: [16]?*M128A,
- IntegerContext: [16]?*ULONG64,
- };
+ pub fn setSp(ctx: *CONTEXT, sp: usize) void {
+ ctx.Rsp = sp;
+ }
},
- .aarch64 => struct {
- pub const NEON128 = extern union {
+ .aarch64 => extern struct {
+ ContextFlags: ULONG align(16),
+ Cpsr: ULONG,
+ DUMMYUNIONNAME: extern union {
DUMMYSTRUCTNAME: extern struct {
- Low: ULONGLONG,
- High: LONGLONG,
- },
- D: [2]f64,
- S: [4]f32,
- H: [8]WORD,
- B: [16]BYTE,
- };
-
- pub const CONTEXT = extern struct {
- ContextFlags: ULONG align(16),
- Cpsr: ULONG,
- DUMMYUNIONNAME: extern union {
- DUMMYSTRUCTNAME: extern struct {
- X0: DWORD64,
- X1: DWORD64,
- X2: DWORD64,
- X3: DWORD64,
- X4: DWORD64,
- X5: DWORD64,
- X6: DWORD64,
- X7: DWORD64,
- X8: DWORD64,
- X9: DWORD64,
- X10: DWORD64,
- X11: DWORD64,
- X12: DWORD64,
- X13: DWORD64,
- X14: DWORD64,
- X15: DWORD64,
- X16: DWORD64,
- X17: DWORD64,
- X18: DWORD64,
- X19: DWORD64,
- X20: DWORD64,
- X21: DWORD64,
- X22: DWORD64,
- X23: DWORD64,
- X24: DWORD64,
- X25: DWORD64,
- X26: DWORD64,
- X27: DWORD64,
- X28: DWORD64,
- Fp: DWORD64,
- Lr: DWORD64,
- },
- X: [31]DWORD64,
+ X0: DWORD64,
+ X1: DWORD64,
+ X2: DWORD64,
+ X3: DWORD64,
+ X4: DWORD64,
+ X5: DWORD64,
+ X6: DWORD64,
+ X7: DWORD64,
+ X8: DWORD64,
+ X9: DWORD64,
+ X10: DWORD64,
+ X11: DWORD64,
+ X12: DWORD64,
+ X13: DWORD64,
+ X14: DWORD64,
+ X15: DWORD64,
+ X16: DWORD64,
+ X17: DWORD64,
+ X18: DWORD64,
+ X19: DWORD64,
+ X20: DWORD64,
+ X21: DWORD64,
+ X22: DWORD64,
+ X23: DWORD64,
+ X24: DWORD64,
+ X25: DWORD64,
+ X26: DWORD64,
+ X27: DWORD64,
+ X28: DWORD64,
+ Fp: DWORD64,
+ Lr: DWORD64,
},
- Sp: DWORD64,
- Pc: DWORD64,
- V: [32]NEON128,
- Fpcr: DWORD,
- Fpsr: DWORD,
- Bcr: [8]DWORD,
- Bvr: [8]DWORD64,
- Wcr: [2]DWORD,
- Wvr: [2]DWORD64,
-
- pub fn getRegs(ctx: *const CONTEXT) struct { bp: usize, ip: usize, sp: usize } {
- return .{
- .bp = ctx.DUMMYUNIONNAME.DUMMYSTRUCTNAME.Fp,
- .ip = ctx.Pc,
- .sp = ctx.Sp,
- };
- }
+ X: [31]DWORD64,
+ },
+ Sp: DWORD64,
+ Pc: DWORD64,
+ V: [32]NEON128,
+ Fpcr: DWORD,
+ Fpsr: DWORD,
+ Bcr: [8]DWORD,
+ Bvr: [8]DWORD64,
+ Wcr: [2]DWORD,
+ Wvr: [2]DWORD64,
+
+ pub fn getRegs(ctx: *const CONTEXT) struct { bp: usize, ip: usize, sp: usize } {
+ return .{
+ .bp = ctx.DUMMYUNIONNAME.DUMMYSTRUCTNAME.Fp,
+ .ip = ctx.Pc,
+ .sp = ctx.Sp,
+ };
+ }
- pub fn setIp(ctx: *CONTEXT, ip: usize) void {
- ctx.Pc = ip;
- }
+ pub fn setIp(ctx: *CONTEXT, ip: usize) void {
+ ctx.Pc = ip;
+ }
- pub fn setSp(ctx: *CONTEXT, sp: usize) void {
- ctx.Sp = sp;
- }
- };
+ pub fn setSp(ctx: *CONTEXT, sp: usize) void {
+ ctx.Sp = sp;
+ }
+ },
+ else => @compileError("CONTEXT is not defined for this architecture"),
+};
- pub const RUNTIME_FUNCTION = extern struct {
- BeginAddress: DWORD,
- DUMMYUNIONNAME: extern union {
- UnwindData: DWORD,
- DUMMYSTRUCTNAME: packed struct {
- Flag: u2,
- FunctionLength: u11,
- RegF: u3,
- RegI: u4,
- H: u1,
- CR: u2,
- FrameSize: u9,
- },
+pub const RUNTIME_FUNCTION = switch (native_arch) {
+ .x86_64 => extern struct {
+ BeginAddress: DWORD,
+ EndAddress: DWORD,
+ UnwindData: DWORD,
+ },
+ .aarch64 => extern struct {
+ BeginAddress: DWORD,
+ DUMMYUNIONNAME: extern union {
+ UnwindData: DWORD,
+ DUMMYSTRUCTNAME: packed struct {
+ Flag: u2,
+ FunctionLength: u11,
+ RegF: u3,
+ RegI: u4,
+ H: u1,
+ CR: u2,
+ FrameSize: u9,
},
- };
+ },
+ },
+ else => @compileError("RUNTIME_FUNCTION is not defined for this architecture"),
+};
- pub const KNONVOLATILE_CONTEXT_POINTERS = extern struct {
- X19: ?*DWORD64,
- X20: ?*DWORD64,
- X21: ?*DWORD64,
- X22: ?*DWORD64,
- X23: ?*DWORD64,
- X24: ?*DWORD64,
- X25: ?*DWORD64,
- X26: ?*DWORD64,
- X27: ?*DWORD64,
- X28: ?*DWORD64,
- Fp: ?*DWORD64,
- Lr: ?*DWORD64,
- D8: ?*DWORD64,
- D9: ?*DWORD64,
- D10: ?*DWORD64,
- D11: ?*DWORD64,
- D12: ?*DWORD64,
- D13: ?*DWORD64,
- D14: ?*DWORD64,
- D15: ?*DWORD64,
- };
+pub const KNONVOLATILE_CONTEXT_POINTERS = switch (native_arch) {
+ .x86_64 => extern struct {
+ FloatingContext: [16]?*M128A,
+ IntegerContext: [16]?*ULONG64,
+ },
+ .aarch64 => extern struct {
+ X19: ?*DWORD64,
+ X20: ?*DWORD64,
+ X21: ?*DWORD64,
+ X22: ?*DWORD64,
+ X23: ?*DWORD64,
+ X24: ?*DWORD64,
+ X25: ?*DWORD64,
+ X26: ?*DWORD64,
+ X27: ?*DWORD64,
+ X28: ?*DWORD64,
+ Fp: ?*DWORD64,
+ Lr: ?*DWORD64,
+ D8: ?*DWORD64,
+ D9: ?*DWORD64,
+ D10: ?*DWORD64,
+ D11: ?*DWORD64,
+ D12: ?*DWORD64,
+ D13: ?*DWORD64,
+ D14: ?*DWORD64,
+ D15: ?*DWORD64,
},
- else => struct {},
+ else => @compileError("KNONVOLATILE_CONTEXT_POINTERS is not defined for this architecture"),
};
pub const EXCEPTION_POINTERS = extern struct {