zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 2fa1a784914c536e45fdfaba9e357d0ef4f4f133 (tree)
parent 469bf6af07ad16fc5ac1421a302178a3fd508343
Author: Jacob Young <jacobly0@users.noreply.github.com>
Date:   Mon,  9 Feb 2026 14:19:27 -0500

Io.Dispatch: introduce grand central dispatch io impl

Diffstat:
M lib/std/Build/Watch/FsEvents.zig | 44 +++++++++++++-------------------------------
M lib/std/Io.zig | 112 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
A lib/std/Io/Dispatch.zig | 5038 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/std/Io/IoUring.zig | 6336 -------------------------------------------------------------------------------
M lib/std/Io/Kqueue.zig | 218 ++++++-------------------------------------------------------------------------
M lib/std/Io/Reader.zig | 2 +-
M lib/std/Io/Threaded.zig | 247 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
A lib/std/Io/Uring.zig | 6173 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/std/Io/fiber.zig | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/std/c.zig | 13 +++----------
M lib/std/c/darwin.zig | 28 +++-------------------------
A lib/std/c/darwin/dispatch.zig | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/std/debug.zig | 8 ++------
M lib/std/process/Environ.zig | 12 ++++++++++++
M src/crash_report.zig | 2 ++
15 files changed, 11980 insertions(+), 6762 deletions(-)

diff --git a/lib/std/Build/Watch/FsEvents.zig b/lib/std/Build/Watch/FsEvents.zig @@ -34,11 +34,11 @@ watch_paths: std.StringArrayHashMapUnmanaged([]const *std.Build.Step), /// The semaphore we use to block the thread calling `wait` until the callback determines a relevant /// event has occurred. This is retained across `wait` calls for simplicity and efficiency. -waiting_semaphore: dispatch_semaphore_t, +waiting_semaphore: dispatch.semaphore_t, /// This dispatch queue is created by us and executes serially. It exists exclusively to trigger the /// callbacks of the FSEventStream we create. This is not in use outside of `wait`, but is retained /// across `wait` calls for simplicity and efficiency. -dispatch_queue: dispatch_queue_t, +dispatch_queue: dispatch.queue_t, /// In theory, this field avoids race conditions. In practice, it is essentially unused at the time /// of writing. See the comment at the start of `wait` for details. since_event: FSEventStreamEventId, @@ -57,7 +57,7 @@ const ResolvedSymbols = struct { latency: CFTimeInterval, flags: FSEventStreamCreateFlags, ) callconv(.c) FSEventStreamRef, - FSEventStreamSetDispatchQueue: *const fn (stream: FSEventStreamRef, queue: dispatch_queue_t) callconv(.c) void, + FSEventStreamSetDispatchQueue: *const fn (stream: FSEventStreamRef, queue: dispatch.queue_t) callconv(.c) void, FSEventStreamStart: *const fn (stream: FSEventStreamRef) callconv(.c) bool, FSEventStreamStop: *const fn (stream: FSEventStreamRef) callconv(.c) void, FSEventStreamInvalidate: *const fn (stream: FSEventStreamRef) callconv(.c) void, @@ -80,7 +80,7 @@ const ResolvedSymbols = struct { kCFAllocatorUseContext: *const CFAllocatorRef, }; -pub fn init(cwd_path: []const u8) error{ OpenFrameworkFailed, MissingCoreServicesSymbol }!FsEvents { +pub fn init(cwd_path: []const u8) error{ OpenFrameworkFailed, MissingCoreServicesSymbol, SystemResources }!FsEvents { var core_services = 
std.DynLib.open("/System/Library/Frameworks/CoreServices.framework/CoreServices") catch return error.OpenFrameworkFailed; errdefer core_services.close(); @@ -96,8 +96,8 @@ pub fn init(cwd_path: []const u8) error{ OpenFrameworkFailed, MissingCoreService .paths_arena = .{}, .watch_roots = &.{}, .watch_paths = .empty, - .waiting_semaphore = dispatch_semaphore_create(0), - .dispatch_queue = dispatch_queue_create("zig-watch", .SERIAL), + .waiting_semaphore = dispatch.semaphore_create(0) orelse return error.SystemResources, + .dispatch_queue = dispatch.queue_create("zig-watch", .SERIAL()) orelse return error.SystemResources, // Not `.since_now`, because this means we can init `FsEvents` *before* we do work in order // to notice any changes which happened during said work. .since_event = resolved_symbols.FSEventsGetCurrentEventId(), @@ -106,8 +106,8 @@ pub fn init(cwd_path: []const u8) error{ OpenFrameworkFailed, MissingCoreService } pub fn deinit(fse: *FsEvents, gpa: Allocator, io: Io) void { - dispatch_release(fse.waiting_semaphore); - dispatch_release(fse.dispatch_queue); + fse.waiting_semaphore.as_object().release(); + fse.dispatch_queue.as_object().release(); fse.core_services.close(io); gpa.free(fse.watch_roots); @@ -275,9 +275,9 @@ pub fn wait(fse: *FsEvents, gpa: Allocator, timeout_ns: ?u64) error{ OutOfMemory defer rs.FSEventStreamInvalidate(event_stream); if (!rs.FSEventStreamStart(event_stream)) return error.StartFailed; defer rs.FSEventStreamStop(event_stream); - const result = dispatch_semaphore_wait(fse.waiting_semaphore, timeout: { - const ns = timeout_ns orelse break :timeout .forever; - break :timeout dispatch_time(.now, @intCast(ns)); + const result = fse.waiting_semaphore.wait(timeout: { + const ns = timeout_ns orelse break :timeout .FOREVER; + break :timeout .time(.NOW, @intCast(ns)); }); return switch (result) { 0 => .dirty, @@ -382,7 +382,7 @@ fn eventCallback( } if (any_dirty) { fse.since_event = rs.FSEventStreamGetLatestEventId(stream); - _ = 
dispatch_semaphore_signal(fse.waiting_semaphore); + _ = fse.waiting_semaphore.signal(); } } fn dirStartsWith(path: []const u8, prefix: []const u8) bool { @@ -392,25 +392,6 @@ fn dirStartsWith(path: []const u8, prefix: []const u8) bool { return true; // `path` is `/foo/bar/...`, `prefix` is `/foo/bar` } -const dispatch_time_t = enum(u64) { - now = 0, - forever = std.math.maxInt(u64), - _, -}; -extern fn dispatch_time(base: dispatch_time_t, delta_ns: i64) dispatch_time_t; - -const dispatch_semaphore_t = *opaque {}; -extern fn dispatch_semaphore_create(value: isize) dispatch_semaphore_t; -extern fn dispatch_semaphore_wait(dsema: dispatch_semaphore_t, timeout: dispatch_time_t) isize; -extern fn dispatch_semaphore_signal(dsema: dispatch_semaphore_t) isize; - -const dispatch_queue_t = *opaque {}; -const dispatch_queue_attr_t = ?*opaque { - const SERIAL: dispatch_queue_attr_t = null; -}; -extern fn dispatch_queue_create(label: [*:0]const u8, attr: dispatch_queue_attr_t) dispatch_queue_t; -extern fn dispatch_release(object: *anyopaque) void; - const CFAllocatorRef = ?*const opaque {}; const CFArrayRef = *const opaque {}; const CFStringRef = *const opaque {}; @@ -489,6 +470,7 @@ const FSEventStreamEventFlags = packed struct(u32) { _: u24 = 0, }; +const dispatch = std.c.dispatch; const std = @import("std"); const Io = std.Io; const assert = std.debug.assert; diff --git a/lib/std/Io.zig b/lib/std/Io.zig @@ -26,19 +26,17 @@ userdata: ?*anyopaque, vtable: *const VTable, pub const Threaded = @import("Io/Threaded.zig"); -pub const Evented = switch (builtin.os.tag) { - .linux => switch (builtin.cpu.arch) { - .x86_64, .aarch64 => IoUring, - else => void, // context-switching code not implemented yet - }, - .dragonfly, .freebsd, .netbsd, .openbsd, .driverkit, .ios, .maccatalyst, .macos, .tvos, .visionos, .watchos => switch (builtin.cpu.arch) { - .x86_64, .aarch64 => Kqueue, - else => void, // context-switching code not implemented yet - }, + +pub const fiber = 
@import("Io/fiber.zig"); +pub const Evented = if (fiber.supported) switch (builtin.os.tag) { + .linux => Uring, + .dragonfly, .freebsd, .netbsd, .openbsd => Kqueue, + .driverkit, .ios, .maccatalyst, .macos, .tvos, .visionos, .watchos => Dispatch, else => void, -}; +} else void; // context-switching code not implemented yet +pub const Dispatch = @import("Io/Dispatch.zig"); pub const Kqueue = @import("Io/Kqueue.zig"); -pub const IoUring = @import("Io/IoUring.zig"); +pub const Uring = @import("Io/Uring.zig"); pub const Reader = @import("Io/Reader.zig"); pub const Writer = @import("Io/Writer.zig"); @@ -51,6 +49,8 @@ pub const RwLock = @import("Io/RwLock.zig"); pub const Semaphore = @import("Io/Semaphore.zig"); pub const VTable = struct { + crashHandler: *const fn (?*anyopaque) void, + /// If it returns `null` it means `result` has been already populated and /// `await` will be a no-op. /// @@ -378,9 +378,9 @@ pub const Operation = union(enum) { pub const Pending = struct { node: List.DoubleNode, tag: Tag, - context: Context align(@max(@alignOf(usize), 4)), + userdata: Userdata align(@max(@alignOf(usize), 4)), - pub const Context = [3]usize; + pub const Userdata = [7]usize; }; pub const Completion = struct { @@ -431,7 +431,7 @@ pub const Batch = struct { submitted: Operation.List, pending: Operation.List, completed: Operation.List, - context: ?*anyopaque align(@max(@alignOf(?*anyopaque), 4)), + userdata: ?*anyopaque align(@max(@alignOf(?*anyopaque), 4)), /// After calling this, it is safe to unconditionally defer a call to /// `cancel`. `storage` is a pre-allocated buffer of undefined memory that @@ -453,40 +453,40 @@ pub const Batch = struct { .submitted = .empty, .pending = .empty, .completed = .empty, - .context = null, + .userdata = null, }; } /// Adds an operation to be performed at the next await call. /// Returns the index that will be returned by `next` after the operation completes. /// Asserts that no more than `storage.len` operations are active at a time. 
- pub fn add(b: *Batch, operation: Operation) u32 { - const index = b.unused.next; - b.addAt(index.toIndex(), operation); + pub fn add(batch: *Batch, operation: Operation) u32 { + const index = batch.unused.next; + batch.addAt(index.toIndex(), operation); return index; } /// Adds an operation to be performed at the next await call. /// After the operation completes, `next` will return `index`. /// Asserts that the operation at `index` is not active. - pub fn addAt(b: *Batch, index: u32, operation: Operation) void { - const storage = &b.storage[index]; + pub fn addAt(batch: *Batch, index: u32, operation: Operation) void { + const storage = &batch.storage[index]; const unused = storage.unused; switch (unused.prev) { - .none => b.unused.head = unused.next, - else => |prev_index| b.storage[prev_index.toIndex()].unused.next = unused.next, + .none => batch.unused.head = unused.next, + else => |prev_index| batch.storage[prev_index.toIndex()].unused.next = unused.next, } switch (unused.next) { - .none => b.unused.tail = unused.prev, - else => |next_index| b.storage[next_index.toIndex()].unused.prev = unused.prev, + .none => batch.unused.tail = unused.prev, + else => |next_index| batch.storage[next_index.toIndex()].unused.prev = unused.prev, } - switch (b.submitted.tail) { - .none => b.submitted.head = .fromIndex(index), - else => |tail_index| b.storage[tail_index.toIndex()].submission.node.next = .fromIndex(index), + switch (batch.submitted.tail) { + .none => batch.submitted.head = .fromIndex(index), + else => |tail_index| batch.storage[tail_index.toIndex()].submission.node.next = .fromIndex(index), } storage.* = .{ .submission = .{ .node = .{ .next = .none }, .operation = operation } }; - b.submitted.tail = .fromIndex(index); + batch.submitted.tail = .fromIndex(index); } pub const Completion = struct { @@ -502,22 +502,22 @@ pub const Batch = struct { /// /// Each completion returned from this function dequeues from the `Batch`. 
/// It is not required to dequeue all completions before awaiting again. - pub fn next(b: *Batch) ?Completion { - const index = b.completed.head; + pub fn next(batch: *Batch) ?Completion { + const index = batch.completed.head; if (index == .none) return null; - const storage = &b.storage[index.toIndex()]; + const storage = &batch.storage[index.toIndex()]; const completion = storage.completion; const next_index = completion.node.next; - b.completed.head = next_index; - if (next_index == .none) b.completed.tail = .none; + batch.completed.head = next_index; + if (next_index == .none) batch.completed.tail = .none; - const tail_index = b.unused.tail; + const tail_index = batch.unused.tail; switch (tail_index) { - .none => b.unused.head = index, - else => b.storage[tail_index.toIndex()].unused.next = index, + .none => batch.unused.head = index, + else => batch.storage[tail_index.toIndex()].unused.next = index, } storage.* = .{ .unused = .{ .prev = tail_index, .next = .none } }; - b.unused.tail = index; + batch.unused.tail = index; return .{ .index = index.toIndex(), .result = completion.result }; } @@ -529,8 +529,8 @@ pub const Batch = struct { /// concurrency into the batched operations, but unlike `awaitConcurrent`, /// does not require it, and therefore cannot fail with /// `error.ConcurrencyUnavailable`. - pub fn awaitAsync(b: *Batch, io: Io) Cancelable!void { - return io.vtable.batchAwaitAsync(io.userdata, b); + pub fn awaitAsync(batch: *Batch, io: Io) Cancelable!void { + return io.vtable.batchAwaitAsync(io.userdata, batch); } pub const AwaitConcurrentError = ConcurrentError || Cancelable || Timeout.Error; @@ -542,8 +542,8 @@ pub const Batch = struct { /// Unlike `awaitAsync`, this function requires the implementation to /// perform the operations concurrently and therefore can fail with /// `error.ConcurrencyUnavailable`. 
- pub fn awaitConcurrent(b: *Batch, io: Io, timeout: Timeout) AwaitConcurrentError!void { - return io.vtable.batchAwaitConcurrent(io.userdata, b, timeout); + pub fn awaitConcurrent(batch: *Batch, io: Io, timeout: Timeout) AwaitConcurrentError!void { + return io.vtable.batchAwaitConcurrent(io.userdata, batch, timeout); } /// Requests all pending operations to be interrupted, then waits for all @@ -552,28 +552,28 @@ pub const Batch = struct { /// canceled operations will be absent from the iteration. Some operations /// may have successfully completed regardless of the cancel request and /// will appear in the iteration. - pub fn cancel(b: *Batch, io: Io) void { + pub fn cancel(batch: *Batch, io: Io) void { { // abort pending submissions - var tail_index = b.unused.tail; - defer b.unused.tail = tail_index; - var index = b.submitted.head; - errdefer b.submissions.head = index; + var tail_index = batch.unused.tail; + defer batch.unused.tail = tail_index; + var index = batch.submitted.head; + errdefer batch.submissions.head = index; while (index != .none) { - const next_index = b.storage[index.toIndex()].submission.node.next; + const next_index = batch.storage[index.toIndex()].submission.node.next; switch (tail_index) { - .none => b.unused.head = index, - else => b.storage[tail_index.toIndex()].unused.next = index, + .none => batch.unused.head = index, + else => batch.storage[tail_index.toIndex()].unused.next = index, } - b.storage[index.toIndex()] = .{ .unused = .{ .prev = tail_index, .next = .none } }; + batch.storage[index.toIndex()] = .{ .unused = .{ .prev = tail_index, .next = .none } }; tail_index = index; index = next_index; } - b.submitted = .{ .head = .none, .tail = .none }; + batch.submitted = .{ .head = .none, .tail = .none }; } - io.vtable.batchCancel(io.userdata, b); - assert(b.submitted.head == .none and b.submitted.tail == .none); - assert(b.pending.head == .none and b.pending.tail == .none); - assert(b.context == null); // that was the last chance to 
deallocate resources + io.vtable.batchCancel(io.userdata, batch); + assert(batch.submitted.head == .none and batch.submitted.tail == .none); + assert(batch.pending.head == .none and batch.pending.tail == .none); + assert(batch.userdata == null); // that was the last chance to deallocate resources } }; @@ -643,7 +643,7 @@ pub const Limit = enum(usize) { } pub fn nonzero(l: Limit) bool { - return @intFromEnum(l) > 0; + return l != .nothing; } /// Return a new limit reduced by `amount` or return `null` indicating diff --git a/lib/std/Io/Dispatch.zig b/lib/std/Io/Dispatch.zig @@ -0,0 +1,5038 @@ +const Alignment = std.mem.Alignment; +const Allocator = std.mem.Allocator; +const Argv0 = Io.Threaded.Argv0; +const assert = std.debug.assert; +const builtin = @import("builtin"); +const c = std.c; +const ChdirError = Io.Threaded.ChdirError; +const clockToPosix = Io.Threaded.clockToPosix; +const closeFd = Io.Threaded.closeFd; +const Csprng = Io.Threaded.Csprng; +const default_PATH = Io.Threaded.default_PATH; +const Dir = Io.Dir; +const Environ = Io.Threaded.Environ; +const errnoBug = Io.Threaded.errnoBug; +const Evented = @This(); +const fallbackSeed = Io.Threaded.fallbackSeed; +const File = Io.File; +const Io = std.Io; +const iovec = std.posix.iovec; +const iovec_const = std.posix.iovec_const; +const log = std.log.scoped(.dispatch); +const max_iovecs_len = Io.Threaded.max_iovecs_len; +const nanosecondsFromPosix = Io.Threaded.nanosecondsFromPosix; +const net = Io.net; +const pathToPosix = Io.Threaded.pathToPosix; +const process = std.process; +const recoverableOsBugDetected = Io.Threaded.recoverableOsBugDetected; +const setTimestampToPosix = Io.Threaded.setTimestampToPosix; +const splat_buffer_size = Io.Threaded.splat_buffer_size; +const statFromPosix = Io.Threaded.statFromPosix; +const statusToTerm = Io.Threaded.statusToTerm; +const std = @import("std"); +const timestampFromPosix = Io.Threaded.timestampFromPosix; +const unexpectedErrno = std.posix.unexpectedErrno; +const 
UseSendfile = Io.Threaded.UseSendfile; +const UseFcopyfile = Io.Threaded.UseFcopyfile; + +/// Empirically saw >4KB being used by the llvm aarch64 backend. +const main_loop_stack_size = 8 * 1024; + +queue: c.dispatch.queue_t, +backing_allocator_needs_mutex: bool, +backing_allocator_mutex: Mutex, +/// Does not need to be thread-safe if not used elsewhere. +backing_allocator: Allocator, +main_fiber: Fiber, +main_loop_stack: [*]align(builtin.target.stackAlignment()) u8, +exit_semaphore: c.dispatch.semaphore_t, + +use_sendfile: UseSendfile, +use_fcopyfile: UseFcopyfile, +leeway: u64, + +futexes: [1 << 8]Futex, + +init_stderr_writer: c.dispatch.once_t, +stderr_mutex: Mutex, +stderr_writer: File.Writer, +stderr_mode: Io.Terminal.Mode, + +scan_environ: c.dispatch.once_t, +environ: Environ, + +open_dev_null: c.dispatch.once_t, +dev_null_file: File.OpenError!File, + +csprng_mutex: Mutex, +csprng: Csprng, + +const Thread = struct { + main_context: Io.fiber.Context, + current_context: ?*Io.fiber.Context, + seed_csprng: c.dispatch.once_t, + csprng: Csprng, + + threadlocal var self: Thread = .{ + .main_context = undefined, + .current_context = null, + .seed_csprng = .init, + .csprng = undefined, + }; + + noinline fn current() *Thread { + return &self; + } + + fn currentFiber(thread: *Thread) *Fiber { + assert(thread.current_context != &thread.main_context); + return @fieldParentPtr("context", thread.current_context.?); + } + + const List = struct { + allocated: []Thread, + reserved: u32, + active: u32, + }; +}; + +const Fiber = struct { + required_align: void align(4), + evented: *Evented, + context: Io.fiber.Context, + await_count: i32, + link: union { + awaiter: ?*Fiber, + group: struct { prev: ?*Fiber, next: ?*Fiber }, + }, + status: union(enum) { + queue_next: ?*Fiber, + awaiting_group: Group, + }, + cancel_status: CancelStatus, + cancel_protection: CancelProtection, + + var next_name: u64 = 0; + + const CancelStatus = packed struct(usize) { + requested: bool, + awaiting: 
Awaiting, + + const unrequested: CancelStatus = .{ .requested = false, .awaiting = .nothing }; + + const Awaiting = enum(@Int(.unsigned, @bitSizeOf(usize) - shift)) { + nothing = 0, + group = 1, + select = 2, + _, + + const shift = 1; + + fn subWrap(lhs: Awaiting, rhs: Awaiting) Awaiting { + return @enumFromInt(@intFromEnum(lhs) -% @intFromEnum(rhs)); + } + + fn fromCancelable(cancelable: *Cancelable) Awaiting { + return @enumFromInt(@shrExact(@intFromPtr(cancelable), shift)); + } + + fn toCancelable(awaiting: Awaiting) *Cancelable { + return @ptrFromInt(@shlExact(@as(usize, @intFromEnum(awaiting)), shift)); + } + }; + + fn changeAwaiting( + cancel_status: *CancelStatus, + old_awaiting: Awaiting, + new_awaiting: Awaiting, + ) bool { + const old_cancel_status = @atomicRmw(CancelStatus, cancel_status, .Add, .{ + .requested = false, + .awaiting = new_awaiting.subWrap(old_awaiting), + }, .release); + assert(old_cancel_status.awaiting == old_awaiting); + return old_cancel_status.requested; + } + }; + + const CancelProtection = packed struct { + user: Io.CancelProtection, + acknowledged: bool, + + const unblocked: CancelProtection = .{ .user = .unblocked, .acknowledged = false }; + + fn check(cancel_protection: CancelProtection) Io.CancelProtection { + return @enumFromInt(@intFromBool(cancel_protection != unblocked)); + } + + fn acknowledge(cancel_protection: *CancelProtection) void { + assert(!cancel_protection.acknowledged); + cancel_protection.acknowledged = true; + } + + fn recancel(cancel_protection: *CancelProtection) void { + assert(cancel_protection.acknowledged); + cancel_protection.acknowledged = false; + } + + test check { + try std.testing.expectEqual(Io.CancelProtection.unblocked, check(.unblocked)); + try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .unblocked, + .acknowledged = true, + })); + try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .blocked, + .acknowledged = false, + })); + try 
std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .blocked, + .acknowledged = true, + })); + } + }; + + const finished: ?*Fiber = @ptrFromInt(@alignOf(Fiber)); + + const max_result_align: Alignment = .@"16"; + const max_result_size = max_result_align.forward(512); + /// This includes any stack realignments that need to happen, and also the + /// initial frame return address slot and argument frame, depending on target. + const min_stack_size = 60 * 1024 * 1024; + const max_context_align: Alignment = .@"16"; + const max_context_size = max_context_align.forward(1024); + const max_closure_size: usize = @sizeOf(AsyncClosure); + const max_closure_align: Alignment = .of(AsyncClosure); + const allocation_size = std.mem.alignForward( + usize, + max_closure_align.max(max_context_align).forward( + max_result_align.forward(@sizeOf(Fiber)) + max_result_size + min_stack_size, + ) + max_closure_size + max_context_size, + std.heap.page_size_max, + ); + + fn create(ev: *Evented) error{OutOfMemory}!*Fiber { + return @ptrCast(try ev.allocator().alignedAlloc(u8, .of(Fiber), allocation_size)); + } + + fn destroy(fiber: *Fiber, ev: *Evented) void { + assert(fiber.status.queue_next == null); + ev.allocator().free(fiber.allocatedSlice()); + } + + fn allocatedSlice(f: *Fiber) []align(@alignOf(Fiber)) u8 { + return @as([*]align(@alignOf(Fiber)) u8, @ptrCast(f))[0..allocation_size]; + } + + fn allocatedEnd(f: *Fiber) [*]u8 { + const allocated_slice = f.allocatedSlice(); + return allocated_slice[allocated_slice.len..].ptr; + } + + fn resultPointer(f: *Fiber, comptime Result: type) *Result { + return @ptrCast(@alignCast(f.resultBytes(.of(Result)))); + } + + fn resultBytes(f: *Fiber, alignment: Alignment) [*]u8 { + return @ptrFromInt(alignment.forward(@intFromPtr(f) + @sizeOf(Fiber))); + } + + const Queue = struct { head: *Fiber, tail: *Fiber }; + + /// Like a `*Fiber`, but 2 bits smaller than a pointer (because the LSBs are always 0 due to + /// alignment) so that those 
two bits can be used in a `packed struct`. + const PackedPtr = enum(@Int(.unsigned, @bitSizeOf(usize) - 2)) { + null = 0, + all_ones = std.math.maxInt(@Int(.unsigned, @bitSizeOf(usize) - 2)), + _, + + const Split = packed struct(usize) { low: u2, high: PackedPtr }; + fn pack(ptr: ?*Fiber) PackedPtr { + const split: Split = @bitCast(@intFromPtr(ptr)); + assert(split.low == 0); + return split.high; + } + fn unpack(ptr: PackedPtr) ?*Fiber { + const split: Split = .{ .low = 0, .high = ptr }; + return @ptrFromInt(@as(usize, @bitCast(split))); + } + }; + + fn requestCancel(fiber: *Fiber, ev: *Evented) void { + const cancel_status = @atomicRmw( + Fiber.CancelStatus, + &fiber.cancel_status, + .Or, + .{ .requested = true, .awaiting = .nothing }, + .acquire, + ); + assert(!cancel_status.requested); + switch (cancel_status.awaiting) { + .nothing => {}, + .group => { + // The awaiter received a cancelation request while awaiting a group, + // so propagate the cancelation to the group. + if (fiber.status.awaiting_group.cancel(ev, null)) { + fiber.status = .{ .queue_next = null }; + ev.queue.async(fiber, &Fiber.@"resume"); + } + }, + .select => if (@atomicRmw(i32, &fiber.await_count, .Add, 1, .monotonic) == -1) { + ev.queue.async(fiber, &Fiber.@"resume"); + }, + _ => |awaiting| awaiting.toCancelable().canceled(), + } + } + + fn @"resume"(context: ?*anyopaque) callconv(.c) void { + const fiber: *Fiber = @ptrCast(@alignCast(context)); + const thread: *Thread = .current(); + const message: SwitchMessage = .{ + .contexts = .{ + .old = &thread.main_context, + .new = &fiber.context, + }, + .pending_task = .nothing, + }; + contextSwitch(&message).handle(fiber.evented); + } +}; + +pub fn allocator(ev: *Evented) std.mem.Allocator { + return if (ev.backing_allocator_needs_mutex) .{ + .ptr = ev, + .vtable = &.{ + .alloc = alloc, + .resize = resize, + .remap = remap, + .free = free, + }, + } else ev.backing_allocator; +} + +fn alloc(userdata: *anyopaque, len: usize, alignment: 
std.mem.Alignment, ret_addr: usize) ?[*]u8 { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + ev.backing_allocator_mutex.lockUncancelable(ev); + defer ev.backing_allocator_mutex.unlock(); + return ev.backing_allocator.rawAlloc(len, alignment, ret_addr); +} + +fn resize( + userdata: *anyopaque, + memory: []u8, + alignment: std.mem.Alignment, + new_len: usize, + ret_addr: usize, +) bool { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + ev.backing_allocator_mutex.lockUncancelable(ev); + defer ev.backing_allocator_mutex.unlock(); + return ev.backing_allocator.rawResize(memory, alignment, new_len, ret_addr); +} + +fn remap( + userdata: *anyopaque, + memory: []u8, + alignment: Alignment, + new_len: usize, + ret_addr: usize, +) ?[*]u8 { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + ev.backing_allocator_mutex.lockUncancelable(ev); + defer ev.backing_allocator_mutex.unlock(); + return ev.backing_allocator.rawRemap(memory, alignment, new_len, ret_addr); +} + +fn free(userdata: *anyopaque, memory: []u8, alignment: std.mem.Alignment, ret_addr: usize) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + ev.backing_allocator_mutex.lockUncancelable(ev); + defer ev.backing_allocator_mutex.unlock(); + return ev.backing_allocator.rawFree(memory, alignment, ret_addr); +} + +pub fn io(ev: *Evented) Io { + return .{ + .userdata = ev, + .vtable = &.{ + .crashHandler = crashHandler, + + .async = async, + .concurrent = concurrent, + .await = await, + .cancel = cancel, + + .groupAsync = groupAsync, + .groupConcurrent = groupConcurrent, + .groupAwait = groupAwait, + .groupCancel = groupCancel, + + .recancel = recancel, + .swapCancelProtection = swapCancelProtection, + .checkCancel = checkCancel, + + .select = select, + + .futexWait = futexWait, + .futexWaitUncancelable = futexWaitUncancelable, + .futexWake = futexWake, + + .operate = operate, + .batchAwaitAsync = batchAwaitAsync, + .batchAwaitConcurrent = batchAwaitConcurrent, + .batchCancel = 
batchCancel, + + .dirCreateDir = dirCreateDir, + .dirCreateDirPath = dirCreateDirPath, + .dirCreateDirPathOpen = dirCreateDirPathOpen, + .dirOpenDir = dirOpenDir, + .dirStat = dirStat, + .dirStatFile = dirStatFile, + .dirAccess = dirAccess, + .dirCreateFile = dirCreateFile, + .dirCreateFileAtomic = dirCreateFileAtomic, + .dirOpenFile = dirOpenFile, + .dirClose = dirClose, + .dirRead = dirRead, + .dirRealPath = dirRealPath, + .dirRealPathFile = dirRealPathFile, + .dirDeleteFile = dirDeleteFile, + .dirDeleteDir = dirDeleteDir, + .dirRename = dirRename, + .dirRenamePreserve = dirRenamePreserve, + .dirSymLink = dirSymLink, + .dirReadLink = dirReadLink, + .dirSetOwner = dirSetOwner, + .dirSetFileOwner = dirSetFileOwner, + .dirSetPermissions = dirSetPermissions, + .dirSetFilePermissions = dirSetFilePermissions, + .dirSetTimestamps = dirSetTimestamps, + .dirHardLink = dirHardLink, + + .fileStat = fileStat, + .fileLength = fileLength, + .fileClose = fileClose, + .fileWritePositional = fileWritePositional, + .fileWriteFileStreaming = fileWriteFileStreaming, + .fileWriteFilePositional = fileWriteFilePositional, + .fileReadPositional = fileReadPositional, + .fileSeekBy = fileSeekBy, + .fileSeekTo = fileSeekTo, + .fileSync = fileSync, + .fileIsTty = fileIsTty, + .fileEnableAnsiEscapeCodes = fileEnableAnsiEscapeCodes, + .fileSupportsAnsiEscapeCodes = fileIsTty, + .fileSetLength = fileSetLength, + .fileSetOwner = fileSetOwner, + .fileSetPermissions = fileSetPermissions, + .fileSetTimestamps = fileSetTimestamps, + .fileLock = fileLock, + .fileTryLock = fileTryLock, + .fileUnlock = fileUnlock, + .fileDowngradeLock = fileDowngradeLock, + .fileRealPath = fileRealPath, + .fileHardLink = fileHardLink, + + .fileMemoryMapCreate = fileMemoryMapCreate, + .fileMemoryMapDestroy = fileMemoryMapDestroy, + .fileMemoryMapSetLength = fileMemoryMapSetLength, + .fileMemoryMapRead = fileMemoryMapRead, + .fileMemoryMapWrite = fileMemoryMapWrite, + + .processExecutableOpen = processExecutableOpen, + 
.processExecutablePath = processExecutablePath, + .lockStderr = lockStderr, + .tryLockStderr = tryLockStderr, + .unlockStderr = unlockStderr, + .processCurrentPath = processCurrentPath, + .processSetCurrentDir = processSetCurrentDir, + .processReplace = processReplace, + .processReplacePath = processReplacePath, + .processSpawn = processSpawn, + .processSpawnPath = processSpawnPath, + .childWait = childWait, + .childKill = childKill, + + .progressParentFile = progressParentFile, + + .now = now, + .clockResolution = clockResolution, + .sleep = sleep, + + .random = random, + .randomSecure = randomSecure, + + .netListenIp = netListenIpUnavailable, + .netAccept = netAcceptUnavailable, + .netBindIp = netBindIpUnavailable, + .netConnectIp = netConnectIpUnavailable, + .netListenUnix = netListenUnixUnavailable, + .netConnectUnix = netConnectUnixUnavailable, + .netSocketCreatePair = netSocketCreatePairUnavailable, + .netSend = netSendUnavailable, + .netReceive = netReceiveUnavailable, + .netRead = netReadUnavailable, + .netWrite = netWriteUnavailable, + .netWriteFile = netWriteFileUnavailable, + .netClose = netClose, + .netShutdown = netShutdownUnavailable, + .netInterfaceNameResolve = netInterfaceNameResolveUnavailable, + .netInterfaceName = netInterfaceNameUnavailable, + .netLookup = netLookupUnavailable, + }, + }; +} + +pub const InitOptions = struct { + backing_allocator_needs_mutex: bool = true, + queue: ?c.dispatch.queue_t = null, + /// Upper limit on the allowable delay in processing timeouts in order to improve power + /// consumption and system performance. + leeway: Io.Duration = .fromMilliseconds(10), + + /// Affects the following operations: + /// * `processExecutablePath` on OpenBSD and Haiku. 
+ argv0: Argv0 = .empty, + /// Affects the following operations: + /// * `fileIsTty` + /// * `processSpawn`, `processSpawnPath`, `processReplace`, `processReplacePath` + environ: process.Environ = .empty, +}; + +pub fn init(ev: *Evented, backing_allocator: Allocator, options: InitOptions) !void { + const queue = if (options.queue) |queue| queue: { + queue.as_object().retain(); + break :queue queue; + } else c.dispatch.queue_create("org.ziglang.std.Io.Dispatch", .CONCURRENT()) orelse + return error.SystemResources; + errdefer queue.as_object().release(); + const main_loop_stack = try backing_allocator.alignedAlloc( + u8, + .fromByteUnits(builtin.target.stackAlignment()), + main_loop_stack_size, + ); + errdefer backing_allocator.free(main_loop_stack); + const exit_semaphore = c.dispatch.semaphore_create(0) orelse return error.SystemResources; + errdefer exit_semaphore.as_object().release(); + ev.* = .{ + .queue = queue, + .backing_allocator_needs_mutex = options.backing_allocator_needs_mutex, + .backing_allocator_mutex = undefined, + .backing_allocator = backing_allocator, + .main_fiber = .{ + .required_align = {}, + .evented = ev, + .context = undefined, + .await_count = 0, + .link = .{ .awaiter = null }, + .status = .{ .queue_next = null }, + .cancel_status = .unrequested, + .cancel_protection = .unblocked, + }, + .main_loop_stack = main_loop_stack.ptr, + .exit_semaphore = exit_semaphore, + + .use_fcopyfile = .default, + .use_sendfile = .default, + .leeway = std.math.lossyCast(u64, options.leeway.toNanoseconds()), + + .futexes = undefined, + + .init_stderr_writer = .init, + .stderr_mutex = undefined, + .stderr_writer = .{ + .io = ev.io(), + .interface = Io.File.Writer.initInterface(&.{}), + .file = .stderr(), + .mode = .streaming, + }, + .stderr_mode = .no_color, + + .scan_environ = if (options.environ.block.isEmpty()) .done else .init, + .environ = .{ .process_environ = options.environ }, + + .open_dev_null = .init, + .dev_null_file = error.FileNotFound, + + 
.csprng_mutex = undefined, + .csprng = .uninitialized, + }; + try ev.backing_allocator_mutex.init(queue); + errdefer ev.backing_allocator_mutex.deinit(); + var initialized_futexes: usize = 0; + errdefer for (ev.futexes[0..initialized_futexes]) |*futex| futex.deinit(); + for (&ev.futexes) |*futex| { + try futex.init(queue); + initialized_futexes += 1; + } + try ev.stderr_mutex.init(queue); + errdefer ev.stderr_mutex.deinit(); + try ev.csprng_mutex.init(queue); + errdefer ev.csprng_mutex.deinit(); + const thread: *Thread = .current(); + thread.main_context = switch (builtin.cpu.arch) { + .aarch64 => .{ + .sp = @intFromPtr(main_loop_stack[main_loop_stack_size..].ptr), + .fp = @intFromPtr(ev), + .pc = @intFromPtr(&mainLoopEntry), + }, + .x86_64 => .{ + .rsp = @intFromPtr(main_loop_stack[main_loop_stack_size..].ptr) - 8, + .rbp = @intFromPtr(ev), + .rip = @intFromPtr(&mainLoopEntry), + }, + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + }; + thread.current_context = &ev.main_fiber.context; +} + +/// Releases everything acquired by `init`, in reverse order of acquisition. Must be called from +/// the main fiber (asserted). First switches to the main loop with an `exit` request, then +/// releases the mutexes, the futexes, the exit semaphore, the main loop stack, and the reference +/// held on the queue. +pub fn deinit(ev: *Evented) void { + assert(Thread.current().currentFiber() == &ev.main_fiber); + ev.yield(.exit); + ev.csprng_mutex.deinit(); + if (ev.dev_null_file) |file| fileClose(ev, &.{file}) else |_| {} + ev.stderr_mutex.deinit(); + for (&ev.futexes) |*futex| futex.deinit(); + // Counterpart of `backing_allocator_mutex.init` in `init`; without it the mutex's serial + // queue is never released. + ev.backing_allocator_mutex.deinit(); + ev.exit_semaphore.as_object().release(); + ev.backing_allocator.free(ev.main_loop_stack[0..main_loop_stack_size]); + ev.queue.as_object().release(); +} + +/// Switches from the current context to this thread's main loop context, handing it +/// `pending_task` to perform. When the caller is switched back to, the message delivered by that +/// switch is handled before returning. +fn yield(ev: *Evented, pending_task: SwitchMessage.PendingTask) void { + const thread: *Thread = .current(); + const message: SwitchMessage = .{ + .contexts = .{ + .old = thread.current_context.?, + .new = &thread.main_context, + }, + .pending_task = pending_task, + }; + contextSwitch(&message).handle(ev); +} + +/// Naked entry point of the main loop context: moves the `Evented` pointer that `init` placed in +/// the frame pointer register into the first argument register, clears the frame pointer, and +/// jumps to `mainLoop`. +fn mainLoopEntry() callconv(.naked) void { + switch (builtin.cpu.arch) { + .aarch64 => asm volatile ( + \\ mov x0, fp + \\ mov fp, #0 + \\ b %[mainLoop] + : + : [mainLoop] "X" (&mainLoop), + ), + .x86_64 
=> asm volatile ( + \\ movq %%rbp, %%rdi + \\ xor %%ebp, %%ebp + \\ jmp %[mainLoop:P] + : + : [mainLoop] "X" (&mainLoop), + ), + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + } +} + +/// Body of the main loop context: handles the switch message that entered it, blocks until +/// `exit_semaphore` is signaled, then resumes the main fiber. +fn mainLoop(ev: *Evented, message: *const SwitchMessage) callconv(.c) noreturn { + message.handle(ev); + assert(ev.exit_semaphore.wait(.FOREVER) == 0); + Fiber.@"resume"(&ev.main_fiber); + unreachable; // switched to dead fiber +} + +/// Message handed across a context switch: the pair of contexts involved plus a task that the +/// destination context performs, via `handle`, on behalf of the context that was left. +const SwitchMessage = struct { + contexts: Io.fiber.Switch, + pending_task: PendingTask, + + const PendingTask = union(enum) { + nothing, + await: u31, + activate: c.dispatch.object_t, + @"resume": c.dispatch.object_t, + group_await: Group, + group_cancel: Group, + mutex_wait: *Mutex.Waiter, + futex_wait: *Futex.Waiter, + futex_wake: *Futex.Waker, + sleep: c.dispatch.time_t, + destroy, + exit, + }; + + /// Records `contexts.new` as the thread's current context, then performs `pending_task`. + /// Tasks that refer to "the fiber" mean the one that owns `contexts.old`. + fn handle(message: *const SwitchMessage, ev: *Evented) void { + const thread: *Thread = .current(); + thread.current_context = message.contexts.new; + switch (message.pending_task) { + .nothing => {}, + .await => |count| { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old)); + if (@atomicRmw(i32, &fiber.await_count, .Sub, count, .monotonic) > 0) + ev.queue.async(fiber, &Fiber.@"resume"); + }, + .activate => |object| object.activate(), + .@"resume" => |object| object.@"resume"(), + .group_await => |group| { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old)); + if (group.await(ev, fiber)) ev.queue.async(fiber, &Fiber.@"resume"); + }, + .group_cancel => |group| { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old)); + if (group.cancel(ev, fiber)) ev.queue.async(fiber, &Fiber.@"resume"); + }, + .mutex_wait => |waiter| { + waiter.sleeper = + .init(ev.queue, @alignCast(@fieldParentPtr("context", message.contexts.old))); + switch (waiter.sleeper.fiber.cancel_protection.check()) { + .unblocked => {}, + .blocked => waiter.cancelable = 
.blocked, + } + waiter.mutex.queue.async(waiter, &Mutex.Waiter.add); + }, + .futex_wait => |waiter| { + waiter.sleeper = + .init(ev.queue, @alignCast(@fieldParentPtr("context", message.contexts.old))); + switch (waiter.sleeper.fiber.cancel_protection.check()) { + .unblocked => {}, + .blocked => waiter.cancelable = .blocked, + } + waiter.futex.queue.async(waiter, &Futex.Waiter.add); + }, + .futex_wake => |waker| { + waker.sleeper = + .init(ev.queue, @alignCast(@fieldParentPtr("context", message.contexts.old))); + waker.futex.queue.async(waker, &Futex.Waker.remove); + }, + .sleep => |when| { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old)); + when.after(ev.queue, fiber, &Fiber.@"resume"); + }, + .destroy => { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old)); + fiber.destroy(ev); + }, + .exit => _ = ev.exit_semaphore.signal(), + } + } +}; + +/// Performs the switch described by `message.contexts` and returns the `SwitchMessage` that +/// contains the `Io.fiber.Switch` returned by `Io.fiber.contextSwitch`. +inline fn contextSwitch(message: *const SwitchMessage) *const SwitchMessage { + return @fieldParentPtr("contexts", Io.fiber.contextSwitch(&message.contexts)); +} + +/// Cancelation hook embedded in a waiter. `cancel` is either a callback to be run on `queue` +/// (see `canceled`) or one of the sentinel values `is_blocked` / `is_requested`. +const Cancelable = struct { + required_align: void align(2) = {}, + queue: c.dispatch.queue_t, + cancel: c.dispatch.function_t, + + const is_blocked: c.dispatch.function_t = + @ptrFromInt(@typeInfo(c.dispatch.function_t).pointer.alignment * 1); + const is_requested: c.dispatch.function_t = + @ptrFromInt(@typeInfo(c.dispatch.function_t).pointer.alignment * 2); + + const blocked: Cancelable = .{ .queue = undefined, .cancel = is_blocked }; + + const AwaitError = error{CancelRequested}; + + /// Publishes `cancelable` as what `fiber` is currently awaiting. Does nothing when + /// `cancelable` is blocked. If cancelation was already requested on the fiber, marks + /// `cancelable` as requested and returns `error.CancelRequested` instead. + fn await(cancelable: *Cancelable, fiber: *Fiber) AwaitError!void { + const function = cancelable.cancel; + assert(function != is_requested); + if (function == is_blocked) { + @branchHint(.unlikely); + return; + } + if (@cmpxchgStrong( + Fiber.CancelStatus, + &fiber.cancel_status, + .{ .requested = false, .awaiting = .nothing }, + .{ .requested = false, .awaiting = .fromCancelable(cancelable) }, + .release, + 
.monotonic, + )) |cancel_status| { + assert(cancel_status.requested and cancel_status.awaiting == .nothing); + cancelable.cancel = is_requested; + return error.CancelRequested; + } + } + + /// Schedules the `cancel` callback on `queue` with `cancelable` as its context. + fn canceled(cancelable: *Cancelable) void { + assert(cancelable.cancel != is_blocked); + assert(cancelable.cancel != is_requested); + cancelable.queue.async(cancelable, cancelable.cancel); + } + + /// Returns `error.Canceled`, after acknowledging on `fiber.cancel_protection`, when + /// `cancelable` has been marked as requested. + fn check(cancelable: *Cancelable, fiber: *Fiber) Io.Cancelable!void { + if (cancelable.cancel == is_requested) { + @branchHint(.unlikely); + fiber.cancel_protection.acknowledge(); + return error.Canceled; + } + } +}; + +/// A suspended fiber together with a retained queue on which to resume it. +const Sleeper = struct { + queue: c.dispatch.queue_t, + fiber: *Fiber, + + fn init(queue: c.dispatch.queue_t, fiber: *Fiber) Sleeper { + queue.as_object().retain(); + return .{ .queue = queue, .fiber = fiber }; + } + + /// Schedules the fiber's resumption on the retained queue, then drops the reference taken + /// by `init`. + fn wake(context: ?*anyopaque) callconv(.c) void { + const sleeper: *Sleeper = @ptrCast(@alignCast(context)); + const queue = sleeper.queue; + sleeper.queue = undefined; + queue.async(sleeper.fiber, &Fiber.@"resume"); + queue.as_object().release(); + } +}; + +/// Fiber mutex. The `waiters` list is manipulated by `Waiter` callbacks that are scheduled on +/// the serial `queue`. +const Mutex = struct { + /// including the locker + num_waiters: usize, + queue: c.dispatch.queue_t, + waiters: std.DoublyLinkedList, + + const Waiter = struct { + sleeper: Sleeper = undefined, + cancelable: Cancelable, + mutex: *Mutex, + node: std.DoublyLinkedList.Node = .{}, + + /// Queue callback: tries to enqueue the waiter. If cancelation was already requested, + /// wakes it immediately and gives back its `num_waiters` count. + fn add(context: ?*anyopaque) callconv(.c) void { + const waiter: *Waiter = @ptrCast(@alignCast(context)); + waiter.tryAdd() catch |err| switch (err) { + error.CancelRequested => { + waiter.wake(); + assert(@atomicRmw(usize, &waiter.mutex.num_waiters, .Sub, 1, .monotonic) >= 1); + }, + }; + } + + fn tryAdd(waiter: *Waiter) Cancelable.AwaitError!void { + switch (@atomicLoad(usize, &waiter.mutex.num_waiters, .acquire)) { + 0 => unreachable, + 1 => return waiter.wake(), // already locked exclusively + else => try waiter.cancelable.await(waiter.sleeper.fiber), + } + waiter.mutex.waiters.append(&waiter.node); + } + + /// Cancelation callback: marks the request, unlinks the waiter, wakes it, and gives back + /// its `num_waiters` count. + fn 
canceled(context: ?*anyopaque) callconv(.c) void { + const cancelable: *Cancelable = @ptrCast(@alignCast(context)); + cancelable.cancel = Cancelable.is_requested; + const waiter: *Waiter = @fieldParentPtr("cancelable", cancelable); + assert(@atomicRmw( + Fiber.CancelStatus, + &waiter.sleeper.fiber.cancel_status, + .Xchg, + .{ .requested = true, .awaiting = .nothing }, + .monotonic, + ) == Fiber.CancelStatus{ .requested = true, .awaiting = .fromCancelable(cancelable) }); + const mutex = waiter.mutex; + mutex.waiters.remove(&waiter.node); + waiter.wake(); + assert(@atomicRmw(usize, &mutex.num_waiters, .Sub, 1, .monotonic) >= 1); + } + + /// Queue callback scheduled by `unlock`: wakes the first waiter that has no cancelation + /// request pending. Waiters found with a pending request are moved to the back of the + /// list and stay queued; `stop_node` remembers the first of them so the scan ends after + /// one pass. + fn remove(context: ?*anyopaque) callconv(.c) void { + const mutex: *Mutex = @ptrCast(@alignCast(context)); + var stop_node: ?*std.DoublyLinkedList.Node = null; + while (mutex.waiters.first != stop_node) { + @branchHint(.likely); + const waiter: *Waiter = @fieldParentPtr("node", mutex.waiters.popFirst().?); + if (waiter.cancelable.cancel != Cancelable.is_blocked) { + @branchHint(.likely); + const cancel_status = @atomicRmw( + Fiber.CancelStatus, + &waiter.sleeper.fiber.cancel_status, + .And, + .{ .requested = true, .awaiting = .nothing }, + .monotonic, + ); + assert(cancel_status.awaiting.toCancelable() == &waiter.cancelable); + if (cancel_status.requested) { + @branchHint(.unlikely); + // carefully place the hot potato out of the way + mutex.waiters.append(&waiter.node); + if (stop_node == null) stop_node = &waiter.node; + continue; + } + } + waiter.wake(); + return; + } + // everyone is about to die, nobody will wake up ;-( + } + + fn wake(waiter: *Waiter) void { + Sleeper.wake(&waiter.sleeper); + } + }; + + /// Creates the serial queue, targeting `queue`, on which the waiter callbacks run. + fn init(mutex: *Mutex, queue: c.dispatch.queue_t) error{SystemResources}!void { + mutex.* = .{ + .num_waiters = 0, + .queue = c.dispatch.queue_create_with_target( + "org.ziglang.std.Io.Dispatch.Mutex", + .SERIAL(), + queue, + ) orelse return error.SystemResources, + .waiters = .{}, + }; + } + + /// Releases the queue. The mutex must be unlocked and have no waiters (asserted). + fn deinit(mutex: *Mutex) void { + 
assert(mutex.num_waiters == 0 and mutex.waiters.first == null and mutex.waiters.last == null); + mutex.queue.as_object().release(); + mutex.* = undefined; + } + + /// Attempts to take the mutex without waiting. Uses a weak compare-exchange, so it may + /// return false spuriously. + fn tryLock(mutex: *Mutex) bool { + if (@cmpxchgWeak(usize, &mutex.num_waiters, 0, 1, .acquire, .monotonic) == null) { + @branchHint(.likely); + return true; + } + return false; + } + + /// Acquires the mutex, suspending the calling fiber when it is already held. Returns + /// `error.Canceled` if the wait was canceled. + fn lock(mutex: *Mutex, ev: *Evented) Io.Cancelable!void { + switch (@atomicRmw(usize, &mutex.num_waiters, .Add, 1, .acquire)) { + 0 => {}, + else => { + @branchHint(.unlikely); + var waiter: Waiter = .{ + .cancelable = .{ .queue = mutex.queue, .cancel = &Mutex.Waiter.canceled }, + .mutex = mutex, + }; + ev.yield(.{ .mutex_wait = &waiter }); + try waiter.cancelable.check(waiter.sleeper.fiber); + }, + } + } + + /// Like `lock`, but the wait cannot be canceled. + fn lockUncancelable(mutex: *Mutex, ev: *Evented) void { + switch (@atomicRmw(usize, &mutex.num_waiters, .Add, 1, .acquire)) { + 0 => {}, + else => { + @branchHint(.unlikely); + var waiter: Waiter = .{ .cancelable = .blocked, .mutex = mutex }; + ev.yield(.{ .mutex_wait = &waiter }); + waiter.cancelable.check(waiter.sleeper.fiber) catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + }, + } + } + + /// Releases the mutex and, when other fibers are counted in `num_waiters`, schedules + /// `Waiter.remove` on the mutex queue to wake one of them. + fn unlock(mutex: *Mutex) void { + switch (@atomicRmw(usize, &mutex.num_waiters, .Sub, 1, .release)) { + 0 => unreachable, + 1 => {}, + else => { + @branchHint(.unlikely); + mutex.queue.async(mutex, &Waiter.remove); + }, + } + } +}; + +/// Marks the current fiber as having cancelation requested, with cancelation blocked and already +/// acknowledged. Aborts the process when the thread has no current context or is running its +/// main loop context. +fn crashHandler(userdata: ?*anyopaque) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const thread = &Thread.self; + if (thread.current_context == null) std.process.abort(); + if (thread.current_context == &thread.main_context) std.process.abort(); + const fiber = thread.currentFiber(); + @atomicStore( + Fiber.CancelStatus, + &fiber.cancel_status, + .{ .requested = true, .awaiting = .nothing }, + .monotonic, + ); + fiber.cancel_protection = .{ .user = .blocked, .acknowledged = true }; +} + +/// State of a fiber started by `concurrent`. It is located inside the fiber's allocation (see +/// `fromFiber`) and is directly followed by the copied context bytes. +const AsyncClosure = struct { + ev: *Evented, + fiber: 
*Fiber, + start: *const fn (context: *const anyopaque, result: *anyopaque) void, + result_align: Alignment, + + /// Computes where the closure of `fiber` lives inside the fiber's allocation. + fn fromFiber(fiber: *Fiber) *AsyncClosure { + return @ptrFromInt(Fiber.max_context_align.max(.of(AsyncClosure)).backward( + @intFromPtr(fiber.allocatedEnd()) - Fiber.max_context_size, + ) - @sizeOf(AsyncClosure)); + } + + /// Start of the context bytes, which directly follow the closure. + fn contextPointer(closure: *AsyncClosure) [*]align(Fiber.max_context_align.toByteUnits()) u8 { + return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(AsyncClosure)); + } + + /// Naked entry point of a new fiber: passes the closure address, derived from the initial + /// stack pointer set up by `concurrent`, as the first argument and jumps to `call`. + fn entry() callconv(.naked) void { + switch (builtin.cpu.arch) { + .aarch64 => asm volatile ( + \\ mov x0, sp + \\ b %[call] + : + : [call] "X" (&call), + ), + .x86_64 => asm volatile ( + \\ leaq 8(%%rsp), %%rdi + \\ jmp %[call:P] + : + : [call] "X" (&call), + ), + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + } + } + + /// Runs `start`, then publishes completion by swapping `Fiber.finished` into + /// `link.awaiter`. If an awaiter had registered, its `await_count` is incremented and it is + /// scheduled when the previous count was -1. Finally switches away for good. + fn call( + closure: *AsyncClosure, + message: *const SwitchMessage, + ) callconv(.withStackAlign(.c, @alignOf(AsyncClosure))) noreturn { + message.handle(closure.ev); + const fiber = closure.fiber; + closure.start(closure.contextPointer(), fiber.resultBytes(closure.result_align)); + if (@atomicRmw(?*Fiber, &fiber.link.awaiter, .Xchg, Fiber.finished, .acq_rel)) |awaiter| + if (@atomicRmw(i32, &awaiter.await_count, .Add, 1, .monotonic) == -1) + closure.ev.queue.async(awaiter, &Fiber.@"resume"); + closure.ev.yield(.nothing); + unreachable; // switched to dead fiber + } +}; + +/// Like `concurrent`, but when that fails runs `start` inline on the caller and returns null. +fn async( + userdata: ?*anyopaque, + result: []u8, + result_alignment: Alignment, + context: []const u8, + context_alignment: Alignment, + start: *const fn (context: *const anyopaque, result: *anyopaque) void, +) ?*std.Io.AnyFuture { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return concurrent(ev, result.len, result_alignment, context, context_alignment, start) catch { + start(context.ptr, result.ptr); + return null; + }; +} + +/// Creates a fiber that runs `start` with a copy of `context` and schedules it on the queue. The +/// returned future is the fiber itself. Returns `error.ConcurrencyUnavailable` when the fiber +/// cannot be allocated. +fn concurrent( + userdata: ?*anyopaque, + result_len: usize, + result_alignment: Alignment, + context: 
[]const u8, + context_alignment: Alignment, + start: *const fn (context: *const anyopaque, result: *anyopaque) void, +) Io.ConcurrentError!*std.Io.AnyFuture { + assert(result_alignment.compare(.lte, Fiber.max_result_align)); // TODO + assert(context_alignment.compare(.lte, Fiber.max_context_align)); // TODO + assert(result_len <= Fiber.max_result_size); // TODO + assert(context.len <= Fiber.max_context_size); // TODO + + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const fiber = Fiber.create(ev) catch |err| switch (err) { + error.OutOfMemory => return error.ConcurrencyUnavailable, + }; + + const closure: *AsyncClosure = .fromFiber(fiber); + // The initial stack pointer is derived from the closure address; `AsyncClosure.entry` + // recovers the closure from it. + fiber.* = .{ + .required_align = {}, + .evented = ev, + .context = switch (builtin.cpu.arch) { + .aarch64 => .{ + .sp = @intFromPtr(closure), + .fp = 0, + .pc = @intFromPtr(&AsyncClosure.entry), + }, + .x86_64 => .{ + .rsp = @intFromPtr(closure) - 8, + .rbp = 0, + .rip = @intFromPtr(&AsyncClosure.entry), + }, + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + }, + .await_count = 0, + .link = .{ .awaiter = null }, + .status = .{ .queue_next = null }, + .cancel_status = .unrequested, + .cancel_protection = .unblocked, + }; + closure.* = .{ + .ev = ev, + .fiber = fiber, + .start = start, + .result_align = result_alignment, + }; + @memcpy(closure.contextPointer(), context); + + ev.queue.async(fiber, &Fiber.@"resume"); + return @ptrCast(fiber); +} + +/// Waits for `future` to finish, copies its result bytes into `result`, and destroys its fiber. +/// Registers the calling fiber as the future's awaiter and suspends until the future's +/// `link.awaiter` reads `Fiber.finished`. +fn await( + userdata: ?*anyopaque, + future: *std.Io.AnyFuture, + result: []u8, + result_alignment: Alignment, +) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const fiber = Thread.current().currentFiber(); + const future_fiber: *Fiber = @ptrCast(@alignCast(future)); + if (@atomicRmw(?*Fiber, &future_fiber.link.awaiter, .Xchg, fiber, .acq_rel)) |awaiter| { + assert(awaiter == Fiber.finished); + } else while (true) { + ev.yield(.{ .await = 1 }); + const awaiter = @atomicLoad(?*Fiber, &future_fiber.link.awaiter, .acquire); + if 
(awaiter == Fiber.finished) break; + assert(awaiter == fiber); // spurious wakeup + } + @memcpy(result, future_fiber.resultBytes(result_alignment)); + future_fiber.destroy(ev); +} + +/// Requests cancelation of `future`, then awaits it exactly like `await`. +fn cancel( + userdata: ?*anyopaque, + future: *std.Io.AnyFuture, + result: []u8, + result_alignment: Alignment, +) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const future_fiber: *Fiber = @ptrCast(@alignCast(future)); + future_fiber.requestCancel(ev); + await(ev, future, result, result_alignment); +} + +/// Wrapper over `Io.Group` that reinterprets its `token` and `state` fields as the packed +/// structures declared below. +const Group = struct { + ptr: *Io.Group, + + const List = packed struct(usize) { + cancel_requested: bool, + awaiter_delayed: bool, + fibers: Fiber.PackedPtr, + }; + fn listPtr(group: Group) *List { + return @ptrCast(&group.ptr.token); + } + + const Mutex = packed struct(u32) { + locked: bool, + contended: bool, + shared2: u30, + }; + /// Pointer to the 32-bit word of `state` that holds the lock bits; which bytes those are + /// depends on the target's endianness. + fn mutexPtr(group: Group) *Group.Mutex { + return switch (comptime builtin.cpu.arch.endian()) { + .little => @ptrCast(&group.ptr.state), + .big => @ptrCast(@alignCast( + @as([*]u8, @ptrCast(&group.ptr.state)) + @sizeOf(usize) - @sizeOf(u32), + )), + }; + } + + const Awaiter = packed struct(usize) { + locked: bool, + contended: bool, + awaiter: Fiber.PackedPtr, + }; + fn awaiterPtr(group: Group) *Awaiter { + return @ptrCast(&group.ptr.state); + } + + /// Acquires the group's lock bit, waiting on the lock word as a futex while it is held. + /// After the first failed attempt, every further attempt also sets `contended`. + fn lock(group: Group, ev: *Evented) void { + const mutex = group.mutexPtr(); + { + const old_state = @atomicRmw( + Group.Mutex, + mutex, + .Or, + .{ .locked = true, .contended = false, .shared2 = 0 }, + .acquire, + ); + if (!old_state.locked) { + @branchHint(.likely); + return; + } + if (old_state.contended) { + futexWaitUncancelable(ev, @ptrCast(mutex), @bitCast(old_state)); + } + } + while (true) { + var old_state = @atomicRmw( + Group.Mutex, + mutex, + .Or, + .{ .locked = true, .contended = true, .shared2 = 0 }, + .acquire, + ); + if (!old_state.locked) { + @branchHint(.likely); + return; + } + old_state.contended = true; + futexWaitUncancelable(ev, @ptrCast(mutex), @bitCast(old_state)); 
+ } + } + + /// Clears the lock bits and wakes one futex waiter if the lock was contended. + fn unlock(group: Group, ev: *Evented) void { + const mutex = group.mutexPtr(); + const old_state = @atomicRmw( + Group.Mutex, + mutex, + .And, + .{ .locked = false, .contended = false, .shared2 = std.math.maxInt(u30) }, + .release, + ); + assert(old_state.locked); + if (old_state.contended) futexWake(ev, @ptrCast(mutex), 1); + } + + /// Pushes `fiber` onto the head of the group's fiber list while holding the lock. A fiber + /// added after cancelation was requested starts out with cancelation requested. + fn addFiber(group: Group, ev: *Evented, fiber: *Fiber) void { + group.lock(ev); + defer group.unlock(ev); + const list_ptr = group.listPtr(); + const list = @atomicLoad(List, list_ptr, .monotonic); + if (list.cancel_requested) fiber.cancel_status = .{ .requested = true, .awaiting = .nothing }; + const old_head = list.fibers.unpack(); + if (old_head) |head| head.link.group.prev = fiber; + fiber.link.group.next = old_head; + @atomicStore(List, list_ptr, .{ + .cancel_requested = list.cancel_requested, + .awaiter_delayed = list.awaiter_delayed, + .fibers = .pack(fiber), + }, .monotonic); + } + + /// Unlinks `fiber` from the group's list while holding the lock. When this empties the list + /// and an awaiter is registered, returns that awaiter for the caller to resume, unless the + /// race noted below is detected, in which case `awaiter_delayed` is set and null is + /// returned. + fn removeFiber(group: Group, ev: *Evented, fiber: *Fiber) ?*Fiber { + group.lock(ev); + defer group.unlock(ev); + const list_ptr = group.listPtr(); + const list = @atomicLoad(List, list_ptr, .monotonic); + if (fiber.link.group.next) |next| next.link.group.prev = fiber.link.group.prev; + if (fiber.link.group.prev) |prev| { + prev.link.group.next = fiber.link.group.next; + } else if (fiber.link.group.next) |new_head| { + @atomicStore(List, list_ptr, .{ + .cancel_requested = list.cancel_requested, + .awaiter_delayed = list.awaiter_delayed, + .fibers = .pack(new_head), + }, .monotonic); + } else if (@atomicLoad(Awaiter, group.awaiterPtr(), .monotonic).awaiter.unpack()) |awaiter| { + if (!awaiter.cancel_status.changeAwaiting(.group, .nothing) or list.cancel_requested) { + @atomicStore(List, list_ptr, .{ + .cancel_requested = false, + .awaiter_delayed = false, + .fibers = .null, + }, .release); + assert(awaiter.status.awaiting_group.ptr == group.ptr); + awaiter.status = .{ .queue_next = null }; + return awaiter; + } + // Race with 
`Fiber.requestCancel` + @atomicStore(List, list_ptr, .{ + .cancel_requested = false, + .awaiter_delayed = true, + .fibers = .null, + }, .monotonic); + } else @atomicStore(List, list_ptr, .{ + .cancel_requested = false, + .awaiter_delayed = false, + .fibers = .null, + }, .release); + return null; + } + + /// Returns true when the group has no fibers, in which case the caller resumes `awaiter` + /// right away. Otherwise registers `awaiter` with the group and returns false. + fn await(group: Group, ev: *Evented, awaiter: *Fiber) bool { + group.lock(ev); + defer group.unlock(ev); + if (@atomicLoad(List, group.listPtr(), .monotonic).fibers.unpack()) |_| { + if (group.registerAwaiter(awaiter) and awaiter.cancel_protection.check() == .unblocked) { + // The awaiter already had an unacknowledged cancelation request before + // attempting to await a group, so propagate the cancelation to the group. + assert(!group.cancelLocked(ev, null)); + } + return false; + } + return true; + } + + /// Takes the lock and forwards to `cancelLocked`. + fn cancel(group: Group, ev: *Evented, maybe_awaiter: ?*Fiber) bool { + group.lock(ev); + defer group.unlock(ev); + return group.cancelLocked(ev, maybe_awaiter); + } + + /// Assumes the mutex is held. + /// Requests cancelation of every fiber in the group and, if given, registers `maybe_awaiter`; + /// returns false in that case. When the group has no fibers, returns true if an awaiter was + /// given, and otherwise the previous value of `awaiter_delayed`. + fn cancelLocked(group: Group, ev: *Evented, maybe_awaiter: ?*Fiber) bool { + const list_ptr = group.listPtr(); + const list = @atomicRmw( + List, + list_ptr, + .Add, + .{ .cancel_requested = true, .awaiter_delayed = false, .fibers = .null }, + .monotonic, + ); + assert(!list.cancel_requested); + if (list.fibers.unpack()) |head| { + var maybe_fiber: ?*Fiber = head; + while (maybe_fiber) |fiber| { + fiber.requestCancel(ev); + maybe_fiber = fiber.link.group.next; + } + if (maybe_awaiter) |awaiter| _ = group.registerAwaiter(awaiter); + return false; + } + @atomicStore( + List, + list_ptr, + .{ .cancel_requested = false, .awaiter_delayed = false, .fibers = .null }, + .release, + ); + return if (maybe_awaiter) |_| true else list.awaiter_delayed; + } + + /// Assumes the mutex is held. 
+ fn registerAwaiter(group: Group, awaiter: *Fiber) bool { + assert(awaiter.status.queue_next == null); + awaiter.status = .{ .awaiting_group = group }; + assert(@atomicRmw( + Awaiter, + group.awaiterPtr(), + .Add, + .{ .locked = false, .contended = false, .awaiter = .pack(awaiter) }, + .monotonic, + ).awaiter == .null); + return awaiter.cancel_status.changeAwaiting(.nothing, .group); + } + + /// State of a fiber started by `groupConcurrent`. It is located inside the fiber's + /// allocation (see `fromFiber`) and is directly followed by the copied context bytes. + const AsyncClosure = struct { + ev: *Evented, + group: Group, + fiber: *Fiber, + start: *const fn (context: *const anyopaque) Io.Cancelable!void, + + fn fromFiber(fiber: *Fiber) *Group.AsyncClosure { + return @ptrFromInt(Fiber.max_context_align.max(.of(Group.AsyncClosure)).backward( + @intFromPtr(fiber.allocatedEnd()) - Fiber.max_context_size, + ) - @sizeOf(Group.AsyncClosure)); + } + + fn contextPointer( + closure: *Group.AsyncClosure, + ) [*]align(Fiber.max_context_align.toByteUnits()) u8 { + return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(Group.AsyncClosure)); + } + + /// Naked entry point of a new group fiber: passes the closure address, derived from the + /// initial stack pointer, as the first argument and jumps to `call`. + fn entry() callconv(.naked) void { + switch (builtin.cpu.arch) { + .aarch64 => asm volatile ( + \\ mov x0, sp + \\ b %[call] + : + : [call] "X" (&call), + ), + .x86_64 => asm volatile ( + \\ leaq 8(%%rsp), %%rdi + \\ jmp %[call:P] + : + : [call] "X" (&call), + ), + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + } + } + + /// Runs `start`, asserts that its result agrees with whether cancelation was + /// acknowledged, removes the fiber from the group (scheduling the group's awaiter if one + /// is returned), and switches away requesting the fiber's destruction. + fn call( + closure: *Group.AsyncClosure, + message: *const SwitchMessage, + ) callconv(.withStackAlign(.c, @alignOf(Group.AsyncClosure))) noreturn { + message.handle(closure.ev); + assert(closure.fiber.status.queue_next == null); + const result = closure.start(closure.contextPointer()); + const ev = closure.ev; + const group = closure.group; + const fiber = closure.fiber; + const cancel_acknowledged = fiber.cancel_protection.acknowledged; + if (result) { + assert(!cancel_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled` + } else |err| switch (err) { + error.Canceled => assert(cancel_acknowledged), // group 
task returned `error.Canceled` but was never canceled + } + if (group.removeFiber(ev, fiber)) |awaiter| ev.queue.async(awaiter, &Fiber.@"resume"); + ev.yield(.destroy); + unreachable; // switched to dead fiber + } + }; +}; + +/// Like `groupConcurrent`, but when that fails runs `start` inline on the calling fiber. In that +/// case the calling fiber's cancelation acknowledgment is compared before and after the call, and +/// a returned `error.Canceled` is re-armed on the caller via `recancel`. +fn groupAsync( + userdata: ?*anyopaque, + type_erased: *Io.Group, + context: []const u8, + context_alignment: Alignment, + start: *const fn (context: *const anyopaque) Io.Cancelable!void, +) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return groupConcurrent(ev, type_erased, context, context_alignment, start) catch { + const fiber = Thread.current().currentFiber(); + const pre_acknowledged = fiber.cancel_protection.acknowledged; + const result = start(context.ptr); + const post_acknowledged = fiber.cancel_protection.acknowledged; + if (result) { + if (pre_acknowledged) { + assert(post_acknowledged); // group task called `recancel` but was not canceled + } else { + assert(!post_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled` + } + } else |err| switch (err) { + // Don't swallow the cancelation: make it visible to the `Group.async` caller. 
+ error.Canceled => { + assert(!pre_acknowledged); // group task called `recancel` but was not canceled + assert(post_acknowledged); // group task returned `error.Canceled` but was never canceled + fiber.cancel_protection.recancel(); + }, + } + }; +} + +/// Creates a fiber that runs `start` with a copy of `context`, adds it to the group, and +/// schedules it on the queue. Returns `error.ConcurrencyUnavailable` when the fiber cannot be +/// allocated. +fn groupConcurrent( + userdata: ?*anyopaque, + type_erased: *Io.Group, + context: []const u8, + context_alignment: Alignment, + start: *const fn (context: *const anyopaque) Io.Cancelable!void, +) Io.ConcurrentError!void { + assert(context_alignment.compare(.lte, Fiber.max_context_align)); // TODO + assert(context.len <= Fiber.max_context_size); // TODO + + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const group: Group = .{ .ptr = type_erased }; + const fiber = Fiber.create(ev) catch |err| switch (err) { + error.OutOfMemory => return error.ConcurrencyUnavailable, + }; + + const closure: *Group.AsyncClosure = .fromFiber(fiber); + fiber.* = .{ + .required_align = {}, + .evented = ev, + .context = switch (builtin.cpu.arch) { + .aarch64 => .{ + .sp = @intFromPtr(closure), + .fp = 0, + .pc = @intFromPtr(&Group.AsyncClosure.entry), + }, + .x86_64 => .{ + .rsp = @intFromPtr(closure) - 8, + .rbp = 0, + .rip = @intFromPtr(&Group.AsyncClosure.entry), + }, + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + }, + .await_count = 0, + .link = .{ .group = .{ .prev = null, .next = null } }, + .status = .{ .queue_next = null }, + .cancel_status = .unrequested, + .cancel_protection = .unblocked, + }; + closure.* = .{ + .ev = ev, + .group = group, + .fiber = fiber, + .start = start, + }; + @memcpy(closure.contextPointer(), context); + group.addFiber(ev, fiber); + ev.queue.async(fiber, &Fiber.@"resume"); +} + +/// Suspends the calling fiber with a `group_await` request for the group; `initial_token` is +/// unused. +fn groupAwait( + userdata: ?*anyopaque, + type_erased: *Io.Group, + initial_token: *anyopaque, +) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = initial_token; + ev.yield(.{ .group_await = .{ .ptr = type_erased } }); +} + +/// Suspends the calling fiber with a `group_cancel` request for the group; `initial_token` is +/// unused. +fn groupCancel(userdata: 
?*anyopaque, type_erased: *Io.Group, initial_token: *anyopaque) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = initial_token; + ev.yield(.{ .group_cancel = .{ .ptr = type_erased } }); +} + +/// Forwards to `recancel` on the current fiber's cancel protection. +fn recancel(userdata: ?*anyopaque) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + Thread.current().currentFiber().cancel_protection.recancel(); +} + +/// Sets the current fiber's user cancel protection to `new` and returns the previous value. +fn swapCancelProtection(userdata: ?*anyopaque, new: Io.CancelProtection) Io.CancelProtection { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const cancel_protection = &Thread.current().currentFiber().cancel_protection; + defer cancel_protection.user = new; + return cancel_protection.user; +} + +/// Returns `error.Canceled`, after acknowledging the request, if cancelation has been requested +/// on the current fiber and its cancel protection reports `unblocked`. +fn checkCancel(userdata: ?*anyopaque) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const fiber = Thread.current().currentFiber(); + switch (fiber.cancel_protection.check()) { + .unblocked => { + const cancel_status = @atomicLoad(Fiber.CancelStatus, &fiber.cancel_status, .monotonic); + assert(cancel_status.awaiting == .nothing); + if (cancel_status.requested) { + @branchHint(.unlikely); + fiber.cancel_protection.acknowledge(); + return error.Canceled; + } + }, + .blocked => {}, + } +} + +/// A set of futex waiters sharing one serial queue. Addresses are mapped to a `Futex` by +/// `futexForAddress`. +const Futex = struct { + num_waiters: usize, + queue: c.dispatch.queue_t, + waiters: std.DoublyLinkedList, + + const Waiter = struct { + sleeper: Sleeper = undefined, + cancelable: Cancelable, + futex: *Futex, + node: std.DoublyLinkedList.Node = .{}, + ptr: *const u32, + expected: u32, + timeout: c.dispatch.time_t = .FOREVER, + leeway: u64, + timer: ?c.dispatch.source_t = null, + + const already_signaled: c.dispatch.source_t = @ptrFromInt(1); + + /// Queue callback: counts the waiter, then tries to enqueue it. On failure the waiter is + /// woken immediately and the count is undone. + fn add(context: ?*anyopaque) callconv(.c) void { + const waiter: *Waiter = @ptrCast(@alignCast(context)); + const futex = waiter.futex; + _ = @atomicRmw(usize, &futex.num_waiters, .Add, 1, .acquire); + waiter.tryAdd() catch |err| switch (err) { + error.CancelRequested => { + wake(waiter); + 
assert(@atomicRmw(usize, &futex.num_waiters, .Sub, 1, .monotonic) >= 1); + }, + }; + } + + /// Enqueues the waiter, arming a one-shot timer first when a timeout was given. Fails + /// with `error.CancelRequested` when the value at `ptr` no longer equals `expected`, + /// when cancelation was already requested, or when the timer cannot be created. + fn tryAdd(waiter: *Waiter) Cancelable.AwaitError!void { + if (@atomicLoad(u32, waiter.ptr, .monotonic) != waiter.expected) + return error.CancelRequested; + try waiter.cancelable.await(waiter.sleeper.fiber); + const futex = waiter.futex; + switch (waiter.timeout) { + .FOREVER => {}, + else => |timeout| { + const timer = c.dispatch.source_create(.TIMER, 0, .none, futex.queue) orelse { + // NOTE(review): `cancelable.await` above has already published this waiter in + // the fiber's `cancel_status`; confirm it is safe to bail out here without + // clearing that registration. + log.warn("unable to create timer for futex timeout", .{}); + return error.CancelRequested; + }; + timer.as_object().set_context(waiter); + timer.set_event_handler(&timedOut); + timer.set_cancel_handler(&wake); + timer.set_timer(timeout, c.dispatch.TIME_FOREVER, waiter.leeway); + timer.as_object().activate(); + waiter.timer = timer; + }, + } + futex.waiters.append(&waiter.node); + } + + /// Cancelation callback: marks the request, removes the waiter, and undoes its count. + fn canceled(context: ?*anyopaque) callconv(.c) void { + const cancelable: *Cancelable = @ptrCast(@alignCast(context)); + cancelable.cancel = Cancelable.is_requested; + const waiter: *Waiter = @fieldParentPtr("cancelable", cancelable); + assert(@atomicRmw( + Fiber.CancelStatus, + &waiter.sleeper.fiber.cancel_status, + .Xchg, + .{ .requested = true, .awaiting = .nothing }, + .monotonic, + ) == Fiber.CancelStatus{ .requested = true, .awaiting = .fromCancelable(cancelable) }); + const futex = waiter.futex; + waiter.removeUncancelable(); + assert(@atomicRmw(usize, &futex.num_waiters, .Sub, 1, .monotonic) >= 1); + } + + /// Timer event handler: removes the waiter and undoes its count, unless cancelation has + /// been requested, in which case the waiter is left queued. + fn timedOut(context: ?*anyopaque) callconv(.c) void { + const waiter: *Waiter = @ptrCast(@alignCast(context)); + const futex = waiter.futex; + waiter.remove() catch |err| switch (err) { + error.CancelRequested => return, + }; + assert(@atomicRmw(usize, &futex.num_waiters, .Sub, 1, .monotonic) >= 1); + } + + /// Clears the waiter from its fiber's `cancel_status` and removes it. Fails with + /// `error.CancelRequested`, leaving the waiter queued, when cancelation was requested. + fn remove(waiter: *Waiter) Cancelable.AwaitError!void { + if (waiter.cancelable.cancel != Cancelable.is_blocked) { + @branchHint(.likely); + const cancel_status = @atomicRmw( + Fiber.CancelStatus, + 
&waiter.sleeper.fiber.cancel_status, + .And, + .{ .requested = true, .awaiting = .nothing }, + .monotonic, + ); + assert(cancel_status.awaiting.toCancelable() == &waiter.cancelable); + if (cancel_status.requested) return error.CancelRequested; + } + waiter.removeUncancelable(); + } + + /// Unlinks the waiter, then wakes it: directly when it has no timer, otherwise by + /// canceling the timer, whose cancel handler is `wake`. + fn removeUncancelable(waiter: *Waiter) void { + waiter.futex.waiters.remove(&waiter.node); + if (waiter.timer) |timer| timer.cancel() else wake(waiter); + } + + /// Releases the timer, if any, and resumes the waiting fiber. + fn wake(context: ?*anyopaque) callconv(.c) void { + const waiter: *Waiter = @ptrCast(@alignCast(context)); + if (waiter.timer) |timer| timer.as_object().release(); + Sleeper.wake(&waiter.sleeper); + } + }; + + /// Request to wake up to `max_waiters` waiters whose `ptr` matches. `remove` carries it out + /// on the futex queue and then resumes the requesting fiber. + const Waker = struct { + sleeper: Sleeper = undefined, + futex: *Futex, + ptr: *const u32, + max_waiters: u32, + + /// Queue callback: removes matching waiters in list order, skipping those with a pending + /// cancelation request, then wakes the requester. + fn remove(context: ?*anyopaque) callconv(.c) void { + const waker: *Waker = @ptrCast(@alignCast(context)); + const futex = waker.futex; + const ptr = waker.ptr; + const max_waiters = waker.max_waiters; + + var num_removed: usize = 0; + var next_node = futex.waiters.first; + while (num_removed < max_waiters) { + const waiter: *Waiter = @fieldParentPtr("node", next_node orelse break); + next_node = waiter.node.next; + if (waiter.ptr != ptr) { + @branchHint(.unlikely); + continue; + } + waiter.remove() catch |err| switch (err) { + error.CancelRequested => continue, + }; + num_removed += 1; + } + assert(@atomicRmw(usize, &futex.num_waiters, .Sub, num_removed, .monotonic) >= num_removed); + + var sleeper = waker.sleeper; + waker.* = undefined; + Sleeper.wake(&sleeper); + } + }; + + /// Creates the serial queue, targeting `queue`, on which the waiter callbacks run. + fn init(futex: *Futex, queue: c.dispatch.queue_t) error{SystemResources}!void { + futex.* = .{ + .num_waiters = 0, + .queue = c.dispatch.queue_create_with_target( + "org.ziglang.std.Io.Dispatch.Futex", + .SERIAL(), + queue, + ) orelse return error.SystemResources, + .waiters = .{}, + }; + } + + /// Releases the queue. There must be no waiters left (asserted). + fn deinit(futex: *Futex) void { + assert(futex.num_waiters == 0 and futex.waiters.first == null and futex.waiters.last == null); + 
futex.queue.as_object().release(); + futex.* = undefined; + } +}; + +fn futexForAddress(ev: *Evented, address: usize) *Futex { + // Here we use Fibonacci hashing: the golden ratio can be used to evenly redistribute input + // values across a range, giving a poor, but extremely quick to compute, hash. + + // This literal is the rounded value of '2^64 / phi' (where 'phi' is the golden ratio). The + // shift then converts it to '2^b / phi', where 'b' is the pointer bit width. + const fibonacci_multiplier = 0x9E3779B97F4A7C15 >> (64 - @bitSizeOf(usize)); + const hashed = address *% fibonacci_multiplier; + comptime assert(std.math.isPowerOfTwo(ev.futexes.len)); + // The high bits of `hashed` have better entropy than the low bits. + return &ev.futexes[hashed >> @clz(ev.futexes.len - 1)]; +} + +fn select(userdata: ?*anyopaque, futures: []const *Io.AnyFuture) Io.Cancelable!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const fiber = Thread.current().currentFiber(); + var await_count: u31, var result = for (futures, 0..) |future, future_index| { + const future_fiber: *Fiber = @ptrCast(@alignCast(future)); + if (@atomicRmw( + ?*Fiber, + &future_fiber.link.awaiter, + .Xchg, + fiber, + .acq_rel, + )) |awaiter| { + assert(awaiter == Fiber.finished); + break .{ @intCast(future_index), future_index }; + } + } else result: { + const await_count: u31 = @intCast(futures.len); + ev.yield(.{ .await = 1 }); + break :result .{ await_count - 1, futures.len }; + }; + for (futures[0..result], 0..) 
|future, future_index| { + const future_fiber: *Fiber = @ptrCast(@alignCast(future)); + const awaiter = @atomicRmw(?*Fiber, &future_fiber.link.awaiter, .Xchg, null, .monotonic); + if (awaiter == Fiber.finished) { + @atomicStore(?*Fiber, &future_fiber.link.awaiter, Fiber.finished, .monotonic); + result = @min(future_index, result); + } else { + assert(awaiter == fiber); + await_count -= 1; + } + } + // Equivalent to `ev.yield(null, .{ .await = await_count });`, + // but avoiding a context switch in the common case. + switch (std.math.order( + @atomicRmw(i32, &fiber.await_count, .Sub, await_count, .monotonic), + await_count, + )) { + .lt => ev.yield(.{ .await = 0 }), + .eq => {}, + .gt => unreachable, + } + return result; +} + +fn futexWait( + userdata: ?*anyopaque, + ptr: *const u32, + expected: u32, + timeout: Io.Timeout, +) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const futex = ev.futexForAddress(@intFromPtr(ptr)); + var waiter: Futex.Waiter = .{ + .cancelable = .{ .queue = futex.queue, .cancel = &Futex.Waiter.canceled }, + .futex = futex, + .ptr = ptr, + .expected = expected, + .timeout = ev.timeFromTimeout(timeout), + .leeway = ev.leeway, + }; + ev.yield(.{ .futex_wait = &waiter }); + try waiter.cancelable.check(waiter.sleeper.fiber); +} + +fn futexWaitUncancelable(userdata: ?*anyopaque, ptr: *const u32, expected: u32) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const futex = ev.futexForAddress(@intFromPtr(ptr)); + var waiter: Futex.Waiter = .{ + .cancelable = .blocked, + .futex = futex, + .ptr = ptr, + .expected = expected, + .leeway = ev.leeway, + }; + ev.yield(.{ .futex_wait = &waiter }); + waiter.cancelable.check(waiter.sleeper.fiber) catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; +} + +fn futexWake(userdata: ?*anyopaque, ptr: *const u32, max_waiters: u32) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (max_waiters == 0) return; + const futex = 
ev.futexForAddress(@intFromPtr(ptr)); + switch (@atomicRmw(usize, &futex.num_waiters, .Add, 0, .release)) { + 0 => return, + else => { + @branchHint(.unlikely); + var waker: Futex.Waker = .{ .futex = futex, .ptr = ptr, .max_waiters = max_waiters }; + ev.yield(.{ .futex_wake = &waker }); + }, + } +} + +fn operate(userdata: ?*anyopaque, operation: Io.Operation) Io.Cancelable!Io.Operation.Result { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + switch (operation) { + .file_read_streaming => |o| return .{ + .file_read_streaming = ev.fileReadStreaming(o.file, o.data) catch |err| switch (err) { + error.Canceled => |e| return e, + else => |e| e, + }, + }, + .file_write_streaming => |o| return .{ + .file_write_streaming = ev.fileWriteStreaming( + o.file, + o.header, + o.data, + o.splat, + ) catch |err| switch (err) { + error.Canceled => |e| return e, + else => |e| e, + }, + }, + .device_io_control => |*o| return .{ .device_io_control = try deviceIoControl(o) }, + } +} + +fn fileReadStreaming(ev: *Evented, file: File, data: []const []u8) File.ReadStreamingError!usize { + if (file.flags.nonblocking) nonblocking: { + return fileReadStreamingLimit(file.handle, data, .unlimited) catch |err| switch (err) { + error.WouldBlock => break :nonblocking, + else => |e| return e, + }; + } + const source = c.dispatch.source_create( + .READ, + @bitCast(@as(isize, file.handle)), + .none, + ev.queue, + ) orelse return error.SystemResources; + source.as_object().set_context(Thread.current().currentFiber()); + source.set_event_handler(&Fiber.@"resume"); + ev.yield(.{ .activate = source.as_object() }); + const limit = source.get_data(); + source.as_object().release(); + while (true) return fileReadStreamingLimit( + file.handle, + data, + .limited(limit), + ) catch |err| switch (err) { + error.WouldBlock => { + ev.yield(.nothing); + continue; + }, + else => |e| return e, + }; +} +fn fileReadStreamingLimit( + handle: File.Handle, + data: []const []u8, + limit: Io.Limit, +) 
File.ReadStreamingError!usize { + var iovecs: [max_iovecs_len]iovec = undefined; + var iovlen: iovlen_t = 0; + // .nothing can mean that the write side has been closed, + // in which case the buffer still needs to be drained + var remaining = if (limit == .nothing) .unlimited else limit; + for (data) |buf| addBuf(false, &iovecs, &iovlen, &remaining, buf); + if (iovlen == 0) return 0; + while (true) { + const rc = c.readv(handle, &iovecs, iovlen); + switch (c.errno(rc)) { + .SUCCESS => return if (rc == 0) error.EndOfStream else @intCast(rc), + .INTR => continue, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .AGAIN => return error.WouldBlock, + .BADF => |err| return errnoBug(err), // File descriptor used after closed + .IO => return error.InputOutput, + .ISDIR => return error.IsDir, + .NOBUFS => return error.SystemResources, + .NOMEM => return error.SystemResources, + .NOTCONN => return error.SocketUnconnected, + .CONNRESET => return error.ConnectionResetByPeer, + else => |err| return unexpectedErrno(err), + } + } +} + +fn fileWriteStreaming( + ev: *Evented, + file: File, + header: []const u8, + data: []const []const u8, + splat: usize, +) File.Writer.Error!usize { + if (file.flags.nonblocking) nonblocking: { + return fileWriteStreamingLimit( + file.handle, + header, + data, + splat, + .unlimited, + ) catch |err| switch (err) { + error.WouldBlock => break :nonblocking, + else => |e| return e, + }; + } + const source = c.dispatch.source_create( + .WRITE, + @bitCast(@as(isize, file.handle)), + .none, + ev.queue, + ) orelse return error.SystemResources; + source.as_object().set_context(Thread.current().currentFiber()); + source.set_event_handler(&Fiber.@"resume"); + ev.yield(.{ .activate = source.as_object() }); + const limit = source.get_data(); + source.as_object().release(); + while (true) return fileWriteStreamingLimit( + file.handle, + header, + data, + splat, + .limited(limit), + ) catch |err| switch (err) { + error.WouldBlock 
=> { + ev.yield(.nothing); + continue; + }, + else => |e| return e, + }; +} +fn fileWriteStreamingLimit( + handle: File.Handle, + header: []const u8, + data: []const []const u8, + splat: usize, + limit: Io.Limit, +) File.Writer.Error!usize { + if (limit == .nothing) return 0; + var iovecs: [max_iovecs_len]iovec_const = undefined; + var iovlen: iovlen_t = 0; + var remaining = limit; + addBuf(true, &iovecs, &iovlen, &remaining, header); + for (data[0 .. data.len - 1]) |bytes| addBuf(true, &iovecs, &iovlen, &remaining, bytes); + const pattern = data[data.len - 1]; + var backup_buffer: [splat_buffer_size]u8 = undefined; + if (iovecs.len - iovlen != 0 and remaining != .nothing) switch (splat) { + 0 => {}, + 1 => addBuf(true, &iovecs, &iovlen, &remaining, pattern), + else => switch (pattern.len) { + 0 => {}, + 1 => { + const splat_buffer = &backup_buffer; + const memset_len = @min(splat_buffer.len, splat); + const buf = splat_buffer[0..memset_len]; + @memset(buf, pattern[0]); + addBuf(true, &iovecs, &iovlen, &remaining, buf); + var remaining_splat = splat - buf.len; + while (remaining_splat > splat_buffer.len and iovecs.len - iovlen != 0 and remaining != .nothing) { + assert(buf.len == splat_buffer.len); + addBuf(true, &iovecs, &iovlen, &remaining, splat_buffer); + remaining_splat -= splat_buffer.len; + } + addBuf(true, &iovecs, &iovlen, &remaining, splat_buffer[0..@min(remaining_splat, splat_buffer.len)]); + }, + else => for (0..@min(splat, iovecs.len - iovlen)) |_| { + if (remaining == .nothing) break; + addBuf(true, &iovecs, &iovlen, &remaining, pattern); + }, + }, + }; + if (iovlen == 0) return 0; + while (true) { + const rc = c.writev(handle, &iovecs, iovlen); + switch (c.errno(rc)) { + .SUCCESS => return @intCast(rc), + .INTR => continue, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .AGAIN => return error.WouldBlock, + .BADF => return error.NotOpenForWriting, // Can be a race condition. 
+ .DESTADDRREQ => |err| return errnoBug(err), // `connect` was never called. + .DQUOT => return error.DiskQuota, + .FBIG => return error.FileTooBig, + .IO => return error.InputOutput, + .NOSPC => return error.NoSpaceLeft, + .PERM => return error.PermissionDenied, + .PIPE => return error.BrokenPipe, + .CONNRESET => |err| return errnoBug(err), // Not a socket handle. + .BUSY => return error.DeviceBusy, + else => |err| return unexpectedErrno(err), + } + } +} + +fn deviceIoControl(o: *const Io.Operation.DeviceIoControl) Io.Cancelable!i32 { + while (true) { + const rc = c.ioctl(o.file.handle, @bitCast(o.code), @intFromPtr(o.arg)); + switch (c.errno(rc)) { + .SUCCESS => return rc, + .INTR => {}, + else => |err| return -@as(i32, @intFromEnum(err)), + } + } +} + +const BatchWaiter = struct { + sleeper: Sleeper, + queue: c.dispatch.queue_t, + timer: ?c.dispatch.source_t = null, + + const already_signaled: c.dispatch.source_t = @ptrFromInt(1); + + fn signal(context: ?*anyopaque) callconv(.c) void { + const waiter: *BatchWaiter = @ptrCast(@alignCast(context)); + if (waiter.timer) |timer| { + if (timer != already_signaled) timer.cancel(); + } else { + waiter.timer = already_signaled; + waiter.queue.async(waiter, &@"suspend"); + } + } + + fn @"suspend"(context: ?*anyopaque) callconv(.c) void { + const waiter: *BatchWaiter = @ptrCast(@alignCast(context)); + if (waiter.timer) |timer| if (timer != already_signaled) timer.as_object().release(); + waiter.queue.as_object().@"suspend"(); + waiter.wake(); + } + + fn wake(waiter: *BatchWaiter) void { + var sleeper = waiter.sleeper; + waiter.* = undefined; + Sleeper.wake(&sleeper); + } +}; + +fn batchAwaitAsync(userdata: ?*anyopaque, batch: *Io.Batch) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const queue = ev.batchDrainSubmitted(batch, false) catch |err| switch (err) { + error.ConcurrencyUnavailable => unreachable, // passed concurrency=false + error.Canceled => |e| return e, + } orelse return; + if 
(batch.pending.head == .none) return; + var waiter: BatchWaiter = .{ + .sleeper = .init(ev.queue, Thread.current().currentFiber()), + .queue = queue, + }; + if (batch.completed.head != .none) BatchWaiter.signal(&waiter); + queue.as_object().set_context(&waiter); + ev.yield(.{ .@"resume" = queue.as_object() }); +} + +fn batchAwaitConcurrent( + userdata: ?*anyopaque, + batch: *Io.Batch, + timeout: Io.Timeout, +) Io.Batch.AwaitConcurrentError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const queue = try ev.batchDrainSubmitted(batch, true) orelse return; + if (batch.pending.head == .none) return; + var waiter: BatchWaiter = .{ + .sleeper = .init(ev.queue, Thread.current().currentFiber()), + .queue = queue, + }; + if (batch.completed.head == .none) switch (timeout) { + .none => {}, + else => { + const timer = c.dispatch.source_create(.TIMER, 0, .none, queue) orelse + return error.ConcurrencyUnavailable; + assert(timer != BatchWaiter.already_signaled); + timer.as_object().set_context(&waiter); + timer.set_event_handler(&BatchWaiter.signal); + timer.set_cancel_handler(&BatchWaiter.@"suspend"); + timer.set_timer(ev.timeFromTimeout(timeout), c.dispatch.TIME_FOREVER, ev.leeway); + timer.as_object().activate(); + waiter.timer = timer; + }, + } else BatchWaiter.signal(&waiter); + queue.as_object().set_context(&waiter); + ev.yield(.{ .@"resume" = queue.as_object() }); +} + +fn batchCancel(userdata: ?*anyopaque, batch: *Io.Batch) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var index = batch.pending.head; + while (index != .none) { + const storage = &batch.storage[index.toIndex()]; + const pending = &storage.pending; + const operation_userdata: *BatchOperationUserdata = .fromErased(&pending.userdata); + assert(operation_userdata.batch == batch); + operation_userdata.source.cancel(); + } + const queue: c.dispatch.queue_t = @ptrCast(batch.userdata orelse return); + if (batch.pending.head != .none) { + var waiter: BatchWaiter = .{ + .sleeper = 
.init(ev.queue, Thread.current().currentFiber()), + .queue = queue, + .timer = BatchWaiter.already_signaled, + }; + if (batch.pending.head == .none) queue.async(&waiter, &BatchWaiter.signal); + queue.as_object().set_context(&waiter); + ev.yield(.{ .@"resume" = queue.as_object() }); + } + batch.userdata = null; +} + +const BatchOperationUserdata = extern struct { + batch: *Io.Batch, + source: c.dispatch.source_t, + operation: extern union { + file_read_streaming: extern struct { + data_ptr: [*]const []u8, + data_len: usize, + }, + file_write_streaming: extern struct { + header_ptr: [*]const u8, + header_len: usize, + data_ptr: [*]const []const u8, + data_len: usize, + splat: usize, + + fn header(operation: *const @This()) []const u8 { + return operation.header_ptr[0..operation.header_len]; + } + + fn data(operation: *const @This()) []const []const u8 { + return operation.data_ptr[0..operation.data_len]; + } + }, + }, + + const Erased = Io.Operation.Storage.Pending.Userdata; + + comptime { + assert(@sizeOf(BatchOperationUserdata) <= @sizeOf(Erased)); + } + + fn toErased(userdata: *BatchOperationUserdata) *Erased { + return @ptrCast(userdata); + } + + fn fromErased(erased: *Erased) *BatchOperationUserdata { + return @ptrCast(erased); + } +}; + +/// If `concurrency` is false, `error.ConcurrencyUnavailable` is unreachable. 
fn batchDrainSubmitted(
    ev: *Evented,
    batch: *Io.Batch,
    concurrency: bool,
) (Io.ConcurrentError || Io.Cancelable)!?c.dispatch.queue_t {
    // Walks `batch.submitted`, moving each entry either to `batch.pending` (a dispatch source
    // was armed for it) or to `batch.completed` (a result is already known). Returns the
    // batch's dispatch queue, or null if none exists.
    var index = batch.submitted.head;
    if (index == .none) return @ptrCast(batch.userdata);
    // On error, the not-yet-processed remainder stays on the submitted list.
    errdefer batch.submitted.head = index;
    // Reuse the queue stored in `batch.userdata`, or create one; new queues start suspended.
    const maybe_queue: ?c.dispatch.queue_t = if (batch.userdata) |batch_userdata|
        @ptrCast(batch_userdata)
    else maybe_queue: {
        const queue = c.dispatch.queue_create_with_target(
            "org.ziglang.std.Io.Dispatch.Batch",
            .SERIAL(),
            ev.queue,
        ) orelse if (concurrency) return error.ConcurrencyUnavailable else break :maybe_queue null;
        queue.as_object().@"suspend"();
        batch.userdata = queue;
        break :maybe_queue queue;
    };
    while (index != .none) {
        const storage = &batch.storage[index.toIndex()];
        // Capture the successor now: `storage.*` is overwritten below.
        const next_index = storage.submission.node.next;
        if (@as(?Io.Operation.Result, result: {
            if (maybe_queue) |queue| switch (storage.submission.operation) {
                .file_read_streaming => |operation| {
                    // Skip leading empty buffers; nothing to read into means an immediate 0.
                    const data = for (operation.data, 0..) |buffer, data_index| {
                        if (buffer.len > 0) break operation.data[data_index..];
                    } else break :result .{ .file_read_streaming = 0 };
                    const source = c.dispatch.source_create(
                        .READ,
                        @bitCast(@as(isize, operation.file.handle)),
                        .none,
                        queue,
                    ) orelse break :result .{ .file_read_streaming = error.SystemResources };
                    storage.* = .{ .pending = .{
                        .node = .{ .prev = batch.pending.tail, .next = .none },
                        .tag = .file_read_streaming,
                        .userdata = undefined,
                    } };
                    const operation_userdata: *BatchOperationUserdata =
                        .fromErased(&storage.pending.userdata);
                    operation_userdata.* = .{
                        .batch = batch,
                        .source = source,
                        .operation = .{ .file_read_streaming = .{
                            .data_ptr = data.ptr,
                            .data_len = data.len,
                        } },
                    };
                    source.as_object().set_context(storage);
                    source.set_event_handler(&batchSourceEvent);
                    source.set_cancel_handler(&batchSourceCancel);
                    source.as_object().activate();
                    break :result null;
                },
                .file_write_streaming => |operation| {
                    // Skip leading empty buffers; with only a header to write, keep the first
                    // data element; with nothing at all to write, complete immediately with 0.
                    const data = for (operation.data, 0..) |buffer, data_index| {
                        if (buffer.len > 0) break operation.data[data_index..];
                    } else if (operation.header.len > 0)
                        operation.data[0..1]
                    else
                        break :result .{ .file_write_streaming = 0 };
                    const source = c.dispatch.source_create(
                        .WRITE,
                        @bitCast(@as(isize, operation.file.handle)),
                        .none,
                        queue,
                    ) orelse break :result .{ .file_write_streaming = error.SystemResources };
                    storage.* = .{ .pending = .{
                        .node = .{ .prev = batch.pending.tail, .next = .none },
                        .tag = .file_write_streaming,
                        .userdata = undefined,
                    } };
                    const operation_userdata: *BatchOperationUserdata =
                        .fromErased(&storage.pending.userdata);
                    operation_userdata.* = .{
                        .batch = batch,
                        .source = source,
                        .operation = .{ .file_write_streaming = .{
                            .header_ptr = operation.header.ptr,
                            .header_len = operation.header.len,
                            .data_ptr = data.ptr,
                            .data_len = data.len,
                            .splat = operation.splat,
                        } },
                    };
                    source.as_object().set_context(storage);
                    source.set_event_handler(&batchSourceEvent);
                    source.set_cancel_handler(&batchSourceCancel);
                    source.as_object().activate();
                    break :result null;
                },
                .device_io_control => {},
            };
            // Reached when there is no queue or the operation has no dispatch-source path:
            // run it synchronously, which is not allowed when concurrency was requested.
            if (concurrency) return error.ConcurrencyUnavailable;
            break :result try operate(ev, storage.submission.operation);
        })) |result| {
            // Result known: append to the completed list.
            switch (batch.completed.tail) {
                .none => batch.completed.head = index,
                else => |tail_index| batch.storage[tail_index.toIndex()].completion.node.next = index,
            }
            batch.completed.tail = index;
            storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } };
        } else {
            // Source armed: append to the pending list (`prev` was set when `storage` was written).
            switch (batch.pending.tail) {
                .none => batch.pending.head = index,
                else => |tail_index| batch.storage[tail_index.toIndex()].pending.node.next = index,
            }
            batch.pending.tail = index;
        }
        index = next_index;
    }
    batch.submitted = .{ .head = .none, .tail = .none };
    return maybe_queue;
}

/// Event handler for sources armed by `batchDrainSubmitted`; `context` is the operation's
/// `*Io.Operation.Storage`. Attempts the read or write limited to `source.get_data()` bytes.
/// On `error.WouldBlock` it returns and leaves the operation pending; otherwise it moves the
/// entry from the pending list to the completed list, releases the source and signals the
/// batch's waiter.
fn batchSourceEvent(context: ?*anyopaque) callconv(.c) void {
    const storage: *Io.Operation.Storage = @ptrCast(@alignCast(context));
    const pending = &storage.pending;
    const operation_userdata: *BatchOperationUserdata = .fromErased(&pending.userdata);
    const batch = operation_userdata.batch;
    const source = operation_userdata.source;
    const index: Io.Operation.OptionalIndex = .fromIndex(storage - batch.storage.ptr);
    const result: Io.Operation.Result = result: switch (pending.tag) {
        .file_read_streaming => {
            const operation = &operation_userdata.operation.file_read_streaming;
            break :result .{ .file_read_streaming = fileReadStreamingLimit(
                @intCast(source.get_handle()),
                operation.data_ptr[0..operation.data_len],
                .limited(source.get_data()),
            ) catch |err| switch (err) {
                error.Canceled => return Thread.current().currentFiber().cancel_protection.recancel(),
                error.WouldBlock => return,
                else => |e| e,
            } };
        },
        .file_write_streaming => {
            const operation = &operation_userdata.operation.file_write_streaming;
            break :result .{ .file_write_streaming = fileWriteStreamingLimit(
                @intCast(source.get_handle()),
                operation.header_ptr[0..operation.header_len],
                operation.data_ptr[0..operation.data_len],
                operation.splat,
                .limited(source.get_data()),
            ) catch |err| switch (err) {
                error.Canceled => return Thread.current().currentFiber().cancel_protection.recancel(),
                error.WouldBlock => return,
                else => |e| e,
            } };
        },
        .device_io_control => unreachable,
    };

    // Unlink from the doubly linked pending list.
    switch (pending.node.prev) {
        .none => batch.pending.head = pending.node.next,
        else => |prev_index| batch.storage[prev_index.toIndex()].pending.node.next = pending.node.next,
    }
    switch (pending.node.next) {
        .none => batch.pending.tail = pending.node.prev,
        else => |next_index| batch.storage[next_index.toIndex()].pending.node.prev = pending.node.prev,
    }

    // Append to the completed list.
    switch (batch.completed.tail) {
        .none => batch.completed.head = index,
        else => |tail_index| batch.storage[tail_index.toIndex()].completion.node.next = index,
    }
    storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } };
    batch.completed.tail = index;

    source.as_object().release();
    // The waiter was stored as the queue's context by the awaiting function.
    const queue: c.dispatch.queue_t = @ptrCast(batch.userdata);
    const waiter: *BatchWaiter = @ptrCast(@alignCast(queue.as_object().get_context()));
    BatchWaiter.signal(waiter);
}

/// Cancel handler for sources armed by `batchDrainSubmitted`; `context` is the operation's
/// `*Io.Operation.Storage`. Moves the entry from the pending list to the unused list and
/// releases the source. When that was the last pending entry, it also releases the batch queue
/// and wakes the waiter stored as the queue's context.
fn batchSourceCancel(context: ?*anyopaque) callconv(.c) void {
    const storage: *Io.Operation.Storage = @ptrCast(@alignCast(context));
    const pending = &storage.pending;
    const operation_userdata: *BatchOperationUserdata = .fromErased(&pending.userdata);
    const batch = operation_userdata.batch;
    const source = operation_userdata.source;
    const index: Io.Operation.OptionalIndex = .fromIndex(storage - batch.storage.ptr);

    // Unlink from the doubly linked pending list.
    switch (pending.node.prev) {
        .none => batch.pending.head = pending.node.next,
        else => |prev_index| batch.storage[prev_index.toIndex()].pending.node.next = pending.node.next,
    }
    switch (pending.node.next) {
        .none => batch.pending.tail = pending.node.prev,
        else => |next_index| batch.storage[next_index.toIndex()].pending.node.prev = pending.node.prev,
    }

    // Append to the unused list.
    const tail_index = batch.unused.tail;
    switch (tail_index) {
        .none => batch.unused.head = index,
        else => batch.storage[tail_index.toIndex()].unused.next = index,
    }
    storage.* = .{ .unused = .{ .prev = tail_index, .next = .none } };
    batch.unused.tail = index;

    source.as_object().release();
    if (batch.pending.head != .none) return;
    const queue: c.dispatch.queue_t = @ptrCast(batch.userdata);
    const waiter: *BatchWaiter = @ptrCast(@alignCast(queue.as_object().get_context()));
    queue.as_object().release();
    waiter.wake();
}

/// Creates directory `sub_path` relative to `dir` with `mkdirat`, retrying on `EINTR` and
/// translating errno values to `Dir.CreateDirError`.
fn dirCreateDir(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    permissions: Dir.Permissions,
) Dir.CreateDirError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;

    var path_buffer: [c.PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &path_buffer);

    while (true) {
        switch (c.errno(c.mkdirat(dir.handle, sub_path_posix, permissions.toMode()))) {
            .SUCCESS => return,
            .INTR => {},
            .ACCES => return error.AccessDenied,
            .PERM => return error.PermissionDenied,
            .DQUOT => return error.DiskQuota,
            .EXIST => return error.PathAlreadyExists,
            .LOOP => return error.SymLinkLoop,
            .MLINK => return error.LinkQuotaExceeded,
            .NAMETOOLONG => return error.NameTooLong,
            .NOENT => return error.FileNotFound,
            .NOMEM => return error.SystemResources,
            .NOSPC => return error.NoSpaceLeft,
            .NOTDIR => return error.NotDir,
            .ROFS => return error.ReadOnlyFileSystem,
            .ILSEQ => return error.BadPathName,
            .BADF => |err| return errnoBug(err), // File descriptor used after closed.
            .FAULT => |err| return errnoBug(err),
            else => |err| return unexpectedErrno(err),
        }
    }
}

/// Creates `sub_path` and any missing parent directories.
///
/// Starts at the last path component; on `error.FileNotFound` it steps back to the previous
/// component, and after each success or already-existing directory it steps forward again.
/// Returns `.created` if at least one directory was created, `.existed` otherwise.
fn dirCreateDirPath(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    permissions: Dir.Permissions,
) Dir.CreateDirPathError!Dir.CreatePathStatus {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var it = Dir.path.componentIterator(sub_path);
    var status: Dir.CreatePathStatus = .existed;
    var component = it.last() orelse return error.BadPathName;
    while (true) {
        if (dirCreateDir(ev, dir, component.path, permissions)) |_| {
            status = .created;
        } else |err| switch (err) {
            error.PathAlreadyExists => {
                // It is important to return an error if it's not a directory
                // because otherwise a dangling symlink could cause an infinite
                // loop.
                const fstat = try dirStatFile(ev, dir, component.path, .{});
                if (fstat.kind != .directory) return error.NotDir;
            },
            error.FileNotFound => |e| {
                component = it.previous() orelse return e;
                continue;
            },
            else => |e| return e,
        }
        component = it.next() orelse return status;
    }
}

/// Opens directory `sub_path`; if it does not exist, creates the whole path with
/// `dirCreateDirPath` and opens it again.
fn dirCreateDirPathOpen(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    permissions: Dir.Permissions,
    options: Dir.OpenOptions,
) Dir.CreateDirPathOpenError!Dir {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    return dirOpenDir(ev, dir, sub_path, options) catch |err| switch (err) {
        error.FileNotFound => {
            _ = try dirCreateDirPath(ev, dir, sub_path, permissions);
            return dirOpenDir(ev, dir, sub_path, options);
        },
        else => |e| return e,
    };
}

/// Opens directory `sub_path` relative to `dir` with `openat` (read-only, `O_DIRECTORY`,
/// `O_CLOEXEC`, and `O_NOFOLLOW` unless `options.follow_symlinks`), retrying on `EINTR`.
fn dirOpenDir(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    options: Dir.OpenOptions,
) Dir.OpenError!Dir {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;

    var path_buffer: [c.PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &path_buffer);

    const flags: c.O = .{
        .ACCMODE = .RDONLY,
        .NOFOLLOW = !options.follow_symlinks,
        .DIRECTORY = true,
        .CLOEXEC = true,
    };

    while (true) {
        const rc = c.openat(dir.handle, sub_path_posix, flags);
        switch (c.errno(rc)) {
            .SUCCESS => return .{ .handle = @intCast(rc) },
            .INTR => {},
            .INVAL => return error.BadPathName,
            .ACCES => return error.AccessDenied,
            .LOOP => return error.SymLinkLoop,
            .MFILE => return error.ProcessFdQuotaExceeded,
            .NAMETOOLONG => return error.NameTooLong,
            .NFILE => return error.SystemFdQuotaExceeded,
            .NODEV => return error.NoDevice,
            .NOENT => return error.FileNotFound,
            .NOMEM => return error.SystemResources,
            .NOTDIR => return error.NotDir,
            .PERM => return error.PermissionDenied,
            .NXIO => return error.NoDevice,
            .ILSEQ => return error.BadPathName,
            .FAULT => |err| return errnoBug(err),
            .BADF => |err| return errnoBug(err), // File descriptor used after closed.
            .BUSY => |err| return errnoBug(err), // O_EXCL not passed
            else => |err| return unexpectedErrno(err),
        }
    }
}

/// Stats the directory itself by delegating to `fileStat` on its handle.
fn dirStat(userdata: ?*anyopaque, dir: Dir) Dir.StatError!Dir.Stat {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    return fileStat(ev, .{
        .handle = dir.handle,
        .flags = .{ .nonblocking = false },
    });
}

/// Stats `sub_path` relative to `dir` with `fstatat`, retrying on `EINTR`. Symlinks are not
/// followed unless `options.follow_symlinks` is set.
fn dirStatFile(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    options: Dir.StatFileOptions,
) Dir.StatFileError!File.Stat {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;

    var path_buffer: [c.PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &path_buffer);

    const flags: u32 = if (options.follow_symlinks) 0 else c.AT.SYMLINK_NOFOLLOW;

    while (true) {
        var stat = std.mem.zeroes(c.Stat);
        switch (c.errno(c.fstatat(dir.handle, sub_path_posix, &stat, flags))) {
            .SUCCESS => return statFromPosix(&stat),
            .INTR => {},
            .INVAL => |err| return errnoBug(err),
            .BADF => |err| return errnoBug(err), // File descriptor used after closed.
            .NOMEM => return error.SystemResources,
            .ACCES => return error.AccessDenied,
            .PERM => return error.PermissionDenied,
            .FAULT => |err| return errnoBug(err),
            .NAMETOOLONG => return error.NameTooLong,
            .LOOP => return error.SymLinkLoop,
            .NOENT => return error.FileNotFound,
            .NOTDIR => return error.FileNotFound,
            .ILSEQ => return error.BadPathName,
            else => |err| return unexpectedErrno(err),
        }
    }
}

/// Checks access to `sub_path` relative to `dir` with `faccessat`, retrying on `EINTR`.
/// The mode is the OR of `R_OK`/`W_OK`/`X_OK` selected by `options`.
fn dirAccess(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    options: Dir.AccessOptions,
) Dir.AccessError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;

    var path_buffer: [c.PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &path_buffer);

    const flags: u32 = if (options.follow_symlinks) 0 else c.AT.SYMLINK_NOFOLLOW;

    const mode: u32 =
        @as(u32, if (options.read) c.R_OK else 0) |
        @as(u32, if (options.write) c.W_OK else 0) |
        @as(u32, if (options.execute) c.X_OK else 0);

    while (true) switch (c.errno(c.faccessat(dir.handle, sub_path_posix, mode, flags))) {
        .SUCCESS => return,
        .INTR => {},
        .ACCES => return error.AccessDenied,
        .PERM => return error.PermissionDenied,
        .ROFS => return error.ReadOnlyFileSystem,
        .LOOP => return error.SymLinkLoop,
        .TXTBSY => return error.FileBusy,
        .NOTDIR => return error.FileNotFound,
        .NOENT => return error.FileNotFound,
        .NAMETOOLONG => return error.NameTooLong,
        .INVAL => |err| return errnoBug(err),
        .FAULT => |err| return errnoBug(err),
        .IO => return error.InputOutput,
        .NOMEM => return error.SystemResources,
        .ILSEQ => return error.BadPathName,
        else => |err| return unexpectedErrno(err),
    };
}

/// Creates (or opens for writing) `sub_path` relative to `dir` with `openat(O_CREAT)`,
/// retrying on `EINTR`. The returned file records whether `O_NONBLOCK` was requested.
fn dirCreateFile(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    flags: File.CreateFlags,
) File.OpenError!File {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;

    var path_buffer: [c.PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &path_buffer);

    const os_flags: c.O = .{
        .ACCMODE = if (flags.read) .RDWR else .WRONLY,
        // O_NONBLOCK is set when no lock is requested, or when the lock itself is nonblocking.
        .NONBLOCK = flags.lock == .none or flags.lock_nonblocking,
        .SHLOCK = flags.lock == .shared,
        .EXLOCK = flags.lock == .exclusive,
        .CREAT = true,
        .TRUNC = flags.truncate,
        .EXCL = flags.exclusive,
        .CLOEXEC = true,
    };

    const fd: c.fd_t = while (true) {
        const rc = c.openat(dir.handle, sub_path_posix, os_flags, flags.permissions.toMode());
        switch (c.errno(rc)) {
            .SUCCESS => break @intCast(rc),
            .INTR => {},
            .FAULT => |err| return errnoBug(err),
            .INVAL => return error.BadPathName,
            .BADF => |err| return errnoBug(err), // File descriptor used after closed.
            .ACCES => return error.AccessDenied,
            .FBIG => return error.FileTooBig,
            .OVERFLOW => return error.FileTooBig,
            .ISDIR => return error.IsDir,
            .LOOP => return error.SymLinkLoop,
            .MFILE => return error.ProcessFdQuotaExceeded,
            .NAMETOOLONG => return error.NameTooLong,
            .NFILE => return error.SystemFdQuotaExceeded,
            .NODEV => return error.NoDevice,
            .NOENT => return error.FileNotFound,
            .NOMEM => return error.SystemResources,
            .NOSPC => return error.NoSpaceLeft,
            .NOTDIR => return error.NotDir,
            .PERM => return error.PermissionDenied,
            .EXIST => return error.PathAlreadyExists,
            .BUSY => return error.DeviceBusy,
            .OPNOTSUPP => return error.FileLocksUnsupported,
            .AGAIN => return error.WouldBlock,
            .TXTBSY => return error.FileBusy,
            .NXIO => return error.NoDevice,
            .ILSEQ => return error.BadPathName,
            else => |err| return unexpectedErrno(err),
        }
    };
    // NOTE(review): no fallible operation follows, so this errdefer never fires as written.
    errdefer closeFd(fd);

    return .{
        .handle = fd,
        .flags = .{ .nonblocking = os_flags.NONBLOCK },
    };
}

/// Starts an atomic file creation for `dest_path`.
///
/// When `dest_path` has a directory part, that directory is opened (and created first if
/// `options.make_path` is set) and handed to `atomicFileInit` with `close_dir_on_deinit = true`;
/// otherwise `dir` itself is used and is not closed on deinit.
fn dirCreateFileAtomic(
    userdata: ?*anyopaque,
    dir: Dir,
    dest_path: []const u8,
    options: Dir.CreateFileAtomicOptions,
) Dir.CreateFileAtomicError!File.Atomic {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    if (Dir.path.dirname(dest_path)) |dirname| {
        const new_dir = if (options.make_path)
            dirCreateDirPathOpen(ev, dir, dirname, .default_dir, .{}) catch |err| switch (err) {
                // None of these make sense in this context.
                error.IsDir,
                error.Streaming,
                error.DiskQuota,
                error.PathAlreadyExists,
                error.LinkQuotaExceeded,
                error.PipeBusy,
                error.FileTooBig,
                error.FileLocksUnsupported,
                error.DeviceBusy,
                => return error.Unexpected,

                else => |e| return e,
            }
        else
            try dirOpenDir(ev, dir, dirname, .{});
        return ev.atomicFileInit(Dir.path.basename(dest_path), options.permissions, new_dir, true);
    }
    return ev.atomicFileInit(dest_path, options.permissions, dir, false);
}

/// Creates the temporary file backing a `File.Atomic` in `dir`.
///
/// The temporary name is the hex form of a random 64-bit integer; creation is exclusive and
/// is retried with a new name on `PathAlreadyExists`, `DeviceBusy` or `FileBusy`.
fn atomicFileInit(
    ev: *Evented,
    dest_basename: []const u8,
    permissions: File.Permissions,
    dir: Dir,
    close_dir_on_deinit: bool,
) Dir.CreateFileAtomicError!File.Atomic {
    while (true) {
        var random_integer: u64 = undefined;
        random(ev, @ptrCast(&random_integer));
        const tmp_sub_path = std.fmt.hex(random_integer);
        const file = dirCreateFile(ev, dir, &tmp_sub_path, .{
            .permissions = permissions,
            .exclusive = true,
        }) catch |err| switch (err) {
            error.PathAlreadyExists => continue,
            error.DeviceBusy => continue,
            error.FileBusy => continue,

            error.IsDir => return error.Unexpected, // No path components.
            error.FileTooBig => return error.Unexpected, // Creating, not opening.
            error.FileLocksUnsupported => return error.Unexpected, // Not asking for locks.
            error.PipeBusy => return error.Unexpected, // Not opening a pipe.

            else => |e| return e,
        };
        return .{
            .file = file,
            .file_basename_hex = random_integer,
            .dest_sub_path = dest_basename,
            .file_open = true,
            .file_exists = true,
            .close_dir_on_deinit = close_dir_on_deinit,
            .dir = dir,
        };
    }
}

/// Opens existing file `sub_path` relative to `dir` with `openat`, retrying on `EINTR`.
///
/// Unless `flags.allow_directory` is set, the opened descriptor is stat-ed and
/// `error.IsDir` is returned (closing the descriptor) if it refers to a directory.
fn dirOpenFile(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    flags: File.OpenFlags,
) File.OpenError!File {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var path_buffer: [c.PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &path_buffer);

    const os_flags: c.O = .{
        .ACCMODE = switch (flags.mode) {
            .read_only => .RDONLY,
            .write_only => .WRONLY,
            .read_write => .RDWR,
        },
        // O_NONBLOCK is set when no lock is requested, or when the lock itself is nonblocking.
        .NONBLOCK = flags.lock == .none or flags.lock_nonblocking,
        .SHLOCK = flags.lock == .shared,
        .EXLOCK = flags.lock == .exclusive,
        .NOFOLLOW = !flags.follow_symlinks,
        .NOCTTY = !flags.allow_ctty,
        .CLOEXEC = true,
    };

    const fd: c.fd_t = while (true) {
        const rc = c.openat(dir.handle, sub_path_posix, os_flags);
        switch (c.errno(rc)) {
            .SUCCESS => break @intCast(rc),
            .INTR => {},
            .FAULT => |err| return errnoBug(err),
            .INVAL => return error.BadPathName,
            .BADF => |err| return errnoBug(err), // File descriptor used after closed.
            .ACCES => return error.AccessDenied,
            .FBIG => return error.FileTooBig,
            .OVERFLOW => return error.FileTooBig,
            .ISDIR => return error.IsDir,
            .LOOP => return error.SymLinkLoop,
            .MFILE => return error.ProcessFdQuotaExceeded,
            .NAMETOOLONG => return error.NameTooLong,
            .NFILE => return error.SystemFdQuotaExceeded,
            .NODEV => return error.NoDevice,
            .NOENT => return error.FileNotFound,
            .NOMEM => return error.SystemResources,
            .NOSPC => return error.NoSpaceLeft,
            .NOTDIR => return error.NotDir,
            .PERM => return error.PermissionDenied,
            .EXIST => return error.PathAlreadyExists,
            .BUSY => return error.DeviceBusy,
            .OPNOTSUPP => return error.FileLocksUnsupported,
            .AGAIN => return error.WouldBlock,
            .TXTBSY => return error.FileBusy,
            .NXIO => return error.NoDevice,
            .ILSEQ => return error.BadPathName,
            else => |err| return unexpectedErrno(err),
        }
    };
    // Covers the error returns of the directory check below.
    errdefer closeFd(fd);

    if (!flags.allow_directory) {
        const is_dir = is_dir: {
            const stat = fileStat(ev, .{
                .handle = fd,
                .flags = .{ .nonblocking = false },
            }) catch |err| switch (err) {
                // The directory-ness is either unknown or unknowable
                error.Streaming => break :is_dir false,
                else => |e| return e,
            };
            break :is_dir stat.kind == .directory;
        };
        if (is_dir) return error.IsDir;
    }

    return .{
        .handle = fd,
        .flags = .{ .nonblocking = os_flags.NONBLOCK },
    };
}

/// Closes the handle of every directory in `dirs`.
fn dirClose(userdata: ?*anyopaque, dirs: []const Dir) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    for (dirs) |dir| closeFd(dir.handle);
}

fn dirRead(userdata: ?*anyopaque, dr: *Dir.Reader, buffer: []Dir.Entry) Dir.Reader.Error!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    const Header = extern struct {
        seek: i64,
    };
    const header: *Header = @ptrCast(dr.buffer.ptr);
    const header_end: usize = @sizeOf(Header);
    if (dr.index < header_end) {
        // Initialize header.
+ dr.index = header_end; + dr.end = header_end; + header.* = .{ .seek = 0 }; + } + var buffer_index: usize = 0; + while (buffer.len - buffer_index != 0) { + if (dr.end - dr.index == 0) { + // Refill the buffer, unless we've already created references to + // buffered data. + if (buffer_index != 0) break; + if (dr.state == .reset) { + ev.lseek(dr.dir.handle, 0, c.SEEK.SET) catch |err| switch (err) { + error.Unseekable => return error.Unexpected, + else => |e| return e, + }; + dr.state = .reading; + } + const dents_buffer = dr.buffer[header_end..]; + const n: usize = while (true) { + const rc = c.getdirentries(dr.dir.handle, dents_buffer.ptr, dents_buffer.len, &header.seek); + switch (c.errno(rc)) { + .SUCCESS => break @intCast(rc), + .INTR => {}, + .BADF => |err| return errnoBug(err), // Dir is invalid or was opened without iteration ability. + .FAULT => |err| return errnoBug(err), + .NOTDIR => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + }; + if (n == 0) { + dr.state = .finished; + return 0; + } + dr.index = header_end; + dr.end = header_end + n; + } + const darwin_entry = @as(*align(1) c.dirent, @ptrCast(&dr.buffer[dr.index])); + const next_index = dr.index + darwin_entry.reclen; + dr.index = next_index; + + const name = @as([*]u8, @ptrCast(&darwin_entry.name))[0..darwin_entry.namlen]; + if (std.mem.eql(u8, name, ".") or std.mem.eql(u8, name, "..") or (darwin_entry.ino == 0)) + continue; + + const entry_kind: File.Kind = switch (darwin_entry.type) { + c.DT.BLK => .block_device, + c.DT.CHR => .character_device, + c.DT.DIR => .directory, + c.DT.FIFO => .named_pipe, + c.DT.LNK => .sym_link, + c.DT.REG => .file, + c.DT.SOCK => .unix_domain_socket, + c.DT.WHT => .whiteout, + else => .unknown, + }; + buffer[buffer_index] = .{ + .name = name, + .kind = entry_kind, + .inode = darwin_entry.ino, + }; + buffer_index += 1; + } + return buffer_index; +} + +fn dirRealPath(userdata: ?*anyopaque, dir: Dir, 
out_buffer: []u8) Dir.RealPathError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return ev.realPath(dir.handle, out_buffer); +} + +fn realPath(ev: *Evented, fd: c.fd_t, out_buffer: []u8) File.RealPathError!usize { + _ = ev; + var buffer: [c.PATH_MAX]u8 = undefined; + @memset(&buffer, 0); + while (true) { + switch (c.errno(c.fcntl(fd, c.F.GETPATH, &buffer))) { + .SUCCESS => break, + .INTR => {}, + .ACCES => return error.AccessDenied, + .BADF => return error.FileNotFound, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NameTooLong, + .RANGE => return error.NameTooLong, + else => |err| return unexpectedErrno(err), + } + } + const n = std.mem.indexOfScalar(u8, &buffer, 0) orelse buffer.len; + if (n > out_buffer.len) return error.NameTooLong; + @memcpy(out_buffer[0..n], buffer[0..n]); + return n; +} + +fn dirRealPathFile( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + out_buffer: []u8, +) Dir.RealPathFileError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [c.PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + if (dir.handle == c.AT.FDCWD) { + if (out_buffer.len < c.PATH_MAX) return error.NameTooLong; + while (true) { + if (c.realpath(sub_path_posix, out_buffer.ptr)) |redundant_pointer| { + assert(redundant_pointer == out_buffer.ptr); + return std.mem.indexOfScalar(u8, out_buffer, 0) orelse out_buffer.len; + } + const err: c.E = @enumFromInt(c._errno().*); + switch (err) { + .INTR => {}, + .INVAL => return errnoBug(err), + .BADF => return errnoBug(err), + .FAULT => return errnoBug(err), + .ACCES => return error.AccessDenied, + .NOENT => return error.FileNotFound, + .OPNOTSUPP => return error.OperationUnsupported, + .NOTDIR => return error.NotDir, + .NAMETOOLONG => return error.NameTooLong, + .LOOP => return error.SymLinkLoop, + .IO => return error.InputOutput, + else => return unexpectedErrno(err), 
+ } + } + } + + const os_flags: c.O = .{ + .NONBLOCK = true, + .CLOEXEC = true, + }; + + const fd: c.fd_t = while (true) { + const rc = c.openat(dir.handle, sub_path_posix, os_flags); + switch (c.errno(rc)) { + .SUCCESS => break @intCast(rc), + .INTR => {}, + .FAULT => |err| return errnoBug(err), + .INVAL => return error.BadPathName, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .ACCES => return error.AccessDenied, + .FBIG => return error.FileTooBig, + .OVERFLOW => return error.FileTooBig, + .ISDIR => return error.IsDir, + .LOOP => return error.SymLinkLoop, + .MFILE => return error.ProcessFdQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NFILE => return error.SystemFdQuotaExceeded, + .NODEV => return error.NoDevice, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .NOTDIR => return error.NotDir, + .PERM => return error.PermissionDenied, + .EXIST => return error.PathAlreadyExists, + .BUSY => return error.DeviceBusy, + .NXIO => return error.NoDevice, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } + }; + defer closeFd(fd); + return ev.realPath(fd, out_buffer); +} + +fn dirDeleteFile(userdata: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteFileError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var path_buffer: [c.PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + while (true) switch (c.errno(c.unlinkat(dir.handle, sub_path_posix, 0))) { + .SUCCESS => return, + .INTR => {}, + // Some systems return permission errors when trying to delete a + // directory, so we need to handle that case specifically and + // translate the error. + .PERM => { + // Don't follow symlinks to match unlinkat (which acts on symlinks rather than follows them). 
+ var st = std.mem.zeroes(c.Stat); + while (true) switch (c.errno(c.fstatat( + dir.handle, + sub_path_posix, + &st, + c.AT.SYMLINK_NOFOLLOW, + ))) { + .SUCCESS => break, + .INTR => {}, + else => return error.PermissionDenied, + }; + if (st.mode & c.S.IFMT == c.S.IFDIR) return error.IsDir else return error.PermissionDenied; + }, + .ACCES => return error.AccessDenied, + .BUSY => return error.FileBusy, + .FAULT => |err| return errnoBug(err), + .IO => return error.FileSystem, + .ISDIR => return error.IsDir, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .ROFS => return error.ReadOnlyFileSystem, + .EXIST => |err| return errnoBug(err), + .NOTEMPTY => |err| return errnoBug(err), // Not passing AT.REMOVEDIR + .ILSEQ => return error.BadPathName, + .INVAL => |err| return errnoBug(err), // invalid flags, or pathname has . as last component + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ else => |err| return unexpectedErrno(err), + }; +} + +fn dirDeleteDir(userdata: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteDirError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var path_buffer: [c.PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + while (true) switch (c.errno(c.unlinkat(dir.handle, sub_path_posix, c.AT.REMOVEDIR))) { + .SUCCESS => return, + .INTR => {}, + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .BUSY => return error.FileBusy, + .FAULT => |err| return errnoBug(err), + .IO => return error.FileSystem, + .ISDIR => |err| return errnoBug(err), + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .ROFS => return error.ReadOnlyFileSystem, + .EXIST => |err| return errnoBug(err), + .NOTEMPTY => return error.DirNotEmpty, + .ILSEQ => return error.BadPathName, + .INVAL => |err| return errnoBug(err), // invalid flags, or pathname has . as last component + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ else => |err| return unexpectedErrno(err), + }; +} + +fn dirRename( + userdata: ?*anyopaque, + old_dir: Dir, + old_sub_path: []const u8, + new_dir: Dir, + new_sub_path: []const u8, +) Dir.RenameError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var old_path_buffer: [c.PATH_MAX]u8 = undefined; + var new_path_buffer: [c.PATH_MAX]u8 = undefined; + + const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer); + const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); + + while (true) switch (c.errno(c.renameat(old_dir.handle, old_sub_path_posix, new_dir.handle, new_sub_path_posix))) { + .SUCCESS => return, + .INTR => {}, + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .BUSY => return error.FileBusy, + .DQUOT => return error.DiskQuota, + .ISDIR => return error.IsDir, + .IO => return error.HardwareFailure, + .LOOP => return error.SymLinkLoop, + .MLINK => return error.LinkQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .EXIST => return error.DirNotEmpty, + .NOTEMPTY => return error.DirNotEmpty, + .ROFS => return error.ReadOnlyFileSystem, + .XDEV => return error.CrossDevice, + .ILSEQ => return error.BadPathName, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + }; +} + +fn dirRenamePreserve( + userdata: ?*anyopaque, + old_dir: Dir, + old_sub_path: []const u8, + new_dir: Dir, + new_sub_path: []const u8, +) Dir.RenamePreserveError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + // Make a hard link then delete the original. 
+ try dirHardLink(ev, old_dir, old_sub_path, new_dir, new_sub_path, .{ .follow_symlinks = false }); + const prev = swapCancelProtection(ev, .blocked); + defer _ = swapCancelProtection(ev, prev); + dirDeleteFile(ev, old_dir, old_sub_path) catch {}; +} + +fn dirSymLink( + userdata: ?*anyopaque, + dir: Dir, + target_path: []const u8, + sym_link_path: []const u8, + flags: Dir.SymLinkFlags, +) Dir.SymLinkError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = flags; + + var target_path_buffer: [c.PATH_MAX]u8 = undefined; + var sym_link_path_buffer: [c.PATH_MAX]u8 = undefined; + + const target_path_posix = try pathToPosix(target_path, &target_path_buffer); + const sym_link_path_posix = try pathToPosix(sym_link_path, &sym_link_path_buffer); + + while (true) switch (c.errno(c.symlinkat(target_path_posix, dir.handle, sym_link_path_posix))) { + .SUCCESS => return, + .INTR => {}, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .DQUOT => return error.DiskQuota, + .EXIST => return error.PathAlreadyExists, + .IO => return error.FileSystem, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .ROFS => return error.ReadOnlyFileSystem, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + }; +} + +fn dirReadLink( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + buffer: []u8, +) Dir.ReadLinkError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var sub_path_buffer: [c.PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &sub_path_buffer); + while (true) { + const rc = c.readlinkat(dir.handle, sub_path_posix, buffer.ptr, buffer.len); + switch (c.errno(rc)) { + .SUCCESS => 
return @intCast(rc), + .INTR => {}, + .ACCES => return error.AccessDenied, + .FAULT => |err| return errnoBug(err), + .INVAL => return error.NotLink, + .IO => return error.FileSystem, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.NotDir, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } + } +} + +fn dirSetOwner( + userdata: ?*anyopaque, + dir: Dir, + owner: ?File.Uid, + group: ?File.Gid, +) Dir.SetOwnerError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + return fchown(dir.handle, owner, group); +} + +fn fchown(fd: c.fd_t, owner: ?File.Uid, group: ?File.Gid) File.SetOwnerError!void { + const uid = owner orelse std.math.maxInt(c.uid_t); + const gid = group orelse std.math.maxInt(c.gid_t); + while (true) switch (c.errno(c.fchown(fd, uid, gid))) { + .SUCCESS => return, + .INTR => {}, + .BADF => |err| return errnoBug(err), // likely fd refers to directory opened without `Dir.OpenOptions.iterate` + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .IO => return error.InputOutput, + .LOOP => return error.SymLinkLoop, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.FileNotFound, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + }; +} + +fn dirSetFileOwner( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + owner: ?File.Uid, + group: ?File.Gid, + options: Dir.SetFileOwnerOptions, +) Dir.SetFileOwnerError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var path_buffer: [c.PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + _ = ev; + while (true) switch (c.errno(c.fchownat( + dir.handle, + 
sub_path_posix, + owner orelse std.math.maxInt(c.uid_t), + group orelse std.math.maxInt(c.gid_t), + if (options.follow_symlinks) 0 else c.AT.SYMLINK_NOFOLLOW, + ))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // likely fd refers to directory opened without `Dir.OpenOptions.iterate` + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .IO => return error.InputOutput, + .LOOP => return error.SymLinkLoop, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.FileNotFound, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + }; +} + +fn dirSetPermissions( + userdata: ?*anyopaque, + dir: Dir, + permissions: Dir.Permissions, +) Dir.SetPermissionsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return ev.fchmod(dir.handle, permissions.toMode()); +} + +fn dirSetFilePermissions( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + permissions: Dir.Permissions, + options: Dir.SetFilePermissionsOptions, +) Dir.SetFilePermissionsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var path_buffer: [c.PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + const mode = permissions.toMode(); + const flags: u32 = if (options.follow_symlinks) 0 else c.AT.SYMLINK_NOFOLLOW; + + while (true) switch (c.errno(c.fchmodat(dir.handle, sub_path_posix, mode, flags))) { + .SUCCESS => return, + .INTR => {}, + .BADF => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .IO => return error.InputOutput, + .LOOP => return error.SymLinkLoop, + .MFILE => return error.ProcessFdQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NFILE => return 
error.SystemFdQuotaExceeded, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .OPNOTSUPP => return error.OperationUnsupported, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + }; +} + +fn dirSetTimestamps( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + options: Dir.SetTimestampsOptions, +) Dir.SetTimestampsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var times_buffer: [2]c.timespec = undefined; + const times = if (options.modify_timestamp == .now and options.access_timestamp == .now) null else p: { + times_buffer = .{ + setTimestampToPosix(options.access_timestamp), + setTimestampToPosix(options.modify_timestamp), + }; + break :p &times_buffer; + }; + + const flags: u32 = if (options.follow_symlinks) 0 else c.AT.SYMLINK_NOFOLLOW; + + var path_buffer: [c.PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + while (true) switch (c.errno(c.utimensat(dir.handle, sub_path_posix, times, flags))) { + .SUCCESS => return, + .INTR => {}, + .BADF => |err| return errnoBug(err), // always a race condition + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + }; +} + +fn dirHardLink( + userdata: ?*anyopaque, + old_dir: Dir, + old_sub_path: []const u8, + new_dir: Dir, + new_sub_path: []const u8, + options: Dir.HardLinkOptions, +) Dir.HardLinkError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var old_path_buffer: [c.PATH_MAX]u8 = undefined; + var new_path_buffer: [c.PATH_MAX]u8 = undefined; + + const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer); + const new_sub_path_posix = try 
pathToPosix(new_sub_path, &new_path_buffer); + + const flags: u32 = if (options.follow_symlinks) c.AT.SYMLINK_FOLLOW else 0; + return linkat(old_dir.handle, old_sub_path_posix, new_dir.handle, new_sub_path_posix, flags); +} + +fn fileStat(userdata: ?*anyopaque, file: File) File.StatError!File.Stat { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + while (true) { + var stat = std.mem.zeroes(c.Stat); + switch (c.errno(c.fstat(file.handle, &stat))) { + .SUCCESS => return statFromPosix(&stat), + .INTR => {}, + .INVAL => |err| return errnoBug(err), + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .NOMEM => return error.SystemResources, + .ACCES => return error.AccessDenied, + else => |err| return unexpectedErrno(err), + } + } +} + +fn fileLength(userdata: ?*anyopaque, file: File) File.LengthError!u64 { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const stat = try fileStat(ev, file); + return stat.size; +} + +fn fileClose(userdata: ?*anyopaque, files: []const File) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + for (files) |file| closeFd(file.handle); +} + +fn fileWritePositional( + userdata: ?*anyopaque, + file: File, + header: []const u8, + data: []const []const u8, + splat: usize, + offset: u64, +) File.WritePositionalError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var iovecs: [max_iovecs_len]iovec_const = undefined; + var iovlen: iovlen_t = 0; + var remaining: Io.Limit = .unlimited; + addBuf(true, &iovecs, &iovlen, &remaining, header); + for (data[0 .. 
data.len - 1]) |bytes| addBuf(true, &iovecs, &iovlen, &remaining, bytes); + const pattern = data[data.len - 1]; + var backup_buffer: [splat_buffer_size]u8 = undefined; + if (iovecs.len - iovlen != 0 and remaining != .nothing) switch (splat) { + 0 => {}, + 1 => addBuf(true, &iovecs, &iovlen, &remaining, pattern), + else => switch (pattern.len) { + 0 => {}, + 1 => { + const splat_buffer = &backup_buffer; + const memset_len = @min(splat_buffer.len, splat); + const buf = splat_buffer[0..memset_len]; + @memset(buf, pattern[0]); + addBuf(true, &iovecs, &iovlen, &remaining, buf); + var remaining_splat = splat - buf.len; + while (remaining_splat > splat_buffer.len and iovecs.len - iovlen != 0 and remaining != .nothing) { + assert(buf.len == splat_buffer.len); + addBuf(true, &iovecs, &iovlen, &remaining, splat_buffer); + remaining_splat -= splat_buffer.len; + } + addBuf(true, &iovecs, &iovlen, &remaining, splat_buffer[0..@min(remaining_splat, splat_buffer.len)]); + }, + else => for (0..@min(splat, iovecs.len - iovlen)) |_| { + if (remaining == .nothing) break; + addBuf(true, &iovecs, &iovlen, &remaining, pattern); + }, + }, + }; + if (iovlen == 0) return 0; + while (true) { + const rc = c.pwritev(file.handle, &iovecs, iovlen, @bitCast(offset)); + switch (c.errno(rc)) { + .SUCCESS => return @intCast(rc), + .INTR => {}, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .DESTADDRREQ => |err| return errnoBug(err), // `connect` was never called. + .CONNRESET => |err| return errnoBug(err), // Not a socket handle. 
+ .BADF => return error.NotOpenForWriting, + .AGAIN => return error.WouldBlock, + .DQUOT => return error.DiskQuota, + .FBIG => return error.FileTooBig, + .IO => return error.InputOutput, + .NOSPC => return error.NoSpaceLeft, + .PERM => return error.PermissionDenied, + .PIPE => return error.BrokenPipe, + .BUSY => return error.DeviceBusy, + .TXTBSY => return error.FileBusy, + .NXIO => return error.Unseekable, + .SPIPE => return error.Unseekable, + .OVERFLOW => return error.Unseekable, + else => |err| return unexpectedErrno(err), + } + } +} + +fn fileWriteFileStreaming( + userdata: ?*anyopaque, + file: File, + header: []const u8, + file_reader: *File.Reader, + limit: Io.Limit, +) File.Writer.WriteFileError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const reader_buffered = file_reader.interface.buffered(); + if (reader_buffered.len >= @intFromEnum(limit)) { + const n = try fileWriteStreaming(ev, file, header, &.{limit.slice(reader_buffered)}, 1); + file_reader.interface.toss(n -| header.len); + return n; + } + const file_limit = @intFromEnum(limit) - reader_buffered.len; + const out_fd = file.handle; + const in_fd = file_reader.file.handle; + + if (file_reader.size) |size| { + if (size - file_reader.pos == 0) { + if (reader_buffered.len != 0) { + const n = try fileWriteStreaming(ev, file, header, &.{limit.slice(reader_buffered)}, 1); + file_reader.interface.toss(n -| header.len); + return n; + } else { + return error.EndOfStream; + } + } + } + + if (@atomicLoad(UseSendfile, &ev.use_sendfile, .monotonic) == .disabled) return error.Unimplemented; + const offset = std.math.cast(c.off_t, file_reader.pos) orelse return error.Unimplemented; + var hdtr_data: c.sf_hdtr = undefined; + var headers: [2]iovec_const = undefined; + var headers_i: u8 = 0; + if (header.len != 0) { + headers[headers_i] = .{ .base = header.ptr, .len = header.len }; + headers_i += 1; + } + if (reader_buffered.len != 0) { + headers[headers_i] = .{ .base = reader_buffered.ptr, .len = 
reader_buffered.len }; + headers_i += 1; + } + const hdtr: ?*c.sf_hdtr = if (headers_i == 0) null else b: { + hdtr_data = .{ + .headers = &headers, + .hdr_cnt = headers_i, + .trailers = null, + .trl_cnt = 0, + }; + break :b &hdtr_data; + }; + const max_count = std.math.maxInt(i32); // Avoid EINVAL. + var len: c.off_t = @min(file_limit, max_count); + const flags = 0; + while (true) switch (c.errno(c.sendfile(in_fd, out_fd, offset, &len, hdtr, flags))) { + .SUCCESS => break, + .OPNOTSUPP, .NOTSOCK, .NOSYS => { + // Give calling code chance to observe before trying + // something else. + @atomicStore(UseSendfile, &ev.use_sendfile, .disabled, .monotonic); + return 0; + }, + .INTR => if (len > 0) break, + .AGAIN => { + if (len == 0) return error.WouldBlock; + break; + }, + else => |e| { + assert(error.Unexpected == switch (e) { + .NOTCONN => return error.BrokenPipe, + .IO => return error.InputOutput, + .PIPE => return error.BrokenPipe, + .BADF => |err| errnoBug(err), + .FAULT => |err| errnoBug(err), + .INVAL => |err| errnoBug(err), + else => |err| unexpectedErrno(err), + }); + // Give calling code chance to observe the error before trying + // something else. 
+ @atomicStore(UseSendfile, &ev.use_sendfile, .disabled, .monotonic); + return 0; + }, + }; + if (len == 0) { + file_reader.size = file_reader.pos; + return error.EndOfStream; + } + const u_len: usize = @bitCast(len); + file_reader.interface.toss(u_len -| header.len); + return u_len; +} + +fn fileWriteFilePositional( + userdata: ?*anyopaque, + file: File, + header: []const u8, + file_reader: *File.Reader, + limit: Io.Limit, + offset: u64, +) File.WriteFilePositionalError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const reader_buffered = file_reader.interface.buffered(); + if (reader_buffered.len >= @intFromEnum(limit)) { + const n = try fileWritePositional( + ev, + file, + header, + &.{limit.slice(reader_buffered)}, + 1, + offset, + ); + file_reader.interface.toss(n -| header.len); + return n; + } + const out_fd = file.handle; + const in_fd = file_reader.file.handle; + + if (file_reader.size) |size| { + if (size - file_reader.pos == 0) { + if (reader_buffered.len != 0) { + const n = try fileWritePositional( + ev, + file, + header, + &.{limit.slice(reader_buffered)}, + 1, + offset, + ); + file_reader.interface.toss(n -| header.len); + return n; + } else { + return error.EndOfStream; + } + } + } + + if (@atomicLoad(UseFcopyfile, &ev.use_fcopyfile, .monotonic) == .disabled) + return error.Unimplemented; + if (file_reader.pos != 0) return error.Unimplemented; + if (offset != 0) return error.Unimplemented; + if (limit != .unlimited) return error.Unimplemented; + const size = file_reader.getSize() catch return error.Unimplemented; + if (header.len != 0 or reader_buffered.len != 0) { + const n = try fileWritePositional( + ev, + file, + header, + &.{limit.slice(reader_buffered)}, + 1, + offset, + ); + file_reader.interface.toss(n -| header.len); + return n; + } + while (true) { + const rc = c.fcopyfile(in_fd, out_fd, null, .{ .DATA = true }); + switch (c.errno(rc)) { + .SUCCESS => break, + .INTR => {}, + .OPNOTSUPP => { + // Give calling code chance 
to observe before trying + // something else. + @atomicStore(UseFcopyfile, &ev.use_fcopyfile, .disabled, .monotonic); + return 0; + }, + else => |e| { + assert(error.Unexpected == switch (e) { + .NOMEM => return error.SystemResources, + .INVAL => |err| errnoBug(err), + else => |err| unexpectedErrno(err), + }); + return 0; + }, + } + } + file_reader.pos = size; + return size; +} + +fn fileReadPositional( + userdata: ?*anyopaque, + file: File, + data: []const []u8, + offset: u64, +) File.ReadPositionalError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var iovecs: [max_iovecs_len]iovec = undefined; + var iovlen: iovlen_t = 0; + var remaining: Io.Limit = .unlimited; + for (data) |buf| addBuf(false, &iovecs, &iovlen, &remaining, buf); + if (iovlen == 0) return 0; + while (true) { + const rc = c.preadv(file.handle, &iovecs, iovlen, @bitCast(offset)); + switch (c.errno(rc)) { + .SUCCESS => return @intCast(rc), + .INTR => {}, + .NXIO => return error.Unseekable, + .SPIPE => return error.Unseekable, + .OVERFLOW => return error.Unseekable, + .NOBUFS => return error.SystemResources, + .NOMEM => return error.SystemResources, + .AGAIN => return error.WouldBlock, + .IO => return error.InputOutput, + .ISDIR => return error.IsDir, + .NOTCONN => |err| return errnoBug(err), // not a socket + .CONNRESET => |err| return errnoBug(err), // not a socket + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .BADF => return error.NotOpenForReading, + else => |err| return unexpectedErrno(err), + } + } +} + +fn fileSeekBy(userdata: ?*anyopaque, file: File, offset: i64) File.SeekError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return ev.lseek(file.handle, @bitCast(offset), c.SEEK.CUR); +} + +fn fileSeekTo(userdata: ?*anyopaque, file: File, offset: u64) File.SeekError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return ev.lseek(file.handle, offset, c.SEEK.SET); +} + +fn lseek(ev: *Evented, fd: 
c.fd_t, offset: u64, whence: i32) File.SeekError!void { + _ = ev; + while (true) switch (c.errno(c.lseek(fd, @bitCast(offset), whence))) { + .SUCCESS => return, + .INTR => {}, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .INVAL => return error.Unseekable, + .OVERFLOW => return error.Unseekable, + .SPIPE => return error.Unseekable, + .NXIO => return error.Unseekable, + else => |err| return unexpectedErrno(err), + }; +} + +fn fileSync(userdata: ?*anyopaque, file: File) File.SyncError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + while (true) switch (c.errno(c.fsync(file.handle))) { + .SUCCESS => return, + .INTR => {}, + .BADF => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ROFS => |err| return errnoBug(err), + .IO => return error.InputOutput, + .NOSPC => return error.NoSpaceLeft, + .DQUOT => return error.DiskQuota, + else => |err| return unexpectedErrno(err), + }; +} + +fn fileIsTty(userdata: ?*anyopaque, file: File) Io.Cancelable!bool { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + while (true) { + const rc = c.isatty(file.handle); + switch (c.errno(rc - 1)) { + .SUCCESS => return true, + .INTR => {}, + else => return false, + } + } +} + +fn fileEnableAnsiEscapeCodes(userdata: ?*anyopaque, file: File) File.EnableAnsiEscapeCodesError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (!try fileIsTty(ev, file)) return error.NotTerminalDevice; +} + +fn fileSetLength(userdata: ?*anyopaque, file: File, length: u64) File.SetLengthError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + const signed_len: i64 = @bitCast(length); + if (signed_len < 0) return error.FileTooBig; // Avoid ambiguous EINVAL errors. 
+ + while (true) switch (c.errno(c.ftruncate(file.handle, signed_len))) { + .SUCCESS => return, + .INTR => {}, + .FBIG => return error.FileTooBig, + .IO => return error.InputOutput, + .PERM => return error.PermissionDenied, + .TXTBSY => return error.FileBusy, + .BADF => |err| return errnoBug(err), // Handle not open for writing. + .INVAL => return error.NonResizable, // This is returned for /dev/null for example. + else => |err| return unexpectedErrno(err), + }; +} + +fn fileSetOwner( + userdata: ?*anyopaque, + file: File, + owner: ?File.Uid, + group: ?File.Gid, +) File.SetOwnerError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + return fchown(file.handle, owner, group); +} + +fn fileSetPermissions( + userdata: ?*anyopaque, + file: File, + permissions: File.Permissions, +) File.SetPermissionsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return ev.fchmod(file.handle, permissions.toMode()); +} + +fn fchmod(ev: *Evented, fd: c.fd_t, mode: c.mode_t) File.SetPermissionsError!void { + _ = ev; + while (true) switch (c.errno(c.fchmod(fd, mode))) { + .SUCCESS => return, + .INTR => {}, + .BADF => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .IO => return error.InputOutput, + .LOOP => return error.SymLinkLoop, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.FileNotFound, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + }; +} + +fn fileSetTimestamps( + userdata: ?*anyopaque, + file: File, + options: File.SetTimestampsOptions, +) File.SetTimestampsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var times_buffer: [2]c.timespec = undefined; + const times = if (options.modify_timestamp == .now and options.access_timestamp == .now) null else p: { + 
times_buffer = .{ + setTimestampToPosix(options.access_timestamp), + setTimestampToPosix(options.modify_timestamp), + }; + break :p &times_buffer; + }; + + while (true) switch (c.errno(c.futimens(file.handle, times))) { + .SUCCESS => return, + .INTR => {}, + .BADF => |err| return errnoBug(err), // always a race condition + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + }; +} + +fn fileLock(userdata: ?*anyopaque, file: File, lock: File.Lock) File.LockError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const operation: i32 = switch (lock) { + .none => c.LOCK.UN, + .shared => c.LOCK.SH, + .exclusive => c.LOCK.EX, + }; + while (true) switch (c.errno(c.flock(file.handle, operation))) { + .SUCCESS => return, + .INTR => {}, + .BADF => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), // invalid parameters + .NOLCK => return error.SystemResources, + .AGAIN => |err| return errnoBug(err), + .OPNOTSUPP => return error.FileLocksUnsupported, + else => |err| return unexpectedErrno(err), + }; +} + +fn fileTryLock(userdata: ?*anyopaque, file: File, lock: File.Lock) File.LockError!bool { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const operation: i32 = switch (lock) { + .none => c.LOCK.UN, + .shared => c.LOCK.SH | c.LOCK.NB, + .exclusive => c.LOCK.EX | c.LOCK.NB, + }; + while (true) switch (c.errno(c.flock(file.handle, operation))) { + .SUCCESS => return true, + .INTR => {}, + .AGAIN => return false, + .BADF => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), // invalid parameters + .NOLCK => return error.SystemResources, + .OPNOTSUPP => return error.FileLocksUnsupported, + else => |err| return unexpectedErrno(err), + }; +} + +fn fileUnlock(userdata: ?*anyopaque, file: File) void { + const ev: 
*Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + while (true) switch (c.errno(c.flock(file.handle, c.LOCK.UN))) { + .SUCCESS => return, + .INTR => {}, + .AGAIN => return recoverableOsBugDetected(), // unlocking can't block + .BADF => return recoverableOsBugDetected(), // File descriptor used after closed. + .INVAL => return recoverableOsBugDetected(), // invalid parameters + .NOLCK => return recoverableOsBugDetected(), // Resource deallocation. + .OPNOTSUPP => return recoverableOsBugDetected(), // We already got the lock. + else => return recoverableOsBugDetected(), // Resource deallocation must succeed. + }; +} + +fn fileDowngradeLock(userdata: ?*anyopaque, file: File) File.DowngradeLockError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const operation = c.LOCK.SH | c.LOCK.NB; + while (true) switch (c.errno(c.flock(file.handle, operation))) { + .SUCCESS => return, + .INTR => {}, + .AGAIN => |err| return errnoBug(err), // File was not locked in exclusive mode. + .BADF => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), // invalid parameters + .NOLCK => |err| return errnoBug(err), // Lock already obtained. + .OPNOTSUPP => |err| return errnoBug(err), // Lock already obtained. 
+ else => |err| return unexpectedErrno(err), + }; +} + +fn fileRealPath(userdata: ?*anyopaque, file: File, out_buffer: []u8) File.RealPathError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var buffer: [c.PATH_MAX]u8 = undefined; + @memset(&buffer, 0); + while (true) { + switch (c.errno(c.fcntl(file.handle, c.F.GETPATH, &buffer))) { + .SUCCESS => break, + .INTR => {}, + .ACCES => return error.AccessDenied, + .BADF => return error.FileNotFound, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NameTooLong, + .RANGE => return error.NameTooLong, + else => |err| return unexpectedErrno(err), + } + } + const n = std.mem.indexOfScalar(u8, &buffer, 0) orelse buffer.len; + if (n > out_buffer.len) return error.NameTooLong; + @memcpy(out_buffer[0..n], buffer[0..n]); + return n; +} + +fn fileHardLink( + userdata: ?*anyopaque, + file: File, + new_dir: Dir, + new_sub_path: []const u8, + options: File.HardLinkOptions, +) File.HardLinkError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = file; + _ = new_dir; + _ = new_sub_path; + _ = options; + return error.OperationUnsupported; +} + +fn linkat( + old_dir: c.fd_t, + old_path: [*:0]const u8, + new_dir: c.fd_t, + new_path: [*:0]const u8, + flags: u32, +) File.HardLinkError!void { + while (true) switch (c.errno(c.linkat(old_dir, old_path, new_dir, new_path, flags))) { + .SUCCESS => return, + .INTR => {}, + .ACCES => return error.AccessDenied, + .DQUOT => return error.DiskQuota, + .EXIST => return error.PathAlreadyExists, + .IO => return error.HardwareFailure, + .LOOP => return error.SymLinkLoop, + .MLINK => return error.LinkQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .NOTDIR => return error.NotDir, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + 
.XDEV => return error.CrossDevice, + .ILSEQ => return error.BadPathName, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + }; +} + +fn fileMemoryMapCreate( + userdata: ?*anyopaque, + file: File, + options: File.MemoryMap.CreateOptions, +) File.MemoryMap.CreateError!File.MemoryMap { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + const prot: c.PROT = .{ + .READ = options.protection.read, + .WRITE = options.protection.write, + .EXEC = options.protection.execute, + }; + const flags: c.MAP = .{ + .TYPE = .SHARED, + }; + + const page_align = std.heap.page_size_min; + + const contents = while (true) { + const casted_offset = std.math.cast(i64, options.offset) orelse return error.Unseekable; + const rc = c.mmap(null, options.len, prot, flags, file.handle, casted_offset); + const err: c.E = if (rc != c.MAP_FAILED) .SUCCESS else @enumFromInt(c._errno().*); + switch (err) { + .SUCCESS => break @as([*]align(page_align) u8, @ptrCast(@alignCast(rc)))[0..options.len], + .INTR => {}, + .ACCES => return error.AccessDenied, + .AGAIN => return error.LockedMemoryLimitExceeded, + .MFILE => return error.ProcessFdQuotaExceeded, + .NFILE => return error.SystemFdQuotaExceeded, + .NOMEM => return error.OutOfMemory, + .PERM => return error.PermissionDenied, + .OVERFLOW => return error.Unseekable, + .BADF => return errnoBug(err), // Always a race condition. 
+ .INVAL => return errnoBug(err), // Invalid parameters to mmap() + else => return unexpectedErrno(err), + } + }; + return .{ + .file = file, + .offset = options.offset, + .memory = contents, + .section = {}, + }; +} + +fn fileMemoryMapDestroy(userdata: ?*anyopaque, mm: *File.MemoryMap) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const memory = mm.memory; + if (memory.len == 0) return; + switch (c.errno(c.munmap(memory.ptr, memory.len))) { + .SUCCESS => {}, + else => |err| if (builtin.mode == .Debug) + std.log.err("failed to unmap {d} bytes at {*}: {t}", .{ memory.len, memory.ptr, err }), + } + mm.* = undefined; +} + +fn fileMemoryMapSetLength( + userdata: ?*anyopaque, + mm: *File.MemoryMap, + new_len: usize, +) File.MemoryMap.SetLengthError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + const page_size = std.heap.pageSize(); + const alignment: Alignment = .fromByteUnits(page_size); + const old_memory = mm.memory; + + if (alignment.forward(new_len) == alignment.forward(old_memory.len)) { + mm.memory.len = new_len; + return; + } + return error.OperationUnsupported; +} + +fn fileMemoryMapRead(userdata: ?*anyopaque, mm: *File.MemoryMap) File.ReadPositionalError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = mm; +} + +fn fileMemoryMapWrite(userdata: ?*anyopaque, mm: *File.MemoryMap) File.WritePositionalError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = mm; +} + +fn processExecutableOpen( + userdata: ?*anyopaque, + flags: File.OpenFlags, +) process.OpenExecutableError!File { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + // _NSGetExecutablePath() returns a path that might be a symlink to + // the executable. Here it does not matter since we open it. 
    var symlink_path_buf: [c.PATH_MAX + 1]u8 = undefined;
    var n: u32 = symlink_path_buf.len;
    const rc = c._NSGetExecutablePath(&symlink_path_buf, &n);
    // Any nonzero result is reported as the buffer being too small.
    if (rc != 0) return error.NameTooLong;
    const symlink_path = std.mem.sliceTo(&symlink_path_buf, 0);
    return dirOpenFile(ev, .cwd(), symlink_path, flags);
}

/// Writes the resolved path of the running executable into `out_buffer` and
/// returns its length. The path from `_NSGetExecutablePath` is passed through
/// `dirRealPathFile`.
fn processExecutablePath(userdata: ?*anyopaque, out_buffer: []u8) process.ExecutablePathError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    // _NSGetExecutablePath() returns a path that might be a symlink to
    // the executable.
    var symlink_path_buf: [c.PATH_MAX + 1]u8 = undefined;
    var n: u32 = symlink_path_buf.len;
    const rc = c._NSGetExecutablePath(&symlink_path_buf, &n);
    if (rc != 0) return error.NameTooLong;
    const symlink_path = std.mem.sliceTo(&symlink_path_buf, 0);
    assert(Dir.path.isAbsolute(symlink_path));
    return dirRealPathFile(ev, .cwd(), symlink_path, out_buffer) catch |err| switch (err) {
        error.NetworkNotFound => unreachable, // Windows-only
        error.FileBusy => unreachable, // Windows-only
        else => |e| return e,
    };
}

/// Acquires the stderr mutex, waiting if necessary, and returns the locked
/// stderr writer. The mutex is released again if initialization fails.
fn lockStderr(userdata: ?*anyopaque, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    try ev.stderr_mutex.lock(ev);
    errdefer ev.stderr_mutex.unlock();
    return ev.initLockedStderr(terminal_mode);
}

/// Like `lockStderr`, but returns `null` instead of waiting when the mutex is
/// already held.
fn tryLockStderr(
    userdata: ?*anyopaque,
    terminal_mode: ?Io.Terminal.Mode,
) Io.Cancelable!?Io.LockedStderr {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    if (!ev.stderr_mutex.tryLock()) return null;
    errdefer ev.stderr_mutex.unlock();
    return try ev.initLockedStderr(terminal_mode);
}

/// Runs the one-time stderr writer initialization and builds the
/// `Io.LockedStderr` result. An explicit `terminal_mode` takes precedence over
/// the detected `ev.stderr_mode`.
fn initLockedStderr(ev: *Evented, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr {
    ev.init_stderr_writer.once(ev, &initStderrWriter);
    return .{
        .file_writer = &ev.stderr_writer,
        .terminal_mode = terminal_mode orelse ev.stderr_mode,
    };
}

/// Once-callback that detects the terminal mode of stderr from the `NO_COLOR`
/// and `CLICOLOR_FORCE` environment flags and stores it in `ev.stderr_mode`.
fn initStderrWriter(context: ?*anyopaque) callconv(.c) void {
    const ev: *Evented = @ptrCast(@alignCast(context));
    // Cancellation is blocked for the duration of this callback, which is why
    // `error.Canceled` is unreachable below; the previous state is restored on exit.
    const cancel_protection = swapCancelProtection(ev, .blocked);
    defer assert(swapCancelProtection(ev, cancel_protection) == .blocked);
    ev.scan_environ.once(ev, &scanEnviron);
    const NO_COLOR = ev.environ.exist.NO_COLOR;
    const CLICOLOR_FORCE = ev.environ.exist.CLICOLOR_FORCE;
    ev.stderr_mode = Io.Terminal.Mode.detect(
        ev.io(),
        ev.stderr_writer.file,
        NO_COLOR,
        CLICOLOR_FORCE,
    ) catch |err| switch (err) {
        error.Canceled => unreachable, // blocked
    };
}

/// Flushes and resets the stderr writer, then releases the stderr mutex.
fn unlockStderr(userdata: ?*anyopaque) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    // Flush only when no error is already recorded; flush failures are
    // picked up through `stderr_writer.err` below.
    if (ev.stderr_writer.err == null) ev.stderr_writer.interface.flush() catch {};
    if (ev.stderr_writer.err) |err| {
        switch (err) {
            // A cancellation observed while writing is re-armed on the current fiber.
            error.Canceled => Thread.current().currentFiber().cancel_protection.recancel(),
            else => {},
        }
        ev.stderr_writer.err = null;
    }
    // Drop any buffered bytes and detach the caller-provided buffer.
    ev.stderr_writer.interface.end = 0;
    ev.stderr_writer.interface.buffer.len = 0;
    ev.stderr_mutex.unlock();
}

/// Writes the current working directory into `buffer` via `getcwd` and
/// returns the length of the path, excluding the terminating zero byte.
fn processCurrentPath(userdata: ?*anyopaque, buffer: []u8) process.CurrentPathError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    // `getcwd` reports failure with a null pointer, so errno is read by hand.
    const err: c.E = if (c.getcwd(buffer.ptr, buffer.len)) |_| .SUCCESS else @enumFromInt(c._errno().*);
    switch (err) {
        .SUCCESS => return std.mem.findScalar(u8, buffer, 0).?,
        .NOENT => return error.CurrentDirUnlinked,
        .RANGE => return error.NameTooLong,
        .FAULT => |e| return errnoBug(e),
        .INVAL => |e| return errnoBug(e),
        else => return unexpectedErrno(err),
    }
}

/// Changes the working directory to `dir` with `fchdir`. A handle equal to
/// `AT.FDCWD` is a no-op.
fn processSetCurrentDir(userdata: ?*anyopaque, dir: Dir) process.SetCurrentDirError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    if (dir.handle == c.AT.FDCWD) return;
    while (true) switch (c.errno(c.fchdir(dir.handle))) {
        .SUCCESS => return,
        .INTR => {},
        .ACCES => return error.AccessDenied,
        .NOTDIR => return error.NotDir,
        .IO => return error.FileSystem,
        .BADF => |err| return errnoBug(err),
        else => |err| return unexpectedErrno(err),
    };
}

/// Changes the working directory to `dir_path` with `chdir`, after converting
/// the path with `pathToPosix`.
fn processSetCurrentPath(userdata: ?*anyopaque, dir_path: []const u8) ChdirError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    var path_buffer: [c.PATH_MAX]u8 = undefined;
    const dir_path_posix = try pathToPosix(dir_path, &path_buffer);
    while (true) switch (c.errno(c.chdir(dir_path_posix))) {
        .SUCCESS => return,
        .INTR => {},
        .ACCES => return error.AccessDenied,
        .IO => return error.FileSystem,
        .LOOP => return error.SymLinkLoop,
        .NAMETOOLONG => return error.NameTooLong,
        .NOENT => return error.FileNotFound,
        .NOMEM => return error.SystemResources,
        .NOTDIR => return error.NotDir,
        .ILSEQ => return error.BadPathName,
        .FAULT => |err| return errnoBug(err),
        else => |err| return unexpectedErrno(err),
    };
}

/// Replaces the current process image with the program described by
/// `options`. Only returns on failure; the returned value is the error.
fn processReplace(userdata: ?*anyopaque, options: process.ReplaceOptions) process.ReplaceError {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    if (!process.can_replace) return error.OperationUnsupported;

    ev.scan_environ.once(ev, &scanEnviron); // for PATH
    const PATH = ev.environ.string.PATH orelse default_PATH;

    // Everything allocated for the exec call lives in this arena, which is
    // released when this function returns.
    var arena_allocator = std.heap.ArenaAllocator.init(ev.allocator());
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    // Build a null-terminated argv of zero-terminated copies of each argument.
    const argv_buf = try arena.allocSentinel(?[*:0]const u8, options.argv.len, null);
    for (options.argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr;

    // The environment comes from `options.environ_map` when given, otherwise
    // from the environment recorded in `ev`. No progress fd is passed (-1).
    const env_block = env_block: {
        const prog_fd: i32 = -1;
        if (options.environ_map) |environ_map| break :env_block try environ_map.createPosixBlock(arena, .{
            .zig_progress_fd = prog_fd,
        });
        break :env_block try ev.environ.process_environ.createPosixBlock(arena, .{
            .zig_progress_fd = prog_fd,
        });
    };

    // NOTE(review): `argv_buf.ptr[0].?` assumes `options.argv` is non-empty — confirm callers guarantee this.
    return ev.execv(options.expand_arg0, argv_buf.ptr[0].?, argv_buf.ptr, env_block, PATH);
}

/// Not implemented yet; always panics.
fn processReplacePath(
    userdata: ?*anyopaque,
    dir: Dir,
    options: process.ReplaceOptions,
) process.ReplaceError {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    _ = dir;
    _ = options;
    @panic("TODO processReplacePath");
}

/// Spawns a child process and reads the error-reporting pipe returned by
/// `spawn`. A successfully read error value is returned as this function's
/// error; a failed read yields the `process.Child`.
fn processSpawn(userdata: ?*anyopaque, options: process.SpawnOptions) process.SpawnError!process.Child {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    const spawned = try ev.spawn(options);
    defer fileClose(ev, &.{spawned.err_pipe});

    // Wait for the child to report any errors in or before `execvpe`.
    var child_err: ForkBailError = undefined;
    ev.readAll(spawned.err_pipe, @ptrCast(&child_err)) catch |read_err| {
        switch (read_err) {
            // NOTE(review): no cancel-protection swap is visible in this function —
            // confirm `readAll` cannot report `Canceled` here.
            error.Canceled => unreachable, // blocked
            error.EndOfStream => {
                // Write end closed by CLOEXEC at the time of the `execvpe` call,
                // indicating success.
            },
            else => {
                // Problem reading the error from the error reporting pipe. We
                // don't know if the child is alive or dead. Better to assume it is
                // alive so the resource does not risk being leaked.
            },
        }
        // Reached whenever the read fails, i.e. no error value was read from the pipe.
        return .{
            .id = spawned.pid,
            .thread_handle = {},
            .stdin = spawned.stdin,
            .stdout = spawned.stdout,
            .stderr = spawned.stderr,
            .request_resource_usage_statistics = options.request_resource_usage_statistics,
        };
    };
    // The read succeeded: the child wrote an error value before it could exec.
    return child_err;
}

/// Not implemented yet; always panics.
fn processSpawnPath(
    userdata: ?*anyopaque,
    dir: Dir,
    options: process.SpawnOptions,
) process.SpawnError!process.Child {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    _ = dir;
    _ = options;
    @panic("TODO processSpawnPath");
}

/// File descriptor number on which the child receives the progress pipe:
/// the first number after the three standard streams.
const prog_fileno = @max(c.STDIN_FILENO, c.STDOUT_FILENO, c.STDERR_FILENO) + 1;

/// Result of `spawn`: the child's pid, the read end of the error-reporting
/// pipe, and the parent's ends of any stdio pipes that were requested.
const Spawned = struct {
    pid: c.pid_t,
    err_pipe: File,
    stdin: ?File,
    stdout: ?File,
    stderr: ?File,
};
/// Creates the pipes requested by `options`, prepares argv and the environment
/// block, and forks. In the child, `setUpChild` is run and its error is
/// written to the error pipe before exiting. In the parent, the child's ends
/// of the pipes are closed and a `Spawned` is returned.
fn spawn(ev: *Evented, options: process.SpawnOptions) process.SpawnError!Spawned {
    // The child process does need to access (one end of) these pipes. However,
    // we must initially set CLOEXEC to avoid a race condition. If another thread
    // is racing to spawn a different child process, we don't want it to inherit
    // these FDs in any scenario; that would mean that, for instance, calls to
    // `poll` from the parent would not report the child's stdout as closing when
    // expected, since the other child may retain a reference to the write end of
    // the pipe. So, we create the pipes with CLOEXEC initially. After fork, we
    // need to do something in the new child to make sure we preserve the reference
    // we want. We could use `fcntl` to remove CLOEXEC from the FD, but as it
    // turns out, we `dup2` everything anyway, so there's no need!
    // NOTE(review): `pipe2` in this file calls `pipe` and then sets CLOEXEC with a
    // separate `fcntl`, so the flag is not applied atomically — confirm this
    // window is acceptable.
    const pipe_flags: c.O = .{ .CLOEXEC = true };

    // Each stdio pipe is only created when requested; the matching `errdefer`
    // tears it down again if a later step fails.
    const stdin_pipe = if (options.stdin == .pipe) try pipe2(pipe_flags) else undefined;
    errdefer if (options.stdin == .pipe) {
        destroyPipe(stdin_pipe);
    };

    const stdout_pipe = if (options.stdout == .pipe) try pipe2(pipe_flags) else undefined;
    errdefer if (options.stdout == .pipe) {
        destroyPipe(stdout_pipe);
    };

    const stderr_pipe = if (options.stderr == .pipe) try pipe2(pipe_flags) else undefined;
    errdefer if (options.stderr == .pipe) {
        destroyPipe(stderr_pipe);
    };

    // `/dev/null` is opened at most once per `Evented` and only when some
    // stream is set to `.ignore`.
    const any_ignore =
        options.stdin == .ignore or options.stdout == .ignore or options.stderr == .ignore;
    const dev_null_file = if (any_ignore) dev_null_file: {
        ev.open_dev_null.once(ev, &openDevNullFile);
        break :dev_null_file try ev.dev_null_file;
    } else undefined;

    // `.{ -1, -1 }` marks "no progress pipe"; `destroyPipe` skips those values.
    const prog_pipe: [2]c.fd_t = if (options.progress_node.index != .none)
        // We use CLOEXEC for the same reason as in `pipe_flags`.
        try pipe2(.{ .NONBLOCK = true, .CLOEXEC = true })
    else
        .{ -1, -1 };
    errdefer destroyPipe(prog_pipe);

    var arena_allocator = std.heap.ArenaAllocator.init(ev.allocator());
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    // The POSIX standard does not allow malloc() between fork() and execve(),
    // and this allocator may be a libc allocator.
    // I have personally observed the child process deadlocking when it tries
    // to call malloc() due to a heap allocation between fork() and execve(),
    // in musl v1.1.24.
    // Additionally, we want to reduce the number of possible ways things
    // can fail between fork() and execve().
    // Therefore, we do all the allocation for the execve() before the fork().
    // This means we must do the null-termination of argv and env vars here.
    const argv_buf = try arena.allocSentinel(?[*:0]const u8, options.argv.len, null);
    for (options.argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr;

    // The child is told about the progress pipe only when one was created.
    const env_block = env_block: {
        const prog_fd: i32 = if (prog_pipe[1] == -1) -1 else prog_fileno;
        if (options.environ_map) |environ_map| break :env_block try environ_map.createPosixBlock(arena, .{
            .zig_progress_fd = prog_fd,
        });
        break :env_block try ev.environ.process_environ.createPosixBlock(arena, .{
            .zig_progress_fd = prog_fd,
        });
    };

    // This pipe communicates to the parent errors in the child between `fork` and `execvpe`.
    // It is closed by the child (via CLOEXEC) without writing if `execvpe` succeeds.
    const err_pipe: [2]File = err_pipe: {
        const err_pipe = try pipe2(.{ .CLOEXEC = true });
        break :err_pipe .{
            .{ .handle = err_pipe[0], .flags = .{ .nonblocking = false } },
            .{ .handle = err_pipe[1], .flags = .{ .nonblocking = false } },
        };
    };
    errdefer fileClose(ev, &err_pipe);

    ev.scan_environ.once(ev, &scanEnviron); // for PATH
    const PATH = ev.environ.string.PATH orelse default_PATH;

    const pid_result: c.pid_t = fork: {
        const rc = c.fork();
        switch (c.errno(rc)) {
            .SUCCESS => break :fork @intCast(rc),
            .AGAIN => return error.SystemResources,
            .NOMEM => return error.SystemResources,
            .NOSYS => return error.OperationUnsupported,
            else => |err| return unexpectedErrno(err),
        }
    };

    if (pid_result == 0) {
        defer comptime unreachable; // We are the child.
        // `setUpChild` only returns on failure; that error is sent to the
        // parent through the error pipe before the child exits.
        const err = ev.setUpChild(.{
            .stdin_pipe = stdin_pipe[0],
            .stdout_pipe = stdout_pipe[1],
            .stderr_pipe = stderr_pipe[1],
            .dev_null_fd = dev_null_file.handle,
            .prog_pipe = prog_pipe[1],
            .argv_buf = argv_buf,
            .env_block = env_block,
            .PATH = PATH,
            .spawn = options,
        });
        ev.writeAll(err_pipe[1], @ptrCast(&err)) catch {};
        c.exit(1);
    }

    const pid: c.pid_t = @intCast(pid_result); // We are the parent.
+ errdefer comptime unreachable; // The child is forked; we must not error from now on + + fileClose(ev, err_pipe[1..2]); // make sure only the child holds the write end open + + if (options.stdin == .pipe) closeFd(stdin_pipe[0]); + if (options.stdout == .pipe) closeFd(stdout_pipe[1]); + if (options.stderr == .pipe) closeFd(stderr_pipe[1]); + + if (prog_pipe[1] != -1) closeFd(prog_pipe[1]); + + options.progress_node.setIpcFile(ev, .{ .handle = prog_pipe[0], .flags = .{ .nonblocking = true } }); + + return .{ + .pid = pid, + .err_pipe = err_pipe[0], + .stdin = switch (options.stdin) { + .pipe => .{ .handle = stdin_pipe[1], .flags = .{ .nonblocking = false } }, + else => null, + }, + .stdout = switch (options.stdout) { + .pipe => .{ .handle = stdout_pipe[0], .flags = .{ .nonblocking = false } }, + else => null, + }, + .stderr = switch (options.stderr) { + .pipe => .{ .handle = stderr_pipe[0], .flags = .{ .nonblocking = false } }, + else => null, + }, + }; +} + +fn openDevNullFile(context: ?*anyopaque) callconv(.c) void { + const ev: *Evented = @ptrCast(@alignCast(context)); + ev.dev_null_file = dirOpenFile(ev, .cwd(), "/dev/null", .{ .mode = .read_write }); +} + +/// Errors that can occur between fork() and execv() +const ForkBailError = process.SetCurrentDirError || ChdirError || + process.SpawnError || process.ReplaceError; +fn setUpChild(ev: *Evented, options: struct { + stdin_pipe: c.fd_t, + stdout_pipe: c.fd_t, + stderr_pipe: c.fd_t, + dev_null_fd: c.fd_t, + prog_pipe: c.fd_t, + argv_buf: [:null]?[*:0]const u8, + env_block: process.Environ.Block, + PATH: []const u8, + spawn: process.SpawnOptions, +}) ForkBailError { + try ev.setUpChildIo( + options.spawn.stdin, + options.stdin_pipe, + c.STDIN_FILENO, + options.dev_null_fd, + ); + try ev.setUpChildIo( + options.spawn.stdout, + options.stdout_pipe, + c.STDOUT_FILENO, + options.dev_null_fd, + ); + try ev.setUpChildIo( + options.spawn.stderr, + options.stderr_pipe, + c.STDERR_FILENO, + options.dev_null_fd, + ); + + 
switch (options.spawn.cwd) { + .inherit => {}, + .dir => |cwd_dir| try processSetCurrentDir(ev, cwd_dir), + .path => |cwd_path| try processSetCurrentPath(ev, cwd_path), + } + + // Must happen after fchdir above, the cwd file descriptor might be + // equal to prog_fileno and be clobbered by this dup2 call. + if (options.prog_pipe != -1) try ev.dup2(options.prog_pipe, prog_fileno); + + if (options.spawn.gid) |gid| while (true) switch (c.errno(c.setregid(gid, gid))) { + .SUCCESS => break, + .INTR => {}, + .AGAIN => return error.ResourceLimitReached, + .INVAL => return error.InvalidUserId, + .PERM => return error.PermissionDenied, + else => return error.Unexpected, + }; + + if (options.spawn.uid) |uid| while (true) switch (c.errno(c.setreuid(uid, uid))) { + .SUCCESS => break, + .INTR => {}, + .AGAIN => return error.ResourceLimitReached, + .INVAL => return error.InvalidUserId, + .PERM => return error.PermissionDenied, + else => return error.Unexpected, + }; + + if (options.spawn.pgid) |pid| while (true) switch (c.errno(c.setpgid(0, pid))) { + .SUCCESS => break, + .INTR => {}, + .ACCES => return error.ProcessAlreadyExec, + .INVAL => return error.InvalidProcessGroupId, + .PERM => return error.PermissionDenied, + else => return error.Unexpected, + }; + + if (options.spawn.start_suspended) while (true) switch (c.errno(c.kill(0, .STOP))) { + .SUCCESS => break, + .INTR => {}, + .PERM => return error.PermissionDenied, + else => return error.Unexpected, + }; + + return ev.execv( + options.spawn.expand_arg0, + options.argv_buf.ptr[0].?, + options.argv_buf.ptr, + options.env_block, + options.PATH, + ); +} + +fn setUpChildIo( + ev: *Evented, + stdio: process.SpawnOptions.StdIo, + pipe_fd: c.fd_t, + std_fileno: i32, + dev_null_fd: c.fd_t, +) !void { + switch (stdio) { + .pipe => try ev.dup2(pipe_fd, std_fileno), + .close => closeFd(std_fileno), + .inherit => {}, + .ignore => try ev.dup2(dev_null_fd, std_fileno), + .file => |file| { + if (file.flags.nonblocking) @panic("TODO 
implement setUpChildIo when nonblocking file is used"); + try ev.dup2(file.handle, std_fileno); + }, + } +} + +const PipeError = error{ + SystemFdQuotaExceeded, + ProcessFdQuotaExceeded, +} || Io.UnexpectedError; + +fn pipe2(flags: c.O) PipeError![2]c.fd_t { + var fds: [2]c.fd_t = undefined; + + while (true) switch (c.errno(c.pipe(&fds))) { + .SUCCESS => break, + .INTR => {}, + .NFILE => return error.SystemFdQuotaExceeded, + .MFILE => return error.ProcessFdQuotaExceeded, + else => |err| return unexpectedErrno(err), + }; + errdefer { + closeFd(fds[0]); + closeFd(fds[1]); + } + + // https://github.com/ziglang/zig/issues/18882 + if (@as(u32, @bitCast(flags)) == 0) return fds; + + // CLOEXEC is special, it's a file descriptor flag and must be set using + // F.SETFD. + if (flags.CLOEXEC) for (fds) |fd| while (true) switch (c.errno(c.fcntl(fd, c.F.SETFD, @as(u32, c.FD_CLOEXEC)))) { + .SUCCESS => break, + .INTR => {}, + else => |err| return unexpectedErrno(err), + }; + + const new_flags: u32 = f: { + var new_flags = flags; + new_flags.CLOEXEC = false; + break :f @bitCast(new_flags); + }; + + // Set every other flag affecting the file status using F.SETFL. 
+ if (new_flags != 0) for (fds) |fd| while (true) switch (c.errno(c.fcntl(fd, c.F.SETFL, new_flags))) { + .SUCCESS => break, + .INTR => {}, + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + }; + + return fds; +} + +fn destroyPipe(pipe: [2]c.fd_t) void { + if (pipe[0] != -1) closeFd(pipe[0]); + if (pipe[0] != pipe[1]) closeFd(pipe[1]); +} + +const DupError = error{ + ProcessFdQuotaExceeded, + SystemResources, +} || Io.UnexpectedError || Io.Cancelable; +fn dup2(ev: *Evented, old_fd: c.fd_t, new_fd: c.fd_t) DupError!void { + _ = ev; + while (true) switch (c.errno(c.dup2(old_fd, new_fd))) { + .SUCCESS => return, + .BUSY, .INTR => {}, + .INVAL => |err| return errnoBug(err), // invalid parameters + .BADF => |err| return errnoBug(err), // use after free + .MFILE => return error.ProcessFdQuotaExceeded, + .NOMEM => return error.SystemResources, + else => |err| return unexpectedErrno(err), + }; +} + +fn execv( + ev: *Evented, + arg0_expand: process.ArgExpansion, + file: [*:0]const u8, + child_argv: [*:null]?[*:0]const u8, + env_block: process.Environ.PosixBlock, + PATH: []const u8, +) process.ReplaceError { + const file_slice = std.mem.sliceTo(file, 0); + if (std.mem.findScalar(u8, file_slice, '/') != null) return ev.execvPath(file, child_argv, env_block); + + // Use of PATH_MAX here is valid as the path_buf will be passed + // directly to the operating system in posixExecvPath. + var path_buf: [c.PATH_MAX]u8 = undefined; + var it = std.mem.tokenizeScalar(u8, PATH, ':'); + var seen_eacces = false; + var err: process.ReplaceError = error.FileNotFound; + + // In case of expanding arg0 we must put it back if we return with an error. 
    const prev_arg0 = child_argv[0];
    defer switch (arg0_expand) {
        .expand => child_argv[0] = prev_arg0,
        .no_expand => {},
    };

    while (it.next()) |search_path| {
        // Build "<search_path>/<file>" plus a terminating zero in `path_buf`.
        const path_len = search_path.len + file_slice.len + 1;
        if (path_buf.len < path_len + 1) return error.NameTooLong;
        @memcpy(path_buf[0..search_path.len], search_path);
        path_buf[search_path.len] = '/';
        @memcpy(path_buf[search_path.len + 1 ..][0..file_slice.len], file_slice);
        path_buf[path_len] = 0;
        const full_path = path_buf[0..path_len :0].ptr;
        switch (arg0_expand) {
            .expand => child_argv[0] = full_path,
            .no_expand => {},
        }
        err = ev.execvPath(full_path, child_argv, env_block);
        // "Not found" style failures move on to the next PATH entry; an access
        // failure is remembered; anything else aborts the search.
        switch (err) {
            error.AccessDenied => seen_eacces = true,
            error.FileNotFound, error.NotDir => {},
            else => |e| return e,
        }
    }
    // An access failure on any candidate takes precedence over the last error.
    if (seen_eacces) return error.AccessDenied;
    return err;
}
/// This function ignores PATH environment variable.
/// Calls `execve` with `path` exactly as given and maps the resulting errno
/// onto `process.ReplaceError`. Only returns on failure.
fn execvPath(
    ev: *Evented,
    path: [*:0]const u8,
    child_argv: [*:null]const ?[*:0]const u8,
    env_block: process.Environ.PosixBlock,
) process.ReplaceError {
    _ = ev;
    switch (c.errno(c.execve(path, child_argv, env_block.slice.ptr))) {
        .FAULT => |err| return errnoBug(err), // Bad pointer parameter.
        .@"2BIG" => return error.SystemResources,
        .MFILE => return error.ProcessFdQuotaExceeded,
        .NAMETOOLONG => return error.NameTooLong,
        .NFILE => return error.SystemFdQuotaExceeded,
        .NOMEM => return error.SystemResources,
        .ACCES => return error.AccessDenied,
        .PERM => return error.PermissionDenied,
        .INVAL => return error.InvalidExe,
        .NOEXEC => return error.InvalidExe,
        .IO => return error.FileSystem,
        .LOOP => return error.FileSystem,
        .ISDIR => return error.IsDir,
        .NOENT => return error.FileNotFound,
        .NOTDIR => return error.NotDir,
        .TXTBSY => return error.FileBusy,
        .BADEXEC => return error.InvalidExe,
        .BADARCH => return error.InvalidExe,
        else => |err| return unexpectedErrno(err),
    }
}

/// Waits for `child` to exit and returns its termination status. The child's
/// stdio files are closed and its id cleared on every exit path.
fn childWait(userdata: ?*anyopaque, child: *process.Child) process.Child.WaitError!process.Child.Term {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    defer ev.childCleanup(child);
    const pid = child.id.?;
    // A dispatch source of type PROC with the EXIT flag is created for the pid
    // on `ev.queue`; its event handler resumes the current fiber.
    const source = c.dispatch.source_create(
        .PROC,
        @bitCast(@as(isize, pid)),
        .{ .PROC = .{ .EXIT = true } },
        ev.queue,
    ) orelse return error.Unexpected;
    source.as_object().set_context(Thread.current().currentFiber());
    source.set_event_handler(&Fiber.@"resume");
    // Hand the source to `yield` for activation; presumably the fiber is
    // suspended until the handler fires — confirm against `yield`.
    ev.yield(.{ .activate = source.as_object() });
    source.as_object().release();
    var status: c_int = undefined;
    var ru: c.rusage = undefined;
    // Resource usage is only collected when the child asked for it.
    const ru_ptr = if (child.request_resource_usage_statistics) &ru else null;
    while (true) switch (c.errno(c.wait4(pid, &status, 0, ru_ptr))) {
        .SUCCESS => {
            if (ru_ptr) |p| child.resource_usage_statistics.rusage = p.*;
            return statusToTerm(@bitCast(status));
        },
        .INTR => {},
        .CHILD => |err| return errnoBug(err), // Double-free.
+ else => |err| return unexpectedErrno(err), + }; +} + +fn childKill(userdata: ?*anyopaque, child: *process.Child) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + defer ev.childCleanup(child); + const pid = child.id.?; + while (true) switch (c.errno(c.kill(pid, .TERM))) { + .SUCCESS => break, + .INTR => {}, + .PERM => return, + .INVAL => |err| errnoBug(err) catch return, + .SRCH => |err| errnoBug(err) catch return, + else => |err| unexpectedErrno(err) catch return, + }; + var status: c_int = undefined; + while (true) switch (c.errno(c.wait4(pid, &status, 0, null))) { + .SUCCESS => return, + .INTR => {}, + .CHILD => |err| errnoBug(err) catch return, // Double-free. + else => |err| unexpectedErrno(err) catch return, + }; +} + +fn childCleanup(ev: *Evented, child: *process.Child) void { + if (child.stdin) |stdin| { + fileClose(ev, &.{stdin}); + child.stdin = null; + } + if (child.stdout) |stdout| { + fileClose(ev, &.{stdout}); + child.stdout = null; + } + if (child.stderr) |stderr| { + fileClose(ev, &.{stderr}); + child.stderr = null; + } + child.id = null; +} + +fn progressParentFile(userdata: ?*anyopaque) std.Progress.ParentFileError!File { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + ev.scan_environ.once(ev, &scanEnviron); + return ev.environ.zig_progress_file; +} + +fn scanEnviron(context: ?*anyopaque) callconv(.c) void { + const ev: *Evented = @ptrCast(@alignCast(context)); + ev.environ.scan(ev.allocator()); +} + +fn now(userdata: ?*anyopaque, clock: Io.Clock) Io.Timestamp { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const clock_id: c.clockid_t = clockToPosix(clock); + var timespec: c.timespec = undefined; + switch (c.errno(c.clock_gettime(clock_id, &timespec))) { + .SUCCESS => return timestampFromPosix(&timespec), + else => return .zero, + } +} + +fn clockResolution(userdata: ?*anyopaque, clock: Io.Clock) Io.Clock.ResolutionError!Io.Duration { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + 
const clock_id: c.clockid_t = clockToPosix(clock); + var timespec: c.timespec = undefined; + return switch (c.errno(c.clock_getres(clock_id, &timespec))) { + .SUCCESS => .fromNanoseconds(nanosecondsFromPosix(&timespec)), + .INVAL => return error.ClockUnavailable, + else => |err| return unexpectedErrno(err), + }; +} + +fn sleep(userdata: ?*anyopaque, timeout: Io.Timeout) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + ev.yield(.{ .sleep = ev.timeFromTimeout(timeout) }); +} + +fn timeFromTimeout(ev: *Evented, timeout: Io.Timeout) c.dispatch.time_t { + return timeout: switch (timeout) { + .none => .FOREVER, + .duration => |duration| .time(switch (duration.clock) { + .real => .WALL_NOW, + else => .NOW, + }, std.math.lossyCast(i64, duration.raw.toNanoseconds())), + .deadline => |deadline| switch (deadline.clock) { + .real => .walltime(&.{ + .sec = @intCast(@divFloor(deadline.raw.toNanoseconds(), std.time.ns_per_s)), + .nsec = @intCast(@mod(deadline.raw.toNanoseconds(), std.time.ns_per_s)), + }, 0), + else => continue :timeout .{ .duration = deadline.durationFromNow(ev.io()) }, + }, + }; +} + +const Random = struct { + evented: *Evented, + thread: *Thread, + buffer: []u8, + + fn seed(context: ?*anyopaque) callconv(.c) void { + const rand: *Random = @ptrCast(@alignCast(context)); + const ev = rand.evented; + ev.csprng_mutex.lockUncancelable(ev); + defer ev.csprng_mutex.unlock(); + var buffer: [Csprng.seed_len]u8 = undefined; + if (!ev.csprng.isInitialized()) { + @branchHint(.unlikely); + const cancel_protection = swapCancelProtection(ev, .blocked); + defer assert(swapCancelProtection(ev, cancel_protection) == .blocked); + randomSecure(ev, &buffer) catch |err| switch (err) { + error.Canceled => unreachable, // blocked + error.EntropyUnavailable => fallbackSeed(ev, &buffer), + }; + ev.csprng.rng = .init(buffer); + } + ev.csprng.rng.fill(&buffer); + rand.thread.csprng.rng = .init(buffer); + rand.thread.csprng.rng.fill(rand.buffer); + 
rand.buffer.len = 0; + } +}; + +fn random(userdata: ?*anyopaque, buffer: []u8) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (buffer.len == 0) return; + const thread: *Thread = .current(); + var rand: Random = .{ .evented = ev, .thread = thread, .buffer = buffer }; + thread.seed_csprng.once(&rand, &Random.seed); + if (rand.buffer.len > 0) thread.csprng.rng.fill(buffer); +} + +fn randomSecure(userdata: ?*anyopaque, buffer: []u8) Io.RandomSecureError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + if (buffer.len > 0) c.arc4random_buf(buffer.ptr, buffer.len); +} + +fn netListenIpUnavailable( + userdata: ?*anyopaque, + address: net.IpAddress, + options: net.IpAddress.ListenOptions, +) net.IpAddress.ListenError!net.Server { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.NetworkDown; +} + +fn netAcceptUnavailable( + userdata: ?*anyopaque, + listen_handle: net.Socket.Handle, +) net.Server.AcceptError!net.Stream { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = listen_handle; + return error.NetworkDown; +} + +fn netBindIpUnavailable( + userdata: ?*anyopaque, + address: *const net.IpAddress, + options: net.IpAddress.BindOptions, +) net.IpAddress.BindError!net.Socket { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.NetworkDown; +} + +fn netConnectIpUnavailable( + userdata: ?*anyopaque, + address: *const net.IpAddress, + options: net.IpAddress.ConnectOptions, +) net.IpAddress.ConnectError!net.Stream { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.NetworkDown; +} + +fn netListenUnixUnavailable( + userdata: ?*anyopaque, + address: *const net.UnixAddress, + options: net.UnixAddress.ListenOptions, +) net.UnixAddress.ListenError!net.Socket.Handle { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = 
address; + _ = options; + return error.AddressFamilyUnsupported; +} + +fn netConnectUnixUnavailable( + userdata: ?*anyopaque, + address: *const net.UnixAddress, +) net.UnixAddress.ConnectError!net.Socket.Handle { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + return error.AddressFamilyUnsupported; +} + +fn netSocketCreatePairUnavailable( + userdata: ?*anyopaque, + options: net.Socket.CreatePairOptions, +) net.Socket.CreatePairError![2]net.Socket { + _ = userdata; + _ = options; + return error.OperationUnsupported; +} + +fn netSendUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + messages: []net.OutgoingMessage, + flags: net.SendFlags, +) struct { ?net.Socket.SendError, usize } { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = messages; + _ = flags; + return .{ error.NetworkDown, 0 }; +} + +fn netReceiveUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + message_buffer: []net.IncomingMessage, + data_buffer: []u8, + flags: net.ReceiveFlags, + timeout: Io.Timeout, +) struct { ?net.Socket.ReceiveTimeoutError, usize } { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = message_buffer; + _ = data_buffer; + _ = flags; + _ = timeout; + return .{ error.NetworkDown, 0 }; +} + +fn netReadUnavailable( + userdata: ?*anyopaque, + fd: net.Socket.Handle, + data: [][]u8, +) net.Stream.Reader.Error!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = fd; + _ = data; + return error.NetworkDown; +} + +fn netWriteUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + header: []const u8, + data: []const []const u8, + splat: usize, +) net.Stream.Writer.Error!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = header; + _ = data; + _ = splat; + return error.NetworkDown; +} + +fn netWriteFileUnavailable( + userdata: ?*anyopaque, + socket_handle: net.Socket.Handle, + 
header: []const u8, + file_reader: *File.Reader, + limit: Io.Limit, +) net.Stream.Writer.WriteFileError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = socket_handle; + _ = header; + _ = file_reader; + _ = limit; + return error.NetworkDown; +} + +fn netClose(userdata: ?*anyopaque, handles: []const net.Socket.Handle) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + for (handles) |handle| closeFd(handle); +} + +fn netShutdownUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + how: net.ShutdownHow, +) net.ShutdownError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = how; + unreachable; // How you gonna shutdown something that was impossible to open? +} + +fn netInterfaceNameResolveUnavailable( + userdata: ?*anyopaque, + name: *const net.Interface.Name, +) net.Interface.Name.ResolveError!net.Interface { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = name; + return error.InterfaceNotFound; +} + +fn netInterfaceNameUnavailable( + userdata: ?*anyopaque, + interface: net.Interface, +) net.Interface.NameError!net.Interface.Name { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = interface; + return error.Unexpected; +} + +fn netLookupUnavailable( + userdata: ?*anyopaque, + host_name: net.HostName, + resolved: *Io.Queue(net.HostName.LookupResult), + options: net.HostName.LookupOptions, +) net.HostName.LookupError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = host_name; + _ = options; + resolved.close(ev.io()); + return error.NetworkDown; +} + +fn readAll(ev: *Evented, file: File, buffer: []u8) File.ReadStreamingError!void { + var index: usize = 0; + while (buffer.len - index != 0) { + const len = try ev.fileReadStreaming(file, &.{buffer[index..]}); + if (len == 0) return error.EndOfStream; + index += len; + } +} + +fn writeAll(ev: *Evented, file: File, buffer: []const u8) (File.Writer.Error || 
error{EndOfStream})!void { + var index: usize = 0; + while (buffer.len - index != 0) { + const len = try ev.fileWriteStreaming(file, &.{}, &.{buffer[index..]}, 1); + if (len == 0) return error.EndOfStream; + index += len; + } +} + +/// This is either usize or u32. Since, either is fine, let's use the same +/// `addBuf` function for both writing to a file and sending network messages. +const iovlen_t = @FieldType(c.msghdr_const, "iovlen"); + +fn addConstBuf(v: []iovec_const, i: *iovlen_t, remaining: ?*usize, bytes: []const u8) void { + if (v.len - i.* == 0) return; + const len = @min(remaining.*, bytes.len); + if (len == 0) return; + v[i.*] = .{ .base = bytes.ptr, .len = len }; + i.* += 1; + remaining.* -= len; +} +fn addBuf( + comptime is_const: bool, + vec: []if (is_const) iovec_const else iovec, + vec_len: *iovlen_t, + remaining: *Io.Limit, + bytes: if (is_const) []const u8 else []u8, +) void { + if (vec.len - vec_len.* == 0) return; + const len = remaining.minInt(bytes.len); + if (len == 0) return; + vec[vec_len.*] = .{ .base = bytes.ptr, .len = len }; + vec_len.* += 1; + remaining.* = remaining.subtract(len).?; +} + +test { + _ = Fiber.CancelProtection; +} diff --git a/lib/std/Io/IoUring.zig b/lib/std/Io/IoUring.zig @@ -1,6336 +0,0 @@ -const addressFromPosix = Io.Threaded.addressFromPosix; -const addressToPosix = Io.Threaded.addressToPosix; -const Alignment = std.mem.Alignment; -const Allocator = std.mem.Allocator; -const Argv0 = Io.Threaded.Argv0; -const assert = std.debug.assert; -const builtin = @import("builtin"); -const ChdirError = Io.Threaded.ChdirError; -const clockToPosix = Io.Threaded.clockToPosix; -const Csprng = Io.Threaded.Csprng; -const default_PATH = Io.Threaded.default_PATH; -const Dir = Io.Dir; -const Environ = Io.Threaded.Environ; -const errnoBug = Io.Threaded.errnoBug; -const Evented = @This(); -const fallbackSeed = Io.Threaded.fallbackSeed; -const fd_t = linux.fd_t; -const File = Io.File; -const Io = std.Io; -const IoUring = linux.IoUring; 
-const iovec = std.posix.iovec; -const iovec_const = std.posix.iovec_const; -const linux = std.os.linux; -const linux_statx_request = Io.Threaded.linux_statx_request; -const LOCK = std.posix.LOCK; -const log = std.log.scoped(.@"io-uring"); -const max_iovecs_len = Io.Threaded.max_iovecs_len; -const nanosecondsFromPosix = Io.Threaded.nanosecondsFromPosix; -const net = Io.net; -const PATH_MAX = linux.PATH_MAX; -const pathToPosix = Io.Threaded.pathToPosix; -const pid_t = linux.pid_t; -const PosixAddress = Io.Threaded.PosixAddress; -const posixAddressFamily = Io.Threaded.posixAddressFamily; -const posixProtocol = Io.Threaded.posixProtocol; -const posixSocketMode = Io.Threaded.posixSocketMode; -const process = std.process; -const recoverableOsBugDetected = Io.Threaded.recoverableOsBugDetected; -const setTimestampToPosix = Io.Threaded.setTimestampToPosix; -const splat_buffer_size = Io.Threaded.splat_buffer_size; -const statFromLinux = Io.Threaded.statFromLinux; -const std = @import("../std.zig"); -const timestampFromPosix = Io.Threaded.timestampFromPosix; -const unexpectedErrno = std.posix.unexpectedErrno; -const winsize = std.posix.winsize; - -const tracy = if (@hasDecl(@import("root"), "tracy")) @import("root").tracy else struct { - const enable = false; - inline fn fiberEnter(fiber: [*:0]const u8) void { - _ = fiber; - } - inline fn fiberLeave() void {} -}; - -backing_allocator_needs_mutex: bool, -backing_allocator_mutex: Io.Mutex, -/// Does not need to be thread-safe if not used elsewhere. 
-backing_allocator: Allocator, -main_fiber_buffer: [ - std.mem.alignForward(usize, @sizeOf(Fiber), @alignOf(Completion)) + @sizeOf(Completion) -]u8 align(@max(@alignOf(Fiber), @alignOf(Completion))), -log2_ring_entries: u4, -threads: Thread.List, -sync_limit: ?Io.Semaphore, - -stderr_mutex: Io.Mutex, -stderr_writer: File.Writer = .{ - .io = undefined, - .interface = Io.File.Writer.initInterface(&.{}), - .file = .stderr(), - .mode = .streaming, -}, -stderr_mode: Io.Terminal.Mode = .no_color, -stderr_writer_initialized: bool = false, - -environ_mutex: Io.Mutex, -environ: Environ, - -null_fd: CachedFd, -random_fd: CachedFd, - -csprng_mutex: Io.Mutex, -csprng: Csprng, - -/// Empirically saw >128KB being used by the self-hosted backend to panic. -/// Empirically saw glibc complain about 256KB. -const idle_stack_size = 512 * 1024; - -const max_idle_search = 1; -const max_steal_ready_search = 2; -const max_steal_free_search = 4; - -const Thread = struct { - required_align: void align(4), - thread: std.Thread, - idle_context: Context, - current_context: *Context, - ready_queue: ?*Fiber, - free_queue: ?*Fiber, - io_uring: IoUring, - idle_search_index: u32, - steal_ready_search_index: u32, - steal_free_search_index: u32, - name_arena: if (tracy.enable) std.heap.ArenaAllocator.State else struct {}, - csprng: Csprng, - - threadlocal var self: ?*Thread = null; - - noinline fn current() *Thread { - return self.?; - } - - fn deinit(thread: *Thread, gpa: Allocator) void { - var next_fiber = thread.free_queue; - while (next_fiber) |free_fiber| { - next_fiber = free_fiber.status.free_next; - gpa.free(free_fiber.allocatedSlice()); - } - thread.io_uring.deinit(); - } - - fn currentFiber(thread: *Thread) *Fiber { - assert(thread.current_context != &thread.idle_context); - return @fieldParentPtr("context", thread.current_context); - } - - fn enqueue(thread: *Thread) *linux.io_uring_sqe { - while (true) return thread.io_uring.get_sqe() catch { - thread.submit(); - continue; - }; - } - - 
fn submit(thread: *Thread) void { - _ = thread.io_uring.submit() catch |err| switch (err) { - error.SignalInterrupt => {}, - else => |e| @panic(@errorName(e)), - }; - } - - const List = struct { - allocated: []Thread, - reserved: u32, - active: u32, - }; -}; - -const Fiber = struct { - required_align: void align(4), - context: Context, - await_count: i32, - link: union { - awaiter: ?*Fiber, - group: struct { prev: ?*Fiber, next: ?*Fiber }, - }, - status: union(enum) { - queue_next: ?*Fiber, - awaiting_group: Group, - free_next: ?*Fiber, - }, - cancel_status: CancelStatus, - cancel_protection: CancelProtection, - name: if (tracy.enable) [*:0]const u8 else void, - - var next_name: u64 = 0; - - const CancelStatus = packed struct(u32) { - requested: bool, - awaiting: Awaiting, - - const unrequested: CancelStatus = .{ .requested = false, .awaiting = .nothing }; - - const Awaiting = enum(u31) { - nothing = std.math.maxInt(u31), - group = std.math.maxInt(u31) - 1, - select = std.math.maxInt(u31) - 2, - /// An io_uring fd. 
- _, - - fn subWrap(lhs: Awaiting, rhs: Awaiting) Awaiting { - return @enumFromInt(@intFromEnum(lhs) -% @intFromEnum(rhs)); - } - - fn fromIoUringFd(fd: fd_t) Awaiting { - const awaiting: Awaiting = @enumFromInt(fd); - switch (awaiting) { - .nothing, .group, .select => unreachable, - _ => return awaiting, - } - } - - fn toIoUringFd(awaiting: Awaiting) fd_t { - switch (awaiting) { - .nothing, .group => unreachable, - _ => return @intFromEnum(awaiting), - } - } - }; - - fn changeAwaiting( - cancel_status: *CancelStatus, - old_awaiting: Awaiting, - new_awaiting: Awaiting, - ) bool { - const old_cancel_status = @atomicRmw(CancelStatus, cancel_status, .Add, .{ - .requested = false, - .awaiting = new_awaiting.subWrap(old_awaiting), - }, .monotonic); - assert(old_cancel_status.awaiting == old_awaiting); - return old_cancel_status.requested; - } - }; - - const CancelProtection = packed struct { - user: Io.CancelProtection, - acknowledged: bool, - - const unblocked: CancelProtection = .{ .user = .unblocked, .acknowledged = false }; - - fn check(cancel_protection: CancelProtection) Io.CancelProtection { - return @enumFromInt(@intFromBool(cancel_protection != unblocked)); - } - - fn acknowledge(cancel_protection: *CancelProtection) void { - assert(!cancel_protection.acknowledged); - cancel_protection.acknowledged = true; - } - - fn recancel(cancel_protection: *CancelProtection) void { - assert(cancel_protection.acknowledged); - cancel_protection.acknowledged = false; - } - - test check { - try std.testing.expectEqual(Io.CancelProtection.unblocked, check(.unblocked)); - try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ - .user = .unblocked, - .acknowledged = true, - })); - try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ - .user = .blocked, - .acknowledged = false, - })); - try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ - .user = .blocked, - .acknowledged = true, - })); - } - }; - - const finished: ?*Fiber = 
@ptrFromInt(@alignOf(Fiber)); - - const max_result_align: Alignment = .@"16"; - const max_result_size = max_result_align.forward(512); - /// This includes any stack realignments that need to happen, and also the - /// initial frame return address slot and argument frame, depending on target. - const min_stack_size = 60 * 1024 * 1024; - const max_context_align: Alignment = .@"16"; - const max_context_size = max_context_align.forward(1024); - const max_closure_size: usize = @sizeOf(AsyncClosure); - const max_closure_align: Alignment = .of(AsyncClosure); - const allocation_size = std.mem.alignForward( - usize, - max_closure_align.max(max_context_align).forward( - max_result_align.forward(@sizeOf(Fiber)) + max_result_size + min_stack_size, - ) + max_closure_size + max_context_size, - std.heap.page_size_max, - ); - comptime { - assert(max_result_align.compare(.gte, .of(Completion))); - assert(max_result_size >= @sizeOf(Completion)); - } - - fn create(ev: *Evented) error{OutOfMemory}!*Fiber { - const thread: *Thread = .current(); - if (@atomicRmw(?*Fiber, &thread.free_queue, .Xchg, finished, .acquire)) |free_fiber| { - assert(free_fiber != finished); - @atomicStore(?*Fiber, &thread.free_queue, free_fiber.status.free_next, .release); - return free_fiber; - } - const active_threads = @atomicLoad(u32, &ev.threads.active, .acquire); - for (0..@min(max_steal_free_search, active_threads)) |_| { - defer thread.steal_free_search_index += 1; - if (thread.steal_free_search_index == active_threads) thread.steal_free_search_index = 0; - const steal_free_search_thread = - &ev.threads.allocated[0..active_threads][thread.steal_free_search_index]; - if (steal_free_search_thread == thread) continue; - const free_fiber = - @atomicLoad(?*Fiber, &steal_free_search_thread.free_queue, .monotonic) orelse continue; - if (free_fiber == finished) continue; - if (@cmpxchgWeak( - ?*Fiber, - &steal_free_search_thread.free_queue, - free_fiber, - null, - .acquire, - .monotonic, - )) |_| continue; - 
@atomicStore(?*Fiber, &thread.free_queue, free_fiber.status.free_next, .release); - return free_fiber; - } - @atomicStore(?*Fiber, &thread.free_queue, null, .monotonic); - return @ptrCast(try ev.allocator().alignedAlloc(u8, .of(Fiber), allocation_size)); - } - - fn destroy(fiber: *Fiber) void { - const thread: *Thread = .current(); - assert(fiber.status.queue_next == null); - fiber.status = .{ .free_next = @atomicLoad(?*Fiber, &thread.free_queue, .acquire) }; - while (true) fiber.status.free_next = @cmpxchgWeak( - ?*Fiber, - &thread.free_queue, - fiber.status.free_next, - fiber, - .acq_rel, - .acquire, - ) orelse break; - } - - fn allocatedSlice(f: *Fiber) []align(@alignOf(Fiber)) u8 { - return @as([*]align(@alignOf(Fiber)) u8, @ptrCast(f))[0..allocation_size]; - } - - fn allocatedEnd(f: *Fiber) [*]u8 { - const allocated_slice = f.allocatedSlice(); - return allocated_slice[allocated_slice.len..].ptr; - } - - fn resultPointer(f: *Fiber, comptime Result: type) *Result { - return @ptrCast(@alignCast(f.resultBytes(.of(Result)))); - } - - fn resultBytes(f: *Fiber, alignment: Alignment) [*]u8 { - return @ptrFromInt(alignment.forward(@intFromPtr(f) + @sizeOf(Fiber))); - } - - const Queue = struct { head: *Fiber, tail: *Fiber }; - - /// Like a `*Fiber`, but 2 bits smaller than a pointer (because the LSBs are always 0 due to - /// alignment) so that those two bits can be used in a `packed struct`. 
- const PackedPtr = enum(@Int(.unsigned, @bitSizeOf(usize) - 2)) { - null = 0, - all_ones = std.math.maxInt(@Int(.unsigned, @bitSizeOf(usize) - 2)), - _, - - const Split = packed struct(usize) { low: u2, high: PackedPtr }; - fn pack(ptr: ?*Fiber) PackedPtr { - const split: Split = @bitCast(@intFromPtr(ptr)); - assert(split.low == 0); - return split.high; - } - fn unpack(ptr: PackedPtr) ?*Fiber { - const split: Split = .{ .low = 0, .high = ptr }; - return @ptrFromInt(@as(usize, @bitCast(split))); - } - }; - - fn requestCancel(fiber: *Fiber, ev: *Evented) void { - const cancel_status = @atomicRmw( - Fiber.CancelStatus, - &fiber.cancel_status, - .Or, - .{ .requested = true, .awaiting = @enumFromInt(0) }, - .acq_rel, - ); - assert(!cancel_status.requested); - switch (cancel_status.awaiting) { - .nothing => {}, - .group => { - // The awaiter received a cancelation request while awaiting a group, - // so propagate the cancelation to the group. - if (fiber.status.awaiting_group.cancel(ev, null)) { - fiber.status = .{ .queue_next = null }; - _ = ev.schedule(.current(), .{ .head = fiber, .tail = fiber }); - } - }, - .select => if (@atomicRmw(i32, &fiber.await_count, .Add, 1, .monotonic) == -1) { - _ = ev.schedule(.current(), .{ .head = fiber, .tail = fiber }); - }, - _ => |cancel_io_uring_fd| { - const thread: *Thread = .current(); - thread.enqueue().* = if (thread.io_uring.fd == @intFromEnum(cancel_io_uring_fd)) .{ - .opcode = .ASYNC_CANCEL, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = @intFromPtr(fiber), - .len = 0, - .rw_flags = 0, - .user_data = @intFromEnum(Completion.UserData.wakeup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - } else .{ - .opcode = .MSG_RING, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = @intFromEnum(cancel_io_uring_fd), - .off = @intFromPtr(fiber) | 0b01, - .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA), - .len = 0, - .rw_flags = 0, - 
.user_data = @intFromEnum(Completion.UserData.cleanup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - }, - } - } -}; - -const CancelRegion = struct { - fiber: *Fiber, - status: Fiber.CancelStatus, - fn init() CancelRegion { - const fiber = Thread.current().currentFiber(); - return .{ - .fiber = fiber, - .status = .{ - .requested = fiber.cancel_protection.check() == .unblocked, - .awaiting = .nothing, - }, - }; - } - fn initBlocked() CancelRegion { - return .{ - .fiber = Thread.current().currentFiber(), - .status = .{ .requested = false, .awaiting = .nothing }, - }; - } - fn deinit(cancel_region: *CancelRegion) void { - if (cancel_region.status.requested) _ = cancel_region.fiber.cancel_status.changeAwaiting( - cancel_region.status.awaiting, - .nothing, - ); - cancel_region.* = undefined; - } - fn await(cancel_region: *CancelRegion, awaiting: Fiber.CancelStatus.Awaiting) Io.Cancelable!void { - if (!cancel_region.status.requested) return; - const status: Fiber.CancelStatus = .{ .requested = true, .awaiting = awaiting }; - if (cancel_region.fiber.cancel_status.changeAwaiting( - cancel_region.status.awaiting, - status.awaiting, - )) { - cancel_region.fiber.cancel_protection.acknowledge(); - cancel_region.status = .unrequested; - return error.Canceled; - } - cancel_region.status = status; - } - fn awaitIoUring(cancel_region: *CancelRegion) Io.Cancelable!*Thread { - const thread: *Thread = .current(); - try cancel_region.await(.fromIoUringFd(thread.io_uring.fd)); - return thread; - } - fn completion(cancel_region: *const CancelRegion) Completion { - return cancel_region.fiber.resultPointer(Completion).*; - } - fn errno(cancel_region: *const CancelRegion) linux.E { - return cancel_region.completion().errno(); - } - - const Sync = struct { - cancel_region: CancelRegion, - fn init(ev: *Evented) Io.Cancelable!Sync { - if (ev.sync_limit) |*sync_limit| try sync_limit.wait(ev.io()); - return .{ .cancel_region = .init() }; - } - fn 
initBlocked(ev: *Evented) Sync { - if (ev.sync_limit) |*sync_limit| sync_limit.waitUncancelable(ev.io()); - return .{ .cancel_region = .initBlocked() }; - } - fn deinit(sync: *Sync, ev: *Evented) void { - sync.cancel_region.deinit(); - if (ev.sync_limit) |*sync_limit| sync_limit.post(ev.io()); - } - - const Maybe = union(enum) { - cancel_region: CancelRegion, - sync: Sync, - - fn deinit(maybe: *Maybe, ev: *Evented) void { - switch (maybe.*) { - .cancel_region => |*cancel_region| cancel_region.deinit(), - .sync => |*sync| sync.deinit(ev), - } - } - - fn enterSync(maybe: *Maybe, ev: *Evented) Io.Cancelable!*Sync { - switch (maybe.*) { - .cancel_region => |cancel_region| { - if (ev.sync_limit) |*sync_limit| try sync_limit.wait(ev.io()); - maybe.* = .{ .sync = .{ .cancel_region = cancel_region } }; - }, - .sync => {}, - } - return &maybe.sync; - } - - fn leaveSync(maybe: *Maybe, ev: *Evented) void { - switch (maybe.*) { - .cancel_region => {}, - .sync => |sync| { - if (ev.sync_limit) |*sync_limit| sync_limit.post(ev.io()); - maybe.* = .{ .cancel_region = sync.cancel_region }; - }, - } - } - - fn cancelRegion(maybe: *Maybe) *CancelRegion { - return switch (maybe.*) { - .cancel_region => |*cancel_region| cancel_region, - .sync => |*sync| &sync.cancel_region, - }; - } - }; - }; -}; - -const CachedFd = struct { - once: Once, - - const Once = enum(fd_t) { - uninitialized = -1, - initializing = -2, - /// fd - _, - - fn fromFd(fd: fd_t) Once { - return @enumFromInt(@as(u31, @intCast(fd))); - } - - fn toFd(once: Once) fd_t { - return @as(u31, @intCast(@intFromEnum(once))); - } - }; - - const init: CachedFd = .{ .once = .uninitialized }; - - fn close(cached_fd: *CachedFd) void { - switch (cached_fd.once) { - .uninitialized => {}, - .initializing => unreachable, - _ => |fd| { - assert(@intFromEnum(fd) >= 0); - _ = std.os.linux.close(@intFromEnum(fd)); - cached_fd.* = .init; - }, - } - } - - fn open( - cached_fd: *CachedFd, - ev: *Evented, - cancel_region: *CancelRegion, - path: 
[*:0]const u8, - flags: linux.O, - ) File.OpenError!fd_t { - var once = @atomicLoad(Once, &cached_fd.once, .monotonic); - while (true) { - switch (once) { - .uninitialized => {}, - .initializing => try futexWait( - ev, - @ptrCast(&cached_fd.once), - @bitCast(@intFromEnum(once)), - .none, - ), - _ => |fd| { - @branchHint(.likely); - return fd.toFd(); - }, - } - once = @cmpxchgWeak( - Once, - &cached_fd.once, - .uninitialized, - .initializing, - .monotonic, - .monotonic, - ) orelse { - errdefer { - @atomicStore(Once, &cached_fd.once, .uninitialized, .monotonic); - futexWake(ev, @ptrCast(&cached_fd.once), 1); - } - const fd = try ev.openat(cancel_region, linux.AT.FDCWD, path, flags, 0); - @atomicStore(Once, &cached_fd.once, .fromFd(fd), .monotonic); - futexWake(ev, @ptrCast(&cached_fd.once), std.math.maxInt(u32)); - return fd; - }; - } - } -}; - -pub fn allocator(ev: *Evented) std.mem.Allocator { - return if (ev.backing_allocator_needs_mutex) .{ - .ptr = ev, - .vtable = &.{ - .alloc = alloc, - .resize = resize, - .remap = remap, - .free = free, - }, - } else ev.backing_allocator; -} - -fn alloc(userdata: *anyopaque, len: usize, alignment: std.mem.Alignment, ret_addr: usize) ?[*]u8 { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const ev_io = ev.io(); - ev.backing_allocator_mutex.lockUncancelable(ev_io); - defer ev.backing_allocator_mutex.unlock(ev_io); - return ev.backing_allocator.rawAlloc(len, alignment, ret_addr); -} - -fn resize( - userdata: *anyopaque, - memory: []u8, - alignment: std.mem.Alignment, - new_len: usize, - ret_addr: usize, -) bool { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const ev_io = ev.io(); - ev.backing_allocator_mutex.lockUncancelable(ev_io); - defer ev.backing_allocator_mutex.unlock(ev_io); - return ev.backing_allocator.rawResize(memory, alignment, new_len, ret_addr); -} - -fn remap( - userdata: *anyopaque, - memory: []u8, - alignment: Alignment, - new_len: usize, - ret_addr: usize, -) ?[*]u8 { - const ev: *Evented = 
@ptrCast(@alignCast(userdata)); - const ev_io = ev.io(); - ev.backing_allocator_mutex.lockUncancelable(ev_io); - defer ev.backing_allocator_mutex.unlock(ev_io); - return ev.backing_allocator.rawRemap(memory, alignment, new_len, ret_addr); -} - -fn free(userdata: *anyopaque, memory: []u8, alignment: std.mem.Alignment, ret_addr: usize) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const ev_io = ev.io(); - ev.backing_allocator_mutex.lockUncancelable(ev_io); - defer ev.backing_allocator_mutex.unlock(ev_io); - return ev.backing_allocator.rawFree(memory, alignment, ret_addr); -} - -pub fn io(ev: *Evented) Io { - return .{ - .userdata = ev, - .vtable = &.{ - .async = async, - .concurrent = concurrent, - .await = await, - .cancel = cancel, - - .groupAsync = groupAsync, - .groupConcurrent = groupConcurrent, - .groupAwait = groupAwait, - .groupCancel = groupCancel, - - .recancel = recancel, - .swapCancelProtection = swapCancelProtection, - .checkCancel = checkCancel, - - .select = select, - - .futexWait = futexWait, - .futexWaitUncancelable = futexWaitUncancelable, - .futexWake = futexWake, - - .operate = operate, - .batchAwaitAsync = batchAwaitAsync, - .batchAwaitConcurrent = batchAwaitConcurrent, - .batchCancel = batchCancel, - - .dirCreateDir = dirCreateDir, - .dirCreateDirPath = dirCreateDirPath, - .dirCreateDirPathOpen = dirCreateDirPathOpen, - .dirOpenDir = dirOpenDir, - .dirStat = dirStat, - .dirStatFile = dirStatFile, - .dirAccess = dirAccess, - .dirCreateFile = dirCreateFile, - .dirCreateFileAtomic = dirCreateFileAtomic, - .dirOpenFile = dirOpenFile, - .dirClose = dirClose, - .dirRead = dirRead, - .dirRealPath = dirRealPath, - .dirRealPathFile = dirRealPathFile, - .dirDeleteFile = dirDeleteFile, - .dirDeleteDir = dirDeleteDir, - .dirRename = dirRename, - .dirRenamePreserve = dirRenamePreserve, - .dirSymLink = dirSymLink, - .dirReadLink = dirReadLink, - .dirSetOwner = dirSetOwner, - .dirSetFileOwner = dirSetFileOwner, - .dirSetPermissions = 
dirSetPermissions, - .dirSetFilePermissions = dirSetFilePermissions, - .dirSetTimestamps = dirSetTimestamps, - .dirHardLink = dirHardLink, - - .fileStat = fileStat, - .fileLength = fileLength, - .fileClose = fileClose, - .fileWritePositional = fileWritePositional, - .fileWriteFileStreaming = fileWriteFileStreaming, - .fileWriteFilePositional = fileWriteFilePositional, - .fileReadPositional = fileReadPositional, - .fileSeekBy = fileSeekBy, - .fileSeekTo = fileSeekTo, - .fileSync = fileSync, - .fileIsTty = fileIsTty, - .fileEnableAnsiEscapeCodes = fileEnableAnsiEscapeCodes, - .fileSupportsAnsiEscapeCodes = fileIsTty, - .fileSetLength = fileSetLength, - .fileSetOwner = fileSetOwner, - .fileSetPermissions = fileSetPermissions, - .fileSetTimestamps = fileSetTimestamps, - .fileLock = fileLock, - .fileTryLock = fileTryLock, - .fileUnlock = fileUnlock, - .fileDowngradeLock = fileDowngradeLock, - .fileRealPath = fileRealPath, - .fileHardLink = fileHardLink, - - .fileMemoryMapCreate = fileMemoryMapCreate, - .fileMemoryMapDestroy = fileMemoryMapDestroy, - .fileMemoryMapSetLength = fileMemoryMapSetLength, - .fileMemoryMapRead = fileMemoryMapRead, - .fileMemoryMapWrite = fileMemoryMapWrite, - - .processExecutableOpen = processExecutableOpen, - .processExecutablePath = processExecutablePath, - .lockStderr = lockStderr, - .tryLockStderr = tryLockStderr, - .unlockStderr = unlockStderr, - .processCurrentPath = processCurrentPath, - .processSetCurrentDir = processSetCurrentDir, - .processReplace = processReplace, - .processReplacePath = processReplacePath, - .processSpawn = processSpawn, - .processSpawnPath = processSpawnPath, - .childWait = childWait, - .childKill = childKill, - - .progressParentFile = progressParentFile, - - .now = now, - .clockResolution = clockResolution, - .sleep = sleep, - - .random = random, - .randomSecure = randomSecure, - - .netListenIp = netListenIpUnavailable, - .netAccept = netAcceptUnavailable, - .netBindIp = netBindIp, - .netConnectIp = 
netConnectIpUnavailable, - .netListenUnix = netListenUnixUnavailable, - .netConnectUnix = netConnectUnixUnavailable, - .netSocketCreatePair = netSocketCreatePairUnavailable, - .netSend = netSendUnavailable, - .netReceive = netReceive, - .netRead = netReadUnavailable, - .netWrite = netWriteUnavailable, - .netWriteFile = netWriteFileUnavailable, - .netClose = netClose, - .netShutdown = netShutdown, - .netInterfaceNameResolve = netInterfaceNameResolveUnavailable, - .netInterfaceName = netInterfaceNameUnavailable, - .netLookup = netLookupUnavailable, - }, - }; -} - -fn fileMemoryMapSetLength( - userdata: ?*anyopaque, - mm: *File.MemoryMap, - new_len: usize, -) File.MemoryMap.SetLengthError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - const page_size = std.heap.pageSize(); - const alignment: Alignment = .fromByteUnits(page_size); - const page_align = std.heap.page_size_min; - const old_memory = mm.memory; - - if (alignment.forward(new_len) == alignment.forward(old_memory.len)) { - mm.memory.len = new_len; - return; - } - const flags: linux.MREMAP = .{ .MAYMOVE = true }; - const addr_hint: ?[*]const u8 = null; - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - const new_memory = while (true) { - try sync.cancel_region.await(.nothing); - const rc = linux.mremap(old_memory.ptr, old_memory.len, new_len, flags, addr_hint); - switch (linux.errno(rc)) { - .SUCCESS => break @as([*]align(page_align) u8, @ptrFromInt(rc))[0..new_len], - .INTR => continue, - .AGAIN => return error.LockedMemoryLimitExceeded, - .NOMEM => return error.OutOfMemory, - .INVAL => |err| return errnoBug(err), - .FAULT => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - }; - mm.memory = new_memory; -} - -fn fileMemoryMapRead(userdata: ?*anyopaque, mm: *File.MemoryMap) File.ReadPositionalError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = mm; -} - -fn fileMemoryMapWrite(userdata: ?*anyopaque, mm: 
*File.MemoryMap) File.WritePositionalError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = mm; -} - -pub const InitOptions = struct { - backing_allocator_needs_mutex: bool = true, - - /// Maximum thread pool size (excluding the main thread). - /// Defaults to one less than the number of logical CPU cores. - thread_limit: ?usize = null, - /// Maximum number of threads that may perform synchronous syscalls. - sync_limit: Io.Limit = .unlimited, - - log2_ring_entries: u4 = 3, - - /// Affects the following operations: - /// * `processExecutablePath` on OpenBSD and Haiku. - argv0: Argv0 = .empty, - /// Affects the following operations: - /// * `fileIsTty` - /// * `processSpawn`, `processSpawnPath`, `processReplace`, `processReplacePath` - environ: process.Environ = .empty, -}; - -pub fn init(ev: *Evented, backing_allocator: Allocator, options: InitOptions) !void { - const threads_size = @sizeOf(Thread) * if (options.thread_limit) |thread_limit| - 1 + thread_limit - else - @max(std.Thread.getCpuCount() catch 1, 1); - const idle_stack_end_offset = - std.mem.alignForward(usize, threads_size + idle_stack_size, std.heap.page_size_max); - const allocated_slice = try backing_allocator.alignedAlloc(u8, .of(Thread), idle_stack_end_offset); - errdefer backing_allocator.free(allocated_slice); - ev.* = .{ - .backing_allocator_needs_mutex = options.backing_allocator_needs_mutex, - .backing_allocator_mutex = .init, - .backing_allocator = backing_allocator, - .main_fiber_buffer = undefined, - .log2_ring_entries = options.log2_ring_entries, - .threads = .{ - .allocated = @ptrCast(allocated_slice[0..threads_size]), - .reserved = 1, - .active = 1, - }, - .sync_limit = if (options.sync_limit.toInt()) |sync_limit| .{ .permits = sync_limit } else null, - - .stderr_mutex = .init, - .stderr_writer = .{ - .io = ev.io(), - .interface = Io.File.Writer.initInterface(&.{}), - .file = .stderr(), - .mode = .streaming, - }, - .stderr_mode = .no_color, - 
.stderr_writer_initialized = false, - - .environ_mutex = .init, - .environ = .{ .process_environ = options.environ }, - - .null_fd = .init, - .random_fd = .init, - - .csprng_mutex = .init, - .csprng = .uninitialized, - }; - const main_fiber: *Fiber = @ptrCast(&ev.main_fiber_buffer); - main_fiber.* = .{ - .required_align = {}, - .context = undefined, - .await_count = 0, - .link = .{ .awaiter = null }, - .status = .{ .queue_next = null }, - .cancel_status = .unrequested, - .cancel_protection = .unblocked, - .name = if (tracy.enable) "main task", - }; - const main_thread = &ev.threads.allocated[0]; - Thread.self = main_thread; - const idle_stack_end: [*]align(16) usize = - @ptrCast(@alignCast(allocated_slice[idle_stack_end_offset..].ptr)); - (idle_stack_end - 1)[0..1].* = .{@intFromPtr(ev)}; - main_thread.* = .{ - .required_align = {}, - .thread = undefined, - .idle_context = switch (builtin.cpu.arch) { - .aarch64 => .{ - .sp = @intFromPtr(idle_stack_end), - .fp = 0, - .pc = @intFromPtr(&mainIdleEntry), - }, - .x86_64 => .{ - .rsp = @intFromPtr(idle_stack_end - 1), - .rbp = 0, - .rip = @intFromPtr(&mainIdleEntry), - }, - else => @compileError("unimplemented architecture"), - }, - .current_context = &main_fiber.context, - .ready_queue = null, - .free_queue = null, - .io_uring = try .init( - @as(u16, 1) << ev.log2_ring_entries, - linux.IORING_SETUP_COOP_TASKRUN | linux.IORING_SETUP_SINGLE_ISSUER, - ), - .idle_search_index = 1, - .steal_ready_search_index = 1, - .steal_free_search_index = 1, - .name_arena = .{}, - .csprng = .uninitialized, - }; - errdefer main_thread.io_uring.deinit(); - if (tracy.enable) tracy.fiberEnter(main_fiber.name); -} - -pub fn deinit(ev: *Evented) void { - const active_threads = @atomicLoad(u32, &ev.threads.active, .acquire); - for (ev.threads.allocated[0..active_threads]) |*thread| { - const ready_fiber = @atomicLoad(?*Fiber, &thread.ready_queue, .monotonic); - assert(ready_fiber == null or ready_fiber == Fiber.finished); // pending async - } - 
ev.yield(null, .exit); - ev.threads.allocated[0].deinit(ev.allocator()); - ev.null_fd.close(); - ev.random_fd.close(); - const allocated_ptr: [*]align(@alignOf(Thread)) u8 = @ptrCast(@alignCast(ev.threads.allocated.ptr)); - const idle_stack_end_offset = std.mem.alignForward( - usize, - ev.threads.allocated.len * @sizeOf(Thread) + idle_stack_size, - std.heap.page_size_max, - ); - for (ev.threads.allocated[1..active_threads]) |*thread| thread.thread.join(); - assert(active_threads == ev.threads.active); // spawned threads while there was no pending async? - ev.backing_allocator.free(allocated_ptr[0..idle_stack_end_offset]); - ev.* = undefined; -} - -fn findReadyFiber(ev: *Evented, thread: *Thread) ?*Fiber { - if (@atomicRmw(?*Fiber, &thread.ready_queue, .Xchg, Fiber.finished, .acquire)) |ready_fiber| { - assert(ready_fiber != Fiber.finished); - @atomicStore(?*Fiber, &thread.ready_queue, ready_fiber.status.queue_next, .release); - ready_fiber.status.queue_next = null; - return ready_fiber; - } - const active_threads = @atomicLoad(u32, &ev.threads.active, .acquire); - for (0..@min(max_steal_ready_search, active_threads)) |_| { - defer thread.steal_ready_search_index += 1; - if (thread.steal_ready_search_index == active_threads) thread.steal_ready_search_index = 0; - const steal_ready_search_thread = - &ev.threads.allocated[0..active_threads][thread.steal_ready_search_index]; - if (steal_ready_search_thread == thread) continue; - const ready_fiber = - @atomicLoad(?*Fiber, &steal_ready_search_thread.ready_queue, .monotonic) orelse continue; - if (ready_fiber == Fiber.finished) continue; - if (@cmpxchgWeak( - ?*Fiber, - &steal_ready_search_thread.ready_queue, - ready_fiber, - null, - .acquire, - .monotonic, - )) |_| continue; - @atomicStore(?*Fiber, &thread.ready_queue, ready_fiber.status.queue_next, .release); - ready_fiber.status.queue_next = null; - return ready_fiber; - } - // couldn't find anything to do, so we are now open for business - @atomicStore(?*Fiber, 
&thread.ready_queue, null, .monotonic); - return null; -} - -fn yield(ev: *Evented, maybe_ready_fiber: ?*Fiber, pending_task: SwitchMessage.PendingTask) void { - const thread: *Thread = .current(); - const ready_context = if (maybe_ready_fiber orelse ev.findReadyFiber(thread)) |ready_fiber| - &ready_fiber.context - else - &thread.idle_context; - const message: SwitchMessage = .{ - .contexts = .{ - .prev = thread.current_context, - .ready = ready_context, - }, - .pending_task = pending_task, - }; - contextSwitch(&message).handle(ev); -} - -fn schedule(ev: *Evented, thread: *Thread, ready_queue: Fiber.Queue) bool { - // shared fields of previous `Thread` must be initialized before later ones are marked as active - const new_thread_index = @atomicLoad(u32, &ev.threads.active, .acquire); - for (0..@min(max_idle_search, new_thread_index)) |_| { - defer thread.idle_search_index += 1; - if (thread.idle_search_index == new_thread_index) thread.idle_search_index = 0; - const idle_search_thread = &ev.threads.allocated[0..new_thread_index][thread.idle_search_index]; - if (idle_search_thread == thread) continue; - if (@cmpxchgWeak( - ?*Fiber, - &idle_search_thread.ready_queue, - null, - ready_queue.head, - .release, - .monotonic, - )) |_| continue; - thread.enqueue().* = .{ - .opcode = .MSG_RING, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = idle_search_thread.io_uring.fd, - .off = @intFromEnum(Completion.UserData.wakeup), - .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA), - .len = 0, - .rw_flags = 0, - .user_data = @intFromEnum(Completion.UserData.wakeup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - return true; - } - spawn_thread: { - // previous failed reservations must have completed before retrying - if (new_thread_index == ev.threads.allocated.len or @cmpxchgWeak( - u32, - &ev.threads.reserved, - new_thread_index, - new_thread_index + 1, - .acquire, - .monotonic, - ) != null) break :spawn_thread; 
- const new_thread = &ev.threads.allocated[new_thread_index]; - const next_thread_index = new_thread_index + 1; - var params = std.mem.zeroInit(linux.io_uring_params, .{ - .flags = linux.IORING_SETUP_ATTACH_WQ | - linux.IORING_SETUP_R_DISABLED | - linux.IORING_SETUP_COOP_TASKRUN | - linux.IORING_SETUP_SINGLE_ISSUER, - .wq_fd = @as(u32, @intCast(ev.threads.allocated[0].io_uring.fd)), - }); - new_thread.* = .{ - .required_align = {}, - .thread = undefined, - .idle_context = undefined, - .current_context = &new_thread.idle_context, - .ready_queue = ready_queue.head, - .free_queue = null, - .io_uring = IoUring.init_params(@as(u16, 1) << ev.log2_ring_entries, &params) catch |err| { - @atomicStore(u32, &ev.threads.reserved, new_thread_index, .release); - // no more access to `thread` after giving up reservation - log.warn("unable to create worker thread due to io_uring init failure: {s}", .{ - @errorName(err), - }); - break :spawn_thread; - }, - .idle_search_index = 0, - .steal_ready_search_index = 0, - .steal_free_search_index = 0, - .name_arena = .{}, - .csprng = .uninitialized, - }; - new_thread.thread = std.Thread.spawn(.{ - .stack_size = idle_stack_size, - .allocator = ev.allocator(), - }, threadEntry, .{ ev, new_thread_index }) catch |err| { - new_thread.io_uring.deinit(); - @atomicStore(u32, &ev.threads.reserved, new_thread_index, .release); - // no more access to `thread` after giving up reservation - log.warn("unable to create worker thread due spawn failure: {s}", .{@errorName(err)}); - break :spawn_thread; - }; - // shared fields of `Thread` must be initialized before being marked active - @atomicStore(u32, &ev.threads.active, next_thread_index, .release); - return false; - } - // nobody wanted it, so just queue it on ourselves - while (true) ready_queue.tail.status.queue_next = @cmpxchgWeak( - ?*Fiber, - &thread.ready_queue, - ready_queue.tail.status.queue_next, - ready_queue.head, - .acq_rel, - .acquire, - ) orelse break; - return false; -} - -fn mainIdle( - 
ev: *Evented, - message: *const SwitchMessage, -) callconv(.withStackAlign(.c, @max(@alignOf(Thread), @alignOf(Context)))) noreturn { - message.handle(ev); - ev.idle(&ev.threads.allocated[0]); - ev.yield(@ptrCast(&ev.main_fiber_buffer), .nothing); - unreachable; // switched to dead fiber -} - -fn threadEntry(ev: *Evented, index: u32) void { - const thread: *Thread = &ev.threads.allocated[index]; - Thread.self = thread; - defer thread.deinit(ev.allocator()); - switch (linux.errno(linux.io_uring_register(thread.io_uring.fd, .REGISTER_ENABLE_RINGS, null, 0))) { - .SUCCESS => ev.idle(thread), - else => |err| @panic(@tagName(err)), - } -} - -const Completion = struct { - result: i32, - flags: u32, - - const UserData = enum(usize) { - unused, - wakeup, - futex_wake, - cleanup, - exit, - /// If bit 0 is 1, a pointer to the `context` field of `Io.Batch.Storage.Pending`. - /// If bits 0 and 1 are 0, a `*Fiber`. - _, - }; - - fn errno(completion: Completion) linux.E { - return linux.errno(@bitCast(@as(isize, completion.result))); - } -}; - -fn idle(ev: *Evented, thread: *Thread) void { - var maybe_ready_fiber: ?*Fiber = null; - while (true) { - while (maybe_ready_fiber orelse ev.findReadyFiber(thread)) |ready_fiber| { - ev.yield(ready_fiber, .nothing); - maybe_ready_fiber = null; - } - _ = thread.io_uring.submit_and_wait(1) catch |err| switch (err) { - error.SignalInterrupt => {}, - else => |e| @panic(@errorName(e)), - }; - var maybe_ready_queue: ?Fiber.Queue = null; - while (true) { - var cqes_buffer: [1 << 8]linux.io_uring_cqe = undefined; - const cqes = cqes_buffer[0 .. thread.io_uring.copy_cqes(&cqes_buffer, 0) catch |err| switch (err) { - error.SignalInterrupt => 0, - else => |e| @panic(@errorName(e)), - }]; - if (cqes.len == 0) break; - for (cqes) |cqe| if (cqe.flags & linux.IORING_CQE_F_SKIP == 0) switch (@as( - Completion.UserData, - @enumFromInt(cqe.user_data), - )) { - .unused => unreachable, // bad submission queued? 
- .wakeup => {}, - .futex_wake => switch (Completion.errno(.{ .result = cqe.res, .flags = cqe.flags })) { - .SUCCESS => recoverableOsBugDetected(), // success is skipped - .INVAL => {}, // invalid futex_wait() on ptr done elsewhere - .INTR, .CANCELED => recoverableOsBugDetected(), // `Completion.UserData.futex_wake` is not cancelable - .FAULT => {}, // pointer became invalid while doing the wake - else => recoverableOsBugDetected(), // deadlock due to operating system bug - }, - .cleanup => @panic("failed to notify other threads that we are exiting"), - .exit => { - assert(maybe_ready_fiber == null and maybe_ready_queue == null); // pending async - return; - }, - _ => if (@as(?*Fiber, ready_fiber: switch (@as(u2, @truncate(cqe.user_data))) { - 0b00 => { - const ready_fiber: *Fiber = @ptrFromInt(cqe.user_data & ~@as(usize, 0b11)); - ready_fiber.resultPointer(Completion).* = .{ - .result = cqe.res, - .flags = cqe.flags, - }; - break :ready_fiber ready_fiber; - }, - 0b01 => { - thread.enqueue().* = .{ - .opcode = .ASYNC_CANCEL, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = cqe.user_data & ~@as(usize, 0b11), - .len = 0, - .rw_flags = 0, - .user_data = @intFromEnum(Completion.UserData.wakeup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - break :ready_fiber null; - }, - 0b10 => { - const context: *Io.Operation.Storage.Pending.Context = - @ptrFromInt(cqe.user_data & ~@as(usize, 0b11)); - const batch: *Io.Batch = @ptrFromInt(context[0]); - var next: usize = 0b00; - context[0..3].* = .{ next, @as(u32, @bitCast(cqe.res)), cqe.flags }; - while (true) { - next = @cmpxchgWeak( - usize, - @as(*usize, @ptrCast(&batch.context)), - next, - cqe.user_data, - .release, - .acquire, - ) orelse break; - context[0] = next; - } - break :ready_fiber switch (@as(u2, @truncate(next))) { - 0b00, 0b01 => @ptrFromInt(next & ~@as(usize, 0b11)), - 0b10, 0b11 => null, - }; - }, - 0b11 => switch 
(Completion.errno(.{ .result = cqe.res, .flags = cqe.flags })) { - .SUCCESS => unreachable, // no event count specified - .TIME => { - const context: *usize = @ptrFromInt(cqe.user_data & ~@as(usize, 0b11)); - const fiber = @atomicRmw(usize, context, .Add, 0b01, .acquire); - break :ready_fiber switch (@as(u2, @truncate(fiber))) { - else => unreachable, // timeout completed multiple times - 0b00 => @ptrFromInt(fiber & ~@as(usize, 0b11)), - 0b10 => null, - }; - }, - .CANCELED => null, // user data may have been invalidated - else => |err| unexpectedErrno(err) catch null, - }, - })) |ready_fiber| { - assert(ready_fiber.status.queue_next == null); - if (maybe_ready_fiber == null) { - maybe_ready_fiber = ready_fiber; - } else if (maybe_ready_queue) |*ready_queue| { - ready_queue.tail.status.queue_next = ready_fiber; - ready_queue.tail = ready_fiber; - } else maybe_ready_queue = .{ .head = ready_fiber, .tail = ready_fiber }; - }, - }; - } - if (maybe_ready_queue) |ready_queue| _ = ev.schedule(thread, ready_queue); - } -} - -const SwitchMessage = struct { - contexts: extern struct { - prev: *Context, - ready: *Context, - }, - pending_task: PendingTask, - - const PendingTask = union(enum) { - nothing, - reschedule, - await: u31, - group_await: Group, - group_cancel: Group, - batch_await: *Io.Batch, - destroy, - exit, - }; - - fn handle(message: *const SwitchMessage, ev: *Evented) void { - const thread: *Thread = .current(); - thread.current_context = message.contexts.ready; - if (tracy.enable) { - if (message.contexts.ready != &thread.idle_context) { - const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.ready)); - tracy.fiberEnter(fiber.name); - } else tracy.fiberLeave(); - } - switch (message.pending_task) { - .nothing => {}, - .reschedule => if (message.contexts.prev != &thread.idle_context) { - const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - assert(fiber.status.queue_next == null); - _ = ev.schedule(thread, 
.{ .head = fiber, .tail = fiber }); - }, - .await => |count| { - const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - if (@atomicRmw(i32, &fiber.await_count, .Sub, count, .monotonic) > 0) - _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); - }, - .group_await => |group| { - const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - if (group.await(ev, fiber)) - _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); - }, - .group_cancel => |group| { - const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - if (group.cancel(ev, fiber)) - _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); - }, - .batch_await => |batch| { - const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - if (@cmpxchgStrong( - ?*anyopaque, - &batch.context, - null, - fiber, - .release, - .monotonic, - )) |head| { - assert(@as(u2, @truncate(@intFromPtr(head))) != 0b00); - _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); - } - }, - .destroy => { - const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - fiber.destroy(); - }, - .exit => for ( - ev.threads.allocated[0..@atomicLoad(u32, &ev.threads.active, .acquire)], - ) |*each_thread| { - thread.enqueue().* = .{ - .opcode = .MSG_RING, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = each_thread.io_uring.fd, - .off = @intFromEnum(Completion.UserData.exit), - .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA), - .len = 0, - .rw_flags = 0, - .user_data = @intFromEnum(Completion.UserData.cleanup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - }, - } - } -}; - -const Context = switch (builtin.cpu.arch) { - .aarch64 => extern struct { - sp: u64, - fp: u64, - pc: u64, - }, - .x86_64 => extern struct { - rsp: u64, - rbp: u64, - rip: u64, - }, - else => |arch| @compileError("unimplemented architecture: " 
++ @tagName(arch)), -}; - -inline fn contextSwitch(message: *const SwitchMessage) *const SwitchMessage { - return @fieldParentPtr("contexts", switch (builtin.cpu.arch) { - .aarch64 => asm volatile ( - \\ ldp x0, x2, [x1] - \\ ldr x3, [x2, #16] - \\ mov x4, sp - \\ stp x4, fp, [x0] - \\ adr x5, 0f - \\ ldp x4, fp, [x2] - \\ str x5, [x0, #16] - \\ mov sp, x4 - \\ br x3 - \\0: - : [received_message] "={x1}" (-> *const @FieldType(SwitchMessage, "contexts")), - : [message_to_send] "{x1}" (&message.contexts), - : .{ - .x0 = true, - .x1 = true, - .x2 = true, - .x3 = true, - .x4 = true, - .x5 = true, - .x6 = true, - .x7 = true, - .x8 = true, - .x9 = true, - .x10 = true, - .x11 = true, - .x12 = true, - .x13 = true, - .x14 = true, - .x15 = true, - .x16 = true, - .x17 = true, - .x19 = true, - .x20 = true, - .x21 = true, - .x22 = true, - .x23 = true, - .x24 = true, - .x25 = true, - .x26 = true, - .x27 = true, - .x28 = true, - .x30 = true, - .z0 = true, - .z1 = true, - .z2 = true, - .z3 = true, - .z4 = true, - .z5 = true, - .z6 = true, - .z7 = true, - .z8 = true, - .z9 = true, - .z10 = true, - .z11 = true, - .z12 = true, - .z13 = true, - .z14 = true, - .z15 = true, - .z16 = true, - .z17 = true, - .z18 = true, - .z19 = true, - .z20 = true, - .z21 = true, - .z22 = true, - .z23 = true, - .z24 = true, - .z25 = true, - .z26 = true, - .z27 = true, - .z28 = true, - .z29 = true, - .z30 = true, - .z31 = true, - .p0 = true, - .p1 = true, - .p2 = true, - .p3 = true, - .p4 = true, - .p5 = true, - .p6 = true, - .p7 = true, - .p8 = true, - .p9 = true, - .p10 = true, - .p11 = true, - .p12 = true, - .p13 = true, - .p14 = true, - .p15 = true, - .fpcr = true, - .fpsr = true, - .ffr = true, - .memory = true, - }), - .x86_64 => asm volatile ( - \\ movq 0(%%rsi), %%rax - \\ movq 8(%%rsi), %%rcx - \\ leaq 0f(%%rip), %%rdx - \\ movq %%rsp, 0(%%rax) - \\ movq %%rbp, 8(%%rax) - \\ movq %%rdx, 16(%%rax) - \\ movq 0(%%rcx), %%rsp - \\ movq 8(%%rcx), %%rbp - \\ jmpq *16(%%rcx) - \\0: - : 
[received_message] "={rsi}" (-> *const @FieldType(SwitchMessage, "contexts")), - : [message_to_send] "{rsi}" (&message.contexts), - : .{ - .rax = true, - .rcx = true, - .rdx = true, - .rbx = true, - .rsi = true, - .rdi = true, - .r8 = true, - .r9 = true, - .r10 = true, - .r11 = true, - .r12 = true, - .r13 = true, - .r14 = true, - .r15 = true, - .mm0 = true, - .mm1 = true, - .mm2 = true, - .mm3 = true, - .mm4 = true, - .mm5 = true, - .mm6 = true, - .mm7 = true, - .zmm0 = true, - .zmm1 = true, - .zmm2 = true, - .zmm3 = true, - .zmm4 = true, - .zmm5 = true, - .zmm6 = true, - .zmm7 = true, - .zmm8 = true, - .zmm9 = true, - .zmm10 = true, - .zmm11 = true, - .zmm12 = true, - .zmm13 = true, - .zmm14 = true, - .zmm15 = true, - .zmm16 = true, - .zmm17 = true, - .zmm18 = true, - .zmm19 = true, - .zmm20 = true, - .zmm21 = true, - .zmm22 = true, - .zmm23 = true, - .zmm24 = true, - .zmm25 = true, - .zmm26 = true, - .zmm27 = true, - .zmm28 = true, - .zmm29 = true, - .zmm30 = true, - .zmm31 = true, - .fpsr = true, - .fpcr = true, - .mxcsr = true, - .rflags = true, - .dirflag = true, - .memory = true, - }), - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), - }); -} - -fn mainIdleEntry() callconv(.naked) void { - switch (builtin.cpu.arch) { - .aarch64 => asm volatile ( - \\ ldr x0, [sp, #-8] - \\ b %[mainIdle] - : - : [mainIdle] "X" (&mainIdle), - ), - .x86_64 => asm volatile ( - \\ movq (%%rsp), %%rdi - \\ jmp %[mainIdle:P] - : - : [mainIdle] "X" (&mainIdle), - ), - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), - } -} - -const AsyncClosure = struct { - ev: *Evented, - fiber: *Fiber, - start: *const fn (context: *const anyopaque, result: *anyopaque) void, - result_align: Alignment, - - fn fromFiber(fiber: *Fiber) *AsyncClosure { - return @ptrFromInt(Fiber.max_context_align.max(.of(AsyncClosure)).backward( - @intFromPtr(fiber.allocatedEnd()) - Fiber.max_context_size, - ) - @sizeOf(AsyncClosure)); - } - - fn 
contextPointer(closure: *AsyncClosure) [*]align(Fiber.max_context_align.toByteUnits()) u8 { - return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(AsyncClosure)); - } - - fn entry() callconv(.naked) void { - switch (builtin.cpu.arch) { - .aarch64 => asm volatile ( - \\ mov x0, sp - \\ b %[call] - : - : [call] "X" (&call), - ), - .x86_64 => asm volatile ( - \\ leaq 8(%%rsp), %%rdi - \\ jmp %[call:P] - : - : [call] "X" (&call), - ), - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), - } - } - - fn call( - closure: *AsyncClosure, - message: *const SwitchMessage, - ) callconv(.withStackAlign(.c, @alignOf(AsyncClosure))) noreturn { - message.handle(closure.ev); - const fiber = closure.fiber; - closure.start(closure.contextPointer(), fiber.resultBytes(closure.result_align)); - closure.ev.yield( - if (@atomicRmw(?*Fiber, &fiber.link.awaiter, .Xchg, Fiber.finished, .acq_rel)) |awaiter| - if (@atomicRmw(i32, &awaiter.await_count, .Add, 1, .monotonic) == -1) awaiter else null - else - null, - .nothing, - ); - unreachable; // switched to dead fiber - } -}; - -fn async( - userdata: ?*anyopaque, - result: []u8, - result_alignment: Alignment, - context: []const u8, - context_alignment: Alignment, - start: *const fn (context: *const anyopaque, result: *anyopaque) void, -) ?*std.Io.AnyFuture { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - return concurrent(ev, result.len, result_alignment, context, context_alignment, start) catch { - start(context.ptr, result.ptr); - return null; - }; -} - -fn concurrent( - userdata: ?*anyopaque, - result_len: usize, - result_alignment: Alignment, - context: []const u8, - context_alignment: Alignment, - start: *const fn (context: *const anyopaque, result: *anyopaque) void, -) Io.ConcurrentError!*std.Io.AnyFuture { - assert(result_alignment.compare(.lte, Fiber.max_result_align)); // TODO - assert(context_alignment.compare(.lte, Fiber.max_context_align)); // TODO - assert(result_len <= 
Fiber.max_result_size); // TODO - assert(context.len <= Fiber.max_context_size); // TODO - - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const fiber = Fiber.create(ev) catch |err| switch (err) { - error.OutOfMemory => return error.ConcurrencyUnavailable, - }; - - const closure: *AsyncClosure = .fromFiber(fiber); - fiber.* = .{ - .required_align = {}, - .context = switch (builtin.cpu.arch) { - .aarch64 => .{ - .sp = @intFromPtr(closure), - .fp = 0, - .pc = @intFromPtr(&AsyncClosure.entry), - }, - .x86_64 => .{ - .rsp = @intFromPtr(closure) - @sizeOf(usize), - .rbp = 0, - .rip = @intFromPtr(&AsyncClosure.entry), - }, - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), - }, - .await_count = 0, - .link = .{ .awaiter = null }, - .status = .{ .queue_next = null }, - .cancel_status = .unrequested, - .cancel_protection = .unblocked, - .name = if (tracy.enable) name: { - const thread: *Thread = .current(); - var name_arena = thread.name_arena.promote(std.heap.page_allocator); - defer thread.name_arena = name_arena.state; - break :name std.fmt.allocPrintSentinel( - name_arena.allocator(), - "task {d}", - .{@atomicRmw(u64, &Fiber.next_name, .Add, 1, .monotonic)}, - 0, - ) catch return error.ConcurrencyUnavailable; - }, - }; - closure.* = .{ - .ev = ev, - .fiber = fiber, - .start = start, - .result_align = result_alignment, - }; - @memcpy(closure.contextPointer(), context); - - const thread: *Thread = .current(); - if (ev.schedule(thread, .{ .head = fiber, .tail = fiber })) thread.submit(); - return @ptrCast(fiber); -} - -fn await( - userdata: ?*anyopaque, - future: *std.Io.AnyFuture, - result: []u8, - result_alignment: Alignment, -) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const fiber = Thread.current().currentFiber(); - const future_fiber: *Fiber = @ptrCast(@alignCast(future)); - if (@atomicRmw(?*Fiber, &future_fiber.link.awaiter, .Xchg, fiber, .acq_rel)) |awaiter| { - assert(awaiter == Fiber.finished); - } else 
while (true) { - ev.yield(null, .{ .await = 1 }); - const awaiter = @atomicLoad(?*Fiber, &future_fiber.link.awaiter, .acquire); - if (awaiter == Fiber.finished) break; - assert(awaiter == fiber); // spurious wakeup - } - @memcpy(result, future_fiber.resultBytes(result_alignment)); - future_fiber.destroy(); -} - -fn cancel( - userdata: ?*anyopaque, - future: *std.Io.AnyFuture, - result: []u8, - result_alignment: Alignment, -) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const future_fiber: *Fiber = @ptrCast(@alignCast(future)); - future_fiber.requestCancel(ev); - await(ev, future, result, result_alignment); -} - -const Group = struct { - ptr: *Io.Group, - - const List = packed struct(usize) { - cancel_requested: bool, - awaiter_delayed: bool, - fibers: Fiber.PackedPtr, - }; - fn listPtr(group: Group) *List { - return @ptrCast(&group.ptr.token); - } - - const Mutex = packed struct(u32) { - locked: bool, - contended: bool, - shared2: u30, - }; - fn mutexPtr(group: Group) *Mutex { - return switch (comptime builtin.cpu.arch.endian()) { - .little => @ptrCast(&group.ptr.state), - .big => @ptrCast(@alignCast( - @as([*]u8, @ptrCast(&group.ptr.state)) + @sizeOf(usize) - @sizeOf(u32), - )), - }; - } - - const Awaiter = packed struct(usize) { - locked: bool, - contended: bool, - awaiter: Fiber.PackedPtr, - }; - fn awaiterPtr(group: Group) *Awaiter { - return @ptrCast(&group.ptr.state); - } - - fn lock(group: Group, ev: *Evented) void { - const mutex = group.mutexPtr(); - { - const old_state = @atomicRmw( - Mutex, - mutex, - .Or, - .{ .locked = true, .contended = false, .shared2 = 0 }, - .acquire, - ); - if (!old_state.locked) { - @branchHint(.likely); - return; - } - if (old_state.contended) { - futexWaitUncancelable(ev, @ptrCast(mutex), @bitCast(old_state)); - } - } - while (true) { - var old_state = @atomicRmw( - Mutex, - mutex, - .Or, - .{ .locked = true, .contended = true, .shared2 = 0 }, - .acquire, - ); - if (!old_state.locked) { - 
@branchHint(.likely); - return; - } - old_state.contended = true; - futexWaitUncancelable(ev, @ptrCast(mutex), @bitCast(old_state)); - } - } - - fn unlock(group: Group, ev: *Evented) void { - const mutex = group.mutexPtr(); - const old_state = @atomicRmw( - Mutex, - mutex, - .And, - .{ .locked = false, .contended = false, .shared2 = std.math.maxInt(u30) }, - .release, - ); - assert(old_state.locked); - if (old_state.contended) futexWake(ev, @ptrCast(mutex), 1); - } - - fn addFiber(group: Group, ev: *Evented, fiber: *Fiber) void { - group.lock(ev); - defer group.unlock(ev); - const list_ptr = group.listPtr(); - const list = @atomicLoad(List, list_ptr, .monotonic); - if (list.cancel_requested) fiber.cancel_status = .{ .requested = true, .awaiting = .nothing }; - const old_head = list.fibers.unpack(); - if (old_head) |head| head.link.group.prev = fiber; - fiber.link.group.next = old_head; - @atomicStore(List, list_ptr, .{ - .cancel_requested = list.cancel_requested, - .awaiter_delayed = list.awaiter_delayed, - .fibers = .pack(fiber), - }, .monotonic); - } - - fn removeFiber(group: Group, ev: *Evented, fiber: *Fiber) ?*Fiber { - group.lock(ev); - defer group.unlock(ev); - const list_ptr = group.listPtr(); - const list = @atomicLoad(List, list_ptr, .monotonic); - if (fiber.link.group.next) |next| next.link.group.prev = fiber.link.group.prev; - if (fiber.link.group.prev) |prev| { - prev.link.group.next = fiber.link.group.next; - } else if (fiber.link.group.next) |new_head| { - @atomicStore(List, list_ptr, .{ - .cancel_requested = list.cancel_requested, - .awaiter_delayed = list.awaiter_delayed, - .fibers = .pack(new_head), - }, .monotonic); - } else if (@atomicLoad(Awaiter, group.awaiterPtr(), .monotonic).awaiter.unpack()) |awaiter| { - if (!awaiter.cancel_status.changeAwaiting(.group, .nothing) or list.cancel_requested) { - @atomicStore(List, list_ptr, .{ - .cancel_requested = false, - .awaiter_delayed = false, - .fibers = .null, - }, .release); - 
assert(awaiter.status.awaiting_group.ptr == group.ptr); - awaiter.status = .{ .queue_next = null }; - return awaiter; - } - // Race with `Fiber.requestCancel` - @atomicStore(List, list_ptr, .{ - .cancel_requested = false, - .awaiter_delayed = true, - .fibers = .null, - }, .monotonic); - } else @atomicStore(List, list_ptr, .{ - .cancel_requested = false, - .awaiter_delayed = false, - .fibers = .null, - }, .release); - return null; - } - - fn await(group: Group, ev: *Evented, awaiter: *Fiber) bool { - group.lock(ev); - defer group.unlock(ev); - if (@atomicLoad(List, group.listPtr(), .monotonic).fibers.unpack()) |_| { - if (group.registerAwaiter(awaiter) and awaiter.cancel_protection.check() == .unblocked) { - // The awaiter already had an unacknowledged cancelation request before - // attempting to await a group, so propagate the cancelation to the group. - assert(!group.cancelLocked(ev, null)); - } - return false; - } - return true; - } - - fn cancel(group: Group, ev: *Evented, maybe_awaiter: ?*Fiber) bool { - group.lock(ev); - defer group.unlock(ev); - return group.cancelLocked(ev, maybe_awaiter); - } - - /// Assumes the mutex is held. - fn cancelLocked(group: Group, ev: *Evented, maybe_awaiter: ?*Fiber) bool { - const list_ptr = group.listPtr(); - const list = @atomicRmw( - List, - list_ptr, - .Add, - .{ .cancel_requested = true, .awaiter_delayed = false, .fibers = .null }, - .monotonic, - ); - assert(!list.cancel_requested); - if (list.fibers.unpack()) |head| { - var maybe_fiber: ?*Fiber = head; - while (maybe_fiber) |fiber| { - fiber.requestCancel(ev); - maybe_fiber = fiber.link.group.next; - } - if (maybe_awaiter) |awaiter| _ = group.registerAwaiter(awaiter); - return false; - } - @atomicStore( - List, - list_ptr, - .{ .cancel_requested = false, .awaiter_delayed = false, .fibers = .null }, - .release, - ); - return if (maybe_awaiter) |_| true else list.awaiter_delayed; - } - - /// Assumes the mutex is held. 
- fn registerAwaiter(group: Group, awaiter: *Fiber) bool { - assert(awaiter.status.queue_next == null); - awaiter.status = .{ .awaiting_group = group }; - assert(@atomicRmw( - Awaiter, - group.awaiterPtr(), - .Add, - .{ .locked = false, .contended = false, .awaiter = .pack(awaiter) }, - .monotonic, - ).awaiter == .null); - return awaiter.cancel_status.changeAwaiting(.nothing, .group); - } - - const AsyncClosure = struct { - ev: *Evented, - group: Group, - fiber: *Fiber, - start: *const fn (context: *const anyopaque) Io.Cancelable!void, - - fn fromFiber(fiber: *Fiber) *Group.AsyncClosure { - return @ptrFromInt(Fiber.max_context_align.max(.of(Group.AsyncClosure)).backward( - @intFromPtr(fiber.allocatedEnd()) - Fiber.max_context_size, - ) - @sizeOf(Group.AsyncClosure)); - } - - fn contextPointer( - closure: *Group.AsyncClosure, - ) [*]align(Fiber.max_context_align.toByteUnits()) u8 { - return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(Group.AsyncClosure)); - } - - fn entry() callconv(.naked) void { - switch (builtin.cpu.arch) { - .aarch64 => asm volatile ( - \\ mov x0, sp - \\ b %[call] - : - : [call] "X" (&call), - ), - .x86_64 => asm volatile ( - \\ leaq 8(%%rsp), %%rdi - \\ jmp %[call:P] - : - : [call] "X" (&call), - ), - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), - } - } - - fn call( - closure: *Group.AsyncClosure, - message: *const SwitchMessage, - ) callconv(.withStackAlign(.c, @alignOf(Group.AsyncClosure))) noreturn { - message.handle(closure.ev); - assert(closure.fiber.status.queue_next == null); - const result = closure.start(closure.contextPointer()); - const ev = closure.ev; - const group = closure.group; - const fiber = closure.fiber; - const cancel_acknowledged = fiber.cancel_protection.acknowledged; - if (result) { - assert(!cancel_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled` - } else |err| switch (err) { - error.Canceled => assert(cancel_acknowledged), // group 
task returned `error.Canceled` but was never canceled - } - ev.yield(group.removeFiber(ev, fiber), .destroy); - unreachable; // switched to dead fiber - } - }; -}; - -fn groupAsync( - userdata: ?*anyopaque, - type_erased: *Io.Group, - context: []const u8, - context_alignment: Alignment, - start: *const fn (context: *const anyopaque) Io.Cancelable!void, -) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - return groupConcurrent(ev, type_erased, context, context_alignment, start) catch { - const fiber = Thread.current().currentFiber(); - const pre_acknowledged = fiber.cancel_protection.acknowledged; - const result = start(context.ptr); - const post_acknowledged = fiber.cancel_protection.acknowledged; - if (result) { - if (pre_acknowledged) { - assert(post_acknowledged); // group task called `recancel` but was not canceled - } else { - assert(!post_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled` - } - } else |err| switch (err) { - // Don't swallow the cancelation: make it visible to the `Group.async` caller. 
- error.Canceled => { - assert(!pre_acknowledged); // group task called `recancel` but was not canceled - assert(post_acknowledged); // group task returned `error.Canceled` but was never canceled - recancel(userdata); - }, - } - }; -} - -fn groupConcurrent( - userdata: ?*anyopaque, - type_erased: *Io.Group, - context: []const u8, - context_alignment: Alignment, - start: *const fn (context: *const anyopaque) Io.Cancelable!void, -) Io.ConcurrentError!void { - assert(context_alignment.compare(.lte, Fiber.max_context_align)); // TODO - assert(context.len <= Fiber.max_context_size); // TODO - - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const group: Group = .{ .ptr = type_erased }; - const fiber = Fiber.create(ev) catch |err| switch (err) { - error.OutOfMemory => return error.ConcurrencyUnavailable, - }; - - const closure: *Group.AsyncClosure = .fromFiber(fiber); - fiber.* = .{ - .required_align = {}, - .context = switch (builtin.cpu.arch) { - .aarch64 => .{ - .sp = @intFromPtr(closure), - .fp = 0, - .pc = @intFromPtr(&Group.AsyncClosure.entry), - }, - .x86_64 => .{ - .rsp = @intFromPtr(closure) - @sizeOf(usize), - .rbp = 0, - .rip = @intFromPtr(&Group.AsyncClosure.entry), - }, - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), - }, - .await_count = 0, - .link = .{ .group = .{ .prev = null, .next = null } }, - .status = .{ .queue_next = null }, - .cancel_status = .unrequested, - .cancel_protection = .unblocked, - .name = if (tracy.enable) name: { - const thread: *Thread = .current(); - var name_arena = thread.name_arena.promote(std.heap.page_allocator); - defer thread.name_arena = name_arena.state; - break :name std.fmt.allocPrintSentinel( - name_arena.allocator(), - "group task {d}", - .{@atomicRmw(u64, &Fiber.next_name, .Add, 1, .monotonic)}, - 0, - ) catch return error.ConcurrencyUnavailable; - }, - }; - closure.* = .{ - .ev = ev, - .group = group, - .fiber = fiber, - .start = start, - }; - @memcpy(closure.contextPointer(), 
context); - group.addFiber(ev, fiber); - const thread: *Thread = .current(); - if (ev.schedule(thread, .{ .head = fiber, .tail = fiber })) thread.submit(); -} - -fn groupAwait( - userdata: ?*anyopaque, - type_erased: *Io.Group, - initial_token: *anyopaque, -) Io.Cancelable!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = initial_token; - ev.yield(null, .{ .group_await = .{ .ptr = type_erased } }); -} - -fn groupCancel(userdata: ?*anyopaque, type_erased: *Io.Group, initial_token: *anyopaque) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = initial_token; - ev.yield(null, .{ .group_cancel = .{ .ptr = type_erased } }); -} - -fn recancel(userdata: ?*anyopaque) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - const cancel_protection = &Thread.current().currentFiber().cancel_protection; - assert(cancel_protection.acknowledged); - cancel_protection.acknowledged = false; -} - -fn swapCancelProtection(userdata: ?*anyopaque, new: Io.CancelProtection) Io.CancelProtection { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - const cancel_protection = &Thread.current().currentFiber().cancel_protection; - defer cancel_protection.user = new; - return cancel_protection.user; -} - -fn checkCancel(userdata: ?*anyopaque) Io.Cancelable!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - const fiber = Thread.current().currentFiber(); - switch (fiber.cancel_protection.check()) { - .blocked => {}, - .unblocked => if (@atomicLoad(Fiber.CancelStatus, &fiber.cancel_status, .monotonic).requested) { - fiber.cancel_protection.acknowledge(); - return error.Canceled; - }, - } -} - -fn select(userdata: ?*anyopaque, futures: []const *Io.AnyFuture) Io.Cancelable!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - var await_count: u31, var result = for (futures, 0..) 
|future, future_index| { - const future_fiber: *Fiber = @ptrCast(@alignCast(future)); - if (@atomicRmw( - ?*Fiber, - &future_fiber.link.awaiter, - .Xchg, - cancel_region.fiber, - .acq_rel, - )) |awaiter| { - assert(awaiter == Fiber.finished); - break .{ @intCast(future_index), future_index }; - } - } else result: { - const await_count: u31 = @intCast(futures.len); - cancel_region.await(.select) catch |err| switch (err) { - error.Canceled => |e| break :result .{ await_count + 1, e }, - }; - ev.yield(null, .{ .await = 1 }); - cancel_region.await(.nothing) catch |err| switch (err) { - error.Canceled => |e| break :result .{ await_count, e }, - }; - break :result .{ await_count - 1, futures.len }; - }; - for (futures[0 .. result catch futures.len], 0..) |future, future_index| { - const future_fiber: *Fiber = @ptrCast(@alignCast(future)); - const awaiter = @atomicRmw(?*Fiber, &future_fiber.link.awaiter, .Xchg, null, .monotonic); - if (awaiter == Fiber.finished) { - @atomicStore(?*Fiber, &future_fiber.link.awaiter, Fiber.finished, .monotonic); - result = if (result) |finished_index| @min(future_index, finished_index) else |e| e; - } else { - assert(awaiter == cancel_region.fiber); - await_count -= 1; - } - } - // Equivalent to `ev.yield(null, .{ .await = await_count });`, - // but avoiding a context switch in the common case. 
- switch (std.math.order( - @atomicRmw(i32, &cancel_region.fiber.await_count, .Sub, await_count, .monotonic), - await_count, - )) { - .lt => ev.yield(null, .{ .await = 0 }), - .eq => {}, - .gt => unreachable, - } - return result; -} - -fn futexWait( - userdata: ?*anyopaque, - ptr: *const u32, - expected: u32, - timeout: Io.Timeout, -) Io.Cancelable!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const timespec: ?linux.kernel_timespec, const clock: Io.Clock, const timeout_flags: u32 = timespec: switch (timeout) { - .none => .{ - null, - .awake, - linux.IORING_TIMEOUT_ABS, - }, - .duration => |duration| { - const ns = duration.raw.toNanoseconds(); - break :timespec .{ - .{ - .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), - .nsec = @intCast(@mod(ns, std.time.ns_per_s)), - }, - duration.clock, - 0, - }; - }, - .deadline => |deadline| { - const ns = deadline.raw.toNanoseconds(); - break :timespec .{ - .{ - .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), - .nsec = @intCast(@mod(ns, std.time.ns_per_s)), - }, - deadline.clock, - linux.IORING_TIMEOUT_ABS, - }; - }, - }; - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .FUTEX_WAIT, - .flags = if (timespec) |_| linux.IOSQE_IO_LINK else 0, - .ioprio = 0, - .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = true }), - .off = expected, - .addr = @intFromPtr(ptr), - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = std.math.maxInt(u32), - .resv = 0, - }; - if (timespec) |*timespec_ptr| thread.enqueue().* = .{ - .opcode = .LINK_TIMEOUT, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = @intFromPtr(timespec_ptr), - .len = 1, - .rw_flags = timeout_flags | @as(u32, switch (clock) { - .real => linux.IORING_TIMEOUT_REALTIME, - else => 0, - .boot => 
linux.IORING_TIMEOUT_BOOTTIME, - }), - .user_data = @intFromEnum(Completion.UserData.wakeup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => {}, // notified by `wake()` - .INTR, .CANCELED => {}, // caller's responsibility to retry - .AGAIN => {}, // ptr.* != expect - .INVAL => {}, // possibly timeout overflow - .TIMEDOUT => unreachable, - .FAULT => recoverableOsBugDetected(), // ptr was invalid - else => recoverableOsBugDetected(), - } -} - -fn futexWaitUncancelable(userdata: ?*anyopaque, ptr: *const u32, expected: u32) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .initBlocked(); - defer cancel_region.deinit(); - const thread = cancel_region.awaitIoUring() catch |err| switch (err) { - error.Canceled => unreachable, // blocked - }; - thread.enqueue().* = .{ - .opcode = .FUTEX_WAIT, - .flags = 0, - .ioprio = 0, - .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = true }), - .off = expected, - .addr = @intFromPtr(ptr), - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = std.math.maxInt(u32), - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => {}, // notified by `wake()` - .INTR, .CANCELED => {}, // caller's responsibility to retry - .AGAIN => {}, // ptr.* != expect - .INVAL => {}, // possibly timeout overflow - .FAULT => recoverableOsBugDetected(), // ptr was invalid - else => recoverableOsBugDetected(), - } -} - -fn futexWake(userdata: ?*anyopaque, ptr: *const u32, max_waiters: u32) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - const thread: *Thread = .current(); - thread.enqueue().* = .{ - .opcode = .FUTEX_WAKE, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, 
.private = true }), - .off = max_waiters, - .addr = @intFromPtr(ptr), - .len = 0, - .rw_flags = 0, - .user_data = @intFromEnum(Completion.UserData.futex_wake), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = std.math.maxInt(u32), - .resv = 0, - }; - thread.submit(); -} - -fn operate(userdata: ?*anyopaque, operation: Io.Operation) Io.Cancelable!Io.Operation.Result { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; - defer maybe_sync.deinit(ev); - return switch (operation) { - .file_read_streaming => |o| .{ - .file_read_streaming = ev.fileReadStreaming( - &maybe_sync.cancel_region, - o.file, - o.data, - ) catch |err| switch (err) { - error.Canceled => |e| return e, - else => |e| e, - }, - }, - .file_write_streaming => |o| .{ - .file_write_streaming = ev.fileWriteStreaming( - &maybe_sync.cancel_region, - o.file, - o.header, - o.data, - o.splat, - ) catch |err| switch (err) { - error.Canceled => |e| return e, - else => |e| e, - }, - }, - .device_io_control => |o| .{ - .device_io_control = try ev.deviceIoControl(try maybe_sync.enterSync(ev), o), - }, - }; -} - -fn fileReadStreaming( - ev: *Evented, - cancel_region: *CancelRegion, - file: File, - data: []const []u8, -) File.ReadStreamingError!usize { - var iovecs_buffer: [max_iovecs_len]iovec = undefined; - var i: usize = 0; - for (data) |buf| { - if (iovecs_buffer.len - i == 0) break; - if (buf.len != 0) { - iovecs_buffer[i] = .{ .base = buf.ptr, .len = buf.len }; - i += 1; - } - } - const dest = iovecs_buffer[0..i]; - assert(dest[0].len > 0); - - const n = try ev.preadv(cancel_region, file.handle, dest, null); - return if (n == 0) error.EndOfStream else n; -} - -fn fileWriteStreaming( - ev: *Evented, - cancel_region: *CancelRegion, - file: File, - header: []const u8, - data: []const []const u8, - splat: usize, -) File.Writer.Error!usize { - var iovecs: [max_iovecs_len]iovec_const = undefined; - var iovlen: iovlen_t = 
0; - addBuf(&iovecs, &iovlen, header); - for (data[0 .. data.len - 1]) |bytes| addBuf(&iovecs, &iovlen, bytes); - const pattern = data[data.len - 1]; - if (iovecs.len - iovlen != 0) switch (splat) { - 0 => {}, - 1 => addBuf(&iovecs, &iovlen, pattern), - else => switch (pattern.len) { - 0 => {}, - 1 => { - var backup_buffer: [splat_buffer_size]u8 = undefined; - const splat_buffer = &backup_buffer; - const memset_len = @min(splat_buffer.len, splat); - const buf = splat_buffer[0..memset_len]; - @memset(buf, pattern[0]); - addBuf(&iovecs, &iovlen, buf); - var remaining_splat = splat - buf.len; - while (remaining_splat > splat_buffer.len and iovecs.len - iovlen != 0) { - assert(buf.len == splat_buffer.len); - addBuf(&iovecs, &iovlen, splat_buffer); - remaining_splat -= splat_buffer.len; - } - addBuf(&iovecs, &iovlen, splat_buffer[0..@min(remaining_splat, splat_buffer.len)]); - }, - else => for (0..@min(splat, iovecs.len - iovlen)) |_| { - addBuf(&iovecs, &iovlen, pattern); - }, - }, - }; - return ev.pwritev(cancel_region, file.handle, iovecs[0..iovlen], null); -} - -fn deviceIoControl( - ev: *Evented, - sync: *CancelRegion.Sync, - o: Io.Operation.DeviceIoControl, -) Io.Cancelable!i32 { - _ = ev; - while (true) { - try sync.cancel_region.await(.nothing); - const rc = linux.ioctl(o.file.handle, @bitCast(o.code), @intFromPtr(o.arg)); - switch (linux.errno(rc)) { - .SUCCESS => return @bitCast(@as(u32, @truncate(rc))), - .INTR => continue, - else => |err| return -@as(i32, @intFromEnum(err)), - } - } -} - -fn batchAwaitAsync(userdata: ?*anyopaque, batch: *Io.Batch) Io.Cancelable!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; - defer maybe_sync.deinit(ev); - ev.batchDrainSubmitted(&maybe_sync, batch, false) catch |err| switch (err) { - error.ConcurrencyUnavailable => unreachable, // passed concurrency=false - else => |e| return e, - }; - maybe_sync.leaveSync(ev); - while (true) { - 
batchDrainReady(batch) catch |err| switch (err) { - error.Timeout => unreachable, // no timeout - }; - if (batch.completed.head != .none) return; - ev.yield(null, .{ .batch_await = batch }); - } -} - -fn batchAwaitConcurrent( - userdata: ?*anyopaque, - batch: *Io.Batch, - timeout: Io.Timeout, -) Io.Batch.AwaitConcurrentError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; - defer maybe_sync.deinit(ev); - try ev.batchDrainSubmitted(&maybe_sync, batch, true); - maybe_sync.leaveSync(ev); - const timespec: linux.kernel_timespec, const clock: Io.Clock, const timeout_flags: u32 = while (true) { - batchDrainReady(batch) catch |err| switch (err) { - error.Timeout => unreachable, // no timeout - }; - if (batch.completed.head != .none) return; - switch (timeout) { - .none => ev.yield(null, .{ .batch_await = batch }), - .duration => |duration| { - const ns = duration.raw.toNanoseconds(); - break .{ - .{ - .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), - .nsec = @intCast(@mod(ns, std.time.ns_per_s)), - }, - duration.clock, - 0, - }; - }, - .deadline => |deadline| { - const ns = deadline.raw.toNanoseconds(); - break .{ - .{ - .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), - .nsec = @intCast(@mod(ns, std.time.ns_per_s)), - }, - deadline.clock, - linux.IORING_TIMEOUT_ABS, - }; - }, - } - }; - { - const thread = try maybe_sync.cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .TIMEOUT, - .flags = 0, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = @intFromPtr(&timespec), - .len = 1, - .rw_flags = timeout_flags | @as(u32, switch (clock) { - .real => linux.IORING_TIMEOUT_REALTIME, - else => 0, - .boot => linux.IORING_TIMEOUT_BOOTTIME, - }), - .user_data = @intFromPtr(&batch.context) | 0b11, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - } - while (batch.completed.head == .none) { - ev.yield(null, .{ .batch_await = batch }); - 
batchDrainReady(batch) catch |err| switch (err) { - error.Timeout => |e| return if (batch.completed.head == .none) e, - }; - if (batch.completed.head == .none) continue; - } - const thread = try maybe_sync.cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .TIMEOUT_REMOVE, - .flags = 0, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = @intFromPtr(&batch.context) | 0b11, - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(maybe_sync.cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (maybe_sync.cancel_region.errno()) { - .SUCCESS => return, - .BUSY, .NOENT => {}, - else => |err| unexpectedErrno(err) catch {}, - } - while (true) { - batchDrainReady(batch) catch |err| switch (err) { - error.Timeout => return, - }; - ev.yield(null, .{ .batch_await = batch }); - } -} - -/// If `concurrency` is false, `error.ConcurrencyUnavailable` is unreachable. -fn batchDrainSubmitted( - ev: *Evented, - maybe_sync: *CancelRegion.Sync.Maybe, - batch: *Io.Batch, - concurrency: bool, -) (Io.ConcurrentError || Io.Cancelable)!void { - var index = batch.submitted.head; - if (index == .none) return; - errdefer batch.submitted.head = index; - const thread = try maybe_sync.cancelRegion().awaitIoUring(); - while (index != .none) { - const storage = &batch.storage[index.toIndex()]; - const next_index = storage.submission.node.next; - if (@as(?Io.Operation.Result, result: switch (storage.submission.operation) { - .file_read_streaming => |o| { - const buffer = for (o.data) |buffer| { - if (buffer.len != 0) break buffer; - } else break :result .{ .file_read_streaming = 0 }; - const fd = o.file.handle; - storage.* = .{ .pending = .{ - .node = .{ .prev = batch.pending.tail, .next = .none }, - .tag = .file_read_streaming, - .context = undefined, - } }; - thread.enqueue().* = .{ - .opcode = .READ, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = std.math.maxInt(u64), - .addr = 
@intFromPtr(buffer.ptr), - .len = @min(buffer.len, 0xfffff000), - .rw_flags = 0, - .user_data = @intFromPtr(&storage.pending.context) | 0b10, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - break :result null; - }, - .file_write_streaming => |o| { - const buffer = buffer: { - if (o.header.len != 0) break :buffer o.header; - for (o.data[0 .. o.data.len - 1]) |buffer| { - if (buffer.len != 0) break :buffer buffer; - } - if (o.splat > 0) break :buffer o.data[o.data.len - 1]; - break :result .{ .file_write_streaming = 0 }; - }; - const fd = o.file.handle; - storage.* = .{ .pending = .{ - .node = .{ .prev = batch.pending.tail, .next = .none }, - .tag = .file_write_streaming, - .context = undefined, - } }; - thread.enqueue().* = .{ - .opcode = .WRITE, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = std.math.maxInt(u64), - .addr = @intFromPtr(buffer.ptr), - .len = @min(buffer.len, 0xfffff000), - .rw_flags = 0, - .user_data = @intFromPtr(&storage.pending.context) | 0b10, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - break :result null; - }, - .device_io_control => |o| if (concurrency) - return error.ConcurrencyUnavailable - else - .{ .device_io_control = try ev.deviceIoControl(try maybe_sync.enterSync(ev), o) }, - })) |result| { - switch (batch.completed.tail) { - .none => batch.completed.head = index, - else => |tail_index| batch.storage[tail_index.toIndex()].completion.node.next = index, - } - batch.completed.tail = index; - storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; - } else { - switch (batch.pending.tail) { - .none => batch.pending.head = index, - else => |tail_index| batch.storage[tail_index.toIndex()].pending.node.next = index, - } - batch.pending.tail = index; - storage.pending.context[0] = @intFromPtr(batch); - } - index = next_index; - } - batch.submitted = .{ .head = .none, .tail = .none }; -} - -fn batchDrainReady(batch: *Io.Batch) 
Io.Timeout.Error!void { - while (@atomicRmw(?*anyopaque, &batch.context, .Xchg, null, .acquire)) |head| { - var next: usize = @intFromPtr(head); - var timeout = false; - while (cond: switch (@as(u2, @truncate(next))) { - 0b00 => if (timeout) return error.Timeout else false, - 0b01 => { - assert(!timeout); - return error.Timeout; - }, - 0b10 => true, - 0b11 => { - assert(!timeout); - timeout = true; - break :cond true; - }, - }) { - var context: *Io.Operation.Storage.Pending.Context = @ptrFromInt(next & ~@as(usize, 0b11)); - next = context[0]; - const completion: Completion = .{ - .result = @bitCast(@as(u32, @intCast(context[1]))), - .flags = @intCast(context[2]), - }; - const pending: *Io.Operation.Storage.Pending = @fieldParentPtr("context", context); - const storage: *Io.Operation.Storage = @fieldParentPtr("pending", pending); - const index: Io.Operation.OptionalIndex = .fromIndex(storage - batch.storage.ptr); - assert(completion.flags & linux.IORING_CQE_F_SKIP == 0); - switch (pending.node.prev) { - .none => batch.pending.head = pending.node.next, - else => |prev_index| batch.storage[prev_index.toIndex()].pending.node.next = - pending.node.next, - } - switch (pending.node.next) { - .none => batch.pending.tail = pending.node.prev, - else => |prev_index| batch.storage[prev_index.toIndex()].pending.node.prev = - pending.node.prev, - } - if (@as(?Io.Operation.Result, result: switch (pending.tag) { - .file_read_streaming => .{ - .file_read_streaming = switch (completion.errno()) { - .SUCCESS => @as(u32, @bitCast(completion.result)), - .INTR => 0, - .CANCELED => break :result null, - .INVAL => |err| errnoBug(err), - .FAULT => |err| errnoBug(err), - .AGAIN => error.WouldBlock, - .BADF => |err| errnoBug(err), // File descriptor used after closed - .IO => error.InputOutput, - .ISDIR => error.IsDir, - .NOBUFS => error.SystemResources, - .NOMEM => error.SystemResources, - .NOTCONN => error.SocketUnconnected, - .CONNRESET => error.ConnectionResetByPeer, - else => |err| 
unexpectedErrno(err), - }, - }, - .file_write_streaming => .{ - .file_write_streaming = switch (completion.errno()) { - .SUCCESS => @as(u32, @bitCast(completion.result)), - .INTR => 0, - .CANCELED => break :result null, - .INVAL => |err| errnoBug(err), - .FAULT => |err| errnoBug(err), - .AGAIN => error.WouldBlock, - .BADF => error.NotOpenForWriting, // Can be a race condition. - .DESTADDRREQ => |err| errnoBug(err), // `connect` was never called. - .DQUOT => error.DiskQuota, - .FBIG => error.FileTooBig, - .IO => error.InputOutput, - .NOSPC => error.NoSpaceLeft, - .PERM => error.PermissionDenied, - .PIPE => error.BrokenPipe, - .CONNRESET => |err| errnoBug(err), // Not a socket handle. - .BUSY => error.DeviceBusy, - else => |err| unexpectedErrno(err), - }, - }, - .device_io_control => unreachable, - })) |result| { - switch (batch.completed.tail) { - .none => batch.completed.head = index, - else => |tail_index| batch.storage[tail_index.toIndex()].completion.node.next = - index, - } - storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; - batch.completed.tail = index; - } else { - switch (batch.unused.tail) { - .none => batch.unused.head = index, - else => |tail_index| batch.storage[tail_index.toIndex()].unused.next = index, - } - storage.* = .{ .unused = .{ .prev = batch.unused.tail, .next = .none } }; - batch.unused.tail = index; - } - } - } -} - -fn batchCancel(userdata: ?*anyopaque, batch: *Io.Batch) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - batchDrainReady(batch) catch |err| switch (err) { - error.Timeout => unreachable, // no timeout - }; - var index = batch.pending.head; - if (index == .none) return; - var cancel_region: CancelRegion = .initBlocked(); - defer cancel_region.deinit(); - const thread = cancel_region.awaitIoUring() catch |err| switch (err) { - error.Canceled => unreachable, // blocked - }; - while (index != .none) { - const pending = &batch.storage[index.toIndex()].pending; - 
thread.enqueue().* = .{ - .opcode = .ASYNC_CANCEL, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = @intFromPtr(&pending.context) | 0b10, - .len = 0, - .rw_flags = 0, - .user_data = @intFromEnum(Completion.UserData.wakeup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - index = pending.node.next; - } - while (batch.pending.head != .none) batchDrainReady(batch) catch |err| switch (err) { - error.Timeout => unreachable, // no timeout - }; -} - -fn dirCreateDir( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - permissions: Dir.Permissions, -) Dir.CreateDirError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .MKDIRAT, - .flags = 0, - .ioprio = 0, - .fd = dir.handle, - .off = 0, - .addr = @intFromPtr(sub_path_posix.ptr), - .len = permissions.toMode(), - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .ACCES => return error.AccessDenied, - .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
- .PERM => return error.PermissionDenied, - .DQUOT => return error.DiskQuota, - .EXIST => return error.PathAlreadyExists, - .FAULT => |err| return errnoBug(err), - .LOOP => return error.SymLinkLoop, - .MLINK => return error.LinkQuotaExceeded, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOMEM => return error.SystemResources, - .NOSPC => return error.NoSpaceLeft, - .NOTDIR => return error.NotDir, - .ROFS => return error.ReadOnlyFileSystem, - // dragonfly: when dir_fd is unlinked from filesystem - .NOTCONN => return error.FileNotFound, - .ILSEQ => return error.BadPathName, - else => |err| return unexpectedErrno(err), - } - } -} - -fn dirCreateDirPath( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - permissions: Dir.Permissions, -) Dir.CreateDirPathError!Dir.CreatePathStatus { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var it = Dir.path.componentIterator(sub_path); - var status: Dir.CreatePathStatus = .existed; - var component = it.last() orelse return error.BadPathName; - while (true) { - if (dirCreateDir(ev, dir, component.path, permissions)) |_| { - status = .created; - } else |err| switch (err) { - error.PathAlreadyExists => { - // stat the file and return an error if it's not a directory - // this is important because otherwise a dangling symlink - // could cause an infinite loop - const fstat = try dirStatFile(ev, dir, component.path, .{}); - if (fstat.kind != .directory) return error.NotDir; - }, - error.FileNotFound => |e| { - component = it.previous() orelse return e; - continue; - }, - else => |e| return e, - } - component = it.next() orelse return status; - } -} - -fn dirCreateDirPathOpen( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - permissions: Dir.Permissions, - options: Dir.OpenOptions, -) Dir.CreateDirPathOpenError!Dir { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - return dirOpenDir(ev, dir, sub_path, options) catch |err| switch (err) { - 
error.FileNotFound => { - _ = try dirCreateDirPath(ev, dir, sub_path, permissions); - return dirOpenDir(ev, dir, sub_path, options); - }, - else => |e| return e, - }; -} - -fn dirOpenDir( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - options: Dir.OpenOptions, -) Dir.OpenError!Dir { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return .{ - .handle = ev.openat(&cancel_region, dir.handle, sub_path_posix, .{ - .ACCMODE = .RDONLY, - .DIRECTORY = true, - .NOFOLLOW = !options.follow_symlinks, - .CLOEXEC = true, - .PATH = !options.iterate, - }, 0) catch |err| switch (err) { - error.IsDir => return errnoBug(.ISDIR), - error.WouldBlock => return errnoBug(.AGAIN), - error.FileTooBig => return errnoBug(.FBIG), - error.NoSpaceLeft => return errnoBug(.NOSPC), - error.DeviceBusy => return errnoBug(.BUSY), // O_EXCL not passed - error.FileBusy => return errnoBug(.TXTBSY), - error.PathAlreadyExists => return errnoBug(.EXIST), // Not creating. - error.PipeBusy => return error.Unexpected, // Not opening a pipe. - error.AntivirusInterference => unreachable, // Windows-only - error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks. 
- else => |e| return e, - }, - }; -} - -fn dirStat(userdata: ?*anyopaque, dir: Dir) Dir.StatError!Dir.Stat { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return ev.stat(&cancel_region, dir.handle); -} - -fn dirStatFile( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - options: Dir.StatFileOptions, -) Dir.StatFileError!File.Stat { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return ev.statx(&cancel_region, dir.handle, sub_path_posix, linux.AT.NO_AUTOMOUNT | - @as(u32, if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW)); -} - -fn dirAccess( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - options: Dir.AccessOptions, -) Dir.AccessError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - - const mode: u32 = - @as(u32, if (options.read) linux.R_OK else 0) | - @as(u32, if (options.write) linux.W_OK else 0) | - @as(u32, if (options.execute) linux.X_OK else 0); - const flags: u32 = if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW; - - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.faccessat(dir.handle, sub_path_posix, mode, flags))) { - .SUCCESS => return, - .INTR => continue, - .ACCES => return error.AccessDenied, - .PERM => return error.PermissionDenied, - .ROFS => return error.ReadOnlyFileSystem, - .LOOP => return error.SymLinkLoop, - .TXTBSY => return error.FileBusy, - .NOTDIR => return error.FileNotFound, - .NOENT => return error.FileNotFound, - .NAMETOOLONG => return error.NameTooLong, - .INVAL => 
|err| return errnoBug(err), - .FAULT => |err| return errnoBug(err), - .IO => return error.InputOutput, - .NOMEM => return error.SystemResources, - .ILSEQ => return error.BadPathName, - else => |err| return unexpectedErrno(err), - } - } -} - -fn dirCreateFile( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - flags: File.CreateFlags, -) File.OpenError!File { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - - var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; - defer maybe_sync.deinit(ev); - const fd = try ev.openat(&maybe_sync.cancel_region, dir.handle, sub_path_posix, .{ - .ACCMODE = if (flags.read) .RDWR else .WRONLY, - .CREAT = true, - .TRUNC = flags.truncate, - .EXCL = flags.exclusive, - .CLOEXEC = true, - }, flags.permissions.toMode()); - errdefer ev.close(maybe_sync.cancelRegion(), fd); - - switch (flags.lock) { - .none => {}, - .shared, .exclusive => try ev.flock( - try maybe_sync.enterSync(ev), - fd, - flags.lock, - if (flags.lock_nonblocking) .nonblocking else .blocking, - ), - } - - return .{ .handle = fd, .flags = .{ .nonblocking = false } }; -} - -fn dirCreateFileAtomic( - userdata: ?*anyopaque, - dir: Dir, - dest_path: []const u8, - options: Dir.CreateFileAtomicOptions, -) Dir.CreateFileAtomicError!File.Atomic { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - // Linux has O_TMPFILE, but linkat() does not support AT_REPLACE, so it's - // useless when we have to make up a bogus path name to do the rename() - // anyway. 
- if (!options.replace) tmpfile: { - const flags: linux.O = if (@hasField(linux.O, "TMPFILE")) .{ - .ACCMODE = .RDWR, - .TMPFILE = true, - .DIRECTORY = true, - .CLOEXEC = true, - } else if (@hasField(linux.O, "TMPFILE0") and !@hasField(linux.O, "TMPFILE2")) .{ - .ACCMODE = .RDWR, - .TMPFILE0 = true, - .TMPFILE1 = true, - .DIRECTORY = true, - .CLOEXEC = true, - } else break :tmpfile; - - const dest_dirname = Dir.path.dirname(dest_path); - if (dest_dirname) |dirname| { - // This has a nice side effect of preemptively triggering EISDIR or - // ENOENT, avoiding the ambiguity below. - _ = dirCreateDirPath(ev, dir, dirname, .default_dir) catch |err| switch (err) { - // None of these make sense in this context. - error.IsDir, - error.Streaming, - error.DiskQuota, - error.PathAlreadyExists, - error.LinkQuotaExceeded, - error.PipeBusy, - error.FileTooBig, - error.DeviceBusy, - error.FileLocksUnsupported, - error.FileBusy, - => return error.Unexpected, - - else => |e| return e, - }; - } - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(dest_dirname orelse ".", &path_buffer); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return .{ - .file = .{ - .handle = ev.openat( - &cancel_region, - dir.handle, - sub_path_posix, - flags, - options.permissions.toMode(), - ) catch |err| switch (err) { - error.IsDir, error.FileNotFound => { - // Ambiguous error code. It might mean the file system - // does not support O_TMPFILE. Therefore, we must fall - // back to not using O_TMPFILE. - break :tmpfile; - }, - error.FileTooBig => return errnoBug(.FBIG), - error.DeviceBusy => return errnoBug(.BUSY), // O_EXCL not passed - error.PathAlreadyExists => return errnoBug(.EXIST), // Not creating. - error.PipeBusy => return error.Unexpected, // Not opening a pipe. - error.AntivirusInterference => unreachable, // Windows-only - error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks. 
- else => |e| return e, - }, - .flags = .{ .nonblocking = false }, - }, - .file_basename_hex = 0, - .dest_sub_path = dest_path, - .file_open = true, - .file_exists = false, - .close_dir_on_deinit = false, - .dir = dir, - }; - } - - if (Dir.path.dirname(dest_path)) |dirname| { - const new_dir = if (options.make_path) - dirCreateDirPathOpen(ev, dir, dirname, .default_dir, .{}) catch |err| switch (err) { - // None of these make sense in this context. - error.IsDir, - error.Streaming, - error.DiskQuota, - error.PathAlreadyExists, - error.LinkQuotaExceeded, - error.PipeBusy, - error.FileTooBig, - error.FileLocksUnsupported, - error.DeviceBusy, - => return error.Unexpected, - - else => |e| return e, - } - else - try dirOpenDir(ev, dir, dirname, .{}); - - return ev.atomicFileInit(Dir.path.basename(dest_path), options.permissions, new_dir, true); - } - - return ev.atomicFileInit(dest_path, options.permissions, dir, false); -} - -fn atomicFileInit( - ev: *Evented, - dest_basename: []const u8, - permissions: File.Permissions, - dir: Dir, - close_dir_on_deinit: bool, -) Dir.CreateFileAtomicError!File.Atomic { - while (true) { - var random_integer: u64 = undefined; - random(ev, @ptrCast(&random_integer)); - const tmp_sub_path = std.fmt.hex(random_integer); - const file = dirCreateFile(ev, dir, &tmp_sub_path, .{ - .permissions = permissions, - .exclusive = true, - }) catch |err| switch (err) { - error.PathAlreadyExists => continue, - error.DeviceBusy => continue, - error.FileBusy => continue, - - error.IsDir => return error.Unexpected, // No path components. - error.FileTooBig => return error.Unexpected, // Creating, not opening. - error.FileLocksUnsupported => return error.Unexpected, // Not asking for locks. - error.PipeBusy => return error.Unexpected, // Not opening a pipe. 
- - else => |e| return e, - }; - return .{ - .file = file, - .file_basename_hex = random_integer, - .dest_sub_path = dest_basename, - .file_open = true, - .file_exists = true, - .close_dir_on_deinit = close_dir_on_deinit, - .dir = dir, - }; - } -} - -fn dirOpenFile( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - flags: File.OpenFlags, -) File.OpenError!File { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - - var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; - defer maybe_sync.deinit(ev); - const fd = try ev.openat(&maybe_sync.cancel_region, dir.handle, sub_path_posix, .{ - .ACCMODE = switch (flags.mode) { - .read_only => .RDONLY, - .write_only => .WRONLY, - .read_write => .RDWR, - }, - .NOCTTY = !flags.allow_ctty, - .NOFOLLOW = !flags.follow_symlinks, - .CLOEXEC = true, - .PATH = flags.path_only, - }, 0); - errdefer ev.close(maybe_sync.cancelRegion(), fd); - - if (!flags.allow_directory) { - const is_dir = is_dir: { - const s = ev.stat(&maybe_sync.cancel_region, fd) catch |err| switch (err) { - // The directory-ness is either unknown or unknowable - error.Streaming => break :is_dir false, - else => |e| return e, - }; - break :is_dir s.kind == .directory; - }; - if (is_dir) return error.IsDir; - } - - switch (flags.lock) { - .none => {}, - .shared, .exclusive => try ev.flock( - try maybe_sync.enterSync(ev), - fd, - flags.lock, - if (flags.lock_nonblocking) .nonblocking else .blocking, - ), - } - - return .{ .handle = fd, .flags = .{ .nonblocking = false } }; -} - -fn dirClose(userdata: ?*anyopaque, dirs: []const Dir) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - for (dirs) |dir| ev.close(&cancel_region, dir.handle); -} - -fn dirRead(userdata: ?*anyopaque, dr: *Dir.Reader, buffer: []Dir.Entry) Dir.Reader.Error!usize 
{ - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var buffer_index: usize = 0; - while (buffer.len - buffer_index != 0) { - if (dr.end - dr.index == 0) { - // Refill the buffer, unless we've already created references to - // buffered data. - if (buffer_index != 0) break; - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - if (dr.state == .reset) { - ev.lseek(&sync, dr.dir.handle, 0, linux.SEEK.SET) catch |err| switch (err) { - error.Unseekable => return error.Unexpected, - else => |e| return e, - }; - dr.state = .reading; - } - const n = while (true) { - try sync.cancel_region.await(.nothing); - const rc = linux.getdents64(dr.dir.handle, dr.buffer.ptr, dr.buffer.len); - switch (linux.errno(rc)) { - .SUCCESS => break rc, - .INTR => continue, - .BADF => |err| return errnoBug(err), // Dir is invalid or was opened without iteration ability. - .FAULT => |err| return errnoBug(err), - .NOTDIR => |err| return errnoBug(err), - // To be consistent across platforms, iteration - // ends if the directory being iterated is deleted - // during iteration. This matches the behavior of - // non-Linux, non-WASI UNIX platforms. - .NOENT => { - dr.state = .finished; - return 0; - }, - // This can occur when reading /proc/$PID/net, or - // if the provided buffer is too small. Neither - // scenario is intended to be handled by this API. - .INVAL => return error.Unexpected, - .ACCES => return error.AccessDenied, // Lacking permission to iterate this directory. - else => |err| return unexpectedErrno(err), - } - }; - if (n == 0) { - dr.state = .finished; - return 0; - } - dr.index = 0; - dr.end = n; - } - // Linux aligns the header by padding after the null byte of the name - // to align the next entry. This means we can find the end of the name - // by looking at only the 8 bytes before the next record. However since - // file names are usually short it's better to keep the machine code - // simpler. 
- // - // Furthermore, I observed qemu user mode to not align this struct, so - // this code makes the conservative choice to not assume alignment. - const linux_entry: *align(1) linux.dirent64 = @ptrCast(&dr.buffer[dr.index]); - const next_index = dr.index + linux_entry.reclen; - dr.index = next_index; - const name_ptr: [*]u8 = &linux_entry.name; - const padded_name = name_ptr[0 .. linux_entry.reclen - @offsetOf(linux.dirent64, "name")]; - const name_len = std.mem.findScalar(u8, padded_name, 0).?; - const name = name_ptr[0..name_len :0]; - - if (std.mem.eql(u8, name, ".") or std.mem.eql(u8, name, "..")) continue; - - const entry_kind: File.Kind = switch (linux_entry.type) { - linux.DT.BLK => .block_device, - linux.DT.CHR => .character_device, - linux.DT.DIR => .directory, - linux.DT.FIFO => .named_pipe, - linux.DT.LNK => .sym_link, - linux.DT.REG => .file, - linux.DT.SOCK => .unix_domain_socket, - else => .unknown, - }; - buffer[buffer_index] = .{ - .name = name, - .kind = entry_kind, - .inode = linux_entry.ino, - }; - buffer_index += 1; - } - return buffer_index; -} - -fn dirRealPath(userdata: ?*anyopaque, dir: Dir, out_buffer: []u8) Dir.RealPathError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - return ev.realPath(&sync, dir.handle, out_buffer); -} - -fn dirRealPathFile( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - out_buffer: []u8, -) Dir.RealPathFileError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - - var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; - defer maybe_sync.deinit(ev); - const fd = ev.openat(&maybe_sync.cancel_region, dir.handle, sub_path_posix, .{ - .CLOEXEC = true, - .PATH = true, - }, 0) catch |err| switch (err) { - error.WouldBlock => return errnoBug(.AGAIN), - 
error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks. - else => |e| return e, - }; - defer ev.close(maybe_sync.cancelRegion(), fd); - return ev.realPath(try maybe_sync.enterSync(ev), fd, out_buffer); -} - -fn dirDeleteFile(userdata: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteFileError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .UNLINKAT, - .flags = 0, - .ioprio = 0, - .fd = dir.handle, - .off = 0, - .addr = @intFromPtr(sub_path_posix.ptr), - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .PERM => return error.PermissionDenied, - .ACCES => return error.AccessDenied, - .BUSY => return error.FileBusy, - .FAULT => |err| return errnoBug(err), - .IO => return error.FileSystem, - .ISDIR => return error.IsDir, - .LOOP => return error.SymLinkLoop, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOTDIR => return error.NotDir, - .NOMEM => return error.SystemResources, - .ROFS => return error.ReadOnlyFileSystem, - .EXIST => |err| return errnoBug(err), - .NOTEMPTY => |err| return errnoBug(err), // Not passing AT.REMOVEDIR - .ILSEQ => return error.BadPathName, - .INVAL => |err| return errnoBug(err), // invalid flags, or pathname has . as last component - .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
- else => |err| return unexpectedErrno(err), - } - } -} - -fn dirDeleteDir(userdata: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteDirError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .UNLINKAT, - .flags = 0, - .ioprio = 0, - .fd = dir.handle, - .off = 0, - .addr = @intFromPtr(sub_path_posix.ptr), - .len = 0, - .rw_flags = linux.AT.REMOVEDIR, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .ACCES => return error.AccessDenied, - .PERM => return error.PermissionDenied, - .BUSY => return error.FileBusy, - .FAULT => |err| return errnoBug(err), - .IO => return error.FileSystem, - .ISDIR => |err| return errnoBug(err), - .LOOP => return error.SymLinkLoop, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOTDIR => return error.NotDir, - .NOMEM => return error.SystemResources, - .ROFS => return error.ReadOnlyFileSystem, - .EXIST => |err| return errnoBug(err), - .NOTEMPTY => return error.DirNotEmpty, - .ILSEQ => return error.BadPathName, - .INVAL => |err| return errnoBug(err), // invalid flags, or pathname has . as last component - .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
- else => |err| return unexpectedErrno(err), - } - } -} - -fn dirRename( - userdata: ?*anyopaque, - old_dir: Dir, - old_sub_path: []const u8, - new_dir: Dir, - new_sub_path: []const u8, -) Dir.RenameError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var old_path_buffer: [PATH_MAX]u8 = undefined; - var new_path_buffer: [PATH_MAX]u8 = undefined; - - const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer); - const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return ev.renameat( - &cancel_region, - old_dir.handle, - old_sub_path_posix, - new_dir.handle, - new_sub_path_posix, - .{}, - ); -} - -fn dirRenamePreserve( - userdata: ?*anyopaque, - old_dir: Dir, - old_sub_path: []const u8, - new_dir: Dir, - new_sub_path: []const u8, -) Dir.RenamePreserveError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var old_path_buffer: [PATH_MAX]u8 = undefined; - var new_path_buffer: [PATH_MAX]u8 = undefined; - - const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer); - const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return ev.renameat( - &cancel_region, - old_dir.handle, - old_sub_path_posix, - new_dir.handle, - new_sub_path_posix, - .{ .NOREPLACE = true }, - ); -} - -fn dirSymLink( - userdata: ?*anyopaque, - dir: Dir, - target_path: []const u8, - sym_link_path: []const u8, - flags: Dir.SymLinkFlags, -) Dir.SymLinkError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = flags; - - var target_path_buffer: [PATH_MAX]u8 = undefined; - var sym_link_path_buffer: [PATH_MAX]u8 = undefined; - - const target_path_posix = try pathToPosix(target_path, &target_path_buffer); - const sym_link_path_posix = try pathToPosix(sym_link_path, &sym_link_path_buffer); - - var cancel_region: CancelRegion 
= .init(); - defer cancel_region.deinit(); - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .SYMLINKAT, - .flags = 0, - .ioprio = 0, - .fd = dir.handle, - .off = @intFromPtr(sym_link_path_posix.ptr), - .addr = @intFromPtr(target_path_posix.ptr), - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - .ACCES => return error.AccessDenied, - .PERM => return error.PermissionDenied, - .DQUOT => return error.DiskQuota, - .EXIST => return error.PathAlreadyExists, - .IO => return error.FileSystem, - .LOOP => return error.SymLinkLoop, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOTDIR => return error.NotDir, - .NOMEM => return error.SystemResources, - .NOSPC => return error.NoSpaceLeft, - .ROFS => return error.ReadOnlyFileSystem, - .ILSEQ => return error.BadPathName, - else => |err| return unexpectedErrno(err), - } - } -} - -fn dirReadLink( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - buffer: []u8, -) Dir.ReadLinkError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var sub_path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &sub_path_buffer); - - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - while (true) { - try sync.cancel_region.await(.nothing); - const rc = linux.readlinkat(dir.handle, sub_path_posix, buffer.ptr, buffer.len); - switch (linux.errno(rc)) { - .SUCCESS => { - const len: usize = @bitCast(rc); - return len; - }, - .INTR => continue, - .ACCES => return error.AccessDenied, - .FAULT => |err| return errnoBug(err), - .INVAL => return error.NotLink, - .IO => 
return error.FileSystem, - .LOOP => return error.SymLinkLoop, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOMEM => return error.SystemResources, - .NOTDIR => return error.NotDir, - .ILSEQ => return error.BadPathName, - else => |err| return unexpectedErrno(err), - } - } -} - -fn dirSetOwner( - userdata: ?*anyopaque, - dir: Dir, - owner: ?File.Uid, - group: ?File.Gid, -) Dir.SetOwnerError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - try ev.fchownat( - &sync, - dir.handle, - "", - owner orelse std.math.maxInt(linux.uid_t), - group orelse std.math.maxInt(linux.gid_t), - linux.AT.EMPTY_PATH, - ); -} - -fn dirSetFileOwner( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - owner: ?File.Uid, - group: ?File.Gid, - options: Dir.SetFileOwnerOptions, -) Dir.SetFileOwnerError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - try ev.fchownat( - &sync, - dir.handle, - sub_path_posix, - owner orelse std.math.maxInt(linux.uid_t), - group orelse std.math.maxInt(linux.gid_t), - if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW, - ); -} - -fn dirSetPermissions( - userdata: ?*anyopaque, - dir: Dir, - permissions: Dir.Permissions, -) Dir.SetPermissionsError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - ev.fchmodat( - &sync, - dir.handle, - "", - permissions.toMode(), - linux.AT.EMPTY_PATH, - ) catch |err| switch (err) { - error.NameTooLong => return errnoBug(.NAMETOOLONG), - error.BadPathName => return errnoBug(.ILSEQ), - error.ProcessFdQuotaExceeded => return errnoBug(.MFILE), - error.SystemFdQuotaExceeded => return errnoBug(.NFILE), - 
error.OperationUnsupported => return errnoBug(.OPNOTSUPP), - else => |e| return e, - }; -} - -fn dirSetFilePermissions( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - permissions: Dir.Permissions, - options: Dir.SetFilePermissionsOptions, -) Dir.SetFilePermissionsError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - try ev.fchmodat( - &sync, - dir.handle, - sub_path_posix, - permissions.toMode(), - if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW, - ); -} - -fn dirSetTimestamps( - userdata: ?*anyopaque, - dir: Dir, - sub_path: []const u8, - options: Dir.SetTimestampsOptions, -) Dir.SetTimestampsError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var path_buffer: [PATH_MAX]u8 = undefined; - const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - var cancel_region: CancelRegion.Sync = try .init(ev); - defer cancel_region.deinit(ev); - try ev.utimensat( - &cancel_region, - dir.handle, - sub_path_posix, - if (options.modify_timestamp != .now or options.access_timestamp != .now) &.{ - setTimestampToPosix(options.access_timestamp), - setTimestampToPosix(options.modify_timestamp), - } else null, - if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW, - ); -} - -fn dirHardLink( - userdata: ?*anyopaque, - old_dir: Dir, - old_sub_path: []const u8, - new_dir: Dir, - new_sub_path: []const u8, - options: Dir.HardLinkOptions, -) Dir.HardLinkError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var old_path_buffer: [PATH_MAX]u8 = undefined; - var new_path_buffer: [PATH_MAX]u8 = undefined; - - const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer); - const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); - - var cancel_region: CancelRegion = .init(); - defer 
cancel_region.deinit(); - return ev.linkat( - &cancel_region, - old_dir.handle, - old_sub_path_posix, - new_dir.handle, - new_sub_path_posix, - if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW, - ); -} - -fn fileStat(userdata: ?*anyopaque, file: File) File.StatError!File.Stat { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return ev.stat(&cancel_region, file.handle); -} - -fn fileLength(userdata: ?*anyopaque, file: File) File.LengthError!u64 { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - while (true) { - var statx_buf = std.mem.zeroes(linux.Statx); - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .STATX, - .flags = 0, - .ioprio = 0, - .fd = file.handle, - .off = @intFromPtr(&statx_buf), - .addr = @intFromPtr(""), - .len = @bitCast(linux.STATX{ .SIZE = true }), - .rw_flags = linux.AT.EMPTY_PATH, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => { - if (!statx_buf.mask.SIZE) return error.Unexpected; - return statx_buf.size; - }, - .INTR, .CANCELED => continue, - .ACCES => |err| return errnoBug(err), - .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
- .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - .LOOP => |err| return errnoBug(err), - .NAMETOOLONG => |err| return errnoBug(err), - .NOENT => |err| return errnoBug(err), - .NOMEM => return error.SystemResources, - .NOTDIR => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - } -} - -fn fileClose(userdata: ?*anyopaque, files: []const File) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - for (files) |file| ev.close(&cancel_region, file.handle); -} - -fn fileWritePositional( - userdata: ?*anyopaque, - file: File, - header: []const u8, - data: []const []const u8, - splat: usize, - offset: u64, -) File.WritePositionalError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var iovecs: [max_iovecs_len]iovec_const = undefined; - var iovlen: iovlen_t = 0; - addBuf(&iovecs, &iovlen, header); - for (data[0 .. data.len - 1]) |bytes| addBuf(&iovecs, &iovlen, bytes); - const pattern = data[data.len - 1]; - if (iovecs.len - iovlen != 0) switch (splat) { - 0 => {}, - 1 => addBuf(&iovecs, &iovlen, pattern), - else => switch (pattern.len) { - 0 => {}, - 1 => { - var backup_buffer: [splat_buffer_size]u8 = undefined; - const splat_buffer = &backup_buffer; - const memset_len = @min(splat_buffer.len, splat); - const buf = splat_buffer[0..memset_len]; - @memset(buf, pattern[0]); - addBuf(&iovecs, &iovlen, buf); - var remaining_splat = splat - buf.len; - while (remaining_splat > splat_buffer.len and iovecs.len - iovlen != 0) { - assert(buf.len == splat_buffer.len); - addBuf(&iovecs, &iovlen, splat_buffer); - remaining_splat -= splat_buffer.len; - } - addBuf(&iovecs, &iovlen, splat_buffer[0..@min(remaining_splat, splat_buffer.len)]); - }, - else => for (0..@min(splat, iovecs.len - iovlen)) |_| { - addBuf(&iovecs, &iovlen, pattern); - }, - }, - }; - - var cancel_region: CancelRegion = .init(); - defer 
cancel_region.deinit(); - return ev.pwritev(&cancel_region, file.handle, iovecs[0..iovlen], offset); -} - -/// This is either usize or u32. Since, either is fine, let's use the same -/// `addBuf` function for both writing to a file and sending network messages. -const iovlen_t = @FieldType(linux.msghdr_const, "iovlen"); - -fn addBuf(v: []iovec_const, i: *iovlen_t, bytes: []const u8) void { - // OS checks ptr addr before length so zero length vectors must be omitted. - if (bytes.len == 0) return; - if (v.len - i.* == 0) return; - v[i.*] = .{ .base = bytes.ptr, .len = bytes.len }; - i.* += 1; -} - -fn fileWriteFileStreaming( - userdata: ?*anyopaque, - file: File, - header: []const u8, - file_reader: *File.Reader, - limit: Io.Limit, -) File.Writer.WriteFileError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = file; - _ = header; - _ = file_reader; - _ = limit; - return error.Unimplemented; -} - -fn fileWriteFilePositional( - userdata: ?*anyopaque, - file: File, - header: []const u8, - file_reader: *File.Reader, - limit: Io.Limit, - offset: u64, -) File.WriteFilePositionalError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = file; - _ = header; - _ = file_reader; - _ = limit; - _ = offset; - return error.Unimplemented; -} - -fn fileReadPositional( - userdata: ?*anyopaque, - file: File, - data: []const []u8, - offset: u64, -) File.ReadPositionalError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var iovecs_buffer: [max_iovecs_len]iovec = undefined; - var i: usize = 0; - for (data) |buf| { - if (iovecs_buffer.len - i == 0) break; - if (buf.len != 0) { - iovecs_buffer[i] = .{ .base = buf.ptr, .len = buf.len }; - i += 1; - } - } - if (i == 0) return 0; - const dest = iovecs_buffer[0..i]; - assert(dest[0].len > 0); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return ev.preadv(&cancel_region, file.handle, dest, offset) catch |err| switch (err) { - 
error.SocketUnconnected => errnoBug(.NOTCONN), // not a socket - error.ConnectionResetByPeer => errnoBug(.CONNRESET), // not a socket - else => |e| e, - }; -} - -fn fileSeekBy(userdata: ?*anyopaque, file: File, offset: i64) File.SeekError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - try ev.lseek(&sync, file.handle, @bitCast(offset), linux.SEEK.CUR); -} - -fn fileSeekTo(userdata: ?*anyopaque, file: File, offset: u64) File.SeekError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - try ev.lseek(&sync, file.handle, offset, linux.SEEK.SET); -} - -fn fileSync(userdata: ?*anyopaque, file: File) File.SyncError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .FSYNC, - .flags = 0, - .ioprio = 0, - .fd = file.handle, - .off = 0, - .addr = 0, - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .BADF => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - .ROFS => |err| return errnoBug(err), - .IO => return error.InputOutput, - .NOSPC => return error.NoSpaceLeft, - .DQUOT => return error.DiskQuota, - else => |err| return unexpectedErrno(err), - } - } -} - -fn fileIsTty(userdata: ?*anyopaque, file: File) Io.Cancelable!bool { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - while (true) { - try sync.cancel_region.await(.nothing); - var wsz: winsize = undefined; - const rc = 
linux.ioctl(file.handle, linux.T.IOCGWINSZ, @intFromPtr(&wsz)); - switch (linux.errno(rc)) { - .SUCCESS => return true, - .INTR => continue, - else => return false, - } - } -} - -fn fileEnableAnsiEscapeCodes(userdata: ?*anyopaque, file: File) File.EnableAnsiEscapeCodesError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - if (!try fileIsTty(ev, file)) return error.NotTerminalDevice; -} - -fn fileSetLength(userdata: ?*anyopaque, file: File, length: u64) File.SetLengthError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .FTRUNCATE, - .flags = 0, - .ioprio = 0, - .fd = file.handle, - .off = length, - .addr = 0, - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .FBIG => return error.FileTooBig, - .IO => return error.InputOutput, - .PERM => return error.PermissionDenied, - .TXTBSY => return error.FileBusy, - .BADF => |err| return errnoBug(err), // Handle not open for writing. - .INVAL => return error.NonResizable, // This is returned for /dev/null for example. 
- else => |err| return unexpectedErrno(err), - } - } -} - -fn fileSetOwner( - userdata: ?*anyopaque, - file: File, - owner: ?File.Uid, - group: ?File.Gid, -) File.SetOwnerError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - try ev.fchownat( - &sync, - file.handle, - "", - owner orelse std.math.maxInt(linux.uid_t), - group orelse std.math.maxInt(linux.gid_t), - linux.AT.EMPTY_PATH, - ); -} - -fn fileSetPermissions( - userdata: ?*anyopaque, - file: File, - permissions: File.Permissions, -) File.SetPermissionsError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - ev.fchmodat( - &sync, - file.handle, - "", - permissions.toMode(), - linux.AT.EMPTY_PATH, - ) catch |err| switch (err) { - error.NameTooLong => return errnoBug(.NAMETOOLONG), - error.BadPathName => return errnoBug(.ILSEQ), - error.ProcessFdQuotaExceeded => return errnoBug(.MFILE), - error.SystemFdQuotaExceeded => return errnoBug(.NFILE), - error.OperationUnsupported => return errnoBug(.OPNOTSUPP), - else => |e| return e, - }; -} - -fn fileSetTimestamps( - userdata: ?*anyopaque, - file: File, - options: File.SetTimestampsOptions, -) File.SetTimestampsError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - try ev.utimensat( - &sync, - file.handle, - "", - if (options.modify_timestamp != .now or options.access_timestamp != .now) &.{ - setTimestampToPosix(options.access_timestamp), - setTimestampToPosix(options.modify_timestamp), - } else null, - linux.AT.EMPTY_PATH, - ); -} - -fn fileLock(userdata: ?*anyopaque, file: File, lock: File.Lock) File.LockError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - ev.flock(&sync, file.handle, lock, .blocking) catch |err| switch 
(err) { - error.WouldBlock => unreachable, // blocking - else => |e| return e, - }; -} - -fn fileTryLock(userdata: ?*anyopaque, file: File, lock: File.Lock) File.LockError!bool { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - ev.flock(&sync, file.handle, lock, switch (lock) { - .none => .blocking, - .shared, .exclusive => .nonblocking, - }) catch |err| switch (err) { - error.WouldBlock => return false, - else => |e| return e, - }; - return true; -} - -fn fileUnlock(userdata: ?*anyopaque, file: File) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = .initBlocked(ev); - defer sync.deinit(ev); - ev.flock(&sync, file.handle, .none, .blocking) catch |err| switch (err) { - error.Canceled => unreachable, // blocked - error.WouldBlock => unreachable, // blocking - error.SystemResources => return recoverableOsBugDetected(), // Resource deallocation. - error.FileLocksUnsupported => return recoverableOsBugDetected(), // We already got the lock. - error.Unexpected => return recoverableOsBugDetected(), // Resource deallocation must succeed. - }; -} - -fn fileDowngradeLock(userdata: ?*anyopaque, file: File) File.DowngradeLockError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - ev.flock(&sync, file.handle, .shared, .nonblocking) catch |err| switch (err) { - error.WouldBlock => return errnoBug(.AGAIN), // File was not locked in exclusive mode. - error.SystemResources => return errnoBug(.NOLCK), // Lock already obtained. - error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Lock already obtained. 
- else => |e| return e, - }; -} - -fn fileRealPath(userdata: ?*anyopaque, file: File, out_buffer: []u8) File.RealPathError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - return ev.realPath(&sync, file.handle, out_buffer); -} - -fn fileHardLink( - userdata: ?*anyopaque, - file: File, - new_dir: Dir, - new_sub_path: []const u8, - options: File.HardLinkOptions, -) File.HardLinkError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var new_path_buffer: [PATH_MAX]u8 = undefined; - const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - return ev.linkat( - &cancel_region, - file.handle, - "", - new_dir.handle, - new_sub_path_posix, - linux.AT.EMPTY_PATH | @as(u32, if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW), - ); -} - -fn fileMemoryMapCreate( - userdata: ?*anyopaque, - file: File, - options: File.MemoryMap.CreateOptions, -) File.MemoryMap.CreateError!File.MemoryMap { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - const prot: linux.PROT = .{ - .READ = options.protection.read, - .WRITE = options.protection.write, - .EXEC = options.protection.execute, - }; - const flags: linux.MAP = .{ - .TYPE = .SHARED_VALIDATE, - .POPULATE = options.populate, - }; - - const page_align = std.heap.page_size_min; - - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - const contents = while (true) { - try sync.cancel_region.await(.nothing); - const casted_offset = std.math.cast(i64, options.offset) orelse return error.Unseekable; - const rc = linux.mmap(null, options.len, prot, flags, file.handle, casted_offset); - switch (linux.errno(rc)) { - .SUCCESS => break @as([*]align(page_align) u8, @ptrFromInt(rc))[0..options.len], - .INTR => continue, - .ACCES => return error.AccessDenied, - .AGAIN => return error.LockedMemoryLimitExceeded, - 
.MFILE => return error.ProcessFdQuotaExceeded, - .NFILE => return error.SystemFdQuotaExceeded, - .NOMEM => return error.OutOfMemory, - .PERM => return error.PermissionDenied, - .OVERFLOW => return error.Unseekable, - .BADF => |err| return errnoBug(err), // Always a race condition. - .INVAL => |err| return errnoBug(err), // Invalid parameters to mmap() - .OPNOTSUPP => |err| return errnoBug(err), // Bad flags with MAP.SHARED_VALIDATE on Linux. - else => |err| return unexpectedErrno(err), - } - }; - return .{ - .file = file, - .offset = options.offset, - .memory = contents, - .section = {}, - }; -} - -fn fileMemoryMapDestroy(userdata: ?*anyopaque, mm: *File.MemoryMap) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - const memory = mm.memory; - if (memory.len == 0) return; - switch (linux.errno(linux.munmap(memory.ptr, memory.len))) { - .SUCCESS => {}, - else => |err| if (builtin.mode == .Debug) - std.log.err("failed to unmap {d} bytes at {*}: {t}", .{ memory.len, memory.ptr, err }), - } - mm.* = undefined; -} - -fn processExecutableOpen( - userdata: ?*anyopaque, - flags: File.OpenFlags, -) process.OpenExecutableError!File { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - return dirOpenFile(ev, .{ .handle = linux.AT.FDCWD }, "/proc/self/exe", flags); -} - -fn processExecutablePath(userdata: ?*anyopaque, out_buffer: []u8) process.ExecutablePathError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - return dirReadLink(ev, .cwd(), "/proc/self/exe", out_buffer) catch |err| switch (err) { - error.UnsupportedReparsePointType => unreachable, // Windows-only - error.NetworkNotFound => unreachable, // Windows-only - error.FileBusy => unreachable, // Windows-only - else => |e| return e, - }; -} - -fn lockStderr(userdata: ?*anyopaque, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const ev_io = ev.io(); - ev.stderr_mutex.lockUncancelable(ev_io); - errdefer 
ev.stderr_mutex.unlock(ev_io); - return ev.initLockedStderr(terminal_mode); -} - -fn tryLockStderr( - userdata: ?*anyopaque, - terminal_mode: ?Io.Terminal.Mode, -) Io.Cancelable!?Io.LockedStderr { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const ev_io = ev.io(); - if (!ev.stderr_mutex.tryLock()) return null; - errdefer ev.stderr_mutex.unlock(ev_io); - return try ev.initLockedStderr(terminal_mode); -} - -fn initLockedStderr(ev: *Evented, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { - if (!ev.stderr_writer_initialized) { - const ev_io = ev.io(); - try ev.scanEnviron(); - const NO_COLOR = ev.environ.exist.NO_COLOR; - const CLICOLOR_FORCE = ev.environ.exist.CLICOLOR_FORCE; - ev.stderr_mode = terminal_mode orelse - try .detect(ev_io, ev.stderr_writer.file, NO_COLOR, CLICOLOR_FORCE); - ev.stderr_writer_initialized = true; - } - return .{ - .file_writer = &ev.stderr_writer, - .terminal_mode = terminal_mode orelse ev.stderr_mode, - }; -} - -fn unlockStderr(userdata: ?*anyopaque) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - ev.stderr_writer.interface.flush() catch |err| switch (err) { - error.WriteFailed => switch (ev.stderr_writer.err.?) 
{ - error.Canceled => recancel(ev), - else => {}, - }, - }; - ev.stderr_writer.interface.end = 0; - ev.stderr_writer.interface.buffer = &.{}; - ev.stderr_mutex.unlock(ev.io()); -} - -fn processCurrentPath(userdata: ?*anyopaque, buffer: []u8) process.CurrentPathError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.getcwd(buffer.ptr, buffer.len))) { - .SUCCESS => return std.mem.findScalar(u8, buffer, 0).?, - .INTR => continue, - .NOENT => return error.CurrentDirUnlinked, - .RANGE => return error.NameTooLong, - .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - } -} - -fn processSetCurrentDir(userdata: ?*anyopaque, dir: Dir) process.SetCurrentDirError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - if (dir.handle == linux.AT.FDCWD) return; - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - return ev.fchdir(&sync, dir.handle); -} - -fn processSetCurrentPath(userdata: ?*anyopaque, dir_path: []const u8) ChdirError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var path_buffer: [PATH_MAX]u8 = undefined; - const dir_path_posix = try pathToPosix(dir_path, &path_buffer); - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - return ev.chdir(&sync, dir_path_posix); -} - -fn processReplace(userdata: ?*anyopaque, options: process.ReplaceOptions) process.ReplaceError { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - try ev.scanEnviron(); // for PATH - const PATH = ev.environ.string.PATH orelse default_PATH; - - var arena_allocator = std.heap.ArenaAllocator.init(ev.allocator()); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - const argv_buf = try arena.allocSentinel(?[*:0]const u8, options.argv.len, null); - for 
(options.argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; - - const env_block = env_block: { - const prog_fd: i32 = -1; - if (options.environ_map) |environ_map| break :env_block try environ_map.createPosixBlock(arena, .{ - .zig_progress_fd = prog_fd, - }); - break :env_block try ev.environ.process_environ.createPosixBlock(arena, .{ - .zig_progress_fd = prog_fd, - }); - }; - - var sync: CancelRegion.Sync = try .init(ev); - defer sync.deinit(ev); - return ev.execv(&sync, options.expand_arg0, argv_buf.ptr[0].?, argv_buf.ptr, env_block, PATH); -} - -fn processReplacePath( - userdata: ?*anyopaque, - dir: Dir, - options: process.ReplaceOptions, -) process.ReplaceError { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = dir; - _ = options; - @panic("TODO processReplacePath"); -} - -fn processSpawn(userdata: ?*anyopaque, options: process.SpawnOptions) process.SpawnError!process.Child { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const spawned = try ev.spawn(options); - var cancel_region: CancelRegion = .initBlocked(); - defer cancel_region.deinit(); - defer ev.close(&cancel_region, spawned.err_fd); - - // Wait for the child to report any errors in or before `execvpe`. - var child_err: ForkBailError = undefined; - ev.readAll(&cancel_region, spawned.err_fd, @ptrCast(&child_err)) catch |read_err| { - switch (read_err) { - error.Canceled => unreachable, // blocked - error.EndOfStream => { - // Write end closed by CLOEXEC at the time of the `execvpe` call, - // indicating success. - }, - else => { - // Problem reading the error from the error reporting pipe. We - // don't know if the child is alive or dead. Better to assume it is - // alive so the resource does not risk being leaked. 
- }, - } - return .{ - .id = spawned.pid, - .thread_handle = {}, - .stdin = spawned.stdin, - .stdout = spawned.stdout, - .stderr = spawned.stderr, - .request_resource_usage_statistics = options.request_resource_usage_statistics, - }; - }; - return child_err; -} - -fn processSpawnPath( - userdata: ?*anyopaque, - dir: Dir, - options: process.SpawnOptions, -) process.SpawnError!process.Child { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = dir; - _ = options; - @panic("TODO processSpawnPath"); -} - -const prog_fileno = 3; - -const Spawned = struct { - pid: pid_t, - err_fd: fd_t, - stdin: ?File, - stdout: ?File, - stderr: ?File, -}; -fn spawn(ev: *Evented, options: process.SpawnOptions) process.SpawnError!Spawned { - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - - // The child process does need to access (one end of) these pipes. However, - // we must initially set CLOEXEC to avoid a race condition. If another thread - // is racing to spawn a different child process, we don't want it to inherit - // these FDs in any scenario; that would mean that, for instance, calls to - // `poll` from the parent would not report the child's stdout as closing when - // expected, since the other child may retain a reference to the write end of - // the pipe. So, we create the pipes with CLOEXEC initially. After fork, we - // need to do something in the new child to make sure we preserve the reference - // we want. We could use `fcntl` to remove CLOEXEC from the FD, but as it - // turns out, we `dup2` everything anyway, so there's no need! 
- const pipe_flags: linux.O = .{ .CLOEXEC = true }; - - const stdin_pipe = if (options.stdin == .pipe) try pipe2(pipe_flags) else undefined; - errdefer if (options.stdin == .pipe) { - ev.destroyPipe(&cancel_region, stdin_pipe); - }; - - const stdout_pipe = if (options.stdout == .pipe) try pipe2(pipe_flags) else undefined; - errdefer if (options.stdout == .pipe) { - ev.destroyPipe(&cancel_region, stdout_pipe); - }; - - const stderr_pipe = if (options.stderr == .pipe) try pipe2(pipe_flags) else undefined; - errdefer if (options.stderr == .pipe) { - ev.destroyPipe(&cancel_region, stderr_pipe); - }; - - const any_ignore = - options.stdin == .ignore or options.stdout == .ignore or options.stderr == .ignore; - const dev_null_fd = if (any_ignore) try ev.null_fd.open(ev, &cancel_region, "/dev/null", .{ - .ACCMODE = .RDWR, - }) else undefined; - - const prog_pipe: [2]fd_t = if (options.progress_node.index != .none) pipe: { - // We use CLOEXEC for the same reason as in `pipe_flags`. - const pipe = try pipe2(.{ .NONBLOCK = true, .CLOEXEC = true }); - _ = linux.fcntl(pipe[0], linux.F.SETPIPE_SZ, @as(u32, std.Progress.max_packet_len * 2)); - break :pipe pipe; - } else .{ -1, -1 }; - errdefer ev.destroyPipe(&cancel_region, prog_pipe); - - var arena_allocator = std.heap.ArenaAllocator.init(ev.allocator()); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - // The POSIX standard does not allow malloc() between fork() and execve(), - // and this allocator may be a libc allocator. - // I have personally observed the child process deadlocking when it tries - // to call malloc() due to a heap allocation between fork() and execve(), - // in musl v1.1.24. - // Additionally, we want to reduce the number of possible ways things - // can fail between fork() and execve(). - // Therefore, we do all the allocation for the execve() before the fork(). - // This means we must do the null-termination of argv and env vars here. 
- const argv_buf = try arena.allocSentinel(?[*:0]const u8, options.argv.len, null); - for (options.argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; - - comptime assert(@max(linux.STDIN_FILENO, linux.STDOUT_FILENO, linux.STDERR_FILENO) + 1 == prog_fileno); - - const env_block = env_block: { - const prog_fd: i32 = if (prog_pipe[1] == -1) -1 else prog_fileno; - if (options.environ_map) |environ_map| break :env_block try environ_map.createPosixBlock(arena, .{ - .zig_progress_fd = prog_fd, - }); - break :env_block try ev.environ.process_environ.createPosixBlock(arena, .{ - .zig_progress_fd = prog_fd, - }); - }; - - // This pipe communicates to the parent errors in the child between `fork` and `execvpe`. - // It is closed by the child (via CLOEXEC) without writing if `execvpe` succeeds. - const err_pipe: [2]fd_t = try pipe2(.{ .CLOEXEC = true }); - errdefer ev.destroyPipe(&cancel_region, err_pipe); - - try ev.scanEnviron(); // for PATH - const PATH = ev.environ.string.PATH orelse default_PATH; - - const pid_result: pid_t = fork: { - const rc = linux.fork(); - switch (linux.errno(rc)) { - .SUCCESS => break :fork @intCast(rc), - .AGAIN => return error.SystemResources, - .NOMEM => return error.SystemResources, - .NOSYS => return error.OperationUnsupported, - else => |err| return unexpectedErrno(err), - } - }; - - if (pid_result == 0) { - defer comptime unreachable; // We are the child. 
- var sync: CancelRegion.Sync = .{ .cancel_region = .initBlocked() }; - const err = ev.setUpChild(&sync, .{ - .stdin_pipe = stdin_pipe[0], - .stdout_pipe = stdout_pipe[1], - .stderr_pipe = stderr_pipe[1], - .dev_null_fd = dev_null_fd, - .prog_pipe = prog_pipe[1], - .argv_buf = argv_buf, - .env_block = env_block, - .PATH = PATH, - .spawn = options, - }); - ev.writeAll(&sync.cancel_region, err_pipe[1], @ptrCast(&err)) catch {}; - const exit = if (builtin.single_threaded) linux.exit else linux.exit_group; - exit(1); - } - - const pid: pid_t = @intCast(pid_result); // We are the parent. - errdefer comptime unreachable; // The child is forked; we must not error from now on - - ev.close(&cancel_region, err_pipe[1]); // make sure only the child holds the write end open - - if (options.stdin == .pipe) ev.close(&cancel_region, stdin_pipe[0]); - if (options.stdout == .pipe) ev.close(&cancel_region, stdout_pipe[1]); - if (options.stderr == .pipe) ev.close(&cancel_region, stderr_pipe[1]); - - if (prog_pipe[1] != -1) ev.close(&cancel_region, prog_pipe[1]); - - options.progress_node.setIpcFile(ev, .{ .handle = prog_pipe[0], .flags = .{ .nonblocking = true } }); - - return .{ - .pid = pid, - .err_fd = err_pipe[0], - .stdin = switch (options.stdin) { - .pipe => .{ .handle = stdin_pipe[1], .flags = .{ .nonblocking = false } }, - else => null, - }, - .stdout = switch (options.stdout) { - .pipe => .{ .handle = stdout_pipe[0], .flags = .{ .nonblocking = false } }, - else => null, - }, - .stderr = switch (options.stderr) { - .pipe => .{ .handle = stderr_pipe[0], .flags = .{ .nonblocking = false } }, - else => null, - }, - }; -} - -pub const PipeError = error{ - SystemFdQuotaExceeded, - ProcessFdQuotaExceeded, -} || Io.UnexpectedError; -pub fn pipe2(flags: linux.O) PipeError![2]fd_t { - var fds: [2]fd_t = undefined; - switch (linux.errno(linux.pipe2(&fds, flags))) { - .SUCCESS => return fds, - .INVAL => |err| return errnoBug(err), // Invalid flags - .NFILE => return 
error.SystemFdQuotaExceeded, - .MFILE => return error.ProcessFdQuotaExceeded, - else => |err| return unexpectedErrno(err), - } -} -fn destroyPipe(ev: *Evented, cancel_region: *CancelRegion, pipe: [2]fd_t) void { - if (pipe[0] != -1) ev.close(cancel_region, pipe[0]); - if (pipe[0] != pipe[1]) ev.close(cancel_region, pipe[1]); -} - -/// Errors that can occur between fork() and execv() -const ForkBailError = process.SetCurrentDirError || ChdirError || - process.SpawnError || process.ReplaceError; -fn setUpChild( - ev: *Evented, - sync: *CancelRegion.Sync, - options: struct { - stdin_pipe: fd_t, - stdout_pipe: fd_t, - stderr_pipe: fd_t, - dev_null_fd: fd_t, - prog_pipe: fd_t, - argv_buf: [:null]?[*:0]const u8, - env_block: process.Environ.Block, - PATH: []const u8, - spawn: process.SpawnOptions, - }, -) ForkBailError { - try ev.setUpChildIo( - sync, - options.spawn.stdin, - options.stdin_pipe, - linux.STDIN_FILENO, - options.dev_null_fd, - ); - try ev.setUpChildIo( - sync, - options.spawn.stdout, - options.stdout_pipe, - linux.STDOUT_FILENO, - options.dev_null_fd, - ); - try ev.setUpChildIo( - sync, - options.spawn.stderr, - options.stderr_pipe, - linux.STDERR_FILENO, - options.dev_null_fd, - ); - - switch (options.spawn.cwd) { - .inherit => {}, - .dir => |cwd_dir| try ev.fchdir(sync, cwd_dir.handle), - .path => |cwd_path| { - var cwd_path_buffer: [PATH_MAX]u8 = undefined; - const cwd_path_posix = try pathToPosix(cwd_path, &cwd_path_buffer); - try ev.chdir(sync, cwd_path_posix); - }, - } - - // Must happen after fchdir above, the cwd file descriptor might be - // equal to prog_fileno and be clobbered by this dup2 call. 
if (options.prog_pipe != -1) try ev.dup2(sync, options.prog_pipe, prog_fileno);

    if (options.spawn.gid) |gid| {
        switch (linux.errno(linux.setregid(gid, gid))) {
            .SUCCESS => {},
            .AGAIN => return error.ResourceLimitReached,
            .INVAL => return error.InvalidUserId,
            .PERM => return error.PermissionDenied,
            else => return error.Unexpected,
        }
    }

    if (options.spawn.uid) |uid| {
        switch (linux.errno(linux.setreuid(uid, uid))) {
            .SUCCESS => {},
            .AGAIN => return error.ResourceLimitReached,
            .INVAL => return error.InvalidUserId,
            .PERM => return error.PermissionDenied,
            else => return error.Unexpected,
        }
    }

    if (options.spawn.pgid) |pid| {
        switch (linux.errno(linux.setpgid(0, pid))) {
            .SUCCESS => {},
            .ACCES => return error.ProcessAlreadyExec,
            .INVAL => return error.InvalidProcessGroupId,
            .PERM => return error.PermissionDenied,
            else => return error.Unexpected,
        }
    }

    if (options.spawn.start_suspended) {
        switch (linux.errno(linux.kill(linux.getpid(), .STOP))) {
            .SUCCESS => {},
            .PERM => return error.PermissionDenied,
            else => return error.Unexpected,
        }
    }

    return ev.execv(
        sync,
        options.spawn.expand_arg0,
        options.argv_buf.ptr[0].?,
        options.argv_buf.ptr,
        options.env_block,
        options.PATH,
    );
}

/// Applies one `StdIo` setting to the descriptor `std_fileno`.
///
/// `.pipe`, `.ignore` and `.file` duplicate `pipe_fd`, `dev_null_fd` or the
/// given file's handle onto `std_fileno`; `.close` closes `std_fileno`;
/// `.inherit` leaves it untouched. Errors from `dup2` are propagated.
fn setUpChildIo(
    ev: *Evented,
    sync: *CancelRegion.Sync,
    stdio: process.SpawnOptions.StdIo,
    pipe_fd: fd_t,
    std_fileno: i32,
    dev_null_fd: fd_t,
) !void {
    switch (stdio) {
        .pipe => try ev.dup2(sync, pipe_fd, std_fileno),
        .close => ev.close(&sync.cancel_region, std_fileno),
        .inherit => {},
        .ignore => try ev.dup2(sync, dev_null_fd, std_fileno),
        .file => |file| try ev.dup2(sync, file.handle, std_fileno),
    }
}

/// Errors that `dup2` can return.
pub const DupError = error{
    ProcessFdQuotaExceeded,
    SystemResources,
} || Io.UnexpectedError || Io.Cancelable;

/// Duplicates `old_fd` onto `new_fd` using the `dup2` syscall.
///
/// The cancel region of `sync` is awaited before every attempt and any error
/// from that await is propagated. `EBUSY` and `EINTR` cause a retry; `EMFILE`
/// and `ENOMEM` map to `error.ProcessFdQuotaExceeded` and
/// `error.SystemResources`; `EINVAL` and `EBADF` are reported through
/// `errnoBug`; every other errno goes through `unexpectedErrno`.
pub fn dup2(ev: *Evented, sync: *CancelRegion.Sync, old_fd: fd_t, new_fd: fd_t) DupError!void {
    _ = ev;
    while (true) {
        try sync.cancel_region.await(.nothing);
        switch (linux.errno(linux.dup2(old_fd, new_fd))) {
            // Must return: falling out of the switch would re-enter the loop
            // and repeat the (successful) syscall forever.
            .SUCCESS => return,
            .BUSY, .INTR => continue,
            .INVAL => |err| return errnoBug(err), // invalid parameters
            .BADF => |err| return errnoBug(err), // use after free
            .MFILE => return error.ProcessFdQuotaExceeded,
            .NOMEM => return error.SystemResources,
            else => |err| return unexpectedErrno(err),
        }
    }
}

fn execv(
    ev: *Evented,
    sync: *CancelRegion.Sync,
    arg0_expand: process.ArgExpansion,
    file: [*:0]const u8,
    child_argv: [*:null]?[*:0]const u8,
    env_block: process.Environ.PosixBlock,
    PATH: []const u8,
) process.ReplaceError {
    const file_slice = std.mem.sliceTo(file, 0);
    if (std.mem.findScalar(u8, file_slice, '/') != null) return ev.execvPath(sync, file, child_argv, env_block);

    // Use of PATH_MAX here is valid as the path_buf will be passed
    // directly to the operating system in posixExecvPath.
    var path_buf: [PATH_MAX]u8 = undefined;
    var it = std.mem.tokenizeScalar(u8, PATH, ':');
    var seen_eacces = false;
    var err: process.ReplaceError = error.FileNotFound;

    // In case of expanding arg0 we must put it back if we return with an error.
- const prev_arg0 = child_argv[0]; - defer switch (arg0_expand) { - .expand => child_argv[0] = prev_arg0, - .no_expand => {}, - }; - - while (it.next()) |search_path| { - const path_len = search_path.len + file_slice.len + 1; - if (path_buf.len < path_len + 1) return error.NameTooLong; - @memcpy(path_buf[0..search_path.len], search_path); - path_buf[search_path.len] = '/'; - @memcpy(path_buf[search_path.len + 1 ..][0..file_slice.len], file_slice); - path_buf[path_len] = 0; - const full_path = path_buf[0..path_len :0].ptr; - switch (arg0_expand) { - .expand => child_argv[0] = full_path, - .no_expand => {}, - } - err = ev.execvPath(sync, full_path, child_argv, env_block); - switch (err) { - error.AccessDenied => seen_eacces = true, - error.FileNotFound, error.NotDir => {}, - else => |e| return e, - } - } - if (seen_eacces) return error.AccessDenied; - return err; -} -/// This function ignores PATH environment variable. -pub fn execvPath( - ev: *Evented, - sync: *CancelRegion.Sync, - path: [*:0]const u8, - child_argv: [*:null]const ?[*:0]const u8, - env_block: process.Environ.PosixBlock, -) process.ReplaceError { - _ = ev; - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.execve(path, child_argv, env_block.slice.ptr))) { - .FAULT => |err| return errnoBug(err), // Bad pointer parameter. 
- .@"2BIG" => return error.SystemResources, - .MFILE => return error.ProcessFdQuotaExceeded, - .NAMETOOLONG => return error.NameTooLong, - .NFILE => return error.SystemFdQuotaExceeded, - .NOMEM => return error.SystemResources, - .ACCES => return error.AccessDenied, - .PERM => return error.PermissionDenied, - .INVAL => return error.InvalidExe, - .NOEXEC => return error.InvalidExe, - .IO => return error.FileSystem, - .LOOP => return error.FileSystem, - .ISDIR => return error.IsDir, - .NOENT => return error.FileNotFound, - .NOTDIR => return error.NotDir, - .TXTBSY => return error.FileBusy, - .LIBBAD => return error.InvalidExe, - else => |err| return unexpectedErrno(err), - } -} - -fn childWait(userdata: ?*anyopaque, child: *process.Child) process.Child.WaitError!process.Child.Term { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; - defer maybe_sync.deinit(ev); - defer ev.childCleanup(maybe_sync.cancelRegion(), child); - - const pid = child.id.?; - var info: linux.siginfo_t = undefined; - while (true) { - const thread = try maybe_sync.cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .WAITID, - .flags = 0, - .ioprio = 0, - .fd = pid, - .off = @intFromPtr(&info), - .addr = 0, - .len = @intFromEnum(linux.P.PID), - .rw_flags = 0, - .user_data = @intFromPtr(maybe_sync.cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = linux.W.EXITED | - @as(i32, if (child.request_resource_usage_statistics) linux.W.NOWAIT else 0), - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (maybe_sync.cancel_region.errno()) { - .SUCCESS => { - if (child.request_resource_usage_statistics) { - const sync = try maybe_sync.enterSync(ev); - while (true) { - try sync.cancel_region.await(.nothing); - var rusage: linux.rusage = undefined; - switch (linux.errno(linux.waitid( - .PID, - pid, - &info, - linux.W.EXITED | linux.W.NOHANG, - &rusage, - ))) { - .SUCCESS 
=> { - child.resource_usage_statistics.rusage = rusage; - break; - }, - .INTR, .CANCELED => continue, - .CHILD => |err| return errnoBug(err), // Double-free. - else => |err| return unexpectedErrno(err), - } - } - } - const status: u32 = @bitCast(info.fields.common.second.sigchld.status); - const code: linux.CLD = @enumFromInt(info.code); - return switch (code) { - .EXITED => .{ .exited = @truncate(status) }, - .KILLED, .DUMPED => .{ .signal = @enumFromInt(status) }, - .TRAPPED, .STOPPED => .{ .stopped = status }, - _, .CONTINUED => .{ .unknown = status }, - }; - }, - .INTR, .CANCELED => continue, - .CHILD => |err| return errnoBug(err), // Double-free. - else => |err| return unexpectedErrno(err), - } - } -} - -fn childKill(userdata: ?*anyopaque, child: *process.Child) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - var maybe_sync: CancelRegion.Sync.Maybe = .{ .sync = .initBlocked(ev) }; - defer maybe_sync.deinit(ev); - defer ev.childCleanup(maybe_sync.cancelRegion(), child); - - const pid = child.id.?; - while (true) switch (linux.errno(linux.kill(pid, .TERM))) { - .SUCCESS => break, - .INTR => continue, - .PERM => return, - .INVAL => |err| return errnoBug(err) catch {}, - .SRCH => |err| return errnoBug(err) catch {}, - else => |err| return unexpectedErrno(err) catch {}, - }; - maybe_sync.leaveSync(ev); - - var info: linux.siginfo_t = undefined; - while (true) { - const thread = maybe_sync.cancel_region.awaitIoUring() catch |err| switch (err) { - error.Canceled => unreachable, // blocked - }; - thread.enqueue().* = .{ - .opcode = .WAITID, - .flags = 0, - .ioprio = 0, - .fd = pid, - .off = @intFromPtr(&info), - .addr = 0, - .len = @intFromEnum(linux.P.PID), - .rw_flags = 0, - .user_data = @intFromPtr(maybe_sync.cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = linux.W.EXITED, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (maybe_sync.cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED 
=> continue, - .CHILD => |err| return errnoBug(err) catch {}, // Double-free. - else => |err| return unexpectedErrno(err) catch {}, - } - } -} - -fn childCleanup(ev: *Evented, cancel_region: *CancelRegion, child: *process.Child) void { - if (child.stdin) |*stdin| { - ev.close(cancel_region, stdin.handle); - child.stdin = null; - } - if (child.stdout) |*stdout| { - ev.close(cancel_region, stdout.handle); - child.stdout = null; - } - if (child.stderr) |*stderr| { - ev.close(cancel_region, stderr.handle); - child.stderr = null; - } - child.id = null; -} - -fn progressParentFile(userdata: ?*anyopaque) std.Progress.ParentFileError!File { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const cancel_protection = swapCancelProtection(ev, .blocked); - defer assert(swapCancelProtection(ev, cancel_protection) == .blocked); - ev.scanEnviron() catch |err| switch (err) { - error.Canceled => unreachable, // blocked - }; - return ev.environ.zig_progress_file; -} - -fn scanEnviron(ev: *Evented) Io.Cancelable!void { - const ev_io = ev.io(); - try ev.environ_mutex.lock(ev_io); - defer ev.environ_mutex.unlock(ev_io); - ev.environ.scan(ev.allocator()); -} - -fn clockResolution(userdata: ?*anyopaque, clock: Io.Clock) Io.Clock.ResolutionError!Io.Duration { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - const clock_id = clockToPosix(clock); - var timespec: linux.timespec = undefined; - return switch (linux.errno(linux.clock_getres(clock_id, &timespec))) { - .SUCCESS => .fromNanoseconds(nanosecondsFromPosix(&timespec)), - .INVAL => return error.ClockUnavailable, - else => |err| return unexpectedErrno(err), - }; -} - -fn now(userdata: ?*anyopaque, clock: Io.Clock) Io.Timestamp { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - var tp: linux.timespec = undefined; - switch (linux.errno(linux.clock_gettime(clockToPosix(clock), &tp))) { - .SUCCESS => return timestampFromPosix(&tp), - else => return .zero, - } -} - -fn sleep(userdata: ?*anyopaque, 
timeout: Io.Timeout) Io.Cancelable!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - - const timespec: linux.kernel_timespec, const clock: Io.Clock, const timeout_flags: u32 = timespec: switch (timeout) { - .none => .{ - .{ - .sec = std.math.maxInt(i64), - .nsec = std.time.ns_per_s - 1, - }, - .awake, - linux.IORING_TIMEOUT_ABS, - }, - .duration => |duration| { - const ns = duration.raw.toNanoseconds(); - break :timespec .{ - .{ - .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), - .nsec = @intCast(@mod(ns, std.time.ns_per_s)), - }, - duration.clock, - 0, - }; - }, - .deadline => |deadline| { - const ns = deadline.raw.toNanoseconds(); - break :timespec .{ - .{ - .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), - .nsec = @intCast(@mod(ns, std.time.ns_per_s)), - }, - deadline.clock, - linux.IORING_TIMEOUT_ABS, - }; - }, - }; - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .TIMEOUT, - .flags = 0, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = @intFromPtr(&timespec), - .len = 1, - .rw_flags = timeout_flags | @as(u32, switch (clock) { - .real => linux.IORING_TIMEOUT_REALTIME, - else => 0, - .boot => linux.IORING_TIMEOUT_BOOTTIME, - }), - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - // Handles SUCCESS as well as clock not available and unexpected - // errors. The user had a chance to check clock resolution before - // getting here, which would have reported 0, making this a legal - // amount of time to sleep. 
- else => return, - .INTR, .CANCELED => return error.Canceled, - } -} - -fn random(userdata: ?*anyopaque, buffer: []u8) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var thread: *Thread = .current(); - if (!thread.csprng.isInitialized()) { - @branchHint(.unlikely); - var seed: [Csprng.seed_len]u8 = undefined; - { - const ev_io = ev.io(); - ev.csprng_mutex.lockUncancelable(ev_io); - defer ev.csprng_mutex.unlock(ev_io); - if (!ev.csprng.isInitialized()) { - @branchHint(.unlikely); - var cancel_region: CancelRegion = .initBlocked(); - defer cancel_region.deinit(); - ev.urandomReadAll(&cancel_region, &seed) catch |err| switch (err) { - error.Canceled => unreachable, // blocked - else => fallbackSeed(ev, &seed), - }; - ev.csprng.rng = .init(seed); - thread = .current(); - } - ev.csprng.rng.fill(&seed); - } - if (!thread.csprng.isInitialized()) { - @branchHint(.likely); - thread.csprng.rng = .init(seed); - } else thread.csprng.rng.addEntropy(&seed); - } - thread.csprng.rng.fill(buffer); -} - -fn randomSecure(userdata: ?*anyopaque, buffer: []u8) Io.RandomSecureError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - if (buffer.len == 0) return; - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - ev.urandomReadAll(&cancel_region, buffer) catch |err| switch (err) { - error.Canceled => return error.Canceled, - else => return error.EntropyUnavailable, - }; -} - -fn netListenIpUnavailable( - userdata: ?*anyopaque, - address: net.IpAddress, - options: net.IpAddress.ListenOptions, -) net.IpAddress.ListenError!net.Server { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = address; - _ = options; - return error.NetworkDown; -} - -fn netAcceptUnavailable( - userdata: ?*anyopaque, - listen_handle: net.Socket.Handle, -) net.Server.AcceptError!net.Stream { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = listen_handle; - return error.NetworkDown; -} - -fn netBindIp( - userdata: 
?*anyopaque, - address: *const net.IpAddress, - options: net.IpAddress.BindOptions, -) net.IpAddress.BindError!net.Socket { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const family = posixAddressFamily(address); - var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; - defer maybe_sync.deinit(ev); - const socket_fd = try ev.socket(&maybe_sync.cancel_region, family, options); - errdefer ev.close(maybe_sync.cancelRegion(), socket_fd); - var storage: PosixAddress = undefined; - var addr_len = addressToPosix(address, &storage); - try ev.bind(&maybe_sync.cancel_region, socket_fd, &storage.any, addr_len); - try ev.getsockname(try maybe_sync.enterSync(ev), socket_fd, &storage.any, &addr_len); - return .{ - .handle = socket_fd, - .address = addressFromPosix(&storage), - }; -} - -fn netBindIpUnavailable( - userdata: ?*anyopaque, - address: *const net.IpAddress, - options: net.IpAddress.BindOptions, -) net.IpAddress.BindError!net.Socket { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = address; - _ = options; - return error.NetworkDown; -} - -fn netConnectIpUnavailable( - userdata: ?*anyopaque, - address: *const net.IpAddress, - options: net.IpAddress.ConnectOptions, -) net.IpAddress.ConnectError!net.Stream { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = address; - _ = options; - return error.NetworkDown; -} - -fn netListenUnixUnavailable( - userdata: ?*anyopaque, - address: *const net.UnixAddress, - options: net.UnixAddress.ListenOptions, -) net.UnixAddress.ListenError!net.Socket.Handle { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = address; - _ = options; - return error.AddressFamilyUnsupported; -} - -fn netConnectUnixUnavailable( - userdata: ?*anyopaque, - address: *const net.UnixAddress, -) net.UnixAddress.ConnectError!net.Socket.Handle { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = address; - return error.AddressFamilyUnsupported; -} - -fn 
netSocketCreatePairUnavailable( - userdata: ?*anyopaque, - options: net.Socket.CreatePairOptions, -) net.Socket.CreatePairError![2]net.Socket { - _ = userdata; - _ = options; - return error.OperationUnsupported; -} - -fn netSendUnavailable( - userdata: ?*anyopaque, - handle: net.Socket.Handle, - messages: []net.OutgoingMessage, - flags: net.SendFlags, -) struct { ?net.Socket.SendError, usize } { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = handle; - _ = messages; - _ = flags; - return .{ error.NetworkDown, 0 }; -} - -fn netReceive( - userdata: ?*anyopaque, - handle: net.Socket.Handle, - message_buffer: []net.IncomingMessage, - data_buffer: []u8, - flags: net.ReceiveFlags, - timeout: Io.Timeout, -) struct { ?net.Socket.ReceiveTimeoutError, usize } { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - const ev_io = ev.io(); - - var message_i: usize = 0; - var data_i: usize = 0; - - const deadline: ?struct { - raw: Io.Timestamp, - timespec: linux.kernel_timespec, - clock: Io.Clock, - } = if (timeout.toTimestamp(ev_io)) |deadline| deadline: { - const ns = deadline.raw.toNanoseconds(); - break :deadline .{ - .raw = deadline.raw, - .timespec = .{ - .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), - .nsec = @intCast(@mod(ns, std.time.ns_per_s)), - }, - .clock = deadline.clock, - }; - } else null; - - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - while (true) { - if (message_buffer.len - message_i == 0) return .{ null, message_i }; - const message = &message_buffer[message_i]; - const remaining_data_buffer = data_buffer[data_i..]; - var storage: PosixAddress = undefined; - var iov: iovec = .{ .base = remaining_data_buffer.ptr, .len = remaining_data_buffer.len }; - var msg: linux.msghdr = .{ - .name = &storage.any, - .namelen = @sizeOf(PosixAddress), - .iov = (&iov)[0..1], - .iovlen = 1, - .control = message.control.ptr, - .controllen = @intCast(message.control.len), - .flags = undefined, - }; - - const thread 
= cancel_region.awaitIoUring() catch |err| return .{ err, message_i }; - thread.enqueue().* = .{ - .opcode = .RECVMSG, - .flags = if (deadline) |_| linux.IOSQE_IO_LINK else 0, - .ioprio = 0, - .fd = handle, - .off = 0, - .addr = @intFromPtr(&msg), - .len = 0, - .rw_flags = linux.MSG.NOSIGNAL | - @as(u32, if (flags.oob) linux.MSG.OOB else 0) | - @as(u32, if (flags.peek) linux.MSG.PEEK else 0) | - @as(u32, if (flags.trunc) linux.MSG.TRUNC else 0), - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - if (deadline) |*deadline_ptr| thread.enqueue().* = .{ - .opcode = .LINK_TIMEOUT, - .flags = linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = @intFromPtr(&deadline_ptr.timespec), - .len = 1, - .rw_flags = linux.IORING_TIMEOUT_ABS | @as(u32, switch (deadline_ptr.clock) { - .real => linux.IORING_TIMEOUT_REALTIME, - else => 0, - .boot => linux.IORING_TIMEOUT_BOOTTIME, - }), - .user_data = @intFromEnum(Completion.UserData.wakeup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - const completion = cancel_region.completion(); - switch (completion.errno()) { - .SUCCESS => { - const data = remaining_data_buffer[0..@intCast(completion.result)]; - data_i += data.len; - message.* = .{ - .from = addressFromPosix(&storage), - .data = data, - .control = if (msg.control) |ptr| @as([*]u8, @ptrCast(ptr))[0..msg.controllen] else message.control, - .flags = .{ - .eor = (msg.flags & linux.MSG.EOR) != 0, - .trunc = (msg.flags & linux.MSG.TRUNC) != 0, - .ctrunc = (msg.flags & linux.MSG.CTRUNC) != 0, - .oob = (msg.flags & linux.MSG.OOB) != 0, - .errqueue = if (@hasDecl(linux.MSG, "ERRQUEUE")) (msg.flags & linux.MSG.ERRQUEUE) != 0 else false, - }, - }; - message_i += 1; - continue; - }, - .AGAIN => unreachable, - .INTR, .CANCELED => { - if (deadline) |d| { - if (now(ev, d.clock).nanoseconds >= 
d.raw.nanoseconds) return .{ error.Timeout, message_i }; - } - continue; - }, - - .BADF => |err| return .{ errnoBug(err), message_i }, - .NFILE => return .{ error.SystemFdQuotaExceeded, message_i }, - .MFILE => return .{ error.ProcessFdQuotaExceeded, message_i }, - .FAULT => |err| return .{ errnoBug(err), message_i }, - .INVAL => |err| return .{ errnoBug(err), message_i }, - .NOBUFS => return .{ error.SystemResources, message_i }, - .NOMEM => return .{ error.SystemResources, message_i }, - .NOTCONN => return .{ error.SocketUnconnected, message_i }, - .NOTSOCK => |err| return .{ errnoBug(err), message_i }, - .MSGSIZE => return .{ error.MessageOversize, message_i }, - .PIPE => return .{ error.SocketUnconnected, message_i }, - .OPNOTSUPP => |err| return .{ errnoBug(err), message_i }, - .CONNRESET => return .{ error.ConnectionResetByPeer, message_i }, - .NETDOWN => return .{ error.NetworkDown, message_i }, - else => |err| return .{ unexpectedErrno(err), message_i }, - } - } -} - -fn netReceiveUnavailable( - userdata: ?*anyopaque, - handle: net.Socket.Handle, - message_buffer: []net.IncomingMessage, - data_buffer: []u8, - flags: net.ReceiveFlags, - timeout: Io.Timeout, -) struct { ?net.Socket.ReceiveTimeoutError, usize } { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = handle; - _ = message_buffer; - _ = data_buffer; - _ = flags; - _ = timeout; - return .{ error.NetworkDown, 0 }; -} - -fn netReadUnavailable( - userdata: ?*anyopaque, - fd: net.Socket.Handle, - data: [][]u8, -) net.Stream.Reader.Error!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = fd; - _ = data; - return error.NetworkDown; -} - -fn netWriteUnavailable( - userdata: ?*anyopaque, - handle: net.Socket.Handle, - header: []const u8, - data: []const []const u8, - splat: usize, -) net.Stream.Writer.Error!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = handle; - _ = header; - _ = data; - _ = splat; - return error.NetworkDown; 
-} - -fn netWriteFileUnavailable( - userdata: ?*anyopaque, - socket_handle: net.Socket.Handle, - header: []const u8, - file_reader: *File.Reader, - limit: Io.Limit, -) net.Stream.Writer.WriteFileError!usize { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = socket_handle; - _ = header; - _ = file_reader; - _ = limit; - return error.NetworkDown; -} - -fn netClose(userdata: ?*anyopaque, handles: []const net.Socket.Handle) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - for (handles) |handle| ev.close(&cancel_region, handle); -} - -fn netCloseUnavailable(userdata: ?*anyopaque, handles: []const net.Socket.Handle) void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = handles; - unreachable; // How you gonna close something that was impossible to open? -} - -fn netShutdown( - userdata: ?*anyopaque, - handle: net.Socket.Handle, - how: net.ShutdownHow, -) net.ShutdownError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - var cancel_region: CancelRegion = .init(); - defer cancel_region.deinit(); - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .SHUTDOWN, - .flags = 0, - .ioprio = 0, - .fd = handle, - .off = 0, - .addr = 0, - .len = switch (how) { - .recv => linux.SHUT.RD, - .send => linux.SHUT.WR, - .both => linux.SHUT.RDWR, - }, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .BADF, .NOTSOCK, .INVAL => |err| return errnoBug(err), - .NOTCONN => return error.SocketUnconnected, - .NOBUFS => return error.SystemResources, - else => |err| return unexpectedErrno(err), - } - } -} - -fn netShutdownUnavailable( - userdata: ?*anyopaque, - handle: 
net.Socket.Handle, - how: net.ShutdownHow, -) net.ShutdownError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = handle; - _ = how; - unreachable; // How you gonna shutdown something that was impossible to open? -} - -fn netInterfaceNameResolveUnavailable( - userdata: ?*anyopaque, - name: *const net.Interface.Name, -) net.Interface.Name.ResolveError!net.Interface { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = name; - return error.InterfaceNotFound; -} - -fn netInterfaceNameUnavailable( - userdata: ?*anyopaque, - interface: net.Interface, -) net.Interface.NameError!net.Interface.Name { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = ev; - _ = interface; - return error.Unexpected; -} - -fn netLookupUnavailable( - userdata: ?*anyopaque, - host_name: net.HostName, - resolved: *Io.Queue(net.HostName.LookupResult), - options: net.HostName.LookupOptions, -) net.HostName.LookupError!void { - const ev: *Evented = @ptrCast(@alignCast(userdata)); - _ = host_name; - _ = options; - resolved.close(ev.io()); - return error.NetworkDown; -} - -fn bind( - ev: *Evented, - cancel_region: *CancelRegion, - socket_fd: fd_t, - addr: *const linux.sockaddr, - addr_len: linux.socklen_t, -) !void { - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .BIND, - .flags = 0, - .ioprio = 0, - .fd = socket_fd, - .off = addr_len, - .addr = @intFromPtr(addr), - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .ADDRINUSE => return error.AddressInUse, - .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
- .INVAL => |err| return errnoBug(err), // invalid parameters - .NOTSOCK => |err| return errnoBug(err), // invalid `sockfd` - .AFNOSUPPORT => return error.AddressFamilyUnsupported, - .ADDRNOTAVAIL => return error.AddressUnavailable, - .FAULT => |err| return errnoBug(err), // invalid `addr` pointer - .NOMEM => return error.SystemResources, - else => |err| return unexpectedErrno(err), - } - } -} - -fn chdir(ev: *Evented, sync: *CancelRegion.Sync, path: [*:0]const u8) ChdirError!void { - _ = ev; - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.chdir(path))) { - .SUCCESS => return, - .INTR => continue, - .ACCES => return error.AccessDenied, - .IO => return error.FileSystem, - .LOOP => return error.SymLinkLoop, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOMEM => return error.SystemResources, - .NOTDIR => return error.NotDir, - .ILSEQ => return error.BadPathName, - .FAULT => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - } -} - -fn close(ev: *Evented, cancel_region: *CancelRegion, fd: fd_t) void { - while (true) { - const thread = cancel_region.awaitIoUring() catch |err| switch (err) { - error.Canceled => unreachable, // blocked - }; - thread.enqueue().* = .{ - .opcode = .CLOSE, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = 0, - .addr = 0, - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .BADF => unreachable, // Always a race condition. 
- else => break, - } - } -} - -fn fchdir(ev: *Evented, sync: *CancelRegion.Sync, dir: fd_t) process.SetCurrentDirError!void { - _ = ev; - if (dir == linux.AT.FDCWD) return; - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.fchdir(dir))) { - .SUCCESS => return, - .INTR => continue, - .ACCES => return error.AccessDenied, - .NOTDIR => return error.NotDir, - .IO => return error.FileSystem, - .BADF => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - } -} - -fn fchmodat( - ev: *Evented, - sync: *CancelRegion.Sync, - dir: fd_t, - path: [*:0]const u8, - mode: linux.mode_t, - flags: u32, -) Dir.SetFilePermissionsError!void { - _ = ev; - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.fchmodat2(dir, path, mode, flags))) { - .SUCCESS => return, - .INTR => continue, - .BADF => |err| return errnoBug(err), - .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - .ACCES => return error.AccessDenied, - .IO => return error.InputOutput, - .LOOP => return error.SymLinkLoop, - .NOENT => return error.FileNotFound, - .NOMEM => return error.SystemResources, - .NOTDIR => return error.FileNotFound, - .OPNOTSUPP => return error.OperationUnsupported, - .PERM => return error.PermissionDenied, - .ROFS => return error.ReadOnlyFileSystem, - else => |err| return unexpectedErrno(err), - } - } -} - -fn fchownat( - ev: *Evented, - sync: *CancelRegion.Sync, - dir: fd_t, - path: [*:0]const u8, - owner: linux.uid_t, - group: linux.gid_t, - flags: u32, -) File.SetOwnerError!void { - _ = ev; - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.fchownat(dir, path, owner, group, flags))) { - .SUCCESS => return, - .INTR => continue, - .BADF => |err| return errnoBug(err), // likely fd refers to directory opened without `Dir.OpenOptions.iterate` - .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - .ACCES => return 
error.AccessDenied, - .IO => return error.InputOutput, - .LOOP => return error.SymLinkLoop, - .NOENT => return error.FileNotFound, - .NOMEM => return error.SystemResources, - .NOTDIR => return error.FileNotFound, - .PERM => return error.PermissionDenied, - .ROFS => return error.ReadOnlyFileSystem, - else => |err| return unexpectedErrno(err), - } - } -} - -fn flock( - ev: *Evented, - sync: *CancelRegion.Sync, - fd: fd_t, - op: File.Lock, - blocking: enum { blocking, nonblocking }, -) (File.LockError || error{WouldBlock})!void { - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.flock(fd, LOCK.NB | @as(i32, switch (op) { - .none => LOCK.UN, - .shared => LOCK.SH, - .exclusive => LOCK.EX, - })))) { - .SUCCESS => return, - .INTR => continue, - .BADF => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), // invalid parameters - .NOLCK => return error.SystemResources, - .AGAIN => { - const thread = try sync.cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .NOP, - .flags = 0, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = 0, - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(sync.cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (sync.cancel_region.errno()) { - .SUCCESS, .INTR, .CANCELED => {}, - else => unreachable, - } - switch (blocking) { - .blocking => continue, - .nonblocking => return error.WouldBlock, - } - }, - .OPNOTSUPP => return error.FileLocksUnsupported, - else => |err| return unexpectedErrno(err), - } - } -} - -fn getsockname( - ev: *Evented, - sync: *CancelRegion.Sync, - socket_fd: fd_t, - addr: *linux.sockaddr, - addr_len: *linux.socklen_t, -) !void { - _ = ev; - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.getsockname(socket_fd, addr, addr_len))) { - .SUCCESS => return, - .INTR => continue, - .BADF => |err| return errnoBug(err), // File 
descriptor used after closed. - .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), // invalid parameters - .NOTSOCK => |err| return errnoBug(err), // always a race condition - .NOBUFS => return error.SystemResources, - else => |err| return unexpectedErrno(err), - } - } -} - -fn linkat( - ev: *Evented, - cancel_region: *CancelRegion, - old_dir: fd_t, - old_path: [*:0]const u8, - new_dir: fd_t, - new_path: [*:0]const u8, - flags: u32, -) File.HardLinkError!void { - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .LINKAT, - .flags = 0, - .ioprio = 0, - .fd = old_dir, - .off = @intFromPtr(new_path), - .addr = @intFromPtr(old_path), - .len = @bitCast(new_dir), - .rw_flags = flags, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .ACCES => return error.AccessDenied, - .DQUOT => return error.DiskQuota, - .EXIST => return error.PathAlreadyExists, - .IO => return error.HardwareFailure, - .LOOP => return error.SymLinkLoop, - .MLINK => return error.LinkQuotaExceeded, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOMEM => return error.SystemResources, - .NOSPC => return error.NoSpaceLeft, - .NOTDIR => return error.NotDir, - .PERM => return error.PermissionDenied, - .ROFS => return error.ReadOnlyFileSystem, - .XDEV => return error.CrossDevice, - .ILSEQ => return error.BadPathName, - .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - } -} - -fn lseek( - ev: *Evented, - sync: *CancelRegion.Sync, - fd: fd_t, - offset: u64, - whence: u32, -) File.SeekError!void { - _ = ev; - while (true) { - try sync.cancel_region.await(.nothing); - var result: u64 = undefined; - switch 
(linux.errno(switch (@sizeOf(usize)) { - else => comptime unreachable, - 4 => linux.llseek(fd, offset, &result, whence), - 8 => linux.lseek(fd, @bitCast(offset), whence), - })) { - .SUCCESS => return, - .INTR => continue, - .BADF => |err| return errnoBug(err), // File descriptor used after closed. - .INVAL => return error.Unseekable, - .OVERFLOW => return error.Unseekable, - .SPIPE => return error.Unseekable, - .NXIO => return error.Unseekable, - else => |err| return unexpectedErrno(err), - } - } -} - -fn openat( - ev: *Evented, - cancel_region: *CancelRegion, - dir: fd_t, - path: [*:0]const u8, - flags: linux.O, - mode: linux.mode_t, -) File.OpenError!fd_t { - var mut_flags = flags; - if (@hasField(linux.O, "LARGEFILE")) mut_flags.LARGEFILE = true; - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .OPENAT, - .flags = 0, - .ioprio = 0, - .fd = dir, - .off = 0, - .addr = @intFromPtr(path), - .len = mode, - .rw_flags = @bitCast(mut_flags), - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - const completion = cancel_region.completion(); - switch (completion.errno()) { - .SUCCESS => return completion.result, - .INTR, .CANCELED => continue, - .FAULT => |err| return errnoBug(err), - .INVAL => return error.BadPathName, - .BADF => |err| return errnoBug(err), // File descriptor used after closed. - .ACCES => return error.AccessDenied, - .FBIG => return error.FileTooBig, - .OVERFLOW => return error.FileTooBig, - .ISDIR => return error.IsDir, - .LOOP => return error.SymLinkLoop, - .MFILE => return error.ProcessFdQuotaExceeded, - .NAMETOOLONG => return error.NameTooLong, - .NFILE => return error.SystemFdQuotaExceeded, - .NODEV => return error.NoDevice, - .NOENT => return error.FileNotFound, - .SRCH => return error.FileNotFound, // Linux when opening procfs files. 
- .NOMEM => return error.SystemResources, - .NOSPC => return error.NoSpaceLeft, - .NOTDIR => return error.NotDir, - .PERM => return error.PermissionDenied, - .EXIST => return error.PathAlreadyExists, - .BUSY => return error.DeviceBusy, - .OPNOTSUPP => return error.FileLocksUnsupported, - .AGAIN => return error.WouldBlock, - .TXTBSY => return error.FileBusy, - .NXIO => return error.NoDevice, - .ILSEQ => return error.BadPathName, - else => |err| return unexpectedErrno(err), - } - } -} - -fn preadv( - ev: *Evented, - cancel_region: *CancelRegion, - fd: fd_t, - iov: []const iovec, - offset: ?u64, -) File.Reader.Error!usize { - if (iov.len == 0) return 0; - const gather = iov.len > 1 or iov[0].len > 0xfffff000; - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = if (gather) .READV else .READ, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = offset orelse std.math.maxInt(u64), - .addr = if (gather) @intFromPtr(iov.ptr) else @intFromPtr(iov[0].base), - .len = @intCast(if (gather) iov.len else iov[0].len), - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - const completion = cancel_region.completion(); - switch (completion.errno()) { - .SUCCESS => return @as(u32, @bitCast(completion.result)), - .INTR, .CANCELED => continue, - .INVAL => |err| return errnoBug(err), - .FAULT => |err| return errnoBug(err), - .AGAIN => return error.WouldBlock, - .BADF => |err| return errnoBug(err), // File descriptor used after closed - .IO => return error.InputOutput, - .ISDIR => return error.IsDir, - .NOBUFS => return error.SystemResources, - .NOMEM => return error.SystemResources, - .NOTCONN => return error.SocketUnconnected, - .CONNRESET => return error.ConnectionResetByPeer, - else => |err| return unexpectedErrno(err), - } - } -} - -fn pwritev( - ev: *Evented, - cancel_region: *CancelRegion, - fd: 
fd_t, - iov: []const iovec_const, - offset: ?u64, -) File.Writer.Error!usize { - if (iov.len == 0) return 0; - const scatter = iov.len > 1 or iov[0].len > 0xfffff000; - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = if (scatter) .WRITEV else .WRITE, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = offset orelse std.math.maxInt(u64), - .addr = if (scatter) @intFromPtr(iov.ptr) else @intFromPtr(iov[0].base), - .len = @intCast(if (scatter) iov.len else iov[0].len), - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - const completion = cancel_region.completion(); - switch (completion.errno()) { - .SUCCESS => return @as(u32, @bitCast(completion.result)), - .INTR, .CANCELED => continue, - .INVAL => |err| return errnoBug(err), - .FAULT => |err| return errnoBug(err), - .AGAIN => return error.WouldBlock, - .BADF => return error.NotOpenForWriting, // Can be a race condition. - .DESTADDRREQ => |err| return errnoBug(err), // `connect` was never called. - .DQUOT => return error.DiskQuota, - .FBIG => return error.FileTooBig, - .IO => return error.InputOutput, - .NOSPC => return error.NoSpaceLeft, - .PERM => return error.PermissionDenied, - .PIPE => return error.BrokenPipe, - .CONNRESET => |err| return errnoBug(err), // Not a socket handle. 
- .BUSY => return error.DeviceBusy, - else => |err| return unexpectedErrno(err), - } - } -} - -fn readAll( - ev: *Evented, - cancel_region: *CancelRegion, - fd: fd_t, - buffer: []u8, -) (File.Reader.Error || error{EndOfStream})!void { - var index: usize = 0; - while (buffer.len - index != 0) { - const len = try ev.preadv(cancel_region, fd, &.{ - .{ .base = buffer[index..].ptr, .len = buffer.len - index }, - }, null); - if (len == 0) return error.EndOfStream; - index += len; - } -} - -fn realPath( - ev: *Evented, - sync: *CancelRegion.Sync, - fd: fd_t, - out_buffer: []u8, -) File.RealPathError!usize { - _ = ev; - var procfs_buf: [std.fmt.count("/proc/self/fd/{d}\x00", .{std.math.minInt(fd_t)})]u8 = undefined; - const proc_path = std.fmt.bufPrintSentinel(&procfs_buf, "/proc/self/fd/{d}", .{fd}, 0) catch - unreachable; - while (true) { - try sync.cancel_region.await(.nothing); - const rc = linux.readlink(proc_path, out_buffer.ptr, out_buffer.len); - switch (linux.errno(rc)) { - .SUCCESS => return rc, - .INTR => continue, - .ACCES => return error.AccessDenied, - .FAULT => |err| return errnoBug(err), - .IO => return error.FileSystem, - .LOOP => return error.SymLinkLoop, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOMEM => return error.SystemResources, - .NOTDIR => return error.NotDir, - .ILSEQ => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - } -} - -fn renameat( - ev: *Evented, - cancel_region: *CancelRegion, - old_dir: fd_t, - old_path: [*:0]const u8, - new_dir: fd_t, - new_path: [*:0]const u8, - flags: linux.RENAME, -) Dir.RenameError!void { - while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .RENAMEAT, - .flags = 0, - .ioprio = 0, - .fd = old_dir, - .off = @intFromPtr(new_path), - .addr = @intFromPtr(old_path), - .len = @bitCast(new_dir), - .rw_flags = @bitCast(flags), - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - 
.personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return, - .INTR, .CANCELED => continue, - .ACCES => return error.AccessDenied, - .PERM => return error.PermissionDenied, - .BUSY => return error.FileBusy, - .DQUOT => return error.DiskQuota, - .ISDIR => return error.IsDir, - .IO => return error.HardwareFailure, - .LOOP => return error.SymLinkLoop, - .MLINK => return error.LinkQuotaExceeded, - .NAMETOOLONG => return error.NameTooLong, - .NOENT => return error.FileNotFound, - .NOTDIR => return error.NotDir, - .NOMEM => return error.SystemResources, - .NOSPC => return error.NoSpaceLeft, - .EXIST => return error.DirNotEmpty, - .NOTEMPTY => return error.DirNotEmpty, - .ROFS => return error.ReadOnlyFileSystem, - .XDEV => return error.CrossDevice, - .ILSEQ => return error.BadPathName, - .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - } -} - -fn setsockopt( - ev: *Evented, - cancel_region: *CancelRegion, - fd: fd_t, - level: i32, - opt_name: u32, - option: u32, -) !void { - const o: []const u8 = @ptrCast(&option); - while (true) { - const off: extern struct { - cmd_op: linux.IO_URING_SOCKET_OP, - pad: u32, - } align(@alignOf(u64)) = .{ - .cmd_op = .SETSOCKOPT, - .pad = 0, - }; - const addr: extern struct { level: i32, opt_name: u32 } align(@alignOf(u64)) = .{ - .level = level, - .opt_name = opt_name, - }; - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .URING_CMD, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = @as(*const u64, @ptrCast(&off)).*, - .addr = @as(*const u64, @ptrCast(&addr)).*, - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = @intCast(o.len), - .addr3 = @intFromPtr(o.ptr), - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - 
.SUCCESS => return, - .INTR, .CANCELED => continue, - .BADF => |err| return errnoBug(err), // File descriptor used after closed. - .NOTSOCK => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - .FAULT => |err| return errnoBug(err), - else => |err| return unexpectedErrno(err), - } - } -} - -fn socket( - ev: *Evented, - cancel_region: *CancelRegion, - family: linux.sa_family_t, - options: net.IpAddress.BindOptions, -) error{ - AddressFamilyUnsupported, - ProtocolUnsupportedBySystem, - ProcessFdQuotaExceeded, - SystemFdQuotaExceeded, - SystemResources, - ProtocolUnsupportedByAddressFamily, - SocketModeUnsupported, - OptionUnsupported, - Unexpected, - Canceled, -}!fd_t { - const mode = posixSocketMode(options.mode); - const protocol = posixProtocol(options.protocol); - const socket_fd = while (true) { - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .SOCKET, - .flags = 0, - .ioprio = 0, - .fd = family, - .off = mode | linux.SOCK.CLOEXEC, - .addr = 0, - .len = protocol, - .rw_flags = 0, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - const completion = cancel_region.completion(); - switch (completion.errno()) { - .SUCCESS => break completion.result, - .INTR, .CANCELED => continue, - .AFNOSUPPORT => return error.AddressFamilyUnsupported, - .INVAL => return error.ProtocolUnsupportedBySystem, - .MFILE => return error.ProcessFdQuotaExceeded, - .NFILE => return error.SystemFdQuotaExceeded, - .NOBUFS => return error.SystemResources, - .NOMEM => return error.SystemResources, - .PROTONOSUPPORT => return error.ProtocolUnsupportedByAddressFamily, - .PROTOTYPE => return error.SocketModeUnsupported, - else => |err| return unexpectedErrno(err), - } - }; - errdefer ev.close(cancel_region, socket_fd); - - if (options.ip6_only) { - if (linux.IPV6 == void) return error.OptionUnsupported; - try 
ev.setsockopt(cancel_region, socket_fd, linux.IPPROTO.IPV6, linux.IPV6.V6ONLY, 0); - } - - return socket_fd; -} - -fn stat(ev: *Evented, cancel_region: *CancelRegion, fd: fd_t) Dir.StatError!Dir.Stat { - return ev.statx(cancel_region, fd, "", linux.AT.EMPTY_PATH) catch |err| switch (err) { - error.BadPathName, error.NameTooLong => unreachable, // path is empty - error.AccessDenied => return errnoBug(.ACCES), - error.SymLinkLoop => return errnoBug(.LOOP), - error.FileNotFound => return errnoBug(.NOENT), - error.NotDir => return errnoBug(.NOTDIR), - else => |e| return e, - }; -} - -fn statx( - ev: *Evented, - cancel_region: *CancelRegion, - dir: fd_t, - path: [*:0]const u8, - flags: u32, -) (Dir.StatError || Dir.PathNameError || error{ FileNotFound, NotDir, SymLinkLoop })!Dir.Stat { - while (true) { - var statx_buf = std.mem.zeroes(linux.Statx); - const thread = try cancel_region.awaitIoUring(); - thread.enqueue().* = .{ - .opcode = .STATX, - .flags = 0, - .ioprio = 0, - .fd = dir, - .off = @intFromPtr(&statx_buf), - .addr = @intFromPtr(path), - .len = @bitCast(linux_statx_request), - .rw_flags = flags, - .user_data = @intFromPtr(cancel_region.fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - ev.yield(null, .nothing); - switch (cancel_region.errno()) { - .SUCCESS => return statFromLinux(&statx_buf), - .INTR, .CANCELED => continue, - .ACCES => return error.AccessDenied, - .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
- .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - .LOOP => return error.SymLinkLoop, - .NAMETOOLONG => |err| return errnoBug(err), - .NOENT => return error.FileNotFound, - .NOTDIR => return error.NotDir, - .NOMEM => return error.SystemResources, - else => |err| return unexpectedErrno(err), - } - } -} - -fn urandomReadAll( - ev: *Evented, - cancel_region: *CancelRegion, - buffer: []u8, -) (File.OpenError || File.Reader.Error || error{EndOfStream})!void { - return ev.readAll(cancel_region, try ev.random_fd.open(ev, cancel_region, "/dev/urandom", .{ - .ACCMODE = .RDONLY, - .CLOEXEC = true, - }), buffer); -} - -fn utimensat( - ev: *Evented, - sync: *CancelRegion.Sync, - dir: fd_t, - path: [*:0]const u8, - times: ?*const [2]linux.timespec, - flags: u32, -) File.SetTimestampsError!void { - _ = ev; - while (true) { - try sync.cancel_region.await(.nothing); - switch (linux.errno(linux.utimensat(dir, path, times, flags))) { - .SUCCESS => return, - .INTR => continue, - .BADF => |err| return errnoBug(err), // always a race condition - .FAULT => |err| return errnoBug(err), - .INVAL => |err| return errnoBug(err), - .ACCES => return error.AccessDenied, - .PERM => return error.PermissionDenied, - .ROFS => return error.ReadOnlyFileSystem, - else => |err| return unexpectedErrno(err), - } - } -} - -fn writeAll( - ev: *Evented, - cancel_region: *CancelRegion, - fd: fd_t, - buffer: []const u8, -) (File.Writer.Error || error{EndOfStream})!void { - var index: usize = 0; - while (buffer.len - index != 0) { - const len = try ev.pwritev(cancel_region, fd, &.{ - .{ .base = buffer[index..].ptr, .len = buffer.len - index }, - }, null); - if (len == 0) return error.EndOfStream; - index += len; - } -} - -test { - _ = Fiber.CancelProtection; -} diff --git a/lib/std/Io/Kqueue.zig b/lib/std/Io/Kqueue.zig @@ -31,8 +31,8 @@ const changes_buffer_len = 64; const Thread = struct { thread: std.Thread, - idle_context: Context, - current_context: *Context, + idle_context: 
Io.fiber.Context, + current_context: *Io.fiber.Context, ready_queue: ?*Fiber, kq_fd: posix.fd_t, idle_search_index: u32, @@ -74,7 +74,7 @@ const Thread = struct { const Fiber = struct { required_align: void align(4), - context: Context, + context: Io.fiber.Context, awaiter: ?*Fiber, queue_next: ?*Fiber, cancel_thread: ?*Thread, @@ -291,12 +291,12 @@ fn yield(k: *Kqueue, maybe_ready_fiber: ?*Fiber, pending_task: SwitchMessage.Pen &thread.idle_context; const message: SwitchMessage = .{ .contexts = .{ - .prev = thread.current_context, - .ready = ready_context, + .old = thread.current_context, + .new = ready_context, }, .pending_task = pending_task, }; - std.log.debug("switching from {*} to {*}", .{ message.contexts.prev, message.contexts.ready }); + std.log.debug("switching from {*} to {*}", .{ message.contexts.old, message.contexts.new }); contextSwitch(&message).handle(k); } @@ -393,7 +393,7 @@ fn schedule(k: *Kqueue, thread: *Thread, ready_queue: Fiber.Queue) void { )) |old_head| ready_queue.tail.queue_next = old_head; } -fn mainIdle(k: *Kqueue, message: *const SwitchMessage) callconv(.withStackAlign(.c, @max(@alignOf(Thread), @alignOf(Context)))) noreturn { +fn mainIdle(k: *Kqueue, message: *const SwitchMessage) callconv(.withStackAlign(.c, @max(@alignOf(Thread), @alignOf(Io.fiber.Context)))) noreturn { message.handle(k); k.idle(&k.threads.allocated[0]); k.yield(@ptrCast(&k.main_fiber_buffer), .nothing); @@ -483,10 +483,7 @@ fn idle(k: *Kqueue, thread: *Thread) void { } const SwitchMessage = struct { - contexts: extern struct { - prev: *Context, - ready: *Context, - }, + contexts: Io.fiber.Switch, pending_task: PendingTask, const PendingTask = union(enum) { @@ -500,11 +497,11 @@ const SwitchMessage = struct { fn handle(message: *const SwitchMessage, k: *Kqueue) void { const thread: *Thread = .current(); - thread.current_context = message.contexts.ready; + thread.current_context = message.contexts.new; switch (message.pending_task) { .nothing => {}, - .reschedule 
=> if (message.contexts.prev != &thread.idle_context) { - const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); + .reschedule => if (message.contexts.old != &thread.idle_context) { + const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old)); assert(prev_fiber.queue_next == null); k.schedule(thread, .{ .head = prev_fiber, .tail = prev_fiber }); }, @@ -512,13 +509,13 @@ const SwitchMessage = struct { k.recycle(fiber); }, .register_awaiter => |awaiter| { - const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); + const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old)); assert(prev_fiber.queue_next == null); if (@atomicRmw(?*Fiber, awaiter, .Xchg, prev_fiber, .acq_rel) == Fiber.finished) k.schedule(thread, .{ .head = prev_fiber, .tail = prev_fiber }); }, .register_select => |futures| { - const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); + const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old)); assert(prev_fiber.queue_next == null); for (futures) |any_future| { const future_fiber: *Fiber = @ptrCast(@alignCast(any_future)); @@ -550,195 +547,8 @@ const SwitchMessage = struct { } }; -const Context = switch (builtin.cpu.arch) { - .aarch64 => extern struct { - sp: u64, - fp: u64, - pc: u64, - }, - .x86_64 => extern struct { - rsp: u64, - rbp: u64, - rip: u64, - }, - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), -}; - inline fn contextSwitch(message: *const SwitchMessage) *const SwitchMessage { - return @fieldParentPtr("contexts", switch (builtin.cpu.arch) { - .aarch64 => asm volatile ( - \\ ldp x0, x2, [x1] - \\ ldr x3, [x2, #16] - \\ mov x4, sp - \\ stp x4, fp, [x0] - \\ adr x5, 0f - \\ ldp x4, fp, [x2] - \\ str x5, [x0, #16] - \\ mov sp, x4 - \\ br x3 - \\0: - : [received_message] "={x1}" (-> *const @FieldType(SwitchMessage, "contexts")), 
- : [message_to_send] "{x1}" (&message.contexts), - : .{ - .x0 = true, - .x1 = true, - .x2 = true, - .x3 = true, - .x4 = true, - .x5 = true, - .x6 = true, - .x7 = true, - .x8 = true, - .x9 = true, - .x10 = true, - .x11 = true, - .x12 = true, - .x13 = true, - .x14 = true, - .x15 = true, - .x16 = true, - .x17 = true, - .x19 = true, - .x20 = true, - .x21 = true, - .x22 = true, - .x23 = true, - .x24 = true, - .x25 = true, - .x26 = true, - .x27 = true, - .x28 = true, - .x30 = true, - .z0 = true, - .z1 = true, - .z2 = true, - .z3 = true, - .z4 = true, - .z5 = true, - .z6 = true, - .z7 = true, - .z8 = true, - .z9 = true, - .z10 = true, - .z11 = true, - .z12 = true, - .z13 = true, - .z14 = true, - .z15 = true, - .z16 = true, - .z17 = true, - .z18 = true, - .z19 = true, - .z20 = true, - .z21 = true, - .z22 = true, - .z23 = true, - .z24 = true, - .z25 = true, - .z26 = true, - .z27 = true, - .z28 = true, - .z29 = true, - .z30 = true, - .z31 = true, - .p0 = true, - .p1 = true, - .p2 = true, - .p3 = true, - .p4 = true, - .p5 = true, - .p6 = true, - .p7 = true, - .p8 = true, - .p9 = true, - .p10 = true, - .p11 = true, - .p12 = true, - .p13 = true, - .p14 = true, - .p15 = true, - .fpcr = true, - .fpsr = true, - .ffr = true, - .memory = true, - }), - .x86_64 => asm volatile ( - \\ movq 0(%%rsi), %%rax - \\ movq 8(%%rsi), %%rcx - \\ leaq 0f(%%rip), %%rdx - \\ movq %%rsp, 0(%%rax) - \\ movq %%rbp, 8(%%rax) - \\ movq %%rdx, 16(%%rax) - \\ movq 0(%%rcx), %%rsp - \\ movq 8(%%rcx), %%rbp - \\ jmpq *16(%%rcx) - \\0: - : [received_message] "={rsi}" (-> *const @FieldType(SwitchMessage, "contexts")), - : [message_to_send] "{rsi}" (&message.contexts), - : .{ - .rax = true, - .rcx = true, - .rdx = true, - .rbx = true, - .rsi = true, - .rdi = true, - .r8 = true, - .r9 = true, - .r10 = true, - .r11 = true, - .r12 = true, - .r13 = true, - .r14 = true, - .r15 = true, - .mm0 = true, - .mm1 = true, - .mm2 = true, - .mm3 = true, - .mm4 = true, - .mm5 = true, - .mm6 = true, - .mm7 = true, - .zmm0 = 
true, - .zmm1 = true, - .zmm2 = true, - .zmm3 = true, - .zmm4 = true, - .zmm5 = true, - .zmm6 = true, - .zmm7 = true, - .zmm8 = true, - .zmm9 = true, - .zmm10 = true, - .zmm11 = true, - .zmm12 = true, - .zmm13 = true, - .zmm14 = true, - .zmm15 = true, - .zmm16 = true, - .zmm17 = true, - .zmm18 = true, - .zmm19 = true, - .zmm20 = true, - .zmm21 = true, - .zmm22 = true, - .zmm23 = true, - .zmm24 = true, - .zmm25 = true, - .zmm26 = true, - .zmm27 = true, - .zmm28 = true, - .zmm29 = true, - .zmm30 = true, - .zmm31 = true, - .fpsr = true, - .fpcr = true, - .mxcsr = true, - .rflags = true, - .dirflag = true, - .memory = true, - }), - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), - }); + return @fieldParentPtr("contexts", Io.fiber.contextSwitch(&message.contexts)); } fn mainIdleEntry() callconv(.naked) void { diff --git a/lib/std/Io/Reader.zig b/lib/std/Io/Reader.zig @@ -375,7 +375,7 @@ pub fn appendRemainingAligned( defer list.* = a.toArrayListAligned(alignment); var remaining = limit; - while (remaining.nonzero()) { + while (remaining != .nothing) { const n = stream(r, &a.writer, remaining) catch |err| switch (err) { error.EndOfStream => return, error.WriteFailed => return error.OutOfMemory, diff --git a/lib/std/Io/Threaded.zig b/lib/std/Io/Threaded.zig @@ -72,6 +72,9 @@ stderr_mutex_locker: std.Thread.Id = Thread.invalid_id, stderr_mutex_lock_count: usize = 0, argv0: Argv0, +/// Protected by `mutex`. Determines whether `environ` has been +/// memoized based on `process_environ`. +environ_initialized: bool, environ: Environ, null_file: NullFile = .{}, @@ -125,9 +128,6 @@ pub const Argv0 = switch (native_os) { pub const Environ = struct { /// Unmodified data directly from the OS. process_environ: process.Environ, - /// Protected by `mutex`. Determines whether the other fields have been - /// memoized based on `process_environ`. - initialized: bool = false, /// Protected by `mutex`. Memoized based on `process_environ`. 
Tracks whether the /// environment variables are present, ignoring their value. exist: Exist = .{}, @@ -161,9 +161,6 @@ pub const Environ = struct { }; pub fn scan(environ: *Environ, allocator: std.mem.Allocator) void { - if (environ.initialized) return; - environ.initialized = true; - if (is_windows) { // This value expires with any call that modifies the environment, // which is outside of this Io implementation's control, so references @@ -1589,6 +1586,7 @@ pub fn init( .old_sig_pipe = undefined, .have_signal_handler = init_single_threaded.have_signal_handler, .argv0 = options.argv0, + .environ_initialized = options.environ.block.isEmpty(), .environ = .{ .process_environ = options.environ }, .worker_threads = init_single_threaded.worker_threads, .disable_memory_mapping = options.disable_memory_mapping, @@ -1606,6 +1604,7 @@ pub fn init( .old_sig_pipe = undefined, .have_signal_handler = false, .argv0 = options.argv0, + .environ_initialized = options.environ.block.isEmpty(), .environ = .{ .process_environ = options.environ }, .worker_threads = .init(null), .disable_memory_mapping = options.disable_memory_mapping, @@ -1643,9 +1642,8 @@ pub const init_single_threaded: Threaded = .{ .old_sig_pipe = undefined, .have_signal_handler = false, .argv0 = .empty, - .environ = .{ .process_environ = .{ - .block = if (process.Environ.Block == process.Environ.GlobalBlock) .global else .empty, - } }, + .environ_initialized = true, + .environ = .empty, .worker_threads = .init(null), .disable_memory_mapping = false, }; @@ -1768,6 +1766,8 @@ pub fn io(t: *Threaded) Io { return .{ .userdata = t, .vtable = &.{ + .crashHandler = crashHandler, + .async = async, .concurrent = concurrent, .await = await, @@ -1932,6 +1932,8 @@ pub fn ioBasic(t: *Threaded) Io { return .{ .userdata = t, .vtable = &.{ + .crashHandler = crashHandler, + .async = async, .concurrent = concurrent, .await = await, @@ -2157,6 +2159,14 @@ const use_libc_getrandom = std.c.versionCheck(if (builtin.abi.isAndroid()) .{ 
const use_dev_urandom = @TypeOf(posix.system.getrandom) == void and native_os == .linux; +fn crashHandler(userdata: ?*anyopaque) void { + const t: *Threaded = @ptrCast(@alignCast(userdata)); + _ = t; + const thread = Thread.current orelse return; + thread.status.store(.{ .cancelation = .canceled, .awaitable = .null }, .monotonic); + thread.cancel_protection = .blocked; +} + fn async( userdata: ?*anyopaque, result: []u8, @@ -2838,19 +2848,19 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout var poll_buffer: [poll_buffer_len]posix.pollfd = undefined; var poll_storage: struct { gpa: std.mem.Allocator, - b: *Io.Batch, + batch: *Io.Batch, slice: []posix.pollfd, len: u32, fn add(storage: *@This(), file: Io.File, events: @FieldType(posix.pollfd, "events")) Io.ConcurrentError!void { const len = storage.len; if (len == poll_buffer_len) { - const slice: []posix.pollfd = if (storage.b.context) |context| - @as([*]posix.pollfd, @ptrCast(@alignCast(context)))[0..storage.b.storage.len] + const slice: []posix.pollfd = if (storage.batch.userdata) |batch_userdata| + @as([*]posix.pollfd, @ptrCast(@alignCast(batch_userdata)))[0..storage.batch.storage.len] else allocation: { - const allocation = storage.gpa.alloc(posix.pollfd, storage.b.storage.len) catch + const allocation = storage.gpa.alloc(posix.pollfd, storage.batch.storage.len) catch return error.ConcurrencyUnavailable; - storage.b.context = allocation.ptr; + storage.batch.userdata = allocation.ptr; break :allocation allocation; }; @memcpy(slice[0..poll_buffer_len], storage.slice); @@ -2863,7 +2873,7 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout }; storage.len = len + 1; } - } = .{ .gpa = t.allocator, .b = b, .slice = &poll_buffer, .len = 0 }; + } = .{ .gpa = t.allocator, .batch = b, .slice = &poll_buffer, .len = 0 }; { var index = b.submitted.head; while (index != .none) { @@ -2962,21 +2972,21 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, 
timeout: Io.Timeout } } -const WindowsBatchPendingOperationContext = extern struct { +const WindowsBatchOperationUserdata = extern struct { file: windows.HANDLE, iosb: windows.IO_STATUS_BLOCK, - const Erased = Io.Operation.Storage.Pending.Context; + const Erased = Io.Operation.Storage.Pending.Userdata; comptime { - assert(@sizeOf(Erased) <= @sizeOf(WindowsBatchPendingOperationContext)); + assert(@sizeOf(WindowsBatchOperationUserdata) <= @sizeOf(Erased)); } - fn toErased(context: *WindowsBatchPendingOperationContext) *Erased { - return @ptrCast(context); + fn toErased(userdata: *WindowsBatchOperationUserdata) *Erased { + return @ptrCast(userdata); } - fn fromErased(erased: *Erased) *WindowsBatchPendingOperationContext { + fn fromErased(erased: *Erased) *WindowsBatchOperationUserdata { return @ptrCast(erased); } }; @@ -2989,15 +2999,16 @@ fn batchCancel(userdata: ?*anyopaque, b: *Io.Batch) void { var index = b.pending.head; while (index != .none) { const pending = &b.storage[index.toIndex()].pending; - const context: *WindowsBatchPendingOperationContext = .fromErased(&pending.context); + const operation_userdata: *WindowsBatchOperationUserdata = .fromErased(&pending.userdata); var cancel_iosb: windows.IO_STATUS_BLOCK = undefined; - _ = windows.ntdll.NtCancelIoFileEx(context.file, &context.iosb, &cancel_iosb); + _ = windows.ntdll.NtCancelIoFileEx(operation_userdata.file, &operation_userdata.iosb, &cancel_iosb); index = pending.node.next; } while (b.pending.head != .none) waitForApcOrAlert(); - } else if (b.context) |context| { - t.allocator.free(@as([*]posix.pollfd, @ptrCast(@alignCast(context)))[0..b.storage.len]); - b.context = null; + } else if (b.userdata) |batch_userdata| { + const poll_storage: [*]posix.pollfd = @ptrCast(@alignCast(batch_userdata)); + t.allocator.free(poll_storage[0..b.storage.len]); + b.userdata = null; } } @@ -3007,9 +3018,9 @@ fn batchApc( _: windows.ULONG, ) callconv(.winapi) void { const b: *Io.Batch = @ptrCast(@alignCast(apc_context)); - 
const context: *WindowsBatchPendingOperationContext = @fieldParentPtr("iosb", iosb); - const erased_context = context.toErased(); - const pending: *Io.Operation.Storage.Pending = @fieldParentPtr("context", erased_context); + const operation_userdata: *WindowsBatchOperationUserdata = @fieldParentPtr("iosb", iosb); + const erased_userdata = operation_userdata.toErased(); + const pending: *Io.Operation.Storage.Pending = @fieldParentPtr("userdata", erased_userdata); switch (pending.node.prev) { .none => b.pending.head = pending.node.next, else => |prev_index| b.storage[prev_index.toIndex()].pending.node.next = pending.node.next, @@ -3019,24 +3030,23 @@ fn batchApc( else => |next_index| b.storage[next_index.toIndex()].pending.node.prev = pending.node.prev, } const storage: *Io.Operation.Storage = @fieldParentPtr("pending", pending); - const index = storage - b.storage.ptr; + const index: Io.Operation.OptionalIndex = .fromIndex(storage - b.storage.ptr); switch (iosb.u.Status) { .CANCELLED => { const tail_index = b.unused.tail; switch (tail_index) { - .none => b.unused.head = .fromIndex(index), - else => b.storage[tail_index.toIndex()].unused.next = .fromIndex(index), + .none => b.unused.head = index, + else => b.storage[tail_index.toIndex()].unused.next = index, } storage.* = .{ .unused = .{ .prev = tail_index, .next = .none } }; - b.unused.tail = .fromIndex(index); + b.unused.tail = index; }, else => { switch (b.completed.tail) { - .none => b.completed.head = .fromIndex(index), - else => |tail_index| b.storage[tail_index.toIndex()].completion.node.next = - .fromIndex(index), + .none => b.completed.head = index, + else => |tail_index| b.storage[tail_index.toIndex()].completion.node.next = index, } - b.completed.tail = .fromIndex(index); + b.completed.tail = index; const result: Io.Operation.Result = switch (pending.tag) { .file_read_streaming => .{ .file_read_streaming = ntReadFileResult(iosb) }, .file_write_streaming => .{ .file_write_streaming = ntWriteFileResult(iosb) 
}, @@ -3057,38 +3067,38 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr storage.* = .{ .pending = .{ .node = .{ .prev = b.pending.tail, .next = .none }, .tag = submission.operation, - .context = undefined, + .userdata = undefined, } }; switch (b.pending.tail) { .none => b.pending.head = index, else => |tail_index| b.storage[tail_index.toIndex()].pending.node.next = index, } b.pending.tail = index; - const context: *WindowsBatchPendingOperationContext = .fromErased(&storage.pending.context); + const operation_userdata: *WindowsBatchOperationUserdata = .fromErased(&storage.pending.userdata); errdefer { - context.iosb = .{ .u = .{ .Status = .CANCELLED }, .Information = undefined }; - batchApc(b, &context.iosb, 0); + operation_userdata.iosb = .{ .u = .{ .Status = .CANCELLED }, .Information = undefined }; + batchApc(b, &operation_userdata.iosb, 0); } switch (submission.operation) { .file_read_streaming => |o| o: { var data_index: usize = 0; while (o.data.len - data_index != 0 and o.data[data_index].len == 0) data_index += 1; if (o.data.len - data_index == 0) { - context.iosb = .{ .u = .{ .Status = .SUCCESS }, .Information = 0 }; - batchApc(b, &context.iosb, 0); + operation_userdata.iosb = .{ .u = .{ .Status = .SUCCESS }, .Information = 0 }; + batchApc(b, &operation_userdata.iosb, 0); break :o; } const buffer = o.data[data_index]; const short_buffer_len = std.math.lossyCast(u32, buffer.len); if (o.file.flags.nonblocking) { - context.file = o.file.handle; + operation_userdata.file = o.file.handle; switch (windows.ntdll.NtReadFile( o.file.handle, null, // event &batchApc, b, - &context.iosb, + &operation_userdata.iosb, buffer.ptr, short_buffer_len, null, // byte offset @@ -3097,8 +3107,8 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr .PENDING, .SUCCESS => {}, .CANCELLED => unreachable, else => |status| { - context.iosb.u.Status = status; - batchApc(b, &context.iosb, 0); + 
operation_userdata.iosb.u.Status = status; + batchApc(b, &operation_userdata.iosb, 0); }, } } else { @@ -3110,7 +3120,7 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr null, // event null, // APC routine null, // APC context - &context.iosb, + &operation_userdata.iosb, buffer.ptr, short_buffer_len, null, // byte offset @@ -3123,9 +3133,8 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr }, else => |status| { syscall.finish(); - - context.iosb.u.Status = status; - batchApc(b, &context.iosb, 0); + operation_userdata.iosb.u.Status = status; + batchApc(b, &operation_userdata.iosb, 0); break; }, }; @@ -3134,18 +3143,18 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr .file_write_streaming => |o| o: { const buffer = windowsWriteBuffer(o.header, o.data, o.splat); if (buffer.len == 0) { - context.iosb = .{ .u = .{ .Status = .SUCCESS }, .Information = 0 }; - batchApc(b, &context.iosb, 0); + operation_userdata.iosb = .{ .u = .{ .Status = .SUCCESS }, .Information = 0 }; + batchApc(b, &operation_userdata.iosb, 0); break :o; } if (o.file.flags.nonblocking) { - context.file = o.file.handle; + operation_userdata.file = o.file.handle; switch (windows.ntdll.NtWriteFile( o.file.handle, null, // event &batchApc, b, - &context.iosb, + &operation_userdata.iosb, buffer.ptr, @intCast(buffer.len), null, // byte offset @@ -3154,8 +3163,8 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr .PENDING, .SUCCESS => {}, .CANCELLED => unreachable, else => |status| { - context.iosb.u.Status = status; - batchApc(b, &context.iosb, 0); + operation_userdata.iosb.u.Status = status; + batchApc(b, &operation_userdata.iosb, 0); }, } } else { @@ -3167,7 +3176,7 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr null, // event null, // APC routine null, // APC context - &context.iosb, + &operation_userdata.iosb, buffer.ptr, 
@intCast(buffer.len), null, // byte offset @@ -3180,9 +3189,8 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr }, else => |status| { syscall.finish(); - - context.iosb.u.Status = status; - batchApc(b, &context.iosb, 0); + operation_userdata.iosb.u.Status = status; + batchApc(b, &operation_userdata.iosb, 0); break; }, }; @@ -3194,13 +3202,13 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr else => &windows.ntdll.NtDeviceIoControlFile, }; if (o.file.flags.nonblocking) { - context.file = o.file.handle; + operation_userdata.file = o.file.handle; switch (NtControlFile( o.file.handle, null, // event &batchApc, b, - &context.iosb, + &operation_userdata.iosb, o.code, if (o.in.len > 0) o.in.ptr else null, @intCast(o.in.len), @@ -3210,8 +3218,8 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr .PENDING, .SUCCESS => {}, .CANCELLED => unreachable, else => |status| { - context.iosb.u.Status = status; - batchApc(b, &context.iosb, 0); + operation_userdata.iosb.u.Status = status; + batchApc(b, &operation_userdata.iosb, 0); }, } } else { @@ -3223,7 +3231,7 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr null, // event null, // APC routine null, // APC context - &context.iosb, + &operation_userdata.iosb, o.code, if (o.in.len > 0) o.in.ptr else null, @intCast(o.in.len), @@ -3237,9 +3245,8 @@ fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentErr }, else => |status| { syscall.finish(); - - context.iosb.u.Status = status; - batchApc(b, &context.iosb, 0); + operation_userdata.iosb.u.Status = status; + batchApc(b, &operation_userdata.iosb, 0); break; }, }; @@ -3816,7 +3823,13 @@ fn filePathKind(t: *Threaded, dir: Dir, sub_path: []const u8) !File.Kind { const syscall: Syscall = try .start(); while (true) { var statx = std.mem.zeroes(linux.Statx); - switch (linux.errno(linux.statx(dir.handle, sub_path_posix, 0, .{ .TYPE = true 
}, &statx))) { + switch (linux.errno(linux.statx( + dir.handle, + sub_path_posix, + linux.AT.NO_AUTOMOUNT | linux.AT.SYMLINK_NOFOLLOW, + .{ .TYPE = true }, + &statx, + ))) { .SUCCESS => { syscall.finish(); if (!statx.mask.TYPE) return error.Unexpected; @@ -3832,7 +3845,7 @@ fn filePathKind(t: *Threaded, dir: Dir, sub_path: []const u8) !File.Kind { } } - const stat = try dirStatFile(t, dir, sub_path, .{}); + const stat = try dirStatFile(t, dir, sub_path, .{ .follow_symlinks = false }); return stat.kind; } @@ -13573,13 +13586,13 @@ fn netWriteWindows( addWsaBuf(&iovecs, &len, header); for (data[0 .. data.len - 1]) |bytes| addWsaBuf(&iovecs, &len, bytes); const pattern = data[data.len - 1]; + var backup_buffer: [64]u8 = undefined; if (iovecs.len - len != 0) switch (splat) { 0 => {}, 1 => addWsaBuf(&iovecs, &len, pattern), else => switch (pattern.len) { 0 => {}, 1 => { - var backup_buffer: [64]u8 = undefined; const splat_buffer = &backup_buffer; const memset_len = @min(splat_buffer.len, splat); const buf = splat_buffer[0..memset_len]; @@ -14519,7 +14532,7 @@ pub fn statFromLinux(stx: *const std.os.linux.Statx) Io.UnexpectedError!File.Sta }; } -fn statxKind(stx_mode: u16) File.Kind { +pub fn statxKind(stx_mode: u16) File.Kind { return switch (stx_mode & std.os.linux.S.IFMT) { std.os.linux.S.IFDIR => .directory, std.os.linux.S.IFCHR => .character_device, @@ -14532,7 +14545,7 @@ fn statxKind(stx_mode: u16) File.Kind { }; } -fn statFromPosix(st: *const posix.Stat) File.Stat { +pub fn statFromPosix(st: *const posix.Stat) File.Stat { const atime = st.atime(); const mtime = st.mtime(); const ctime = st.ctime(); @@ -15116,7 +15129,9 @@ const WindowsEnvironStrings = struct { fn scanEnviron(t: *Threaded) void { mutexLock(&t.mutex); defer mutexUnlock(&t.mutex); + if (t.environ_initialized) return; t.environ.scan(t.allocator); + t.environ_initialized = true; } fn processReplace(userdata: ?*anyopaque, options: process.ReplaceOptions) process.ReplaceError { @@ -15256,7 +15271,7 @@ 
fn spawnPosix(t: *Threaded, options: process.SpawnOptions) process.SpawnError!Sp // This pipe communicates to the parent errors in the child between `fork` and `execvpe`. // It is closed by the child (via CLOEXEC) without writing if `execvpe` succeeds. - const err_pipe: [2]posix.fd_t = try pipe2(.{ .CLOEXEC = true }); + const err_pipe = try pipe2(.{ .CLOEXEC = true }); errdefer destroyPipe(err_pipe); t.scanEnviron(); // for PATH @@ -15327,7 +15342,7 @@ fn spawnPosix(t: *Threaded, options: process.SpawnOptions) process.SpawnError!Sp } if (options.start_suspended) { - switch (posix.errno(posix.system.kill(posix.system.getpid(), .STOP))) { + switch (posix.errno(posix.system.kill(0, .STOP))) { .SUCCESS => {}, .PERM => forkBail(ep1, error.PermissionDenied), else => forkBail(ep1, error.Unexpected), @@ -15539,15 +15554,15 @@ fn childCleanupWindows(child: *process.Child) void { windows.CloseHandle(child.thread_handle); child.thread_handle = undefined; - if (child.stdin) |*stdin| { + if (child.stdin) |stdin| { windows.CloseHandle(stdin.handle); child.stdin = null; } - if (child.stdout) |*stdout| { + if (child.stdout) |stdout| { windows.CloseHandle(stdout.handle); child.stdout = null; } - if (child.stderr) |*stderr| { + if (child.stderr) |stderr| { windows.CloseHandle(stderr.handle); child.stderr = null; } @@ -15621,7 +15636,7 @@ fn childWaitPosix(child: *process.Child) process.Child.WaitError!process.Child.T }; } -fn statusToTerm(status: u32) process.Child.Term { +pub fn statusToTerm(status: u32) process.Child.Term { return if (posix.W.IFEXITED(status)) .{ .exited = posix.W.EXITSTATUS(status) } else if (posix.W.IFSIGNALED(status)) @@ -15677,15 +15692,15 @@ fn childKillPosix(child: *process.Child) !void { } fn childCleanupPosix(child: *process.Child) void { - if (child.stdin) |*stdin| { + if (child.stdin) |stdin| { closeFd(stdin.handle); child.stdin = null; } - if (child.stdout) |*stdout| { + if (child.stdout) |stdout| { closeFd(stdout.handle); child.stdout = null; } - if 
(child.stderr) |*stderr| { + if (child.stderr) |stderr| { closeFd(stderr.handle); child.stderr = null; } @@ -15818,21 +15833,57 @@ fn processSpawnWindows(userdata: ?*anyopaque, options: process.SpawnOptions) pro .dwFlags = windows.STARTF_USESTDHANDLES, .hStdInput = switch (options.stdin) { .inherit => peb.ProcessParameters.hStdInput, - .file => |file| file.handle, + .file => |file| try OpenFile(&.{}, .{ + .access_mask = .{ + .STANDARD = .{ .SYNCHRONIZE = true }, + .GENERIC = .{ .READ = true }, + }, + .dir = file.handle, + .sa = &.{ + .nLength = @sizeOf(windows.SECURITY_ATTRIBUTES), + .lpSecurityDescriptor = null, + .bInheritHandle = windows.TRUE, + }, + .creation = .OPEN, + }), .ignore => nul_handle, .pipe => stdin_pipe[1], .close => null, }, .hStdOutput = switch (options.stdout) { .inherit => peb.ProcessParameters.hStdOutput, - .file => |file| file.handle, + .file => |file| try OpenFile(&.{}, .{ + .access_mask = .{ + .STANDARD = .{ .SYNCHRONIZE = true }, + .GENERIC = .{ .WRITE = true }, + }, + .dir = file.handle, + .sa = &.{ + .nLength = @sizeOf(windows.SECURITY_ATTRIBUTES), + .lpSecurityDescriptor = null, + .bInheritHandle = windows.TRUE, + }, + .creation = .OPEN, + }), .ignore => nul_handle, .pipe => stdout_pipe[1], .close => null, }, .hStdError = switch (options.stderr) { .inherit => peb.ProcessParameters.hStdError, - .file => |file| file.handle, + .file => |file| try OpenFile(&.{}, .{ + .access_mask = .{ + .STANDARD = .{ .SYNCHRONIZE = true }, + .GENERIC = .{ .WRITE = true }, + }, + .dir = file.handle, + .sa = &.{ + .nLength = @sizeOf(windows.SECURITY_ATTRIBUTES), + .lpSecurityDescriptor = null, + .bInheritHandle = windows.TRUE, + }, + .creation = .OPEN, + }), .ignore => nul_handle, .pipe => stderr_pipe[1], .close => null, @@ -16030,6 +16081,10 @@ fn processSpawnWindows(userdata: ?*anyopaque, options: process.SpawnOptions) pro .id = piProcInfo.hProcess, .thread_handle = piProcInfo.hThread, .stdin = stdin: switch (options.stdin) { + .file => { + 
windows.CloseHandle(siStartInfo.hStdInput.?); + break :stdin null; + }, .pipe => { windows.CloseHandle(stdin_pipe[1]); break :stdin .{ .handle = stdin_pipe[0], .flags = .{ .nonblocking = false } }; @@ -16037,6 +16092,10 @@ fn processSpawnWindows(userdata: ?*anyopaque, options: process.SpawnOptions) pro else => null, }, .stdout = stdout: switch (options.stdout) { + .file => { + windows.CloseHandle(siStartInfo.hStdOutput.?); + break :stdout null; + }, .pipe => { windows.CloseHandle(stdout_pipe[1]); break :stdout .{ .handle = stdout_pipe[0], .flags = .{ .nonblocking = true } }; @@ -16044,6 +16103,10 @@ fn processSpawnWindows(userdata: ?*anyopaque, options: process.SpawnOptions) pro else => null, }, .stderr = stderr: switch (options.stderr) { + .file => { + windows.CloseHandle(siStartInfo.hStdError.?); + break :stderr null; + }, .pipe => { windows.CloseHandle(stderr_pipe[1]); break :stderr .{ .handle = stderr_pipe[0], .flags = .{ .nonblocking = true } }; @@ -16054,6 +16117,8 @@ fn processSpawnWindows(userdata: ?*anyopaque, options: process.SpawnOptions) pro }; } +fn inheritFile() windows.HANDLE {} + fn getCngDevice(t: *Threaded) Io.RandomSecureError!windows.HANDLE { { mutexLock(&t.mutex); @@ -17758,11 +17823,7 @@ const parking_futex = struct { waiter.node.next = waking_head; waking_head = &waiter.node; num_removed += 1; - // Signal to `waiter` that they're about to be unparked, in case we're racing with their - // timeout. See corresponding logic in `wake`. 
- waiter.address = 0; } - _ = bucket.num_waiters.fetchSub(num_removed, .monotonic); } @@ -19056,7 +19117,7 @@ const OpenError = error{ const OpenFileOptions = struct { access_mask: windows.ACCESS_MASK, dir: ?windows.HANDLE = null, - sa: ?*windows.SECURITY_ATTRIBUTES = null, + sa: ?*const windows.SECURITY_ATTRIBUTES = null, share_access: windows.FILE.SHARE = .VALID_FLAGS, creation: windows.FILE.CREATE_DISPOSITION, filter: Filter = .non_directory_only, @@ -19076,10 +19137,10 @@ const OpenFileOptions = struct { /// TODO: inline this logic everywhere and delete this function fn OpenFile(sub_path_w: []const u16, options: OpenFileOptions) OpenError!windows.HANDLE { - if (std.mem.eql(u16, sub_path_w, &[_]u16{'.'}) and options.filter == .non_directory_only) { + if (std.mem.eql(u16, sub_path_w, &.{'.'}) and options.filter == .non_directory_only) { return error.IsDir; } - if (std.mem.eql(u16, sub_path_w, &[_]u16{ '.', '.' }) and options.filter == .non_directory_only) { + if (std.mem.eql(u16, sub_path_w, &.{ '.', '.' 
}) and options.filter == .non_directory_only) { return error.IsDir; } diff --git a/lib/std/Io/Uring.zig b/lib/std/Io/Uring.zig @@ -0,0 +1,6173 @@ +const addressFromPosix = Io.Threaded.addressFromPosix; +const addressToPosix = Io.Threaded.addressToPosix; +const Alignment = std.mem.Alignment; +const Allocator = std.mem.Allocator; +const Argv0 = Io.Threaded.Argv0; +const assert = std.debug.assert; +const builtin = @import("builtin"); +const ChdirError = Io.Threaded.ChdirError; +const clockToPosix = Io.Threaded.clockToPosix; +const Csprng = Io.Threaded.Csprng; +const default_PATH = Io.Threaded.default_PATH; +const Dir = Io.Dir; +const Environ = Io.Threaded.Environ; +const errnoBug = Io.Threaded.errnoBug; +const Evented = @This(); +const fallbackSeed = Io.Threaded.fallbackSeed; +const fd_t = linux.fd_t; +const File = Io.File; +const Io = std.Io; +const IoUring = linux.IoUring; +const iovec = std.posix.iovec; +const iovec_const = std.posix.iovec_const; +const linux = std.os.linux; +const linux_statx_request = Io.Threaded.linux_statx_request; +const LOCK = std.posix.LOCK; +const log = std.log.scoped(.@"io-uring"); +const max_iovecs_len = Io.Threaded.max_iovecs_len; +const nanosecondsFromPosix = Io.Threaded.nanosecondsFromPosix; +const net = Io.net; +const PATH_MAX = linux.PATH_MAX; +const pathToPosix = Io.Threaded.pathToPosix; +const pid_t = linux.pid_t; +const PosixAddress = Io.Threaded.PosixAddress; +const posixAddressFamily = Io.Threaded.posixAddressFamily; +const posixProtocol = Io.Threaded.posixProtocol; +const posixSocketMode = Io.Threaded.posixSocketMode; +const process = std.process; +const recoverableOsBugDetected = Io.Threaded.recoverableOsBugDetected; +const setTimestampToPosix = Io.Threaded.setTimestampToPosix; +const splat_buffer_size = Io.Threaded.splat_buffer_size; +const statFromLinux = Io.Threaded.statFromLinux; +const statxKind = Io.Threaded.statxKind; +const std = @import("../std.zig"); +const timestampFromPosix = Io.Threaded.timestampFromPosix; +const 
unexpectedErrno = std.posix.unexpectedErrno; +const winsize = std.posix.winsize; + +const tracy = if (@hasDecl(@import("root"), "tracy")) @import("root").tracy else struct { + const enable = false; + inline fn fiberEnter(fiber: [*:0]const u8) void { + _ = fiber; + } + inline fn fiberLeave() void {} +}; + +/// Empirically saw >128KB being used by the self-hosted backend to panic. +/// Empirically saw glibc complain about 256KB. +const idle_stack_size = 512 * 1024; + +const max_idle_search = 1; +const max_steal_ready_search = 2; +const max_steal_free_search = 4; + +backing_allocator_needs_mutex: bool, +backing_allocator_mutex: Io.Mutex, +/// Does not need to be thread-safe if not used elsewhere. +backing_allocator: Allocator, +main_fiber_buffer: [ + std.mem.alignForward(usize, @sizeOf(Fiber), @alignOf(Completion)) + @sizeOf(Completion) +]u8 align(@max(@alignOf(Fiber), @alignOf(Completion))), +log2_ring_entries: u4, +threads: Thread.List, +sync_limit: ?Io.Semaphore, + +stderr_writer_initialized: bool = false, +stderr_mutex: Io.Mutex, +stderr_writer: File.Writer = .{ + .io = undefined, + .interface = Io.File.Writer.initInterface(&.{}), + .file = .stderr(), + .mode = .streaming, +}, +stderr_mode: Io.Terminal.Mode = .no_color, + +environ_mutex: Io.Mutex, +environ_initialized: bool, +environ: Environ, + +null_fd: CachedFd, +random_fd: CachedFd, + +csprng_mutex: Io.Mutex, +csprng: Csprng, + +const Thread = struct { + required_align: void align(4), + thread: std.Thread, + idle_context: Io.fiber.Context, + current_context: *Io.fiber.Context, + ready_queue: ?*Fiber, + free_queue: ?*Fiber, + io_uring: IoUring, + idle_search_index: u32, + steal_ready_search_index: u32, + steal_free_search_index: u32, + name_arena: if (tracy.enable) std.heap.ArenaAllocator.State else struct {}, + csprng: Csprng, + + threadlocal var self: ?*Thread = null; + + noinline fn current() *Thread { + return self.?; + } + + fn deinit(thread: *Thread, gpa: Allocator) void { + var next_fiber = 
thread.free_queue; + while (next_fiber) |free_fiber| { + next_fiber = free_fiber.status.free_next; + gpa.free(free_fiber.allocatedSlice()); + } + thread.io_uring.deinit(); + } + + fn currentFiber(thread: *Thread) *Fiber { + assert(thread.current_context != &thread.idle_context); + return @fieldParentPtr("context", thread.current_context); + } + + fn enqueue(thread: *Thread) *linux.io_uring_sqe { + while (true) return thread.io_uring.get_sqe() catch { + thread.submit(); + continue; + }; + } + + fn submit(thread: *Thread) void { + _ = thread.io_uring.submit() catch |err| switch (err) { + error.SignalInterrupt => {}, + else => |e| @panic(@errorName(e)), + }; + } + + const List = struct { + allocated: []Thread, + reserved: u32, + active: u32, + }; +}; + +const Fiber = struct { + required_align: void align(4), + context: Io.fiber.Context, + await_count: i32, + link: union { + awaiter: ?*Fiber, + group: struct { prev: ?*Fiber, next: ?*Fiber }, + }, + status: union(enum) { + queue_next: ?*Fiber, + awaiting_group: Group, + free_next: ?*Fiber, + }, + cancel_status: CancelStatus, + cancel_protection: CancelProtection, + name: if (tracy.enable) [*:0]const u8 else void, + + var next_name: u64 = 0; + + const CancelStatus = packed struct(u32) { + requested: bool, + awaiting: Awaiting, + + const unrequested: CancelStatus = .{ .requested = false, .awaiting = .nothing }; + + const Awaiting = enum(u31) { + nothing = std.math.maxInt(u31), + group = std.math.maxInt(u31) - 1, + select = std.math.maxInt(u31) - 2, + /// An io_uring fd. 
+ _, + + fn subWrap(lhs: Awaiting, rhs: Awaiting) Awaiting { + return @enumFromInt(@intFromEnum(lhs) -% @intFromEnum(rhs)); + } + + fn fromIoUringFd(fd: fd_t) Awaiting { + const awaiting: Awaiting = @enumFromInt(fd); + switch (awaiting) { + .nothing, .group, .select => unreachable, + _ => return awaiting, + } + } + + fn toIoUringFd(awaiting: Awaiting) fd_t { + switch (awaiting) { + .nothing, .group, .select => unreachable, + _ => return @intFromEnum(awaiting), + } + } + }; + + fn changeAwaiting( + cancel_status: *CancelStatus, + old_awaiting: Awaiting, + new_awaiting: Awaiting, + ) bool { + const old_cancel_status = @atomicRmw(CancelStatus, cancel_status, .Add, .{ + .requested = false, + .awaiting = new_awaiting.subWrap(old_awaiting), + }, .monotonic); + assert(old_cancel_status.awaiting == old_awaiting); + return old_cancel_status.requested; + } + }; + + const CancelProtection = packed struct { + user: Io.CancelProtection, + acknowledged: bool, + + const unblocked: CancelProtection = .{ .user = .unblocked, .acknowledged = false }; + + fn check(cancel_protection: CancelProtection) Io.CancelProtection { + return @enumFromInt(@intFromBool(cancel_protection != unblocked)); + } + + fn acknowledge(cancel_protection: *CancelProtection) void { + assert(!cancel_protection.acknowledged); + cancel_protection.acknowledged = true; + } + + fn recancel(cancel_protection: *CancelProtection) void { + assert(cancel_protection.acknowledged); + cancel_protection.acknowledged = false; + } + + test check { + try std.testing.expectEqual(Io.CancelProtection.unblocked, check(.unblocked)); + try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .unblocked, + .acknowledged = true, + })); + try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .blocked, + .acknowledged = false, + })); + try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .blocked, + .acknowledged = true, + })); + } + }; + + const finished: ?*Fiber = 
@ptrFromInt(@alignOf(Fiber)); + + const max_result_align: Alignment = .@"16"; + const max_result_size = max_result_align.forward(512); + /// This includes any stack realignments that need to happen, and also the + /// initial frame return address slot and argument frame, depending on target. + const min_stack_size = 60 * 1024 * 1024; + const max_context_align: Alignment = .@"16"; + const max_context_size = max_context_align.forward(1024); + const max_closure_size: usize = @sizeOf(AsyncClosure); + const max_closure_align: Alignment = .of(AsyncClosure); + const allocation_size = std.mem.alignForward( + usize, + max_closure_align.max(max_context_align).forward( + max_result_align.forward(@sizeOf(Fiber)) + max_result_size + min_stack_size, + ) + max_closure_size + max_context_size, + std.heap.page_size_max, + ); + comptime { + assert(max_result_align.compare(.gte, .of(Completion))); + assert(max_result_size >= @sizeOf(Completion)); + } + + fn create(ev: *Evented) error{OutOfMemory}!*Fiber { + const thread: *Thread = .current(); + if (@atomicRmw(?*Fiber, &thread.free_queue, .Xchg, finished, .acquire)) |free_fiber| { + assert(free_fiber != finished); + @atomicStore(?*Fiber, &thread.free_queue, free_fiber.status.free_next, .release); + return free_fiber; + } + const active_threads = @atomicLoad(u32, &ev.threads.active, .acquire); + for (0..@min(max_steal_free_search, active_threads)) |_| { + defer thread.steal_free_search_index += 1; + if (thread.steal_free_search_index == active_threads) thread.steal_free_search_index = 0; + const steal_free_search_thread = + &ev.threads.allocated[0..active_threads][thread.steal_free_search_index]; + if (steal_free_search_thread == thread) continue; + const free_fiber = + @atomicLoad(?*Fiber, &steal_free_search_thread.free_queue, .monotonic) orelse continue; + if (free_fiber == finished) continue; + if (@cmpxchgWeak( + ?*Fiber, + &steal_free_search_thread.free_queue, + free_fiber, + null, + .acquire, + .monotonic, + )) |_| continue; + 
@atomicStore(?*Fiber, &thread.free_queue, free_fiber.status.free_next, .release); + return free_fiber; + } + @atomicStore(?*Fiber, &thread.free_queue, null, .monotonic); + return @ptrCast(try ev.allocator().alignedAlloc(u8, .of(Fiber), allocation_size)); + } + + fn destroy(fiber: *Fiber) void { + const thread: *Thread = .current(); + assert(fiber.status.queue_next == null); + fiber.status = .{ .free_next = @atomicLoad(?*Fiber, &thread.free_queue, .acquire) }; + while (true) fiber.status.free_next = @cmpxchgWeak( + ?*Fiber, + &thread.free_queue, + fiber.status.free_next, + fiber, + .acq_rel, + .acquire, + ) orelse break; + } + + fn allocatedSlice(f: *Fiber) []align(@alignOf(Fiber)) u8 { + return @as([*]align(@alignOf(Fiber)) u8, @ptrCast(f))[0..allocation_size]; + } + + fn allocatedEnd(f: *Fiber) [*]u8 { + const allocated_slice = f.allocatedSlice(); + return allocated_slice[allocated_slice.len..].ptr; + } + + fn resultPointer(f: *Fiber, comptime Result: type) *Result { + return @ptrCast(@alignCast(f.resultBytes(.of(Result)))); + } + + fn resultBytes(f: *Fiber, alignment: Alignment) [*]u8 { + return @ptrFromInt(alignment.forward(@intFromPtr(f) + @sizeOf(Fiber))); + } + + const Queue = struct { head: *Fiber, tail: *Fiber }; + + /// Like a `*Fiber`, but 2 bits smaller than a pointer (because the LSBs are always 0 due to + /// alignment) so that those two bits can be used in a `packed struct`. 
+ const PackedPtr = enum(@Int(.unsigned, @bitSizeOf(usize) - 2)) { + null = 0, + all_ones = std.math.maxInt(@Int(.unsigned, @bitSizeOf(usize) - 2)), + _, + + const Split = packed struct(usize) { low: u2, high: PackedPtr }; + fn pack(ptr: ?*Fiber) PackedPtr { + const split: Split = @bitCast(@intFromPtr(ptr)); + assert(split.low == 0); + return split.high; + } + fn unpack(ptr: PackedPtr) ?*Fiber { + const split: Split = .{ .low = 0, .high = ptr }; + return @ptrFromInt(@as(usize, @bitCast(split))); + } + }; + + fn requestCancel(fiber: *Fiber, ev: *Evented) void { + const cancel_status = @atomicRmw( + Fiber.CancelStatus, + &fiber.cancel_status, + .Or, + .{ .requested = true, .awaiting = @enumFromInt(0) }, + .acquire, + ); + assert(!cancel_status.requested); + switch (cancel_status.awaiting) { + .nothing => {}, + .group => { + // The awaiter received a cancelation request while awaiting a group, + // so propagate the cancelation to the group. + if (fiber.status.awaiting_group.cancel(ev, null)) { + fiber.status = .{ .queue_next = null }; + _ = ev.schedule(.current(), .{ .head = fiber, .tail = fiber }); + } + }, + .select => if (@atomicRmw(i32, &fiber.await_count, .Add, 1, .monotonic) == -1) { + _ = ev.schedule(.current(), .{ .head = fiber, .tail = fiber }); + }, + _ => |awaiting| { + const awaiting_io_uring_fd = awaiting.toIoUringFd(); + const thread: *Thread = .current(); + thread.enqueue().* = if (thread.io_uring.fd == awaiting_io_uring_fd) .{ + .opcode = .ASYNC_CANCEL, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(fiber), + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.Userdata.wakeup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + } else .{ + .opcode = .MSG_RING, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = awaiting_io_uring_fd, + .off = @intFromPtr(fiber) | 0b01, + .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA), + .len = 
0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.Userdata.cleanup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + }, + } + } +}; + +const CancelRegion = struct { + fiber: *Fiber, + status: Fiber.CancelStatus, + fn init() CancelRegion { + const fiber = Thread.current().currentFiber(); + return .{ + .fiber = fiber, + .status = .{ + .requested = fiber.cancel_protection.check() == .unblocked, + .awaiting = .nothing, + }, + }; + } + fn initBlocked() CancelRegion { + return .{ + .fiber = Thread.current().currentFiber(), + .status = .{ .requested = false, .awaiting = .nothing }, + }; + } + fn deinit(cancel_region: *CancelRegion) void { + if (cancel_region.status.requested) { + @branchHint(.likely); + _ = cancel_region.fiber.cancel_status.changeAwaiting( + cancel_region.status.awaiting, + .nothing, + ); + } + cancel_region.* = undefined; + } + fn await(cancel_region: *CancelRegion, awaiting: Fiber.CancelStatus.Awaiting) Io.Cancelable!void { + if (!cancel_region.status.requested) { + @branchHint(.unlikely); + return; + } + const status: Fiber.CancelStatus = .{ .requested = true, .awaiting = awaiting }; + if (cancel_region.fiber.cancel_status.changeAwaiting( + cancel_region.status.awaiting, + status.awaiting, + )) { + @branchHint(.unlikely); + cancel_region.fiber.cancel_protection.acknowledge(); + cancel_region.status = .unrequested; + return error.Canceled; + } + cancel_region.status = status; + } + fn awaitIoUring(cancel_region: *CancelRegion) Io.Cancelable!*Thread { + const thread: *Thread = .current(); + try cancel_region.await(.fromIoUringFd(thread.io_uring.fd)); + return thread; + } + fn completion(cancel_region: *const CancelRegion) Completion { + return cancel_region.fiber.resultPointer(Completion).*; + } + fn errno(cancel_region: *const CancelRegion) linux.E { + return cancel_region.completion().errno(); + } + + const Sync = struct { + cancel_region: CancelRegion, + fn init(ev: *Evented) Io.Cancelable!Sync { + 
if (ev.sync_limit) |*sync_limit| try sync_limit.wait(ev.io()); + return .{ .cancel_region = .init() }; + } + fn initBlocked(ev: *Evented) Sync { + if (ev.sync_limit) |*sync_limit| sync_limit.waitUncancelable(ev.io()); + return .{ .cancel_region = .initBlocked() }; + } + fn deinit(sync: *Sync, ev: *Evented) void { + sync.cancel_region.deinit(); + if (ev.sync_limit) |*sync_limit| sync_limit.post(ev.io()); + } + + const Maybe = union(enum) { + cancel_region: CancelRegion, + sync: Sync, + + fn deinit(maybe: *Maybe, ev: *Evented) void { + switch (maybe.*) { + .cancel_region => |*cancel_region| cancel_region.deinit(), + .sync => |*sync| sync.deinit(ev), + } + } + + fn enterSync(maybe: *Maybe, ev: *Evented) Io.Cancelable!*Sync { + switch (maybe.*) { + .cancel_region => |cancel_region| { + if (ev.sync_limit) |*sync_limit| try sync_limit.wait(ev.io()); + maybe.* = .{ .sync = .{ .cancel_region = cancel_region } }; + }, + .sync => {}, + } + return &maybe.sync; + } + + fn leaveSync(maybe: *Maybe, ev: *Evented) void { + switch (maybe.*) { + .cancel_region => {}, + .sync => |sync| { + if (ev.sync_limit) |*sync_limit| sync_limit.post(ev.io()); + maybe.* = .{ .cancel_region = sync.cancel_region }; + }, + } + } + + fn cancelRegion(maybe: *Maybe) *CancelRegion { + return switch (maybe.*) { + .cancel_region => |*cancel_region| cancel_region, + .sync => |*sync| &sync.cancel_region, + }; + } + }; + }; +}; + +const CachedFd = struct { + once: Once, + + const Once = enum(fd_t) { + uninitialized = -1, + initializing = -2, + /// fd + _, + + fn fromFd(fd: fd_t) Once { + return @enumFromInt(@as(u31, @intCast(fd))); + } + + fn toFd(once: Once) fd_t { + return @as(u31, @intCast(@intFromEnum(once))); + } + }; + + const init: CachedFd = .{ .once = .uninitialized }; + + fn close(cached_fd: *CachedFd) void { + switch (cached_fd.once) { + .uninitialized => {}, + .initializing => unreachable, + _ => |fd| { + assert(@intFromEnum(fd) >= 0); + _ = linux.close(@intFromEnum(fd)); + cached_fd.* = .init; + 
}, + } + } + + fn open( + cached_fd: *CachedFd, + ev: *Evented, + cancel_region: *CancelRegion, + path: [*:0]const u8, + flags: linux.O, + ) File.OpenError!fd_t { + var once = @atomicLoad(Once, &cached_fd.once, .monotonic); + while (true) { + switch (once) { + .uninitialized => {}, + .initializing => try futexWait( + ev, + @ptrCast(&cached_fd.once), + @bitCast(@intFromEnum(once)), + .none, + ), + _ => |fd| { + @branchHint(.likely); + return fd.toFd(); + }, + } + once = @cmpxchgWeak( + Once, + &cached_fd.once, + .uninitialized, + .initializing, + .monotonic, + .monotonic, + ) orelse { + errdefer { + @atomicStore(Once, &cached_fd.once, .uninitialized, .monotonic); + futexWake(ev, @ptrCast(&cached_fd.once), 1); + } + const fd = try ev.openat(cancel_region, linux.AT.FDCWD, path, flags, 0); + @atomicStore(Once, &cached_fd.once, .fromFd(fd), .monotonic); + futexWake(ev, @ptrCast(&cached_fd.once), std.math.maxInt(u32)); + return fd; + }; + } + } +}; + +pub fn allocator(ev: *Evented) std.mem.Allocator { + return if (ev.backing_allocator_needs_mutex) .{ + .ptr = ev, + .vtable = &.{ + .alloc = alloc, + .resize = resize, + .remap = remap, + .free = free, + }, + } else ev.backing_allocator; +} + +fn alloc(userdata: *anyopaque, len: usize, alignment: std.mem.Alignment, ret_addr: usize) ?[*]u8 { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.backing_allocator_mutex.lockUncancelable(ev_io); + defer ev.backing_allocator_mutex.unlock(ev_io); + return ev.backing_allocator.rawAlloc(len, alignment, ret_addr); +} + +fn resize( + userdata: *anyopaque, + memory: []u8, + alignment: std.mem.Alignment, + new_len: usize, + ret_addr: usize, +) bool { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.backing_allocator_mutex.lockUncancelable(ev_io); + defer ev.backing_allocator_mutex.unlock(ev_io); + return ev.backing_allocator.rawResize(memory, alignment, new_len, ret_addr); +} + +fn remap( + userdata: *anyopaque, + 
memory: []u8, + alignment: Alignment, + new_len: usize, + ret_addr: usize, +) ?[*]u8 { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.backing_allocator_mutex.lockUncancelable(ev_io); + defer ev.backing_allocator_mutex.unlock(ev_io); + return ev.backing_allocator.rawRemap(memory, alignment, new_len, ret_addr); +} + +fn free(userdata: *anyopaque, memory: []u8, alignment: std.mem.Alignment, ret_addr: usize) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.backing_allocator_mutex.lockUncancelable(ev_io); + defer ev.backing_allocator_mutex.unlock(ev_io); + return ev.backing_allocator.rawFree(memory, alignment, ret_addr); +} + +pub fn io(ev: *Evented) Io { + return .{ + .userdata = ev, + .vtable = &.{ + .crashHandler = crashHandler, + + .async = async, + .concurrent = concurrent, + .await = await, + .cancel = cancel, + + .groupAsync = groupAsync, + .groupConcurrent = groupConcurrent, + .groupAwait = groupAwait, + .groupCancel = groupCancel, + + .recancel = recancel, + .swapCancelProtection = swapCancelProtection, + .checkCancel = checkCancel, + + .select = select, + + .futexWait = futexWait, + .futexWaitUncancelable = futexWaitUncancelable, + .futexWake = futexWake, + + .operate = operate, + .batchAwaitAsync = batchAwaitAsync, + .batchAwaitConcurrent = batchAwaitConcurrent, + .batchCancel = batchCancel, + + .dirCreateDir = dirCreateDir, + .dirCreateDirPath = dirCreateDirPath, + .dirCreateDirPathOpen = dirCreateDirPathOpen, + .dirOpenDir = dirOpenDir, + .dirStat = dirStat, + .dirStatFile = dirStatFile, + .dirAccess = dirAccess, + .dirCreateFile = dirCreateFile, + .dirCreateFileAtomic = dirCreateFileAtomic, + .dirOpenFile = dirOpenFile, + .dirClose = dirClose, + .dirRead = dirRead, + .dirRealPath = dirRealPath, + .dirRealPathFile = dirRealPathFile, + .dirDeleteFile = dirDeleteFile, + .dirDeleteDir = dirDeleteDir, + .dirRename = dirRename, + .dirRenamePreserve = dirRenamePreserve, + .dirSymLink 
= dirSymLink, + .dirReadLink = dirReadLink, + .dirSetOwner = dirSetOwner, + .dirSetFileOwner = dirSetFileOwner, + .dirSetPermissions = dirSetPermissions, + .dirSetFilePermissions = dirSetFilePermissions, + .dirSetTimestamps = dirSetTimestamps, + .dirHardLink = dirHardLink, + + .fileStat = fileStat, + .fileLength = fileLength, + .fileClose = fileClose, + .fileWritePositional = fileWritePositional, + .fileWriteFileStreaming = fileWriteFileStreaming, + .fileWriteFilePositional = fileWriteFilePositional, + .fileReadPositional = fileReadPositional, + .fileSeekBy = fileSeekBy, + .fileSeekTo = fileSeekTo, + .fileSync = fileSync, + .fileIsTty = fileIsTty, + .fileEnableAnsiEscapeCodes = fileEnableAnsiEscapeCodes, + .fileSupportsAnsiEscapeCodes = fileIsTty, + .fileSetLength = fileSetLength, + .fileSetOwner = fileSetOwner, + .fileSetPermissions = fileSetPermissions, + .fileSetTimestamps = fileSetTimestamps, + .fileLock = fileLock, + .fileTryLock = fileTryLock, + .fileUnlock = fileUnlock, + .fileDowngradeLock = fileDowngradeLock, + .fileRealPath = fileRealPath, + .fileHardLink = fileHardLink, + + .fileMemoryMapCreate = fileMemoryMapCreate, + .fileMemoryMapDestroy = fileMemoryMapDestroy, + .fileMemoryMapSetLength = fileMemoryMapSetLength, + .fileMemoryMapRead = fileMemoryMapRead, + .fileMemoryMapWrite = fileMemoryMapWrite, + + .processExecutableOpen = processExecutableOpen, + .processExecutablePath = processExecutablePath, + .lockStderr = lockStderr, + .tryLockStderr = tryLockStderr, + .unlockStderr = unlockStderr, + .processCurrentPath = processCurrentPath, + .processSetCurrentDir = processSetCurrentDir, + .processReplace = processReplace, + .processReplacePath = processReplacePath, + .processSpawn = processSpawn, + .processSpawnPath = processSpawnPath, + .childWait = childWait, + .childKill = childKill, + + .progressParentFile = progressParentFile, + + .now = now, + .clockResolution = clockResolution, + .sleep = sleep, + + .random = random, + .randomSecure = randomSecure, + + 
.netListenIp = netListenIpUnavailable,
            .netAccept = netAcceptUnavailable,
            .netBindIp = netBindIp,
            .netConnectIp = netConnectIpUnavailable,
            .netListenUnix = netListenUnixUnavailable,
            .netConnectUnix = netConnectUnixUnavailable,
            .netSocketCreatePair = netSocketCreatePairUnavailable,
            .netSend = netSendUnavailable,
            .netReceive = netReceive,
            .netRead = netReadUnavailable,
            .netWrite = netWriteUnavailable,
            .netWriteFile = netWriteFileUnavailable,
            .netClose = netClose,
            .netShutdown = netShutdown,
            .netInterfaceNameResolve = netInterfaceNameResolveUnavailable,
            .netInterfaceName = netInterfaceNameUnavailable,
            .netLookup = netLookupUnavailable,
        },
    };
}

/// Options accepted by `init`.
pub const InitOptions = struct {
    backing_allocator_needs_mutex: bool = true,

    /// Maximum thread pool size (excluding the main thread).
    /// Defaults to one less than the number of logical CPU cores.
    thread_limit: ?usize = null,
    /// Maximum number of threads that may perform synchronous syscalls.
    sync_limit: Io.Limit = .unlimited,

    /// Base-2 logarithm of the entry count requested for each thread's io_uring
    /// (`init` and `schedule` pass `1 << log2_ring_entries`).
    log2_ring_entries: u4 = 3,

    /// Affects the following operations:
    /// * `processExecutablePath` on OpenBSD and Haiku.
    argv0: Argv0 = .empty,
    /// Affects the following operations:
    /// * `fileIsTty`
    /// * `processSpawn`, `processSpawnPath`, `processReplace`, `processReplacePath`
    environ: process.Environ = .empty,
};

/// Initializes `ev` in place.
///
/// Allocates a single buffer from `backing_allocator` that holds the `Thread` array
/// (`1 + thread_limit` entries, or one per logical CPU when no limit is given) followed by
/// the main thread's idle stack, rounded up to the runtime page size. The calling thread
/// becomes thread 0 and its currently running code becomes the main fiber.
///
/// Fails if the allocation or the creation of the main thread's io_uring fails; the buffer
/// is released again in the latter case. `deinit` must recompute the same buffer length.
pub fn init(ev: *Evented, backing_allocator: Allocator, options: InitOptions) !void {
    const threads_size = @sizeOf(Thread) * if (options.thread_limit) |thread_limit|
        1 + thread_limit
    else
        @max(std.Thread.getCpuCount() catch 1, 1);
    const idle_stack_end_offset =
        std.mem.alignForward(usize, threads_size + idle_stack_size, std.heap.pageSize());
    const allocated_slice = try backing_allocator.alignedAlloc(u8, .of(Thread), idle_stack_end_offset);
    errdefer backing_allocator.free(allocated_slice);
    ev.* = .{
        .backing_allocator_needs_mutex = options.backing_allocator_needs_mutex,
        .backing_allocator_mutex = .init,
        .backing_allocator = backing_allocator,
        .main_fiber_buffer = undefined,
        .log2_ring_entries = options.log2_ring_entries,
        .threads = .{
            .allocated = @ptrCast(allocated_slice[0..threads_size]),
            .reserved = 1,
            .active = 1,
        },
        .sync_limit = if (options.sync_limit.toInt()) |sync_limit| .{ .permits = sync_limit } else null,

        .stderr_writer_initialized = false,
        .stderr_mutex = .init,
        .stderr_writer = .{
            .io = ev.io(),
            .interface = Io.File.Writer.initInterface(&.{}),
            .file = .stderr(),
            .mode = .streaming,
        },
        .stderr_mode = .no_color,

        .environ_mutex = .init,
        .environ_initialized = options.environ.block.isEmpty(),
        .environ = .{ .process_environ = options.environ },

        .null_fd = .init,
        .random_fd = .init,

        .csprng_mutex = .init,
        .csprng = .uninitialized,
    };
    const main_fiber: *Fiber = @ptrCast(&ev.main_fiber_buffer);
    main_fiber.* = .{
        .required_align = {},
        .context = undefined,
        .await_count = 0,
        .link = .{ .awaiter = null },
        .status = .{ .queue_next = null },
        .cancel_status = .unrequested,
        .cancel_protection = .unblocked,
        .name = if (tracy.enable) "main task",
    };
    const main_thread = &ev.threads.allocated[0];
    Thread.self = main_thread;
    main_thread.* = .{
        .required_align = {},
        .thread = undefined,
        // The idle context starts at `mainIdleEntry` on the stack that ends at the end of the
        // allocated buffer; `ev` is smuggled through the frame pointer register.
        .idle_context = switch (builtin.cpu.arch) {
            .aarch64 => .{
                .sp = @intFromPtr(allocated_slice[idle_stack_end_offset..].ptr),
                .fp = @intFromPtr(ev),
                .pc = @intFromPtr(&mainIdleEntry),
            },
            .x86_64 => .{
                .rsp = @intFromPtr(allocated_slice[idle_stack_end_offset..].ptr),
                .rbp = @intFromPtr(ev),
                .rip = @intFromPtr(&mainIdleEntry),
            },
            else => @compileError("unimplemented architecture"),
        },
        .current_context = &main_fiber.context,
        .ready_queue = null,
        .free_queue = null,
        .io_uring = try .init(
            @as(u16, 1) << ev.log2_ring_entries,
            linux.IORING_SETUP_COOP_TASKRUN | linux.IORING_SETUP_SINGLE_ISSUER,
        ),
        .idle_search_index = 1,
        .steal_ready_search_index = 1,
        .steal_free_search_index = 1,
        .name_arena = .{},
        .csprng = .uninitialized,
    };
    errdefer main_thread.io_uring.deinit();
    if (tracy.enable) tracy.fiberEnter(main_fiber.name);
}

/// Shuts the event loop down and releases everything acquired by `init`.
///
/// Must be called from the main fiber with no pending async work (both are asserted).
/// Yields with `.exit`, which posts an exit message to every active thread's ring, then
/// closes the cached file descriptors, joins the worker threads, deinitializes every
/// thread and frees the buffer allocated by `init`. `ev` is left undefined.
pub fn deinit(ev: *Evented) void {
    const main_fiber: *Fiber = @ptrCast(&ev.main_fiber_buffer);
    assert(Thread.current().currentFiber() == main_fiber);
    const active_threads = @atomicLoad(u32, &ev.threads.active, .acquire);
    for (ev.threads.allocated[0..active_threads]) |*thread| {
        const ready_fiber = @atomicLoad(?*Fiber, &thread.ready_queue, .monotonic);
        assert(ready_fiber == null or ready_fiber == Fiber.finished); // pending async
    }
    ev.yield(null, .exit);
    ev.null_fd.close();
    ev.random_fd.close();
    const allocated_ptr: [*]align(@alignOf(Thread)) u8 = @ptrCast(@alignCast(ev.threads.allocated.ptr));
    // Must mirror the length computed in `init`, which rounds up to the *runtime* page size.
    // Rounding to `std.heap.page_size_max` here would free a slice of a different length
    // whenever the runtime page size is smaller than the maximum.
    const idle_stack_end_offset = std.mem.alignForward(
        usize,
        ev.threads.allocated.len * @sizeOf(Thread) + idle_stack_size,
        std.heap.pageSize(),
    );
    for (ev.threads.allocated[1..active_threads]) |*thread| thread.thread.join();
    for (ev.threads.allocated[0..active_threads]) |*thread| thread.deinit(ev.backing_allocator);
    assert(active_threads == ev.threads.active); // spawned threads while there was no pending async?
    ev.backing_allocator.free(allocated_ptr[0..idle_stack_end_offset]);
    ev.* = undefined;
}

/// Returns the next fiber `thread` should run, or `null` if none was found.
///
/// First pops the head of `thread`'s own ready queue; while popping, the queue holds the
/// `Fiber.finished` sentinel. Otherwise inspects up to `max_steal_ready_search` other
/// active threads and takes over the whole ready queue of the first one that has work,
/// keeping the remainder on `thread`'s own queue. When nothing is found the own queue is
/// reset to `null`, which is the state `schedule` looks for when handing work to a thread.
fn findReadyFiber(ev: *Evented, thread: *Thread) ?*Fiber {
    if (@atomicRmw(?*Fiber, &thread.ready_queue, .Xchg, Fiber.finished, .acquire)) |ready_fiber| {
        assert(ready_fiber != Fiber.finished);
        @atomicStore(?*Fiber, &thread.ready_queue, ready_fiber.status.queue_next, .release);
        ready_fiber.status.queue_next = null;
        return ready_fiber;
    }
    const active_threads = @atomicLoad(u32, &ev.threads.active, .acquire);
    for (0..@min(max_steal_ready_search, active_threads)) |_| {
        defer thread.steal_ready_search_index += 1;
        if (thread.steal_ready_search_index == active_threads) thread.steal_ready_search_index = 0;
        const steal_ready_search_thread =
            &ev.threads.allocated[0..active_threads][thread.steal_ready_search_index];
        if (steal_ready_search_thread == thread) continue;
        const ready_fiber =
            @atomicLoad(?*Fiber, &steal_ready_search_thread.ready_queue, .monotonic) orelse continue;
        if (ready_fiber == Fiber.finished) continue;
        if (@cmpxchgWeak(
            ?*Fiber,
            &steal_ready_search_thread.ready_queue,
            ready_fiber,
            null,
            .acquire,
            .monotonic,
        )) |_| continue;
        @atomicStore(?*Fiber, &thread.ready_queue, ready_fiber.status.queue_next, .release);
        ready_fiber.status.queue_next = null;
        return ready_fiber;
    }
    // couldn't find anything to do, so we are now open for business
    @atomicStore(?*Fiber, &thread.ready_queue, null, .monotonic);
    return null;
}

/// Switches away from the current context.
///
/// The destination is `maybe_ready_fiber` if given, otherwise a fiber found by
/// `findReadyFiber`, otherwise the current thread's idle context. `pending_task` travels in
/// the `SwitchMessage`; the message returned by `contextSwitch` is passed to
/// `SwitchMessage.handle` before this function returns.
fn yield(ev: *Evented, maybe_ready_fiber: ?*Fiber, pending_task: SwitchMessage.PendingTask) void {
    const thread: *Thread = .current();
    const ready_context = if (maybe_ready_fiber orelse ev.findReadyFiber(thread)) |ready_fiber|
        &ready_fiber.context
    else
        &thread.idle_context;
    const message: SwitchMessage = .{
        .contexts = .{
            .old = thread.current_context,
            .new = ready_context,
        },
        .pending_task = pending_task,
    };
    contextSwitch(&message).handle(ev);
}

/// Makes the fibers in `ready_queue` runnable somewhere.
///
/// In order of preference:
/// 1. Hand the queue to one of up to `max_idle_search` other active threads whose ready
///    queue is `null`, and enqueue a `MSG_RING` wakeup for it on `thread`'s ring.
/// 2. Reserve and spawn a new worker thread that starts with the queue.
/// 3. Push the queue onto the front of `thread`'s own ready queue.
///
/// Returns `true` only in case 1, i.e. when a submission was enqueued on `thread`'s ring;
/// callers in this file use that to decide whether to call `thread.submit()`.
fn schedule(ev: *Evented, thread: *Thread, ready_queue: Fiber.Queue) bool {
    // shared fields of previous `Thread` must be initialized before later ones are marked as active
    const new_thread_index = @atomicLoad(u32, &ev.threads.active, .acquire);
    for (0..@min(max_idle_search, new_thread_index)) |_| {
        defer thread.idle_search_index += 1;
        if (thread.idle_search_index == new_thread_index) thread.idle_search_index = 0;
        const idle_search_thread = &ev.threads.allocated[0..new_thread_index][thread.idle_search_index];
        if (idle_search_thread == thread) continue;
        if (@cmpxchgWeak(
            ?*Fiber,
            &idle_search_thread.ready_queue,
            null,
            ready_queue.head,
            .release,
            .monotonic,
        )) |_| continue;
        thread.enqueue().* = .{
            .opcode = .MSG_RING,
            .flags = linux.IOSQE_CQE_SKIP_SUCCESS,
            .ioprio = 0,
            .fd = idle_search_thread.io_uring.fd,
            .off = @intFromEnum(Completion.Userdata.wakeup),
            .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA),
            .len = 0,
            .rw_flags = 0,
            .user_data = @intFromEnum(Completion.Userdata.wakeup),
            .buf_index = 0,
            .personality = 0,
            .splice_fd_in = 0,
            .addr3 = 0,
            .resv = 0,
        };
        return true;
    }
    spawn_thread: {
        // previous failed reservations must have completed before retrying
        if (new_thread_index == ev.threads.allocated.len or @cmpxchgWeak(
            u32,
            &ev.threads.reserved,
            new_thread_index,
            new_thread_index + 1,
            .acquire,
            .monotonic,
        ) != null) break :spawn_thread;
        const new_thread = &ev.threads.allocated[new_thread_index];
        const next_thread_index = new_thread_index + 1;
        var params = std.mem.zeroInit(linux.io_uring_params, .{
            .flags = linux.IORING_SETUP_ATTACH_WQ |
                linux.IORING_SETUP_R_DISABLED |
                linux.IORING_SETUP_COOP_TASKRUN |
                linux.IORING_SETUP_SINGLE_ISSUER,
            .wq_fd = @as(u32, @intCast(ev.threads.allocated[0].io_uring.fd)),
        });
        new_thread.* = .{
            .required_align = {},
            .thread = undefined,
            .idle_context = undefined,
            .current_context = &new_thread.idle_context,
            .ready_queue = ready_queue.head,
            .free_queue = null,
            .io_uring = IoUring.init_params(@as(u16, 1) << ev.log2_ring_entries, &params) catch |err| {
                @atomicStore(u32, &ev.threads.reserved, new_thread_index, .release);
                // no more access to `thread` after giving up reservation
                log.warn("unable to create worker thread due to io_uring init failure: {s}", .{
                    @errorName(err),
                });
                break :spawn_thread;
            },
            .idle_search_index = 0,
            .steal_ready_search_index = 0,
            .steal_free_search_index = 0,
            .name_arena = .{},
            .csprng = .uninitialized,
        };
        new_thread.thread = std.Thread.spawn(.{
            .stack_size = idle_stack_size,
            .allocator = ev.allocator(),
        }, threadEntry, .{ ev, new_thread_index }) catch |err| {
            new_thread.io_uring.deinit();
            @atomicStore(u32, &ev.threads.reserved, new_thread_index, .release);
            // no more access to `thread` after giving up reservation
            log.warn("unable to create worker thread due to spawn failure: {s}", .{@errorName(err)});
            break :spawn_thread;
        };
        // shared fields of `Thread` must be initialized before being marked active
        @atomicStore(u32, &ev.threads.active, next_thread_index, .release);
        return false;
    }
    // nobody wanted it, so just queue it on ourselves
    while (true) ready_queue.tail.status.queue_next = @cmpxchgWeak(
        ?*Fiber,
        &thread.ready_queue,
        ready_queue.tail.status.queue_next,
        ready_queue.head,
        .acq_rel,
        .acquire,
    ) orelse break;
    return false;
}

/// Entry point of a worker thread spawned by `schedule`.
///
/// Records the thread in `Thread.self`, enables its ring (which `schedule` created with
/// `IORING_SETUP_R_DISABLED`) and runs `idle`. Panics if the ring cannot be enabled.
fn threadEntry(ev: *Evented, index: u32) void {
    const thread: *Thread = &ev.threads.allocated[index];
    Thread.self = thread;
    switch (linux.errno(linux.io_uring_register(thread.io_uring.fd, .REGISTER_ENABLE_RINGS, null, 0))) {
        .SUCCESS => ev.idle(thread),
        else => |err| @panic(@tagName(err)),
    }
}

/// The `res` and `flags` of an io_uring completion queue entry.
const Completion = struct {
    result: i32,
    flags: u32,

    /// Interpretation of an io_uring `user_data` value.
    const Userdata = enum(usize) {
        unused,
        wakeup,
        futex_wake,
        close,
        cleanup,
        exit,
        /// Any other value carries a tag in its two low bits, which `idle` decodes as:
        /// * `0b00`: a `*Fiber` that receives the `Completion` and becomes ready.
        /// * `0b01`: an `ASYNC_CANCEL` is queued targeting the untagged value.
        /// * `0b10`: a pointer to an `Io.Operation.Storage.Pending.Userdata` of an `Io.Batch`.
        /// * `0b11`: a timeout context (`*usize`).
        /// NOTE(review): the summary below does not obviously match that decoding; confirm.
        ///
        /// If bit 0 is 1, a pointer to the `context` field of `Io.Batch.Storage.Pending`.
        /// If bits 0 and 1 are 0, a `*Fiber`.
        _,
    };

    /// Converts the completion result to a `linux.E` via `linux.errno`.
    fn errno(completion: Completion) linux.E {
        return linux.errno(@bitCast(@as(isize, completion.result)));
    }
};

/// Naked trampoline that is the initial program counter of the main thread's idle context.
///
/// Moves the frame pointer register (which `init` loaded with the `*Evented`) into the
/// first argument register, clears the frame pointer and jumps to `mainIdle`.
fn mainIdleEntry() callconv(.naked) void {
    switch (builtin.cpu.arch) {
        .aarch64 => asm volatile (
            \\ mov x0, fp
            \\ mov fp, #0
            \\ b %[mainIdle]
            :
            : [mainIdle] "X" (&mainIdle),
        ),
        .x86_64 => asm volatile (
            \\ movq %%rbp, %%rdi
            \\ xor %%ebp, %%ebp
            \\ jmp %[mainIdle:P]
            :
            : [mainIdle] "X" (&mainIdle),
        ),
        else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)),
    }
}

/// Body of the main thread's idle context.
///
/// Handles the switch message that brought control here, runs the idle loop for thread 0
/// and, once `idle` returns, yields back to the main fiber. Never returns.
fn mainIdle(
    ev: *Evented,
    message: *const SwitchMessage,
) callconv(.withStackAlign(.c, @max(@alignOf(Thread), @alignOf(Io.fiber.Context)))) noreturn {
    message.handle(ev);
    ev.idle(&ev.threads.allocated[0]);
    ev.yield(@ptrCast(&ev.main_fiber_buffer), .nothing);
    unreachable; // switched to dead fiber
}

/// Idle loop of a thread.
///
/// Repeatedly: runs every ready fiber it can find, then submits pending submissions and
/// waits for at least one completion, then drains the completion queue. Fibers made ready
/// by completions are collected; the first one is run directly on this thread and the rest
/// are passed to `schedule`. Returns when an `exit` completion is received.
fn idle(ev: *Evented, thread: *Thread) void {
    var maybe_ready_fiber: ?*Fiber = null;
    while (true) {
        while (maybe_ready_fiber orelse ev.findReadyFiber(thread)) |ready_fiber| {
            ev.yield(ready_fiber, .nothing);
            maybe_ready_fiber = null;
        }
        _ = thread.io_uring.submit_and_wait(1) catch |err| switch (err) {
            error.SignalInterrupt => {},
            else => |e| @panic(@errorName(e)),
        };
        var maybe_ready_queue: ?Fiber.Queue = null;
        while (true) {
            var cqes_buffer: [1 << 8]linux.io_uring_cqe = undefined;
            const cqes = cqes_buffer[0 .. thread.io_uring.copy_cqes(&cqes_buffer, 0) catch |err| switch (err) {
                error.SignalInterrupt => 0,
                else => |e| @panic(@errorName(e)),
            }];
            if (cqes.len == 0) break;
            for (cqes) |cqe| if (cqe.flags & linux.IORING_CQE_F_SKIP == 0) switch (@as(
                Completion.Userdata,
                @enumFromInt(cqe.user_data),
            )) {
                .unused => unreachable, // bad submission queued?
                .wakeup => {},
                .futex_wake => switch (Completion.errno(.{ .result = cqe.res, .flags = cqe.flags })) {
                    .SUCCESS => recoverableOsBugDetected(), // success is skipped
                    .INVAL => {}, // invalid futex_wait() on ptr done elsewhere
                    .INTR, .CANCELED => recoverableOsBugDetected(), // `Completion.Userdata.futex_wake` is not cancelable
                    .FAULT => {}, // pointer became invalid while doing the wake
                    else => recoverableOsBugDetected(), // deadlock due to operating system bug
                },
                .close => switch (Completion.errno(.{ .result = cqe.res, .flags = cqe.flags })) {
                    .BADF => recoverableOsBugDetected(), // Always a race condition.
                    .INTR => {}, // This is still a success. See https://github.com/ziglang/zig/issues/2425
                    else => {},
                },
                .cleanup => @panic("failed to notify other threads that we are exiting"),
                .exit => {
                    assert(maybe_ready_fiber == null and maybe_ready_queue == null); // pending async
                    return;
                },
                // Tagged pointer: the two low bits select how the completion is delivered.
                _ => if (@as(?*Fiber, ready_fiber: switch (@as(u2, @truncate(cqe.user_data))) {
                    0b00 => {
                        const ready_fiber: *Fiber = @ptrFromInt(cqe.user_data & ~@as(usize, 0b11));
                        ready_fiber.resultPointer(Completion).* = .{
                            .result = cqe.res,
                            .flags = cqe.flags,
                        };
                        break :ready_fiber ready_fiber;
                    },
                    0b01 => {
                        thread.enqueue().* = .{
                            .opcode = .ASYNC_CANCEL,
                            .flags = linux.IOSQE_CQE_SKIP_SUCCESS,
                            .ioprio = 0,
                            .fd = 0,
                            .off = 0,
                            .addr = cqe.user_data & ~@as(usize, 0b11),
                            .len = 0,
                            .rw_flags = 0,
                            .user_data = @intFromEnum(Completion.Userdata.wakeup),
                            .buf_index = 0,
                            .personality = 0,
                            .splice_fd_in = 0,
                            .addr3 = 0,
                            .resv = 0,
                        };
                        break :ready_fiber null;
                    },
                    0b10 => {
                        const batch_userdata: *Io.Operation.Storage.Pending.Userdata =
                            @ptrFromInt(cqe.user_data & ~@as(usize, 0b11));
                        const batch: *Io.Batch = @ptrFromInt(batch_userdata[0]);
                        var next: usize = 0b00;
                        batch_userdata[0..3].* = .{ next, @as(u32, @bitCast(cqe.res)), cqe.flags };
                        // Push this completion onto the list rooted at `batch.userdata`,
                        // remembering the previous head in `batch_userdata[0]`.
                        while (true) {
                            next = @cmpxchgWeak(
                                usize,
                                @as(*usize, @ptrCast(&batch.userdata)),
                                next,
                                cqe.user_data,
                                .release,
                                .acquire,
                            ) orelse break;
                            batch_userdata[0] = next;
                        }
                        break :ready_fiber switch (@as(u2, @truncate(next))) {
                            0b00, 0b01 => @ptrFromInt(next & ~@as(usize, 0b11)),
                            0b10, 0b11 => null,
                        };
                    },
                    0b11 => switch (Completion.errno(.{ .result = cqe.res, .flags = cqe.flags })) {
                        .SUCCESS => unreachable, // no event count specified
                        .TIME => {
                            const context: *usize = @ptrFromInt(cqe.user_data & ~@as(usize, 0b11));
                            const fiber = @atomicRmw(usize, context, .Add, 0b01, .acquire);
                            break :ready_fiber switch (@as(u2, @truncate(fiber))) {
                                else => unreachable, // timeout completed multiple times
                                0b00 => @ptrFromInt(fiber & ~@as(usize, 0b11)),
                                0b10 => null,
                            };
                        },
                        .CANCELED => null, // user data may have been invalidated
                        else => |err| unexpectedErrno(err) catch null,
                    },
                })) |ready_fiber| {
                    assert(ready_fiber.status.queue_next == null);
                    if (maybe_ready_fiber == null) {
                        maybe_ready_fiber = ready_fiber;
                    } else if (maybe_ready_queue) |*ready_queue| {
                        ready_queue.tail.status.queue_next = ready_fiber;
                        ready_queue.tail = ready_fiber;
                    } else maybe_ready_queue = .{ .head = ready_fiber, .tail = ready_fiber };
                },
            };
        }
        if (maybe_ready_queue) |ready_queue| _ = ev.schedule(thread, ready_queue);
    }
}

/// Message passed across a context switch: the pair of contexts being switched plus a task
/// to perform on behalf of the context that was switched away from.
const SwitchMessage = struct {
    contexts: Io.fiber.Switch,
    pending_task: PendingTask,

    /// Work that `handle` performs after the switch has taken place.
    const PendingTask = union(enum) {
        nothing,
        reschedule,
        await: u31,
        group_await: Group,
        group_cancel: Group,
        batch_await: *Io.Batch,
        destroy,
        exit,
    };

    /// Runs on the destination side of a context switch: records the new current context
    /// on the thread, reports the switch to tracy when enabled, and performs
    /// `pending_task` for the old context.
    fn handle(message: *const SwitchMessage, ev: *Evented) void {
        const thread: *Thread = .current();
        thread.current_context = message.contexts.new;
        if (tracy.enable) {
            if (message.contexts.new != &thread.idle_context) {
                const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.new));
                tracy.fiberEnter(fiber.name);
            } else tracy.fiberLeave();
        }
        switch (message.pending_task) {
            .nothing => {},
            .reschedule => if (message.contexts.old != &thread.idle_context) {
                const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old));
                assert(fiber.status.queue_next == null);
                _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber });
            },
            .await => |count| {
                const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old));
                // A positive previous count means the old fiber is made runnable again right away.
                if (@atomicRmw(i32, &fiber.await_count, .Sub, count, .monotonic) > 0)
                    _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber });
            },
            .group_await => |group| {
                const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old));
                if (group.await(ev, fiber))
                    _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber });
            },
            .group_cancel => |group| {
                const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old));
                if (group.cancel(ev, fiber))
                    _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber });
            },
            .batch_await => |batch| {
                const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old));
                if (@cmpxchgStrong(
                    ?*anyopaque,
                    &batch.userdata,
                    null,
                    fiber,
                    .release,
                    .monotonic,
                )) |head| {
                    assert(@as(u2, @truncate(@intFromPtr(head))) != 0b00);
                    _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber });
                }
            },
            .destroy => {
                const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.old));
                fiber.destroy();
            },
            .exit => for (
                ev.threads.allocated[0..@atomicLoad(u32, &ev.threads.active, .acquire)],
            ) |*each_thread| {
                thread.enqueue().* = .{
                    .opcode = .MSG_RING,
                    .flags = linux.IOSQE_CQE_SKIP_SUCCESS,
                    .ioprio = 0,
                    .fd = each_thread.io_uring.fd,
                    .off = @intFromEnum(Completion.Userdata.exit),
                    .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA),
                    .len = 0,
                    .rw_flags = 0,
                    .user_data = @intFromEnum(Completion.Userdata.cleanup),
                    .buf_index = 0,
                    .personality = 0,
                    .splice_fd_in = 0,
                    .addr3 = 0,
                    .resv = 0,
                };
            },
        }
    }
};

/// Calls `Io.fiber.contextSwitch` with the message's contexts and recovers the enclosing
/// `SwitchMessage` from the returned contexts pointer.
inline fn contextSwitch(message: *const SwitchMessage) *const SwitchMessage {
    return @fieldParentPtr("contexts", Io.fiber.contextSwitch(&message.contexts));
}

/// Aborts the process when the current thread is unknown or running its idle context.
/// Otherwise marks the current fiber as having a requested cancelation and sets its
/// cancel protection to user-blocked and acknowledged.
fn crashHandler(userdata: ?*anyopaque) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    const thread = Thread.self orelse std.process.abort();
    if (thread.current_context == &thread.idle_context) std.process.abort();
    const fiber = thread.currentFiber();
    @atomicStore(
        Fiber.CancelStatus,
        &fiber.cancel_status,
        .{ .requested = true, .awaiting = .nothing },
        .monotonic,
    );
    fiber.cancel_protection = .{ .user = .blocked, .acknowledged = true };
}

/// Start-up record of a fiber created by `concurrent`. It lives inside the fiber's
/// allocation and is immediately followed by the copied context bytes.
const AsyncClosure = struct {
    ev: *Evented,
    fiber: *Fiber,
    start: *const fn (context: *const anyopaque, result: *anyopaque) void,
    result_align: Alignment,

    /// Computes where the closure of `fiber` lives, relative to `fiber.allocatedEnd()`.
    fn fromFiber(fiber: *Fiber) *AsyncClosure {
        return @ptrFromInt(Fiber.max_context_align.max(.of(AsyncClosure)).backward(
            @intFromPtr(fiber.allocatedEnd()) - Fiber.max_context_size,
        ) - @sizeOf(AsyncClosure));
    }

    /// Returns the address directly after the closure, where the context bytes are stored.
    fn contextPointer(closure: *AsyncClosure) [*]align(Fiber.max_context_align.toByteUnits()) u8 {
        return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(AsyncClosure));
    }

    /// Naked trampoline used as the fiber's initial program counter: derives the closure
    /// pointer from the stack pointer set up by `concurrent` and jumps to `call`.
    fn entry() callconv(.naked) void {
        switch (builtin.cpu.arch) {
            .aarch64 => asm volatile (
                \\ mov x0, sp
                \\ b %[call]
                :
                : [call] "X" (&call),
            ),
            .x86_64 => asm volatile (
                \\ leaq 8(%%rsp), %%rdi
                \\ jmp %[call:P]
                :
                : [call] "X" (&call),
            ),
            else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)),
        }
    }

    /// Fiber body: handles the incoming switch message, runs `start`, marks the fiber as
    /// finished and yields, preferring the awaiter if this completion makes it runnable.
    fn call(
        closure: *AsyncClosure,
        message: *const SwitchMessage,
    ) callconv(.withStackAlign(.c, @alignOf(AsyncClosure))) noreturn {
        message.handle(closure.ev);
        const fiber = closure.fiber;
        closure.start(closure.contextPointer(), fiber.resultBytes(closure.result_align));
        closure.ev.yield(
            if (@atomicRmw(?*Fiber, &fiber.link.awaiter, .Xchg, Fiber.finished, .acq_rel)) |awaiter|
                if (@atomicRmw(i32, &awaiter.await_count, .Add, 1, .monotonic) == -1) awaiter else null
            else
                null,
            .nothing,
        );
        unreachable; // switched to dead fiber
    }
};

/// Runs `start` on a new fiber when possible. If `concurrent` fails, `start` is called
/// synchronously with `context.ptr` and `result.ptr` and `null` is returned.
fn async(
    userdata: ?*anyopaque,
    result: []u8,
    result_alignment: Alignment,
    context: []const u8,
    context_alignment: Alignment,
    start: *const fn (context: *const anyopaque, result: *anyopaque) void,
) ?*std.Io.AnyFuture {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    return concurrent(ev, result.len, result_alignment, context, context_alignment, start) catch {
        start(context.ptr, result.ptr);
        return null;
    };
}

/// Creates a fiber that runs `start` with a copy of `context` and schedules it.
///
/// The result and context sizes and alignments must fit the `Fiber` limits (asserted).
/// Returns the fiber as an opaque future, or `error.ConcurrencyUnavailable` when the fiber
/// (or, with tracy enabled, its name) cannot be allocated.
fn concurrent(
    userdata: ?*anyopaque,
    result_len: usize,
    result_alignment: Alignment,
    context: []const u8,
    context_alignment: Alignment,
    start: *const fn (context: *const anyopaque, result: *anyopaque) void,
) Io.ConcurrentError!*std.Io.AnyFuture {
    assert(result_alignment.compare(.lte, Fiber.max_result_align)); // TODO
    assert(context_alignment.compare(.lte, Fiber.max_context_align)); // TODO
    assert(result_len <= Fiber.max_result_size); // TODO
    assert(context.len <= Fiber.max_context_size); // TODO

    const ev: *Evented = @ptrCast(@alignCast(userdata));
    const fiber = Fiber.create(ev) catch |err| switch (err) {
        error.OutOfMemory => return error.ConcurrencyUnavailable,
    };

    const closure: *AsyncClosure = .fromFiber(fiber);
    fiber.* = .{
        .required_align = {},
        // The stack starts at the closure so that `AsyncClosure.entry` can recover it.
        .context = switch (builtin.cpu.arch) {
            .aarch64 => .{
                .sp = @intFromPtr(closure),
                .fp = 0,
                .pc = @intFromPtr(&AsyncClosure.entry),
            },
            .x86_64 => .{
                .rsp = @intFromPtr(closure) - 8,
                .rbp = 0,
                .rip = @intFromPtr(&AsyncClosure.entry),
            },
            else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)),
        },
        .await_count = 0,
        .link = .{ .awaiter = null },
        .status = .{ .queue_next = null },
        .cancel_status = .unrequested,
        .cancel_protection = .unblocked,
        .name = if (tracy.enable) name: {
            const thread: *Thread = .current();
            var name_arena = thread.name_arena.promote(std.heap.page_allocator);
            defer thread.name_arena = name_arena.state;
            break :name std.fmt.allocPrintSentinel(
                name_arena.allocator(),
                "task {d}",
                .{@atomicRmw(u64, &Fiber.next_name, .Add, 1, .monotonic)},
                0,
            ) catch return error.ConcurrencyUnavailable;
        },
    };
    closure.* = .{
        .ev = ev,
        .fiber = fiber,
        .start = start,
        .result_align = result_alignment,
    };
    @memcpy(closure.contextPointer(), context);

    const thread: *Thread = .current();
    if (ev.schedule(thread, .{ .head = fiber, .tail = fiber })) thread.submit();
    return @ptrCast(fiber);
}

/// Waits for the fiber behind `future` to finish, copies its result into `result` and
/// destroys the fiber.
///
/// Registers the current fiber as the awaiter; if the future has not finished yet, yields
/// with `.await = 1` until the awaiter slot reads `Fiber.finished`.
fn await(
    userdata: ?*anyopaque,
    future: *std.Io.AnyFuture,
    result: []u8,
    result_alignment: Alignment,
) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    const fiber = Thread.current().currentFiber();
    const future_fiber: *Fiber = @ptrCast(@alignCast(future));
    if (@atomicRmw(?*Fiber, &future_fiber.link.awaiter, .Xchg, fiber, .acq_rel)) |awaiter| {
        assert(awaiter == Fiber.finished);
    } else while (true) {
        ev.yield(null, .{ .await = 1 });
        const awaiter = @atomicLoad(?*Fiber, &future_fiber.link.awaiter, .acquire);
        if (awaiter == Fiber.finished) break;
        assert(awaiter == fiber); // spurious wakeup
    }
    @memcpy(result, future_fiber.resultBytes(result_alignment));
    future_fiber.destroy();
}

/// Requests cancelation of the fiber behind `future`, then awaits it exactly like `await`.
fn cancel(
    userdata: ?*anyopaque,
    future: *std.Io.AnyFuture,
    result: []u8,
    result_alignment: Alignment,
) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    const future_fiber: *Fiber = @ptrCast(@alignCast(future));
    future_fiber.requestCancel(ev);
    await(ev, future, result, result_alignment);
}

/// Typed view of an `Io.Group`: `token` is interpreted as a `List`, and `state` as both a
/// `Mutex` (one 32-bit part) and an `Awaiter` (the whole word).
const Group = struct {
    ptr: *Io.Group,

    /// Layout of `Io.Group.token`: two flags plus the head of the list of member fibers.
    const List = packed struct(usize) {
        cancel_requested: bool,
        awaiter_delayed: bool,
        fibers: Fiber.PackedPtr,
    };
    fn listPtr(group: Group) *List {
        return @ptrCast(&group.ptr.token);
    }

    /// Futex-based lock stored in the 32 bits of `Io.Group.state` that contain its two
    /// lowest bits.
    const Mutex = packed struct(u32) {
        locked: bool,
        contended: bool,
        shared2: u30,
    };
    fn mutexPtr(group: Group) *Mutex {
        return switch (comptime builtin.cpu.arch.endian()) {
            .little => @ptrCast(&group.ptr.state),
            .big => @ptrCast(@alignCast(
                @as([*]u8, @ptrCast(&group.ptr.state)) + @sizeOf(usize) - @sizeOf(u32),
            )),
        };
    }

    /// Layout of the whole `Io.Group.state` word: the mutex bits plus the awaiting fiber.
    const Awaiter = packed struct(usize) {
        locked: bool,
        contended: bool,
        awaiter: Fiber.PackedPtr,
    };
    fn awaiterPtr(group: Group) *Awaiter {
        return @ptrCast(&group.ptr.state);
    }

    /// Acquires the group mutex, waiting on the futex (uncancelably) while it is held.
    fn lock(group: Group, ev: *Evented) void {
        const mutex = group.mutexPtr();
        {
            const old_state = @atomicRmw(
                Mutex,
                mutex,
                .Or,
                .{ .locked = true, .contended = false, .shared2 = 0 },
                .acquire,
            );
            if (!old_state.locked) {
                @branchHint(.likely);
                return;
            }
            if (old_state.contended) {
                futexWaitUncancelable(ev, @ptrCast(mutex), @bitCast(old_state));
            }
        }
        while (true) {
            var old_state = @atomicRmw(
                Mutex,
                mutex,
                .Or,
                .{ .locked = true, .contended = true, .shared2 = 0 },
                .acquire,
            );
            if (!old_state.locked) {
                @branchHint(.likely);
                return;
            }
            old_state.contended = true;
            futexWaitUncancelable(ev, @ptrCast(mutex), @bitCast(old_state));
        }
    }

    /// Releases the group mutex and wakes one waiter if the lock was contended.
    fn unlock(group: Group, ev: *Evented) void {
        const mutex = group.mutexPtr();
        const old_state = @atomicRmw(
            Mutex,
            mutex,
            .And,
            .{ .locked = false, .contended = false, .shared2 = std.math.maxInt(u30) },
            .release,
        );
        assert(old_state.locked);
        if (old_state.contended) futexWake(ev, @ptrCast(mutex), 1);
    }

    /// Inserts `fiber` at the head of the group's fiber list under the mutex. A fiber added
    /// while cancelation is requested starts with its cancel status set to requested.
    fn addFiber(group: Group, ev: *Evented, fiber: *Fiber) void {
        group.lock(ev);
        defer group.unlock(ev);
        const list_ptr = group.listPtr();
        const list = @atomicLoad(List, list_ptr, .monotonic);
        if (list.cancel_requested) fiber.cancel_status = .{ .requested = true, .awaiting = .nothing };
        const old_head = list.fibers.unpack();
        if (old_head) |head| head.link.group.prev = fiber;
        fiber.link.group.next = old_head;
        @atomicStore(List, list_ptr, .{
            .cancel_requested = list.cancel_requested,
            .awaiter_delayed = list.awaiter_delayed,
            .fibers = .pack(fiber),
        }, .monotonic);
    }

    /// Unlinks `fiber` from the group's fiber list under the mutex.
    ///
    /// When the list becomes empty and an awaiter is registered, returns that awaiter so
    /// the caller can resume it, unless the awaiter's state change lost a race with
    /// `Fiber.requestCancel`, in which case `awaiter_delayed` is recorded instead.
    /// Returns `null` in every other case.
    fn removeFiber(group: Group, ev: *Evented, fiber: *Fiber) ?*Fiber {
        group.lock(ev);
        defer group.unlock(ev);
        const list_ptr = group.listPtr();
        const list = @atomicLoad(List, list_ptr, .monotonic);
        if (fiber.link.group.next) |next| next.link.group.prev = fiber.link.group.prev;
        if (fiber.link.group.prev) |prev| {
            prev.link.group.next = fiber.link.group.next;
        } else if (fiber.link.group.next) |new_head| {
            @atomicStore(List, list_ptr, .{
                .cancel_requested = list.cancel_requested,
                .awaiter_delayed = list.awaiter_delayed,
                .fibers = .pack(new_head),
            }, .monotonic);
        } else if (@atomicLoad(Awaiter, group.awaiterPtr(), .monotonic).awaiter.unpack()) |awaiter| {
            if (!awaiter.cancel_status.changeAwaiting(.group, .nothing) or list.cancel_requested) {
                @atomicStore(List, list_ptr, .{
                    .cancel_requested = false,
                    .awaiter_delayed = false,
                    .fibers = .null,
                }, .release);
                assert(awaiter.status.awaiting_group.ptr == group.ptr);
                awaiter.status = .{ .queue_next = null };
                return awaiter;
            }
            // Race with `Fiber.requestCancel`
            @atomicStore(List, list_ptr, .{
                .cancel_requested = false,
                .awaiter_delayed = true,
                .fibers = .null,
            }, .monotonic);
        } else @atomicStore(List, list_ptr, .{
            .cancel_requested = false,
            .awaiter_delayed = false,
            .fibers = .null,
        }, .release);
        return null;
    }

    /// Called from `SwitchMessage.handle` after `awaiter` has switched away.
    /// Returns `true` if the group is already empty (the awaiter should be rescheduled);
    /// otherwise registers the awaiter and returns `false`.
    fn await(group: Group, ev: *Evented, awaiter: *Fiber) bool {
        group.lock(ev);
        defer group.unlock(ev);
        if (@atomicLoad(List, group.listPtr(), .monotonic).fibers.unpack()) |_| {
            if (group.registerAwaiter(awaiter) and awaiter.cancel_protection.check() == .unblocked) {
                // The awaiter already had an unacknowledged cancelation request before
                // attempting to await a group, so propagate the cancelation to the group.
                assert(!group.cancelLocked(ev, null));
            }
            return false;
        }
        return true;
    }

    /// Locked wrapper around `cancelLocked`.
    fn cancel(group: Group, ev: *Evented, maybe_awaiter: ?*Fiber) bool {
        group.lock(ev);
        defer group.unlock(ev);
        return group.cancelLocked(ev, maybe_awaiter);
    }

    /// Assumes the mutex is held.
    ///
    /// Sets `cancel_requested` and requests cancelation of every member fiber. With members
    /// present, registers `maybe_awaiter` (if any) and returns `false`. With no members,
    /// clears the list state and returns `true` if an awaiter was given, otherwise the
    /// previous `awaiter_delayed` flag.
    fn cancelLocked(group: Group, ev: *Evented, maybe_awaiter: ?*Fiber) bool {
        const list_ptr = group.listPtr();
        const list = @atomicRmw(
            List,
            list_ptr,
            .Add,
            .{ .cancel_requested = true, .awaiter_delayed = false, .fibers = .null },
            .monotonic,
        );
        assert(!list.cancel_requested);
        if (list.fibers.unpack()) |head| {
            var maybe_fiber: ?*Fiber = head;
            while (maybe_fiber) |fiber| {
                fiber.requestCancel(ev);
                maybe_fiber = fiber.link.group.next;
            }
            if (maybe_awaiter) |awaiter| _ = group.registerAwaiter(awaiter);
            return false;
        }
        @atomicStore(
            List,
            list_ptr,
            .{ .cancel_requested = false, .awaiter_delayed = false, .fibers = .null },
            .release,
        );
        return if (maybe_awaiter) |_| true else list.awaiter_delayed;
    }

    /// Assumes the mutex is held.
    ///
    /// Records `awaiter` as the fiber waiting on this group (asserting none was registered)
    /// and returns the result of moving its cancel status from `.nothing` to `.group`.
    fn registerAwaiter(group: Group, awaiter: *Fiber) bool {
        assert(awaiter.status.queue_next == null);
        awaiter.status = .{ .awaiting_group = group };
        assert(@atomicRmw(
            Awaiter,
            group.awaiterPtr(),
            .Add,
            .{ .locked = false, .contended = false, .awaiter = .pack(awaiter) },
            .monotonic,
        ).awaiter == .null);
        return awaiter.cancel_status.changeAwaiting(.nothing, .group);
    }

    /// Start-up record of a fiber created by `groupConcurrent`. It lives inside the fiber's
    /// allocation and is immediately followed by the copied context bytes.
    const AsyncClosure = struct {
        ev: *Evented,
        group: Group,
        fiber: *Fiber,
        start: *const fn (context: *const anyopaque) Io.Cancelable!void,

        /// Computes where the closure of `fiber` lives, relative to `fiber.allocatedEnd()`.
        fn fromFiber(fiber: *Fiber) *Group.AsyncClosure {
            return @ptrFromInt(Fiber.max_context_align.max(.of(Group.AsyncClosure)).backward(
                @intFromPtr(fiber.allocatedEnd()) - Fiber.max_context_size,
            ) - @sizeOf(Group.AsyncClosure));
        }

        /// Returns the address directly after the closure, where the context bytes are stored.
        fn contextPointer(
            closure: *Group.AsyncClosure,
        ) [*]align(Fiber.max_context_align.toByteUnits()) u8 {
            return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(Group.AsyncClosure));
        }

        /// Naked trampoline used as the fiber's initial program counter: derives the closure
        /// pointer from the stack pointer set up by `groupConcurrent` and jumps to `call`.
        fn entry() callconv(.naked) void {
            switch (builtin.cpu.arch) {
                .aarch64 => asm volatile (
                    \\ mov x0, sp
                    \\ b %[call]
                    :
                    : [call] "X" (&call),
                ),
                .x86_64 => asm volatile (
                    \\ leaq 8(%%rsp), %%rdi
                    \\ jmp %[call:P]
                    :
                    : [call] "X" (&call),
                ),
                else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)),
            }
        }

        /// Fiber body: handles the incoming switch message, runs `start`, checks that the
        /// result agrees with the fiber's cancel acknowledgement, removes the fiber from the
        /// group and yields with `.destroy`, resuming the group awaiter if one was returned.
        fn call(
            closure: *Group.AsyncClosure,
            message: *const SwitchMessage,
        ) callconv(.withStackAlign(.c, @alignOf(Group.AsyncClosure))) noreturn {
            message.handle(closure.ev);
            assert(closure.fiber.status.queue_next == null);
            const result = closure.start(closure.contextPointer());
            const ev = closure.ev;
            const group = closure.group;
            const fiber = closure.fiber;
            const cancel_acknowledged = fiber.cancel_protection.acknowledged;
            if (result) {
                assert(!cancel_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled`
            } else |err| switch (err) {
                error.Canceled => assert(cancel_acknowledged), // group task returned `error.Canceled` but was never canceled
            }
            ev.yield(group.removeFiber(ev, fiber), .destroy);
            unreachable; // switched to dead fiber
        }
    };
};

/// Runs `start` as a group task on a new fiber when possible. If `groupConcurrent` fails,
/// `start` is run synchronously on the current fiber; an `error.Canceled` result is then
/// re-armed on the current fiber via `recancel` so the caller still observes it.
fn groupAsync(
    userdata: ?*anyopaque,
    type_erased: *Io.Group,
    context: []const u8,
    context_alignment: Alignment,
    start: *const fn (context: *const anyopaque) Io.Cancelable!void,
) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    return groupConcurrent(ev, type_erased, context, context_alignment, start) catch {
        const fiber = Thread.current().currentFiber();
        const pre_acknowledged = fiber.cancel_protection.acknowledged;
        const result = start(context.ptr);
        const post_acknowledged = fiber.cancel_protection.acknowledged;
        if (result) {
            if (pre_acknowledged) {
                assert(post_acknowledged); // group task called `recancel` but was not canceled
            } else {
                assert(!post_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled`
            }
        } else |err| switch (err) {
            // Don't swallow the cancelation: make it visible to the `Group.async` caller.
            error.Canceled => {
                assert(!pre_acknowledged); // group task called `recancel` but was not canceled
                assert(post_acknowledged); // group task returned `error.Canceled` but was never canceled
                fiber.cancel_protection.recancel();
            },
        }
    };
}

/// Creates a fiber that runs `start` with a copy of `context`, adds it to the group and
/// schedules it.
///
/// The context size and alignment must fit the `Fiber` limits (asserted). Returns
/// `error.ConcurrencyUnavailable` when the fiber (or, with tracy enabled, its name) cannot
/// be allocated.
fn groupConcurrent(
    userdata: ?*anyopaque,
    type_erased: *Io.Group,
    context: []const u8,
    context_alignment: Alignment,
    start: *const fn (context: *const anyopaque) Io.Cancelable!void,
) Io.ConcurrentError!void {
    assert(context_alignment.compare(.lte, Fiber.max_context_align)); // TODO
    assert(context.len <= Fiber.max_context_size); // TODO

    const ev: *Evented = @ptrCast(@alignCast(userdata));
    const group: Group = .{ .ptr = type_erased };
    const fiber = Fiber.create(ev) catch |err| switch (err) {
        error.OutOfMemory => return error.ConcurrencyUnavailable,
    };

    const closure: *Group.AsyncClosure = .fromFiber(fiber);
    fiber.* = .{
        .required_align = {},
        // The stack starts at the closure so that `Group.AsyncClosure.entry` can recover it.
        .context = switch (builtin.cpu.arch) {
            .aarch64 => .{
                .sp = @intFromPtr(closure),
                .fp = 0,
                .pc = @intFromPtr(&Group.AsyncClosure.entry),
            },
            .x86_64 => .{
                .rsp = @intFromPtr(closure) - 8,
                .rbp = 0,
                .rip = @intFromPtr(&Group.AsyncClosure.entry),
            },
            else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)),
        },
        .await_count = 0,
        .link = .{ .group = .{ .prev = null, .next = null } },
        .status = .{ .queue_next = null },
        .cancel_status = .unrequested,
        .cancel_protection = .unblocked,
        .name = if (tracy.enable) name: {
            const thread: *Thread = .current();
            var name_arena = thread.name_arena.promote(std.heap.page_allocator);
            defer thread.name_arena = name_arena.state;
            break :name std.fmt.allocPrintSentinel(
                name_arena.allocator(),
                "group task {d}",
                .{@atomicRmw(u64, &Fiber.next_name, .Add, 1, .monotonic)},
                0,
            ) catch return error.ConcurrencyUnavailable;
        },
    };
    closure.* = .{
        .ev = ev,
        .group = group,
        .fiber = fiber,
        .start = start,
    };
    @memcpy(closure.contextPointer(), context);
    group.addFiber(ev, fiber);
    const thread: *Thread = .current();
    if (ev.schedule(thread, .{ .head = fiber, .tail = fiber })) thread.submit();
}

/// Yields with a `.group_await` task for the group; `Group.await` then decides whether the
/// current fiber is rescheduled immediately or parked as the group's awaiter.
fn groupAwait(
    userdata: ?*anyopaque,
    type_erased: *Io.Group,
    initial_token: *anyopaque,
) Io.Cancelable!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = initial_token;
    ev.yield(null, .{ .group_await = .{ .ptr = type_erased } });
}

/// Yields with a `.group_cancel` task for the group; `Group.cancel` then requests
/// cancelation of the members and decides when the current fiber resumes.
fn groupCancel(userdata: ?*anyopaque, type_erased: *Io.Group, initial_token: *anyopaque) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = initial_token;
    ev.yield(null, .{ .group_cancel = .{ .ptr = type_erased } });
}

/// Calls `recancel` on the current fiber's cancel protection.
fn recancel(userdata: ?*anyopaque) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    Thread.current().currentFiber().cancel_protection.recancel();
}

/// Replaces the current fiber's user cancel protection with `new` and returns the
/// previous value.
fn swapCancelProtection(userdata: ?*anyopaque, new: Io.CancelProtection) Io.CancelProtection {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    const cancel_protection = &Thread.current().currentFiber().cancel_protection;
    defer cancel_protection.user = new;
    return cancel_protection.user;
}

/// Returns `error.Canceled` (after acknowledging it) when cancelation has been requested
/// for the current fiber and its cancel protection is unblocked; otherwise does nothing.
fn checkCancel(userdata: ?*anyopaque) Io.Cancelable!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    const fiber = Thread.current().currentFiber();
    switch (fiber.cancel_protection.check()) {
        .unblocked => {
            const cancel_status = @atomicLoad(Fiber.CancelStatus, &fiber.cancel_status, .monotonic);
            assert(cancel_status.awaiting == .nothing);
            if (cancel_status.requested) {
                @branchHint(.unlikely);
                fiber.cancel_protection.acknowledge();
                return error.Canceled;
            }
        },
        .blocked => {},
    }
}

fn select(userdata: ?*anyopaque, futures: []const *Io.AnyFuture) Io.Cancelable!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var cancel_region: CancelRegion = .init();
    defer cancel_region.deinit();
    var await_count: u31, var result = for (futures, 0..) |future, future_index| {
        const future_fiber: *Fiber = @ptrCast(@alignCast(future));
        if (@atomicRmw(
            ?*Fiber,
            &future_fiber.link.awaiter,
            .Xchg,
            cancel_region.fiber,
            .acq_rel,
        )) |awaiter| {
            assert(awaiter == Fiber.finished);
            break .{ @intCast(future_index), future_index };
        }
    } else result: {
        const await_count: u31 = @intCast(futures.len);
        cancel_region.await(.select) catch |err| switch (err) {
            error.Canceled => |e| break :result .{ await_count + 1, e },
        };
        ev.yield(null, .{ .await = 1 });
        cancel_region.await(.nothing) catch |err| switch (err) {
            error.Canceled => |e| break :result .{ await_count, e },
        };
        break :result .{ await_count - 1, futures.len };
    };
    for (futures[0 .. result catch futures.len], 0..) |future, future_index| {
        const future_fiber: *Fiber = @ptrCast(@alignCast(future));
        const awaiter = @atomicRmw(?*Fiber, &future_fiber.link.awaiter, .Xchg, null, .monotonic);
        if (awaiter == Fiber.finished) {
            @atomicStore(?*Fiber, &future_fiber.link.awaiter, Fiber.finished, .monotonic);
            result = if (result) |finished_index| @min(future_index, finished_index) else |e| e;
        } else {
            assert(awaiter == cancel_region.fiber);
            await_count -= 1;
        }
    }
    // Equivalent to `ev.yield(null, .{ .await = await_count });`,
    // but avoiding a context switch in the common case.
+ switch (std.math.order( + @atomicRmw(i32, &cancel_region.fiber.await_count, .Sub, await_count, .monotonic), + await_count, + )) { + .lt => ev.yield(null, .{ .await = 0 }), + .eq => {}, + .gt => unreachable, + } + return result; +} + +fn futexWait( + userdata: ?*anyopaque, + ptr: *const u32, + expected: u32, + timeout: Io.Timeout, +) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const timespec: ?linux.kernel_timespec, const clock: Io.Clock, const timeout_flags: u32 = timespec: switch (timeout) { + .none => .{ + null, + .awake, + linux.IORING_TIMEOUT_ABS, + }, + .duration => |duration| { + const ns = duration.raw.toNanoseconds(); + break :timespec .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + duration.clock, + 0, + }; + }, + .deadline => |deadline| { + const ns = deadline.raw.toNanoseconds(); + break :timespec .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + deadline.clock, + linux.IORING_TIMEOUT_ABS, + }; + }, + }; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .FUTEX_WAIT, + .flags = if (timespec) |_| linux.IOSQE_IO_LINK else 0, + .ioprio = 0, + .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = true }), + .off = expected, + .addr = @intFromPtr(ptr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = std.math.maxInt(u32), + .resv = 0, + }; + if (timespec) |*timespec_ptr| thread.enqueue().* = .{ + .opcode = .LINK_TIMEOUT, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(timespec_ptr), + .len = 1, + .rw_flags = timeout_flags | @as(u32, switch (clock) { + .real => linux.IORING_TIMEOUT_REALTIME, + else => 0, + .boot => 
linux.IORING_TIMEOUT_BOOTTIME, + }), + .user_data = @intFromEnum(Completion.Userdata.wakeup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => {}, // notified by `wake()` + .INTR, .CANCELED => {}, // caller's responsibility to retry + .AGAIN => {}, // ptr.* != expect + .INVAL => {}, // possibly timeout overflow + .TIMEDOUT => unreachable, + .FAULT => recoverableOsBugDetected(), // ptr was invalid + else => recoverableOsBugDetected(), + } +} + +fn futexWaitUncancelable(userdata: ?*anyopaque, ptr: *const u32, expected: u32) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + const thread = cancel_region.awaitIoUring() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + thread.enqueue().* = .{ + .opcode = .FUTEX_WAIT, + .flags = 0, + .ioprio = 0, + .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = true }), + .off = expected, + .addr = @intFromPtr(ptr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = std.math.maxInt(u32), + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => {}, // notified by `wake()` + .INTR, .CANCELED => {}, // caller's responsibility to retry + .AGAIN => {}, // ptr.* != expect + .INVAL => {}, // possibly timeout overflow + .FAULT => recoverableOsBugDetected(), // ptr was invalid + else => recoverableOsBugDetected(), + } +} + +fn futexWake(userdata: ?*anyopaque, ptr: *const u32, max_waiters: u32) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const thread: *Thread = .current(); + thread.enqueue().* = .{ + .opcode = .FUTEX_WAKE, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, 
.private = true }), + .off = max_waiters, + .addr = @intFromPtr(ptr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.Userdata.futex_wake), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = std.math.maxInt(u32), + .resv = 0, + }; + thread.submit(); +} + +fn operate(userdata: ?*anyopaque, operation: Io.Operation) Io.Cancelable!Io.Operation.Result { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; + defer maybe_sync.deinit(ev); + return switch (operation) { + .file_read_streaming => |o| .{ + .file_read_streaming = ev.fileReadStreaming( + &maybe_sync.cancel_region, + o.file, + o.data, + ) catch |err| switch (err) { + error.Canceled => |e| return e, + else => |e| e, + }, + }, + .file_write_streaming => |o| .{ + .file_write_streaming = ev.fileWriteStreaming( + &maybe_sync.cancel_region, + o.file, + o.header, + o.data, + o.splat, + ) catch |err| switch (err) { + error.Canceled => |e| return e, + else => |e| e, + }, + }, + .device_io_control => |o| .{ + .device_io_control = try ev.deviceIoControl(try maybe_sync.enterSync(ev), o), + }, + }; +} + +fn fileReadStreaming( + ev: *Evented, + cancel_region: *CancelRegion, + file: File, + data: []const []u8, +) File.ReadStreamingError!usize { + var iovecs_buffer: [max_iovecs_len]iovec = undefined; + var i: usize = 0; + for (data) |buf| { + if (iovecs_buffer.len - i == 0) break; + if (buf.len > 0) { + iovecs_buffer[i] = .{ .base = buf.ptr, .len = buf.len }; + i += 1; + } + } + const dest = iovecs_buffer[0..i]; + assert(dest[0].len > 0); + + const n = try ev.preadv(cancel_region, file.handle, dest, null); + return if (n == 0) error.EndOfStream else n; +} + +fn fileWriteStreaming( + ev: *Evented, + cancel_region: *CancelRegion, + file: File, + header: []const u8, + data: []const []const u8, + splat: usize, +) File.Writer.Error!usize { + var iovecs: [max_iovecs_len]iovec_const = undefined; + var iovlen: iovlen_t = 
0; + addBuf(&iovecs, &iovlen, header); + for (data[0 .. data.len - 1]) |bytes| addBuf(&iovecs, &iovlen, bytes); + const pattern = data[data.len - 1]; + var backup_buffer: [splat_buffer_size]u8 = undefined; + if (iovecs.len - iovlen != 0) switch (splat) { + 0 => {}, + 1 => addBuf(&iovecs, &iovlen, pattern), + else => switch (pattern.len) { + 0 => {}, + 1 => { + const splat_buffer = &backup_buffer; + const memset_len = @min(splat_buffer.len, splat); + const buf = splat_buffer[0..memset_len]; + @memset(buf, pattern[0]); + addBuf(&iovecs, &iovlen, buf); + var remaining_splat = splat - buf.len; + while (remaining_splat > splat_buffer.len and iovecs.len - iovlen != 0) { + assert(buf.len == splat_buffer.len); + addBuf(&iovecs, &iovlen, splat_buffer); + remaining_splat -= splat_buffer.len; + } + addBuf(&iovecs, &iovlen, splat_buffer[0..@min(remaining_splat, splat_buffer.len)]); + }, + else => for (0..@min(splat, iovecs.len - iovlen)) |_| { + addBuf(&iovecs, &iovlen, pattern); + }, + }, + }; + return ev.pwritev(cancel_region, file.handle, iovecs[0..iovlen], null); +} + +fn deviceIoControl( + ev: *Evented, + sync: *CancelRegion.Sync, + o: Io.Operation.DeviceIoControl, +) Io.Cancelable!i32 { + _ = ev; + while (true) { + try sync.cancel_region.await(.nothing); + const rc = linux.ioctl(o.file.handle, @bitCast(o.code), @intFromPtr(o.arg)); + switch (linux.errno(rc)) { + .SUCCESS => return @bitCast(@as(u32, @truncate(rc))), + .INTR => continue, + else => |err| return -@as(i32, @intFromEnum(err)), + } + } +} + +fn batchAwaitAsync(userdata: ?*anyopaque, batch: *Io.Batch) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; + defer maybe_sync.deinit(ev); + ev.batchDrainSubmitted(&maybe_sync, batch, false) catch |err| switch (err) { + error.ConcurrencyUnavailable => unreachable, // passed concurrency=false + error.Canceled => |e| return e, + }; + maybe_sync.leaveSync(ev); + while (true) 
{ + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => unreachable, // no timeout + }; + if (batch.completed.head != .none or batch.pending.head == .none) return; + ev.yield(null, .{ .batch_await = batch }); + } +} + +fn batchAwaitConcurrent( + userdata: ?*anyopaque, + batch: *Io.Batch, + timeout: Io.Timeout, +) Io.Batch.AwaitConcurrentError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; + defer maybe_sync.deinit(ev); + try ev.batchDrainSubmitted(&maybe_sync, batch, true); + maybe_sync.leaveSync(ev); + const timespec: linux.kernel_timespec, const clock: Io.Clock, const timeout_flags: u32 = while (true) { + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => unreachable, // no timeout + }; + if (batch.completed.head != .none or batch.pending.head == .none) return; + switch (timeout) { + .none => ev.yield(null, .{ .batch_await = batch }), + .duration => |duration| { + const ns = duration.raw.toNanoseconds(); + break .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + duration.clock, + 0, + }; + }, + .deadline => |deadline| { + const ns = deadline.raw.toNanoseconds(); + break .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + deadline.clock, + linux.IORING_TIMEOUT_ABS, + }; + }, + } + }; + { + const thread = try maybe_sync.cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .TIMEOUT, + .flags = 0, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(&timespec), + .len = 1, + .rw_flags = timeout_flags | @as(u32, switch (clock) { + .real => linux.IORING_TIMEOUT_REALTIME, + else => 0, + .boot => linux.IORING_TIMEOUT_BOOTTIME, + }), + .user_data = @intFromPtr(&batch.userdata) | 0b11, + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + } + while 
(batch.completed.head == .none and batch.pending.head != .none) { + ev.yield(null, .{ .batch_await = batch }); + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => |e| return if (batch.completed.head == .none and + batch.pending.head != .none) e, + }; + } + const thread = try maybe_sync.cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .TIMEOUT_REMOVE, + .flags = 0, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(&batch.userdata) | 0b11, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(maybe_sync.cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (maybe_sync.cancel_region.errno()) { + .SUCCESS => return, + .BUSY, .NOENT => {}, + else => |err| unexpectedErrno(err) catch {}, + } + while (true) { + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => return, + }; + ev.yield(null, .{ .batch_await = batch }); + } +} + +/// If `concurrency` is false, `error.ConcurrencyUnavailable` is unreachable. 
+fn batchDrainSubmitted( + ev: *Evented, + maybe_sync: *CancelRegion.Sync.Maybe, + batch: *Io.Batch, + concurrency: bool, +) (Io.ConcurrentError || Io.Cancelable)!void { + var index = batch.submitted.head; + if (index == .none) return; + const thread = try maybe_sync.cancelRegion().awaitIoUring(); + errdefer batch.submitted.head = index; + while (index != .none) { + const storage = &batch.storage[index.toIndex()]; + const next_index = storage.submission.node.next; + if (@as(?Io.Operation.Result, result: switch (storage.submission.operation) { + .file_read_streaming => |o| { + const buffer = for (o.data) |buffer| { + if (buffer.len > 0) break buffer; + } else break :result .{ .file_read_streaming = 0 }; + const fd = o.file.handle; + storage.* = .{ .pending = .{ + .node = .{ .prev = batch.pending.tail, .next = .none }, + .tag = .file_read_streaming, + .userdata = undefined, + } }; + thread.enqueue().* = .{ + .opcode = .READ, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = std.math.maxInt(u64), + .addr = @intFromPtr(buffer.ptr), + .len = @min(buffer.len, 0xfffff000), + .rw_flags = 0, + .user_data = @intFromPtr(&storage.pending.userdata) | 0b10, + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + break :result null; + }, + .file_write_streaming => |o| { + const buffer = buffer: { + if (o.header.len != 0) break :buffer o.header; + for (o.data[0 .. 
o.data.len - 1]) |buffer| { + if (buffer.len > 0) break :buffer buffer; + } + if (o.splat > 0) break :buffer o.data[o.data.len - 1]; + break :result .{ .file_write_streaming = 0 }; + }; + const fd = o.file.handle; + storage.* = .{ .pending = .{ + .node = .{ .prev = batch.pending.tail, .next = .none }, + .tag = .file_write_streaming, + .userdata = undefined, + } }; + thread.enqueue().* = .{ + .opcode = .WRITE, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = std.math.maxInt(u64), + .addr = @intFromPtr(buffer.ptr), + .len = @min(buffer.len, 0xfffff000), + .rw_flags = 0, + .user_data = @intFromPtr(&storage.pending.userdata) | 0b10, + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + break :result null; + }, + .device_io_control => |o| if (concurrency) + return error.ConcurrencyUnavailable + else + .{ .device_io_control = try ev.deviceIoControl(try maybe_sync.enterSync(ev), o) }, + })) |result| { + switch (batch.completed.tail) { + .none => batch.completed.head = index, + else => |tail_index| batch.storage[tail_index.toIndex()].completion.node.next = index, + } + batch.completed.tail = index; + storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; + } else { + switch (batch.pending.tail) { + .none => batch.pending.head = index, + else => |tail_index| batch.storage[tail_index.toIndex()].pending.node.next = index, + } + batch.pending.tail = index; + storage.pending.userdata[0] = @intFromPtr(batch); + } + index = next_index; + } + batch.submitted = .{ .head = .none, .tail = .none }; +} + +fn batchDrainReady(batch: *Io.Batch) Io.Timeout.Error!void { + while (@atomicRmw(?*anyopaque, &batch.userdata, .Xchg, null, .acquire)) |head| { + var next: usize = @intFromPtr(head); + var timeout = false; + while (cond: switch (@as(u2, @truncate(next))) { + 0b00 => if (timeout) return error.Timeout else false, + 0b01 => { + assert(!timeout); + return error.Timeout; + }, + 0b10 => true, + 0b11 => { + assert(!timeout); 
+ timeout = true; + break :cond true; + }, + }) { + var operation_userdata: *Io.Operation.Storage.Pending.Userdata = + @ptrFromInt(next & ~@as(usize, 0b11)); + next = operation_userdata[0]; + const completion: Completion = .{ + .result = @bitCast(@as(u32, @intCast(operation_userdata[1]))), + .flags = @intCast(operation_userdata[2]), + }; + const pending: *Io.Operation.Storage.Pending = + @fieldParentPtr("userdata", operation_userdata); + const storage: *Io.Operation.Storage = @fieldParentPtr("pending", pending); + const index: Io.Operation.OptionalIndex = .fromIndex(storage - batch.storage.ptr); + assert(completion.flags & linux.IORING_CQE_F_SKIP == 0); + switch (pending.node.prev) { + .none => batch.pending.head = pending.node.next, + else => |prev_index| batch.storage[prev_index.toIndex()].pending.node.next = + pending.node.next, + } + switch (pending.node.next) { + .none => batch.pending.tail = pending.node.prev, + else => |prev_index| batch.storage[prev_index.toIndex()].pending.node.prev = + pending.node.prev, + } + if (@as(?Io.Operation.Result, result: switch (pending.tag) { + .file_read_streaming => .{ + .file_read_streaming = switch (completion.errno()) { + .SUCCESS => @as(u32, @bitCast(completion.result)), + .INTR => 0, + .CANCELED => break :result null, + .INVAL => |err| errnoBug(err), + .FAULT => |err| errnoBug(err), + .AGAIN => error.WouldBlock, + .BADF => |err| errnoBug(err), // File descriptor used after closed + .IO => error.InputOutput, + .ISDIR => error.IsDir, + .NOBUFS => error.SystemResources, + .NOMEM => error.SystemResources, + .NOTCONN => error.SocketUnconnected, + .CONNRESET => error.ConnectionResetByPeer, + else => |err| unexpectedErrno(err), + }, + }, + .file_write_streaming => .{ + .file_write_streaming = switch (completion.errno()) { + .SUCCESS => @as(u32, @bitCast(completion.result)), + .INTR => 0, + .CANCELED => break :result null, + .INVAL => |err| errnoBug(err), + .FAULT => |err| errnoBug(err), + .AGAIN => error.WouldBlock, + .BADF => 
error.NotOpenForWriting, // Can be a race condition. + .DESTADDRREQ => |err| errnoBug(err), // `connect` was never called. + .DQUOT => error.DiskQuota, + .FBIG => error.FileTooBig, + .IO => error.InputOutput, + .NOSPC => error.NoSpaceLeft, + .PERM => error.PermissionDenied, + .PIPE => error.BrokenPipe, + .CONNRESET => |err| errnoBug(err), // Not a socket handle. + .BUSY => error.DeviceBusy, + else => |err| unexpectedErrno(err), + }, + }, + .device_io_control => unreachable, + })) |result| { + switch (batch.completed.tail) { + .none => batch.completed.head = index, + else => |tail_index| batch.storage[tail_index.toIndex()].completion.node.next = + index, + } + storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; + batch.completed.tail = index; + } else { + switch (batch.unused.tail) { + .none => batch.unused.head = index, + else => |tail_index| batch.storage[tail_index.toIndex()].unused.next = index, + } + storage.* = .{ .unused = .{ .prev = batch.unused.tail, .next = .none } }; + batch.unused.tail = index; + } + } + } +} + +fn batchCancel(userdata: ?*anyopaque, batch: *Io.Batch) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => unreachable, // no timeout + }; + var index = batch.pending.head; + if (index == .none) return; + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + const thread = cancel_region.awaitIoUring() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + while (index != .none) { + const pending = &batch.storage[index.toIndex()].pending; + thread.enqueue().* = .{ + .opcode = .ASYNC_CANCEL, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(&pending.userdata) | 0b10, + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.Userdata.wakeup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + 
.resv = 0, + }; + index = pending.node.next; + } + while (batch.pending.head != .none) batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => unreachable, // no timeout + }; +} + +fn dirCreateDir( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + permissions: Dir.Permissions, +) Dir.CreateDirError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .MKDIRAT, + .flags = 0, + .ioprio = 0, + .fd = dir.handle, + .off = 0, + .addr = @intFromPtr(sub_path_posix.ptr), + .len = permissions.toMode(), + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ .PERM => return error.PermissionDenied, + .DQUOT => return error.DiskQuota, + .EXIST => return error.PathAlreadyExists, + .FAULT => |err| return errnoBug(err), + .LOOP => return error.SymLinkLoop, + .MLINK => return error.LinkQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .NOTDIR => return error.NotDir, + .ROFS => return error.ReadOnlyFileSystem, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } + } +} + +fn dirCreateDirPath( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + permissions: Dir.Permissions, +) Dir.CreateDirPathError!Dir.CreatePathStatus { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var it = Dir.path.componentIterator(sub_path); + var status: Dir.CreatePathStatus = .existed; + var component = it.last() orelse return error.BadPathName; + while (true) { + if (dirCreateDir(ev, dir, component.path, permissions)) |_| { + status = .created; + } else |err| switch (err) { + error.PathAlreadyExists => { + // stat the file and return an error if it's not a directory + // this is important because otherwise a dangling symlink + // could cause an infinite loop + const kind = try ev.filePathKind(dir, component.path); + if (kind != .directory) return error.NotDir; + }, + error.FileNotFound => |e| { + component = it.previous() orelse return e; + continue; + }, + else => |e| return e, + } + component = it.next() orelse return status; + } +} + +fn filePathKind(ev: *Evented, dir: Dir, sub_path: []const u8) !File.Kind { + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + var statx_buf = std.mem.zeroes(linux.Statx); + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .STATX, + .flags = 
0, + .ioprio = 0, + .fd = dir.handle, + .off = @intFromPtr(&statx_buf), + .addr = @intFromPtr(sub_path_posix.ptr), + .len = @bitCast(linux.STATX{ .TYPE = true }), + .rw_flags = linux.AT.NO_AUTOMOUNT | linux.AT.SYMLINK_NOFOLLOW, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => { + if (!statx_buf.mask.TYPE) return error.Unexpected; + return statxKind(statx_buf.mode); + }, + .INTR, .CANCELED => continue, + .ACCES => |err| return errnoBug(err), + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .LOOP => |err| return errnoBug(err), + .NAMETOOLONG => |err| return errnoBug(err), + .NOENT => |err| return errnoBug(err), + .NOMEM => return error.SystemResources, + .NOTDIR => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn dirCreateDirPathOpen( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + permissions: Dir.Permissions, + options: Dir.OpenOptions, +) Dir.CreateDirPathOpenError!Dir { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return dirOpenDir(ev, dir, sub_path, options) catch |err| switch (err) { + error.FileNotFound => { + _ = try dirCreateDirPath(ev, dir, sub_path, permissions); + return dirOpenDir(ev, dir, sub_path, options); + }, + else => |e| return e, + }; +} + +fn dirOpenDir( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + options: Dir.OpenOptions, +) Dir.OpenError!Dir { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return .{ + .handle = ev.openat(&cancel_region, dir.handle, sub_path_posix, .{ + .ACCMODE = 
.RDONLY, + .DIRECTORY = true, + .NOFOLLOW = !options.follow_symlinks, + .CLOEXEC = true, + .PATH = !options.iterate, + }, 0) catch |err| switch (err) { + error.IsDir => return errnoBug(.ISDIR), + error.WouldBlock => return errnoBug(.AGAIN), + error.FileTooBig => return errnoBug(.FBIG), + error.NoSpaceLeft => return errnoBug(.NOSPC), + error.DeviceBusy => return errnoBug(.BUSY), // O_EXCL not passed + error.FileBusy => return errnoBug(.TXTBSY), + error.PathAlreadyExists => return errnoBug(.EXIST), // Not creating. + error.PipeBusy => return error.Unexpected, // Not opening a pipe. + error.AntivirusInterference => unreachable, // Windows-only + error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks. + else => |e| return e, + }, + }; +} + +fn dirStat(userdata: ?*anyopaque, dir: Dir) Dir.StatError!Dir.Stat { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.stat(&cancel_region, dir.handle); +} + +fn dirStatFile( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + options: Dir.StatFileOptions, +) Dir.StatFileError!File.Stat { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.statx(&cancel_region, dir.handle, sub_path_posix, linux.AT.NO_AUTOMOUNT | + @as(u32, if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW)); +} + +fn dirAccess( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + options: Dir.AccessOptions, +) Dir.AccessError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + const mode: u32 = + @as(u32, if (options.read) linux.R_OK else 0) | + @as(u32, if (options.write) linux.W_OK else 
0) |
        @as(u32, if (options.execute) linux.X_OK else 0);
    const flags: u32 = if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW;

    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    while (true) {
        try sync.cancel_region.await(.nothing);
        switch (linux.errno(linux.faccessat(dir.handle, sub_path_posix, mode, flags))) {
            .SUCCESS => return,
            .INTR => continue,
            .ACCES => return error.AccessDenied,
            .PERM => return error.PermissionDenied,
            .ROFS => return error.ReadOnlyFileSystem,
            .LOOP => return error.SymLinkLoop,
            .TXTBSY => return error.FileBusy,
            .NOTDIR => return error.FileNotFound,
            .NOENT => return error.FileNotFound,
            .NAMETOOLONG => return error.NameTooLong,
            .INVAL => |err| return errnoBug(err),
            .FAULT => |err| return errnoBug(err),
            .IO => return error.InputOutput,
            .NOMEM => return error.SystemResources,
            .ILSEQ => return error.BadPathName,
            else => |err| return unexpectedErrno(err),
        }
    }
}

/// Opens `sub_path` relative to `dir` with `O_CREAT`: write-only, or
/// read-write when `flags.read` is set, honoring `flags.truncate`,
/// `flags.exclusive` and `flags.permissions`.
///
/// When `flags.lock` is not `.none` the lock is then taken with `flock`,
/// non-blocking if `flags.lock_nonblocking` is set. If that step fails the
/// freshly opened descriptor is closed again before the error is returned.
fn dirCreateFile(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    flags: File.CreateFlags,
) File.OpenError!File {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var posix_path_storage: [PATH_MAX]u8 = undefined;
    const posix_path = try pathToPosix(sub_path, &posix_path_storage);

    var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() };
    defer maybe_sync.deinit(ev);
    const fd = try ev.openat(&maybe_sync.cancel_region, dir.handle, posix_path, .{
        .ACCMODE = if (flags.read) .RDWR else .WRONLY,
        .CREAT = true,
        .TRUNC = flags.truncate,
        .EXCL = flags.exclusive,
        .CLOEXEC = true,
    }, flags.permissions.toMode());
    errdefer ev.close(fd);

    // `.shared` and `.exclusive` both go through flock; `.none` skips it.
    if (flags.lock != .none) try ev.flock(
        try maybe_sync.enterSync(ev),
        fd,
        flags.lock,
        if (flags.lock_nonblocking) .nonblocking else .blocking,
    );

    return .{ .handle = fd, .flags = .{ .nonblocking = false } };
}

/// Starts an atomic file creation targeting `dest_path` relative to `dir`.
///
/// When `options.replace` is false, an unnamed `O_TMPFILE` file is first
/// attempted in the destination directory. If the `linux.O` flag set has no
/// usable TMPFILE bits, or the open reports `IsDir`/`FileNotFound`, the
/// function falls back to a randomly named temporary file created by
/// `atomicFileInit`.
///
/// Parent directories of `dest_path` are created only when
/// `options.make_path` is set, on both the fast path and the fallback.
fn dirCreateFileAtomic(
    userdata: ?*anyopaque,
    dir: Dir,
    dest_path: []const u8,
    options: Dir.CreateFileAtomicOptions,
) Dir.CreateFileAtomicError!File.Atomic {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    // Linux has O_TMPFILE, but linkat() does not support AT_REPLACE, so it's
    // useless when we have to make up a bogus path name to do the rename()
    // anyway.
    if (!options.replace) tmpfile: {
        const flags: linux.O = if (@hasField(linux.O, "TMPFILE")) .{
            .ACCMODE = .RDWR,
            .TMPFILE = true,
            .DIRECTORY = true,
            .CLOEXEC = true,
        } else if (@hasField(linux.O, "TMPFILE0") and !@hasField(linux.O, "TMPFILE2")) .{
            .ACCMODE = .RDWR,
            .TMPFILE0 = true,
            .TMPFILE1 = true,
            .DIRECTORY = true,
            .CLOEXEC = true,
        } else break :tmpfile;

        const dest_dirname = Dir.path.dirname(dest_path);
        if (dest_dirname) |dirname| {
            // Create missing parents only when the caller asked for it, the
            // same rule the fallback path below applies. When it does run,
            // this has a nice side effect of preemptively triggering EISDIR
            // or ENOENT, avoiding the ambiguity below. Without `make_path`,
            // a missing parent surfaces through the fallback's `dirOpenDir`.
            if (options.make_path) _ = dirCreateDirPath(ev, dir, dirname, .default_dir) catch |err| switch (err) {
                // None of these make sense in this context.
                error.IsDir,
                error.Streaming,
                error.DiskQuota,
                error.PathAlreadyExists,
                error.LinkQuotaExceeded,
                error.PipeBusy,
                error.FileTooBig,
                error.DeviceBusy,
                error.FileLocksUnsupported,
                error.FileBusy,
                => return error.Unexpected,

                else => |e| return e,
            };
        }

        var path_buffer: [PATH_MAX]u8 = undefined;
        const sub_path_posix = try pathToPosix(dest_dirname orelse ".", &path_buffer);

        var cancel_region: CancelRegion = .init();
        defer cancel_region.deinit();
        return .{
            .file = .{
                .handle = ev.openat(
                    &cancel_region,
                    dir.handle,
                    sub_path_posix,
                    flags,
                    options.permissions.toMode(),
                ) catch |err| switch (err) {
                    error.IsDir, error.FileNotFound => {
                        // Ambiguous error code. It might mean the file system
                        // does not support O_TMPFILE. Therefore, we must fall
                        // back to not using O_TMPFILE.
                        break :tmpfile;
                    },
                    error.FileTooBig => return errnoBug(.FBIG),
                    error.DeviceBusy => return errnoBug(.BUSY), // O_EXCL not passed
                    error.PathAlreadyExists => return errnoBug(.EXIST), // Not creating.
                    error.PipeBusy => return error.Unexpected, // Not opening a pipe.
                    error.AntivirusInterference => unreachable, // Windows-only
                    error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks.
                    else => |e| return e,
                },
                .flags = .{ .nonblocking = false },
            },
            // No temporary name exists on this path: the file is unnamed.
            .file_basename_hex = 0,
            .dest_sub_path = dest_path,
            .file_open = true,
            .file_exists = false,
            .close_dir_on_deinit = false,
            .dir = dir,
        };
    }

    if (Dir.path.dirname(dest_path)) |dirname| {
        const new_dir = if (options.make_path)
            dirCreateDirPathOpen(ev, dir, dirname, .default_dir, .{}) catch |err| switch (err) {
                // None of these make sense in this context.
                error.IsDir,
                error.Streaming,
                error.DiskQuota,
                error.PathAlreadyExists,
                error.LinkQuotaExceeded,
                error.PipeBusy,
                error.FileTooBig,
                error.FileLocksUnsupported,
                error.DeviceBusy,
                => return error.Unexpected,

                else => |e| return e,
            }
        else
            try dirOpenDir(ev, dir, dirname, .{});

        // `new_dir` was opened here, so the returned value is told to close it.
        return ev.atomicFileInit(Dir.path.basename(dest_path), options.permissions, new_dir, true);
    }

    return ev.atomicFileInit(dest_path, options.permissions, dir, false);
}

/// Creates the temporary file backing a `File.Atomic` inside `dir`.
///
/// The temporary name is the hex rendering of a random 64-bit integer; the
/// file is created exclusively with `permissions`. A fresh name is tried
/// whenever creation reports `PathAlreadyExists`, `DeviceBusy` or `FileBusy`.
/// The chosen integer is stored in `file_basename_hex` of the result.
fn atomicFileInit(
    ev: *Evented,
    dest_basename: []const u8,
    permissions: File.Permissions,
    dir: Dir,
    close_dir_on_deinit: bool,
) Dir.CreateFileAtomicError!File.Atomic {
    while (true) {
        var name_seed: u64 = undefined;
        random(ev, @ptrCast(&name_seed));
        const hex_name = std.fmt.hex(name_seed);
        const tmp_file = dirCreateFile(ev, dir, &hex_name, .{
            .permissions = permissions,
            .exclusive = true,
        }) catch |err| switch (err) {
            // Try again with a different random name.
            error.PathAlreadyExists, error.DeviceBusy, error.FileBusy => continue,

            // IsDir: no path components. FileTooBig: creating, not opening.
            // FileLocksUnsupported: not asking for locks. PipeBusy: not
            // opening a pipe.
            error.IsDir,
            error.FileTooBig,
            error.FileLocksUnsupported,
            error.PipeBusy,
            => return error.Unexpected,

            else => |e| return e,
        };
        return .{
            .file = tmp_file,
            .file_basename_hex = name_seed,
            .dest_sub_path = dest_basename,
            .file_open = true,
            .file_exists = true,
            .close_dir_on_deinit = close_dir_on_deinit,
            .dir = dir,
        };
    }
}

/// Opens the existing entry `sub_path` relative to `dir` using the access
/// mode and the ctty / symlink / path-only settings in `flags`.
///
/// Unless `flags.allow_directory` is set, the opened descriptor is stat'ed and
/// `error.IsDir` is returned for a directory. A lock is taken afterwards when
/// `flags.lock` is not `.none`. The descriptor is closed on every error that
/// occurs after the open.
fn dirOpenFile(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    flags: File.OpenFlags,
) File.OpenError!File {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var posix_path_storage: [PATH_MAX]u8 = undefined;
    const posix_path = try pathToPosix(sub_path, &posix_path_storage);

    var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() };
    defer maybe_sync.deinit(ev);
    const fd = try ev.openat(&maybe_sync.cancel_region, dir.handle, posix_path, .{
        .ACCMODE = switch (flags.mode) {
            .read_only => .RDONLY,
            .write_only => .WRONLY,
            .read_write => .RDWR,
        },
        .NOCTTY = !flags.allow_ctty,
        .NOFOLLOW = !flags.follow_symlinks,
        .CLOEXEC = true,
        .PATH = flags.path_only,
    }, 0);
    errdefer ev.close(fd);

    if (!flags.allow_directory) directory_check: {
        const info = ev.stat(&maybe_sync.cancel_region, fd) catch |err| switch (err) {
            // The directory-ness is either unknown or unknowable
            error.Streaming => break :directory_check,
            else => |e| return e,
        };
        if (info.kind == .directory) return error.IsDir;
    }

    // `.shared` and `.exclusive` both go through flock; `.none` skips it.
    if (flags.lock != .none) try ev.flock(
        try maybe_sync.enterSync(ev),
        fd,
        flags.lock,
        if (flags.lock_nonblocking) .nonblocking else .blocking,
    );

    return .{ .handle = fd, .flags = .{ .nonblocking = false } };
}

fn dirClose(userdata: ?*anyopaque, dirs: []const Dir) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    for (dirs) |dir|
ev.close(dir.handle);
}

/// Copies directory entries from `dr` into `buffer` and returns how many were
/// written.
///
/// When `dr`'s byte buffer is drained it is refilled with `getdents64`, after
/// rewinding the directory if `dr.state` is `.reset`. The `.` and `..` entries
/// are skipped. Entry names are slices into `dr.buffer`, which is why a refill
/// never happens once at least one entry has been produced in this call.
/// Returns 0 and marks the reader `.finished` at end of directory, or when the
/// directory was deleted during iteration.
fn dirRead(userdata: ?*anyopaque, dr: *Dir.Reader, buffer: []Dir.Entry) Dir.Reader.Error!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var filled: usize = 0;
    while (buffer.len - filled != 0) {
        if (dr.end - dr.index == 0) {
            // Refill the buffer, unless we've already created references to
            // buffered data.
            if (filled != 0) break;
            var sync: CancelRegion.Sync = try .init(ev);
            defer sync.deinit(ev);
            if (dr.state == .reset) {
                ev.lseek(&sync, dr.dir.handle, 0, linux.SEEK.SET) catch |err| switch (err) {
                    error.Unseekable => return error.Unexpected,
                    else => |e| return e,
                };
                dr.state = .reading;
            }
            const bytes_read = while (true) {
                try sync.cancel_region.await(.nothing);
                const rc = linux.getdents64(dr.dir.handle, dr.buffer.ptr, dr.buffer.len);
                switch (linux.errno(rc)) {
                    .SUCCESS => break rc,
                    .INTR => continue,
                    .BADF => |err| return errnoBug(err), // Dir is invalid or was opened without iteration ability.
                    .FAULT => |err| return errnoBug(err),
                    .NOTDIR => |err| return errnoBug(err),
                    // To be consistent across platforms, iteration
                    // ends if the directory being iterated is deleted
                    // during iteration. This matches the behavior of
                    // non-Linux, non-WASI UNIX platforms.
                    .NOENT => {
                        dr.state = .finished;
                        return 0;
                    },
                    // This can occur when reading /proc/$PID/net, or
                    // if the provided buffer is too small. Neither
                    // scenario is intended to be handled by this API.
                    .INVAL => return error.Unexpected,
                    .ACCES => return error.AccessDenied, // Lacking permission to iterate this directory.
                    else => |err| return unexpectedErrno(err),
                }
            };
            if (bytes_read == 0) {
                dr.state = .finished;
                return 0;
            }
            dr.index = 0;
            dr.end = bytes_read;
        }
        // Linux aligns the header by padding after the null byte of the name
        // to align the next entry. This means we can find the end of the name
        // by looking at only the 8 bytes before the next record. However since
        // file names are usually short it's better to keep the machine code
        // simpler.
        //
        // Furthermore, I observed qemu user mode to not align this struct, so
        // this code makes the conservative choice to not assume alignment.
        const record: *align(1) linux.dirent64 = @ptrCast(&dr.buffer[dr.index]);
        // Advance past this record before deciding whether to report it.
        dr.index += record.reclen;
        const name_ptr: [*]u8 = &record.name;
        const padded_name = name_ptr[0 .. record.reclen - @offsetOf(linux.dirent64, "name")];
        const name_len = std.mem.findScalar(u8, padded_name, 0).?;
        const name = name_ptr[0..name_len :0];

        if (std.mem.eql(u8, name, ".") or std.mem.eql(u8, name, "..")) continue;

        const kind: File.Kind = switch (record.type) {
            linux.DT.BLK => .block_device,
            linux.DT.CHR => .character_device,
            linux.DT.DIR => .directory,
            linux.DT.FIFO => .named_pipe,
            linux.DT.LNK => .sym_link,
            linux.DT.REG => .file,
            linux.DT.SOCK => .unix_domain_socket,
            else => .unknown,
        };
        buffer[filled] = .{
            .name = name,
            .kind = kind,
            .inode = record.ino,
        };
        filled += 1;
    }
    return filled;
}

/// Resolves the path of the open directory `dir` into `out_buffer` by handing
/// its descriptor to `Evented.realPath`; returns that function's result.
fn dirRealPath(userdata: ?*anyopaque, dir: Dir, out_buffer: []u8) Dir.RealPathError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var region: CancelRegion.Sync = try .init(ev);
    defer region.deinit(ev);
    return ev.realPath(&region, dir.handle, out_buffer);
}

fn dirRealPathFile(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    out_buffer: []u8,
) Dir.RealPathFileError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var path_buffer: [PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &path_buffer);

    var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() };
    defer maybe_sync.deinit(ev);
    const fd = ev.openat(&maybe_sync.cancel_region, dir.handle, sub_path_posix, .{
        .CLOEXEC = true,
        .PATH = true,
    }, 0)
catch |err| switch (err) {
        error.WouldBlock => return errnoBug(.AGAIN),
        error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks.
        else => |e| return e,
    };
    defer ev.close(fd);
    return ev.realPath(try maybe_sync.enterSync(ev), fd, out_buffer);
}

/// Removes the entry `sub_path` relative to `dir` by enqueueing an `UNLINKAT`
/// request with no `AT.REMOVEDIR` flag and translating the completion errno.
///
/// The request is issued again when the completion is `INTR` or `CANCELED`;
/// `awaitIoUring` can fail at the top of each attempt.
fn dirDeleteFile(userdata: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteFileError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var posix_path_storage: [PATH_MAX]u8 = undefined;
    const posix_path = try pathToPosix(sub_path, &posix_path_storage);

    var region: CancelRegion = .init();
    defer region.deinit();
    while (true) {
        const uring_thread = try region.awaitIoUring();
        uring_thread.enqueue().* = .{
            .opcode = .UNLINKAT,
            .flags = 0,
            .ioprio = 0,
            .fd = dir.handle,
            .off = 0,
            .addr = @intFromPtr(posix_path.ptr),
            .len = 0,
            .rw_flags = 0,
            .user_data = @intFromPtr(region.fiber),
            .buf_index = 0,
            .personality = 0,
            .splice_fd_in = 0,
            .addr3 = 0,
            .resv = 0,
        };
        ev.yield(null, .nothing);
        switch (region.errno()) {
            .SUCCESS => return,
            .INTR, .CANCELED => continue,

            // Errors reported to the caller.
            .PERM => return error.PermissionDenied,
            .ACCES => return error.AccessDenied,
            .BUSY => return error.FileBusy,
            .IO => return error.FileSystem,
            .ISDIR => return error.IsDir,
            .LOOP => return error.SymLinkLoop,
            .NAMETOOLONG => return error.NameTooLong,
            .NOENT => return error.FileNotFound,
            .NOTDIR => return error.NotDir,
            .NOMEM => return error.SystemResources,
            .ROFS => return error.ReadOnlyFileSystem,
            .ILSEQ => return error.BadPathName,

            // Treated as bugs:
            // - NOTEMPTY: not passing AT.REMOVEDIR.
            // - INVAL: invalid flags, or pathname has . as last component.
            // - BADF: file descriptor used after closed.
            .FAULT, .EXIST, .NOTEMPTY, .INVAL, .BADF => |err| return errnoBug(err),

            else => |err| return unexpectedErrno(err),
        }
    }
}

fn dirDeleteDir(userdata: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteDirError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var path_buffer: [PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &path_buffer);

    var cancel_region: CancelRegion = .init();
    defer cancel_region.deinit();
    while (true) {
        const thread = try cancel_region.awaitIoUring();
        thread.enqueue().* = .{
            .opcode = .UNLINKAT,
            .flags = 0,
            .ioprio = 0,
            .fd = dir.handle,
            .off = 0,
            .addr = @intFromPtr(sub_path_posix.ptr),
            .len = 0,
            .rw_flags = linux.AT.REMOVEDIR,
            .user_data = @intFromPtr(cancel_region.fiber),
            .buf_index = 0,
            .personality = 0,
            .splice_fd_in = 0,
            .addr3 = 0,
            .resv = 0,
        };
        ev.yield(null, .nothing);
        switch (cancel_region.errno()) {
            .SUCCESS => return,
            .INTR, .CANCELED => continue,
            .ACCES => return error.AccessDenied,
            .PERM => return error.PermissionDenied,
            .BUSY => return error.FileBusy,
            .FAULT => |err| return errnoBug(err),
            .IO => return error.FileSystem,
            .ISDIR => |err| return errnoBug(err),
            .LOOP => return error.SymLinkLoop,
            .NAMETOOLONG => return error.NameTooLong,
            .NOENT => return error.FileNotFound,
            .NOTDIR => return error.NotDir,
            .NOMEM => return error.SystemResources,
            .ROFS => return error.ReadOnlyFileSystem,
            .EXIST => |err| return errnoBug(err),
            .NOTEMPTY => return error.DirNotEmpty,
            .ILSEQ => return error.BadPathName,
            .INVAL => |err| return errnoBug(err), // invalid flags, or pathname has . as last component
            .BADF => |err| return errnoBug(err), // File descriptor used after closed.
else => |err| return unexpectedErrno(err),
        }
    }
}

/// Renames `old_sub_path` (relative to `old_dir`) to `new_sub_path` (relative
/// to `new_dir`) through `Evented.renameat` with no rename flags set.
fn dirRename(
    userdata: ?*anyopaque,
    old_dir: Dir,
    old_sub_path: []const u8,
    new_dir: Dir,
    new_sub_path: []const u8,
) Dir.RenameError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    // Both paths are converted before any request is issued, so a path error
    // on either one is reported first.
    var old_storage: [PATH_MAX]u8 = undefined;
    var new_storage: [PATH_MAX]u8 = undefined;
    const old_posix = try pathToPosix(old_sub_path, &old_storage);
    const new_posix = try pathToPosix(new_sub_path, &new_storage);

    var region: CancelRegion = .init();
    defer region.deinit();
    return ev.renameat(&region, old_dir.handle, old_posix, new_dir.handle, new_posix, .{});
}

/// Same as `dirRename`, except that the rename is issued with the `NOREPLACE`
/// flag set.
fn dirRenamePreserve(
    userdata: ?*anyopaque,
    old_dir: Dir,
    old_sub_path: []const u8,
    new_dir: Dir,
    new_sub_path: []const u8,
) Dir.RenamePreserveError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var old_storage: [PATH_MAX]u8 = undefined;
    var new_storage: [PATH_MAX]u8 = undefined;
    const old_posix = try pathToPosix(old_sub_path, &old_storage);
    const new_posix = try pathToPosix(new_sub_path, &new_storage);

    var region: CancelRegion = .init();
    defer region.deinit();
    return ev.renameat(
        &region,
        old_dir.handle,
        old_posix,
        new_dir.handle,
        new_posix,
        .{ .NOREPLACE = true },
    );
}

/// Creates a symbolic link at `sym_link_path` (relative to `dir`) whose
/// contents are `target_path`, by enqueueing a `SYMLINKAT` request and
/// translating the completion errno. `flags` is ignored.
///
/// The request is issued again when the completion is `INTR` or `CANCELED`.
fn dirSymLink(
    userdata: ?*anyopaque,
    dir: Dir,
    target_path: []const u8,
    sym_link_path: []const u8,
    flags: Dir.SymLinkFlags,
) Dir.SymLinkError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = flags;

    var target_storage: [PATH_MAX]u8 = undefined;
    var link_storage: [PATH_MAX]u8 = undefined;
    const target_posix = try pathToPosix(target_path, &target_storage);
    const link_posix = try pathToPosix(sym_link_path, &link_storage);

    var region: CancelRegion = .init();
    defer region.deinit();
    while (true) {
        const uring_thread = try region.awaitIoUring();
        uring_thread.enqueue().* = .{
            .opcode = .SYMLINKAT,
            .flags = 0,
            .ioprio = 0,
            .fd = dir.handle,
            // The link location goes in `off`, the link contents in `addr`.
            .off = @intFromPtr(link_posix.ptr),
            .addr = @intFromPtr(target_posix.ptr),
            .len = 0,
            .rw_flags = 0,
            .user_data = @intFromPtr(region.fiber),
            .buf_index = 0,
            .personality = 0,
            .splice_fd_in = 0,
            .addr3 = 0,
            .resv = 0,
        };
        ev.yield(null, .nothing);
        switch (region.errno()) {
            .SUCCESS => return,
            .INTR, .CANCELED => continue,
            .FAULT, .INVAL => |err| return errnoBug(err),
            .ACCES => return error.AccessDenied,
            .PERM => return error.PermissionDenied,
            .DQUOT => return error.DiskQuota,
            .EXIST => return error.PathAlreadyExists,
            .IO => return error.FileSystem,
            .LOOP => return error.SymLinkLoop,
            .NAMETOOLONG => return error.NameTooLong,
            .NOENT => return error.FileNotFound,
            .NOTDIR => return error.NotDir,
            .NOMEM => return error.SystemResources,
            .NOSPC => return error.NoSpaceLeft,
            .ROFS => return error.ReadOnlyFileSystem,
            .ILSEQ => return error.BadPathName,
            else => |err| return unexpectedErrno(err),
        }
    }
}

fn dirReadLink(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    buffer: []u8,
) Dir.ReadLinkError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var sub_path_buffer: [PATH_MAX]u8 = undefined;
    const sub_path_posix = try pathToPosix(sub_path, &sub_path_buffer);

    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    while (true) {
        try sync.cancel_region.await(.nothing);
        const rc = linux.readlinkat(dir.handle, sub_path_posix, buffer.ptr, buffer.len);
        switch (linux.errno(rc)) {
            .SUCCESS => return @bitCast(rc),
            .INTR => continue,
            .ACCES => return error.AccessDenied,
            .FAULT => |err| return errnoBug(err),
            .INVAL => return error.NotLink,
            .IO => return error.FileSystem,
            .LOOP =>
return error.SymLinkLoop,
            .NAMETOOLONG => return error.NameTooLong,
            .NOENT => return error.FileNotFound,
            .NOMEM => return error.SystemResources,
            .NOTDIR => return error.NotDir,
            .ILSEQ => return error.BadPathName,
            else => |err| return unexpectedErrno(err),
        }
    }
}

/// Changes the owner and/or group of the open directory `dir` itself, using
/// `fchownat` with an empty path and `AT.EMPTY_PATH`.
///
/// A `null` owner or group is passed down as the maximum value of the id type.
fn dirSetOwner(
    userdata: ?*anyopaque,
    dir: Dir,
    owner: ?File.Uid,
    group: ?File.Gid,
) Dir.SetOwnerError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    const uid = owner orelse std.math.maxInt(linux.uid_t);
    const gid = group orelse std.math.maxInt(linux.gid_t);
    try ev.fchownat(&sync, dir.handle, "", uid, gid, linux.AT.EMPTY_PATH);
}

/// Changes the owner and/or group of `sub_path` relative to `dir`.
///
/// A `null` owner or group is passed down as the maximum value of the id type.
/// `AT.SYMLINK_NOFOLLOW` is passed unless `options.follow_symlinks` is set.
fn dirSetFileOwner(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    owner: ?File.Uid,
    group: ?File.Gid,
    options: Dir.SetFileOwnerOptions,
) Dir.SetFileOwnerError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var posix_path_storage: [PATH_MAX]u8 = undefined;
    const posix_path = try pathToPosix(sub_path, &posix_path_storage);
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    const uid = owner orelse std.math.maxInt(linux.uid_t);
    const gid = group orelse std.math.maxInt(linux.gid_t);
    try ev.fchownat(
        &sync,
        dir.handle,
        posix_path,
        uid,
        gid,
        if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW,
    );
}

/// Sets the permission bits of the open directory `dir` itself, using
/// `fchmodat` with an empty path and `AT.EMPTY_PATH`.
///
/// The path-related and descriptor-quota errors `fchmodat` can report are
/// routed to `errnoBug` here; every other error is returned as is.
fn dirSetPermissions(
    userdata: ?*anyopaque,
    dir: Dir,
    permissions: Dir.Permissions,
) Dir.SetPermissionsError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    ev.fchmodat(&sync, dir.handle, "", permissions.toMode(), linux.AT.EMPTY_PATH) catch |err| switch (err) {
        error.NameTooLong => return errnoBug(.NAMETOOLONG),
        error.BadPathName => return errnoBug(.ILSEQ),
        error.ProcessFdQuotaExceeded => return errnoBug(.MFILE),
        error.SystemFdQuotaExceeded => return errnoBug(.NFILE),
        error.OperationUnsupported => return errnoBug(.OPNOTSUPP),
        else => |e| return e,
    };
}

/// Sets the permission bits of `sub_path` relative to `dir`.
/// `AT.SYMLINK_NOFOLLOW` is passed unless `options.follow_symlinks` is set.
fn dirSetFilePermissions(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    permissions: Dir.Permissions,
    options: Dir.SetFilePermissionsOptions,
) Dir.SetFilePermissionsError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var posix_path_storage: [PATH_MAX]u8 = undefined;
    const posix_path = try pathToPosix(sub_path, &posix_path_storage);
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    const at_flags = if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW;
    try ev.fchmodat(&sync, dir.handle, posix_path, permissions.toMode(), at_flags);
}

/// Sets the access and modification timestamps of `sub_path` relative to
/// `dir`.
///
/// When both requested timestamps are `.now`, a null timestamp array is
/// passed; otherwise the array is `{ access, modify }`, each converted with
/// `setTimestampToPosix`. `AT.SYMLINK_NOFOLLOW` is passed unless
/// `options.follow_symlinks` is set.
fn dirSetTimestamps(
    userdata: ?*anyopaque,
    dir: Dir,
    sub_path: []const u8,
    options: Dir.SetTimestampsOptions,
) Dir.SetTimestampsError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var posix_path_storage: [PATH_MAX]u8 = undefined;
    const posix_path = try pathToPosix(sub_path, &posix_path_storage);
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    try ev.utimensat(
        &sync,
        dir.handle,
        posix_path,
        if (options.modify_timestamp != .now or options.access_timestamp != .now) &.{
            setTimestampToPosix(options.access_timestamp),
            setTimestampToPosix(options.modify_timestamp),
        } else null,
        if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW,
    );
}

fn dirHardLink(
    userdata: ?*anyopaque,
    old_dir: Dir,
    old_sub_path: []const u8,
    new_dir: Dir,
    new_sub_path: []const u8,
    options: Dir.HardLinkOptions,
) Dir.HardLinkError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var old_path_buffer: [PATH_MAX]u8 = undefined;
    var new_path_buffer: [PATH_MAX]u8 = undefined;

    const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer);
    const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer);

    var cancel_region: CancelRegion = .init();
    defer cancel_region.deinit();
    return
ev.linkat(
        &cancel_region,
        old_dir.handle,
        old_sub_path_posix,
        new_dir.handle,
        new_sub_path_posix,
        if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW,
    );
}

/// Returns the result of `Evented.stat` for the open `file`.
fn fileStat(userdata: ?*anyopaque, file: File) File.StatError!File.Stat {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var region: CancelRegion = .init();
    defer region.deinit();
    return ev.stat(&region, file.handle);
}

/// Returns the size of the open `file` in bytes.
///
/// Enqueues a `STATX` request on the descriptor itself (empty path plus
/// `AT.EMPTY_PATH`) asking only for the size field. Returns `error.Unexpected`
/// if the reply does not have the size bit set. The request is issued again
/// when the completion is `INTR` or `CANCELED`.
fn fileLength(userdata: ?*anyopaque, file: File) File.LengthError!u64 {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var region: CancelRegion = .init();
    defer region.deinit();
    while (true) {
        // Zeroed again on every attempt.
        var statx_buf = std.mem.zeroes(linux.Statx);
        const uring_thread = try region.awaitIoUring();
        uring_thread.enqueue().* = .{
            .opcode = .STATX,
            .flags = 0,
            .ioprio = 0,
            .fd = file.handle,
            // The output buffer goes in `off`, the path in `addr`, and the
            // requested field mask in `len`.
            .off = @intFromPtr(&statx_buf),
            .addr = @intFromPtr(""),
            .len = @bitCast(linux.STATX{ .SIZE = true }),
            .rw_flags = linux.AT.EMPTY_PATH,
            .user_data = @intFromPtr(region.fiber),
            .buf_index = 0,
            .personality = 0,
            .splice_fd_in = 0,
            .addr3 = 0,
            .resv = 0,
        };
        ev.yield(null, .nothing);
        switch (region.errno()) {
            .SUCCESS => return if (statx_buf.mask.SIZE) statx_buf.size else error.Unexpected,
            .INTR, .CANCELED => continue,
            .NOMEM => return error.SystemResources,
            // BADF: file descriptor used after closed.
            .ACCES,
            .BADF,
            .FAULT,
            .INVAL,
            .LOOP,
            .NAMETOOLONG,
            .NOENT,
            .NOTDIR,
            => |err| return errnoBug(err),
            else => |err| return unexpectedErrno(err),
        }
    }
}

/// Closes the descriptor of every file in `files` with `Evented.close`.
///
/// No `CancelRegion` is set up: nothing in this function awaits or passes one
/// along, matching `dirClose`.
fn fileClose(userdata: ?*anyopaque, files: []const File) void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    for (files) |file| ev.close(file.handle);
}

fn fileWritePositional(
    userdata: ?*anyopaque,
    file: File,
    header: []const u8,
    data: []const []const u8,
    splat: usize,
    offset: u64,
) File.WritePositionalError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var iovecs: [max_iovecs_len]iovec_const = undefined;
    var iovlen: iovlen_t = 0;
    addBuf(&iovecs, &iovlen, header);
    for (data[0 .. data.len - 1]) |bytes| addBuf(&iovecs, &iovlen, bytes);
    const pattern = data[data.len - 1];
    var backup_buffer: [splat_buffer_size]u8 = undefined;
    if (iovecs.len - iovlen != 0) switch (splat) {
        0 => {},
        1 => addBuf(&iovecs, &iovlen, pattern),
        else => switch (pattern.len) {
            0 => {},
            1 => {
                const splat_buffer = &backup_buffer;
                const memset_len = @min(splat_buffer.len, splat);
                const buf = splat_buffer[0..memset_len];
                @memset(buf, pattern[0]);
                addBuf(&iovecs, &iovlen, buf);
                var remaining_splat = splat - buf.len;
                while (remaining_splat > splat_buffer.len and iovecs.len - iovlen != 0) {
                    assert(buf.len == splat_buffer.len);
                    addBuf(&iovecs, &iovlen, splat_buffer);
                    remaining_splat -= splat_buffer.len;
                }
                addBuf(&iovecs, &iovlen, splat_buffer[0..@min(remaining_splat, splat_buffer.len)]);
            },
            else => for (0..@min(splat, iovecs.len - iovlen)) |_| {
                addBuf(&iovecs, &iovlen, pattern);
            },
        },
    };

    var cancel_region: CancelRegion = .init();
    defer cancel_region.deinit();
    return
ev.pwritev(&cancel_region, file.handle, iovecs[0..iovlen], offset);
}

/// The integer type of `msghdr_const.iovlen`, which is either usize or u32.
/// Since either is fine, the same `addBuf` function serves both writing to a
/// file and sending network messages.
const iovlen_t = @FieldType(linux.msghdr_const, "iovlen");

/// Appends `bytes` to the iovec list `v` at position `i.*` and advances `i.*`.
///
/// Does nothing when `bytes` is empty or when `v` has no free slot left.
fn addBuf(v: []iovec_const, i: *iovlen_t, bytes: []const u8) void {
    // OS checks ptr addr before length so zero length vectors must be omitted.
    if (bytes.len == 0 or v.len - i.* == 0) return;
    v[i.*] = .{ .base = bytes.ptr, .len = bytes.len };
    i.* += 1;
}

/// Not implemented for this backend: always returns `error.Unimplemented`.
fn fileWriteFileStreaming(
    userdata: ?*anyopaque,
    file: File,
    header: []const u8,
    file_reader: *File.Reader,
    limit: Io.Limit,
) File.Writer.WriteFileError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    _ = file;
    _ = header;
    _ = file_reader;
    _ = limit;
    return error.Unimplemented;
}

/// Not implemented for this backend: always returns `error.Unimplemented`.
fn fileWriteFilePositional(
    userdata: ?*anyopaque,
    file: File,
    header: []const u8,
    file_reader: *File.Reader,
    limit: Io.Limit,
    offset: u64,
) File.WriteFilePositionalError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    _ = ev;
    _ = file;
    _ = header;
    _ = file_reader;
    _ = limit;
    _ = offset;
    return error.Unimplemented;
}

/// Reads from `file` at `offset` into the buffers of `data` with a single
/// `preadv` call and returns its result.
///
/// Empty buffers are left out of the iovec list, and at most `max_iovecs_len`
/// buffers are used. Returns 0 without issuing a read when no non-empty buffer
/// was supplied.
fn fileReadPositional(
    userdata: ?*anyopaque,
    file: File,
    data: []const []u8,
    offset: u64,
) File.ReadPositionalError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var iovec_storage: [max_iovecs_len]iovec = undefined;
    var used: usize = 0;
    for (data) |buf| {
        if (iovec_storage.len - used == 0) break;
        if (buf.len == 0) continue;
        iovec_storage[used] = .{ .base = buf.ptr, .len = buf.len };
        used += 1;
    }
    if (used == 0) return 0;
    const dest = iovec_storage[0..used];
    assert(dest[0].len > 0);

    var region: CancelRegion = .init();
    defer region.deinit();
    return ev.preadv(&region, file.handle, dest, offset) catch |err| switch (err) {
        error.SocketUnconnected => return errnoBug(.NOTCONN), // not a socket
        error.ConnectionResetByPeer => return errnoBug(.CONNRESET), // not a socket
        else => |e| return e,
    };
}

/// Moves the file position of `file` by `offset` relative to the current
/// position (`SEEK.CUR`); the signed offset is bit-cast for `lseek`.
fn fileSeekBy(userdata: ?*anyopaque, file: File, offset: i64) File.SeekError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var region: CancelRegion.Sync = try .init(ev);
    defer region.deinit(ev);
    try ev.lseek(&region, file.handle, @bitCast(offset), linux.SEEK.CUR);
}

/// Sets the file position of `file` to the absolute `offset` (`SEEK.SET`).
fn fileSeekTo(userdata: ?*anyopaque, file: File, offset: u64) File.SeekError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var region: CancelRegion.Sync = try .init(ev);
    defer region.deinit(ev);
    try ev.lseek(&region, file.handle, offset, linux.SEEK.SET);
}

/// Enqueues an `FSYNC` request for `file` and translates the completion errno.
/// The request is issued again when the completion is `INTR` or `CANCELED`.
fn fileSync(userdata: ?*anyopaque, file: File) File.SyncError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var region: CancelRegion = .init();
    defer region.deinit();
    while (true) {
        const uring_thread = try region.awaitIoUring();
        uring_thread.enqueue().* = .{
            .opcode = .FSYNC,
            .flags = 0,
            .ioprio = 0,
            .fd = file.handle,
            .off = 0,
            .addr = 0,
            .len = 0,
            .rw_flags = 0,
            .user_data = @intFromPtr(region.fiber),
            .buf_index = 0,
            .personality = 0,
            .splice_fd_in = 0,
            .addr3 = 0,
            .resv = 0,
        };
        ev.yield(null, .nothing);
        switch (region.errno()) {
            .SUCCESS => return,
            .INTR, .CANCELED => continue,
            .BADF, .INVAL, .ROFS => |err| return errnoBug(err),
            .IO => return error.InputOutput,
            .NOSPC => return error.NoSpaceLeft,
            .DQUOT => return error.DiskQuota,
            else => |err| return unexpectedErrno(err),
        }
    }
}

fn fileIsTty(userdata: ?*anyopaque, file: File) Io.Cancelable!bool {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    while (true) {
        try sync.cancel_region.await(.nothing);
        var wsz: winsize = undefined;
        const rc = linux.ioctl(file.handle,
linux.T.IOCGWINSZ, @intFromPtr(&wsz));
        switch (linux.errno(rc)) {
            .SUCCESS => return true,
            .INTR => continue,
            else => return false,
        }
    }
}

/// Returns `error.NotTerminalDevice` when `fileIsTty` reports that `file` is
/// not a terminal. Nothing else is done for a terminal.
fn fileEnableAnsiEscapeCodes(userdata: ?*anyopaque, file: File) File.EnableAnsiEscapeCodesError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    const is_tty = try fileIsTty(ev, file);
    if (!is_tty) return error.NotTerminalDevice;
}

/// Resizes `file` to `length` bytes by enqueueing an `FTRUNCATE` request and
/// translating the completion errno. The request is issued again when the
/// completion is `INTR` or `CANCELED`.
fn fileSetLength(userdata: ?*anyopaque, file: File, length: u64) File.SetLengthError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var region: CancelRegion = .init();
    defer region.deinit();
    while (true) {
        const uring_thread = try region.awaitIoUring();
        uring_thread.enqueue().* = .{
            .opcode = .FTRUNCATE,
            .flags = 0,
            .ioprio = 0,
            .fd = file.handle,
            // The new length is carried in `off`.
            .off = length,
            .addr = 0,
            .len = 0,
            .rw_flags = 0,
            .user_data = @intFromPtr(region.fiber),
            .buf_index = 0,
            .personality = 0,
            .splice_fd_in = 0,
            .addr3 = 0,
            .resv = 0,
        };
        ev.yield(null, .nothing);
        switch (region.errno()) {
            .SUCCESS => return,
            .INTR, .CANCELED => continue,
            .INVAL => return error.NonResizable, // This is returned for /dev/null for example.
            .FBIG => return error.FileTooBig,
            .IO => return error.InputOutput,
            .PERM => return error.PermissionDenied,
            .TXTBSY => return error.FileBusy,
            .BADF => |err| return errnoBug(err), // Handle not open for writing.
            else => |err| return unexpectedErrno(err),
        }
    }
}

/// Changes the owner and/or group of the open `file`, using `fchownat` with an
/// empty path and `AT.EMPTY_PATH`.
///
/// A `null` owner or group is passed down as the maximum value of the id type.
fn fileSetOwner(
    userdata: ?*anyopaque,
    file: File,
    owner: ?File.Uid,
    group: ?File.Gid,
) File.SetOwnerError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    const uid = owner orelse std.math.maxInt(linux.uid_t);
    const gid = group orelse std.math.maxInt(linux.gid_t);
    try ev.fchownat(&sync, file.handle, "", uid, gid, linux.AT.EMPTY_PATH);
}

/// Sets the permission bits of the open `file`, using `fchmodat` with an empty
/// path and `AT.EMPTY_PATH`.
///
/// The path-related and descriptor-quota errors `fchmodat` can report are
/// routed to `errnoBug` here; every other error is returned as is.
fn fileSetPermissions(
    userdata: ?*anyopaque,
    file: File,
    permissions: File.Permissions,
) File.SetPermissionsError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    ev.fchmodat(&sync, file.handle, "", permissions.toMode(), linux.AT.EMPTY_PATH) catch |err| switch (err) {
        error.NameTooLong => return errnoBug(.NAMETOOLONG),
        error.BadPathName => return errnoBug(.ILSEQ),
        error.ProcessFdQuotaExceeded => return errnoBug(.MFILE),
        error.SystemFdQuotaExceeded => return errnoBug(.NFILE),
        error.OperationUnsupported => return errnoBug(.OPNOTSUPP),
        else => |e| return e,
    };
}

/// Sets the access and modification timestamps of the open `file`, using
/// `utimensat` with an empty path and `AT.EMPTY_PATH`.
///
/// When both requested timestamps are `.now`, a null timestamp array is
/// passed; otherwise the array is `{ access, modify }`, each converted with
/// `setTimestampToPosix`.
fn fileSetTimestamps(
    userdata: ?*anyopaque,
    file: File,
    options: File.SetTimestampsOptions,
) File.SetTimestampsError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var region: CancelRegion.Sync = try .init(ev);
    defer region.deinit(ev);
    try ev.utimensat(
        &region,
        file.handle,
        "",
        if (options.modify_timestamp != .now or options.access_timestamp != .now) &.{
            setTimestampToPosix(options.access_timestamp),
            setTimestampToPosix(options.modify_timestamp),
        } else null,
        linux.AT.EMPTY_PATH,
    );
}

fn fileLock(userdata: ?*anyopaque, file: File, lock: File.Lock) File.LockError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    ev.flock(&sync, file.handle, lock, .blocking) catch |err| switch
(err) { + error.WouldBlock => unreachable, // blocking + else => |e| return e, + }; +} + +fn fileTryLock(userdata: ?*anyopaque, file: File, lock: File.Lock) File.LockError!bool { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var sync: CancelRegion.Sync = try .init(ev); + defer sync.deinit(ev); + ev.flock(&sync, file.handle, lock, switch (lock) { + .none => .blocking, + .shared, .exclusive => .nonblocking, + }) catch |err| switch (err) { + error.WouldBlock => return false, + else => |e| return e, + }; + return true; +} + +fn fileUnlock(userdata: ?*anyopaque, file: File) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var sync: CancelRegion.Sync = .initBlocked(ev); + defer sync.deinit(ev); + ev.flock(&sync, file.handle, .none, .blocking) catch |err| switch (err) { + error.Canceled => unreachable, // blocked + error.WouldBlock => unreachable, // blocking + error.SystemResources => return recoverableOsBugDetected(), // Resource deallocation. + error.FileLocksUnsupported => return recoverableOsBugDetected(), // We already got the lock. + error.Unexpected => return recoverableOsBugDetected(), // Resource deallocation must succeed. + }; +} + +fn fileDowngradeLock(userdata: ?*anyopaque, file: File) File.DowngradeLockError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var sync: CancelRegion.Sync = try .init(ev); + defer sync.deinit(ev); + ev.flock(&sync, file.handle, .shared, .nonblocking) catch |err| switch (err) { + error.WouldBlock => return errnoBug(.AGAIN), // File was not locked in exclusive mode. + error.SystemResources => return errnoBug(.NOLCK), // Lock already obtained. + error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Lock already obtained. 
else => |e| return e,
    };
}

fn fileRealPath(userdata: ?*anyopaque, file: File, out_buffer: []u8) File.RealPathError!usize {
    const ev: *Evented = @ptrCast(@alignCast(userdata));
    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    return ev.realPath(&sync, file.handle, out_buffer);
}

/// Creates a hard link to the already-open `file` at `new_sub_path`, resolved
/// relative to `new_dir`.
///
/// The source is addressed by handle: `linkat` is given an empty old path
/// together with `AT.EMPTY_PATH`. `new_sub_path` is converted with
/// `pathToPosix` into a stack buffer of `PATH_MAX` bytes, and any error from
/// that conversion is returned before the link is attempted.
fn fileHardLink(
    userdata: ?*anyopaque,
    file: File,
    new_dir: Dir,
    new_sub_path: []const u8,
    options: File.HardLinkOptions,
) File.HardLinkError!void {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    var new_path_buffer: [PATH_MAX]u8 = undefined;
    const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer);

    // `linkat` only accepts `AT.EMPTY_PATH` and `AT.SYMLINK_FOLLOW`; not
    // dereferencing the source is already its default. Passing
    // `AT.SYMLINK_NOFOLLOW` is rejected with EINVAL, so "do not follow" is
    // expressed by leaving the follow flag out.
    const follow_flag: u32 = if (options.follow_symlinks) linux.AT.SYMLINK_FOLLOW else 0;

    var cancel_region: CancelRegion = .init();
    defer cancel_region.deinit();
    return ev.linkat(
        &cancel_region,
        file.handle,
        "",
        new_dir.handle,
        new_sub_path_posix,
        linux.AT.EMPTY_PATH | follow_flag,
    );
}

fn fileMemoryMapCreate(
    userdata: ?*anyopaque,
    file: File,
    options: File.MemoryMap.CreateOptions,
) File.MemoryMap.CreateError!File.MemoryMap {
    const ev: *Evented = @ptrCast(@alignCast(userdata));

    const prot: linux.PROT = .{
        .READ = options.protection.read,
        .WRITE = options.protection.write,
        .EXEC = options.protection.execute,
    };
    const flags: linux.MAP = .{
        .TYPE = .SHARED_VALIDATE,
        .POPULATE = options.populate,
    };

    const page_align = std.heap.page_size_min;

    var sync: CancelRegion.Sync = try .init(ev);
    defer sync.deinit(ev);
    const contents = while (true) {
        try sync.cancel_region.await(.nothing);
        const casted_offset = std.math.cast(i64, options.offset) orelse return error.Unseekable;
        const rc = linux.mmap(null, options.len, prot, flags, file.handle, casted_offset);
        switch (linux.errno(rc)) {
            .SUCCESS => break @as([*]align(page_align) u8, @ptrFromInt(rc))[0..options.len],
            .INTR => continue,
            .ACCES => return error.AccessDenied,
            .AGAIN => return error.LockedMemoryLimitExceeded,
.MFILE => return error.ProcessFdQuotaExceeded, + .NFILE => return error.SystemFdQuotaExceeded, + .NOMEM => return error.OutOfMemory, + .PERM => return error.PermissionDenied, + .OVERFLOW => return error.Unseekable, + .BADF => |err| return errnoBug(err), // Always a race condition. + .INVAL => |err| return errnoBug(err), // Invalid parameters to mmap() + .OPNOTSUPP => |err| return errnoBug(err), // Bad flags with MAP.SHARED_VALIDATE on Linux. + else => |err| return unexpectedErrno(err), + } + }; + return .{ + .file = file, + .offset = options.offset, + .memory = contents, + .section = {}, + }; +} + +fn fileMemoryMapDestroy(userdata: ?*anyopaque, mm: *File.MemoryMap) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const memory = mm.memory; + if (memory.len == 0) return; + switch (linux.errno(linux.munmap(memory.ptr, memory.len))) { + .SUCCESS => {}, + else => |err| if (builtin.mode == .Debug) + std.log.err("failed to unmap {d} bytes at {*}: {t}", .{ memory.len, memory.ptr, err }), + } + mm.* = undefined; +} + +fn fileMemoryMapSetLength( + userdata: ?*anyopaque, + mm: *File.MemoryMap, + new_len: usize, +) File.MemoryMap.SetLengthError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + const page_size = std.heap.pageSize(); + const alignment: Alignment = .fromByteUnits(page_size); + const page_align = std.heap.page_size_min; + const old_memory = mm.memory; + + if (alignment.forward(new_len) == alignment.forward(old_memory.len)) { + mm.memory.len = new_len; + return; + } + const flags: linux.MREMAP = .{ .MAYMOVE = true }; + const addr_hint: ?[*]const u8 = null; + var sync: CancelRegion.Sync = try .init(ev); + defer sync.deinit(ev); + const new_memory = while (true) { + try sync.cancel_region.await(.nothing); + const rc = linux.mremap(old_memory.ptr, old_memory.len, new_len, flags, addr_hint); + switch (linux.errno(rc)) { + .SUCCESS => break @as([*]align(page_align) u8, @ptrFromInt(rc))[0..new_len], + .INTR => continue, + .AGAIN 
=> return error.LockedMemoryLimitExceeded, + .NOMEM => return error.OutOfMemory, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + }; + mm.memory = new_memory; +} + +fn fileMemoryMapRead(userdata: ?*anyopaque, mm: *File.MemoryMap) File.ReadPositionalError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = mm; +} + +fn fileMemoryMapWrite(userdata: ?*anyopaque, mm: *File.MemoryMap) File.WritePositionalError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = mm; +} + +fn processExecutableOpen( + userdata: ?*anyopaque, + flags: File.OpenFlags, +) process.OpenExecutableError!File { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return dirOpenFile(ev, .{ .handle = linux.AT.FDCWD }, "/proc/self/exe", flags); +} + +fn processExecutablePath(userdata: ?*anyopaque, out_buffer: []u8) process.ExecutablePathError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return dirReadLink(ev, .cwd(), "/proc/self/exe", out_buffer) catch |err| switch (err) { + error.UnsupportedReparsePointType => unreachable, // Windows-only + error.NetworkNotFound => unreachable, // Windows-only + error.FileBusy => unreachable, // Windows-only + else => |e| return e, + }; +} + +fn lockStderr(userdata: ?*anyopaque, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.stderr_mutex.lockUncancelable(ev_io); + errdefer ev.stderr_mutex.unlock(ev_io); + return ev.initLockedStderr(terminal_mode); +} + +fn tryLockStderr( + userdata: ?*anyopaque, + terminal_mode: ?Io.Terminal.Mode, +) Io.Cancelable!?Io.LockedStderr { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + if (!ev.stderr_mutex.tryLock()) return null; + errdefer ev.stderr_mutex.unlock(ev_io); + return try ev.initLockedStderr(terminal_mode); +} + +fn initLockedStderr(ev: 
*Evented, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { + if (!ev.stderr_writer_initialized) { + const ev_io = ev.io(); + const cancel_protection = swapCancelProtection(ev, .blocked); + defer assert(swapCancelProtection(ev, cancel_protection) == .blocked); + ev.scanEnviron() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + const NO_COLOR = ev.environ.exist.NO_COLOR; + const CLICOLOR_FORCE = ev.environ.exist.CLICOLOR_FORCE; + ev.stderr_mode = Io.Terminal.Mode.detect( + ev_io, + ev.stderr_writer.file, + NO_COLOR, + CLICOLOR_FORCE, + ) catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + ev.stderr_writer_initialized = true; + } + return .{ + .file_writer = &ev.stderr_writer, + .terminal_mode = terminal_mode orelse ev.stderr_mode, + }; +} + +fn unlockStderr(userdata: ?*anyopaque) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (ev.stderr_writer.err == null) ev.stderr_writer.interface.flush() catch {}; + if (ev.stderr_writer.err) |err| { + switch (err) { + error.Canceled => Thread.current().currentFiber().cancel_protection.recancel(), + else => {}, + } + ev.stderr_writer.err = null; + } + ev.stderr_writer.interface.end = 0; + ev.stderr_writer.interface.buffer = &.{}; + ev.stderr_mutex.unlock(ev.io()); +} + +fn processCurrentPath(userdata: ?*anyopaque, buffer: []u8) process.CurrentPathError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var sync: CancelRegion.Sync = try .init(ev); + defer sync.deinit(ev); + while (true) { + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.getcwd(buffer.ptr, buffer.len))) { + .SUCCESS => return std.mem.findScalar(u8, buffer, 0).?, + .INTR => continue, + .NOENT => return error.CurrentDirUnlinked, + .RANGE => return error.NameTooLong, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn processSetCurrentDir(userdata: 
?*anyopaque, dir: Dir) process.SetCurrentDirError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (dir.handle == linux.AT.FDCWD) return; + var sync: CancelRegion.Sync = try .init(ev); + defer sync.deinit(ev); + return ev.fchdir(&sync, dir.handle); +} + +fn processSetCurrentPath(userdata: ?*anyopaque, dir_path: []const u8) ChdirError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var path_buffer: [PATH_MAX]u8 = undefined; + const dir_path_posix = try pathToPosix(dir_path, &path_buffer); + var sync: CancelRegion.Sync = try .init(ev); + defer sync.deinit(ev); + return ev.chdir(&sync, dir_path_posix); +} + +fn processReplace(userdata: ?*anyopaque, options: process.ReplaceOptions) process.ReplaceError { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + try ev.scanEnviron(); // for PATH + const PATH = ev.environ.string.PATH orelse default_PATH; + + var arena_allocator = std.heap.ArenaAllocator.init(ev.allocator()); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const argv_buf = try arena.allocSentinel(?[*:0]const u8, options.argv.len, null); + for (options.argv, 0..) 
|arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; + + const env_block = env_block: { + const prog_fd: i32 = -1; + if (options.environ_map) |environ_map| break :env_block try environ_map.createPosixBlock(arena, .{ + .zig_progress_fd = prog_fd, + }); + break :env_block try ev.environ.process_environ.createPosixBlock(arena, .{ + .zig_progress_fd = prog_fd, + }); + }; + + var sync: CancelRegion.Sync = try .init(ev); + defer sync.deinit(ev); + return ev.execv(&sync, options.expand_arg0, argv_buf.ptr[0].?, argv_buf.ptr, env_block, PATH); +} + +fn processReplacePath( + userdata: ?*anyopaque, + dir: Dir, + options: process.ReplaceOptions, +) process.ReplaceError { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = dir; + _ = options; + @panic("TODO processReplacePath"); +} + +fn processSpawn(userdata: ?*anyopaque, options: process.SpawnOptions) process.SpawnError!process.Child { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const spawned = try ev.spawn(options); + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + defer ev.close(spawned.err_fd); + + // Wait for the child to report any errors in or before `execvpe`. + var child_err: ForkBailError = undefined; + ev.readAll(&cancel_region, spawned.err_fd, @ptrCast(&child_err)) catch |read_err| { + switch (read_err) { + error.Canceled => unreachable, // blocked + error.EndOfStream => { + // Write end closed by CLOEXEC at the time of the `execvpe` call, + // indicating success. + }, + else => { + // Problem reading the error from the error reporting pipe. We + // don't know if the child is alive or dead. Better to assume it is + // alive so the resource does not risk being leaked. 
+ }, + } + return .{ + .id = spawned.pid, + .thread_handle = {}, + .stdin = spawned.stdin, + .stdout = spawned.stdout, + .stderr = spawned.stderr, + .request_resource_usage_statistics = options.request_resource_usage_statistics, + }; + }; + return child_err; +} + +fn processSpawnPath( + userdata: ?*anyopaque, + dir: Dir, + options: process.SpawnOptions, +) process.SpawnError!process.Child { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = dir; + _ = options; + @panic("TODO processSpawnPath"); +} + +const prog_fileno = @max(linux.STDIN_FILENO, linux.STDOUT_FILENO, linux.STDERR_FILENO); + +const Spawned = struct { + pid: pid_t, + err_fd: fd_t, + stdin: ?File, + stdout: ?File, + stderr: ?File, +}; +fn spawn(ev: *Evented, options: process.SpawnOptions) process.SpawnError!Spawned { + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + + // The child process does need to access (one end of) these pipes. However, + // we must initially set CLOEXEC to avoid a race condition. If another thread + // is racing to spawn a different child process, we don't want it to inherit + // these FDs in any scenario; that would mean that, for instance, calls to + // `poll` from the parent would not report the child's stdout as closing when + // expected, since the other child may retain a reference to the write end of + // the pipe. So, we create the pipes with CLOEXEC initially. After fork, we + // need to do something in the new child to make sure we preserve the reference + // we want. We could use `fcntl` to remove CLOEXEC from the FD, but as it + // turns out, we `dup2` everything anyway, so there's no need! 
+ const pipe_flags: linux.O = .{ .CLOEXEC = true }; + + const stdin_pipe = if (options.stdin == .pipe) try pipe2(pipe_flags) else undefined; + errdefer if (options.stdin == .pipe) { + ev.destroyPipe(stdin_pipe); + }; + + const stdout_pipe = if (options.stdout == .pipe) try pipe2(pipe_flags) else undefined; + errdefer if (options.stdout == .pipe) { + ev.destroyPipe(stdout_pipe); + }; + + const stderr_pipe = if (options.stderr == .pipe) try pipe2(pipe_flags) else undefined; + errdefer if (options.stderr == .pipe) { + ev.destroyPipe(stderr_pipe); + }; + + const any_ignore = + options.stdin == .ignore or options.stdout == .ignore or options.stderr == .ignore; + const dev_null_fd = if (any_ignore) try ev.null_fd.open(ev, &cancel_region, "/dev/null", .{ + .ACCMODE = .RDWR, + }) else undefined; + + const prog_pipe: [2]fd_t = if (options.progress_node.index != .none) pipe: { + // We use CLOEXEC for the same reason as in `pipe_flags`. + const pipe = try pipe2(.{ .NONBLOCK = true, .CLOEXEC = true }); + _ = linux.fcntl(pipe[0], linux.F.SETPIPE_SZ, @as(u32, std.Progress.max_packet_len * 2)); + break :pipe pipe; + } else .{ -1, -1 }; + errdefer ev.destroyPipe(prog_pipe); + + var arena_allocator = std.heap.ArenaAllocator.init(ev.allocator()); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + // The POSIX standard does not allow malloc() between fork() and execve(), + // and this allocator may be a libc allocator. + // I have personally observed the child process deadlocking when it tries + // to call malloc() due to a heap allocation between fork() and execve(), + // in musl v1.1.24. + // Additionally, we want to reduce the number of possible ways things + // can fail between fork() and execve(). + // Therefore, we do all the allocation for the execve() before the fork(). + // This means we must do the null-termination of argv and env vars here. 
+ const argv_buf = try arena.allocSentinel(?[*:0]const u8, options.argv.len, null); + for (options.argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; + + const env_block = env_block: { + const prog_fd: i32 = if (prog_pipe[1] == -1) -1 else prog_fileno; + if (options.environ_map) |environ_map| break :env_block try environ_map.createPosixBlock(arena, .{ + .zig_progress_fd = prog_fd, + }); + break :env_block try ev.environ.process_environ.createPosixBlock(arena, .{ + .zig_progress_fd = prog_fd, + }); + }; + + // This pipe communicates to the parent errors in the child between `fork` and `execvpe`. + // It is closed by the child (via CLOEXEC) without writing if `execvpe` succeeds. + const err_pipe: [2]fd_t = try pipe2(.{ .CLOEXEC = true }); + errdefer ev.destroyPipe(err_pipe); + + try ev.scanEnviron(); // for PATH + const PATH = ev.environ.string.PATH orelse default_PATH; + + const pid_result: pid_t = fork: { + const rc = linux.fork(); + switch (linux.errno(rc)) { + .SUCCESS => break :fork @intCast(rc), + .AGAIN => return error.SystemResources, + .NOMEM => return error.SystemResources, + .NOSYS => return error.OperationUnsupported, + else => |err| return unexpectedErrno(err), + } + }; + + if (pid_result == 0) { + defer comptime unreachable; // We are the child. + var sync: CancelRegion.Sync = .{ .cancel_region = .initBlocked() }; + const err = ev.setUpChild(&sync, .{ + .stdin_pipe = stdin_pipe[0], + .stdout_pipe = stdout_pipe[1], + .stderr_pipe = stderr_pipe[1], + .dev_null_fd = dev_null_fd, + .prog_pipe = prog_pipe[1], + .argv_buf = argv_buf, + .env_block = env_block, + .PATH = PATH, + .spawn = options, + }); + ev.writeAll(&sync.cancel_region, err_pipe[1], @ptrCast(&err)) catch {}; + const exit = if (builtin.single_threaded) linux.exit else linux.exit_group; + exit(1); + } + + const pid: pid_t = @intCast(pid_result); // We are the parent. 
errdefer comptime unreachable; // The child is forked; we must not error from now on

    ev.close(err_pipe[1]); // make sure only the child holds the write end open

    if (options.stdin == .pipe) ev.close(stdin_pipe[0]);
    if (options.stdout == .pipe) ev.close(stdout_pipe[1]);
    if (options.stderr == .pipe) ev.close(stderr_pipe[1]);

    if (prog_pipe[1] != -1) ev.close(prog_pipe[1]);

    options.progress_node.setIpcFile(ev, .{ .handle = prog_pipe[0], .flags = .{ .nonblocking = true } });

    return .{
        .pid = pid,
        .err_fd = err_pipe[0],
        .stdin = switch (options.stdin) {
            .pipe => .{ .handle = stdin_pipe[1], .flags = .{ .nonblocking = false } },
            else => null,
        },
        .stdout = switch (options.stdout) {
            .pipe => .{ .handle = stdout_pipe[0], .flags = .{ .nonblocking = false } },
            else => null,
        },
        .stderr = switch (options.stderr) {
            .pipe => .{ .handle = stderr_pipe[0], .flags = .{ .nonblocking = false } },
            else => null,
        },
    };
}

/// Errors that `pipe2` can return.
pub const PipeError = error{
    SystemFdQuotaExceeded,
    ProcessFdQuotaExceeded,
} || Io.UnexpectedError;

/// Creates a pipe with `linux.pipe2` using `flags` and returns both
/// descriptors. Callers in this file treat index 0 as the read end and
/// index 1 as the write end.
pub fn pipe2(flags: linux.O) PipeError![2]fd_t {
    var fds: [2]fd_t = undefined;
    switch (linux.errno(linux.pipe2(&fds, flags))) {
        .SUCCESS => return fds,
        .INVAL => |err| return errnoBug(err), // Invalid flags
        .NFILE => return error.SystemFdQuotaExceeded,
        .MFILE => return error.ProcessFdQuotaExceeded,
        else => |err| return unexpectedErrno(err),
    }
}

/// Closes both ends of `pipe`.
///
/// A `.{ -1, -1 }` placeholder (used when no pipe was created) is a no-op:
/// the first end is skipped when it is -1, and the second end is skipped when
/// it equals the first.
fn destroyPipe(ev: *Evented, pipe: [2]fd_t) void {
    if (pipe[0] != -1) ev.close(pipe[0]);
    if (pipe[0] != pipe[1]) ev.close(pipe[1]);
}

/// Errors that can occur between fork() and execv()
const ForkBailError = process.SetCurrentDirError || ChdirError ||
    process.SpawnError || process.ReplaceError;

/// Configures the freshly forked child and then replaces it with the target
/// program.
///
/// Steps, in order: redirect stdin/stdout/stderr, change the working
/// directory, install the progress pipe at `prog_fileno`, apply the requested
/// gid / uid / process group, optionally stop the process, and finally call
/// `execv`. The return type is a bare error set: a value is only produced when
/// one of those steps fails, and the caller reports it to the parent.
fn setUpChild(ev: *Evented, sync: *CancelRegion.Sync, options: struct {
    stdin_pipe: fd_t,
    stdout_pipe: fd_t,
    stderr_pipe: fd_t,
    dev_null_fd: fd_t,
    prog_pipe: fd_t,
    argv_buf: [:null]?[*:0]const u8,
    env_block: process.Environ.Block,
    PATH: []const u8,
    spawn: process.SpawnOptions,
}) ForkBailError {
    try ev.setUpChildIo(
        sync,
        options.spawn.stdin,
        options.stdin_pipe,
        linux.STDIN_FILENO,
        options.dev_null_fd,
    );
    try ev.setUpChildIo(
        sync,
        options.spawn.stdout,
        options.stdout_pipe,
        linux.STDOUT_FILENO,
        options.dev_null_fd,
    );
    try ev.setUpChildIo(
        sync,
        options.spawn.stderr,
        options.stderr_pipe,
        linux.STDERR_FILENO,
        options.dev_null_fd,
    );

    switch (options.spawn.cwd) {
        .inherit => {},
        .dir => |cwd_dir| try ev.fchdir(sync, cwd_dir.handle),
        .path => |cwd_path| {
            var cwd_path_buffer: [PATH_MAX]u8 = undefined;
            const cwd_path_posix = try pathToPosix(cwd_path, &cwd_path_buffer);
            try ev.chdir(sync, cwd_path_posix);
        },
    }

    // Must happen after fchdir above, the cwd file descriptor might be
    // equal to prog_fileno and be clobbered by this dup2 call.
    if (options.prog_pipe != -1) try ev.dup2(sync, options.prog_pipe, prog_fileno);

    if (options.spawn.gid) |gid| {
        switch (linux.errno(linux.setregid(gid, gid))) {
            .SUCCESS => {},
            .AGAIN => return error.ResourceLimitReached,
            .INVAL => return error.InvalidUserId,
            .PERM => return error.PermissionDenied,
            else => return error.Unexpected,
        }
    }

    if (options.spawn.uid) |uid| {
        switch (linux.errno(linux.setreuid(uid, uid))) {
            .SUCCESS => {},
            .AGAIN => return error.ResourceLimitReached,
            .INVAL => return error.InvalidUserId,
            .PERM => return error.PermissionDenied,
            else => return error.Unexpected,
        }
    }

    if (options.spawn.pgid) |pid| {
        switch (linux.errno(linux.setpgid(0, pid))) {
            .SUCCESS => {},
            .ACCES => return error.ProcessAlreadyExec,
            .INVAL => return error.InvalidProcessGroupId,
            .PERM => return error.PermissionDenied,
            else => return error.Unexpected,
        }
    }

    if (options.spawn.start_suspended) {
        // Signal only this process. A target pid of 0 addresses every process
        // in the caller's process group, which can include the parent that is
        // still waiting on the error pipe.
        switch (linux.errno(linux.kill(linux.getpid(), .STOP))) {
            .SUCCESS => {},
            .PERM => return error.PermissionDenied,
            else => return error.Unexpected,
        }
    }

    return ev.execv(
        sync,
        options.spawn.expand_arg0,
        options.argv_buf.ptr[0].?,
        options.argv_buf.ptr,
        options.env_block,
        options.PATH,
    );
}

/// Applies one stdio setting of the child to the descriptor `std_fileno`.
///
/// * `.pipe`    - duplicates `pipe_fd` onto `std_fileno`.
/// * `.close`   - closes `std_fileno`; the result of `close` is ignored.
/// * `.inherit` - leaves the descriptor untouched.
/// * `.ignore`  - duplicates `dev_null_fd` onto `std_fileno`.
/// * `.file`    - duplicates the file's handle onto `std_fileno`; panics if the
///                file is flagged nonblocking (not implemented yet).
fn setUpChildIo(
    ev: *Evented,
    sync: *CancelRegion.Sync,
    stdio: process.SpawnOptions.StdIo,
    pipe_fd: fd_t,
    std_fileno: i32,
    dev_null_fd: fd_t,
) !void {
    switch (stdio) {
        .pipe => try ev.dup2(sync, pipe_fd, std_fileno),
        .close => _ = linux.close(std_fileno),
        .inherit => {},
        .ignore => try ev.dup2(sync, dev_null_fd, std_fileno),
        .file => |file| {
            if (file.flags.nonblocking) @panic("TODO implement setUpChildIo when nonblocking file is used");
            try ev.dup2(sync, file.handle, std_fileno);
        },
    }
}

/// Errors that `dup2` can return.
pub const DupError = error{
    ProcessFdQuotaExceeded,
    SystemResources,
} || Io.UnexpectedError || Io.Cancelable;

/// Duplicates `old_fd` onto `new_fd` with `linux.dup2`.
///
/// The cancel region is awaited before every attempt, and the call is retried
/// while the kernel reports `BUSY` or `INTR`. Returns as soon as the
/// duplication succeeds.
pub fn dup2(ev: *Evented, sync: *CancelRegion.Sync, old_fd: fd_t, new_fd: fd_t) DupError!void {
    _ = ev;
    while (true) {
        try sync.cancel_region.await(.nothing);
        switch (linux.errno(linux.dup2(old_fd, new_fd))) {
            // Done. Falling through here would repeat the syscall forever.
            .SUCCESS => return,
            .BUSY, .INTR => continue,
            .INVAL => |err| return errnoBug(err), // invalid parameters
            .BADF => |err| return errnoBug(err), // use after free
            .MFILE => return error.ProcessFdQuotaExceeded,
            .NOMEM => return error.SystemResources,
            else => |err| return unexpectedErrno(err),
        }
    }
}

fn execv(
    ev: *Evented,
    sync: *CancelRegion.Sync,
    arg0_expand: process.ArgExpansion,
    file: [*:0]const u8,
    child_argv: [*:null]?[*:0]const u8,
    env_block: process.Environ.PosixBlock,
    PATH: []const u8,
) process.ReplaceError {
    const file_slice = std.mem.sliceTo(file, 0);
    if (std.mem.findScalar(u8, file_slice, '/') != null) return ev.execvPath(sync, file, child_argv, env_block);

    // Use of PATH_MAX here is valid as the path_buf will be passed
    // directly to the operating system in posixExecvPath.
+ var path_buf: [PATH_MAX]u8 = undefined; + var it = std.mem.tokenizeScalar(u8, PATH, ':'); + var seen_eacces = false; + var err: process.ReplaceError = error.FileNotFound; + + // In case of expanding arg0 we must put it back if we return with an error. + const prev_arg0 = child_argv[0]; + defer switch (arg0_expand) { + .expand => child_argv[0] = prev_arg0, + .no_expand => {}, + }; + + while (it.next()) |search_path| { + const path_len = search_path.len + file_slice.len + 1; + if (path_buf.len < path_len + 1) return error.NameTooLong; + @memcpy(path_buf[0..search_path.len], search_path); + path_buf[search_path.len] = '/'; + @memcpy(path_buf[search_path.len + 1 ..][0..file_slice.len], file_slice); + path_buf[path_len] = 0; + const full_path = path_buf[0..path_len :0].ptr; + switch (arg0_expand) { + .expand => child_argv[0] = full_path, + .no_expand => {}, + } + err = ev.execvPath(sync, full_path, child_argv, env_block); + switch (err) { + error.AccessDenied => seen_eacces = true, + error.FileNotFound, error.NotDir => {}, + else => |e| return e, + } + } + if (seen_eacces) return error.AccessDenied; + return err; +} +/// This function ignores PATH environment variable. +pub fn execvPath( + ev: *Evented, + sync: *CancelRegion.Sync, + path: [*:0]const u8, + child_argv: [*:null]const ?[*:0]const u8, + env_block: process.Environ.PosixBlock, +) process.ReplaceError { + _ = ev; + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.execve(path, child_argv, env_block.slice.ptr))) { + .FAULT => |err| return errnoBug(err), // Bad pointer parameter. 
+ .@"2BIG" => return error.SystemResources, + .MFILE => return error.ProcessFdQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NFILE => return error.SystemFdQuotaExceeded, + .NOMEM => return error.SystemResources, + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .INVAL => return error.InvalidExe, + .NOEXEC => return error.InvalidExe, + .IO => return error.FileSystem, + .LOOP => return error.FileSystem, + .ISDIR => return error.IsDir, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .TXTBSY => return error.FileBusy, + .LIBBAD => return error.InvalidExe, + else => |err| return unexpectedErrno(err), + } +} + +fn childWait(userdata: ?*anyopaque, child: *process.Child) process.Child.WaitError!process.Child.Term { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; + defer maybe_sync.deinit(ev); + defer ev.childCleanup(child); + + const pid = child.id.?; + var info: linux.siginfo_t = undefined; + while (true) { + const thread = try maybe_sync.cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .WAITID, + .flags = 0, + .ioprio = 0, + .fd = pid, + .off = @intFromPtr(&info), + .addr = 0, + .len = @intFromEnum(linux.P.PID), + .rw_flags = 0, + .user_data = @intFromPtr(maybe_sync.cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = linux.W.EXITED | + @as(i32, if (child.request_resource_usage_statistics) linux.W.NOWAIT else 0), + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (maybe_sync.cancel_region.errno()) { + .SUCCESS => { + if (child.request_resource_usage_statistics) { + const sync = try maybe_sync.enterSync(ev); + while (true) { + try sync.cancel_region.await(.nothing); + var rusage: linux.rusage = undefined; + switch (linux.errno(linux.waitid( + .PID, + pid, + &info, + linux.W.EXITED | linux.W.NOHANG, + &rusage, + ))) { + .SUCCESS => { + 
child.resource_usage_statistics.rusage = rusage; + break; + }, + .INTR, .CANCELED => continue, + .CHILD => |err| return errnoBug(err), // Double-free. + else => |err| return unexpectedErrno(err), + } + } + } + const status: u32 = @bitCast(info.fields.common.second.sigchld.status); + const code: linux.CLD = @enumFromInt(info.code); + return switch (code) { + .EXITED => .{ .exited = @truncate(status) }, + .KILLED, .DUMPED => .{ .signal = @enumFromInt(status) }, + .TRAPPED, .STOPPED => .{ .stopped = status }, + _, .CONTINUED => .{ .unknown = status }, + }; + }, + .INTR, .CANCELED => continue, + .CHILD => |err| return errnoBug(err), // Double-free. + else => |err| return unexpectedErrno(err), + } + } +} + +fn childKill(userdata: ?*anyopaque, child: *process.Child) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var maybe_sync: CancelRegion.Sync.Maybe = .{ .sync = .initBlocked(ev) }; + defer maybe_sync.deinit(ev); + defer ev.childCleanup(child); + + const pid = child.id.?; + while (true) switch (linux.errno(linux.kill(pid, .TERM))) { + .SUCCESS => break, + .INTR => continue, + .PERM => return, + .INVAL => |err| return errnoBug(err) catch {}, + .SRCH => |err| return errnoBug(err) catch {}, + else => |err| return unexpectedErrno(err) catch {}, + }; + maybe_sync.leaveSync(ev); + + var info: linux.siginfo_t = undefined; + while (true) { + const thread = maybe_sync.cancel_region.awaitIoUring() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + thread.enqueue().* = .{ + .opcode = .WAITID, + .flags = 0, + .ioprio = 0, + .fd = pid, + .off = @intFromPtr(&info), + .addr = 0, + .len = @intFromEnum(linux.P.PID), + .rw_flags = 0, + .user_data = @intFromPtr(maybe_sync.cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = linux.W.EXITED, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (maybe_sync.cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .CHILD => |err| 
return errnoBug(err) catch {}, // Double-free. + else => |err| return unexpectedErrno(err) catch {}, + } + } +} + +fn childCleanup(ev: *Evented, child: *process.Child) void { + if (child.stdin) |*stdin| { + ev.close(stdin.handle); + child.stdin = null; + } + if (child.stdout) |*stdout| { + ev.close(stdout.handle); + child.stdout = null; + } + if (child.stderr) |*stderr| { + ev.close(stderr.handle); + child.stderr = null; + } + child.id = null; +} + +fn progressParentFile(userdata: ?*anyopaque) std.Progress.ParentFileError!File { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const cancel_protection = swapCancelProtection(ev, .blocked); + defer assert(swapCancelProtection(ev, cancel_protection) == .blocked); + ev.scanEnviron() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + return ev.environ.zig_progress_file; +} + +fn scanEnviron(ev: *Evented) Io.Cancelable!void { + const ev_io = ev.io(); + try ev.environ_mutex.lock(ev_io); + defer ev.environ_mutex.unlock(ev_io); + if (ev.environ_initialized) return; + ev.environ.scan(ev.allocator()); + ev.environ_initialized = true; +} + +fn clockResolution(userdata: ?*anyopaque, clock: Io.Clock) Io.Clock.ResolutionError!Io.Duration { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const clock_id = clockToPosix(clock); + var timespec: linux.timespec = undefined; + return switch (linux.errno(linux.clock_getres(clock_id, &timespec))) { + .SUCCESS => .fromNanoseconds(nanosecondsFromPosix(&timespec)), + .INVAL => return error.ClockUnavailable, + else => |err| return unexpectedErrno(err), + }; +} + +fn now(userdata: ?*anyopaque, clock: Io.Clock) Io.Timestamp { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var tp: linux.timespec = undefined; + switch (linux.errno(linux.clock_gettime(clockToPosix(clock), &tp))) { + .SUCCESS => return timestampFromPosix(&tp), + else => return .zero, + } +} + +fn sleep(userdata: ?*anyopaque, timeout: Io.Timeout) 
Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + const timespec: linux.kernel_timespec, const clock: Io.Clock, const timeout_flags: u32 = timespec: switch (timeout) { + .none => .{ + .{ + .sec = std.math.maxInt(i64), + .nsec = std.time.ns_per_s - 1, + }, + .awake, + linux.IORING_TIMEOUT_ABS, + }, + .duration => |duration| { + const ns = duration.raw.toNanoseconds(); + break :timespec .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + duration.clock, + 0, + }; + }, + .deadline => |deadline| { + const ns = deadline.raw.toNanoseconds(); + break :timespec .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + deadline.clock, + linux.IORING_TIMEOUT_ABS, + }; + }, + }; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .TIMEOUT, + .flags = 0, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(&timespec), + .len = 1, + .rw_flags = timeout_flags | @as(u32, switch (clock) { + .real => linux.IORING_TIMEOUT_REALTIME, + else => 0, + .boot => linux.IORING_TIMEOUT_BOOTTIME, + }), + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + // Handles SUCCESS as well as clock not available and unexpected + // errors. The user had a chance to check clock resolution before + // getting here, which would have reported 0, making this a legal + // amount of time to sleep. 
+ else => return, + .INTR, .CANCELED => return error.Canceled, + } +} + +fn random(userdata: ?*anyopaque, buffer: []u8) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var thread: *Thread = .current(); + if (!thread.csprng.isInitialized()) { + @branchHint(.unlikely); + var seed: [Csprng.seed_len]u8 = undefined; + { + const ev_io = ev.io(); + ev.csprng_mutex.lockUncancelable(ev_io); + defer ev.csprng_mutex.unlock(ev_io); + if (!ev.csprng.isInitialized()) { + @branchHint(.unlikely); + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + ev.urandomReadAll(&cancel_region, &seed) catch |err| switch (err) { + error.Canceled => unreachable, // blocked + else => fallbackSeed(ev, &seed), + }; + ev.csprng.rng = .init(seed); + thread = .current(); + } + ev.csprng.rng.fill(&seed); + } + if (!thread.csprng.isInitialized()) { + @branchHint(.likely); + thread.csprng.rng = .init(seed); + } else thread.csprng.rng.addEntropy(&seed); + } + thread.csprng.rng.fill(buffer); +} + +fn randomSecure(userdata: ?*anyopaque, buffer: []u8) Io.RandomSecureError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (buffer.len == 0) return; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + ev.urandomReadAll(&cancel_region, buffer) catch |err| switch (err) { + error.Canceled => return error.Canceled, + else => return error.EntropyUnavailable, + }; +} + +fn netListenIpUnavailable( + userdata: ?*anyopaque, + address: net.IpAddress, + options: net.IpAddress.ListenOptions, +) net.IpAddress.ListenError!net.Server { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.NetworkDown; +} + +fn netAcceptUnavailable( + userdata: ?*anyopaque, + listen_handle: net.Socket.Handle, +) net.Server.AcceptError!net.Stream { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = listen_handle; + return error.NetworkDown; +} + +fn netBindIp( + userdata: 
?*anyopaque, + address: *const net.IpAddress, + options: net.IpAddress.BindOptions, +) net.IpAddress.BindError!net.Socket { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const family = posixAddressFamily(address); + var maybe_sync: CancelRegion.Sync.Maybe = .{ .cancel_region = .init() }; + defer maybe_sync.deinit(ev); + const socket_fd = try ev.socket(&maybe_sync.cancel_region, family, options); + errdefer ev.close(socket_fd); + var storage: PosixAddress = undefined; + var addr_len = addressToPosix(address, &storage); + try ev.bind(&maybe_sync.cancel_region, socket_fd, &storage.any, addr_len); + try ev.getsockname(try maybe_sync.enterSync(ev), socket_fd, &storage.any, &addr_len); + return .{ + .handle = socket_fd, + .address = addressFromPosix(&storage), + }; +} + +fn netConnectIpUnavailable( + userdata: ?*anyopaque, + address: *const net.IpAddress, + options: net.IpAddress.ConnectOptions, +) net.IpAddress.ConnectError!net.Stream { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.NetworkDown; +} + +fn netListenUnixUnavailable( + userdata: ?*anyopaque, + address: *const net.UnixAddress, + options: net.UnixAddress.ListenOptions, +) net.UnixAddress.ListenError!net.Socket.Handle { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.AddressFamilyUnsupported; +} + +fn netConnectUnixUnavailable( + userdata: ?*anyopaque, + address: *const net.UnixAddress, +) net.UnixAddress.ConnectError!net.Socket.Handle { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + return error.AddressFamilyUnsupported; +} + +fn netSocketCreatePairUnavailable( + userdata: ?*anyopaque, + options: net.Socket.CreatePairOptions, +) net.Socket.CreatePairError![2]net.Socket { + _ = userdata; + _ = options; + return error.OperationUnsupported; +} + +fn netSendUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + messages: 
[]net.OutgoingMessage, + flags: net.SendFlags, +) struct { ?net.Socket.SendError, usize } { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = messages; + _ = flags; + return .{ error.NetworkDown, 0 }; +} + +fn netReceive( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + message_buffer: []net.IncomingMessage, + data_buffer: []u8, + flags: net.ReceiveFlags, + timeout: Io.Timeout, +) struct { ?net.Socket.ReceiveTimeoutError, usize } { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + + var message_i: usize = 0; + var data_i: usize = 0; + + const deadline: ?struct { + raw: Io.Timestamp, + timespec: linux.kernel_timespec, + clock: Io.Clock, + } = if (timeout.toTimestamp(ev_io)) |deadline| deadline: { + const ns = deadline.raw.toNanoseconds(); + break :deadline .{ + .raw = deadline.raw, + .timespec = .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + .clock = deadline.clock, + }; + } else null; + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + if (message_buffer.len - message_i == 0) return .{ null, message_i }; + const message = &message_buffer[message_i]; + const remaining_data_buffer = data_buffer[data_i..]; + var storage: PosixAddress = undefined; + var iov: iovec = .{ .base = remaining_data_buffer.ptr, .len = remaining_data_buffer.len }; + var msg: linux.msghdr = .{ + .name = &storage.any, + .namelen = @sizeOf(PosixAddress), + .iov = (&iov)[0..1], + .iovlen = 1, + .control = message.control.ptr, + .controllen = @intCast(message.control.len), + .flags = undefined, + }; + + const thread = cancel_region.awaitIoUring() catch |err| return .{ err, message_i }; + thread.enqueue().* = .{ + .opcode = .RECVMSG, + .flags = if (deadline) |_| linux.IOSQE_IO_LINK else 0, + .ioprio = 0, + .fd = handle, + .off = 0, + .addr = @intFromPtr(&msg), + .len = 0, + .rw_flags = linux.MSG.NOSIGNAL | + @as(u32, 
if (flags.oob) linux.MSG.OOB else 0) | + @as(u32, if (flags.peek) linux.MSG.PEEK else 0) | + @as(u32, if (flags.trunc) linux.MSG.TRUNC else 0), + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + if (deadline) |*deadline_ptr| thread.enqueue().* = .{ + .opcode = .LINK_TIMEOUT, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(&deadline_ptr.timespec), + .len = 1, + .rw_flags = linux.IORING_TIMEOUT_ABS | @as(u32, switch (deadline_ptr.clock) { + .real => linux.IORING_TIMEOUT_REALTIME, + else => 0, + .boot => linux.IORING_TIMEOUT_BOOTTIME, + }), + .user_data = @intFromEnum(Completion.Userdata.wakeup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => { + const data = remaining_data_buffer[0..@intCast(completion.result)]; + data_i += data.len; + message.* = .{ + .from = addressFromPosix(&storage), + .data = data, + .control = if (msg.control) |ptr| @as([*]u8, @ptrCast(ptr))[0..msg.controllen] else message.control, + .flags = .{ + .eor = (msg.flags & linux.MSG.EOR) != 0, + .trunc = (msg.flags & linux.MSG.TRUNC) != 0, + .ctrunc = (msg.flags & linux.MSG.CTRUNC) != 0, + .oob = (msg.flags & linux.MSG.OOB) != 0, + .errqueue = if (@hasDecl(linux.MSG, "ERRQUEUE")) (msg.flags & linux.MSG.ERRQUEUE) != 0 else false, + }, + }; + message_i += 1; + continue; + }, + .AGAIN => unreachable, + .INTR, .CANCELED => { + if (deadline) |d| { + if (now(ev, d.clock).nanoseconds >= d.raw.nanoseconds) return .{ error.Timeout, message_i }; + } + continue; + }, + + .BADF => |err| return .{ errnoBug(err), message_i }, + .NFILE => return .{ error.SystemFdQuotaExceeded, message_i }, + .MFILE => return .{ error.ProcessFdQuotaExceeded, message_i }, + .FAULT => |err| return .{ errnoBug(err), message_i }, + 
.INVAL => |err| return .{ errnoBug(err), message_i }, + .NOBUFS => return .{ error.SystemResources, message_i }, + .NOMEM => return .{ error.SystemResources, message_i }, + .NOTCONN => return .{ error.SocketUnconnected, message_i }, + .NOTSOCK => |err| return .{ errnoBug(err), message_i }, + .MSGSIZE => return .{ error.MessageOversize, message_i }, + .PIPE => return .{ error.SocketUnconnected, message_i }, + .OPNOTSUPP => |err| return .{ errnoBug(err), message_i }, + .CONNRESET => return .{ error.ConnectionResetByPeer, message_i }, + .NETDOWN => return .{ error.NetworkDown, message_i }, + else => |err| return .{ unexpectedErrno(err), message_i }, + } + } +} + +fn netReadUnavailable( + userdata: ?*anyopaque, + fd: net.Socket.Handle, + data: [][]u8, +) net.Stream.Reader.Error!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = fd; + _ = data; + return error.NetworkDown; +} + +fn netWriteUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + header: []const u8, + data: []const []const u8, + splat: usize, +) net.Stream.Writer.Error!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = header; + _ = data; + _ = splat; + return error.NetworkDown; +} + +fn netWriteFileUnavailable( + userdata: ?*anyopaque, + socket_handle: net.Socket.Handle, + header: []const u8, + file_reader: *File.Reader, + limit: Io.Limit, +) net.Stream.Writer.WriteFileError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = socket_handle; + _ = header; + _ = file_reader; + _ = limit; + return error.NetworkDown; +} + +fn netClose(userdata: ?*anyopaque, handles: []const net.Socket.Handle) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + for (handles) |handle| ev.close(handle); +} + +fn netShutdown( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + how: net.ShutdownHow, +) net.ShutdownError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: 
CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .SHUTDOWN, + .flags = 0, + .ioprio = 0, + .fd = handle, + .off = 0, + .addr = 0, + .len = switch (how) { + .recv => linux.SHUT.RD, + .send => linux.SHUT.WR, + .both => linux.SHUT.RDWR, + }, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .BADF, .NOTSOCK, .INVAL => |err| return errnoBug(err), + .NOTCONN => return error.SocketUnconnected, + .NOBUFS => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +fn netInterfaceNameResolveUnavailable( + userdata: ?*anyopaque, + name: *const net.Interface.Name, +) net.Interface.Name.ResolveError!net.Interface { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = name; + return error.InterfaceNotFound; +} + +fn netInterfaceNameUnavailable( + userdata: ?*anyopaque, + interface: net.Interface, +) net.Interface.NameError!net.Interface.Name { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = interface; + return error.Unexpected; +} + +fn netLookupUnavailable( + userdata: ?*anyopaque, + host_name: net.HostName, + resolved: *Io.Queue(net.HostName.LookupResult), + options: net.HostName.LookupOptions, +) net.HostName.LookupError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = host_name; + _ = options; + resolved.close(ev.io()); + return error.NetworkDown; +} + +fn bind( + ev: *Evented, + cancel_region: *CancelRegion, + socket_fd: fd_t, + addr: *const linux.sockaddr, + addr_len: linux.socklen_t, +) !void { + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .BIND, + .flags = 0, + .ioprio = 0, + .fd = 
socket_fd, + .off = addr_len, + .addr = @intFromPtr(addr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ADDRINUSE => return error.AddressInUse, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .INVAL => |err| return errnoBug(err), // invalid parameters + .NOTSOCK => |err| return errnoBug(err), // invalid `sockfd` + .AFNOSUPPORT => return error.AddressFamilyUnsupported, + .ADDRNOTAVAIL => return error.AddressUnavailable, + .FAULT => |err| return errnoBug(err), // invalid `addr` pointer + .NOMEM => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +fn chdir(ev: *Evented, sync: *CancelRegion.Sync, path: [*:0]const u8) ChdirError!void { + _ = ev; + while (true) { + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.chdir(path))) { + .SUCCESS => return, + .INTR => continue, + .ACCES => return error.AccessDenied, + .IO => return error.FileSystem, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.NotDir, + .ILSEQ => return error.BadPathName, + .FAULT => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn close(ev: *Evented, fd: fd_t) void { + _ = ev; + const thread: *Thread = .current(); + thread.enqueue().* = .{ + .opcode = .CLOSE, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = fd, + .off = 0, + .addr = 0, + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.Userdata.close), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; +} + +fn fchdir(ev: *Evented, sync: *CancelRegion.Sync, dir: fd_t) 
process.SetCurrentDirError!void { + _ = ev; + if (dir == linux.AT.FDCWD) return; + while (true) { + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.fchdir(dir))) { + .SUCCESS => return, + .INTR => continue, + .ACCES => return error.AccessDenied, + .NOTDIR => return error.NotDir, + .IO => return error.FileSystem, + .BADF => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn fchmodat( + ev: *Evented, + sync: *CancelRegion.Sync, + dir: fd_t, + path: [*:0]const u8, + mode: linux.mode_t, + flags: u32, +) Dir.SetFilePermissionsError!void { + _ = ev; + while (true) { + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.fchmodat2(dir, path, mode, flags))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .IO => return error.InputOutput, + .LOOP => return error.SymLinkLoop, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.FileNotFound, + .OPNOTSUPP => return error.OperationUnsupported, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + } + } +} + +fn fchownat( + ev: *Evented, + sync: *CancelRegion.Sync, + dir: fd_t, + path: [*:0]const u8, + owner: linux.uid_t, + group: linux.gid_t, + flags: u32, +) File.SetOwnerError!void { + _ = ev; + while (true) { + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.fchownat(dir, path, owner, group, flags))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // likely fd refers to directory opened without `Dir.OpenOptions.iterate` + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .IO => return error.InputOutput, + .LOOP => return error.SymLinkLoop, + 
.NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.FileNotFound, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + } + } +} + +fn flock( + ev: *Evented, + sync: *CancelRegion.Sync, + fd: fd_t, + op: File.Lock, + blocking: enum { blocking, nonblocking }, +) (File.LockError || error{WouldBlock})!void { + while (true) { + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.flock(fd, LOCK.NB | @as(i32, switch (op) { + .none => LOCK.UN, + .shared => LOCK.SH, + .exclusive => LOCK.EX, + })))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), // invalid parameters + .NOLCK => return error.SystemResources, + .AGAIN => { + const thread = try sync.cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .NOP, + .flags = 0, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = 0, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(sync.cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (sync.cancel_region.errno()) { + .SUCCESS, .INTR, .CANCELED => {}, + else => unreachable, + } + switch (blocking) { + .blocking => continue, + .nonblocking => return error.WouldBlock, + } + }, + .OPNOTSUPP => return error.FileLocksUnsupported, + else => |err| return unexpectedErrno(err), + } + } +} + +fn getsockname( + ev: *Evented, + sync: *CancelRegion.Sync, + socket_fd: fd_t, + addr: *linux.sockaddr, + addr_len: *linux.socklen_t, +) !void { + _ = ev; + while (true) { + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.getsockname(socket_fd, addr, addr_len))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), // invalid parameters + .NOTSOCK => |err| return errnoBug(err), // always a race condition + .NOBUFS => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +fn linkat( + ev: *Evented, + cancel_region: *CancelRegion, + old_dir: fd_t, + old_path: [*:0]const u8, + new_dir: fd_t, + new_path: [*:0]const u8, + flags: u32, +) File.HardLinkError!void { + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .LINKAT, + .flags = 0, + .ioprio = 0, + .fd = old_dir, + .off = @intFromPtr(new_path), + .addr = @intFromPtr(old_path), + .len = @bitCast(new_dir), + .rw_flags = flags, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .DQUOT => return error.DiskQuota, + .EXIST => return error.PathAlreadyExists, + .IO => return error.HardwareFailure, + .LOOP => return error.SymLinkLoop, + .MLINK => return error.LinkQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .NOTDIR => return error.NotDir, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + .XDEV => return error.CrossDevice, + .ILSEQ => return error.BadPathName, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn lseek( + ev: *Evented, + sync: *CancelRegion.Sync, + fd: fd_t, + offset: u64, + whence: u32, +) File.SeekError!void { + _ = ev; + while (true) { + try sync.cancel_region.await(.nothing); + var result: u64 = undefined; + switch (linux.errno(switch (@sizeOf(usize)) { 
+ else => comptime unreachable, + 4 => linux.llseek(fd, offset, &result, whence), + 8 => linux.lseek(fd, @bitCast(offset), whence), + })) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .INVAL => return error.Unseekable, + .OVERFLOW => return error.Unseekable, + .SPIPE => return error.Unseekable, + .NXIO => return error.Unseekable, + else => |err| return unexpectedErrno(err), + } + } +} + +fn openat( + ev: *Evented, + cancel_region: *CancelRegion, + dir: fd_t, + path: [*:0]const u8, + flags: linux.O, + mode: linux.mode_t, +) File.OpenError!fd_t { + var mut_flags = flags; + if (@hasField(linux.O, "LARGEFILE")) mut_flags.LARGEFILE = true; + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .OPENAT, + .flags = 0, + .ioprio = 0, + .fd = dir, + .off = 0, + .addr = @intFromPtr(path), + .len = mode, + .rw_flags = @bitCast(mut_flags), + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => return completion.result, + .INTR, .CANCELED => continue, + .FAULT => |err| return errnoBug(err), + .INVAL => return error.BadPathName, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .ACCES => return error.AccessDenied, + .FBIG => return error.FileTooBig, + .OVERFLOW => return error.FileTooBig, + .ISDIR => return error.IsDir, + .LOOP => return error.SymLinkLoop, + .MFILE => return error.ProcessFdQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NFILE => return error.SystemFdQuotaExceeded, + .NODEV => return error.NoDevice, + .NOENT => return error.FileNotFound, + .SRCH => return error.FileNotFound, // Linux when opening procfs files. 
+ .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .NOTDIR => return error.NotDir, + .PERM => return error.PermissionDenied, + .EXIST => return error.PathAlreadyExists, + .BUSY => return error.DeviceBusy, + .OPNOTSUPP => return error.FileLocksUnsupported, + .AGAIN => return error.WouldBlock, + .TXTBSY => return error.FileBusy, + .NXIO => return error.NoDevice, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } + } +} + +fn preadv( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + iov: []const iovec, + offset: ?u64, +) File.Reader.Error!usize { + if (iov.len == 0) return 0; + const gather = iov.len > 1 or iov[0].len > 0xfffff000; + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = if (gather) .READV else .READ, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = offset orelse std.math.maxInt(u64), + .addr = if (gather) @intFromPtr(iov.ptr) else @intFromPtr(iov[0].base), + .len = @intCast(if (gather) iov.len else iov[0].len), + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => return @as(u32, @bitCast(completion.result)), + .INTR, .CANCELED => continue, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .AGAIN => return error.WouldBlock, + .BADF => |err| return errnoBug(err), // File descriptor used after closed + .IO => return error.InputOutput, + .ISDIR => return error.IsDir, + .NOBUFS => return error.SystemResources, + .NOMEM => return error.SystemResources, + .NOTCONN => return error.SocketUnconnected, + .CONNRESET => return error.ConnectionResetByPeer, + else => |err| return unexpectedErrno(err), + } + } +} + +fn pwritev( + ev: *Evented, + cancel_region: *CancelRegion, + fd: 
fd_t, + iov: []const iovec_const, + offset: ?u64, +) File.Writer.Error!usize { + if (iov.len == 0) return 0; + const scatter = iov.len > 1 or iov[0].len > 0xfffff000; + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = if (scatter) .WRITEV else .WRITE, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = offset orelse std.math.maxInt(u64), + .addr = if (scatter) @intFromPtr(iov.ptr) else @intFromPtr(iov[0].base), + .len = @intCast(if (scatter) iov.len else iov[0].len), + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => return @as(u32, @bitCast(completion.result)), + .INTR, .CANCELED => continue, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .AGAIN => return error.WouldBlock, + .BADF => return error.NotOpenForWriting, // Can be a race condition. + .DESTADDRREQ => |err| return errnoBug(err), // `connect` was never called. + .DQUOT => return error.DiskQuota, + .FBIG => return error.FileTooBig, + .IO => return error.InputOutput, + .NOSPC => return error.NoSpaceLeft, + .PERM => return error.PermissionDenied, + .PIPE => return error.BrokenPipe, + .CONNRESET => |err| return errnoBug(err), // Not a socket handle. 
+ .BUSY => return error.DeviceBusy, + else => |err| return unexpectedErrno(err), + } + } +} + +fn readAll( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + buffer: []u8, +) (File.Reader.Error || error{EndOfStream})!void { + var index: usize = 0; + while (buffer.len - index != 0) { + const len = try ev.preadv(cancel_region, fd, &.{ + .{ .base = buffer[index..].ptr, .len = buffer.len - index }, + }, null); + if (len == 0) return error.EndOfStream; + index += len; + } +} + +fn realPath( + ev: *Evented, + sync: *CancelRegion.Sync, + fd: fd_t, + out_buffer: []u8, +) File.RealPathError!usize { + _ = ev; + var procfs_buf: [std.fmt.count("/proc/self/fd/{d}\x00", .{std.math.minInt(fd_t)})]u8 = undefined; + const proc_path = std.fmt.bufPrintSentinel(&procfs_buf, "/proc/self/fd/{d}", .{fd}, 0) catch + unreachable; + while (true) { + try sync.cancel_region.await(.nothing); + const rc = linux.readlink(proc_path, out_buffer.ptr, out_buffer.len); + switch (linux.errno(rc)) { + .SUCCESS => return rc, + .INTR => continue, + .ACCES => return error.AccessDenied, + .FAULT => |err| return errnoBug(err), + .IO => return error.FileSystem, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.NotDir, + .ILSEQ => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn renameat( + ev: *Evented, + cancel_region: *CancelRegion, + old_dir: fd_t, + old_path: [*:0]const u8, + new_dir: fd_t, + new_path: [*:0]const u8, + flags: linux.RENAME, +) Dir.RenameError!void { + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .RENAMEAT, + .flags = 0, + .ioprio = 0, + .fd = old_dir, + .off = @intFromPtr(new_path), + .addr = @intFromPtr(old_path), + .len = @bitCast(new_dir), + .rw_flags = @bitCast(flags), + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + 
.personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .BUSY => return error.FileBusy, + .DQUOT => return error.DiskQuota, + .ISDIR => return error.IsDir, + .IO => return error.HardwareFailure, + .LOOP => return error.SymLinkLoop, + .MLINK => return error.LinkQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .EXIST => return error.DirNotEmpty, + .NOTEMPTY => return error.DirNotEmpty, + .ROFS => return error.ReadOnlyFileSystem, + .XDEV => return error.CrossDevice, + .ILSEQ => return error.BadPathName, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn setsockopt( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + level: i32, + opt_name: u32, + option: u32, +) !void { + const o: []const u8 = @ptrCast(&option); + while (true) { + const off: extern struct { + cmd_op: linux.IO_URING_SOCKET_OP, + pad: u32, + } align(@alignOf(u64)) = .{ + .cmd_op = .SETSOCKOPT, + .pad = 0, + }; + const addr: extern struct { level: i32, opt_name: u32 } align(@alignOf(u64)) = .{ + .level = level, + .opt_name = opt_name, + }; + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .URING_CMD, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = @as(*const u64, @ptrCast(&off)).*, + .addr = @as(*const u64, @ptrCast(&addr)).*, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = @intCast(o.len), + .addr3 = @intFromPtr(o.ptr), + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + 
.SUCCESS => return, + .INTR, .CANCELED => continue, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .NOTSOCK => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn socket( + ev: *Evented, + cancel_region: *CancelRegion, + family: linux.sa_family_t, + options: net.IpAddress.BindOptions, +) error{ + AddressFamilyUnsupported, + ProtocolUnsupportedBySystem, + ProcessFdQuotaExceeded, + SystemFdQuotaExceeded, + SystemResources, + ProtocolUnsupportedByAddressFamily, + SocketModeUnsupported, + OptionUnsupported, + Unexpected, + Canceled, +}!fd_t { + const mode = posixSocketMode(options.mode); + const protocol = posixProtocol(options.protocol); + const socket_fd = while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .SOCKET, + .flags = 0, + .ioprio = 0, + .fd = family, + .off = mode | linux.SOCK.CLOEXEC, + .addr = 0, + .len = protocol, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => break completion.result, + .INTR, .CANCELED => continue, + .AFNOSUPPORT => return error.AddressFamilyUnsupported, + .INVAL => return error.ProtocolUnsupportedBySystem, + .MFILE => return error.ProcessFdQuotaExceeded, + .NFILE => return error.SystemFdQuotaExceeded, + .NOBUFS => return error.SystemResources, + .NOMEM => return error.SystemResources, + .PROTONOSUPPORT => return error.ProtocolUnsupportedByAddressFamily, + .PROTOTYPE => return error.SocketModeUnsupported, + else => |err| return unexpectedErrno(err), + } + }; + errdefer ev.close(socket_fd); + + if (options.ip6_only) { + if (linux.IPV6 == void) return error.OptionUnsupported; + try ev.setsockopt(cancel_region, 
socket_fd, linux.IPPROTO.IPV6, linux.IPV6.V6ONLY, 0); + } + + return socket_fd; +} + +fn stat(ev: *Evented, cancel_region: *CancelRegion, fd: fd_t) Dir.StatError!Dir.Stat { + return ev.statx(cancel_region, fd, "", linux.AT.EMPTY_PATH) catch |err| switch (err) { + error.BadPathName, error.NameTooLong => unreachable, // path is empty + error.AccessDenied => return errnoBug(.ACCES), + error.SymLinkLoop => return errnoBug(.LOOP), + error.FileNotFound => return errnoBug(.NOENT), + error.NotDir => return errnoBug(.NOTDIR), + else => |e| return e, + }; +} + +fn statx( + ev: *Evented, + cancel_region: *CancelRegion, + dir: fd_t, + path: [*:0]const u8, + flags: u32, +) (Dir.StatError || Dir.PathNameError || error{ FileNotFound, NotDir, SymLinkLoop })!Dir.Stat { + while (true) { + var statx_buf = std.mem.zeroes(linux.Statx); + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .STATX, + .flags = 0, + .ioprio = 0, + .fd = dir, + .off = @intFromPtr(&statx_buf), + .addr = @intFromPtr(path), + .len = @bitCast(linux_statx_request), + .rw_flags = flags, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return statFromLinux(&statx_buf), + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => |err| return errnoBug(err), + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +fn urandomReadAll( + ev: *Evented, + cancel_region: *CancelRegion, + buffer: []u8, +) (File.OpenError || File.Reader.Error || error{EndOfStream})!void { + return ev.readAll(cancel_region, try ev.random_fd.open(ev, cancel_region, "/dev/urandom", .{ + .ACCMODE = .RDONLY, + .CLOEXEC = true, + }), buffer); +} + +fn utimensat( + ev: *Evented, + sync: *CancelRegion.Sync, + dir: fd_t, + path: [*:0]const u8, + times: ?*const [2]linux.timespec, + flags: u32, +) File.SetTimestampsError!void { + _ = ev; + while (true) { + try sync.cancel_region.await(.nothing); + switch (linux.errno(linux.utimensat(dir, path, times, flags))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // always a race condition + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + } + } +} + +fn writeAll( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + buffer: []const u8, +) (File.Writer.Error || error{EndOfStream})!void { + var index: usize = 0; + while (buffer.len - index != 0) { + const len = try ev.pwritev(cancel_region, fd, &.{ + .{ .base = buffer[index..].ptr, .len = buffer.len - index }, + }, null); + if (len == 0) return error.EndOfStream; + index += len; + } +} + +test { + _ = Fiber.CancelProtection; +} diff --git a/lib/std/Io/fiber.zig b/lib/std/Io/fiber.zig @@ -0,0 +1,201 @@ +pub const supported = switch (builtin.cpu.arch) { + .aarch64, .x86_64 => true, + else => false, +}; + +/// Stores the cpu state of an inactive 
fiber. +pub const Context = switch (builtin.cpu.arch) { + .aarch64 => extern struct { + sp: u64, + fp: u64, + pc: u64, + }, + .x86_64 => extern struct { + rsp: u64, + rbp: u64, + rip: u64, + }, + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), +}; + +pub const Switch = extern struct { old: *Context, new: *Context }; + +/// Fills `s.old` with the current cpu state, and restores the cpu state stored in `s.new`. +pub inline fn contextSwitch(s: *const Switch) *const Switch { + return switch (builtin.cpu.arch) { + .aarch64 => asm volatile ( + \\ ldp x0, x2, [x1] + \\ ldr x3, [x2, #16] + \\ mov x4, sp + \\ stp x4, fp, [x0] + \\ adr x5, 0f + \\ ldp x4, fp, [x2] + \\ str x5, [x0, #16] + \\ mov sp, x4 + \\ br x3 + \\0: + : [received_message] "={x1}" (-> *const Switch), + : [message_to_send] "{x1}" (s), + : .{ + .x0 = true, + .x1 = true, + .x2 = true, + .x3 = true, + .x4 = true, + .x5 = true, + .x6 = true, + .x7 = true, + .x8 = true, + .x9 = true, + .x10 = true, + .x11 = true, + .x12 = true, + .x13 = true, + .x14 = true, + .x15 = true, + .x16 = true, + .x17 = true, + .x19 = true, + .x20 = true, + .x21 = true, + .x22 = true, + .x23 = true, + .x24 = true, + .x25 = true, + .x26 = true, + .x27 = true, + .x28 = true, + .x30 = true, + .z0 = true, + .z1 = true, + .z2 = true, + .z3 = true, + .z4 = true, + .z5 = true, + .z6 = true, + .z7 = true, + .z8 = true, + .z9 = true, + .z10 = true, + .z11 = true, + .z12 = true, + .z13 = true, + .z14 = true, + .z15 = true, + .z16 = true, + .z17 = true, + .z18 = true, + .z19 = true, + .z20 = true, + .z21 = true, + .z22 = true, + .z23 = true, + .z24 = true, + .z25 = true, + .z26 = true, + .z27 = true, + .z28 = true, + .z29 = true, + .z30 = true, + .z31 = true, + .p0 = true, + .p1 = true, + .p2 = true, + .p3 = true, + .p4 = true, + .p5 = true, + .p6 = true, + .p7 = true, + .p8 = true, + .p9 = true, + .p10 = true, + .p11 = true, + .p12 = true, + .p13 = true, + .p14 = true, + .p15 = true, + .fpcr = true, + .fpsr = 
true, + .ffr = true, + .memory = true, + }), + .x86_64 => asm volatile ( + \\ movq 0(%%rsi), %%rax + \\ movq 8(%%rsi), %%rcx + \\ leaq 0f(%%rip), %%rdx + \\ movq %%rsp, 0(%%rax) + \\ movq %%rbp, 8(%%rax) + \\ movq %%rdx, 16(%%rax) + \\ movq 0(%%rcx), %%rsp + \\ movq 8(%%rcx), %%rbp + \\ jmpq *16(%%rcx) + \\0: + : [received_message] "={rsi}" (-> *const Switch), + : [message_to_send] "{rsi}" (s), + : .{ + .rax = true, + .rcx = true, + .rdx = true, + .rbx = true, + .rsi = true, + .rdi = true, + .r8 = true, + .r9 = true, + .r10 = true, + .r11 = true, + .r12 = true, + .r13 = true, + .r14 = true, + .r15 = true, + .mm0 = true, + .mm1 = true, + .mm2 = true, + .mm3 = true, + .mm4 = true, + .mm5 = true, + .mm6 = true, + .mm7 = true, + .zmm0 = true, + .zmm1 = true, + .zmm2 = true, + .zmm3 = true, + .zmm4 = true, + .zmm5 = true, + .zmm6 = true, + .zmm7 = true, + .zmm8 = true, + .zmm9 = true, + .zmm10 = true, + .zmm11 = true, + .zmm12 = true, + .zmm13 = true, + .zmm14 = true, + .zmm15 = true, + .zmm16 = true, + .zmm17 = true, + .zmm18 = true, + .zmm19 = true, + .zmm20 = true, + .zmm21 = true, + .zmm22 = true, + .zmm23 = true, + .zmm24 = true, + .zmm25 = true, + .zmm26 = true, + .zmm27 = true, + .zmm28 = true, + .zmm29 = true, + .zmm30 = true, + .zmm31 = true, + .fpsr = true, + .fpcr = true, + .mxcsr = true, + .rflags = true, + .dirflag = true, + .memory = true, + }), + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + }; +} + +const builtin = @import("builtin"); diff --git a/lib/std/c.zig b/lib/std/c.zig @@ -10724,6 +10724,7 @@ pub extern "c" fn chmod(path: [*:0]const u8, mode: mode_t) c_int; pub extern "c" fn fchmod(fd: fd_t, mode: mode_t) c_int; pub extern "c" fn fchmodat(fd: fd_t, path: [*:0]const u8, mode: mode_t, flags: c_uint) c_int; pub extern "c" fn fchown(fd: fd_t, owner: uid_t, group: gid_t) c_int; +pub extern "c" fn fchownat(fd: fd_t, path: [*:0]const u8, owner: uid_t, group: gid_t, flags: c_uint) c_int; pub extern "c" fn umask(mode: 
mode_t) mode_t; pub extern "c" fn rmdir(path: [*:0]const u8) c_int; @@ -10864,6 +10865,7 @@ pub const pthread_setname_np = switch (native_os) { pub extern "c" fn pthread_getname_np(thread: pthread_t, name: [*:0]u8, len: usize) c_int; pub extern "c" fn pthread_kill(pthread_t, signal: SIG) c_int; +pub extern "c" fn pthread_exit(ptr: ?*anyopaque) noreturn; pub const pthread_threadid_np = switch (native_os) { .driverkit, .ios, .maccatalyst, .macos, .tvos, .visionos, .watchos => private.pthread_threadid_np, @@ -11296,16 +11298,7 @@ pub const clock_get_time = darwin.clock_get_time; pub const clock_serv_t = darwin.clock_serv_t; pub const clock_res_t = darwin.clock_res_t; pub const @"close$NOCANCEL" = darwin.@"close$NOCANCEL"; -pub const dispatch_function_t = darwin.dispatch_function_t; -pub const dispatch_once_f = darwin.dispatch_once_f; -pub const dispatch_once_t = darwin.dispatch_once_t; -pub const dispatch_release = darwin.dispatch_release; -pub const dispatch_semaphore_create = darwin.dispatch_semaphore_create; -pub const dispatch_semaphore_signal = darwin.dispatch_semaphore_signal; -pub const dispatch_semaphore_t = darwin.dispatch_semaphore_t; -pub const dispatch_semaphore_wait = darwin.dispatch_semaphore_wait; -pub const dispatch_time = darwin.dispatch_time; -pub const dispatch_time_t = darwin.dispatch_time_t; +pub const dispatch = darwin.dispatch; pub const fcopyfile = darwin.fcopyfile; pub const host_t = darwin.host_t; pub const integer_t = darwin.integer_t; diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig @@ -18,6 +18,9 @@ comptime { assert(builtin.os.tag.isDarwin()); // Prevent access of std.c symbols on wrong OS. } +// Grand Central Dispatch is exposed by libSystem. +pub const dispatch = @import("darwin/dispatch.zig"); + pub const mach_port_t = c_uint; pub const EXC = enum(exception_type_t) { @@ -896,31 +899,6 @@ pub const qos_class_t = enum(c_uint) { _, }; -// Grand Central Dispatch is exposed by libSystem. 
-pub extern "c" fn dispatch_release(object: *anyopaque) void; - -pub const dispatch_semaphore_t = *opaque {}; -pub extern "c" fn dispatch_semaphore_create(value: isize) ?dispatch_semaphore_t; -pub extern "c" fn dispatch_semaphore_wait(dsema: dispatch_semaphore_t, timeout: dispatch_time_t) isize; -pub extern "c" fn dispatch_semaphore_signal(dsema: dispatch_semaphore_t) isize; - -pub const DISPATCH_TIME_NOW = @compileError("use dispatch_time_t.NOW"); -pub const DISPATCH_TIME_FOREVER = @compileError("use dispatch_time_t.FOREVER"); -pub const dispatch_time_t = enum(u64) { - NOW = 0, - FOREVER = ~0, - _, -}; -pub extern "c" fn dispatch_time(when: dispatch_time_t, delta: i64) dispatch_time_t; - -pub const dispatch_once_t = usize; -pub const dispatch_function_t = fn (?*anyopaque) callconv(.c) void; -pub extern fn dispatch_once_f( - predicate: *dispatch_once_t, - context: ?*anyopaque, - function: dispatch_function_t, -) void; - /// Undocumented futex-like API available on darwin 16+ /// (macOS 10.12+, iOS 10.0+, tvOS 10.0+, watchOS 3.0+, catalyst 13.0+). 
/// diff --git a/lib/std/c/darwin/dispatch.zig b/lib/std/c/darwin/dispatch.zig @@ -0,0 +1,308 @@ +// dispatch/base.h +pub const function_t = *const fn (?*anyopaque) callconv(.c) void; + +// dispatch/object.h +pub const object_t = *_os_object_s; +pub const retain = dispatch_retain; +pub const release = dispatch_release; +pub const get_context = dispatch_get_context; +pub const set_context = dispatch_set_context; +pub const set_finalizer_f = dispatch_set_finalizer_f; +pub const activate = dispatch_activate; +pub const @"suspend" = dispatch_suspend; +pub const @"resume" = dispatch_resume; + +const _os_object_s = opaque { + pub const retain = dispatch_retain; + pub const release = dispatch_release; + pub const get_context = dispatch_get_context; + pub const set_context = dispatch_set_context; + pub const set_finalizer = dispatch_set_finalizer_f; + pub const activate = dispatch_activate; + pub const @"suspend" = dispatch_suspend; + pub const @"resume" = dispatch_resume; + pub const set_target_queue = dispatch_set_target_queue; +}; +extern "c" fn dispatch_retain(object: object_t) void; +extern "c" fn dispatch_release(object: object_t) void; +extern "c" fn dispatch_get_context(object: object_t) ?*anyopaque; +extern "c" fn dispatch_set_context(object: object_t, context: ?*anyopaque) void; +extern "c" fn dispatch_set_finalizer_f(object: object_t, finalizer: ?function_t) void; +extern "c" fn dispatch_activate(object: object_t) void; +extern "c" fn dispatch_suspend(object: object_t) void; +extern "c" fn dispatch_resume(object: object_t) void; + +// dispatch/once.h +pub const once_t = enum(isize) { + init = 0, + done = -1, + _, + + pub inline fn once(predicate: *once_t, context: ?*anyopaque, function: function_t) void { + if (@atomicLoad(once_t, predicate, .unordered) != .done) { + @branchHint(.unlikely); + once_f(predicate, context, function); + } else asm volatile ("" ::: .{ .memory = true }); + switch (builtin.mode) { + .Debug, .ReleaseSafe => {}, + .ReleaseFast, 
.ReleaseSmall => if (@atomicLoad(once_t, predicate, .unordered) != .done) + unreachable, + } + } +}; +pub const once_f = dispatch_once_f; + +extern "c" fn dispatch_once_f(predicate: *once_t, context: ?*anyopaque, function: function_t) void; + +// dispatch/queue.h +pub const queue_t = *queue_s; +pub const queue_global_t = queue_t; +pub const queue_serial_executor_t = queue_t; +pub const queue_serial_t = queue_t; +pub const queue_main_t = queue_serial_t; +pub const queue_concurrent_t = queue_t; +pub const async_f = dispatch_async_f; +pub const sync_f = dispatch_sync_f; +pub const async_and_wait_f = dispatch_async_and_wait_f; +pub const apply_f = dispatch_apply_f; +pub const get_current_queue = dispatch_get_current_queue; +pub inline fn get_main_queue() queue_main_t { + return &_dispatch_main_q; +} +pub const queue_priority_t = enum(c_long) { + HIGH = 2, + DEFAULT = 0, + LOW = -1, + BACKGROUND = std.math.minInt(i16), + _, +}; +pub const get_global_queue = dispatch_get_global_queue; +pub const queue_attr_t = ?*queue_attr_s; +pub inline fn QUEUE_SERIAL() queue_attr_t { + return null; +} +pub inline fn QUEUE_INACTIVE() queue_attr_t { + return queue_attr_make_initially_inactive(QUEUE_SERIAL()); +} +pub inline fn QUEUE_CONCURRENT() queue_attr_t { + return &_dispatch_queue_attr_concurrent; +} +pub inline fn QUEUE_CONCURRENT_INACTIVE() queue_attr_t { + return queue_attr_make_initially_inactive(QUEUE_CONCURRENT()); +} +pub const queue_attr_make_initially_inactive = dispatch_queue_attr_make_initially_inactive; +pub const TARGET_QUEUE_DEFAULT: ?queue_t = null; +pub const queue_create_with_target = dispatch_queue_create_with_target; +pub const queue_create = dispatch_queue_create; +pub const CURRENT_QUEUE_LABEL: ?[*:0]const u8 = null; +pub const queue_get_label = dispatch_queue_get_label; +pub const main = dispatch_main; +pub const after_f = dispatch_after_f; + +const queue_s = opaque { + pub inline fn as_object(queue: queue_t) object_t { + return @ptrCast(queue); + } + pub 
const async = async_f; + pub const sync = sync_f; + pub const async_and_wait = async_and_wait_f; + pub const apply = apply_f; + pub const get_current = get_current_queue; + pub const get_main = get_main_queue; + pub const get_global = get_global_queue; + pub const create_with_target = queue_create_with_target; + pub const create = queue_create; + pub const get_label = queue_get_label; +}; +extern "c" fn dispatch_async_f(queue: queue_t, context: ?*anyopaque, work: function_t) void; +extern "c" fn dispatch_sync_f(queue: queue_t, context: ?*anyopaque, work: function_t) void; +extern "c" fn dispatch_async_and_wait_f(queue: queue_t, context: ?*anyopaque, work: function_t) void; +extern "c" fn dispatch_apply_f(iterations: usize, queue: ?queue_t, context: ?*anyopaque, work: *const fn (context: ?*anyopaque, iteration: usize) callconv(.c) void) void; +extern "c" fn dispatch_get_current_queue() queue_t; +extern "c" var _dispatch_main_q: queue_s; +extern "c" fn dispatch_get_global_queue(identifier: isize, flags: usize) queue_global_t; +const queue_attr_s = opaque { + pub inline fn as_object(queue_attr: queue_attr_t) object_t { + return @ptrCast(queue_attr); + } + pub const SERIAL = QUEUE_SERIAL; + pub const INACTIVE = QUEUE_INACTIVE; + pub const CONCURRENT = QUEUE_CONCURRENT; + pub const CONCURRENT_INACTIVE = QUEUE_CONCURRENT_INACTIVE; +}; +extern "c" var _dispatch_queue_attr_concurrent: queue_attr_s; +extern "c" fn dispatch_queue_attr_make_initially_inactive(attr: queue_attr_t) queue_attr_t; +extern "c" fn dispatch_queue_create_with_target(label: ?[*:0]const u8, attr: queue_attr_t, target: ?queue_t) ?queue_t; +extern "c" fn dispatch_queue_create(label: ?[*:0]const u8, attr: queue_attr_t) ?queue_t; +extern "c" fn dispatch_queue_get_label(queue: ?queue_t) [*:0]const u8; +extern "c" fn dispatch_set_target_queue(object: object_t, queue: ?queue_t) void; +extern "c" fn dispatch_main() noreturn; +extern "c" fn dispatch_after_f(when: time_t, queue: queue_t, context: ?*anyopaque, 
work: function_t) void; + +// dispatch/semaphore.h +pub const semaphore_t = *semaphore_s; +pub const semaphore_create = dispatch_semaphore_create; +pub const semaphore_wait = dispatch_semaphore_wait; +pub const semaphore_signal = dispatch_semaphore_signal; + +const semaphore_s = opaque { + pub inline fn as_object(semaphore: semaphore_t) object_t { + return @ptrCast(semaphore); + } + pub const create = semaphore_create; + pub const wait = semaphore_wait; + pub const signal = semaphore_signal; +}; +extern "c" fn dispatch_semaphore_create(value: isize) ?semaphore_t; +extern "c" fn dispatch_semaphore_wait(dsema: semaphore_t, timeout: time_t) isize; +extern "c" fn dispatch_semaphore_signal(dsema: semaphore_t) isize; + +// dispatch/source.h +pub const source_t = *source_s; +pub const source_type_t = *const source_type_s; +pub const SOURCE_TYPE_DATA_ADD = &_dispatch_source_type_data_add; +pub const SOURCE_TYPE_DATA_OR = &_dispatch_source_type_data_or; +pub const SOURCE_TYPE_DATA_REPLACE = &_dispatch_source_type_data_replace; +pub const SOURCE_TYPE_MACH_SEND = &_dispatch_source_type_mach_send; +pub const SOURCE_TYPE_MACH_RECV = &_dispatch_source_type_mach_recv; +pub const SOURCE_TYPE_MEMORYPRESSURE = &_dispatch_source_type_memorypressure; +pub const SOURCE_TYPE_PROC = &_dispatch_source_type_proc; +pub const SOURCE_TYPE_READ = &_dispatch_source_type_read; +pub const SOURCE_TYPE_SIGNAL = &_dispatch_source_type_signal; +pub const SOURCE_TYPE_TIMER = &_dispatch_source_type_timer; +pub const SOURCE_TYPE_VNODE = &_dispatch_source_type_vnode; +pub const SOURCE_TYPE_WRITE = &_dispatch_source_type_write; +pub const source_mach_send_flags_t = packed struct(usize) { + DEAD: bool = false, + unused1: @Int(.unsigned, @bitSizeOf(usize) - 1) = 0, +}; +pub const source_mach_recv_flags_t = packed struct(usize) { + unused0: @Int(.unsigned, @bitSizeOf(usize) - 0) = 0, +}; +pub const source_memorypressure_flags_t = packed struct(usize) { + NORMAL: bool = false, + WARN: bool = false, + 
CRITICAL: bool = false, + unused3: @Int(.unsigned, @bitSizeOf(usize) - 3) = 0, +}; +pub const source_proc_flags_t = packed struct(usize) { + unused0: u27 = 0, + SIGNAL: bool = false, + unused28: u1 = 0, + EXEC: bool = false, + FORK: bool = false, + EXIT: bool = false, + unused32: @Int(.unsigned, @bitSizeOf(usize) - 32) = 0, +}; +pub const source_vnode_flags_t = packed struct(usize) { + DELETE: bool = false, + WRITE: bool = false, + EXTEND: bool = false, + ATTRIB: bool = false, + LINK: bool = false, + RENAME: bool = false, + REVOKE: bool = false, + unused7: u1 = 0, + FUNLOCK: bool = false, + unused9: @Int(.unsigned, @bitSizeOf(usize) - 9) = 0, +}; +pub const source_timer_flags_t = packed struct(usize) { + STRICT: bool = false, + unused1: @Int(.unsigned, @bitSizeOf(usize) - 1) = 0, +}; +pub const source_flags_t = packed union { + raw: usize, + MACH_SEND: source_mach_send_flags_t, + MACH_RECV: source_mach_recv_flags_t, + MEMORYPRESSURE: source_memorypressure_flags_t, + PROC: source_proc_flags_t, + VNODE: source_vnode_flags_t, + pub const none: source_flags_t = .{ .raw = 0 }; +}; +pub const source_create = dispatch_source_create; +pub const source_set_event_handler_f = dispatch_source_set_event_handler_f; +pub const source_set_cancel_handler_f = dispatch_source_set_cancel_handler_f; +pub const source_cancel = dispatch_source_cancel; +pub const source_testcancel = dispatch_source_testcancel; +pub const source_get_handle = dispatch_source_get_handle; +pub const source_get_mask = dispatch_source_get_mask; +pub const source_get_data = dispatch_source_get_data; +pub const source_merge_data = dispatch_source_merge_data; +pub const source_set_timer = dispatch_source_set_timer; +pub const source_set_registration_handler_f = dispatch_source_set_registration_handler_f; + +const source_s = opaque { + pub inline fn as_object(source: source_t) object_t { + return @ptrCast(source); + } + pub const set_event_handler = source_set_event_handler_f; + pub const set_cancel_handler = 
source_set_cancel_handler_f; + pub const cancel = source_cancel; + pub const testcancel = source_testcancel; + pub const get_handle = source_get_handle; + pub const get_mask = source_get_mask; + pub const get_data = source_get_data; + pub const merge_data = source_merge_data; + pub const set_timer = source_set_timer; + pub const set_registration_handler = source_set_registration_handler_f; +}; +const source_type_s = opaque { + pub const DATA_ADD = SOURCE_TYPE_DATA_ADD; + pub const DATA_OR = SOURCE_TYPE_DATA_OR; + pub const DATA_REPLACE = SOURCE_TYPE_DATA_REPLACE; + pub const MACH_SEND = SOURCE_TYPE_MACH_SEND; + pub const MACH_RECV = SOURCE_TYPE_MACH_RECV; + pub const MEMORYPRESSURE = SOURCE_TYPE_MEMORYPRESSURE; + pub const PROC = SOURCE_TYPE_PROC; + pub const READ = SOURCE_TYPE_READ; + pub const SIGNAL = SOURCE_TYPE_SIGNAL; + pub const TIMER = SOURCE_TYPE_TIMER; + pub const VNODE = SOURCE_TYPE_VNODE; + pub const WRITE = SOURCE_TYPE_WRITE; +}; +extern "c" const _dispatch_source_type_data_add: source_type_s; +extern "c" const _dispatch_source_type_data_or: source_type_s; +extern "c" const _dispatch_source_type_data_replace: source_type_s; +extern "c" const _dispatch_source_type_mach_send: source_type_s; +extern "c" const _dispatch_source_type_mach_recv: source_type_s; +extern "c" const _dispatch_source_type_memorypressure: source_type_s; +extern "c" const _dispatch_source_type_proc: source_type_s; +extern "c" const _dispatch_source_type_read: source_type_s; +extern "c" const _dispatch_source_type_signal: source_type_s; +extern "c" const _dispatch_source_type_timer: source_type_s; +extern "c" const _dispatch_source_type_vnode: source_type_s; +extern "c" const _dispatch_source_type_write: source_type_s; +extern "c" fn dispatch_source_create(type: source_type_t, handle: usize, mask: source_flags_t, queue: ?queue_t) ?source_t; +extern "c" fn dispatch_source_set_event_handler_f(source: source_t, handler: ?function_t) void; +extern "c" fn 
dispatch_source_set_cancel_handler_f(source: source_t, handler: ?function_t) void; +extern "c" fn dispatch_source_cancel(source: source_t) void; +extern "c" fn dispatch_source_testcancel(source: source_t) isize; +extern "c" fn dispatch_source_get_handle(source: source_t) usize; +extern "c" fn dispatch_source_get_mask(source: source_t) source_flags_t; +extern "c" fn dispatch_source_get_data(source: source_t) usize; +extern "c" fn dispatch_source_merge_data(source: source_t, value: usize) void; +extern "c" fn dispatch_source_set_timer(source: source_t, start: time_t, interval: u64, leeway: u64) void; +extern "c" fn dispatch_source_set_registration_handler_f(source: source_t, handler: ?function_t) void; + +// dispatch/time.h +pub const time_t = enum(u64) { + WALL_NOW = WALLTIME_NOW, + NOW = TIME_NOW, + FOREVER = TIME_FOREVER, + _, + + pub const time = dispatch_time; + pub const walltime = dispatch_walltime; + pub const after = dispatch_after_f; +}; +pub const WALLTIME_NOW = ~@as(u64, 1); +pub const TIME_NOW: u64 = 0; +pub const TIME_FOREVER = ~@as(u64, 0); +pub const time = dispatch_time; +pub const walltime = dispatch_walltime; + +extern "c" fn dispatch_time(when: time_t, delta: i64) time_t; +extern "c" fn dispatch_walltime(when: ?*const std.c.timespec, delta: i64) time_t; + +const builtin = @import("builtin"); +const std = @import("std"); diff --git a/lib/std/debug.zig b/lib/std/debug.zig @@ -533,9 +533,7 @@ pub fn defaultPanic(msg: []const u8, first_trace_addr: ?usize) noreturn { else => {}, } - // Don't try to cancel during a panic. No need to re-enable cancelation, - // because the panic handler doesn't return. 
- _ = std.Options.debug_io.swapCancelProtection(.blocked); + std.Options.debug_io.vtable.crashHandler(std.Options.debug_io.userdata); if (enable_segfault_handler) { // If a segfault happens while panicking, we want it to actually segfault, not trigger @@ -1535,9 +1533,7 @@ fn handleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?CpuContextPtr) noret } pub fn defaultHandleSegfault(addr: ?usize, name: []const u8, opt_ctx: ?CpuContextPtr) noreturn { - // Don't try to cancel during a segfault. No need to re-enable cancelation, - // because the segfault handler doesn't return. - _ = std.Options.debug_io.swapCancelProtection(.blocked); + std.Options.debug_io.vtable.crashHandler(std.Options.debug_io.userdata); // There is very similar logic to the following in `defaultPanic`. switch (panic_stage) { diff --git a/lib/std/process/Environ.zig b/lib/std/process/Environ.zig @@ -39,6 +39,10 @@ pub const GlobalBlock = struct { pub const global: GlobalBlock = .{ .use_global = true }; pub fn deinit(_: GlobalBlock, _: Allocator) void {} + + pub fn isEmpty(block: GlobalBlock) bool { + return !block.use_global; + } }; pub const PosixBlock = struct { @@ -51,6 +55,10 @@ pub const PosixBlock = struct { gpa.free(block.slice); } + pub fn isEmpty(block: PosixBlock) bool { + return block.slice.len == 0; + } + pub const View = struct { slice: []const [*:0]const u8, @@ -72,6 +80,10 @@ pub const WindowsBlock = struct { gpa.free(block.slice); } + pub fn isEmpty(block: WindowsBlock) bool { + return block.slice[0] == 0; + } + pub const View = struct { ptr: [*:0]const u16, diff --git a/src/crash_report.zig b/src/crash_report.zig @@ -79,6 +79,8 @@ fn dumpCrashContext() Io.Writer.Error!void { if (S.already_dumped) return; S.already_dumped = true; + std.Options.debug_io.vtable.crashHandler(std.Options.debug_io.userdata); + // TODO: this does mean that a different thread could grab the stderr mutex between the context // and the actual panic printing, which would be quite confusing. 
const stderr = std.debug.lockStderr(&.{});