Merge pull request 'std.Thread sync primitives roundup' (#31084) from sync-cleanup into master - zig

commit e7e700334d1432efec7d19887c6656d956e260e7 (tree)
parent e9eadee00654f5f762abe3cdc596359b79893eab
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Tue,  3 Feb 2026 07:52:02 +0100

Merge pull request 'std.Thread sync primitives roundup' (#31084) from sync-cleanup into master

Reviewed-on: https://codeberg.org/ziglang/zig/pulls/31084

Diffstat:
M CMakeLists.txt  | 2 --
M lib/compiler/build_runner.zig  | 19 +++++++++++--------
M lib/compiler_rt/emutls.zig  | 9 +++++++--
M lib/fuzzer.zig  | 2 +-
M lib/std/Io.zig  | 15 ++++++++++++++-
A lib/std/Io/RwLock.zig  | 238 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/std/Io/Semaphore.zig  | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lib/std/Io/Threaded.zig  | 170 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M lib/std/Thread.zig  | 14 +++-----------
D lib/std/Thread/Condition.zig  | 683 -------------------------------------------------------------------------------
D lib/std/Thread/Futex.zig  | 1063 -------------------------------------------------------------------------------
D lib/std/Thread/Mutex.zig  | 367 -------------------------------------------------------------------------------
M lib/std/Thread/Mutex/Recursive.zig  | 12 ++++++------
D lib/std/Thread/RwLock.zig  | 386 -------------------------------------------------------------------------------
D lib/std/Thread/Semaphore.zig  | 111 -------------------------------------------------------------------------------
M lib/std/atomic.zig  | 25 +++++++++++++++++++++----
M lib/std/debug.zig  | 9 ++++++---
M lib/std/debug/Coverage.zig  | 20 +++++++++++---------
M lib/std/debug/Info.zig  | 4 ++--
M lib/std/debug/SelfInfo/Elf.zig  | 48 ++++++++++++++++++++++++------------------------
M lib/std/debug/SelfInfo/MachO.zig  | 32 ++++++++++++++++----------------
M lib/std/debug/SelfInfo/Windows.zig  | 20 ++++++++++----------
M lib/std/heap/SmpAllocator.zig  | 2 +-
M lib/std/heap/ThreadSafeAllocator.zig  | 35 +++++++++++++++++++++--------------
M lib/std/heap/debug_allocator.zig  | 51 ++++++++++-----------------------------------------
M lib/std/heap/sbrk_allocator.zig  | 18 ++++++++++--------
M lib/std/http/Client.zig  | 67 ++++++++++++++++++++++++++++++++++++-------------------------------
D lib/std/once.zig  | 71 -----------------------------------------------------------------------
M lib/std/std.zig  | 1 -

29 files changed, 602 insertions(+), 2957 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -408,8 +408,6 @@ set(ZIG_STAGE2_SOURCES
     lib/std/Target/wasm.zig
     lib/std/Target/x86.zig
     lib/std/Thread.zig
-    lib/std/Thread/Futex.zig
-    lib/std/Thread/Mutex.zig
     lib/std/array_hash_map.zig
     lib/std/array_list.zig
     lib/std/ascii.zig
diff --git a/lib/compiler/build_runner.zig b/lib/compiler/build_runner.zig
@@ -30,14 +30,6 @@ pub fn main(init: process.Init.Minimal) !void {
     defer _ = debug_gpa_state.deinit();
     const gpa = debug_gpa_state.allocator();
 
-    // ...but we'll back our arena by `std.heap.page_allocator` for efficiency.
-    var single_threaded_arena: std.heap.ArenaAllocator = .init(std.heap.page_allocator);
-    defer single_threaded_arena.deinit();
-    var thread_safe_arena: std.heap.ThreadSafeAllocator = .{ .child_allocator = single_threaded_arena.allocator() };
-    const arena = thread_safe_arena.allocator();
-
-    const args = try init.args.toSlice(arena);
-
     var threaded: std.Io.Threaded = .init(gpa, .{
         .environ = init.environ,
         .argv0 = .init(init.args),
@@ -45,6 +37,17 @@ pub fn main(init: process.Init.Minimal) !void {
     defer threaded.deinit();
     const io = threaded.io();
 
+    // ...but we'll back our arena by `std.heap.page_allocator` for efficiency.
+    var single_threaded_arena: std.heap.ArenaAllocator = .init(std.heap.page_allocator);
+    defer single_threaded_arena.deinit();
+    var thread_safe_arena: std.heap.ThreadSafeAllocator = .{
+        .child_allocator = single_threaded_arena.allocator(),
+        .io = io,
+    };
+    const arena = thread_safe_arena.allocator();
+
+    const args = try init.args.toSlice(arena);
+
     // skip my own exe name
     var arg_idx: usize = 1;
 
diff --git a/lib/compiler_rt/emutls.zig b/lib/compiler_rt/emutls.zig
@@ -147,7 +147,8 @@ const ObjectArray = struct {
 // It provides thread-safety for on-demand storage of Thread Objects.
 const current_thread_storage = struct {
     var key: std.c.pthread_key_t = undefined;
-    var init_once = std.once(current_thread_storage.init);
+    var init_mutex: std.c.pthread_mutex_t = std.c.PTHREAD_MUTEX_INITIALIZER;
+    var init_done: bool = false;
 
     /// Return a per thread ObjectArray with at least the expected index.
     pub fn getArray(index: usize) *ObjectArray {
@@ -183,9 +184,13 @@ const current_thread_storage = struct {
 
     /// Initialize pthread_key_t.
     fn init() void {
+        if (@atomicLoad(bool, &init_done, .monotonic)) return;
+        _ = std.c.pthread_mutex_lock(&init_mutex);
         if (std.c.pthread_key_create(&current_thread_storage.key, current_thread_storage.deinit) != .SUCCESS) {
             abort();
         }
+        @atomicStore(bool, &init_done, true, .release);
+        _ = std.c.pthread_mutex_unlock(&init_mutex);
     }
 
     /// Invoked by pthread specific destructor. the passed argument is the ObjectArray pointer.
@@ -283,7 +288,7 @@ const emutls_control = extern struct {
     /// Get the pointer on allocated storage for emutls variable.
     pub fn getPointer(self: *emutls_control) *anyopaque {
         // ensure current_thread_storage initialization is done
-        current_thread_storage.init_once.call();
+        current_thread_storage.init();
 
         const index = self.getIndex();
         var array = current_thread_storage.getArray(index);
diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig
@@ -632,7 +632,7 @@ export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void {
 
 export fn fuzzer_unslide_address(addr: usize) usize {
     const si = std.debug.getSelfDebugInfo() catch @compileError("unsupported");
-    const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), addr) catch |err| {
+    const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), io, addr) catch |err| {
         std.debug.panic("failed to find virtual address slide: {t}", .{err});
     };
     return addr - slide;
diff --git a/lib/std/Io.zig b/lib/std/Io.zig
@@ -47,6 +47,9 @@ pub const Dir = @import("Io/Dir.zig");
 pub const File = @import("Io/File.zig");
 pub const Terminal = @import("Io/Terminal.zig");
 
+pub const RwLock = @import("Io/RwLock.zig");
+pub const Semaphore = @import("Io/Semaphore.zig");
+
 pub const VTable = struct {
     /// If it returns `null` it means `result` has been already populated and
     /// `await` will be a no-op.
@@ -882,7 +885,7 @@ pub const Timeout = union(enum) {
 
     pub const Error = error{ Timeout, UnsupportedClock };
 
-    pub fn toDeadline(t: Timeout, io: Io) Clock.Error!?Clock.Timestamp {
+    pub fn toTimestamp(t: Timeout, io: Io) Clock.Error!?Clock.Timestamp {
         return switch (t) {
             .none => null,
             .duration => |d| try .fromNow(io, d),
@@ -890,6 +893,14 @@ pub const Timeout = union(enum) {
         };
     }
 
+    pub fn toDeadline(t: Timeout, io: Io) Timeout {
+        return switch (t) {
+            .none => .none,
+            .duration => |d| .{ .deadline = Clock.Timestamp.fromNow(io, d) catch @panic("TODO") },
+            .deadline => |d| .{ .deadline = d },
+        };
+    }
+
     pub fn toDurationFromNow(t: Timeout, io: Io) Clock.Error!?Clock.Duration {
         return switch (t) {
             .none => null,
@@ -2153,5 +2164,7 @@ test {
     _ = Writer;
     _ = Evented;
     _ = Threaded;
+    _ = RwLock;
+    _ = Semaphore;
     _ = @import("Io/test.zig");
 }
diff --git a/lib/std/Io/RwLock.zig b/lib/std/Io/RwLock.zig
@@ -0,0 +1,238 @@
+//! A lock that supports one writer or many readers.
+const RwLock = @This();
+
+const builtin = @import("builtin");
+
+const std = @import("../std.zig");
+const Io = std.Io;
+const assert = std.debug.assert;
+const testing = std.testing;
+
+state: usize,
+mutex: Io.Mutex,
+semaphore: Io.Semaphore,
+
+pub const init: RwLock = .{
+    .state = 0,
+    .mutex = .init,
+    .semaphore = .{},
+};
+
+const is_writing: usize = 1;
+const writer: usize = 1 << 1;
+const reader: usize = 1 << (1 + @bitSizeOf(Count));
+const writer_mask: usize = std.math.maxInt(Count) << @ctz(writer);
+const reader_mask: usize = std.math.maxInt(Count) << @ctz(reader);
+const Count = @Int(.unsigned, @divFloor(@bitSizeOf(usize) - 1, 2));
+
+pub fn tryLock(rl: *RwLock, io: Io) bool {
+    if (rl.mutex.tryLock()) {
+        const state = @atomicLoad(usize, &rl.state, .seq_cst);
+        if (state & reader_mask == 0) {
+            _ = @atomicRmw(usize, &rl.state, .Or, is_writing, .seq_cst);
+            return true;
+        }
+
+        rl.mutex.unlock(io);
+    }
+
+    return false;
+}
+
+pub fn lockUncancelable(rl: *RwLock, io: Io) void {
+    _ = @atomicRmw(usize, &rl.state, .Add, writer, .seq_cst);
+    rl.mutex.lockUncancelable(io);
+
+    const state = @atomicRmw(usize, &rl.state, .Add, is_writing -% writer, .seq_cst);
+    if (state & reader_mask != 0)
+        rl.semaphore.waitUncancelable(io);
+}
+
+pub fn unlock(rl: *RwLock, io: Io) void {
+    _ = @atomicRmw(usize, &rl.state, .And, ~is_writing, .seq_cst);
+    rl.mutex.unlock(io);
+}
+
+pub fn tryLockShared(rl: *RwLock, io: Io) bool {
+    const state = @atomicLoad(usize, &rl.state, .seq_cst);
+    if (state & (is_writing | writer_mask) == 0) {
+        _ = @cmpxchgStrong(
+            usize,
+            &rl.state,
+            state,
+            state + reader,
+            .seq_cst,
+            .seq_cst,
+        ) orelse return true;
+    }
+
+    if (rl.mutex.tryLock()) {
+        _ = @atomicRmw(usize, &rl.state, .Add, reader, .seq_cst);
+        rl.mutex.unlock(io);
+        return true;
+    }
+
+    return false;
+}
+
+pub fn lockSharedUncancelable(rl: *RwLock, io: Io) void {
+    var state = @atomicLoad(usize, &rl.state, .seq_cst);
+    while (state & (is_writing | writer_mask) == 0) {
+        state = @cmpxchgWeak(
+            usize,
+            &rl.state,
+            state,
+            state + reader,
+            .seq_cst,
+            .seq_cst,
+        ) orelse return;
+    }
+
+    rl.mutex.lockUncancelable(io);
+    _ = @atomicRmw(usize, &rl.state, .Add, reader, .seq_cst);
+    rl.mutex.unlock(io);
+}
+
+pub fn unlockShared(rl: *RwLock, io: Io) void {
+    const state = @atomicRmw(usize, &rl.state, .Sub, reader, .seq_cst);
+
+    if ((state & reader_mask == reader) and (state & is_writing != 0))
+        rl.semaphore.post(io);
+}
+
+test "internal state" {
+    const io = testing.io;
+
+    var rl: Io.RwLock = .init;
+
+    // The following failed prior to the fix for Issue #13163,
+    // where the WRITER flag was subtracted by the lock method.
+
+    rl.lockUncancelable(io);
+    rl.unlock(io);
+    try testing.expectEqual(rl, Io.RwLock.init);
+}
+
+test "smoke test" {
+    const io = testing.io;
+
+    var rl: Io.RwLock = .init;
+
+    rl.lockUncancelable(io);
+    try testing.expect(!rl.tryLock(io));
+    try testing.expect(!rl.tryLockShared(io));
+    rl.unlock(io);
+
+    try testing.expect(rl.tryLock(io));
+    try testing.expect(!rl.tryLock(io));
+    try testing.expect(!rl.tryLockShared(io));
+    rl.unlock(io);
+
+    rl.lockSharedUncancelable(io);
+    try testing.expect(!rl.tryLock(io));
+    try testing.expect(rl.tryLockShared(io));
+    rl.unlockShared(io);
+    rl.unlockShared(io);
+
+    try testing.expect(rl.tryLockShared(io));
+    try testing.expect(!rl.tryLock(io));
+    try testing.expect(rl.tryLockShared(io));
+    rl.unlockShared(io);
+    rl.unlockShared(io);
+
+    rl.lockUncancelable(io);
+    rl.unlock(io);
+}
+
+test "concurrent access" {
+    if (builtin.single_threaded) return;
+
+    const io = testing.io;
+    const num_writers: usize = 2;
+    const num_readers: usize = 4;
+    const num_writes: usize = 1000;
+    const num_reads: usize = 2000;
+
+    const Runner = struct {
+        const Runner = @This();
+
+        io: Io,
+
+        rl: Io.RwLock,
+        writes: usize,
+        reads: std.atomic.Value(usize),
+
+        val_a: usize,
+        val_b: usize,
+
+        fn reader(run: *Runner, thread_idx: usize) !void {
+            var prng = std.Random.DefaultPrng.init(thread_idx);
+            const rnd = prng.random();
+            while (true) {
+                run.rl.lockSharedUncancelable(run.io);
+                defer run.rl.unlockShared(run.io);
+
+                try testing.expect(run.writes <= num_writes);
+                if (run.reads.fetchAdd(1, .monotonic) >= num_reads) break;
+
+                // We use `volatile` accesses so that we can make sure the memory is accessed either
+                // side of a yield, maximising chances of a race.
+                const a_ptr: *const volatile usize = &run.val_a;
+                const b_ptr: *const volatile usize = &run.val_b;
+
+                const old_a = a_ptr.*;
+                if (rnd.boolean()) try std.Thread.yield();
+                const old_b = b_ptr.*;
+                try testing.expect(old_a == old_b);
+            }
+        }
+
+        fn writer(run: *Runner, thread_idx: usize) !void {
+            var prng = std.Random.DefaultPrng.init(thread_idx);
+            const rnd = prng.random();
+            while (true) {
+                run.rl.lockUncancelable(run.io);
+                defer run.rl.unlock(run.io);
+
+                try testing.expect(run.writes <= num_writes);
+                if (run.writes == num_writes) break;
+
+                // We use `volatile` accesses so that we can make sure the memory is accessed either
+                // side of a yield, maximising chances of a race.
+                const a_ptr: *volatile usize = &run.val_a;
+                const b_ptr: *volatile usize = &run.val_b;
+
+                const new_val = rnd.int(usize);
+
+                const old_a = a_ptr.*;
+                a_ptr.* = new_val;
+                if (rnd.boolean()) try std.Thread.yield();
+                const old_b = b_ptr.*;
+                b_ptr.* = new_val;
+                try testing.expect(old_a == old_b);
+
+                run.writes += 1;
+            }
+        }
+    };
+
+    var run: Runner = .{
+        .io = io,
+        .rl = .init,
+        .writes = 0,
+        .reads = .init(0),
+        .val_a = 0,
+        .val_b = 0,
+    };
+    var write_threads: [num_writers]std.Thread = undefined;
+    var read_threads: [num_readers]std.Thread = undefined;
+
+    for (&write_threads, 0..) |*t, i| t.* = try .spawn(.{}, Runner.writer, .{ &run, i });
+    for (&read_threads, num_writers..) |*t, i| t.* = try .spawn(.{}, Runner.reader, .{ &run, i });
+
+    for (write_threads) |t| t.join();
+    for (read_threads) |t| t.join();
+
+    try testing.expect(run.writes == num_writes);
+    try testing.expect(run.reads.raw >= num_reads);
+}
diff --git a/lib/std/Io/Semaphore.zig b/lib/std/Io/Semaphore.zig
@@ -0,0 +1,65 @@
+//! An unsigned integer that blocks the kernel thread if the number would
+//! become negative.
+//!
+//! This API supports static initialization and does not require deinitialization.
+const Semaphore = @This();
+
+const builtin = @import("builtin");
+
+const std = @import("../std.zig");
+const Io = std.Io;
+const testing = std.testing;
+
+mutex: Io.Mutex = .init,
+cond: Io.Condition = .init,
+/// It is OK to initialize this field to any value.
+permits: usize = 0,
+
+pub fn wait(s: *Semaphore, io: Io) Io.Cancelable!void {
+    try s.mutex.lock(io);
+    defer s.mutex.unlock(io);
+    while (s.permits == 0) try s.cond.wait(io, &s.mutex);
+    s.permits -= 1;
+    if (s.permits > 0) s.cond.signal(io);
+}
+
+pub fn waitUncancelable(s: *Semaphore, io: Io) void {
+    s.mutex.lockUncancelable(io);
+    defer s.mutex.unlock(io);
+    while (s.permits == 0) s.cond.waitUncancelable(io, &s.mutex);
+    s.permits -= 1;
+    if (s.permits > 0) s.cond.signal(io);
+}
+
+pub fn post(s: *Semaphore, io: Io) void {
+    s.mutex.lockUncancelable(io);
+    defer s.mutex.unlock(io);
+
+    s.permits += 1;
+    s.cond.signal(io);
+}
+
+test Semaphore {
+    if (builtin.single_threaded) return error.SkipZigTest;
+    const io = testing.io;
+
+    const TestContext = struct {
+        sem: *Semaphore,
+        n: *i32,
+        fn worker(ctx: *@This()) !void {
+            try ctx.sem.wait(io);
+            ctx.n.* += 1;
+            ctx.sem.post(io);
+        }
+    };
+    const num_threads = 3;
+    var sem: Semaphore = .{ .permits = 1 };
+    var threads: [num_threads]std.Thread = undefined;
+    var n: i32 = 0;
+    var ctx = TestContext{ .sem = &sem, .n = &n };
+
+    for (&threads) |*t| t.* = try std.Thread.spawn(.{}, TestContext.worker, .{&ctx});
+    for (threads) |t| t.join();
+    try sem.wait(io);
+    try testing.expect(n == num_threads);
+}
diff --git a/lib/std/Io/Threaded.zig b/lib/std/Io/Threaded.zig
@@ -1126,6 +1126,25 @@ const Thread = struct {
             return @ptrFromInt(@as(usize, @bitCast(split)));
         }
     };
+
+    /// Same as `Io.Mutex.lock` but avoids the VTable.
+    fn mutexLock(m: *Io.Mutex) Io.Cancelable!void {
+        const initial_state = m.state.cmpxchgWeak(
+            .unlocked,
+            .locked_once,
+            .acquire,
+            .monotonic,
+        ) orelse {
+            @branchHint(.likely);
+            return;
+        };
+        if (initial_state == .contended) {
+            try Thread.futexWait(@ptrCast(&m.state.raw), @intFromEnum(Io.Mutex.State.contended), null);
+        }
+        while (m.state.swap(.contended, .acquire) != .unlocked) {
+            try Thread.futexWait(@ptrCast(&m.state.raw), @intFromEnum(Io.Mutex.State.contended), null);
+        }
+    }
 };
 
 const Syscall = struct {
@@ -1486,8 +1505,8 @@ var global_single_threaded_instance: Threaded = .init_single_threaded;
 pub const global_single_threaded: *Threaded = &global_single_threaded_instance;
 
 pub fn setAsyncLimit(t: *Threaded, new_limit: Io.Limit) void {
-    mutexLockUncancelable(&t.mutex);
-    defer mutexUnlock(&t.mutex);
+    mutexLockInternal(&t.mutex);
+    defer mutexUnlockInternal(&t.mutex);
     t.async_limit = new_limit;
 }
 
@@ -1508,8 +1527,8 @@ pub fn deinit(t: *Threaded) void {
 fn join(t: *Threaded) void {
     if (builtin.single_threaded) return;
     {
-        mutexLockUncancelable(&t.mutex);
-        defer mutexUnlock(&t.mutex);
+        mutexLockInternal(&t.mutex);
+        defer mutexUnlockInternal(&t.mutex);
         t.join_requested = true;
     }
     condBroadcast(&t.cond);
@@ -1574,16 +1593,16 @@ fn worker(t: *Threaded) void {
 
     defer t.wait_group.finish();
 
-    mutexLockUncancelable(&t.mutex);
-    defer mutexUnlock(&t.mutex);
+    mutexLockInternal(&t.mutex);
+    defer mutexUnlockInternal(&t.mutex);
 
     while (true) {
         while (t.run_queue.popFirst()) |runnable_node| {
-            mutexUnlock(&t.mutex);
+            mutexUnlockInternal(&t.mutex);
             thread.cancel_protection = .unblocked;
             const runnable: *Runnable = @fieldParentPtr("node", runnable_node);
             runnable.startFn(runnable, &thread, t);
-            mutexLockUncancelable(&t.mutex);
+            mutexLockInternal(&t.mutex);
             t.busy_count -= 1;
         }
         if (t.join_requested) break;
@@ -2004,12 +2023,12 @@ fn async(
         },
     };
 
-    mutexLockUncancelable(&t.mutex);
+    mutexLockInternal(&t.mutex);
 
     const busy_count = t.busy_count;
 
     if (busy_count >= @intFromEnum(t.async_limit)) {
-        mutexUnlock(&t.mutex);
+        mutexUnlockInternal(&t.mutex);
         future.destroy(gpa);
         start(context.ptr, result.ptr);
         return null;
@@ -2023,7 +2042,7 @@ fn async(
         const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch {
             t.wait_group.finish();
             t.busy_count = busy_count;
-            mutexUnlock(&t.mutex);
+            mutexUnlockInternal(&t.mutex);
             future.destroy(gpa);
             start(context.ptr, result.ptr);
             return null;
@@ -2033,7 +2052,7 @@ fn async(
 
     t.run_queue.prepend(&future.runnable.node);
 
-    mutexUnlock(&t.mutex);
+    mutexUnlockInternal(&t.mutex);
     condSignal(&t.cond);
     return @ptrCast(future);
 }
@@ -2056,8 +2075,8 @@ fn concurrent(
     };
     errdefer future.destroy(gpa);
 
-    mutexLockUncancelable(&t.mutex);
-    defer mutexUnlock(&t.mutex);
+    mutexLockInternal(&t.mutex);
+    defer mutexUnlockInternal(&t.mutex);
 
     const busy_count = t.busy_count;
 
@@ -2101,12 +2120,12 @@ fn groupAsync(
         error.OutOfMemory => return groupAsyncEager(start, context.ptr),
     };
 
-    mutexLockUncancelable(&t.mutex);
+    mutexLockInternal(&t.mutex);
 
     const busy_count = t.busy_count;
 
     if (busy_count >= @intFromEnum(t.async_limit)) {
-        mutexUnlock(&t.mutex);
+        mutexUnlockInternal(&t.mutex);
         task.destroy(gpa);
         return groupAsyncEager(start, context.ptr);
     }
@@ -2119,7 +2138,7 @@ fn groupAsync(
         const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch {
             t.wait_group.finish();
             t.busy_count = busy_count;
-            mutexUnlock(&t.mutex);
+            mutexUnlockInternal(&t.mutex);
             task.destroy(gpa);
             return groupAsyncEager(start, context.ptr);
         };
@@ -2136,7 +2155,7 @@ fn groupAsync(
     }, .monotonic);
     t.run_queue.prepend(&task.runnable.node);
 
-    mutexUnlock(&t.mutex);
+    mutexUnlockInternal(&t.mutex);
     condSignal(&t.cond);
 }
 fn groupAsyncEager(
@@ -2201,8 +2220,8 @@ fn groupConcurrent(
     };
     errdefer task.destroy(gpa);
 
-    mutexLockUncancelable(&t.mutex);
-    defer mutexUnlock(&t.mutex);
+    mutexLockInternal(&t.mutex);
+    defer mutexUnlockInternal(&t.mutex);
 
     const busy_count = t.busy_count;
 
@@ -2636,7 +2655,7 @@ fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void {
 fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout) Io.Batch.AwaitConcurrentError!void {
     const t: *Threaded = @ptrCast(@alignCast(userdata));
     if (is_windows) {
-        const deadline: ?Io.Clock.Timestamp = timeout.toDeadline(ioBasic(t)) catch |err| switch (err) {
+        const deadline: ?Io.Clock.Timestamp = timeout.toTimestamp(ioBasic(t)) catch |err| switch (err) {
             error.Unexpected => deadline: {
                 recoverableOsBugDetected();
                 break :deadline .{ .raw = .{ .nanoseconds = 0 }, .clock = .awake };
@@ -2735,7 +2754,7 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout
         else => {},
     }
     const t_io = ioBasic(t);
-    const deadline = timeout.toDeadline(t_io) catch return error.UnsupportedClock;
+    const deadline = timeout.toTimestamp(t_io) catch return error.UnsupportedClock;
     while (true) {
         const timeout_ms: i32 = t: {
             if (b.completions.head != .none) {
@@ -3838,8 +3857,8 @@ fn fileStatWindows(userdata: ?*anyopaque, file: File) File.StatError!File.Stat {
 
 fn systemBasicInformation(t: *Threaded) ?*const windows.SYSTEM_BASIC_INFORMATION {
     if (!t.system_basic_information.initialized.load(.acquire)) {
-        mutexLockUncancelable(&t.mutex);
-        defer mutexUnlock(&t.mutex);
+        mutexLockInternal(&t.mutex);
+        defer mutexUnlockInternal(&t.mutex);
 
         switch (windows.ntdll.NtQuerySystemInformation(
             .SystemBasicInformation,
@@ -10899,7 +10918,7 @@ fn nowWasi(clock: Io.Clock) Io.Clock.Error!Io.Timestamp {
 fn sleep(userdata: ?*anyopaque, timeout: Io.Timeout) Io.SleepError!void {
     const t: *Threaded = @ptrCast(@alignCast(userdata));
     if (timeout == .none) return;
-    if (use_parking_sleep) return parking_sleep.sleep(try timeout.toDeadline(ioBasic(t)));
+    if (use_parking_sleep) return parking_sleep.sleep(try timeout.toTimestamp(ioBasic(t)));
     if (native_os == .wasi) return sleepWasi(t, timeout);
     if (@TypeOf(posix.system.clock_nanosleep) != void) return sleepPosix(timeout);
     return sleepNanosleep(t, timeout);
@@ -12611,7 +12630,7 @@ fn netReceivePosix(
     var message_i: usize = 0;
     var data_i: usize = 0;
 
-    const deadline = timeout.toDeadline(t_io) catch |err| return .{ err, message_i };
+    const deadline = timeout.toTimestamp(t_io) catch |err| return .{ err, message_i };
 
     recv: while (true) {
         if (message_buffer.len - message_i == 0) return .{ null, message_i };
@@ -14373,8 +14392,8 @@ const WindowsEnvironStrings = struct {
 };
 
 fn scanEnviron(t: *Threaded) void {
-    mutexLockUncancelable(&t.mutex);
-    defer mutexUnlock(&t.mutex);
+    mutexLockInternal(&t.mutex);
+    defer mutexUnlockInternal(&t.mutex);
 
     if (t.environ.initialized) return;
     t.environ.initialized = true;
@@ -14729,8 +14748,8 @@ fn spawnPosix(t: *Threaded, options: process.SpawnOptions) process.SpawnError!Sp
 
 fn getDevNullFd(t: *Threaded) !posix.fd_t {
     {
-        mutexLockUncancelable(&t.mutex);
-        defer mutexUnlock(&t.mutex);
+        mutexLockInternal(&t.mutex);
+        defer mutexUnlockInternal(&t.mutex);
         if (t.null_file.fd != -1) return t.null_file.fd;
     }
     const mode: u32 = 0;
@@ -14741,8 +14760,8 @@ fn getDevNullFd(t: *Threaded) !posix.fd_t {
             .SUCCESS => {
                 syscall.finish();
                 const fresh_fd: posix.fd_t = @intCast(rc);
-                mutexLockUncancelable(&t.mutex); // Another thread might have won the race.
-                defer mutexUnlock(&t.mutex);
+                mutexLockInternal(&t.mutex); // Another thread might have won the race.
+                defer mutexUnlockInternal(&t.mutex);
                 if (t.null_file.fd != -1) {
                     posix.close(fresh_fd);
                     return t.null_file.fd;
@@ -15402,8 +15421,8 @@ fn processSpawnWindows(userdata: ?*anyopaque, options: process.SpawnOptions) pro
 
 fn getCngHandle(t: *Threaded) Io.RandomSecureError!windows.HANDLE {
     {
-        mutexLockUncancelable(&t.mutex);
-        defer mutexUnlock(&t.mutex);
+        mutexLockInternal(&t.mutex);
+        defer mutexUnlockInternal(&t.mutex);
         if (t.random_file.handle) |handle| return handle;
     }
 
@@ -15437,8 +15456,8 @@ fn getCngHandle(t: *Threaded) Io.RandomSecureError!windows.HANDLE {
     )) {
         .SUCCESS => {
             syscall.finish();
-            mutexLockUncancelable(&t.mutex); // Another thread might have won the race.
-            defer mutexUnlock(&t.mutex);
+            mutexLockInternal(&t.mutex); // Another thread might have won the race.
+            defer mutexUnlockInternal(&t.mutex);
             if (t.random_file.handle) |prev_handle| {
                 windows.CloseHandle(fresh_handle);
                 return prev_handle;
@@ -15458,8 +15477,8 @@ fn getCngHandle(t: *Threaded) Io.RandomSecureError!windows.HANDLE {
 
 fn getNulHandle(t: *Threaded) !windows.HANDLE {
     {
-        mutexLockUncancelable(&t.mutex);
-        defer mutexUnlock(&t.mutex);
+        mutexLockInternal(&t.mutex);
+        defer mutexUnlockInternal(&t.mutex);
         if (t.null_file.handle) |handle| return handle;
     }
 
@@ -15505,8 +15524,8 @@ fn getNulHandle(t: *Threaded) !windows.HANDLE {
     )) {
         .SUCCESS => {
             syscall.finish();
-            mutexLockUncancelable(&t.mutex); // Another thread might have won the race.
-            defer mutexUnlock(&t.mutex);
+            mutexLockInternal(&t.mutex); // Another thread might have won the race.
+            defer mutexUnlockInternal(&t.mutex);
             if (t.null_file.handle) |prev_handle| {
                 windows.CloseHandle(fresh_handle);
                 return prev_handle;
@@ -16551,15 +16570,15 @@ fn random(userdata: ?*anyopaque, buffer: []u8) void {
 }
 
 fn randomMainThread(t: *Threaded, buffer: []u8) void {
-    mutexLockUncancelable(&t.mutex);
-    defer mutexUnlock(&t.mutex);
+    mutexLockInternal(&t.mutex);
+    defer mutexUnlockInternal(&t.mutex);
 
     if (!t.csprng.isInitialized()) {
         @branchHint(.unlikely);
         var seed: [Csprng.seed_len]u8 = undefined;
         {
-            mutexUnlock(&t.mutex);
-            defer mutexLockUncancelable(&t.mutex);
+            mutexUnlockInternal(&t.mutex);
+            defer mutexLockInternal(&t.mutex);
 
             const prev = swapCancelProtection(t, .blocked);
             defer _ = swapCancelProtection(t, prev);
@@ -16744,8 +16763,8 @@ fn randomSecure(userdata: ?*anyopaque, buffer: []u8) Io.RandomSecureError!void {
 
 fn getRandomFd(t: *Threaded) Io.RandomSecureError!posix.fd_t {
     {
-        mutexLockUncancelable(&t.mutex);
-        defer mutexUnlock(&t.mutex);
+        mutexLockInternal(&t.mutex);
+        defer mutexUnlockInternal(&t.mutex);
 
         if (t.random_file.fd == -2) return error.EntropyUnavailable;
         if (t.random_file.fd != -1) return t.random_file.fd;
@@ -16785,8 +16804,8 @@ fn getRandomFd(t: *Threaded) Io.RandomSecureError!posix.fd_t {
                     .SUCCESS => {
                         syscall.finish();
                         if (!statx.mask.TYPE) return error.EntropyUnavailable;
-                        mutexLockUncancelable(&t.mutex); // Another thread might have won the race.
-                        defer mutexUnlock(&t.mutex);
+                        mutexLockInternal(&t.mutex); // Another thread might have won the race.
+                        defer mutexUnlockInternal(&t.mutex);
                         if (t.random_file.fd >= 0) {
                             posix.close(fd);
                             return t.random_file.fd;
@@ -16813,8 +16832,8 @@ fn getRandomFd(t: *Threaded) Io.RandomSecureError!posix.fd_t {
                 switch (posix.errno(fstat_sym(fd, &stat))) {
                     .SUCCESS => {
                         syscall.finish();
-                        mutexLockUncancelable(&t.mutex); // Another thread might have won the race.
-                        defer mutexUnlock(&t.mutex);
+                        mutexLockInternal(&t.mutex); // Another thread might have won the race.
+                        defer mutexUnlockInternal(&t.mutex);
                         if (t.random_file.fd >= 0) {
                             posix.close(fd);
                             return t.random_file.fd;
@@ -16947,8 +16966,8 @@ const parking_futex = struct {
         var status_buf: std.atomic.Value(Thread.Status) = undefined;
 
         {
-            mutexLockUncancelable(&bucket.mutex);
-            defer mutexUnlock(&bucket.mutex);
+            mutexLockInternal(&bucket.mutex);
+            defer mutexUnlockInternal(&bucket.mutex);
 
             _ = bucket.num_waiters.fetchAdd(1, .acquire);
 
@@ -17017,8 +17036,8 @@ const parking_futex = struct {
                 .parked => {
                     // We saw a timeout and updated our own status from `.parked` to `.none`. It is
                     // our responsibility to remove `waiter` from `bucket`.
-                    mutexLockUncancelable(&bucket.mutex);
-                    defer mutexUnlock(&bucket.mutex);
+                    mutexLockInternal(&bucket.mutex);
+                    defer mutexUnlockInternal(&bucket.mutex);
                     bucket.waiters.remove(&waiter.node);
                     assert(bucket.num_waiters.fetchSub(1, .monotonic) > 0);
                 },
@@ -17057,8 +17076,8 @@ const parking_futex = struct {
         // of the critical section. This forms a singly-linked list of waiters using `Waiter.node.next`.
         var waking_head: ?*std.DoublyLinkedList.Node = null;
         {
-            mutexLockUncancelable(&bucket.mutex);
-            defer mutexUnlock(&bucket.mutex);
+            mutexLockInternal(&bucket.mutex);
+            defer mutexUnlockInternal(&bucket.mutex);
 
             var num_removed: u32 = 0;
             var it = bucket.waiters.first;
@@ -17113,8 +17132,8 @@ const parking_futex = struct {
 
     fn removeCanceledWaiter(waiter: *Waiter) void {
         const bucket = bucketForAddress(waiter.address);
-        mutexLockUncancelable(&bucket.mutex);
-        defer mutexUnlock(&bucket.mutex);
+        mutexLockInternal(&bucket.mutex);
+        defer mutexUnlockInternal(&bucket.mutex);
         bucket.waiters.remove(&waiter.node);
         assert(bucket.num_waiters.fetchSub(1, .monotonic) > 0);
         waiter.done.store(true, .release); // potentially invalidates `waiter.*`
@@ -18163,8 +18182,8 @@ fn condWait(cond: *Condition, mutex: *Mutex) void {
         assert(prev_state.waiters < std.math.maxInt(u16)); // overflow caused by too many waiters
     }
 
-    mutexUnlock(mutex);
-    defer mutexLockUncancelable(mutex);
+    mutexUnlockInternal(mutex);
+    defer mutexLockInternal(mutex);
 
     while (true) {
         Thread.futexWaitUncancelable(&cond.epoch.raw, epoch, null);
@@ -18189,28 +18208,13 @@ const Mutex = if (!is_windows) Io.Mutex else struct {
     const init: @This() = .{ .srwlock = .{} };
 };
 
-/// Same as `Io.Mutex.lockUncancelable` but avoids the VTable.
-fn mutexLock(m: *Io.Mutex) Io.Cancelable!void {
-    const initial_state = m.state.cmpxchgWeak(
-        .unlocked,
-        .locked_once,
-        .acquire,
-        .monotonic,
-    ) orelse {
-        @branchHint(.likely);
-        return;
-    };
-    if (initial_state == .contended) {
-        try Thread.futexWait(@ptrCast(&m.state.raw), @intFromEnum(Io.Mutex.State.contended), null);
-    }
-    while (m.state.swap(.contended, .acquire) != .unlocked) {
-        try Thread.futexWait(@ptrCast(&m.state.raw), @intFromEnum(Io.Mutex.State.contended), null);
-    }
+fn mutexLockInternal(m: *Mutex) void {
+    if (is_windows) return windows.ntdll.RtlAcquireSRWLockExclusive(&m.srwlock);
+    return mutexLock(m);
 }
 
 /// Same as `Io.Mutex.lockUncancelable` but avoids the VTable.
-fn mutexLockUncancelable(m: *Mutex) void {
-    if (is_windows) return windows.ntdll.RtlAcquireSRWLockExclusive(&m.srwlock);
+pub fn mutexLock(m: *Io.Mutex) void {
     const initial_state = m.state.cmpxchgWeak(
         .unlocked,
         .locked_once,
@@ -18228,9 +18232,13 @@ fn mutexLockUncancelable(m: *Mutex) void {
     }
 }
 
-/// Same as `Io.Mutex.unlock` but avoids the VTable.
-fn mutexUnlock(m: *Mutex) void {
+fn mutexUnlockInternal(m: *Mutex) void {
     if (is_windows) return windows.ntdll.RtlReleaseSRWLockExclusive(&m.srwlock);
+    return mutexUnlock(m);
+}
+
+/// Same as `Io.Mutex.unlock` but avoids the VTable.
+pub fn mutexUnlock(m: *Io.Mutex) void {
     switch (m.state.swap(.unlocked, .release)) {
         .unlocked => unreachable,
         .locked_once => {},
diff --git a/lib/std/Thread.zig b/lib/std/Thread.zig
@@ -14,13 +14,9 @@ const posix = std.posix;
 const windows = std.os.windows;
 const testing = std.testing;
 
-pub const Futex = @import("Thread/Futex.zig");
-pub const Mutex = @import("Thread/Mutex.zig");
-pub const Semaphore = @import("Thread/Semaphore.zig");
-pub const Condition = @import("Thread/Condition.zig");
-pub const RwLock = @import("Thread/RwLock.zig");
-
-pub const Pool = @compileError("deprecated; consider using 'std.Io.Group' with 'std.Io.Threaded'");
+pub const Mutex = struct {
+    pub const Recursive = @import("Thread/Mutex/Recursive.zig");
+};
 
 pub const use_pthreads = native_os != .windows and native_os != .wasi and builtin.link_libc;
 
@@ -1609,11 +1605,7 @@ test "setName, getName" {
 }
 
 test {
-    _ = Futex;
     _ = Mutex;
-    _ = Semaphore;
-    _ = Condition;
-    _ = RwLock;
 }
 
 fn testIncrementNotify(io: Io, value: *usize, event: *Io.Event) void {
diff --git a/lib/std/Thread/Condition.zig b/lib/std/Thread/Condition.zig
@@ -1,683 +0,0 @@
-//! Condition variables are used with a Mutex to efficiently wait for an arbitrary condition to occur.
-//! It does this by atomically unlocking the mutex, blocking the thread until notified, and finally re-locking the mutex.
-//! Condition can be statically initialized and is at most `@sizeOf(u64)` large.
-//!
-//! Example:
-//! ```
-//! var m = Mutex{};
-//! var c = Condition{};
-//! var predicate = false;
-//!
-//! fn consumer() void {
-//!     m.lock();
-//!     defer m.unlock();
-//!
-//!     while (!predicate) {
-//!         c.wait(&m);
-//!     }
-//! }
-//!
-//! fn producer() void {
-//!     {
-//!         m.lock();
-//!         defer m.unlock();
-//!         predicate = true;
-//!     }
-//!     c.signal();
-//! }
-//!
-//! const thread = try std.Thread.spawn(.{}, producer, .{});
-//! consumer();
-//! thread.join();
-//! ```
-//!
-//! Note that condition variables can only reliably unblock threads that are sequenced before them using the same Mutex.
-//! This means that the following is allowed to deadlock:
-//! ```
-//! thread-1: mutex.lock()
-//! thread-1: condition.wait(&mutex)
-//!
-//! thread-2: // mutex.lock() (without this, the following signal may not see the waiting thread-1)
-//! thread-2: // mutex.unlock() (this is optional for correctness once locked above, as signal can be called while holding the mutex)
-//! thread-2: condition.signal()
-//! ```
-
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const Condition = @This();
-const Mutex = std.Thread.Mutex;
-
-const os = std.os;
-const assert = std.debug.assert;
-const testing = std.testing;
-const Futex = std.Thread.Futex;
-
-impl: Impl = .{},
-
-/// Atomically releases the Mutex, blocks the caller thread, then re-acquires the Mutex on return.
-/// "Atomically" here refers to accesses done on the Condition after acquiring the Mutex.
-///
-/// The Mutex must be locked by the caller's thread when this function is called.
-/// A Mutex can have multiple Conditions waiting with it concurrently, but not the opposite.
-/// It is undefined behavior for multiple threads to wait ith different mutexes using the same Condition concurrently.
-/// Once threads have finished waiting with one Mutex, the Condition can be used to wait with another Mutex.
-///
-/// A blocking call to wait() is unblocked from one of the following conditions:
-/// - a spurious ("at random") wake up occurs
-/// - a future call to `signal()` or `broadcast()` which has acquired the Mutex and is sequenced after this `wait()`.
-///
-/// Given wait() can be interrupted spuriously, the blocking condition should be checked continuously
-/// irrespective of any notifications from `signal()` or `broadcast()`.
-pub fn wait(self: *Condition, mutex: *Mutex) void {
-    self.impl.wait(mutex, null) catch |err| switch (err) {
-        error.Timeout => unreachable, // no timeout provided so we shouldn't have timed-out
-    };
-}
-
-/// Atomically releases the Mutex, blocks the caller thread, then re-acquires the Mutex on return.
-/// "Atomically" here refers to accesses done on the Condition after acquiring the Mutex.
-///
-/// The Mutex must be locked by the caller's thread when this function is called.
-/// A Mutex can have multiple Conditions waiting with it concurrently, but not the opposite.
-/// It is undefined behavior for multiple threads to wait ith different mutexes using the same Condition concurrently.
-/// Once threads have finished waiting with one Mutex, the Condition can be used to wait with another Mutex.
-///
-/// A blocking call to `timedWait()` is unblocked from one of the following conditions:
-/// - a spurious ("at random") wake occurs
-/// - the caller was blocked for around `timeout_ns` nanoseconds, in which `error.Timeout` is returned.
-/// - a future call to `signal()` or `broadcast()` which has acquired the Mutex and is sequenced after this `timedWait()`.
-///
-/// Given `timedWait()` can be interrupted spuriously, the blocking condition should be checked continuously
-/// irrespective of any notifications from `signal()` or `broadcast()`.
-pub fn timedWait(self: *Condition, mutex: *Mutex, timeout_ns: u64) error{Timeout}!void {
-    return self.impl.wait(mutex, timeout_ns);
-}
-
-/// Unblocks at least one thread blocked in a call to `wait()` or `timedWait()` with a given Mutex.
-/// The blocked thread must be sequenced before this call with respect to acquiring the same Mutex in order to be observable for unblocking.
-/// `signal()` can be called with or without the relevant Mutex being acquired and have no "effect" if there's no observable blocked threads.
-pub fn signal(self: *Condition) void {
-    self.impl.wake(.one);
-}
-
-/// Unblocks all threads currently blocked in a call to `wait()` or `timedWait()` with a given Mutex.
-/// The blocked threads must be sequenced before this call with respect to acquiring the same Mutex in order to be observable for unblocking.
-/// `broadcast()` can be called with or without the relevant Mutex being acquired and have no "effect" if there's no observable blocked threads.
-pub fn broadcast(self: *Condition) void {
-    self.impl.wake(.all);
-}
-
-const Impl = Impl: {
-    if (builtin.single_threaded) break :Impl SingleThreadedImpl;
-    if (builtin.os.tag == .windows) break :Impl WindowsImpl;
-
-    if (builtin.os.tag.isDarwin() or
-        builtin.target.os.tag == .linux or
-        builtin.target.os.tag == .freebsd or
-        builtin.target.os.tag == .openbsd or
-        builtin.target.os.tag == .dragonfly or
-        builtin.target.cpu.arch.isWasm())
-    {
-        // Futex is the system's synchronization primitive; use that.
-        break :Impl FutexImpl;
-    }
-
-    if (std.Thread.use_pthreads) {
-        // This system doesn't have a futex primitive, so `std.Thread.Futex` is using `PosixImpl`,
-        // which implements futex *on top of* pthread mutexes and conditions. Therefore, instead
-        // of going through that long inefficient path, just use pthread condition variable directly.
-        break :Impl PosixImpl;
-    }
-
-    break :Impl FutexImpl;
-};
-
-const Notify = enum {
-    one, // wake up only one thread
-    all, // wake up all threads
-};
-
-const SingleThreadedImpl = struct {
-    fn wait(self: *Impl, mutex: *Mutex, timeout: ?u64) error{Timeout}!void {
-        _ = self;
-        _ = mutex;
-        // There are no other threads to wake us up.
-        // So if we wait without a timeout we would never wake up.
-        assert(timeout != null); // Deadlock detected.
-        return error.Timeout;
-    }
-
-    fn wake(self: *Impl, comptime notify: Notify) void {
-        // There are no other threads to wake up.
-        _ = self;
-        _ = notify;
-    }
-};
-
-const WindowsImpl = struct {
-    condition: os.windows.CONDITION_VARIABLE = .{},
-
-    fn wait(self: *Impl, mutex: *Mutex, timeout: ?u64) error{Timeout}!void {
-        var timeout_overflowed = false;
-        var timeout_ms: os.windows.DWORD = os.windows.INFINITE;
-
-        if (timeout) |timeout_ns| {
-            // Round the nanoseconds to the nearest millisecond,
-            // then saturating cast it to windows DWORD for use in kernel32 call.
-            const ms = (timeout_ns +| (std.time.ns_per_ms / 2)) / std.time.ns_per_ms;
-            timeout_ms = std.math.cast(os.windows.DWORD, ms) orelse std.math.maxInt(os.windows.DWORD);
-
-            // Track if the timeout overflowed into INFINITE and make sure not to wait forever.
-            if (timeout_ms == os.windows.INFINITE) {
-                timeout_overflowed = true;
-                timeout_ms -= 1;
-            }
-        }
-
-        if (builtin.mode == .Debug) {
-            // The internal state of the DebugMutex needs to be handled here as well.
-            mutex.impl.locking_thread.store(0, .unordered);
-        }
-        const rc = os.windows.kernel32.SleepConditionVariableSRW(
-            &self.condition,
-            if (builtin.mode == .Debug) &mutex.impl.impl.srwlock else &mutex.impl.srwlock,
-            timeout_ms,
-            0, // the srwlock was assumed to acquired in exclusive mode not shared
-        );
-        if (builtin.mode == .Debug) {
-            // The internal state of the DebugMutex needs to be handled here as well.
-            mutex.impl.locking_thread.store(std.Thread.getCurrentId(), .unordered);
-        }
-
-        // Return error.Timeout if we know the timeout elapsed correctly.
-        if (rc == os.windows.FALSE) {
-            assert(os.windows.GetLastError() == .TIMEOUT);
-            if (!timeout_overflowed) return error.Timeout;
-        }
-    }
-
-    fn wake(self: *Impl, comptime notify: Notify) void {
-        switch (notify) {
-            .one => os.windows.ntdll.RtlWakeConditionVariable(&self.condition),
-            .all => os.windows.ntdll.RtlWakeAllConditionVariable(&self.condition),
-        }
-    }
-};
-
-const FutexImpl = struct {
-    state: std.atomic.Value(u32) = std.atomic.Value(u32).init(0),
-    epoch: std.atomic.Value(u32) = std.atomic.Value(u32).init(0),
-
-    const one_waiter = 1;
-    const waiter_mask = 0xffff;
-
-    const one_signal = 1 << 16;
-    const signal_mask = 0xffff << 16;
-
-    fn wait(self: *Impl, mutex: *Mutex, timeout: ?u64) error{Timeout}!void {
-        // Observe the epoch, then check the state again to see if we should wake up.
-        // The epoch must be observed before we check the state or we could potentially miss a wake() and deadlock:
-        //
-        // - T1: s = LOAD(&state)
-        // - T2: UPDATE(&s, signal)
-        // - T2: UPDATE(&epoch, 1) + FUTEX_WAKE(&epoch)
-        // - T1: e = LOAD(&epoch) (was reordered after the state load)
-        // - T1: s & signals == 0 -> FUTEX_WAIT(&epoch, e) (missed the state update + the epoch change)
-        //
-        // Acquire barrier to ensure the epoch load happens before the state load.
-        var epoch = self.epoch.load(.acquire);
-        var state = self.state.fetchAdd(one_waiter, .monotonic);
-        assert(state & waiter_mask != waiter_mask);
-        state += one_waiter;
-
-        mutex.unlock();
-        defer mutex.lock();
-
-        var futex_deadline = Futex.Deadline.init(timeout);
-
-        while (true) {
-            futex_deadline.wait(&self.epoch, epoch) catch |err| switch (err) {
-                // On timeout, we must decrement the waiter we added above.
-                error.Timeout => {
-                    while (true) {
-                        // If there's a signal when we're timing out, consume it and report being woken up instead.
-                        // Acquire barrier ensures code before the wake() which added the signal happens before we decrement it and return.
-                        while (state & signal_mask != 0) {
-                            const new_state = state - one_waiter - one_signal;
-                            state = self.state.cmpxchgWeak(state, new_state, .acquire, .monotonic) orelse return;
-                        }
-
-                        // Remove the waiter we added and officially return timed out.
-                        const new_state = state - one_waiter;
-                        state = self.state.cmpxchgWeak(state, new_state, .monotonic, .monotonic) orelse return err;
-                    }
-                },
-            };
-
-            epoch = self.epoch.load(.acquire);
-            state = self.state.load(.monotonic);
-
-            // Try to wake up by consuming a signal and decremented the waiter we added previously.
-            // Acquire barrier ensures code before the wake() which added the signal happens before we decrement it and return.
-            while (state & signal_mask != 0) {
-                const new_state = state - one_waiter - one_signal;
-                state = self.state.cmpxchgWeak(state, new_state, .acquire, .monotonic) orelse return;
-            }
-        }
-    }
-
-    fn wake(self: *Impl, comptime notify: Notify) void {
-        var state = self.state.load(.monotonic);
-        while (true) {
-            const waiters = (state & waiter_mask) / one_waiter;
-            const signals = (state & signal_mask) / one_signal;
-
-            // Reserves which waiters to wake up by incrementing the signals count.
-            // Therefore, the signals count is always less than or equal to the waiters count.
-            // We don't need to Futex.wake if there's nothing to wake up or if other wake() threads have reserved to wake up the current waiters.
-            const wakeable = waiters - signals;
-            if (wakeable == 0) {
-                return;
-            }
-
-            const to_wake = switch (notify) {
-                .one => 1,
-                .all => wakeable,
-            };
-
-            // Reserve the amount of waiters to wake by incrementing the signals count.
-            // Release barrier ensures code before the wake() happens before the signal it posted and consumed by the wait() threads.
-            const new_state = state + (one_signal * to_wake);
-            state = self.state.cmpxchgWeak(state, new_state, .release, .monotonic) orelse {
-                // Wake up the waiting threads we reserved above by changing the epoch value.
-                // NOTE: a waiting thread could miss a wake up if *exactly* ((1<<32)-1) wake()s happen between it observing the epoch and sleeping on it.
-                // This is very unlikely due to how many precise amount of Futex.wake() calls that would be between the waiting thread's potential preemption.
-                //
-                // Release barrier ensures the signal being added to the state happens before the epoch is changed.
-                // If not, the waiting thread could potentially deadlock from missing both the state and epoch change:
-                //
-                // - T2: UPDATE(&epoch, 1) (reordered before the state change)
-                // - T1: e = LOAD(&epoch)
-                // - T1: s = LOAD(&state)
-                // - T2: UPDATE(&state, signal) + FUTEX_WAKE(&epoch)
-                // - T1: s & signals == 0 -> FUTEX_WAIT(&epoch, e) (missed both epoch change and state change)
-                _ = self.epoch.fetchAdd(1, .release);
-                Futex.wake(&self.epoch, to_wake);
-                return;
-            };
-        }
-    }
-};
-
-const PosixImpl = struct {
-    cond: std.c.pthread_cond_t = .{},
-
-    fn wait(self: *Impl, mutex: *Mutex, timeout: ?u64) error{Timeout}!void {
-        if (builtin.mode == .Debug) {
-            mutex.impl.locking_thread.store(0, .unordered);
-        }
-        defer if (builtin.mode == .Debug) {
-            mutex.impl.locking_thread.store(std.Thread.getCurrentId(), .unordered);
-        };
-
-        const mtx = if (builtin.mode == .Debug) &mutex.impl.impl.mutex else &mutex.impl.mutex;
-
-        if (timeout) |t| {
-            switch (std.c.pthread_cond_timedwait(&self.cond, mtx, &.{
-                .sec = @intCast(@divFloor(t, std.time.ns_per_s)),
-                .nsec = @intCast(@mod(t, std.time.ns_per_s)),
-            })) {
-                .SUCCESS => return,
-                .TIMEDOUT => return error.Timeout,
-                else => unreachable,
-            }
-        }
-
-        assert(std.c.pthread_cond_wait(&self.cond, mtx) == .SUCCESS);
-    }
-
-    fn wake(self: *Impl, comptime notify: Notify) void {
-        assert(switch (notify) {
-            .one => std.c.pthread_cond_signal(&self.cond),
-            .all => std.c.pthread_cond_broadcast(&self.cond),
-        } == .SUCCESS);
-    }
-};
-
-test "smoke test" {
-    var mutex = Mutex{};
-    var cond = Condition{};
-
-    // Try to wake outside the mutex
-    defer cond.signal();
-    defer cond.broadcast();
-
-    mutex.lock();
-    defer mutex.unlock();
-
-    // Try to wait with a timeout (should not deadlock)
-    try testing.expectError(error.Timeout, cond.timedWait(&mutex, 0));
-    try testing.expectError(error.Timeout, cond.timedWait(&mutex, std.time.ns_per_ms));
-
-    // Try to wake inside the mutex.
-    cond.signal();
-    cond.broadcast();
-}
-
-// Inspired from: https://github.com/Amanieu/parking_lot/pull/129
-test "wait and signal" {
-    // This test requires spawning threads
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const io = testing.io;
-
-    const num_threads = 4;
-
-    const MultiWait = struct {
-        mutex: Mutex = .{},
-        cond: Condition = .{},
-        threads: [num_threads]std.Thread = undefined,
-        spawn_count: std.math.IntFittingRange(0, num_threads) = 0,
-
-        fn run(self: *@This()) void {
-            self.mutex.lock();
-            defer self.mutex.unlock();
-            self.spawn_count += 1;
-
-            self.cond.wait(&self.mutex);
-            self.cond.timedWait(&self.mutex, std.time.ns_per_ms) catch {};
-            self.cond.signal();
-        }
-    };
-
-    var multi_wait = MultiWait{};
-    for (&multi_wait.threads) |*t| {
-        t.* = try std.Thread.spawn(.{}, MultiWait.run, .{&multi_wait});
-    }
-
-    while (true) {
-        try std.Io.Clock.Duration.sleep(.{ .clock = .awake, .raw = .fromMilliseconds(100) }, io);
-
-        multi_wait.mutex.lock();
-        defer multi_wait.mutex.unlock();
-        // Make sure all of the threads have finished spawning to avoid a deadlock.
-        if (multi_wait.spawn_count == num_threads) break;
-    }
-
-    multi_wait.cond.signal();
-    for (multi_wait.threads) |t| {
-        t.join();
-    }
-}
-
-test signal {
-    // This test requires spawning threads
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const io = testing.io;
-
-    const num_threads = 4;
-
-    const SignalTest = struct {
-        mutex: Mutex = .{},
-        cond: Condition = .{},
-        notified: bool = false,
-        threads: [num_threads]std.Thread = undefined,
-        spawn_count: std.math.IntFittingRange(0, num_threads) = 0,
-
-        fn run(self: *@This()) void {
-            self.mutex.lock();
-            defer self.mutex.unlock();
-            self.spawn_count += 1;
-
-            // Use timedWait() a few times before using wait()
-            // to test multiple threads timing out frequently.
-            var i: usize = 0;
-            while (!self.notified) : (i +%= 1) {
-                if (i < 5) {
-                    self.cond.timedWait(&self.mutex, 1) catch {};
-                } else {
-                    self.cond.wait(&self.mutex);
-                }
-            }
-
-            // Once we received the signal, notify another thread (inside the lock).
-            assert(self.notified);
-            self.cond.signal();
-        }
-    };
-
-    var signal_test = SignalTest{};
-    for (&signal_test.threads) |*t| {
-        t.* = try std.Thread.spawn(.{}, SignalTest.run, .{&signal_test});
-    }
-
-    while (true) {
-        try std.Io.Clock.Duration.sleep(.{ .clock = .awake, .raw = .fromMilliseconds(10) }, io);
-
-        signal_test.mutex.lock();
-        defer signal_test.mutex.unlock();
-        // Make sure at least one thread has finished spawning to avoid testing nothing.
-        if (signal_test.spawn_count > 0) break;
-    }
-
-    {
-        // Wake up one of them (outside the lock) after setting notified=true.
-        defer signal_test.cond.signal();
-
-        signal_test.mutex.lock();
-        defer signal_test.mutex.unlock();
-
-        try testing.expect(!signal_test.notified);
-        signal_test.notified = true;
-    }
-
-    for (signal_test.threads) |t| {
-        t.join();
-    }
-}
-
-test "multi signal" {
-    // This test requires spawning threads
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const num_threads = 4;
-    const num_iterations = 4;
-
-    const Paddle = struct {
-        mutex: Mutex = .{},
-        cond: Condition = .{},
-        value: u32 = 0,
-
-        fn hit(self: *@This()) void {
-            defer self.cond.signal();
-
-            self.mutex.lock();
-            defer self.mutex.unlock();
-
-            self.value += 1;
-        }
-
-        fn run(self: *@This(), hit_to: *@This()) !void {
-            self.mutex.lock();
-            defer self.mutex.unlock();
-
-            var current: u32 = 0;
-            while (current < num_iterations) : (current += 1) {
-                // Wait for the value to change from hit()
-                while (self.value == current) {
-                    self.cond.wait(&self.mutex);
-                }
-
-                // hit the next paddle
-                try testing.expectEqual(self.value, current + 1);
-                hit_to.hit();
-            }
-        }
-    };
-
-    var paddles = [_]Paddle{.{}} ** num_threads;
-    var threads = [_]std.Thread{undefined} ** num_threads;
-
-    // Create a circle of paddles which hit each other
-    for (&threads, 0..) |*t, i| {
-        const paddle = &paddles[i];
-        const hit_to = &paddles[(i + 1) % paddles.len];
-        t.* = try std.Thread.spawn(.{}, Paddle.run, .{ paddle, hit_to });
-    }
-
-    // Hit the first paddle and wait for them all to complete by hitting each other for num_iterations.
-    paddles[0].hit();
-    for (threads) |t| t.join();
-
-    // The first paddle will be hit one last time by the last paddle.
-    for (paddles, 0..) |p, i| {
-        const expected = @as(u32, num_iterations) + @intFromBool(i == 0);
-        try testing.expectEqual(p.value, expected);
-    }
-}
-
-test broadcast {
-    // This test requires spawning threads
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const num_threads = 10;
-
-    const BroadcastTest = struct {
-        mutex: Mutex = .{},
-        cond: Condition = .{},
-        completed: Condition = .{},
-        count: usize = 0,
-        threads: [num_threads]std.Thread = undefined,
-
-        fn run(self: *@This()) void {
-            self.mutex.lock();
-            defer self.mutex.unlock();
-
-            // The last broadcast thread to start tells the main test thread it's completed.
-            self.count += 1;
-            if (self.count == num_threads) {
-                self.completed.signal();
-            }
-
-            // Waits for the count to reach zero after the main test thread observes it at num_threads.
-            // Tries to use timedWait() a bit before falling back to wait() to test multiple threads timing out.
-            var i: usize = 0;
-            while (self.count != 0) : (i +%= 1) {
-                if (i < 10) {
-                    self.cond.timedWait(&self.mutex, 1) catch {};
-                } else {
-                    self.cond.wait(&self.mutex);
-                }
-            }
-        }
-    };
-
-    var broadcast_test = BroadcastTest{};
-    for (&broadcast_test.threads) |*t| {
-        t.* = try std.Thread.spawn(.{}, BroadcastTest.run, .{&broadcast_test});
-    }
-
-    {
-        broadcast_test.mutex.lock();
-        defer broadcast_test.mutex.unlock();
-
-        // Wait for all the broadcast threads to spawn.
-        // timedWait() to detect any potential deadlocks.
-        while (broadcast_test.count != num_threads) {
-            broadcast_test.completed.timedWait(
-                &broadcast_test.mutex,
-                1 * std.time.ns_per_s,
-            ) catch {};
-        }
-
-        // Reset the counter and wake all the threads to exit.
-        broadcast_test.count = 0;
-        broadcast_test.cond.broadcast();
-    }
-
-    for (broadcast_test.threads) |t| {
-        t.join();
-    }
-}
-
-test "broadcasting - wake all threads" {
-    // Tests issue #12877
-    // This test requires spawning threads
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    var num_runs: usize = 1;
-    const num_threads = 10;
-
-    while (num_runs > 0) : (num_runs -= 1) {
-        const BroadcastTest = struct {
-            mutex: Mutex = .{},
-            cond: Condition = .{},
-            completed: Condition = .{},
-            count: usize = 0,
-            thread_id_to_wake: usize = 0,
-            threads: [num_threads]std.Thread = undefined,
-            wakeups: usize = 0,
-
-            fn run(self: *@This(), thread_id: usize) void {
-                self.mutex.lock();
-                defer self.mutex.unlock();
-
-                // The last broadcast thread to start tells the main test thread it's completed.
-                self.count += 1;
-                if (self.count == num_threads) {
-                    self.completed.signal();
-                }
-
-                while (self.thread_id_to_wake != thread_id) {
-                    self.cond.timedWait(&self.mutex, 1 * std.time.ns_per_s) catch {};
-                    self.wakeups += 1;
-                }
-                if (self.thread_id_to_wake <= num_threads) {
-                    // Signal next thread to wake up.
-                    self.thread_id_to_wake += 1;
-                    self.cond.broadcast();
-                }
-            }
-        };
-
-        var broadcast_test = BroadcastTest{};
-        var thread_id: usize = 1;
-        for (&broadcast_test.threads) |*t| {
-            t.* = try std.Thread.spawn(.{}, BroadcastTest.run, .{ &broadcast_test, thread_id });
-            thread_id += 1;
-        }
-
-        {
-            broadcast_test.mutex.lock();
-            defer broadcast_test.mutex.unlock();
-
-            // Wait for all the broadcast threads to spawn.
-            // timedWait() to detect any potential deadlocks.
-            while (broadcast_test.count != num_threads) {
-                broadcast_test.completed.timedWait(
-                    &broadcast_test.mutex,
-                    1 * std.time.ns_per_s,
-                ) catch {};
-            }
-
-            // Signal thread 1 to wake up
-            broadcast_test.thread_id_to_wake = 1;
-            broadcast_test.cond.broadcast();
-        }
-
-        for (broadcast_test.threads) |t| {
-            t.join();
-        }
-    }
-}
diff --git a/lib/std/Thread/Futex.zig b/lib/std/Thread/Futex.zig
@@ -1,1063 +0,0 @@
-//! A mechanism used to block (`wait`) and unblock (`wake`) threads using a
-//! 32bit memory address as hints.
-//!
-//! Blocking a thread is acknowledged only if the 32bit memory address is equal
-//! to a given value. This check helps avoid block/unblock deadlocks which
-//! occur if a `wake()` happens before a `wait()`.
-//!
-//! Using Futex, other Thread synchronization primitives can be built which
-//! efficiently wait for cross-thread events or signals.
-
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const Futex = @This();
-const windows = std.os.windows;
-const linux = std.os.linux;
-const c = std.c;
-
-const assert = std.debug.assert;
-const testing = std.testing;
-const atomic = std.atomic;
-
-/// Checks if `ptr` still contains the value `expect` and, if so, blocks the caller until either:
-/// - The value at `ptr` is no longer equal to `expect` and `wake()` is called on the same address.
-/// - The caller is unblocked spuriously ("at random").
-///
-/// The checking of `ptr` and `expect`, along with blocking the caller, is done atomically
-/// and totally ordered (sequentially consistent) with respect to other wait()/wake() calls on the same `ptr`.
-pub fn wait(ptr: *const atomic.Value(u32), expect: u32) void {
-    @branchHint(.cold);
-
-    Impl.wait(ptr, expect, null) catch |err| switch (err) {
-        error.Timeout => unreachable, // null timeout meant to wait forever
-    };
-}
-
-/// Checks if `ptr` still contains the value `expect` and, if so, blocks the caller until either:
-/// - The value at `ptr` is no longer equal to `expect`.
-/// - The caller is unblocked by a matching `wake()`.
-/// - The caller is unblocked spuriously ("at random").
-/// - The caller blocks for longer than the given timeout. In which case, `error.Timeout` is returned.
-///
-/// The checking of `ptr` and `expect`, along with blocking the caller, is done atomically
-/// and totally ordered (sequentially consistent) with respect to other wait()/wake() calls on the same `ptr`.
-pub fn timedWait(ptr: *const atomic.Value(u32), expect: u32, timeout_ns: u64) error{Timeout}!void {
-    @branchHint(.cold);
-
-    // Avoid calling into the OS for no-op timeouts.
-    if (timeout_ns == 0) {
-        if (ptr.load(.seq_cst) != expect) return;
-        return error.Timeout;
-    }
-
-    return Impl.wait(ptr, expect, timeout_ns);
-}
-
-/// Unblocks at most `max_waiters` callers blocked in a `wait()` call on `ptr`.
-pub fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-    @branchHint(.cold);
-
-    // Avoid calling into the OS if there's nothing to wake up.
-    if (max_waiters == 0) {
-        return;
-    }
-
-    Impl.wake(ptr, max_waiters);
-}
-
-const Impl = if (builtin.single_threaded)
-    SingleThreadedImpl
-else if (builtin.os.tag == .windows)
-    WindowsImpl
-else if (builtin.os.tag.isDarwin())
-    DarwinImpl
-else if (builtin.os.tag == .linux)
-    LinuxImpl
-else if (builtin.os.tag == .freebsd)
-    FreebsdImpl
-else if (builtin.os.tag == .openbsd)
-    OpenbsdImpl
-else if (builtin.os.tag == .dragonfly)
-    DragonflyImpl
-else if (builtin.target.cpu.arch.isWasm())
-    WasmImpl
-else if (std.Thread.use_pthreads)
-    PosixImpl
-else
-    UnsupportedImpl;
-
-/// We can't do @compileError() in the `Impl` switch statement above as its eagerly evaluated.
-/// So instead, we @compileError() on the methods themselves for platforms which don't support futex.
-const UnsupportedImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        return unsupported(.{ ptr, expect, timeout });
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        return unsupported(.{ ptr, max_waiters });
-    }
-
-    fn unsupported(unused: anytype) noreturn {
-        _ = unused;
-        @compileError("Unsupported operating system " ++ @tagName(builtin.target.os.tag));
-    }
-};
-
-const SingleThreadedImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        if (ptr.raw != expect) {
-            return;
-        }
-
-        // There are no threads to wake us up.
-        // So if we wait without a timeout we would never wake up.
-        const delay = timeout orelse {
-            unreachable; // deadlock detected
-        };
-
-        _ = delay;
-        return error.Timeout;
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        // There are no other threads to possibly wake up
-        _ = ptr;
-        _ = max_waiters;
-    }
-};
-
-// We use WaitOnAddress through NtDll instead of API-MS-Win-Core-Synch-l1-2-0.dll
-// as it's generally already a linked target and is autoloaded into all processes anyway.
-const WindowsImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        var timeout_value: windows.LARGE_INTEGER = undefined;
-        var timeout_ptr: ?*const windows.LARGE_INTEGER = null;
-
-        // NTDLL functions work with time in units of 100 nanoseconds.
-        // Positive values are absolute deadlines while negative values are relative durations.
-        if (timeout) |delay| {
-            timeout_value = @as(windows.LARGE_INTEGER, @intCast(delay / 100));
-            timeout_value = -timeout_value;
-            timeout_ptr = &timeout_value;
-        }
-
-        const rc = windows.ntdll.RtlWaitOnAddress(
-            ptr,
-            &expect,
-            @sizeOf(@TypeOf(expect)),
-            timeout_ptr,
-        );
-
-        switch (rc) {
-            .SUCCESS => {},
-            .TIMEOUT => {
-                assert(timeout != null);
-                return error.Timeout;
-            },
-            else => unreachable,
-        }
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        const address: ?*const anyopaque = ptr;
-        assert(max_waiters != 0);
-
-        switch (max_waiters) {
-            1 => windows.ntdll.RtlWakeAddressSingle(address),
-            else => windows.ntdll.RtlWakeAddressAll(address),
-        }
-    }
-};
-
-const DarwinImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        // Darwin XNU 7195.50.7.100.1 introduced __ulock_wait2 and migrated code paths (notably pthread_cond_t) towards it:
-        // https://github.com/apple/darwin-xnu/commit/d4061fb0260b3ed486147341b72468f836ed6c8f#diff-08f993cc40af475663274687b7c326cc6c3031e0db3ac8de7b24624610616be6
-        //
-        // This XNU version appears to correspond to 11.0.1:
-        // https://kernelshaman.blogspot.com/2021/01/building-xnu-for-macos-big-sur-1101.html
-        //
-        // ulock_wait() uses 32-bit micro-second timeouts where 0 = INFINITE or no-timeout
-        // ulock_wait2() uses 64-bit nano-second timeouts (with the same convention)
-        const supports_ulock_wait2 = builtin.target.os.version_range.semver.min.major >= 11;
-
-        var timeout_ns: u64 = 0;
-        if (timeout) |delay| {
-            assert(delay != 0); // handled by timedWait()
-            timeout_ns = delay;
-        }
-
-        // If we're using `__ulock_wait` and `timeout` is too big to fit inside a `u32` count of
-        // micro-seconds (around 70min), we'll request a shorter timeout. This is fine (users
-        // should handle spurious wakeups), but we need to remember that we did so, so that
-        // we don't return `Timeout` incorrectly. If that happens, we set this variable to
-        // true so that we we know to ignore the ETIMEDOUT result.
-        var timeout_overflowed = false;
-
-        const addr: *const anyopaque = ptr;
-        const flags: c.UL = .{
-            .op = .COMPARE_AND_WAIT,
-            .NO_ERRNO = true,
-        };
-        const status = blk: {
-            if (supports_ulock_wait2) {
-                break :blk c.__ulock_wait2(flags, addr, expect, timeout_ns, 0);
-            }
-
-            const timeout_us = std.math.cast(u32, timeout_ns / std.time.ns_per_us) orelse overflow: {
-                timeout_overflowed = true;
-                break :overflow std.math.maxInt(u32);
-            };
-
-            break :blk c.__ulock_wait(flags, addr, expect, timeout_us);
-        };
-
-        if (status >= 0) return;
-        switch (@as(c.E, @enumFromInt(-status))) {
-            // Wait was interrupted by the OS or other spurious signalling.
-            .INTR => {},
-            // Address of the futex was paged out. This is unlikely, but possible in theory, and
-            // pthread/libdispatch on darwin bother to handle it. In this case we'll return
-            // without waiting, but the caller should retry anyway.
-            .FAULT => {},
-            // Only report Timeout if we didn't have to cap the timeout
-            .TIMEDOUT => {
-                assert(timeout != null);
-                if (!timeout_overflowed) return error.Timeout;
-            },
-            else => unreachable,
-        }
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        const flags: c.UL = .{
-            .op = .COMPARE_AND_WAIT,
-            .NO_ERRNO = true,
-            .WAKE_ALL = max_waiters > 1,
-        };
-
-        while (true) {
-            const addr: *const anyopaque = ptr;
-            const status = c.__ulock_wake(flags, addr, 0);
-
-            if (status >= 0) return;
-            switch (@as(c.E, @enumFromInt(-status))) {
-                .INTR => continue, // spurious wake()
-                .FAULT => unreachable, // __ulock_wake doesn't generate EFAULT according to darwin pthread_cond_t
-                .NOENT => return, // nothing was woken up
-                .ALREADY => unreachable, // only for UL.Op.WAKE_THREAD
-                else => unreachable,
-            }
-        }
-    }
-};
-
-// https://man7.org/linux/man-pages/man2/futex.2.html
-const LinuxImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        var ts: linux.timespec = undefined;
-        if (timeout) |timeout_ns| {
-            ts.sec = @as(@TypeOf(ts.sec), @intCast(timeout_ns / std.time.ns_per_s));
-            ts.nsec = @as(@TypeOf(ts.nsec), @intCast(timeout_ns % std.time.ns_per_s));
-        }
-
-        const rc = linux.futex_4arg(
-            &ptr.raw,
-            .{ .cmd = .WAIT, .private = true },
-            expect,
-            if (timeout != null) &ts else null,
-        );
-
-        switch (linux.errno(rc)) {
-            .SUCCESS => {}, // notified by `wake()`
-            .INTR => {}, // spurious wakeup
-            .AGAIN => {}, // ptr.* != expect
-            .TIMEDOUT => {
-                assert(timeout != null);
-                return error.Timeout;
-            },
-            .INVAL => {}, // possibly timeout overflow
-            .FAULT => unreachable, // ptr was invalid
-            else => unreachable,
-        }
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        const rc = linux.futex_3arg(
-            &ptr.raw,
-            .{ .cmd = .WAKE, .private = true },
-            @min(max_waiters, std.math.maxInt(i32)),
-        );
-
-        switch (linux.errno(rc)) {
-            .SUCCESS => {}, // successful wake up
-            .INVAL => {}, // invalid futex_wait() on ptr done elsewhere
-            .FAULT => {}, // pointer became invalid while doing the wake
-            else => unreachable,
-        }
-    }
-};
-
-// https://www.freebsd.org/cgi/man.cgi?query=_umtx_op&sektion=2&n=1
-const FreebsdImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        var tm_size: usize = 0;
-        var tm: c._umtx_time = undefined;
-        var tm_ptr: ?*const c._umtx_time = null;
-
-        if (timeout) |timeout_ns| {
-            tm_ptr = &tm;
-            tm_size = @sizeOf(@TypeOf(tm));
-
-            tm.flags = 0; // use relative time not UMTX_ABSTIME
-            tm.clockid = .MONOTONIC;
-            tm.timeout.sec = @as(@TypeOf(tm.timeout.sec), @intCast(timeout_ns / std.time.ns_per_s));
-            tm.timeout.nsec = @as(@TypeOf(tm.timeout.nsec), @intCast(timeout_ns % std.time.ns_per_s));
-        }
-
-        const rc = c._umtx_op(
-            @intFromPtr(&ptr.raw),
-            @intFromEnum(c.UMTX_OP.WAIT_UINT_PRIVATE),
-            @as(c_ulong, expect),
-            tm_size,
-            @intFromPtr(tm_ptr),
-        );
-
-        switch (std.posix.errno(rc)) {
-            .SUCCESS => {},
-            .FAULT => unreachable, // one of the args points to invalid memory
-            .INVAL => unreachable, // arguments should be correct
-            .TIMEDOUT => {
-                assert(timeout != null);
-                return error.Timeout;
-            },
-            .INTR => {}, // spurious wake
-            else => unreachable,
-        }
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        const rc = c._umtx_op(
-            @intFromPtr(&ptr.raw),
-            @intFromEnum(c.UMTX_OP.WAKE_PRIVATE),
-            @as(c_ulong, max_waiters),
-            0, // there is no timeout struct
-            0, // there is no timeout struct pointer
-        );
-
-        switch (std.posix.errno(rc)) {
-            .SUCCESS => {},
-            .FAULT => {}, // it's ok if the ptr doesn't point to valid memory
-            .INVAL => unreachable, // arguments should be correct
-            else => unreachable,
-        }
-    }
-};
-
-// https://man.openbsd.org/futex.2
-const OpenbsdImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        var ts: c.timespec = undefined;
-        if (timeout) |timeout_ns| {
-            ts.sec = @as(@TypeOf(ts.sec), @intCast(timeout_ns / std.time.ns_per_s));
-            ts.nsec = @as(@TypeOf(ts.nsec), @intCast(timeout_ns % std.time.ns_per_s));
-        }
-
-        const rc = c.futex(
-            @as(*const volatile u32, @ptrCast(&ptr.raw)),
-            c.FUTEX.WAIT | c.FUTEX.PRIVATE_FLAG,
-            @as(c_int, @bitCast(expect)),
-            if (timeout != null) &ts else null,
-            null, // FUTEX.WAIT takes no requeue address
-        );
-
-        switch (std.posix.errno(rc)) {
-            .SUCCESS => {}, // woken up by wake
-            .NOSYS => unreachable, // the futex operation shouldn't be invalid
-            .FAULT => unreachable, // ptr was invalid
-            .AGAIN => {}, // ptr != expect
-            .INVAL => unreachable, // invalid timeout
-            .TIMEDOUT => {
-                assert(timeout != null);
-                return error.Timeout;
-            },
-            .INTR => {}, // spurious wake from signal
-            .CANCELED => {}, // spurious wake from signal with SA_RESTART
-            else => unreachable,
-        }
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        const rc = c.futex(
-            @as(*const volatile u32, @ptrCast(&ptr.raw)),
-            c.FUTEX.WAKE | c.FUTEX.PRIVATE_FLAG,
-            std.math.cast(c_int, max_waiters) orelse std.math.maxInt(c_int),
-            null, // FUTEX.WAKE takes no timeout ptr
-            null, // FUTEX.WAKE takes no requeue address
-        );
-
-        // returns number of threads woken up.
-        assert(rc >= 0);
-    }
-};
-
-// https://man.dragonflybsd.org/?command=umtx&section=2
-const DragonflyImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        // Dragonfly uses a scheme where 0 timeout means wait until signaled or spurious wake.
-        // It's reporting of timeout's is also unrealiable so we use an external timing source (Timer) instead.
-        var timeout_us: c_int = 0;
-        var timeout_overflowed = false;
-        var sleep_timer: std.time.Timer = undefined;
-
-        if (timeout) |delay| {
-            assert(delay != 0); // handled by timedWait().
-            timeout_us = std.math.cast(c_int, delay / std.time.ns_per_us) orelse blk: {
-                timeout_overflowed = true;
-                break :blk std.math.maxInt(c_int);
-            };
-
-            // Only need to record the start time if we can provide somewhat accurate error.Timeout's
-            if (!timeout_overflowed) {
-                sleep_timer = std.time.Timer.start() catch unreachable;
-            }
-        }
-
-        const value = @as(c_int, @bitCast(expect));
-        const addr = @as(*const volatile c_int, @ptrCast(&ptr.raw));
-        const rc = c.umtx_sleep(addr, value, timeout_us);
-
-        switch (std.posix.errno(rc)) {
-            .SUCCESS => {},
-            .BUSY => {}, // ptr != expect
-            .AGAIN => { // maybe timed out, or paged out, or hit 2s kernel refresh
-                if (timeout) |timeout_ns| {
-                    // Report error.Timeout only if we know the timeout duration has passed.
-                    // If not, there's not much choice other than treating it as a spurious wake.
-                    if (!timeout_overflowed and sleep_timer.read() >= timeout_ns) {
-                        return error.Timeout;
-                    }
-                }
-            },
-            .INTR => {}, // spurious wake
-            .INVAL => unreachable, // invalid timeout
-            else => unreachable,
-        }
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        // A count of zero means wake all waiters.
-        assert(max_waiters != 0);
-        const to_wake = std.math.cast(c_int, max_waiters) orelse 0;
-
-        // https://man.dragonflybsd.org/?command=umtx&section=2
-        // > umtx_wakeup() will generally return 0 unless the address is bad.
-        // We are fine with the address being bad (e.g. for Semaphore.post() where Semaphore.wait() frees the Semaphore)
-        const addr = @as(*const volatile c_int, @ptrCast(&ptr.raw));
-        _ = c.umtx_wakeup(addr, to_wake);
-    }
-};
-
-const WasmImpl = struct {
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        if (!comptime builtin.cpu.has(.wasm, .atomics)) @compileError("WASI target missing cpu feature 'atomics'");
-
-        const to: i64 = if (timeout) |to| @intCast(to) else -1;
-        const result = asm volatile (
-            \\local.get %[ptr]
-            \\local.get %[expected]
-            \\local.get %[timeout]
-            \\memory.atomic.wait32 0
-            \\local.set %[ret]
-            : [ret] "=r" (-> u32),
-            : [ptr] "r" (&ptr.raw),
-              [expected] "r" (@as(i32, @bitCast(expect))),
-              [timeout] "r" (to),
-        );
-        switch (result) {
-            0 => {}, // ok
-            1 => {}, // expected =! loaded
-            2 => return error.Timeout,
-            else => unreachable,
-        }
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        if (!comptime builtin.cpu.has(.wasm, .atomics)) @compileError("WASI target missing cpu feature 'atomics'");
-
-        assert(max_waiters != 0);
-        const woken_count = asm volatile (
-            \\local.get %[ptr]
-            \\local.get %[waiters]
-            \\memory.atomic.notify 0
-            \\local.set %[ret]
-            : [ret] "=r" (-> u32),
-            : [ptr] "r" (&ptr.raw),
-              [waiters] "r" (max_waiters),
-        );
-        _ = woken_count; // can be 0 when linker flag 'shared-memory' is not enabled
-    }
-};
-
-/// Modified version of linux's futex and Go's sema to implement userspace wait queues with pthread:
-/// https://code.woboq.org/linux/linux/kernel/futex.c.html
-/// https://go.dev/src/runtime/sema.go
-const PosixImpl = struct {
-    const Event = struct {
-        cond: c.pthread_cond_t,
-        mutex: c.pthread_mutex_t,
-        state: enum { empty, waiting, notified },
-
-        fn init(self: *Event) void {
-            // Use static init instead of pthread_cond/mutex_init() since this is generally faster.
-            self.cond = .{};
-            self.mutex = .{};
-            self.state = .empty;
-        }
-
-        fn deinit(self: *Event) void {
-            // Some platforms reportedly give EINVAL for statically initialized pthread types.
-            const rc = c.pthread_cond_destroy(&self.cond);
-            assert(rc == .SUCCESS or rc == .INVAL);
-
-            const rm = c.pthread_mutex_destroy(&self.mutex);
-            assert(rm == .SUCCESS or rm == .INVAL);
-
-            self.* = undefined;
-        }
-
-        fn wait(self: *Event, timeout: ?u64) error{Timeout}!void {
-            assert(c.pthread_mutex_lock(&self.mutex) == .SUCCESS);
-            defer assert(c.pthread_mutex_unlock(&self.mutex) == .SUCCESS);
-
-            // Early return if the event was already set.
-            if (self.state == .notified) {
-                return;
-            }
-
-            // Compute the absolute timeout if one was specified.
-            // POSIX requires that REALTIME is used by default for the pthread timedwait functions.
-            // This can be changed with pthread_condattr_setclock, but it's an extension and may not be available everywhere.
-            var ts: c.timespec = undefined;
-            if (timeout) |timeout_ns| {
-                ts = std.posix.clock_gettime(c.CLOCK.REALTIME) catch unreachable;
-                ts.sec +|= @as(@TypeOf(ts.sec), @intCast(timeout_ns / std.time.ns_per_s));
-                ts.nsec += @as(@TypeOf(ts.nsec), @intCast(timeout_ns % std.time.ns_per_s));
-
-                if (ts.nsec >= std.time.ns_per_s) {
-                    ts.sec +|= 1;
-                    ts.nsec -= std.time.ns_per_s;
-                }
-            }
-
-            // Start waiting on the event - there can be only one thread waiting.
-            assert(self.state == .empty);
-            self.state = .waiting;
-
-            while (true) {
-                // Block using either pthread_cond_wait or pthread_cond_timewait if there's an absolute timeout.
-                const rc = blk: {
-                    if (timeout == null) break :blk c.pthread_cond_wait(&self.cond, &self.mutex);
-                    break :blk c.pthread_cond_timedwait(&self.cond, &self.mutex, &ts);
-                };
-
-                // After waking up, check if the event was set.
-                if (self.state == .notified) {
-                    return;
-                }
-
-                assert(self.state == .waiting);
-                switch (rc) {
-                    .SUCCESS => {},
-                    .TIMEDOUT => {
-                        // If timed out, reset the event to avoid the set() thread doing an unnecessary signal().
-                        self.state = .empty;
-                        return error.Timeout;
-                    },
-                    .INVAL => unreachable, // cond, mutex, and potentially ts should all be valid
-                    .PERM => unreachable, // mutex is locked when cond_*wait() functions are called
-                    else => unreachable,
-                }
-            }
-        }
-
-        fn set(self: *Event) void {
-            assert(c.pthread_mutex_lock(&self.mutex) == .SUCCESS);
-            defer assert(c.pthread_mutex_unlock(&self.mutex) == .SUCCESS);
-
-            // Make sure that multiple calls to set() were not done on the same Event.
-            const old_state = self.state;
-            assert(old_state != .notified);
-
-            // Mark the event as set and wake up the waiting thread if there was one.
-            // This must be done while the mutex as the wait() thread could deallocate
-            // the condition variable once it observes the new state, potentially causing a UAF if done unlocked.
-            self.state = .notified;
-            if (old_state == .waiting) {
-                assert(c.pthread_cond_signal(&self.cond) == .SUCCESS);
-            }
-        }
-    };
-
-    const Treap = std.Treap(usize, std.math.order);
-    const Waiter = struct {
-        node: Treap.Node,
-        prev: ?*Waiter,
-        next: ?*Waiter,
-        tail: ?*Waiter,
-        is_queued: bool,
-        event: Event,
-    };
-
-    // An unordered set of Waiters
-    const WaitList = struct {
-        top: ?*Waiter = null,
-        len: usize = 0,
-
-        fn push(self: *WaitList, waiter: *Waiter) void {
-            waiter.next = self.top;
-            self.top = waiter;
-            self.len += 1;
-        }
-
-        fn pop(self: *WaitList) ?*Waiter {
-            const waiter = self.top orelse return null;
-            self.top = waiter.next;
-            self.len -= 1;
-            return waiter;
-        }
-    };
-
-    const WaitQueue = struct {
-        fn insert(treap: *Treap, address: usize, waiter: *Waiter) void {
-            // prepare the waiter to be inserted.
-            waiter.next = null;
-            waiter.is_queued = true;
-
-            // Find the wait queue entry associated with the address.
-            // If there isn't a wait queue on the address, this waiter creates the queue.
-            var entry = treap.getEntryFor(address);
-            const entry_node = entry.node orelse {
-                waiter.prev = null;
-                waiter.tail = waiter;
-                entry.set(&waiter.node);
-                return;
-            };
-
-            // There's a wait queue on the address; get the queue head and tail.
-            const head: *Waiter = @fieldParentPtr("node", entry_node);
-            const tail = head.tail orelse unreachable;
-
-            // Push the waiter to the tail by replacing it and linking to the previous tail.
-            head.tail = waiter;
-            tail.next = waiter;
-            waiter.prev = tail;
-        }
-
-        fn remove(treap: *Treap, address: usize, max_waiters: usize) WaitList {
-            // Find the wait queue associated with this address and get the head/tail if any.
-            var entry = treap.getEntryFor(address);
-            var queue_head: ?*Waiter = if (entry.node) |node| @fieldParentPtr("node", node) else null;
-            const queue_tail = if (queue_head) |head| head.tail else null;
-
-            // Once we're done updating the head, fix it's tail pointer and update the treap's queue head as well.
-            defer entry.set(blk: {
-                const new_head = queue_head orelse break :blk null;
-                new_head.tail = queue_tail;
-                break :blk &new_head.node;
-            });
-
-            var removed = WaitList{};
-            while (removed.len < max_waiters) {
-                // dequeue and collect waiters from their wait queue.
-                const waiter = queue_head orelse break;
-                queue_head = waiter.next;
-                removed.push(waiter);
-
-                // When dequeueing, we must mark is_queued as false.
-                // This ensures that a waiter which calls tryRemove() returns false.
-                assert(waiter.is_queued);
-                waiter.is_queued = false;
-            }
-
-            return removed;
-        }
-
-        fn tryRemove(treap: *Treap, address: usize, waiter: *Waiter) bool {
-            if (!waiter.is_queued) {
-                return false;
-            }
-
-            queue_remove: {
-                // Find the wait queue associated with the address.
-                var entry = blk: {
-                    // A waiter without a previous link means it's the queue head that's in the treap so we can avoid lookup.
-                    if (waiter.prev == null) {
-                        assert(waiter.node.key == address);
-                        break :blk treap.getEntryForExisting(&waiter.node);
-                    }
-                    break :blk treap.getEntryFor(address);
-                };
-
-                // The queue head and tail must exist if we're removing a queued waiter.
-                const head: *Waiter = @fieldParentPtr("node", entry.node orelse unreachable);
-                const tail = head.tail orelse unreachable;
-
-                // A waiter with a previous link is never the head of the queue.
-                if (waiter.prev) |prev| {
-                    assert(waiter != head);
-                    prev.next = waiter.next;
-
-                    // A waiter with both a previous and next link is in the middle.
-                    // We only need to update the surrounding waiter's links to remove it.
-                    if (waiter.next) |next| {
-                        assert(waiter != tail);
-                        next.prev = waiter.prev;
-                        break :queue_remove;
-                    }
-
-                    // A waiter with a previous but no next link means it's the tail of the queue.
-                    // In that case, we need to update the head's tail reference.
-                    assert(waiter == tail);
-                    head.tail = waiter.prev;
-                    break :queue_remove;
-                }
-
-                // A waiter with no previous link means it's the queue head of queue.
-                // We must replace (or remove) the head waiter reference in the treap.
-                assert(waiter == head);
-                entry.set(blk: {
-                    const new_head = waiter.next orelse break :blk null;
-                    new_head.tail = head.tail;
-                    break :blk &new_head.node;
-                });
-            }
-
-            // Mark the waiter as successfully removed.
-            waiter.is_queued = false;
-            return true;
-        }
-    };
-
-    const Bucket = struct {
-        mutex: c.pthread_mutex_t align(atomic.cache_line) = .{},
-        pending: atomic.Value(usize) = atomic.Value(usize).init(0),
-        treap: Treap = .{},
-
-        // Global array of buckets that addresses map to.
-        // Bucket array size is pretty much arbitrary here, but it must be a power of two for fibonacci hashing.
-        var buckets = [_]Bucket{.{}} ** @bitSizeOf(usize);
-
-        // https://github.com/Amanieu/parking_lot/blob/1cf12744d097233316afa6c8b7d37389e4211756/core/src/parking_lot.rs#L343-L353
-        fn from(address: usize) *Bucket {
-            // The upper `@bitSizeOf(usize)` bits of the fibonacci golden ratio.
-            // Hashing this via (h * k) >> (64 - b) where k=golden-ration and b=bitsize-of-array
-            // evenly lays out h=hash values over the bit range even when the hash has poor entropy (identity-hash for pointers).
-            const max_multiplier_bits = @bitSizeOf(usize);
-            const fibonacci_multiplier = 0x9E3779B97F4A7C15 >> (64 - max_multiplier_bits);
-
-            const max_bucket_bits = @ctz(buckets.len);
-            comptime assert(std.math.isPowerOfTwo(buckets.len));
-
-            const index = (address *% fibonacci_multiplier) >> (max_multiplier_bits - max_bucket_bits);
-            return &buckets[index];
-        }
-    };
-
-    const Address = struct {
-        fn from(ptr: *const atomic.Value(u32)) usize {
-            // Get the alignment of the pointer.
-            const alignment = @alignOf(atomic.Value(u32));
-            comptime assert(std.math.isPowerOfTwo(alignment));
-
-            // Make sure the pointer is aligned,
-            // then cut off the zero bits from the alignment to get the unique address.
-            const addr = @intFromPtr(ptr);
-            assert(addr & (alignment - 1) == 0);
-            return addr >> @ctz(@as(usize, alignment));
-        }
-    };
-
-    fn wait(ptr: *const atomic.Value(u32), expect: u32, timeout: ?u64) error{Timeout}!void {
-        const address = Address.from(ptr);
-        const bucket = Bucket.from(address);
-
-        // Announce that there's a waiter in the bucket before checking the ptr/expect condition.
-        // If the announcement is reordered after the ptr check, the waiter could deadlock:
-        //
-        // - T1: checks ptr == expect which is true
-        // - T2: updates ptr to != expect
-        // - T2: does Futex.wake(), sees no pending waiters, exits
-        // - T1: bumps pending waiters (was reordered after the ptr == expect check)
-        // - T1: goes to sleep and misses both the ptr change and T2's wake up
-        //
-        // acquire barrier to ensure the announcement happens before the ptr check below.
-        var pending = bucket.pending.fetchAdd(1, .acquire);
-        assert(pending < std.math.maxInt(usize));
-
-        // If the wait gets canceled, remove the pending count we previously added.
-        // This is done outside the mutex lock to keep the critical section short in case of contention.
-        var canceled = false;
-        defer if (canceled) {
-            pending = bucket.pending.fetchSub(1, .monotonic);
-            assert(pending > 0);
-        };
-
-        var waiter: Waiter = undefined;
-        {
-            assert(c.pthread_mutex_lock(&bucket.mutex) == .SUCCESS);
-            defer assert(c.pthread_mutex_unlock(&bucket.mutex) == .SUCCESS);
-
-            canceled = ptr.load(.monotonic) != expect;
-            if (canceled) {
-                return;
-            }
-
-            waiter.event.init();
-            WaitQueue.insert(&bucket.treap, address, &waiter);
-        }
-
-        defer {
-            assert(!waiter.is_queued);
-            waiter.event.deinit();
-        }
-
-        waiter.event.wait(timeout) catch {
-            // If we fail to cancel after a timeout, it means a wake() thread dequeued us and will wake us up.
-            // We must wait until the event is set as that's a signal that the wake() thread won't access the waiter memory anymore.
-            // If we return early without waiting, the waiter on the stack would be invalidated and the wake() thread risks a UAF.
-            defer if (!canceled) waiter.event.wait(null) catch unreachable;
-
-            assert(c.pthread_mutex_lock(&bucket.mutex) == .SUCCESS);
-            defer assert(c.pthread_mutex_unlock(&bucket.mutex) == .SUCCESS);
-
-            canceled = WaitQueue.tryRemove(&bucket.treap, address, &waiter);
-            if (canceled) {
-                return error.Timeout;
-            }
-        };
-    }
-
-    fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
-        const address = Address.from(ptr);
-        const bucket = Bucket.from(address);
-
-        // Quick check if there's even anything to wake up.
-        // The change to the ptr's value must happen before we check for pending waiters.
-        // If not, the wake() thread could miss a sleeping waiter and have it deadlock:
-        //
-        // - T2: p = has pending waiters (reordered before the ptr update)
-        // - T1: bump pending waiters
-        // - T1: if ptr == expected: sleep()
-        // - T2: update ptr != expected
-        // - T2: p is false from earlier so doesn't wake (T1 missed ptr update and T2 missed T1 sleeping)
-        //
-        // What we really want here is a Release load, but that doesn't exist under the C11 memory model.
-        // We could instead do `bucket.pending.fetchAdd(0, Release) == 0` which achieves effectively the same thing,
-        // LLVM lowers the fetchAdd(0, .release) into an mfence+load which avoids gaining ownership of the cache-line.
-        if (bucket.pending.fetchAdd(0, .release) == 0) {
-            return;
-        }
-
-        // Keep a list of all the waiters notified and wake then up outside the mutex critical section.
-        var notified = WaitList{};
-        defer if (notified.len > 0) {
-            const pending = bucket.pending.fetchSub(notified.len, .monotonic);
-            assert(pending >= notified.len);
-
-            while (notified.pop()) |waiter| {
-                assert(!waiter.is_queued);
-                waiter.event.set();
-            }
-        };
-
-        assert(c.pthread_mutex_lock(&bucket.mutex) == .SUCCESS);
-        defer assert(c.pthread_mutex_unlock(&bucket.mutex) == .SUCCESS);
-
-        // Another pending check again to avoid the WaitQueue lookup if not necessary.
-        if (bucket.pending.load(.monotonic) > 0) {
-            notified = WaitQueue.remove(&bucket.treap, address, max_waiters);
-        }
-    }
-};
-
-test "smoke test" {
-    var value = atomic.Value(u32).init(0);
-
-    // Try waits with invalid values.
-    Futex.wait(&value, 0xdeadbeef);
-    Futex.timedWait(&value, 0xdeadbeef, 0) catch {};
-
-    // Try timeout waits.
-    try testing.expectError(error.Timeout, Futex.timedWait(&value, 0, 0));
-    try testing.expectError(error.Timeout, Futex.timedWait(&value, 0, std.time.ns_per_ms));
-
-    // Try wakes
-    Futex.wake(&value, 0);
-    Futex.wake(&value, 1);
-    Futex.wake(&value, std.math.maxInt(u32));
-}
-
-test "signaling" {
-    // This test requires spawning threads
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const num_threads = 4;
-    const num_iterations = 4;
-
-    const Paddle = struct {
-        value: atomic.Value(u32) = atomic.Value(u32).init(0),
-        current: u32 = 0,
-
-        fn hit(self: *@This()) void {
-            _ = self.value.fetchAdd(1, .release);
-            Futex.wake(&self.value, 1);
-        }
-
-        fn run(self: *@This(), hit_to: *@This()) !void {
-            while (self.current < num_iterations) {
-                // Wait for the value to change from hit()
-                var new_value: u32 = undefined;
-                while (true) {
-                    new_value = self.value.load(.acquire);
-                    if (new_value != self.current) break;
-                    Futex.wait(&self.value, self.current);
-                }
-
-                // change the internal "current" value
-                try testing.expectEqual(new_value, self.current + 1);
-                self.current = new_value;
-
-                // hit the next paddle
-                hit_to.hit();
-            }
-        }
-    };
-
-    var paddles = [_]Paddle{.{}} ** num_threads;
-    var threads = [_]std.Thread{undefined} ** num_threads;
-
-    // Create a circle of paddles which hit each other
-    for (&threads, 0..) |*t, i| {
-        const paddle = &paddles[i];
-        const hit_to = &paddles[(i + 1) % paddles.len];
-        t.* = try std.Thread.spawn(.{}, Paddle.run, .{ paddle, hit_to });
-    }
-
-    // Hit the first paddle and wait for them all to complete by hitting each other for num_iterations.
-    paddles[0].hit();
-    for (threads) |t| t.join();
-    for (paddles) |p| try testing.expectEqual(p.current, num_iterations);
-}
-
-test "broadcasting" {
-    // This test requires spawning threads
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const num_threads = 4;
-    const num_iterations = 4;
-
-    const Barrier = struct {
-        count: atomic.Value(u32) = atomic.Value(u32).init(num_threads),
-        futex: atomic.Value(u32) = atomic.Value(u32).init(0),
-
-        fn wait(self: *@This()) !void {
-            // Decrement the counter.
-            // Release ensures stuff before this barrier.wait() happens before the last one.
-            // Acquire for the last counter ensures stuff before previous barrier.wait()s happened before it.
-            const count = self.count.fetchSub(1, .acq_rel);
-            try testing.expect(count <= num_threads);
-            try testing.expect(count > 0);
-
-            // First counter to reach zero wakes all other threads.
-            // Release on futex update ensures stuff before all barrier.wait()'s happens before they all return.
-            if (count - 1 == 0) {
-                self.futex.store(1, .release);
-                Futex.wake(&self.futex, num_threads - 1);
-                return;
-            }
-
-            // Other threads wait until last counter wakes them up.
-            // Acquire on futex synchronizes with last barrier count to ensure stuff before all barrier.wait()'s happen before us.
-            while (self.futex.load(.acquire) == 0) {
-                Futex.wait(&self.futex, 0);
-            }
-        }
-    };
-
-    const Broadcast = struct {
-        barriers: [num_iterations]Barrier = [_]Barrier{.{}} ** num_iterations,
-        threads: [num_threads]std.Thread = undefined,
-
-        fn run(self: *@This()) !void {
-            for (&self.barriers) |*barrier| {
-                try barrier.wait();
-            }
-        }
-    };
-
-    var broadcast = Broadcast{};
-    for (&broadcast.threads) |*t| t.* = try std.Thread.spawn(.{}, Broadcast.run, .{&broadcast});
-    for (broadcast.threads) |t| t.join();
-}
-
-/// Deadline is used to wait efficiently for a pointer's value to change using Futex and a fixed timeout.
-///
-/// Futex's timedWait() api uses a relative duration which suffers from over-waiting
-/// when used in a loop which is often required due to the possibility of spurious wakeups.
-///
-/// Deadline instead converts the relative timeout to an absolute one so that multiple calls
-/// to Futex timedWait() can block for and report more accurate error.Timeouts.
-pub const Deadline = struct {
-    timeout: ?u64,
-    started: std.time.Timer,
-
-    /// Create the deadline to expire after the given amount of time in nanoseconds passes.
-    /// Pass in `null` to have the deadline call `Futex.wait()` and never expire.
-    pub fn init(expires_in_ns: ?u64) Deadline {
-        var deadline: Deadline = undefined;
-        deadline.timeout = expires_in_ns;
-
-        // std.time.Timer is required to be supported for somewhat accurate reportings of error.Timeout.
-        if (deadline.timeout != null) {
-            deadline.started = std.time.Timer.start() catch unreachable;
-        }
-
-        return deadline;
-    }
-
-    /// Wait until either:
-    /// - the `ptr`'s value changes from `expect`.
-    /// - `Futex.wake()` is called on the `ptr`.
-    /// - A spurious wake occurs.
-    /// - The deadline expires; In which case `error.Timeout` is returned.
-    pub fn wait(self: *Deadline, ptr: *const atomic.Value(u32), expect: u32) error{Timeout}!void {
-        @branchHint(.cold);
-
-        // Check if we actually have a timeout to wait until.
-        // If not just wait "forever".
-        const timeout_ns = self.timeout orelse {
-            return Futex.wait(ptr, expect);
-        };
-
-        // Get how much time has passed since we started waiting
-        // then subtract that from the init() timeout to get how much longer to wait.
-        // Use overflow to detect when we've been waiting longer than the init() timeout.
-        const elapsed_ns = self.started.read();
-        const until_timeout_ns = std.math.sub(u64, timeout_ns, elapsed_ns) catch 0;
-        return Futex.timedWait(ptr, expect, until_timeout_ns);
-    }
-};
-
-test "Deadline" {
-    var deadline = Deadline.init(100 * std.time.ns_per_ms);
-    var futex_word = atomic.Value(u32).init(0);
-
-    while (true) {
-        deadline.wait(&futex_word, 0) catch break;
-    }
-}
diff --git a/lib/std/Thread/Mutex.zig b/lib/std/Thread/Mutex.zig
@@ -1,367 +0,0 @@
-//! Mutex is a synchronization primitive which enforces atomic access to a
-//! shared region of code known as the "critical section".
-//!
-//! It does this by blocking ensuring only one thread is in the critical
-//! section at any given point in time by blocking the others.
-//!
-//! Mutex can be statically initialized and is at most `@sizeOf(u64)` large.
-//! Use `lock()` or `tryLock()` to enter the critical section and `unlock()` to leave it.
-
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const Mutex = @This();
-
-const assert = std.debug.assert;
-const testing = std.testing;
-const Thread = std.Thread;
-const Futex = Thread.Futex;
-
-impl: Impl = .{},
-
-pub const Recursive = @import("Mutex/Recursive.zig");
-
-/// Tries to acquire the mutex without blocking the caller's thread.
-/// Returns `false` if the calling thread would have to block to acquire it.
-/// Otherwise, returns `true` and the caller should `unlock()` the Mutex to release it.
-pub fn tryLock(self: *Mutex) bool {
-    return self.impl.tryLock();
-}
-
-/// Acquires the mutex, blocking the caller's thread until it can.
-/// It is undefined behavior if the mutex is already held by the caller's thread.
-/// Once acquired, call `unlock()` on the Mutex to release it.
-pub fn lock(self: *Mutex) void {
-    self.impl.lock();
-}
-
-/// Releases the mutex which was previously acquired with `lock()` or `tryLock()`.
-/// It is undefined behavior if the mutex is unlocked from a different thread that it was locked from.
-pub fn unlock(self: *Mutex) void {
-    self.impl.unlock();
-}
-
-const Impl = if (builtin.mode == .Debug and !builtin.single_threaded)
-    DebugImpl
-else
-    ReleaseImpl;
-
-const ReleaseImpl = Impl: {
-    if (builtin.single_threaded) break :Impl SingleThreadedImpl;
-    if (builtin.os.tag == .windows) break :Impl WindowsImpl;
-    if (builtin.os.tag.isDarwin()) break :Impl DarwinImpl;
-
-    if (builtin.target.os.tag == .linux or
-        builtin.target.os.tag == .freebsd or
-        builtin.target.os.tag == .openbsd or
-        builtin.target.os.tag == .dragonfly or
-        builtin.target.cpu.arch.isWasm())
-    {
-        // Futex is the system's synchronization primitive; use that.
-        break :Impl FutexImpl;
-    }
-
-    if (std.Thread.use_pthreads) {
-        // This system doesn't have a futex primitive, so `std.Thread.Futex` is using `PosixImpl`,
-        // which implements futex *on top of* pthread mutexes and conditions. Therefore, instead
-        // of going through that long inefficient path, just use pthread mutex directly.
-        break :Impl PosixImpl;
-    }
-
-    break :Impl FutexImpl;
-};
-
-const DebugImpl = struct {
-    locking_thread: std.atomic.Value(Thread.Id) = std.atomic.Value(Thread.Id).init(0), // 0 means it's not locked.
-    impl: ReleaseImpl = .{},
-
-    inline fn tryLock(self: *@This()) bool {
-        const locking = self.impl.tryLock();
-        if (locking) {
-            self.locking_thread.store(Thread.getCurrentId(), .unordered);
-        }
-        return locking;
-    }
-
-    inline fn lock(self: *@This()) void {
-        const current_id = Thread.getCurrentId();
-        if (self.locking_thread.load(.unordered) == current_id and current_id != 0) {
-            @panic("Deadlock detected");
-        }
-        self.impl.lock();
-        self.locking_thread.store(current_id, .unordered);
-    }
-
-    inline fn unlock(self: *@This()) void {
-        assert(self.locking_thread.load(.unordered) == Thread.getCurrentId());
-        self.locking_thread.store(0, .unordered);
-        self.impl.unlock();
-    }
-};
-
-const SingleThreadedImpl = struct {
-    is_locked: bool = false,
-
-    fn tryLock(self: *@This()) bool {
-        if (self.is_locked) return false;
-        self.is_locked = true;
-        return true;
-    }
-
-    fn lock(self: *@This()) void {
-        if (!self.tryLock()) {
-            unreachable; // deadlock detected
-        }
-    }
-
-    fn unlock(self: *@This()) void {
-        assert(self.is_locked);
-        self.is_locked = false;
-    }
-};
-
-/// SRWLOCK on windows is almost always faster than Futex solution.
-/// It also implements an efficient Condition with requeue support for us.
-const WindowsImpl = struct {
-    srwlock: windows.SRWLOCK = .{},
-
-    fn tryLock(self: *@This()) bool {
-        return windows.ntdll.RtlTryAcquireSRWLockExclusive(&self.srwlock) != windows.FALSE;
-    }
-
-    fn lock(self: *@This()) void {
-        windows.ntdll.RtlAcquireSRWLockExclusive(&self.srwlock);
-    }
-
-    fn unlock(self: *@This()) void {
-        windows.ntdll.RtlReleaseSRWLockExclusive(&self.srwlock);
-    }
-
-    const windows = std.os.windows;
-};
-
-/// os_unfair_lock on darwin supports priority inheritance and is generally faster than Futex solutions.
-const DarwinImpl = struct {
-    oul: c.os_unfair_lock = .{},
-
-    fn tryLock(self: *@This()) bool {
-        return c.os_unfair_lock_trylock(&self.oul);
-    }
-
-    fn lock(self: *@This()) void {
-        c.os_unfair_lock_lock(&self.oul);
-    }
-
-    fn unlock(self: *@This()) void {
-        c.os_unfair_lock_unlock(&self.oul);
-    }
-
-    const c = std.c;
-};
-
-const FutexImpl = struct {
-    state: std.atomic.Value(u32) = std.atomic.Value(u32).init(unlocked),
-
-    const unlocked: u32 = 0b00;
-    const locked: u32 = 0b01;
-    const contended: u32 = 0b11; // must contain the `locked` bit for x86 optimization below
-
-    fn lock(self: *@This()) void {
-        if (!self.tryLock())
-            self.lockSlow();
-    }
-
-    fn tryLock(self: *@This()) bool {
-        // On x86, use `lock bts` instead of `lock cmpxchg` as:
-        // - they both seem to mark the cache-line as modified regardless: https://stackoverflow.com/a/63350048
-        // - `lock bts` is smaller instruction-wise which makes it better for inlining
-        if (builtin.target.cpu.arch.isX86()) {
-            const locked_bit = @ctz(locked);
-            return self.state.bitSet(locked_bit, .acquire) == 0;
-        }
-
-        // Acquire barrier ensures grabbing the lock happens before the critical section
-        // and that the previous lock holder's critical section happens before we grab the lock.
-        return self.state.cmpxchgWeak(unlocked, locked, .acquire, .monotonic) == null;
-    }
-
-    fn lockSlow(self: *@This()) void {
-        @branchHint(.cold);
-
-        // Avoid doing an atomic swap below if we already know the state is contended.
-        // An atomic swap unconditionally stores which marks the cache-line as modified unnecessarily.
-        if (self.state.load(.monotonic) == contended) {
-            Futex.wait(&self.state, contended);
-        }
-
-        // Try to acquire the lock while also telling the existing lock holder that there are threads waiting.
-        //
-        // Once we sleep on the Futex, we must acquire the mutex using `contended` rather than `locked`.
-        // If not, threads sleeping on the Futex wouldn't see the state change in unlock and potentially deadlock.
-        // The downside is that the last mutex unlocker will see `contended` and do an unnecessary Futex wake
-        // but this is better than having to wake all waiting threads on mutex unlock.
-        //
-        // Acquire barrier ensures grabbing the lock happens before the critical section
-        // and that the previous lock holder's critical section happens before we grab the lock.
-        while (self.state.swap(contended, .acquire) != unlocked) {
-            Futex.wait(&self.state, contended);
-        }
-    }
-
-    fn unlock(self: *@This()) void {
-        // Unlock the mutex and wake up a waiting thread if any.
-        //
-        // A waiting thread will acquire with `contended` instead of `locked`
-        // which ensures that it wakes up another thread on the next unlock().
-        //
-        // Release barrier ensures the critical section happens before we let go of the lock
-        // and that our critical section happens before the next lock holder grabs the lock.
-        const state = self.state.swap(unlocked, .release);
-        assert(state != unlocked);
-
-        if (state == contended) {
-            Futex.wake(&self.state, 1);
-        }
-    }
-};
-
-const PosixImpl = struct {
-    mutex: std.c.pthread_mutex_t = .{},
-
-    fn tryLock(impl: *PosixImpl) bool {
-        switch (std.c.pthread_mutex_trylock(&impl.mutex)) {
-            .SUCCESS => return true,
-            .BUSY => return false,
-            .INVAL => unreachable, // mutex is initialized correctly
-            else => unreachable,
-        }
-    }
-
-    fn lock(impl: *PosixImpl) void {
-        switch (std.c.pthread_mutex_lock(&impl.mutex)) {
-            .SUCCESS => return,
-            .INVAL => unreachable, // mutex is initialized correctly
-            .DEADLK => unreachable, // not an error checking mutex
-            else => unreachable,
-        }
-    }
-
-    fn unlock(impl: *PosixImpl) void {
-        switch (std.c.pthread_mutex_unlock(&impl.mutex)) {
-            .SUCCESS => return,
-            .INVAL => unreachable, // mutex is initialized correctly
-            .PERM => unreachable, // not an error checking mutex
-            else => unreachable,
-        }
-    }
-};
-
-test "smoke test" {
-    var mutex = Mutex{};
-
-    try testing.expect(mutex.tryLock());
-    try testing.expect(!mutex.tryLock());
-    mutex.unlock();
-
-    mutex.lock();
-    try testing.expect(!mutex.tryLock());
-    mutex.unlock();
-}
-
-// A counter which is incremented without atomic instructions
-const NonAtomicCounter = struct {
-    // direct u128 could maybe use xmm ops on x86 which are atomic
-    value: [2]u64 = [_]u64{ 0, 0 },
-
-    fn get(self: NonAtomicCounter) u128 {
-        return @as(u128, @bitCast(self.value));
-    }
-
-    fn inc(self: *NonAtomicCounter) void {
-        for (@as([2]u64, @bitCast(self.get() + 1)), 0..) |v, i| {
-            @as(*volatile u64, @ptrCast(&self.value[i])).* = v;
-        }
-    }
-};
-
-test "many uncontended" {
-    // This test requires spawning threads.
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const num_threads = 4;
-    const num_increments = 1000;
-
-    const Runner = struct {
-        mutex: Mutex = .{},
-        thread: Thread = undefined,
-        counter: NonAtomicCounter = .{},
-
-        fn run(self: *@This()) void {
-            var i: usize = num_increments;
-            while (i > 0) : (i -= 1) {
-                self.mutex.lock();
-                defer self.mutex.unlock();
-
-                self.counter.inc();
-            }
-        }
-    };
-
-    var runners = [_]Runner{.{}} ** num_threads;
-    for (&runners) |*r| r.thread = try Thread.spawn(.{}, Runner.run, .{r});
-    for (runners) |r| r.thread.join();
-    for (runners) |r| try testing.expectEqual(r.counter.get(), num_increments);
-}
-
-test "many contended" {
-    // This test requires spawning threads.
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const num_threads = 4;
-    const num_increments = 1000;
-
-    const Runner = struct {
-        mutex: Mutex = .{},
-        counter: NonAtomicCounter = .{},
-
-        fn run(self: *@This()) void {
-            var i: usize = num_increments;
-            while (i > 0) : (i -= 1) {
-                // Occasionally hint to let another thread run.
-                defer if (i % 100 == 0) Thread.yield() catch {};
-
-                self.mutex.lock();
-                defer self.mutex.unlock();
-
-                self.counter.inc();
-            }
-        }
-    };
-
-    var runner = Runner{};
-
-    var threads: [num_threads]Thread = undefined;
-    for (&threads) |*t| t.* = try Thread.spawn(.{}, Runner.run, .{&runner});
-    for (threads) |t| t.join();
-
-    try testing.expectEqual(runner.counter.get(), num_increments * num_threads);
-}
-
-// https://github.com/ziglang/zig/issues/19295
-//test @This() {
-//    var m: Mutex = .{};
-//
-//    {
-//        m.lock();
-//        defer m.unlock();
-//        // ... critical section code
-//    }
-//
-//    if (m.tryLock()) {
-//        defer m.unlock();
-//        // ... critical section code
-//    }
-//}
diff --git a/lib/std/Thread/Mutex/Recursive.zig b/lib/std/Thread/Mutex/Recursive.zig
@@ -7,18 +7,18 @@
 //! A recursive mutex is an abstraction layer on top of a regular mutex;
 //! therefore it is recommended to use instead `std.Mutex` unless there is a
 //! specific reason a recursive mutex is warranted.
+const Recursive = @This();
 
 const std = @import("../../std.zig");
-const Recursive = @This();
-const Mutex = std.Thread.Mutex;
+const Io = std.Io;
 const assert = std.debug.assert;
 
-mutex: Mutex,
+mutex: Io.Mutex,
 thread_id: std.Thread.Id,
 lock_count: usize,
 
 pub const init: Recursive = .{
-    .mutex = .{},
+    .mutex = .init,
     .thread_id = invalid_thread_id,
     .lock_count = 0,
 };
@@ -49,7 +49,7 @@ pub fn tryLock(r: *Recursive) bool {
 pub fn lock(r: *Recursive) void {
     const current_thread_id = std.Thread.getCurrentId();
     if (@atomicLoad(std.Thread.Id, &r.thread_id, .unordered) != current_thread_id) {
-        r.mutex.lock();
+        Io.Threaded.mutexLock(&r.mutex);
         assert(r.lock_count == 0);
         @atomicStore(std.Thread.Id, &r.thread_id, current_thread_id, .unordered);
     }
@@ -64,7 +64,7 @@ pub fn unlock(r: *Recursive) void {
     r.lock_count -= 1;
     if (r.lock_count == 0) {
         @atomicStore(std.Thread.Id, &r.thread_id, invalid_thread_id, .unordered);
-        r.mutex.unlock();
+        Io.Threaded.mutexUnlock(&r.mutex);
     }
 }
 
diff --git a/lib/std/Thread/RwLock.zig b/lib/std/Thread/RwLock.zig
@@ -1,386 +0,0 @@
-//! A lock that supports one writer or many readers.
-//! This API is for kernel threads, not evented I/O.
-//! This API requires being initialized at runtime, and initialization
-//! can fail. Once initialized, the core operations cannot fail.
-
-impl: Impl = .{},
-
-const RwLock = @This();
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const testing = std.testing;
-
-pub const Impl = if (builtin.single_threaded)
-    SingleThreadedRwLock
-else if (std.Thread.use_pthreads)
-    PthreadRwLock
-else
-    DefaultRwLock;
-
-/// Attempts to obtain exclusive lock ownership.
-/// Returns `true` if the lock is obtained, `false` otherwise.
-pub fn tryLock(rwl: *RwLock) bool {
-    return rwl.impl.tryLock();
-}
-
-/// Blocks until exclusive lock ownership is acquired.
-pub fn lock(rwl: *RwLock) void {
-    return rwl.impl.lock();
-}
-
-/// Releases a held exclusive lock.
-/// Asserts the lock is held exclusively.
-pub fn unlock(rwl: *RwLock) void {
-    return rwl.impl.unlock();
-}
-
-/// Attempts to obtain shared lock ownership.
-/// Returns `true` if the lock is obtained, `false` otherwise.
-pub fn tryLockShared(rwl: *RwLock) bool {
-    return rwl.impl.tryLockShared();
-}
-
-/// Obtains shared lock ownership.
-/// Blocks if another thread has exclusive ownership.
-/// May block if another thread is attempting to get exclusive ownership.
-pub fn lockShared(rwl: *RwLock) void {
-    return rwl.impl.lockShared();
-}
-
-/// Releases a held shared lock.
-pub fn unlockShared(rwl: *RwLock) void {
-    return rwl.impl.unlockShared();
-}
-
-/// Single-threaded applications use this for deadlock checks in
-/// debug mode, and no-ops in release modes.
-pub const SingleThreadedRwLock = struct {
-    state: enum { unlocked, locked_exclusive, locked_shared } = .unlocked,
-    shared_count: usize = 0,
-
-    /// Attempts to obtain exclusive lock ownership.
-    /// Returns `true` if the lock is obtained, `false` otherwise.
-    pub fn tryLock(rwl: *SingleThreadedRwLock) bool {
-        switch (rwl.state) {
-            .unlocked => {
-                assert(rwl.shared_count == 0);
-                rwl.state = .locked_exclusive;
-                return true;
-            },
-            .locked_exclusive, .locked_shared => return false,
-        }
-    }
-
-    /// Blocks until exclusive lock ownership is acquired.
-    pub fn lock(rwl: *SingleThreadedRwLock) void {
-        assert(rwl.state == .unlocked); // deadlock detected
-        assert(rwl.shared_count == 0); // corrupted state detected
-        rwl.state = .locked_exclusive;
-    }
-
-    /// Releases a held exclusive lock.
-    /// Asserts the lock is held exclusively.
-    pub fn unlock(rwl: *SingleThreadedRwLock) void {
-        assert(rwl.state == .locked_exclusive);
-        assert(rwl.shared_count == 0); // corrupted state detected
-        rwl.state = .unlocked;
-    }
-
-    /// Attempts to obtain shared lock ownership.
-    /// Returns `true` if the lock is obtained, `false` otherwise.
-    pub fn tryLockShared(rwl: *SingleThreadedRwLock) bool {
-        switch (rwl.state) {
-            .unlocked => {
-                rwl.state = .locked_shared;
-                assert(rwl.shared_count == 0);
-                rwl.shared_count = 1;
-                return true;
-            },
-            .locked_shared => {
-                rwl.shared_count += 1;
-                return true;
-            },
-            .locked_exclusive => return false,
-        }
-    }
-
-    /// Blocks until shared lock ownership is acquired.
-    pub fn lockShared(rwl: *SingleThreadedRwLock) void {
-        switch (rwl.state) {
-            .unlocked => {
-                rwl.state = .locked_shared;
-                assert(rwl.shared_count == 0);
-                rwl.shared_count = 1;
-            },
-            .locked_shared => {
-                rwl.shared_count += 1;
-            },
-            .locked_exclusive => unreachable, // deadlock detected
-        }
-    }
-
-    /// Releases a held shared lock.
-    pub fn unlockShared(rwl: *SingleThreadedRwLock) void {
-        switch (rwl.state) {
-            .unlocked => unreachable, // too many calls to `unlockShared`
-            .locked_exclusive => unreachable, // exclusively held lock
-            .locked_shared => {
-                rwl.shared_count -= 1;
-                if (rwl.shared_count == 0) {
-                    rwl.state = .unlocked;
-                }
-            },
-        }
-    }
-};
-
-pub const PthreadRwLock = struct {
-    rwlock: std.c.pthread_rwlock_t = .{},
-
-    pub fn tryLock(rwl: *PthreadRwLock) bool {
-        return std.c.pthread_rwlock_trywrlock(&rwl.rwlock) == .SUCCESS;
-    }
-
-    pub fn lock(rwl: *PthreadRwLock) void {
-        const rc = std.c.pthread_rwlock_wrlock(&rwl.rwlock);
-        assert(rc == .SUCCESS);
-    }
-
-    pub fn unlock(rwl: *PthreadRwLock) void {
-        const rc = std.c.pthread_rwlock_unlock(&rwl.rwlock);
-        assert(rc == .SUCCESS);
-    }
-
-    pub fn tryLockShared(rwl: *PthreadRwLock) bool {
-        return std.c.pthread_rwlock_tryrdlock(&rwl.rwlock) == .SUCCESS;
-    }
-
-    pub fn lockShared(rwl: *PthreadRwLock) void {
-        const rc = std.c.pthread_rwlock_rdlock(&rwl.rwlock);
-        assert(rc == .SUCCESS);
-    }
-
-    pub fn unlockShared(rwl: *PthreadRwLock) void {
-        const rc = std.c.pthread_rwlock_unlock(&rwl.rwlock);
-        assert(rc == .SUCCESS);
-    }
-};
-
-pub const DefaultRwLock = struct {
-    state: usize = 0,
-    mutex: std.Thread.Mutex = .{},
-    semaphore: std.Thread.Semaphore = .{},
-
-    const IS_WRITING: usize = 1;
-    const WRITER: usize = 1 << 1;
-    const READER: usize = 1 << (1 + @bitSizeOf(Count));
-    const WRITER_MASK: usize = std.math.maxInt(Count) << @ctz(WRITER);
-    const READER_MASK: usize = std.math.maxInt(Count) << @ctz(READER);
-    const Count = std.meta.Int(.unsigned, @divFloor(@bitSizeOf(usize) - 1, 2));
-
-    pub fn tryLock(rwl: *DefaultRwLock) bool {
-        if (rwl.mutex.tryLock()) {
-            const state = @atomicLoad(usize, &rwl.state, .seq_cst);
-            if (state & READER_MASK == 0) {
-                _ = @atomicRmw(usize, &rwl.state, .Or, IS_WRITING, .seq_cst);
-                return true;
-            }
-
-            rwl.mutex.unlock();
-        }
-
-        return false;
-    }
-
-    pub fn lock(rwl: *DefaultRwLock) void {
-        _ = @atomicRmw(usize, &rwl.state, .Add, WRITER, .seq_cst);
-        rwl.mutex.lock();
-
-        const state = @atomicRmw(usize, &rwl.state, .Add, IS_WRITING -% WRITER, .seq_cst);
-        if (state & READER_MASK != 0)
-            rwl.semaphore.wait();
-    }
-
-    pub fn unlock(rwl: *DefaultRwLock) void {
-        _ = @atomicRmw(usize, &rwl.state, .And, ~IS_WRITING, .seq_cst);
-        rwl.mutex.unlock();
-    }
-
-    pub fn tryLockShared(rwl: *DefaultRwLock) bool {
-        const state = @atomicLoad(usize, &rwl.state, .seq_cst);
-        if (state & (IS_WRITING | WRITER_MASK) == 0) {
-            _ = @cmpxchgStrong(
-                usize,
-                &rwl.state,
-                state,
-                state + READER,
-                .seq_cst,
-                .seq_cst,
-            ) orelse return true;
-        }
-
-        if (rwl.mutex.tryLock()) {
-            _ = @atomicRmw(usize, &rwl.state, .Add, READER, .seq_cst);
-            rwl.mutex.unlock();
-            return true;
-        }
-
-        return false;
-    }
-
-    pub fn lockShared(rwl: *DefaultRwLock) void {
-        var state = @atomicLoad(usize, &rwl.state, .seq_cst);
-        while (state & (IS_WRITING | WRITER_MASK) == 0) {
-            state = @cmpxchgWeak(
-                usize,
-                &rwl.state,
-                state,
-                state + READER,
-                .seq_cst,
-                .seq_cst,
-            ) orelse return;
-        }
-
-        rwl.mutex.lock();
-        _ = @atomicRmw(usize, &rwl.state, .Add, READER, .seq_cst);
-        rwl.mutex.unlock();
-    }
-
-    pub fn unlockShared(rwl: *DefaultRwLock) void {
-        const state = @atomicRmw(usize, &rwl.state, .Sub, READER, .seq_cst);
-
-        if ((state & READER_MASK == READER) and (state & IS_WRITING != 0))
-            rwl.semaphore.post();
-    }
-};
-
-test "DefaultRwLock - internal state" {
-    var rwl = DefaultRwLock{};
-
-    // The following failed prior to the fix for Issue #13163,
-    // where the WRITER flag was subtracted by the lock method.
-
-    rwl.lock();
-    rwl.unlock();
-    try testing.expectEqual(rwl, DefaultRwLock{});
-}
-
-test "smoke test" {
-    var rwl = RwLock{};
-
-    rwl.lock();
-    try testing.expect(!rwl.tryLock());
-    try testing.expect(!rwl.tryLockShared());
-    rwl.unlock();
-
-    try testing.expect(rwl.tryLock());
-    try testing.expect(!rwl.tryLock());
-    try testing.expect(!rwl.tryLockShared());
-    rwl.unlock();
-
-    rwl.lockShared();
-    try testing.expect(!rwl.tryLock());
-    try testing.expect(rwl.tryLockShared());
-    rwl.unlockShared();
-    rwl.unlockShared();
-
-    try testing.expect(rwl.tryLockShared());
-    try testing.expect(!rwl.tryLock());
-    try testing.expect(rwl.tryLockShared());
-    rwl.unlockShared();
-    rwl.unlockShared();
-
-    rwl.lock();
-    rwl.unlock();
-}
-
-test "concurrent access" {
-    if (builtin.single_threaded)
-        return;
-
-    const num_writers: usize = 2;
-    const num_readers: usize = 4;
-    const num_writes: usize = 1000;
-    const num_reads: usize = 2000;
-
-    const Runner = struct {
-        const Runner = @This();
-
-        rwl: RwLock,
-        writes: usize,
-        reads: std.atomic.Value(usize),
-
-        val_a: usize,
-        val_b: usize,
-
-        fn reader(run: *Runner, thread_idx: usize) !void {
-            var prng = std.Random.DefaultPrng.init(thread_idx);
-            const rnd = prng.random();
-            while (true) {
-                run.rwl.lockShared();
-                defer run.rwl.unlockShared();
-
-                try testing.expect(run.writes <= num_writes);
-                if (run.reads.fetchAdd(1, .monotonic) >= num_reads) break;
-
-                // We use `volatile` accesses so that we can make sure the memory is accessed either
-                // side of a yield, maximising chances of a race.
-                const a_ptr: *const volatile usize = &run.val_a;
-                const b_ptr: *const volatile usize = &run.val_b;
-
-                const old_a = a_ptr.*;
-                if (rnd.boolean()) try std.Thread.yield();
-                const old_b = b_ptr.*;
-                try testing.expect(old_a == old_b);
-            }
-        }
-
-        fn writer(run: *Runner, thread_idx: usize) !void {
-            var prng = std.Random.DefaultPrng.init(thread_idx);
-            const rnd = prng.random();
-            while (true) {
-                run.rwl.lock();
-                defer run.rwl.unlock();
-
-                try testing.expect(run.writes <= num_writes);
-                if (run.writes == num_writes) break;
-
-                // We use `volatile` accesses so that we can make sure the memory is accessed either
-                // side of a yield, maximising chances of a race.
-                const a_ptr: *volatile usize = &run.val_a;
-                const b_ptr: *volatile usize = &run.val_b;
-
-                const new_val = rnd.int(usize);
-
-                const old_a = a_ptr.*;
-                a_ptr.* = new_val;
-                if (rnd.boolean()) try std.Thread.yield();
-                const old_b = b_ptr.*;
-                b_ptr.* = new_val;
-                try testing.expect(old_a == old_b);
-
-                run.writes += 1;
-            }
-        }
-    };
-
-    var run: Runner = .{
-        .rwl = .{},
-        .writes = 0,
-        .reads = .init(0),
-        .val_a = 0,
-        .val_b = 0,
-    };
-    var write_threads: [num_writers]std.Thread = undefined;
-    var read_threads: [num_readers]std.Thread = undefined;
-
-    for (&write_threads, 0..) |*t, i| t.* = try .spawn(.{}, Runner.writer, .{ &run, i });
-    for (&read_threads, num_writers..) |*t, i| t.* = try .spawn(.{}, Runner.reader, .{ &run, i });
-
-    for (write_threads) |t| t.join();
-    for (read_threads) |t| t.join();
-
-    try testing.expect(run.writes == num_writes);
-    try testing.expect(run.reads.raw >= num_reads);
-}
diff --git a/lib/std/Thread/Semaphore.zig b/lib/std/Thread/Semaphore.zig
@@ -1,111 +0,0 @@
-//! A semaphore is an unsigned integer that blocks the kernel thread if
-//! the number would become negative.
-//! This API supports static initialization and does not require deinitialization.
-//!
-//! Example:
-//! ```
-//! var s = Semaphore{};
-//!
-//! fn consumer() void {
-//!     s.wait();
-//! }
-//!
-//! fn producer() void {
-//!     s.post();
-//! }
-//!
-//! const thread = try std.Thread.spawn(.{}, producer, .{});
-//! consumer();
-//! thread.join();
-//! ```
-
-mutex: Mutex = .{},
-cond: Condition = .{},
-/// It is OK to initialize this field to any value.
-permits: usize = 0,
-
-const Semaphore = @This();
-const std = @import("../std.zig");
-const Mutex = std.Thread.Mutex;
-const Condition = std.Thread.Condition;
-const builtin = @import("builtin");
-const testing = std.testing;
-
-pub fn wait(sem: *Semaphore) void {
-    sem.mutex.lock();
-    defer sem.mutex.unlock();
-
-    while (sem.permits == 0)
-        sem.cond.wait(&sem.mutex);
-
-    sem.permits -= 1;
-    if (sem.permits > 0)
-        sem.cond.signal();
-}
-
-pub fn timedWait(sem: *Semaphore, timeout_ns: u64) error{Timeout}!void {
-    var timeout_timer = std.time.Timer.start() catch unreachable;
-
-    sem.mutex.lock();
-    defer sem.mutex.unlock();
-
-    while (sem.permits == 0) {
-        const elapsed = timeout_timer.read();
-        if (elapsed > timeout_ns)
-            return error.Timeout;
-
-        const local_timeout_ns = timeout_ns - elapsed;
-        try sem.cond.timedWait(&sem.mutex, local_timeout_ns);
-    }
-
-    sem.permits -= 1;
-    if (sem.permits > 0)
-        sem.cond.signal();
-}
-
-pub fn post(sem: *Semaphore) void {
-    sem.mutex.lock();
-    defer sem.mutex.unlock();
-
-    sem.permits += 1;
-    sem.cond.signal();
-}
-
-test Semaphore {
-    if (builtin.single_threaded) {
-        return error.SkipZigTest;
-    }
-
-    const TestContext = struct {
-        sem: *Semaphore,
-        n: *i32,
-        fn worker(ctx: *@This()) void {
-            ctx.sem.wait();
-            ctx.n.* += 1;
-            ctx.sem.post();
-        }
-    };
-    const num_threads = 3;
-    var sem = Semaphore{ .permits = 1 };
-    var threads: [num_threads]std.Thread = undefined;
-    var n: i32 = 0;
-    var ctx = TestContext{ .sem = &sem, .n = &n };
-
-    for (&threads) |*t| t.* = try std.Thread.spawn(.{}, TestContext.worker, .{&ctx});
-    for (threads) |t| t.join();
-    sem.wait();
-    try testing.expect(n == num_threads);
-}
-
-test timedWait {
-    var sem = Semaphore{};
-    try testing.expectEqual(0, sem.permits);
-
-    try testing.expectError(error.Timeout, sem.timedWait(1));
-
-    sem.post();
-    try testing.expectEqual(1, sem.permits);
-
-    try sem.timedWait(1);
-    try testing.expectEqual(0, sem.permits);
-}
diff --git a/lib/std/atomic.zig b/lib/std/atomic.zig
@@ -1,3 +1,10 @@
+const builtin = @import("builtin");
+
+const std = @import("std.zig");
+const AtomicOrder = std.builtin.AtomicOrder;
+const testing = std.testing;
+const assert = std.debug.assert;
+
 /// This is a thin wrapper around a primitive value to prevent accidental data races.
 pub fn Value(comptime T: type) type {
     return extern struct {
@@ -496,7 +503,17 @@ test "current CPU has a cache line size" {
     _ = cache_line;
 }
 
-const std = @import("std.zig");
-const builtin = @import("builtin");
-const AtomicOrder = std.builtin.AtomicOrder;
-const testing = std.testing;
+/// A lock-free single-owner resource.
+pub const Mutex = enum(u8) {
+    unlocked,
+    locked,
+
+    pub fn tryLock(m: *Mutex) bool {
+        return @cmpxchgWeak(Mutex, m, .unlocked, .locked, .acquire, .monotonic) == null;
+    }
+
+    pub fn unlock(m: *Mutex) void {
+        assert(m.* == .locked);
+        @atomicStore(Mutex, m, .unlocked, .release);
+    }
+};
diff --git a/lib/std/debug.zig b/lib/std/debug.zig
@@ -696,7 +696,7 @@ pub noinline fn writeCurrentStackTrace(options: StackUnwindOptions, t: Io.Termin
                 .useless, .unsafe => {},
                 .safe, .ideal => continue, // no need to even warn
             }
-            const module_name = di.getModuleName(di_gpa, unwind_error.address) catch "???";
+            const module_name = di.getModuleName(di_gpa, io, unwind_error.address) catch "???";
             const caption: []const u8 = switch (unwind_error.err) {
                 error.MissingDebugInfo => "unwind info unavailable",
                 error.InvalidDebugInfo => "unwind info invalid",
@@ -1141,7 +1141,7 @@ fn printSourceAtAddress(
         symbol.source_location,
         address,
         symbol.name orelse "???",
-        symbol.compile_unit_name orelse debug_info.getModuleName(gpa, address) catch "???",
+        symbol.compile_unit_name orelse debug_info.getModuleName(gpa, io, address) catch "???",
     );
 }
 fn printLineInfo(
@@ -1356,7 +1356,10 @@ pub fn getDebugInfoAllocator() Allocator {
     // Otherwise, use a global arena backed by the page allocator
     const S = struct {
         var arena: std.heap.ArenaAllocator = .init(std.heap.page_allocator);
-        var ts_arena: std.heap.ThreadSafeAllocator = .{ .child_allocator = arena.allocator() };
+        var ts_arena: std.heap.ThreadSafeAllocator = .{
+            .child_allocator = arena.allocator(),
+            .io = std.Options.debug_io,
+        };
     };
     return S.ts_arena.allocator();
 }
diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig
@@ -1,11 +1,12 @@
+const Coverage = @This();
+
 const std = @import("../std.zig");
+const Io = std.Io;
 const Allocator = std.mem.Allocator;
 const Hash = std.hash.Wyhash;
 const Dwarf = std.debug.Dwarf;
 const assert = std.debug.assert;
 
-const Coverage = @This();
-
 /// Provides a globally-scoped integer index for directories.
 ///
 /// As opposed to, for example, a directory index that is compilation-unit
@@ -23,12 +24,12 @@ directories: std.ArrayHashMapUnmanaged(String, void, String.MapContext, false),
 files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false),
 string_bytes: std.ArrayList(u8),
 /// Protects the other fields.
-mutex: std.Thread.Mutex,
+mutex: Io.Mutex,
 
 pub const init: Coverage = .{
     .directories = .{},
     .files = .{},
-    .mutex = .{},
+    .mutex = .init,
     .string_bytes = .{},
 };
 
@@ -140,11 +141,12 @@ pub fn stringAt(cov: *Coverage, index: String) [:0]const u8 {
     return span(cov.string_bytes.items[@intFromEnum(index)..]);
 }
 
-pub const ResolveAddressesDwarfError = Dwarf.ScanError;
+pub const ResolveAddressesDwarfError = Dwarf.ScanError || Io.Cancelable;
 
 pub fn resolveAddressesDwarf(
     cov: *Coverage,
     gpa: Allocator,
+    io: Io,
     endian: std.builtin.Endian,
     /// Asserts the addresses are in ascending order.
     sorted_pc_addrs: []const u64,
@@ -161,8 +163,8 @@ pub fn resolveAddressesDwarf(
     var prev_pc: u64 = 0;
     var prev_cu: ?*std.debug.Dwarf.CompileUnit = null;
     // Protects directories and files tables from other threads.
-    cov.mutex.lock();
-    defer cov.mutex.unlock();
+    try cov.mutex.lock(io);
+    defer cov.mutex.unlock(io);
     next_pc: for (sorted_pc_addrs, output) |pc, *out| {
         assert(pc >= prev_pc);
         prev_pc = pc;
@@ -183,8 +185,8 @@ pub fn resolveAddressesDwarf(
         if (cu != prev_cu) {
             prev_cu = cu;
             if (cu.src_loc_cache == null) {
-                cov.mutex.unlock();
-                defer cov.mutex.lock();
+                cov.mutex.unlock(io);
+                defer cov.mutex.lockUncancelable(io);
                 d.populateSrcLocCache(gpa, endian, cu) catch |err| switch (err) {
                     error.MissingDebugInfo, error.InvalidDebugInfo => {
                         out.* = SourceLocation.invalid;
diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig
@@ -93,7 +93,7 @@ pub fn resolveAddresses(
 ) ResolveAddressesError!void {
     assert(sorted_pc_addrs.len == output.len);
     switch (info.impl) {
-        .elf => |*ef| return info.coverage.resolveAddressesDwarf(gpa, ef.endian, sorted_pc_addrs, output, &ef.dwarf.?),
+        .elf => |*ef| return info.coverage.resolveAddressesDwarf(gpa, io, ef.endian, sorted_pc_addrs, output, &ef.dwarf.?),
         .macho => |*mf| {
             // Resolving all of the addresses at once unfortunately isn't so easy in Mach-O binaries
             // due to split debug information. For now, we'll just resolve the addreses one by one.
@@ -112,7 +112,7 @@ pub fn resolveAddresses(
                         else => |e| return e,
                     };
                 }
-                try info.coverage.resolveAddressesDwarf(gpa, .little, &.{dwarf_pc_addr}, src_loc[0..1], dwarf);
+                try info.coverage.resolveAddressesDwarf(gpa, io, .little, &.{dwarf_pc_addr}, src_loc[0..1], dwarf);
             }
         },
     }
diff --git a/lib/std/debug/SelfInfo/Elf.zig b/lib/std/debug/SelfInfo/Elf.zig
@@ -1,4 +1,4 @@
-rwlock: std.Thread.RwLock,
+rwlock: Io.RwLock,
 
 modules: std.ArrayList(Module),
 ranges: std.ArrayList(Module.Range),
@@ -6,7 +6,7 @@ ranges: std.ArrayList(Module.Range),
 unwind_cache: if (can_unwind) ?[]Dwarf.SelfUnwinder.CacheEntry else ?noreturn,
 
 pub const init: SelfInfo = .{
-    .rwlock = .{},
+    .rwlock = .init,
     .modules = .empty,
     .ranges = .empty,
     .unwind_cache = null,
@@ -29,8 +29,8 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
 }
 
 pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol {
-    const module = try si.findModule(gpa, address, .exclusive);
-    defer si.rwlock.unlock();
+    const module = try si.findModule(gpa, io, address, .exclusive);
+    defer si.rwlock.unlock(io);
 
     const vaddr = address - module.load_offset;
 
@@ -73,15 +73,15 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st
         error.OutOfMemory => |e| return e,
     };
 }
-pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 {
-    const module = try si.findModule(gpa, address, .shared);
-    defer si.rwlock.unlockShared();
+pub fn getModuleName(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error![]const u8 {
+    const module = try si.findModule(gpa, io, address, .shared);
+    defer si.rwlock.unlockShared(io);
     if (module.name.len == 0) return error.MissingDebugInfo;
     return module.name;
 }
-pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
-    const module = try si.findModule(gpa, address, .shared);
-    defer si.rwlock.unlockShared();
+pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!usize {
+    const module = try si.findModule(gpa, io, address, .shared);
+    defer si.rwlock.unlockShared(io);
     return module.load_offset;
 }
 
@@ -183,8 +183,8 @@ pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, io: Io, context: *UnwindContex
     comptime assert(can_unwind);
 
     {
-        si.rwlock.lockShared();
-        defer si.rwlock.unlockShared();
+        si.rwlock.lockSharedUncancelable(io);
+        defer si.rwlock.unlockShared(io);
         if (si.unwind_cache) |cache| {
             if (Dwarf.SelfUnwinder.CacheEntry.find(cache, context.pc)) |entry| {
                 return context.next(gpa, entry);
@@ -192,8 +192,8 @@ pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, io: Io, context: *UnwindContex
         }
     }
 
-    const module = try si.findModule(gpa, context.pc, .exclusive);
-    defer si.rwlock.unlock();
+    const module = try si.findModule(gpa, io, context.pc, .exclusive);
+    defer si.rwlock.unlock(io);
 
     if (si.unwind_cache == null) {
         si.unwind_cache = try gpa.alloc(Dwarf.SelfUnwinder.CacheEntry, 2048);
@@ -375,11 +375,11 @@ const Module = struct {
     }
 };
 
-fn findModule(si: *SelfInfo, gpa: Allocator, address: usize, lock: enum { shared, exclusive }) Error!*Module {
+fn findModule(si: *SelfInfo, gpa: Allocator, io: Io, address: usize, lock: enum { shared, exclusive }) Error!*Module {
     // With the requested lock, scan the module ranges looking for `address`.
     switch (lock) {
-        .shared => si.rwlock.lockShared(),
-        .exclusive => si.rwlock.lock(),
+        .shared => si.rwlock.lockSharedUncancelable(io),
+        .exclusive => si.rwlock.lockUncancelable(io),
     }
     for (si.ranges.items) |*range| {
         if (address >= range.start and address < range.start + range.len) {
@@ -390,14 +390,14 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize, lock: enum { shared
     // a new module was loaded. Upgrade to an exclusive lock if necessary.
     switch (lock) {
         .shared => {
-            si.rwlock.unlockShared();
-            si.rwlock.lock();
+            si.rwlock.unlockShared(io);
+            si.rwlock.lockUncancelable(io);
         },
         .exclusive => {},
     }
     // Rebuild module list with the exclusive lock.
     {
-        errdefer si.rwlock.unlock();
+        errdefer si.rwlock.unlock(io);
         for (si.modules.items) |*mod| {
             unwind: {
                 const u = &(mod.unwind orelse break :unwind catch break :unwind);
@@ -416,8 +416,8 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize, lock: enum { shared
     // Downgrade the lock back to shared if necessary.
     switch (lock) {
         .shared => {
-            si.rwlock.unlock();
-            si.rwlock.lockShared();
+            si.rwlock.unlock(io);
+            si.rwlock.lockSharedUncancelable(io);
         },
         .exclusive => {},
     }
@@ -429,8 +429,8 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize, lock: enum { shared
     }
     // Still nothing; unlock and error.
     switch (lock) {
-        .shared => si.rwlock.unlockShared(),
-        .exclusive => si.rwlock.unlock(),
+        .shared => si.rwlock.unlockShared(io),
+        .exclusive => si.rwlock.unlock(io),
     }
     return error.MissingDebugInfo;
 }
diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig
@@ -1,9 +1,9 @@
-mutex: std.Thread.Mutex,
+mutex: Io.Mutex,
 /// Accessed through `Module.Adapter`.
 modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false),
 
 pub const init: SelfInfo = .{
-    .mutex = .{},
+    .mutex = .init,
     .modules = .empty,
 };
 pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
@@ -21,8 +21,8 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
 }
 
 pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol {
-    const module = try si.findModule(gpa, address);
-    defer si.mutex.unlock();
+    const module = try si.findModule(gpa, io, address);
+    defer si.mutex.unlock(io);
 
     const file = try module.getFile(gpa, io);
 
@@ -76,9 +76,10 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st
         ) catch null,
     };
 }
-pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 {
+pub fn getModuleName(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error![]const u8 {
     _ = si;
     _ = gpa;
+    _ = io;
     // This function is marked as deprecated; however, it is significantly more
     // performant than `dladdr` (since the latter also does a very slow symbol
     // lookup), so let's use it since it's still available.
@@ -86,9 +87,9 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons
         @ptrFromInt(address),
     ) orelse return error.MissingDebugInfo);
 }
-pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
-    const module = try si.findModule(gpa, address);
-    defer si.mutex.unlock();
+pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!usize {
+    const module = try si.findModule(gpa, io, address);
+    defer si.mutex.unlock(io);
     const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base);
     const raw_macho: [*]u8 = @ptrCast(header);
     var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable;
@@ -107,8 +108,7 @@ pub const UnwindContext = std.debug.Dwarf.SelfUnwinder;
 /// If the compact encoding can't encode a way to unwind a frame, it will
 /// defer unwinding to DWARF, in which case `__eh_frame` will be used if available.
 pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, io: Io, context: *UnwindContext) Error!usize {
-    _ = io;
-    return unwindFrameInner(si, gpa, context) catch |err| switch (err) {
+    return unwindFrameInner(si, gpa, io, context) catch |err| switch (err) {
         error.InvalidDebugInfo,
         error.MissingDebugInfo,
         error.UnsupportedDebugInfo,
@@ -134,9 +134,9 @@ pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, io: Io, context: *UnwindContex
         => return error.InvalidDebugInfo,
     };
 }
-fn unwindFrameInner(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize {
-    const module = try si.findModule(gpa, context.pc);
-    defer si.mutex.unlock();
+fn unwindFrameInner(si: *SelfInfo, gpa: Allocator, io: Io, context: *UnwindContext) !usize {
+    const module = try si.findModule(gpa, io, context.pc);
+    defer si.mutex.unlock(io);
 
     const unwind: *Module.Unwind = try module.getUnwindInfo(gpa);
 
@@ -430,15 +430,15 @@ fn unwindFrameInner(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usi
 }
 
 /// Acquires the mutex on success.
-fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module {
+fn findModule(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!*Module {
     // This function is marked as deprecated; however, it is significantly more
     // performant than `dladdr` (since the latter also does a very slow symbol
     // lookup), so let's use it since it's still available.
     const text_base = std.c._dyld_get_image_header_containing_address(
         @ptrFromInt(address),
     ) orelse return error.MissingDebugInfo;
-    si.mutex.lock();
-    errdefer si.mutex.unlock();
+    try si.mutex.lock(io);
+    errdefer si.mutex.unlock(io);
     const gop = try si.modules.getOrPutAdapted(gpa, @intFromPtr(text_base), Module.Adapter{});
     errdefer comptime unreachable;
     if (!gop.found_existing) gop.key_ptr.* = .{
diff --git a/lib/std/debug/SelfInfo/Windows.zig b/lib/std/debug/SelfInfo/Windows.zig
@@ -1,9 +1,9 @@
-mutex: std.Thread.Mutex,
+mutex: Io.Mutex,
 modules: std.ArrayList(Module),
 module_name_arena: std.heap.ArenaAllocator.State,
 
 pub const init: SelfInfo = .{
-    .mutex = .{},
+    .mutex = .init,
     .modules = .empty,
     .module_name_arena = .{},
 };
@@ -21,21 +21,21 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
 }
 
 pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol {
-    si.mutex.lock();
-    defer si.mutex.unlock();
+    try si.mutex.lock(io);
+    defer si.mutex.unlock(io);
     const module = try si.findModule(gpa, address);
     const di = try module.getDebugInfo(gpa, io);
     return di.getSymbol(gpa, address - module.base_address);
 }
-pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 {
-    si.mutex.lock();
-    defer si.mutex.unlock();
+pub fn getModuleName(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error![]const u8 {
+    try si.mutex.lock(io);
+    defer si.mutex.unlock(io);
     const module = try si.findModule(gpa, address);
     return module.name;
 }
-pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
-    si.mutex.lock();
-    defer si.mutex.unlock();
+pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!usize {
+    try si.mutex.lock(io);
+    defer si.mutex.unlock(io);
     const module = try si.findModule(gpa, address);
     return module.base_address;
 }
diff --git a/lib/std/heap/SmpAllocator.zig b/lib/std/heap/SmpAllocator.zig
@@ -62,7 +62,7 @@ const Thread = struct {
     ///
     /// Threads lock this before accessing their own state in order
     /// to support freelist reclamation.
-    mutex: std.Thread.Mutex = .{},
+    mutex: std.atomic.Mutex = .unlocked,
 
     /// For each size class, tracks the next address to be returned from
     /// `alloc` when the freelist is empty.
diff --git a/lib/std/heap/ThreadSafeAllocator.zig b/lib/std/heap/ThreadSafeAllocator.zig
@@ -1,7 +1,14 @@
-//! Wraps a non-thread-safe allocator and makes it thread-safe.
+//! Deprecated. Thread safety should be built into each Allocator instance
+//! directly rather than trying to do this "composable allocators" thing.
+const ThreadSafeAllocator = @This();
+
+const std = @import("../std.zig");
+const Io = std.Io;
+const Allocator = std.mem.Allocator;
 
 child_allocator: Allocator,
-mutex: std.Thread.Mutex = .{},
+io: Io,
+mutex: Io.Mutex = .init,
 
 pub fn allocator(self: *ThreadSafeAllocator) Allocator {
     return .{
@@ -17,39 +24,39 @@ pub fn allocator(self: *ThreadSafeAllocator) Allocator {
 
 fn alloc(ctx: *anyopaque, n: usize, alignment: std.mem.Alignment, ra: usize) ?[*]u8 {
     const self: *ThreadSafeAllocator = @ptrCast(@alignCast(ctx));
-    self.mutex.lock();
-    defer self.mutex.unlock();
+    const io = self.io;
+    self.mutex.lockUncancelable(io);
+    defer self.mutex.unlock(io);
 
     return self.child_allocator.rawAlloc(n, alignment, ra);
 }
 
 fn resize(ctx: *anyopaque, buf: []u8, alignment: std.mem.Alignment, new_len: usize, ret_addr: usize) bool {
     const self: *ThreadSafeAllocator = @ptrCast(@alignCast(ctx));
+    const io = self.io;
 
-    self.mutex.lock();
-    defer self.mutex.unlock();
+    self.mutex.lockUncancelable(io);
+    defer self.mutex.unlock(io);
 
     return self.child_allocator.rawResize(buf, alignment, new_len, ret_addr);
 }
 
 fn remap(context: *anyopaque, memory: []u8, alignment: std.mem.Alignment, new_len: usize, return_address: usize) ?[*]u8 {
     const self: *ThreadSafeAllocator = @ptrCast(@alignCast(context));
+    const io = self.io;
 
-    self.mutex.lock();
-    defer self.mutex.unlock();
+    self.mutex.lockUncancelable(io);
+    defer self.mutex.unlock(io);
 
     return self.child_allocator.rawRemap(memory, alignment, new_len, return_address);
 }
 
 fn free(ctx: *anyopaque, buf: []u8, alignment: std.mem.Alignment, ret_addr: usize) void {
     const self: *ThreadSafeAllocator = @ptrCast(@alignCast(ctx));
+    const io = self.io;
 
-    self.mutex.lock();
-    defer self.mutex.unlock();
+    self.mutex.lockUncancelable(io);
+    defer self.mutex.unlock(io);
 
     return self.child_allocator.rawFree(buf, alignment, ret_addr);
 }
-
-const std = @import("../std.zig");
-const ThreadSafeAllocator = @This();
-const Allocator = std.mem.Allocator;
diff --git a/lib/std/heap/debug_allocator.zig b/lib/std/heap/debug_allocator.zig
@@ -126,16 +126,6 @@ pub const Config = struct {
     /// Whether the allocator may be used simultaneously from multiple threads.
     thread_safe: bool = !builtin.single_threaded,
 
-    /// What type of mutex you'd like to use, for thread safety.
-    /// when specified, the mutex type must have the same shape as `std.Thread.Mutex` and
-    /// `DummyMutex`, and have no required fields. Specifying this field causes
-    /// the `thread_safe` field to be ignored.
-    ///
-    /// when null (default):
-    /// * the mutex type defaults to `std.Thread.Mutex` when thread_safe is enabled.
-    /// * the mutex type defaults to `DummyMutex` otherwise.
-    MutexType: ?type = null,
-
     /// This is a temporary debugging trick you can use to turn segfaults into more helpful
     /// logged error messages with stack trace details. The downside is that every allocation
     /// will be leaked, unless used with retain_metadata!
@@ -204,17 +194,8 @@ pub fn DebugAllocator(comptime config: Config) type {
         const total_requested_bytes_init = if (config.enable_memory_limit) @as(usize, 0) else {};
         const requested_memory_limit_init = if (config.enable_memory_limit) @as(usize, math.maxInt(usize)) else {};
 
-        const mutex_init = if (config.MutexType) |T|
-            T{}
-        else if (config.thread_safe)
-            std.Thread.Mutex{}
-        else
-            DummyMutex{};
-
-        const DummyMutex = struct {
-            inline fn lock(_: DummyMutex) void {}
-            inline fn unlock(_: DummyMutex) void {}
-        };
+        const have_mutex = config.thread_safe;
+        const mutex_init = if (have_mutex) std.Io.Mutex.init else {};
 
         const stack_n = config.stack_trace_frames;
         const one_trace_size = @sizeOf(usize) * stack_n;
@@ -737,8 +718,8 @@ pub fn DebugAllocator(comptime config: Config) type {
 
         fn alloc(context: *anyopaque, len: usize, alignment: mem.Alignment, ret_addr: usize) ?[*]u8 {
             const self: *Self = @ptrCast(@alignCast(context));
-            self.mutex.lock();
-            defer self.mutex.unlock();
+            if (have_mutex) std.Io.Threaded.mutexLock(&self.mutex);
+            defer if (have_mutex) std.Io.Threaded.mutexUnlock(&self.mutex);
 
             if (config.enable_memory_limit) {
                 const new_req_bytes = self.total_requested_bytes + len;
@@ -850,8 +831,8 @@ pub fn DebugAllocator(comptime config: Config) type {
             return_address: usize,
         ) bool {
             const self: *Self = @ptrCast(@alignCast(context));
-            self.mutex.lock();
-            defer self.mutex.unlock();
+            if (have_mutex) std.Io.Threaded.mutexLock(&self.mutex);
+            defer if (have_mutex) std.Io.Threaded.mutexUnlock(&self.mutex);
 
             const size_class_index: usize = @max(@bitSizeOf(usize) - @clz(memory.len - 1), @intFromEnum(alignment));
             if (size_class_index >= self.buckets.len) {
@@ -869,8 +850,8 @@ pub fn DebugAllocator(comptime config: Config) type {
             return_address: usize,
         ) ?[*]u8 {
             const self: *Self = @ptrCast(@alignCast(context));
-            self.mutex.lock();
-            defer self.mutex.unlock();
+            if (have_mutex) std.Io.Threaded.mutexLock(&self.mutex);
+            defer if (have_mutex) std.Io.Threaded.mutexUnlock(&self.mutex);
 
             const size_class_index: usize = @max(@bitSizeOf(usize) - @clz(memory.len - 1), @intFromEnum(alignment));
             if (size_class_index >= self.buckets.len) {
@@ -887,8 +868,8 @@ pub fn DebugAllocator(comptime config: Config) type {
             return_address: usize,
         ) void {
             const self: *Self = @ptrCast(@alignCast(context));
-            self.mutex.lock();
-            defer self.mutex.unlock();
+            if (have_mutex) std.Io.Threaded.mutexLock(&self.mutex);
+            defer if (have_mutex) std.Io.Threaded.mutexUnlock(&self.mutex);
 
             const size_class_index: usize = @max(@bitSizeOf(usize) - @clz(old_memory.len - 1), @intFromEnum(alignment));
             if (size_class_index >= self.buckets.len) {
@@ -1331,18 +1312,6 @@ test "realloc large object to small object" {
     try std.testing.expect(slice[16] == 0x34);
 }
 
-test "overridable mutexes" {
-    var gpa = DebugAllocator(.{ .MutexType = std.Thread.Mutex }){
-        .backing_allocator = std.testing.allocator,
-        .mutex = std.Thread.Mutex{},
-    };
-    defer std.testing.expect(gpa.deinit() == .ok) catch @panic("leak");
-    const allocator = gpa.allocator();
-
-    const ptr = try allocator.create(i32);
-    defer allocator.destroy(ptr);
-}
-
 test "non-page-allocator backing allocator" {
     var gpa: DebugAllocator(.{
         .backing_allocator_zeroes = false,
diff --git a/lib/std/heap/sbrk_allocator.zig b/lib/std/heap/sbrk_allocator.zig
@@ -1,5 +1,7 @@
-const std = @import("../std.zig");
 const builtin = @import("builtin");
+
+const std = @import("../std.zig");
+const Io = std.Io;
 const math = std.math;
 const Allocator = std.mem.Allocator;
 const mem = std.mem;
@@ -39,12 +41,12 @@ pub fn SbrkAllocator(comptime sbrk: *const fn (n: usize) usize) type {
         var big_frees = [1]usize{0} ** big_size_class_count;
 
         // TODO don't do the naive locking strategy
-        var lock: std.Thread.Mutex = .{};
+        var mutex: Io.Mutex = .{};
         fn alloc(ctx: *anyopaque, len: usize, alignment: mem.Alignment, return_address: usize) ?[*]u8 {
             _ = ctx;
             _ = return_address;
-            lock.lock();
-            defer lock.unlock();
+            Io.Threaded.mutexLock(&mutex);
+            defer Io.Threaded.mutexUnlock(&mutex);
             // Make room for the freelist next pointer.
             const actual_len = @max(len +| @sizeOf(usize), alignment.toByteUnits());
             const slot_size = math.ceilPowerOfTwo(usize, actual_len) catch return null;
@@ -88,8 +90,8 @@ pub fn SbrkAllocator(comptime sbrk: *const fn (n: usize) usize) type {
         ) bool {
             _ = ctx;
             _ = return_address;
-            lock.lock();
-            defer lock.unlock();
+            Io.Threaded.mutexLock(&mutex);
+            defer Io.Threaded.mutexUnlock(&mutex);
             // We don't want to move anything from one size class to another, but we
             // can recover bytes in between powers of two.
             const buf_align = alignment.toByteUnits();
@@ -127,8 +129,8 @@ pub fn SbrkAllocator(comptime sbrk: *const fn (n: usize) usize) type {
         ) void {
             _ = ctx;
             _ = return_address;
-            lock.lock();
-            defer lock.unlock();
+            Io.Threaded.mutexLock(&mutex);
+            defer Io.Threaded.mutexUnlock(&mutex);
             const buf_align = alignment.toByteUnits();
             const actual_len = @max(buf.len + @sizeOf(usize), buf_align);
             const slot_size = math.ceilPowerOfTwoAssert(usize, actual_len);
diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig
@@ -3,22 +3,25 @@
 //! Connections are opened in a thread-safe manner, but individual Requests are not.
 //!
 //! TLS support may be disabled via `std.options.http_disable_tls`.
+//!
+//! TODO all the lockUncancelable in this file should be changed to regular lock and
+//! `error.Canceled` added to more error sets.
+const Client = @This();
 
-const std = @import("../std.zig");
 const builtin = @import("builtin");
+
+const std = @import("../std.zig");
+const Io = std.Io;
 const testing = std.testing;
 const http = std.http;
 const mem = std.mem;
 const Uri = std.Uri;
-const Allocator = mem.Allocator;
+const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
-const Io = std.Io;
 const Writer = std.Io.Writer;
 const Reader = std.Io.Reader;
 const HostName = std.Io.net.HostName;
 
-const Client = @This();
-
 pub const disable_tls = std.options.http_disable_tls;
 
 /// Used for all client allocations. Must be thread-safe.
@@ -27,7 +30,7 @@ allocator: Allocator,
 io: Io,
 
 ca_bundle: if (disable_tls) void else std.crypto.Certificate.Bundle = if (disable_tls) {} else .{},
-ca_bundle_mutex: std.Thread.Mutex = .{},
+ca_bundle_mutex: Io.Mutex = .init,
 /// Used both for the reader and writer buffers.
 tls_buffer_size: if (disable_tls) u0 else usize = if (disable_tls) 0 else std.crypto.tls.Client.min_buffer_len,
 /// If non-null, ssl secrets are logged to a stream. Creating such a stream
@@ -62,7 +65,7 @@ https_proxy: ?*Proxy = null,
 
 /// A Least-Recently-Used cache of open connections to be reused.
 pub const ConnectionPool = struct {
-    mutex: std.Thread.Mutex = .{},
+    mutex: Io.Mutex = .init,
     /// Open connections that are currently in use.
     used: std.DoublyLinkedList = .{},
     /// Open connections that are not currently in use.
@@ -81,9 +84,9 @@ pub const ConnectionPool = struct {
     /// If no connection is found, null is returned.
     ///
     /// Threadsafe.
-    pub fn findConnection(pool: *ConnectionPool, criteria: Criteria) ?*Connection {
-        pool.mutex.lock();
-        defer pool.mutex.unlock();
+    pub fn findConnection(pool: *ConnectionPool, io: Io, criteria: Criteria) ?*Connection {
+        pool.mutex.lockUncancelable(io);
+        defer pool.mutex.unlock(io);
 
         var next = pool.free.last;
         while (next) |node| : (next = node.prev) {
@@ -110,9 +113,9 @@ pub const ConnectionPool = struct {
     }
 
     /// Acquires an existing connection from the connection pool. This function is threadsafe.
-    pub fn acquire(pool: *ConnectionPool, connection: *Connection) void {
-        pool.mutex.lock();
-        defer pool.mutex.unlock();
+    pub fn acquire(pool: *ConnectionPool, io: Io, connection: *Connection) void {
+        pool.mutex.lockUncancelable(io);
+        defer pool.mutex.unlock(io);
 
         return pool.acquireUnsafe(connection);
     }
@@ -122,8 +125,8 @@ pub const ConnectionPool = struct {
     ///
     /// Threadsafe.
     pub fn release(pool: *ConnectionPool, connection: *Connection, io: Io) void {
-        pool.mutex.lock();
-        defer pool.mutex.unlock();
+        pool.mutex.lockUncancelable(io);
+        defer pool.mutex.unlock(io);
 
         pool.used.remove(&connection.pool_node);
 
@@ -147,9 +150,9 @@ pub const ConnectionPool = struct {
     }
 
     /// Adds a newly created node to the pool of used connections. This function is threadsafe.
-    pub fn addUsed(pool: *ConnectionPool, connection: *Connection) void {
-        pool.mutex.lock();
-        defer pool.mutex.unlock();
+    pub fn addUsed(pool: *ConnectionPool, io: Io, connection: *Connection) void {
+        pool.mutex.lockUncancelable(io);
+        defer pool.mutex.unlock(io);
 
         pool.used.append(&connection.pool_node);
     }
@@ -159,9 +162,9 @@ pub const ConnectionPool = struct {
     /// If the new size is smaller than the current size, then idle connections will be closed until the pool is the new size.
     ///
     /// Threadsafe.
-    pub fn resize(pool: *ConnectionPool, allocator: Allocator, new_size: usize) void {
-        pool.mutex.lock();
-        defer pool.mutex.unlock();
+    pub fn resize(pool: *ConnectionPool, io: Io, allocator: Allocator, new_size: usize) void {
+        pool.mutex.lockUncancelable(io);
+        defer pool.mutex.unlock(io);
 
         const next = pool.free.first;
         _ = next;
@@ -182,7 +185,7 @@ pub const ConnectionPool = struct {
     ///
     /// Threadsafe.
     pub fn deinit(pool: *ConnectionPool, io: Io) void {
-        pool.mutex.lock();
+        pool.mutex.lockUncancelable(io);
 
         var next = pool.free.first;
         while (next) |node| {
@@ -1308,9 +1311,11 @@ pub fn deinit(client: *Client) void {
 /// Uses `arena` for a few small allocations that must outlive the client, or
 /// at least until those fields are set to different values.
 pub fn initDefaultProxies(client: *Client, arena: Allocator, environ_map: *std.process.Environ.Map) !void {
+    const io = client.io;
+
     // Prevent any new connections from being created.
-    client.connection_pool.mutex.lock();
-    defer client.connection_pool.mutex.unlock();
+    client.connection_pool.mutex.lockUncancelable(io);
+    defer client.connection_pool.mutex.unlock(io);
 
     assert(client.connection_pool.used.first == null); // There are active requests.
 
@@ -1437,7 +1442,7 @@ pub fn connectTcpOptions(client: *Client, options: ConnectTcpOptions) ConnectTcp
     const proxied_host = options.proxied_host orelse host;
     const proxied_port = options.proxied_port orelse port;
 
-    if (client.connection_pool.findConnection(.{
+    if (client.connection_pool.findConnection(io, .{
         .host = proxied_host,
         .port = proxied_port,
         .protocol = protocol,
@@ -1455,12 +1460,12 @@ pub fn connectTcpOptions(client: *Client, options: ConnectTcpOptions) ConnectTcp
                 error.Canceled => |e| return e,
                 else => return error.TlsInitializationFailed,
             };
-            client.connection_pool.addUsed(&tc.connection);
+            client.connection_pool.addUsed(io, &tc.connection);
             return &tc.connection;
         },
         .plain => {
             const pc = try Connection.Plain.create(client, proxied_host, proxied_port, stream);
-            client.connection_pool.addUsed(&pc.connection);
+            client.connection_pool.addUsed(io, &pc.connection);
             return &pc.connection;
         },
     }
@@ -1474,7 +1479,7 @@ pub const ConnectUnixError = Allocator.Error || std.posix.SocketError || error{N
 pub fn connectUnix(client: *Client, path: []const u8) ConnectUnixError!*Connection {
     const io = client.io;
 
-    if (client.connection_pool.findConnection(.{
+    if (client.connection_pool.findConnection(io, .{
         .host = path,
         .port = 0,
         .protocol = .plain,
@@ -1516,7 +1521,7 @@ pub fn connectProxied(
     const io = client.io;
     if (!proxy.supports_connect) return error.TunnelNotSupported;
 
-    if (client.connection_pool.findConnection(.{
+    if (client.connection_pool.findConnection(io, .{
         .host = proxied_host,
         .port = proxied_port,
         .protocol = proxy.protocol,
@@ -1691,8 +1696,8 @@ pub fn request(
     if (protocol == .tls) {
         if (disable_tls) unreachable;
         {
-            client.ca_bundle_mutex.lock();
-            defer client.ca_bundle_mutex.unlock();
+            client.ca_bundle_mutex.lockUncancelable(io);
+            defer client.ca_bundle_mutex.unlock(io);
 
             if (client.now == null) {
                 const now = try Io.Clock.real.now(io);
diff --git a/lib/std/once.zig b/lib/std/once.zig
@@ -1,71 +0,0 @@
-const std = @import("std.zig");
-const builtin = @import("builtin");
-const testing = std.testing;
-
-pub fn once(comptime f: fn () void) Once(f) {
-    return Once(f){};
-}
-
-/// An object that executes the function `f` just once.
-/// It is undefined behavior if `f` re-enters the same Once instance.
-pub fn Once(comptime f: fn () void) type {
-    return struct {
-        done: bool = false,
-        mutex: std.Thread.Mutex = std.Thread.Mutex{},
-
-        /// Call the function `f`.
-        /// If `call` is invoked multiple times `f` will be executed only the
-        /// first time.
-        /// The invocations are thread-safe.
-        pub fn call(self: *@This()) void {
-            if (@atomicLoad(bool, &self.done, .acquire))
-                return;
-
-            return self.callSlow();
-        }
-
-        fn callSlow(self: *@This()) void {
-            @branchHint(.cold);
-
-            self.mutex.lock();
-            defer self.mutex.unlock();
-
-            // The first thread to acquire the mutex gets to run the initializer
-            if (!self.done) {
-                f();
-                @atomicStore(bool, &self.done, true, .release);
-            }
-        }
-    };
-}
-
-var global_number: i32 = 0;
-var global_once = once(incr);
-
-fn incr() void {
-    global_number += 1;
-}
-
-test "Once executes its function just once" {
-    if (builtin.single_threaded) {
-        global_once.call();
-        global_once.call();
-    } else {
-        var threads: [10]std.Thread = undefined;
-        var thread_count: usize = 0;
-        defer for (threads[0..thread_count]) |handle| handle.join();
-
-        for (&threads) |*handle| {
-            handle.* = try std.Thread.spawn(.{}, struct {
-                fn thread_fn(x: u8) void {
-                    _ = x;
-                    global_once.call();
-                    if (global_number != 1) @panic("memory ordering bug");
-                }
-            }.thread_fn, .{0});
-            thread_count += 1;
-        }
-    }
-
-    try testing.expectEqual(@as(i32, 1), global_number);
-}
diff --git a/lib/std/std.zig b/lib/std/std.zig
@@ -86,7 +86,6 @@ pub const math = @import("math.zig");
 pub const mem = @import("mem.zig");
 pub const meta = @import("meta.zig");
 pub const os = @import("os.zig");
-pub const once = @import("once.zig").once;
 pub const pdb = @import("pdb.zig");
 pub const pie = @import("pie.zig");
 pub const posix = @import("posix.zig");

	zig fork of https://codeberg.org/ziglang/zig
	Log \| Files \| Refs \| README \| LICENSE

M	CMakeLists.txt	\|	2	--
M	lib/compiler/build_runner.zig	\|	19	+++++++++++--------
M	lib/compiler_rt/emutls.zig	\|	9	+++++++--
M	lib/fuzzer.zig	\|	2	+-
M	lib/std/Io.zig	\|	15	++++++++++++++-
A	lib/std/Io/RwLock.zig	\|	238	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	lib/std/Io/Semaphore.zig	\|	65	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	lib/std/Io/Threaded.zig	\|	170	+++++++++++++++++++++++++++++++++++++++++--------------------------------------
M	lib/std/Thread.zig	\|	14	+++-----------
D	lib/std/Thread/Condition.zig	\|	683	-------------------------------------------------------------------------------
D	lib/std/Thread/Futex.zig	\|	1063	-------------------------------------------------------------------------------
D	lib/std/Thread/Mutex.zig	\|	367	-------------------------------------------------------------------------------
M	lib/std/Thread/Mutex/Recursive.zig	\|	12	++++++------
D	lib/std/Thread/RwLock.zig	\|	386	-------------------------------------------------------------------------------
D	lib/std/Thread/Semaphore.zig	\|	111	-------------------------------------------------------------------------------
M	lib/std/atomic.zig	\|	25	+++++++++++++++++++++----
M	lib/std/debug.zig	\|	9	++++++---
M	lib/std/debug/Coverage.zig	\|	20	+++++++++++---------
M	lib/std/debug/Info.zig	\|	4	++--
M	lib/std/debug/SelfInfo/Elf.zig	\|	48	++++++++++++++++++++++++------------------------
M	lib/std/debug/SelfInfo/MachO.zig	\|	32	++++++++++++++++----------------
M	lib/std/debug/SelfInfo/Windows.zig	\|	20	++++++++++----------
M	lib/std/heap/SmpAllocator.zig	\|	2	+-
M	lib/std/heap/ThreadSafeAllocator.zig	\|	35	+++++++++++++++++++++--------------
M	lib/std/heap/debug_allocator.zig	\|	51	++++++++++-----------------------------------------
M	lib/std/heap/sbrk_allocator.zig	\|	18	++++++++++--------
M	lib/std/http/Client.zig	\|	67	++++++++++++++++++++++++++++++++++++-------------------------------
D	lib/std/once.zig	\|	71	-----------------------------------------------------------------------
M	lib/std/std.zig	\|	1	-