link.MachO: update parallel hasher to std.Io - zig - fork of https://codeberg.org/ziglang/zig

commit 6f46570958af8ae27308eb4a9470e05f33aaa522 (tree)
parent 181ac08459f8d4001c504330ee66037135e56908
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Mon,  8 Dec 2025 15:23:18 -0800

link.MachO: update parallel hasher to std.Io

Diffstat:
M lib/std/Build/Cache.zig  | 66 +++++++++++++++++++++++++++++-------------------------------------
M lib/std/Build/WebServer.zig  | 6 +++---
M lib/std/Io.zig  | 4 ++--
M lib/std/Io/File.zig  | 34 ++++++++++++++++++++++++++++++++--
M lib/std/fs/test.zig  | 14 +++++++-------
M src/link/MachO/CodeSignature.zig  | 17 +++++++++--------
M src/link/MachO/hasher.zig  | 24 +++++++++---------------
M src/link/MachO/uuid.zig  | 16 +++++++++-------

8 files changed, 100 insertions(+), 81 deletions(-)
diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig
@@ -800,7 +800,7 @@ pub const Manifest = struct {
                 }
 
                 var actual_digest: BinDigest = undefined;
-                hashFile(this_file, &actual_digest) catch |err| {
+                hashFile(io, this_file, &actual_digest) catch |err| {
                     self.diagnostic = .{ .file_read = .{
                         .file_index = idx,
                         .err = err,
@@ -908,9 +908,11 @@ pub const Manifest = struct {
         }
     }
 
-    fn populateFileHashHandle(self: *Manifest, ch_file: *File, handle: Io.File) !void {
+    fn populateFileHashHandle(self: *Manifest, ch_file: *File, io_file: Io.File) !void {
         const io = self.cache.io;
-        const actual_stat = try handle.stat(io);
+        const gpa = self.cache.gpa;
+
+        const actual_stat = try io_file.stat(io);
         ch_file.stat = .{
             .size = actual_stat.size,
             .mtime = actual_stat.mtime,
@@ -924,19 +926,17 @@ pub const Manifest = struct {
         }
 
         if (ch_file.max_file_size) |max_file_size| {
-            if (ch_file.stat.size > max_file_size) {
-                return error.FileTooBig;
-            }
+            if (ch_file.stat.size > max_file_size) return error.FileTooBig;
 
-            const contents = try self.cache.gpa.alloc(u8, @as(usize, @intCast(ch_file.stat.size)));
-            errdefer self.cache.gpa.free(contents);
+            // Hash while reading from disk, to keep the contents in the cpu
+            // cache while doing hashing.
+            const contents = try gpa.alloc(u8, @intCast(ch_file.stat.size));
+            errdefer gpa.free(contents);
 
-            // Hash while reading from disk, to keep the contents in the cpu cache while
-            // doing hashing.
             var hasher = hasher_init;
             var off: usize = 0;
             while (true) {
-                const bytes_read = try handle.pread(contents[off..], off);
+                const bytes_read = try io_file.readPositional(io, &.{contents[off..]}, off);
                 if (bytes_read == 0) break;
                 hasher.update(contents[off..][0..bytes_read]);
                 off += bytes_read;
@@ -945,7 +945,7 @@ pub const Manifest = struct {
 
             ch_file.contents = contents;
         } else {
-            try hashFile(handle, &ch_file.bin_digest);
+            try hashFile(io, io_file, &ch_file.bin_digest);
         }
 
         self.hash.hasher.update(&ch_file.bin_digest);
@@ -1169,13 +1169,11 @@ pub const Manifest = struct {
 
     fn downgradeToSharedLock(self: *Manifest) !void {
         if (!self.have_exclusive_lock) return;
+        const io = self.cache.io;
 
-        // WASI does not currently support flock, so we bypass it here.
-        // TODO: If/when flock is supported on WASI, this check should be removed.
-        //       See https://github.com/WebAssembly/wasi-filesystem/issues/2
-        if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
+        if (std.process.can_spawn or !builtin.single_threaded) {
             const manifest_file = self.manifest_file.?;
-            try manifest_file.downgradeLock();
+            try manifest_file.downgradeLock(io);
         }
 
         self.have_exclusive_lock = false;
@@ -1184,16 +1182,14 @@ pub const Manifest = struct {
     fn upgradeToExclusiveLock(self: *Manifest) error{CacheCheckFailed}!bool {
         if (self.have_exclusive_lock) return false;
         assert(self.manifest_file != null);
+        const io = self.cache.io;
 
-        // WASI does not currently support flock, so we bypass it here.
-        // TODO: If/when flock is supported on WASI, this check should be removed.
-        //       See https://github.com/WebAssembly/wasi-filesystem/issues/2
-        if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
+        if (std.process.can_spawn or !builtin.single_threaded) {
             const manifest_file = self.manifest_file.?;
             // Here we intentionally have a period where the lock is released, in case there are
             // other processes holding a shared lock.
-            manifest_file.unlock();
-            manifest_file.lock(.exclusive) catch |err| {
+            manifest_file.unlock(io);
+            manifest_file.lock(io, .exclusive) catch |err| {
                 self.diagnostic = .{ .manifest_lock = err };
                 return error.CacheCheckFailed;
             };
@@ -1206,12 +1202,8 @@ pub const Manifest = struct {
     /// The `Manifest` remains safe to deinit.
     /// Don't forget to call `writeManifest` before this!
     pub fn toOwnedLock(self: *Manifest) Lock {
-        const lock: Lock = .{
-            .manifest_file = self.manifest_file.?,
-        };
-
-        self.manifest_file = null;
-        return lock;
+        defer self.manifest_file = null;
+        return .{ .manifest_file = self.manifest_file.? };
     }
 
     /// Releases the manifest file and frees any memory the Manifest was using.
@@ -1223,7 +1215,7 @@ pub const Manifest = struct {
         if (self.manifest_file) |file| {
             if (builtin.os.tag == .windows) {
                 // See Lock.release for why this is required on Windows
-                file.unlock();
+                file.unlock(io);
             }
 
             file.close(io);
@@ -1308,15 +1300,15 @@ pub fn writeSmallFile(dir: Io.Dir, sub_path: []const u8, data: []const u8) !void
     }
 }
 
-fn hashFile(file: Io.File, bin_digest: *[Hasher.mac_length]u8) Io.File.PReadError!void {
-    var buf: [1024]u8 = undefined;
+fn hashFile(io: Io, file: Io.File, bin_digest: *[Hasher.mac_length]u8) Io.File.ReadPositionalError!void {
+    var buffer: [2048]u8 = undefined;
     var hasher = hasher_init;
-    var off: u64 = 0;
+    var offset: u64 = 0;
     while (true) {
-        const bytes_read = try file.pread(&buf, off);
-        if (bytes_read == 0) break;
-        hasher.update(buf[0..bytes_read]);
-        off += bytes_read;
+        const n = try file.readPositional(io, &.{&buffer}, offset);
+        if (n == 0) break;
+        hasher.update(buffer[0..n]);
+        offset += n;
     }
     hasher.final(bin_digest);
 }
diff --git a/lib/std/Build/WebServer.zig b/lib/std/Build/WebServer.zig
@@ -218,9 +218,9 @@ pub fn finishBuild(ws: *WebServer, opts: struct {
             else => {},
         }
         if (@bitSizeOf(usize) != 64) {
-            // Current implementation depends on posix.mmap()'s second parameter, `length: usize`,
-            // being compatible with `std.fs.getEndPos() u64`'s return value. This is not the case
-            // on 32-bit platforms.
+            // Current implementation depends on posix.mmap()'s second
+            // parameter, `length: usize`, being compatible with the u64 file
+            // length. This is not the case on 32-bit platforms.
             // Affects or affected by issues #5185, #22523, and #22464.
             std.process.fatal("--fuzz not yet implemented on {d}-bit platforms", .{@bitSizeOf(usize)});
         }
diff --git a/lib/std/Io.zig b/lib/std/Io.zig
@@ -692,9 +692,9 @@ pub const VTable = struct {
     fileWriteFileStreaming: *const fn (?*anyopaque, File, header: []const u8, *Io.File.Reader, Io.Limit) File.Writer.WriteFileError!usize,
     fileWriteFilePositional: *const fn (?*anyopaque, File, header: []const u8, *Io.File.Reader, Io.Limit, offset: u64) File.WriteFilePositionalError!usize,
     /// Returns 0 on end of stream.
-    fileReadStreaming: *const fn (?*anyopaque, File, data: [][]u8) File.Reader.Error!usize,
+    fileReadStreaming: *const fn (?*anyopaque, File, data: []const []u8) File.Reader.Error!usize,
     /// Returns 0 on end of stream.
-    fileReadPositional: *const fn (?*anyopaque, File, data: [][]u8, offset: u64) File.ReadPositionalError!usize,
+    fileReadPositional: *const fn (?*anyopaque, File, data: []const []u8, offset: u64) File.ReadPositionalError!usize,
     fileSeekBy: *const fn (?*anyopaque, File, relative_offset: i64) File.SeekError!void,
     fileSeekTo: *const fn (?*anyopaque, File, absolute_offset: u64) File.SeekError!void,
     fileSync: *const fn (?*anyopaque, File) File.SyncError!void,
diff --git a/lib/std/Io/File.zig b/lib/std/Io/File.zig
@@ -466,13 +466,21 @@ pub fn setTimestampsNow(file: File, io: Io) SetTimestampsError!void {
 
 pub const ReadPositionalError = Reader.Error || error{Unseekable};
 
-pub fn readPositional(file: File, io: Io, buffer: [][]u8, offset: u64) ReadPositionalError!usize {
+/// Returns 0 on end of stream.
+///
+/// See also:
+/// * `reader`
+pub fn readPositional(file: File, io: Io, buffer: []const []u8, offset: u64) ReadPositionalError!usize {
     return io.vtable.fileReadPositional(io.userdata, file, buffer, offset);
 }
 
 pub const WritePositionalError = Writer.Error || error{Unseekable};
 
-pub fn writePositional(file: File, io: Io, buffer: [][]const u8, offset: u64) WritePositionalError!usize {
+/// Returns the number of bytes written.
+///
+/// See also:
+/// * `writer`
+pub fn writePositional(file: File, io: Io, buffer: []const []const u8, offset: u64) WritePositionalError!usize {
     return io.vtable.fileWritePositional(io.userdata, file, buffer, offset);
 }
 
@@ -501,13 +509,35 @@ pub const WriteFilePositionalError = Writer.WriteFileError || error{Unseekable};
 ///
 /// Positional is more threadsafe, since the global seek position is not
 /// affected.
+///
+/// See also:
+/// * `readerStreaming`
 pub fn reader(file: File, io: Io, buffer: []u8) Reader {
     return .init(file, io, buffer);
 }
 
+/// Equivalent to creating a positional reader and reading multiple times to fill `buffer`.
+///
+/// Returns the number of bytes read into `buffer`. If less than `buffer.len`, end of file was reached.
+///
+/// See also:
+/// * `reader`
+pub fn readPositionalAll(file: File, io: Io, buffer: []u8, offset: u64) ReadPositionalError!usize {
+    var index: usize = 0;
+    while (index != buffer.len) {
+        const amt = try file.readPositional(io, &.{buffer[index..]}, offset + index);
+        if (amt == 0) break;
+        index += amt;
+    }
+    return index;
+}
+
 /// Positional is more threadsafe, since the global seek position is not
 /// affected, but when such syscalls are not available, preemptively
 /// initializing in streaming mode skips a failed syscall.
+///
+/// See also:
+/// * `reader`
 pub fn readerStreaming(file: File, io: Io, buffer: []u8) Reader {
     return .initStreaming(file, io, buffer);
 }
diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig
@@ -1455,7 +1455,7 @@ test "writev, readv" {
 
     try writer.interface.writeVecAll(&write_vecs);
     try writer.interface.flush();
-    try testing.expectEqual(@as(u64, line1.len + line2.len), try src_file.getEndPos());
+    try testing.expectEqual(@as(u64, line1.len + line2.len), try src_file.length(io));
 
     var reader = writer.moveToReader(io);
     try reader.seekTo(0);
@@ -1486,7 +1486,7 @@ test "pwritev, preadv" {
     try writer.seekTo(16);
     try writer.interface.writeVecAll(&lines);
     try writer.interface.flush();
-    try testing.expectEqual(@as(u64, 16 + line1.len + line2.len), try src_file.getEndPos());
+    try testing.expectEqual(@as(u64, 16 + line1.len + line2.len), try src_file.length(io));
 
     var reader = writer.moveToReader(io);
     try reader.seekTo(16);
@@ -1511,13 +1511,13 @@ test "setEndPos" {
     const f = try tmp.dir.openFile(io, file_name, .{ .mode = .read_write });
     defer f.close(io);
 
-    const initial_size = try f.getEndPos();
+    const initial_size = try f.length(io);
     var buffer: [32]u8 = undefined;
     var reader = f.reader(io, &.{});
 
     {
         try f.setEndPos(initial_size);
-        try testing.expectEqual(initial_size, try f.getEndPos());
+        try testing.expectEqual(initial_size, try f.length(io));
         try reader.seekTo(0);
         try testing.expectEqual(initial_size, try reader.interface.readSliceShort(&buffer));
         try testing.expectEqualStrings("ninebytes", buffer[0..@intCast(initial_size)]);
@@ -1526,7 +1526,7 @@ test "setEndPos" {
     {
         const larger = initial_size + 4;
         try f.setEndPos(larger);
-        try testing.expectEqual(larger, try f.getEndPos());
+        try testing.expectEqual(larger, try f.length(io));
         try reader.seekTo(0);
         try testing.expectEqual(larger, try reader.interface.readSliceShort(&buffer));
         try testing.expectEqualStrings("ninebytes\x00\x00\x00\x00", buffer[0..@intCast(larger)]);
@@ -1535,14 +1535,14 @@ test "setEndPos" {
     {
         const smaller = initial_size - 5;
         try f.setEndPos(smaller);
-        try testing.expectEqual(smaller, try f.getEndPos());
+        try testing.expectEqual(smaller, try f.length(io));
         try reader.seekTo(0);
         try testing.expectEqual(smaller, try reader.interface.readSliceShort(&buffer));
         try testing.expectEqualStrings("nine", buffer[0..@intCast(smaller)]);
     }
 
     try f.setEndPos(0);
-    try testing.expectEqual(0, try f.getEndPos());
+    try testing.expectEqual(0, try f.length(io));
     try reader.seekTo(0);
     try testing.expectEqual(0, try reader.interface.readSliceShort(&buffer));
 }
diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig
@@ -12,7 +12,7 @@ const Sha256 = std.crypto.hash.sha2.Sha256;
 const Allocator = std.mem.Allocator;
 
 const trace = @import("../../tracy.zig").trace;
-const Hasher = @import("hasher.zig").ParallelHasher;
+const ParallelHasher = @import("hasher.zig").ParallelHasher;
 const MachO = @import("../MachO.zig");
 
 const hash_size = Sha256.digest_length;
@@ -268,7 +268,9 @@ pub fn writeAdhocSignature(
     const tracy = trace(@src());
     defer tracy.end();
 
-    const allocator = macho_file.base.comp.gpa;
+    const comp = macho_file.base.comp;
+    const gpa = comp.gpa;
+    const io = comp.io;
 
     var header: macho.SuperBlob = .{
         .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE,
@@ -276,7 +278,7 @@ pub fn writeAdhocSignature(
         .count = 0,
     };
 
-    var blobs = std.array_list.Managed(Blob).init(allocator);
+    var blobs = std.array_list.Managed(Blob).init(gpa);
     defer blobs.deinit();
 
     self.code_directory.inner.execSegBase = opts.exec_seg_base;
@@ -286,13 +288,12 @@ pub fn writeAdhocSignature(
 
     const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size));
 
-    try self.code_directory.code_slots.ensureTotalCapacityPrecise(allocator, total_pages);
+    try self.code_directory.code_slots.ensureTotalCapacityPrecise(gpa, total_pages);
     self.code_directory.code_slots.items.len = total_pages;
     self.code_directory.inner.nCodeSlots = total_pages;
 
     // Calculate hash for each page (in file) and write it to the buffer
-    var hasher = Hasher(Sha256){ .allocator = allocator, .io = macho_file.base.comp.io };
-    try hasher.hash(opts.file, self.code_directory.code_slots.items, .{
+    try ParallelHasher(Sha256).hash(gpa, io, opts.file, self.code_directory.code_slots.items, .{
         .chunk_size = self.page_size,
         .max_file_size = opts.file_size,
     });
@@ -304,7 +305,7 @@ pub fn writeAdhocSignature(
     var hash: [hash_size]u8 = undefined;
 
     if (self.requirements) |*req| {
-        var a: std.Io.Writer.Allocating = .init(allocator);
+        var a: std.Io.Writer.Allocating = .init(gpa);
         defer a.deinit();
         try req.write(&a.writer);
         Sha256.hash(a.written(), &hash, .{});
@@ -316,7 +317,7 @@ pub fn writeAdhocSignature(
     }
 
     if (self.entitlements) |*ents| {
-        var a: std.Io.Writer.Allocating = .init(allocator);
+        var a: std.Io.Writer.Allocating = .init(gpa);
         defer a.deinit();
         try ents.write(&a.writer);
         Sha256.hash(a.written(), &hash, .{});
diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig
@@ -1,5 +1,6 @@
 const std = @import("std");
 const Io = std.Io;
+const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 
 const trace = @import("../../tracy.zig").trace;
@@ -8,20 +9,15 @@ pub fn ParallelHasher(comptime Hasher: type) type {
     const hash_size = Hasher.digest_length;
 
     return struct {
-        allocator: Allocator,
-        io: std.Io,
-
-        pub fn hash(self: Self, file: Io.File, out: [][hash_size]u8, opts: struct {
+        pub fn hash(self: Self, io: Io, file: Io.File, out: [][hash_size]u8, opts: struct {
             chunk_size: u64 = 0x4000,
             max_file_size: ?u64 = null,
         }) !void {
             const tracy = trace(@src());
             defer tracy.end();
 
-            const io = self.io;
-
             const file_size = blk: {
-                const file_size = opts.max_file_size orelse try file.getEndPos();
+                const file_size = opts.max_file_size orelse try file.length(io);
                 break :blk std.math.cast(usize, file_size) orelse return error.Overflow;
             };
             const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow;
@@ -29,12 +25,12 @@ pub fn ParallelHasher(comptime Hasher: type) type {
             const buffer = try self.allocator.alloc(u8, chunk_size * out.len);
             defer self.allocator.free(buffer);
 
-            const results = try self.allocator.alloc(Io.File.PReadError!usize, out.len);
+            const results = try self.allocator.alloc(Io.File.ReadPositionalError!usize, out.len);
             defer self.allocator.free(results);
 
             {
-                var group: std.Io.Group = .init;
-                errdefer group.cancel(io);
+                var group: Io.Group = .init;
+                defer group.cancel(io);
 
                 for (out, results, 0..) |*out_buf, *result, i| {
                     const fstart = i * chunk_size;
@@ -42,7 +38,7 @@ pub fn ParallelHasher(comptime Hasher: type) type {
                         file_size - fstart
                     else
                         chunk_size;
-                    group.async(io, worker, .{
+                    group.async(worker, .{
                         file,
                         fstart,
                         buffer[fstart..][0..fsize],
@@ -61,11 +57,9 @@ pub fn ParallelHasher(comptime Hasher: type) type {
             fstart: usize,
             buffer: []u8,
             out: *[hash_size]u8,
-            err: *Io.File.PReadError!usize,
+            err: *Io.File.ReadPositionalError!usize,
         ) void {
-            const tracy = trace(@src());
-            defer tracy.end();
-            err.* = file.preadAll(buffer, fstart);
+            err.* = file.readPositionalAll(buffer, fstart);
             Hasher.hash(buffer, out, .{});
         }
 
diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig
@@ -4,7 +4,7 @@ const Md5 = std.crypto.hash.Md5;
 
 const trace = @import("../../tracy.zig").trace;
 const Compilation = @import("../../Compilation.zig");
-const Hasher = @import("hasher.zig").ParallelHasher;
+const ParallelHasher = @import("hasher.zig").ParallelHasher;
 
 /// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
 /// the final digest.
@@ -16,21 +16,23 @@ pub fn calcUuid(comp: *const Compilation, file: Io.File, file_size: u64, out: *[
     const tracy = trace(@src());
     defer tracy.end();
 
+    const gpa = comp.gpa;
+    const io = comp.io;
+
     const chunk_size: usize = 1024 * 1024;
     const num_chunks: usize = std.math.cast(usize, @divTrunc(file_size, chunk_size)) orelse return error.Overflow;
     const actual_num_chunks = if (@rem(file_size, chunk_size) > 0) num_chunks + 1 else num_chunks;
 
-    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks);
-    defer comp.gpa.free(hashes);
+    const hashes = try gpa.alloc([Md5.digest_length]u8, actual_num_chunks);
+    defer gpa.free(hashes);
 
-    var hasher = Hasher(Md5){ .allocator = comp.gpa, .io = comp.io };
-    try hasher.hash(file, hashes, .{
+    try ParallelHasher(Md5).hash(gpa, io, file, hashes, .{
         .chunk_size = chunk_size,
         .max_file_size = file_size,
     });
 
-    const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length);
-    defer comp.gpa.free(final_buffer);
+    const final_buffer = try gpa.alloc(u8, actual_num_chunks * Md5.digest_length);
+    defer gpa.free(final_buffer);
 
     for (hashes, 0..) |hash, i| {
         @memcpy(final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);

	zig fork of https://codeberg.org/ziglang/zig
	Log \| Files \| Refs \| README \| LICENSE

M	lib/std/Build/Cache.zig	\|	66	+++++++++++++++++++++++++++++-------------------------------------
M	lib/std/Build/WebServer.zig	\|	6	+++---
M	lib/std/Io.zig	\|	4	++--
M	lib/std/Io/File.zig	\|	34	++++++++++++++++++++++++++++++++--
M	lib/std/fs/test.zig	\|	14	+++++++-------
M	src/link/MachO/CodeSignature.zig	\|	17	+++++++++--------
M	src/link/MachO/hasher.zig	\|	24	+++++++++---------------
M	src/link/MachO/uuid.zig	\|	16	+++++++++-------