commit 6f46570958af8ae27308eb4a9470e05f33aaa522 (tree)
parent 181ac08459f8d4001c504330ee66037135e56908
Author: Andrew Kelley <andrew@ziglang.org>
Date: Mon, 8 Dec 2025 15:23:18 -0800
link.MachO: update parallel hasher to std.Io
Diffstat:
8 files changed, 100 insertions(+), 81 deletions(-)
diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig
@@ -800,7 +800,7 @@ pub const Manifest = struct {
}
var actual_digest: BinDigest = undefined;
- hashFile(this_file, &actual_digest) catch |err| {
+ hashFile(io, this_file, &actual_digest) catch |err| {
self.diagnostic = .{ .file_read = .{
.file_index = idx,
.err = err,
@@ -908,9 +908,11 @@ pub const Manifest = struct {
}
}
- fn populateFileHashHandle(self: *Manifest, ch_file: *File, handle: Io.File) !void {
+ fn populateFileHashHandle(self: *Manifest, ch_file: *File, io_file: Io.File) !void {
const io = self.cache.io;
- const actual_stat = try handle.stat(io);
+ const gpa = self.cache.gpa;
+
+ const actual_stat = try io_file.stat(io);
ch_file.stat = .{
.size = actual_stat.size,
.mtime = actual_stat.mtime,
@@ -924,19 +926,17 @@ pub const Manifest = struct {
}
if (ch_file.max_file_size) |max_file_size| {
- if (ch_file.stat.size > max_file_size) {
- return error.FileTooBig;
- }
+ if (ch_file.stat.size > max_file_size) return error.FileTooBig;
- const contents = try self.cache.gpa.alloc(u8, @as(usize, @intCast(ch_file.stat.size)));
- errdefer self.cache.gpa.free(contents);
+ // Hash while reading from disk, so that the contents are still in
+ // the CPU cache when they are hashed.
+ const contents = try gpa.alloc(u8, @intCast(ch_file.stat.size));
+ errdefer gpa.free(contents);
- // Hash while reading from disk, to keep the contents in the cpu cache while
- // doing hashing.
var hasher = hasher_init;
var off: usize = 0;
while (true) {
- const bytes_read = try handle.pread(contents[off..], off);
+ const bytes_read = try io_file.readPositional(io, &.{contents[off..]}, off);
if (bytes_read == 0) break;
hasher.update(contents[off..][0..bytes_read]);
off += bytes_read;
@@ -945,7 +945,7 @@ pub const Manifest = struct {
ch_file.contents = contents;
} else {
- try hashFile(handle, &ch_file.bin_digest);
+ try hashFile(io, io_file, &ch_file.bin_digest);
}
self.hash.hasher.update(&ch_file.bin_digest);
@@ -1169,13 +1169,11 @@ pub const Manifest = struct {
fn downgradeToSharedLock(self: *Manifest) !void {
if (!self.have_exclusive_lock) return;
+ const io = self.cache.io;
- // WASI does not currently support flock, so we bypass it here.
- // TODO: If/when flock is supported on WASI, this check should be removed.
- // See https://github.com/WebAssembly/wasi-filesystem/issues/2
- if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
+ if (std.process.can_spawn or !builtin.single_threaded) {
const manifest_file = self.manifest_file.?;
- try manifest_file.downgradeLock();
+ try manifest_file.downgradeLock(io);
}
self.have_exclusive_lock = false;
@@ -1184,16 +1182,14 @@ pub const Manifest = struct {
fn upgradeToExclusiveLock(self: *Manifest) error{CacheCheckFailed}!bool {
if (self.have_exclusive_lock) return false;
assert(self.manifest_file != null);
+ const io = self.cache.io;
- // WASI does not currently support flock, so we bypass it here.
- // TODO: If/when flock is supported on WASI, this check should be removed.
- // See https://github.com/WebAssembly/wasi-filesystem/issues/2
- if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
+ if (std.process.can_spawn or !builtin.single_threaded) {
const manifest_file = self.manifest_file.?;
// Here we intentionally have a period where the lock is released, in case there are
// other processes holding a shared lock.
- manifest_file.unlock();
- manifest_file.lock(.exclusive) catch |err| {
+ manifest_file.unlock(io);
+ manifest_file.lock(io, .exclusive) catch |err| {
self.diagnostic = .{ .manifest_lock = err };
return error.CacheCheckFailed;
};
@@ -1206,12 +1202,8 @@ pub const Manifest = struct {
/// The `Manifest` remains safe to deinit.
/// Don't forget to call `writeManifest` before this!
pub fn toOwnedLock(self: *Manifest) Lock {
- const lock: Lock = .{
- .manifest_file = self.manifest_file.?,
- };
-
- self.manifest_file = null;
- return lock;
+ defer self.manifest_file = null;
+ return .{ .manifest_file = self.manifest_file.? };
}
/// Releases the manifest file and frees any memory the Manifest was using.
@@ -1223,7 +1215,7 @@ pub const Manifest = struct {
if (self.manifest_file) |file| {
if (builtin.os.tag == .windows) {
// See Lock.release for why this is required on Windows
- file.unlock();
+ file.unlock(io);
}
file.close(io);
@@ -1308,15 +1300,15 @@ pub fn writeSmallFile(dir: Io.Dir, sub_path: []const u8, data: []const u8) !void
}
}
-fn hashFile(file: Io.File, bin_digest: *[Hasher.mac_length]u8) Io.File.PReadError!void {
- var buf: [1024]u8 = undefined;
+fn hashFile(io: Io, file: Io.File, bin_digest: *[Hasher.mac_length]u8) Io.File.ReadPositionalError!void {
+ var buffer: [2048]u8 = undefined;
var hasher = hasher_init;
- var off: u64 = 0;
+ var offset: u64 = 0;
while (true) {
- const bytes_read = try file.pread(&buf, off);
- if (bytes_read == 0) break;
- hasher.update(buf[0..bytes_read]);
- off += bytes_read;
+ const n = try file.readPositional(io, &.{&buffer}, offset);
+ if (n == 0) break;
+ hasher.update(buffer[0..n]);
+ offset += n;
}
hasher.final(bin_digest);
}
diff --git a/lib/std/Build/WebServer.zig b/lib/std/Build/WebServer.zig
@@ -218,9 +218,9 @@ pub fn finishBuild(ws: *WebServer, opts: struct {
else => {},
}
if (@bitSizeOf(usize) != 64) {
- // Current implementation depends on posix.mmap()'s second parameter, `length: usize`,
- // being compatible with `std.fs.getEndPos() u64`'s return value. This is not the case
- // on 32-bit platforms.
+ // Current implementation depends on posix.mmap()'s second
+ // parameter, `length: usize`, being compatible with the file system's
+ // u64 file length. This is not the case on 32-bit platforms.
// Affects or affected by issues #5185, #22523, and #22464.
std.process.fatal("--fuzz not yet implemented on {d}-bit platforms", .{@bitSizeOf(usize)});
}
diff --git a/lib/std/Io.zig b/lib/std/Io.zig
@@ -692,9 +692,9 @@ pub const VTable = struct {
fileWriteFileStreaming: *const fn (?*anyopaque, File, header: []const u8, *Io.File.Reader, Io.Limit) File.Writer.WriteFileError!usize,
fileWriteFilePositional: *const fn (?*anyopaque, File, header: []const u8, *Io.File.Reader, Io.Limit, offset: u64) File.WriteFilePositionalError!usize,
/// Returns 0 on end of stream.
- fileReadStreaming: *const fn (?*anyopaque, File, data: [][]u8) File.Reader.Error!usize,
+ fileReadStreaming: *const fn (?*anyopaque, File, data: []const []u8) File.Reader.Error!usize,
/// Returns 0 on end of stream.
- fileReadPositional: *const fn (?*anyopaque, File, data: [][]u8, offset: u64) File.ReadPositionalError!usize,
+ fileReadPositional: *const fn (?*anyopaque, File, data: []const []u8, offset: u64) File.ReadPositionalError!usize,
fileSeekBy: *const fn (?*anyopaque, File, relative_offset: i64) File.SeekError!void,
fileSeekTo: *const fn (?*anyopaque, File, absolute_offset: u64) File.SeekError!void,
fileSync: *const fn (?*anyopaque, File) File.SyncError!void,
diff --git a/lib/std/Io/File.zig b/lib/std/Io/File.zig
@@ -466,13 +466,21 @@ pub fn setTimestampsNow(file: File, io: Io) SetTimestampsError!void {
pub const ReadPositionalError = Reader.Error || error{Unseekable};
-pub fn readPositional(file: File, io: Io, buffer: [][]u8, offset: u64) ReadPositionalError!usize {
+/// Returns 0 on end of stream.
+///
+/// See also:
+/// * `reader`
+pub fn readPositional(file: File, io: Io, buffer: []const []u8, offset: u64) ReadPositionalError!usize {
return io.vtable.fileReadPositional(io.userdata, file, buffer, offset);
}
pub const WritePositionalError = Writer.Error || error{Unseekable};
-pub fn writePositional(file: File, io: Io, buffer: [][]const u8, offset: u64) WritePositionalError!usize {
+/// Returns the number of bytes written.
+///
+/// See also:
+/// * `writer`
+pub fn writePositional(file: File, io: Io, buffer: []const []const u8, offset: u64) WritePositionalError!usize {
return io.vtable.fileWritePositional(io.userdata, file, buffer, offset);
}
@@ -501,13 +509,35 @@ pub const WriteFilePositionalError = Writer.WriteFileError || error{Unseekable};
///
/// Positional is more threadsafe, since the global seek position is not
/// affected.
+///
+/// See also:
+/// * `readerStreaming`
pub fn reader(file: File, io: Io, buffer: []u8) Reader {
return .init(file, io, buffer);
}
+/// Equivalent to creating a positional reader and reading multiple times to fill `buffer`.
+///
+/// Returns number of bytes read into `buffer`. If less than `buffer.len`, end of file occurred.
+///
+/// See also:
+/// * `reader`
+pub fn readPositionalAll(file: File, io: Io, buffer: []u8, offset: u64) ReadPositionalError!usize {
+ var index: usize = 0;
+ while (index != buffer.len) {
+ const amt = try file.readPositional(io, &.{buffer[index..]}, offset + index);
+ if (amt == 0) break;
+ index += amt;
+ }
+ return index;
+}
+
/// Positional is more threadsafe, since the global seek position is not
/// affected, but when such syscalls are not available, preemptively
/// initializing in streaming mode skips a failed syscall.
+///
+/// See also:
+/// * `reader`
pub fn readerStreaming(file: File, io: Io, buffer: []u8) Reader {
return .initStreaming(file, io, buffer);
}
diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig
@@ -1455,7 +1455,7 @@ test "writev, readv" {
try writer.interface.writeVecAll(&write_vecs);
try writer.interface.flush();
- try testing.expectEqual(@as(u64, line1.len + line2.len), try src_file.getEndPos());
+ try testing.expectEqual(@as(u64, line1.len + line2.len), try src_file.length(io));
var reader = writer.moveToReader(io);
try reader.seekTo(0);
@@ -1486,7 +1486,7 @@ test "pwritev, preadv" {
try writer.seekTo(16);
try writer.interface.writeVecAll(&lines);
try writer.interface.flush();
- try testing.expectEqual(@as(u64, 16 + line1.len + line2.len), try src_file.getEndPos());
+ try testing.expectEqual(@as(u64, 16 + line1.len + line2.len), try src_file.length(io));
var reader = writer.moveToReader(io);
try reader.seekTo(16);
@@ -1511,13 +1511,13 @@ test "setEndPos" {
const f = try tmp.dir.openFile(io, file_name, .{ .mode = .read_write });
defer f.close(io);
- const initial_size = try f.getEndPos();
+ const initial_size = try f.length(io);
var buffer: [32]u8 = undefined;
var reader = f.reader(io, &.{});
{
try f.setEndPos(initial_size);
- try testing.expectEqual(initial_size, try f.getEndPos());
+ try testing.expectEqual(initial_size, try f.length(io));
try reader.seekTo(0);
try testing.expectEqual(initial_size, try reader.interface.readSliceShort(&buffer));
try testing.expectEqualStrings("ninebytes", buffer[0..@intCast(initial_size)]);
@@ -1526,7 +1526,7 @@ test "setEndPos" {
{
const larger = initial_size + 4;
try f.setEndPos(larger);
- try testing.expectEqual(larger, try f.getEndPos());
+ try testing.expectEqual(larger, try f.length(io));
try reader.seekTo(0);
try testing.expectEqual(larger, try reader.interface.readSliceShort(&buffer));
try testing.expectEqualStrings("ninebytes\x00\x00\x00\x00", buffer[0..@intCast(larger)]);
@@ -1535,14 +1535,14 @@ test "setEndPos" {
{
const smaller = initial_size - 5;
try f.setEndPos(smaller);
- try testing.expectEqual(smaller, try f.getEndPos());
+ try testing.expectEqual(smaller, try f.length(io));
try reader.seekTo(0);
try testing.expectEqual(smaller, try reader.interface.readSliceShort(&buffer));
try testing.expectEqualStrings("nine", buffer[0..@intCast(smaller)]);
}
try f.setEndPos(0);
- try testing.expectEqual(0, try f.getEndPos());
+ try testing.expectEqual(0, try f.length(io));
try reader.seekTo(0);
try testing.expectEqual(0, try reader.interface.readSliceShort(&buffer));
}
diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig
@@ -12,7 +12,7 @@ const Sha256 = std.crypto.hash.sha2.Sha256;
const Allocator = std.mem.Allocator;
const trace = @import("../../tracy.zig").trace;
-const Hasher = @import("hasher.zig").ParallelHasher;
+const ParallelHasher = @import("hasher.zig").ParallelHasher;
const MachO = @import("../MachO.zig");
const hash_size = Sha256.digest_length;
@@ -268,7 +268,9 @@ pub fn writeAdhocSignature(
const tracy = trace(@src());
defer tracy.end();
- const allocator = macho_file.base.comp.gpa;
+ const comp = macho_file.base.comp;
+ const gpa = comp.gpa;
+ const io = comp.io;
var header: macho.SuperBlob = .{
.magic = macho.CSMAGIC_EMBEDDED_SIGNATURE,
@@ -276,7 +278,7 @@ pub fn writeAdhocSignature(
.count = 0,
};
- var blobs = std.array_list.Managed(Blob).init(allocator);
+ var blobs = std.array_list.Managed(Blob).init(gpa);
defer blobs.deinit();
self.code_directory.inner.execSegBase = opts.exec_seg_base;
@@ -286,13 +288,12 @@ pub fn writeAdhocSignature(
const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size));
- try self.code_directory.code_slots.ensureTotalCapacityPrecise(allocator, total_pages);
+ try self.code_directory.code_slots.ensureTotalCapacityPrecise(gpa, total_pages);
self.code_directory.code_slots.items.len = total_pages;
self.code_directory.inner.nCodeSlots = total_pages;
// Calculate hash for each page (in file) and write it to the buffer
- var hasher = Hasher(Sha256){ .allocator = allocator, .io = macho_file.base.comp.io };
- try hasher.hash(opts.file, self.code_directory.code_slots.items, .{
+ try ParallelHasher(Sha256).hash(gpa, io, opts.file, self.code_directory.code_slots.items, .{
.chunk_size = self.page_size,
.max_file_size = opts.file_size,
});
@@ -304,7 +305,7 @@ pub fn writeAdhocSignature(
var hash: [hash_size]u8 = undefined;
if (self.requirements) |*req| {
- var a: std.Io.Writer.Allocating = .init(allocator);
+ var a: std.Io.Writer.Allocating = .init(gpa);
defer a.deinit();
try req.write(&a.writer);
Sha256.hash(a.written(), &hash, .{});
@@ -316,7 +317,7 @@ pub fn writeAdhocSignature(
}
if (self.entitlements) |*ents| {
- var a: std.Io.Writer.Allocating = .init(allocator);
+ var a: std.Io.Writer.Allocating = .init(gpa);
defer a.deinit();
try ents.write(&a.writer);
Sha256.hash(a.written(), &hash, .{});
diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig
@@ -1,5 +1,6 @@
const std = @import("std");
const Io = std.Io;
+const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const trace = @import("../../tracy.zig").trace;
@@ -8,20 +9,15 @@ pub fn ParallelHasher(comptime Hasher: type) type {
const hash_size = Hasher.digest_length;
return struct {
- allocator: Allocator,
- io: std.Io,
-
- pub fn hash(self: Self, file: Io.File, out: [][hash_size]u8, opts: struct {
+ pub fn hash(self: Self, io: Io, file: Io.File, out: [][hash_size]u8, opts: struct {
chunk_size: u64 = 0x4000,
max_file_size: ?u64 = null,
}) !void {
const tracy = trace(@src());
defer tracy.end();
- const io = self.io;
-
const file_size = blk: {
- const file_size = opts.max_file_size orelse try file.getEndPos();
+ const file_size = opts.max_file_size orelse try file.length(io);
break :blk std.math.cast(usize, file_size) orelse return error.Overflow;
};
const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow;
@@ -29,12 +25,12 @@ pub fn ParallelHasher(comptime Hasher: type) type {
const buffer = try self.allocator.alloc(u8, chunk_size * out.len);
defer self.allocator.free(buffer);
- const results = try self.allocator.alloc(Io.File.PReadError!usize, out.len);
+ const results = try self.allocator.alloc(Io.File.ReadPositionalError!usize, out.len);
defer self.allocator.free(results);
{
- var group: std.Io.Group = .init;
- errdefer group.cancel(io);
+ var group: Io.Group = .init;
+ defer group.cancel(io);
for (out, results, 0..) |*out_buf, *result, i| {
const fstart = i * chunk_size;
@@ -42,7 +38,7 @@ pub fn ParallelHasher(comptime Hasher: type) type {
file_size - fstart
else
chunk_size;
- group.async(io, worker, .{
+ group.async(worker, .{
file,
fstart,
buffer[fstart..][0..fsize],
@@ -61,11 +57,9 @@ pub fn ParallelHasher(comptime Hasher: type) type {
fstart: usize,
buffer: []u8,
out: *[hash_size]u8,
- err: *Io.File.PReadError!usize,
+ err: *Io.File.ReadPositionalError!usize,
) void {
- const tracy = trace(@src());
- defer tracy.end();
- err.* = file.preadAll(buffer, fstart);
+ err.* = file.readPositionalAll(buffer, fstart);
Hasher.hash(buffer, out, .{});
}
diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig
@@ -4,7 +4,7 @@ const Md5 = std.crypto.hash.Md5;
const trace = @import("../../tracy.zig").trace;
const Compilation = @import("../../Compilation.zig");
-const Hasher = @import("hasher.zig").ParallelHasher;
+const ParallelHasher = @import("hasher.zig").ParallelHasher;
/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
/// the final digest.
@@ -16,21 +16,23 @@ pub fn calcUuid(comp: *const Compilation, file: Io.File, file_size: u64, out: *[
const tracy = trace(@src());
defer tracy.end();
+ const gpa = comp.gpa;
+ const io = comp.io;
+
const chunk_size: usize = 1024 * 1024;
const num_chunks: usize = std.math.cast(usize, @divTrunc(file_size, chunk_size)) orelse return error.Overflow;
const actual_num_chunks = if (@rem(file_size, chunk_size) > 0) num_chunks + 1 else num_chunks;
- const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks);
- defer comp.gpa.free(hashes);
+ const hashes = try gpa.alloc([Md5.digest_length]u8, actual_num_chunks);
+ defer gpa.free(hashes);
- var hasher = Hasher(Md5){ .allocator = comp.gpa, .io = comp.io };
- try hasher.hash(file, hashes, .{
+ try ParallelHasher(Md5).hash(gpa, io, file, hashes, .{
.chunk_size = chunk_size,
.max_file_size = file_size,
});
- const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length);
- defer comp.gpa.free(final_buffer);
+ const final_buffer = try gpa.alloc(u8, actual_num_chunks * Md5.digest_length);
+ defer gpa.free(final_buffer);
for (hashes, 0..) |hash, i| {
@memcpy(final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);