zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 6f46570958af8ae27308eb4a9470e05f33aaa522 (tree)
parent 181ac08459f8d4001c504330ee66037135e56908
Author: Andrew Kelley <andrew@ziglang.org>
Date:   Mon,  8 Dec 2025 15:23:18 -0800

link.MachO: update parallel hasher to std.Io

Diffstat:
M lib/std/Build/Cache.zig | 66 +++++++++++++++++++++++++++++-------------------------------------
M lib/std/Build/WebServer.zig | 6 +++---
M lib/std/Io.zig | 4 ++--
M lib/std/Io/File.zig | 34 ++++++++++++++++++++++++++++++++--
M lib/std/fs/test.zig | 14 +++++++-------
M src/link/MachO/CodeSignature.zig | 17 +++++++++--------
M src/link/MachO/hasher.zig | 24 +++++++++---------------
M src/link/MachO/uuid.zig | 16 +++++++++-------
8 files changed, 100 insertions(+), 81 deletions(-)

diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig @@ -800,7 +800,7 @@ pub const Manifest = struct { } var actual_digest: BinDigest = undefined; - hashFile(this_file, &actual_digest) catch |err| { + hashFile(io, this_file, &actual_digest) catch |err| { self.diagnostic = .{ .file_read = .{ .file_index = idx, .err = err, @@ -908,9 +908,11 @@ pub const Manifest = struct { } } - fn populateFileHashHandle(self: *Manifest, ch_file: *File, handle: Io.File) !void { + fn populateFileHashHandle(self: *Manifest, ch_file: *File, io_file: Io.File) !void { const io = self.cache.io; - const actual_stat = try handle.stat(io); + const gpa = self.cache.gpa; + + const actual_stat = try io_file.stat(io); ch_file.stat = .{ .size = actual_stat.size, .mtime = actual_stat.mtime, @@ -924,19 +926,17 @@ pub const Manifest = struct { } if (ch_file.max_file_size) |max_file_size| { - if (ch_file.stat.size > max_file_size) { - return error.FileTooBig; - } + if (ch_file.stat.size > max_file_size) return error.FileTooBig; - const contents = try self.cache.gpa.alloc(u8, @as(usize, @intCast(ch_file.stat.size))); - errdefer self.cache.gpa.free(contents); + // Hash while reading from disk, to keep the contents in the cpu + // cache while doing hashing. + const contents = try gpa.alloc(u8, @intCast(ch_file.stat.size)); + errdefer gpa.free(contents); - // Hash while reading from disk, to keep the contents in the cpu cache while - // doing hashing. 
var hasher = hasher_init; var off: usize = 0; while (true) { - const bytes_read = try handle.pread(contents[off..], off); + const bytes_read = try io_file.readPositional(io, &.{contents[off..]}, off); if (bytes_read == 0) break; hasher.update(contents[off..][0..bytes_read]); off += bytes_read; @@ -945,7 +945,7 @@ pub const Manifest = struct { ch_file.contents = contents; } else { - try hashFile(handle, &ch_file.bin_digest); + try hashFile(io, io_file, &ch_file.bin_digest); } self.hash.hasher.update(&ch_file.bin_digest); @@ -1169,13 +1169,11 @@ pub const Manifest = struct { fn downgradeToSharedLock(self: *Manifest) !void { if (!self.have_exclusive_lock) return; + const io = self.cache.io; - // WASI does not currently support flock, so we bypass it here. - // TODO: If/when flock is supported on WASI, this check should be removed. - // See https://github.com/WebAssembly/wasi-filesystem/issues/2 - if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) { + if (std.process.can_spawn or !builtin.single_threaded) { const manifest_file = self.manifest_file.?; - try manifest_file.downgradeLock(); + try manifest_file.downgradeLock(io); } self.have_exclusive_lock = false; @@ -1184,16 +1182,14 @@ pub const Manifest = struct { fn upgradeToExclusiveLock(self: *Manifest) error{CacheCheckFailed}!bool { if (self.have_exclusive_lock) return false; assert(self.manifest_file != null); + const io = self.cache.io; - // WASI does not currently support flock, so we bypass it here. - // TODO: If/when flock is supported on WASI, this check should be removed. - // See https://github.com/WebAssembly/wasi-filesystem/issues/2 - if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) { + if (std.process.can_spawn or !builtin.single_threaded) { const manifest_file = self.manifest_file.?; // Here we intentionally have a period where the lock is released, in case there are // other processes holding a shared lock. 
- manifest_file.unlock(); - manifest_file.lock(.exclusive) catch |err| { + manifest_file.unlock(io); + manifest_file.lock(io, .exclusive) catch |err| { self.diagnostic = .{ .manifest_lock = err }; return error.CacheCheckFailed; }; @@ -1206,12 +1202,8 @@ pub const Manifest = struct { /// The `Manifest` remains safe to deinit. /// Don't forget to call `writeManifest` before this! pub fn toOwnedLock(self: *Manifest) Lock { - const lock: Lock = .{ - .manifest_file = self.manifest_file.?, - }; - - self.manifest_file = null; - return lock; + defer self.manifest_file = null; + return .{ .manifest_file = self.manifest_file.? }; } /// Releases the manifest file and frees any memory the Manifest was using. @@ -1223,7 +1215,7 @@ pub const Manifest = struct { if (self.manifest_file) |file| { if (builtin.os.tag == .windows) { // See Lock.release for why this is required on Windows - file.unlock(); + file.unlock(io); } file.close(io); @@ -1308,15 +1300,15 @@ pub fn writeSmallFile(dir: Io.Dir, sub_path: []const u8, data: []const u8) !void } } -fn hashFile(file: Io.File, bin_digest: *[Hasher.mac_length]u8) Io.File.PReadError!void { - var buf: [1024]u8 = undefined; +fn hashFile(io: Io, file: Io.File, bin_digest: *[Hasher.mac_length]u8) Io.File.ReadPositionalError!void { + var buffer: [2048]u8 = undefined; var hasher = hasher_init; - var off: u64 = 0; + var offset: u64 = 0; while (true) { - const bytes_read = try file.pread(&buf, off); - if (bytes_read == 0) break; - hasher.update(buf[0..bytes_read]); - off += bytes_read; + const n = try file.readPositional(io, &.{&buffer}, offset); + if (n == 0) break; + hasher.update(buffer[0..n]); + offset += n; } hasher.final(bin_digest); } diff --git a/lib/std/Build/WebServer.zig b/lib/std/Build/WebServer.zig @@ -218,9 +218,9 @@ pub fn finishBuild(ws: *WebServer, opts: struct { else => {}, } if (@bitSizeOf(usize) != 64) { - // Current implementation depends on posix.mmap()'s second parameter, `length: usize`, - // being compatible with 
`std.fs.getEndPos() u64`'s return value. This is not the case - // on 32-bit platforms. + // Current implementation depends on posix.mmap()'s second + // parameter, `length: usize`, being compatible with file system's + // u64 return value. This is not the case on 32-bit platforms. // Affects or affected by issues #5185, #22523, and #22464. std.process.fatal("--fuzz not yet implemented on {d}-bit platforms", .{@bitSizeOf(usize)}); } diff --git a/lib/std/Io.zig b/lib/std/Io.zig @@ -692,9 +692,9 @@ pub const VTable = struct { fileWriteFileStreaming: *const fn (?*anyopaque, File, header: []const u8, *Io.File.Reader, Io.Limit) File.Writer.WriteFileError!usize, fileWriteFilePositional: *const fn (?*anyopaque, File, header: []const u8, *Io.File.Reader, Io.Limit, offset: u64) File.WriteFilePositionalError!usize, /// Returns 0 on end of stream. - fileReadStreaming: *const fn (?*anyopaque, File, data: [][]u8) File.Reader.Error!usize, + fileReadStreaming: *const fn (?*anyopaque, File, data: []const []u8) File.Reader.Error!usize, /// Returns 0 on end of stream. - fileReadPositional: *const fn (?*anyopaque, File, data: [][]u8, offset: u64) File.ReadPositionalError!usize, + fileReadPositional: *const fn (?*anyopaque, File, data: []const []u8, offset: u64) File.ReadPositionalError!usize, fileSeekBy: *const fn (?*anyopaque, File, relative_offset: i64) File.SeekError!void, fileSeekTo: *const fn (?*anyopaque, File, absolute_offset: u64) File.SeekError!void, fileSync: *const fn (?*anyopaque, File) File.SyncError!void, diff --git a/lib/std/Io/File.zig b/lib/std/Io/File.zig @@ -466,13 +466,21 @@ pub fn setTimestampsNow(file: File, io: Io) SetTimestampsError!void { pub const ReadPositionalError = Reader.Error || error{Unseekable}; -pub fn readPositional(file: File, io: Io, buffer: [][]u8, offset: u64) ReadPositionalError!usize { +/// Returns 0 on end of stream. 
+/// +/// See also: +/// * `reader` +pub fn readPositional(file: File, io: Io, buffer: []const []u8, offset: u64) ReadPositionalError!usize { return io.vtable.fileReadPositional(io.userdata, file, buffer, offset); } pub const WritePositionalError = Writer.Error || error{Unseekable}; -pub fn writePositional(file: File, io: Io, buffer: [][]const u8, offset: u64) WritePositionalError!usize { +/// Returns 0 on end of stream. +/// +/// See also: +/// * `writer` +pub fn writePositional(file: File, io: Io, buffer: []const []const u8, offset: u64) WritePositionalError!usize { return io.vtable.fileWritePositional(io.userdata, file, buffer, offset); } @@ -501,13 +509,35 @@ pub const WriteFilePositionalError = Writer.WriteFileError || error{Unseekable}; /// /// Positional is more threadsafe, since the global seek position is not /// affected. +/// +/// See also: +/// * `readerStreaming` pub fn reader(file: File, io: Io, buffer: []u8) Reader { return .init(file, io, buffer); } +/// Equivalent to creating a positional reader and reading multiple times to fill `buffer`. +/// +/// Returns number of bytes read into `buffer`. If less than `buffer.len`, end of file occurred. +/// +/// See also: +/// * `reader` +pub fn readPositionalAll(file: File, io: Io, buffer: []u8, offset: u64) ReadPositionalError!usize { + var index: usize = 0; + while (index != buffer.len) { + const amt = try file.readPositional(io, &.{buffer[index..]}, offset + index); + if (amt == 0) break; + index += amt; + } + return index; +} + /// Positional is more threadsafe, since the global seek position is not /// affected, but when such syscalls are not available, preemptively /// initializing in streaming mode skips a failed syscall. 
+/// +/// See also: +/// * `reader` pub fn readerStreaming(file: File, io: Io, buffer: []u8) Reader { return .initStreaming(file, io, buffer); } diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig @@ -1455,7 +1455,7 @@ test "writev, readv" { try writer.interface.writeVecAll(&write_vecs); try writer.interface.flush(); - try testing.expectEqual(@as(u64, line1.len + line2.len), try src_file.getEndPos()); + try testing.expectEqual(@as(u64, line1.len + line2.len), try src_file.length(io)); var reader = writer.moveToReader(io); try reader.seekTo(0); @@ -1486,7 +1486,7 @@ test "pwritev, preadv" { try writer.seekTo(16); try writer.interface.writeVecAll(&lines); try writer.interface.flush(); - try testing.expectEqual(@as(u64, 16 + line1.len + line2.len), try src_file.getEndPos()); + try testing.expectEqual(@as(u64, 16 + line1.len + line2.len), try src_file.length(io)); var reader = writer.moveToReader(io); try reader.seekTo(16); @@ -1511,13 +1511,13 @@ test "setEndPos" { const f = try tmp.dir.openFile(io, file_name, .{ .mode = .read_write }); defer f.close(io); - const initial_size = try f.getEndPos(); + const initial_size = try f.length(io); var buffer: [32]u8 = undefined; var reader = f.reader(io, &.{}); { try f.setEndPos(initial_size); - try testing.expectEqual(initial_size, try f.getEndPos()); + try testing.expectEqual(initial_size, try f.length(io)); try reader.seekTo(0); try testing.expectEqual(initial_size, try reader.interface.readSliceShort(&buffer)); try testing.expectEqualStrings("ninebytes", buffer[0..@intCast(initial_size)]); @@ -1526,7 +1526,7 @@ test "setEndPos" { { const larger = initial_size + 4; try f.setEndPos(larger); - try testing.expectEqual(larger, try f.getEndPos()); + try testing.expectEqual(larger, try f.length(io)); try reader.seekTo(0); try testing.expectEqual(larger, try reader.interface.readSliceShort(&buffer)); try testing.expectEqualStrings("ninebytes\x00\x00\x00\x00", buffer[0..@intCast(larger)]); @@ -1535,14 +1535,14 @@ test "setEndPos" 
{ { const smaller = initial_size - 5; try f.setEndPos(smaller); - try testing.expectEqual(smaller, try f.getEndPos()); + try testing.expectEqual(smaller, try f.length(io)); try reader.seekTo(0); try testing.expectEqual(smaller, try reader.interface.readSliceShort(&buffer)); try testing.expectEqualStrings("nine", buffer[0..@intCast(smaller)]); } try f.setEndPos(0); - try testing.expectEqual(0, try f.getEndPos()); + try testing.expectEqual(0, try f.length(io)); try reader.seekTo(0); try testing.expectEqual(0, try reader.interface.readSliceShort(&buffer)); } diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig @@ -12,7 +12,7 @@ const Sha256 = std.crypto.hash.sha2.Sha256; const Allocator = std.mem.Allocator; const trace = @import("../../tracy.zig").trace; -const Hasher = @import("hasher.zig").ParallelHasher; +const ParallelHasher = @import("hasher.zig").ParallelHasher; const MachO = @import("../MachO.zig"); const hash_size = Sha256.digest_length; @@ -268,7 +268,9 @@ pub fn writeAdhocSignature( const tracy = trace(@src()); defer tracy.end(); - const allocator = macho_file.base.comp.gpa; + const comp = macho_file.base.comp; + const gpa = comp.gpa; + const io = comp.io; var header: macho.SuperBlob = .{ .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE, @@ -276,7 +278,7 @@ pub fn writeAdhocSignature( .count = 0, }; - var blobs = std.array_list.Managed(Blob).init(allocator); + var blobs = std.array_list.Managed(Blob).init(gpa); defer blobs.deinit(); self.code_directory.inner.execSegBase = opts.exec_seg_base; @@ -286,13 +288,12 @@ pub fn writeAdhocSignature( const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size)); - try self.code_directory.code_slots.ensureTotalCapacityPrecise(allocator, total_pages); + try self.code_directory.code_slots.ensureTotalCapacityPrecise(gpa, total_pages); self.code_directory.code_slots.items.len = total_pages; self.code_directory.inner.nCodeSlots = total_pages; // 
Calculate hash for each page (in file) and write it to the buffer - var hasher = Hasher(Sha256){ .allocator = allocator, .io = macho_file.base.comp.io }; - try hasher.hash(opts.file, self.code_directory.code_slots.items, .{ + try ParallelHasher(Sha256).hash(gpa, io, opts.file, self.code_directory.code_slots.items, .{ .chunk_size = self.page_size, .max_file_size = opts.file_size, }); @@ -304,7 +305,7 @@ pub fn writeAdhocSignature( var hash: [hash_size]u8 = undefined; if (self.requirements) |*req| { - var a: std.Io.Writer.Allocating = .init(allocator); + var a: std.Io.Writer.Allocating = .init(gpa); defer a.deinit(); try req.write(&a.writer); Sha256.hash(a.written(), &hash, .{}); @@ -316,7 +317,7 @@ pub fn writeAdhocSignature( } if (self.entitlements) |*ents| { - var a: std.Io.Writer.Allocating = .init(allocator); + var a: std.Io.Writer.Allocating = .init(gpa); defer a.deinit(); try ents.write(&a.writer); Sha256.hash(a.written(), &hash, .{}); diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig @@ -1,5 +1,6 @@ const std = @import("std"); const Io = std.Io; +const assert = std.debug.assert; const Allocator = std.mem.Allocator; const trace = @import("../../tracy.zig").trace; @@ -8,20 +9,15 @@ pub fn ParallelHasher(comptime Hasher: type) type { const hash_size = Hasher.digest_length; return struct { - allocator: Allocator, - io: std.Io, - - pub fn hash(self: Self, file: Io.File, out: [][hash_size]u8, opts: struct { + pub fn hash(self: Self, io: Io, file: Io.File, out: [][hash_size]u8, opts: struct { chunk_size: u64 = 0x4000, max_file_size: ?u64 = null, }) !void { const tracy = trace(@src()); defer tracy.end(); - const io = self.io; - const file_size = blk: { - const file_size = opts.max_file_size orelse try file.getEndPos(); + const file_size = opts.max_file_size orelse try file.length(io); break :blk std.math.cast(usize, file_size) orelse return error.Overflow; }; const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow; @@ 
-29,12 +25,12 @@ pub fn ParallelHasher(comptime Hasher: type) type { const buffer = try self.allocator.alloc(u8, chunk_size * out.len); defer self.allocator.free(buffer); - const results = try self.allocator.alloc(Io.File.PReadError!usize, out.len); + const results = try self.allocator.alloc(Io.File.ReadPositionalError!usize, out.len); defer self.allocator.free(results); { - var group: std.Io.Group = .init; - errdefer group.cancel(io); + var group: Io.Group = .init; + defer group.cancel(io); for (out, results, 0..) |*out_buf, *result, i| { const fstart = i * chunk_size; @@ -42,7 +38,7 @@ pub fn ParallelHasher(comptime Hasher: type) type { file_size - fstart else chunk_size; - group.async(io, worker, .{ + group.async(worker, .{ file, fstart, buffer[fstart..][0..fsize], @@ -61,11 +57,9 @@ pub fn ParallelHasher(comptime Hasher: type) type { fstart: usize, buffer: []u8, out: *[hash_size]u8, - err: *Io.File.PReadError!usize, + err: *Io.File.ReadPositionalError!usize, ) void { - const tracy = trace(@src()); - defer tracy.end(); - err.* = file.preadAll(buffer, fstart); + err.* = file.readPositionalAll(buffer, fstart); Hasher.hash(buffer, out, .{}); } diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig @@ -4,7 +4,7 @@ const Md5 = std.crypto.hash.Md5; const trace = @import("../../tracy.zig").trace; const Compilation = @import("../../Compilation.zig"); -const Hasher = @import("hasher.zig").ParallelHasher; +const ParallelHasher = @import("hasher.zig").ParallelHasher; /// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce /// the final digest. 
@@ -16,21 +16,23 @@ pub fn calcUuid(comp: *const Compilation, file: Io.File, file_size: u64, out: *[ const tracy = trace(@src()); defer tracy.end(); + const gpa = comp.gpa; + const io = comp.io; + const chunk_size: usize = 1024 * 1024; const num_chunks: usize = std.math.cast(usize, @divTrunc(file_size, chunk_size)) orelse return error.Overflow; const actual_num_chunks = if (@rem(file_size, chunk_size) > 0) num_chunks + 1 else num_chunks; - const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks); - defer comp.gpa.free(hashes); + const hashes = try gpa.alloc([Md5.digest_length]u8, actual_num_chunks); + defer gpa.free(hashes); - var hasher = Hasher(Md5){ .allocator = comp.gpa, .io = comp.io }; - try hasher.hash(file, hashes, .{ + try ParallelHasher(Md5).hash(gpa, io, file, hashes, .{ .chunk_size = chunk_size, .max_file_size = file_size, }); - const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length); - defer comp.gpa.free(final_buffer); + const final_buffer = try gpa.alloc(u8, actual_num_chunks * Md5.digest_length); + defer gpa.free(final_buffer); for (hashes, 0..) |hash, i| { @memcpy(final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);