diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig
index d846917fd3..cd57d3ad40 100644
--- a/lib/std/Build/Cache.zig
+++ b/lib/std/Build/Cache.zig
@@ -55,7 +55,15 @@ pub fn prefixes(cache: *const Cache) []const Directory {

 const PrefixedPath = struct {
     prefix: u8,
-    sub_path: []u8,
+    sub_path: []const u8,
+
+    fn eql(a: PrefixedPath, b: PrefixedPath) bool {
+        return a.prefix == b.prefix and std.mem.eql(u8, a.sub_path, b.sub_path);
+    }
+
+    fn hash(pp: PrefixedPath) u32 {
+        return @truncate(std.hash.Wyhash.hash(pp.prefix, pp.sub_path));
+    }
 };

 fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {
@@ -132,7 +140,7 @@ pub const hasher_init: Hasher = Hasher.init(&[_]u8{
 });

 pub const File = struct {
-    prefixed_path: ?PrefixedPath,
+    prefixed_path: PrefixedPath,
     max_file_size: ?usize,
     stat: Stat,
     bin_digest: BinDigest,
@@ -145,16 +153,18 @@ pub const File = struct {
     };

     pub fn deinit(self: *File, gpa: Allocator) void {
-        if (self.prefixed_path) |pp| {
-            gpa.free(pp.sub_path);
-            self.prefixed_path = null;
-        }
+        gpa.free(self.prefixed_path.sub_path);
         if (self.contents) |contents| {
             gpa.free(contents);
             self.contents = null;
         }
         self.* = undefined;
     }
+
+    pub fn updateMaxSize(file: *File, new_max_size: ?usize) void {
+        const new = new_max_size orelse return;
+        file.max_file_size = if (file.max_file_size) |old| @max(old, new) else new;
+    }
 };

 pub const HashHelper = struct {
@@ -296,7 +306,7 @@ pub const Manifest = struct {
     // order to obtain a problematic timestamp for the next call. Calls after that
     // will then use the same timestamp, to avoid unnecessary filesystem writes.
     want_refresh_timestamp: bool = true,
-    files: std.ArrayListUnmanaged(File) = .{},
+    files: Files = .{},
     hex_digest: HexDigest,
     /// Populated when hit() returns an error because of one
     /// of the files listed in the manifest.
@@ -305,6 +315,34 @@ pub const Manifest = struct {
     /// what time the file system thinks it is, according to its own granularity.
     recent_problematic_timestamp: i128 = 0,

+    pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);
+
+    pub const FilesContext = struct {
+        pub fn hash(fc: FilesContext, file: File) u32 {
+            _ = fc;
+            return file.prefixed_path.hash();
+        }
+
+        pub fn eql(fc: FilesContext, a: File, b: File, b_index: usize) bool {
+            _ = fc;
+            _ = b_index;
+            return a.prefixed_path.eql(b.prefixed_path);
+        }
+    };
+
+    const FilesAdapter = struct {
+        pub fn eql(context: @This(), a: PrefixedPath, b: File, b_index: usize) bool {
+            _ = context;
+            _ = b_index;
+            return a.eql(b.prefixed_path);
+        }
+
+        pub fn hash(context: @This(), key: PrefixedPath) u32 {
+            _ = context;
+            return key.hash();
+        }
+    };
+
     /// Add a file as a dependency of process being cached. When `hit` is
     /// called, the file's contents will be checked to ensure that it matches
     /// the contents from previous times.
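The context/adapter pair above is the heart of the change: `FilesContext` identifies a stored `File` purely by its `prefixed_path`, while `FilesAdapter` lets callers probe the map with a bare `PrefixedPath` so they never have to construct a throwaway `File`. Below is a minimal, self-contained sketch of that adapted-lookup pattern — illustrative only, not part of the patch, with `File` trimmed to two fields and assuming the same `std.ArrayHashMapUnmanaged` API the patch itself calls:

```zig
const std = @import("std");

// Trimmed-down mirrors of the types in this patch, for illustration only.
const PrefixedPath = struct {
    prefix: u8,
    sub_path: []const u8,

    fn eql(a: PrefixedPath, b: PrefixedPath) bool {
        return a.prefix == b.prefix and std.mem.eql(u8, a.sub_path, b.sub_path);
    }

    fn hash(pp: PrefixedPath) u32 {
        // Seeding with the prefix keeps equal sub_paths under different
        // prefixes from hashing identically.
        return @truncate(std.hash.Wyhash.hash(pp.prefix, pp.sub_path));
    }
};

const File = struct {
    prefixed_path: PrefixedPath,
    max_file_size: ?usize = null,
};

// Context for stored keys: a File is identified by its prefixed_path alone.
const FilesContext = struct {
    pub fn hash(_: FilesContext, file: File) u32 {
        return file.prefixed_path.hash();
    }
    pub fn eql(_: FilesContext, a: File, b: File, _: usize) bool {
        return a.prefixed_path.eql(b.prefixed_path);
    }
};

// Adapter for lookups keyed by a bare PrefixedPath. It must hash exactly
// like FilesContext does for the matching File, or lookups will miss.
const FilesAdapter = struct {
    pub fn hash(_: @This(), key: PrefixedPath) u32 {
        return key.hash();
    }
    pub fn eql(_: @This(), a: PrefixedPath, b: File, _: usize) bool {
        return a.eql(b.prefixed_path);
    }
};

const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);

test "adapted lookup deduplicates by PrefixedPath" {
    const gpa = std.testing.allocator;
    var files: Files = .{};
    defer files.deinit(gpa);

    const path: PrefixedPath = .{ .prefix = 0, .sub_path = "foo/bar.zig" };

    // First add: a miss, so the caller initializes the key in place.
    const gop1 = try files.getOrPutAdapted(gpa, path, FilesAdapter{});
    try std.testing.expect(!gop1.found_existing);
    gop1.key_ptr.* = .{ .prefixed_path = path };

    // Second add of an equal path: a hit, and no second entry is created.
    const gop2 = try files.getOrPutAdapted(gpa, path, FilesAdapter{});
    try std.testing.expect(gop2.found_existing);
    try std.testing.expectEqual(@as(usize, 1), files.count());
}
```

The one invariant to keep in mind: the adapter's `hash` must agree with the context's `hash` for any pair that `eql` considers equal, since `getOrPutAdapted` probes with the adapter but the table was built with the context.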
@@ -317,7 +355,7 @@ pub const Manifest = struct {
     /// to access the contents of the file after calling `hit()` like so:
     ///
     /// ```
-    /// var file_contents = cache_hash.files.items[file_index].contents.?;
+    /// var file_contents = cache_hash.files.keys()[file_index].contents.?;
     /// ```
     pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize {
         assert(self.manifest_file == null);
@@ -327,7 +365,12 @@ pub const Manifest = struct {
         const prefixed_path = try self.cache.findPrefix(file_path);
         errdefer gpa.free(prefixed_path.sub_path);

-        self.files.addOneAssumeCapacity().* = .{
+        const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{});
+        if (gop.found_existing) {
+            gop.key_ptr.updateMaxSize(max_file_size);
+            return gop.index;
+        }
+        gop.key_ptr.* = .{
             .prefixed_path = prefixed_path,
             .contents = null,
             .max_file_size = max_file_size,
@@ -338,7 +381,7 @@ pub const Manifest = struct {
         self.hash.add(prefixed_path.prefix);
         self.hash.addBytes(prefixed_path.sub_path);

-        return self.files.items.len - 1;
+        return gop.index;
     }

     pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void {
@@ -418,7 +461,7 @@ pub const Manifest = struct {

         self.want_refresh_timestamp = true;

-        const input_file_count = self.files.items.len;
+        const input_file_count = self.files.entries.len;
         while (true) : (self.unhit(bin_digest, input_file_count)) {
             const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max);
             defer gpa.free(file_contents);
@@ -430,7 +473,7 @@ pub const Manifest = struct {
                 if (try self.upgradeToExclusiveLock()) continue;
                 self.manifest_dirty = true;
                 while (idx < input_file_count) : (idx += 1) {
-                    const ch_file = &self.files.items[idx];
+                    const ch_file = &self.files.keys()[idx];
                     self.populateFileHash(ch_file) catch |err| {
                         self.failed_file_index = idx;
                         return err;
@@ -441,18 +484,6 @@ pub const Manifest = struct {
             while (line_iter.next()) |line| {
                 defer idx += 1;

-                const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: {
-                    const new = try self.files.addOne(gpa);
-                    new.* = .{
-                        .prefixed_path = null,
-                        .contents = null,
-                        .max_file_size = null,
-                        .stat = undefined,
-                        .bin_digest = undefined,
-                    };
-                    break :blk new;
-                };
-
                 var iter = mem.tokenizeScalar(u8, line, ' ');
                 const size = iter.next() orelse return error.InvalidFormat;
                 const inode = iter.next() orelse return error.InvalidFormat;
@@ -461,30 +492,61 @@ pub const Manifest = struct {
                 const prefix_str = iter.next() orelse return error.InvalidFormat;
                 const file_path = iter.rest();

-                cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
-                cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
-                cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
-                _ = fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;
+                const stat_size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
+                const stat_inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
+                const stat_mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
+                const file_bin_digest = b: {
+                    if (digest_str.len != hex_digest_len) return error.InvalidFormat;
+                    var bd: BinDigest = undefined;
+                    _ = fmt.hexToBytes(&bd, digest_str) catch return error.InvalidFormat;
+                    break :b bd;
+                };
+
                 const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat;
                 if (prefix >= self.cache.prefixes_len) return error.InvalidFormat;

-                if (file_path.len == 0) {
-                    return error.InvalidFormat;
-                }
-                if (cache_hash_file.prefixed_path) |pp| {
-                    if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) {
-                        return error.InvalidFormat;
-                    }
-                }
+                if (file_path.len == 0) return error.InvalidFormat;

-                if (cache_hash_file.prefixed_path == null) {
-                    cache_hash_file.prefixed_path = .{
+                const cache_hash_file = f: {
+                    const prefixed_path: PrefixedPath = .{
                         .prefix = prefix,
-                        .sub_path = try gpa.dupe(u8, file_path),
+                        .sub_path = file_path, // expires with file_contents
                     };
-                }
+                    if (idx < input_file_count) {
+                        const file = &self.files.keys()[idx];
+                        if (!file.prefixed_path.eql(prefixed_path))
+                            return error.InvalidFormat;

-                const pp = cache_hash_file.prefixed_path.?;
+                        file.stat = .{
+                            .size = stat_size,
+                            .inode = stat_inode,
+                            .mtime = stat_mtime,
+                        };
+                        file.bin_digest = file_bin_digest;
+                        break :f file;
+                    }
+                    const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
+                    errdefer assert(self.files.popOrNull() != null);
+                    if (!gop.found_existing) {
+                        gop.key_ptr.* = .{
+                            .prefixed_path = .{
+                                .prefix = prefix,
+                                .sub_path = try gpa.dupe(u8, file_path),
+                            },
+                            .contents = null,
+                            .max_file_size = null,
+                            .stat = .{
+                                .size = stat_size,
+                                .inode = stat_inode,
+                                .mtime = stat_mtime,
+                            },
+                            .bin_digest = file_bin_digest,
+                        };
+                    }
+                    break :f gop.key_ptr;
+                };
+
+                const pp = cache_hash_file.prefixed_path;
                 const dir = self.cache.prefixes()[pp.prefix].handle;
                 const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) {
                     error.FileNotFound => {
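Note the `// expires with file_contents` comment in the hunk above: during `hit()`, the lookup key borrows its `sub_path` straight out of the manifest buffer, and the bytes are only copied with `gpa.dupe` when a genuinely new entry is inserted. A small hypothetical sketch of that "borrow for lookup, copy on insert" idiom, using a plain string set instead of the patch's types:

```zig
const std = @import("std");

test "borrow the key for lookup, copy it only on insert" {
    const gpa = std.testing.allocator;

    var seen: std.StringArrayHashMapUnmanaged(void) = .{};
    defer {
        for (seen.keys()) |key| gpa.free(key);
        seen.deinit(gpa);
    }

    // Transient buffer standing in for the manifest file contents.
    var buf: [32]u8 = undefined;
    const borrowed = try std.fmt.bufPrint(&buf, "{d} foo/bar.zig", .{0});

    const gop = try seen.getOrPut(gpa, borrowed);
    if (!gop.found_existing) {
        // On a miss, the map currently holds the borrowed slice; replace it
        // with an owned copy before the backing buffer goes away. (The patch
        // guards this same window with `errdefer assert(popOrNull() != null)`.)
        gop.key_ptr.* = try gpa.dupe(u8, borrowed);
    }

    try std.testing.expect(seen.contains("0 foo/bar.zig"));
}
```

On a hit, nothing is allocated at all, which is exactly why the patch can afford to run this lookup once per manifest line.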
@@ -548,7 +610,7 @@ pub const Manifest = struct {
                 if (try self.upgradeToExclusiveLock()) continue;
                 self.manifest_dirty = true;
                 while (idx < input_file_count) : (idx += 1) {
-                    const ch_file = &self.files.items[idx];
+                    const ch_file = &self.files.keys()[idx];
                     self.populateFileHash(ch_file) catch |err| {
                         self.failed_file_index = idx;
                         return err;
@@ -571,12 +633,12 @@ pub const Manifest = struct {
         self.hash.hasher.update(&bin_digest);

         // Remove files not in the initial hash.
-        for (self.files.items[input_file_count..]) |*file| {
+        for (self.files.keys()[input_file_count..]) |*file| {
             file.deinit(self.cache.gpa);
         }
         self.files.shrinkRetainingCapacity(input_file_count);

-        for (self.files.items) |file| {
+        for (self.files.keys()) |file| {
             self.hash.hasher.update(&file.bin_digest);
         }
     }
@@ -616,7 +678,7 @@ pub const Manifest = struct {
     }

     fn populateFileHash(self: *Manifest, ch_file: *File) !void {
-        const pp = ch_file.prefixed_path.?;
+        const pp = ch_file.prefixed_path;
         const dir = self.cache.prefixes()[pp.prefix].handle;
         const file = try dir.openFile(pp.sub_path, .{});
         defer file.close();
@@ -682,7 +744,7 @@ pub const Manifest = struct {
             .bin_digest = undefined,
             .contents = null,
         };
-        errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
+        errdefer self.files.shrinkRetainingCapacity(self.files.entries.len - 1);

         try self.populateFileHash(new_ch_file);

@@ -690,9 +752,11 @@ pub const Manifest = struct {
     }

     /// Add a file as a dependency of process being cached, after the initial hash has been
-    /// calculated. This is useful for processes that don't know the all the files that
-    /// are depended on ahead of time. For example, a source file that can import other files
-    /// will need to be recompiled if the imported file is changed.
+    /// calculated.
+    ///
+    /// This is useful for processes that don't know all the files that are
+    /// depended on ahead of time. For example, a source file that can import
+    /// other files will need to be recompiled if the imported file is changed.
     pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {
         assert(self.manifest_file != null);

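Before the `addFilePost` hunks below, it is worth spelling out how a duplicate add is resolved: `addFile` (earlier in the patch) merges by widening the stored limit through `File.updateMaxSize`, while the post-hash variants below simply free the redundant key and return early. A hypothetical free-function restatement of the widening rule — just the map-free logic, for illustration:

```zig
const std = @import("std");

// Mirrors File.updateMaxSize from this patch: null means "contents not
// requested", and a duplicate add may only ever grow the limit.
fn updateMaxSize(max_file_size: *?usize, new_max_size: ?usize) void {
    const new = new_max_size orelse return;
    max_file_size.* = if (max_file_size.*) |old| @max(old, new) else new;
}

test "duplicate adds only widen max_file_size" {
    var max: ?usize = null;

    updateMaxSize(&max, null); // no limit requested: stays null
    try std.testing.expectEqual(@as(?usize, null), max);

    updateMaxSize(&max, 100); // first concrete request wins
    try std.testing.expectEqual(@as(?usize, 100), max);

    updateMaxSize(&max, 50); // a smaller later request never shrinks it
    try std.testing.expectEqual(@as(?usize, 100), max);

    updateMaxSize(&max, null); // null again: keep the existing limit
    try std.testing.expectEqual(@as(?usize, 100), max);
}
```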
@@ -700,17 +764,26 @@ pub const Manifest = struct {
         const prefixed_path = try self.cache.findPrefix(file_path);
         errdefer gpa.free(prefixed_path.sub_path);

-        const new_ch_file = try self.files.addOne(gpa);
-        new_ch_file.* = .{
+        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
+        errdefer assert(self.files.popOrNull() != null);
+
+        if (gop.found_existing) {
+            gpa.free(prefixed_path.sub_path);
+            return;
+        }
+
+        gop.key_ptr.* = .{
             .prefixed_path = prefixed_path,
             .max_file_size = null,
             .stat = undefined,
             .bin_digest = undefined,
             .contents = null,
         };
-        errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);

-        try self.populateFileHash(new_ch_file);
+        self.files.lockPointers();
+        defer self.files.unlockPointers();
+
+        try self.populateFileHash(gop.key_ptr);
     }

     /// Like `addFilePost` but when the file contents have already been loaded from disk.
@@ -724,13 +797,20 @@ pub const Manifest = struct {
         assert(self.manifest_file != null);

         const gpa = self.cache.gpa;
-        const ch_file = try self.files.addOne(gpa);
-        errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
-
         const prefixed_path = try self.cache.findPrefixResolved(resolved_path);
         errdefer gpa.free(prefixed_path.sub_path);

-        ch_file.* = .{
+        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
+        errdefer assert(self.files.popOrNull() != null);
+
+        if (gop.found_existing) {
+            gpa.free(prefixed_path.sub_path);
+            return;
+        }
+
+        const new_file = gop.key_ptr;
+
+        new_file.* = .{
             .prefixed_path = prefixed_path,
             .max_file_size = null,
             .stat = stat,
@@ -738,19 +818,19 @@ pub const Manifest = struct {
             .contents = null,
         };

-        if (self.isProblematicTimestamp(ch_file.stat.mtime)) {
+        if (self.isProblematicTimestamp(new_file.stat.mtime)) {
             // The actual file has an unreliable timestamp, force it to be hashed
-            ch_file.stat.mtime = 0;
-            ch_file.stat.inode = 0;
+            new_file.stat.mtime = 0;
+            new_file.stat.inode = 0;
         }

         {
             var hasher = hasher_init;
             hasher.update(bytes);
-            hasher.final(&ch_file.bin_digest);
+            hasher.final(&new_file.bin_digest);
         }

-        self.hash.hasher.update(&ch_file.bin_digest);
+        self.hash.hasher.update(&new_file.bin_digest);
     }

     pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void {
@@ -816,14 +896,14 @@ pub const Manifest = struct {
         const writer = contents.writer();
         try writer.writeAll(manifest_header ++ "\n");

-        for (self.files.items) |file| {
+        for (self.files.keys()) |file| {
             try writer.print("{d} {d} {d} {} {d} {s}\n", .{
                 file.stat.size,
                 file.stat.inode,
                 file.stat.mtime,
                 fmt.fmtSliceHexLower(&file.bin_digest),
-                file.prefixed_path.?.prefix,
-                file.prefixed_path.?.sub_path,
+                file.prefixed_path.prefix,
+                file.prefixed_path.sub_path,
             });
         }

@@ -892,7 +972,7 @@ pub const Manifest = struct {
             file.close();
         }

-        for (self.files.items) |*file| {
+        for (self.files.keys()) |*file| {
             file.deinit(self.cache.gpa);
         }
         self.files.deinit(self.cache.gpa);
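A side note on the `lockPointers`/`unlockPointers` pair in `addFilePost` above: `populateFileHash` runs while `gop.key_ptr` is live, so the patch locks the map to turn any accidental resizing operation in that window into a safety assertion instead of a dangling pointer. A hypothetical sketch of that mechanism, assuming the same hash-map safety-lock API the patch itself calls:

```zig
const std = @import("std");

test "lockPointers guards pointers into the map" {
    const gpa = std.testing.allocator;

    var map: std.AutoArrayHashMapUnmanaged(u32, u32) = .{};
    defer map.deinit(gpa);

    try map.put(gpa, 1, 10);
    const value_ptr = map.getPtr(1).?;

    // While locked, any operation that could move entries (put, getOrPut,
    // shrink, ...) trips a safety assertion in safe builds, so value_ptr
    // cannot be silently invalidated. Read-only access remains fine.
    map.lockPointers();
    defer map.unlockPointers();

    value_ptr.* += 1;
    try std.testing.expectEqual(@as(u32, 11), map.get(1).?);
}
```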
@@ -1061,7 +1141,7 @@ test "check that changing a file makes cache fail" {
         // There should be nothing in the cache
         try testing.expectEqual(false, try ch.hit());

-        try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?));
+        try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?));

         digest1 = ch.final();
@@ -1081,7 +1161,7 @@ test "check that changing a file makes cache fail" {
         try testing.expectEqual(false, try ch.hit());

         // The cache system does not keep the contents of re-hashed input files.
-        try testing.expect(ch.files.items[temp_file_idx].contents == null);
+        try testing.expect(ch.files.keys()[temp_file_idx].contents == null);

         digest2 = ch.final();
diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig
index f67cba6c3f..48af9e54d6 100644
--- a/lib/std/Build/Step.zig
+++ b/lib/std/Build/Step.zig
@@ -544,7 +544,7 @@ pub fn cacheHit(s: *Step, man: *std.Build.Cache.Manifest) !bool {

 fn failWithCacheError(s: *Step, man: *const std.Build.Cache.Manifest, err: anyerror) anyerror {
     const i = man.failed_file_index orelse return err;
-    const pp = man.files.items[i].prefixed_path orelse return err;
+    const pp = man.files.keys()[i].prefixed_path;
     const prefix = man.cache.prefixes()[pp.prefix].path orelse "";
     return s.fail("{s}: {s}/{s}", .{ @errorName(err), prefix, pp.sub_path });
 }
diff --git a/src/Compilation.zig b/src/Compilation.zig
index 7a84848b80..2c047504e9 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -1999,7 +1999,7 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void

         const is_hit = man.hit() catch |err| {
             const i = man.failed_file_index orelse return err;
-            const pp = man.files.items[i].prefixed_path orelse return err;
+            const pp = man.files.keys()[i].prefixed_path;
             const prefix = man.cache.prefixes()[pp.prefix];
             return comp.setMiscFailure(
                 .check_whole_cache,
@@ -4147,7 +4147,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8, owner_mod: *Package.Module
     const prev_hash_state = man.hash.peekBin();
     const actual_hit = hit: {
         _ = try man.hit();
-        if (man.files.items.len == 0) {
+        if (man.files.entries.len == 0) {
             man.unhit(prev_hash_state, 0);
             break :hit false;
         }
diff --git a/src/glibc.zig b/src/glibc.zig
index 9765e0ad78..365f5cab2b 100644
--- a/src/glibc.zig
+++ b/src/glibc.zig
@@ -713,7 +713,7 @@ pub fn buildSharedObjects(comp: *Compilation, prog_node: *std.Progress.Node) !vo
     };
     defer o_directory.handle.close();

-    const abilists_contents = man.files.items[abilists_index].contents.?;
+    const abilists_contents = man.files.keys()[abilists_index].contents.?;
     const metadata = try loadMetaData(comp.gpa, abilists_contents);
     defer metadata.destroy(comp.gpa);
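The mechanical call-site updates in Step.zig, Compilation.zig, and glibc.zig work only because `std.ArrayHashMap`, unlike `std.HashMap`, stores its entries in a flat array in insertion order, making `files.keys()[i]` a drop-in replacement for the old `files.items[i]`. A quick check of that invariant — a hypothetical test, not part of the patch:

```zig
const std = @import("std");

test "ArrayHashMap preserves insertion order and index access" {
    const gpa = std.testing.allocator;

    var map: std.StringArrayHashMapUnmanaged(void) = .{};
    defer map.deinit(gpa);

    try map.put(gpa, "first", {});
    try map.put(gpa, "second", {});
    try map.put(gpa, "third", {});

    // keys() exposes the backing array, so indexing matches add order.
    try std.testing.expectEqualStrings("first", map.keys()[0]);
    try std.testing.expectEqualStrings("third", map.keys()[2]);

    // getIndex recovers the position an entry was first inserted at.
    try std.testing.expectEqual(@as(?usize, 1), map.getIndex("second"));
}
```

That ordering guarantee is what keeps `failed_file_index`, `temp_file_idx`, and `abilists_index` meaningful after the switch from an array list to a hash map.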