Cache: introduce prefixes to manifests
Before, cache manifest files would have absolute file paths. This is problematic for two reasons:

* Absolute file paths are not portable. Some operating systems, such as WASI, have trouble with them. The files themselves are also less portable; they cannot be migrated from one user's home directory to another's. And finally, they can break due to file paths exceeding the maximum path component size.
* They would prevent some advanced use cases of Zig, where the lib dir has a different path in a different invocation but is ultimately the same Zig version and lib directory as before.

This commit adds a new column that specifies the prefix directory for each file. 0 is an escape hatch and has the previous behavior. The other two prefixes introduced are the zig lib directory and the cache directory. This means files in zig-cache manifests can reference files local to these directories.

In practice, this means it is possible to use a different file path for the zig lib directory in a subsequent run of zig and have it still take advantage of the global cache, provided that the files inside remain unchanged.

closes #13050
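As an illustration of the new manifest format (the sizes, inode, timestamp, digest, and paths below are made up, not taken from the commit), each file entry gains a prefix column between the digest and the file path. With prefix 0 meaning the cwd escape hatch (absolute path, old behavior), 1 the zig lib directory, and 2 the cache directory, an entry for a file under the lib directory changes roughly like this:

    before: 1032 9371265 1667200000000000000 <hex digest> /home/alice/zig/lib/std/fs.zig
    after:  1032 9371265 1667200000000000000 <hex digest> 1 std/fs.zig

A path outside every registered prefix is still representable: it keeps prefix 0 and stores the full resolved path as its sub path.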
src/Cache.zig (175 lines changed)
@@ -1,3 +1,7 @@
//! Manages `zig-cache` directories.
//! This is not a general-purpose cache. It is designed to be fast and simple,
//! not to withstand attacks using specially-crafted input.

gpa: Allocator,
manifest_dir: fs.Dir,
hash: HashHelper = .{},
@@ -5,6 +9,14 @@ hash: HashHelper = .{},
recent_problematic_timestamp: i128 = 0,
mutex: std.Thread.Mutex = .{},

/// A set of strings such as the zig library directory or project source root, which
/// are stripped from the file paths before putting into the cache. They
/// are replaced with single-character indicators. This is not to save
/// space but to eliminate absolute file paths. This improves portability
/// and usefulness of the cache for advanced use cases.
prefixes_buffer: [3]Compilation.Directory = undefined,
prefixes_len: usize = 0,

const Cache = @This();
const std = @import("std");
const builtin = @import("builtin");
@@ -18,6 +30,11 @@ const Allocator = std.mem.Allocator;
const Compilation = @import("Compilation.zig");
const log = std.log.scoped(.cache);

pub fn addPrefix(cache: *Cache, directory: Compilation.Directory) void {
    cache.prefixes_buffer[cache.prefixes_len] = directory;
    cache.prefixes_len += 1;
}

/// Be sure to call `Manifest.deinit` after successful initialization.
pub fn obtain(cache: *Cache) Manifest {
    return Manifest{
@@ -29,6 +46,48 @@ pub fn obtain(cache: *Cache) Manifest {
    };
}

pub fn prefixes(cache: *const Cache) []const Compilation.Directory {
    return cache.prefixes_buffer[0..cache.prefixes_len];
}

const PrefixedPath = struct {
    prefix: u8,
    sub_path: []u8,
};

fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {
    const gpa = cache.gpa;
    const resolved_path = try fs.path.resolve(gpa, &[_][]const u8{file_path});
    errdefer gpa.free(resolved_path);
    return findPrefixResolved(cache, resolved_path);
}

/// Takes ownership of `resolved_path` on success.
fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {
    const gpa = cache.gpa;
    const prefixes_slice = cache.prefixes();
    var i: u8 = 1; // Start at 1 to skip over checking the null prefix.
    while (i < prefixes_slice.len) : (i += 1) {
        const p = prefixes_slice[i].path.?;
        if (mem.startsWith(u8, resolved_path, p)) {
            // +1 to skip over the path separator here
            const sub_path = try gpa.dupe(u8, resolved_path[p.len + 1 ..]);
            gpa.free(resolved_path);
            return PrefixedPath{
                .prefix = @intCast(u8, i),
                .sub_path = sub_path,
            };
        } else {
            log.debug("'{s}' does not start with '{s}'", .{ resolved_path, p });
        }
    }

    return PrefixedPath{
        .prefix = 0,
        .sub_path = resolved_path,
    };
}

/// This is 128 bits - Even with 2^54 cache entries, the probability of a collision would be under 10^-6
pub const bin_digest_len = 16;
pub const hex_digest_len = bin_digest_len * 2;
@@ -45,7 +104,7 @@ pub const Hasher = crypto.auth.siphash.SipHash128(1, 3);
pub const hasher_init: Hasher = Hasher.init(&[_]u8{0} ** Hasher.key_length);

pub const File = struct {
    path: ?[]const u8,
    prefixed_path: ?PrefixedPath,
    max_file_size: ?usize,
    stat: Stat,
    bin_digest: BinDigest,
@@ -57,13 +116,13 @@ pub const File = struct {
        mtime: i128,
    };

    pub fn deinit(self: *File, allocator: Allocator) void {
        if (self.path) |owned_slice| {
            allocator.free(owned_slice);
            self.path = null;
    pub fn deinit(self: *File, gpa: Allocator) void {
        if (self.prefixed_path) |pp| {
            gpa.free(pp.sub_path);
            self.prefixed_path = null;
        }
        if (self.contents) |contents| {
            allocator.free(contents);
            gpa.free(contents);
            self.contents = null;
        }
        self.* = undefined;
@@ -175,9 +234,6 @@ pub const Lock = struct {
    }
};

/// Manifest manages project-local `zig-cache` directories.
/// This is not a general-purpose cache.
/// It is designed to be fast and simple, not to withstand attacks using specially-crafted input.
pub const Manifest = struct {
    cache: *Cache,
    /// Current state for incremental hashing.
@@ -220,21 +276,27 @@ pub const Manifest = struct {
    pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize {
        assert(self.manifest_file == null);

        try self.files.ensureUnusedCapacity(self.cache.gpa, 1);
        const resolved_path = try fs.path.resolve(self.cache.gpa, &[_][]const u8{file_path});
        const gpa = self.cache.gpa;
        try self.files.ensureUnusedCapacity(gpa, 1);
        const prefixed_path = try self.cache.findPrefix(file_path);
        errdefer gpa.free(prefixed_path.sub_path);

        log.debug("Manifest.addFile {s} -> {d} {s}", .{
            file_path, prefixed_path.prefix, prefixed_path.sub_path,
        });

        const idx = self.files.items.len;
        self.files.addOneAssumeCapacity().* = .{
            .path = resolved_path,
            .prefixed_path = prefixed_path,
            .contents = null,
            .max_file_size = max_file_size,
            .stat = undefined,
            .bin_digest = undefined,
        };

        self.hash.addBytes(resolved_path);
        self.hash.add(prefixed_path.prefix);
        self.hash.addBytes(prefixed_path.sub_path);

        return idx;
        return self.files.items.len - 1;
    }

    pub fn hashCSource(self: *Manifest, c_source: Compilation.CSourceFile) !void {
@@ -281,6 +343,7 @@ pub const Manifest = struct {
    /// option, one may call `toOwnedLock` to obtain a smaller object which can represent
    /// the lock. `deinit` is safe to call whether or not `toOwnedLock` has been called.
    pub fn hit(self: *Manifest) !bool {
        const gpa = self.cache.gpa;
        assert(self.manifest_file == null);

        self.failed_file_index = null;
@@ -362,8 +425,8 @@ pub const Manifest = struct {

        self.want_refresh_timestamp = true;

        const file_contents = try self.manifest_file.?.reader().readAllAlloc(self.cache.gpa, manifest_file_size_max);
        defer self.cache.gpa.free(file_contents);
        const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max);
        defer gpa.free(file_contents);

        const input_file_count = self.files.items.len;
        var any_file_changed = false;
@@ -373,9 +436,9 @@ pub const Manifest = struct {
            defer idx += 1;

            const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: {
                const new = try self.files.addOne(self.cache.gpa);
                const new = try self.files.addOne(gpa);
                new.* = .{
                    .path = null,
                    .prefixed_path = null,
                    .contents = null,
                    .max_file_size = null,
                    .stat = undefined,
@@ -389,27 +452,35 @@ pub const Manifest = struct {
            const inode = iter.next() orelse return error.InvalidFormat;
            const mtime_nsec_str = iter.next() orelse return error.InvalidFormat;
            const digest_str = iter.next() orelse return error.InvalidFormat;
            const prefix_str = iter.next() orelse return error.InvalidFormat;
            const file_path = iter.rest();

            cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
            cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
            cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
            _ = std.fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;
            const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat;
            if (prefix >= self.cache.prefixes_len) return error.InvalidFormat;

            if (file_path.len == 0) {
                return error.InvalidFormat;
            }
            if (cache_hash_file.path) |p| {
                if (!mem.eql(u8, file_path, p)) {
            if (cache_hash_file.prefixed_path) |pp| {
                if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) {
                    return error.InvalidFormat;
                }
            }

            if (cache_hash_file.path == null) {
                cache_hash_file.path = try self.cache.gpa.dupe(u8, file_path);
            if (cache_hash_file.prefixed_path == null) {
                cache_hash_file.prefixed_path = .{
                    .prefix = prefix,
                    .sub_path = try gpa.dupe(u8, file_path),
                };
            }

            const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .mode = .read_only }) catch |err| switch (err) {
            const pp = cache_hash_file.prefixed_path.?;
            const dir = self.cache.prefixes()[pp.prefix].handle;
            const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) {
                error.FileNotFound => {
                    try self.upgradeToExclusiveLock();
                    return false;
@@ -535,8 +606,9 @@ pub const Manifest = struct {
    }

    fn populateFileHash(self: *Manifest, ch_file: *File) !void {
        log.debug("populateFileHash {s}", .{ch_file.path.?});
        const file = try fs.cwd().openFile(ch_file.path.?, .{});
        const pp = ch_file.prefixed_path.?;
        const dir = self.cache.prefixes()[pp.prefix].handle;
        const file = try dir.openFile(pp.sub_path, .{});
        defer file.close();

        const actual_stat = try file.stat();
@@ -588,12 +660,17 @@ pub const Manifest = struct {
    pub fn addFilePostFetch(self: *Manifest, file_path: []const u8, max_file_size: usize) ![]const u8 {
        assert(self.manifest_file != null);

        const resolved_path = try fs.path.resolve(self.cache.gpa, &[_][]const u8{file_path});
        errdefer self.cache.gpa.free(resolved_path);
        const gpa = self.cache.gpa;
        const prefixed_path = try self.cache.findPrefix(file_path);
        errdefer gpa.free(prefixed_path.sub_path);

        const new_ch_file = try self.files.addOne(self.cache.gpa);
        log.debug("Manifest.addFilePostFetch {s} -> {d} {s}", .{
            file_path, prefixed_path.prefix, prefixed_path.sub_path,
        });

        const new_ch_file = try self.files.addOne(gpa);
        new_ch_file.* = .{
            .path = resolved_path,
            .prefixed_path = prefixed_path,
            .max_file_size = max_file_size,
            .stat = undefined,
            .bin_digest = undefined,
@@ -613,12 +690,17 @@ pub const Manifest = struct {
    pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {
        assert(self.manifest_file != null);

        const resolved_path = try fs.path.resolve(self.cache.gpa, &[_][]const u8{file_path});
        errdefer self.cache.gpa.free(resolved_path);
        const gpa = self.cache.gpa;
        const prefixed_path = try self.cache.findPrefix(file_path);
        errdefer gpa.free(prefixed_path.sub_path);

        const new_ch_file = try self.files.addOne(self.cache.gpa);
        log.debug("Manifest.addFilePost {s} -> {d} {s}", .{
            file_path, prefixed_path.prefix, prefixed_path.sub_path,
        });

        const new_ch_file = try self.files.addOne(gpa);
        new_ch_file.* = .{
            .path = resolved_path,
            .prefixed_path = prefixed_path,
            .max_file_size = null,
            .stat = undefined,
            .bin_digest = undefined,
@@ -633,17 +715,27 @@ pub const Manifest = struct {
    /// On success, cache takes ownership of `resolved_path`.
    pub fn addFilePostContents(
        self: *Manifest,
        resolved_path: []const u8,
        resolved_path: []u8,
        bytes: []const u8,
        stat: File.Stat,
    ) error{OutOfMemory}!void {
        assert(self.manifest_file != null);
        const gpa = self.cache.gpa;

        const ch_file = try self.files.addOne(self.cache.gpa);
        const ch_file = try self.files.addOne(gpa);
        errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);

        log.debug("Manifest.addFilePostContents resolved_path={s}", .{resolved_path});

        const prefixed_path = try self.cache.findPrefixResolved(resolved_path);
        errdefer gpa.free(prefixed_path.sub_path);

        log.debug("Manifest.addFilePostContents -> {d} {s}", .{
            prefixed_path.prefix, prefixed_path.sub_path,
        });

        ch_file.* = .{
            .path = resolved_path,
            .prefixed_path = prefixed_path,
            .max_file_size = null,
            .stat = stat,
            .bin_digest = undefined,
@@ -742,12 +834,13 @@ pub const Manifest = struct {
                "{s}",
                .{std.fmt.fmtSliceHexLower(&file.bin_digest)},
            ) catch unreachable;
            try writer.print("{d} {d} {d} {s} {s}\n", .{
            try writer.print("{d} {d} {d} {s} {d} {s}\n", .{
                file.stat.size,
                file.stat.inode,
                file.stat.mtime,
                &encoded_digest,
                file.path.?,
                file.prefixed_path.?.prefix,
                file.prefixed_path.?.sub_path,
            });
        }

@@ -889,6 +982,7 @@ test "cache file and then recall it" {
        .gpa = testing.allocator,
        .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}),
    };
    cache.addPrefix(.{ .path = null, .handle = fs.cwd() });
    defer cache.manifest_dir.close();

    {
@@ -960,6 +1054,7 @@ test "check that changing a file makes cache fail" {
        .gpa = testing.allocator,
        .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}),
    };
    cache.addPrefix(.{ .path = null, .handle = fs.cwd() });
    defer cache.manifest_dir.close();

    {
@@ -1022,6 +1117,7 @@ test "no file inputs" {
        .gpa = testing.allocator,
        .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}),
    };
    cache.addPrefix(.{ .path = null, .handle = fs.cwd() });
    defer cache.manifest_dir.close();

    {
@@ -1080,6 +1176,7 @@ test "Manifest with files added after initial hash work" {
        .gpa = testing.allocator,
        .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}),
    };
    cache.addPrefix(.{ .path = null, .handle = fs.cwd() });
    defer cache.manifest_dir.close();

    {

@@ -1456,23 +1456,27 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
        else => @as(u8, 3),
    };

    // We put everything into the cache hash that *cannot be modified during an incremental update*.
    // For example, one cannot change the target between updates, but one can change source files,
    // so the target goes into the cache hash, but source files do not. This is so that we can
    // find the same binary and incrementally update it even if there are modified source files.
    // We do this even if outputting to the current directory because we need somewhere to store
    // incremental compilation metadata.
    // We put everything into the cache hash that *cannot be modified
    // during an incremental update*. For example, one cannot change the
    // target between updates, but one can change source files, so the
    // target goes into the cache hash, but source files do not. This is so
    // that we can find the same binary and incrementally update it even if
    // there are modified source files. We do this even if outputting to
    // the current directory because we need somewhere to store incremental
    // compilation metadata.
    const cache = try arena.create(Cache);
    cache.* = .{
        .gpa = gpa,
        .manifest_dir = try options.local_cache_directory.handle.makeOpenPath("h", .{}),
    };
    cache.addPrefix(.{ .path = null, .handle = fs.cwd() });
    cache.addPrefix(options.zig_lib_directory);
    cache.addPrefix(options.local_cache_directory);
    errdefer cache.manifest_dir.close();

    // This is shared hasher state common to zig source and all C source files.
    cache.hash.addBytes(build_options.version);
    cache.hash.add(builtin.zig_backend);
    cache.hash.addBytes(options.zig_lib_directory.path orelse ".");
    cache.hash.add(options.optimize_mode);
    cache.hash.add(options.target.cpu.arch);
    cache.hash.addBytes(options.target.cpu.model.name);
@@ -2265,8 +2269,9 @@ pub fn update(comp: *Compilation) !void {
    const is_hit = man.hit() catch |err| {
        // TODO properly bubble these up instead of emitting a warning
        const i = man.failed_file_index orelse return err;
        const file_path = man.files.items[i].path orelse return err;
        std.log.warn("{s}: {s}", .{ @errorName(err), file_path });
        const pp = man.files.items[i].prefixed_path orelse return err;
        const prefix = man.cache.prefixes()[pp.prefix].path orelse "";
        std.log.warn("{s}: {s}{s}", .{ @errorName(err), prefix, pp.sub_path });
        return err;
    };
    if (is_hit) {

@@ -653,6 +653,9 @@ pub fn buildSharedObjects(comp: *Compilation) !void {
        .gpa = comp.gpa,
        .manifest_dir = try comp.global_cache_directory.handle.makeOpenPath("h", .{}),
    };
    cache.addPrefix(.{ .path = null, .handle = fs.cwd() });
    cache.addPrefix(comp.zig_lib_directory);
    cache.addPrefix(comp.global_cache_directory);
    defer cache.manifest_dir.close();

    var man = cache.obtain();

@@ -302,6 +302,10 @@ pub fn buildImportLib(comp: *Compilation, lib_name: []const u8) !void {
        .gpa = comp.gpa,
        .manifest_dir = comp.cache_parent.manifest_dir,
    };
    for (comp.cache_parent.prefixes()) |prefix| {
        cache.addPrefix(prefix);
    }

    cache.hash.addBytes(build_options.version);
    cache.hash.addOptionalBytes(comp.zig_lib_directory.path);
    cache.hash.add(target.cpu.arch);
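For readers skimming the diff, the consumer-side pattern is the same at every call site: register the prefix directories on the Cache right after initializing it, before obtaining any Manifest. A minimal sketch of that wiring in Zig (the helper name and parameters are hypothetical, not part of the commit):

    const std = @import("std");
    const fs = std.fs;
    const Cache = @import("Cache.zig");
    const Compilation = @import("Compilation.zig");

    /// Hypothetical helper mirroring the call sites above: prefix 0 is the
    /// cwd escape hatch (absolute paths keep the old behavior), prefix 1 is
    /// the zig lib directory, and prefix 2 is the cache directory.
    fn addStandardPrefixes(
        cache: *Cache,
        zig_lib_directory: Compilation.Directory,
        cache_directory: Compilation.Directory,
    ) void {
        cache.addPrefix(.{ .path = null, .handle = fs.cwd() });
        cache.addPrefix(zig_lib_directory);
        cache.addPrefix(cache_directory);
    }

Files added to a Manifest afterwards are recorded as a prefix index plus a sub path relative to that directory, which is what makes the zig lib directory relocatable between invocations.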