blob d2e4e28a (12431B) - Raw
const Blake3 = @import("crypto.zig").Blake3;
const fs = @import("fs.zig");
const File = fs.File;
const base64 = @import("base64.zig");
const ArrayList = @import("array_list.zig").ArrayList;
const debug = @import("debug.zig");
const testing = @import("testing.zig");
const mem = @import("mem.zig");
const fmt = @import("fmt.zig");
const Allocator = mem.Allocator;
const Buffer = @import("buffer.zig").Buffer;
const os = @import("os.zig");

const base64_encoder = fs.base64_encoder;
const base64_decoder = fs.base64_decoder;
const BIN_DIGEST_LEN = 32;

/// One file tracked by a `CacheHash`: either an input registered through
/// `CacheHash.cache_file` or an entry read back from the manifest in `CacheHash.hit`.
pub const CacheHashFile = struct {
    /// Path of the file. When non-null the slice is owned and freed by `deinit`.
    path: ?[]const u8,
    /// Result of `stat()` on the file; `mtime` is what gets written to the manifest.
    stat: fs.File.Stat,
    /// Value stored in the first column of a manifest line.
    file_handle: os.fd_t,
    /// Blake3 digest of the file contents.
    bin_digest: [BIN_DIGEST_LEN]u8,
    /// Optional file contents. When non-null the slice is owned and freed by `deinit`.
    contents: ?[]const u8,

    /// Frees `path` and `contents` (when set) with `alloc` and resets them to null,
    /// so calling `deinit` twice is harmless.
    pub fn deinit(self: *@This(), alloc: *Allocator) void {
        if (self.path) |owned_slice| {
            alloc.free(owned_slice);
            self.path = null;
        }
        if (self.contents) |owned_slice| {
            alloc.free(owned_slice);
            self.contents = null;
        }
    }
};

/// Accumulates a hash over a set of inputs (buffers, integers, booleans, file paths)
/// and compares the hashes of the registered files against a manifest file stored in
/// `manifest_dir`.
///
/// Usage, as exercised by the test at the bottom of this file:
/// `init`, then any number of `cache*` calls, then `hit`; on a miss call `final` to
/// obtain the digest; always finish with `release`.
pub const CacheHash = struct {
    alloc: *Allocator,
    blake3: Blake3,
    /// Directory holding the manifest files. Not owned.
    manifest_dir: []const u8,
    /// Set by `hit` once the manifest path is known; owned, freed by `release`.
    /// The `cache*` functions assert that this is still null.
    manifest_file_path: ?[]const u8,
    /// Open handle to the manifest, or null if it was never opened or was closed
    /// after an error inside `hit`.
    manifest_file: ?File,
    /// When true, `release` rewrites the manifest before closing it.
    manifest_dirty: bool,
    /// When true, `hit` consults the manifest even if no files were registered.
    force_check_manifest: bool,
    files: ArrayList(CacheHashFile),
    /// Base64 form of the digest of the non-file inputs; used as the manifest file name.
    b64_digest: ArrayList(u8),

    /// Creates an empty cache hash. `manifest_dir_path` is borrowed, not copied.
    pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() {
        return CacheHash{
            .alloc = alloc,
            .blake3 = Blake3.init(),
            .manifest_dir = manifest_dir_path,
            .manifest_file_path = null,
            .manifest_file = null,
            .manifest_dirty = false,
            .force_check_manifest = false,
            .files = ArrayList(CacheHashFile).init(alloc),
            .b64_digest = ArrayList(u8).init(alloc),
        };
    }

    /// Feeds `val` into the hash, followed by a single zero byte as a separator.
    /// Must be called before `hit`.
    pub fn cache_buf(self: *@This(), val: []const u8) void {
        debug.assert(self.manifest_file_path == null);

        self.blake3.update(val);
        self.blake3.update(&[_]u8{0});
    }

    /// Feeds an integer or boolean into the hash. Must be called before `hit`.
    ///
    /// Integers must have a non-zero bit width that is a multiple of 8; they are
    /// serialized in native byte order and passed through `cache_buf`. Booleans are
    /// hashed as one byte (1 or 0) without the trailing separator that `cache_buf`
    /// adds. Any other type is a compile error.
    pub fn cache(self: *@This(), val: var) void {
        debug.assert(self.manifest_file_path == null);

        const val_type = @TypeOf(val);
        switch (@typeInfo(val_type)) {
            .Int => |int_info| if (int_info.bits != 0 and int_info.bits % 8 == 0) {
                const buf_len = @divExact(int_info.bits, 8);
                var buf: [buf_len]u8 = undefined;
                mem.writeIntNative(val_type, &buf, val);
                self.cache_buf(&buf);
            } else {
                @compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice.");
            },
            .Bool => {
                var buf: [1]u8 = undefined;
                buf[0] = if (val) 1 else 0;
                self.blake3.update(&buf);
            },
            else => @compileError("Unsupported type"),
        }
    }

    /// Registers `file_path` as an input file and feeds its resolved path into the
    /// hash. The file itself is not opened until `hit`. Must be called before `hit`.
    pub fn cache_file(self: *@This(), file_path: []const u8) !void {
        debug.assert(self.manifest_file_path == null);

        // Resolve first so that a failure cannot leave a half-initialized entry in
        // `files` for `release` to trip over.
        const resolved_path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path});
        errdefer self.alloc.free(resolved_path);

        const cache_hash_file = try self.files.addOne();
        // addOne() returns uninitialized memory; every field that `deinit` inspects
        // must be given a defined value.
        cache_hash_file.path = resolved_path;
        cache_hash_file.contents = null;

        self.cache_buf(resolved_path);
    }

    /// Finalizes the hash of the inputs added so far and checks the manifest.
    ///
    /// Returns true on a cache hit, with `out_digest` set to the base64 digest.
    /// Returns false on a miss; the caller can then obtain the digest with `final`.
    /// When no files were registered and `force_check_manifest` is false, this is
    /// always a hit and the manifest is never opened.
    ///
    /// Errors: `error.InvalidFormat` for a malformed manifest line or a path that
    /// does not match the registered input at the same index; `error.CacheUnavailable`
    /// when a listed file cannot be opened or hashed (the manifest is closed in that
    /// case); plus allocation and I/O errors.
    pub fn hit(self: *@This(), out_digest: *ArrayList(u8)) !bool {
        debug.assert(self.manifest_file_path == null);

        var bin_digest: [BIN_DIGEST_LEN]u8 = undefined;
        self.blake3.final(&bin_digest);

        const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN);
        try self.b64_digest.resize(OUT_DIGEST_LEN);
        base64_encoder.encode(self.b64_digest.toSlice(), &bin_digest);

        if (self.files.toSlice().len == 0 and !self.force_check_manifest) {
            try out_digest.resize(OUT_DIGEST_LEN);
            mem.copy(u8, out_digest.toSlice(), self.b64_digest.toSlice());
            return true;
        }

        // Start a fresh hash seeded with the digest of the non-file inputs; the
        // per-file digests are folded in below.
        self.blake3 = Blake3.init();
        self.blake3.update(&bin_digest);

        {
            const manifest_file_path_slice = try fs.path.join(self.alloc, &[_][]const u8{ self.manifest_dir, self.b64_digest.toSlice() });
            var path_buf = ArrayList(u8).fromOwnedSlice(self.alloc, manifest_file_path_slice);
            defer path_buf.deinit();
            try path_buf.appendSlice(".txt");

            self.manifest_file_path = path_buf.toOwnedSlice();
        }

        const cwd = fs.cwd();

        try cwd.makePath(self.manifest_dir);

        // TODO: Open file with a file lock
        self.manifest_file = try cwd.createFile(self.manifest_file_path.?, .{ .read = true, .truncate = false });

        // create a buffer instead of using readAllAlloc
        // See: https://github.com/ziglang/zig/issues/4656
        var file_buffer = try Buffer.initCapacity(self.alloc, 16 * 1024);
        defer file_buffer.deinit();

        // TODO: Figure out a good max value?
        try self.manifest_file.?.inStream().stream.readAllBuffer(&file_buffer, 16 * 1024);
        const file_contents = file_buffer.toSliceConst();

        const input_file_count = self.files.len;
        var any_file_changed = false;
        var line_iter = mem.tokenize(file_contents, "\n");
        var idx: usize = 0;
        while (line_iter.next()) |line| {
            defer idx += 1;

            var cache_hash_file: *CacheHashFile = undefined;
            if (idx < input_file_count) {
                cache_hash_file = self.files.ptrAt(idx);
            } else {
                // Entry known only from the manifest. addOne() returns uninitialized
                // memory, so define the owned fields right away; that keeps `release`
                // safe on every early return below.
                cache_hash_file = try self.files.addOne();
                cache_hash_file.path = null;
                cache_hash_file.contents = null;
            }

            // Line layout: "<file_handle> <mtime> <base64 digest> <path>"
            var iter = mem.tokenize(line, " ");
            const file_handle_str = iter.next() orelse return error.InvalidFormat;
            const mtime_nsec_str = iter.next() orelse return error.InvalidFormat;
            const digest_str = iter.next() orelse return error.InvalidFormat;
            const file_path = iter.rest();

            cache_hash_file.file_handle = fmt.parseInt(os.fd_t, file_handle_str, 10) catch return error.InvalidFormat;
            cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
            base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;

            if (file_path.len == 0) {
                return error.InvalidFormat;
            }
            if (cache_hash_file.path) |known_path| {
                if (!mem.eql(u8, file_path, known_path)) {
                    return error.InvalidFormat;
                }
            } else {
                // `file_path` points into `file_buffer`, which is freed when this
                // function returns, so the entry needs its own copy.
                cache_hash_file.path = try mem.dupe(self.alloc, u8, file_path);
            }

            const this_file = cwd.openFile(cache_hash_file.path.?, .{ .read = true }) catch {
                self.manifest_file.?.close();
                self.manifest_file = null;
                return error.CacheUnavailable;
            };
            defer this_file.close();
            cache_hash_file.stat = try this_file.stat();

            // TODO: check mtime and skip re-hashing when it is unchanged. Until then
            // every file is re-hashed and the manifest is always marked dirty.
            self.manifest_dirty = true;

            // TODO: check for problematic timestamp

            var actual_digest: [BIN_DIGEST_LEN]u8 = undefined;
            try hash_file(self.alloc, &actual_digest, &this_file);

            if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) {
                mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest);
                // keep going until we have the input file digests
                any_file_changed = true;
            }

            if (!any_file_changed) {
                self.blake3.update(&cache_hash_file.bin_digest);
            }
        }

        if (any_file_changed) {
            // cache miss
            // keep the manifest file open (TODO: with rw lock)
            // reset the hash
            self.blake3 = Blake3.init();
            self.blake3.update(&bin_digest);

            // Drop the manifest-only entries, releasing the paths they own first.
            for (self.files.toSlice()[input_file_count..]) |*extra_file| {
                extra_file.deinit(self.alloc);
            }
            try self.files.resize(input_file_count);

            // NOTE(review): if the manifest listed fewer files than were registered,
            // the digests of the remaining inputs have not been computed at this
            // point — confirm whether they should be populated here.
            for (self.files.toSlice()) |file| {
                self.blake3.update(&file.bin_digest);
            }
            return false;
        }

        if (idx < input_file_count or idx == 0) {
            // The manifest is empty or covers only a prefix of the inputs: hash the
            // remaining inputs and report a miss.
            self.manifest_dirty = true;
            while (idx < input_file_count) : (idx += 1) {
                var cache_hash_file = self.files.ptrAt(idx);
                self.populate_file_hash(cache_hash_file) catch {
                    self.manifest_file.?.close();
                    self.manifest_file = null;
                    return error.CacheUnavailable;
                };
            }
            return false;
        }

        try self.final(out_digest);
        return true;
    }

    /// Opens the file at `cache_hash_file.path` (which must be non-null), records its
    /// stat, stores the digest of its contents in `cache_hash_file.bin_digest` and
    /// folds that digest into the running hash.
    pub fn populate_file_hash(self: *@This(), cache_hash_file: *CacheHashFile) !void {
        debug.assert(cache_hash_file.path != null);

        const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{});
        defer this_file.close();

        cache_hash_file.stat = try this_file.stat();

        // TODO: check for problematic timestamp

        try hash_file(self.alloc, &cache_hash_file.bin_digest, &this_file);
        self.blake3.update(&cache_hash_file.bin_digest);
    }

    /// Finalizes the running hash and writes its base64 encoding into `out_digest`.
    /// Only valid after `hit` has set the manifest path.
    pub fn final(self: *@This(), out_digest: *ArrayList(u8)) !void {
        debug.assert(self.manifest_file_path != null);

        var bin_digest: [BIN_DIGEST_LEN]u8 = undefined;
        self.blake3.final(&bin_digest);

        const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN);
        try out_digest.resize(OUT_DIGEST_LEN);
        base64_encoder.encode(out_digest.toSlice(), &bin_digest);
    }

    /// Serializes every entry of `files` as one manifest line and writes the result
    /// at offset 0 of the open manifest file. Requires the manifest to be open.
    pub fn write_manifest(self: *@This()) !void {
        debug.assert(self.manifest_file_path != null);

        const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN);
        var encoded_digest = try Buffer.initSize(self.alloc, OUT_DIGEST_LEN);
        defer encoded_digest.deinit();
        var contents = try Buffer.init(self.alloc, "");
        defer contents.deinit();

        for (self.files.toSlice()) |file| {
            base64_encoder.encode(encoded_digest.toSlice(), &file.bin_digest);
            try contents.print("{} {} {} {}\n", .{ file.file_handle, file.stat.mtime, encoded_digest.toSlice(), file.path });
        }

        // TODO: the file is opened without truncation and is not shortened here, so
        // bytes from a longer previous manifest remain after the new contents.
        try self.manifest_file.?.seekTo(0);
        try self.manifest_file.?.writeAll(contents.toSlice());
    }

    /// Writes the manifest if it is dirty and still open, closes it, and frees all
    /// memory owned by this object. Safe to call regardless of how `hit` ended,
    /// including when it was never called or closed the manifest after an error.
    pub fn release(self: *@This()) void {
        if (self.manifest_file) |manifest_file| {
            if (self.manifest_dirty) {
                self.write_manifest() catch |err| {
                    debug.warn("Unable to write cache file '{}': {}\n", .{ self.manifest_file_path, err });
                };
            }
            manifest_file.close();
            self.manifest_file = null;
        }

        if (self.manifest_file_path) |owned_slice| {
            self.alloc.free(owned_slice);
            self.manifest_file_path = null;
        }
        for (self.files.toSlice()) |*file| {
            file.deinit(self.alloc);
        }
        self.files.deinit();
        self.b64_digest.deinit();
    }
};

/// Reads the contents of `handle` from its current position (at most 64 KiB, larger
/// files make the read fail) and writes their Blake3 digest into `bin_digest`.
/// `alloc` is used only for the temporary contents buffer.
fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const File) !void {
    var blake3 = Blake3.init();

    const contents = try handle.inStream().stream.readAllAlloc(alloc, 64 * 1024);
    defer alloc.free(contents);

    blake3.update(contents);

    blake3.final(bin_digest);
}

test "cache file and then recall it" {
    const cwd = fs.cwd();

    const temp_manifest_dir = "temp_manifest_dir";

    try cwd.writeFile("test.txt", "Hello, world!\n");

    var digest1 = try ArrayList(u8).initCapacity(testing.allocator, 32);
    defer digest1.deinit();
    var digest2 = try ArrayList(u8).initCapacity(testing.allocator, 32);
    defer digest2.deinit();

    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release();

        ch.cache(true);
        ch.cache(@as(u16, 1234));
        ch.cache_buf("1234");
        try ch.cache_file("test.txt");

        // There should be nothing in the cache
        testing.expect((try ch.hit(&digest1)) == false);

        try ch.final(&digest1);
    }
    {
        var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
        defer ch.release();

        ch.cache(true);
        ch.cache(@as(u16, 1234));
        ch.cache_buf("1234");
        try ch.cache_file("test.txt");

        // Cache hit! We just "built" the same file
        testing.expect((try ch.hit(&digest2)) == true);
    }

    testing.expect(mem.eql(u8, digest1.toSlice(), digest2.toSlice()));

    try cwd.deleteTree(temp_manifest_dir);
}