zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob d2e4e28a (12431B) - Raw


      1 const Blake3 = @import("crypto.zig").Blake3;
      2 const fs = @import("fs.zig");
      3 const File = fs.File;
      4 const base64 = @import("base64.zig");
      5 const ArrayList = @import("array_list.zig").ArrayList;
      6 const debug = @import("debug.zig");
      7 const testing = @import("testing.zig");
      8 const mem = @import("mem.zig");
      9 const fmt = @import("fmt.zig");
     10 const Allocator = mem.Allocator;
     11 const Buffer = @import("buffer.zig").Buffer;
     12 const os = @import("os.zig");
     13 
     14 const base64_encoder = fs.base64_encoder;
     15 const base64_decoder = fs.base64_decoder;
     16 const BIN_DIGEST_LEN = 32;
     17 
     18 pub const CacheHashFile = struct {
     19     path: ?[]const u8,
     20     stat: fs.File.Stat,
     21     file_handle: os.fd_t,
     22     bin_digest: [BIN_DIGEST_LEN]u8,
     23     contents: ?[]const u8,
     24 
     25     pub fn deinit(self: *@This(), alloc: *Allocator) void {
     26         if (self.path) |owned_slice| {
     27             alloc.free(owned_slice);
     28             self.path = null;
     29         }
     30         if (self.contents) |owned_slice| {
     31             alloc.free(owned_slice);
     32             self.contents = null;
     33         }
     34     }
     35 };
     36 
     37 pub const CacheHash = struct {
     38     alloc: *Allocator,
     39     blake3: Blake3,
     40     manifest_dir: []const u8,
     41     manifest_file_path: ?[]const u8,
     42     manifest_file: ?File,
     43     manifest_dirty: bool,
     44     force_check_manifest: bool,
     45     files: ArrayList(CacheHashFile),
     46     b64_digest: ArrayList(u8),
     47 
     48     pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() {
     49         return CacheHash{
     50             .alloc = alloc,
     51             .blake3 = Blake3.init(),
     52             .manifest_dir = manifest_dir_path,
     53             .manifest_file_path = null,
     54             .manifest_file = null,
     55             .manifest_dirty = false,
     56             .force_check_manifest = false,
     57             .files = ArrayList(CacheHashFile).init(alloc),
     58             .b64_digest = ArrayList(u8).init(alloc),
     59         };
     60     }
     61 
     62     pub fn cache_buf(self: *@This(), val: []const u8) void {
     63         debug.assert(self.manifest_file_path == null);
     64 
     65         self.blake3.update(val);
     66         self.blake3.update(&[_]u8{0});
     67     }
     68 
     69     pub fn cache(self: *@This(), val: var) void {
     70         debug.assert(self.manifest_file_path == null);
     71 
     72         const val_type = @TypeOf(val);
     73         switch (@typeInfo(val_type)) {
     74             .Int => |int_info| if (int_info.bits != 0 and int_info.bits % 8 == 0) {
     75                 const buf_len = @divExact(int_info.bits, 8);
     76                 var buf: [buf_len]u8 = undefined;
     77                 mem.writeIntNative(val_type, &buf, val);
     78                 self.cache_buf(&buf);
     79             } else {
     80                 @compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice.");
     81             },
     82             .Bool => {
     83                 var buf: [1]u8 = undefined;
     84                 buf[0] = if (val) 1 else 0;
     85                 self.blake3.update(&buf);
     86             },
     87             else => @compileError("Unsupported type"),
     88         }
     89     }
     90 
     91     pub fn cache_file(self: *@This(), file_path: []const u8) !void {
     92         debug.assert(self.manifest_file_path == null);
     93 
     94         var cache_hash_file = try self.files.addOne();
     95         cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path});
     96 
     97         self.cache_buf(cache_hash_file.path.?);
     98     }
     99 
    100     pub fn hit(self: *@This(), out_digest: *ArrayList(u8)) !bool {
    101         debug.assert(self.manifest_file_path == null);
    102 
    103         var bin_digest: [BIN_DIGEST_LEN]u8 = undefined;
    104         self.blake3.final(&bin_digest);
    105 
    106         const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN);
    107         try self.b64_digest.resize(OUT_DIGEST_LEN);
    108         base64_encoder.encode(self.b64_digest.toSlice(), &bin_digest);
    109 
    110         if (self.files.toSlice().len == 0 and !self.force_check_manifest) {
    111             try out_digest.resize(OUT_DIGEST_LEN);
    112             mem.copy(u8, out_digest.toSlice(), self.b64_digest.toSlice());
    113             return true;
    114         }
    115 
    116         self.blake3 = Blake3.init();
    117         self.blake3.update(&bin_digest);
    118 
    119         {
    120             const manifest_file_path_slice = try fs.path.join(self.alloc, &[_][]const u8{ self.manifest_dir, self.b64_digest.toSlice() });
    121             var path_buf = ArrayList(u8).fromOwnedSlice(self.alloc, manifest_file_path_slice);
    122             defer path_buf.deinit();
    123             try path_buf.appendSlice(".txt");
    124 
    125             self.manifest_file_path = path_buf.toOwnedSlice();
    126         }
    127 
    128         const cwd = fs.cwd();
    129 
    130         try cwd.makePath(self.manifest_dir);
    131 
    132         // TODO: Open file with a file lock
    133         self.manifest_file = try cwd.createFile(self.manifest_file_path.?, .{ .read = true, .truncate = false });
    134 
    135         // create a buffer instead of using readAllAlloc
    136         // See: https://github.com/ziglang/zig/issues/4656
    137         var file_buffer = try Buffer.initCapacity(self.alloc, 16 * 1024);
    138         defer file_buffer.deinit();
    139 
    140         // TODO: Figure out a good max value?
    141         try self.manifest_file.?.inStream().stream.readAllBuffer(&file_buffer, 16 * 1024);
    142         const file_contents = file_buffer.toSliceConst();
    143 
    144         const input_file_count = self.files.len;
    145         var any_file_changed = false;
    146         var line_iter = mem.tokenize(file_contents, "\n");
    147         var idx: usize = 0;
    148         while (line_iter.next()) |line| {
    149             defer idx += 1;
    150 
    151             var cache_hash_file: *CacheHashFile = undefined;
    152             if (idx < input_file_count) {
    153                 cache_hash_file = self.files.ptrAt(idx);
    154             } else {
    155                 cache_hash_file = try self.files.addOne();
    156                 cache_hash_file.path = null;
    157             }
    158 
    159             var iter = mem.tokenize(line, " ");
    160             const file_handle_str = iter.next() orelse return error.InvalidFormat;
    161             const mtime_nsec_str = iter.next() orelse return error.InvalidFormat;
    162             const digest_str = iter.next() orelse return error.InvalidFormat;
    163             const file_path = iter.rest();
    164 
    165             cache_hash_file.file_handle = fmt.parseInt(os.fd_t, file_handle_str, 10) catch return error.InvalidFormat;
    166             cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
    167             base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;
    168 
    169             if (file_path.len == 0) {
    170                 return error.InvalidFormat;
    171             }
    172             if (cache_hash_file.path != null and !mem.eql(u8, file_path, cache_hash_file.path.?)) {
    173                 return error.InvalidFormat;
    174             }
    175 
    176             const this_file = cwd.openFile(cache_hash_file.path.?, .{ .read = true }) catch {
    177                 self.manifest_file.?.close();
    178                 self.manifest_file = null;
    179                 return error.CacheUnavailable;
    180             };
    181             defer this_file.close();
    182             cache_hash_file.stat = try this_file.stat();
    183             // TODO: check mtime
    184             if (false) {} else {
    185                 self.manifest_dirty = true;
    186 
    187                 // TODO: check for problematic timestamp
    188 
    189                 var actual_digest: [32]u8 = undefined;
    190                 try hash_file(self.alloc, &actual_digest, &this_file);
    191 
    192                 if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) {
    193                     mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest);
    194                     // keep going until we have the input file digests
    195                     any_file_changed = true;
    196                 }
    197             }
    198 
    199             if (!any_file_changed) {
    200                 self.blake3.update(&cache_hash_file.bin_digest);
    201             }
    202         }
    203 
    204         if (any_file_changed) {
    205             // cache miss
    206             // keep the manifest file open (TODO: with rw lock)
    207             // reset the hash
    208             self.blake3 = Blake3.init();
    209             self.blake3.update(&bin_digest);
    210             try self.files.resize(input_file_count);
    211             for (self.files.toSlice()) |file| {
    212                 self.blake3.update(&file.bin_digest);
    213             }
    214             return false;
    215         }
    216 
    217         if (idx < input_file_count or idx == 0) {
    218             self.manifest_dirty = true;
    219             while (idx < input_file_count) : (idx += 1) {
    220                 var cache_hash_file = self.files.ptrAt(idx);
    221                 self.populate_file_hash(cache_hash_file) catch |err| {
    222                     self.manifest_file.?.close();
    223                     self.manifest_file = null;
    224                     return error.CacheUnavailable;
    225                 };
    226             }
    227             return false;
    228         }
    229 
    230         try self.final(out_digest);
    231         return true;
    232     }
    233 
    234     pub fn populate_file_hash(self: *@This(), cache_hash_file: *CacheHashFile) !void {
    235         debug.assert(cache_hash_file.path != null);
    236 
    237         const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{});
    238         defer this_file.close();
    239 
    240         cache_hash_file.stat = try this_file.stat();
    241 
    242         // TODO: check for problematic timestamp
    243 
    244         try hash_file(self.alloc, &cache_hash_file.bin_digest, &this_file);
    245         self.blake3.update(&cache_hash_file.bin_digest);
    246     }
    247 
    248     pub fn final(self: *@This(), out_digest: *ArrayList(u8)) !void {
    249         debug.assert(self.manifest_file_path != null);
    250 
    251         var bin_digest: [BIN_DIGEST_LEN]u8 = undefined;
    252         self.blake3.final(&bin_digest);
    253 
    254         const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN);
    255         try out_digest.resize(OUT_DIGEST_LEN);
    256         base64_encoder.encode(out_digest.toSlice(), &bin_digest);
    257     }
    258 
    259     pub fn write_manifest(self: *@This()) !void {
    260         debug.assert(self.manifest_file_path != null);
    261 
    262         const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN);
    263         var encoded_digest = try Buffer.initSize(self.alloc, OUT_DIGEST_LEN);
    264         defer encoded_digest.deinit();
    265         var contents = try Buffer.init(self.alloc, "");
    266         defer contents.deinit();
    267 
    268         for (self.files.toSlice()) |file| {
    269             base64_encoder.encode(encoded_digest.toSlice(), &file.bin_digest);
    270             try contents.print("{} {} {} {}\n", .{ file.file_handle, file.stat.mtime, encoded_digest.toSlice(), file.path });
    271         }
    272 
    273         try self.manifest_file.?.seekTo(0);
    274         try self.manifest_file.?.writeAll(contents.toSlice());
    275     }
    276 
    277     pub fn release(self: *@This()) void {
    278         debug.assert(self.manifest_file_path != null);
    279 
    280         if (self.manifest_dirty) {
    281             self.write_manifest() catch |err| {
    282                 debug.warn("Unable to write cache file '{}': {}\n", .{ self.manifest_file_path, err });
    283             };
    284         }
    285 
    286         self.manifest_file.?.close();
    287         if (self.manifest_file_path) |owned_slice| {
    288             self.alloc.free(owned_slice);
    289         }
    290         for (self.files.toSlice()) |*file| {
    291             file.deinit(self.alloc);
    292         }
    293         self.files.deinit();
    294         self.b64_digest.deinit();
    295     }
    296 };
    297 
    298 fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const File) !void {
    299     var blake3 = Blake3.init();
    300     var in_stream = handle.inStream().stream;
    301 
    302     const contents = try handle.inStream().stream.readAllAlloc(alloc, 64 * 1024);
    303     defer alloc.free(contents);
    304 
    305     blake3.update(contents);
    306 
    307     blake3.final(bin_digest);
    308 }
    309 
    310 test "cache file and the recall it" {
    311     const cwd = fs.cwd();
    312 
    313     const temp_manifest_dir = "temp_manifest_dir";
    314 
    315     try cwd.writeFile("test.txt", "Hello, world!\n");
    316 
    317     var digest1 = try ArrayList(u8).initCapacity(testing.allocator, 32);
    318     defer digest1.deinit();
    319     var digest2 = try ArrayList(u8).initCapacity(testing.allocator, 32);
    320     defer digest2.deinit();
    321 
    322     {
    323         var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
    324         defer ch.release();
    325 
    326         ch.cache(true);
    327         ch.cache(@as(u16, 1234));
    328         ch.cache_buf("1234");
    329         try ch.cache_file("test.txt");
    330 
    331         // There should be nothing in the cache
    332         debug.assert((try ch.hit(&digest1)) == false);
    333 
    334         try ch.final(&digest1);
    335     }
    336     {
    337         var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
    338         defer ch.release();
    339 
    340         ch.cache(true);
    341         ch.cache(@as(u16, 1234));
    342         ch.cache_buf("1234");
    343         try ch.cache_file("test.txt");
    344 
    345         // Cache hit! We just "built" the same file
    346         debug.assert((try ch.hit(&digest2)) == true);
    347     }
    348 
    349     debug.assert(mem.eql(u8, digest1.toSlice(), digest2.toSlice()));
    350 
    351     try cwd.deleteTree(temp_manifest_dir);
    352 }