diff --git a/build.zig.zon b/build.zig.zon index 3b0cd4a11d..4f4b948217 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,7 +1,7 @@ // The Zig compiler is not intended to be consumed as a package. // The sole purpose of this manifest file is to test the compiler. .{ - .name = "zig", + .name = .zig, .version = "0.0.0", .dependencies = .{ .standalone_test_cases = .{ @@ -12,4 +12,5 @@ }, }, .paths = .{""}, + .fingerprint = 0xc1ce108124179e16, } diff --git a/doc/build.zig.zon.md b/doc/build.zig.zon.md index dc3ac749a9..690905a400 100644 --- a/doc/build.zig.zon.md +++ b/doc/build.zig.zon.md @@ -10,7 +10,7 @@ build.zig. ### `name` -String. Required. +Enum literal. Required. This is the default name used by packages depending on this one. For example, when a user runs `zig fetch --save `, this field is used as the key in the @@ -20,12 +20,42 @@ will stick with this provided value. It is redundant to include "zig" in this name because it is already within the Zig package namespace. +Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes. + +Together with `fingerprint`, this represents a globally unique package identifier. + +### `fingerprint` + +Together with `name`, this represents a globally unique package identifier. This +field is auto-initialized by the toolchain when the package is first created, +and then *never changes*. This allows Zig to unambiguously detect when one +package is an updated version of another. + +When forking a Zig project, this fingerprint should be regenerated if the upstream +project is still maintained. Otherwise, the fork is *hostile*, attempting to +take control over the original project's identity. The fingerprint can be regenerated +by deleting the field and running `zig build`. + +This 64-bit integer is the combination of a 32-bit id component and a 32-bit +checksum. + +The id component within the fingerprint has these restrictions: + +`0x00000000` is reserved for legacy packages. 
+ +`0xffffffff` is reserved to represent "naked" packages. + +The checksum is computed from `name` and serves to protect Zig users from +accidental id collisions. + ### `version` String. Required. [semver](https://semver.org/) +Limited to 32 bytes. + ### `minimum_zig_version` String. Optional. diff --git a/lib/init/build.zig b/lib/init/build.zig index 9be615ac31..4db31713e4 100644 --- a/lib/init/build.zig +++ b/lib/init/build.zig @@ -42,14 +42,14 @@ pub fn build(b: *std.Build) void { // Modules can depend on one another using the `std.Build.Module.addImport` function. // This is what allows Zig source code to use `@import("foo")` where 'foo' is not a // file path. In this case, we set up `exe_mod` to import `lib_mod`. - exe_mod.addImport("$_lib", lib_mod); + exe_mod.addImport(".NAME_lib", lib_mod); // Now, we will create a static library based on the module we created above. // This creates a `std.Build.Step.Compile`, which is the build step responsible // for actually invoking the compiler. const lib = b.addLibrary(.{ .linkage = .static, - .name = "$", + .name = ".NAME", .root_module = lib_mod, }); @@ -61,7 +61,7 @@ pub fn build(b: *std.Build) void { // This creates another `std.Build.Step.Compile`, but this one builds an executable // rather than a static library. const exe = b.addExecutable(.{ - .name = "$", + .name = ".NAME", .root_module = exe_mod, }); diff --git a/lib/init/build.zig.zon b/lib/init/build.zig.zon index cb7229042f..1dbe7519e7 100644 --- a/lib/init/build.zig.zon +++ b/lib/init/build.zig.zon @@ -6,12 +6,30 @@ // // It is redundant to include "zig" in this name because it is already // within the Zig package namespace. - .name = "$", + .name = .LITNAME, // This is a [Semantic Version](https://semver.org/). // In a future version of Zig it will be used for package deduplication. .version = "0.0.0", + // Together with name, this represents a globally unique package + // identifier. 
This field is generated by the Zig toolchain when the + // package is first created, and then *never changes*. This allows + // unambiguous detection of one package being an updated version of + // another. + // + // When forking a Zig project, this id should be regenerated (delete the + // field and run `zig build`) if the upstream project is still maintained. + // Otherwise, the fork is *hostile*, attempting to take control over the + // original project's identity. Thus it is recommended to leave the comment + // on the following line intact, so that it shows up in code reviews that + // modify the field. + .fingerprint = .FINGERPRINT, // Changing this has security and trust implications. + + // Tracks the earliest Zig version that the package considers to be a + // supported use case. + .minimum_zig_version = ".ZIGVER", + // This field is optional. // This is currently advisory only; Zig does not yet do anything // with this value. diff --git a/lib/init/src/main.zig b/lib/init/src/main.zig index 66d5648c1e..85d8c93551 100644 --- a/lib/init/src/main.zig +++ b/lib/init/src/main.zig @@ -43,4 +43,4 @@ test "fuzz example" { const std = @import("std"); /// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details. 
/// Length in bytes of the legacy binary multihash: one byte for the hash
/// function code, one byte for the digest length, then the digest itself.
pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
/// Length of the legacy multihash once hex-encoded (two characters per byte).
pub const multihash_hex_digest_len = 2 * multihash_len;
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;

/// 64-bit package fingerprint stored in `build.zig.zon`: a 32-bit id paired
/// with a 32-bit CRC32 checksum of the package name.
pub const Fingerprint = packed struct(u64) {
    id: u32,
    checksum: u32,

    /// Creates a fresh fingerprint for `name`. The id is drawn from
    /// `1...0xfffffffe`, so neither value rejected by `validate`
    /// (`0x00000000`, `0xffffffff`) can be produced.
    pub fn generate(name: []const u8) Fingerprint {
        return .{
            .id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff),
            .checksum = std.hash.Crc32.hash(name),
        };
    }

    /// Returns true when the id is not one of the reserved values and the
    /// checksum matches the CRC32 of `name`.
    pub fn validate(n: Fingerprint, name: []const u8) bool {
        switch (n.id) {
            0x00000000, 0xffffffff => return false,
            else => return std.hash.Crc32.hash(name) == n.checksum,
        }
    }

    /// Reinterprets the fingerprint as the raw 64-bit integer.
    pub fn int(n: Fingerprint) u64 {
        return @bitCast(n);
    }
};

/// A user-readable, file system safe hash that identifies an exact package
/// snapshot, including file contents.
///
/// The hash is not only to prevent collisions but must resist attacks where
/// the adversary fully controls the contents being hashed. Thus, it contains
/// a full SHA-256 digest.
///
/// This data structure can be used to store the legacy hash format too. Legacy
/// hash format is scheduled to be removed after 0.14.0 is tagged.
///
/// There's also a third way this structure is used. When using path rather than
/// hash, a unique hash is still needed, so one is computed based on the path.
pub const Hash = struct {
    /// Maximum size of a package hash. Unused bytes at the end are
    /// filled with zeroes.
    bytes: [max_len]u8,

    pub const Algo = std.crypto.hash.sha2.Sha256;
    pub const Digest = [Algo.digest_length]u8;

    /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
    pub const max_len = 32 + 1 + 32 + 1 + (32 + 32 + 200) / 6;

    /// Copies `s` into a new `Hash`, zero-filling the unused tail.
    /// Asserts that `s` is at most `max_len` bytes.
    pub fn fromSlice(s: []const u8) Hash {
        assert(s.len <= max_len);
        var result: Hash = undefined;
        @memcpy(result.bytes[0..s.len], s);
        @memset(result.bytes[s.len..], 0);
        return result;
    }

    /// Returns the stored bytes with the trailing zero padding removed.
    /// An all-zero hash (for example `fromSlice("")` or `initPath("", false)`)
    /// yields an empty slice.
    pub fn toSlice(ph: *const Hash) []const u8 {
        var end: usize = ph.bytes.len;
        // Bounded scan: stop at index 0 instead of decrementing past it,
        // which would overflow `usize` when every byte is zero.
        while (end > 0) {
            end -= 1;
            if (ph.bytes[end] != 0) return ph.bytes[0 .. end + 1];
        }
        return ph.bytes[0..0];
    }

    /// Byte-wise equality over the full, zero-padded buffer.
    pub fn eql(a: *const Hash, b: *const Hash) bool {
        return std.mem.eql(u8, &a.bytes, &b.bytes);
    }

    /// Distinguishes whether the legacy multihash format is being stored here.
    pub fn isOld(h: *const Hash) bool {
        if (h.bytes.len < 2) return false;
        // Legacy hashes start with the hex-encoded multihash function code.
        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
        if (h.toSlice().len != multihash_hex_digest_len) return false;
        // The new format always contains '-' separators; the legacy one never does.
        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
    }

    test isOld {
        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
        try std.testing.expect(h.isOld());
    }

    /// Produces "$name-$semver-$hashplus".
    /// * name is the name field from build.zig.zon, asserted to be at most 32
    ///   bytes and assumed to be a valid zig identifier
    /// * semver is the version field from build.zig.zon, asserted to be at
    ///   most 32 bytes
    /// * hashplus is the following 33-byte array, base64 encoded using -_ to make
    ///   it filesystem safe:
    ///   - (4 bytes) LE u32 Package ID
    ///   - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated
    ///   - (25 bytes) truncated SHA-256 digest of hashed files of the package
    pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u32, size: u32) Hash {
        assert(name.len <= 32);
        assert(ver.len <= 32);
        var result: Hash = undefined;
        var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes);
        buf.appendSliceAssumeCapacity(name);
        buf.appendAssumeCapacity('-');
        buf.appendSliceAssumeCapacity(ver);
        buf.appendAssumeCapacity('-');
        var hashplus: [33]u8 = undefined;
        std.mem.writeInt(u32, hashplus[0..4], id, .little);
        std.mem.writeInt(u32, hashplus[4..8], size, .little);
        hashplus[8..].* = digest[0..25].*;
        // 33 input bytes encode to exactly 44 base64 characters without padding.
        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(44), &hashplus);
        @memset(buf.unusedCapacitySlice(), 0);
        return result;
    }

    /// Produces a unique hash based on the path provided. The result should
    /// not be user-visible.
    ///
    /// Paths that fit are stored verbatim (prefixed with '/' when `is_global`
    /// is set); longer paths are replaced by the hex SHA-256 of the path.
    pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
        var result: Hash = .{ .bytes = @splat(0) };
        var i: usize = 0;
        if (is_global) {
            result.bytes[0] = '/';
            i += 1;
        }
        if (i + sub_path.len <= result.bytes.len) {
            @memcpy(result.bytes[i..][0..sub_path.len], sub_path);
            return result;
        }
        var bin_digest: [Algo.digest_length]u8 = undefined;
        Algo.hash(sub_path, &bin_digest, .{});
        _ = std.fmt.bufPrint(result.bytes[i..], "{}", .{std.fmt.fmtSliceHexLower(&bin_digest)}) catch unreachable;
        return result;
    }
};

/// Hash function codes used by the legacy multihash encoding.
pub const MultihashFunction = enum(u16) {
    identity = 0x00,
    sha1 = 0x11,
    @"sha2-256" = 0x12,
    @"sha2-512" = 0x13,
    @"sha3-512" = 0x14,
    @"sha3-384" = 0x15,
    @"sha3-256" = 0x16,
    @"sha3-224" = 0x17,
    @"sha2-384" = 0x20,
    @"sha2-256-trunc254-padded" = 0x1012,
    @"sha2-224" = 0x1013,
    @"sha2-512-224" = 0x1014,
    @"sha2-512-256" = 0x1015,
    @"blake2b-256" = 0xb220,
    _,
};

/// The multihash function code corresponding to `Hash.Algo`.
pub const multihash_function: MultihashFunction = switch (Hash.Algo) {
    std.crypto.hash.sha2.Sha256 => .@"sha2-256",
    else => unreachable,
};

/// Encodes `digest` in the legacy format: hex of the function code byte,
/// hex of the digest length byte, then hex of the digest bytes.
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
    const hex_charset = std.fmt.hex_charset;

    var result: MultiHashHexDigest = undefined;

    result[0] = hex_charset[@intFromEnum(multihash_function) >> 4];
    result[1] = hex_charset[@intFromEnum(multihash_function) & 15];

    result[2] = hex_charset[Hash.Algo.digest_length >> 4];
    result[3] = hex_charset[Hash.Algo.digest_length & 15];

    for (digest, 0..) |byte, i| {
        result[4 + i * 2] = hex_charset[byte >> 4];
        result[5 + i * 2] = hex_charset[byte & 15];
    }
    return result;
}
+ assert(@intFromEnum(multihash_function) < 127); + assert(Hash.Algo.digest_length < 127); +} + +test Hash { + const example_digest: Hash.Digest = .{ + 0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87, + 0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f, + }; + const result: Hash = .init(example_digest, "nasm", "2.16.1-3", 0xcafebabe, 10 * 1024 * 1024); + try std.testing.expectEqualStrings("nasm-2.16.1-3-vrr-ygAAoADH9XG3tOdvPNuHen_d-XeHndOG-nNXmved", result.toSlice()); +} + test { _ = Fetch; } diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 0d6cf55636..efee6605ed 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -44,6 +44,8 @@ omit_missing_hash_error: bool, /// which specifies inclusion rules. This is intended to be true for the first /// fetch task and false for the recursive dependencies. allow_missing_paths_field: bool, +allow_missing_fingerprint: bool, +allow_name_string: bool, /// If true and URL points to a Git repository, will use the latest commit. use_latest_commit: bool, @@ -56,7 +58,7 @@ package_root: Cache.Path, error_bundle: ErrorBundle.Wip, manifest: ?Manifest, manifest_ast: std.zig.Ast, -actual_hash: Manifest.Digest, +computed_hash: ComputedHash, /// Fetch logic notices whether a package has a build.zig file and sets this flag. has_build_zig: bool, /// Indicates whether the task aborted due to an out-of-memory condition. @@ -116,8 +118,8 @@ pub const JobQueue = struct { /// as lazy. 
unlazy_set: UnlazySet = .{}, - pub const Table = std.AutoArrayHashMapUnmanaged(Manifest.MultiHashHexDigest, *Fetch); - pub const UnlazySet = std.AutoArrayHashMapUnmanaged(Manifest.MultiHashHexDigest, void); + pub const Table = std.AutoArrayHashMapUnmanaged(Package.Hash, *Fetch); + pub const UnlazySet = std.AutoArrayHashMapUnmanaged(Package.Hash, void); pub fn deinit(jq: *JobQueue) void { if (jq.all_fetches.items.len == 0) return; @@ -160,22 +162,24 @@ pub const JobQueue = struct { // Ensure the generated .zig file is deterministic. jq.table.sortUnstable(@as(struct { - keys: []const Manifest.MultiHashHexDigest, + keys: []const Package.Hash, pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool { - return std.mem.lessThan(u8, &ctx.keys[a_index], &ctx.keys[b_index]); + return std.mem.lessThan(u8, &ctx.keys[a_index].bytes, &ctx.keys[b_index].bytes); } }, .{ .keys = keys })); - for (keys, jq.table.values()) |hash, fetch| { + for (keys, jq.table.values()) |*hash, fetch| { if (fetch == jq.all_fetches.items[0]) { // The first one is a dummy package for the current project. 
continue; } + const hash_slice = hash.toSlice(); + try buf.writer().print( \\ pub const {} = struct {{ \\ - , .{std.zig.fmtId(&hash)}); + , .{std.zig.fmtId(hash_slice)}); lazy: { switch (fetch.lazy_status) { @@ -207,7 +211,7 @@ pub const JobQueue = struct { try buf.writer().print( \\ pub const build_zig = @import("{}"); \\ - , .{std.zig.fmtEscapes(&hash)}); + , .{std.zig.fmtEscapes(hash_slice)}); } if (fetch.manifest) |*manifest| { @@ -219,7 +223,7 @@ pub const JobQueue = struct { const h = depDigest(fetch.package_root, jq.global_cache, dep) orelse continue; try buf.writer().print( " .{{ \"{}\", \"{}\" }},\n", - .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(&h) }, + .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(h.toSlice()) }, ); } @@ -251,7 +255,7 @@ pub const JobQueue = struct { const h = depDigest(root_fetch.package_root, jq.global_cache, dep) orelse continue; try buf.writer().print( " .{{ \"{}\", \"{}\" }},\n", - .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(&h) }, + .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(h.toSlice()) }, ); } try buf.appendSlice("};\n"); @@ -283,7 +287,7 @@ pub const Location = union(enum) { url: []const u8, /// If this is null it means the user omitted the hash field from a dependency. /// It will be an error but the logic should still fetch and print the discovered hash. - hash: ?Manifest.MultiHashHexDigest, + hash: ?Package.Hash, }; }; @@ -325,9 +329,11 @@ pub fn run(f: *Fetch) RunError!void { // "p/$hash/foo", with possibly more directories after "foo". // We want to fail unless the resolved relative path has a // prefix of "p/$hash/". - const digest_len = @typeInfo(Manifest.MultiHashHexDigest).array.len; const prefix_len: usize = if (f.job_queue.read_only) 0 else "p/".len; - const expected_prefix = f.parent_package_root.sub_path[0 .. 
prefix_len + digest_len]; + const parent_sub_path = f.parent_package_root.sub_path; + const end = std.mem.indexOfScalarPos(u8, parent_sub_path, prefix_len, fs.path.sep) orelse + parent_sub_path.len; + const expected_prefix = parent_sub_path[prefix_len..end]; if (!std.mem.startsWith(u8, pkg_root.sub_path, expected_prefix)) { return f.fail( f.location_tok, @@ -367,9 +373,13 @@ pub fn run(f: *Fetch) RunError!void { }, }; - const s = fs.path.sep_str; if (remote.hash) |expected_hash| { - const prefixed_pkg_sub_path = "p" ++ s ++ expected_hash; + var prefixed_pkg_sub_path_buffer: [Package.Hash.max_len + 2]u8 = undefined; + prefixed_pkg_sub_path_buffer[0] = 'p'; + prefixed_pkg_sub_path_buffer[1] = fs.path.sep; + const hash_slice = expected_hash.toSlice(); + @memcpy(prefixed_pkg_sub_path_buffer[2..][0..hash_slice.len], hash_slice); + const prefixed_pkg_sub_path = prefixed_pkg_sub_path_buffer[0 .. 2 + hash_slice.len]; const prefix_len: usize = if (f.job_queue.read_only) "p/".len else 0; const pkg_sub_path = prefixed_pkg_sub_path[prefix_len..]; if (cache_root.handle.access(pkg_sub_path, .{})) |_| { @@ -437,7 +447,7 @@ fn runResource( f: *Fetch, uri_path: []const u8, resource: *Resource, - remote_hash: ?Manifest.MultiHashHexDigest, + remote_hash: ?Package.Hash, ) RunError!void { defer resource.deinit(); const arena = f.arena.allocator(); @@ -499,7 +509,7 @@ fn runResource( // Empty directories have already been omitted by `unpackResource`. // Compute the package hash based on the remaining files in the temporary // directory. - f.actual_hash = try computeHash(f, pkg_path, filter); + f.computed_hash = try computeHash(f, pkg_path, filter); break :blk if (unpack_result.root_dir.len > 0) try fs.path.join(arena, &.{ tmp_dir_sub_path, unpack_result.root_dir }) @@ -507,6 +517,8 @@ fn runResource( tmp_dir_sub_path; }; + const computed_package_hash = computedPackageHash(f); + // Rename the temporary directory into the global zig package cache // directory. 
If the hash already exists, delete the temporary directory // and leave the zig package cache directory untouched as it may be in use @@ -515,7 +527,7 @@ fn runResource( f.package_root = .{ .root_dir = cache_root, - .sub_path = try arena.dupe(u8, "p" ++ s ++ Manifest.hexDigest(f.actual_hash)), + .sub_path = try std.fmt.allocPrint(arena, "p" ++ s ++ "{s}", .{computed_package_hash.toSlice()}), }; renameTmpIntoCache(cache_root.handle, package_sub_path, f.package_root.sub_path) catch |err| { const src = try cache_root.join(arena, &.{tmp_dir_sub_path}); @@ -534,13 +546,22 @@ fn runResource( // Validate the computed hash against the expected hash. If invalid, this // job is done. - const actual_hex = Manifest.hexDigest(f.actual_hash); if (remote_hash) |declared_hash| { - if (!std.mem.eql(u8, &declared_hash, &actual_hex)) { - return f.fail(f.hash_tok, try eb.printString( - "hash mismatch: manifest declares {s} but the fetched package has {s}", - .{ declared_hash, actual_hex }, - )); + if (declared_hash.isOld()) { + const actual_hex = Package.multiHashHexDigest(f.computed_hash.digest); + if (!std.mem.eql(u8, declared_hash.toSlice(), &actual_hex)) { + return f.fail(f.hash_tok, try eb.printString( + "hash mismatch: manifest declares {s} but the fetched package has {s}", + .{ declared_hash.toSlice(), actual_hex }, + )); + } + } else { + if (!computed_package_hash.eql(&declared_hash)) { + return f.fail(f.hash_tok, try eb.printString( + "hash mismatch: manifest declares {s} but the fetched package has {s}", + .{ declared_hash.toSlice(), computed_package_hash.toSlice() }, + )); + } } } else if (!f.omit_missing_hash_error) { const notes_len = 1; @@ -551,7 +572,7 @@ fn runResource( }); const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}), + .msg = try eb.printString("expected .hash = \"{s}\",", .{computed_package_hash.toSlice()}), })); return 
/// Builds the `Package.Hash` for the fetched package from the computed
/// digest and total size, combined with the manifest's name, version and id
/// when a manifest is present.
///
/// The total size is saturated to `maxInt(u32)` when it does not fit in 32 bits.
/// Packages without a manifest ("naked" packages) get the placeholder name
/// "N", version "V" and the reserved id `0xffffffff`.
pub fn computedPackageHash(f: *const Fetch) Package.Hash {
    const saturated_size = std.math.cast(u32, f.computed_hash.total_size) orelse std.math.maxInt(u32);
    if (f.manifest) |man| {
        var version_buffer: [32]u8 = undefined;
        // If the formatted version does not fit, fall back to the whole
        // 32-byte buffer, which still satisfies the length assertion in
        // `Package.Hash.init`.
        const version: []const u8 = std.fmt.bufPrint(&version_buffer, "{}", .{man.version}) catch &version_buffer;
        return .init(f.computed_hash.digest, man.name, version, man.id, saturated_size);
    }
    // In the future build.zig.zon fields will be added to allow overriding these values
    // for naked tarballs.
    //
    // 0xffffffff is the id reserved for naked packages. It is never produced
    // by `Package.Fingerprint.generate` and is rejected by
    // `Package.Fingerprint.validate`, so it cannot collide with a real id.
    return .init(f.computed_hash.digest, "N", "V", 0xffffffff, saturated_size);
}
(gop.found_existing) { if (!dep.lazy) { gop.value_ptr.*.lazy_status = .eager; @@ -683,15 +717,15 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { continue; } gop.value_ptr.* = new_fetch; - break :h multihash_digest; + break :h pkg_hash; }, } }, .path => |rel_path| l: { // This might produce an invalid path, which is checked for // at the beginning of run(). const new_root = try f.package_root.resolvePosix(parent_arena, rel_path); - const multihash_digest = relativePathDigest(new_root, cache_root); - const gop = f.job_queue.table.getOrPutAssumeCapacity(multihash_digest); + const pkg_hash = relativePathDigest(new_root, cache_root); + const gop = f.job_queue.table.getOrPutAssumeCapacity(pkg_hash); if (gop.found_existing) { if (!dep.lazy) { gop.value_ptr.*.lazy_status = .eager; @@ -718,13 +752,15 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { .job_queue = f.job_queue, .omit_missing_hash_error = false, .allow_missing_paths_field = true, + .allow_missing_fingerprint = true, + .allow_name_string = true, .use_latest_commit = false, .package_root = undefined, .error_bundle = undefined, .manifest = null, .manifest_ast = undefined, - .actual_hash = undefined, + .computed_hash = undefined, .has_build_zig = false, .oom_flag = false, .latest_commit = null, @@ -746,20 +782,8 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { } } -pub fn relativePathDigest( - pkg_root: Cache.Path, - cache_root: Cache.Directory, -) Manifest.MultiHashHexDigest { - var hasher = Manifest.Hash.init(.{}); - // This hash is a tuple of: - // * whether it relative to the global cache directory or to the root package - // * the relative file path from there to the build root of the package - hasher.update(if (pkg_root.root_dir.eql(cache_root)) - &package_hash_prefix_cached - else - &package_hash_prefix_project); - hasher.update(pkg_root.sub_path); - return Manifest.hexDigest(hasher.finalResult()); +pub fn relativePathDigest(pkg_root: Cache.Path, cache_root: Cache.Directory) Package.Hash { + return 
.initPath(pkg_root.sub_path, pkg_root.root_dir.eql(cache_root)); } pub fn workerRun(f: *Fetch, prog_name: []const u8) void { @@ -1387,11 +1411,7 @@ fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, tmp_dir: fs.Dir) anyerror!void } } -pub fn renameTmpIntoCache( - cache_dir: fs.Dir, - tmp_dir_sub_path: []const u8, - dest_dir_sub_path: []const u8, -) !void { +pub fn renameTmpIntoCache(cache_dir: fs.Dir, tmp_dir_sub_path: []const u8, dest_dir_sub_path: []const u8) !void { assert(dest_dir_sub_path[1] == fs.path.sep); var handled_missing_dir = false; while (true) { @@ -1417,16 +1437,17 @@ pub fn renameTmpIntoCache( } } +const ComputedHash = struct { + digest: Package.Hash.Digest, + total_size: u64, +}; + /// Assumes that files not included in the package have already been filtered /// prior to calling this function. This ensures that files not protected by /// the hash are not present on the file system. Empty directories are *not /// hashed* and must not be present on the file system when calling this /// function. -fn computeHash( - f: *Fetch, - pkg_path: Cache.Path, - filter: Filter, -) RunError!Manifest.Digest { +fn computeHash(f: *Fetch, pkg_path: Cache.Path, filter: Filter) RunError!ComputedHash { // All the path name strings need to be in memory for sorting. const arena = f.arena.allocator(); const gpa = f.arena.child_allocator; @@ -1449,6 +1470,9 @@ fn computeHash( var walker = try root_dir.walk(gpa); defer walker.deinit(); + // Total number of bytes of file contents included in the package. + var total_size: u64 = 0; + { // The final hash will be a hash of each file hashed independently. This // allows hashing in parallel. 
@@ -1506,6 +1530,7 @@ fn computeHash( .kind = kind, .hash = undefined, // to be populated by the worker .failure = undefined, // to be populated by the worker + .size = undefined, // to be populated by the worker }; thread_pool.spawnWg(&wait_group, workerHashFile, .{ root_dir, hashed_file }); try all_files.append(hashed_file); @@ -1544,7 +1569,7 @@ fn computeHash( std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan); - var hasher = Manifest.Hash.init(.{}); + var hasher = Package.Hash.Algo.init(.{}); var any_failures = false; for (all_files.items) |hashed_file| { hashed_file.failure catch |err| { @@ -1556,6 +1581,7 @@ fn computeHash( }); }; hasher.update(&hashed_file.hash); + total_size += hashed_file.size; } for (deleted_files.items) |deleted_file| { deleted_file.failure catch |err| { @@ -1580,7 +1606,10 @@ fn computeHash( }; } - return hasher.finalResult(); + return .{ + .digest = hasher.finalResult(), + .total_size = total_size, + }; } fn dumpHashInfo(all_files: []const *const HashedFile) !void { @@ -1609,8 +1638,9 @@ fn workerDeleteFile(dir: fs.Dir, deleted_file: *DeletedFile) void { fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void { var buf: [8000]u8 = undefined; - var hasher = Manifest.Hash.init(.{}); + var hasher = Package.Hash.Algo.init(.{}); hasher.update(hashed_file.normalized_path); + var file_size: u64 = 0; switch (hashed_file.kind) { .file => { @@ -1622,6 +1652,7 @@ fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void while (true) { const bytes_read = try file.read(&buf); if (bytes_read == 0) break; + file_size += bytes_read; hasher.update(buf[0..bytes_read]); file_header.update(buf[0..bytes_read]); } @@ -1641,6 +1672,7 @@ fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void }, } hasher.final(&hashed_file.hash); + hashed_file.size = file_size; } fn deleteFileFallible(dir: fs.Dir, deleted_file: *DeletedFile) DeletedFile.Error!void { @@ -1667,9 
+1699,10 @@ const DeletedFile = struct { const HashedFile = struct { fs_path: []const u8, normalized_path: []const u8, - hash: Manifest.Digest, + hash: Package.Hash.Digest, failure: Error!void, kind: Kind, + size: u64, const Error = fs.File.OpenError || @@ -1744,12 +1777,8 @@ const Filter = struct { } }; -pub fn depDigest( - pkg_root: Cache.Path, - cache_root: Cache.Directory, - dep: Manifest.Dependency, -) ?Manifest.MultiHashHexDigest { - if (dep.hash) |h| return h[0..Manifest.multihash_hex_digest_len].*; +pub fn depDigest(pkg_root: Cache.Path, cache_root: Cache.Directory, dep: Manifest.Dependency) ?Package.Hash { + if (dep.hash) |h| return .fromSlice(h); switch (dep.location) { .url => return null, @@ -1763,10 +1792,6 @@ pub fn depDigest( } } -// These are random bytes. -const package_hash_prefix_cached = [8]u8{ 0x53, 0x7e, 0xfa, 0x94, 0x65, 0xe9, 0xf8, 0x73 }; -const package_hash_prefix_project = [8]u8{ 0xe1, 0x25, 0xee, 0xfa, 0xa6, 0x17, 0x38, 0xcc }; - const builtin = @import("builtin"); const std = @import("std"); const fs = std.fs; @@ -2137,7 +2162,7 @@ test "tarball with excluded duplicate paths" { defer fb.deinit(); try fetch.run(); - const hex_digest = Package.Manifest.hexDigest(fetch.actual_hash); + const hex_digest = Package.multiHashHexDigest(fetch.computed_hash.digest); try std.testing.expectEqualStrings( "12200bafe035cbb453dd717741b66e9f9d1e6c674069d06121dafa1b2e62eb6b22da", &hex_digest, @@ -2181,7 +2206,7 @@ test "tarball without root folder" { defer fb.deinit(); try fetch.run(); - const hex_digest = Package.Manifest.hexDigest(fetch.actual_hash); + const hex_digest = Package.multiHashHexDigest(fetch.computed_hash.digest); try std.testing.expectEqualStrings( "12209f939bfdcb8b501a61bb4a43124dfa1b2848adc60eec1e4624c560357562b793", &hex_digest, @@ -2222,7 +2247,7 @@ test "set executable bit based on file content" { try fetch.run(); try std.testing.expectEqualStrings( "1220fecb4c06a9da8673c87fe8810e15785f1699212f01728eadce094d21effeeef3", - 
&Manifest.hexDigest(fetch.actual_hash), + &Package.multiHashHexDigest(fetch.computed_hash.digest), ); var out = try fb.packageDir(); @@ -2298,13 +2323,15 @@ const TestFetchBuilder = struct { .job_queue = &self.job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = false, + .allow_missing_fingerprint = true, // so we can keep using the old testdata .tar.gz + .allow_name_string = true, // so we can keep using the old testdata .tar.gz .use_latest_commit = true, .package_root = undefined, .error_bundle = undefined, .manifest = null, .manifest_ast = undefined, - .actual_hash = undefined, + .computed_hash = undefined, .has_build_zig = false, .oom_flag = false, .latest_commit = null, diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 4eed6cc386..c526854df2 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -5,15 +5,12 @@ const Allocator = std.mem.Allocator; const assert = std.debug.assert; const Ast = std.zig.Ast; const testing = std.testing; -const hex_charset = std.fmt.hex_charset; +const Package = @import("../Package.zig"); pub const max_bytes = 10 * 1024 * 1024; pub const basename = "build.zig.zon"; -pub const Hash = std.crypto.hash.sha2.Sha256; -pub const Digest = [Hash.digest_length]u8; -pub const multihash_len = 1 + 1 + Hash.digest_length; -pub const multihash_hex_digest_len = 2 * multihash_len; -pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; +pub const max_name_len = 32; +pub const max_version_len = 32; pub const Dependency = struct { location: Location, @@ -38,36 +35,8 @@ pub const ErrorMessage = struct { off: u32, }; -pub const MultihashFunction = enum(u16) { - identity = 0x00, - sha1 = 0x11, - @"sha2-256" = 0x12, - @"sha2-512" = 0x13, - @"sha3-512" = 0x14, - @"sha3-384" = 0x15, - @"sha3-256" = 0x16, - @"sha3-224" = 0x17, - @"sha2-384" = 0x20, - @"sha2-256-trunc254-padded" = 0x1012, - @"sha2-224" = 0x1013, - @"sha2-512-224" = 0x1014, - @"sha2-512-256" = 0x1015, - @"blake2b-256" = 0xb220, - 
_, -}; - -pub const multihash_function: MultihashFunction = switch (Hash) { - std.crypto.hash.sha2.Sha256 => .@"sha2-256", - else => @compileError("unreachable"), -}; -comptime { - // We avoid unnecessary uleb128 code in hexDigest by asserting here the - // values are small enough to be contained in the one-byte encoding. - assert(@intFromEnum(multihash_function) < 127); - assert(Hash.digest_length < 127); -} - name: []const u8, +id: u32, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), @@ -80,6 +49,10 @@ arena_state: std.heap.ArenaAllocator.State, pub const ParseOptions = struct { allow_missing_paths_field: bool = false, + /// Deprecated, to be removed after 0.14.0 is tagged. + allow_name_string: bool = true, + /// Deprecated, to be removed after 0.14.0 is tagged. + allow_missing_fingerprint: bool = true, }; pub const Error = Allocator.Error; @@ -100,12 +73,15 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .errors = .{}, .name = undefined, + .id = 0, .version = undefined, .version_node = 0, .dependencies = .{}, .dependencies_node = 0, .paths = .{}, .allow_missing_paths_field = options.allow_missing_paths_field, + .allow_name_string = options.allow_name_string, + .allow_missing_fingerprint = options.allow_missing_fingerprint, .minimum_zig_version = null, .buf = .{}, }; @@ -121,6 +97,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { return .{ .name = p.name, + .id = p.id, .version = p.version, .version_node = p.version_node, .dependencies = try p.dependencies.clone(p.arena), @@ -164,22 +141,6 @@ pub fn copyErrorsIntoBundle( } } -pub fn hexDigest(digest: Digest) MultiHashHexDigest { - var result: MultiHashHexDigest = undefined; - - result[0] = hex_charset[@intFromEnum(multihash_function) >> 4]; - result[1] = hex_charset[@intFromEnum(multihash_function) & 15]; - - result[2] = hex_charset[Hash.digest_length >> 4]; - result[3] = 
hex_charset[Hash.digest_length & 15]; - - for (digest, 0..) |byte, i| { - result[4 + i * 2] = hex_charset[byte >> 4]; - result[5 + i * 2] = hex_charset[byte & 15]; - } - return result; -} - const Parse = struct { gpa: Allocator, ast: Ast, @@ -188,12 +149,15 @@ const Parse = struct { errors: std.ArrayListUnmanaged(ErrorMessage), name: []const u8, + id: u32, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), dependencies_node: Ast.Node.Index, paths: std.StringArrayHashMapUnmanaged(void), allow_missing_paths_field: bool, + allow_name_string: bool, + allow_missing_fingerprint: bool, minimum_zig_version: ?std.SemanticVersion, const InnerError = error{ ParseFailure, OutOfMemory }; @@ -211,6 +175,7 @@ const Parse = struct { var have_name = false; var have_version = false; var have_included_paths = false; + var fingerprint: ?Package.Fingerprint = null; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -225,11 +190,16 @@ const Parse = struct { have_included_paths = true; try parseIncludedPaths(p, field_init); } else if (mem.eql(u8, field_name, "name")) { - p.name = try parseString(p, field_init); + p.name = try parseName(p, field_init); have_name = true; + } else if (mem.eql(u8, field_name, "fingerprint")) { + fingerprint = try parseFingerprint(p, field_init); } else if (mem.eql(u8, field_name, "version")) { p.version_node = field_init; const version_text = try parseString(p, field_init); + if (version_text.len > max_version_len) { + try appendError(p, main_tokens[field_init], "version string length {d} exceeds maximum of {d}", .{ version_text.len, max_version_len }); + } p.version = std.SemanticVersion.parse(version_text) catch |err| v: { try appendError(p, main_tokens[field_init], "unable to parse semantic version: {s}", .{@errorName(err)}); break :v undefined; @@ -249,6 +219,21 @@ const Parse = struct { if (!have_name) { try appendError(p, main_token, 
"missing top-level 'name' field", .{}); + } else { + if (fingerprint) |n| { + if (!n.validate(p.name)) { + return fail(p, main_token, "invalid fingerprint: 0x{x}; if this is a new or forked package, use this value: 0x{x}", .{ + n.int(), Package.Fingerprint.generate(p.name).int(), + }); + } + p.id = n.id; + } else if (!p.allow_missing_fingerprint) { + try appendError(p, main_token, "missing top-level 'fingerprint' field; suggested value: 0x{x}", .{ + Package.Fingerprint.generate(p.name).int(), + }); + } else { + p.id = 0; + } } if (!have_version) { @@ -400,6 +385,59 @@ const Parse = struct { } } + fn parseFingerprint(p: *Parse, node: Ast.Node.Index) !Package.Fingerprint { + const ast = p.ast; + const node_tags = ast.nodes.items(.tag); + const main_tokens = ast.nodes.items(.main_token); + const main_token = main_tokens[node]; + if (node_tags[node] != .number_literal) { + return fail(p, main_token, "expected integer literal", .{}); + } + const token_bytes = ast.tokenSlice(main_token); + const parsed = std.zig.parseNumberLiteral(token_bytes); + switch (parsed) { + .int => |n| return @bitCast(n), + .big_int, .float => return fail(p, main_token, "expected u64 integer literal, found {s}", .{ + @tagName(parsed), + }), + .failure => |err| return fail(p, main_token, "bad integer literal: {s}", .{@tagName(err)}), + } + } + + fn parseName(p: *Parse, node: Ast.Node.Index) ![]const u8 { + const ast = p.ast; + const node_tags = ast.nodes.items(.tag); + const main_tokens = ast.nodes.items(.main_token); + const main_token = main_tokens[node]; + + if (p.allow_name_string and node_tags[node] == .string_literal) { + const name = try parseString(p, node); + if (!std.zig.isValidId(name)) + return fail(p, main_token, "name must be a valid bare zig identifier (hint: switch from string to enum literal)", .{}); + + if (name.len > max_name_len) + return fail(p, main_token, "name '{}' exceeds max length of {d}", .{ + std.zig.fmtId(name), max_name_len, + }); + + return name; + } + + if 
(node_tags[node] != .enum_literal) + return fail(p, main_token, "expected enum literal", .{}); + + const ident_name = ast.tokenSlice(main_token); + if (mem.startsWith(u8, ident_name, "@")) + return fail(p, main_token, "name must be a valid bare zig identifier", .{}); + + if (ident_name.len > max_name_len) + return fail(p, main_token, "name '{}' exceeds max length of {d}", .{ + std.zig.fmtId(ident_name), max_name_len, + }); + + return ident_name; + } + fn parseString(p: *Parse, node: Ast.Node.Index) ![]const u8 { const ast = p.ast; const node_tags = ast.nodes.items(.tag); @@ -421,21 +459,8 @@ const Parse = struct { const tok = main_tokens[node]; const h = try parseString(p, node); - if (h.len >= 2) { - const their_multihash_func = std.fmt.parseInt(u8, h[0..2], 16) catch |err| { - return fail(p, tok, "invalid multihash value: unable to parse hash function: {s}", .{ - @errorName(err), - }); - }; - if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) { - return fail(p, tok, "unsupported hash function: only sha2-256 is supported", .{}); - } - } - - if (h.len != multihash_hex_digest_len) { - return fail(p, tok, "wrong hash size. 
expected: {d}, found: {d}", .{ - multihash_hex_digest_len, h.len, - }); + if (h.len > Package.Hash.max_len) { + return fail(p, tok, "hash length exceeds maximum: {d}", .{h.len}); } return h; diff --git a/src/main.zig b/src/main.zig index 5e66244484..7b1bc50bd6 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4741,6 +4741,7 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { const cwd_path = try process.getCwdAlloc(arena); const cwd_basename = fs.path.basename(cwd_path); + const sanitized_root_name = try sanitizeExampleName(arena, cwd_basename); const s = fs.path.sep_str; const template_paths = [_][]const u8{ @@ -4751,8 +4752,10 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { }; var ok_count: usize = 0; + const fingerprint: Package.Fingerprint = .generate(sanitized_root_name); + for (template_paths) |template_path| { - if (templates.write(arena, fs.cwd(), cwd_basename, template_path)) |_| { + if (templates.write(arena, fs.cwd(), sanitized_root_name, template_path, fingerprint)) |_| { std.log.info("created {s}", .{template_path}); ok_count += 1; } else |err| switch (err) { @@ -4769,6 +4772,37 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { return cleanExit(); } +fn sanitizeExampleName(arena: Allocator, bytes: []const u8) error{OutOfMemory}![]const u8 { + var result: std.ArrayListUnmanaged(u8) = .empty; + for (bytes, 0..) 
|byte, i| switch (byte) { + '0'...'9' => { + if (i == 0) try result.append(arena, '_'); + try result.append(arena, byte); + }, + '_', 'a'...'z', 'A'...'Z' => try result.append(arena, byte), + '-', '.', ' ' => try result.append(arena, '_'), + else => continue, + }; + if (result.items.len == 0) return "foo"; + if (result.items.len > Package.Manifest.max_name_len) + result.shrinkRetainingCapacity(Package.Manifest.max_name_len); + + return result.toOwnedSlice(arena); +} + +test sanitizeExampleName { + var arena_instance = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + try std.testing.expectEqualStrings("foo_bar", try sanitizeExampleName(arena, "foo bar+")); + try std.testing.expectEqualStrings("foo", try sanitizeExampleName(arena, "")); + try std.testing.expectEqualStrings("foo", try sanitizeExampleName(arena, "!")); + try std.testing.expectEqualStrings("a", try sanitizeExampleName(arena, "!a")); + try std.testing.expectEqualStrings("a_b", try sanitizeExampleName(arena, "a.b!")); + try std.testing.expectEqualStrings("_01234", try sanitizeExampleName(arena, "01234")); +} + fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { dev.check(.build_command); @@ -5191,13 +5225,15 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .job_queue = &job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = false, + .allow_missing_fingerprint = false, + .allow_name_string = false, .use_latest_commit = false, .package_root = undefined, .error_bundle = undefined, .manifest = null, .manifest_ast = undefined, - .actual_hash = undefined, + .computed_hash = undefined, .has_build_zig = true, .oom_flag = false, .latest_commit = null, @@ -5244,13 +5280,14 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { const hashes = job_queue.table.keys(); const fetches = job_queue.table.values(); try 
deps_mod.deps.ensureUnusedCapacity(arena, @intCast(hashes.len)); - for (hashes, fetches) |hash, f| { + for (hashes, fetches) |*hash, f| { if (f == &fetch) { // The first one is a dummy package for the current project. continue; } if (!f.has_build_zig) continue; + const hash_slice = hash.toSlice(); const m = try Package.Module.create(arena, .{ .global_cache_directory = global_cache_directory, .paths = .{ @@ -5260,7 +5297,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .fully_qualified_name = try std.fmt.allocPrint( arena, "root.@dependencies.{s}", - .{&hash}, + .{hash_slice}, ), .cc_argv = &.{}, .inherited = .{}, @@ -5269,7 +5306,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .builtin_mod = builtin_mod, .builtin_modules = null, // `builtin_mod` is specified }); - const hash_cloned = try arena.dupe(u8, &hash); + const hash_cloned = try arena.dupe(u8, hash_slice); deps_mod.deps.putAssumeCapacityNoClobber(hash_cloned, m); f.module = m; } @@ -5385,23 +5422,22 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { var any_errors = false; while (it.next()) |hash| { if (hash.len == 0) continue; - const digest_len = @typeInfo(Package.Manifest.MultiHashHexDigest).array.len; - if (hash.len != digest_len) { - std.log.err("invalid digest (length {d} instead of {d}): '{s}'", .{ - hash.len, digest_len, hash, + if (hash.len > Package.Hash.max_len) { + std.log.err("invalid digest (length {d} exceeds maximum): '{s}'", .{ + hash.len, hash, }); any_errors = true; continue; } - try unlazy_set.put(arena, hash[0..digest_len].*, {}); + try unlazy_set.put(arena, .fromSlice(hash), {}); } if (any_errors) process.exit(3); if (system_pkg_dir_path) |p| { // In this mode, the system needs to provide these packages; they // cannot be fetched by Zig. 
- for (unlazy_set.keys()) |hash| { + for (unlazy_set.keys()) |*hash| { std.log.err("lazy dependency package not found: {s}" ++ s ++ "{s}", .{ - p, hash, + p, hash.toSlice(), }); } std.log.info("remote package fetching disabled due to --system mode", .{}); @@ -7091,13 +7127,15 @@ fn cmdFetch( .job_queue = &job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = false, + .allow_missing_fingerprint = true, + .allow_name_string = true, .use_latest_commit = true, .package_root = undefined, .error_bundle = undefined, .manifest = null, .manifest_ast = undefined, - .actual_hash = undefined, + .computed_hash = undefined, .has_build_zig = false, .oom_flag = false, .latest_commit = null, @@ -7117,14 +7155,15 @@ fn cmdFetch( process.exit(1); } - const hex_digest = Package.Manifest.hexDigest(fetch.actual_hash); + const package_hash = fetch.computedPackageHash(); + const package_hash_slice = package_hash.toSlice(); root_prog_node.end(); root_prog_node = .{ .index = .none }; const name = switch (save) { .no => { - try io.getStdOut().writeAll(hex_digest ++ "\n"); + try io.getStdOut().writer().print("{s}\n", .{package_hash_slice}); return cleanExit(); }, .yes, .exact => |name| name: { @@ -7145,7 +7184,7 @@ fn cmdFetch( // The name to use in case the manifest file needs to be created now. 
const init_root_name = fs.path.basename(build_root.directory.path orelse cwd_path); var manifest, var ast = try loadManifest(gpa, arena, .{ - .root_name = init_root_name, + .root_name = try sanitizeExampleName(arena, init_root_name), .dir = build_root.directory.handle, .color = color, }); @@ -7194,7 +7233,7 @@ fn cmdFetch( \\ }} , .{ std.zig.fmtEscapes(saved_path_or_url), - std.zig.fmtEscapes(&hex_digest), + std.zig.fmtEscapes(package_hash_slice), }); const new_node_text = try std.fmt.allocPrint(arena, ".{p_} = {s},\n", .{ @@ -7213,7 +7252,7 @@ fn cmdFetch( if (dep.hash) |h| { switch (dep.location) { .url => |u| { - if (mem.eql(u8, h, &hex_digest) and mem.eql(u8, u, saved_path_or_url)) { + if (mem.eql(u8, h, package_hash_slice) and mem.eql(u8, u, saved_path_or_url)) { std.log.info("existing dependency named '{s}' is up-to-date", .{name}); process.exit(0); } @@ -7230,7 +7269,7 @@ fn cmdFetch( const hash_replace = try std.fmt.allocPrint( arena, "\"{}\"", - .{std.zig.fmtEscapes(&hex_digest)}, + .{std.zig.fmtEscapes(package_hash_slice)}, ); warn("overwriting existing dependency named '{s}'", .{name}); @@ -7429,10 +7468,10 @@ fn loadManifest( 0, ) catch |err| switch (err) { error.FileNotFound => { + const fingerprint: Package.Fingerprint = .generate(options.root_name); var templates = findTemplates(gpa, arena); defer templates.deinit(); - - templates.write(arena, options.dir, options.root_name, Package.Manifest.basename) catch |e| { + templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, fingerprint) catch |e| { fatal("unable to write {s}: {s}", .{ Package.Manifest.basename, @errorName(e), }); @@ -7490,6 +7529,7 @@ const Templates = struct { out_dir: fs.Dir, root_name: []const u8, template_path: []const u8, + fingerprint: Package.Fingerprint, ) !void { if (fs.path.dirname(template_path)) |dirname| { out_dir.makePath(dirname) catch |err| { @@ -7503,12 +7543,30 @@ const Templates = struct { }; templates.buffer.clearRetainingCapacity(); try 
templates.buffer.ensureUnusedCapacity(contents.len); - for (contents) |c| { - if (c == '$') { - try templates.buffer.appendSlice(root_name); - } else { - try templates.buffer.append(c); + var i: usize = 0; + while (i < contents.len) { + if (contents[i] == '.') { + if (std.mem.startsWith(u8, contents[i..], ".LITNAME")) { + try templates.buffer.append('.'); + try templates.buffer.appendSlice(root_name); + i += ".LITNAME".len; + continue; + } else if (std.mem.startsWith(u8, contents[i..], ".NAME")) { + try templates.buffer.appendSlice(root_name); + i += ".NAME".len; + continue; + } else if (std.mem.startsWith(u8, contents[i..], ".FINGERPRINT")) { + try templates.buffer.writer().print("0x{x}", .{fingerprint.int()}); + i += ".FINGERPRINT".len; + continue; + } else if (std.mem.startsWith(u8, contents[i..], ".ZIGVER")) { + try templates.buffer.appendSlice(build_options.version); + i += ".ZIGVER".len; + continue; + } } + try templates.buffer.append(contents[i]); + i += 1; } return out_dir.writeFile(.{