From e0129b387ff962c0f89d62c8ab0409145a19f453 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Feb 2025 13:43:36 -0800 Subject: [PATCH 01/15] std.ArrayList: delete unit test tests should use the API, not only verify compilation succeeds. --- lib/std/array_list.zig | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig index 1b2bbcb919..7849e98b42 100644 --- a/lib/std/array_list.zig +++ b/lib/std/array_list.zig @@ -2250,10 +2250,3 @@ test "return OutOfMemory when capacity would exceed maximum usize integer value" try testing.expectError(error.OutOfMemory, list.ensureUnusedCapacity(2)); } } - -test "ArrayListAligned with non-native alignment compiles unusedCapabitySlice" { - var list = ArrayListAligned(u8, 4).init(testing.allocator); - defer list.deinit(); - try list.appendNTimes(1, 4); - _ = list.unusedCapacitySlice(); -} From 12355cfb4cb14ba78423fb38838f4485bb563c9b Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Feb 2025 15:58:51 -0800 Subject: [PATCH 02/15] Package: new hash format legacy format is also supported. 
closes #20178 --- src/Package.zig | 156 +++++++++++++++++++++++++++++++++++++++ src/Package/Fetch.zig | 154 ++++++++++++++++++++++---------------- src/Package/Manifest.zig | 69 +---------------- src/main.zig | 35 ++++----- 4 files changed, 269 insertions(+), 145 deletions(-) diff --git a/src/Package.zig b/src/Package.zig index 61f90727f3..c9f41f6f37 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -1,8 +1,164 @@ +const std = @import("std"); +const assert = std.debug.assert; + pub const Module = @import("Package/Module.zig"); pub const Fetch = @import("Package/Fetch.zig"); pub const build_zig_basename = "build.zig"; pub const Manifest = @import("Package/Manifest.zig"); +pub const multihash_len = 1 + 1 + Hash.Algo.digest_length; +pub const multihash_hex_digest_len = 2 * multihash_len; +pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; + +/// A user-readable, file system safe hash that identifies an exact package +/// snapshot, including file contents. +/// +/// This data structure can be used to store the legacy hash format too. Legacy +/// hash format is scheduled to be removed after 0.14.0 is tagged. +pub const Hash = struct { + /// Maximum size of a package hash. Unused bytes at the end are + /// filled with zeroes. + bytes: [max_len]u8, + + pub const Algo = std.crypto.hash.sha2.Sha256; + pub const Digest = [Algo.digest_length]u8; + + pub const max_len = 32 + 1 + 32 + 1 + 12; + + pub fn fromSlice(s: []const u8) Hash { + assert(s.len <= max_len); + var result: Hash = undefined; + @memcpy(result.bytes[0..s.len], s); + @memset(result.bytes[s.len..], 0); + return result; + } + + pub fn toSlice(ph: *const Hash) []const u8 { + var end: usize = ph.bytes.len; + while (true) { + end -= 1; + if (ph.bytes[end] != 0) return ph.bytes[0 .. end + 1]; + } + } + + pub fn eql(a: *const Hash, b: *const Hash) bool { + return std.mem.eql(u8, &a.bytes, &b.bytes); + } + + /// Distinguishes whether the legacy multihash format is being stored here. 
+ pub fn isOld(h: *const Hash) bool { + if (h.bytes.len < 2) return false; + const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false; + if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false; + if (h.toSlice().len != multihash_hex_digest_len) return false; + return std.mem.indexOfScalar(u8, &h.bytes, '-') == null; + } + + test isOld { + const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7"); + try std.testing.expect(h.isOld()); + } + + /// Produces "$name-$semver-$sizedhash". + /// * name is the name field from build.zig.zon, truncated at 32 bytes and must + /// be a valid zig identifier + /// * semver is the version field from build.zig.zon, truncated at 32 bytes + /// * sizedhash is the following 9-byte array, base64 encoded using -_ to make + /// it filesystem safe: + /// - (4 bytes) LE u32 total decompressed size in bytes + /// - (5 bytes) truncated SHA-256 of hashed files of the package + /// + /// example: "nasm-2.16.1-2-BWdcABvF_jM1" + pub fn init(digest: Digest, name: []const u8, ver: []const u8, size: u32) Hash { + var result: Hash = undefined; + var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes); + buf.appendSliceAssumeCapacity(name[0..@min(name.len, 32)]); + buf.appendAssumeCapacity('-'); + buf.appendSliceAssumeCapacity(ver[0..@min(ver.len, 32)]); + buf.appendAssumeCapacity('-'); + var sizedhash: [9]u8 = undefined; + std.mem.writeInt(u32, sizedhash[0..4], size, .little); + sizedhash[4..].* = digest[0..5].*; + _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(12), &sizedhash); + @memset(buf.unusedCapacitySlice(), 0); + return result; + } + + /// Produces "$hashiname-N-$sizedhash". For packages that lack "build.zig.zon" metadata. 
+ /// * hashiname is [5..][0..24] bytes of the SHA-256, urlsafe-base64-encoded, for a total of 32 bytes encoded + /// * the semver section is replaced with a hardcoded N which stands for + /// "naked". It acts as a version number so that any future updates to the + /// hash format can tell this hash format apart. Note that "N" is an + /// invalid semver. + /// * sizedhash is the same as in `init`. + /// + /// The hash is broken up this way so that "sizedhash" can be calculated + /// exactly the same way in both cases, and so that "name" and "hashiname" can + /// be used interchangeably in both cases. + pub fn initNaked(digest: Digest, size: u32) Hash { + var name: [32]u8 = undefined; + _ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]); + return init(digest, &name, "N", size); + } +}; + +pub const MultihashFunction = enum(u16) { + identity = 0x00, + sha1 = 0x11, + @"sha2-256" = 0x12, + @"sha2-512" = 0x13, + @"sha3-512" = 0x14, + @"sha3-384" = 0x15, + @"sha3-256" = 0x16, + @"sha3-224" = 0x17, + @"sha2-384" = 0x20, + @"sha2-256-trunc254-padded" = 0x1012, + @"sha2-224" = 0x1013, + @"sha2-512-224" = 0x1014, + @"sha2-512-256" = 0x1015, + @"blake2b-256" = 0xb220, + _, +}; + +pub const multihash_function: MultihashFunction = switch (Hash.Algo) { + std.crypto.hash.sha2.Sha256 => .@"sha2-256", + else => @compileError("unreachable"), +}; + +pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest { + const hex_charset = std.fmt.hex_charset; + + var result: MultiHashHexDigest = undefined; + + result[0] = hex_charset[@intFromEnum(multihash_function) >> 4]; + result[1] = hex_charset[@intFromEnum(multihash_function) & 15]; + + result[2] = hex_charset[Hash.Algo.digest_length >> 4]; + result[3] = hex_charset[Hash.Algo.digest_length & 15]; + + for (digest, 0..) 
|byte, i| { + result[4 + i * 2] = hex_charset[byte >> 4]; + result[5 + i * 2] = hex_charset[byte & 15]; + } + return result; +} + +comptime { + // We avoid unnecessary uleb128 code in hexDigest by asserting here the + // values are small enough to be contained in the one-byte encoding. + assert(@intFromEnum(multihash_function) < 127); + assert(Hash.Algo.digest_length < 127); +} + +test Hash { + const example_digest: Hash.Digest = .{ + 0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87, + 0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f, + }; + const result: Hash = .init(example_digest, "nasm", "2.16.1-2", 10 * 1024 * 1024); + try std.testing.expectEqualStrings("nasm-2.16.1-2-AACgAMf1cbe0", result.toSlice()); +} + test { _ = Fetch; } diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 0d6cf55636..11878a12b7 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -56,7 +56,7 @@ package_root: Cache.Path, error_bundle: ErrorBundle.Wip, manifest: ?Manifest, manifest_ast: std.zig.Ast, -actual_hash: Manifest.Digest, +computed_hash: ComputedHash, /// Fetch logic notices whether a package has a build.zig file and sets this flag. has_build_zig: bool, /// Indicates whether the task aborted due to an out-of-memory condition. @@ -116,8 +116,8 @@ pub const JobQueue = struct { /// as lazy. unlazy_set: UnlazySet = .{}, - pub const Table = std.AutoArrayHashMapUnmanaged(Manifest.MultiHashHexDigest, *Fetch); - pub const UnlazySet = std.AutoArrayHashMapUnmanaged(Manifest.MultiHashHexDigest, void); + pub const Table = std.AutoArrayHashMapUnmanaged(Package.Hash, *Fetch); + pub const UnlazySet = std.AutoArrayHashMapUnmanaged(Package.Hash, void); pub fn deinit(jq: *JobQueue) void { if (jq.all_fetches.items.len == 0) return; @@ -160,22 +160,24 @@ pub const JobQueue = struct { // Ensure the generated .zig file is deterministic. 
jq.table.sortUnstable(@as(struct { - keys: []const Manifest.MultiHashHexDigest, + keys: []const Package.Hash, pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool { - return std.mem.lessThan(u8, &ctx.keys[a_index], &ctx.keys[b_index]); + return std.mem.lessThan(u8, &ctx.keys[a_index].bytes, &ctx.keys[b_index].bytes); } }, .{ .keys = keys })); - for (keys, jq.table.values()) |hash, fetch| { + for (keys, jq.table.values()) |*hash, fetch| { if (fetch == jq.all_fetches.items[0]) { // The first one is a dummy package for the current project. continue; } + const hash_slice = hash.toSlice(); + try buf.writer().print( \\ pub const {} = struct {{ \\ - , .{std.zig.fmtId(&hash)}); + , .{std.zig.fmtId(hash_slice)}); lazy: { switch (fetch.lazy_status) { @@ -207,7 +209,7 @@ pub const JobQueue = struct { try buf.writer().print( \\ pub const build_zig = @import("{}"); \\ - , .{std.zig.fmtEscapes(&hash)}); + , .{std.zig.fmtEscapes(hash_slice)}); } if (fetch.manifest) |*manifest| { @@ -219,7 +221,7 @@ pub const JobQueue = struct { const h = depDigest(fetch.package_root, jq.global_cache, dep) orelse continue; try buf.writer().print( " .{{ \"{}\", \"{}\" }},\n", - .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(&h) }, + .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(h.toSlice()) }, ); } @@ -251,7 +253,7 @@ pub const JobQueue = struct { const h = depDigest(root_fetch.package_root, jq.global_cache, dep) orelse continue; try buf.writer().print( " .{{ \"{}\", \"{}\" }},\n", - .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(&h) }, + .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(h.toSlice()) }, ); } try buf.appendSlice("};\n"); @@ -283,7 +285,7 @@ pub const Location = union(enum) { url: []const u8, /// If this is null it means the user omitted the hash field from a dependency. /// It will be an error but the logic should still fetch and print the discovered hash. 
- hash: ?Manifest.MultiHashHexDigest, + hash: ?Package.Hash, }; }; @@ -325,9 +327,11 @@ pub fn run(f: *Fetch) RunError!void { // "p/$hash/foo", with possibly more directories after "foo". // We want to fail unless the resolved relative path has a // prefix of "p/$hash/". - const digest_len = @typeInfo(Manifest.MultiHashHexDigest).array.len; const prefix_len: usize = if (f.job_queue.read_only) 0 else "p/".len; - const expected_prefix = f.parent_package_root.sub_path[0 .. prefix_len + digest_len]; + const parent_sub_path = f.parent_package_root.sub_path; + const end = std.mem.indexOfScalarPos(u8, parent_sub_path, prefix_len, fs.path.sep) orelse + parent_sub_path.len; + const expected_prefix = parent_sub_path[prefix_len..end]; if (!std.mem.startsWith(u8, pkg_root.sub_path, expected_prefix)) { return f.fail( f.location_tok, @@ -367,9 +371,13 @@ pub fn run(f: *Fetch) RunError!void { }, }; - const s = fs.path.sep_str; if (remote.hash) |expected_hash| { - const prefixed_pkg_sub_path = "p" ++ s ++ expected_hash; + var prefixed_pkg_sub_path_buffer: [100]u8 = undefined; + prefixed_pkg_sub_path_buffer[0] = 'p'; + prefixed_pkg_sub_path_buffer[1] = fs.path.sep; + const hash_slice = expected_hash.toSlice(); + @memcpy(prefixed_pkg_sub_path_buffer[2..][0..hash_slice.len], hash_slice); + const prefixed_pkg_sub_path = prefixed_pkg_sub_path_buffer[0 .. 2 + hash_slice.len]; const prefix_len: usize = if (f.job_queue.read_only) "p/".len else 0; const pkg_sub_path = prefixed_pkg_sub_path[prefix_len..]; if (cache_root.handle.access(pkg_sub_path, .{})) |_| { @@ -437,7 +445,7 @@ fn runResource( f: *Fetch, uri_path: []const u8, resource: *Resource, - remote_hash: ?Manifest.MultiHashHexDigest, + remote_hash: ?Package.Hash, ) RunError!void { defer resource.deinit(); const arena = f.arena.allocator(); @@ -499,7 +507,7 @@ fn runResource( // Empty directories have already been omitted by `unpackResource`. // Compute the package hash based on the remaining files in the temporary // directory. 
- f.actual_hash = try computeHash(f, pkg_path, filter); + f.computed_hash = try computeHash(f, pkg_path, filter); break :blk if (unpack_result.root_dir.len > 0) try fs.path.join(arena, &.{ tmp_dir_sub_path, unpack_result.root_dir }) @@ -507,6 +515,8 @@ fn runResource( tmp_dir_sub_path; }; + const computed_package_hash = computedPackageHash(f); + // Rename the temporary directory into the global zig package cache // directory. If the hash already exists, delete the temporary directory // and leave the zig package cache directory untouched as it may be in use @@ -515,7 +525,7 @@ fn runResource( f.package_root = .{ .root_dir = cache_root, - .sub_path = try arena.dupe(u8, "p" ++ s ++ Manifest.hexDigest(f.actual_hash)), + .sub_path = try std.fmt.allocPrint(arena, "p" ++ s ++ "{s}", .{computed_package_hash.toSlice()}), }; renameTmpIntoCache(cache_root.handle, package_sub_path, f.package_root.sub_path) catch |err| { const src = try cache_root.join(arena, &.{tmp_dir_sub_path}); @@ -534,13 +544,22 @@ fn runResource( // Validate the computed hash against the expected hash. If invalid, this // job is done. 
- const actual_hex = Manifest.hexDigest(f.actual_hash); if (remote_hash) |declared_hash| { - if (!std.mem.eql(u8, &declared_hash, &actual_hex)) { - return f.fail(f.hash_tok, try eb.printString( - "hash mismatch: manifest declares {s} but the fetched package has {s}", - .{ declared_hash, actual_hex }, - )); + if (declared_hash.isOld()) { + const actual_hex = Package.multiHashHexDigest(f.computed_hash.digest); + if (!std.mem.eql(u8, declared_hash.toSlice(), &actual_hex)) { + return f.fail(f.hash_tok, try eb.printString( + "hash mismatch: manifest declares {s} but the fetched package has {s}", + .{ declared_hash.toSlice(), actual_hex }, + )); + } + } else { + if (!computed_package_hash.eql(&declared_hash)) { + return f.fail(f.hash_tok, try eb.printString( + "hash mismatch: manifest declares {s} but the fetched package has {s}", + .{ declared_hash.toSlice(), computed_package_hash.toSlice() }, + )); + } } } else if (!f.omit_missing_hash_error) { const notes_len = 1; @@ -551,7 +570,7 @@ fn runResource( }); const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}), + .msg = try eb.printString("expected .hash = \"{s}\",", .{computed_package_hash.toSlice()}), })); return error.FetchFailed; } @@ -562,6 +581,16 @@ fn runResource( return queueJobsForDeps(f); } +pub fn computedPackageHash(f: *const Fetch) Package.Hash { + const saturated_size = std.math.cast(u32, f.computed_hash.total_size) orelse std.math.maxInt(u32); + if (f.manifest) |man| { + var version_buffer: [32]u8 = undefined; + const version: []const u8 = std.fmt.bufPrint(&version_buffer, "{}", .{man.version}) catch &version_buffer; + return .init(f.computed_hash.digest, man.name, version, saturated_size); + } + return .initNaked(f.computed_hash.digest, saturated_size); +} + /// `computeHash` gets a free check for the existence of `build.zig`, but when /// not computing a hash, we 
need to do a syscall to check for it. fn checkBuildFileExistence(f: *Fetch) RunError!void { @@ -673,9 +702,8 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { .url = url, .hash = h: { const h = dep.hash orelse break :h null; - const digest_len = @typeInfo(Manifest.MultiHashHexDigest).array.len; - const multihash_digest = h[0..digest_len].*; - const gop = f.job_queue.table.getOrPutAssumeCapacity(multihash_digest); + const pkg_hash: Package.Hash = .fromSlice(h); + const gop = f.job_queue.table.getOrPutAssumeCapacity(pkg_hash); if (gop.found_existing) { if (!dep.lazy) { gop.value_ptr.*.lazy_status = .eager; @@ -683,15 +711,15 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { continue; } gop.value_ptr.* = new_fetch; - break :h multihash_digest; + break :h pkg_hash; }, } }, .path => |rel_path| l: { // This might produce an invalid path, which is checked for // at the beginning of run(). const new_root = try f.package_root.resolvePosix(parent_arena, rel_path); - const multihash_digest = relativePathDigest(new_root, cache_root); - const gop = f.job_queue.table.getOrPutAssumeCapacity(multihash_digest); + const pkg_hash = relativePathDigest(new_root, cache_root); + const gop = f.job_queue.table.getOrPutAssumeCapacity(pkg_hash); if (gop.found_existing) { if (!dep.lazy) { gop.value_ptr.*.lazy_status = .eager; @@ -724,7 +752,7 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { .error_bundle = undefined, .manifest = null, .manifest_ast = undefined, - .actual_hash = undefined, + .computed_hash = undefined, .has_build_zig = false, .oom_flag = false, .latest_commit = null, @@ -746,11 +774,8 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { } } -pub fn relativePathDigest( - pkg_root: Cache.Path, - cache_root: Cache.Directory, -) Manifest.MultiHashHexDigest { - var hasher = Manifest.Hash.init(.{}); +pub fn relativePathDigest(pkg_root: Cache.Path, cache_root: Cache.Directory) Package.Hash { + var hasher = Package.Hash.Algo.init(.{}); // This hash is a tuple of: // * whether it 
relative to the global cache directory or to the root package // * the relative file path from there to the build root of the package @@ -759,7 +784,7 @@ pub fn relativePathDigest( else &package_hash_prefix_project); hasher.update(pkg_root.sub_path); - return Manifest.hexDigest(hasher.finalResult()); + return .fromSlice(&hasher.finalResult()); } pub fn workerRun(f: *Fetch, prog_name: []const u8) void { @@ -1387,11 +1412,7 @@ fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, tmp_dir: fs.Dir) anyerror!void } } -pub fn renameTmpIntoCache( - cache_dir: fs.Dir, - tmp_dir_sub_path: []const u8, - dest_dir_sub_path: []const u8, -) !void { +pub fn renameTmpIntoCache(cache_dir: fs.Dir, tmp_dir_sub_path: []const u8, dest_dir_sub_path: []const u8) !void { assert(dest_dir_sub_path[1] == fs.path.sep); var handled_missing_dir = false; while (true) { @@ -1417,16 +1438,17 @@ pub fn renameTmpIntoCache( } } +const ComputedHash = struct { + digest: Package.Hash.Digest, + total_size: u64, +}; + /// Assumes that files not included in the package have already been filtered /// prior to calling this function. This ensures that files not protected by /// the hash are not present on the file system. Empty directories are *not /// hashed* and must not be present on the file system when calling this /// function. -fn computeHash( - f: *Fetch, - pkg_path: Cache.Path, - filter: Filter, -) RunError!Manifest.Digest { +fn computeHash(f: *Fetch, pkg_path: Cache.Path, filter: Filter) RunError!ComputedHash { // All the path name strings need to be in memory for sorting. const arena = f.arena.allocator(); const gpa = f.arena.child_allocator; @@ -1449,6 +1471,9 @@ fn computeHash( var walker = try root_dir.walk(gpa); defer walker.deinit(); + // Total number of bytes of file contents included in the package. + var total_size: u64 = 0; + { // The final hash will be a hash of each file hashed independently. This // allows hashing in parallel. 
@@ -1506,6 +1531,7 @@ fn computeHash( .kind = kind, .hash = undefined, // to be populated by the worker .failure = undefined, // to be populated by the worker + .size = undefined, // to be populated by the worker }; thread_pool.spawnWg(&wait_group, workerHashFile, .{ root_dir, hashed_file }); try all_files.append(hashed_file); @@ -1544,7 +1570,7 @@ fn computeHash( std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan); - var hasher = Manifest.Hash.init(.{}); + var hasher = Package.Hash.Algo.init(.{}); var any_failures = false; for (all_files.items) |hashed_file| { hashed_file.failure catch |err| { @@ -1556,6 +1582,7 @@ fn computeHash( }); }; hasher.update(&hashed_file.hash); + total_size += hashed_file.size; } for (deleted_files.items) |deleted_file| { deleted_file.failure catch |err| { @@ -1580,7 +1607,10 @@ fn computeHash( }; } - return hasher.finalResult(); + return .{ + .digest = hasher.finalResult(), + .total_size = total_size, + }; } fn dumpHashInfo(all_files: []const *const HashedFile) !void { @@ -1609,8 +1639,9 @@ fn workerDeleteFile(dir: fs.Dir, deleted_file: *DeletedFile) void { fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void { var buf: [8000]u8 = undefined; - var hasher = Manifest.Hash.init(.{}); + var hasher = Package.Hash.Algo.init(.{}); hasher.update(hashed_file.normalized_path); + var file_size: u64 = 0; switch (hashed_file.kind) { .file => { @@ -1622,6 +1653,7 @@ fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void while (true) { const bytes_read = try file.read(&buf); if (bytes_read == 0) break; + file_size += bytes_read; hasher.update(buf[0..bytes_read]); file_header.update(buf[0..bytes_read]); } @@ -1641,6 +1673,7 @@ fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void }, } hasher.final(&hashed_file.hash); + hashed_file.size = file_size; } fn deleteFileFallible(dir: fs.Dir, deleted_file: *DeletedFile) DeletedFile.Error!void { @@ -1667,9 
+1700,10 @@ const DeletedFile = struct { const HashedFile = struct { fs_path: []const u8, normalized_path: []const u8, - hash: Manifest.Digest, + hash: Package.Hash.Digest, failure: Error!void, kind: Kind, + size: u64, const Error = fs.File.OpenError || @@ -1744,12 +1778,8 @@ const Filter = struct { } }; -pub fn depDigest( - pkg_root: Cache.Path, - cache_root: Cache.Directory, - dep: Manifest.Dependency, -) ?Manifest.MultiHashHexDigest { - if (dep.hash) |h| return h[0..Manifest.multihash_hex_digest_len].*; +pub fn depDigest(pkg_root: Cache.Path, cache_root: Cache.Directory, dep: Manifest.Dependency) ?Package.Hash { + if (dep.hash) |h| return .fromSlice(h); switch (dep.location) { .url => return null, @@ -2137,7 +2167,7 @@ test "tarball with excluded duplicate paths" { defer fb.deinit(); try fetch.run(); - const hex_digest = Package.Manifest.hexDigest(fetch.actual_hash); + const hex_digest = Package.multiHashHexDigest(fetch.computed_hash.digest); try std.testing.expectEqualStrings( "12200bafe035cbb453dd717741b66e9f9d1e6c674069d06121dafa1b2e62eb6b22da", &hex_digest, @@ -2181,7 +2211,7 @@ test "tarball without root folder" { defer fb.deinit(); try fetch.run(); - const hex_digest = Package.Manifest.hexDigest(fetch.actual_hash); + const hex_digest = Package.multiHashHexDigest(fetch.computed_hash.digest); try std.testing.expectEqualStrings( "12209f939bfdcb8b501a61bb4a43124dfa1b2848adc60eec1e4624c560357562b793", &hex_digest, @@ -2222,7 +2252,7 @@ test "set executable bit based on file content" { try fetch.run(); try std.testing.expectEqualStrings( "1220fecb4c06a9da8673c87fe8810e15785f1699212f01728eadce094d21effeeef3", - &Manifest.hexDigest(fetch.actual_hash), + &Package.multiHashHexDigest(fetch.computed_hash.digest), ); var out = try fb.packageDir(); @@ -2304,7 +2334,7 @@ const TestFetchBuilder = struct { .error_bundle = undefined, .manifest = null, .manifest_ast = undefined, - .actual_hash = undefined, + .computed_hash = undefined, .has_build_zig = false, .oom_flag = 
false, .latest_commit = null, diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 4eed6cc386..bfc5c813e2 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -5,15 +5,10 @@ const Allocator = std.mem.Allocator; const assert = std.debug.assert; const Ast = std.zig.Ast; const testing = std.testing; -const hex_charset = std.fmt.hex_charset; +const Package = @import("../Package.zig"); pub const max_bytes = 10 * 1024 * 1024; pub const basename = "build.zig.zon"; -pub const Hash = std.crypto.hash.sha2.Sha256; -pub const Digest = [Hash.digest_length]u8; -pub const multihash_len = 1 + 1 + Hash.digest_length; -pub const multihash_hex_digest_len = 2 * multihash_len; -pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; pub const Dependency = struct { location: Location, @@ -38,35 +33,6 @@ pub const ErrorMessage = struct { off: u32, }; -pub const MultihashFunction = enum(u16) { - identity = 0x00, - sha1 = 0x11, - @"sha2-256" = 0x12, - @"sha2-512" = 0x13, - @"sha3-512" = 0x14, - @"sha3-384" = 0x15, - @"sha3-256" = 0x16, - @"sha3-224" = 0x17, - @"sha2-384" = 0x20, - @"sha2-256-trunc254-padded" = 0x1012, - @"sha2-224" = 0x1013, - @"sha2-512-224" = 0x1014, - @"sha2-512-256" = 0x1015, - @"blake2b-256" = 0xb220, - _, -}; - -pub const multihash_function: MultihashFunction = switch (Hash) { - std.crypto.hash.sha2.Sha256 => .@"sha2-256", - else => @compileError("unreachable"), -}; -comptime { - // We avoid unnecessary uleb128 code in hexDigest by asserting here the - // values are small enough to be contained in the one-byte encoding. 
- assert(@intFromEnum(multihash_function) < 127); - assert(Hash.digest_length < 127); -} - name: []const u8, version: std.SemanticVersion, version_node: Ast.Node.Index, @@ -164,22 +130,6 @@ pub fn copyErrorsIntoBundle( } } -pub fn hexDigest(digest: Digest) MultiHashHexDigest { - var result: MultiHashHexDigest = undefined; - - result[0] = hex_charset[@intFromEnum(multihash_function) >> 4]; - result[1] = hex_charset[@intFromEnum(multihash_function) & 15]; - - result[2] = hex_charset[Hash.digest_length >> 4]; - result[3] = hex_charset[Hash.digest_length & 15]; - - for (digest, 0..) |byte, i| { - result[4 + i * 2] = hex_charset[byte >> 4]; - result[5 + i * 2] = hex_charset[byte & 15]; - } - return result; -} - const Parse = struct { gpa: Allocator, ast: Ast, @@ -421,21 +371,8 @@ const Parse = struct { const tok = main_tokens[node]; const h = try parseString(p, node); - if (h.len >= 2) { - const their_multihash_func = std.fmt.parseInt(u8, h[0..2], 16) catch |err| { - return fail(p, tok, "invalid multihash value: unable to parse hash function: {s}", .{ - @errorName(err), - }); - }; - if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) { - return fail(p, tok, "unsupported hash function: only sha2-256 is supported", .{}); - } - } - - if (h.len != multihash_hex_digest_len) { - return fail(p, tok, "wrong hash size. 
expected: {d}, found: {d}", .{ - multihash_hex_digest_len, h.len, - }); + if (h.len > Package.Hash.max_len) { + return fail(p, tok, "hash length exceeds maximum: {d}", .{h.len}); } return h; diff --git a/src/main.zig b/src/main.zig index 5e66244484..d6b20f94f9 100644 --- a/src/main.zig +++ b/src/main.zig @@ -5197,7 +5197,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .error_bundle = undefined, .manifest = null, .manifest_ast = undefined, - .actual_hash = undefined, + .computed_hash = undefined, .has_build_zig = true, .oom_flag = false, .latest_commit = null, @@ -5244,13 +5244,14 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { const hashes = job_queue.table.keys(); const fetches = job_queue.table.values(); try deps_mod.deps.ensureUnusedCapacity(arena, @intCast(hashes.len)); - for (hashes, fetches) |hash, f| { + for (hashes, fetches) |*hash, f| { if (f == &fetch) { // The first one is a dummy package for the current project. 
continue; } if (!f.has_build_zig) continue; + const hash_slice = hash.toSlice(); const m = try Package.Module.create(arena, .{ .global_cache_directory = global_cache_directory, .paths = .{ @@ -5260,7 +5261,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .fully_qualified_name = try std.fmt.allocPrint( arena, "root.@dependencies.{s}", - .{&hash}, + .{hash_slice}, ), .cc_argv = &.{}, .inherited = .{}, @@ -5269,7 +5270,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .builtin_mod = builtin_mod, .builtin_modules = null, // `builtin_mod` is specified }); - const hash_cloned = try arena.dupe(u8, &hash); + const hash_cloned = try arena.dupe(u8, hash_slice); deps_mod.deps.putAssumeCapacityNoClobber(hash_cloned, m); f.module = m; } @@ -5385,23 +5386,22 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { var any_errors = false; while (it.next()) |hash| { if (hash.len == 0) continue; - const digest_len = @typeInfo(Package.Manifest.MultiHashHexDigest).array.len; - if (hash.len != digest_len) { - std.log.err("invalid digest (length {d} instead of {d}): '{s}'", .{ - hash.len, digest_len, hash, + if (hash.len > Package.Hash.max_len) { + std.log.err("invalid digest (length {d} exceeds maximum): '{s}'", .{ + hash.len, hash, }); any_errors = true; continue; } - try unlazy_set.put(arena, hash[0..digest_len].*, {}); + try unlazy_set.put(arena, .fromSlice(hash), {}); } if (any_errors) process.exit(3); if (system_pkg_dir_path) |p| { // In this mode, the system needs to provide these packages; they // cannot be fetched by Zig. 
- for (unlazy_set.keys()) |hash| { + for (unlazy_set.keys()) |*hash| { std.log.err("lazy dependency package not found: {s}" ++ s ++ "{s}", .{ - p, hash, + p, hash.toSlice(), }); } std.log.info("remote package fetching disabled due to --system mode", .{}); @@ -7097,7 +7097,7 @@ fn cmdFetch( .error_bundle = undefined, .manifest = null, .manifest_ast = undefined, - .actual_hash = undefined, + .computed_hash = undefined, .has_build_zig = false, .oom_flag = false, .latest_commit = null, @@ -7117,14 +7117,15 @@ fn cmdFetch( process.exit(1); } - const hex_digest = Package.Manifest.hexDigest(fetch.actual_hash); + const package_hash = fetch.computedPackageHash(); + const package_hash_slice = package_hash.toSlice(); root_prog_node.end(); root_prog_node = .{ .index = .none }; const name = switch (save) { .no => { - try io.getStdOut().writeAll(hex_digest ++ "\n"); + try io.getStdOut().writer().print("{s}\n", .{package_hash_slice}); return cleanExit(); }, .yes, .exact => |name| name: { @@ -7194,7 +7195,7 @@ fn cmdFetch( \\ }} , .{ std.zig.fmtEscapes(saved_path_or_url), - std.zig.fmtEscapes(&hex_digest), + std.zig.fmtEscapes(package_hash_slice), }); const new_node_text = try std.fmt.allocPrint(arena, ".{p_} = {s},\n", .{ @@ -7213,7 +7214,7 @@ fn cmdFetch( if (dep.hash) |h| { switch (dep.location) { .url => |u| { - if (mem.eql(u8, h, &hex_digest) and mem.eql(u8, u, saved_path_or_url)) { + if (mem.eql(u8, h, package_hash_slice) and mem.eql(u8, u, saved_path_or_url)) { std.log.info("existing dependency named '{s}' is up-to-date", .{name}); process.exit(0); } @@ -7230,7 +7231,7 @@ fn cmdFetch( const hash_replace = try std.fmt.allocPrint( arena, "\"{}\"", - .{std.zig.fmtEscapes(&hex_digest)}, + .{std.zig.fmtEscapes(package_hash_slice)}, ); warn("overwriting existing dependency named '{s}'", .{name}); From e03bc7ac78820b7763d6ecd21cfa19653535f8d0 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Feb 2025 17:23:53 -0800 Subject: [PATCH 03/15] require package names to be valid 
zig identifiers --- src/Package/Manifest.zig | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index bfc5c813e2..0221b87bd6 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -46,6 +46,8 @@ arena_state: std.heap.ArenaAllocator.State, pub const ParseOptions = struct { allow_missing_paths_field: bool = false, + /// Deprecated, to be removed after 0.14.0 is tagged. + allow_name_string: bool = true, }; pub const Error = Allocator.Error; @@ -72,6 +74,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .dependencies_node = 0, .paths = .{}, .allow_missing_paths_field = options.allow_missing_paths_field, + .allow_name_string = options.allow_name_string, .minimum_zig_version = null, .buf = .{}, }; @@ -144,6 +147,7 @@ const Parse = struct { dependencies_node: Ast.Node.Index, paths: std.StringArrayHashMapUnmanaged(void), allow_missing_paths_field: bool, + allow_name_string: bool, minimum_zig_version: ?std.SemanticVersion, const InnerError = error{ ParseFailure, OutOfMemory }; @@ -175,7 +179,7 @@ const Parse = struct { have_included_paths = true; try parseIncludedPaths(p, field_init); } else if (mem.eql(u8, field_name, "name")) { - p.name = try parseString(p, field_init); + p.name = try parseName(p, field_init); have_name = true; } else if (mem.eql(u8, field_name, "version")) { p.version_node = field_init; @@ -350,6 +354,30 @@ const Parse = struct { } } + fn parseName(p: *Parse, node: Ast.Node.Index) ![]const u8 { + const ast = p.ast; + const node_tags = ast.nodes.items(.tag); + const main_tokens = ast.nodes.items(.main_token); + const main_token = main_tokens[node]; + + if (p.allow_name_string and node_tags[node] == .string_literal) { + const name = try parseString(p, node); + if (!std.zig.isValidId(name)) + return fail(p, main_token, "name must be a valid bare zig identifier (hint: switch from string to enum literal)", .{}); 
+ + return name; + } + + if (node_tags[node] != .enum_literal) + return fail(p, main_token, "expected enum literal", .{}); + + const ident_name = ast.tokenSlice(main_token); + if (mem.startsWith(u8, ident_name, "@")) + return fail(p, main_token, "name must be a valid bare zig identifier", .{}); + + return ident_name; + } + fn parseString(p: *Parse, node: Ast.Node.Index) ![]const u8 { const ast = p.ast; const node_tags = ast.nodes.items(.tag); From a57b0a0f2fe5a3a0af740495c60d4f3d0b6abfec Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Feb 2025 19:20:38 -0800 Subject: [PATCH 04/15] fix generated hash of by-path dependencies This branch regressed from master by switching to binary rather than hex digest, allowing null bytes to end up in identifiers in the zig file. This commit fixes it by changing the "hash" to be literally equal to the sub_path (with a prefix '/' to indicate "global") if it can fit. If it is too long then it is actually hashed, and that value is used instead. --- src/Package.zig | 22 ++++++++++++++++++++++ src/Package/Fetch.zig | 15 +-------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/Package.zig b/src/Package.zig index c9f41f6f37..b585644d9e 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -15,6 +15,9 @@ pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; /// /// This data structure can be used to store the legacy hash format too. Legacy /// hash format is scheduled to be removed after 0.14.0 is tagged. +/// +/// There's also a third way this structure is used. When using path rather than +/// hash, a unique hash is still needed, so one is computed based on the path. pub const Hash = struct { /// Maximum size of a package hash. Unused bytes at the end are /// filled with zeroes. @@ -100,6 +103,25 @@ pub const Hash = struct { _ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]); return init(digest, &name, "N", size); } + + /// Produces a unique hash based on the path provided. 
The result should + /// not be user-visible. + pub fn initPath(sub_path: []const u8, is_global: bool) Hash { + var result: Hash = .{ .bytes = @splat(0) }; + var i: usize = 0; + if (is_global) { + result.bytes[0] = '/'; + i += 1; + } + if (i + sub_path.len <= result.bytes.len) { + @memcpy(result.bytes[i..][0..sub_path.len], sub_path); + return result; + } + var bin_digest: [Algo.digest_length]u8 = undefined; + Algo.hash(sub_path, &bin_digest, .{}); + _ = std.fmt.bufPrint(result.bytes[i..], "{}", .{std.fmt.fmtSliceHexLower(&bin_digest)}) catch unreachable; + return result; + } }; pub const MultihashFunction = enum(u16) { diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 11878a12b7..326b8917a5 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -775,16 +775,7 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { } pub fn relativePathDigest(pkg_root: Cache.Path, cache_root: Cache.Directory) Package.Hash { - var hasher = Package.Hash.Algo.init(.{}); - // This hash is a tuple of: - // * whether it relative to the global cache directory or to the root package - // * the relative file path from there to the build root of the package - hasher.update(if (pkg_root.root_dir.eql(cache_root)) - &package_hash_prefix_cached - else - &package_hash_prefix_project); - hasher.update(pkg_root.sub_path); - return .fromSlice(&hasher.finalResult()); + return .initPath(pkg_root.sub_path, pkg_root.root_dir.eql(cache_root)); } pub fn workerRun(f: *Fetch, prog_name: []const u8) void { @@ -1793,10 +1784,6 @@ pub fn depDigest(pkg_root: Cache.Path, cache_root: Cache.Directory, dep: Manifes } } -// These are random bytes. 
-const package_hash_prefix_cached = [8]u8{ 0x53, 0x7e, 0xfa, 0x94, 0x65, 0xe9, 0xf8, 0x73 }; -const package_hash_prefix_project = [8]u8{ 0xe1, 0x25, 0xee, 0xfa, 0xa6, 0x17, 0x38, 0xcc }; - const builtin = @import("builtin"); const std = @import("std"); const fs = std.fs; From 76e8b297b1f0c6013d71e9ae670cb4006b3e4b76 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Feb 2025 17:36:23 -0800 Subject: [PATCH 05/15] Package.Manifest: enforce name limit of 32 --- src/Package/Manifest.zig | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 0221b87bd6..b64f4eb7a1 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -9,6 +9,7 @@ const Package = @import("../Package.zig"); pub const max_bytes = 10 * 1024 * 1024; pub const basename = "build.zig.zon"; +pub const max_name_len = 32; pub const Dependency = struct { location: Location, @@ -365,6 +366,11 @@ const Parse = struct { if (!std.zig.isValidId(name)) return fail(p, main_token, "name must be a valid bare zig identifier (hint: switch from string to enum literal)", .{}); + if (name.len > max_name_len) + return fail(p, main_token, "name '{s}' exceeds max length of {d}", .{ + std.zig.fmtId(name), max_name_len, + }); + return name; } @@ -375,6 +381,11 @@ const Parse = struct { if (mem.startsWith(u8, ident_name, "@")) return fail(p, main_token, "name must be a valid bare zig identifier", .{}); + if (ident_name.len > max_name_len) + return fail(p, main_token, "name '{s}' exceeds max length of {d}", .{ + std.zig.fmtId(ident_name), max_name_len, + }); + return ident_name; } From 9763dd2901069f80dbdaae7c6b8004fbe1cf1b26 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Feb 2025 17:39:01 -0800 Subject: [PATCH 06/15] Package.Manifest: enforce maximum version string length of 32 --- src/Package/Manifest.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 
b64f4eb7a1..82c850d705 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -10,6 +10,7 @@ const Package = @import("../Package.zig"); pub const max_bytes = 10 * 1024 * 1024; pub const basename = "build.zig.zon"; pub const max_name_len = 32; +pub const max_version_len = 32; pub const Dependency = struct { location: Location, @@ -185,6 +186,9 @@ const Parse = struct { } else if (mem.eql(u8, field_name, "version")) { p.version_node = field_init; const version_text = try parseString(p, field_init); + if (version_text.len > max_version_len) { + try appendError(p, main_tokens[field_init], "version string length {d} exceeds maximum of {d}", .{ version_text.len, max_version_len }); + } p.version = std.SemanticVersion.parse(version_text) catch |err| v: { try appendError(p, main_tokens[field_init], "unable to parse semantic version: {s}", .{@errorName(err)}); break :v undefined; From d6a88ed74db270c14c669ab334f3ab715cfd2b76 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Feb 2025 20:24:52 -0800 Subject: [PATCH 07/15] introduce package id and redo hash format again Introduces the `id` field to `build.zig.zon`. Together with name, this represents a globally unique package identifier. This field should be initialized with a 16-bit random number when the package is first created, and then *never change*. This allows Zig to unambiguously detect when one package is an updated version of another. When forking a Zig project, this id should be regenerated with a new random number if the upstream project is still maintained. Otherwise, the fork is *hostile*, attempting to take control over the original project's identity. `0x0000` is invalid because it obviously means a random number wasn't used. `0xffff` is reserved to represent "naked" packages. Tracking issue #14288 Additionally: * Fix bad path in error messages regarding build.zig.zon file. * Manifest validates that `name` and `version` field of build.zig.zon are maximum 32 bytes. 
* Introduce an error when the root package has not switched to an enum literal for name. * Introduce an error when the root package omits `id`. * Update init template to generate `id` * Update init template to populate `minimum_zig_version`. * New package hash format changes: - name and version limited to 32 bytes via error rather than truncation - truncate sha256 to 192 bits rather than 40 bits - include the package id This means that, given only the package hashes for a complete dependency tree, it is possible to perform version selection and know the final size on disk, without doing any fetching whatsoever. This prevents wasted bandwidth since package versions not selected do not need to be fetched. --- doc/build.zig.zon.md | 21 ++++++++++++++- lib/init/build.zig | 6 ++--- lib/init/build.zig.zon | 19 ++++++++++++- lib/init/src/main.zig | 2 +- src/Package.zig | 58 +++++++++++++++++++--------------------- src/Package/Fetch.zig | 10 ++++--- src/Package/Manifest.zig | 49 +++++++++++++++++++++++++++++++-- src/main.zig | 38 +++++++++++++++++++------- 8 files changed, 151 insertions(+), 52 deletions(-) diff --git a/doc/build.zig.zon.md b/doc/build.zig.zon.md index dc3ac749a9..2e406a8afd 100644 --- a/doc/build.zig.zon.md +++ b/doc/build.zig.zon.md @@ -10,7 +10,7 @@ build.zig. ### `name` -String. Required. +Enum literal. Required. This is the default name used by packages depending on this one. For example, when a user runs `zig fetch --save `, this field is used as the key in the @@ -20,12 +20,31 @@ will stick with this provided value. It is redundant to include "zig" in this name because it is already within the Zig package namespace. +Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes. + +### `id` + +Together with name, this represents a globally unique package identifier. This +field should be initialized with a 16-bit random number when the package is +first created, and then *never change*. 
This allows Zig to unambiguously detect +when one package is an updated version of another. + +When forking a Zig project, this id should be regenerated with a new random +number if the upstream project is still maintained. Otherwise, the fork is +*hostile*, attempting to take control over the original project's identity. + +`0x0000` is invalid because it obviously means a random number wasn't used. + +`0xffff` is reserved to represent "naked" packages. + ### `version` String. Required. [semver](https://semver.org/) +Limited to 32 bytes. + ### `minimum_zig_version` String. Optional. diff --git a/lib/init/build.zig b/lib/init/build.zig index 9be615ac31..ec25698c68 100644 --- a/lib/init/build.zig +++ b/lib/init/build.zig @@ -42,14 +42,14 @@ pub fn build(b: *std.Build) void { // Modules can depend on one another using the `std.Build.Module.addImport` function. // This is what allows Zig source code to use `@import("foo")` where 'foo' is not a // file path. In this case, we set up `exe_mod` to import `lib_mod`. - exe_mod.addImport("$_lib", lib_mod); + exe_mod.addImport("$n_lib", lib_mod); // Now, we will create a static library based on the module we created above. // This creates a `std.Build.Step.Compile`, which is the build step responsible // for actually invoking the compiler. const lib = b.addLibrary(.{ .linkage = .static, - .name = "$", + .name = "$n", .root_module = lib_mod, }); @@ -61,7 +61,7 @@ pub fn build(b: *std.Build) void { // This creates another `std.Build.Step.Compile`, but this one builds an executable // rather than a static library. const exe = b.addExecutable(.{ - .name = "$", + .name = "$n", .root_module = exe_mod, }); diff --git a/lib/init/build.zig.zon b/lib/init/build.zig.zon index cb7229042f..85fca48108 100644 --- a/lib/init/build.zig.zon +++ b/lib/init/build.zig.zon @@ -6,12 +6,29 @@ // // It is redundant to include "zig" in this name because it is already // within the Zig package namespace. 
- .name = "$", + .name = .$n, // This is a [Semantic Version](https://semver.org/). // In a future version of Zig it will be used for package deduplication. .version = "0.0.0", + // Together with name, this represents a globally unique package + // identifier. This field should be initialized with a 16-bit random number + // when the package is first created, and then *never change*. This allows + // unambiguous detection when one package is an updated version of another. + // + // When forking a Zig project, this id should be regenerated with a new + // random number if the upstream project is still maintained. Otherwise, + // the fork is *hostile*, attempting to take control over the original + // project's identity. Thus it is recommended to leave the comment on the + // following line intact, so that it shows up in code reviews that modify + // the field. + .id = $i, // Changing this has security and trust implications. + + // Tracks the earliest Zig version that the package considers to be a + // supported use case. + .minimum_zig_version = "$v", + // This field is optional. // This is currently advisory only; Zig does not yet do anything // with this value. diff --git a/lib/init/src/main.zig b/lib/init/src/main.zig index 66d5648c1e..cc69127d4f 100644 --- a/lib/init/src/main.zig +++ b/lib/init/src/main.zig @@ -43,4 +43,4 @@ test "fuzz example" { const std = @import("std"); /// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details. 
-const lib = @import("$_lib"); +const lib = @import("$n_lib"); diff --git a/src/Package.zig b/src/Package.zig index b585644d9e..6d370e9855 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -10,9 +10,17 @@ pub const multihash_len = 1 + 1 + Hash.Algo.digest_length; pub const multihash_hex_digest_len = 2 * multihash_len; pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; +pub fn randomId() u16 { + return std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff); +} + /// A user-readable, file system safe hash that identifies an exact package /// snapshot, including file contents. /// +/// The hash is not only to prevent collisions but must resist attacks where +/// the adversary fully controls the contents being hashed. Thus, it contains +/// a full SHA-256 digest. +/// /// This data structure can be used to store the legacy hash format too. Legacy /// hash format is scheduled to be removed after 0.14.0 is tagged. /// @@ -26,7 +34,8 @@ pub const Hash = struct { pub const Algo = std.crypto.hash.sha2.Sha256; pub const Digest = [Algo.digest_length]u8; - pub const max_len = 32 + 1 + 32 + 1 + 12; + /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" + pub const max_len = 32 + 1 + 32 + 1 + (16 + 32 + 192) / 6; pub fn fromSlice(s: []const u8) Hash { assert(s.len <= max_len); @@ -62,48 +71,35 @@ pub const Hash = struct { try std.testing.expect(h.isOld()); } - /// Produces "$name-$semver-$sizedhash". + /// Produces "$name-$semver-$hashplus". 
/// * name is the name field from build.zig.zon, truncated at 32 bytes and must /// be a valid zig identifier /// * semver is the version field from build.zig.zon, truncated at 32 bytes - /// * sizedhash is the following 9-byte array, base64 encoded using -_ to make + /// * hashplus is the following 39-byte array, base64 encoded using -_ to make /// it filesystem safe: - /// - (4 bytes) LE u32 total decompressed size in bytes - /// - (5 bytes) truncated SHA-256 of hashed files of the package + /// - (2 bytes) LE u16 Package ID + /// - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated + /// - (24 bytes) truncated SHA-256 digest of hashed files of the package /// - /// example: "nasm-2.16.1-2-BWdcABvF_jM1" - pub fn init(digest: Digest, name: []const u8, ver: []const u8, size: u32) Hash { + /// example: "nasm-2.16.1-3-AAD_ZlwACpGU-c3QXp_yNyn07Q5U9Rq-Cb1ur2G1" + pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u16, size: u32) Hash { + assert(name.len <= 32); + assert(ver.len <= 32); var result: Hash = undefined; var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes); - buf.appendSliceAssumeCapacity(name[0..@min(name.len, 32)]); + buf.appendSliceAssumeCapacity(name); buf.appendAssumeCapacity('-'); - buf.appendSliceAssumeCapacity(ver[0..@min(ver.len, 32)]); + buf.appendSliceAssumeCapacity(ver); buf.appendAssumeCapacity('-'); - var sizedhash: [9]u8 = undefined; - std.mem.writeInt(u32, sizedhash[0..4], size, .little); - sizedhash[4..].* = digest[0..5].*; - _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(12), &sizedhash); + var hashplus: [30]u8 = undefined; + std.mem.writeInt(u16, hashplus[0..2], id, .little); + std.mem.writeInt(u32, hashplus[2..6], size, .little); + hashplus[6..].* = digest[0..24].*; + _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(40), &hashplus); @memset(buf.unusedCapacitySlice(), 0); return result; } - /// Produces "$hashiname-N-$sizedhash". 
For packages that lack "build.zig.zon" metadata. - /// * hashiname is [5..][0..24] bytes of the SHA-256, urlsafe-base64-encoded, for a total of 32 bytes encoded - /// * the semver section is replaced with a hardcoded N which stands for - /// "naked". It acts as a version number so that any future updates to the - /// hash format can tell this hash format apart. Note that "N" is an - /// invalid semver. - /// * sizedhash is the same as in `init`. - /// - /// The hash is broken up this way so that "sizedhash" can be calculated - /// exactly the same way in both cases, and so that "name" and "hashiname" can - /// be used interchangeably in both cases. - pub fn initNaked(digest: Digest, size: u32) Hash { - var name: [32]u8 = undefined; - _ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]); - return init(digest, &name, "N", size); - } - /// Produces a unique hash based on the path provided. The result should /// not be user-visible. pub fn initPath(sub_path: []const u8, is_global: bool) Hash { @@ -144,7 +140,7 @@ pub const MultihashFunction = enum(u16) { pub const multihash_function: MultihashFunction = switch (Hash.Algo) { std.crypto.hash.sha2.Sha256 => .@"sha2-256", - else => @compileError("unreachable"), + else => unreachable, }; pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest { diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 326b8917a5..bb9fbd9664 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -586,9 +586,11 @@ pub fn computedPackageHash(f: *const Fetch) Package.Hash { if (f.manifest) |man| { var version_buffer: [32]u8 = undefined; const version: []const u8 = std.fmt.bufPrint(&version_buffer, "{}", .{man.version}) catch &version_buffer; - return .init(f.computed_hash.digest, man.name, version, saturated_size); + return .init(f.computed_hash.digest, man.name, version, man.id, saturated_size); } - return .initNaked(f.computed_hash.digest, saturated_size); + // In the future build.zig.zon fields 
will be added to allow overriding these values + // for naked tarballs. + return .init(f.computed_hash.digest, "N", "V", 0xffff, saturated_size); } /// `computeHash` gets a free check for the existence of `build.zig`, but when @@ -645,11 +647,13 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { f.manifest = try Manifest.parse(arena, ast.*, .{ .allow_missing_paths_field = f.allow_missing_paths_field, + .allow_missing_id = f.allow_missing_paths_field, + .allow_name_string = f.allow_missing_paths_field, }); const manifest = &f.manifest.?; if (manifest.errors.len > 0) { - const src_path = try eb.printString("{}{s}", .{ pkg_root, Manifest.basename }); + const src_path = try eb.printString("{}" ++ fs.path.sep_str ++ "{s}", .{ pkg_root, Manifest.basename }); try manifest.copyErrorsIntoBundle(ast.*, src_path, eb); return error.FetchFailed; } diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 82c850d705..083b56264d 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -36,6 +36,7 @@ pub const ErrorMessage = struct { }; name: []const u8, +id: u16, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), @@ -50,6 +51,8 @@ pub const ParseOptions = struct { allow_missing_paths_field: bool = false, /// Deprecated, to be removed after 0.14.0 is tagged. allow_name_string: bool = true, + /// Deprecated, to be removed after 0.14.0 is tagged. 
+ allow_missing_id: bool = true, }; pub const Error = Allocator.Error; @@ -70,6 +73,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .errors = .{}, .name = undefined, + .id = 0, .version = undefined, .version_node = 0, .dependencies = .{}, @@ -77,6 +81,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .paths = .{}, .allow_missing_paths_field = options.allow_missing_paths_field, .allow_name_string = options.allow_name_string, + .allow_missing_id = options.allow_missing_id, .minimum_zig_version = null, .buf = .{}, }; @@ -92,6 +97,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { return .{ .name = p.name, + .id = p.id, .version = p.version, .version_node = p.version_node, .dependencies = try p.dependencies.clone(p.arena), @@ -143,6 +149,7 @@ const Parse = struct { errors: std.ArrayListUnmanaged(ErrorMessage), name: []const u8, + id: u16, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), @@ -150,6 +157,7 @@ const Parse = struct { paths: std.StringArrayHashMapUnmanaged(void), allow_missing_paths_field: bool, allow_name_string: bool, + allow_missing_id: bool, minimum_zig_version: ?std.SemanticVersion, const InnerError = error{ ParseFailure, OutOfMemory }; @@ -167,6 +175,7 @@ const Parse = struct { var have_name = false; var have_version = false; var have_included_paths = false; + var have_id = false; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -183,6 +192,9 @@ const Parse = struct { } else if (mem.eql(u8, field_name, "name")) { p.name = try parseName(p, field_init); have_name = true; + } else if (mem.eql(u8, field_name, "id")) { + p.id = try parseId(p, field_init); + have_id = true; } else if (mem.eql(u8, field_name, "version")) { p.version_node = field_init; const version_text = try parseString(p, field_init); @@ -206,6 +218,12 @@ const Parse = 
struct { } } + if (!have_id and !p.allow_missing_id) { + try appendError(p, main_token, "missing top-level 'id' field; suggested value: 0x{x}", .{ + Package.randomId(), + }); + } + if (!have_name) { try appendError(p, main_token, "missing top-level 'name' field", .{}); } @@ -359,6 +377,33 @@ const Parse = struct { } } + fn parseId(p: *Parse, node: Ast.Node.Index) !u16 { + const ast = p.ast; + const node_tags = ast.nodes.items(.tag); + const main_tokens = ast.nodes.items(.main_token); + const main_token = main_tokens[node]; + if (node_tags[node] != .number_literal) { + return fail(p, main_token, "expected integer literal", .{}); + } + const token_bytes = ast.tokenSlice(main_token); + const parsed = std.zig.parseNumberLiteral(token_bytes); + const n = switch (parsed) { + .int => |n| n, + .big_int, .float => return fail(p, main_token, "expected u16 integer literal, found {s}", .{ + @tagName(parsed), + }), + .failure => |err| return fail(p, main_token, "bad integer literal: {s}", .{@tagName(err)}), + }; + const casted = std.math.cast(u16, n) orelse + return fail(p, main_token, "integer value {d} does not fit into u16", .{n}); + switch (casted) { + 0x0000, 0xffff => return fail(p, main_token, "id value 0x{x} reserved; use 0x{x} instead", .{ + casted, Package.randomId(), + }), + else => return casted, + } + } + fn parseName(p: *Parse, node: Ast.Node.Index) ![]const u8 { const ast = p.ast; const node_tags = ast.nodes.items(.tag); @@ -371,7 +416,7 @@ const Parse = struct { return fail(p, main_token, "name must be a valid bare zig identifier (hint: switch from string to enum literal)", .{}); if (name.len > max_name_len) - return fail(p, main_token, "name '{s}' exceeds max length of {d}", .{ + return fail(p, main_token, "name '{}' exceeds max length of {d}", .{ std.zig.fmtId(name), max_name_len, }); @@ -386,7 +431,7 @@ const Parse = struct { return fail(p, main_token, "name must be a valid bare zig identifier", .{}); if (ident_name.len > max_name_len) - return fail(p, 
main_token, "name '{s}' exceeds max length of {d}", .{ + return fail(p, main_token, "name '{}' exceeds max length of {d}", .{ std.zig.fmtId(ident_name), max_name_len, }); diff --git a/src/main.zig b/src/main.zig index d6b20f94f9..b1680dbf8e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4751,8 +4751,10 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { }; var ok_count: usize = 0; + const id = Package.randomId(); + for (template_paths) |template_path| { - if (templates.write(arena, fs.cwd(), cwd_basename, template_path)) |_| { + if (templates.write(arena, fs.cwd(), cwd_basename, template_path, id)) |_| { std.log.info("created {s}", .{template_path}); ok_count += 1; } else |err| switch (err) { @@ -7430,10 +7432,10 @@ fn loadManifest( 0, ) catch |err| switch (err) { error.FileNotFound => { + const id = Package.randomId(); var templates = findTemplates(gpa, arena); defer templates.deinit(); - - templates.write(arena, options.dir, options.root_name, Package.Manifest.basename) catch |e| { + templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, id) catch |e| { fatal("unable to write {s}: {s}", .{ Package.Manifest.basename, @errorName(e), }); @@ -7491,6 +7493,7 @@ const Templates = struct { out_dir: fs.Dir, root_name: []const u8, template_path: []const u8, + id: u16, ) !void { if (fs.path.dirname(template_path)) |dirname| { out_dir.makePath(dirname) catch |err| { @@ -7504,13 +7507,28 @@ const Templates = struct { }; templates.buffer.clearRetainingCapacity(); try templates.buffer.ensureUnusedCapacity(contents.len); - for (contents) |c| { - if (c == '$') { - try templates.buffer.appendSlice(root_name); - } else { - try templates.buffer.append(c); - } - } + var state: enum { start, dollar } = .start; + for (contents) |c| switch (state) { + .start => switch (c) { + '$' => state = .dollar, + else => try templates.buffer.append(c), + }, + .dollar => switch (c) { + 'n' => { + try templates.buffer.appendSlice(root_name); 
+ state = .start; + }, + 'i' => { + try templates.buffer.writer().print("0x{x}", .{id}); + state = .start; + }, + 'v' => { + try templates.buffer.appendSlice(build_options.version); + state = .start; + }, + else => fatal("unknown substitution: ${c}", .{c}), + }, + }; return out_dir.writeFile(.{ .sub_path = template_path, From 7cedc01b7e8b0788bb707456bfc0a7b344a783a6 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Feb 2025 21:00:30 -0800 Subject: [PATCH 08/15] zig init: sanitize generated name Adhere to the new rules: 32 byte limit + must be a valid bare zig identifier --- src/main.zig | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/main.zig b/src/main.zig index b1680dbf8e..cbdc32e203 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4741,6 +4741,7 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { const cwd_path = try process.getCwdAlloc(arena); const cwd_basename = fs.path.basename(cwd_path); + const sanitized_root_name = try sanitizeExampleName(arena, cwd_basename); const s = fs.path.sep_str; const template_paths = [_][]const u8{ @@ -4754,7 +4755,7 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { const id = Package.randomId(); for (template_paths) |template_path| { - if (templates.write(arena, fs.cwd(), cwd_basename, template_path, id)) |_| { + if (templates.write(arena, fs.cwd(), sanitized_root_name, template_path, id)) |_| { std.log.info("created {s}", .{template_path}); ok_count += 1; } else |err| switch (err) { @@ -4771,6 +4772,23 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { return cleanExit(); } +fn sanitizeExampleName(arena: Allocator, bytes: []const u8) error{OutOfMemory}![]const u8 { + if (bytes.len == 0) return "foo"; + var result: std.ArrayListUnmanaged(u8) = .empty; + try result.append(arena, switch (bytes[0]) { + '_', 'a'...'z', 'A'...'Z' => |c| c, + else => '_', + }); + for (bytes[1..]) |byte| 
switch (byte) { + '_', 'a'...'z', 'A'...'Z', '0'...'9' => try result.append(arena, byte), + else => continue, + }; + if (result.items.len > Package.Manifest.max_name_len) + result.shrinkRetainingCapacity(Package.Manifest.max_name_len); + + return result.toOwnedSlice(arena); +} + fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { dev.check(.build_command); @@ -7148,7 +7166,7 @@ fn cmdFetch( // The name to use in case the manifest file needs to be created now. const init_root_name = fs.path.basename(build_root.directory.path orelse cwd_path); var manifest, var ast = try loadManifest(gpa, arena, .{ - .root_name = init_root_name, + .root_name = try sanitizeExampleName(arena, init_root_name), .dir = build_root.directory.handle, .color = color, }); From 512bb9ad3e3765f4137dc99378036db7617875c4 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 24 Feb 2025 21:07:01 -0800 Subject: [PATCH 09/15] update zig's own manifest file to conform to new rules --- build.zig.zon | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.zig.zon b/build.zig.zon index 3b0cd4a11d..34b737d9cc 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,7 +1,7 @@ // The Zig compiler is not intended to be consumed as a package. // The sole purpose of this manifest file is to test the compiler. 
.{ - .name = "zig", + .name = .zig, .version = "0.0.0", .dependencies = .{ .standalone_test_cases = .{ @@ -12,4 +12,5 @@ }, }, .paths = .{""}, + .id = 0x1cb6, } From eff1716b6c7800e6876d95d900b472fe351b0355 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 25 Feb 2025 15:34:05 -0800 Subject: [PATCH 10/15] Package: update unit tests to new API --- src/Package.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Package.zig b/src/Package.zig index 6d370e9855..c4bb8aa46a 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -173,8 +173,8 @@ test Hash { 0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87, 0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f, }; - const result: Hash = .init(example_digest, "nasm", "2.16.1-2", 10 * 1024 * 1024); - try std.testing.expectEqualStrings("nasm-2.16.1-2-AACgAMf1cbe0", result.toSlice()); + const result: Hash = .init(example_digest, "nasm", "2.16.1-2", 0xcafe, 10 * 1024 * 1024); + try std.testing.expectEqualStrings("nasm-2.16.1-2-_soAAKAAx_Vxt7Tnbzzbh3p_3fl3h53ThvpzV5r3", result.toSlice()); } test { From a70307e7ffc608643bbd940796eaeb5bca6bbc8f Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 25 Feb 2025 15:34:17 -0800 Subject: [PATCH 11/15] CLI: add unit test and improve sanitizeExampleName --- src/main.zig | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/main.zig b/src/main.zig index cbdc32e203..f32d205538 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4773,22 +4773,36 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { } fn sanitizeExampleName(arena: Allocator, bytes: []const u8) error{OutOfMemory}![]const u8 { - if (bytes.len == 0) return "foo"; var result: std.ArrayListUnmanaged(u8) = .empty; - try result.append(arena, switch (bytes[0]) { - '_', 'a'...'z', 'A'...'Z' => |c| c, - else => '_', - }); - for (bytes[1..]) |byte| 
switch (byte) { - '_', 'a'...'z', 'A'...'Z', '0'...'9' => try result.append(arena, byte), + for (bytes, 0..) |byte, i| switch (byte) { + '0'...'9' => { + if (i == 0) try result.append(arena, '_'); + try result.append(arena, byte); + }, + '_', 'a'...'z', 'A'...'Z' => try result.append(arena, byte), + '-', '.', ' ' => try result.append(arena, '_'), else => continue, }; + if (result.items.len == 0) return "foo"; if (result.items.len > Package.Manifest.max_name_len) result.shrinkRetainingCapacity(Package.Manifest.max_name_len); return result.toOwnedSlice(arena); } +test sanitizeExampleName { + var arena_instance = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + try std.testing.expectEqualStrings("foo_bar", try sanitizeExampleName(arena, "foo bar+")); + try std.testing.expectEqualStrings("foo", try sanitizeExampleName(arena, "")); + try std.testing.expectEqualStrings("foo", try sanitizeExampleName(arena, "!")); + try std.testing.expectEqualStrings("a", try sanitizeExampleName(arena, "!a")); + try std.testing.expectEqualStrings("a_b", try sanitizeExampleName(arena, "a.b!")); + try std.testing.expectEqualStrings("_01234", try sanitizeExampleName(arena, "01234")); +} + fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { dev.check(.build_command); From 0fc7c9f57c0042cfe6dab9deb3b5fe2f9404d744 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 25 Feb 2025 17:26:19 -0800 Subject: [PATCH 12/15] switch from "id" to "nonce" mainly this addresses the following use case: 1. Someone creates a template with build.zig.zon, id field included (note that zig init does not create this problem since it generates fresh id every time it runs). 2. User A uses the template, changing package name to "example" but not id field. 3. User B uses the same template, changing package name also to "example", also not changing the id field. 
Here, both packages have unintentional conflicting logical ids. By making the field a combination of name checksum + random id, this accident is avoided. "nonce" is an OK name for this. Also relaxes errors on remote packages when using `zig fetch`. --- build.zig.zon | 2 +- doc/build.zig.zon.md | 25 +++++++++++++------- lib/init/build.zig.zon | 21 +++++++++-------- src/Package.zig | 33 +++++++++++++++++++++----- src/Package/Fetch.zig | 12 +++++++--- src/Package/Manifest.zig | 50 ++++++++++++++++++++-------------------- src/main.zig | 16 ++++++++----- 7 files changed, 100 insertions(+), 59 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 34b737d9cc..a7e00b27f9 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -12,5 +12,5 @@ }, }, .paths = .{""}, - .id = 0x1cb6, + .nonce = 0xc1ce10810000f013, } diff --git a/doc/build.zig.zon.md b/doc/build.zig.zon.md index 2e406a8afd..a48094b6b8 100644 --- a/doc/build.zig.zon.md +++ b/doc/build.zig.zon.md @@ -22,21 +22,30 @@ Zig package namespace. Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes. -### `id` +### `nonce` Together with name, this represents a globally unique package identifier. This -field should be initialized with a 16-bit random number when the package is -first created, and then *never change*. This allows Zig to unambiguously detect -when one package is an updated version of another. +field is auto-initialized by the toolchain when the package is first created, +and then *never changes*. This allows Zig to unambiguously detect when one +package is an updated version of another. -When forking a Zig project, this id should be regenerated with a new random -number if the upstream project is still maintained. Otherwise, the fork is -*hostile*, attempting to take control over the original project's identity. +When forking a Zig project, this nonce should be regenerated if the upstream +project is still maintained. 
Otherwise, the fork is *hostile*, attempting to +take control over the original project's identity. The nonce can be regenerated +by deleting the field and running `zig build`. -`0x0000` is invalid because it obviously means a random number wasn't used. +This 64-bit integer is the combination of a 16-bit id component, a 32-bit +checksum, and 16 bits of reserved zeroes. + +The id component within the nonce has these restrictions: + +`0x0000` is reserved for legacy packages. `0xffff` is reserved to represent "naked" packages. +The checksum is computed from `name` and serves to protect Zig users from +accidental id collisions. + ### `version` String. Required. diff --git a/lib/init/build.zig.zon b/lib/init/build.zig.zon index 85fca48108..061254fd67 100644 --- a/lib/init/build.zig.zon +++ b/lib/init/build.zig.zon @@ -13,17 +13,18 @@ .version = "0.0.0", // Together with name, this represents a globally unique package - // identifier. This field should be initialized with a 16-bit random number - // when the package is first created, and then *never change*. This allows - // unambiguous detection when one package is an updated version of another. + // identifier. This field is generated by the Zig toolchain when the + // package is first created, and then *never changes*. This allows + // unambiguous detection of one package being an updated version of + // another. // - // When forking a Zig project, this id should be regenerated with a new - // random number if the upstream project is still maintained. Otherwise, - // the fork is *hostile*, attempting to take control over the original - // project's identity. Thus it is recommended to leave the comment on the - // following line intact, so that it shows up in code reviews that modify - // the field. - .id = $i, // Changing this has security and trust implications. + // When forking a Zig project, this id should be regenerated (delete the + // field and run `zig build`) if the upstream project is still maintained. 
+ // Otherwise, the fork is *hostile*, attempting to take control over the + // original project's identity. Thus it is recommended to leave the comment + // on the following line intact, so that it shows up in code reviews that + // modify the field. + .nonce = $i, // Changing this has security and trust implications. // Tracks the earliest Zig version that the package considers to be a // supported use case. diff --git a/src/Package.zig b/src/Package.zig index c4bb8aa46a..e5513bd349 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -10,9 +10,29 @@ pub const multihash_len = 1 + 1 + Hash.Algo.digest_length; pub const multihash_hex_digest_len = 2 * multihash_len; pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; -pub fn randomId() u16 { - return std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff); -} +pub const Nonce = packed struct(u64) { + id: u16, + reserved: u16 = 0, + checksum: u32, + + pub fn generate(name: []const u8) Nonce { + return .{ + .id = std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff), + .checksum = std.hash.Crc32.hash(name), + }; + } + + pub fn validate(n: Nonce, name: []const u8) bool { + switch (n.id) { + 0x0000, 0xffff => return false, + else => return std.hash.Crc32.hash(name) == n.checksum, + } + } + + pub fn int(n: Nonce) u64 { + return @bitCast(n); + } +}; /// A user-readable, file system safe hash that identifies an exact package /// snapshot, including file contents. @@ -72,9 +92,10 @@ pub const Hash = struct { } /// Produces "$name-$semver-$hashplus". 
- /// * name is the name field from build.zig.zon, truncated at 32 bytes and must - /// be a valid zig identifier - /// * semver is the version field from build.zig.zon, truncated at 32 bytes + /// * name is the name field from build.zig.zon, asserted to be at most 32 + /// bytes and assumed be a valid zig identifier + /// * semver is the version field from build.zig.zon, asserted to be at + /// most 32 bytes /// * hashplus is the following 39-byte array, base64 encoded using -_ to make /// it filesystem safe: /// - (2 bytes) LE u16 Package ID diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index bb9fbd9664..bbf13e57b8 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -44,6 +44,8 @@ omit_missing_hash_error: bool, /// which specifies inclusion rules. This is intended to be true for the first /// fetch task and false for the recursive dependencies. allow_missing_paths_field: bool, +allow_missing_nonce: bool, +allow_name_string: bool, /// If true and URL points to a Git repository, will use the latest commit. 
use_latest_commit: bool, @@ -372,7 +374,7 @@ pub fn run(f: *Fetch) RunError!void { }; if (remote.hash) |expected_hash| { - var prefixed_pkg_sub_path_buffer: [100]u8 = undefined; + var prefixed_pkg_sub_path_buffer: [Package.Hash.max_len + 2]u8 = undefined; prefixed_pkg_sub_path_buffer[0] = 'p'; prefixed_pkg_sub_path_buffer[1] = fs.path.sep; const hash_slice = expected_hash.toSlice(); @@ -647,8 +649,8 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { f.manifest = try Manifest.parse(arena, ast.*, .{ .allow_missing_paths_field = f.allow_missing_paths_field, - .allow_missing_id = f.allow_missing_paths_field, - .allow_name_string = f.allow_missing_paths_field, + .allow_missing_nonce = f.allow_missing_nonce, + .allow_name_string = f.allow_name_string, }); const manifest = &f.manifest.?; @@ -750,6 +752,8 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { .job_queue = f.job_queue, .omit_missing_hash_error = false, .allow_missing_paths_field = true, + .allow_missing_nonce = true, + .allow_name_string = true, .use_latest_commit = false, .package_root = undefined, @@ -2319,6 +2323,8 @@ const TestFetchBuilder = struct { .job_queue = &self.job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = false, + .allow_missing_nonce = true, // so we can keep using the old testdata .tar.gz + .allow_name_string = true, // so we can keep using the old testdata .tar.gz .use_latest_commit = true, .package_root = undefined, diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 083b56264d..fb8f0cff2b 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -52,7 +52,7 @@ pub const ParseOptions = struct { /// Deprecated, to be removed after 0.14.0 is tagged. allow_name_string: bool = true, /// Deprecated, to be removed after 0.14.0 is tagged. 
- allow_missing_id: bool = true, + allow_missing_nonce: bool = true, }; pub const Error = Allocator.Error; @@ -81,7 +81,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .paths = .{}, .allow_missing_paths_field = options.allow_missing_paths_field, .allow_name_string = options.allow_name_string, - .allow_missing_id = options.allow_missing_id, + .allow_missing_nonce = options.allow_missing_nonce, .minimum_zig_version = null, .buf = .{}, }; @@ -157,7 +157,7 @@ const Parse = struct { paths: std.StringArrayHashMapUnmanaged(void), allow_missing_paths_field: bool, allow_name_string: bool, - allow_missing_id: bool, + allow_missing_nonce: bool, minimum_zig_version: ?std.SemanticVersion, const InnerError = error{ ParseFailure, OutOfMemory }; @@ -175,7 +175,7 @@ const Parse = struct { var have_name = false; var have_version = false; var have_included_paths = false; - var have_id = false; + var nonce: ?Package.Nonce = null; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -192,9 +192,8 @@ const Parse = struct { } else if (mem.eql(u8, field_name, "name")) { p.name = try parseName(p, field_init); have_name = true; - } else if (mem.eql(u8, field_name, "id")) { - p.id = try parseId(p, field_init); - have_id = true; + } else if (mem.eql(u8, field_name, "nonce")) { + nonce = try parseNonce(p, field_init); } else if (mem.eql(u8, field_name, "version")) { p.version_node = field_init; const version_text = try parseString(p, field_init); @@ -218,14 +217,23 @@ const Parse = struct { } } - if (!have_id and !p.allow_missing_id) { - try appendError(p, main_token, "missing top-level 'id' field; suggested value: 0x{x}", .{ - Package.randomId(), - }); - } - if (!have_name) { try appendError(p, main_token, "missing top-level 'name' field", .{}); + } else { + if (nonce) |n| { + if (!n.validate(p.name)) { + return fail(p, main_token, "invalid nonce: 0x{x}; if this is a new or forked package, use this value: 0x{x}", 
.{ + n.int(), Package.Nonce.generate(p.name).int(), + }); + } + p.id = n.id; + } else if (!p.allow_missing_nonce) { + try appendError(p, main_token, "missing top-level 'nonce' field; suggested value: 0x{x}", .{ + Package.Nonce.generate(p.name).int(), + }); + } else { + p.id = 0; + } } if (!have_version) { @@ -377,7 +385,7 @@ const Parse = struct { } } - fn parseId(p: *Parse, node: Ast.Node.Index) !u16 { + fn parseNonce(p: *Parse, node: Ast.Node.Index) !Package.Nonce { const ast = p.ast; const node_tags = ast.nodes.items(.tag); const main_tokens = ast.nodes.items(.main_token); @@ -387,20 +395,12 @@ const Parse = struct { } const token_bytes = ast.tokenSlice(main_token); const parsed = std.zig.parseNumberLiteral(token_bytes); - const n = switch (parsed) { - .int => |n| n, - .big_int, .float => return fail(p, main_token, "expected u16 integer literal, found {s}", .{ + switch (parsed) { + .int => |n| return @bitCast(n), + .big_int, .float => return fail(p, main_token, "expected u64 integer literal, found {s}", .{ @tagName(parsed), }), .failure => |err| return fail(p, main_token, "bad integer literal: {s}", .{@tagName(err)}), - }; - const casted = std.math.cast(u16, n) orelse - return fail(p, main_token, "integer value {d} does not fit into u16", .{n}); - switch (casted) { - 0x0000, 0xffff => return fail(p, main_token, "id value 0x{x} reserved; use 0x{x} instead", .{ - casted, Package.randomId(), - }), - else => return casted, } } diff --git a/src/main.zig b/src/main.zig index f32d205538..d22d682ded 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4752,10 +4752,10 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { }; var ok_count: usize = 0; - const id = Package.randomId(); + const nonce: Package.Nonce = .generate(sanitized_root_name); for (template_paths) |template_path| { - if (templates.write(arena, fs.cwd(), sanitized_root_name, template_path, id)) |_| { + if (templates.write(arena, fs.cwd(), sanitized_root_name, template_path, nonce)) 
|_| { std.log.info("created {s}", .{template_path}); ok_count += 1; } else |err| switch (err) { @@ -5225,6 +5225,8 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .job_queue = &job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = false, + .allow_missing_nonce = false, + .allow_name_string = false, .use_latest_commit = false, .package_root = undefined, @@ -7125,6 +7127,8 @@ fn cmdFetch( .job_queue = &job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = false, + .allow_missing_nonce = true, + .allow_name_string = true, .use_latest_commit = true, .package_root = undefined, @@ -7464,10 +7468,10 @@ fn loadManifest( 0, ) catch |err| switch (err) { error.FileNotFound => { - const id = Package.randomId(); + const nonce: Package.Nonce = .generate(options.root_name); var templates = findTemplates(gpa, arena); defer templates.deinit(); - templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, id) catch |e| { + templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, nonce) catch |e| { fatal("unable to write {s}: {s}", .{ Package.Manifest.basename, @errorName(e), }); @@ -7525,7 +7529,7 @@ const Templates = struct { out_dir: fs.Dir, root_name: []const u8, template_path: []const u8, - id: u16, + nonce: Package.Nonce, ) !void { if (fs.path.dirname(template_path)) |dirname| { out_dir.makePath(dirname) catch |err| { @@ -7551,7 +7555,7 @@ const Templates = struct { state = .start; }, 'i' => { - try templates.buffer.writer().print("0x{x}", .{id}); + try templates.buffer.writer().print("0x{x}", .{nonce.int()}); state = .start; }, 'v' => { From ea516f0e81d055e0d7504eff36dfde694831bec1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 25 Feb 2025 17:58:57 -0800 Subject: [PATCH 13/15] bump package id component to 32 bits and to make the base64 round even, bump sha256 to 200 bits (up from 192) --- build.zig.zon | 2 +- doc/build.zig.zon.md | 12 +++++++----- 
src/Package.zig | 35 ++++++++++++++++------------------- src/Package/Manifest.zig | 4 ++-- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index a7e00b27f9..412c9c099b 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -12,5 +12,5 @@ }, }, .paths = .{""}, - .nonce = 0xc1ce10810000f013, + .nonce = 0xc1ce108124179e16, } diff --git a/doc/build.zig.zon.md b/doc/build.zig.zon.md index a48094b6b8..541408d30e 100644 --- a/doc/build.zig.zon.md +++ b/doc/build.zig.zon.md @@ -22,9 +22,11 @@ Zig package namespace. Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes. +Together with `nonce`, this represents a globally unique package identifier. + ### `nonce` -Together with name, this represents a globally unique package identifier. This +Together with `name`, this represents a globally unique package identifier. This field is auto-initialized by the toolchain when the package is first created, and then *never changes*. This allows Zig to unambiguously detect when one package is an updated version of another. @@ -34,14 +36,14 @@ project is still maintained. Otherwise, the fork is *hostile*, attempting to take control over the original project's identity. The nonce can be regenerated by deleting the field and running `zig build`. -This 64-bit integer is the combination of a 16-bit id component, a 32-bit -checksum, and 16 bits of reserved zeroes. +This 64-bit integer is the combination of a 32-bit id component and a 32-bit +checksum. The id component within the nonce has these restrictions: -`0x0000` is reserved for legacy packages. +`0x00000000` is reserved for legacy packages. -`0xffff` is reserved to represent "naked" packages. +`0xffffffff` is reserved to represent "naked" packages. The checksum is computed from `name` and serves to protect Zig users from accidental id collisions. 
diff --git a/src/Package.zig b/src/Package.zig index e5513bd349..145678291a 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -11,20 +11,19 @@ pub const multihash_hex_digest_len = 2 * multihash_len; pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; pub const Nonce = packed struct(u64) { - id: u16, - reserved: u16 = 0, + id: u32, checksum: u32, pub fn generate(name: []const u8) Nonce { return .{ - .id = std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff), + .id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff), .checksum = std.hash.Crc32.hash(name), }; } pub fn validate(n: Nonce, name: []const u8) bool { switch (n.id) { - 0x0000, 0xffff => return false, + 0x00000000, 0xffffffff => return false, else => return std.hash.Crc32.hash(name) == n.checksum, } } @@ -54,8 +53,8 @@ pub const Hash = struct { pub const Algo = std.crypto.hash.sha2.Sha256; pub const Digest = [Algo.digest_length]u8; - /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" - pub const max_len = 32 + 1 + 32 + 1 + (16 + 32 + 192) / 6; + /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" + pub const max_len = 32 + 1 + 32 + 1 + (32 + 32 + 200) / 6; pub fn fromSlice(s: []const u8) Hash { assert(s.len <= max_len); @@ -96,14 +95,12 @@ pub const Hash = struct { /// bytes and assumed be a valid zig identifier /// * semver is the version field from build.zig.zon, asserted to be at /// most 32 bytes - /// * hashplus is the following 39-byte array, base64 encoded using -_ to make + /// * hashplus is the following 33-byte array, base64 encoded using -_ to make /// it filesystem safe: - /// - (2 bytes) LE u16 Package ID + /// - (4 bytes) LE u32 Package ID /// - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated - /// - (24 bytes) truncated SHA-256 digest of hashed files of the package - /// - /// example: "nasm-2.16.1-3-AAD_ZlwACpGU-c3QXp_yNyn07Q5U9Rq-Cb1ur2G1" - pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u16, size: 
u32) Hash { + /// - (25 bytes) truncated SHA-256 digest of hashed files of the package + pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u32, size: u32) Hash { assert(name.len <= 32); assert(ver.len <= 32); var result: Hash = undefined; @@ -112,11 +109,11 @@ pub const Hash = struct { buf.appendAssumeCapacity('-'); buf.appendSliceAssumeCapacity(ver); buf.appendAssumeCapacity('-'); - var hashplus: [30]u8 = undefined; - std.mem.writeInt(u16, hashplus[0..2], id, .little); - std.mem.writeInt(u32, hashplus[2..6], size, .little); - hashplus[6..].* = digest[0..24].*; - _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(40), &hashplus); + var hashplus: [33]u8 = undefined; + std.mem.writeInt(u32, hashplus[0..4], id, .little); + std.mem.writeInt(u32, hashplus[4..8], size, .little); + hashplus[8..].* = digest[0..25].*; + _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(44), &hashplus); @memset(buf.unusedCapacitySlice(), 0); return result; } @@ -194,8 +191,8 @@ test Hash { 0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87, 0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f, }; - const result: Hash = .init(example_digest, "nasm", "2.16.1-2", 0xcafe, 10 * 1024 * 1024); - try std.testing.expectEqualStrings("nasm-2.16.1-2-_soAAKAAx_Vxt7Tnbzzbh3p_3fl3h53ThvpzV5r3", result.toSlice()); + const result: Hash = .init(example_digest, "nasm", "2.16.1-3", 0xcafebabe, 10 * 1024 * 1024); + try std.testing.expectEqualStrings("nasm-2.16.1-3-vrr-ygAAoADH9XG3tOdvPNuHen_d-XeHndOG-nNXmved", result.toSlice()); } test { diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index fb8f0cff2b..7e15dde225 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -36,7 +36,7 @@ pub const ErrorMessage = struct { }; name: []const u8, -id: u16, +id: u32, version: std.SemanticVersion, version_node: Ast.Node.Index, 
dependencies: std.StringArrayHashMapUnmanaged(Dependency), @@ -149,7 +149,7 @@ const Parse = struct { errors: std.ArrayListUnmanaged(ErrorMessage), name: []const u8, - id: u16, + id: u32, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), From 67904e925d2b33c48dda3d4ddaf158328964dc2e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 25 Feb 2025 20:25:17 -0800 Subject: [PATCH 14/15] zig init: adjust template lang to allow zig fmt passthrough --- lib/init/build.zig | 6 +++--- lib/init/build.zig.zon | 6 +++--- lib/init/src/main.zig | 2 +- src/main.zig | 41 ++++++++++++++++++++++------------------- 4 files changed, 29 insertions(+), 26 deletions(-) diff --git a/lib/init/build.zig b/lib/init/build.zig index ec25698c68..4db31713e4 100644 --- a/lib/init/build.zig +++ b/lib/init/build.zig @@ -42,14 +42,14 @@ pub fn build(b: *std.Build) void { // Modules can depend on one another using the `std.Build.Module.addImport` function. // This is what allows Zig source code to use `@import("foo")` where 'foo' is not a // file path. In this case, we set up `exe_mod` to import `lib_mod`. - exe_mod.addImport("$n_lib", lib_mod); + exe_mod.addImport(".NAME_lib", lib_mod); // Now, we will create a static library based on the module we created above. // This creates a `std.Build.Step.Compile`, which is the build step responsible // for actually invoking the compiler. const lib = b.addLibrary(.{ .linkage = .static, - .name = "$n", + .name = ".NAME", .root_module = lib_mod, }); @@ -61,7 +61,7 @@ pub fn build(b: *std.Build) void { // This creates another `std.Build.Step.Compile`, but this one builds an executable // rather than a static library. 
const exe = b.addExecutable(.{ - .name = "$n", + .name = ".NAME", .root_module = exe_mod, }); diff --git a/lib/init/build.zig.zon b/lib/init/build.zig.zon index 061254fd67..b154ddc16b 100644 --- a/lib/init/build.zig.zon +++ b/lib/init/build.zig.zon @@ -6,7 +6,7 @@ // // It is redundant to include "zig" in this name because it is already // within the Zig package namespace. - .name = .$n, + .name = .LITNAME, // This is a [Semantic Version](https://semver.org/). // In a future version of Zig it will be used for package deduplication. @@ -24,11 +24,11 @@ // original project's identity. Thus it is recommended to leave the comment // on the following line intact, so that it shows up in code reviews that // modify the field. - .nonce = $i, // Changing this has security and trust implications. + .nonce = .NONCE, // Changing this has security and trust implications. // Tracks the earliest Zig version that the package considers to be a // supported use case. - .minimum_zig_version = "$v", + .minimum_zig_version = ".ZIGVER", // This field is optional. // This is currently advisory only; Zig does not yet do anything diff --git a/lib/init/src/main.zig b/lib/init/src/main.zig index cc69127d4f..85d8c93551 100644 --- a/lib/init/src/main.zig +++ b/lib/init/src/main.zig @@ -43,4 +43,4 @@ test "fuzz example" { const std = @import("std"); /// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details. 
-const lib = @import("$n_lib"); +const lib = @import(".NAME_lib"); diff --git a/src/main.zig b/src/main.zig index d22d682ded..3dc6155db6 100644 --- a/src/main.zig +++ b/src/main.zig @@ -7543,28 +7543,31 @@ const Templates = struct { }; templates.buffer.clearRetainingCapacity(); try templates.buffer.ensureUnusedCapacity(contents.len); - var state: enum { start, dollar } = .start; - for (contents) |c| switch (state) { - .start => switch (c) { - '$' => state = .dollar, - else => try templates.buffer.append(c), - }, - .dollar => switch (c) { - 'n' => { + var i: usize = 0; + while (i < contents.len) { + if (contents[i] == '.') { + if (std.mem.startsWith(u8, contents[i..], ".LITNAME")) { + try templates.buffer.append('.'); try templates.buffer.appendSlice(root_name); - state = .start; - }, - 'i' => { + i += ".LITNAME".len; + continue; + } else if (std.mem.startsWith(u8, contents[i..], ".NAME")) { + try templates.buffer.appendSlice(root_name); + i += ".NAME".len; + continue; + } else if (std.mem.startsWith(u8, contents[i..], ".NONCE")) { try templates.buffer.writer().print("0x{x}", .{nonce.int()}); - state = .start; - }, - 'v' => { + i += ".NONCE".len; + continue; + } else if (std.mem.startsWith(u8, contents[i..], ".ZIGVER")) { try templates.buffer.appendSlice(build_options.version); - state = .start; - }, - else => fatal("unknown substitution: ${c}", .{c}), - }, - }; + i += ".ZIGVER".len; + continue; + } + } + try templates.buffer.append(contents[i]); + i += 1; + } return out_dir.writeFile(.{ .sub_path = template_path, From de43f5eb6ae4a569efe15e3469b3de76a86d9cd1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 26 Feb 2025 04:01:28 -0800 Subject: [PATCH 15/15] rename "nonce" to "fingerprint" --- build.zig.zon | 2 +- doc/build.zig.zon.md | 10 +++++----- lib/init/build.zig.zon | 2 +- src/Package.zig | 8 ++++---- src/Package/Fetch.zig | 8 ++++---- src/Package/Manifest.zig | 26 +++++++++++++------------- src/main.zig | 20 ++++++++++---------- 7 files changed, 38 
insertions(+), 38 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 412c9c099b..4f4b948217 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -12,5 +12,5 @@ }, }, .paths = .{""}, - .nonce = 0xc1ce108124179e16, + .fingerprint = 0xc1ce108124179e16, } diff --git a/doc/build.zig.zon.md b/doc/build.zig.zon.md index 541408d30e..690905a400 100644 --- a/doc/build.zig.zon.md +++ b/doc/build.zig.zon.md @@ -22,24 +22,24 @@ Zig package namespace. Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes. -Together with `nonce`, this represents a globally unique package identifier. +Together with `fingerprint`, this represents a globally unique package identifier. -### `nonce` +### `fingerprint` Together with `name`, this represents a globally unique package identifier. This field is auto-initialized by the toolchain when the package is first created, and then *never changes*. This allows Zig to unambiguously detect when one package is an updated version of another. -When forking a Zig project, this nonce should be regenerated if the upstream +When forking a Zig project, this fingerprint should be regenerated if the upstream project is still maintained. Otherwise, the fork is *hostile*, attempting to -take control over the original project's identity. The nonce can be regenerated +take control over the original project's identity. The fingerprint can be regenerated by deleting the field and running `zig build`. This 64-bit integer is the combination of a 32-bit id component and a 32-bit checksum. -The id component within the nonce has these restrictions: +The id component within the fingerprint has these restrictions: `0x00000000` is reserved for legacy packages. diff --git a/lib/init/build.zig.zon b/lib/init/build.zig.zon index b154ddc16b..1dbe7519e7 100644 --- a/lib/init/build.zig.zon +++ b/lib/init/build.zig.zon @@ -24,7 +24,7 @@ // original project's identity. 
Thus it is recommended to leave the comment // on the following line intact, so that it shows up in code reviews that // modify the field. - .nonce = .NONCE, // Changing this has security and trust implications. + .fingerprint = .FINGERPRINT, // Changing this has security and trust implications. // Tracks the earliest Zig version that the package considers to be a // supported use case. diff --git a/src/Package.zig b/src/Package.zig index 145678291a..7f231f5ad7 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -10,25 +10,25 @@ pub const multihash_len = 1 + 1 + Hash.Algo.digest_length; pub const multihash_hex_digest_len = 2 * multihash_len; pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; -pub const Nonce = packed struct(u64) { +pub const Fingerprint = packed struct(u64) { id: u32, checksum: u32, - pub fn generate(name: []const u8) Nonce { + pub fn generate(name: []const u8) Fingerprint { return .{ .id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff), .checksum = std.hash.Crc32.hash(name), }; } - pub fn validate(n: Nonce, name: []const u8) bool { + pub fn validate(n: Fingerprint, name: []const u8) bool { switch (n.id) { 0x00000000, 0xffffffff => return false, else => return std.hash.Crc32.hash(name) == n.checksum, } } - pub fn int(n: Nonce) u64 { + pub fn int(n: Fingerprint) u64 { return @bitCast(n); } }; diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index bbf13e57b8..efee6605ed 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -44,7 +44,7 @@ omit_missing_hash_error: bool, /// which specifies inclusion rules. This is intended to be true for the first /// fetch task and false for the recursive dependencies. allow_missing_paths_field: bool, -allow_missing_nonce: bool, +allow_missing_fingerprint: bool, allow_name_string: bool, /// If true and URL points to a Git repository, will use the latest commit. 
use_latest_commit: bool, @@ -649,7 +649,7 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { f.manifest = try Manifest.parse(arena, ast.*, .{ .allow_missing_paths_field = f.allow_missing_paths_field, - .allow_missing_nonce = f.allow_missing_nonce, + .allow_missing_fingerprint = f.allow_missing_fingerprint, .allow_name_string = f.allow_name_string, }); const manifest = &f.manifest.?; @@ -752,7 +752,7 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { .job_queue = f.job_queue, .omit_missing_hash_error = false, .allow_missing_paths_field = true, - .allow_missing_nonce = true, + .allow_missing_fingerprint = true, .allow_name_string = true, .use_latest_commit = false, @@ -2323,7 +2323,7 @@ const TestFetchBuilder = struct { .job_queue = &self.job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = false, - .allow_missing_nonce = true, // so we can keep using the old testdata .tar.gz + .allow_missing_fingerprint = true, // so we can keep using the old testdata .tar.gz .allow_name_string = true, // so we can keep using the old testdata .tar.gz .use_latest_commit = true, diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 7e15dde225..c526854df2 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -52,7 +52,7 @@ pub const ParseOptions = struct { /// Deprecated, to be removed after 0.14.0 is tagged. allow_name_string: bool = true, /// Deprecated, to be removed after 0.14.0 is tagged. 
- allow_missing_nonce: bool = true, + allow_missing_fingerprint: bool = true, }; pub const Error = Allocator.Error; @@ -81,7 +81,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .paths = .{}, .allow_missing_paths_field = options.allow_missing_paths_field, .allow_name_string = options.allow_name_string, - .allow_missing_nonce = options.allow_missing_nonce, + .allow_missing_fingerprint = options.allow_missing_fingerprint, .minimum_zig_version = null, .buf = .{}, }; @@ -157,7 +157,7 @@ const Parse = struct { paths: std.StringArrayHashMapUnmanaged(void), allow_missing_paths_field: bool, allow_name_string: bool, - allow_missing_nonce: bool, + allow_missing_fingerprint: bool, minimum_zig_version: ?std.SemanticVersion, const InnerError = error{ ParseFailure, OutOfMemory }; @@ -175,7 +175,7 @@ const Parse = struct { var have_name = false; var have_version = false; var have_included_paths = false; - var nonce: ?Package.Nonce = null; + var fingerprint: ?Package.Fingerprint = null; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -192,8 +192,8 @@ const Parse = struct { } else if (mem.eql(u8, field_name, "name")) { p.name = try parseName(p, field_init); have_name = true; - } else if (mem.eql(u8, field_name, "nonce")) { - nonce = try parseNonce(p, field_init); + } else if (mem.eql(u8, field_name, "fingerprint")) { + fingerprint = try parseFingerprint(p, field_init); } else if (mem.eql(u8, field_name, "version")) { p.version_node = field_init; const version_text = try parseString(p, field_init); @@ -220,16 +220,16 @@ const Parse = struct { if (!have_name) { try appendError(p, main_token, "missing top-level 'name' field", .{}); } else { - if (nonce) |n| { + if (fingerprint) |n| { if (!n.validate(p.name)) { - return fail(p, main_token, "invalid nonce: 0x{x}; if this is a new or forked package, use this value: 0x{x}", .{ - n.int(), Package.Nonce.generate(p.name).int(), + return fail(p, main_token, 
"invalid fingerprint: 0x{x}; if this is a new or forked package, use this value: 0x{x}", .{ + n.int(), Package.Fingerprint.generate(p.name).int(), }); } p.id = n.id; - } else if (!p.allow_missing_nonce) { - try appendError(p, main_token, "missing top-level 'nonce' field; suggested value: 0x{x}", .{ - Package.Nonce.generate(p.name).int(), + } else if (!p.allow_missing_fingerprint) { + try appendError(p, main_token, "missing top-level 'fingerprint' field; suggested value: 0x{x}", .{ + Package.Fingerprint.generate(p.name).int(), }); } else { p.id = 0; @@ -385,7 +385,7 @@ const Parse = struct { } } - fn parseNonce(p: *Parse, node: Ast.Node.Index) !Package.Nonce { + fn parseFingerprint(p: *Parse, node: Ast.Node.Index) !Package.Fingerprint { const ast = p.ast; const node_tags = ast.nodes.items(.tag); const main_tokens = ast.nodes.items(.main_token); diff --git a/src/main.zig b/src/main.zig index 3dc6155db6..7b1bc50bd6 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4752,10 +4752,10 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { }; var ok_count: usize = 0; - const nonce: Package.Nonce = .generate(sanitized_root_name); + const fingerprint: Package.Fingerprint = .generate(sanitized_root_name); for (template_paths) |template_path| { - if (templates.write(arena, fs.cwd(), sanitized_root_name, template_path, nonce)) |_| { + if (templates.write(arena, fs.cwd(), sanitized_root_name, template_path, fingerprint)) |_| { std.log.info("created {s}", .{template_path}); ok_count += 1; } else |err| switch (err) { @@ -5225,7 +5225,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .job_queue = &job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = false, - .allow_missing_nonce = false, + .allow_missing_fingerprint = false, .allow_name_string = false, .use_latest_commit = false, @@ -7127,7 +7127,7 @@ fn cmdFetch( .job_queue = &job_queue, .omit_missing_hash_error = true, .allow_missing_paths_field = 
false, - .allow_missing_nonce = true, + .allow_missing_fingerprint = true, .allow_name_string = true, .use_latest_commit = true, @@ -7468,10 +7468,10 @@ fn loadManifest( 0, ) catch |err| switch (err) { error.FileNotFound => { - const nonce: Package.Nonce = .generate(options.root_name); + const fingerprint: Package.Fingerprint = .generate(options.root_name); var templates = findTemplates(gpa, arena); defer templates.deinit(); - templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, nonce) catch |e| { + templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, fingerprint) catch |e| { fatal("unable to write {s}: {s}", .{ Package.Manifest.basename, @errorName(e), }); @@ -7529,7 +7529,7 @@ const Templates = struct { out_dir: fs.Dir, root_name: []const u8, template_path: []const u8, - nonce: Package.Nonce, + fingerprint: Package.Fingerprint, ) !void { if (fs.path.dirname(template_path)) |dirname| { out_dir.makePath(dirname) catch |err| { @@ -7555,9 +7555,9 @@ const Templates = struct { try templates.buffer.appendSlice(root_name); i += ".NAME".len; continue; - } else if (std.mem.startsWith(u8, contents[i..], ".NONCE")) { - try templates.buffer.writer().print("0x{x}", .{nonce.int()}); - i += ".NONCE".len; + } else if (std.mem.startsWith(u8, contents[i..], ".FINGERPRINT")) { + try templates.buffer.writer().print("0x{x}", .{fingerprint.int()}); + i += ".FINGERPRINT".len; continue; } else if (std.mem.startsWith(u8, contents[i..], ".ZIGVER")) { try templates.buffer.appendSlice(build_options.version);