diff --git a/doc/build.zig.zon.md b/doc/build.zig.zon.md index dc3ac749a9..2e406a8afd 100644 --- a/doc/build.zig.zon.md +++ b/doc/build.zig.zon.md @@ -10,7 +10,7 @@ build.zig. ### `name` -String. Required. +Enum literal. Required. This is the default name used by packages depending on this one. For example, when a user runs `zig fetch --save <url>`, this field is used as the key in the @@ -20,12 +20,31 @@ will stick with this provided value. It is redundant to include "zig" in this name because it is already within the Zig package namespace. +Must be a valid bare Zig identifier (don't `@` me), limited to 32 bytes. + +### `id` + +Together with name, this represents a globally unique package identifier. This +field should be initialized with a 16-bit random number when the package is +first created, and then *never change*. This allows Zig to unambiguously detect +when one package is an updated version of another. + +When forking a Zig project, this id should be regenerated with a new random +number if the upstream project is still maintained. Otherwise, the fork is +*hostile*, attempting to take control over the original project's identity. + +`0x0000` is invalid because it obviously means a random number wasn't used. + +`0xffff` is reserved to represent "naked" packages. + ### `version` String. Required. [semver](https://semver.org/) +Limited to 32 bytes. + ### `minimum_zig_version` String. Optional. diff --git a/lib/init/build.zig b/lib/init/build.zig index 9be615ac31..ec25698c68 100644 --- a/lib/init/build.zig +++ b/lib/init/build.zig @@ -42,14 +42,14 @@ pub fn build(b: *std.Build) void { // Modules can depend on one another using the `std.Build.Module.addImport` function. // This is what allows Zig source code to use `@import("foo")` where 'foo' is not a // file path. In this case, we set up `exe_mod` to import `lib_mod`. 
- exe_mod.addImport("$_lib", lib_mod); + exe_mod.addImport("$n_lib", lib_mod); // Now, we will create a static library based on the module we created above. // This creates a `std.Build.Step.Compile`, which is the build step responsible // for actually invoking the compiler. const lib = b.addLibrary(.{ .linkage = .static, - .name = "$", + .name = "$n", .root_module = lib_mod, }); @@ -61,7 +61,7 @@ pub fn build(b: *std.Build) void { // This creates another `std.Build.Step.Compile`, but this one builds an executable // rather than a static library. const exe = b.addExecutable(.{ - .name = "$", + .name = "$n", .root_module = exe_mod, }); diff --git a/lib/init/build.zig.zon b/lib/init/build.zig.zon index cb7229042f..85fca48108 100644 --- a/lib/init/build.zig.zon +++ b/lib/init/build.zig.zon @@ -6,12 +6,29 @@ // // It is redundant to include "zig" in this name because it is already // within the Zig package namespace. - .name = "$", + .name = .$n, // This is a [Semantic Version](https://semver.org/). // In a future version of Zig it will be used for package deduplication. .version = "0.0.0", + // Together with name, this represents a globally unique package + // identifier. This field should be initialized with a 16-bit random number + // when the package is first created, and then *never change*. This allows + // unambiguous detection when one package is an updated version of another. + // + // When forking a Zig project, this id should be regenerated with a new + // random number if the upstream project is still maintained. Otherwise, + // the fork is *hostile*, attempting to take control over the original + // project's identity. Thus it is recommended to leave the comment on the + // following line intact, so that it shows up in code reviews that modify + // the field. + .id = $i, // Changing this has security and trust implications. + + // Tracks the earliest Zig version that the package considers to be a + // supported use case. 
+ .minimum_zig_version = "$v", + // This field is optional. // This is currently advisory only; Zig does not yet do anything // with this value. diff --git a/lib/init/src/main.zig b/lib/init/src/main.zig index 66d5648c1e..cc69127d4f 100644 --- a/lib/init/src/main.zig +++ b/lib/init/src/main.zig @@ -43,4 +43,4 @@ test "fuzz example" { const std = @import("std"); /// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details. -const lib = @import("$_lib"); +const lib = @import("$n_lib"); diff --git a/src/Package.zig b/src/Package.zig index b585644d9e..6d370e9855 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -10,9 +10,17 @@ pub const multihash_len = 1 + 1 + Hash.Algo.digest_length; pub const multihash_hex_digest_len = 2 * multihash_len; pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; +pub fn randomId() u16 { + return std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff); +} + /// A user-readable, file system safe hash that identifies an exact package /// snapshot, including file contents. /// +/// The hash is not only to prevent collisions but must resist attacks where +/// the adversary fully controls the contents being hashed. Thus, it contains +/// a full SHA-256 digest. +/// /// This data structure can be used to store the legacy hash format too. Legacy /// hash format is scheduled to be removed after 0.14.0 is tagged. /// @@ -26,7 +34,8 @@ pub const Hash = struct { pub const Algo = std.crypto.hash.sha2.Sha256; pub const Digest = [Algo.digest_length]u8; - pub const max_len = 32 + 1 + 32 + 1 + 12; + /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" + pub const max_len = 32 + 1 + 32 + 1 + (16 + 32 + 192) / 6; pub fn fromSlice(s: []const u8) Hash { assert(s.len <= max_len); @@ -62,48 +71,35 @@ pub const Hash = struct { try std.testing.expect(h.isOld()); } - /// Produces "$name-$semver-$sizedhash". + /// Produces "$name-$semver-$hashplus". 
/// * name is the name field from build.zig.zon, truncated at 32 bytes and must /// be a valid zig identifier /// * semver is the version field from build.zig.zon, truncated at 32 bytes - /// * sizedhash is the following 9-byte array, base64 encoded using -_ to make + /// * hashplus is the following 30-byte array, base64 encoded using -_ to make /// it filesystem safe: - /// - (4 bytes) LE u32 total decompressed size in bytes - /// - (5 bytes) truncated SHA-256 of hashed files of the package + /// - (2 bytes) LE u16 Package ID + /// - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated + /// - (24 bytes) truncated SHA-256 digest of hashed files of the package /// - /// example: "nasm-2.16.1-2-BWdcABvF_jM1" - pub fn init(digest: Digest, name: []const u8, ver: []const u8, size: u32) Hash { + /// example: "nasm-2.16.1-3-AAD_ZlwACpGU-c3QXp_yNyn07Q5U9Rq-Cb1ur2G1" + pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u16, size: u32) Hash { + assert(name.len <= 32); + assert(ver.len <= 32); var result: Hash = undefined; var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes); - buf.appendSliceAssumeCapacity(name[0..@min(name.len, 32)]); + buf.appendSliceAssumeCapacity(name); buf.appendAssumeCapacity('-'); - buf.appendSliceAssumeCapacity(ver[0..@min(ver.len, 32)]); + buf.appendSliceAssumeCapacity(ver); buf.appendAssumeCapacity('-'); - var sizedhash: [9]u8 = undefined; - std.mem.writeInt(u32, sizedhash[0..4], size, .little); - sizedhash[4..].* = digest[0..5].*; - _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(12), &sizedhash); + var hashplus: [30]u8 = undefined; + std.mem.writeInt(u16, hashplus[0..2], id, .little); + std.mem.writeInt(u32, hashplus[2..6], size, .little); + hashplus[6..].* = digest[0..24].*; + _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(40), &hashplus); @memset(buf.unusedCapacitySlice(), 0); return result; } - /// Produces "$hashiname-N-$sizedhash". 
For packages that lack "build.zig.zon" metadata. - /// * hashiname is [5..][0..24] bytes of the SHA-256, urlsafe-base64-encoded, for a total of 32 bytes encoded - /// * the semver section is replaced with a hardcoded N which stands for - /// "naked". It acts as a version number so that any future updates to the - /// hash format can tell this hash format apart. Note that "N" is an - /// invalid semver. - /// * sizedhash is the same as in `init`. - /// - /// The hash is broken up this way so that "sizedhash" can be calculated - /// exactly the same way in both cases, and so that "name" and "hashiname" can - /// be used interchangeably in both cases. - pub fn initNaked(digest: Digest, size: u32) Hash { - var name: [32]u8 = undefined; - _ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]); - return init(digest, &name, "N", size); - } - /// Produces a unique hash based on the path provided. The result should /// not be user-visible. pub fn initPath(sub_path: []const u8, is_global: bool) Hash { @@ -144,7 +140,7 @@ pub const MultihashFunction = enum(u16) { pub const multihash_function: MultihashFunction = switch (Hash.Algo) { std.crypto.hash.sha2.Sha256 => .@"sha2-256", - else => @compileError("unreachable"), + else => unreachable, }; pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest { diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 326b8917a5..bb9fbd9664 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -586,9 +586,11 @@ pub fn computedPackageHash(f: *const Fetch) Package.Hash { if (f.manifest) |man| { var version_buffer: [32]u8 = undefined; const version: []const u8 = std.fmt.bufPrint(&version_buffer, "{}", .{man.version}) catch &version_buffer; - return .init(f.computed_hash.digest, man.name, version, saturated_size); + return .init(f.computed_hash.digest, man.name, version, man.id, saturated_size); } - return .initNaked(f.computed_hash.digest, saturated_size); + // In the future build.zig.zon fields 
will be added to allow overriding these values + // for naked tarballs. + return .init(f.computed_hash.digest, "N", "V", 0xffff, saturated_size); } /// `computeHash` gets a free check for the existence of `build.zig`, but when @@ -645,11 +647,13 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { f.manifest = try Manifest.parse(arena, ast.*, .{ .allow_missing_paths_field = f.allow_missing_paths_field, + .allow_missing_id = f.allow_missing_paths_field, + .allow_name_string = f.allow_missing_paths_field, }); const manifest = &f.manifest.?; if (manifest.errors.len > 0) { - const src_path = try eb.printString("{}{s}", .{ pkg_root, Manifest.basename }); + const src_path = try eb.printString("{}" ++ fs.path.sep_str ++ "{s}", .{ pkg_root, Manifest.basename }); try manifest.copyErrorsIntoBundle(ast.*, src_path, eb); return error.FetchFailed; } diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 82c850d705..083b56264d 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -36,6 +36,7 @@ pub const ErrorMessage = struct { }; name: []const u8, +id: u16, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), @@ -50,6 +51,8 @@ pub const ParseOptions = struct { allow_missing_paths_field: bool = false, /// Deprecated, to be removed after 0.14.0 is tagged. allow_name_string: bool = true, + /// Deprecated, to be removed after 0.14.0 is tagged. 
+ allow_missing_id: bool = true, }; pub const Error = Allocator.Error; @@ -70,6 +73,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .errors = .{}, .name = undefined, + .id = 0, .version = undefined, .version_node = 0, .dependencies = .{}, @@ -77,6 +81,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .paths = .{}, .allow_missing_paths_field = options.allow_missing_paths_field, .allow_name_string = options.allow_name_string, + .allow_missing_id = options.allow_missing_id, .minimum_zig_version = null, .buf = .{}, }; @@ -92,6 +97,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { return .{ .name = p.name, + .id = p.id, .version = p.version, .version_node = p.version_node, .dependencies = try p.dependencies.clone(p.arena), @@ -143,6 +149,7 @@ const Parse = struct { errors: std.ArrayListUnmanaged(ErrorMessage), name: []const u8, + id: u16, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), @@ -150,6 +157,7 @@ const Parse = struct { paths: std.StringArrayHashMapUnmanaged(void), allow_missing_paths_field: bool, allow_name_string: bool, + allow_missing_id: bool, minimum_zig_version: ?std.SemanticVersion, const InnerError = error{ ParseFailure, OutOfMemory }; @@ -167,6 +175,7 @@ const Parse = struct { var have_name = false; var have_version = false; var have_included_paths = false; + var have_id = false; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -183,6 +192,9 @@ const Parse = struct { } else if (mem.eql(u8, field_name, "name")) { p.name = try parseName(p, field_init); have_name = true; + } else if (mem.eql(u8, field_name, "id")) { + p.id = try parseId(p, field_init); + have_id = true; } else if (mem.eql(u8, field_name, "version")) { p.version_node = field_init; const version_text = try parseString(p, field_init); @@ -206,6 +218,12 @@ const Parse = 
struct { } } + if (!have_id and !p.allow_missing_id) { + try appendError(p, main_token, "missing top-level 'id' field; suggested value: 0x{x}", .{ + Package.randomId(), + }); + } + if (!have_name) { try appendError(p, main_token, "missing top-level 'name' field", .{}); } @@ -359,6 +377,33 @@ const Parse = struct { } } + fn parseId(p: *Parse, node: Ast.Node.Index) !u16 { + const ast = p.ast; + const node_tags = ast.nodes.items(.tag); + const main_tokens = ast.nodes.items(.main_token); + const main_token = main_tokens[node]; + if (node_tags[node] != .number_literal) { + return fail(p, main_token, "expected integer literal", .{}); + } + const token_bytes = ast.tokenSlice(main_token); + const parsed = std.zig.parseNumberLiteral(token_bytes); + const n = switch (parsed) { + .int => |n| n, + .big_int, .float => return fail(p, main_token, "expected u16 integer literal, found {s}", .{ + @tagName(parsed), + }), + .failure => |err| return fail(p, main_token, "bad integer literal: {s}", .{@tagName(err)}), + }; + const casted = std.math.cast(u16, n) orelse + return fail(p, main_token, "integer value {d} does not fit into u16", .{n}); + switch (casted) { + 0x0000, 0xffff => return fail(p, main_token, "id value 0x{x} reserved; use 0x{x} instead", .{ + casted, Package.randomId(), + }), + else => return casted, + } + } + fn parseName(p: *Parse, node: Ast.Node.Index) ![]const u8 { const ast = p.ast; const node_tags = ast.nodes.items(.tag); @@ -371,7 +416,7 @@ const Parse = struct { return fail(p, main_token, "name must be a valid bare zig identifier (hint: switch from string to enum literal)", .{}); if (name.len > max_name_len) - return fail(p, main_token, "name '{s}' exceeds max length of {d}", .{ + return fail(p, main_token, "name '{}' exceeds max length of {d}", .{ std.zig.fmtId(name), max_name_len, }); @@ -386,7 +431,7 @@ const Parse = struct { return fail(p, main_token, "name must be a valid bare zig identifier", .{}); if (ident_name.len > max_name_len) - return fail(p, 
main_token, "name '{s}' exceeds max length of {d}", .{ + return fail(p, main_token, "name '{}' exceeds max length of {d}", .{ std.zig.fmtId(ident_name), max_name_len, }); diff --git a/src/main.zig b/src/main.zig index d6b20f94f9..b1680dbf8e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4751,8 +4751,10 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { }; var ok_count: usize = 0; + const id = Package.randomId(); + for (template_paths) |template_path| { - if (templates.write(arena, fs.cwd(), cwd_basename, template_path)) |_| { + if (templates.write(arena, fs.cwd(), cwd_basename, template_path, id)) |_| { std.log.info("created {s}", .{template_path}); ok_count += 1; } else |err| switch (err) { @@ -7430,10 +7432,10 @@ fn loadManifest( 0, ) catch |err| switch (err) { error.FileNotFound => { + const id = Package.randomId(); var templates = findTemplates(gpa, arena); defer templates.deinit(); - - templates.write(arena, options.dir, options.root_name, Package.Manifest.basename) catch |e| { + templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, id) catch |e| { fatal("unable to write {s}: {s}", .{ Package.Manifest.basename, @errorName(e), }); @@ -7491,6 +7493,7 @@ const Templates = struct { out_dir: fs.Dir, root_name: []const u8, template_path: []const u8, + id: u16, ) !void { if (fs.path.dirname(template_path)) |dirname| { out_dir.makePath(dirname) catch |err| { @@ -7504,13 +7507,28 @@ const Templates = struct { }; templates.buffer.clearRetainingCapacity(); try templates.buffer.ensureUnusedCapacity(contents.len); - for (contents) |c| { - if (c == '$') { - try templates.buffer.appendSlice(root_name); - } else { - try templates.buffer.append(c); - } - } + var state: enum { start, dollar } = .start; + for (contents) |c| switch (state) { + .start => switch (c) { + '$' => state = .dollar, + else => try templates.buffer.append(c), + }, + .dollar => switch (c) { + 'n' => { + try templates.buffer.appendSlice(root_name); 
+ state = .start; + }, + 'i' => { + try templates.buffer.writer().print("0x{x}", .{id}); + state = .start; + }, + 'v' => { + try templates.buffer.appendSlice(build_options.version); + state = .start; + }, + else => fatal("unknown substitution: ${c}", .{c}), + }, + }; return out_dir.writeFile(.{ .sub_path = template_path,