commit 6d84caf72771cf05997518ae2fa40a94de709de4 (tree)
parent 7bae6d90648e6ef0782c7f5e8a72066742feacaf
Author: Andrew Kelley <andrew@ziglang.org>
Date: Sun, 8 Oct 2023 17:29:55 -0700
move some package management related source files around
Diffstat:
7 files changed, 1469 insertions(+), 1471 deletions(-)
diff --git a/src/Package.zig b/src/Package.zig
@@ -1,7 +1,7 @@
pub const Module = @import("Package/Module.zig");
pub const Fetch = @import("Package/Fetch.zig");
pub const build_zig_basename = "build.zig";
-pub const Manifest = @import("Manifest.zig");
+pub const Manifest = @import("Package/Manifest.zig");
pub const Path = struct {
root_dir: Cache.Directory,
diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig
@@ -1449,9 +1449,9 @@ const Allocator = std.mem.Allocator;
const Cache = std.Build.Cache;
const ThreadPool = std.Thread.Pool;
const WaitGroup = std.Thread.WaitGroup;
-const Manifest = @import("../Manifest.zig");
const Fetch = @This();
const main = @import("../main.zig");
-const git = @import("../git.zig");
+const git = @import("Fetch/git.zig");
const Package = @import("../Package.zig");
+const Manifest = Package.Manifest;
const ErrorBundle = std.zig.ErrorBundle;
diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig
@@ -0,0 +1,1466 @@
+//! Git support for package fetching.
+//!
+//! This is not intended to support all features of Git: it is limited to the
+//! basic functionality needed to clone a repository for the purpose of fetching
+//! a package.
+
+const std = @import("std");
+const mem = std.mem;
+const testing = std.testing;
+const Allocator = mem.Allocator;
+const Sha1 = std.crypto.hash.Sha1;
+const assert = std.debug.assert;
+
+pub const oid_length = Sha1.digest_length;
+pub const fmt_oid_length = 2 * oid_length;
+/// The ID of a Git object (an SHA-1 hash).
+pub const Oid = [oid_length]u8;
+
+/// Decodes the hexadecimal text form of an object ID.
+///
+/// `s` must be exactly `fmt_oid_length` characters long, two hexadecimal
+/// digits per output byte. Any other length, or a pair of characters that
+/// cannot be parsed as a base-16 byte, yields `error.InvalidOid`.
+pub fn parseOid(s: []const u8) !Oid {
+    if (s.len != fmt_oid_length) return error.InvalidOid;
+    var result: Oid = undefined;
+    var byte_index: usize = 0;
+    while (byte_index < oid_length) : (byte_index += 1) {
+        const digit_pair = s[2 * byte_index ..][0..2];
+        result[byte_index] = std.fmt.parseUnsigned(u8, digit_pair, 16) catch return error.InvalidOid;
+    }
+    return result;
+}
+
+test parseOid {
+    const expected: Oid = .{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 };
+    try testing.expectEqualSlices(
+        u8,
+        &expected,
+        &try parseOid("ce919ccf45951856a762ffdb8ef850301cd8c588"),
+    );
+    // Hexadecimal digits are accepted in either case.
+    try testing.expectEqualSlices(
+        u8,
+        &expected,
+        &try parseOid("CE919CCF45951856A762FFDB8EF850301CD8C588"),
+    );
+    // Wrong length: too short, one character too long, and common ref names.
+    try testing.expectError(error.InvalidOid, parseOid(""));
+    try testing.expectError(error.InvalidOid, parseOid("ce919ccf"));
+    try testing.expectError(error.InvalidOid, parseOid("ce919ccf45951856a762ffdb8ef850301cd8c5880"));
+    try testing.expectError(error.InvalidOid, parseOid("master"));
+    try testing.expectError(error.InvalidOid, parseOid("HEAD"));
+    // Correct length, but containing a character that is not a hex digit.
+    try testing.expectError(error.InvalidOid, parseOid("ze919ccf45951856a762ffdb8ef850301cd8c588"));
+}
+
+/// Accumulates non-fatal problems recorded during a checkout.
+///
+/// Every string stored inside `errors` is released with `allocator` when
+/// `deinit` is called, so entries must be allocated with that same allocator.
+pub const Diagnostics = struct {
+    allocator: Allocator,
+    errors: std.ArrayListUnmanaged(Error) = .{},
+
+    pub const Error = union(enum) {
+        unable_to_create_sym_link: struct {
+            code: anyerror,
+            file_name: []const u8,
+            link_name: []const u8,
+        },
+    };
+
+    /// Frees the strings held by each recorded error, then the list itself,
+    /// and finally invalidates `d`.
+    pub fn deinit(d: *Diagnostics) void {
+        const gpa = d.allocator;
+        for (d.errors.items) |recorded| switch (recorded) {
+            .unable_to_create_sym_link => |details| {
+                gpa.free(details.file_name);
+                gpa.free(details.link_name);
+            },
+        };
+        d.errors.deinit(gpa);
+        d.* = undefined;
+    }
+};
+
+/// A Git repository backed by a single packfile and its index, supporting
+/// checkout of a commit into a directory.
+pub const Repository = struct {
+    odb: Odb,
+
+    /// Creates a repository that reads objects from `pack_file`, using
+    /// `index_file` to locate them. `allocator` is retained by the object
+    /// database.
+    pub fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Repository {
+        return .{ .odb = try Odb.init(allocator, pack_file, index_file) };
+    }
+
+    /// Releases the object database and invalidates `repository`.
+    pub fn deinit(repository: *Repository) void {
+        repository.odb.deinit();
+        repository.* = undefined;
+    }
+
+    /// Checks out the repository at `commit_oid` to `worktree`.
+    ///
+    /// Returns `error.NotACommit` if `commit_oid` names an object of another
+    /// type. Symlinks that cannot be created are recorded in `diagnostics`
+    /// rather than aborting the checkout.
+    pub fn checkout(
+        repository: *Repository,
+        worktree: std.fs.Dir,
+        commit_oid: Oid,
+        diagnostics: *Diagnostics,
+    ) !void {
+        try repository.odb.seekOid(commit_oid);
+        const tree_oid = tree_oid: {
+            const commit_object = try repository.odb.readObject();
+            if (commit_object.type != .commit) return error.NotACommit;
+            break :tree_oid try getCommitTree(commit_object.data);
+        };
+        try repository.checkoutTree(worktree, tree_oid, "", diagnostics);
+    }
+
+    /// Checks out the tree at `tree_oid` to `dir`, recursing into subtrees.
+    ///
+    /// `current_path` is the path of `dir` relative to the checkout root; it
+    /// is only used to build the file names reported through `diagnostics`.
+    fn checkoutTree(
+        repository: *Repository,
+        dir: std.fs.Dir,
+        tree_oid: Oid,
+        current_path: []const u8,
+        diagnostics: *Diagnostics,
+    ) !void {
+        try repository.odb.seekOid(tree_oid);
+        const tree_object = try repository.odb.readObject();
+        if (tree_object.type != .tree) return error.NotATree;
+        // The tree object may be evicted from the object cache while we're
+        // iterating over it, so we can make a defensive copy here to make sure
+        // it remains valid until we're done with it
+        const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data);
+        defer repository.odb.allocator.free(tree_data);
+
+        var tree_iter: TreeIterator = .{ .data = tree_data };
+        while (try tree_iter.next()) |entry| {
+            switch (entry.type) {
+                .directory => {
+                    try dir.makeDir(entry.name);
+                    var subdir = try dir.openDir(entry.name, .{});
+                    defer subdir.close();
+                    const sub_path = try std.fs.path.join(repository.odb.allocator, &.{ current_path, entry.name });
+                    defer repository.odb.allocator.free(sub_path);
+                    try repository.checkoutTree(subdir, entry.oid, sub_path, diagnostics);
+                },
+                .file => {
+                    // Look up and validate the blob before touching the file
+                    // system, so that a missing or mistyped object does not
+                    // leave an empty file behind.
+                    try repository.odb.seekOid(entry.oid);
+                    const file_object = try repository.odb.readObject();
+                    if (file_object.type != .blob) return error.InvalidFile;
+                    // NOTE: `entry.executable` is not applied to the created file.
+                    var file = try dir.createFile(entry.name, .{});
+                    defer file.close();
+                    try file.writeAll(file_object.data);
+                    try file.sync();
+                },
+                .symlink => {
+                    try repository.odb.seekOid(entry.oid);
+                    const symlink_object = try repository.odb.readObject();
+                    if (symlink_object.type != .blob) return error.InvalidFile;
+                    // The blob content of a symlink entry is the link target.
+                    const link_name = symlink_object.data;
+                    dir.symLink(link_name, entry.name, .{}) catch |e| {
+                        // Failure to create a symlink is reported, not fatal;
+                        // only allocation failure while reporting propagates.
+                        const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name });
+                        errdefer diagnostics.allocator.free(file_name);
+                        const link_name_dup = try diagnostics.allocator.dupe(u8, link_name);
+                        errdefer diagnostics.allocator.free(link_name_dup);
+                        try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_sym_link = .{
+                            .code = e,
+                            .file_name = file_name,
+                            .link_name = link_name_dup,
+                        } });
+                    };
+                },
+                .gitlink => {
+                    // Consistent with git archive behavior, create the directory but
+                    // do nothing else
+                    try dir.makeDir(entry.name);
+                },
+            }
+        }
+    }
+
+    /// Returns the ID of the tree associated with the given commit (provided as
+    /// raw object data).
+    ///
+    /// The data must begin with `tree `, followed by a hexadecimal object ID
+    /// and a newline; anything else is `error.InvalidCommit`.
+    fn getCommitTree(commit_data: []const u8) !Oid {
+        if (!mem.startsWith(u8, commit_data, "tree ") or
+            commit_data.len < "tree ".len + fmt_oid_length + "\n".len or
+            commit_data["tree ".len + fmt_oid_length] != '\n')
+        {
+            return error.InvalidCommit;
+        }
+        return try parseOid(commit_data["tree ".len..][0..fmt_oid_length]);
+    }
+
+    /// Iterates over the entries of a raw tree object. Each entry is encoded
+    /// as an octal mode, a space, a NUL-terminated name and a binary object ID.
+    const TreeIterator = struct {
+        data: []const u8,
+        pos: usize = 0,
+
+        const Entry = struct {
+            type: Type,
+            executable: bool,
+            /// Points into the iterator's `data`.
+            name: [:0]const u8,
+            oid: Oid,
+
+            /// The upper four bits of the entry mode.
+            const Type = enum(u4) {
+                directory = 0o4,
+                file = 0o10,
+                symlink = 0o12,
+                gitlink = 0o16,
+            };
+        };
+
+        /// Returns the next entry, or null once all data has been consumed.
+        /// Malformed data yields `error.InvalidTree`.
+        fn next(iterator: *TreeIterator) !?Entry {
+            if (iterator.pos == iterator.data.len) return null;
+
+            const mode_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, ' ') orelse return error.InvalidTree;
+            // The mode is split, least significant bits first, into nine
+            // permission bits, three ignored bits and four type bits.
+            const mode: packed struct {
+                permission: u9,
+                unused: u3,
+                type: u4,
+            } = @bitCast(std.fmt.parseUnsigned(u16, iterator.data[iterator.pos..mode_end], 8) catch return error.InvalidTree);
+            const @"type" = std.meta.intToEnum(Entry.Type, mode.type) catch return error.InvalidTree;
+            // Only files carry permission bits, and only 644 and 755 are
+            // accepted for them; every other entry type must have none.
+            const executable = switch (mode.permission) {
+                0 => if (@"type" == .file) return error.InvalidTree else false,
+                0o644 => if (@"type" != .file) return error.InvalidTree else false,
+                0o755 => if (@"type" != .file) return error.InvalidTree else true,
+                else => return error.InvalidTree,
+            };
+            iterator.pos = mode_end + 1;
+
+            const name_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, 0) orelse return error.InvalidTree;
+            const name = iterator.data[iterator.pos..name_end :0];
+            iterator.pos = name_end + 1;
+
+            if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree;
+            const oid = iterator.data[iterator.pos..][0..oid_length].*;
+            iterator.pos += oid_length;
+
+            return .{ .type = @"type", .executable = executable, .name = name, .oid = oid };
+        }
+    };
+};
+
+/// A Git object database backed by a packfile. A packfile index is also used
+/// for efficient access to objects in the packfile.
+///
+/// The format of the packfile and its associated index are documented in
+/// [pack-format](https://git-scm.com/docs/pack-format).
+const Odb = struct {
+    pack_file: std.fs.File,
+    index_header: IndexHeader,
+    index_file: std.fs.File,
+    cache: ObjectCache = .{},
+    allocator: Allocator,
+
+    /// Initializes the database from open pack and index files.
+    ///
+    /// Both files are rewound to their start and the index header is read
+    /// eagerly. The files are not closed by `deinit`.
+    fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Odb {
+        try pack_file.seekTo(0);
+        try index_file.seekTo(0);
+        const index_header = try IndexHeader.read(index_file.reader());
+        return .{
+            .pack_file = pack_file,
+            .index_header = index_header,
+            .index_file = index_file,
+            .allocator = allocator,
+        };
+    }
+
+    /// Frees the object cache and invalidates `odb`.
+    fn deinit(odb: *Odb) void {
+        odb.cache.deinit(odb.allocator);
+        odb.* = undefined;
+    }
+
+    /// Reads the object at the current position in the database.
+    ///
+    /// Delta entries are followed back (by offset or by object ID) until a
+    /// cached object or an undeltified entry is found; the offsets of the
+    /// deltas visited on the way are then handed to `resolveDeltaChain`.
+    fn readObject(odb: *Odb) !Object {
+        var base_offset = try odb.pack_file.getPos();
+        var base_header: EntryHeader = undefined;
+        var delta_offsets = std.ArrayListUnmanaged(u64){};
+        defer delta_offsets.deinit(odb.allocator);
+        const base_object = while (true) {
+            if (odb.cache.get(base_offset)) |base_object| break base_object;
+
+            base_header = try EntryHeader.read(odb.pack_file.reader());
+            switch (base_header) {
+                .ofs_delta => |ofs_delta| {
+                    try delta_offsets.append(odb.allocator, base_offset);
+                    // The base lies `offset` bytes before this entry.
+                    base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidFormat;
+                    try odb.pack_file.seekTo(base_offset);
+                },
+                .ref_delta => |ref_delta| {
+                    try delta_offsets.append(odb.allocator, base_offset);
+                    try odb.seekOid(ref_delta.base_object);
+                    base_offset = try odb.pack_file.getPos();
+                },
+                else => {
+                    const base_data = try readObjectRaw(odb.allocator, odb.pack_file.reader(), base_header.uncompressedLength());
+                    errdefer odb.allocator.free(base_data);
+                    const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
+                    // On success the cache takes ownership of `base_data`.
+                    try odb.cache.put(odb.allocator, base_offset, base_object);
+                    break base_object;
+                },
+            }
+        };
+
+        const base_data = try resolveDeltaChain(
+            odb.allocator,
+            odb.pack_file,
+            base_object,
+            delta_offsets.items,
+            &odb.cache,
+        );
+
+        return .{ .type = base_object.type, .data = base_data };
+    }
+
+    /// Seeks to the beginning of the object with the given ID.
+    ///
+    /// Performs a binary search over the sorted object IDs in the index,
+    /// narrowed by the fan-out table entry for the first ID byte, and returns
+    /// `error.ObjectNotFound` if the ID is absent. All index file positions
+    /// are computed in 64 bits so that large indexes cannot overflow.
+    fn seekOid(odb: *Odb, oid: Oid) !void {
+        const key = oid[0];
+        var start_index = if (key > 0) odb.index_header.fan_out_table[key - 1] else 0;
+        var end_index = odb.index_header.fan_out_table[key];
+        const found_index = while (start_index < end_index) {
+            const mid_index = start_index + (end_index - start_index) / 2;
+            try odb.index_file.seekTo(IndexHeader.size + @as(u64, mid_index) * oid_length);
+            const mid_oid = try odb.index_file.reader().readBytesNoEof(oid_length);
+            switch (mem.order(u8, &mid_oid, &oid)) {
+                .lt => start_index = mid_index + 1,
+                .gt => end_index = mid_index,
+                .eq => break mid_index,
+            }
+        } else return error.ObjectNotFound;
+
+        // The last fan-out entry is the total number of objects. The 4-byte
+        // offset table follows the object ID table and a table of 4-byte
+        // checksums, each with one entry per object.
+        const n_objects: u64 = odb.index_header.fan_out_table[255];
+        const offset_values_start = IndexHeader.size + n_objects * (oid_length + 4);
+        try odb.index_file.seekTo(offset_values_start + @as(u64, found_index) * 4);
+        const l1_offset: packed struct { value: u31, big: bool } = @bitCast(try odb.index_file.reader().readIntBig(u32));
+        const pack_offset: u64 = pack_offset: {
+            if (l1_offset.big) {
+                // With the high bit set, the remaining 31 bits are an index
+                // into the table of 8-byte offsets that follows the 4-byte
+                // offset table.
+                const l2_offset_values_start = offset_values_start + n_objects * 4;
+                try odb.index_file.seekTo(l2_offset_values_start + @as(u64, l1_offset.value) * 8);
+                break :pack_offset try odb.index_file.reader().readIntBig(u64);
+            } else {
+                break :pack_offset l1_offset.value;
+            }
+        };
+
+        try odb.pack_file.seekTo(pack_offset);
+    }
+};
+
+const Object = struct { // A Git object: a type tag together with its raw content bytes.
+ type: Type, // Which of the four object kinds listed below this is.
+ data: []const u8, // Raw object content; may be owned by the object cache (hence the defensive copy in `Repository.checkoutTree`).
+
+ const Type = enum { // The object kinds that can be stored in a packfile entry that is not a delta.
+ commit,
+ tree,
+ blob,
+ tag,
+ };
+};
+
+/// A size-bounded cache of object data, keyed by packfile offset.
+///
+/// Caching the results of resolving delta objects speeds up later delta
+/// resolution, while the size bound keeps memory usage in check. Once the
+/// total size of the cached objects exceeds the maximum, the least recently
+/// used objects are evicted first: the most recently used objects tend to be
+/// further along a delta chain and therefore save more reconstruction work.
+///
+/// The cache owns the data of every object stored in it. Callers must not
+/// free object data after inserting it; whatever is still cached when the
+/// cache is deinitialized is freed at that point.
+const ObjectCache = struct {
+    objects: std.AutoHashMapUnmanaged(u64, CacheEntry) = .{},
+    lru_nodes: LruList = .{},
+    byte_size: usize = 0,
+
+    const max_byte_size = 128 * 1024 * 1024; // 128MiB
+    /// Offsets of the cached objects, ordered from least to most recently
+    /// used.
+    const LruList = std.DoublyLinkedList(u64);
+    const CacheEntry = struct { object: Object, lru_node: *LruList.Node };
+
+    /// Frees all cached object data and list nodes, then invalidates `cache`.
+    fn deinit(cache: *ObjectCache, allocator: Allocator) void {
+        var entries = cache.objects.valueIterator();
+        while (entries.next()) |entry| {
+            allocator.free(entry.object.data);
+            allocator.destroy(entry.lru_node);
+        }
+        cache.objects.deinit(allocator);
+        cache.* = undefined;
+    }
+
+    /// Looks up the object stored for `offset`. A hit also marks the object
+    /// as the most recently used one.
+    fn get(cache: *ObjectCache, offset: u64) ?Object {
+        const entry = cache.objects.get(offset) orelse return null;
+        cache.lru_nodes.remove(entry.lru_node);
+        cache.lru_nodes.append(entry.lru_node);
+        return entry.object;
+    }
+
+    /// Stores `object` under `offset`, replacing (and freeing) any object
+    /// already stored there, then evicts least recently used entries while
+    /// the cache is over its maximum size. The object just stored is never
+    /// evicted by this call, even if it alone exceeds the maximum size; it
+    /// stays valid until the next call to `put` or `deinit`.
+    fn put(cache: *ObjectCache, allocator: Allocator, offset: u64, object: Object) !void {
+        const new_node = try allocator.create(LruList.Node);
+        errdefer allocator.destroy(new_node);
+        new_node.data = offset;
+
+        const slot = try cache.objects.getOrPut(allocator, offset);
+        if (slot.found_existing) {
+            const stale = slot.value_ptr.*;
+            cache.byte_size -= stale.object.data.len;
+            cache.lru_nodes.remove(stale.lru_node);
+            allocator.destroy(stale.lru_node);
+            allocator.free(stale.object.data);
+        }
+        slot.value_ptr.* = .{ .object = object, .lru_node = new_node };
+        cache.byte_size += object.data.len;
+        cache.lru_nodes.append(new_node);
+
+        // Stopping at a single remaining node protects the entry that was
+        // just appended, which sits at the tail of the list.
+        while (cache.byte_size > max_byte_size and cache.lru_nodes.len > 1) {
+            const oldest = cache.lru_nodes.popFirst().?;
+            const evicted = cache.objects.fetchRemove(oldest.data).?.value.object;
+            allocator.destroy(oldest);
+            cache.byte_size -= evicted.data.len;
+            allocator.free(evicted.data);
+        }
+    }
+};
+
+/// One pkt-line of the Git wire protocol.
+///
+/// The pkt-line format is described in
+/// [protocol-common](https://git-scm.com/docs/protocol-common); the delimiter
+/// and response-end packets get their meaning from
+/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
+const Packet = union(enum) {
+    flush,
+    delimiter,
+    response_end,
+    data: []const u8,
+
+    const max_data_length = 65516;
+
+    /// Reads one packet from `reader`.
+    ///
+    /// The payload of a data packet is stored in `buf`, and the returned
+    /// slice points into it. A length prefix that is not four hexadecimal
+    /// digits, equals 3, or describes more than `max_data_length` payload
+    /// bytes yields `error.InvalidPacket`.
+    fn read(reader: anytype, buf: *[max_data_length]u8) !Packet {
+        const length_digits = try reader.readBytesNoEof(4);
+        const length = std.fmt.parseUnsigned(u16, &length_digits, 16) catch return error.InvalidPacket;
+        // The length prefix counts its own four bytes; values below four are
+        // reserved for the special packets.
+        const data_length = switch (length) {
+            0 => return .flush,
+            1 => return .delimiter,
+            2 => return .response_end,
+            3 => return error.InvalidPacket,
+            else => length - 4,
+        };
+        if (data_length > max_data_length) return error.InvalidPacket;
+        const payload = buf[0..data_length];
+        try reader.readNoEof(payload);
+        return .{ .data = payload };
+    }
+
+    /// Writes `packet` to `writer`. The payload of a data packet must not be
+    /// longer than `max_data_length` (asserted).
+    fn write(packet: Packet, writer: anytype) !void {
+        const marker = switch (packet) {
+            .flush => "0000",
+            .delimiter => "0001",
+            .response_end => "0002",
+            .data => |payload| {
+                assert(payload.len <= max_data_length);
+                try writer.print("{x:0>4}", .{payload.len + 4});
+                return writer.writeAll(payload);
+            },
+        };
+        try writer.writeAll(marker);
+    }
+};
+
+/// A client session for the Git protocol, currently limited to an HTTP(S)
+/// transport. Only protocol version 2 is supported, as documented in
+/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
+pub const Session = struct {
+    transport: *std.http.Client,
+    uri: std.Uri,
+    supports_agent: bool = false,
+    supports_shallow: bool = false,
+
+    const agent = "zig/" ++ @import("builtin").zig_version_string;
+    const agent_capability = std.fmt.comptimePrint("agent={s}\n", .{agent});
+    /// The number of redirects followed during capability discovery.
+    const max_redirects = 3;
+
+    /// Discovers server capabilities. This should be called before using any
+    /// other client functionality, or the client will be forced to default to
+    /// the bare minimum server requirements, which may be considerably less
+    /// efficient (e.g. no shallow fetches).
+    ///
+    /// See the note on `getCapabilities` regarding `redirect_uri`.
+    pub fn discoverCapabilities(
+        session: *Session,
+        allocator: Allocator,
+        redirect_uri: *[]u8,
+    ) !void {
+        var capability_iterator = try session.getCapabilities(allocator, redirect_uri);
+        defer capability_iterator.deinit();
+        while (try capability_iterator.next()) |capability| {
+            if (mem.eql(u8, capability.key, "agent")) {
+                session.supports_agent = true;
+            } else if (mem.eql(u8, capability.key, "fetch")) {
+                // The value of the fetch capability is a space-separated list
+                // of supported fetch features.
+                var feature_iterator = mem.splitScalar(u8, capability.value orelse continue, ' ');
+                while (feature_iterator.next()) |feature| {
+                    if (mem.eql(u8, feature, "shallow")) {
+                        session.supports_shallow = true;
+                    }
+                }
+            }
+        }
+    }
+
+    /// Returns an iterator over capabilities supported by the server.
+    ///
+    /// If the server redirects the request, `error.Redirected` is returned and
+    /// `redirect_uri` is populated with the URI resulting from the redirects.
+    /// When this occurs, the value of `redirect_uri` must be freed with
+    /// `allocator` when the caller is done with it.
+    fn getCapabilities(
+        session: Session,
+        allocator: Allocator,
+        redirect_uri: *[]u8,
+    ) !CapabilityIterator {
+        var info_refs_uri = session.uri;
+        info_refs_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "info/refs" });
+        defer allocator.free(info_refs_uri.path);
+        info_refs_uri.query = "service=git-upload-pack";
+        info_refs_uri.fragment = null;
+
+        var headers = std.http.Headers.init(allocator);
+        defer headers.deinit();
+        try headers.append("Git-Protocol", "version=2");
+
+        var request = try session.transport.request(.GET, info_refs_uri, headers, .{
+            .max_redirects = max_redirects,
+        });
+        // On success the request is owned by the returned iterator.
+        errdefer request.deinit();
+        try request.start(.{});
+        try request.finish();
+
+        try request.wait();
+        if (request.response.status != .ok) return error.ProtocolError;
+        // Fewer redirects left than we started with means the request was
+        // redirected at least once.
+        if (request.redirects_left < max_redirects) {
+            if (!mem.endsWith(u8, request.uri.path, "/info/refs")) return error.UnparseableRedirect;
+            var new_uri = request.uri;
+            new_uri.path = new_uri.path[0 .. new_uri.path.len - "/info/refs".len];
+            new_uri.query = null;
+            redirect_uri.* = try std.fmt.allocPrint(allocator, "{+/}", .{new_uri});
+            return error.Redirected;
+        }
+
+        const reader = request.reader();
+        var buf: [Packet.max_data_length]u8 = undefined;
+        var state: enum { response_start, response_content } = .response_start;
+        while (true) {
+            // Some Git servers (at least GitHub) include an additional
+            // '# service=git-upload-pack' informative response before sending
+            // the expected 'version 2' packet and capability information.
+            // This is not universal: SourceHut, for example, does not do this.
+            // Thus, we need to skip any such useless additional responses
+            // before we get the one we're actually looking for. The responses
+            // will be delimited by flush packets.
+            const packet = Packet.read(reader, &buf) catch |e| switch (e) {
+                error.EndOfStream => return error.UnsupportedProtocol, // 'version 2' packet not found
+                else => |other| return other,
+            };
+            switch (packet) {
+                .flush => state = .response_start,
+                .data => |data| switch (state) {
+                    .response_start => if (mem.eql(u8, data, "version 2\n")) {
+                        return .{ .request = request };
+                    } else {
+                        state = .response_content;
+                    },
+                    else => {},
+                },
+                else => return error.UnexpectedPacket,
+            }
+        }
+    }
+
+    /// Iterates over the capability lines of a capability advertisement. Owns
+    /// the underlying request.
+    const CapabilityIterator = struct {
+        request: std.http.Client.Request,
+        buf: [Packet.max_data_length]u8 = undefined,
+
+        const Capability = struct {
+            key: []const u8,
+            value: ?[]const u8 = null,
+        };
+
+        fn deinit(iterator: *CapabilityIterator) void {
+            iterator.request.deinit();
+            iterator.* = undefined;
+        }
+
+        /// Returns the next capability, or null at the terminating flush
+        /// packet. The returned slices point into the iterator's buffer and
+        /// are overwritten by the following call.
+        fn next(iterator: *CapabilityIterator) !?Capability {
+            switch (try Packet.read(iterator.request.reader(), &iterator.buf)) {
+                .flush => return null,
+                .data => |data| if (data.len > 0 and data[data.len - 1] == '\n') {
+                    // Each line is either `key=value` or a bare `key`, with
+                    // the trailing newline excluded from the result.
+                    if (mem.indexOfScalar(u8, data, '=')) |separator_pos| {
+                        return .{ .key = data[0..separator_pos], .value = data[separator_pos + 1 .. data.len - 1] };
+                    } else {
+                        return .{ .key = data[0 .. data.len - 1] };
+                    }
+                } else return error.UnexpectedPacket,
+                else => return error.UnexpectedPacket,
+            }
+        }
+    };
+
+    const ListRefsOptions = struct {
+        /// The ref prefixes (if any) to use to filter the refs available on the
+        /// server. Note that the client must still check the returned refs
+        /// against its desired filters itself: the server is not required to
+        /// respect these prefix filters and may return other refs as well.
+        ref_prefixes: []const []const u8 = &.{},
+        /// Whether to include symref targets for returned symbolic refs.
+        include_symrefs: bool = false,
+        /// Whether to include the peeled object ID for returned tag refs.
+        include_peeled: bool = false,
+    };
+
+    /// Returns an iterator over refs known to the server.
+    ///
+    /// The returned iterator owns the HTTP request and must be released with
+    /// `RefIterator.deinit`. A response status other than OK yields
+    /// `error.ProtocolError`.
+    pub fn listRefs(session: Session, allocator: Allocator, options: ListRefsOptions) !RefIterator {
+        var upload_pack_uri = session.uri;
+        upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
+        defer allocator.free(upload_pack_uri.path);
+        upload_pack_uri.query = null;
+        upload_pack_uri.fragment = null;
+
+        var headers = std.http.Headers.init(allocator);
+        defer headers.deinit();
+        try headers.append("Content-Type", "application/x-git-upload-pack-request");
+        try headers.append("Git-Protocol", "version=2");
+
+        // The request body is the command and its capabilities, a delimiter
+        // packet, the command arguments and a final flush packet.
+        var body = std.ArrayListUnmanaged(u8){};
+        defer body.deinit(allocator);
+        const body_writer = body.writer(allocator);
+        try Packet.write(.{ .data = "command=ls-refs\n" }, body_writer);
+        if (session.supports_agent) {
+            try Packet.write(.{ .data = agent_capability }, body_writer);
+        }
+        try Packet.write(.delimiter, body_writer);
+        for (options.ref_prefixes) |ref_prefix| {
+            const ref_prefix_packet = try std.fmt.allocPrint(allocator, "ref-prefix {s}\n", .{ref_prefix});
+            defer allocator.free(ref_prefix_packet);
+            try Packet.write(.{ .data = ref_prefix_packet }, body_writer);
+        }
+        if (options.include_symrefs) {
+            try Packet.write(.{ .data = "symrefs\n" }, body_writer);
+        }
+        if (options.include_peeled) {
+            try Packet.write(.{ .data = "peel\n" }, body_writer);
+        }
+        try Packet.write(.flush, body_writer);
+
+        var request = try session.transport.request(.POST, upload_pack_uri, headers, .{
+            .handle_redirects = false,
+        });
+        errdefer request.deinit();
+        request.transfer_encoding = .{ .content_length = body.items.len };
+        try request.start(.{});
+        try request.writeAll(body.items);
+        try request.finish();
+
+        try request.wait();
+        if (request.response.status != .ok) return error.ProtocolError;
+
+        return .{ .request = request };
+    }
+
+    pub const RefIterator = struct {
+        request: std.http.Client.Request,
+        buf: [Packet.max_data_length]u8 = undefined,
+
+        pub const Ref = struct {
+            oid: Oid,
+            name: []const u8,
+            symref_target: ?[]const u8,
+            peeled: ?Oid,
+        };
+
+        pub fn deinit(iterator: *RefIterator) void {
+            iterator.request.deinit();
+            iterator.* = undefined;
+        }
+
+        /// Returns the next ref, or null at the terminating flush packet.
+        /// Slices in the returned value point into the iterator's buffer and
+        /// are overwritten by the following call.
+        pub fn next(iterator: *RefIterator) !?Ref {
+            switch (try Packet.read(iterator.request.reader(), &iterator.buf)) {
+                .flush => return null,
+                .data => |data| {
+                    // Each line has the form
+                    // `<oid> <name>[ <attribute>...]` followed by a newline.
+                    const oid_sep_pos = mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidRefPacket;
+                    const oid = parseOid(data[0..oid_sep_pos]) catch return error.InvalidRefPacket;
+
+                    const name_sep_pos = mem.indexOfAnyPos(u8, data, oid_sep_pos + 1, " \n") orelse return error.InvalidRefPacket;
+                    const name = data[oid_sep_pos + 1 .. name_sep_pos];
+
+                    var symref_target: ?[]const u8 = null;
+                    var peeled: ?Oid = null;
+                    var last_sep_pos = name_sep_pos;
+                    // A space separator means another attribute follows; a
+                    // newline separator ends the line. Unknown attributes
+                    // are ignored.
+                    while (data[last_sep_pos] == ' ') {
+                        const next_sep_pos = mem.indexOfAnyPos(u8, data, last_sep_pos + 1, " \n") orelse return error.InvalidRefPacket;
+                        const attribute = data[last_sep_pos + 1 .. next_sep_pos];
+                        if (mem.startsWith(u8, attribute, "symref-target:")) {
+                            symref_target = attribute["symref-target:".len..];
+                        } else if (mem.startsWith(u8, attribute, "peeled:")) {
+                            peeled = parseOid(attribute["peeled:".len..]) catch return error.InvalidRefPacket;
+                        }
+                        last_sep_pos = next_sep_pos;
+                    }
+
+                    return .{ .oid = oid, .name = name, .symref_target = symref_target, .peeled = peeled };
+                },
+                else => return error.UnexpectedPacket,
+            }
+        }
+    };
+
+    /// Fetches the given refs from the server. A shallow fetch (depth 1) is
+    /// performed if the server supports it.
+    ///
+    /// The returned stream owns the HTTP request and must be released with
+    /// `FetchStream.deinit`. A `want` that is too long to fit in a single
+    /// packet yields `error.InvalidWant`.
+    pub fn fetch(session: Session, allocator: Allocator, wants: []const []const u8) !FetchStream {
+        var upload_pack_uri = session.uri;
+        upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
+        defer allocator.free(upload_pack_uri.path);
+        upload_pack_uri.query = null;
+        upload_pack_uri.fragment = null;
+
+        var headers = std.http.Headers.init(allocator);
+        defer headers.deinit();
+        try headers.append("Content-Type", "application/x-git-upload-pack-request");
+        try headers.append("Git-Protocol", "version=2");
+
+        var body = std.ArrayListUnmanaged(u8){};
+        defer body.deinit(allocator);
+        const body_writer = body.writer(allocator);
+        try Packet.write(.{ .data = "command=fetch\n" }, body_writer);
+        if (session.supports_agent) {
+            try Packet.write(.{ .data = agent_capability }, body_writer);
+        }
+        try Packet.write(.delimiter, body_writer);
+        // Our packfile parser supports the OFS_DELTA object type
+        try Packet.write(.{ .data = "ofs-delta\n" }, body_writer);
+        // We do not currently convey server progress information to the user
+        try Packet.write(.{ .data = "no-progress\n" }, body_writer);
+        if (session.supports_shallow) {
+            try Packet.write(.{ .data = "deepen 1\n" }, body_writer);
+        }
+        var want_buf: [Packet.max_data_length]u8 = undefined;
+        for (wants) |want| {
+            // `want` is supplied by the caller, so running out of buffer
+            // space is reported as an error instead of assumed impossible.
+            const arg = std.fmt.bufPrint(&want_buf, "want {s}\n", .{want}) catch return error.InvalidWant;
+            try Packet.write(.{ .data = arg }, body_writer);
+        }
+        try Packet.write(.{ .data = "done\n" }, body_writer);
+        try Packet.write(.flush, body_writer);
+
+        var request = try session.transport.request(.POST, upload_pack_uri, headers, .{
+            .handle_redirects = false,
+        });
+        errdefer request.deinit();
+        request.transfer_encoding = .{ .content_length = body.items.len };
+        try request.start(.{});
+        try request.writeAll(body.items);
+        try request.finish();
+
+        try request.wait();
+        if (request.response.status != .ok) return error.ProtocolError;
+
+        const reader = request.reader();
+        // We are not interested in any of the sections of the returned fetch
+        // data other than the packfile section, since we aren't doing anything
+        // complex like ref negotiation (this is a fresh clone).
+        var state: enum { section_start, section_content } = .section_start;
+        while (true) {
+            var buf: [Packet.max_data_length]u8 = undefined;
+            const packet = try Packet.read(reader, &buf);
+            switch (state) {
+                .section_start => switch (packet) {
+                    .data => |data| if (mem.eql(u8, data, "packfile\n")) {
+                        return .{ .request = request };
+                    } else {
+                        state = .section_content;
+                    },
+                    else => return error.UnexpectedPacket,
+                },
+                .section_content => switch (packet) {
+                    .delimiter => state = .section_start,
+                    .data => {},
+                    else => return error.UnexpectedPacket,
+                },
+            }
+        }
+    }
+
+    /// A stream of the packfile data in a fetch response. Owns the
+    /// underlying request.
+    pub const FetchStream = struct {
+        request: std.http.Client.Request,
+        buf: [Packet.max_data_length]u8 = undefined,
+        /// The position in `buf` of the next unread byte.
+        pos: usize = 0,
+        /// The end in `buf` of the data of the current packet.
+        len: usize = 0,
+
+        pub fn deinit(stream: *FetchStream) void {
+            stream.request.deinit();
+            stream.* = undefined;
+        }
+
+        pub const ReadError = std.http.Client.Request.ReadError || error{
+            InvalidPacket,
+            ProtocolError,
+            UnexpectedPacket,
+        };
+        pub const Reader = std.io.Reader(*FetchStream, ReadError, read);
+
+        /// The first byte of every data packet in the packfile section,
+        /// identifying what the rest of the packet contains.
+        const StreamCode = enum(u8) {
+            pack_data = 1,
+            progress = 2,
+            fatal_error = 3,
+            _,
+        };
+
+        pub fn reader(stream: *FetchStream) Reader {
+            return .{ .context = stream };
+        }
+
+        /// Copies pending pack data into `buf`, reading further packets once
+        /// the current one is used up. Returns 0 at the terminating flush
+        /// packet and `error.ProtocolError` on a fatal error packet; packets
+        /// with any other stream code, or without a payload, are skipped.
+        pub fn read(stream: *FetchStream, buf: []u8) !usize {
+            if (stream.pos == stream.len) {
+                while (true) {
+                    switch (try Packet.read(stream.request.reader(), &stream.buf)) {
+                        .flush => return 0,
+                        .data => |data| if (data.len > 1) switch (@as(StreamCode, @enumFromInt(data[0]))) {
+                            .pack_data => {
+                                // Skip the stream code byte.
+                                stream.pos = 1;
+                                stream.len = data.len;
+                                break;
+                            },
+                            .fatal_error => return error.ProtocolError,
+                            else => {},
+                        },
+                        else => return error.UnexpectedPacket,
+                    }
+                }
+            }
+
+            const size = @min(buf.len, stream.len - stream.pos);
+            @memcpy(buf[0..size], stream.buf[stream.pos .. stream.pos + size]);
+            stream.pos += size;
+            return size;
+        }
+    };
+};
+
+/// The fixed-size header found at the start of a packfile.
+const PackHeader = struct {
+    total_objects: u32,
+
+    const signature = "PACK";
+    const supported_version = 2;
+
+    /// Reads and validates a packfile header from `reader`.
+    ///
+    /// A missing or incorrect signature, or input that ends before the header
+    /// is complete, yields `error.InvalidHeader`. A version other than
+    /// `supported_version` yields `error.UnsupportedVersion`.
+    fn read(reader: anytype) !PackHeader {
+        const magic = reader.readBytesNoEof(signature.len) catch |err| switch (err) {
+            error.EndOfStream => return error.InvalidHeader,
+            else => |unexpected| return unexpected,
+        };
+        if (!mem.eql(u8, &magic, signature)) {
+            return error.InvalidHeader;
+        }
+
+        const version = reader.readIntBig(u32) catch |err| switch (err) {
+            error.EndOfStream => return error.InvalidHeader,
+            else => |unexpected| return unexpected,
+        };
+        if (version != supported_version) {
+            return error.UnsupportedVersion;
+        }
+
+        const object_count = reader.readIntBig(u32) catch |err| switch (err) {
+            error.EndOfStream => return error.InvalidHeader,
+            else => |unexpected| return unexpected,
+        };
+        return .{ .total_objects = object_count };
+    }
+};
+
+const EntryHeader = union(Type) { // Header of one packfile entry, tagged by entry type; every payload records an uncompressed length.
+ commit: Undeltified,
+ tree: Undeltified,
+ blob: Undeltified,
+ tag: Undeltified,
+ ofs_delta: OfsDelta,
+ ref_delta: RefDelta,
+
+ const Type = enum(u3) { // Type codes as read from the 3-bit type field; 0 and 5 have no tag here and are rejected by `read`.
+ commit = 1,
+ tree = 2,
+ blob = 3,
+ tag = 4,
+ ofs_delta = 6,
+ ref_delta = 7,
+ };
+
+ const Undeltified = struct { // Payload of the four entry types that are not deltas.
+ uncompressed_length: u64,
+ };
+
+ const OfsDelta = struct { // Payload of a delta whose base is identified by an offset.
+ offset: u64, // `Odb.readObject` subtracts this from the entry's own offset to find the base.
+ uncompressed_length: u64,
+ };
+
+ const RefDelta = struct { // Payload of a delta whose base is identified by object ID.
+ base_object: Oid,
+ uncompressed_length: u64,
+ };
+
+ fn objectType(header: EntryHeader) Object.Type { // Maps an undeltified header to the `Object.Type` of the same name.
+ return switch (header) {
+ inline .commit, .tree, .blob, .tag => |_, tag| @field(Object.Type, @tagName(tag)),
+ else => unreachable, // Must not be called on a delta header.
+ };
+ }
+
+ fn uncompressedLength(header: EntryHeader) u64 { // Returns the uncompressed length recorded in whichever payload is active.
+ return switch (header) {
+ inline else => |entry| entry.uncompressed_length,
+ };
+ }
+
+ fn read(reader: anytype) !EntryHeader { // Reads one entry header from `reader`; malformed input yields `error.InvalidFormat`.
+ const InitialByte = packed struct { len: u4, type: u3, has_next: bool }; // Least significant bits first: 4 length bits, 3 type bits, continuation flag.
+ const initial: InitialByte = @bitCast(reader.readByte() catch |e| switch (e) {
+ error.EndOfStream => return error.InvalidFormat,
+ else => |other| return other,
+ });
+ const rest_len = if (initial.has_next) try readSizeVarInt(reader) else 0; // Further length bits follow only when the continuation flag is set.
+ var uncompressed_length: u64 = initial.len;
+ uncompressed_length |= std.math.shlExact(u64, rest_len, 4) catch return error.InvalidFormat; // The first byte supplies the low 4 bits; a remainder too large to shift is a format error.
+ const @"type" = std.meta.intToEnum(EntryHeader.Type, initial.type) catch return error.InvalidFormat;
+ return switch (@"type") {
+ inline .commit, .tree, .blob, .tag => |tag| @unionInit(EntryHeader, @tagName(tag), .{
+ .uncompressed_length = uncompressed_length,
+ }),
+ .ofs_delta => .{ .ofs_delta = .{
+ .offset = try readOffsetVarInt(reader), // The offset follows the length in its own variable-length encoding.
+ .uncompressed_length = uncompressed_length,
+ } },
+ .ref_delta => .{ .ref_delta = .{
+ .base_object = reader.readBytesNoEof(oid_length) catch |e| switch (e) { // The base object ID is stored as raw bytes.
+ error.EndOfStream => return error.InvalidFormat,
+ else => |other| return other,
+ },
+ .uncompressed_length = uncompressed_length,
+ } },
+ };
+ }
+};
+
+/// Reads a variable-length size from `r`: each byte contributes its low 7
+/// bits, least significant group first, and a set high bit means another byte
+/// follows.
+///
+/// Returns `error.InvalidFormat` if the encoded value does not fit in a `u64`,
+/// either because there are too many continuation bytes or because a group
+/// would have bits shifted out of the result. Reader errors (including
+/// `error.EndOfStream`) are returned unchanged.
+fn readSizeVarInt(r: anytype) !u64 {
+    const Byte = packed struct { value: u7, has_next: bool };
+    var b: Byte = @bitCast(try r.readByte());
+    var value: u64 = b.value;
+    var shift: u6 = 0;
+    while (b.has_next) {
+        b = @bitCast(try r.readByte());
+        shift = std.math.add(u6, shift, 7) catch return error.InvalidFormat;
+        // A plain `<<` would silently discard the high bits of the last
+        // group once `shift` exceeds 57; reject such input instead.
+        value |= std.math.shlExact(u64, b.value, shift) catch return error.InvalidFormat;
+    }
+    return value;
+}
+
+/// Reads a variable-length offset from `r`: each byte contributes its low 7
+/// bits, most significant group first, and a set high bit means another byte
+/// follows. Before each continuation byte is merged in, the accumulated value
+/// is incremented by one and shifted left by 7.
+///
+/// Returns `error.InvalidFormat` if the value does not fit in a `u64`. Reader
+/// errors (including `error.EndOfStream`) are returned unchanged.
+fn readOffsetVarInt(r: anytype) !u64 {
+    const Byte = packed struct { value: u7, has_next: bool };
+    var b: Byte = @bitCast(try r.readByte());
+    var value: u64 = b.value;
+    while (b.has_next) {
+        b = @bitCast(try r.readByte());
+        // Both the increment and the shift can overflow on malformed input;
+        // neither may be allowed to trap or wrap.
+        const incremented = std.math.add(u64, value, 1) catch return error.InvalidFormat;
+        value = std.math.shlExact(u64, incremented, 7) catch return error.InvalidFormat;
+        value |= b.value;
+    }
+    return value;
+}
+
+const IndexHeader = struct {
+    fan_out_table: [256]u32,
+
+    const signature = "\xFFtOc";
+    const supported_version = 2;
+    const size = 4 + 4 + @sizeOf([256]u32);
+
+    /// Reads the fixed-size index header from `reader`: the signature, a
+    /// big-endian `u32` version, and 256 big-endian `u32` fan-out entries.
+    ///
+    /// Returns `error.InvalidHeader` on a signature mismatch and
+    /// `error.UnsupportedVersion` for any version other than
+    /// `supported_version`. Reader errors (including `error.EndOfStream`) are
+    /// returned unchanged.
+    fn read(reader: anytype) !IndexHeader {
+        const raw = try reader.readBytesNoEof(size);
+        if (!mem.eql(u8, raw[0..signature.len], signature)) return error.InvalidHeader;
+        if (mem.readIntBig(u32, raw[4..8]) != supported_version) return error.UnsupportedVersion;
+
+        // The fan-out table occupies the rest of the header, 4 bytes per entry.
+        var header: IndexHeader = .{ .fan_out_table = undefined };
+        for (&header.fan_out_table, 0..) |*slot, entry_index| {
+            slot.* = mem.readIntBig(u32, raw[8 + 4 * entry_index ..][0..4]);
+        }
+        return header;
+    }
+};
+
+/// The per-object data recorded while building a packfile index.
+const IndexEntry = struct {
+ /// Byte offset within the packfile at which the entry (starting with its
+ /// header) begins.
+ offset: u64,
+ /// CRC-32 of the bytes read from the packfile for the entry (its header
+ /// and compressed data).
+ crc32: u32,
+};
+
+/// Writes out a version 2 index for the given packfile, as documented in
+/// [pack-format](https://git-scm.com/docs/pack-format).
+///
+/// `pack` is read from its beginning. The index is written to `index_writer`
+/// in this order: signature and version, fan-out table, sorted object IDs,
+/// CRC-32 values, 4-byte offsets, 8-byte offsets for entries that do not fit
+/// in 31 bits, the pack checksum, and finally the SHA-1 of everything written
+/// before it.
+///
+/// Returns `error.IncompletePack` if a full sweep over the pending delta
+/// objects fails to resolve any of them.
+pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) !void {
+ try pack.seekTo(0);
+
+ var index_entries = std.AutoHashMapUnmanaged(Oid, IndexEntry){};
+ defer index_entries.deinit(allocator);
+ var pending_deltas = std.ArrayListUnmanaged(IndexEntry){};
+ defer pending_deltas.deinit(allocator);
+
+ const pack_checksum = try indexPackFirstPass(allocator, pack, &index_entries, &pending_deltas);
+
+ // Sweep the pending deltas repeatedly, indexing every delta whose ID can be
+ // computed. Each sweep walks from the back so that `swapRemove` only ever
+ // moves an element that has already been visited in this sweep.
+ var cache: ObjectCache = .{};
+ defer cache.deinit(allocator);
+ var remaining_deltas = pending_deltas.items.len;
+ while (remaining_deltas > 0) {
+ var i: usize = remaining_deltas;
+ while (i > 0) {
+ i -= 1;
+ const delta = pending_deltas.items[i];
+ if (try indexPackHashDelta(allocator, pack, delta, index_entries, &cache)) |oid| {
+ try index_entries.put(allocator, oid, delta);
+ _ = pending_deltas.swapRemove(i);
+ }
+ }
+ // No progress in a whole sweep: the remaining deltas cannot be resolved
+ if (pending_deltas.items.len == remaining_deltas) return error.IncompletePack;
+ remaining_deltas = pending_deltas.items.len;
+ }
+
+ // The index lists objects in ascending order of their IDs
+ var oids = std.ArrayListUnmanaged(Oid){};
+ defer oids.deinit(allocator);
+ try oids.ensureTotalCapacityPrecise(allocator, index_entries.count());
+ var index_entries_iter = index_entries.iterator();
+ while (index_entries_iter.next()) |entry| {
+ oids.appendAssumeCapacity(entry.key_ptr.*);
+ }
+ mem.sortUnstable(Oid, oids.items, {}, struct {
+ fn lessThan(_: void, o1: Oid, o2: Oid) bool {
+ return mem.lessThan(u8, &o1, &o2);
+ }
+ }.lessThan);
+
+ // Build the fan-out table: entry `b` ends up holding the number of objects
+ // whose first ID byte is less than or equal to `b`
+ var fan_out_table: [256]u32 = undefined;
+ var count: u32 = 0;
+ var fan_out_index: u8 = 0;
+ for (oids.items) |oid| {
+ if (oid[0] > fan_out_index) {
+ @memset(fan_out_table[fan_out_index..oid[0]], count);
+ fan_out_index = oid[0];
+ }
+ count += 1;
+ }
+ @memset(fan_out_table[fan_out_index..], count);
+
+ // Everything written through `writer` is also fed to the SHA-1 hasher that
+ // produces the trailing index checksum
+ var index_hashed_writer = hashedWriter(index_writer, Sha1.init(.{}));
+ const writer = index_hashed_writer.writer();
+ try writer.writeAll(IndexHeader.signature);
+ try writer.writeIntBig(u32, IndexHeader.supported_version);
+ for (fan_out_table) |fan_out_entry| {
+ try writer.writeIntBig(u32, fan_out_entry);
+ }
+
+ for (oids.items) |oid| {
+ try writer.writeAll(&oid);
+ }
+
+ for (oids.items) |oid| {
+ try writer.writeIntBig(u32, index_entries.get(oid).?.crc32);
+ }
+
+ // Offsets that fit in 31 bits are written directly. Larger offsets are
+ // collected in `big_offsets`; their 4-byte slot holds the position within
+ // that list with the top bit set, and the list itself is written afterwards
+ var big_offsets = std.ArrayListUnmanaged(u64){};
+ defer big_offsets.deinit(allocator);
+ for (oids.items) |oid| {
+ const offset = index_entries.get(oid).?.offset;
+ if (offset <= std.math.maxInt(u31)) {
+ try writer.writeIntBig(u32, @intCast(offset));
+ } else {
+ const index = big_offsets.items.len;
+ try big_offsets.append(allocator, offset);
+ try writer.writeIntBig(u32, @as(u32, @intCast(index)) | (1 << 31));
+ }
+ }
+ for (big_offsets.items) |offset| {
+ try writer.writeIntBig(u64, offset);
+ }
+
+ // The pack checksum is part of the hashed index contents; the index
+ // checksum itself is written to the underlying writer, bypassing the hasher
+ try writer.writeAll(&pack_checksum);
+ const index_checksum = index_hashed_writer.hasher.finalResult();
+ try index_writer.writeAll(&index_checksum);
+}
+
+/// Performs the first pass over the packfile data for index construction.
+/// This will index all non-delta objects, queue delta objects for further
+/// processing, and return the pack checksum (which is part of the index
+/// format).
+///
+/// Reading starts at the current position of `pack`. Undeltified objects are
+/// added to `index_entries` keyed by their computed ID; delta objects are
+/// appended to `pending_deltas` with their offset and CRC-32 only.
+///
+/// Returns `error.InvalidObject` if an entry's decompressed size differs from
+/// the size in its header, `error.CorruptedPack` if the computed checksum does
+/// not match the one recorded in the pack, and `error.InvalidFormat` if any
+/// data follows the recorded checksum.
+fn indexPackFirstPass(
+ allocator: Allocator,
+ pack: std.fs.File,
+ index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry),
+ pending_deltas: *std.ArrayListUnmanaged(IndexEntry),
+) ![Sha1.digest_length]u8 {
+ // Reader stack: buffered file reads -> byte counter (used for entry
+ // offsets) -> SHA-1 over everything read through `pack_reader`
+ var pack_buffered_reader = std.io.bufferedReader(pack.reader());
+ var pack_counting_reader = std.io.countingReader(pack_buffered_reader.reader());
+ var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Sha1.init(.{}));
+ const pack_reader = pack_hashed_reader.reader();
+
+ const pack_header = try PackHeader.read(pack_reader);
+
+ var current_entry: u32 = 0;
+ while (current_entry < pack_header.total_objects) : (current_entry += 1) {
+ const entry_offset = pack_counting_reader.bytes_read;
+ // A fresh CRC-32 per entry, covering the entry header and the data read
+ // by the decompressor
+ var entry_crc32_reader = std.compress.hashedReader(pack_reader, std.hash.Crc32.init());
+ const entry_header = try EntryHeader.read(entry_crc32_reader.reader());
+ switch (entry_header) {
+ inline .commit, .tree, .blob, .tag => |object, tag| {
+ var entry_decompress_stream = try std.compress.zlib.decompressStream(allocator, entry_crc32_reader.reader());
+ defer entry_decompress_stream.deinit();
+ var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader());
+ // The decompressed contents are only hashed, never stored
+ var entry_hashed_writer = hashedWriter(std.io.null_writer, Sha1.init(.{}));
+ const entry_writer = entry_hashed_writer.writer();
+ // The object header is not included in the pack data but is
+ // part of the object's ID
+ try entry_writer.print("{s} {}\x00", .{ @tagName(tag), object.uncompressed_length });
+ var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
+ try fifo.pump(entry_counting_reader.reader(), entry_writer);
+ if (entry_counting_reader.bytes_read != object.uncompressed_length) {
+ return error.InvalidObject;
+ }
+ const oid = entry_hashed_writer.hasher.finalResult();
+ try index_entries.put(allocator, oid, .{
+ .offset = entry_offset,
+ .crc32 = entry_crc32_reader.hasher.final(),
+ });
+ },
+ inline .ofs_delta, .ref_delta => |delta| {
+ // The delta data is decompressed and discarded: this validates
+ // its length and moves the readers past the entry. Computing
+ // the object's ID is left to a later pass.
+ var entry_decompress_stream = try std.compress.zlib.decompressStream(allocator, entry_crc32_reader.reader());
+ defer entry_decompress_stream.deinit();
+ var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader());
+ var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
+ try fifo.pump(entry_counting_reader.reader(), std.io.null_writer);
+ if (entry_counting_reader.bytes_read != delta.uncompressed_length) {
+ return error.InvalidObject;
+ }
+ try pending_deltas.append(allocator, .{
+ .offset = entry_offset,
+ .crc32 = entry_crc32_reader.hasher.final(),
+ });
+ },
+ }
+ }
+
+ // The recorded checksum is read from the buffered reader directly so that
+ // it is not itself fed into the hasher
+ const pack_checksum = pack_hashed_reader.hasher.finalResult();
+ const recorded_checksum = try pack_buffered_reader.reader().readBytesNoEof(Sha1.digest_length);
+ if (!mem.eql(u8, &pack_checksum, &recorded_checksum)) {
+ return error.CorruptedPack;
+ }
+ // The checksum must be the last thing in the file: end of stream is the
+ // success case here, and any further byte is an error
+ _ = pack_buffered_reader.reader().readByte() catch |e| switch (e) {
+ error.EndOfStream => return pack_checksum,
+ else => |other| return other,
+ };
+ return error.InvalidFormat;
+}
+
+/// Attempts to determine the final object ID of the given deltified object.
+/// May return null if this is not yet possible (if the delta is a ref-based
+/// delta and we do not yet know the offset of the base object).
+///
+/// `delta.offset` is the position of the delta entry in `pack`. The position
+/// of `pack` is changed by this function. Objects read or reconstructed along
+/// the way are stored in `cache`.
+///
+/// Returns `error.InvalidObject` if an offset-based delta points before the
+/// start of the pack.
+fn indexPackHashDelta(
+ allocator: Allocator,
+ pack: std.fs.File,
+ delta: IndexEntry,
+ index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry),
+ cache: *ObjectCache,
+) !?Oid {
+ // Figure out the chain of deltas to resolve
+ var base_offset = delta.offset;
+ var base_header: EntryHeader = undefined;
+ // Offsets of the deltas visited, starting with `delta` itself and moving
+ // towards the base
+ var delta_offsets = std.ArrayListUnmanaged(u64){};
+ defer delta_offsets.deinit(allocator);
+ const base_object = while (true) {
+ // A cached object ends the walk early; it serves as the base for the
+ // deltas collected so far
+ if (cache.get(base_offset)) |base_object| break base_object;
+
+ try pack.seekTo(base_offset);
+ base_header = try EntryHeader.read(pack.reader());
+ switch (base_header) {
+ .ofs_delta => |ofs_delta| {
+ try delta_offsets.append(allocator, base_offset);
+ // The stored offset is relative: it is subtracted from the
+ // position of the delta entry itself
+ base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidObject;
+ },
+ .ref_delta => |ref_delta| {
+ try delta_offsets.append(allocator, base_offset);
+ // The base is named by ID; if it has not been indexed yet, give
+ // up for now so the caller can retry later
+ base_offset = (index_entries.get(ref_delta.base_object) orelse return null).offset;
+ },
+ else => {
+ const base_data = try readObjectRaw(allocator, pack.reader(), base_header.uncompressedLength());
+ errdefer allocator.free(base_data);
+ const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
+ try cache.put(allocator, base_offset, base_object);
+ break base_object;
+ },
+ }
+ };
+
+ const base_data = try resolveDeltaChain(allocator, pack, base_object, delta_offsets.items, cache);
+
+ // The ID is the SHA-1 of the header "<type> <length>\x00" followed by the
+ // object data; the writer only feeds the hasher (its output is discarded)
+ var entry_hasher = Sha1.init(.{});
+ var entry_hashed_writer = hashedWriter(std.io.null_writer, &entry_hasher);
+ try entry_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(base_object.type), base_data.len });
+ entry_hasher.update(base_data);
+ return entry_hasher.finalResult();
+}
+
+/// Applies a chain of deltas on top of `base_object` and returns the data of
+/// the final reconstructed object. `delta_offsets` holds the pack offsets of
+/// the delta entries; they are applied in reverse order (last element first),
+/// each one using the result of the previous step as its base.
+///
+/// This function seeks `pack` to each delta entry itself. Every intermediate
+/// result is inserted into `cache` under the offset of the delta that produced
+/// it and is not freed here. When `delta_offsets` is empty, `base_object.data`
+/// is returned as is.
+///
+/// Returns `error.ObjectTooLarge` if an expanded size does not fit in `usize`
+/// and `error.InvalidObject` if a delta expands to a different size than it
+/// declares.
+fn resolveDeltaChain(
+    allocator: Allocator,
+    pack: std.fs.File,
+    base_object: Object,
+    delta_offsets: []const u64,
+    cache: *ObjectCache,
+) ![]const u8 {
+    var current_data = base_object.data;
+    var remaining = delta_offsets.len;
+    while (remaining > 0) : (remaining -= 1) {
+        const delta_offset = delta_offsets[remaining - 1];
+
+        // Load the raw (decompressed) delta instructions for this step
+        try pack.seekTo(delta_offset);
+        const pack_reader = pack.reader();
+        const delta_header = try EntryHeader.read(pack_reader);
+        const delta_data = try readObjectRaw(allocator, pack_reader, delta_header.uncompressedLength());
+        defer allocator.free(delta_data);
+
+        // Delta data starts with two sizes: the base object size (not needed
+        // here) and the size of the expanded result
+        var delta_stream = std.io.fixedBufferStream(delta_data);
+        const delta_reader = delta_stream.reader();
+        _ = try readSizeVarInt(delta_reader);
+        const expanded_size = try readSizeVarInt(delta_reader);
+        const expanded_len = std.math.cast(usize, expanded_size) orelse return error.ObjectTooLarge;
+
+        const expanded_data = try allocator.alloc(u8, expanded_len);
+        errdefer allocator.free(expanded_data);
+        var expanded_stream = std.io.fixedBufferStream(expanded_data);
+        var base_stream = std.io.fixedBufferStream(current_data);
+        try expandDelta(&base_stream, delta_reader, expanded_stream.writer());
+        if (expanded_stream.pos != expanded_size) return error.InvalidObject;
+
+        // The result keeps the type of the chain's base object
+        try cache.put(allocator, delta_offset, .{ .type = base_object.type, .data = expanded_data });
+        current_data = expanded_data;
+    }
+    return current_data;
+}
+
+/// Decompresses exactly `size` bytes of zlib-compressed object data from
+/// `reader` into a newly allocated buffer. The caller owns the returned slice
+/// and frees it with `allocator`.
+///
+/// `reader` is wrapped in a buffered reader, so more bytes than the object
+/// occupies may be consumed from it; do not rely on the reader position
+/// afterwards.
+///
+/// Returns `error.ObjectTooLarge` if `size` does not fit in `usize` and
+/// `error.InvalidFormat` if the compressed stream holds more than `size`
+/// bytes.
+fn readObjectRaw(allocator: Allocator, reader: anytype, size: u64) ![]u8 {
+    const byte_count = std.math.cast(usize, size) orelse return error.ObjectTooLarge;
+
+    var buffered = std.io.bufferedReader(reader);
+    var inflater = try std.compress.zlib.decompressStream(allocator, buffered.reader());
+    defer inflater.deinit();
+    const inflated = inflater.reader();
+
+    const object_data = try allocator.alloc(u8, byte_count);
+    errdefer allocator.free(object_data);
+    try inflated.readNoEof(object_data);
+
+    // The stream must end exactly here; one more readable byte means the
+    // object is longer than its header claimed
+    if (inflated.readByte()) |_| {
+        return error.InvalidFormat;
+    } else |err| switch (err) {
+        error.EndOfStream => return object_data,
+        else => |unexpected| return unexpected,
+    }
+}
+
+/// Applies the delta instructions read from `delta_reader` to `base_object`,
+/// writing the reconstructed data to `writer`. `base_object` must provide
+/// `reader` and `seekTo` (a `std.io.FixedBufferStream` does).
+///
+/// Instructions are processed until `delta_reader` reports end of stream at
+/// an instruction boundary. An instruction byte of zero yields
+/// `error.InvalidDeltaInstruction`.
+///
+/// The instruction encoding is documented in
+/// [pack-format](https://git-scm.com/docs/pack-format).
+fn expandDelta(base_object: anytype, delta_reader: anytype, writer: anytype) !void {
+    while (true) {
+        const opcode: u8 = delta_reader.readByte() catch |err| switch (err) {
+            error.EndOfStream => return,
+            else => |unexpected| return unexpected,
+        };
+
+        if ((opcode & 0x80) != 0) {
+            // Copy from the base object. Bits 0-3 say which of the four
+            // offset bytes follow and bits 4-6 which of the three size bytes
+            // follow, least significant byte first; absent bytes are zero.
+            var offset: u32 = 0;
+            inline for (0..4) |byte_index| {
+                if ((opcode & (0x01 << byte_index)) != 0) {
+                    offset |= @as(u32, try delta_reader.readByte()) << (8 * byte_index);
+                }
+            }
+            var size: u24 = 0;
+            inline for (0..3) |byte_index| {
+                if ((opcode & (0x10 << byte_index)) != 0) {
+                    size |= @as(u24, try delta_reader.readByte()) << (8 * byte_index);
+                }
+            }
+            // An encoded size of zero stands for 0x10000 bytes
+            if (size == 0) size = 0x10000;
+
+            try base_object.seekTo(offset);
+            var copy_source = std.io.limitedReader(base_object.reader(), size);
+            var copy_fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
+            try copy_fifo.pump(copy_source.reader(), writer);
+            continue;
+        }
+
+        // Literal data: the low 7 bits give the number of bytes that follow
+        // in the delta stream itself
+        const literal_len = opcode & 0x7F;
+        if (literal_len == 0) return error.InvalidDeltaInstruction;
+        var literal_source = std.io.limitedReader(delta_reader, literal_len);
+        var literal_fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
+        try literal_fifo.pump(literal_source.reader(), writer);
+    }
+}
+
+/// Returns a writer type that forwards data to a child writer of type
+/// `WriterType` and feeds the same data to a hasher of type `HasherType`.
+/// `HasherType` (or the type it points to, when it is a pointer) must provide
+/// `update`.
+fn HashedWriter(
+    comptime WriterType: type,
+    comptime HasherType: type,
+) type {
+    return struct {
+        child_writer: WriterType,
+        hasher: HasherType,
+
+        const Error = WriterType.Error;
+        const Writer = std.io.Writer(*@This(), Error, write);
+
+        /// Writes `buf` to the child writer and returns the number of bytes
+        /// it accepted.
+        fn write(hashed_writer: *@This(), buf: []const u8) Error!usize {
+            const amt = try hashed_writer.child_writer.write(buf);
+            // Hash only what the child accepted: after a short write the
+            // caller passes the remainder again, and hashing all of `buf`
+            // here would hash those bytes twice.
+            hashed_writer.hasher.update(buf[0..amt]);
+            return amt;
+        }
+
+        fn writer(hashed_writer: *@This()) Writer {
+            return .{ .context = hashed_writer };
+        }
+    };
+}
+
+/// Creates a `HashedWriter` that forwards to `writer` and updates `hasher`.
+/// The hasher may be passed by value, in which case the result is read from
+/// the returned struct's `hasher` field, or as a pointer to a hasher owned by
+/// the caller.
+fn hashedWriter(
+    writer: anytype,
+    hasher: anytype,
+) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
+    const Result = HashedWriter(@TypeOf(writer), @TypeOf(hasher));
+    return Result{
+        .child_writer = writer,
+        .hasher = hasher,
+    };
+}
+
+test "packfile indexing and checkout" {
+    // To verify the contents of this packfile without using the code in this
+    // file:
+    //
+    // 1. Create a new empty Git repository (`git init`)
+    // 2. `git unpack-objects <path/to/testdata.pack`
+    // 3. `git fsck` -> note the "dangling commit" ID (which matches the commit
+    //    checked out below)
+    // 4. `git checkout dd582c0720819ab7130b103635bd7271b9fd4feb`
+    const testrepo_pack = @embedFile("git/testdata/testrepo.pack");
+
+    var git_dir = testing.tmpDir(.{});
+    defer git_dir.cleanup();
+    var pack_file = try git_dir.dir.createFile("testrepo.pack", .{ .read = true });
+    defer pack_file.close();
+    try pack_file.writeAll(testrepo_pack);
+
+    var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true });
+    defer index_file.close();
+    try indexPack(testing.allocator, pack_file, index_file.writer());
+
+    // Arbitrary size limit on files read while checking the repository contents
+    // (all files in the test repo are known to be much smaller than this)
+    const max_file_size = 4096;
+
+    const index_file_data = try git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size);
+    defer testing.allocator.free(index_file_data);
+    // testrepo.idx is generated by Git. The index created by this file should
+    // match it exactly. Running `git verify-pack -v testrepo.pack` can verify
+    // this.
+    const testrepo_idx = @embedFile("git/testdata/testrepo.idx");
+    try testing.expectEqualSlices(u8, testrepo_idx, index_file_data);
+
+    var repository = try Repository.init(testing.allocator, pack_file, index_file);
+    defer repository.deinit();
+
+    var worktree = testing.tmpIterableDir(.{});
+    defer worktree.cleanup();
+
+    const commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb");
+
+    // `checkout` reports problems that do not abort the checkout through a
+    // `Diagnostics` value; a clean checkout of the test repository is
+    // expected to record none
+    var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
+    defer diagnostics.deinit();
+    try repository.checkout(worktree.iterable_dir.dir, commit_id, &diagnostics);
+    try testing.expect(diagnostics.errors.items.len == 0);
+
+    const expected_files: []const []const u8 = &.{
+        "dir/file",
+        "dir/subdir/file",
+        "dir/subdir/file2",
+        "dir2/file",
+        "dir3/file",
+        "dir3/file2",
+        "file",
+        "file2",
+        "file3",
+        "file4",
+        "file5",
+        "file6",
+        "file7",
+        "file8",
+        "file9",
+    };
+    var actual_files: std.ArrayListUnmanaged([]u8) = .{};
+    defer actual_files.deinit(testing.allocator);
+    defer for (actual_files.items) |file| testing.allocator.free(file);
+    var walker = try worktree.iterable_dir.walk(testing.allocator);
+    defer walker.deinit();
+    while (try walker.next()) |entry| {
+        if (entry.kind != .file) continue;
+        var path = try testing.allocator.dupe(u8, entry.path);
+        errdefer testing.allocator.free(path);
+        // Normalize path separators so the comparison is platform-independent
+        mem.replaceScalar(u8, path, std.fs.path.sep, '/');
+        try actual_files.append(testing.allocator, path);
+    }
+    // The walk order is not relied upon; sort before comparing
+    mem.sortUnstable([]u8, actual_files.items, {}, struct {
+        fn lessThan(_: void, a: []u8, b: []u8) bool {
+            return mem.lessThan(u8, a, b);
+        }
+    }.lessThan);
+    try testing.expectEqualDeep(expected_files, actual_files.items);
+
+    const expected_file_contents =
+        \\revision 1
+        \\revision 2
+        \\revision 4
+        \\revision 5
+        \\revision 7
+        \\revision 8
+        \\revision 9
+        \\revision 10
+        \\revision 12
+        \\revision 13
+        \\revision 14
+        \\revision 18
+        \\revision 19
+        \\
+    ;
+    const actual_file_contents = try worktree.iterable_dir.dir.readFileAlloc(testing.allocator, "file", max_file_size);
+    defer testing.allocator.free(actual_file_contents);
+    try testing.expectEqualStrings(expected_file_contents, actual_file_contents);
+}
+
+/// Checks out a commit of a packfile. Intended for experimenting with and
+/// benchmarking possible optimizations to the indexing and checkout behavior.
+///
+/// Expects three command-line arguments: the path of the packfile, the ID of
+/// the commit to check out, and the path of the worktree directory (created
+/// if necessary). The index is written to `.git/idx` inside the worktree.
+/// Symlinks that could not be created during checkout are reported on stderr.
+pub fn main() !void {
+    const allocator = std.heap.c_allocator;
+
+    const args = try std.process.argsAlloc(allocator);
+    defer std.process.argsFree(allocator, args);
+    if (args.len != 4) {
+        return error.InvalidArguments; // Arguments: packfile commit worktree
+    }
+
+    var pack_file = try std.fs.cwd().openFile(args[1], .{});
+    defer pack_file.close();
+    const commit = try parseOid(args[2]);
+    var worktree = try std.fs.cwd().makeOpenPath(args[3], .{});
+    defer worktree.close();
+
+    var git_dir = try worktree.makeOpenPath(".git", .{});
+    defer git_dir.close();
+
+    std.debug.print("Starting index...\n", .{});
+    var index_file = try git_dir.createFile("idx", .{ .read = true });
+    defer index_file.close();
+    var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
+    try indexPack(allocator, pack_file, index_buffered_writer.writer());
+    // The index is read back through `index_file` below, so it must be fully
+    // written out first
+    try index_buffered_writer.flush();
+    try index_file.sync();
+
+    std.debug.print("Starting checkout...\n", .{});
+    var repository = try Repository.init(allocator, pack_file, index_file);
+    defer repository.deinit();
+    // `checkout` records problems that do not abort the checkout in a
+    // `Diagnostics` value supplied by the caller
+    var diagnostics: Diagnostics = .{ .allocator = allocator };
+    defer diagnostics.deinit();
+    try repository.checkout(worktree, commit, &diagnostics);
+
+    for (diagnostics.errors.items) |item| {
+        switch (item) {
+            .unable_to_create_sym_link => |info| std.debug.print(
+                "unable to create symlink '{s}' -> '{s}': {s}\n",
+                .{ info.file_name, info.link_name, @errorName(info.code) },
+            ),
+        }
+    }
+}
diff --git a/src/git/testdata/testrepo.idx b/src/Package/Fetch/git/testdata/testrepo.idx
Binary files differ.
diff --git a/src/git/testdata/testrepo.pack b/src/Package/Fetch/git/testdata/testrepo.pack
Binary files differ.
diff --git a/src/Manifest.zig b/src/Package/Manifest.zig
diff --git a/src/git.zig b/src/git.zig
@@ -1,1468 +0,0 @@
-//! Git support for package fetching.
-//!
-//! This is not intended to support all features of Git: it is limited to the
-//! basic functionality needed to clone a repository for the purpose of fetching
-//! a package.
-
-const std = @import("std");
-const mem = std.mem;
-const testing = std.testing;
-const Allocator = mem.Allocator;
-const Sha1 = std.crypto.hash.Sha1;
-const assert = std.debug.assert;
-
-const ProgressReader = @import("Package.zig").ProgressReader;
-
-pub const oid_length = Sha1.digest_length;
-pub const fmt_oid_length = 2 * oid_length;
-/// The ID of a Git object (an SHA-1 hash).
-pub const Oid = [oid_length]u8;
-
-pub fn parseOid(s: []const u8) !Oid {
- if (s.len != fmt_oid_length) return error.InvalidOid;
- var oid: Oid = undefined;
- for (&oid, 0..) |*b, i| {
- b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid;
- }
- return oid;
-}
-
-test parseOid {
- try testing.expectEqualSlices(
- u8,
- &.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 },
- &try parseOid("ce919ccf45951856a762ffdb8ef850301cd8c588"),
- );
- try testing.expectError(error.InvalidOid, parseOid("ce919ccf"));
- try testing.expectError(error.InvalidOid, parseOid("master"));
- try testing.expectError(error.InvalidOid, parseOid("HEAD"));
-}
-
-pub const Diagnostics = struct {
- allocator: Allocator,
- errors: std.ArrayListUnmanaged(Error) = .{},
-
- pub const Error = union(enum) {
- unable_to_create_sym_link: struct {
- code: anyerror,
- file_name: []const u8,
- link_name: []const u8,
- },
- };
-
- pub fn deinit(d: *Diagnostics) void {
- for (d.errors.items) |item| {
- switch (item) {
- .unable_to_create_sym_link => |info| {
- d.allocator.free(info.file_name);
- d.allocator.free(info.link_name);
- },
- }
- }
- d.errors.deinit(d.allocator);
- d.* = undefined;
- }
-};
-
-pub const Repository = struct {
- odb: Odb,
-
- pub fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Repository {
- return .{ .odb = try Odb.init(allocator, pack_file, index_file) };
- }
-
- pub fn deinit(repository: *Repository) void {
- repository.odb.deinit();
- repository.* = undefined;
- }
-
- /// Checks out the repository at `commit_oid` to `worktree`.
- pub fn checkout(
- repository: *Repository,
- worktree: std.fs.Dir,
- commit_oid: Oid,
- diagnostics: *Diagnostics,
- ) !void {
- try repository.odb.seekOid(commit_oid);
- const tree_oid = tree_oid: {
- var commit_object = try repository.odb.readObject();
- if (commit_object.type != .commit) return error.NotACommit;
- break :tree_oid try getCommitTree(commit_object.data);
- };
- try repository.checkoutTree(worktree, tree_oid, "", diagnostics);
- }
-
- /// Checks out the tree at `tree_oid` to `worktree`.
- fn checkoutTree(
- repository: *Repository,
- dir: std.fs.Dir,
- tree_oid: Oid,
- current_path: []const u8,
- diagnostics: *Diagnostics,
- ) !void {
- try repository.odb.seekOid(tree_oid);
- const tree_object = try repository.odb.readObject();
- if (tree_object.type != .tree) return error.NotATree;
- // The tree object may be evicted from the object cache while we're
- // iterating over it, so we can make a defensive copy here to make sure
- // it remains valid until we're done with it
- const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data);
- defer repository.odb.allocator.free(tree_data);
-
- var tree_iter: TreeIterator = .{ .data = tree_data };
- while (try tree_iter.next()) |entry| {
- switch (entry.type) {
- .directory => {
- try dir.makeDir(entry.name);
- var subdir = try dir.openDir(entry.name, .{});
- defer subdir.close();
- const sub_path = try std.fs.path.join(repository.odb.allocator, &.{ current_path, entry.name });
- defer repository.odb.allocator.free(sub_path);
- try repository.checkoutTree(subdir, entry.oid, sub_path, diagnostics);
- },
- .file => {
- var file = try dir.createFile(entry.name, .{});
- defer file.close();
- try repository.odb.seekOid(entry.oid);
- var file_object = try repository.odb.readObject();
- if (file_object.type != .blob) return error.InvalidFile;
- try file.writeAll(file_object.data);
- try file.sync();
- },
- .symlink => {
- try repository.odb.seekOid(entry.oid);
- var symlink_object = try repository.odb.readObject();
- if (symlink_object.type != .blob) return error.InvalidFile;
- const link_name = symlink_object.data;
- dir.symLink(link_name, entry.name, .{}) catch |e| {
- const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name });
- errdefer diagnostics.allocator.free(file_name);
- const link_name_dup = try diagnostics.allocator.dupe(u8, link_name);
- errdefer diagnostics.allocator.free(link_name_dup);
- try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_sym_link = .{
- .code = e,
- .file_name = file_name,
- .link_name = link_name_dup,
- } });
- };
- },
- .gitlink => {
- // Consistent with git archive behavior, create the directory but
- // do nothing else
- try dir.makeDir(entry.name);
- },
- }
- }
- }
-
- /// Returns the ID of the tree associated with the given commit (provided as
- /// raw object data).
- fn getCommitTree(commit_data: []const u8) !Oid {
- if (!mem.startsWith(u8, commit_data, "tree ") or
- commit_data.len < "tree ".len + fmt_oid_length + "\n".len or
- commit_data["tree ".len + fmt_oid_length] != '\n')
- {
- return error.InvalidCommit;
- }
- return try parseOid(commit_data["tree ".len..][0..fmt_oid_length]);
- }
-
- const TreeIterator = struct {
- data: []const u8,
- pos: usize = 0,
-
- const Entry = struct {
- type: Type,
- executable: bool,
- name: [:0]const u8,
- oid: Oid,
-
- const Type = enum(u4) {
- directory = 0o4,
- file = 0o10,
- symlink = 0o12,
- gitlink = 0o16,
- };
- };
-
- fn next(iterator: *TreeIterator) !?Entry {
- if (iterator.pos == iterator.data.len) return null;
-
- const mode_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, ' ') orelse return error.InvalidTree;
- const mode: packed struct {
- permission: u9,
- unused: u3,
- type: u4,
- } = @bitCast(std.fmt.parseUnsigned(u16, iterator.data[iterator.pos..mode_end], 8) catch return error.InvalidTree);
- const @"type" = std.meta.intToEnum(Entry.Type, mode.type) catch return error.InvalidTree;
- const executable = switch (mode.permission) {
- 0 => if (@"type" == .file) return error.InvalidTree else false,
- 0o644 => if (@"type" != .file) return error.InvalidTree else false,
- 0o755 => if (@"type" != .file) return error.InvalidTree else true,
- else => return error.InvalidTree,
- };
- iterator.pos = mode_end + 1;
-
- const name_end = mem.indexOfScalarPos(u8, iterator.data, iterator.pos, 0) orelse return error.InvalidTree;
- const name = iterator.data[iterator.pos..name_end :0];
- iterator.pos = name_end + 1;
-
- if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree;
- const oid = iterator.data[iterator.pos..][0..oid_length].*;
- iterator.pos += oid_length;
-
- return .{ .type = @"type", .executable = executable, .name = name, .oid = oid };
- }
- };
-};
-
-/// A Git object database backed by a packfile. A packfile index is also used
-/// for efficient access to objects in the packfile.
-///
-/// The format of the packfile and its associated index are documented in
-/// [pack-format](https://git-scm.com/docs/pack-format).
-const Odb = struct {
- pack_file: std.fs.File,
- index_header: IndexHeader,
- index_file: std.fs.File,
- cache: ObjectCache = .{},
- allocator: Allocator,
-
- /// Initializes the database from open pack and index files.
- fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Odb {
- try pack_file.seekTo(0);
- try index_file.seekTo(0);
- const index_header = try IndexHeader.read(index_file.reader());
- return .{
- .pack_file = pack_file,
- .index_header = index_header,
- .index_file = index_file,
- .allocator = allocator,
- };
- }
-
- fn deinit(odb: *Odb) void {
- odb.cache.deinit(odb.allocator);
- odb.* = undefined;
- }
-
- /// Reads the object at the current position in the database.
- fn readObject(odb: *Odb) !Object {
- var base_offset = try odb.pack_file.getPos();
- var base_header: EntryHeader = undefined;
- var delta_offsets = std.ArrayListUnmanaged(u64){};
- defer delta_offsets.deinit(odb.allocator);
- const base_object = while (true) {
- if (odb.cache.get(base_offset)) |base_object| break base_object;
-
- base_header = try EntryHeader.read(odb.pack_file.reader());
- switch (base_header) {
- .ofs_delta => |ofs_delta| {
- try delta_offsets.append(odb.allocator, base_offset);
- base_offset = std.math.sub(u64, base_offset, ofs_delta.offset) catch return error.InvalidFormat;
- try odb.pack_file.seekTo(base_offset);
- },
- .ref_delta => |ref_delta| {
- try delta_offsets.append(odb.allocator, base_offset);
- try odb.seekOid(ref_delta.base_object);
- base_offset = try odb.pack_file.getPos();
- },
- else => {
- const base_data = try readObjectRaw(odb.allocator, odb.pack_file.reader(), base_header.uncompressedLength());
- errdefer odb.allocator.free(base_data);
- const base_object: Object = .{ .type = base_header.objectType(), .data = base_data };
- try odb.cache.put(odb.allocator, base_offset, base_object);
- break base_object;
- },
- }
- };
-
- const base_data = try resolveDeltaChain(
- odb.allocator,
- odb.pack_file,
- base_object,
- delta_offsets.items,
- &odb.cache,
- );
-
- return .{ .type = base_object.type, .data = base_data };
- }
-
- /// Seeks to the beginning of the object with the given ID.
- fn seekOid(odb: *Odb, oid: Oid) !void {
- const key = oid[0];
- var start_index = if (key > 0) odb.index_header.fan_out_table[key - 1] else 0;
- var end_index = odb.index_header.fan_out_table[key];
- const found_index = while (start_index < end_index) {
- const mid_index = start_index + (end_index - start_index) / 2;
- try odb.index_file.seekTo(IndexHeader.size + mid_index * oid_length);
- const mid_oid = try odb.index_file.reader().readBytesNoEof(oid_length);
- switch (mem.order(u8, &mid_oid, &oid)) {
- .lt => start_index = mid_index + 1,
- .gt => end_index = mid_index,
- .eq => break mid_index,
- }
- } else return error.ObjectNotFound;
-
- const n_objects = odb.index_header.fan_out_table[255];
- const offset_values_start = IndexHeader.size + n_objects * (oid_length + 4);
- try odb.index_file.seekTo(offset_values_start + found_index * 4);
- const l1_offset: packed struct { value: u31, big: bool } = @bitCast(try odb.index_file.reader().readIntBig(u32));
- const pack_offset = pack_offset: {
- if (l1_offset.big) {
- const l2_offset_values_start = offset_values_start + n_objects * 4;
- try odb.index_file.seekTo(l2_offset_values_start + l1_offset.value * 4);
- break :pack_offset try odb.index_file.reader().readIntBig(u64);
- } else {
- break :pack_offset l1_offset.value;
- }
- };
-
- try odb.pack_file.seekTo(pack_offset);
- }
-};
-
-/// A Git object: its kind together with its data.
-const Object = struct {
-    type: Type,
-    data: []const u8,
-
-    /// The kinds of object that carry data of their own.
-    const Type = enum { commit, tree, blob, tag };
-};
-
-/// A least-recently-used cache of object data, keyed by pack offset.
-///
-/// Resolving a delta needs the fully reconstructed data of its base, so
-/// keeping recently produced objects around lets later delta chains start
-/// from an already resolved link instead of rebuilding it. Memory use is
-/// bounded: once the cached data exceeds `max_byte_size`, entries are evicted
-/// starting with the least recently used one.
-///
-/// The cache owns every object stored in it. Callers must not free object
-/// data after handing it to `put`; whatever is still cached is released by
-/// `deinit`.
-const ObjectCache = struct {
-    objects: std.AutoHashMapUnmanaged(u64, CacheEntry) = .{},
-    lru_nodes: LruList = .{},
-    byte_size: usize = 0,
-
-    const max_byte_size = 128 * 1024 * 1024; // 128MiB
-    /// Offsets of the cached objects, ordered from least to most recently
-    /// used.
-    const LruList = std.DoublyLinkedList(u64);
-    const CacheEntry = struct { object: Object, lru_node: *LruList.Node };
-
-    /// Frees every cached object and all bookkeeping memory.
-    fn deinit(cache: *ObjectCache, allocator: Allocator) void {
-        var entries = cache.objects.valueIterator();
-        while (entries.next()) |entry| {
-            allocator.free(entry.object.data);
-            allocator.destroy(entry.lru_node);
-        }
-        cache.objects.deinit(allocator);
-        cache.* = undefined;
-    }
-
-    /// Looks up the object cached for `offset`. A hit also marks the entry
-    /// as the most recently used one.
-    fn get(cache: *ObjectCache, offset: u64) ?Object {
-        const entry = cache.objects.get(offset) orelse return null;
-        cache.lru_nodes.remove(entry.lru_node);
-        cache.lru_nodes.append(entry.lru_node);
-        return entry.object;
-    }
-
-    /// Stores `object` under `offset`, replacing (and freeing) any object
-    /// previously stored there, then evicts least recently used entries
-    /// while the cache is over its size limit. The object just stored is
-    /// never evicted by this call, even if it alone exceeds the limit; it
-    /// stays valid until the next call to `put` or `deinit`.
-    fn put(cache: *ObjectCache, allocator: Allocator, offset: u64, object: Object) !void {
-        const new_node = try allocator.create(LruList.Node);
-        errdefer allocator.destroy(new_node);
-        new_node.data = offset;
-
-        const slot = try cache.objects.getOrPut(allocator, offset);
-        if (slot.found_existing) {
-            const previous = slot.value_ptr.*;
-            cache.byte_size -= previous.object.data.len;
-            cache.lru_nodes.remove(previous.lru_node);
-            allocator.destroy(previous.lru_node);
-            allocator.free(previous.object.data);
-        }
-        slot.value_ptr.* = .{ .object = object, .lru_node = new_node };
-        cache.byte_size += object.data.len;
-        cache.lru_nodes.append(new_node);
-
-        // Requiring more than one node keeps the entry that was just
-        // appended alive even when it is larger than the whole budget.
-        while (cache.byte_size > max_byte_size and cache.lru_nodes.len > 1) {
-            const oldest = cache.lru_nodes.popFirst().?;
-            const oldest_offset = oldest.data;
-            allocator.destroy(oldest);
-            const evicted = cache.objects.fetchRemove(oldest_offset).?.value.object;
-            cache.byte_size -= evicted.data.len;
-            allocator.free(evicted.data);
-        }
-    }
-};
-
-/// One pkt-line of the Git wire protocol.
-///
-/// The pkt-line framing is specified in
-/// [protocol-common](https://git-scm.com/docs/protocol-common); the special
-/// delimiter and response-end packets are specified in
-/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
-const Packet = union(enum) {
-    flush,
-    delimiter,
-    response_end,
-    data: []const u8,
-
-    const max_data_length = 65516;
-
-    /// Reads one packet in pkt-line format from `reader`. The payload of a
-    /// data packet is stored in `buf`, and the returned `data` slice points
-    /// into it.
-    fn read(reader: anytype, buf: *[max_data_length]u8) !Packet {
-        const length_digits = try reader.readBytesNoEof(4);
-        const length = std.fmt.parseUnsigned(u16, &length_digits, 16) catch return error.InvalidPacket;
-        // Lengths 0 to 2 denote the special packets. A length of 3 cannot
-        // occur because the four length digits are included in the count.
-        if (length == 0) return .flush;
-        if (length == 1) return .delimiter;
-        if (length == 2) return .response_end;
-        if (length == 3) return error.InvalidPacket;
-
-        const payload_length = length - 4;
-        if (payload_length > max_data_length) return error.InvalidPacket;
-        const payload = buf[0..payload_length];
-        try reader.readNoEof(payload);
-        return .{ .data = payload };
-    }
-
-    /// Writes `packet` in pkt-line format to `writer`. The payload of a data
-    /// packet must not be longer than `max_data_length`.
-    fn write(packet: Packet, writer: anytype) !void {
-        const payload = switch (packet) {
-            .flush => return writer.writeAll("0000"),
-            .delimiter => return writer.writeAll("0001"),
-            .response_end => return writer.writeAll("0002"),
-            .data => |bytes| bytes,
-        };
-        assert(payload.len <= max_data_length);
-        try writer.print("{x:0>4}", .{payload.len + 4});
-        try writer.writeAll(payload);
-    }
-};
-
-/// A client session for the Git protocol, currently limited to an HTTP(S)
-/// transport. Only protocol version 2 is supported, as documented in
-/// [protocol-v2](https://git-scm.com/docs/protocol-v2).
-pub const Session = struct {
-    transport: *std.http.Client,
-    uri: std.Uri,
-    supports_agent: bool = false,
-    supports_shallow: bool = false,
-
-    const agent = "zig/" ++ @import("builtin").zig_version_string;
-    const agent_capability = std.fmt.comptimePrint("agent={s}\n", .{agent});
-    /// The number of redirects capability discovery is willing to follow.
-    const max_redirects = 3;
-
-    /// Discovers server capabilities. This should be called before using any
-    /// other client functionality, or the client will be forced to default to
-    /// the bare minimum server requirements, which may be considerably less
-    /// efficient (e.g. no shallow fetches).
-    ///
-    /// See the note on `getCapabilities` regarding `redirect_uri`.
-    pub fn discoverCapabilities(
-        session: *Session,
-        allocator: Allocator,
-        redirect_uri: *[]u8,
-    ) !void {
-        var capability_iterator = try session.getCapabilities(allocator, redirect_uri);
-        defer capability_iterator.deinit();
-        while (try capability_iterator.next()) |capability| {
-            if (mem.eql(u8, capability.key, "agent")) {
-                session.supports_agent = true;
-            } else if (mem.eql(u8, capability.key, "fetch")) {
-                // The value of the fetch capability is a space-separated
-                // list of supported fetch features.
-                var feature_iterator = mem.splitScalar(u8, capability.value orelse continue, ' ');
-                while (feature_iterator.next()) |feature| {
-                    if (mem.eql(u8, feature, "shallow")) {
-                        session.supports_shallow = true;
-                    }
-                }
-            }
-        }
-    }
-
-    /// Returns an iterator over capabilities supported by the server.
-    ///
-    /// If the server redirects the request, `error.Redirected` is returned and
-    /// `redirect_uri` is populated with the URI resulting from the redirects.
-    /// When this occurs, the value of `redirect_uri` must be freed with
-    /// `allocator` when the caller is done with it.
-    fn getCapabilities(
-        session: Session,
-        allocator: Allocator,
-        redirect_uri: *[]u8,
-    ) !CapabilityIterator {
-        var info_refs_uri = session.uri;
-        info_refs_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "info/refs" });
-        defer allocator.free(info_refs_uri.path);
-        info_refs_uri.query = "service=git-upload-pack";
-        info_refs_uri.fragment = null;
-
-        var headers = std.http.Headers.init(allocator);
-        defer headers.deinit();
-        try headers.append("Git-Protocol", "version=2");
-
-        var request = try session.transport.request(.GET, info_refs_uri, headers, .{
-            .max_redirects = max_redirects,
-        });
-        errdefer request.deinit();
-        try request.start(.{});
-        try request.finish();
-
-        try request.wait();
-        if (request.response.status != .ok) return error.ProtocolError;
-        // Fewer redirects left than we allowed means at least one was followed.
-        if (request.redirects_left < max_redirects) {
-            if (!mem.endsWith(u8, request.uri.path, "/info/refs")) return error.UnparseableRedirect;
-            var new_uri = request.uri;
-            new_uri.path = new_uri.path[0 .. new_uri.path.len - "/info/refs".len];
-            new_uri.query = null;
-            redirect_uri.* = try std.fmt.allocPrint(allocator, "{+/}", .{new_uri});
-            return error.Redirected;
-        }
-
-        const reader = request.reader();
-        var buf: [Packet.max_data_length]u8 = undefined;
-        var state: enum { response_start, response_content } = .response_start;
-        while (true) {
-            // Some Git servers (at least GitHub) include an additional
-            // '# service=git-upload-pack' informative response before sending
-            // the expected 'version 2' packet and capability information.
-            // This is not universal: SourceHut, for example, does not do this.
-            // Thus, we need to skip any such useless additional responses
-            // before we get the one we're actually looking for. The responses
-            // will be delimited by flush packets.
-            const packet = Packet.read(reader, &buf) catch |e| switch (e) {
-                error.EndOfStream => return error.UnsupportedProtocol, // 'version 2' packet not found
-                else => |other| return other,
-            };
-            switch (packet) {
-                .flush => state = .response_start,
-                .data => |data| switch (state) {
-                    .response_start => if (mem.eql(u8, data, "version 2\n")) {
-                        return .{ .request = request };
-                    } else {
-                        state = .response_content;
-                    },
-                    else => {},
-                },
-                else => return error.UnexpectedPacket,
-            }
-        }
-    }
-
-    /// Iterates over the capability lines of a capability advertisement.
-    /// The iterator owns the underlying HTTP request and must be released
-    /// with `deinit`.
-    const CapabilityIterator = struct {
-        request: std.http.Client.Request,
-        buf: [Packet.max_data_length]u8 = undefined,
-
-        /// A single advertised capability, either `key` alone or `key=value`.
-        const Capability = struct {
-            key: []const u8,
-            value: ?[]const u8 = null,
-        };
-
-        fn deinit(iterator: *CapabilityIterator) void {
-            iterator.request.deinit();
-            iterator.* = undefined;
-        }
-
-        /// Returns the next capability, or null once the terminating flush
-        /// packet has been read. The returned slices point into the
-        /// iterator's buffer, which is overwritten by the next call.
-        fn next(iterator: *CapabilityIterator) !?Capability {
-            switch (try Packet.read(iterator.request.reader(), &iterator.buf)) {
-                .flush => return null,
-                // Every capability line must end with a newline, which is
-                // not part of the key or the value.
-                .data => |data| if (data.len > 0 and data[data.len - 1] == '\n') {
-                    if (mem.indexOfScalar(u8, data, '=')) |separator_pos| {
-                        return .{ .key = data[0..separator_pos], .value = data[separator_pos + 1 .. data.len - 1] };
-                    } else {
-                        return .{ .key = data[0 .. data.len - 1] };
-                    }
-                } else return error.UnexpectedPacket,
-                else => return error.UnexpectedPacket,
-            }
-        }
-    };
-
-    const ListRefsOptions = struct {
-        /// The ref prefixes (if any) to use to filter the refs available on the
-        /// server. Note that the client must still check the returned refs
-        /// against its desired filters itself: the server is not required to
-        /// respect these prefix filters and may return other refs as well.
-        ref_prefixes: []const []const u8 = &.{},
-        /// Whether to include symref targets for returned symbolic refs.
-        include_symrefs: bool = false,
-        /// Whether to include the peeled object ID for returned tag refs.
-        include_peeled: bool = false,
-    };
-
-    /// Returns an iterator over refs known to the server.
-    pub fn listRefs(session: Session, allocator: Allocator, options: ListRefsOptions) !RefIterator {
-        var upload_pack_uri = session.uri;
-        upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
-        defer allocator.free(upload_pack_uri.path);
-        upload_pack_uri.query = null;
-        upload_pack_uri.fragment = null;
-
-        var headers = std.http.Headers.init(allocator);
-        defer headers.deinit();
-        try headers.append("Content-Type", "application/x-git-upload-pack-request");
-        try headers.append("Git-Protocol", "version=2");
-
-        // The request body is the command and its capabilities, a delimiter
-        // packet, the command arguments, and a final flush packet.
-        var body = std.ArrayListUnmanaged(u8){};
-        defer body.deinit(allocator);
-        const body_writer = body.writer(allocator);
-        try Packet.write(.{ .data = "command=ls-refs\n" }, body_writer);
-        if (session.supports_agent) {
-            try Packet.write(.{ .data = agent_capability }, body_writer);
-        }
-        try Packet.write(.delimiter, body_writer);
-        for (options.ref_prefixes) |ref_prefix| {
-            const ref_prefix_packet = try std.fmt.allocPrint(allocator, "ref-prefix {s}\n", .{ref_prefix});
-            defer allocator.free(ref_prefix_packet);
-            try Packet.write(.{ .data = ref_prefix_packet }, body_writer);
-        }
-        if (options.include_symrefs) {
-            try Packet.write(.{ .data = "symrefs\n" }, body_writer);
-        }
-        if (options.include_peeled) {
-            try Packet.write(.{ .data = "peel\n" }, body_writer);
-        }
-        try Packet.write(.flush, body_writer);
-
-        var request = try session.transport.request(.POST, upload_pack_uri, headers, .{
-            .handle_redirects = false,
-        });
-        errdefer request.deinit();
-        request.transfer_encoding = .{ .content_length = body.items.len };
-        try request.start(.{});
-        try request.writeAll(body.items);
-        try request.finish();
-
-        try request.wait();
-        if (request.response.status != .ok) return error.ProtocolError;
-
-        return .{ .request = request };
-    }
-
-    /// Iterates over the refs in an ls-refs response. The iterator owns the
-    /// underlying HTTP request and must be released with `deinit`.
-    pub const RefIterator = struct {
-        request: std.http.Client.Request,
-        buf: [Packet.max_data_length]u8 = undefined,
-
-        pub const Ref = struct {
-            oid: Oid,
-            name: []const u8,
-            symref_target: ?[]const u8,
-            peeled: ?Oid,
-        };
-
-        pub fn deinit(iterator: *RefIterator) void {
-            iterator.request.deinit();
-            iterator.* = undefined;
-        }
-
-        /// Returns the next ref, or null once the terminating flush packet
-        /// has been read. The returned slices point into the iterator's
-        /// buffer, which is overwritten by the next call.
-        pub fn next(iterator: *RefIterator) !?Ref {
-            switch (try Packet.read(iterator.request.reader(), &iterator.buf)) {
-                .flush => return null,
-                .data => |data| {
-                    // Each line has the form "<oid> <name>[ <attribute>]...\n".
-                    const oid_sep_pos = mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidRefPacket;
-                    const oid = parseOid(data[0..oid_sep_pos]) catch return error.InvalidRefPacket;
-
-                    const name_sep_pos = mem.indexOfAnyPos(u8, data, oid_sep_pos + 1, " \n") orelse return error.InvalidRefPacket;
-                    const name = data[oid_sep_pos + 1 .. name_sep_pos];
-
-                    var symref_target: ?[]const u8 = null;
-                    var peeled: ?Oid = null;
-                    var last_sep_pos = name_sep_pos;
-                    // A space after the previous field means another
-                    // attribute follows; a newline ends the line.
-                    // Unrecognized attributes are ignored.
-                    while (data[last_sep_pos] == ' ') {
-                        const next_sep_pos = mem.indexOfAnyPos(u8, data, last_sep_pos + 1, " \n") orelse return error.InvalidRefPacket;
-                        const attribute = data[last_sep_pos + 1 .. next_sep_pos];
-                        if (mem.startsWith(u8, attribute, "symref-target:")) {
-                            symref_target = attribute["symref-target:".len..];
-                        } else if (mem.startsWith(u8, attribute, "peeled:")) {
-                            peeled = parseOid(attribute["peeled:".len..]) catch return error.InvalidRefPacket;
-                        }
-                        last_sep_pos = next_sep_pos;
-                    }
-
-                    return .{ .oid = oid, .name = name, .symref_target = symref_target, .peeled = peeled };
-                },
-                else => return error.UnexpectedPacket,
-            }
-        }
-    };
-
-    /// Fetches the given refs from the server. A shallow fetch (depth 1) is
-    /// performed if the server supports it.
-    ///
-    /// Returns `error.InvalidWant` if an element of `wants` is too long to be
-    /// sent in a single packet.
-    pub fn fetch(session: Session, allocator: Allocator, wants: []const []const u8) !FetchStream {
-        var upload_pack_uri = session.uri;
-        upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
-        defer allocator.free(upload_pack_uri.path);
-        upload_pack_uri.query = null;
-        upload_pack_uri.fragment = null;
-
-        var headers = std.http.Headers.init(allocator);
-        defer headers.deinit();
-        try headers.append("Content-Type", "application/x-git-upload-pack-request");
-        try headers.append("Git-Protocol", "version=2");
-
-        var body = std.ArrayListUnmanaged(u8){};
-        defer body.deinit(allocator);
-        const body_writer = body.writer(allocator);
-        try Packet.write(.{ .data = "command=fetch\n" }, body_writer);
-        if (session.supports_agent) {
-            try Packet.write(.{ .data = agent_capability }, body_writer);
-        }
-        try Packet.write(.delimiter, body_writer);
-        // Our packfile parser supports the OFS_DELTA object type
-        try Packet.write(.{ .data = "ofs-delta\n" }, body_writer);
-        // We do not currently convey server progress information to the user
-        try Packet.write(.{ .data = "no-progress\n" }, body_writer);
-        if (session.supports_shallow) {
-            try Packet.write(.{ .data = "deepen 1\n" }, body_writer);
-        }
-        var want_buf: [Packet.max_data_length]u8 = undefined;
-        for (wants) |want| {
-            // `want` is supplied by the caller, so running out of buffer
-            // space is a reportable condition rather than an impossibility.
-            const arg = std.fmt.bufPrint(&want_buf, "want {s}\n", .{want}) catch return error.InvalidWant;
-            try Packet.write(.{ .data = arg }, body_writer);
-        }
-        try Packet.write(.{ .data = "done\n" }, body_writer);
-        try Packet.write(.flush, body_writer);
-
-        var request = try session.transport.request(.POST, upload_pack_uri, headers, .{
-            .handle_redirects = false,
-        });
-        errdefer request.deinit();
-        request.transfer_encoding = .{ .content_length = body.items.len };
-        try request.start(.{});
-        try request.writeAll(body.items);
-        try request.finish();
-
-        try request.wait();
-        if (request.response.status != .ok) return error.ProtocolError;
-
-        const reader = request.reader();
-        // We are not interested in any of the sections of the returned fetch
-        // data other than the packfile section, since we aren't doing anything
-        // complex like ref negotiation (this is a fresh clone).
-        var buf: [Packet.max_data_length]u8 = undefined;
-        var state: enum { section_start, section_content } = .section_start;
-        while (true) {
-            const packet = try Packet.read(reader, &buf);
-            switch (state) {
-                .section_start => switch (packet) {
-                    .data => |data| if (mem.eql(u8, data, "packfile\n")) {
-                        return .{ .request = request };
-                    } else {
-                        state = .section_content;
-                    },
-                    else => return error.UnexpectedPacket,
-                },
-                .section_content => switch (packet) {
-                    .delimiter => state = .section_start,
-                    .data => {},
-                    else => return error.UnexpectedPacket,
-                },
-            }
-        }
-    }
-
-    /// A stream over the pack data of a fetch response. The stream owns the
-    /// underlying HTTP request and must be released with `deinit`.
-    pub const FetchStream = struct {
-        request: std.http.Client.Request,
-        buf: [Packet.max_data_length]u8 = undefined,
-        /// Position in `buf` of the next pack data byte to hand out.
-        pos: usize = 0,
-        /// End of the valid data in `buf`.
-        len: usize = 0,
-
-        pub fn deinit(stream: *FetchStream) void {
-            stream.request.deinit();
-        }
-
-        pub const ReadError = std.http.Client.Request.ReadError || error{
-            InvalidPacket,
-            ProtocolError,
-            UnexpectedPacket,
-        };
-        pub const Reader = std.io.Reader(*FetchStream, ReadError, read);
-
-        /// The first byte of every data packet in the packfile section,
-        /// identifying what the rest of the packet contains.
-        const StreamCode = enum(u8) {
-            pack_data = 1,
-            progress = 2,
-            fatal_error = 3,
-            _,
-        };
-
-        pub fn reader(stream: *FetchStream) Reader {
-            return .{ .context = stream };
-        }
-
-        /// Copies pack data into `buf` and returns the number of bytes
-        /// copied, or 0 once the terminating flush packet has been read.
-        /// Packets that do not carry pack data are skipped, except for a
-        /// fatal error packet, which fails with `error.ProtocolError`.
-        pub fn read(stream: *FetchStream, buf: []u8) !usize {
-            if (stream.pos == stream.len) {
-                // The previous packet is used up: read packets until one
-                // carrying pack data arrives.
-                while (true) {
-                    switch (try Packet.read(stream.request.reader(), &stream.buf)) {
-                        .flush => return 0,
-                        .data => |data| if (data.len > 1) switch (@as(StreamCode, @enumFromInt(data[0]))) {
-                            .pack_data => {
-                                // Skip the stream code byte itself.
-                                stream.pos = 1;
-                                stream.len = data.len;
-                                break;
-                            },
-                            .fatal_error => return error.ProtocolError,
-                            else => {},
-                        },
-                        else => return error.UnexpectedPacket,
-                    }
-                }
-            }
-
-            const size = @min(buf.len, stream.len - stream.pos);
-            @memcpy(buf[0..size], stream.buf[stream.pos .. stream.pos + size]);
-            stream.pos += size;
-            return size;
-        }
-    };
-};
-
-/// The header at the start of a packfile.
-const PackHeader = struct {
-    total_objects: u32,
-
-    const signature = "PACK";
-    const supported_version = 2;
-
-    /// Parses a pack header from `reader`: the signature, then a big-endian
-    /// version and a big-endian object count. A stream that ends before the
-    /// header is complete is reported as `error.InvalidHeader`.
-    fn read(reader: anytype) !PackHeader {
-        const magic = reader.readBytesNoEof(signature.len) catch |err| switch (err) {
-            error.EndOfStream => return error.InvalidHeader,
-            else => |unexpected| return unexpected,
-        };
-        if (!mem.eql(u8, signature, &magic)) return error.InvalidHeader;
-
-        const version = reader.readIntBig(u32) catch |err| switch (err) {
-            error.EndOfStream => return error.InvalidHeader,
-            else => |unexpected| return unexpected,
-        };
-        if (version != supported_version) return error.UnsupportedVersion;
-
-        return .{
-            .total_objects = reader.readIntBig(u32) catch |err| switch (err) {
-                error.EndOfStream => return error.InvalidHeader,
-                else => |unexpected| return unexpected,
-            },
-        };
-    }
-};
-
-/// The header in front of each entry in a packfile. It records the entry
-/// type, the uncompressed length of the entry's data and, for delta entries,
-/// how to find the base object.
-const EntryHeader = union(Type) {
-    commit: Undeltified,
-    tree: Undeltified,
-    blob: Undeltified,
-    tag: Undeltified,
-    ofs_delta: OfsDelta,
-    ref_delta: RefDelta,
-
-    const Type = enum(u3) {
-        commit = 1,
-        tree = 2,
-        blob = 3,
-        tag = 4,
-        ofs_delta = 6,
-        ref_delta = 7,
-    };
-
-    const Undeltified = struct { uncompressed_length: u64 };
-
-    /// A delta whose base is another entry of the same pack.
-    const OfsDelta = struct { offset: u64, uncompressed_length: u64 };
-
-    /// A delta whose base is identified by its object ID.
-    const RefDelta = struct { base_object: Oid, uncompressed_length: u64 };
-
-    /// Returns the object type of an undeltified entry. Must not be called
-    /// on a delta entry.
-    fn objectType(header: EntryHeader) Object.Type {
-        return switch (header) {
-            .commit => .commit,
-            .tree => .tree,
-            .blob => .blob,
-            .tag => .tag,
-            .ofs_delta, .ref_delta => unreachable,
-        };
-    }
-
-    /// Returns the uncompressed length recorded in the header, whatever the
-    /// entry type.
-    fn uncompressedLength(header: EntryHeader) u64 {
-        return switch (header) {
-            .commit, .tree, .blob, .tag => |undeltified| undeltified.uncompressed_length,
-            .ofs_delta => |delta| delta.uncompressed_length,
-            .ref_delta => |delta| delta.uncompressed_length,
-        };
-    }
-
-    /// Parses an entry header from `reader`. A malformed header, or a stream
-    /// that ends where a fixed-size part of the header is expected, is
-    /// reported as `error.InvalidFormat`.
-    fn read(reader: anytype) !EntryHeader {
-        // The first byte packs the low four bits of the length, the entry
-        // type, and a flag saying whether more length bytes follow.
-        const InitialByte = packed struct { len: u4, type: u3, has_next: bool };
-        const first_byte = reader.readByte() catch |err| switch (err) {
-            error.EndOfStream => return error.InvalidFormat,
-            else => |unexpected| return unexpected,
-        };
-        const initial: InitialByte = @bitCast(first_byte);
-        const high_bits: u64 = if (initial.has_next) try readSizeVarInt(reader) else 0;
-        const shifted = std.math.shlExact(u64, high_bits, 4) catch return error.InvalidFormat;
-        const uncompressed_length = shifted | initial.len;
-        const kind = std.meta.intToEnum(Type, initial.type) catch return error.InvalidFormat;
-
-        switch (kind) {
-            .commit => return .{ .commit = .{ .uncompressed_length = uncompressed_length } },
-            .tree => return .{ .tree = .{ .uncompressed_length = uncompressed_length } },
-            .blob => return .{ .blob = .{ .uncompressed_length = uncompressed_length } },
-            .tag => return .{ .tag = .{ .uncompressed_length = uncompressed_length } },
-            .ofs_delta => {
-                const offset = try readOffsetVarInt(reader);
-                return .{ .ofs_delta = .{
-                    .offset = offset,
-                    .uncompressed_length = uncompressed_length,
-                } };
-            },
-            .ref_delta => {
-                const base_object = reader.readBytesNoEof(oid_length) catch |err| switch (err) {
-                    error.EndOfStream => return error.InvalidFormat,
-                    else => |unexpected| return unexpected,
-                };
-                return .{ .ref_delta = .{
-                    .base_object = base_object,
-                    .uncompressed_length = uncompressed_length,
-                } };
-            },
-        }
-    }
-};
-
-/// Reads a variable-length size from `r`.
-///
-/// Each byte contributes its low seven bits, least significant group first,
-/// and its high bit says whether another byte follows. Returns
-/// `error.InvalidFormat` if the encoded value does not fit in a `u64`.
-fn readSizeVarInt(r: anytype) !u64 {
-    const Byte = packed struct { value: u7, has_next: bool };
-    var b: Byte = @bitCast(try r.readByte());
-    var value: u64 = b.value;
-    var shift: u6 = 0;
-    while (b.has_next) {
-        b = @bitCast(try r.readByte());
-        shift = std.math.add(u6, shift, 7) catch return error.InvalidFormat;
-        // A plain `<<` would silently drop bits shifted past bit 63; the
-        // checked shift rejects such input instead.
-        value |= std.math.shlExact(u64, b.value, shift) catch return error.InvalidFormat;
-    }
-    return value;
-}
-
-/// Reads a variable-length offset from `r`.
-///
-/// Each byte contributes its low seven bits, most significant group first,
-/// and its high bit says whether another byte follows. Before every
-/// continuation byte is appended, one is added to the value accumulated so
-/// far. Returns `error.InvalidFormat` if the encoded value does not fit in a
-/// `u64`.
-fn readOffsetVarInt(r: anytype) !u64 {
-    const Byte = packed struct { value: u7, has_next: bool };
-    var b: Byte = @bitCast(try r.readByte());
-    var value: u64 = b.value;
-    while (b.has_next) {
-        b = @bitCast(try r.readByte());
-        // Crafted input can drive `value` up to the maximum u64, so the
-        // increment has to be checked as well as the shift.
-        const incremented = std.math.add(u64, value, 1) catch return error.InvalidFormat;
-        value = std.math.shlExact(u64, incremented, 7) catch return error.InvalidFormat;
-        value |= b.value;
-    }
-    return value;
-}
-
-/// The header of a version 2 pack index: a signature, a version number, and
-/// a fan-out table with one big-endian u32 entry per possible first byte of
-/// an object ID.
-const IndexHeader = struct {
-    fan_out_table: [256]u32,
-
-    const signature = "\xFFtOc";
-    const supported_version = 2;
-    const size = 4 + 4 + @sizeOf([256]u32);
-
-    /// Reads and validates an index header from `reader`.
-    fn read(reader: anytype) !IndexHeader {
-        const raw = try reader.readBytesNoEof(size);
-        if (!mem.eql(u8, raw[0..4], signature)) return error.InvalidHeader;
-        if (mem.readIntBig(u32, raw[4..8]) != supported_version) return error.UnsupportedVersion;
-
-        var header: IndexHeader = undefined;
-        for (&header.fan_out_table, 0..) |*entry, i| {
-            entry.* = mem.readIntBig(u32, raw[8 + i * 4 ..][0..4]);
-        }
-        return header;
-    }
-};
-
-/// What the index records about a single entry of the packfile.
-const IndexEntry = struct {
-    /// Position of the entry's header, in bytes from the start of the pack.
-    offset: u64,
-    /// CRC-32 of the entry as stored in the pack (header and compressed data).
-    crc32: u32,
-};
-
-/// Builds a version 2 index for the packfile `pack` and writes it to
-/// `index_writer`. The index format is documented in
-/// [pack-format](https://git-scm.com/docs/pack-format).
-///
-/// Returns `error.IncompletePack` if the pack contains deltas whose base
-/// objects cannot be found in the pack.
-pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) !void {
-    try pack.seekTo(0);
-
-    var index_entries = std.AutoHashMapUnmanaged(Oid, IndexEntry){};
-    defer index_entries.deinit(allocator);
-    var pending_deltas = std.ArrayListUnmanaged(IndexEntry){};
-    defer pending_deltas.deinit(allocator);
-
-    const pack_checksum = try indexPackFirstPass(allocator, pack, &index_entries, &pending_deltas);
-
-    // Sweep over the unresolved deltas until none are left. Every sweep has
-    // to resolve at least one of them; otherwise the remaining ones depend
-    // on objects that are not in the pack.
-    var cache: ObjectCache = .{};
-    defer cache.deinit(allocator);
-    while (pending_deltas.items.len > 0) {
-        const deltas_before = pending_deltas.items.len;
-        var i = deltas_before;
-        while (i > 0) {
-            i -= 1;
-            const delta = pending_deltas.items[i];
-            const oid = (try indexPackHashDelta(allocator, pack, delta, index_entries, &cache)) orelse continue;
-            try index_entries.put(allocator, oid, delta);
-            _ = pending_deltas.swapRemove(i);
-        }
-        if (pending_deltas.items.len == deltas_before) return error.IncompletePack;
-    }
-
-    // The index lists objects in ascending object ID order.
-    var oids = std.ArrayListUnmanaged(Oid){};
-    defer oids.deinit(allocator);
-    try oids.ensureTotalCapacityPrecise(allocator, index_entries.count());
-    var known_oids = index_entries.keyIterator();
-    while (known_oids.next()) |oid| oids.appendAssumeCapacity(oid.*);
-    const OidOrder = struct {
-        fn lessThan(_: void, lhs: Oid, rhs: Oid) bool {
-            return mem.order(u8, &lhs, &rhs) == .lt;
-        }
-    };
-    mem.sortUnstable(Oid, oids.items, {}, OidOrder.lessThan);
-
-    // Fan-out entry `k` is the number of objects whose first ID byte is at
-    // most `k`: count the objects per first byte, then accumulate.
-    var fan_out_table = [_]u32{0} ** 256;
-    for (oids.items) |oid| fan_out_table[oid[0]] += 1;
-    var running_total: u32 = 0;
-    for (&fan_out_table) |*entry| {
-        running_total += entry.*;
-        entry.* = running_total;
-    }
-
-    var index_hashed_writer = hashedWriter(index_writer, Sha1.init(.{}));
-    const writer = index_hashed_writer.writer();
-    try writer.writeAll(IndexHeader.signature);
-    try writer.writeIntBig(u32, IndexHeader.supported_version);
-    for (fan_out_table) |fan_out_entry| try writer.writeIntBig(u32, fan_out_entry);
-    for (oids.items) |oid| try writer.writeAll(&oid);
-    for (oids.items) |oid| try writer.writeIntBig(u32, index_entries.get(oid).?.crc32);
-
-    // Offsets that fit in 31 bits are stored directly. Larger ones go to a
-    // separate table of 64-bit offsets, and the 32-bit slot holds the
-    // position in that table with the high bit set.
-    var big_offsets = std.ArrayListUnmanaged(u64){};
-    defer big_offsets.deinit(allocator);
-    for (oids.items) |oid| {
-        const offset = index_entries.get(oid).?.offset;
-        const small_offset = std.math.cast(u31, offset) orelse {
-            const big_index: u32 = @intCast(big_offsets.items.len);
-            try big_offsets.append(allocator, offset);
-            try writer.writeIntBig(u32, big_index | (1 << 31));
-            continue;
-        };
-        try writer.writeIntBig(u32, small_offset);
-    }
-    for (big_offsets.items) |offset| try writer.writeIntBig(u64, offset);
-
-    // The pack checksum is part of the hashed index contents; the checksum
-    // of the index itself goes directly to the underlying writer.
-    try writer.writeAll(&pack_checksum);
-    const index_checksum = index_hashed_writer.hasher.finalResult();
-    try index_writer.writeAll(&index_checksum);
-}
-
-/// Makes the first pass over the packfile for index construction.
-///
-/// Every non-delta object is hashed and added to `index_entries`; every
-/// delta object is appended to `pending_deltas` for later resolution. The
-/// pack's trailing checksum is verified and returned, since it is part of
-/// the index format.
-fn indexPackFirstPass(
-    allocator: Allocator,
-    pack: std.fs.File,
-    index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry),
-    pending_deltas: *std.ArrayListUnmanaged(IndexEntry),
-) ![Sha1.digest_length]u8 {
-    var buffered = std.io.bufferedReader(pack.reader());
-    var counting = std.io.countingReader(buffered.reader());
-    var hashed = std.compress.hashedReader(counting.reader(), Sha1.init(.{}));
-    const pack_reader = hashed.reader();
-
-    const pack_header = try PackHeader.read(pack_reader);
-
-    for (0..pack_header.total_objects) |_| {
-        const entry_offset = counting.bytes_read;
-        // The CRC covers the entry exactly as stored: header plus compressed data.
-        var crc32_reader = std.compress.hashedReader(pack_reader, std.hash.Crc32.init());
-        const entry_header = try EntryHeader.read(crc32_reader.reader());
-
-        var decompress = try std.compress.zlib.decompressStream(allocator, crc32_reader.reader());
-        defer decompress.deinit();
-        var inflated = std.io.countingReader(decompress.reader());
-        var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
-
-        switch (entry_header) {
-            inline .commit, .tree, .blob, .tag => |object, tag| {
-                var oid_hashed_writer = hashedWriter(std.io.null_writer, Sha1.init(.{}));
-                const oid_writer = oid_hashed_writer.writer();
-                // The object header is not stored in the pack, but it is
-                // part of what the object ID is computed from.
-                try oid_writer.print("{s} {}\x00", .{ @tagName(tag), object.uncompressed_length });
-                try fifo.pump(inflated.reader(), oid_writer);
-                if (inflated.bytes_read != object.uncompressed_length) return error.InvalidObject;
-
-                const oid = oid_hashed_writer.hasher.finalResult();
-                try index_entries.put(allocator, oid, .{
-                    .offset = entry_offset,
-                    .crc32 = crc32_reader.hasher.final(),
-                });
-            },
-            inline .ofs_delta, .ref_delta => |delta| {
-                // The delta is only decompressed here to find its end and
-                // compute its CRC; its object ID is determined later.
-                try fifo.pump(inflated.reader(), std.io.null_writer);
-                if (inflated.bytes_read != delta.uncompressed_length) return error.InvalidObject;
-
-                try pending_deltas.append(allocator, .{
-                    .offset = entry_offset,
-                    .crc32 = crc32_reader.hasher.final(),
-                });
-            },
-        }
-    }
-
-    // The trailing checksum is read around the hashing reader so that it
-    // does not become part of the computed hash.
-    const pack_checksum = hashed.hasher.finalResult();
-    const recorded_checksum = try buffered.reader().readBytesNoEof(Sha1.digest_length);
-    if (!mem.eql(u8, &pack_checksum, &recorded_checksum)) return error.CorruptedPack;
-
-    // Nothing may follow the checksum.
-    _ = buffered.reader().readByte() catch |err| switch (err) {
-        error.EndOfStream => return pack_checksum,
-        else => |unexpected| return unexpected,
-    };
-    return error.InvalidFormat;
-}
-
-/// Tries to compute the object ID of the deltified object described by
-/// `delta`.
-///
-/// Returns null if that is not possible yet, which happens when the delta
-/// chain goes through a ref-based delta whose base object is not present in
-/// `index_entries`.
-fn indexPackHashDelta(
-    allocator: Allocator,
-    pack: std.fs.File,
-    delta: IndexEntry,
-    index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry),
-    cache: *ObjectCache,
-) !?Oid {
-    // Walk towards the base of the delta chain, remembering the offset of
-    // every delta passed on the way. The walk ends at the first object that
-    // is either cached or not a delta.
-    var delta_offsets = std.ArrayListUnmanaged(u64){};
-    defer delta_offsets.deinit(allocator);
-    var current_offset = delta.offset;
-    const chain_base: Object = while (true) {
-        if (cache.get(current_offset)) |cached| break cached;
-
-        try pack.seekTo(current_offset);
-        const header = try EntryHeader.read(pack.reader());
-        switch (header) {
-            .ofs_delta => |ofs_delta| {
-                try delta_offsets.append(allocator, current_offset);
-                current_offset = std.math.sub(u64, current_offset, ofs_delta.offset) catch return error.InvalidObject;
-            },
-            .ref_delta => |ref_delta| {
-                try delta_offsets.append(allocator, current_offset);
-                const base_entry = index_entries.get(ref_delta.base_object) orelse return null;
-                current_offset = base_entry.offset;
-            },
-            else => {
-                const raw_data = try readObjectRaw(allocator, pack.reader(), header.uncompressedLength());
-                errdefer allocator.free(raw_data);
-                const object: Object = .{ .type = header.objectType(), .data = raw_data };
-                try cache.put(allocator, current_offset, object);
-                break object;
-            },
-        }
-    };
-
-    const resolved_data = try resolveDeltaChain(allocator, pack, chain_base, delta_offsets.items, cache);
-
-    // The object ID is the hash of the object header followed by the data.
-    var oid_hasher = Sha1.init(.{});
-    var oid_hashed_writer = hashedWriter(std.io.null_writer, &oid_hasher);
-    try oid_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(chain_base.type), resolved_data.len });
-    oid_hasher.update(resolved_data);
-    return oid_hasher.finalResult();
-}
-
-/// Resolves a chain of deltas and returns the data of the final object.
-///
-/// Starting from the data of `base_object`, the deltas stored at
-/// `delta_offsets` in `pack` are applied from the last offset to the first.
-/// Each intermediate result is stored in `cache`, which takes ownership of
-/// it, so the returned data must not be freed by the caller.
-fn resolveDeltaChain(
-    allocator: Allocator,
-    pack: std.fs.File,
-    base_object: Object,
-    delta_offsets: []const u64,
-    cache: *ObjectCache,
-) ![]const u8 {
-    var current_data = base_object.data;
-    var remaining = delta_offsets.len;
-    while (remaining > 0) : (remaining -= 1) {
-        const delta_offset = delta_offsets[remaining - 1];
-
-        try pack.seekTo(delta_offset);
-        const delta_header = try EntryHeader.read(pack.reader());
-        const delta_data = try readObjectRaw(allocator, pack.reader(), delta_header.uncompressedLength());
-        defer allocator.free(delta_data);
-
-        // The delta data begins with two sizes: that of the base object
-        // (not needed here) and that of the expanded result.
-        var delta_stream = std.io.fixedBufferStream(delta_data);
-        const delta_reader = delta_stream.reader();
-        _ = try readSizeVarInt(delta_reader);
-        const expanded_size = try readSizeVarInt(delta_reader);
-
-        const expanded_data = try allocator.alloc(
-            u8,
-            std.math.cast(usize, expanded_size) orelse return error.ObjectTooLarge,
-        );
-        errdefer allocator.free(expanded_data);
-        var expanded_stream = std.io.fixedBufferStream(expanded_data);
-        var base_stream = std.io.fixedBufferStream(current_data);
-        try expandDelta(&base_stream, delta_reader, expanded_stream.writer());
-        if (expanded_stream.pos != expanded_size) return error.InvalidObject;
-
-        // From here on the cache owns the expanded data.
-        try cache.put(allocator, delta_offset, .{ .type = base_object.type, .data = expanded_data });
-        current_data = expanded_data;
-    }
-    return current_data;
-}
-
/// Inflates one zlib-compressed object from `reader` and returns its
/// uncompressed bytes, which the caller must free with `allocator`.
///
/// The stream has to decompress to exactly `size` bytes: fewer bytes surface
/// as the underlying read error, while any extra byte results in
/// `error.InvalidFormat`. A `size` that does not fit in `usize` yields
/// `error.ObjectTooLarge`.
///
/// `reader` is wrapped in a buffered reader, so more input than the object
/// occupies may be consumed; do not rely on the position of `reader` after
/// this function returns.
fn readObjectRaw(allocator: Allocator, reader: anytype, size: u64) ![]u8 {
    const byte_count = std.math.cast(usize, size) orelse return error.ObjectTooLarge;

    var buffered = std.io.bufferedReader(reader);
    var inflater = try std.compress.zlib.decompressStream(allocator, buffered.reader());
    defer inflater.deinit();

    const object_data = try allocator.alloc(u8, byte_count);
    errdefer allocator.free(object_data);
    try inflater.reader().readNoEof(object_data);

    // Probe for one more byte: the only acceptable outcome is end of stream.
    if (inflater.reader().readByte()) |_| {
        return error.InvalidFormat;
    } else |err| switch (err) {
        error.EndOfStream => return object_data,
        else => |other| return other,
    }
}
-
/// Applies the delta instruction stream read from `delta_reader` to
/// `base_object`, emitting the reconstructed bytes to `writer`.
///
/// `base_object` has to provide `seekTo` and `reader` (a pointer to a
/// `std.io.FixedBufferStream` works). Instructions are consumed until
/// `delta_reader` reports end of stream at an instruction boundary. An
/// instruction byte of zero yields `error.InvalidDeltaInstruction`.
///
/// The instruction encoding is described in
/// [pack-format](https://git-scm.com/docs/pack-format).
fn expandDelta(base_object: anytype, delta_reader: anytype, writer: anytype) !void {
    const Pump = std.fifo.LinearFifo(u8, .{ .Static = 4096 });

    while (true) {
        const opcode = delta_reader.readByte() catch |err| switch (err) {
            error.EndOfStream => return,
            else => |other| return other,
        };
        // Bit 7 selects copy-from-base; the low seven bits are the operand.
        const is_copy = (opcode & 0x80) != 0;
        const operand: u7 = @truncate(opcode);

        if (!is_copy) {
            // Insert instruction: the operand is the number of literal bytes
            // that follow in the delta stream.
            if (operand == 0) return error.InvalidDeltaInstruction;
            var literal_reader = std.io.limitedReader(delta_reader, operand);
            var pump = Pump.init();
            try pump.pump(literal_reader.reader(), writer);
            continue;
        }

        // Copy instruction: each set operand bit announces one byte of the
        // little-endian offset (bits 0-3) or size (bits 4-6); absent bytes
        // count as zero.
        var offset: u32 = 0;
        if ((operand & 0x01) != 0) offset |= @as(u32, try delta_reader.readByte());
        if ((operand & 0x02) != 0) offset |= @as(u32, try delta_reader.readByte()) << 8;
        if ((operand & 0x04) != 0) offset |= @as(u32, try delta_reader.readByte()) << 16;
        if ((operand & 0x08) != 0) offset |= @as(u32, try delta_reader.readByte()) << 24;

        var size: u24 = 0;
        if ((operand & 0x10) != 0) size |= @as(u24, try delta_reader.readByte());
        if ((operand & 0x20) != 0) size |= @as(u24, try delta_reader.readByte()) << 8;
        if ((operand & 0x40) != 0) size |= @as(u24, try delta_reader.readByte()) << 16;
        // An encoded size of zero stands for 0x10000 bytes.
        if (size == 0) size = 0x10000;

        try base_object.seekTo(offset);
        var base_reader = std.io.limitedReader(base_object.reader(), size);
        var pump = Pump.init();
        try pump.pump(base_reader.reader(), writer);
    }
}
-
/// Returns a writer type that forwards every write to `child_writer` and
/// feeds the bytes the child actually accepted into `hasher`.
///
/// `WriterType` must declare `Error` and provide `write`. `HasherType` must
/// provide `update`; a pointer to a hasher works, in which case the
/// pointed-to hasher accumulates the state.
fn HashedWriter(
    comptime WriterType: type,
    comptime HasherType: type,
) type {
    return struct {
        child_writer: WriterType,
        hasher: HasherType,

        const Error = WriterType.Error;
        const Writer = std.io.Writer(*@This(), Error, write);

        /// Writes as much of `buf` as the child writer accepts and returns
        /// that count. Nothing is hashed if the child write fails.
        fn write(hashed_writer: *@This(), buf: []const u8) Error!usize {
            const amt = try hashed_writer.child_writer.write(buf);
            // Hash only the accepted prefix. After a short write the caller
            // retries with the remainder, so hashing all of `buf` here would
            // feed those remaining bytes into the hasher twice.
            hashed_writer.hasher.update(buf[0..amt]);
            return amt;
        }

        /// Returns a `std.io.Writer` backed by this hashed writer.
        fn writer(hashed_writer: *@This()) Writer {
            return .{ .context = hashed_writer };
        }
    };
}
-
/// Builds a `HashedWriter` around `writer` and `hasher`, inferring both type
/// parameters from the arguments. Pass `hasher` as a pointer if the caller
/// needs to read the hash state back afterwards.
fn hashedWriter(
    writer: anytype,
    hasher: anytype,
) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
    const Wrapper = HashedWriter(@TypeOf(writer), @TypeOf(hasher));
    return Wrapper{
        .child_writer = writer,
        .hasher = hasher,
    };
}
-
test "packfile indexing and checkout" {
    // The embedded packfile can be inspected independently of this file:
    //
    // 1. `git init` a new, empty repository
    // 2. `git unpack-objects <path/to/testdata.pack`
    // 3. `git fsck` reports a "dangling commit"; its ID is the commit that is
    //    checked out below
    // 4. `git checkout dd582c0720819ab7130b103635bd7271b9fd4feb`
    const pack_bytes = @embedFile("git/testdata/testrepo.pack");

    var scratch = testing.tmpDir(.{});
    defer scratch.cleanup();

    var pack = try scratch.dir.createFile("testrepo.pack", .{ .read = true });
    defer pack.close();
    try pack.writeAll(pack_bytes);

    var index = try scratch.dir.createFile("testrepo.idx", .{ .read = true });
    defer index.close();
    try indexPack(testing.allocator, pack, index.writer());

    // Upper bound for any file read back during verification; everything in
    // the test repository is known to be far smaller.
    const read_limit = 4096;

    // The reference index was produced by Git itself, and the index written
    // above has to be byte-for-byte identical to it.
    // `git verify-pack -v testrepo.pack` can be used to confirm the reference.
    const reference_index = @embedFile("git/testdata/testrepo.idx");
    const generated_index = try scratch.dir.readFileAlloc(testing.allocator, "testrepo.idx", read_limit);
    defer testing.allocator.free(generated_index);
    try testing.expectEqualSlices(u8, reference_index, generated_index);

    var repo = try Repository.init(testing.allocator, pack, index);
    defer repo.deinit();

    var checkout_dir = testing.tmpIterableDir(.{});
    defer checkout_dir.cleanup();

    const head = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb");
    try repo.checkout(checkout_dir.iterable_dir.dir, head);

    const want_paths: []const []const u8 = &.{
        "dir/file",
        "dir/subdir/file",
        "dir/subdir/file2",
        "dir2/file",
        "dir3/file",
        "dir3/file2",
        "file",
        "file2",
        "file3",
        "file4",
        "file5",
        "file6",
        "file7",
        "file8",
        "file9",
    };

    // Collect every regular file of the worktree with '/' as separator.
    var got_paths: std.ArrayListUnmanaged([]u8) = .{};
    defer got_paths.deinit(testing.allocator);
    defer for (got_paths.items) |owned_path| testing.allocator.free(owned_path);

    var tree_walker = try checkout_dir.iterable_dir.walk(testing.allocator);
    defer tree_walker.deinit();
    while (try tree_walker.next()) |entry| {
        if (entry.kind == .file) {
            const owned_path = try testing.allocator.dupe(u8, entry.path);
            errdefer testing.allocator.free(owned_path);
            mem.replaceScalar(u8, owned_path, std.fs.path.sep, '/');
            try got_paths.append(testing.allocator, owned_path);
        }
    }

    // Walk order is not specified, so sort before comparing.
    const PathOrder = struct {
        fn lessThan(_: void, lhs: []u8, rhs: []u8) bool {
            return mem.order(u8, lhs, rhs) == .lt;
        }
    };
    mem.sortUnstable([]u8, got_paths.items, {}, PathOrder.lessThan);
    try testing.expectEqualDeep(want_paths, got_paths.items);

    const want_contents =
        \\revision 1
        \\revision 2
        \\revision 4
        \\revision 5
        \\revision 7
        \\revision 8
        \\revision 9
        \\revision 10
        \\revision 12
        \\revision 13
        \\revision 14
        \\revision 18
        \\revision 19
        \\
    ;
    const got_contents = try checkout_dir.iterable_dir.dir.readFileAlloc(testing.allocator, "file", read_limit);
    defer testing.allocator.free(got_contents);
    try testing.expectEqualStrings(want_contents, got_contents);
}
-
/// Stand-alone driver that indexes a packfile and checks out one commit from
/// it. Meant for trying out and benchmarking changes to the indexing and
/// checkout code.
///
/// Takes exactly three command-line arguments: the packfile path, the commit
/// ID and the worktree directory. The index is written to `.git/idx` inside
/// the worktree; both directories are created if missing.
pub fn main() !void {
    const gpa = std.heap.c_allocator;

    const argv = try std.process.argsAlloc(gpa);
    defer std.process.argsFree(gpa, argv);
    // Arguments: packfile commit worktree
    if (argv.len != 4) return error.InvalidArguments;

    const cwd = std.fs.cwd();
    var pack_file = try cwd.openFile(argv[1], .{});
    defer pack_file.close();
    const commit = try parseOid(argv[2]);
    var worktree = try cwd.makeOpenPath(argv[3], .{});
    defer worktree.close();

    var git_dir = try worktree.makeOpenPath(".git", .{});
    defer git_dir.close();

    std.debug.print("Starting index...\n", .{});
    var index_file = try git_dir.createFile("idx", .{ .read = true });
    defer index_file.close();
    {
        // Buffer the index output, then make sure it is fully on disk before
        // the file is read back during checkout.
        var buffered_index = std.io.bufferedWriter(index_file.writer());
        try indexPack(gpa, pack_file, buffered_index.writer());
        try buffered_index.flush();
    }
    try index_file.sync();

    std.debug.print("Starting checkout...\n", .{});
    var repository = try Repository.init(gpa, pack_file, index_file);
    defer repository.deinit();
    try repository.checkout(worktree, commit);
}