From 7edf05530d83d7f8971db4262ea43e11124c6b63 Mon Sep 17 00:00:00 2001 From: Terry Parker Date: Tue, 2 Aug 2016 08:53:06 -0700 Subject: [PATCH 1/2] Shallow fetch: avoid sending unneeded blobs When doing an incremental fetch from JGit, "have" commits are marked as "uninteresting". In a non-shallow fetch, when the RevWalk hits an "uninteresting" commit it marks the commit's corresponding tree as uninteresting. That has the effect of dropping those trees and all the trees and blobs they reference out of the thin pack returned to the client. However, shallow fetches use a DepthWalk to limit the RevWalk, which nearly always causes the RevWalk to terminate before encountering the "have" commits. As a result the pack created for the incremental fetch never encounters "uninteresting" tree objects and thus includes duplicate objects that it knows the client already has. Change-Id: I7b1f7c3b0d83e04d34cd2fa676f1ad4fec904c05 Signed-off-by: Terry Parker --- .../internal/storage/file/PackWriterTest.java | 72 ++++++++++++++++++- .../internal/storage/pack/PackWriter.java | 20 +++++- 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java index 5afb3eac0..de7279358 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java @@ -76,6 +76,9 @@ import org.eclipse.jgit.lib.ObjectIdSet; import org.eclipse.jgit.lib.ObjectInserter; import org.eclipse.jgit.lib.Sets; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.DepthWalk; +import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevBlob; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevObject; @@ -542,15 +545,74 @@ private static void assertContent(PackIndex pi, List expected) { expected.contains(pi.getObjectId(i))); } + @Test + public void testShallowIsMinimal() throws Exception { + FileRepository repo = createBareRepository(); + TestRepository r = new TestRepository(repo); + BranchBuilder bb = r.branch("refs/heads/master"); + RevBlob contentA = r.blob("A"); + RevBlob contentB = r.blob("B"); + RevBlob contentC = r.blob("C"); + RevBlob contentD = r.blob("D"); + RevBlob contentE = r.blob("E"); + RevCommit c1 = bb.commit().add("a", contentA).create(); + RevCommit c2 = bb.commit().add("b", contentB).create(); + RevCommit c3 = bb.commit().add("c", contentC).create(); + RevCommit c4 = bb.commit().add("d", contentD).create(); + RevCommit c5 = bb.commit().add("e", contentE).create(); + r.getRevWalk().parseHeaders(c1); + r.getRevWalk().parseHeaders(c2); + r.getRevWalk().parseHeaders(c3); + r.getRevWalk().parseHeaders(c4); + r.getRevWalk().parseHeaders(c5); + + PackIndex idx = writeShallowPack(repo, 1, wants(c2), NONE, NONE); + assertContent(idx, + Arrays.asList(c1.getId(), c2.getId(), c1.getTree().getId(), + c2.getTree().getId(), contentA.getId(), + contentB.getId())); + + // Client already has blobs A and B, verify those are not packed. + idx = writeShallowPack(repo, 1, wants(c5), haves(c1, c2), shallows(c1)); + assertContent(idx, + Arrays.asList(c4.getId(), c5.getId(), c4.getTree().getId(), + c5.getTree().getId(), contentC.getId(), + contentD.getId(), contentE.getId())); + } + private static PackIndex writePack(FileRepository repo, Set want, Set excludeObjects) - throws IOException { + throws IOException { + RevWalk walk = new RevWalk(repo); + return writePack(repo, walk, 0, want, NONE, excludeObjects); + } + + private static PackIndex writeShallowPack(FileRepository repo, int depth, + Set want, Set have, + Set shallow) throws IOException { + // During negotiation, UploadPack would have set up a DepthWalk and + // marked the client's "shallow" commits. Emulate that here. + DepthWalk.RevWalk walk = new DepthWalk.RevWalk(repo, depth); + walk.assumeShallow(shallow); + return writePack(repo, walk, depth, want, have, EMPTY_ID_SET); + } + + private static PackIndex writePack(FileRepository repo, RevWalk walk, + int depth, Set want, + Set have, Set excludeObjects) + throws IOException { try (PackWriter pw = new PackWriter(repo)) { pw.setDeltaBaseAsOffset(true); pw.setReuseDeltaCommits(false); - for (ObjectIdSet idx : excludeObjects) + for (ObjectIdSet idx : excludeObjects) { pw.excludeObjects(idx); - pw.preparePack(NullProgressMonitor.INSTANCE, want, NONE); + } + if (depth > 0) { + pw.setShallowPack(depth, null); + } + ObjectWalk ow = walk.toObjectWalkWithSameObjects(); + + pw.preparePack(NullProgressMonitor.INSTANCE, ow, want, have); String id = pw.computeName().getName(); File packdir = new File(repo.getObjectsDirectory(), "pack"); File packFile = new File(packdir, "pack-" + id + ".pack"); @@ -737,4 +799,8 @@ private static Set haves(ObjectId... objects) { private static Set wants(ObjectId... objects) { return Sets.of(objects); } + + private static Set shallows(ObjectId... objects) { + return Sets.of(objects); + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java index 525f9aecc..4c0fc47f6 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -1653,6 +1653,8 @@ private void findObjectsToPack(@NonNull ProgressMonitor countingMonitor, List haveObjs = new ArrayList(haveEst); List wantTags = new ArrayList(want.size()); + // Retrieve the RevWalk's versions of "want" and "have" objects to + // maintain any flags previously set in the RevWalk. AsyncRevObjectQueue q = walker.parseAny(all, true); try { for (;;) { @@ -1695,11 +1697,25 @@ private void findObjectsToPack(@NonNull ProgressMonitor countingMonitor, if (walker instanceof DepthWalk.ObjectWalk) { DepthWalk.ObjectWalk depthWalk = (DepthWalk.ObjectWalk) walker; - for (RevObject obj : wantObjs) + for (RevObject obj : wantObjs) { depthWalk.markRoot(obj); + } + // Mark the tree objects associated with "have" commits as + // uninteresting to avoid writing redundant blobs. A normal RevWalk + // lazily propagates the "uninteresting" state from a commit to its + // tree during the walk, but DepthWalks can terminate early so + // preemptively propagate that state here. + for (RevObject obj : haveObjs) { + if (obj instanceof RevCommit) { + RevTree t = ((RevCommit) obj).getTree(); + depthWalk.markUninteresting(t); + } + } + if (unshallowObjects != null) { - for (ObjectId id : unshallowObjects) + for (ObjectId id : unshallowObjects) { depthWalk.markUnshallow(walker.parseAny(id)); + } } } else { for (RevObject obj : wantObjs) From 2a7897bb4cb17e82c3669a8b073a577c7ca5f467 Mon Sep 17 00:00:00 2001 From: Terry Parker Date: Wed, 3 Aug 2016 08:36:55 -0700 Subject: [PATCH 2/2] RevWalk: Make fields available to DepthWalk DepthWalk needs to override toObjectWalkWithSameObjects() and thus needs to be able to directly set the objects and freeFlags fields, so make them package private. Change-Id: I24561b82c54ba3d6522582ca25105b204d777074 Signed-off-by: Terry Parker --- org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java index c8504937a..a7f7cd463 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java @@ -172,7 +172,7 @@ public class RevWalk implements Iterable, AutoCloseable { ObjectIdOwnerMap objects; - private int freeFlags = APP_FLAGS; + int freeFlags = APP_FLAGS; private int delayFreeFlags;