From 8f865bfffed575c3a4db6d7db92dc5f752f97237 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Fri, 18 Feb 2011 17:55:53 -0800 Subject: [PATCH] PackWriter: Hoist and cluster reference targets Many source browsers and network related tools like UploadPack need to find and parse the target of all branches and annotated tags within the repository during their startup phase. Clustering these together into the same part of the pack file will improve locality, reducing thrashing when an application starts and needs to load all of these into memory at once. To prevent bottlenecking basic log viewing tools that are scannning backwards from the tip of a current branch (and don't need tags) we place this cluster of older targets after 4096 newer commits have already been placed into the pack stream. 4096 was chosen as a rough guess, but was based on a few factors: - log viewers typically show 5-200 commits per page - users only view the first page or two - DHT can cram 2200-4000 commits per 1 MiB chunk thus these will fall into the second commit chunk (roughly) Unfortunately this placement hurts history tools that are scanning backwards through the commit graph and completely ignored tags or branch heads when they started. An ancient tagged commit is no longer positioned behind its first child (its now much earlier), resulting in a page fault for the parser to reload this cluster of objects on demand. This may be an acceptable loss. If a user is walking backwards and has already scanned through more than 4096 commits of history, waiting for the region to reload isn't really that bad compared to the amount of time already spent. If the repository is so small that there are less than 4096 commits, this change has no impact on the placement of objects. Change-Id: If3052e430d305e17878d94145c93754f56b74c61 Signed-off-by: Shawn O. Pearce --- .../eclipse/jgit/storage/pack/PackWriter.java | 37 +++++++++++++++++++ .../eclipse/jgit/transport/BundleWriter.java | 12 ++++++ .../eclipse/jgit/transport/UploadPack.java | 13 +++++++ 3 files changed, 62 insertions(+) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index 334a41c16..fe1ab4fdb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -154,6 +154,8 @@ public class PackWriter { private List cachedPacks = new ArrayList(2); + private Set tagTargets = Collections.emptySet(); + private Deflater myDeflater; private final ObjectReader reader; @@ -330,6 +332,22 @@ public void setIgnoreMissingUninteresting(final boolean ignore) { ignoreMissingUninteresting = ignore; } + /** + * Set the tag targets that should be hoisted earlier during packing. + *

+ * Callers may put objects into this set before invoking any of the + * preparePack methods to influence where an annotated tag's target is + * stored within the resulting pack. Typically these will be clustered + * together, and hoisted earlier in the file even if they are ancient + * revisions, allowing readers to find tag targets with better locality. + * + * @param objects + * objects that annotated tags point at. + */ + public void setTagTargets(Set objects) { + tagTargets = objects; + } + /** * Returns objects number in a pack file that was created by this writer. * @@ -1251,10 +1269,14 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor, ArrayList list = (ArrayList) objectsLists[Constants.OBJ_COMMIT]; list.ensureCapacity(list.size() + commits.size()); } + + int commitCnt = 0; + boolean putTagTargets = false; for (RevCommit cmit : commits) { if (!cmit.has(added)) { cmit.add(added); addObject(cmit, 0); + commitCnt++; } for (int i = 0; i < cmit.getParentCount(); i++) { @@ -1262,8 +1284,23 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor, if (!p.has(added) && !p.has(RevFlag.UNINTERESTING)) { p.add(added); addObject(p, 0); + commitCnt++; } } + + if (!putTagTargets && 4096 < commitCnt) { + for (ObjectId id : tagTargets) { + RevObject obj = walker.lookupOrNull(id); + if (obj instanceof RevCommit + && obj.has(include) + && !obj.has(RevFlag.UNINTERESTING) + && !obj.has(added)) { + obj.add(added); + addObject(obj, 0); + } + } + putTagTargets = true; + } } commits = null; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java index e148bdaed..3865c8a75 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java @@ -88,6 +88,8 @@ public class BundleWriter { private final Set assume; + private final Set tagTargets; + private PackConfig packConfig; /** @@ -100,6 +102,7 @@ public BundleWriter(final Repository repo) { db = repo; include = new TreeMap(); assume = new HashSet(); + tagTargets = new HashSet(); } /** @@ -143,6 +146,13 @@ public void include(final String name, final AnyObjectId id) { */ public void include(final Ref r) { include(r.getName(), r.getObjectId()); + + if (r.getPeeledObjectId() != null) + tagTargets.add(r.getPeeledObjectId()); + + else if (r.getObjectId() != null + && r.getName().startsWith(Constants.R_HEADS)) + tagTargets.add(r.getObjectId()); } /** @@ -192,6 +202,8 @@ public void writeBundle(ProgressMonitor monitor, OutputStream os) exc.add(r.getId()); packWriter.setDeltaBaseAsOffset(true); packWriter.setThin(exc.size() > 0); + if (exc.size() == 0) + packWriter.setTagTargets(tagTargets); packWriter.preparePack(monitor, inc, exc); final Writer w = new OutputStreamWriter(os, Constants.CHARSET); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java index e50b01ecc..b7bf08c38 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java @@ -695,6 +695,19 @@ private void sendPack() throws IOException { pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA)); pw.setThin(options.contains(OPTION_THIN_PACK)); + if (commonBase.isEmpty()) { + Set tagTargets = new HashSet(); + for (Ref ref : refs.values()) { + if (ref.getPeeledObjectId() != null) + tagTargets.add(ref.getPeeledObjectId()); + else if (ref.getObjectId() == null) + continue; + else if (ref.getName().startsWith(Constants.R_HEADS)) + tagTargets.add(ref.getObjectId()); + } + pw.setTagTargets(tagTargets); + } + RevWalk rw = walk; if (wantAll.isEmpty()) { pw.preparePack(pm, wantIds, commonBase);