PackWriter: Hoist and cluster reference targets

Many source browsers and network related tools like UploadPack need
to find and parse the target of all branches and annotated tags
within the repository during their startup phase.  Clustering these
together into the same part of the pack file will improve locality,
reducing thrashing when an application starts and needs to load
all of these into memory at once.

To prevent bottlenecking basic log viewing tools that are scanning
backwards from the tip of a current branch (and don't need tags)
we place this cluster of older targets after 4096 newer commits
have already been placed into the pack stream.  4096 was chosen as
a rough guess, but was based on a few factors:

  - log viewers typically show 5-200 commits per page
  - users only view the first page or two

  - DHT can cram 2200-4000 commits per 1 MiB chunk
    thus these will fall into the second commit chunk (roughly)

Unfortunately this placement hurts history tools that are scanning
backwards through the commit graph and completely ignored tags or
branch heads when they started.

An ancient tagged commit is no longer positioned behind its first
child (it's now much earlier), resulting in a page fault for the
parser to reload this cluster of objects on demand.  This may be
an acceptable loss.  If a user is walking backwards and has already
scanned through more than 4096 commits of history, waiting for the
region to reload isn't really that bad compared to the amount of
time already spent.

If the repository is so small that there are fewer than 4096 commits,
this change has no impact on the placement of objects.

Change-Id: If3052e430d305e17878d94145c93754f56b74c61
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
Shawn O. Pearce 2011-02-18 17:55:53 -08:00
parent 19037e8cfc
commit 8f865bfffe
3 changed files with 62 additions and 0 deletions

View File

@ -154,6 +154,8 @@ public class PackWriter {
private List<CachedPack> cachedPacks = new ArrayList<CachedPack>(2);
private Set<ObjectId> tagTargets = Collections.emptySet();
private Deflater myDeflater;
private final ObjectReader reader;
@ -330,6 +332,22 @@ public void setIgnoreMissingUninteresting(final boolean ignore) {
ignoreMissingUninteresting = ignore;
}
/**
 * Set the tag targets that should be hoisted earlier during packing.
 * <p>
 * Callers may put objects into this set before invoking any of the
 * preparePack methods to influence where an annotated tag's target is
 * stored within the resulting pack. Typically these will be clustered
 * together, and hoisted earlier in the file even if they are ancient
 * revisions, allowing readers to find tag targets with better locality.
 * <p>
 * The supplied set is retained by reference, not copied, so objects added
 * to it after this call are still visible to this writer.
 *
 * @param objects
 *            objects that annotated tags point at. Passing {@code null} is
 *            treated the same as passing an empty set, so no targets are
 *            hoisted.
 */
public void setTagTargets(Set<ObjectId> objects) {
	// The set is iterated later while objects are being collected for the
	// pack; normalizing null here avoids a delayed, data-dependent
	// NullPointerException at that point.
	if (objects == null)
		tagTargets = Collections.emptySet();
	else
		tagTargets = objects;
}
/**
* Returns objects number in a pack file that was created by this writer.
*
@ -1251,10 +1269,14 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
ArrayList<ObjectToPack> list = (ArrayList<ObjectToPack>) objectsLists[Constants.OBJ_COMMIT];
list.ensureCapacity(list.size() + commits.size());
}
int commitCnt = 0;
boolean putTagTargets = false;
for (RevCommit cmit : commits) {
if (!cmit.has(added)) {
cmit.add(added);
addObject(cmit, 0);
commitCnt++;
}
for (int i = 0; i < cmit.getParentCount(); i++) {
@ -1262,8 +1284,23 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
if (!p.has(added) && !p.has(RevFlag.UNINTERESTING)) {
p.add(added);
addObject(p, 0);
commitCnt++;
}
}
if (!putTagTargets && 4096 < commitCnt) {
for (ObjectId id : tagTargets) {
RevObject obj = walker.lookupOrNull(id);
if (obj instanceof RevCommit
&& obj.has(include)
&& !obj.has(RevFlag.UNINTERESTING)
&& !obj.has(added)) {
obj.add(added);
addObject(obj, 0);
}
}
putTagTargets = true;
}
}
commits = null;

View File

@ -88,6 +88,8 @@ public class BundleWriter {
private final Set<RevCommit> assume;
private final Set<ObjectId> tagTargets;
private PackConfig packConfig;
/**
@ -100,6 +102,7 @@ public BundleWriter(final Repository repo) {
db = repo;
include = new TreeMap<String, ObjectId>();
assume = new HashSet<RevCommit>();
tagTargets = new HashSet<ObjectId>();
}
/**
@ -143,6 +146,13 @@ public void include(final String name, final AnyObjectId id) {
*/
public void include(final Ref r) {
	final ObjectId refId = r.getObjectId();
	include(r.getName(), refId);

	// A ref that carries a peeled id contributes that peeled object as a
	// target to cluster; nothing else is recorded for it.
	final ObjectId peeledId = r.getPeeledObjectId();
	if (peeledId != null) {
		tagTargets.add(peeledId);
		return;
	}

	// Otherwise only refs whose name begins with Constants.R_HEADS
	// contribute, using the object the ref points at directly.
	final boolean isHead = refId != null
			&& r.getName().startsWith(Constants.R_HEADS);
	if (isHead)
		tagTargets.add(refId);
}
/**
@ -192,6 +202,8 @@ public void writeBundle(ProgressMonitor monitor, OutputStream os)
exc.add(r.getId());
packWriter.setDeltaBaseAsOffset(true);
packWriter.setThin(exc.size() > 0);
if (exc.size() == 0)
packWriter.setTagTargets(tagTargets);
packWriter.preparePack(monitor, inc, exc);
final Writer w = new OutputStreamWriter(os, Constants.CHARSET);

View File

@ -695,6 +695,19 @@ private void sendPack() throws IOException {
pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA));
pw.setThin(options.contains(OPTION_THIN_PACK));
if (commonBase.isEmpty()) {
Set<ObjectId> tagTargets = new HashSet<ObjectId>();
for (Ref ref : refs.values()) {
if (ref.getPeeledObjectId() != null)
tagTargets.add(ref.getPeeledObjectId());
else if (ref.getObjectId() == null)
continue;
else if (ref.getName().startsWith(Constants.R_HEADS))
tagTargets.add(ref.getObjectId());
}
pw.setTagTargets(tagTargets);
}
RevWalk rw = walk;
if (wantAll.isEmpty()) {
pw.preparePack(pm, wantIds, commonBase);