PackWriter: Sort commits by parse order to improve locality

RevWalk in JGit and the revision code in C Git both parse commits out
of the pack file in an order that differs from strict timestamp and
topological sorting.  Both implementations pop a commit from the head
of a date queue, and then immediately parse all of its parents in
order to insert those into the date queue at the proper positions as
determined by their committer timestamp field.  This implies that the
parents are parsed when their most recent child is popped from the
queue, and not when they themselves are popped during traversal.

Hoisting a parent commit to be immediately behind its child improves
locality by making sure all parents of a merge are clustered together,
and thus can be paged into the parser by the pack file buffering
system (aka WindowCache in JGit) together.

Change-Id: I80f9e64cafa2e8f082776b43845edf23065386a2
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
Shawn O. Pearce 2011-02-16 17:41:35 -08:00
parent 681739b1c8
commit 733780e8a1
1 changed file with 31 additions and 10 deletions

View File

@ -1116,6 +1116,7 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
final Map<ObjectId, CachedPack> tipToPack = new HashMap<ObjectId, CachedPack>();
final RevFlag inCachedPack = walker.newFlag("inCachedPack");
final RevFlag include = walker.newFlag("include");
final RevFlag added = walker.newFlag("added");
final RevFlagSet keepOnRestart = new RevFlagSet();
keepOnRestart.add(inCachedPack);
@ -1177,13 +1178,15 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
int typesToPrune = 0;
final int maxBases = config.getDeltaSearchWindowSize();
Set<RevTree> baseTrees = new HashSet<RevTree>();
RevObject o;
while ((o = walker.next()) != null) {
if (o.has(inCachedPack)) {
CachedPack pack = tipToPack.get(o);
List<RevCommit> commits = new ArrayList<RevCommit>();
RevCommit c;
while ((c = walker.next()) != null) {
if (c.has(inCachedPack)) {
CachedPack pack = tipToPack.get(c);
if (includesAllTips(pack, include, walker)) {
useCachedPack(walker, keepOnRestart, //
wantObjs, haveObjs, pack);
commits = new ArrayList<RevCommit>();
countingMonitor.endTask();
countingMonitor.beginTask(JGitText.get().countingObjects,
@ -1192,16 +1195,36 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
}
}
if (o.has(RevFlag.UNINTERESTING)) {
if (c.has(RevFlag.UNINTERESTING)) {
if (baseTrees.size() <= maxBases)
baseTrees.add(((RevCommit) o).getTree());
baseTrees.add(c.getTree());
continue;
}
addObject(o, 0);
commits.add(c);
countingMonitor.update(1);
}
if (objectsLists[Constants.OBJ_COMMIT] instanceof ArrayList) {
ArrayList<ObjectToPack> list = (ArrayList<ObjectToPack>) objectsLists[Constants.OBJ_COMMIT];
list.ensureCapacity(list.size() + commits.size());
}
for (RevCommit cmit : commits) {
if (!cmit.has(added)) {
cmit.add(added);
addObject(cmit, 0);
}
for (int i = 0; i < cmit.getParentCount(); i++) {
RevCommit p = cmit.getParent(i);
if (!p.has(added) && !p.has(RevFlag.UNINTERESTING)) {
p.add(added);
addObject(p, 0);
}
}
}
commits = null;
for (CachedPack p : cachedPacks) {
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_COMMIT])) {
if (baseTrees.size() <= maxBases)
@ -1213,6 +1236,7 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, //
objectsMap, edgeObjects, reader);
RevObject o;
while ((o = walker.nextObject()) != null) {
if (o.has(RevFlag.UNINTERESTING))
continue;
@ -1284,9 +1308,6 @@ private void useCachedPack(ObjectWalk walker, RevFlagSet keepOnRestart,
for (ObjectId id : pack.getTips())
baseObj.add(walker.lookupOrNull(id));
objectsMap.clear();
objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
setThin(true);
walker.resetRetain(keepOnRestart);
walker.sort(RevSort.TOPO);