Speed up ObjectWalk by 6235 objects/sec

The "Counting objects" phase of packing is the most time consuming
part for any server providing access to Git repositories. Scanning
through the entire project history, including every revision of
every tree that has ever existed is expensive and takes an incredible
amount of CPU time.

Inline the tree parsing logic, unroll a number of loops, and setup
to better handle the common case of seeing another occurrence of
an object that was already marked SEEN.

This change boosts the "Counting objects" phase when JGit is acting
as a server and is packing the linux-2.6 repository for its client.
Compared to CGit on the same hardware, a JGit daemon server is now
21883 objects/sec faster:

CGit:
  Counted 2058062 objects in 38981 ms at 52796.54 objects/sec
  Counted 2058062 objects in 38920 ms at 52879.29 objects/sec
  Counted 2058062 objects in 39059 ms at 52691.11 objects/sec

JGit (before):
  Counted 2058062 objects in 31529 ms at 65275.21 objects/sec
  Counted 2058062 objects in 30359 ms at 67790.84 objects/sec
  Counted 2058062 objects in 30033 ms at 68526.69 objects/sec

JGit (this commit):
  Counted 2058062 objects in 28726 ms at 71644.57 objects/sec
  Counted 2058062 objects in 27652 ms at 74427.24 objects/sec
  Counted 2058062 objects in 27528 ms at 74762.50 objects/sec

Above the first run was a "cold server". For JGit the JVM had just
started up with `jgit daemon`, and for CGit we hadn't touched the
repository "recently" (but it was certainly in kernel buffer cache).
The second and third runs were against the running JGit JVM, allowing
timing tests to better reflect the benefits of JGit's pack and index
caching, as well as any optimizations the JIT may have performed.

The timings are fair.  CGit is opening, checking and mmap'ing both
the pack and index during the timer.  JGit is opening, checking
and malloc+read'ing the pack and index data into its Java heap
during the timer. Both processes are walking the same graph space,
and are computing the "path hash" necessary to sort objects in the
object table for delta compression.  Since this commit only impacts
the "Counting objects" phase, delta compression was obviously not
included in the timings and JGit may still be performing delta
compression slower than CGit, resulting in an overall slower server
experience for clients.

Change-Id: Ieb184bfaed8475d6960a494b1f3c870e0382164a
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
Shawn O. Pearce 2011-08-08 15:11:54 -07:00
parent 82e735c3aa
commit 53db854185
2 changed files with 372 additions and 117 deletions

View File

@ -43,6 +43,10 @@
package org.eclipse.jgit.revwalk;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
@ -51,13 +55,12 @@
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.treewalk.CanonicalTreeParser;
import org.eclipse.jgit.util.RawParseUtils;
/**
* Specialized subclass of RevWalk to include trees, blobs and tags.
@ -76,6 +79,13 @@
* commits that are returned first.
*/
public class ObjectWalk extends RevWalk {
private static final int ID_SZ = 20;
private static final int TYPE_SHIFT = 12;
private static final int TYPE_TREE = 0040000 >>> TYPE_SHIFT;
private static final int TYPE_SYMLINK = 0120000 >>> TYPE_SHIFT;
private static final int TYPE_FILE = 0100000 >>> TYPE_SHIFT;
private static final int TYPE_GITLINK = 0160000 >>> TYPE_SHIFT;
/**
* Indicates a non-RevCommit is in {@link #pendingObjects}.
* <p>
@ -85,22 +95,24 @@ public class ObjectWalk extends RevWalk {
*/
private static final int IN_PENDING = RevWalk.REWRITE;
private static final byte[] EMPTY_PATH = {};
private CanonicalTreeParser treeWalk;
private List<RevObject> rootObjects;
private BlockObjQueue pendingObjects;
private RevTree currentTree;
private RevObject last;
private RevCommit firstCommit;
private RevCommit lastCommit;
private TreeVisit freeVisit;
private TreeVisit currVisit;
private byte[] pathBuf;
private int pathLen;
private boolean boundary;
/**
* Create a new revision and object walker for a given repository.
*
@ -123,7 +135,7 @@ public ObjectWalk(ObjectReader or) {
super(or);
rootObjects = new ArrayList<RevObject>();
pendingObjects = new BlockObjQueue();
treeWalk = new CanonicalTreeParser();
pathBuf = new byte[256];
}
/**
@ -213,7 +225,7 @@ public void markUninteresting(RevObject o) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
while (o instanceof RevTag) {
o.flags |= UNINTERESTING;
if (hasRevSort(RevSort.BOUNDARY))
if (boundary)
addObject(o);
o = ((RevTag) o).getObject();
parseHeaders(o);
@ -226,9 +238,20 @@ else if (o instanceof RevTree)
else
o.flags |= UNINTERESTING;
if (o.getType() != Constants.OBJ_COMMIT && hasRevSort(RevSort.BOUNDARY)) {
if (o.getType() != OBJ_COMMIT && boundary)
addObject(o);
}
}
@Override
public void sort(RevSort s) {
super.sort(s);
boundary = hasRevSort(RevSort.BOUNDARY);
}
@Override
public void sort(RevSort s, boolean use) {
super.sort(s, use);
boundary = hasRevSort(RevSort.BOUNDARY);
}
@Override
@ -236,11 +259,14 @@ public RevCommit next() throws MissingObjectException,
IncorrectObjectTypeException, IOException {
for (;;) {
final RevCommit r = super.next();
if (r == null)
if (r == null) {
if (firstCommit != null)
reader.walkAdviceBeginTrees(this, firstCommit, lastCommit);
return null;
}
if ((r.flags & UNINTERESTING) != 0) {
markTreeUninteresting(r.getTree());
if (hasRevSort(RevSort.BOUNDARY))
if (boundary)
return r;
continue;
}
@ -268,85 +294,180 @@ public RevCommit next() throws MissingObjectException,
*/
public RevObject nextObject() throws MissingObjectException,
IncorrectObjectTypeException, IOException {
if (last != null)
treeWalk = last instanceof RevTree ? enter(last) : treeWalk.next();
pathLen = 0;
while (!treeWalk.eof()) {
final FileMode mode = treeWalk.getEntryFileMode();
switch (mode.getObjectType()) {
case Constants.OBJ_BLOB: {
treeWalk.getEntryObjectId(idBuffer);
final RevBlob o = lookupBlob(idBuffer);
if ((o.flags & SEEN) != 0)
break;
o.flags |= SEEN;
if (shouldSkipObject(o))
break;
last = o;
return o;
}
case Constants.OBJ_TREE: {
treeWalk.getEntryObjectId(idBuffer);
final RevTree o = lookupTree(idBuffer);
if ((o.flags & SEEN) != 0)
break;
o.flags |= SEEN;
if (shouldSkipObject(o))
break;
last = o;
return o;
}
default:
if (FileMode.GITLINK.equals(mode))
break;
treeWalk.getEntryObjectId(idBuffer);
throw new CorruptObjectException(MessageFormat.format(JGitText.get().corruptObjectInvalidMode3
, mode , idBuffer.name() , treeWalk.getEntryPathString() , currentTree.name()));
TreeVisit tv = currVisit;
while (tv != null) {
byte[] buf = tv.buf;
for (int ptr = tv.ptr; ptr < buf.length;) {
int startPtr = ptr;
ptr = findObjectId(buf, ptr);
idBuffer.fromRaw(buf, ptr);
ptr += ID_SZ;
RevObject obj = objects.get(idBuffer);
if (obj != null && (obj.flags & SEEN) != 0)
continue;
int mode = parseMode(buf, startPtr, ptr, tv);
int flags;
switch (mode >>> TYPE_SHIFT) {
case TYPE_FILE:
case TYPE_SYMLINK:
if (obj == null) {
obj = new RevBlob(idBuffer);
obj.flags = SEEN;
objects.add(obj);
return obj;
}
if (!(obj instanceof RevBlob))
throw new IncorrectObjectTypeException(obj, OBJ_BLOB);
obj.flags = flags = obj.flags | SEEN;
if ((flags & UNINTERESTING) == 0)
return obj;
if (boundary)
return obj;
continue;
case TYPE_TREE:
if (obj == null) {
obj = new RevTree(idBuffer);
obj.flags = SEEN;
objects.add(obj);
return enterTree(obj);
}
if (!(obj instanceof RevTree))
throw new IncorrectObjectTypeException(obj, OBJ_TREE);
obj.flags = flags = obj.flags | SEEN;
if ((flags & UNINTERESTING) == 0)
return enterTree(obj);
if (boundary)
return enterTree(obj);
continue;
case TYPE_GITLINK:
continue;
default:
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().corruptObjectInvalidMode3,
String.format("%o", mode), idBuffer.name(),
RawParseUtils.decode(buf, tv.namePtr, tv.nameEnd),
tv.obj));
}
}
treeWalk = treeWalk.next();
currVisit = tv.parent;
releaseTreeVisit(tv);
tv = currVisit;
}
if (firstCommit != null) {
reader.walkAdviceBeginTrees(this, firstCommit, lastCommit);
firstCommit = null;
lastCommit = null;
}
last = null;
for (;;) {
final RevObject o = pendingObjects.next();
RevObject o = pendingObjects.next();
if (o == null) {
reader.walkAdviceEnd();
return null;
}
if ((o.flags & SEEN) != 0)
int flags = o.flags;
if ((flags & SEEN) != 0)
continue;
o.flags |= SEEN;
if (shouldSkipObject(o))
continue;
if (o instanceof RevTree) {
currentTree = (RevTree) o;
treeWalk = treeWalk.resetRoot(reader, currentTree);
flags |= SEEN;
o.flags = flags;
if ((flags & UNINTERESTING) == 0 | boundary) {
if (o instanceof RevTree) {
tv = newTreeVisit(o);
tv.parent = null;
currVisit = tv;
}
return o;
}
return o;
}
}
private CanonicalTreeParser enter(RevObject tree) throws IOException {
CanonicalTreeParser p = treeWalk.createSubtreeIterator0(reader, tree);
if (p.eof()) {
// We can't tolerate the subtree being an empty tree, as
// that will break us out early before we visit all names.
// If it is, advance to the parent's next record.
//
return treeWalk.next();
}
return p;
private RevObject enterTree(RevObject obj) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
TreeVisit tv = newTreeVisit(obj);
tv.parent = currVisit;
currVisit = tv;
return obj;
}
private final boolean shouldSkipObject(final RevObject o) {
return (o.flags & UNINTERESTING) != 0 && !hasRevSort(RevSort.BOUNDARY);
private static int findObjectId(byte[] buf, int ptr) {
// Skip over the mode and name until the NUL before the ObjectId
// can be located. Skip the NUL as the function returns.
for (;;) {
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
}
}
private static int parseMode(byte[] buf, int startPtr, int recEndPtr, TreeVisit tv) {
int mode = buf[startPtr] - '0';
for (;;) {
byte c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
}
tv.ptr = recEndPtr;
tv.namePtr = startPtr + 1;
tv.nameEnd = recEndPtr - (ID_SZ + 1);
return mode;
}
/**
@ -383,7 +504,7 @@ public void checkConnectivity() throws MissingObjectException,
if (o == null)
break;
if (o instanceof RevBlob && !reader.has(o))
throw new MissingObjectException(o, Constants.TYPE_BLOB);
throw new MissingObjectException(o, OBJ_BLOB);
}
}
@ -401,7 +522,12 @@ public void checkConnectivity() throws MissingObjectException,
* has no path, such as for annotated tags or root level trees.
*/
public String getPathString() {
return last != null ? treeWalk.getEntryPathString() : null;
if (pathLen == 0) {
pathLen = updatePathBuf(currVisit);
if (pathLen == 0)
return null;
}
return RawParseUtils.decode(pathBuf, 0, pathLen);
}
/**
@ -413,28 +539,104 @@ public String getPathString() {
* @return path hash code; any integer may be returned.
*/
public int getPathHashCode() {
return last != null ? treeWalk.getEntryPathHashCode() : 0;
TreeVisit tv = currVisit;
if (tv == null)
return 0;
int nameEnd = tv.nameEnd;
if (nameEnd == 0) {
// When nameEnd == 0 the subtree is itself the current path
// being visited. The name hash must be obtained from its
// parent tree. If there is no parent, this is a root tree with
// a hash code of 0.
tv = tv.parent;
if (tv == null)
return 0;
nameEnd = tv.nameEnd;
}
byte[] buf;
int ptr;
if (16 <= (nameEnd - tv.namePtr)) {
buf = tv.buf;
ptr = nameEnd - 16;
} else {
nameEnd = pathLen;
if (nameEnd == 0) {
nameEnd = updatePathBuf(currVisit);
pathLen = nameEnd;
}
buf = pathBuf;
ptr = Math.max(0, nameEnd - 16);
}
int hash = 0;
for (; ptr < nameEnd; ptr++) {
byte c = buf[ptr];
if (c != ' ')
hash = (hash >>> 2) + (c << 24);
}
return hash;
}
/** @return the internal buffer holding the current path. */
public byte[] getPathBuffer() {
return last != null ? treeWalk.getEntryPathBuffer() : EMPTY_PATH;
if (pathLen == 0)
pathLen = updatePathBuf(currVisit);
return pathBuf;
}
/** @return length of the path in {@link #getPathBuffer()}. */
public int getPathLength() {
return last != null ? treeWalk.getEntryPathLength() : 0;
if (pathLen == 0)
pathLen = updatePathBuf(currVisit);
return pathLen;
}
private int updatePathBuf(TreeVisit tv) {
if (tv == null)
return 0;
// If nameEnd == 0 this tree has not yet contributed an entry.
// Update only for the parent, which if null will be empty.
int nameEnd = tv.nameEnd;
if (nameEnd == 0)
return updatePathBuf(tv.parent);
int ptr = tv.pathLen;
if (ptr == 0) {
ptr = updatePathBuf(tv.parent);
if (ptr == pathBuf.length)
growPathBuf(ptr);
if (ptr != 0)
pathBuf[ptr++] = '/';
tv.pathLen = ptr;
}
int namePtr = tv.namePtr;
int nameLen = nameEnd - namePtr;
int end = ptr + nameLen;
while (pathBuf.length < end)
growPathBuf(ptr);
System.arraycopy(tv.buf, namePtr, pathBuf, ptr, nameLen);
return end;
}
private void growPathBuf(int ptr) {
byte[] newBuf = new byte[pathBuf.length << 1];
System.arraycopy(pathBuf, 0, newBuf, 0, ptr);
pathBuf = newBuf;
}
@Override
public void dispose() {
super.dispose();
pendingObjects = new BlockObjQueue();
treeWalk = new CanonicalTreeParser();
currentTree = null;
last = null;
firstCommit = null;
lastCommit = null;
currVisit = null;
freeVisit = null;
}
@Override
@ -446,11 +648,10 @@ protected void reset(final int retainFlags) {
rootObjects = new ArrayList<RevObject>();
pendingObjects = new BlockObjQueue();
treeWalk = new CanonicalTreeParser();
currentTree = null;
last = null;
firstCommit = null;
lastCommit = null;
currVisit = null;
freeVisit = null;
}
private void addObject(final RevObject o) {
@ -468,36 +669,90 @@ private void markTreeUninteresting(final RevTree tree)
return;
tree.flags |= UNINTERESTING;
treeWalk = treeWalk.resetRoot(reader, tree);
while (!treeWalk.eof()) {
final FileMode mode = treeWalk.getEntryFileMode();
final int sType = mode.getObjectType();
byte[] raw = reader.open(tree, OBJ_TREE).getCachedBytes();
for (int ptr = 0; ptr < raw.length;) {
byte c = raw[ptr];
int mode = c - '0';
for (;;) {
c = raw[++ptr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
}
while (raw[++ptr] != 0) {
// Skip entry name.
}
ptr++; // Skip NUL after entry name.
switch (sType) {
case Constants.OBJ_BLOB: {
treeWalk.getEntryObjectId(idBuffer);
switch (mode >>> TYPE_SHIFT) {
case TYPE_FILE:
case TYPE_SYMLINK:
idBuffer.fromRaw(raw, ptr);
lookupBlob(idBuffer).flags |= UNINTERESTING;
break;
}
case Constants.OBJ_TREE: {
treeWalk.getEntryObjectId(idBuffer);
final RevTree t = lookupTree(idBuffer);
if ((t.flags & UNINTERESTING) == 0) {
t.flags |= UNINTERESTING;
treeWalk = treeWalk.createSubtreeIterator0(reader, t);
continue;
}
break;
}
default:
if (FileMode.GITLINK.equals(mode))
break;
treeWalk.getEntryObjectId(idBuffer);
throw new CorruptObjectException(MessageFormat.format(JGitText.get().corruptObjectInvalidMode3
, mode , idBuffer.name() , treeWalk.getEntryPathString() , tree));
}
treeWalk = treeWalk.next();
case TYPE_TREE:
idBuffer.fromRaw(raw, ptr);
markTreeUninteresting(lookupTree(idBuffer));
break;
case TYPE_GITLINK:
break;
default:
idBuffer.fromRaw(raw, ptr);
throw new CorruptObjectException(MessageFormat.format(JGitText
.get().corruptObjectInvalidMode3, String.format("%o",
mode), idBuffer.name(), "", tree));
}
ptr += ID_SZ;
}
}
private TreeVisit newTreeVisit(RevObject obj) throws LargeObjectException,
MissingObjectException, IncorrectObjectTypeException, IOException {
TreeVisit tv = freeVisit;
if (tv != null) {
freeVisit = tv.parent;
tv.ptr = 0;
tv.namePtr = 0;
tv.nameEnd = 0;
tv.pathLen = 0;
} else {
tv = new TreeVisit();
}
tv.obj = obj;
tv.buf = reader.open(obj, OBJ_TREE).getCachedBytes();
return tv;
}
private void releaseTreeVisit(TreeVisit tv) {
tv.buf = null;
tv.parent = freeVisit;
freeVisit = tv;
}
private static class TreeVisit {
/** Parent tree visit that entered this tree, null if root tree. */
TreeVisit parent;
/** The RevTree currently being iterated through. */
RevObject obj;
/** Canonical encoding of the tree named by {@link #obj}. */
byte[] buf;
/** Index of next entry to parse in {@link #buf}. */
int ptr;
/** Start of the current name entry in {@link #buf}. */
int namePtr;
/** One past end of name, {@code nameEnd - namePtr} is the length. */
int nameEnd;
/** Number of bytes in the path leading up to this tree. */
int pathLen;
}
}

View File

@ -170,7 +170,7 @@ public class RevWalk implements Iterable<RevCommit> {
final MutableObjectId idBuffer;
private ObjectIdOwnerMap<RevObject> objects;
ObjectIdOwnerMap<RevObject> objects;
private int freeFlags = APP_FLAGS;