DirCacheBuilder: Speed up reading from trees

Recursively copying a tree into a DirCache is a bottleneck for some algorithms, such as the in-memory merge code in Gerrit Code Review. Drop down a layer in the stack and use CanonicalTreeParser directly, as the addition logic processes only one tree at a time and does not need the merge-sorting feature (or overhead) of TreeWalk. Combined with 761814fe9c ("DirCacheEntry: Speed up creation by avoiding string cast"), loading a tree of 38,900 entries nearly halves in running time, from 70ms to 36ms, on some platforms. Change-Id: If1490ca25de0679a71cf508f59b486f9cc816165
2015-11-28 09:23:59 -08:00 · 2015-11-28 09:23:59 -08:00 · 2d011cd648
parent b0eb744604
commit 2d011cd648
2 changed files with 53 additions and 19 deletions
--- a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheBuilder.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheBuilder.java
@ -44,6 +44,9 @@

 package org.eclipse.jgit.dircache;

+import static org.eclipse.jgit.lib.FileMode.TYPE_MASK;
+import static org.eclipse.jgit.lib.FileMode.TYPE_TREE;
+
 import java.io.IOException;
 import java.text.MessageFormat;
 import java.util.Arrays;
@ -51,9 +54,7 @@
 import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.ObjectReader;
-import org.eclipse.jgit.treewalk.AbstractTreeIterator;
 import org.eclipse.jgit.treewalk.CanonicalTreeParser;
-import org.eclipse.jgit.treewalk.TreeWalk;

 /**
 * Updates a {@link DirCache} by adding individual {@link DirCacheEntry}s.
@ -163,27 +164,56 @@ public void keep(final int pos, int cnt) {
 	 * @throws IOException
 	 *             a tree cannot be read to iterate through its entries.
 	 */
-	public void addTree(final byte[] pathPrefix, final int stage,
-			final ObjectReader reader, final AnyObjectId tree) throws IOException {
-		final TreeWalk tw = new TreeWalk(reader);
-		tw.addTree(new CanonicalTreeParser(pathPrefix, reader, tree
-				.toObjectId()));
-		tw.setRecursive(true);
-		if (tw.next()) {
-			final DirCacheEntry newEntry = toEntry(stage, tw);
-			beforeAdd(newEntry);
-			fastAdd(newEntry);
-			while (tw.next())
-				fastAdd(toEntry(stage, tw));
+	public void addTree(byte[] pathPrefix, int stage, ObjectReader reader,
+			AnyObjectId tree) throws IOException { // adds every non-tree entry, descending into each subtree as it is encountered
+		CanonicalTreeParser p = createTreeParser(pathPrefix, reader, tree); // single-tree parser; replaces the TreeWalk used previously
+		while (!p.eof()) { // phase 1: locate and add the first non-tree entry
+			if (isTree(p)) {
+				p = enterTree(p, reader); // subtrees are never added themselves; continue with the parser enterTree() returns
+				continue;
+			}
+
+			DirCacheEntry first = toEntry(stage, p);
+			beforeAdd(first); // only this first entry goes through beforeAdd() -- presumably an ordering check against entries already in the builder; confirm
+			fastAdd(first);
+			p = p.next();
+			break; // exactly one entry is handled in phase 1
+		}
+
+		// The remaining tree entries are already correctly sorted, so they
+		// skip beforeAdd() and go straight to fastAdd().
+		while (!p.eof()) { // phase 2: everything after the first entry
+			if (isTree(p)) {
+				p = enterTree(p, reader);
+			} else {
+				fastAdd(toEntry(stage, p));
+				p = p.next();
+			}
 		}
 	}

-	private DirCacheEntry toEntry(final int stage, final TreeWalk tw) {
-		final DirCacheEntry e = new DirCacheEntry(tw.getRawPath(), stage);
-		final AbstractTreeIterator i;
+	private static CanonicalTreeParser createTreeParser(byte[] pathPrefix,
+			ObjectReader reader, AnyObjectId tree) throws IOException { // thin factory: forwards all three arguments to the CanonicalTreeParser constructor
+		return new CanonicalTreeParser(pathPrefix, reader, tree);
+	}

-		i = tw.getTree(0, AbstractTreeIterator.class);
-		e.setFileMode(tw.getFileMode(0));
+	private static boolean isTree(CanonicalTreeParser p) { // reports whether the parser's current entry is a tree
+		return (p.getEntryRawMode() & TYPE_MASK) == TYPE_TREE; // compare only the type bits of the raw mode against TYPE_TREE
+	}
+
+	private static CanonicalTreeParser enterTree(CanonicalTreeParser p,
+			ObjectReader reader) throws IOException { // returns the parser the caller should continue iterating with
+		p = p.createSubtreeIterator(reader); // parser over the subtree named by the current entry
+		return p.eof() ? p.next() : p; // subtree parser already at eof: call next() -- presumably this returns to the parent and advances past the subtree; confirm in CanonicalTreeParser.next()
+	}
+
+	private static DirCacheEntry toEntry(int stage, CanonicalTreeParser i) { // builds a DirCacheEntry from the parser's current entry at the given stage
+		byte[] buf = i.getEntryPathBuffer();
+		int len = i.getEntryPathLength();
+		byte[] path = new byte[len]; // private copy of the first len path bytes -- presumably the parser reuses buf as it advances; confirm
+		System.arraycopy(buf, 0, path, 0, len);
+
+		DirCacheEntry e = new DirCacheEntry(path, stage);
+		e.setFileMode(i.getEntryRawMode()); // raw int mode is passed through as-is via the int overload of setFileMode
 		e.setObjectIdFromRaw(i.idBuffer(), i.idOffset());
 		return e;
 	}
--- a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java
@ -505,6 +505,10 @@ public void setFileMode(final FileMode mode) {
 		NB.encodeInt32(info, infoOffset + P_MODE, mode.getBits());
 	}

+	void setFileMode(int mode) { // package-private overload taking raw mode bits instead of a FileMode object
+		NB.encodeInt32(info, infoOffset + P_MODE, mode); // written unchanged into the P_MODE slot; NOTE(review): mode is not validated here -- confirm callers only pass valid file modes
+	}
+
 	/**
 	 * Get the cached creation time of this file, in milliseconds.
 	 *