From 941b3d8a8105333ccbc15e3b57574fce6da6f006 Mon Sep 17 00:00:00 2001 From: roberto Date: Thu, 9 Dec 2010 23:40:34 +0000 Subject: [PATCH] IndexPack: Remove blob-streaming size threshold Always use streaming (for SHA-checksum & collision detection) when indexing whole blobs, regardless of their size. Positives: * benefits of bugfix #312868 will apply to all runtimes, without additional conf for mem-constrained JVMs (5MB huge for some) * no byte array allocation (re-uses readBuffer instead of allocating new full-size array) * mildly better overall performance (given the usual blob-does-not-need-collision-checking case) * removes unnecessary code Negative: * doubles the disk IO for a blob comparision (comparitively rare occurance) I perf-tested a range of threshold sizes against a random selection of packfiles I found on my harddrive, the results are here: https://spreadsheets.google.com/ccc?key=tLCQElyyd2RKN9QevfvgwGQ&hl=en_GB#gid=1 My interpretation of the results is that the streaming size threshold isn't beneficial (actually seems to be very slightly detrimental) -so we should just get rid of it. This tallies with some of the comments Shawn & I had for the default value of streamFileThreshold in the review for I862afd4c: http://egit.eclipse.org/r/#patch,sidebyside,2040,2,org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java The perf-test code is here: https://gist.github.com/735402 It's a bit scruffy but basically does 10 runs (in randomised order) for each threshold size on various packfiles, waiting a second between each pack-indexing to allow GC to catch up. I know it's not perfect - proper perf testing is hard to do :-) --- .../tst/org/eclipse/jgit/transport/IndexPackTest.java | 1 - .../src/org/eclipse/jgit/transport/IndexPack.java | 9 +-------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java index 6015428d9..65929059a 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java @@ -166,7 +166,6 @@ public void testPackWithDuplicateBlob() throws Exception { final byte[] raw = pack.toByteArray(); IndexPack ip = IndexPack.create(db, new ByteArrayInputStream(raw)); - ip.setStreamFileThreshold(1); ip.index(NullProgressMonitor.INSTANCE); ip.renameAndOpenPack(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java index 8feb483fe..bb9edc266 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java @@ -145,8 +145,6 @@ private static enum Source { private final Repository repo; - private int streamFileThreshold; - /** * Object database used for loading existing objects */ @@ -244,7 +242,6 @@ private static enum Source { public IndexPack(final Repository db, final InputStream src, final File dstBase) throws IOException { repo = db; - streamFileThreshold = 5 * (1 << 20); // A reasonable default for now. objectDatabase = db.getObjectDatabase().newCachedDatabase(); in = src; inflater = new InflaterStream(); @@ -268,10 +265,6 @@ public IndexPack(final Repository db, final InputStream src, } } - void setStreamFileThreshold(int sz) { - streamFileThreshold = sz; - } - /** * Set the pack index file format version this instance will create. * @@ -860,7 +853,7 @@ private void whole(final int type, final long pos, final long sz) objectDigest.update((byte) 0); boolean checkContentLater = false; - if (type == Constants.OBJ_BLOB && sz >= streamFileThreshold) { + if (type == Constants.OBJ_BLOB) { InputStream inf = inflate(Source.INPUT, sz); long cnt = 0; while (cnt < sz) {