PackWriter: Reduce GC during enumeration

Instead of resizing an ArrayList until all objects have been added,
append objects into a specialized List type that uses small arrays
of 1024 entries for each 1024 objects added.

For a large repository like linux-2.6, PackWriter will now allocate
1,758 smaller arrays to hold the object list, without creating any
garbage from the intermediate states due to list expansion.

1024 was chosen as the block size (and initial directory size) as this
is a reasonable balance for the PackWriter code.  Each block uses
approximately 4096 bytes in a 32 bit JVM, as does the default top
level block directory.  The top level directory doesn't expand until 1
million items have been added to the list, which for linux-2.6 won't
yet occur as the lists are per-object-type and are thus bounded to
about 1/3 of 1.8 million.

Change-Id: If9e4092eb502394c5d3d044b58cf49952772f6d6
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
Shawn O. Pearce 2011-03-07 17:49:08 -08:00
parent 1f57061684
commit f67e5602af
4 changed files with 648 additions and 20 deletions

View File

@ -0,0 +1,336 @@
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import java.util.Iterator;
import org.junit.Test;
public class BlockListTest {
@Test
public void testEmptyList() {
BlockList<String> empty;
empty = new BlockList<String>();
assertEquals(0, empty.size());
assertTrue(empty.isEmpty());
assertFalse(empty.iterator().hasNext());
empty = new BlockList<String>(0);
assertEquals(0, empty.size());
assertTrue(empty.isEmpty());
assertFalse(empty.iterator().hasNext());
empty = new BlockList<String>(1);
assertEquals(0, empty.size());
assertTrue(empty.isEmpty());
assertFalse(empty.iterator().hasNext());
empty = new BlockList<String>(64);
assertEquals(0, empty.size());
assertTrue(empty.isEmpty());
assertFalse(empty.iterator().hasNext());
}
@Test
public void testGet() {
BlockList<String> list = new BlockList<String>(4);
try {
list.get(-1);
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(-1), badIndex.getMessage());
}
try {
list.get(0);
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(0), badIndex.getMessage());
}
try {
list.get(4);
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(4), badIndex.getMessage());
}
String fooStr = "foo";
String barStr = "bar";
String foobarStr = "foobar";
list.add(fooStr);
list.add(barStr);
list.add(foobarStr);
assertSame(fooStr, list.get(0));
assertSame(barStr, list.get(1));
assertSame(foobarStr, list.get(2));
try {
list.get(3);
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(3), badIndex.getMessage());
}
}
@Test
public void testSet() {
BlockList<String> list = new BlockList<String>(4);
try {
list.set(-1, "foo");
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(-1), badIndex.getMessage());
}
try {
list.set(0, "foo");
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(0), badIndex.getMessage());
}
try {
list.set(4, "foo");
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(4), badIndex.getMessage());
}
String fooStr = "foo";
String barStr = "bar";
String foobarStr = "foobar";
list.add(fooStr);
list.add(barStr);
list.add(foobarStr);
assertSame(fooStr, list.get(0));
assertSame(barStr, list.get(1));
assertSame(foobarStr, list.get(2));
assertSame(fooStr, list.set(0, barStr));
assertSame(barStr, list.set(1, fooStr));
assertSame(barStr, list.get(0));
assertSame(fooStr, list.get(1));
try {
list.set(3, "bar");
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(3), badIndex.getMessage());
}
}
@Test
public void testAddToEnd() {
BlockList<Integer> list = new BlockList<Integer>(4);
int cnt = BlockList.BLOCK_SIZE * 3;
for (int i = 0; i < cnt; i++)
list.add(Integer.valueOf(42 + i));
assertEquals(cnt, list.size());
for (int i = 0; i < cnt; i++)
assertEquals(Integer.valueOf(42 + i), list.get(i));
list.clear();
assertEquals(0, list.size());
assertTrue(list.isEmpty());
for (int i = 0; i < cnt; i++)
list.add(i, Integer.valueOf(42 + i));
assertEquals(cnt, list.size());
for (int i = 0; i < cnt; i++)
assertEquals(Integer.valueOf(42 + i), list.get(i));
}
@Test
public void testAddSlowPath() {
BlockList<String> list = new BlockList<String>(4);
String fooStr = "foo";
String barStr = "bar";
String foobarStr = "foobar";
String firstStr = "first";
String zeroStr = "zero";
list.add(fooStr);
list.add(barStr);
list.add(foobarStr);
assertEquals(3, list.size());
list.add(1, firstStr);
assertEquals(4, list.size());
assertSame(fooStr, list.get(0));
assertSame(firstStr, list.get(1));
assertSame(barStr, list.get(2));
assertSame(foobarStr, list.get(3));
list.add(0, zeroStr);
assertEquals(5, list.size());
assertSame(zeroStr, list.get(0));
assertSame(fooStr, list.get(1));
assertSame(firstStr, list.get(2));
assertSame(barStr, list.get(3));
assertSame(foobarStr, list.get(4));
}
@Test
public void testRemoveFromEnd() {
BlockList<String> list = new BlockList<String>(4);
String fooStr = "foo";
String barStr = "bar";
String foobarStr = "foobar";
list.add(fooStr);
list.add(barStr);
list.add(foobarStr);
assertSame(foobarStr, list.remove(2));
assertEquals(2, list.size());
assertSame(barStr, list.remove(1));
assertEquals(1, list.size());
assertSame(fooStr, list.remove(0));
assertEquals(0, list.size());
}
@Test
public void testRemoveSlowPath() {
BlockList<String> list = new BlockList<String>(4);
String fooStr = "foo";
String barStr = "bar";
String foobarStr = "foobar";
list.add(fooStr);
list.add(barStr);
list.add(foobarStr);
assertSame(barStr, list.remove(1));
assertEquals(2, list.size());
assertSame(fooStr, list.get(0));
assertSame(foobarStr, list.get(1));
assertSame(fooStr, list.remove(0));
assertEquals(1, list.size());
assertSame(foobarStr, list.get(0));
assertSame(foobarStr, list.remove(0));
assertEquals(0, list.size());
}
@Test
public void testAddRemoveAdd() {
BlockList<Integer> list = new BlockList<Integer>();
for (int i = 0; i < BlockList.BLOCK_SIZE + 1; i++)
list.add(Integer.valueOf(i));
assertEquals(Integer.valueOf(BlockList.BLOCK_SIZE),
list.remove(list.size() - 1));
assertEquals(Integer.valueOf(BlockList.BLOCK_SIZE - 1),
list.remove(list.size() - 1));
assertTrue(list.add(Integer.valueOf(1)));
assertEquals(Integer.valueOf(1), list.get(list.size() - 1));
}
@Test
public void testFastIterator() {
BlockList<Integer> list = new BlockList<Integer>(4);
int cnt = BlockList.BLOCK_SIZE * 3;
for (int i = 0; i < cnt; i++)
list.add(Integer.valueOf(42 + i));
assertEquals(cnt, list.size());
Iterator<Integer> itr = list.iterator();
for (int i = 0; i < cnt; i++) {
assertTrue(itr.hasNext());
assertEquals(Integer.valueOf(42 + i), itr.next());
}
assertFalse(itr.hasNext());
}
@Test
public void testAddRejectsBadIndexes() {
BlockList<Integer> list = new BlockList<Integer>(4);
list.add(Integer.valueOf(41));
try {
list.add(-1, Integer.valueOf(42));
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(-1), badIndex.getMessage());
}
try {
list.add(4, Integer.valueOf(42));
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(4), badIndex.getMessage());
}
}
@Test
public void testRemoveRejectsBadIndexes() {
BlockList<Integer> list = new BlockList<Integer>(4);
list.add(Integer.valueOf(41));
try {
list.remove(-1);
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(-1), badIndex.getMessage());
}
try {
list.remove(4);
} catch (IndexOutOfBoundsException badIndex) {
assertEquals(String.valueOf(4), badIndex.getMessage());
}
}
}

View File

@ -99,6 +99,7 @@
import org.eclipse.jgit.revwalk.RevTag;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.storage.file.PackIndexWriter;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.TemporaryBuffer;
/**
@ -141,16 +142,16 @@ public class PackWriter {
private final List<ObjectToPack> objectsLists[] = new List[Constants.OBJ_TAG + 1];
{
objectsLists[0] = Collections.<ObjectToPack> emptyList();
objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
objectsLists[Constants.OBJ_TREE] = new ArrayList<ObjectToPack>();
objectsLists[Constants.OBJ_BLOB] = new ArrayList<ObjectToPack>();
objectsLists[Constants.OBJ_TAG] = new ArrayList<ObjectToPack>();
objectsLists[Constants.OBJ_COMMIT] = new BlockList<ObjectToPack>();
objectsLists[Constants.OBJ_TREE] = new BlockList<ObjectToPack>();
objectsLists[Constants.OBJ_BLOB] = new BlockList<ObjectToPack>();
objectsLists[Constants.OBJ_TAG] = new BlockList<ObjectToPack>();
}
private final ObjectIdSubclassMap<ObjectToPack> objectsMap = new ObjectIdSubclassMap<ObjectToPack>();
// edge objects for thin packs
private List<ObjectToPack> edgeObjects = new ArrayList<ObjectToPack>();
private List<ObjectToPack> edgeObjects = new BlockList<ObjectToPack>();
private List<CachedPack> cachedPacks = new ArrayList<CachedPack>(2);
@ -586,11 +587,9 @@ private List<ObjectToPack> sortByName() {
int cnt = 0;
for (List<ObjectToPack> list : objectsLists)
cnt += list.size();
sortedByName = new ArrayList<ObjectToPack>(cnt);
for (List<ObjectToPack> list : objectsLists) {
for (ObjectToPack otp : list)
sortedByName.add(otp);
}
sortedByName = new BlockList<ObjectToPack>(cnt);
for (List<ObjectToPack> list : objectsLists)
sortedByName.addAll(list);
Collections.sort(sortedByName);
}
return sortedByName;
@ -1298,7 +1297,7 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
int typesToPrune = 0;
final int maxBases = config.getDeltaSearchWindowSize();
Set<RevTree> baseTrees = new HashSet<RevTree>();
List<RevCommit> commits = new ArrayList<RevCommit>();
BlockList<RevCommit> commits = new BlockList<RevCommit>();
RevCommit c;
while ((c = walker.next()) != null) {
if (c.has(inCachedPack)) {
@ -1306,7 +1305,7 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
if (includesAllTips(pack, include, walker)) {
useCachedPack(walker, keepOnRestart, //
wantObjs, haveObjs, pack);
commits = new ArrayList<RevCommit>();
commits = new BlockList<RevCommit>();
countingMonitor.endTask();
countingMonitor.beginTask(JGitText.get().countingObjects,
@ -1325,11 +1324,6 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
countingMonitor.update(1);
}
if (objectsLists[Constants.OBJ_COMMIT] instanceof ArrayList) {
ArrayList<ObjectToPack> list = (ArrayList<ObjectToPack>) objectsLists[Constants.OBJ_COMMIT];
list.ensureCapacity(list.size() + commits.size());
}
int commitCnt = 0;
boolean putTagTargets = false;
for (RevCommit cmit : commits) {
@ -1436,7 +1430,7 @@ private void pruneObjectList(int typesToPrune, int typeCode) {
}
while (dst < list.size())
list.remove(dst);
list.remove(list.size() - 1);
}
private void useCachedPack(ObjectWalk walker, RevFlagSet keepOnRestart,

View File

@ -76,6 +76,7 @@
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.storage.file.PackLock;
import org.eclipse.jgit.storage.pack.BinaryDelta;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.NB;
@ -165,7 +166,7 @@ public static enum Source {
private LongMap<UnresolvedDelta> baseByPos;
/** Blobs whose contents need to be double-checked after indexing. */
private List<PackedObjectInfo> deferredCheckBlobs;
private BlockList<PackedObjectInfo> deferredCheckBlobs;
private MessageDigest packDigest;
@ -439,7 +440,7 @@ public PackLock parse(ProgressMonitor receiving, ProgressMonitor resolving)
entries = new PackedObjectInfo[(int) objectCount];
baseById = new ObjectIdSubclassMap<DeltaChain>();
baseByPos = new LongMap<UnresolvedDelta>();
deferredCheckBlobs = new ArrayList<PackedObjectInfo>();
deferredCheckBlobs = new BlockList<PackedObjectInfo>();
receiving.beginTask(JGitText.get().receivingObjects,
(int) objectCount);

View File

@ -0,0 +1,297 @@
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
* Random access list that allocates entries in blocks.
* <p>
* Unlike {@link java.util.ArrayList}, this type does not need to reallocate the
* internal array in order to expand the capacity of the list. Access to any
* element is constant time, but requires two array lookups instead of one.
* <p>
* To handle common usages, {@link #add(Object)} and {@link #iterator()} use
* internal code paths to amortize out the second array lookup, making addition
* and simple iteration closer to one array operation per element processed.
* <p>
* Similar to {@code ArrayList}, adding or removing from any position except the
* end of the list requires O(N) time to copy all elements between the
* modification point and the end of the list. Applications are strongly
* encouraged to not use this access pattern with this list implementation.
*
* @param <T>
* type of list element.
*/
public class BlockList<T> extends AbstractList<T> {
private static final int BLOCK_BITS = 10;
static final int BLOCK_SIZE = 1 << BLOCK_BITS;
private static final int BLOCK_MASK = BLOCK_SIZE - 1;
private T[][] directory;
private int size;
private int tailDirIdx;
private int tailBlkIdx;
private T[] tailBlock;
/** Initialize an empty list. */
public BlockList() {
directory = BlockList.<T> newDirectory(256);
directory[0] = BlockList.<T> newBlock();
tailBlock = directory[0];
}
/**
* Initialize an empty list with an expected capacity.
*
* @param capacity
* number of elements expected to be in the list.
*/
public BlockList(int capacity) {
int dirSize = toDirectoryIndex(capacity);
if ((capacity & BLOCK_MASK) != 0 || dirSize == 0)
dirSize++;
directory = BlockList.<T> newDirectory(dirSize);
directory[0] = BlockList.<T> newBlock();
tailBlock = directory[0];
}
@Override
public int size() {
return size;
}
@Override
public void clear() {
for (T[] block : directory) {
if (block != null)
Arrays.fill(block, null);
}
size = 0;
tailDirIdx = 0;
tailBlkIdx = 0;
tailBlock = directory[0];
}
@Override
public T get(int index) {
if (index < 0 || size <= index)
throw new IndexOutOfBoundsException(String.valueOf(index));
return directory[toDirectoryIndex(index)][toBlockIndex(index)];
}
@Override
public T set(int index, T element) {
if (index < 0 || size <= index)
throw new IndexOutOfBoundsException(String.valueOf(index));
T[] blockRef = directory[toDirectoryIndex(index)];
int blockIdx = toBlockIndex(index);
T old = blockRef[blockIdx];
blockRef[blockIdx] = element;
return old;
}
@Override
public boolean add(T element) {
int i = tailBlkIdx;
if (i < BLOCK_SIZE) {
// Fast-path: Append to current tail block.
tailBlock[i] = element;
tailBlkIdx = i + 1;
size++;
return true;
}
// Slow path: Move to the next block, expanding if necessary.
if (++tailDirIdx == directory.length) {
T[][] newDir = BlockList.<T> newDirectory(directory.length << 1);
System.arraycopy(directory, 0, newDir, 0, directory.length);
directory = newDir;
}
T[] blockRef = directory[tailDirIdx];
if (blockRef == null) {
blockRef = BlockList.<T> newBlock();
directory[tailDirIdx] = blockRef;
}
blockRef[0] = element;
tailBlock = blockRef;
tailBlkIdx = 1;
size++;
return true;
}
@Override
public void add(int index, T element) {
if (index == size) {
// Fast-path: append onto the end of the list.
add(element);
} else if (index < 0 || size < index) {
throw new IndexOutOfBoundsException(String.valueOf(index));
} else {
// Slow-path: the list needs to expand and insert.
// Do this the naive way, callers shouldn't abuse
// this class by entering this code path.
//
add(null); // expand the list by one
for (int oldIdx = size - 2; index <= oldIdx; oldIdx--)
set(oldIdx + 1, get(oldIdx));
set(index, element);
}
}
@Override
public T remove(int index) {
if (index == size - 1) {
// Fast-path: remove the last element.
T[] blockRef = directory[toDirectoryIndex(index)];
int blockIdx = toBlockIndex(index);
T old = blockRef[blockIdx];
blockRef[blockIdx] = null;
size--;
if (0 < tailBlkIdx)
tailBlkIdx--;
else
resetTailBlock();
return old;
} else if (index < 0 || size <= index) {
throw new IndexOutOfBoundsException(String.valueOf(index));
} else {
// Slow-path: the list needs to contract and remove.
// Do this the naive way, callers shouldn't abuse
// this class by entering this code path.
//
T old = get(index);
for (; index < size - 1; index++)
set(index, get(index + 1));
set(size - 1, null);
size--;
resetTailBlock();
return old;
}
}
private void resetTailBlock() {
tailDirIdx = toDirectoryIndex(size);
tailBlkIdx = toBlockIndex(size);
tailBlock = directory[tailDirIdx];
}
@Override
public Iterator<T> iterator() {
return new MyIterator();
}
private static final int toDirectoryIndex(int index) {
return index >>> BLOCK_BITS;
}
private static final int toBlockIndex(int index) {
return index & BLOCK_MASK;
}
@SuppressWarnings("unchecked")
private static <T> T[][] newDirectory(int size) {
return (T[][]) new Object[size][];
}
@SuppressWarnings("unchecked")
private static <T> T[] newBlock() {
return (T[]) new Object[BLOCK_SIZE];
}
private class MyIterator implements Iterator<T> {
private int index;
private int dirIdx;
private int blkIdx;
private T[] block = directory[dirIdx];
public boolean hasNext() {
return index < size;
}
public T next() {
if (size <= index)
throw new NoSuchElementException();
T res = block[blkIdx];
if (++blkIdx == BLOCK_SIZE) {
if (++dirIdx < directory.length)
block = directory[dirIdx];
else
block = null;
blkIdx = 0;
}
index++;
return res;
}
public void remove() {
if (index == 0)
throw new IllegalStateException();
BlockList.this.remove(--index);
dirIdx = toDirectoryIndex(index);
blkIdx = toBlockIndex(index);
block = directory[dirIdx];
}
}
}