PackWriter: shed memory while creating bitmaps

Once bitmap creation begins the internal maps required for packing are
no longer necessary.  On a repository with 5.2M objects this can save
more than 438 MiB of memory by allowing the ObjectToPack instances to
get garbage collected away.

Downside is the PackWriter cannot be used for any further operations
except to write the bitmap index.  This is an acceptable trade-off as
in practice nobody uses the PackWriter after the bitmaps are built.

Change-Id: Ibfaf84b22fa0590896a398ff659a91fcf03d7128
This commit is contained in:
Shawn Pearce 2015-08-12 22:58:26 -07:00
parent 20d9ab00ae
commit 4bb523475d
1 changed files with 53 additions and 41 deletions

View File

@ -136,12 +136,14 @@
* order of objects in pack</li> * order of objects in pack</li>
* </ul> * </ul>
* <p> * <p>
* Typical usage consists of creating instance intended for some pack, * Typical usage consists of creating an instance, configuring options,
* configuring options, preparing the list of objects by calling * preparing the list of objects by calling {@link #preparePack(Iterator)} or
* {@link #preparePack(Iterator)} or * {@link #preparePack(ProgressMonitor, Set, Set)}, and streaming with
* {@link #preparePack(ProgressMonitor, Set, Set)}, and finally producing the * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}. If the
* stream with * pack is being stored as a file the matching index can be written out after
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}. * writing the pack by {@link #writeIndex(OutputStream)}. An optional bitmap
* index can be made by calling {@link #prepareBitmapIndex(ProgressMonitor)}
* followed by {@link #writeBitmapIndex(OutputStream)}.
* </p> * </p>
* <p> * <p>
* Class provide set of configurable options and {@link ProgressMonitor} * Class provide set of configurable options and {@link ProgressMonitor}
@ -150,9 +152,10 @@
* relies only on deltas and objects reuse. * relies only on deltas and objects reuse.
* </p> * </p>
* <p> * <p>
* This class is not thread safe, it is intended to be used in one thread, with * This class is not thread safe. It is intended to be used in one thread as a
* one instance per created pack. Subsequent calls to writePack result in * single pass to produce one pack. Invoking methods multiple times or out of
* undefined behavior. * order is not supported as internal data structures are destroyed during
* certain phases to save memory when packing large repositories.
* </p> * </p>
*/ */
public class PackWriter implements AutoCloseable { public class PackWriter implements AutoCloseable {
@ -215,7 +218,7 @@ public static Iterable<PackWriter> getInstances() {
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private final BlockList<ObjectToPack> objectsLists[] = new BlockList[OBJ_TAG + 1]; private BlockList<ObjectToPack> objectsLists[] = new BlockList[OBJ_TAG + 1];
{ {
objectsLists[OBJ_COMMIT] = new BlockList<ObjectToPack>(); objectsLists[OBJ_COMMIT] = new BlockList<ObjectToPack>();
objectsLists[OBJ_TREE] = new BlockList<ObjectToPack>(); objectsLists[OBJ_TREE] = new BlockList<ObjectToPack>();
@ -223,7 +226,7 @@ public static Iterable<PackWriter> getInstances() {
objectsLists[OBJ_TAG] = new BlockList<ObjectToPack>(); objectsLists[OBJ_TAG] = new BlockList<ObjectToPack>();
} }
private final ObjectIdOwnerMap<ObjectToPack> objectsMap = new ObjectIdOwnerMap<ObjectToPack>(); private ObjectIdOwnerMap<ObjectToPack> objectsMap = new ObjectIdOwnerMap<ObjectToPack>();
// edge objects for thin packs // edge objects for thin packs
private List<ObjectToPack> edgeObjects = new BlockList<ObjectToPack>(); private List<ObjectToPack> edgeObjects = new BlockList<ObjectToPack>();
@ -818,10 +821,11 @@ public int getIndexVersion() {
/** /**
* Create an index file to match the pack file just written. * Create an index file to match the pack file just written.
* <p> * <p>
* This method can only be invoked after * Called after
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
* been invoked and completed successfully. Writing a corresponding index is * <p>
* an optional feature that not all pack users may require. * Writing an index is only required for local pack storage. Packs sent on
* the network do not need to create an index.
* *
* @param indexStream * @param indexStream
* output for the index data. Caller is responsible for closing * output for the index data. Caller is responsible for closing
@ -843,10 +847,7 @@ public void writeIndex(final OutputStream indexStream) throws IOException {
/** /**
* Create a bitmap index file to match the pack file just written. * Create a bitmap index file to match the pack file just written.
* <p> * <p>
* This method can only be invoked after * Called after {@link #prepareBitmapIndex(ProgressMonitor)}.
* {@link #prepareBitmapIndex(ProgressMonitor)} has been invoked and
* completed successfully. Writing a corresponding bitmap index is an
* optional feature that not all pack users may require.
* *
* @param bitmapIndexStream * @param bitmapIndexStream
* output for the bitmap index data. Caller is responsible for * output for the bitmap index data. Caller is responsible for
@ -920,14 +921,13 @@ private void endPhase(ProgressMonitor monitor) {
/** /**
* Write the prepared pack to the supplied stream. * Write the prepared pack to the supplied stream.
* <p> * <p>
* At first, this method collects and sorts objects to pack, then deltas * Called after {@link #preparePack(ProgressMonitor, ObjectWalk, Set, Set)}
* search is performed if set up accordingly, finally pack stream is * or {@link #preparePack(ProgressMonitor, Set, Set)}.
* written. * <p>
* </p> * Performs delta search if enabled and writes the pack stream.
* <p> * <p>
* All reused objects data checksum (Adler32/CRC32) is computed and * All reused objects data checksum (Adler32/CRC32) is computed and
* validated against existing checksum. * validated against existing checksum.
* </p>
* *
* @param compressMonitor * @param compressMonitor
* progress monitor to report object compression work. * progress monitor to report object compression work.
@ -941,8 +941,8 @@ private void endPhase(ProgressMonitor monitor) {
* the pack, or writing compressed object data to the output * the pack, or writing compressed object data to the output
* stream. * stream.
* @throws WriteAbortedException * @throws WriteAbortedException
* the write operation is aborted by * the write operation is aborted by {@link ObjectCountCallback}
* {@link ObjectCountCallback}. * .
*/ */
public void writePack(ProgressMonitor compressMonitor, public void writePack(ProgressMonitor compressMonitor,
ProgressMonitor writeMonitor, OutputStream packStream) ProgressMonitor writeMonitor, OutputStream packStream)
@ -1972,14 +1972,17 @@ private final boolean have(ObjectToPack ptr, AnyObjectId objectId) {
} }
/** /**
* Prepares the bitmaps to be written to the pack index. Bitmaps can be used * Prepares the bitmaps to be written to the bitmap index file.
* to speed up fetches and clones by storing the entire object graph at * <p>
* selected commits. * Bitmaps can be used to speed up fetches and clones by storing the entire
* * object graph at selected commits. Writing a bitmap index is an optional
* This method can only be invoked after * feature that not all pack users may require.
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has * <p>
* been invoked and completed successfully. Writing a corresponding bitmap * Called after {@link #writeIndex(OutputStream)}.
* index is an optional feature that not all pack users may require. * <p>
* To reduce memory internal state is cleared during this method, rendering
* the PackWriter instance useless for anything further than a call to write
* out the new bitmaps with {@link #writeBitmapIndex(OutputStream)}.
* *
* @param pm * @param pm
* progress monitor to report bitmap building work. * progress monitor to report bitmap building work.
@ -1995,11 +1998,17 @@ public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException {
if (pm == null) if (pm == null)
pm = NullProgressMonitor.INSTANCE; pm = NullProgressMonitor.INSTANCE;
writeBitmaps = new PackBitmapIndexBuilder(sortByName()); int numCommits = objectsLists[OBJ_COMMIT].size();
List<ObjectToPack> byName = sortByName();
sortedByName = null;
objectsLists = null;
objectsMap = null;
writeBitmaps = new PackBitmapIndexBuilder(byName);
byName = null;
PackWriterBitmapPreparer bitmapPreparer = new PackWriterBitmapPreparer( PackWriterBitmapPreparer bitmapPreparer = new PackWriterBitmapPreparer(
reader, writeBitmaps, pm, stats.interestingObjects); reader, writeBitmaps, pm, stats.interestingObjects);
int numCommits = objectsLists[OBJ_COMMIT].size();
Collection<PackWriterBitmapPreparer.BitmapCommit> selectedCommits = Collection<PackWriterBitmapPreparer.BitmapCommit> selectedCommits =
bitmapPreparer.doCommitSelection(numCommits); bitmapPreparer.doCommitSelection(numCommits);
@ -2356,11 +2365,14 @@ private class MutableState {
State snapshot() { State snapshot() {
long objCnt = 0; long objCnt = 0;
objCnt += objectsLists[OBJ_COMMIT].size(); BlockList<ObjectToPack>[] lists = objectsLists;
objCnt += objectsLists[OBJ_TREE].size(); if (lists != null) {
objCnt += objectsLists[OBJ_BLOB].size(); objCnt += lists[OBJ_COMMIT].size();
objCnt += objectsLists[OBJ_TAG].size(); objCnt += lists[OBJ_TREE].size();
// Exclude CachedPacks. objCnt += lists[OBJ_BLOB].size();
objCnt += lists[OBJ_TAG].size();
// Exclude CachedPacks.
}
long bytesUsed = OBJECT_TO_PACK_SIZE * objCnt; long bytesUsed = OBJECT_TO_PACK_SIZE * objCnt;
PackingPhase curr = phase; PackingPhase curr = phase;