From 4bb523475d44ec1c4d9b4f92944a359aef99894c Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Wed, 12 Aug 2015 22:58:26 -0700 Subject: [PATCH] PackWriter: shed memory while creating bitmaps Once bitmap creation begins the internal maps required for packing are no longer necessary. On a repository with 5.2M objects this can save more than 438 MiB of memory by allowing the ObjectToPack instances to get garbage collected away. Downside is the PackWriter cannot be used for any further operations except to write the bitmap index. This is an acceptable trade-off as in practice nobody uses the PackWriter after the bitmaps are built. Change-Id: Ibfaf84b22fa0590896a398ff659a91fcf03d7128 --- .../internal/storage/pack/PackWriter.java | 94 +++++++++++-------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java index bd0b0e7c3..a88502c2f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -136,12 +136,14 @@ * order of objects in pack * *

- * Typical usage consists of creating instance intended for some pack, - * configuring options, preparing the list of objects by calling - * {@link #preparePack(Iterator)} or - * {@link #preparePack(ProgressMonitor, Set, Set)}, and finally producing the - * stream with - * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}. + * Typical usage consists of creating an instance, configuring options, + * preparing the list of objects by calling {@link #preparePack(Iterator)} or + * {@link #preparePack(ProgressMonitor, Set, Set)}, and streaming with + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}. If the + * pack is being stored as a file the matching index can be written out after + * writing the pack by {@link #writeIndex(OutputStream)}. An optional bitmap + * index can be made by calling {@link #prepareBitmapIndex(ProgressMonitor)} + * followed by {@link #writeBitmapIndex(OutputStream)}. *

*

* Class provide set of configurable options and {@link ProgressMonitor} @@ -150,9 +152,10 @@ * relies only on deltas and objects reuse. *

*

- * This class is not thread safe, it is intended to be used in one thread, with - * one instance per created pack. Subsequent calls to writePack result in - * undefined behavior. + * This class is not thread safe. It is intended to be used in one thread as a + * single pass to produce one pack. Invoking methods multiple times or out of + * order is not supported as internal data structures are destroyed during + * certain phases to save memory when packing large repositories. *

*/ public class PackWriter implements AutoCloseable { @@ -215,7 +218,7 @@ public static Iterable getInstances() { } @SuppressWarnings("unchecked") - private final BlockList objectsLists[] = new BlockList[OBJ_TAG + 1]; + private BlockList objectsLists[] = new BlockList[OBJ_TAG + 1]; { objectsLists[OBJ_COMMIT] = new BlockList(); objectsLists[OBJ_TREE] = new BlockList(); @@ -223,7 +226,7 @@ public static Iterable getInstances() { objectsLists[OBJ_TAG] = new BlockList(); } - private final ObjectIdOwnerMap objectsMap = new ObjectIdOwnerMap(); + private ObjectIdOwnerMap objectsMap = new ObjectIdOwnerMap(); // edge objects for thin packs private List edgeObjects = new BlockList(); @@ -818,10 +821,11 @@ public int getIndexVersion() { /** * Create an index file to match the pack file just written. *

- * This method can only be invoked after - * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has - * been invoked and completed successfully. Writing a corresponding index is - * an optional feature that not all pack users may require. + * Called after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}. + *

+ * Writing an index is only required for local pack storage. Packs sent on + * the network do not need to create an index. * * @param indexStream * output for the index data. Caller is responsible for closing @@ -843,10 +847,7 @@ public void writeIndex(final OutputStream indexStream) throws IOException { /** * Create a bitmap index file to match the pack file just written. *

- * This method can only be invoked after - * {@link #prepareBitmapIndex(ProgressMonitor)} has been invoked and - * completed successfully. Writing a corresponding bitmap index is an - * optional feature that not all pack users may require. + * Called after {@link #prepareBitmapIndex(ProgressMonitor)}. * * @param bitmapIndexStream * output for the bitmap index data. Caller is responsible for @@ -920,14 +921,13 @@ private void endPhase(ProgressMonitor monitor) { /** * Write the prepared pack to the supplied stream. *

- * At first, this method collects and sorts objects to pack, then deltas - * search is performed if set up accordingly, finally pack stream is - * written. - *

+ * Called after {@link #preparePack(ProgressMonitor, ObjectWalk, Set, Set)} + * or {@link #preparePack(ProgressMonitor, Set, Set)}. + *

+ * Performs delta search if enabled and writes the pack stream. *

* All reused objects data checksum (Adler32/CRC32) is computed and * validated against existing checksum. - *

* * @param compressMonitor * progress monitor to report object compression work. @@ -941,8 +941,8 @@ private void endPhase(ProgressMonitor monitor) { * the pack, or writing compressed object data to the output * stream. * @throws WriteAbortedException - * the write operation is aborted by - * {@link ObjectCountCallback}. + * the write operation is aborted by {@link ObjectCountCallback} + * . */ public void writePack(ProgressMonitor compressMonitor, ProgressMonitor writeMonitor, OutputStream packStream) @@ -1972,14 +1972,17 @@ private final boolean have(ObjectToPack ptr, AnyObjectId objectId) { } /** - * Prepares the bitmaps to be written to the pack index. Bitmaps can be used - * to speed up fetches and clones by storing the entire object graph at - * selected commits. - * - * This method can only be invoked after - * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has - * been invoked and completed successfully. Writing a corresponding bitmap - * index is an optional feature that not all pack users may require. + * Prepares the bitmaps to be written to the bitmap index file. + *

+ * Bitmaps can be used to speed up fetches and clones by storing the entire + * object graph at selected commits. Writing a bitmap index is an optional + * feature that not all pack users may require. + *

+ * Called after {@link #writeIndex(OutputStream)}. + *

+ * To reduce memory internal state is cleared during this method, rendering + * the PackWriter instance useless for anything further than a call to write + * out the new bitmaps with {@link #writeBitmapIndex(OutputStream)}. * * @param pm * progress monitor to report bitmap building work. @@ -1995,11 +1998,17 @@ public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException { if (pm == null) pm = NullProgressMonitor.INSTANCE; - writeBitmaps = new PackBitmapIndexBuilder(sortByName()); + int numCommits = objectsLists[OBJ_COMMIT].size(); + List byName = sortByName(); + sortedByName = null; + objectsLists = null; + objectsMap = null; + writeBitmaps = new PackBitmapIndexBuilder(byName); + byName = null; + PackWriterBitmapPreparer bitmapPreparer = new PackWriterBitmapPreparer( reader, writeBitmaps, pm, stats.interestingObjects); - int numCommits = objectsLists[OBJ_COMMIT].size(); Collection selectedCommits = bitmapPreparer.doCommitSelection(numCommits); @@ -2356,11 +2365,14 @@ private class MutableState { State snapshot() { long objCnt = 0; - objCnt += objectsLists[OBJ_COMMIT].size(); - objCnt += objectsLists[OBJ_TREE].size(); - objCnt += objectsLists[OBJ_BLOB].size(); - objCnt += objectsLists[OBJ_TAG].size(); - // Exclude CachedPacks. + BlockList[] lists = objectsLists; + if (lists != null) { + objCnt += lists[OBJ_COMMIT].size(); + objCnt += lists[OBJ_TREE].size(); + objCnt += lists[OBJ_BLOB].size(); + objCnt += lists[OBJ_TAG].size(); + // Exclude CachedPacks. + } long bytesUsed = OBJECT_TO_PACK_SIZE * objCnt; PackingPhase curr = phase;