PackWriter: shed memory while creating bitmaps

Once bitmap creation begins the internal maps required for packing are
no longer necessary.  On a repository with 5.2M objects this can save
more than 438 MiB of memory by allowing the ObjectToPack instances to
get garbage collected away.

Downside is the PackWriter cannot be used for any further opertions
except to write the bitmap index.  This is an acceptable trade-off as
in practice nobody uses the PackWriter after the bitmaps are built.

Change-Id: Ibfaf84b22fa0590896a398ff659a91fcf03d7128
This commit is contained in:
Shawn Pearce 2015-08-12 22:58:26 -07:00
parent 20d9ab00ae
commit 4bb523475d
1 changed files with 53 additions and 41 deletions

View File

@ -136,12 +136,14 @@
* order of objects in pack</li>
* </ul>
* <p>
* Typical usage consists of creating instance intended for some pack,
* configuring options, preparing the list of objects by calling
* {@link #preparePack(Iterator)} or
* {@link #preparePack(ProgressMonitor, Set, Set)}, and finally producing the
* stream with
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
* Typical usage consists of creating an instance, configuring options,
* preparing the list of objects by calling {@link #preparePack(Iterator)} or
* {@link #preparePack(ProgressMonitor, Set, Set)}, and streaming with
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}. If the
* pack is being stored as a file the matching index can be written out after
* writing the pack by {@link #writeIndex(OutputStream)}. An optional bitmap
* index can be made by calling {@link #prepareBitmapIndex(ProgressMonitor)}
* followed by {@link #writeBitmapIndex(OutputStream)}.
* </p>
* <p>
* Class provide set of configurable options and {@link ProgressMonitor}
@ -150,9 +152,10 @@
* relies only on deltas and objects reuse.
* </p>
* <p>
* This class is not thread safe, it is intended to be used in one thread, with
* one instance per created pack. Subsequent calls to writePack result in
* undefined behavior.
* This class is not thread safe. It is intended to be used in one thread as a
* single pass to produce one pack. Invoking methods multiple times or out of
* order is not supported as internal data structures are destroyed during
* certain phases to save memory when packing large repositories.
* </p>
*/
public class PackWriter implements AutoCloseable {
@ -215,7 +218,7 @@ public static Iterable<PackWriter> getInstances() {
}
@SuppressWarnings("unchecked")
private final BlockList<ObjectToPack> objectsLists[] = new BlockList[OBJ_TAG + 1];
private BlockList<ObjectToPack> objectsLists[] = new BlockList[OBJ_TAG + 1];
{
objectsLists[OBJ_COMMIT] = new BlockList<ObjectToPack>();
objectsLists[OBJ_TREE] = new BlockList<ObjectToPack>();
@ -223,7 +226,7 @@ public static Iterable<PackWriter> getInstances() {
objectsLists[OBJ_TAG] = new BlockList<ObjectToPack>();
}
private final ObjectIdOwnerMap<ObjectToPack> objectsMap = new ObjectIdOwnerMap<ObjectToPack>();
private ObjectIdOwnerMap<ObjectToPack> objectsMap = new ObjectIdOwnerMap<ObjectToPack>();
// edge objects for thin packs
private List<ObjectToPack> edgeObjects = new BlockList<ObjectToPack>();
@ -818,10 +821,11 @@ public int getIndexVersion() {
/**
* Create an index file to match the pack file just written.
* <p>
* This method can only be invoked after
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has
* been invoked and completed successfully. Writing a corresponding index is
* an optional feature that not all pack users may require.
* Called after
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
* <p>
* Writing an index is only required for local pack storage. Packs sent on
* the network do not need to create an index.
*
* @param indexStream
* output for the index data. Caller is responsible for closing
@ -843,10 +847,7 @@ public void writeIndex(final OutputStream indexStream) throws IOException {
/**
* Create a bitmap index file to match the pack file just written.
* <p>
* This method can only be invoked after
* {@link #prepareBitmapIndex(ProgressMonitor)} has been invoked and
* completed successfully. Writing a corresponding bitmap index is an
* optional feature that not all pack users may require.
* Called after {@link #prepareBitmapIndex(ProgressMonitor)}.
*
* @param bitmapIndexStream
* output for the bitmap index data. Caller is responsible for
@ -920,14 +921,13 @@ private void endPhase(ProgressMonitor monitor) {
/**
* Write the prepared pack to the supplied stream.
* <p>
* At first, this method collects and sorts objects to pack, then deltas
* search is performed if set up accordingly, finally pack stream is
* written.
* </p>
* Called after {@link #preparePack(ProgressMonitor, ObjectWalk, Set, Set)}
* or {@link #preparePack(ProgressMonitor, Set, Set)}.
* <p>
* Performs delta search if enabled and writes the pack stream.
* <p>
* All reused objects data checksum (Adler32/CRC32) is computed and
* validated against existing checksum.
* </p>
*
* @param compressMonitor
* progress monitor to report object compression work.
@ -941,8 +941,8 @@ private void endPhase(ProgressMonitor monitor) {
* the pack, or writing compressed object data to the output
* stream.
* @throws WriteAbortedException
* the write operation is aborted by
* {@link ObjectCountCallback}.
* the write operation is aborted by {@link ObjectCountCallback}
* .
*/
public void writePack(ProgressMonitor compressMonitor,
ProgressMonitor writeMonitor, OutputStream packStream)
@ -1972,14 +1972,17 @@ private final boolean have(ObjectToPack ptr, AnyObjectId objectId) {
}
/**
* Prepares the bitmaps to be written to the pack index. Bitmaps can be used
* to speed up fetches and clones by storing the entire object graph at
* selected commits.
*
* This method can only be invoked after
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has
* been invoked and completed successfully. Writing a corresponding bitmap
* index is an optional feature that not all pack users may require.
* Prepares the bitmaps to be written to the bitmap index file.
* <p>
* Bitmaps can be used to speed up fetches and clones by storing the entire
* object graph at selected commits. Writing a bitmap index is an optional
* feature that not all pack users may require.
* <p>
* Called after {@link #writeIndex(OutputStream)}.
* <p>
* To reduce memory internal state is cleared during this method, rendering
* the PackWriter instance useless for anything further than a call to write
* out the new bitmaps with {@link #writeBitmapIndex(OutputStream)}.
*
* @param pm
* progress monitor to report bitmap building work.
@ -1995,11 +1998,17 @@ public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException {
if (pm == null)
pm = NullProgressMonitor.INSTANCE;
writeBitmaps = new PackBitmapIndexBuilder(sortByName());
int numCommits = objectsLists[OBJ_COMMIT].size();
List<ObjectToPack> byName = sortByName();
sortedByName = null;
objectsLists = null;
objectsMap = null;
writeBitmaps = new PackBitmapIndexBuilder(byName);
byName = null;
PackWriterBitmapPreparer bitmapPreparer = new PackWriterBitmapPreparer(
reader, writeBitmaps, pm, stats.interestingObjects);
int numCommits = objectsLists[OBJ_COMMIT].size();
Collection<PackWriterBitmapPreparer.BitmapCommit> selectedCommits =
bitmapPreparer.doCommitSelection(numCommits);
@ -2356,11 +2365,14 @@ private class MutableState {
State snapshot() {
long objCnt = 0;
objCnt += objectsLists[OBJ_COMMIT].size();
objCnt += objectsLists[OBJ_TREE].size();
objCnt += objectsLists[OBJ_BLOB].size();
objCnt += objectsLists[OBJ_TAG].size();
// Exclude CachedPacks.
BlockList<ObjectToPack>[] lists = objectsLists;
if (lists != null) {
objCnt += lists[OBJ_COMMIT].size();
objCnt += lists[OBJ_TREE].size();
objCnt += lists[OBJ_BLOB].size();
objCnt += lists[OBJ_TAG].size();
// Exclude CachedPacks.
}
long bytesUsed = OBJECT_TO_PACK_SIZE * objCnt;
PackingPhase curr = phase;