From dcb02654360e7617380361a3b755dc380c239edf Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Tue, 11 Feb 2020 11:06:33 -0800 Subject: [PATCH] PackBitmapIndex: Reduce memory usage in GC Currently, the garbage collection is consistently failing for some large repositories in the building bitmap phase, e.g.Linux-MSM project: https://source.codeaurora.org/quic/la/kernel/msm-3.18 Historically, bitmap index creation happened in 3 phases: 1. Select the commits to which bitmaps should be attached. 2. Create all bitmaps for these commits, stored in uncompressed format in the PackBitmapIndexBuilder. 3. Deltify the bitmaps and write them to disk. We investigated the process. For phase 2 it's most efficient to create bitmaps starting with oldest commit and moving to the newest commit, because the newer commits are able to reuse the work for the old ones. But for bitmap deltification in phase 3, it's better when a newer commit's bitmap is the base, and the current disk format writes bitmaps out for the newest commits first. This change introduces a new collection to hold the deltified and compressed representations of the bitmaps, keeping a smaller subset of commits in the PackBitmapIndexBuilder to help make the bitmap index creation more memory efficient. And in this commit, we're setting DISTANCE_THRESHOLD to 0 in the PackWriterBitmapPreparer, which means the garbage collection will not have much behavoir change and will still use as much memory as before. Change-Id: I6ec2c3e8dde11805af47874d67d33cf1ef83660e Signed-off-by: Yunjie Li --- .../storage/file/PackBitmapIndexBuilder.java | 144 ++++++++++-------- .../internal/storage/pack/PackWriter.java | 10 +- .../pack/PackWriterBitmapPreparer.java | 18 ++- .../eclipse/jgit/revwalk/BitmapWalker.java | 33 +++- 4 files changed, 136 insertions(+), 69 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java index d87b6996f..5666b5760 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java @@ -11,12 +11,13 @@ package org.eclipse.jgit.internal.storage.file; import java.text.MessageFormat; +import java.util.ArrayList; import java.util.Collections; -import java.util.Iterator; +import java.util.LinkedList; import java.util.List; -import java.util.NoSuchElementException; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.storage.pack.BitmapCommit; import org.eclipse.jgit.internal.storage.pack.ObjectToPack; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.BitmapIndex.Bitmap; @@ -39,8 +40,12 @@ public class PackBitmapIndexBuilder extends BasePackBitmapIndex { private final EWAHCompressedBitmap blobs; private final EWAHCompressedBitmap tags; private final BlockList byOffset; - final BlockList - byAddOrder = new BlockList<>(); + + private final LinkedList + bitmapsToWriteXorBuffer = new LinkedList<>(); + + private List bitmapsToWrite = new ArrayList<>(); + final ObjectIdOwnerMap positionEntries = new ObjectIdOwnerMap<>(); @@ -135,6 +140,63 @@ public void addBitmap(AnyObjectId objectId, Bitmap bitmap, int flags) { addBitmap(objectId, bitmap.retrieveCompressed(), flags); } + /** + * Processes a commit and prepares its bitmap to write to the bitmap index + * file. + * + * @param c + * the commit corresponds to the bitmap. + * @param bitmap + * the bitmap to be written. + * @param flags + * the flags of the commit. + */ + public void processBitmapForWrite(BitmapCommit c, Bitmap bitmap, + int flags) { + EWAHCompressedBitmap compressed = bitmap.retrieveCompressed(); + compressed.trim(); + StoredBitmap newest = new StoredBitmap(c, compressed, null, flags); + + bitmapsToWriteXorBuffer.add(newest); + if (bitmapsToWriteXorBuffer.size() > MAX_XOR_OFFSET_SEARCH) { + bitmapsToWrite.add( + generateStoredEntry(bitmapsToWriteXorBuffer.pollFirst())); + } + + if (c.isAddToIndex()) { + // The Bitmap map in the base class is used to make revwalks + // efficient, so only add bitmaps that keep it efficient without + // bloating memory. + addBitmap(c, bitmap, flags); + } + } + + private StoredEntry generateStoredEntry(StoredBitmap bitmapToWrite) { + int bestXorOffset = 0; + EWAHCompressedBitmap bestBitmap = bitmapToWrite.getBitmap(); + + int offset = 1; + for (StoredBitmap curr : bitmapsToWriteXorBuffer) { + EWAHCompressedBitmap bitmap = curr.getBitmap() + .xor(bitmapToWrite.getBitmap()); + if (bitmap.sizeInBytes() < bestBitmap.sizeInBytes()) { + bestBitmap = bitmap; + bestXorOffset = offset; + } + offset++; + } + + PositionEntry entry = positionEntries.get(bitmapToWrite); + if (entry == null) { + throw new IllegalStateException(); + } + bestBitmap.trim(); + StoredEntry result = new StoredEntry(entry.namePosition, bestBitmap, + bestXorOffset, bitmapToWrite.getFlags()); + + return result; + } + /** * Stores the bitmap for the objectId. * @@ -150,7 +212,6 @@ public void addBitmap( bitmap.trim(); StoredBitmap result = new StoredBitmap(objectId, bitmap, null, flags); getBitmaps().add(result); - byAddOrder.add(result); } /** {@inheritDoc} */ @@ -236,15 +297,18 @@ public int getOptions() { /** {@inheritDoc} */ @Override public int getBitmapCount() { - return getBitmaps().size(); + return bitmapsToWriteXorBuffer.size() + bitmapsToWrite.size(); } /** * Remove all the bitmaps entries added. + * + * @param size + * the expected number of bitmap entries to be written. */ - public void clearBitmaps() { - byAddOrder.clear(); + public void resetBitmaps(int size) { getBitmaps().clear(); + bitmapsToWrite = new ArrayList<>(size); } /** {@inheritDoc} */ @@ -254,64 +318,18 @@ public int getObjectCount() { } /** - * Get an iterator over the xor compressed entries. + * Get list of xor compressed entries that need to be written. * - * @return an iterator over the xor compressed entries. + * @return a list of the xor compressed entries. */ - public Iterable getCompressedBitmaps() { - // Add order is from oldest to newest. The reverse add order is the - // output order. - return () -> new Iterator() { + public List getCompressedBitmaps() { + while (!bitmapsToWriteXorBuffer.isEmpty()) { + bitmapsToWrite.add( + generateStoredEntry(bitmapsToWriteXorBuffer.pollFirst())); + } - private int index = byAddOrder.size() - 1; - - @Override - public boolean hasNext() { - return index >= 0; - } - - @Override - public StoredEntry next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - StoredBitmap item = byAddOrder.get(index); - int bestXorOffset = 0; - EWAHCompressedBitmap bestBitmap = item.getBitmap(); - - // Attempt to compress the bitmap with an XOR of the - // previously written entries. - for (int i = 1; i <= MAX_XOR_OFFSET_SEARCH; i++) { - int curr = i + index; - if (curr >= byAddOrder.size()) { - break; - } - - StoredBitmap other = byAddOrder.get(curr); - EWAHCompressedBitmap bitmap = other.getBitmap() - .xor(item.getBitmap()); - - if (bitmap.sizeInBytes() < bestBitmap.sizeInBytes()) { - bestBitmap = bitmap; - bestXorOffset = i; - } - } - index--; - - PositionEntry entry = positionEntries.get(item); - if (entry == null) { - throw new IllegalStateException(); - } - bestBitmap.trim(); - return new StoredEntry(entry.namePosition, bestBitmap, - bestXorOffset, item.getFlags()); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; + Collections.reverse(bitmapsToWrite); + return bitmapsToWrite; } /** Data object for the on disk representation of a bitmap entry. */ diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java index 0c965fe18..824c62ad9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -2331,8 +2331,14 @@ public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException { throw new IllegalStateException(MessageFormat.format( JGitText.get().bitmapMissingObject, cmit.name(), last.name())); - last = cmit; - writeBitmaps.addBitmap(cmit, bitmap.build(), cmit.getFlags()); + last = BitmapCommit.copyFrom(cmit).build(); + writeBitmaps.processBitmapForWrite(cmit, bitmap.build(), + cmit.getFlags()); + + // The bitmap walker should stop when the walk hits the previous + // commit, which saves time. + walker.setPrevCommit(last); + walker.setPrevBitmap(bitmap); pm.update(1); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java index 0377bccd8..fefe565ad 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java @@ -58,6 +58,8 @@ class PackWriterBitmapPreparer { private static final int DAY_IN_SECONDS = 24 * 60 * 60; + private static final int DISTANCE_THRESHOLD = 0; + private static final Comparator ORDER_BY_REVERSE_TIMESTAMP = ( RevCommit a, RevCommit b) -> Integer .signum(b.getCommitTime() - a.getCommitTime()); @@ -244,6 +246,7 @@ Collection selectCommits(int expectedCommitCount, // This commit is selected. // Calculate where to look for the next one. int flags = nextFlg; + int currDist = distanceFromTip; nextIn = nextSpan(distanceFromTip); nextFlg = nextIn == distantCommitSpan ? PackBitmapIndex.FLAG_REUSE @@ -279,8 +282,17 @@ Collection selectCommits(int expectedCommitCount, longestAncestorChain = new ArrayList<>(); chains.add(longestAncestorChain); } - longestAncestorChain.add(new BitmapCommit(c, - !longestAncestorChain.isEmpty(), flags)); + + // The commit bc should reuse bitmap walker from its close + // ancestor. And the bitmap of bc should only be added to + // PackBitmapIndexBuilder when it's an old enough + // commit,i.e. distance from tip should be greater than + // DISTANCE_THRESHOLD, to save memory. + BitmapCommit bc = BitmapCommit.newBuilder(c).setFlags(flags) + .setAddToIndex(currDist >= DISTANCE_THRESHOLD) + .setReuseWalker(!longestAncestorChain.isEmpty()) + .build(); + longestAncestorChain.add(bc); writeBitmaps.addBitmap(c, bitmap, 0); } @@ -288,12 +300,12 @@ Collection selectCommits(int expectedCommitCount, selections.addAll(chain); } } - writeBitmaps.clearBitmaps(); // Remove the temporary commit bitmaps. // Add the remaining peeledWant for (AnyObjectId remainingWant : selectionHelper.newWants) { selections.add(new BitmapCommit(remainingWant, false, 0)); } + writeBitmaps.resetBitmaps(selections.size()); // Remove the temporary commit bitmaps. pm.endTask(); return selections; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java index 023962e25..aaecd2395 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java @@ -16,6 +16,7 @@ import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.internal.revwalk.AddToBitmapFilter; +import org.eclipse.jgit.internal.revwalk.AddToBitmapWithCacheFilter; import org.eclipse.jgit.internal.revwalk.AddUnseenToBitmapFilter; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.BitmapIndex; @@ -41,6 +42,11 @@ public final class BitmapWalker { private long countOfBitmapIndexMisses; + // Cached bitmap and commit to save walk time. + private AnyObjectId prevCommit; + + private Bitmap prevBitmap; + /** * Create a BitmapWalker. * @@ -55,6 +61,28 @@ public BitmapWalker( this.pm = (pm == null) ? NullProgressMonitor.INSTANCE : pm; } + /** + * Set the cached commit for the walker. + * + * @param prevCommit + * the cached commit. + * @since 5.7 + */ + public void setPrevCommit(AnyObjectId prevCommit) { + this.prevCommit = prevCommit; + } + + /** + * Set the bitmap associated with the cached commit for the walker. + * + * @param prevBitmap + * the bitmap associated with the cached commit. + * @since 5.7 + */ + public void setPrevBitmap(Bitmap prevBitmap) { + this.prevBitmap = prevBitmap; + } + /** * Return the number of objects that had to be walked because they were not covered by a * bitmap. @@ -169,7 +197,10 @@ private BitmapBuilder findObjectsWalk(Iterable start, Bitmap } if (marked) { - if (seen == null) { + if (prevCommit != null) { + walker.setRevFilter(new AddToBitmapWithCacheFilter(prevCommit, + prevBitmap, bitmapResult)); + } else if (seen == null) { walker.setRevFilter(new AddToBitmapFilter(bitmapResult)); } else { walker.setRevFilter(