From 840e414d0b0b817e7c154664914d19cb2b6d0387 Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Mon, 10 Feb 2020 15:22:31 -0800 Subject: [PATCH 1/8] Refactor: Make retriveCompressed an method of the Bitmap class Make retrieveCompressed() a method of Bitmap interface to avoid type casting and later reuse in improving the memory footprint of GC's bitmap generation phase. Change-Id: I098d85105cf17af845d43b8c71b4ca48b02fd7da Signed-off-by: Yunjie Li --- .../jgit/internal/storage/file/BitmapIndexImpl.java | 8 +++++++- .../storage/file/PackBitmapIndexBuilder.java | 13 +------------ .../src/org/eclipse/jgit/lib/BitmapIndex.java | 10 ++++++++++ 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java index 6aa1a0e8e..0d3a2b4f1 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java @@ -252,6 +252,11 @@ public BitmapIndexImpl getBitmapIndex() { return bitmapIndex; } + @Override + public EWAHCompressedBitmap retrieveCompressed() { + return build().retrieveCompressed(); + } + private EWAHCompressedBitmap ewahBitmap(Bitmap other) { if (other instanceof CompressedBitmap) { CompressedBitmap b = (CompressedBitmap) other; @@ -372,7 +377,8 @@ public void remove() { }; } - EWAHCompressedBitmap getEwahCompressedBitmap() { + @Override + public EWAHCompressedBitmap retrieveCompressed() { return bitmap; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java index 9538cc5e0..d87b6996f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java @@ -17,11 +17,9 @@ import java.util.NoSuchElementException; import org.eclipse.jgit.internal.JGitText; -import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl.CompressedBitmap; import org.eclipse.jgit.internal.storage.pack.ObjectToPack; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.BitmapIndex.Bitmap; -import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectIdOwnerMap; @@ -134,16 +132,7 @@ public ObjectIdOwnerMap getObjectSet() { * the flags to be stored with the bitmap */ public void addBitmap(AnyObjectId objectId, Bitmap bitmap, int flags) { - if (bitmap instanceof BitmapBuilder) - bitmap = ((BitmapBuilder) bitmap).build(); - - EWAHCompressedBitmap compressed; - if (bitmap instanceof CompressedBitmap) - compressed = ((CompressedBitmap) bitmap).getEwahCompressedBitmap(); - else - throw new IllegalArgumentException(bitmap.getClass().toString()); - - addBitmap(objectId, compressed, flags); + addBitmap(objectId, bitmap.retrieveCompressed(), flags); } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/BitmapIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/BitmapIndex.java index f61286d6d..f6695bdf7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/BitmapIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/BitmapIndex.java @@ -14,6 +14,8 @@ import org.eclipse.jgit.internal.storage.file.PackBitmapIndex; +import com.googlecode.javaewah.EWAHCompressedBitmap; + /** * A compressed bitmap representation of the entire object graph. * @@ -81,6 +83,14 @@ public interface Bitmap extends Iterable { */ @Override Iterator iterator(); + + /** + * Returns the corresponding raw compressed EWAH bitmap of the bitmap. + * + * @return the corresponding {@code EWAHCompressedBitmap} + * @since 5.8 + */ + EWAHCompressedBitmap retrieveCompressed(); } /** From d23254ee5751dd9dd154b338d73d5c23fffd1616 Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Wed, 22 Apr 2020 13:12:05 -0700 Subject: [PATCH 2/8] PackBitmapIndex: Move BitmapCommit to a top-level class Move BitmapCommit from inside the PackWriterBitmapPreparer to a new top-level class in preparation for improving the memory footprint of GC's bitmap generation phase. Change-Id: I4d404a5b3a34998b441d23105197f33d32d39670 Signed-off-by: Yunjie Li --- .../storage/pack/GcCommitSelectionTest.java | 1 - .../internal/storage/pack/BitmapCommit.java | 35 +++++++++++++++++++ .../internal/storage/pack/PackWriter.java | 4 +-- .../pack/PackWriterBitmapPreparer.java | 22 ------------ 4 files changed, 37 insertions(+), 25 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BitmapCommit.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java index f2876b785..cc826c30b 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java @@ -23,7 +23,6 @@ import org.eclipse.jgit.internal.storage.file.GcTestCase; import org.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder; -import org.eclipse.jgit.internal.storage.pack.PackWriterBitmapPreparer.BitmapCommit; import org.eclipse.jgit.junit.TestRepository.BranchBuilder; import org.eclipse.jgit.junit.TestRepository.CommitBuilder; import org.eclipse.jgit.lib.Constants; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BitmapCommit.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BitmapCommit.java new file mode 100644 index 000000000..cbf1ccfd9 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BitmapCommit.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.pack; + +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.ObjectId; + +/** + * A commit object for which a bitmap index should be built. + */ +public final class BitmapCommit extends ObjectId { + private final boolean reuseWalker; + private final int flags; + + BitmapCommit(AnyObjectId objectId, boolean reuseWalker, int flags) { + super(objectId); + this.reuseWalker = reuseWalker; + this.flags = flags; + } + + boolean isReuseWalker() { + return reuseWalker; + } + + int getFlags() { + return flags; + } +} \ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java index 75dd345f4..0c965fe18 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -2313,14 +2313,14 @@ public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException { PackWriterBitmapPreparer bitmapPreparer = new PackWriterBitmapPreparer( reader, writeBitmaps, pm, stats.interestingObjects, config); - Collection selectedCommits = bitmapPreparer + Collection selectedCommits = bitmapPreparer .selectCommits(numCommits, excludeFromBitmapSelection); beginPhase(PackingPhase.BUILDING_BITMAPS, pm, selectedCommits.size()); BitmapWalker walker = bitmapPreparer.newBitmapWalker(); AnyObjectId last = null; - for (PackWriterBitmapPreparer.BitmapCommit cmit : selectedCommits) { + for (BitmapCommit cmit : selectedCommits) { if (!cmit.isReuseWalker()) { walker = bitmapPreparer.newBitmapWalker(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java index 51b4993e2..0377bccd8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java @@ -467,28 +467,6 @@ BitmapWalker newBitmapWalker() { new ObjectWalk(reader), bitmapIndex, null); } - /** - * A commit object for which a bitmap index should be built. - */ - static final class BitmapCommit extends ObjectId { - private final boolean reuseWalker; - private final int flags; - - BitmapCommit(AnyObjectId objectId, boolean reuseWalker, int flags) { - super(objectId); - this.reuseWalker = reuseWalker; - this.flags = flags; - } - - boolean isReuseWalker() { - return reuseWalker; - } - - int getFlags() { - return flags; - } - } - /** * Container for state used in the first phase of selecting commits, which * walks all of the reachable commits via the branch tips that are not From b1d4b457081e7e31d292237991a4fe20e6d1b689 Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Wed, 22 Apr 2020 13:17:47 -0700 Subject: [PATCH 3/8] PackBitmapIndex: Add util methods and builder to BitmapCommit Add some utility methods and a builder class for BitmapCommit class in preparation for improving the memory footprint of GC's bitmap generation phase. Change-Id: Ice3d257fc26f3917a65a64eaf53b508b89043caa Signed-off-by: Yunjie Li --- .../internal/storage/pack/BitmapCommit.java | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BitmapCommit.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BitmapCommit.java index cbf1ccfd9..33c478e4a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BitmapCommit.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/BitmapCommit.java @@ -16,13 +16,26 @@ * A commit object for which a bitmap index should be built. */ public final class BitmapCommit extends ObjectId { + private final boolean reuseWalker; + private final int flags; + private final boolean addToIndex; + BitmapCommit(AnyObjectId objectId, boolean reuseWalker, int flags) { super(objectId); this.reuseWalker = reuseWalker; this.flags = flags; + this.addToIndex = false; + } + + BitmapCommit(AnyObjectId objectId, boolean reuseWalker, int flags, + boolean addToIndex) { + super(objectId); + this.reuseWalker = reuseWalker; + this.flags = flags; + this.addToIndex = addToIndex; } boolean isReuseWalker() { @@ -32,4 +45,119 @@ boolean isReuseWalker() { int getFlags() { return flags; } + + /** + * Whether corresponding bitmap should be added to PackBitmapIndexBuilder. + * + * @return true if the corresponding bitmap should be added to + * PackBitmapIndexBuilder. + */ + public boolean isAddToIndex() { + return addToIndex; + } + + /** + * Get a builder of BitmapCommit whose object id is {@code objId}. + * + * @param objId + * the object id of the BitmapCommit + * @return a BitmapCommit builder with object id set. + */ + public static Builder newBuilder(AnyObjectId objId) { + return new Builder().setId(objId); + } + + /** + * Get a builder of BitmapCommit whose fields are copied from + * {@code commit}. + * + * @param commit + * the bitmap commit the builder is copying from + * @return a BitmapCommit build with fields copied from an existing bitmap + * commit. + */ + public static Builder copyFrom(BitmapCommit commit) { + return new Builder().setId(commit) + .setReuseWalker(commit.isReuseWalker()) + .setFlags(commit.getFlags()) + .setAddToIndex(commit.isAddToIndex()); + } + + /** + * Builder of BitmapCommit. + */ + public static class Builder { + private AnyObjectId objectId; + + private boolean reuseWalker; + + private int flags; + + private boolean addToIndex; + + // Prevent default constructor. + private Builder() { + } + + /** + * Set objectId of the builder. + * + * @param objectId + * the object id of the BitmapCommit + * @return the builder itself + */ + public Builder setId(AnyObjectId objectId) { + this.objectId = objectId; + return this; + } + + /** + * Set reuseWalker of the builder. + * + * @param reuseWalker + * whether the BitmapCommit should reuse bitmap walker when + * walking objects + * @return the builder itself + */ + public Builder setReuseWalker(boolean reuseWalker) { + this.reuseWalker = reuseWalker; + return this; + } + + /** + * Set flags of the builder. + * + * @param flags + * the flags of the BitmapCommit + * @return the builder itself + */ + public Builder setFlags(int flags) { + this.flags = flags; + return this; + } + + /** + * Set whether whether the bitmap of the BitmapCommit should be added to + * PackBitmapIndexBuilder when building bitmap index file. + * + * @param addToIndex + * whether the bitmap of the BitmapCommit should be added to + * PackBitmapIndexBuilder when building bitmap index file + * @return the builder itself + */ + public Builder setAddToIndex(boolean addToIndex) { + this.addToIndex = addToIndex; + return this; + } + + /** + * Builds BitmapCommit from the builder. + * + * @return the new BitmapCommit. + */ + public BitmapCommit build() { + return new BitmapCommit(objectId, reuseWalker, flags, + addToIndex); + } + } } \ No newline at end of file From 067d946090c4db43134687c93d30192ad1314bec Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Mon, 10 Feb 2020 17:02:13 -0800 Subject: [PATCH 4/8] PackBitmapIndex: Add AddToBitmapWithCacheFilter class Add a new revwalk filter, AddToBitmapWithCachedFilter. This filter updates a client-provided {@code BitmapBuilder} as a side effect of a revwalk. Similar to {@code AddToBitmapFilter}, it short circuits the walk when it encounters a commit which is included in the provided bitmap's BitmapIndex. It also short circuits the walk if it encounters the client-provided cached commit. Change-Id: I62cb503016f4d3995d648d92b82baab7f93549a9 Signed-off-by: Yunjie Li --- .../revwalk/AddToBitmapWithCacheFilter.java | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/revwalk/AddToBitmapWithCacheFilter.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/revwalk/AddToBitmapWithCacheFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/revwalk/AddToBitmapWithCacheFilter.java new file mode 100644 index 000000000..d7ccadfbe --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/revwalk/AddToBitmapWithCacheFilter.java @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.revwalk; + +import org.eclipse.jgit.lib.BitmapIndex.Bitmap; +import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.revwalk.filter.RevFilter; +import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevFlag; + +/** + * A RevFilter that adds the visited commits to {@code bitmap} as a side effect. + *

+ * When the walk hits a commit that is the same as {@code cachedCommit} or is + * part of {@code bitmap}'s BitmapIndex, that entire bitmap is ORed into + * {@code bitmap} and the commit and its parents are marked as SEEN so that the + * walk does not have to visit its ancestors. This ensures the walk is very + * short if there is good bitmap coverage. + */ +public class AddToBitmapWithCacheFilter extends RevFilter { + private final AnyObjectId cachedCommit; + + private final Bitmap cachedBitmap; + + private final BitmapBuilder bitmap; + + /** + * Create a filter with a cached BitmapCommit that adds visited commits to + * the given bitmap. + * + * @param cachedCommit + * the cached commit + * @param cachedBitmap + * the bitmap corresponds to {@code cachedCommit}} + * @param bitmap + * bitmap to write visited commits to + */ + public AddToBitmapWithCacheFilter(AnyObjectId cachedCommit, + Bitmap cachedBitmap, + BitmapBuilder bitmap) { + this.cachedCommit = cachedCommit; + this.cachedBitmap = cachedBitmap; + this.bitmap = bitmap; + } + + /** {@inheritDoc} */ + @Override + public final boolean include(RevWalk rw, RevCommit c) { + Bitmap visitedBitmap; + + if (bitmap.contains(c)) { + // already included + } else if ((visitedBitmap = bitmap.getBitmapIndex() + .getBitmap(c)) != null) { + bitmap.or(visitedBitmap); + } else if (cachedCommit.equals(c)) { + bitmap.or(cachedBitmap); + } else { + bitmap.addObject(c, Constants.OBJ_COMMIT); + return true; + } + + for (RevCommit p : c.getParents()) { + p.add(RevFlag.SEEN); + } + return false; + } + + /** {@inheritDoc} */ + @Override + public final RevFilter clone() { + throw new UnsupportedOperationException(); + } + + /** {@inheritDoc} */ + @Override + public final boolean requiresCommitBody() { + return false; + } +} + From dcb02654360e7617380361a3b755dc380c239edf Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Tue, 11 Feb 2020 11:06:33 -0800 Subject: [PATCH 5/8] PackBitmapIndex: Reduce memory usage in GC Currently, the garbage collection is consistently failing for some large repositories in the building bitmap phase, e.g.Linux-MSM project: https://source.codeaurora.org/quic/la/kernel/msm-3.18 Historically, bitmap index creation happened in 3 phases: 1. Select the commits to which bitmaps should be attached. 2. Create all bitmaps for these commits, stored in uncompressed format in the PackBitmapIndexBuilder. 3. Deltify the bitmaps and write them to disk. We investigated the process. For phase 2 it's most efficient to create bitmaps starting with oldest commit and moving to the newest commit, because the newer commits are able to reuse the work for the old ones. But for bitmap deltification in phase 3, it's better when a newer commit's bitmap is the base, and the current disk format writes bitmaps out for the newest commits first. This change introduces a new collection to hold the deltified and compressed representations of the bitmaps, keeping a smaller subset of commits in the PackBitmapIndexBuilder to help make the bitmap index creation more memory efficient. And in this commit, we're setting DISTANCE_THRESHOLD to 0 in the PackWriterBitmapPreparer, which means the garbage collection will not have much behavoir change and will still use as much memory as before. Change-Id: I6ec2c3e8dde11805af47874d67d33cf1ef83660e Signed-off-by: Yunjie Li --- .../storage/file/PackBitmapIndexBuilder.java | 144 ++++++++++-------- .../internal/storage/pack/PackWriter.java | 10 +- .../pack/PackWriterBitmapPreparer.java | 18 ++- .../eclipse/jgit/revwalk/BitmapWalker.java | 33 +++- 4 files changed, 136 insertions(+), 69 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java index d87b6996f..5666b5760 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java @@ -11,12 +11,13 @@ package org.eclipse.jgit.internal.storage.file; import java.text.MessageFormat; +import java.util.ArrayList; import java.util.Collections; -import java.util.Iterator; +import java.util.LinkedList; import java.util.List; -import java.util.NoSuchElementException; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.storage.pack.BitmapCommit; import org.eclipse.jgit.internal.storage.pack.ObjectToPack; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.BitmapIndex.Bitmap; @@ -39,8 +40,12 @@ public class PackBitmapIndexBuilder extends BasePackBitmapIndex { private final EWAHCompressedBitmap blobs; private final EWAHCompressedBitmap tags; private final BlockList byOffset; - final BlockList - byAddOrder = new BlockList<>(); + + private final LinkedList + bitmapsToWriteXorBuffer = new LinkedList<>(); + + private List bitmapsToWrite = new ArrayList<>(); + final ObjectIdOwnerMap positionEntries = new ObjectIdOwnerMap<>(); @@ -135,6 +140,63 @@ public void addBitmap(AnyObjectId objectId, Bitmap bitmap, int flags) { addBitmap(objectId, bitmap.retrieveCompressed(), flags); } + /** + * Processes a commit and prepares its bitmap to write to the bitmap index + * file. + * + * @param c + * the commit corresponds to the bitmap. + * @param bitmap + * the bitmap to be written. + * @param flags + * the flags of the commit. + */ + public void processBitmapForWrite(BitmapCommit c, Bitmap bitmap, + int flags) { + EWAHCompressedBitmap compressed = bitmap.retrieveCompressed(); + compressed.trim(); + StoredBitmap newest = new StoredBitmap(c, compressed, null, flags); + + bitmapsToWriteXorBuffer.add(newest); + if (bitmapsToWriteXorBuffer.size() > MAX_XOR_OFFSET_SEARCH) { + bitmapsToWrite.add( + generateStoredEntry(bitmapsToWriteXorBuffer.pollFirst())); + } + + if (c.isAddToIndex()) { + // The Bitmap map in the base class is used to make revwalks + // efficient, so only add bitmaps that keep it efficient without + // bloating memory. + addBitmap(c, bitmap, flags); + } + } + + private StoredEntry generateStoredEntry(StoredBitmap bitmapToWrite) { + int bestXorOffset = 0; + EWAHCompressedBitmap bestBitmap = bitmapToWrite.getBitmap(); + + int offset = 1; + for (StoredBitmap curr : bitmapsToWriteXorBuffer) { + EWAHCompressedBitmap bitmap = curr.getBitmap() + .xor(bitmapToWrite.getBitmap()); + if (bitmap.sizeInBytes() < bestBitmap.sizeInBytes()) { + bestBitmap = bitmap; + bestXorOffset = offset; + } + offset++; + } + + PositionEntry entry = positionEntries.get(bitmapToWrite); + if (entry == null) { + throw new IllegalStateException(); + } + bestBitmap.trim(); + StoredEntry result = new StoredEntry(entry.namePosition, bestBitmap, + bestXorOffset, bitmapToWrite.getFlags()); + + return result; + } + /** * Stores the bitmap for the objectId. * @@ -150,7 +212,6 @@ public void addBitmap( bitmap.trim(); StoredBitmap result = new StoredBitmap(objectId, bitmap, null, flags); getBitmaps().add(result); - byAddOrder.add(result); } /** {@inheritDoc} */ @@ -236,15 +297,18 @@ public int getOptions() { /** {@inheritDoc} */ @Override public int getBitmapCount() { - return getBitmaps().size(); + return bitmapsToWriteXorBuffer.size() + bitmapsToWrite.size(); } /** * Remove all the bitmaps entries added. + * + * @param size + * the expected number of bitmap entries to be written. */ - public void clearBitmaps() { - byAddOrder.clear(); + public void resetBitmaps(int size) { getBitmaps().clear(); + bitmapsToWrite = new ArrayList<>(size); } /** {@inheritDoc} */ @@ -254,64 +318,18 @@ public int getObjectCount() { } /** - * Get an iterator over the xor compressed entries. + * Get list of xor compressed entries that need to be written. * - * @return an iterator over the xor compressed entries. + * @return a list of the xor compressed entries. */ - public Iterable getCompressedBitmaps() { - // Add order is from oldest to newest. The reverse add order is the - // output order. - return () -> new Iterator() { + public List getCompressedBitmaps() { + while (!bitmapsToWriteXorBuffer.isEmpty()) { + bitmapsToWrite.add( + generateStoredEntry(bitmapsToWriteXorBuffer.pollFirst())); + } - private int index = byAddOrder.size() - 1; - - @Override - public boolean hasNext() { - return index >= 0; - } - - @Override - public StoredEntry next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - StoredBitmap item = byAddOrder.get(index); - int bestXorOffset = 0; - EWAHCompressedBitmap bestBitmap = item.getBitmap(); - - // Attempt to compress the bitmap with an XOR of the - // previously written entries. - for (int i = 1; i <= MAX_XOR_OFFSET_SEARCH; i++) { - int curr = i + index; - if (curr >= byAddOrder.size()) { - break; - } - - StoredBitmap other = byAddOrder.get(curr); - EWAHCompressedBitmap bitmap = other.getBitmap() - .xor(item.getBitmap()); - - if (bitmap.sizeInBytes() < bestBitmap.sizeInBytes()) { - bestBitmap = bitmap; - bestXorOffset = i; - } - } - index--; - - PositionEntry entry = positionEntries.get(item); - if (entry == null) { - throw new IllegalStateException(); - } - bestBitmap.trim(); - return new StoredEntry(entry.namePosition, bestBitmap, - bestXorOffset, item.getFlags()); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; + Collections.reverse(bitmapsToWrite); + return bitmapsToWrite; } /** Data object for the on disk representation of a bitmap entry. */ diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java index 0c965fe18..824c62ad9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -2331,8 +2331,14 @@ public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException { throw new IllegalStateException(MessageFormat.format( JGitText.get().bitmapMissingObject, cmit.name(), last.name())); - last = cmit; - writeBitmaps.addBitmap(cmit, bitmap.build(), cmit.getFlags()); + last = BitmapCommit.copyFrom(cmit).build(); + writeBitmaps.processBitmapForWrite(cmit, bitmap.build(), + cmit.getFlags()); + + // The bitmap walker should stop when the walk hits the previous + // commit, which saves time. + walker.setPrevCommit(last); + walker.setPrevBitmap(bitmap); pm.update(1); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java index 0377bccd8..fefe565ad 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java @@ -58,6 +58,8 @@ class PackWriterBitmapPreparer { private static final int DAY_IN_SECONDS = 24 * 60 * 60; + private static final int DISTANCE_THRESHOLD = 0; + private static final Comparator ORDER_BY_REVERSE_TIMESTAMP = ( RevCommit a, RevCommit b) -> Integer .signum(b.getCommitTime() - a.getCommitTime()); @@ -244,6 +246,7 @@ Collection selectCommits(int expectedCommitCount, // This commit is selected. // Calculate where to look for the next one. int flags = nextFlg; + int currDist = distanceFromTip; nextIn = nextSpan(distanceFromTip); nextFlg = nextIn == distantCommitSpan ? PackBitmapIndex.FLAG_REUSE @@ -279,8 +282,17 @@ Collection selectCommits(int expectedCommitCount, longestAncestorChain = new ArrayList<>(); chains.add(longestAncestorChain); } - longestAncestorChain.add(new BitmapCommit(c, - !longestAncestorChain.isEmpty(), flags)); + + // The commit bc should reuse bitmap walker from its close + // ancestor. And the bitmap of bc should only be added to + // PackBitmapIndexBuilder when it's an old enough + // commit,i.e. distance from tip should be greater than + // DISTANCE_THRESHOLD, to save memory. + BitmapCommit bc = BitmapCommit.newBuilder(c).setFlags(flags) + .setAddToIndex(currDist >= DISTANCE_THRESHOLD) + .setReuseWalker(!longestAncestorChain.isEmpty()) + .build(); + longestAncestorChain.add(bc); writeBitmaps.addBitmap(c, bitmap, 0); } @@ -288,12 +300,12 @@ Collection selectCommits(int expectedCommitCount, selections.addAll(chain); } } - writeBitmaps.clearBitmaps(); // Remove the temporary commit bitmaps. // Add the remaining peeledWant for (AnyObjectId remainingWant : selectionHelper.newWants) { selections.add(new BitmapCommit(remainingWant, false, 0)); } + writeBitmaps.resetBitmaps(selections.size()); // Remove the temporary commit bitmaps. pm.endTask(); return selections; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java index 023962e25..aaecd2395 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java @@ -16,6 +16,7 @@ import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.internal.revwalk.AddToBitmapFilter; +import org.eclipse.jgit.internal.revwalk.AddToBitmapWithCacheFilter; import org.eclipse.jgit.internal.revwalk.AddUnseenToBitmapFilter; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.BitmapIndex; @@ -41,6 +42,11 @@ public final class BitmapWalker { private long countOfBitmapIndexMisses; + // Cached bitmap and commit to save walk time. + private AnyObjectId prevCommit; + + private Bitmap prevBitmap; + /** * Create a BitmapWalker. * @@ -55,6 +61,28 @@ public BitmapWalker( this.pm = (pm == null) ? NullProgressMonitor.INSTANCE : pm; } + /** + * Set the cached commit for the walker. + * + * @param prevCommit + * the cached commit. + * @since 5.7 + */ + public void setPrevCommit(AnyObjectId prevCommit) { + this.prevCommit = prevCommit; + } + + /** + * Set the bitmap associated with the cached commit for the walker. + * + * @param prevBitmap + * the bitmap associated with the cached commit. + * @since 5.7 + */ + public void setPrevBitmap(Bitmap prevBitmap) { + this.prevBitmap = prevBitmap; + } + /** * Return the number of objects that had to be walked because they were not covered by a * bitmap. @@ -169,7 +197,10 @@ private BitmapBuilder findObjectsWalk(Iterable start, Bitmap } if (marked) { - if (seen == null) { + if (prevCommit != null) { + walker.setRevFilter(new AddToBitmapWithCacheFilter(prevCommit, + prevBitmap, bitmapResult)); + } else if (seen == null) { walker.setRevFilter(new AddToBitmapFilter(bitmapResult)); } else { walker.setRevFilter( From e250482c7af5e1750b241683a1afde35ed020fee Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Thu, 23 Apr 2020 15:11:14 -0700 Subject: [PATCH 6/8] PackBitmapIndex: Remove convertedBitmaps in the Remapper The convertedBitmaps serves for time-optimization purpose. But it's actually not saving time much but using lots of memory. So remove the field here to save memory. Currently the remapper class is only used in the construction of the bitmap index file. And during the preparation of the file, we're only getting bitmaps from the remapper when finding objects accessible from a commit, so bitmap associated with each commit will only be fetched once and thus the convertedBitmaps would hardly be read, which means that it's not saving time. Change-Id: Ic942a8e485135fb177ec21d09282d08ca6646fdb Signed-off-by: Yunjie Li --- .../internal/storage/file/PackBitmapIndexRemapper.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java index 273eeef7e..4b2528451 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java @@ -18,7 +18,6 @@ import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.BitmapIndex; import org.eclipse.jgit.lib.ObjectId; -import org.eclipse.jgit.lib.ObjectIdOwnerMap; import com.googlecode.javaewah.EWAHCompressedBitmap; import com.googlecode.javaewah.IntIterator; @@ -34,7 +33,6 @@ public class PackBitmapIndexRemapper extends PackBitmapIndex private final BasePackBitmapIndex oldPackIndex; final PackBitmapIndex newPackIndex; - private final ObjectIdOwnerMap convertedBitmaps; private final BitSet inflated; private final int[] prevToNewMapping; @@ -65,7 +63,6 @@ public static PackBitmapIndexRemapper newPackBitmapIndex( private PackBitmapIndexRemapper(PackBitmapIndex newPackIndex) { this.oldPackIndex = null; this.newPackIndex = newPackIndex; - this.convertedBitmaps = null; this.inflated = null; this.prevToNewMapping = null; } @@ -74,7 +71,6 @@ private PackBitmapIndexRemapper( BasePackBitmapIndex oldPackIndex, PackBitmapIndex newPackIndex) { this.oldPackIndex = oldPackIndex; this.newPackIndex = newPackIndex; - convertedBitmaps = new ObjectIdOwnerMap<>(); inflated = new BitSet(newPackIndex.getObjectCount()); prevToNewMapping = new int[oldPackIndex.getObjectCount()]; @@ -152,10 +148,6 @@ public EWAHCompressedBitmap getBitmap(AnyObjectId objectId) { if (bitmap != null || oldPackIndex == null) return bitmap; - StoredBitmap stored = convertedBitmaps.get(objectId); - if (stored != null) - return stored.getBitmap(); - StoredBitmap oldBitmap = oldPackIndex.getBitmaps().get(objectId); if (oldBitmap == null) return null; @@ -168,8 +160,6 @@ public EWAHCompressedBitmap getBitmap(AnyObjectId objectId) { inflated.set(prevToNewMapping[i.next()]); bitmap = inflated.toEWAHCompressedBitmap(); bitmap.trim(); - convertedBitmaps.add( - new StoredBitmap(objectId, bitmap, null, oldBitmap.getFlags())); return bitmap; } From 3aee92478c2cbc67cd921533437b824e43ed9798 Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Thu, 23 Apr 2020 15:12:15 -0700 Subject: [PATCH 7/8] PackBitmapIndex: Not buffer inflated bitmap in BasePackBitmapIndex Currently we're buffering the inflated bitmap entry in BasePackBitmapIndex to optimize running time. However, this will use lots of memory during the construction of the pack bitmap index file which may cause failure of garbage collection. The running time didn't increase significantly, if there's any increase, after removing the buffering here. The report about usage of time/memory will come in the next commit. Change-Id: I874503ecc85714acab7ca62a6a7968c2dc0b56b3 Signed-off-by: Yunjie Li --- .../eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java | 1 - 1 file changed, 1 deletion(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java index c9bb167f0..74b46bcee 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java @@ -72,7 +72,6 @@ EWAHCompressedBitmap getBitmap() { if (r instanceof EWAHCompressedBitmap) { out = out.xor((EWAHCompressedBitmap) r); out.trim(); - bitmapContainer = out; return out; } xb = (XorCompressedBitmap) r; From 913234e2ec299a30c8795d84e5ef52a683416cde Mon Sep 17 00:00:00 2001 From: Yunjie Li Date: Wed, 29 Apr 2020 15:27:47 -0700 Subject: [PATCH 8/8] PackBitmapIndex: Set distance threshold Setting the distance threshold to 2000 in PackWriterBitmapPreparer to reduce memory usage in garbage collection. When the threshold is 0, GC for the msm repository would use about 37 GB memory to complete. After setting it to 2000, GC can finish in 75 min with about 10 GB memory. Change-Id: I39783eeecbae58261c883735499e61ee1cac75fe Signed-off-by: Yunjie Li --- .../jgit/internal/storage/pack/PackWriterBitmapPreparer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java index fefe565ad..f1ede2acf 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java @@ -58,7 +58,7 @@ class PackWriterBitmapPreparer { private static final int DAY_IN_SECONDS = 24 * 60 * 60; - private static final int DISTANCE_THRESHOLD = 0; + private static final int DISTANCE_THRESHOLD = 2000; private static final Comparator ORDER_BY_REVERSE_TIMESTAMP = ( RevCommit a, RevCommit b) -> Integer