Limit the range of commits for which bitmaps are created.

A bitmap index contains bitmaps for a set of commits in a pack file.
Creating a bitmap for every commit is too expensive, so heuristics
select the most "important" commits. The most recent commits are the
most valuable. To clone a repository only those for the branch tips are
needed. When fetching, only commits since the last fetch are needed.

The commit selection heuristics generally work, but for some
repositories the number of selected commits is prohibitively high. One
example is the MSM 3.10 Linux kernel. With over 1 million commits on
2820 branches, the current heuristics resulted in +36k selected commits.
Each uncompressed bitmap for that repository is ~413k, making it
difficult to complete a GC operation in available memory.

The benefit of creating bitmaps over the entire history of a repository
like the MSM 3.10 Linux kernel isn't clear. For that repository, most
history for the last year appears to be in the last 100k commits.
Limiting bitmap commit selection to just those commits reduces the count
of selected commits from ~36k to ~10.5k. Dropping bitmaps for older
commits does not affect object counting times for clones or for fetches
on clients that are reasonably up-to-date.

This patch defines a new "bitmapCommitRange" PackConfig parameter to
limit the commit selection process when building bitmaps. The range
starts with the most recent commit and walks backwards. A range of 10k
considers only the 10000 most recent commits. A range of zero creates
bitmaps only for branch tips. A range of -1 (the default) does not limit
the range--all commits in the pack are used in the commit selection
process.

Change-Id: Ied92c70cfa0778facc670e0f14a0980bed5e3bfb
Signed-off-by: Terry Parker <tparker@google.com>
This commit is contained in:
Terry Parker 2015-10-08 15:06:37 -07:00
parent cd1b00e3fc
commit 2524157d3d
9 changed files with 159 additions and 11 deletions

View File

@ -85,6 +85,7 @@ public void testPackRepoWithNoRefs(boolean aggressive) throws Exception {
assertEquals(4, stats.numberOfLooseObjects);
assertEquals(0, stats.numberOfPackedObjects);
assertEquals(0, stats.numberOfPackFiles);
assertEquals(0, stats.numberOfBitmaps);
}
@Theory
@ -102,6 +103,7 @@ public void testPack2Commits(boolean aggressive) throws Exception {
assertEquals(0, stats.numberOfLooseObjects);
assertEquals(8, stats.numberOfPackedObjects);
assertEquals(1, stats.numberOfPackFiles);
assertEquals(2, stats.numberOfBitmaps);
}
@Theory
@ -118,6 +120,7 @@ public void testPackAllObjectsInOnePack(boolean aggressive)
assertEquals(0, stats.numberOfLooseObjects);
assertEquals(4, stats.numberOfPackedObjects);
assertEquals(1, stats.numberOfPackFiles);
assertEquals(1, stats.numberOfBitmaps);
// Do the gc again and check that it hasn't changed anything
gc.gc();
@ -125,6 +128,7 @@ public void testPackAllObjectsInOnePack(boolean aggressive)
assertEquals(0, stats.numberOfLooseObjects);
assertEquals(4, stats.numberOfPackedObjects);
assertEquals(1, stats.numberOfPackFiles);
assertEquals(1, stats.numberOfBitmaps);
}
@Theory
@ -143,6 +147,7 @@ public void testPackCommitsAndLooseOne(boolean aggressive) throws Exception {
assertEquals(0, stats.numberOfLooseObjects);
assertEquals(8, stats.numberOfPackedObjects);
assertEquals(2, stats.numberOfPackFiles);
assertEquals(1, stats.numberOfBitmaps);
}
@Theory
@ -220,6 +225,74 @@ public void testDonePruneTooYoungPacks() throws Exception {
}
@Test
public void testCommitRangeForBitmaps() throws Exception {
BranchBuilder bb1 = tr.branch("refs/heads/master");
bb1.commit().message("A1").add("A1", "A1").create();
bb1.commit().message("B1").add("B1", "B1").create();
bb1.commit().message("C1").add("C1", "C1").create();
BranchBuilder bb2 = tr.branch("refs/heads/working");
bb2.commit().message("A2").add("A2", "A2").create();
bb2.commit().message("B2").add("B2", "B2").create();
bb2.commit().message("C2").add("C2", "C2").create();
// Consider all commits. Since history isn't deep all commits are
// selected.
configureGcRange(gc, -1);
gc.gc();
assertEquals(6, gc.getStatistics().numberOfBitmaps);
// Range==0 means don't examine commit history, create bitmaps only for
// branch tips, C1 & C2.
configureGcRange(gc, 0);
gc.gc();
assertEquals(2, gc.getStatistics().numberOfBitmaps);
// Consider only the most recent commit (C2, which is also a branch
// tip).
configureGcRange(gc, 1);
gc.gc();
assertEquals(2, gc.getStatistics().numberOfBitmaps);
// Consider only the two most recent commits, C2 & B2. C1 gets included
// too since it is a branch tip.
configureGcRange(gc, 2);
gc.gc();
assertEquals(3, gc.getStatistics().numberOfBitmaps);
// Consider C2 & B2 & A2. C1 gets included too since it is a branch tip.
configureGcRange(gc, 3);
gc.gc();
assertEquals(4, gc.getStatistics().numberOfBitmaps);
// Consider C2 & B2 & A2 & C1.
configureGcRange(gc, 4);
gc.gc();
assertEquals(4, gc.getStatistics().numberOfBitmaps);
// Consider C2 & B2 & A2 & C1 & B1.
configureGcRange(gc, 5);
gc.gc();
assertEquals(5, gc.getStatistics().numberOfBitmaps);
// Consider all six commits.
configureGcRange(gc, 6);
gc.gc();
assertEquals(6, gc.getStatistics().numberOfBitmaps);
// Input is out of range but should be capped to the total number of
// commits.
configureGcRange(gc, 1000);
gc.gc();
assertEquals(6, gc.getStatistics().numberOfBitmaps);
}
private void configureGcRange(GC myGc, int range) {
PackConfig pconfig = new PackConfig(repo);
pconfig.setBitmapCommitRange(range);
myGc.setPackConfig(pconfig);
}
private void configureGc(GC myGc, boolean aggressive) {
PackConfig pconfig = new PackConfig(repo);
if (aggressive) {

View File

@ -877,6 +877,11 @@ public class RepoStatistics {
*/
public long numberOfPackedRefs;
/**
* The number of bitmaps in the bitmap indices.
*/
public long numberOfBitmaps;
public String toString() {
final StringBuilder b = new StringBuilder();
b.append("numberOfPackedObjects=").append(numberOfPackedObjects); //$NON-NLS-1$
@ -886,15 +891,15 @@ public String toString() {
b.append(", numberOfPackedRefs=").append(numberOfPackedRefs); //$NON-NLS-1$
b.append(", sizeOfLooseObjects=").append(sizeOfLooseObjects); //$NON-NLS-1$
b.append(", sizeOfPackedObjects=").append(sizeOfPackedObjects); //$NON-NLS-1$
b.append(", numberOfBitmaps=").append(numberOfBitmaps); //$NON-NLS-1$
return b.toString();
}
}
/**
* Returns the number of objects stored in pack files. If an object is
* contained in multiple pack files it is counted as often as it occurs.
* Returns information about objects and pack files for a FileRepository.
*
* @return the number of objects stored in pack files
* @return information about objects and pack files for a FileRepository
* @throws IOException
*/
public RepoStatistics getStatistics() throws IOException {
@ -904,6 +909,8 @@ public RepoStatistics getStatistics() throws IOException {
ret.numberOfPackedObjects += f.getIndex().getObjectCount();
ret.numberOfPackFiles++;
ret.sizeOfPackedObjects += f.getPackFile().length();
if (f.getBitmapIndex() != null)
ret.numberOfBitmaps += f.getBitmapIndex().getBitmapCount();
}
File objDir = repo.getObjectsDirectory();
String[] fanout = objDir.list();

View File

@ -193,4 +193,11 @@ public abstract EWAHCompressedBitmap ofObjectType(
* pack that this index was generated from.
*/
public abstract int getObjectCount();
/**
* Returns the number of bitmaps in this bitmap index.
*
* @return the number of bitmaps in this bitmap index.
*/
public abstract int getBitmapCount();
}

View File

@ -253,7 +253,7 @@ public int getOptions() {
return PackBitmapIndexV1.OPT_FULL;
}
/** @return the number of bitmaps. */
@Override
public int getBitmapCount() {
return getBitmaps().size();
}

View File

@ -209,4 +209,10 @@ public int getFlags() {
return flags;
}
}
@Override
public int getBitmapCount() {
// The count is only useful for the end index, not the remapper.
return 0;
}
}

View File

@ -212,6 +212,11 @@ public EWAHCompressedBitmap ofObjectType(
throw new IllegalArgumentException();
}
@Override
public int getBitmapCount() {
return bitmaps.size();
}
@Override
public boolean equals(Object o) {
// TODO(cranger): compare the pack checksum?

View File

@ -2017,8 +2017,11 @@ public boolean prepareBitmapIndex(ProgressMonitor pm) throws IOException {
PackWriterBitmapPreparer bitmapPreparer = new PackWriterBitmapPreparer(
reader, writeBitmaps, pm, stats.interestingObjects);
int commitRange = config.getBitmapCommitRange();
if (commitRange < 0)
commitRange = numCommits; // select from all commits
Collection<PackWriterBitmapPreparer.BitmapCommit> selectedCommits =
bitmapPreparer.doCommitSelection(numCommits);
bitmapPreparer.doCommitSelection(commitRange);
beginPhase(PackingPhase.BUILDING_BITMAPS, pm, selectedCommits.size());

View File

@ -109,13 +109,13 @@ public int compare(BitmapBuilder a, BitmapBuilder b) {
this.bitmapIndex = new BitmapIndexImpl(bitmapRemapper);
}
Collection<BitmapCommit> doCommitSelection(int expectedNumCommits)
Collection<BitmapCommit> doCommitSelection(int commitRange)
throws MissingObjectException, IncorrectObjectTypeException,
IOException {
pm.beginTask(JGitText.get().selectingCommits, ProgressMonitor.UNKNOWN);
RevWalk rw = new RevWalk(reader);
rw.setRetainBody(false);
WalkResult result = findPaths(rw, expectedNumCommits);
WalkResult result = findPaths(rw, commitRange);
pm.endTask();
int totCommits = result.commitsByOldest.length - result.commitStartPos;
@ -215,7 +215,7 @@ Collection<BitmapCommit> doCommitSelection(int expectedNumCommits)
return selections;
}
private WalkResult findPaths(RevWalk rw, int expectedNumCommits)
private WalkResult findPaths(RevWalk rw, int commitRange)
throws MissingObjectException, IOException {
BitmapBuilder reuseBitmap = commitBitmapIndex.newBitmapBuilder();
List<BitmapCommit> reuse = new ArrayList<BitmapCommit>();
@ -256,11 +256,11 @@ private WalkResult findPaths(RevWalk rw, int expectedNumCommits)
}
// Update the paths from the wants and create a list of commits in
// reverse iteration order.
RevCommit[] commits = new RevCommit[expectedNumCommits];
// reverse iteration order for the desired commit range.
RevCommit[] commits = new RevCommit[commitRange];
int pos = commits.length;
RevCommit rc;
while ((rc = rw.next()) != null) {
while ((rc = rw.next()) != null && pos > 0) {
commits[--pos] = rc;
for (BitmapBuilder path : paths) {
if (path.contains(rc)) {

View File

@ -138,6 +138,14 @@ public class PackConfig {
*/
public static final boolean DEFAULT_BUILD_BITMAPS = true;
/**
* Default range of commits for which to create bitmaps: {@value}
*
* @see #setBitmapCommitRange(int)
* @since 4.2
*/
public static final int DEFAULT_BITMAP_COMMIT_RANGE = -1;
private int compressionLevel = Deflater.DEFAULT_COMPRESSION;
@ -169,6 +177,8 @@ public class PackConfig {
private boolean buildBitmaps = DEFAULT_BUILD_BITMAPS;
private int bitmapCommitRange = DEFAULT_BITMAP_COMMIT_RANGE;
private boolean cutDeltaChains;
/** Create a default configuration. */
@ -222,6 +232,7 @@ public PackConfig(PackConfig cfg) {
this.executor = cfg.executor;
this.indexVersion = cfg.indexVersion;
this.buildBitmaps = cfg.buildBitmaps;
this.bitmapCommitRange = cfg.bitmapCommitRange;
this.cutDeltaChains = cfg.cutDeltaChains;
}
@ -683,6 +694,39 @@ public void setBuildBitmaps(boolean buildBitmaps) {
this.buildBitmaps = buildBitmaps;
}
/**
* Get the range of commits for which to build bitmaps. The range starts
* from the most recent commit.
*
* A value of 0 creates bitmaps for only branch tips. A value of -1 creates
* bitmaps spaced through the entire history of commits.
*
* Default setting: {@value #DEFAULT_BITMAP_COMMIT_RANGE}
*
* @return the range of commits for which to create bitmaps, starting with
* the most recent commit
* @see PackIndexWriter
* @since 4.2
*/
public int getBitmapCommitRange() {
return bitmapCommitRange;
}
/**
* Set the range of commits for which to build bitmaps.
*
* Default setting: {@value #DEFAULT_BITMAP_COMMIT_RANGE}
*
* @param range
* the range of commits for which to create bitmaps, starting
* with the most recent commit
* @see PackIndexWriter
* @since 4.2
*/
public void setBitmapCommitRange(final int range) {
bitmapCommitRange = range;
}
/**
* Update properties by setting fields from the configuration.
*
@ -718,6 +762,8 @@ public void fromConfig(final Config rc) {
setCutDeltaChains(rc.getBoolean(
"pack", "cutdeltachains", getCutDeltaChains())); //$NON-NLS-1$ //$NON-NLS-2$
setBuildBitmaps(rc.getBoolean("pack", "buildbitmaps", isBuildBitmaps())); //$NON-NLS-1$ //$NON-NLS-2$
setBitmapCommitRange(
rc.getInt("pack", "bitmapcommitrange", getBitmapCommitRange())); //$NON-NLS-1$ //$NON-NLS-2$
}
public String toString() {
@ -735,6 +781,7 @@ public String toString() {
b.append(", reuseObjects=").append(isReuseObjects()); //$NON-NLS-1$
b.append(", deltaCompress=").append(isDeltaCompress()); //$NON-NLS-1$
b.append(", buildBitmaps=").append(isBuildBitmaps()); //$NON-NLS-1$
b.append(", bitmapCommitRange=").append(getBitmapCommitRange()); //$NON-NLS-1$
return b.toString();
}
}