Avoid repacking unreachable garbage in DfsGarbageCollector

If a repository has significant amounts of unreachable garbage, the
final phase to coalesce it can take longer than any other part of the
garbage collection run. Provide a setting for applications to tweak
the threshold where coalescing ends and files just remain on disk.

Change-Id: I5f11a998a7185c75ece3271d8bc6181bb83f54c1
This commit is contained in:
Shawn Pearce 2013-03-08 11:02:04 -08:00
parent 3ad454497c
commit bb002c619b
1 changed files with 52 additions and 5 deletions

View File

@ -46,12 +46,11 @@
import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.storage.pack.PackExt.PACK;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@ -69,6 +68,7 @@
import org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.storage.file.PackIndex;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackExt;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.io.CountingOutputStream;
@ -90,6 +90,8 @@ public class DfsGarbageCollector {
private PackConfig packConfig;
private long coalesceGarbageLimit = 50 << 20;
private Map<String, Ref> refsBefore;
private List<DfsPackFile> packsBefore;
@ -139,6 +141,38 @@ public DfsGarbageCollector setPackConfig(PackConfig newConfig) {
return this;
}
/**
 * Get the byte size limit used when deciding whether a garbage pack is
 * repacked.
 *
 * @return size in bytes; garbage packs smaller than this size will be
 *         repacked.
 */
public long getCoalesceGarbageLimit() {
	return this.coalesceGarbageLimit;
}
/**
 * Configure the byte size threshold below which garbage packs are repacked.
 * <p>
 * At the end of a run, every UNREACHABLE_GARBAGE pack that is smaller than
 * this limit is repacked, so that unreachable objects can be coalesced into
 * a single file.
 * <p>
 * An UNREACHABLE_GARBAGE pack that has already reached or exceeded the limit
 * is left untouched by the garbage collector, which saves the disk IO that
 * reading and copying its objects would cost.
 * <p>
 * Special values:
 * <ul>
 * <li>{@code 0} disables the UNREACHABLE_GARBAGE coalesce entirely.</li>
 * <li>{@link Long#MAX_VALUE} causes everything to be coalesced.</li>
 * </ul>
 * <p>
 * Unreachable garbage is kept around to prevent race conditions with
 * repository changes that may suddenly need an object whose only copy was
 * stored in the UNREACHABLE_GARBAGE pack.
 *
 * @param limit
 *            size in bytes.
 * @return {@code this}
 */
public DfsGarbageCollector setCoalesceGarbageLimit(long limit) {
	this.coalesceGarbageLimit = limit;
	return this;
}
/**
* Create a single new pack file containing all of the live objects.
* <p>
@ -167,7 +201,7 @@ public boolean pack(ProgressMonitor pm) throws IOException {
objdb.clearCache();
refsBefore = repo.getAllRefs();
packsBefore = Arrays.asList(objdb.getPacks());
packsBefore = packsToRebuild();
if (packsBefore.isEmpty())
return true;
@ -203,6 +237,19 @@ public boolean pack(ProgressMonitor pm) throws IOException {
}
}
/**
 * Filter the object database's current packs down to those to rebuild.
 * <p>
 * Every pack whose source is not UNREACHABLE_GARBAGE is selected. A garbage
 * pack is selected only when its pack file is smaller than
 * {@code coalesceGarbageLimit}; larger garbage packs are omitted.
 *
 * @return the selected packs, in the order {@code objdb.getPacks()} listed
 *         them.
 * @throws IOException
 *             propagated from the calls made while selecting packs.
 */
private List<DfsPackFile> packsToRebuild() throws IOException {
	final DfsPackFile[] all = objdb.getPacks();
	final List<DfsPackFile> selected = new ArrayList<DfsPackFile>(all.length);
	for (int i = 0; i < all.length; i++) {
		final DfsPackFile pack = all[i];
		final DfsPackDescription desc = pack.getPackDescription();
		// The size is only consulted for garbage packs.
		if (desc.getPackSource() == UNREACHABLE_GARBAGE
				&& desc.getFileSize(PackExt.PACK) >= coalesceGarbageLimit)
			continue; // garbage pack at or above the limit: not selected
		selected.add(pack);
	}
	return selected;
}
/** @return all of the source packs that fed into this compaction. */
public List<DfsPackDescription> getSourcePacks() {
return toPrune();
@ -264,9 +311,9 @@ private void packGarbage(ProgressMonitor pm) throws IOException {
PackWriter pw = newPackWriter();
try {
RevWalk pool = new RevWalk(ctx);
pm.beginTask("Finding garbage", (int) getObjectsBefore());
for (DfsPackFile oldPack : packsBefore) {
PackIndex oldIdx = oldPack.getPackIndex(ctx);
pm.beginTask("Finding garbage", (int) oldIdx.getObjectCount());
for (PackIndex.MutableEntry ent : oldIdx) {
pm.update(1);
ObjectId id = ent.toObjectId();
@ -276,8 +323,8 @@ private void packGarbage(ProgressMonitor pm) throws IOException {
int type = oldPack.getObjectType(ctx, ent.getOffset());
pw.addObject(pool.lookupAny(id, type));
}
pm.endTask();
}
pm.endTask();
if (0 < pw.getObjectCount())
writePack(UNREACHABLE_GARBAGE, pw, pm);
} finally {