Prune UNREACHABLE_GARBAGE packs when they expire
DfsGarbageCollector will now enforce a maximum time to live (TTL) for UNREACHABLE_GARBAGE packs. The default TTL is 1 day, which should be enough time to avoid races with other processes that are inserting data into the repository.

Change-Id: Id719e6e2a03cfc9a0c0aef8ed71d261dda14bd0c
Signed-off-by: Mike Williams <miwilliams@google.com>
This commit is contained in:
parent
a1ca13e09c
commit
fd527a2cd7
|
@ -0,0 +1,239 @@
|
|||
package org.eclipse.jgit.internal.storage.dfs;
|
||||
|
||||
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
|
||||
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
|
||||
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
|
||||
import org.eclipse.jgit.junit.TestRepository;
|
||||
import org.eclipse.jgit.lib.AnyObjectId;
|
||||
import org.eclipse.jgit.lib.Ref;
|
||||
import org.eclipse.jgit.lib.Repository;
|
||||
import org.eclipse.jgit.revwalk.RevCommit;
|
||||
import org.eclipse.jgit.revwalk.RevWalk;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class DfsGarbageCollectorTest {
|
||||
private TestRepository<InMemoryRepository> git;
|
||||
private InMemoryRepository repo;
|
||||
private DfsObjDatabase odb;
|
||||
|
||||
@Before
|
||||
public void setUp() throws IOException {
|
||||
DfsRepositoryDescription desc = new DfsRepositoryDescription("test");
|
||||
git = new TestRepository<>(new InMemoryRepository(desc));
|
||||
repo = git.getRepository();
|
||||
odb = repo.getObjectDatabase();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollectionWithNoGarbage() throws Exception {
|
||||
RevCommit commit0 = commit().message("0").create();
|
||||
RevCommit commit1 = commit().message("1").parent(commit0).create();
|
||||
git.update("master", commit1);
|
||||
|
||||
assertTrue("commit0 reachable", isReachable(repo, commit0));
|
||||
assertTrue("commit1 reachable", isReachable(repo, commit1));
|
||||
|
||||
// Packs start out as INSERT.
|
||||
assertEquals(2, odb.getPacks().length);
|
||||
for (DfsPackFile pack : odb.getPacks()) {
|
||||
assertEquals(INSERT, pack.getPackDescription().getPackSource());
|
||||
}
|
||||
|
||||
gcNoTtl();
|
||||
|
||||
// Single GC pack present with all objects.
|
||||
assertEquals(1, odb.getPacks().length);
|
||||
DfsPackFile pack = odb.getPacks()[0];
|
||||
assertEquals(GC, pack.getPackDescription().getPackSource());
|
||||
assertTrue("commit0 in pack", isObjectInPack(commit0, pack));
|
||||
assertTrue("commit1 in pack", isObjectInPack(commit1, pack));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollectionWithGarbage() throws Exception {
|
||||
RevCommit commit0 = commit().message("0").create();
|
||||
RevCommit commit1 = commit().message("1").parent(commit0).create();
|
||||
git.update("master", commit0);
|
||||
|
||||
assertTrue("commit0 reachable", isReachable(repo, commit0));
|
||||
assertFalse("commit1 garbage", isReachable(repo, commit1));
|
||||
gcNoTtl();
|
||||
|
||||
assertEquals(2, odb.getPacks().length);
|
||||
DfsPackFile gc = null;
|
||||
DfsPackFile garbage = null;
|
||||
for (DfsPackFile pack : odb.getPacks()) {
|
||||
DfsPackDescription d = pack.getPackDescription();
|
||||
if (d.getPackSource() == GC) {
|
||||
gc = pack;
|
||||
} else if (d.getPackSource() == UNREACHABLE_GARBAGE) {
|
||||
garbage = pack;
|
||||
} else {
|
||||
fail("unexpected " + d.getPackSource());
|
||||
}
|
||||
}
|
||||
|
||||
assertNotNull("created GC pack", gc);
|
||||
assertTrue(isObjectInPack(commit0, gc));
|
||||
|
||||
assertNotNull("created UNREACHABLE_GARBAGE pack", garbage);
|
||||
assertTrue(isObjectInPack(commit1, garbage));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollectionWithGarbageAndGarbagePacksPurged()
|
||||
throws Exception {
|
||||
RevCommit commit0 = commit().message("0").create();
|
||||
RevCommit commit1 = commit().message("1").parent(commit0).create();
|
||||
git.update("master", commit0);
|
||||
|
||||
gcNoTtl();
|
||||
gcWithTtl();
|
||||
|
||||
// The repository has an UNREACHABLE_GARBAGE pack that could have
|
||||
// expired, but since we never purge the most recent UNREACHABLE_GARBAGE
|
||||
// pack, it must have survived the GC.
|
||||
boolean commit1Found = false;
|
||||
for (DfsPackFile pack : odb.getPacks()) {
|
||||
DfsPackDescription d = pack.getPackDescription();
|
||||
if (d.getPackSource() == GC) {
|
||||
assertTrue("has commit0", isObjectInPack(commit0, pack));
|
||||
assertFalse("no commit1", isObjectInPack(commit1, pack));
|
||||
} else if (d.getPackSource() == UNREACHABLE_GARBAGE) {
|
||||
commit1Found |= isObjectInPack(commit1, pack);
|
||||
} else {
|
||||
fail("unexpected " + d.getPackSource());
|
||||
}
|
||||
}
|
||||
assertTrue("garbage commit1 still readable", commit1Found);
|
||||
|
||||
// Find oldest UNREACHABLE_GARBAGE; it will be pruned by next GC.
|
||||
DfsPackDescription oldestGarbagePack = null;
|
||||
for (DfsPackFile pack : odb.getPacks()) {
|
||||
DfsPackDescription d = pack.getPackDescription();
|
||||
if (d.getPackSource() == UNREACHABLE_GARBAGE) {
|
||||
oldestGarbagePack = oldestPack(oldestGarbagePack, d);
|
||||
}
|
||||
}
|
||||
assertNotNull("has UNREACHABLE_GARBAGE", oldestGarbagePack);
|
||||
|
||||
gcWithTtl();
|
||||
assertTrue("has packs", odb.getPacks().length > 0);
|
||||
for (DfsPackFile pack : odb.getPacks()) {
|
||||
assertNotEquals(oldestGarbagePack, pack.getPackDescription());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollectionWithGarbageCoalescence() throws Exception {
|
||||
RevCommit commit0 = commit().message("0").create();
|
||||
RevCommit commit1 = commit().message("1").parent(commit0).create();
|
||||
git.update("master", commit0);
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
commit1 = commit().message("g" + i).parent(commit1).create();
|
||||
|
||||
// Make sure we don't have more than 1 UNREACHABLE_GARBAGE pack
|
||||
// because they're coalesced.
|
||||
gcNoTtl();
|
||||
assertEquals(1, countPacks(UNREACHABLE_GARBAGE));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollectionWithGarbageNoCoalescence() throws Exception {
|
||||
RevCommit commit0 = commit().message("0").create();
|
||||
RevCommit commit1 = commit().message("1").parent(commit0).create();
|
||||
git.update("master", commit0);
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
commit1 = commit().message("g" + i).parent(commit1).create();
|
||||
|
||||
DfsGarbageCollector gc = new DfsGarbageCollector(repo);
|
||||
gc.setCoalesceGarbageLimit(0);
|
||||
gc.setGarbageTtl(0, TimeUnit.MILLISECONDS);
|
||||
run(gc);
|
||||
assertEquals(1 + i, countPacks(UNREACHABLE_GARBAGE));
|
||||
}
|
||||
}
|
||||
|
||||
private TestRepository<InMemoryRepository>.CommitBuilder commit() {
|
||||
return git.commit();
|
||||
}
|
||||
|
||||
private void gcNoTtl() throws IOException {
|
||||
DfsGarbageCollector gc = new DfsGarbageCollector(repo);
|
||||
gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL
|
||||
run(gc);
|
||||
}
|
||||
|
||||
private void gcWithTtl() throws InterruptedException, IOException {
|
||||
// Wait for the system clock to move by at least 1 millisecond.
|
||||
// This allows the DfsGarbageCollector to recognize the boundary.
|
||||
long start = System.currentTimeMillis();
|
||||
do {
|
||||
Thread.sleep(10);
|
||||
} while (System.currentTimeMillis() <= start);
|
||||
|
||||
DfsGarbageCollector gc = new DfsGarbageCollector(repo);
|
||||
gc.setGarbageTtl(1, TimeUnit.MILLISECONDS);
|
||||
run(gc);
|
||||
}
|
||||
|
||||
private void run(DfsGarbageCollector gc) throws IOException {
|
||||
assertTrue("gc repacked", gc.pack(null));
|
||||
odb.clearCache();
|
||||
}
|
||||
|
||||
private static boolean isReachable(Repository repo, AnyObjectId id)
|
||||
throws IOException {
|
||||
try (RevWalk rw = new RevWalk(repo)) {
|
||||
for (Ref ref : repo.getAllRefs().values()) {
|
||||
rw.markStart(rw.parseCommit(ref.getObjectId()));
|
||||
}
|
||||
for (RevCommit next; (next = rw.next()) != null;) {
|
||||
if (AnyObjectId.equals(next, id)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isObjectInPack(AnyObjectId id, DfsPackFile pack)
|
||||
throws IOException {
|
||||
try (DfsReader reader = new DfsReader(odb)) {
|
||||
return pack.hasObject(reader, id);
|
||||
}
|
||||
}
|
||||
|
||||
private static DfsPackDescription oldestPack(DfsPackDescription a,
|
||||
DfsPackDescription b) {
|
||||
if (a != null && a.getLastModified() < b.getLastModified()) {
|
||||
return a;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
private int countPacks(PackSource source) throws IOException {
|
||||
int cnt = 0;
|
||||
for (DfsPackFile pack : odb.getPacks()) {
|
||||
if (pack.getPackDescription().getPackSource() == source) {
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
return cnt;
|
||||
}
|
||||
}
|
|
@ -54,9 +54,11 @@
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.eclipse.jgit.internal.JGitText;
|
||||
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
|
||||
|
@ -93,10 +95,14 @@ public class DfsGarbageCollector {
|
|||
|
||||
private PackConfig packConfig;
|
||||
|
||||
// See pack(), below, for how these two variables interact.
|
||||
private long coalesceGarbageLimit = 50 << 20;
|
||||
private long garbageTtlMillis = TimeUnit.DAYS.toMillis(1);
|
||||
|
||||
private long startTimeMillis;
|
||||
private List<DfsPackFile> packsBefore;
|
||||
private List<DfsPackFile> expiredGarbagePacks;
|
||||
|
||||
private Set<ObjectId> allHeads;
|
||||
private Set<ObjectId> nonHeads;
|
||||
private Set<ObjectId> txnHeads;
|
||||
|
@ -167,6 +173,34 @@ public DfsGarbageCollector setCoalesceGarbageLimit(long limit) {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return garbage packs older than this limit (in milliseconds) will be
|
||||
* pruned as part of the garbage collection process if the value is
|
||||
* > 0, otherwise garbage packs are retained.
|
||||
*/
|
||||
public long getGarbageTtlMillis() {
|
||||
return garbageTtlMillis;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the time to live for garbage objects.
|
||||
* <p>
|
||||
* Any UNREACHABLE_GARBAGE older than this limit will be pruned at the end
|
||||
* of the run.
|
||||
* <p>
|
||||
* If timeToLiveMillis is set to 0, UNREACHABLE_GARBAGE purging is disabled.
|
||||
*
|
||||
* @param ttl
|
||||
* Time to live whatever unit is specified.
|
||||
* @param unit
|
||||
* The specified time unit.
|
||||
* @return {@code this}
|
||||
*/
|
||||
public DfsGarbageCollector setGarbageTtl(long ttl, TimeUnit unit) {
|
||||
garbageTtlMillis = unit.toMillis(ttl);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a single new pack file containing all of the live objects.
|
||||
* <p>
|
||||
|
@ -189,6 +223,12 @@ public boolean pack(ProgressMonitor pm) throws IOException {
|
|||
if (packConfig.getIndexVersion() != 2)
|
||||
throw new IllegalStateException(
|
||||
JGitText.get().supportOnlyPackIndexVersion2);
|
||||
if (garbageTtlMillis > 0) {
|
||||
// We disable coalescing because the coalescing step will keep
|
||||
// refreshing the UNREACHABLE_GARBAGE pack and we wouldn't
|
||||
// actually prune anything.
|
||||
coalesceGarbageLimit = 0;
|
||||
}
|
||||
|
||||
startTimeMillis = System.currentTimeMillis();
|
||||
ctx = (DfsReader) objdb.newReader();
|
||||
|
@ -197,9 +237,14 @@ public boolean pack(ProgressMonitor pm) throws IOException {
|
|||
objdb.clearCache();
|
||||
|
||||
Collection<Ref> refsBefore = getAllRefs();
|
||||
packsBefore = packsToRebuild();
|
||||
if (packsBefore.isEmpty())
|
||||
readPacksBefore();
|
||||
|
||||
if (packsBefore.isEmpty()) {
|
||||
if (!expiredGarbagePacks.isEmpty()) {
|
||||
objdb.commitPack(noPacks(), toPrune());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
allHeads = new HashSet<ObjectId>();
|
||||
nonHeads = new HashSet<ObjectId>();
|
||||
|
@ -254,17 +299,60 @@ private Collection<Ref> getAllRefs() throws IOException {
|
|||
return refs;
|
||||
}
|
||||
|
||||
private List<DfsPackFile> packsToRebuild() throws IOException {
|
||||
private void readPacksBefore() throws IOException {
|
||||
DfsPackFile[] packs = objdb.getPacks();
|
||||
List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length);
|
||||
packsBefore = new ArrayList<DfsPackFile>(packs.length);
|
||||
expiredGarbagePacks = new ArrayList<DfsPackFile>(packs.length);
|
||||
|
||||
long mostRecentGC = mostRecentGC(packs);
|
||||
long now = System.currentTimeMillis();
|
||||
for (DfsPackFile p : packs) {
|
||||
DfsPackDescription d = p.getPackDescription();
|
||||
if (d.getPackSource() != UNREACHABLE_GARBAGE)
|
||||
out.add(p);
|
||||
else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit)
|
||||
out.add(p);
|
||||
if (d.getPackSource() != UNREACHABLE_GARBAGE) {
|
||||
packsBefore.add(p);
|
||||
} else if (packIsExpiredGarbage(d, mostRecentGC, now)) {
|
||||
expiredGarbagePacks.add(p);
|
||||
} else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit) {
|
||||
packsBefore.add(p);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
private static long mostRecentGC(DfsPackFile[] packs) {
|
||||
long r = 0;
|
||||
for (DfsPackFile p : packs) {
|
||||
DfsPackDescription d = p.getPackDescription();
|
||||
if (d.getPackSource() == GC || d.getPackSource() == GC_REST) {
|
||||
r = Math.max(r, d.getLastModified());
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
private boolean packIsExpiredGarbage(DfsPackDescription d,
|
||||
long mostRecentGC, long now) {
|
||||
// It should be safe to remove an UNREACHABLE_GARBAGE pack if it:
|
||||
//
|
||||
// (a) Predates the most recent prior run of this class. This check
|
||||
// ensures the graph traversal algorithm had a chance to consider
|
||||
// all objects in this pack and copied them into a GC or GC_REST
|
||||
// pack if the graph contained live edges to the objects.
|
||||
//
|
||||
// This check is safe because of the ordering of packing; the GC
|
||||
// packs are written first and then the UNREACHABLE_GARBAGE is
|
||||
// constructed. Any UNREACHABLE_GARBAGE dated earlier than the GC
|
||||
// was input to the prior GC's graph traversal.
|
||||
//
|
||||
// (b) Is older than garbagePackTtl. This check gives concurrent
|
||||
// inserter threads sufficient time to identify an object is not
|
||||
// in the graph and should have a new copy written, rather than
|
||||
// relying on something from an UNREACHABLE_GARBAGE pack.
|
||||
//
|
||||
// Both (a) and (b) must be met to safely remove UNREACHABLE_GARBAGE.
|
||||
return d.getPackSource() == UNREACHABLE_GARBAGE
|
||||
&& d.getLastModified() < mostRecentGC
|
||||
&& garbageTtlMillis > 0
|
||||
&& now - d.getLastModified() >= garbageTtlMillis;
|
||||
}
|
||||
|
||||
/** @return all of the source packs that fed into this compaction. */
|
||||
|
@ -285,8 +373,12 @@ public List<PackStatistics> getNewPackStatistics() {
|
|||
private List<DfsPackDescription> toPrune() {
|
||||
int cnt = packsBefore.size();
|
||||
List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
|
||||
for (DfsPackFile pack : packsBefore)
|
||||
for (DfsPackFile pack : packsBefore) {
|
||||
all.add(pack.getPackDescription());
|
||||
}
|
||||
for (DfsPackFile pack : expiredGarbagePacks) {
|
||||
all.add(pack.getPackDescription());
|
||||
}
|
||||
return all;
|
||||
}
|
||||
|
||||
|
@ -329,7 +421,6 @@ private void packRefTreeGraph(ProgressMonitor pm) throws IOException {
|
|||
}
|
||||
|
||||
private void packGarbage(ProgressMonitor pm) throws IOException {
|
||||
// TODO(sop) This is ugly. The garbage pack needs to be deleted.
|
||||
PackConfig cfg = new PackConfig(packConfig);
|
||||
cfg.setReuseDeltas(true);
|
||||
cfg.setReuseObjects(true);
|
||||
|
@ -420,4 +511,8 @@ private DfsPackDescription writePack(PackSource source, PackWriter pw,
|
|||
DfsBlockCache.getInstance().getOrCreate(pack, null);
|
||||
return pack;
|
||||
}
|
||||
|
||||
private static List<DfsPackDescription> noPacks() {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue