Insert duplicate objects to prevent race during garbage collection.

Prior to this change, DfsInserter would not insert an object into a pack
if it already existed in another pack in the repository, even if that
pack was unreachable. Consider this sequence of events:

- Object FOO is pushed to a repository.
- Subsequent ref changes make FOO UNREACHABLE_GARBAGE.
- FOO is subsequently re-inserted using a DfsInserter, but skipped
  due to existing in UNREACHABLE_GARBAGE.
- The repository is repacked; FOO will not be written into a new pack
  because it is not yet reachable from a reference. If the
  UNREACHABLE_GARBAGE packs are deleted, FOO disappears.
- A reference is updated to reference FOO. This reference is now broken
  as FOO was removed when the repacking process deleted the
  UNREACHABLE_GARBAGE pack that stored the only copy of FOO.

The garbage collector can't safely delete the UNREACHABLE_GARBAGE
pack because FOO might be in the middle of being re-inserted/re-packed.

This change writes a duplicate copy of an object if it only exists in
UNREACHABLE_GARBAGE. This "freshens" the object to give it a chance to
survive long enough to be made reachable through a reference.

Change-Id: I20f2062230f3af3bccd6f21d3b7342f1152a5532
Signed-off-by: Mike Williams <miwilliams@google.com>
This commit is contained in:
Mike Williams 2015-10-21 16:09:58 -04:00 committed by Shawn Pearce
parent 63e15c7533
commit c4d73fb7cc
3 changed files with 86 additions and 8 deletions

View File

@ -47,17 +47,12 @@
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.zip.Deflater;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.junit.JGitTestUtil;
import org.eclipse.jgit.junit.TestRng;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
@ -68,6 +63,15 @@
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.Deflater;
public class DfsInserterTest {
InMemoryRepository db;
@ -161,6 +165,56 @@ public void testReaderResolve() throws IOException {
assertEquals(id2, objs.iterator().next());
}
@Test
public void testGarbageSelectivelyVisible() throws IOException {
ObjectInserter ins = db.newObjectInserter();
ObjectId fooId = ins.insert(Constants.OBJ_BLOB, Constants.encode("foo"));
ins.flush();
assertEquals(1, db.getObjectDatabase().listPacks().size());
// Make pack 0 garbage.
db.getObjectDatabase().listPacks().get(0).setPackSource(PackSource.UNREACHABLE_GARBAGE);
// Default behavior should be that the database has foo, because we allow garbage objects.
assertTrue(db.getObjectDatabase().has(fooId));
// But we should not be able to see it if we pass the right args.
assertFalse(db.getObjectDatabase().has(fooId, true));
}
@Test
public void testInserterIgnoresUnreachable() throws IOException {
ObjectInserter ins = db.newObjectInserter();
ObjectId fooId = ins.insert(Constants.OBJ_BLOB, Constants.encode("foo"));
ins.flush();
assertEquals(1, db.getObjectDatabase().listPacks().size());
// Make pack 0 garbage.
db.getObjectDatabase().listPacks().get(0).setPackSource(PackSource.UNREACHABLE_GARBAGE);
// We shouldn't be able to see foo because it's garbage.
assertFalse(db.getObjectDatabase().has(fooId, true));
// But if we re-insert foo, it should become visible again.
ins.insert(Constants.OBJ_BLOB, Constants.encode("foo"));
ins.flush();
assertTrue(db.getObjectDatabase().has(fooId, true));
// Verify that we have a foo in both packs, and 1 of them is garbage.
DfsReader reader = new DfsReader(db.getObjectDatabase());
DfsPackFile packs[] = db.getObjectDatabase().getPacks();
Set<PackSource> pack_sources = new HashSet<PackSource>();
assertEquals(2, packs.length);
pack_sources.add(packs[0].getPackDescription().getPackSource());
pack_sources.add(packs[1].getPackDescription().getPackSource());
assertTrue(packs[0].hasObject(reader, fooId));
assertTrue(packs[1].hasObject(reader, fooId));
assertTrue(pack_sources.contains(PackSource.UNREACHABLE_GARBAGE));
assertTrue(pack_sources.contains(PackSource.INSERT));
}
private static String readString(ObjectLoader loader) throws IOException {
return RawParseUtils.decode(readStream(loader));
}

View File

@ -137,7 +137,8 @@ public ObjectId insert(int type, byte[] data, int off, int len)
ObjectId id = idFor(type, data, off, len);
if (objectMap != null && objectMap.contains(id))
return id;
if (db.has(id))
// Ignore unreachable (garbage) objects here.
if (db.has(id, true))
return id;
long offset = beginObject(type, len);

View File

@ -54,6 +54,7 @@
import java.util.concurrent.atomic.AtomicReference;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;
@ -180,6 +181,28 @@ public DfsPackFile[] getCurrentPacks() {
return packList.get().packs;
}
/**
* Does the requested object exist in this database?
* <p>
* This differs from ObjectDatabase's implementation in that we can selectively
* ignore unreachable (garbage) objects.
*
* @param objectId
* identity of the object to test for existence of.
* @param avoidUnreachableObjects
* if true, ignore objects that are unreachable.
* @return true if the specified object is stored in this database.
* @throws IOException
* the object store cannot be accessed.
*/
public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
throws IOException {
try (ObjectReader or = newReader()) {
or.setAvoidUnreachableObjects(avoidUnreachableObjects);
return or.has(objectId);
}
}
/**
* Generate a new unique name for a pack file.
*