Update DfsGarbageCollector to not read back a pack index.

Previously, the Dfs GC excluded objects from packs by passing a
previously written index to the PackWriter. Reading back a file on
Dfs is slow. Instead, allow the PackWriter to expose the objects
included in a pack and forward that to invocations of excludeObjects() .

Change-Id: I377cb4ab07f62cf790505e1eeb0b2efe81897c79
This commit is contained in:
Colby Ranger 2013-01-18 16:22:10 -08:00
parent 372b2c0ca6
commit 7c58f6282a
6 changed files with 89 additions and 32 deletions

View File

@ -66,6 +66,7 @@
import org.eclipse.jgit.junit.JGitTestUtil; import org.eclipse.jgit.junit.JGitTestUtil;
import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.junit.TestRepository.BranchBuilder; import org.eclipse.jgit.junit.TestRepository.BranchBuilder;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter; import org.eclipse.jgit.lib.ObjectInserter;
@ -77,6 +78,7 @@
import org.eclipse.jgit.storage.file.PackIndex.MutableEntry; import org.eclipse.jgit.storage.file.PackIndex.MutableEntry;
import org.eclipse.jgit.storage.pack.PackConfig; import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackWriter; import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.storage.pack.PackWriter.ObjectIdSet;
import org.eclipse.jgit.transport.PackParser; import org.eclipse.jgit.transport.PackParser;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
@ -463,7 +465,7 @@ public void testExclude() throws Exception {
RevCommit c1 = bb.commit().add("f", contentA).create(); RevCommit c1 = bb.commit().add("f", contentA).create();
testRepo.getRevWalk().parseHeaders(c1); testRepo.getRevWalk().parseHeaders(c1);
PackIndex pf1 = writePack(repo, Collections.singleton(c1), PackIndex pf1 = writePack(repo, Collections.singleton(c1),
Collections.<PackIndex> emptySet()); Collections.<ObjectIdSet> emptySet());
assertContent( assertContent(
pf1, pf1,
Arrays.asList(c1.getId(), c1.getTree().getId(), Arrays.asList(c1.getId(), c1.getTree().getId(),
@ -472,7 +474,7 @@ public void testExclude() throws Exception {
RevCommit c2 = bb.commit().add("f", contentB).create(); RevCommit c2 = bb.commit().add("f", contentB).create();
testRepo.getRevWalk().parseHeaders(c2); testRepo.getRevWalk().parseHeaders(c2);
PackIndex pf2 = writePack(repo, Collections.singleton(c2), PackIndex pf2 = writePack(repo, Collections.singleton(c2),
Collections.singleton(pf1)); Collections.singleton(objectIdSet(pf1)));
assertContent( assertContent(
pf2, pf2,
Arrays.asList(c2.getId(), c2.getTree().getId(), Arrays.asList(c2.getId(), c2.getTree().getId(),
@ -490,12 +492,12 @@ private static void assertContent(PackIndex pi, List<ObjectId> expected) {
} }
private static PackIndex writePack(FileRepository repo, private static PackIndex writePack(FileRepository repo,
Set<? extends ObjectId> want, Set<PackIndex> excludeObjects) Set<? extends ObjectId> want, Set<ObjectIdSet> excludeObjects)
throws IOException { throws IOException {
PackWriter pw = new PackWriter(repo); PackWriter pw = new PackWriter(repo);
pw.setDeltaBaseAsOffset(true); pw.setDeltaBaseAsOffset(true);
pw.setReuseDeltaCommits(false); pw.setReuseDeltaCommits(false);
for (PackIndex idx : excludeObjects) for (ObjectIdSet idx : excludeObjects)
pw.excludeObjects(idx); pw.excludeObjects(idx);
pw.preparePack(NullProgressMonitor.INSTANCE, want, pw.preparePack(NullProgressMonitor.INSTANCE, want,
Collections.<ObjectId> emptySet()); Collections.<ObjectId> emptySet());
@ -668,4 +670,12 @@ public int compare(MutableEntry o1, MutableEntry o2) {
assertEquals(objectsOrder[i++].toObjectId(), me.toObjectId()); assertEquals(objectsOrder[i++].toObjectId(), me.toObjectId());
} }
} }
private static ObjectIdSet objectIdSet(final PackIndex idx) {
return new ObjectIdSet() {
public boolean contains(AnyObjectId objectId) {
return idx.hasObject(objectId);
}
};
}
} }

View File

@ -30,6 +30,7 @@ blobNotFound=Blob not found: {0}
blobNotFoundForPath=Blob not found: {0} for path: {1} blobNotFoundForPath=Blob not found: {0} for path: {1}
branchNameInvalid=Branch name {0} is not allowed branchNameInvalid=Branch name {0} is not allowed
cachedPacksPreventsIndexCreation=Using cached packs prevents index creation cachedPacksPreventsIndexCreation=Using cached packs prevents index creation
cachedPacksPreventsListingObjects=Using cached packs prevents listing objects
cannotBeCombined=Cannot be combined. cannotBeCombined=Cannot be combined.
cannotBeRecursiveWhenTreesAreIncluded=TreeWalk shouldn't be recursive when tree objects are included. cannotBeRecursiveWhenTreesAreIncluded=TreeWalk shouldn't be recursive when tree objects are included.
cannotCombineSquashWithNoff=Cannot combine --squash with --no-ff. cannotCombineSquashWithNoff=Cannot combine --squash with --no-ff.

View File

@ -91,6 +91,7 @@ public static JGitText get() {
/***/ public String blobNotFoundForPath; /***/ public String blobNotFoundForPath;
/***/ public String branchNameInvalid; /***/ public String branchNameInvalid;
/***/ public String cachedPacksPreventsIndexCreation; /***/ public String cachedPacksPreventsIndexCreation;
/***/ public String cachedPacksPreventsListingObjects;
/***/ public String cannotBeCombined; /***/ public String cannotBeCombined;
/***/ public String cannotBeRecursiveWhenTreesAreIncluded; /***/ public String cannotBeRecursiveWhenTreesAreIncluded;
/***/ public String cannotCombineSquashWithNoff; /***/ public String cannotCombineSquashWithNoff;

View File

@ -82,7 +82,7 @@ public class DfsGarbageCollector {
private final List<PackWriter.Statistics> newPackStats; private final List<PackWriter.Statistics> newPackStats;
private final List<DfsPackFile> newPackList; private final List<PackWriter.ObjectIdSet> newPackObj;
private DfsReader ctx; private DfsReader ctx;
@ -116,7 +116,7 @@ public DfsGarbageCollector(DfsRepository repository) {
objdb = repo.getObjectDatabase(); objdb = repo.getObjectDatabase();
newPackDesc = new ArrayList<DfsPackDescription>(4); newPackDesc = new ArrayList<DfsPackDescription>(4);
newPackStats = new ArrayList<PackWriter.Statistics>(4); newPackStats = new ArrayList<PackWriter.Statistics>(4);
newPackList = new ArrayList<DfsPackFile>(4); newPackObj = new ArrayList<PackWriter.ObjectIdSet>(4);
packConfig = new PackConfig(repo); packConfig = new PackConfig(repo);
packConfig.setIndexVersion(2); packConfig.setIndexVersion(2);
@ -244,8 +244,8 @@ private void packRest(ProgressMonitor pm) throws IOException {
PackWriter pw = newPackWriter(); PackWriter pw = newPackWriter();
try { try {
for (DfsPackFile pack : newPackList) for (PackWriter.ObjectIdSet packedObjs : newPackObj)
pw.excludeObjects(pack.getPackIndex(ctx)); pw.excludeObjects(packedObjs);
pw.preparePack(pm, nonHeads, allHeads); pw.preparePack(pm, nonHeads, allHeads);
if (0 < pw.getObjectCount()) if (0 < pw.getObjectCount())
writePack(GC, pw, pm); writePack(GC, pw, pm);
@ -259,10 +259,6 @@ private void packGarbage(ProgressMonitor pm) throws IOException {
return; return;
// TODO(sop) This is ugly. The garbage pack needs to be deleted. // TODO(sop) This is ugly. The garbage pack needs to be deleted.
List<PackIndex> newIdx = new ArrayList<PackIndex>(newPackList.size());
for (DfsPackFile pack : newPackList)
newIdx.add(pack.getPackIndex(ctx));
PackWriter pw = newPackWriter(); PackWriter pw = newPackWriter();
try { try {
RevWalk pool = new RevWalk(ctx); RevWalk pool = new RevWalk(ctx);
@ -272,7 +268,7 @@ private void packGarbage(ProgressMonitor pm) throws IOException {
for (PackIndex.MutableEntry ent : oldIdx) { for (PackIndex.MutableEntry ent : oldIdx) {
pm.update(1); pm.update(1);
ObjectId id = ent.toObjectId(); ObjectId id = ent.toObjectId();
if (pool.lookupOrNull(id) != null || anyIndexHas(newIdx, id)) if (pool.lookupOrNull(id) != null || anyPackHas(id))
continue; continue;
int type = oldPack.getObjectType(ctx, ent.getOffset()); int type = oldPack.getObjectType(ctx, ent.getOffset());
@ -287,9 +283,9 @@ private void packGarbage(ProgressMonitor pm) throws IOException {
} }
} }
private static boolean anyIndexHas(List<PackIndex> list, AnyObjectId id) { private boolean anyPackHas(AnyObjectId id) {
for (PackIndex idx : list) for (PackWriter.ObjectIdSet packedObjs : newPackObj)
if (idx.hasObject(id)) if (packedObjs.contains(id))
return true; return true;
return false; return false;
} }
@ -336,6 +332,13 @@ private DfsPackDescription writePack(PackSource source, PackWriter pw,
out.close(); out.close();
} }
final List<ObjectId> packedObjs = pw.getObjectList();
newPackObj.add(new PackWriter.ObjectIdSet() {
public boolean contains(AnyObjectId objectId) {
return 0 <= Collections.binarySearch(packedObjs, objectId);
}
});
PackWriter.Statistics stats = pw.getStatistics(); PackWriter.Statistics stats = pw.getStatistics();
pack.setPackStats(stats); pack.setPackStats(stats);
pack.setFileSize(PACK, stats.getTotalBytes()); pack.setFileSize(PACK, stats.getTotalBytes());
@ -343,7 +346,8 @@ private DfsPackDescription writePack(PackSource source, PackWriter pw,
pack.setDeltaCount(stats.getTotalDeltas()); pack.setDeltaCount(stats.getTotalDeltas());
objectsPacked += stats.getTotalObjects(); objectsPacked += stats.getTotalObjects();
newPackStats.add(stats); newPackStats.add(stats);
newPackList.add(DfsBlockCache.getInstance().getOrCreate(pack, null));
DfsBlockCache.getInstance().getOrCreate(pack, null);
return pack; return pack;
} }
} }

View File

@ -70,6 +70,7 @@
import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.NoWorkTreeException; import org.eclipse.jgit.errors.NoWorkTreeException;
import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.FileMode;
@ -83,6 +84,7 @@
import org.eclipse.jgit.revwalk.RevObject; import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackWriter; import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.storage.pack.PackWriter.ObjectIdSet;
import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.TreeFilter; import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.eclipse.jgit.util.FileUtils; import org.eclipse.jgit.util.FileUtils;
@ -498,10 +500,10 @@ public Collection<PackFile> repack() throws IOException {
tagTargets.add(ref.getPeeledObjectId()); tagTargets.add(ref.getPeeledObjectId());
} }
List<PackIndex> excluded = new LinkedList<PackIndex>(); List<ObjectIdSet> excluded = new LinkedList<ObjectIdSet>();
for (PackFile f : repo.getObjectDatabase().getPacks()) for (final PackFile f : repo.getObjectDatabase().getPacks())
if (f.shouldBeKept()) if (f.shouldBeKept())
excluded.add(f.getIndex()); excluded.add(objectIdSet(f.getIndex()));
tagTargets.addAll(allHeads); tagTargets.addAll(allHeads);
nonHeads.addAll(indexObjects); nonHeads.addAll(indexObjects);
@ -513,7 +515,7 @@ public Collection<PackFile> repack() throws IOException {
tagTargets, excluded); tagTargets, excluded);
if (heads != null) { if (heads != null) {
ret.add(heads); ret.add(heads);
excluded.add(0, heads.getIndex()); excluded.add(0, objectIdSet(heads.getIndex()));
} }
} }
if (!nonHeads.isEmpty()) { if (!nonHeads.isEmpty()) {
@ -632,7 +634,7 @@ private Set<ObjectId> listNonHEADIndexObjects()
private PackFile writePack(Set<? extends ObjectId> want, private PackFile writePack(Set<? extends ObjectId> want,
Set<? extends ObjectId> have, Set<ObjectId> tagTargets, Set<? extends ObjectId> have, Set<ObjectId> tagTargets,
List<PackIndex> excludeObjects) throws IOException { List<ObjectIdSet> excludeObjects) throws IOException {
File tmpPack = null; File tmpPack = null;
File tmpIdx = null; File tmpIdx = null;
PackWriter pw = new PackWriter(repo); PackWriter pw = new PackWriter(repo);
@ -643,7 +645,7 @@ private PackFile writePack(Set<? extends ObjectId> want,
if (tagTargets != null) if (tagTargets != null)
pw.setTagTargets(tagTargets); pw.setTagTargets(tagTargets);
if (excludeObjects != null) if (excludeObjects != null)
for (PackIndex idx : excludeObjects) for (ObjectIdSet idx : excludeObjects)
pw.excludeObjects(idx); pw.excludeObjects(idx);
pw.preparePack(pm, want, have); pw.preparePack(pm, want, have);
if (pw.getObjectCount() == 0) if (pw.getObjectCount() == 0)
@ -855,4 +857,11 @@ public void setExpire(Date expire) {
expireAgeMillis = -1; expireAgeMillis = -1;
} }
private static ObjectIdSet objectIdSet(final PackIndex idx) {
return new ObjectIdSet() {
public boolean contains(AnyObjectId objectId) {
return idx.hasObject(objectId);
}
};
}
} }

View File

@ -103,7 +103,6 @@
import org.eclipse.jgit.revwalk.RevSort; import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevTag; import org.eclipse.jgit.revwalk.RevTag;
import org.eclipse.jgit.revwalk.RevTree; import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.storage.file.PackIndex;
import org.eclipse.jgit.storage.file.PackIndexWriter; import org.eclipse.jgit.storage.file.PackIndexWriter;
import org.eclipse.jgit.util.BlockList; import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.TemporaryBuffer; import org.eclipse.jgit.util.TemporaryBuffer;
@ -144,6 +143,18 @@
public class PackWriter { public class PackWriter {
private static final int PACK_VERSION_GENERATED = 2; private static final int PACK_VERSION_GENERATED = 2;
/** A collection of object ids. */
public interface ObjectIdSet {
/**
* Returns true if the objectId is contained within the collection.
*
* @param objectId
* the objectId to find
* @return whether the collection contains the objectId.
*/
boolean contains(AnyObjectId objectId);
}
private static final Map<WeakReference<PackWriter>, Boolean> instances = private static final Map<WeakReference<PackWriter>, Boolean> instances =
new ConcurrentHashMap<WeakReference<PackWriter>, Boolean>(); new ConcurrentHashMap<WeakReference<PackWriter>, Boolean>();
@ -206,9 +217,9 @@ public static Iterable<PackWriter> getInstances() {
private Set<ObjectId> tagTargets = Collections.emptySet(); private Set<ObjectId> tagTargets = Collections.emptySet();
private PackIndex[] excludeInPacks; private ObjectIdSet[] excludeInPacks;
private PackIndex excludeInPackLast; private ObjectIdSet excludeInPackLast;
private Deflater myDeflater; private Deflater myDeflater;
@ -507,19 +518,40 @@ public long getObjectCount() throws IOException {
return stats.totalObjects; return stats.totalObjects;
} }
/**
* Returns the object ids in the pack file that was created by this writer,
* sorted by name.
*
* This method can only be invoked after
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has
* been invoked and completed successfully.
*
* @return number of objects in pack.
* @throws IOException
* a cached pack cannot supply its object ids.
*/
public List<ObjectId> getObjectList() throws IOException {
if (!cachedPacks.isEmpty())
throw new IOException(
JGitText.get().cachedPacksPreventsListingObjects);
return Collections.unmodifiableList(
(List<? extends ObjectId>) sortByName());
}
/** /**
* Add a pack index whose contents should be excluded from the result. * Add a pack index whose contents should be excluded from the result.
* *
* @param idx * @param idx
* objects in this index will not be in the output pack. * objects in this index will not be in the output pack.
*/ */
public void excludeObjects(PackIndex idx) { public void excludeObjects(ObjectIdSet idx) {
if (excludeInPacks == null) { if (excludeInPacks == null) {
excludeInPacks = new PackIndex[] { idx }; excludeInPacks = new ObjectIdSet[] { idx };
excludeInPackLast = idx; excludeInPackLast = idx;
} else { } else {
int cnt = excludeInPacks.length; int cnt = excludeInPacks.length;
PackIndex[] newList = new PackIndex[cnt + 1]; ObjectIdSet[] newList = new ObjectIdSet[cnt + 1];
System.arraycopy(excludeInPacks, 0, newList, 0, cnt); System.arraycopy(excludeInPacks, 0, newList, 0, cnt);
newList[cnt] = idx; newList[cnt] = idx;
excludeInPacks = newList; excludeInPacks = newList;
@ -1798,10 +1830,10 @@ private void addObject(final RevObject object, final int pathHashCode) {
private boolean exclude(AnyObjectId objectId) { private boolean exclude(AnyObjectId objectId) {
if (excludeInPacks == null) if (excludeInPacks == null)
return false; return false;
if (excludeInPackLast.hasObject(objectId)) if (excludeInPackLast.contains(objectId))
return true; return true;
for (PackIndex idx : excludeInPacks) { for (ObjectIdSet idx : excludeInPacks) {
if (idx.hasObject(objectId)) { if (idx.contains(objectId)) {
excludeInPackLast = idx; excludeInPackLast = idx;
return true; return true;
} }