From 3c27ee1a916592d76a92032c57e66d775f830e45 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 16 Apr 2013 17:54:23 -0700 Subject: [PATCH] Support excluding objects during DFS compaction By excluding objects the compactor can avoid storing objects that are already well packed in the base GC packs, or any other pack not being replaced by the current compaction operation. For deltas the base object is still included even if the base exists in another exclusion set. This favors keeping deltas for recent history, to support faster fetch operations for clients. Change-Id: Ie822fe075fe5072fe3171450fda2f0ca507796a1 --- .../storage/dfs/DfsPackCompactor.java | 133 ++++++++++++++---- 1 file changed, 102 insertions(+), 31 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java index ddd6ff7c0..ea563926b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java @@ -46,6 +46,7 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT; import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; +import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA; import java.io.IOException; import java.util.ArrayList; @@ -56,6 +57,7 @@ import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.file.PackIndex; +import org.eclipse.jgit.internal.storage.file.PackReverseIndex; import org.eclipse.jgit.internal.storage.pack.PackWriter; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.NullProgressMonitor; @@ -88,12 +90,18 @@ public class DfsPackCompactor { private final List srcPacks; + private final List exclude; + private final List newPacks; private final List newStats; private int autoAddSize; + private RevWalk rw; + private RevFlag added; + private RevFlag isBase; + /** * Initialize a pack compactor. * @@ -104,6 +112,7 @@ public DfsPackCompactor(DfsRepository repository) { repo = repository; autoAddSize = 5 * 1024 * 1024; // 5 MiB srcPacks = new ArrayList(); + exclude = new ArrayList(4); newPacks = new ArrayList(1); newStats = new ArrayList(1); } @@ -141,10 +150,48 @@ public DfsPackCompactor autoAdd() throws IOException { DfsPackDescription d = pack.getPackDescription(); if (d.getFileSize(PACK) < autoAddSize) add(pack); + else + exclude(pack); } return this; } + /** + * Exclude objects from the compacted pack. + * + * @param set + * objects to not include. + * @return {@code this}. + */ + public DfsPackCompactor exclude(PackWriter.ObjectIdSet set) { + exclude.add(set); + return this; + } + + /** + * Exclude objects from the compacted pack. + * + * @param pack + * objects to not include. + * @return {@code this}. + * @throws IOException + * pack index cannot be loaded. + */ + public DfsPackCompactor exclude(DfsPackFile pack) throws IOException { + final PackIndex idx; + DfsReader ctx = (DfsReader) repo.newObjectReader(); + try { + idx = pack.getPackIndex(ctx); + } finally { + ctx.release(); + } + return exclude(new PackWriter.ObjectIdSet() { + public boolean contains(AnyObjectId id) { + return idx.hasObject(id); + } + }); + } + /** * Compact the pack files together. * @@ -200,6 +247,7 @@ public void compact(ProgressMonitor pm) throws IOException { pw.release(); } } finally { + rw = null; ctx.release(); } } @@ -239,50 +287,73 @@ public int compare(DfsPackFile a, DfsPackFile b) { } }); - RevWalk rw = new RevWalk(ctx); - RevFlag added = rw.newFlag("ADDED"); //$NON-NLS-1$ + rw = new RevWalk(ctx); + added = rw.newFlag("ADDED"); //$NON-NLS-1$ + isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$ + List baseObjects = new BlockList(); pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN); for (DfsPackFile src : srcPacks) { - List want = new BlockList(); - for (PackIndex.MutableEntry ent : src.getPackIndex(ctx)) { - ObjectId id = ent.toObjectId(); - RevObject obj = rw.lookupOrNull(id); - if (obj == null || !obj.has(added)) - want.add(new ObjectIdWithOffset(id, ent.getOffset())); - } + List want = toInclude(src, ctx); + if (want.isEmpty()) + continue; - // Sort objects by the order they appear in the pack file, for - // two benefits. Scanning object type information is faster when - // the pack is traversed in order, and this allows the PackWriter - // to be given the new objects in a relatively sane newest-first - // ordering without additional logic, like unpacking commits and - // walking a commit queue. - Collections.sort(want, new Comparator() { - public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) { - return Long.signum(a.offset - b.offset); - } - }); - - // Only pack each object at most once into the output file. The - // PackWriter will later select a representation to reuse, which - // may be the version in this pack, or may be from another pack if - // the object was copied here to complete a thin pack and is larger - // than a delta from another pack. This is actually somewhat common - // if an object is modified frequently, such as the top level tree. + PackReverseIndex rev = src.getReverseIdx(ctx); + DfsObjectRepresentation rep = new DfsObjectRepresentation(src); for (ObjectIdWithOffset id : want) { int type = src.getObjectType(ctx, id.offset); RevObject obj = rw.lookupAny(id, type); - if (!obj.has(added)) { - pm.update(1); - pw.addObject(obj); - obj.add(added); + if (obj.has(added)) + continue; + + pm.update(1); + pw.addObject(obj); + obj.add(added); + + src.representation(rep, id.offset, ctx, rev); + if (rep.getFormat() != PACK_DELTA) + continue; + + RevObject base = rw.lookupAny(rep.getDeltaBase(), type); + if (!base.has(added) && !base.has(isBase)) { + baseObjects.add(base); + base.add(isBase); } } } + for (RevObject obj : baseObjects) { + if (!obj.has(added)) { + pm.update(1); + pw.addObject(obj); + obj.add(added); + } + } pm.endTask(); } + private List toInclude(DfsPackFile src, DfsReader ctx) + throws IOException { + PackIndex srcIdx = src.getPackIndex(ctx); + List want = new BlockList( + (int) srcIdx.getObjectCount()); + SCAN: for (PackIndex.MutableEntry ent : srcIdx) { + ObjectId id = ent.toObjectId(); + RevObject obj = rw.lookupOrNull(id); + if (obj != null && (obj.has(added) || obj.has(isBase))) + continue; + for (PackWriter.ObjectIdSet e : exclude) + if (e.contains(id)) + continue SCAN; + want.add(new ObjectIdWithOffset(id, ent.getOffset())); + } + Collections.sort(want, new Comparator() { + public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) { + return Long.signum(a.offset - b.offset); + } + }); + return want; + } + private static void writePack(DfsObjDatabase objdb, DfsPackDescription pack, PackWriter pw, ProgressMonitor pm) throws IOException {