Merge "Support excluding objects during DFS compaction"

Shawn Pearce, 2013-04-17 14:19:21 -04:00, committed by Gerrit Code Review @ Eclipse.org
commit e74263e743
1 changed file with 102 additions and 31 deletions
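As a quick orientation, here is a hedged sketch of how a caller might use the exclusion support this change introduces. DfsPackCompactor, autoAdd(), exclude(DfsPackFile) and compact(ProgressMonitor) are the pieces touched in the diff below; the repo and gcPack variables are assumed caller-side setup and are illustrative only.

// Hypothetical caller code. "repo" is an existing DfsRepository; "gcPack" is a
// DfsPackFile whose objects are already stored elsewhere and must not be
// copied into the compacted pack.
DfsPackCompactor compactor = new DfsPackCompactor(repo);
compactor.autoAdd();                              // add packs under the 5 MiB threshold, exclude the rest
compactor.exclude(gcPack);                        // objects already present in gcPack are skipped
compactor.compact(NullProgressMonitor.INSTANCE);  // NullProgressMonitor is imported by the file below

With this change, autoAdd() no longer simply ignores packs above the size threshold: their indexes become exclusion sets, so objects they already contain are not duplicated into the new compact pack.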

DfsPackCompactor.java

@@ -46,6 +46,7 @@
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;
import java.io.IOException;
import java.util.ArrayList;
@@ -56,6 +57,7 @@
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
@@ -88,12 +90,18 @@ public class DfsPackCompactor {
private final List<DfsPackFile> srcPacks;
private final List<PackWriter.ObjectIdSet> exclude;
private final List<DfsPackDescription> newPacks;
private final List<PackWriter.Statistics> newStats;
private int autoAddSize;
private RevWalk rw;
private RevFlag added;
private RevFlag isBase;
/**
* Initialize a pack compactor.
*
@@ -104,6 +112,7 @@ public DfsPackCompactor(DfsRepository repository) {
repo = repository;
autoAddSize = 5 * 1024 * 1024; // 5 MiB
srcPacks = new ArrayList<DfsPackFile>();
exclude = new ArrayList<PackWriter.ObjectIdSet>(4);
newPacks = new ArrayList<DfsPackDescription>(1);
newStats = new ArrayList<PackWriter.Statistics>(1);
}
@@ -141,10 +150,48 @@ public DfsPackCompactor autoAdd() throws IOException {
DfsPackDescription d = pack.getPackDescription();
if (d.getFileSize(PACK) < autoAddSize)
add(pack);
else
exclude(pack);
}
return this;
}
/**
* Exclude objects from the compacted pack.
*
* @param set
* objects to not include.
* @return {@code this}.
*/
public DfsPackCompactor exclude(PackWriter.ObjectIdSet set) {
exclude.add(set);
return this;
}
/**
* Exclude objects from the compacted pack.
*
* @param pack
* objects to not include.
* @return {@code this}.
* @throws IOException
* pack index cannot be loaded.
*/
public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
final PackIndex idx;
DfsReader ctx = (DfsReader) repo.newObjectReader();
try {
idx = pack.getPackIndex(ctx);
} finally {
ctx.release();
}
return exclude(new PackWriter.ObjectIdSet() {
public boolean contains(AnyObjectId id) {
return idx.hasObject(id);
}
});
}
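For comparison, the ObjectIdSet overload accepts any membership test, not only a pack index. A minimal sketch of hypothetical caller code, not part of this change (assuming java.util.Set/HashSet and org.eclipse.jgit.lib.ObjectId are available, and reusing the compactor variable from the earlier sketch):

final Set<ObjectId> skip = new HashSet<ObjectId>();
skip.add(ObjectId.fromString("0123456789012345678901234567890123456789"));
compactor.exclude(new PackWriter.ObjectIdSet() {
	public boolean contains(AnyObjectId id) {
		return skip.contains(id.copy()); // copy() yields an immutable ObjectId for the Set lookup
	}
});

Either form feeds the same exclude list that toInclude() consults further down, so a custom set and a pack index behave identically during compaction.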
/**
* Compact the pack files together.
*
@@ -200,6 +247,7 @@ public void compact(ProgressMonitor pm) throws IOException {
pw.release();
}
} finally {
rw = null;
ctx.release();
}
}
@@ -239,50 +287,73 @@ public int compare(DfsPackFile a, DfsPackFile b) {
}
});
RevWalk rw = new RevWalk(ctx);
RevFlag added = rw.newFlag("ADDED"); //$NON-NLS-1$
rw = new RevWalk(ctx);
added = rw.newFlag("ADDED"); //$NON-NLS-1$
isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
List<RevObject> baseObjects = new BlockList<RevObject>();
pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
for (DfsPackFile src : srcPacks) {
List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>();
for (PackIndex.MutableEntry ent : src.getPackIndex(ctx)) {
ObjectId id = ent.toObjectId();
RevObject obj = rw.lookupOrNull(id);
if (obj == null || !obj.has(added))
want.add(new ObjectIdWithOffset(id, ent.getOffset()));
}
List<ObjectIdWithOffset> want = toInclude(src, ctx);
if (want.isEmpty())
continue;
// Sort objects by the order they appear in the pack file, for
// two benefits. Scanning object type information is faster when
// the pack is traversed in order, and this allows the PackWriter
// to be given the new objects in a relatively sane newest-first
// ordering without additional logic, like unpacking commits and
// walking a commit queue.
Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
return Long.signum(a.offset - b.offset);
}
});
// Only pack each object at most once into the output file. The
// PackWriter will later select a representation to reuse, which
// may be the version in this pack, or may be from another pack if
// the object was copied here to complete a thin pack and is larger
// than a delta from another pack. This is actually somewhat common
// if an object is modified frequently, such as the top level tree.
PackReverseIndex rev = src.getReverseIdx(ctx);
DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
for (ObjectIdWithOffset id : want) {
int type = src.getObjectType(ctx, id.offset);
RevObject obj = rw.lookupAny(id, type);
if (!obj.has(added)) {
pm.update(1);
pw.addObject(obj);
obj.add(added);
if (obj.has(added))
continue;
pm.update(1);
pw.addObject(obj);
obj.add(added);
src.representation(rep, id.offset, ctx, rev);
if (rep.getFormat() != PACK_DELTA)
continue;
RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
if (!base.has(added) && !base.has(isBase)) {
baseObjects.add(base);
base.add(isBase);
}
}
}
for (RevObject obj : baseObjects) {
if (!obj.has(added)) {
pm.update(1);
pw.addObject(obj);
obj.add(added);
}
}
pm.endTask();
}
private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
throws IOException {
PackIndex srcIdx = src.getPackIndex(ctx);
List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>(
(int) srcIdx.getObjectCount());
SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
ObjectId id = ent.toObjectId();
RevObject obj = rw.lookupOrNull(id);
if (obj != null && (obj.has(added) || obj.has(isBase)))
continue;
for (PackWriter.ObjectIdSet e : exclude)
if (e.contains(id))
continue SCAN;
want.add(new ObjectIdWithOffset(id, ent.getOffset()));
}
Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
return Long.signum(a.offset - b.offset);
}
});
return want;
}
private static void writePack(DfsObjDatabase objdb,
DfsPackDescription pack,
PackWriter pw, ProgressMonitor pm) throws IOException {