Optimize DFS object reuse selection code

Rewrite this complicated logic to examine each pack file exactly once. This reduces thrashing when there are many large pack files present and the reader needs to locate each object's header. The intermediate temporary list is now smaller, it is bounded to the same length as the input object list. In the prior version of this code the list contained one entry for every representation of every object being packed. Only one representation object is allocated, reducing the overall memory footprint to be approximately one reference per object found in the current pack file (the pointer in the BlockList). This saves considerable working set memory compared to the prior version that made and held onto a new representation for every ObjectToPack. Change-Id: I2c1f18cd6755643ac4c2cf1f23b5464ca9d91b22
2013-04-04 12:18:22 -07:00 · 2013-04-04 12:18:22 -07:00 · d72416afbb
parent 93a27ce728
commit d72416afbb
5 changed files with 67 additions and 104 deletions
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectRepresentation.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectRepresentation.java
@ -47,34 +47,20 @@
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;

 import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
-import org.eclipse.jgit.internal.storage.pack.ObjectToPack;
 import org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation;
 import org.eclipse.jgit.lib.ObjectId;

 class DfsObjectRepresentation extends StoredObjectRepresentation {
-	final ObjectToPack object;
-
-	DfsPackFile pack;
-
-	/**
-	 * Position of {@link #pack} in the reader's pack list. Lower numbers are
-	 * newer/more recent packs and less likely to contain the best format for a
-	 * base object. Higher numbered packs are bigger, more stable, and favored
-	 * by PackWriter when selecting representations... but only if they come
-	 * last in the representation ordering.
-	 */
-	int packIndex;
-
-	long offset;
-
+	final DfsPackFile pack;
+	final int packIndex;
 	int format;
-
+	long offset;
 	long length;
-
 	ObjectId baseId;

-	DfsObjectRepresentation(ObjectToPack object) {
-		this.object = object;
+	DfsObjectRepresentation(DfsPackFile pack, int packIndex) {
+		this.pack = pack;
+		this.packIndex = packIndex;
 	}

 	@Override
@ -94,10 +80,7 @@ public ObjectId getDeltaBase() {

 	@Override
 	public boolean wasDeltaAttempted() {
-		if (pack != null) {
-			PackSource source = pack.getPackDescription().getPackSource();
-			return source == GC || source == UNREACHABLE_GARBAGE;
-		}
-		return false;
+		PackSource source = pack.getPackDescription().getPackSource();
+		return source == GC || source == UNREACHABLE_GARBAGE;
 	}
 }
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectToPack.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectToPack.java
@ -49,6 +49,8 @@

 /** {@link ObjectToPack} for {@link DfsObjDatabase}. */
 class DfsObjectToPack extends ObjectToPack {
+	private static final int FLAG_FOUND = 1 << 0;
+
 	/** Pack to reuse compressed data from, otherwise null. */
 	DfsPackFile pack;

@ -65,6 +67,14 @@ class DfsObjectToPack extends ObjectToPack {
 		super(src, type);
 	}

+	final boolean isFound() {
+		return isExtendedFlag(FLAG_FOUND);
+	}
+
+	final void setFound() {
+		setExtendedFlag(FLAG_FOUND);
+	}
+
 	@Override
 	protected void clearReuseAsIs() {
 		super.clearReuseAsIs();
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java
@ -341,7 +341,7 @@ else if (bs <= 0)
 		}
 	}

-	private PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException {
+	PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException {
 		DfsBlockCache.Ref<PackReverseIndex> revref = reverseIndex;
 		if (revref != null) {
 			PackReverseIndex revidx = revref.get();
@ -431,22 +431,6 @@ long getObjectCount(DfsReader ctx) throws IOException {
 		return idx(ctx).getObjectCount();
 	}

-	/**
-	 * Search for object id with the specified start offset in associated pack
-	 * (reverse) index.
-	 *
-	 * @param ctx
-	 *            current reader for the calling thread.
-	 * @param offset
-	 *            start offset of object to find
-	 * @return object id for this offset, or null if no object was found
-	 * @throws IOException
-	 *             the index file cannot be loaded into memory.
-	 */
-	ObjectId findObjectForOffset(DfsReader ctx, long offset) throws IOException {
-		return getReverseIdx(ctx).findObject(offset);
-	}
-
 	private byte[] decompress(long position, int sz, DfsReader ctx)
 			throws IOException, DataFormatException {
 		byte[] dstbuf;
@ -1053,9 +1037,10 @@ long getObjectSize(DfsReader ctx, long pos)
 		}
 	}

-	void representation(DfsReader ctx, DfsObjectRepresentation r)
+	void representation(DfsObjectRepresentation r, final long pos,
+			DfsReader ctx, PackReverseIndex rev)
 			throws IOException {
-		final long pos = r.offset;
+		r.offset = pos;
 		final byte[] ib = ctx.tempId;
 		readFully(pos, ib, 0, 20, ctx);
 		int c = ib[0] & 0xff;
@ -1064,13 +1049,14 @@ void representation(DfsReader ctx, DfsObjectRepresentation r)
 		while ((c & 0x80) != 0)
 			c = ib[p++] & 0xff;

-		long len = (getReverseIdx(ctx).findNextOffset(pos, length - 20) - pos);
+		long len = rev.findNextOffset(pos, length - 20) - pos;
 		switch (typeCode) {
 		case Constants.OBJ_COMMIT:
 		case Constants.OBJ_TREE:
 		case Constants.OBJ_BLOB:
 		case Constants.OBJ_TAG:
 			r.format = StoredObjectRepresentation.PACK_WHOLE;
+			r.baseId = null;
 			r.length = len - p;
 			return;

@ -1083,21 +1069,17 @@ void representation(DfsReader ctx, DfsObjectRepresentation r)
 				ofs <<= 7;
 				ofs += (c & 127);
 			}
-			ofs = pos - ofs;
 			r.format = StoredObjectRepresentation.PACK_DELTA;
-			r.baseId = findObjectForOffset(ctx, ofs);
+			r.baseId = rev.findObject(pos - ofs);
 			r.length = len - p;
 			return;
 		}

 		case Constants.OBJ_REF_DELTA: {
-			len -= p;
-			len -= Constants.OBJECT_ID_LENGTH;
 			readFully(pos + p, ib, 0, 20, ctx);
-			ObjectId id = ObjectId.fromRaw(ib);
 			r.format = StoredObjectRepresentation.PACK_DELTA;
-			r.baseId = id;
-			r.length = len;
+			r.baseId = ObjectId.fromRaw(ib);
+			r.length = len - p - 20;
 			return;
 		}

--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java
@ -73,6 +73,8 @@
 import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl;
 import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
+import org.eclipse.jgit.internal.storage.file.PackIndex;
+import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
 import org.eclipse.jgit.internal.storage.pack.CachedPack;
 import org.eclipse.jgit.internal.storage.pack.ObjectReuseAsIs;
 import org.eclipse.jgit.internal.storage.pack.ObjectToPack;
@ -476,12 +478,9 @@ public DfsObjectToPack newObjectToPack(AnyObjectId objectId, int type) {
 		return new DfsObjectToPack(objectId, type);
 	}

-	private static final Comparator<DfsObjectRepresentation> REPRESENTATION_SORT = new Comparator<DfsObjectRepresentation>() {
-		public int compare(DfsObjectRepresentation a, DfsObjectRepresentation b) {
-			int cmp = a.packIndex - b.packIndex;
-			if (cmp == 0)
-				cmp = Long.signum(a.offset - b.offset);
-			return cmp;
+	private static final Comparator<DfsObjectToPack> OFFSET_SORT = new Comparator<DfsObjectToPack>() {
+		public int compare(DfsObjectToPack a, DfsObjectToPack b) {
+			return Long.signum(a.getOffset() - b.getOffset());
 		}
 	};

@ -489,56 +488,45 @@ public void selectObjectRepresentation(PackWriter packer,
 			ProgressMonitor monitor, Iterable<ObjectToPack> objects)
 			throws IOException, MissingObjectException {
 		DfsPackFile[] packList = db.getPacks();
-		if (packList.length == 0) {
-			Iterator<ObjectToPack> itr = objects.iterator();
-			if (itr.hasNext())
-				throw new MissingObjectException(itr.next(), "unknown");
-			return;
+		for (int packIndex = 0; packIndex < packList.length; packIndex++) {
+			DfsPackFile pack = packList[packIndex];
+			List<DfsObjectToPack> tmp = findAllFromPack(pack, objects);
+			if (tmp.isEmpty())
+				continue;
+			Collections.sort(tmp, OFFSET_SORT);
+			try {
+				wantReadAhead = true;
+				PackReverseIndex rev = pack.getReverseIdx(this);
+				DfsObjectRepresentation rep = new DfsObjectRepresentation(
+						pack,
+						packIndex);
+				for (DfsObjectToPack otp : tmp) {
+					pack.representation(rep, otp.getOffset(), this, rev);
+					otp.setOffset(0);
+					packer.select(otp, rep);
+					if (!otp.isFound()) {
+						otp.setFound();
+						monitor.update(1);
+					}
+				}
+			} finally {
+				cancelReadAhead();
+			}
 		}
+	}

-		int objectCount = 0;
-		int updated = 0;
-		int posted = 0;
-		List<DfsObjectRepresentation> all = new BlockList<DfsObjectRepresentation>();
+	private List<DfsObjectToPack> findAllFromPack(DfsPackFile pack,
+			Iterable<ObjectToPack> objects) throws IOException {
+		List<DfsObjectToPack> tmp = new BlockList<DfsObjectToPack>();
+		PackIndex idx = pack.getPackIndex(this);
 		for (ObjectToPack otp : objects) {
-			boolean found = false;
-			for (int packIndex = 0; packIndex < packList.length; packIndex++) {
-				DfsPackFile pack = packList[packIndex];
-				long p = pack.findOffset(this, otp);
-				if (0 < p) {
-					DfsObjectRepresentation r = new DfsObjectRepresentation(otp);
-					r.pack = pack;
-					r.packIndex = packIndex;
-					r.offset = p;
-					all.add(r);
-					found = true;
-				}
+			long p = idx.findOffset(otp);
+			if (0 < p) {
+				otp.setOffset(p);
+				tmp.add((DfsObjectToPack) otp);
 			}
-			if (!found)
-				throw new MissingObjectException(otp, otp.getType());
-			if ((++updated & 1) == 1) {
-				monitor.update(1); // Update by 50%, the other 50% is below.
-				posted++;
-			}
-			objectCount++;
 		}
-		Collections.sort(all, REPRESENTATION_SORT);
-
-		try {
-			wantReadAhead = true;
-			for (DfsObjectRepresentation r : all) {
-				r.pack.representation(this, r);
-				packer.select(r.object, r);
-				if ((++updated & 1) == 1 && posted < objectCount) {
-					monitor.update(1);
-					posted++;
-				}
-			}
-		} finally {
-			cancelReadAhead();
-		}
-		if (posted < objectCount)
-			monitor.update(objectCount - posted);
+		return tmp;
 	}

 	public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/LargePackedWholeObject.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/LargePackedWholeObject.java
@ -109,7 +109,7 @@ public ObjectStream openStream() throws MissingObjectException, IOException {
 			// again and open the stream from that location instead.
 			//
 			try {
-				ObjectId obj = pack.findObjectForOffset(ctx, objectOffset);
+				ObjectId obj = pack.getReverseIdx(ctx).findObject(objectOffset);
 				return ctx.open(obj, type).openStream();
 			} finally {
 				ctx.release();