PackWriter: Speed up pruning of objects from cached packs
During object enumeration for the thin pack, very few objects come out that are duplicated with the cached pack. Typically these are only cases where a blob or tree was cherry-picked forward, got a copy or rename, or was reverted... all relatively infrequent events. Speed up pruning of the thin pack object list by combining the phase with the object representation selection. Implementers should already be offering to reuse the object from the cached pack if it is stored there, at which point the implementation can perform a very fast type of containment test using the cached pack's identity rather than yet another index lookup. For the local disk case this is probably not a big improvement, but it does help on the DHT implementation, where combining the two passes into one reduces latency. Change-Id: I6a07fc75d9075bf6233e967360b6546f9e9a2b33 Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
parent
00eae14a7f
commit
9f5bbb5dd4
|
@ -47,13 +47,14 @@
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.eclipse.jgit.lib.ObjectId;
|
||||
import org.eclipse.jgit.storage.pack.CachedPack;
|
||||
import org.eclipse.jgit.storage.pack.ObjectToPack;
|
||||
import org.eclipse.jgit.storage.pack.PackOutputStream;
|
||||
import org.eclipse.jgit.storage.pack.StoredObjectRepresentation;
|
||||
|
||||
class LocalCachedPack extends CachedPack {
|
||||
private final ObjectDirectory odb;
|
||||
|
@ -62,6 +63,8 @@ class LocalCachedPack extends CachedPack {
|
|||
|
||||
private final String[] packNames;
|
||||
|
||||
private PackFile[] packs;
|
||||
|
||||
LocalCachedPack(ObjectDirectory odb, Set<ObjectId> tips,
|
||||
List<String> packNames) {
|
||||
this.odb = odb;
|
||||
|
@ -82,34 +85,39 @@ public Set<ObjectId> getTips() {
|
|||
@Override
|
||||
public long getObjectCount() throws IOException {
|
||||
long cnt = 0;
|
||||
for (String packName : packNames)
|
||||
cnt += getPackFile(packName).getObjectCount();
|
||||
for (PackFile pack : getPacks())
|
||||
cnt += pack.getObjectCount();
|
||||
return cnt;
|
||||
}
|
||||
|
||||
void copyAsIs(PackOutputStream out, boolean validate, WindowCursor wc)
|
||||
throws IOException {
|
||||
for (String packName : packNames)
|
||||
getPackFile(packName).copyPackAsIs(out, validate, wc);
|
||||
for (PackFile pack : getPacks())
|
||||
pack.copyPackAsIs(out, validate, wc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T extends ObjectId> Set<ObjectId> hasObject(Iterable<T> toFind)
|
||||
throws IOException {
|
||||
PackFile[] packs = new PackFile[packNames.length];
|
||||
for (int i = 0; i < packNames.length; i++)
|
||||
packs[i] = getPackFile(packNames[i]);
|
||||
|
||||
Set<ObjectId> have = new HashSet<ObjectId>();
|
||||
for (ObjectId id : toFind) {
|
||||
for (PackFile pack : packs) {
|
||||
if (pack.hasObject(id)) {
|
||||
have.add(id);
|
||||
break;
|
||||
}
|
||||
public boolean hasObject(ObjectToPack obj, StoredObjectRepresentation rep) {
|
||||
try {
|
||||
LocalObjectRepresentation local = (LocalObjectRepresentation) rep;
|
||||
for (PackFile pack : getPacks()) {
|
||||
if (local.pack == pack)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
} catch (FileNotFoundException packGone) {
|
||||
return false;
|
||||
}
|
||||
return have;
|
||||
}
|
||||
|
||||
/**
 * Resolve every entry of {@code packNames} to a PackFile and cache the
 * resulting array in the {@code packs} field, so later calls return the
 * cached array without repeating the lookups.
 *
 * @return the PackFile array, in the same order as {@code packNames}.
 * @throws FileNotFoundException
 *             propagated from {@code getPackFile}; presumably raised when
 *             a named pack is no longer available (confirm against
 *             getPackFile's body).
 */
private PackFile[] getPacks() throws FileNotFoundException {
|
||||
if (packs == null) {
|
||||
// Fill a local array first: the field is only assigned after every
// lookup has succeeded, so a failed lookup leaves packs null.
PackFile[] p = new PackFile[packNames.length];
|
||||
for (int i = 0; i < packNames.length; i++)
|
||||
p[i] = getPackFile(packNames[i]);
|
||||
packs = p;
|
||||
}
|
||||
return packs;
|
||||
}
|
||||
|
||||
private PackFile getPackFile(String packName) throws FileNotFoundException {
|
||||
|
|
|
@ -93,16 +93,27 @@ public long getDeltaCount() throws IOException {
|
|||
}
|
||||
|
||||
/**
|
||||
* Determine if the pack contains the requested objects.
|
||||
* Determine if this pack contains the object representation given.
|
||||
* <p>
|
||||
* PackWriter uses this method during the finding sources phase to prune
|
||||
* away any objects from the leading thin-pack that already appear within
|
||||
* this pack and should not be sent twice.
|
||||
* <p>
|
||||
* Implementors are strongly encouraged to rely on looking at {@code rep}
|
||||
* only and using its internal state to decide if this object is within this
|
||||
* pack. Implementors should ensure a representation from this cached pack
|
||||
* is tested as part of
|
||||
* {@link ObjectReuseAsIs#selectObjectRepresentation(PackWriter, org.eclipse.jgit.lib.ProgressMonitor, Iterable)}
|
||||
* , ensuring this method would eventually return true if the object would
|
||||
* be included by this cached pack.
|
||||
*
|
||||
* @param <T>
|
||||
* any type of ObjectId to search for.
|
||||
* @param toFind
|
||||
* the objects to search for.
|
||||
* @return the objects contained in the pack.
|
||||
* @throws IOException
|
||||
* the pack cannot be accessed
|
||||
* @param obj
|
||||
* the object being packed. Can be used as an ObjectId.
|
||||
* @param rep
|
||||
* representation from the {@link ObjectReuseAsIs} instance that
|
||||
* originally supplied this CachedPack.
|
||||
* @return true if this pack contains this object.
|
||||
*/
|
||||
public abstract <T extends ObjectId> Set<ObjectId> hasObject(
|
||||
Iterable<T> toFind) throws IOException;
|
||||
public abstract boolean hasObject(ObjectToPack obj,
|
||||
StoredObjectRepresentation rep);
|
||||
}
|
||||
|
|
|
@ -81,13 +81,19 @@ public interface ObjectReuseAsIs {
|
|||
|
||||
/**
|
||||
* Select the best object representation for a packer.
|
||||
*
|
||||
* <p>
|
||||
* Implementations should iterate through all available representations of
|
||||
* an object, and pass them in turn to the PackWriter through
|
||||
* {@link PackWriter#select(ObjectToPack, StoredObjectRepresentation)} so
|
||||
* the writer can select the most suitable representation to reuse into the
|
||||
* output stream.
|
||||
*
|
||||
* <p>
|
||||
* If the implementation returns CachedPack from {@link #getCachedPacks()},
|
||||
* it must consider the representation of any object that is stored in any
|
||||
* of the offered CachedPacks. PackWriter relies on this behavior to prune
|
||||
* duplicate objects out of the pack stream when it selects a CachedPack and
|
||||
* the object was also reached through the thin-pack enumeration.
|
||||
* <p>
|
||||
* The implementation may choose to consider multiple objects at once on
|
||||
* concurrent threads, but must evaluate all representations of an object
|
||||
* within the same thread.
|
||||
|
|
|
@ -188,6 +188,8 @@ public class PackWriter {
|
|||
|
||||
private boolean ignoreMissingUninteresting = true;
|
||||
|
||||
private boolean pruneCurrentObjectList;
|
||||
|
||||
/**
|
||||
* Create writer for specified repository.
|
||||
* <p>
|
||||
|
@ -526,16 +528,7 @@ public void preparePack(ProgressMonitor countingMonitor,
|
|||
*/
|
||||
public boolean willInclude(final AnyObjectId id) throws IOException {
|
||||
ObjectToPack obj = objectsMap.get(id);
|
||||
if (obj != null && !obj.isEdge())
|
||||
return true;
|
||||
|
||||
Set<ObjectId> toFind = Collections.singleton(id.toObjectId());
|
||||
for (CachedPack pack : cachedPacks) {
|
||||
if (pack.hasObject(toFind).contains(id))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
return obj != null && !obj.isEdge();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -639,7 +632,10 @@ public void writePack(ProgressMonitor compressMonitor,
|
|||
if (writeMonitor == null)
|
||||
writeMonitor = NullProgressMonitor.INSTANCE;
|
||||
|
||||
if ((reuseDeltas || config.isReuseObjects()) && reuseSupport != null)
|
||||
if (reuseSupport != null && (
|
||||
reuseDeltas
|
||||
|| config.isReuseObjects()
|
||||
|| !cachedPacks.isEmpty()))
|
||||
searchForReuse(compressMonitor);
|
||||
if (config.isDeltaCompress())
|
||||
searchForDeltas(compressMonitor);
|
||||
|
@ -715,8 +711,12 @@ private void searchForReuse(ProgressMonitor monitor) throws IOException {
|
|||
cnt += list.size();
|
||||
long start = System.currentTimeMillis();
|
||||
monitor.beginTask(JGitText.get().searchForReuse, cnt);
|
||||
for (List<ObjectToPack> list : objectsLists)
|
||||
for (List<ObjectToPack> list : objectsLists) {
|
||||
pruneCurrentObjectList = false;
|
||||
reuseSupport.selectObjectRepresentation(this, monitor, list);
|
||||
if (pruneCurrentObjectList)
|
||||
pruneEdgesFromObjectList(list);
|
||||
}
|
||||
monitor.endTask();
|
||||
stats.timeSearchingForReuse = System.currentTimeMillis() - start;
|
||||
}
|
||||
|
@ -1324,7 +1324,6 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
|
|||
for (RevObject obj : haveObjs)
|
||||
walker.markUninteresting(obj);
|
||||
|
||||
int typesToPrune = 0;
|
||||
final int maxBases = config.getDeltaSearchWindowSize();
|
||||
Set<RevTree> baseTrees = new HashSet<RevTree>();
|
||||
BlockList<RevCommit> commits = new BlockList<RevCommit>();
|
||||
|
@ -1388,15 +1387,6 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
|
|||
}
|
||||
commits = null;
|
||||
|
||||
for (CachedPack p : cachedPacks) {
|
||||
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_COMMIT])) {
|
||||
if (baseTrees.size() <= maxBases)
|
||||
baseTrees.add(walker.lookupCommit(d).getTree());
|
||||
objectsMap.get(d).setEdge();
|
||||
typesToPrune |= 1 << Constants.OBJ_COMMIT;
|
||||
}
|
||||
}
|
||||
|
||||
BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, //
|
||||
objectsMap, edgeObjects, reader);
|
||||
RevObject o;
|
||||
|
@ -1413,39 +1403,13 @@ private void findObjectsToPack(final ProgressMonitor countingMonitor,
|
|||
countingMonitor.update(1);
|
||||
}
|
||||
|
||||
for (CachedPack p : cachedPacks) {
|
||||
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_TREE])) {
|
||||
objectsMap.get(d).setEdge();
|
||||
typesToPrune |= 1 << Constants.OBJ_TREE;
|
||||
}
|
||||
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_BLOB])) {
|
||||
objectsMap.get(d).setEdge();
|
||||
typesToPrune |= 1 << Constants.OBJ_BLOB;
|
||||
}
|
||||
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_TAG])) {
|
||||
objectsMap.get(d).setEdge();
|
||||
typesToPrune |= 1 << Constants.OBJ_TAG;
|
||||
}
|
||||
}
|
||||
|
||||
if (typesToPrune != 0) {
|
||||
pruneObjectList(typesToPrune, Constants.OBJ_COMMIT);
|
||||
pruneObjectList(typesToPrune, Constants.OBJ_TREE);
|
||||
pruneObjectList(typesToPrune, Constants.OBJ_BLOB);
|
||||
pruneObjectList(typesToPrune, Constants.OBJ_TAG);
|
||||
}
|
||||
|
||||
for (CachedPack pack : cachedPacks)
|
||||
countingMonitor.update((int) pack.getObjectCount());
|
||||
countingMonitor.endTask();
|
||||
stats.timeCounting = System.currentTimeMillis() - countingStart;
|
||||
}
|
||||
|
||||
private void pruneObjectList(int typesToPrune, int typeCode) {
|
||||
if ((typesToPrune & (1 << typeCode)) == 0)
|
||||
return;
|
||||
|
||||
final List<ObjectToPack> list = objectsLists[typeCode];
|
||||
private static void pruneEdgesFromObjectList(List<ObjectToPack> list) {
|
||||
final int size = list.size();
|
||||
int src = 0;
|
||||
int dst = 0;
|
||||
|
@ -1544,6 +1508,23 @@ private void addObject(final RevObject object, final int pathHashCode)
|
|||
*/
|
||||
public void select(ObjectToPack otp, StoredObjectRepresentation next) {
|
||||
int nFmt = next.getFormat();
|
||||
|
||||
if (!cachedPacks.isEmpty()) {
|
||||
if (otp.isEdge())
|
||||
return;
|
||||
if ((nFmt == PACK_WHOLE) | (nFmt == PACK_DELTA)) {
|
||||
for (CachedPack pack : cachedPacks) {
|
||||
if (pack.hasObject(otp, next)) {
|
||||
otp.setEdge();
|
||||
otp.clearDeltaBase();
|
||||
otp.clearReuseAsIs();
|
||||
pruneCurrentObjectList = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int nWeight;
|
||||
if (otp.isReuseAsIs()) {
|
||||
// We've already chosen to reuse a packed form, if next
|
||||
|
|
Loading…
Reference in New Issue