Teach UploadPack to support filtering by blob size

Teach UploadPack to advertise the filter capability and support a
"filter" line in the request, accepting blob sizes only, if the
configuration variable "uploadpack.allowfilter" is true. This feature is
currently in the "master" branch of Git and, as of the time of writing,
is slated for release in Git 2.17.

This change is incomplete in that the filter-by-sparse-specification
feature, which Git also supports, is not included in this patch.

If a JGit server were to be patched with this commit, and a repository
on that server configured with RequestPolicy.ANY or
RequestPolicy.REACHABLE_COMMIT_TIP, a Git client built from the "master"
branch would be able to perform a partial clone.

Change-Id: If72b4b422c06ab432137e9e5272d353b14b73259
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
This commit is contained in:
Jonathan Tan 2017-10-17 15:48:53 -07:00 committed by Jonathan Nieder
parent c6a2c58624
commit 4ac32e79b7
9 changed files with 289 additions and 4 deletions

View File

@ -13,6 +13,7 @@
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevBlob;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.transport.UploadPack.RequestPolicy;
import org.eclipse.jgit.transport.resolver.ServiceNotAuthorizedException;
import org.eclipse.jgit.transport.resolver.ServiceNotEnabledException;
@ -170,4 +171,149 @@ public void testFetchReachableBlobWithoutBitmap() throws Exception {
Collections.singletonList(new RefSpec(blob.name())));
}
}
@Test
public void testFetchWithBlobNoneFilter() throws Exception {
	// A "blob:none" filter must keep commits and trees but drop every blob.
	InMemoryRepository filterRepo = newRepo("server2");
	TestRepository<InMemoryRepository> src =
			new TestRepository<>(filterRepo);
	RevBlob firstBlob = src.blob("foobar");
	RevBlob secondBlob = src.blob("fooba");
	RevTree rootTree = src.tree(
			src.file("1", firstBlob), src.file("2", secondBlob));
	RevCommit tip = src.commit(rootTree);
	src.update("master", tip);
	filterRepo.getConfig().setBoolean("uploadpack", null, "allowfilter", true);
	testProtocol = new TestProtocol<>(
			new UploadPackFactory<Object>() {
				@Override
				public UploadPack create(Object req, Repository db)
						throws ServiceNotEnabledException,
						ServiceNotAuthorizedException {
					return new UploadPack(db);
				}
			}, null);
	uri = testProtocol.register(ctx, filterRepo);
	try (Transport tn = testProtocol.open(uri, client, "server2")) {
		tn.setFilterBlobLimit(0);
		tn.fetch(NullProgressMonitor.INSTANCE,
				Collections.singletonList(new RefSpec(tip.name())));
		assertTrue(client.hasObject(rootTree.toObjectId()));
		assertFalse(client.hasObject(firstBlob.toObjectId()));
		assertFalse(client.hasObject(secondBlob.toObjectId()));
	}
}
@Test
public void testFetchWithBlobLimitFilter() throws Exception {
	// With "blob:limit=5", blobs strictly larger than 5 bytes are omitted.
	InMemoryRepository limitRepo = newRepo("server2");
	TestRepository<InMemoryRepository> src =
			new TestRepository<>(limitRepo);
	RevBlob sixByteBlob = src.blob("foobar");
	RevBlob fiveByteBlob = src.blob("fooba");
	RevTree rootTree = src.tree(
			src.file("1", sixByteBlob), src.file("2", fiveByteBlob));
	RevCommit tip = src.commit(rootTree);
	src.update("master", tip);
	limitRepo.getConfig().setBoolean("uploadpack", null, "allowfilter", true);
	testProtocol = new TestProtocol<>(
			new UploadPackFactory<Object>() {
				@Override
				public UploadPack create(Object req, Repository db)
						throws ServiceNotEnabledException,
						ServiceNotAuthorizedException {
					return new UploadPack(db);
				}
			}, null);
	uri = testProtocol.register(ctx, limitRepo);
	try (Transport tn = testProtocol.open(uri, client, "server2")) {
		tn.setFilterBlobLimit(5);
		tn.fetch(NullProgressMonitor.INSTANCE,
				Collections.singletonList(new RefSpec(tip.name())));
		assertFalse(client.hasObject(sixByteBlob.toObjectId()));
		assertTrue(client.hasObject(fiveByteBlob.toObjectId()));
	}
}
@Test
public void testFetchWithBlobLimitFilterAndBitmaps() throws Exception {
	// Same as the blob-limit test, but with bitmaps present on the server;
	// filtering must still apply on the bitmap-assisted counting path.
	InMemoryRepository bitmapRepo = newRepo("server2");
	TestRepository<InMemoryRepository> src =
			new TestRepository<>(bitmapRepo);
	RevBlob sixByteBlob = src.blob("foobar");
	RevBlob fiveByteBlob = src.blob("fooba");
	RevTree rootTree = src.tree(
			src.file("1", sixByteBlob), src.file("2", fiveByteBlob));
	RevCommit tip = src.commit(rootTree);
	src.update("master", tip);
	bitmapRepo.getConfig().setBoolean("uploadpack", null, "allowfilter", true);
	// generate bitmaps
	new DfsGarbageCollector(bitmapRepo).pack(null);
	bitmapRepo.scanForRepoChanges();
	testProtocol = new TestProtocol<>(
			new UploadPackFactory<Object>() {
				@Override
				public UploadPack create(Object req, Repository db)
						throws ServiceNotEnabledException,
						ServiceNotAuthorizedException {
					return new UploadPack(db);
				}
			}, null);
	uri = testProtocol.register(ctx, bitmapRepo);
	try (Transport tn = testProtocol.open(uri, client, "server2")) {
		tn.setFilterBlobLimit(5);
		tn.fetch(NullProgressMonitor.INSTANCE,
				Collections.singletonList(new RefSpec(tip.name())));
		assertFalse(client.hasObject(sixByteBlob.toObjectId()));
		assertTrue(client.hasObject(fiveByteBlob.toObjectId()));
	}
}
@Test
public void testFetchWithNonSupportingServer() throws Exception {
	// A server with allowfilter=false does not advertise "filter"; the
	// client must fail loudly instead of silently fetching everything.
	InMemoryRepository plainRepo = newRepo("server2");
	TestRepository<InMemoryRepository> src =
			new TestRepository<>(plainRepo);
	RevBlob blob = src.blob("foo");
	RevTree rootTree = src.tree(src.file("1", blob));
	RevCommit tip = src.commit(rootTree);
	src.update("master", tip);
	plainRepo.getConfig().setBoolean("uploadpack", null, "allowfilter", false);
	testProtocol = new TestProtocol<>(
			new UploadPackFactory<Object>() {
				@Override
				public UploadPack create(Object req, Repository db)
						throws ServiceNotEnabledException,
						ServiceNotAuthorizedException {
					return new UploadPack(db);
				}
			}, null);
	uri = testProtocol.register(ctx, plainRepo);
	try (Transport tn = testProtocol.open(uri, client, "server2")) {
		tn.setFilterBlobLimit(0);
		thrown.expect(TransportException.class);
		thrown.expectMessage("filter requires server to advertise that capability");
		tn.fetch(NullProgressMonitor.INSTANCE,
				Collections.singletonList(new RefSpec(tip.name())));
	}
}
}

View File

@ -307,6 +307,7 @@ fileIsTooLarge=File is too large: {0}
fileModeNotSetForPath=FileMode not set for path {0}
filterExecutionFailed=Execution of filter command ''{0}'' on file ''{1}'' failed
filterExecutionFailedRc=Execution of filter command ''{0}'' on file ''{1}'' failed with return code ''{2}'', message on stderr: ''{3}''
filterRequiresCapability=filter requires server to advertise that capability
findingGarbage=Finding garbage
flagIsDisposed={0} is disposed.
flagNotFromThis={0} not from this.
@ -358,6 +359,7 @@ invalidCommitParentNumber=Invalid commit parent number
invalidDepth=Invalid depth: {0}
invalidEncryption=Invalid encryption
invalidExpandWildcard=ExpandFromSource on a refspec that can have mismatched wildcards does not make sense.
invalidFilter=Invalid filter: {0}
invalidGitdirRef = Invalid .git reference in file ''{0}''
invalidGitType=invalid git type: {0}
invalidId=Invalid id: {0}
@ -664,6 +666,7 @@ threadInterruptedWhileRunning="Current thread interrupted while running {0}"
timeIsUncertain=Time is uncertain
timerAlreadyTerminated=Timer already terminated
tooManyCommands=Too many commands
tooManyFilters=Too many "filter" lines in request
tooManyIncludeRecursions=Too many recursions; circular includes in config file(s)?
topologicalSortRequired=Topological sort required.
transactionAborted=transaction aborted

View File

@ -368,6 +368,7 @@ public static JGitText get() {
/***/ public String fileModeNotSetForPath;
/***/ public String filterExecutionFailed;
/***/ public String filterExecutionFailedRc;
/***/ public String filterRequiresCapability;
/***/ public String findingGarbage;
/***/ public String flagIsDisposed;
/***/ public String flagNotFromThis;
@ -419,6 +420,7 @@ public static JGitText get() {
/***/ public String invalidDepth;
/***/ public String invalidEncryption;
/***/ public String invalidExpandWildcard;
/***/ public String invalidFilter;
/***/ public String invalidGitdirRef;
/***/ public String invalidGitType;
/***/ public String invalidId;
@ -726,6 +728,7 @@ public static JGitText get() {
/***/ public String timeIsUncertain;
/***/ public String timerAlreadyTerminated;
/***/ public String tooManyCommands;
/***/ public String tooManyFilters;
/***/ public String tooManyIncludeRecursions;
/***/ public String topologicalSortRequired;
/***/ public String transportExceptionBadRef;

View File

@ -303,6 +303,8 @@ public static Iterable<PackWriter> getInstances() {
private ObjectCountCallback callback;
private long filterBlobLimit = -1;
/**
* Create writer for specified repository.
* <p>
@ -638,6 +640,14 @@ public void setShallowPack(int depth,
this.unshallowObjects = unshallow;
}
/**
 * Set the blob size limit for this pack: blobs larger than the given
 * number of bytes are omitted. A negative value (the default) disables
 * filtering.
 *
 * @param bytes exclude blobs of size greater than this
 * @since 5.0
 */
public void setFilterBlobLimit(long bytes) {
	this.filterBlobLimit = bytes;
}
/**
* Returns objects number in a pack file that was created by this writer.
*
@ -1960,7 +1970,7 @@ private void findObjectsToPack(@NonNull ProgressMonitor countingMonitor,
byte[] pathBuf = walker.getPathBuffer();
int pathLen = walker.getPathLength();
bases.addBase(o.getType(), pathBuf, pathLen, pathHash);
addObject(o, pathHash);
filterAndAddObject(o, o.getType(), pathHash);
countingMonitor.update(1);
}
} else {
@ -1970,7 +1980,7 @@ private void findObjectsToPack(@NonNull ProgressMonitor countingMonitor,
continue;
if (exclude(o))
continue;
addObject(o, walker.getPathHashCode());
filterAndAddObject(o, o.getType(), walker.getPathHashCode());
countingMonitor.update(1);
}
}
@ -2003,7 +2013,7 @@ private void findObjectsToPackUsingBitmaps(
needBitmap.remove(objectId);
continue;
}
addObject(objectId, obj.getType(), 0);
filterAndAddObject(objectId, obj.getType(), 0);
}
if (thin)
@ -2062,6 +2072,24 @@ private void addObject(
objectsMap.add(otp);
}
/**
 * Adds the given object as an object to be packed, unless it is a blob
 * whose size exceeds the configured limit.
 *
 * @param src
 *            object to consider for packing
 * @param type
 *            object type (e.g. {@code OBJ_BLOB})
 * @param pathHashCode
 *            path hash, forwarded unchanged to {@code addObject}
 * @throws IOException
 *             the object's size could not be read
 * @see #setFilterBlobLimit
 */
private void filterAndAddObject(@NonNull AnyObjectId src, int type,
		int pathHashCode) throws IOException {
	// The size lookup is the expensive check, so do the cheap ones first.
	if (filterBlobLimit >= 0 && type == OBJ_BLOB
			&& reader.getObjectSize(src, OBJ_BLOB) > filterBlobLimit) {
		return; // filtered out: blob is larger than the limit
	}
	addObject(src, type, pathHashCode);
}
private boolean exclude(AnyObjectId objectId) {
if (excludeInPacks == null)
return false;

View File

@ -200,6 +200,13 @@ public abstract class BasePackFetchConnection extends BasePackConnection
*/
public static final String OPTION_ALLOW_REACHABLE_SHA1_IN_WANT = GitProtocolConstants.OPTION_ALLOW_REACHABLE_SHA1_IN_WANT;
/**
* The client specified a filter expression.
*
* @since 5.0
*/
public static final String OPTION_FILTER = GitProtocolConstants.OPTION_FILTER;
private final RevWalk walk;
/** All commits that are immediately reachable by a local ref. */
@ -242,6 +249,9 @@ public abstract class BasePackFetchConnection extends BasePackConnection
private PacketLineOut pckState;
/** If not -1, the maximum blob size to be sent to the server. */
private final long filterBlobLimit;
/**
* Create a new connection to fetch using the native git transport.
*
@ -262,6 +272,7 @@ public BasePackFetchConnection(final PackTransport packTransport) {
}
includeTags = transport.getTagOpt() != TagOpt.NO_TAGS;
thinPack = transport.isFetchThin();
filterBlobLimit = transport.getFilterBlobLimit();
if (local != null) {
walk = new RevWalk(local);
@ -524,6 +535,11 @@ private boolean sendWants(final Collection<Ref> want) throws IOException {
if (first) {
return false;
}
if (filterBlobLimit == 0) {
p.writeString(OPTION_FILTER + " blob:none");
} else if (filterBlobLimit > 0) {
p.writeString(OPTION_FILTER + " blob:limit=" + filterBlobLimit);
}
p.end();
outNeedsEnd = false;
return true;
@ -564,6 +580,11 @@ else if (wantCapability(line, OPTION_SIDE_BAND))
OPTION_MULTI_ACK_DETAILED));
}
if (filterBlobLimit >= 0 && !wantCapability(line, OPTION_FILTER)) {
throw new PackProtocolException(uri,
JGitText.get().filterRequiresCapability);
}
addUserAgentCapability(line);
return line.toString();
}

View File

@ -151,6 +151,13 @@ public class GitProtocolConstants {
*/
public static final String OPTION_PUSH_CERT = "push-cert"; //$NON-NLS-1$
/**
* The client specified a filter expression.
*
* @since 5.0
*/
public static final String OPTION_FILTER = "filter"; //$NON-NLS-1$
/**
* The client supports atomic pushes. If this option is used, the server
* will update all refs within one atomic transaction.

View File

@ -101,6 +101,7 @@ public enum FsckMode {
private final boolean safeForMacOS;
private final boolean allowTipSha1InWant;
private final boolean allowReachableSha1InWant;
private final boolean allowFilter;
final String[] hideRefs;
TransferConfig(final Repository db) {
@ -153,6 +154,8 @@ public enum FsckMode {
"uploadpack", "allowtipsha1inwant", false); //$NON-NLS-1$ //$NON-NLS-2$
allowReachableSha1InWant = rc.getBoolean(
"uploadpack", "allowreachablesha1inwant", false); //$NON-NLS-1$ //$NON-NLS-2$
allowFilter = rc.getBoolean(
"uploadpack", "allowfilter", false); //$NON-NLS-1$ //$NON-NLS-2$
hideRefs = rc.getStringList("uploadpack", null, "hiderefs"); //$NON-NLS-1$ //$NON-NLS-2$
}
@ -219,6 +222,14 @@ public boolean isAllowReachableSha1InWant() {
return allowReachableSha1InWant;
}
/**
 * Whether clients may send a "filter" line in the fetch request
 * (used for partial clone).
 *
 * @return true if clients are allowed to specify a "filter" line
 * @since 5.0
 */
public boolean isAllowFilter() {
	return allowFilter;
}
/**
* Get {@link org.eclipse.jgit.transport.RefFilter} respecting configured
* hidden refs.

View File

@ -791,6 +791,8 @@ private static String findTrackingRefName(final String remoteName,
/** Should refs no longer on the source be pruned from the destination? */
private boolean removeDeletedRefs;
private long filterBlobLimit = -1;
/** Timeout in seconds to wait before aborting an IO read or write. */
private int timeout;
@ -1065,6 +1067,23 @@ public void setRemoveDeletedRefs(final boolean remove) {
removeDeletedRefs = remove;
}
/**
 * Get the blob size limit, if any, to request from the server.
 *
 * @return the last value passed to {@link #setFilterBlobLimit}, or -1 if
 * it was never invoked.
 * @since 5.0
 */
public long getFilterBlobLimit() {
	return filterBlobLimit;
}
/**
 * Set the blob size limit for the fetch: blobs larger than the given
 * number of bytes are excluded.
 *
 * @param bytes exclude blobs of size greater than this
 * @since 5.0
 */
public void setFilterBlobLimit(final long bytes) {
	this.filterBlobLimit = bytes;
}
/**
* Apply provided remote configuration on this transport.
*

View File

@ -47,6 +47,7 @@
import static org.eclipse.jgit.transport.GitProtocolConstants.OPTION_AGENT;
import static org.eclipse.jgit.transport.GitProtocolConstants.OPTION_ALLOW_REACHABLE_SHA1_IN_WANT;
import static org.eclipse.jgit.transport.GitProtocolConstants.OPTION_ALLOW_TIP_SHA1_IN_WANT;
import static org.eclipse.jgit.transport.GitProtocolConstants.OPTION_FILTER;
import static org.eclipse.jgit.transport.GitProtocolConstants.OPTION_INCLUDE_TAG;
import static org.eclipse.jgit.transport.GitProtocolConstants.OPTION_MULTI_ACK;
import static org.eclipse.jgit.transport.GitProtocolConstants.OPTION_MULTI_ACK_DETAILED;
@ -317,6 +318,8 @@ public Set<String> getOptions() {
private PackStatistics statistics;
private long filterBlobLimit = -1;
/**
* Create a new pack upload for an open repository.
*
@ -942,6 +945,9 @@ public void sendAdvertisedRefs(final RefAdvertiser adv) throws IOException,
|| policy == null)
adv.advertiseCapability(OPTION_ALLOW_REACHABLE_SHA1_IN_WANT);
adv.advertiseCapability(OPTION_AGENT, UserAgent.get());
if (transferConfig.isAllowFilter()) {
adv.advertiseCapability(OPTION_FILTER);
}
adv.setDerefTags(true);
Map<String, Ref> advertisedOrDefaultRefs = getAdvertisedOrDefaultRefs();
findSymrefs(adv, advertisedOrDefaultRefs);
@ -982,6 +988,7 @@ public OutputStream getMessageOutputStream() {
private void recvWants() throws IOException {
boolean isFirst = true;
boolean filterReceived = false;
for (;;) {
String line;
try {
@ -1010,6 +1017,41 @@ private void recvWants() throws IOException {
continue;
}
if (transferConfig.isAllowFilter()
&& line.startsWith(OPTION_FILTER + " ")) { //$NON-NLS-1$
String arg = line.substring(OPTION_FILTER.length() + 1);
if (filterReceived) {
throw new PackProtocolException(JGitText.get().tooManyFilters);
}
filterReceived = true;
if (arg.equals("blob:none")) {
filterBlobLimit = 0;
} else if (arg.startsWith("blob:limit=")) {
try {
filterBlobLimit = Long.parseLong(arg.substring("blob:limit=".length()));
} catch (NumberFormatException e) {
throw new PackProtocolException(
MessageFormat.format(JGitText.get().invalidFilter,
arg));
}
}
/*
* We must have (1) either "blob:none" or
* "blob:limit=" set (because we only support
* blob size limits for now), and (2) if the
* latter, then it must be nonnegative. Throw
* if (1) or (2) is not met.
*/
if (filterBlobLimit < 0) {
throw new PackProtocolException(
MessageFormat.format(JGitText.get().invalidFilter,
arg));
}
continue;
}
if (!line.startsWith("want ") || line.length() < 45) //$NON-NLS-1$
throw new PackProtocolException(MessageFormat.format(JGitText.get().expectedGot, "want", line)); //$NON-NLS-1$
@ -1547,7 +1589,12 @@ private void sendPack(final boolean sideband,
accumulator);
try {
pw.setIndexDisabled(true);
pw.setUseCachedPacks(true);
if (filterBlobLimit >= 0) {
pw.setFilterBlobLimit(filterBlobLimit);
pw.setUseCachedPacks(false);
} else {
pw.setUseCachedPacks(true);
}
pw.setUseBitmaps(depth == 0 && clientShallowCommits.isEmpty());
pw.setClientShallowCommits(clientShallowCommits);
pw.setReuseDeltaCommits(true);