AmazonS3: Speed up fetch, try recent packs first

When fetching remote objects, WalkFetchConnection searches remote
packs in the order provided by WalkRemoteObjectDatabase:getPackNames.
Previously, for TransportAmazonS3, the packs were in no particular
order. This resulted in potential many extra calls to get pack idx
files.

This change modifies TransportAmazonS3 and AmazonS3 so that
getPackNames returns a list sorted with the most recently modified
packs first.  In the case of fetching recent changes to a repo,
this dramatically reduces the number of packs searched and speeds
up fetch.

Note: WalkRemoteObjectDatabase::getPackNames does not specify
the order of the returned names.

Testing: did "mvn clean install" in root dir and all tests passed.
And manually constructed some S3 repos and using jgit.sh
confirmed that the freshest pack was checked first.

Change-Id: I3b968fee825e793be55566e28c2d69d0cbe53807
Signed-off-by: Joshua Redstone <redstone@gmail.com>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
This commit is contained in:
Joshua Redstone 2020-01-31 13:14:42 -06:00 committed by Matthias Sohn
parent eaff0a2eba
commit 2a134e1541
2 changed files with 54 additions and 9 deletions

View File

@ -33,6 +33,7 @@
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
@ -44,6 +45,8 @@
import java.util.SortedMap;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.time.Instant;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
@ -288,6 +291,8 @@ public InputStream decrypt(URLConnection u) throws IOException {
* <p>
* This method is primarily meant for obtaining a "recursive directory
* listing" rooted under the specified bucket and prefix location.
* It returns the keys sorted in reverse order of LastModified time
* (freshest keys first).
*
* @param bucket
* name of the bucket whose objects should be listed.
@ -311,7 +316,10 @@ public List<String> list(String bucket, String prefix)
do {
lp.list();
} while (lp.truncated);
return lp.entries;
Comparator<KeyInfo> comparator = Comparator.comparingLong(KeyInfo::getLastModifiedSecs);
return lp.entries.stream().sorted(comparator.reversed())
.map(KeyInfo::getName).collect(Collectors.toList());
}
/**
@ -620,8 +628,26 @@ static Properties properties(File authFile)
return p;
}
/**
* KeyInfo enables sorting of keys by lastModified time
*/
private static final class KeyInfo {
private final String name;
private final long lastModifiedSecs;
public KeyInfo(String aname, long lsecs) {
name = aname;
lastModifiedSecs = lsecs;
}
public String getName() {
return name;
}
public long getLastModifiedSecs() {
return lastModifiedSecs;
}
}
private final class ListParser extends DefaultHandler {
final List<String> entries = new ArrayList<>();
final List<KeyInfo> entries = new ArrayList<>();
private final String bucket;
@ -630,6 +656,8 @@ private final class ListParser extends DefaultHandler {
boolean truncated;
private StringBuilder data;
private String keyName;
private Instant keyLastModified;
ListParser(String bn, String p) {
bucket = bn;
@ -641,7 +669,7 @@ void list() throws IOException {
if (prefix.length() > 0)
args.put("prefix", prefix); //$NON-NLS-1$
if (!entries.isEmpty())
args.put("marker", prefix + entries.get(entries.size() - 1)); //$NON-NLS-1$
args.put("marker", prefix + entries.get(entries.size() - 1).getName()); //$NON-NLS-1$
for (int curAttempt = 0; curAttempt < maxAttempts; curAttempt++) {
final HttpURLConnection c = open("GET", bucket, "", args); //$NON-NLS-1$ //$NON-NLS-2$
@ -650,6 +678,8 @@ void list() throws IOException {
case HttpURLConnection.HTTP_OK:
truncated = false;
data = null;
keyName = null;
keyLastModified = null;
final XMLReader xr;
try {
@ -683,8 +713,13 @@ void list() throws IOException {
public void startElement(final String uri, final String name,
final String qName, final Attributes attributes)
throws SAXException {
if ("Key".equals(name) || "IsTruncated".equals(name)) //$NON-NLS-1$ //$NON-NLS-2$
if ("Key".equals(name) || "IsTruncated".equals(name) || "LastModified".equals(name)) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
data = new StringBuilder();
}
if ("Contents".equals(name)) { //$NON-NLS-1$
keyName = null;
keyLastModified = null;
}
}
@Override
@ -704,10 +739,16 @@ public void characters(char[] ch, int s, int n)
@Override
public void endElement(final String uri, final String name,
final String qName) throws SAXException {
if ("Key".equals(name)) //$NON-NLS-1$
entries.add(data.toString().substring(prefix.length()));
else if ("IsTruncated".equals(name)) //$NON-NLS-1$
if ("Key".equals(name)) { //$NON-NLS-1$
keyName = data.toString().substring(prefix.length());
} else if ("IsTruncated".equals(name)) { //$NON-NLS-1$
truncated = StringUtils.equalsIgnoreCase("true", data.toString()); //$NON-NLS-1$
} else if ("LastModified".equals(name)) { //$NON-NLS-1$
keyLastModified = Instant.parse(data.toString());
} else if ("Contents".equals(name)) { //$NON-NLS-1$
entries.add(new KeyInfo(keyName, keyLastModified.getEpochSecond()));
}
data = null;
}
}

View File

@ -23,6 +23,7 @@
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
@ -241,11 +242,14 @@ WalkRemoteObjectDatabase openAlternate(String location)
@Override
Collection<String> getPackNames() throws IOException {
// s3.list returns most recently modified packs first.
// These are the packs most likely to contain missing refs.
final List<String> packList = s3.list(bucket, resolveKey("pack")); //$NON-NLS-1$
final HashSet<String> have = new HashSet<>();
have.addAll(s3.list(bucket, resolveKey("pack"))); //$NON-NLS-1$
have.addAll(packList);
final Collection<String> packs = new ArrayList<>();
for (String n : have) {
for (String n : packList) {
if (!n.startsWith("pack-") || !n.endsWith(".pack")) //$NON-NLS-1$ //$NON-NLS-2$
continue;