Speed up GC listing objects referenced from reflogs

GC needs to get a ReflogReader for all existing refs to list all objects
referenced from reflogs. The existing Repository#getReflogReader method
accepts the ref name and then resolves the Ref to create a ReflogReader.
GC first gets all Refs in bulk and then calls getReflogReader for each of
them by name, so every Ref that GC already holds is resolved again.
Doing this one by one for a huge number of Refs is very slow.

Fix this by adding another getReflogReader method to Repository which
accepts a Ref directly.

This speeds up running JGit gc on a mirror clone of the Gerrit
repository from 15:36 min to 1:08 min. The repository used in this test
had 45k refs, 275k commits and 1.2m git objects.

Change-Id: I474897fdc6652923e35d461c065a29f54d9949f4
This commit is contained in:
Matthias Sohn 2023-01-18 17:39:19 +01:00
parent 2011fe06d2
commit cd3fc7a299
5 changed files with 36 additions and 41 deletions

View File

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<component id="org.eclipse.jgit.http.server" version="2">
<resource path="src/org/eclipse/jgit/http/server/GitServlet.java" type="org.eclipse.jgit.http.server.GitServlet">
<filter id="1142947843">
<message_arguments>
<message_argument value="5.9.1"/>
<message_argument value="setReceivePackErrorHandler(ReceivePackErrorHandler)"/>
</message_arguments>
</filter>
<filter id="1142947843">
<message_arguments>
<message_argument value="5.9.1"/>
<message_argument value="setUploadPackErrorHandler(UploadPackErrorHandler)"/>
</message_arguments>
</filter>
</resource>
</component>

View File

@ -1,5 +1,13 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<component id="org.eclipse.jgit" version="2">
<resource path="META-INF/MANIFEST.MF">
<filter id="924844039">
<message_arguments>
<message_argument value="5.13.2"/>
<message_argument value="5.13.0"/>
</message_arguments>
</filter>
</resource>
<resource path="src/org/eclipse/jgit/lib/ConfigConstants.java" type="org.eclipse.jgit.lib.ConfigConstants">
<filter id="1142947843">
<message_arguments>
@ -8,11 +16,11 @@
</message_arguments>
</filter>
</resource>
<resource path="src/org/eclipse/jgit/storage/pack/PackConfig.java" type="org.eclipse.jgit.storage.pack.PackConfig">
<filter id="336658481">
<resource path="src/org/eclipse/jgit/lib/Repository.java" type="org.eclipse.jgit.lib.Repository">
<filter id="1142947843">
<message_arguments>
<message_argument value="org.eclipse.jgit.storage.pack.PackConfig"/>
<message_argument value="DEFAULT_SEARCH_FOR_REUSE_TIMEOUT"/>
<message_argument value="5.13.2"/>
<message_argument value="getReflogReader(Ref)"/>
</message_arguments>
</filter>
</resource>
@ -24,22 +32,6 @@
</message_arguments>
</filter>
</resource>
<resource path="src/org/eclipse/jgit/transport/ProtocolV2Hook.java" type="org.eclipse.jgit.transport.ProtocolV2Hook">
<filter id="404000815">
<message_arguments>
<message_argument value="org.eclipse.jgit.transport.ProtocolV2Hook"/>
<message_argument value="onObjectInfo(ObjectInfoRequest)"/>
</message_arguments>
</filter>
</resource>
<resource path="src/org/eclipse/jgit/transport/SshConstants.java" type="org.eclipse.jgit.transport.SshConstants">
<filter id="1142947843">
<message_arguments>
<message_argument value="5.11.1"/>
<message_argument value="PUBKEY_ACCEPTED_ALGORITHMS"/>
</message_arguments>
</filter>
</resource>
<resource path="src/org/eclipse/jgit/util/sha1/SHA1.java" type="org.eclipse.jgit.util.sha1.SHA1">
<filter id="337764418">
<message_arguments>

View File

@ -31,6 +31,7 @@
import java.util.Objects;
import java.util.Set;
import org.eclipse.jgit.annotations.NonNull;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.api.errors.JGitInternalException;
import org.eclipse.jgit.attributes.AttributesNode;
@ -525,6 +526,12 @@ public ReflogReader getReflogReader(String refName) throws IOException {
return new ReflogReaderImpl(this, ref.getName());
}
/**
 * {@inheritDoc}
 * <p>
 * Creates the reader from this repository and the name reported by the
 * supplied ref.
 */
@Override
public @NonNull ReflogReader getReflogReader(@NonNull Ref ref)
		throws IOException {
	final String refName = ref.getName();
	return new ReflogReaderImpl(this, refName);
}
/** {@inheritDoc} */
@Override
public AttributesNodeProvider createAttributesNodeProvider() {

View File

@ -1019,10 +1019,7 @@ private void deleteTempPacksIdx() {
* @throws IOException
*/
private Set<ObjectId> listRefLogObjects(Ref ref, long minTime) throws IOException {
ReflogReader reflogReader = repo.getReflogReader(ref.getName());
if (reflogReader == null) {
return Collections.emptySet();
}
ReflogReader reflogReader = repo.getReflogReader(ref);
List<ReflogEntry> rlEntries = reflogReader
.getReverseEntries();
if (rlEntries == null || rlEntries.isEmpty())

View File

@ -1691,6 +1691,22 @@ public void setGitwebDescription(@Nullable String description)
public abstract ReflogReader getReflogReader(String refName)
throws IOException;
/**
 * Get the reflog reader for an already available {@link Ref}.
 * <p>
 * This default implementation delegates to
 * {@link #getReflogReader(String)} using the name of the supplied ref.
 * Subclasses should override this method and provide a more efficient
 * implementation.
 *
 * @param ref
 *            a Ref
 * @return a {@link org.eclipse.jgit.lib.ReflogReader} for the supplied ref,
 *         or {@code null} if the ref does not exist.
 * @throws IOException
 *             if the delegated {@link #getReflogReader(String)} call fails
 * @since 5.13.2
 */
public @Nullable ReflogReader getReflogReader(@NonNull Ref ref)
		throws IOException {
	final String refName = ref.getName();
	return getReflogReader(refName);
}
/**
* Return the information stored in the file $GIT_DIR/MERGE_MSG. In this
* file operations triggering a merge will store a template for the commit