Prevent infinite loop rescanning the pack list on PackMismatchException

We found, when analysing an incident where Gerrit's gc runner thread got
stuck, that we can end up in an infinite loop in
ObjectDirectory#openPackedObject which tries to rescan the pack
list and starts over trying to open a packed object in an unconfined
loop if it catches a PackMismatchException.

Here the relevant part of a thread dump we created while the gc runner
was stuck:

"WorkQueue-2[java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask@350812a3[Not
completed,
task = java.util.concurrent.Executors$RunnableAdapter@5425d7ee]]" #72
tid=0x00007f73cee1c800 nid=0x584
runnable  [0x00007f7392d57000]
   java.lang.Thread.State: RUNNABLE
	at org.eclipse.jgit.internal.storage.file.WindowCache.removeAll(WindowCache.java:716)
	at org.eclipse.jgit.internal.storage.file.WindowCache.purge(WindowCache.java:399)
	at org.eclipse.jgit.internal.storage.file.PackFile.close(PackFile.java:296)
	at org.eclipse.jgit.internal.storage.file.ObjectDirectory.reuseMap(ObjectDirectory.java:973)
	at org.eclipse.jgit.internal.storage.file.ObjectDirectory.scanPacksImpl(ObjectDirectory.java:904)
	at org.eclipse.jgit.internal.storage.file.ObjectDirectory.scanPacks(ObjectDirectory.java:895)
	- locked <0x000000050a498f60> (a
java.util.concurrent.atomic.AtomicReference)
	at org.eclipse.jgit.internal.storage.file.ObjectDirectory.searchPacksAgain(ObjectDirectory.java:794)
	at org.eclipse.jgit.internal.storage.file.ObjectDirectory.openPackedObject(ObjectDirectory.java:465)
	at org.eclipse.jgit.internal.storage.file.ObjectDirectory.openPackedFromSelfOrAlternate(ObjectDirectory.java:417)
	at org.eclipse.jgit.internal.storage.file.ObjectDirectory.openObject(ObjectDirectory.java:408)
	at org.eclipse.jgit.internal.storage.file.WindowCursor.open(WindowCursor.java:132)
	at org.eclipse.jgit.lib.ObjectReader$1.open(ObjectReader.java:279)
	at org.eclipse.jgit.revwalk.RevWalk$2.next(RevWalk.java:1031)
	at org.eclipse.jgit.internal.storage.pack.PackWriter.findObjectsToPack(PackWriter.java:1911)
	at org.eclipse.jgit.internal.storage.pack.PackWriter.preparePack(PackWriter.java:960)
	at org.eclipse.jgit.internal.storage.pack.PackWriter.preparePack(PackWriter.java:876)
	at org.eclipse.jgit.internal.storage.file.GC.writePack(GC.java:1168)
	at org.eclipse.jgit.internal.storage.file.GC.repack(GC.java:852)
	at org.eclipse.jgit.internal.storage.file.GC.doGc(GC.java:269)
	at org.eclipse.jgit.internal.storage.file.GC.gc(GC.java:220)
	at org.eclipse.jgit.api.GarbageCollectCommand.call(GarbageCollectCommand.java:179)
	at com.google.gerrit.server.git.GarbageCollection.run(GarbageCollection.java:112)
	at com.google.gerrit.server.git.GarbageCollection.run(GarbageCollection.java:75)
	at com.google.gerrit.server.git.GarbageCollection.run(GarbageCollection.java:71)
	at com.google.gerrit.server.git.GarbageCollectionRunner.run(GarbageCollectionRunner.java:76)
	at com.google.gerrit.server.logging.LoggingContextAwareRunnable.run(LoggingContextAwareRunnable.java:103)
	at java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.18/Executors.java:515)
	at java.util.concurrent.FutureTask.runAndReset(java.base@11.0.18/FutureTask.java:305)
	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(java.base@11.0.18/ScheduledThreadPoolExecutor.java:305)
	at com.google.gerrit.server.git.WorkQueue$Task.run(WorkQueue.java:612)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.18/ThreadPoolExecutor.java:1128)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.18/ThreadPoolExecutor.java:628)
	at java.lang.Thread.run(java.base@11.0.18/Thread.java:829)

The code in ObjectDirectory#openPackedObject [1] apparently assumes that
this is caused by a transient problem which it can resume from by
retrying. We use `core.trustFolderStat = false` on this server since it
uses NFS. The incident we had showed that we can enter into an infinite
loop here if there is a permanent mismatch between a pack file and its
corresponding pack index. I am not yet sure how this can happen.

Break the infinite loop by limiting the number of attempts rescanning
the pack list to 5 retries.  When we exceed this threshold set the type
of the PackMismatchException to permanent and rethrow it which breaks
the infinite loop.

Also apply the same limit in #getPackedObjectSize
and #selectObjectRepresentation where we use similar retry loops.

[1] 011c26ff36/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectory.java (465)

Change-Id: I20fb63bcc1fdc3a03d39b963f06a90e6f0ba73dc
This commit is contained in:
Matthias Sohn 2023-03-30 13:43:17 +02:00
parent f371bfb3eb
commit 96d9e3eb19
3 changed files with 76 additions and 7 deletions

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<component id="org.eclipse.jgit" version="2">
<resource path="src/org/eclipse/jgit/errors/PackMismatchException.java" type="org.eclipse.jgit.errors.PackMismatchException">
<filter id="1142947843">
<message_arguments>
<message_argument value="5.9.1"/>
<message_argument value="isPermanent()"/>
</message_arguments>
</filter>
<filter id="1142947843">
<message_arguments>
<message_argument value="5.9.1"/>
<message_argument value="setPermanent(boolean)"/>
</message_arguments>
</filter>
</resource>
</component>

View File

@ -18,6 +18,8 @@
public class PackMismatchException extends IOException {
private static final long serialVersionUID = 1L;
private boolean permanent;
/**
* Construct a pack modification error.
*
@ -27,4 +29,31 @@ public class PackMismatchException extends IOException {
public PackMismatchException(String why) {
super(why);
}
/**
* Set the type of the exception
*
* @param permanent
* whether the exception is considered permanent
* @since 5.9.1
*/
public void setPermanent(boolean permanent) {
this.permanent = permanent;
}
/**
* Check if this is a permanent problem
*
* @return if this is a permanent problem and repeatedly scanning the
* packlist couldn't fix it
* @since 5.9.1
*/
public boolean isPermanent() {
return permanent;
}
@Override
public String toString() {
return super.toString() + ", permanent: " + permanent; //$NON-NLS-1$
}
}

View File

@ -89,6 +89,8 @@ public class ObjectDirectory extends FileObjectDatabase {
* handle exception is thrown */
final static int MAX_LOOSE_OBJECT_STALE_READ_ATTEMPTS = 5;
private static final int MAX_PACKLIST_RESCAN_ATTEMPTS = 5;
private final AlternateHandle handle = new AlternateHandle(this);
private final Config config;
@ -413,7 +415,8 @@ ObjectLoader openObject(WindowCursor curs, AnyObjectId objectId)
}
private ObjectLoader openPackedFromSelfOrAlternate(WindowCursor curs,
AnyObjectId objectId, Set<AlternateHandle.Id> skips) {
AnyObjectId objectId, Set<AlternateHandle.Id> skips)
throws PackMismatchException {
ObjectLoader ldr = openPackedObject(curs, objectId);
if (ldr != null) {
return ldr;
@ -449,9 +452,11 @@ private ObjectLoader openLooseFromSelfOrAlternate(WindowCursor curs,
return null;
}
ObjectLoader openPackedObject(WindowCursor curs, AnyObjectId objectId) {
ObjectLoader openPackedObject(WindowCursor curs, AnyObjectId objectId)
throws PackMismatchException {
PackList pList;
do {
int retries = 0;
SEARCH: for (;;) {
pList = packList.get();
for (PackFile p : pList.packs) {
@ -462,8 +467,10 @@ ObjectLoader openPackedObject(WindowCursor curs, AnyObjectId objectId) {
return ldr;
} catch (PackMismatchException e) {
// Pack was modified; refresh the entire pack list.
if (searchPacksAgain(pList))
if (searchPacksAgain(pList)) {
retries = checkRescanPackThreshold(retries, e);
continue SEARCH;
}
} catch (IOException e) {
handlePackError(e, p);
}
@ -555,7 +562,8 @@ long getObjectSize(WindowCursor curs, AnyObjectId id)
}
private long getPackedSizeFromSelfOrAlternate(WindowCursor curs,
AnyObjectId id, Set<AlternateHandle.Id> skips) {
AnyObjectId id, Set<AlternateHandle.Id> skips)
throws PackMismatchException {
long len = getPackedObjectSize(curs, id);
if (0 <= len) {
return len;
@ -590,9 +598,11 @@ private long getLooseSizeFromSelfOrAlternate(WindowCursor curs,
return -1;
}
private long getPackedObjectSize(WindowCursor curs, AnyObjectId id) {
private long getPackedObjectSize(WindowCursor curs, AnyObjectId id)
throws PackMismatchException {
PackList pList;
do {
int retries = 0;
SEARCH: for (;;) {
pList = packList.get();
for (PackFile p : pList.packs) {
@ -603,8 +613,10 @@ private long getPackedObjectSize(WindowCursor curs, AnyObjectId id) {
return len;
} catch (PackMismatchException e) {
// Pack was modified; refresh the entire pack list.
if (searchPacksAgain(pList))
if (searchPacksAgain(pList)) {
retries = checkRescanPackThreshold(retries, e);
continue SEARCH;
}
} catch (IOException e) {
handlePackError(e, p);
}
@ -632,13 +644,14 @@ private long getLooseObjectSize(WindowCursor curs, AnyObjectId id)
@Override
void selectObjectRepresentation(PackWriter packer, ObjectToPack otp,
WindowCursor curs) throws IOException {
WindowCursor curs) throws IOException {
selectObjectRepresentation(packer, otp, curs, null);
}
private void selectObjectRepresentation(PackWriter packer, ObjectToPack otp,
WindowCursor curs, Set<AlternateHandle.Id> skips) throws IOException {
PackList pList = packList.get();
int retries = 0;
SEARCH: for (;;) {
for (PackFile p : pList.packs) {
try {
@ -649,6 +662,7 @@ private void selectObjectRepresentation(PackWriter packer, ObjectToPack otp,
} catch (PackMismatchException e) {
// Pack was modified; refresh the entire pack list.
//
retries = checkRescanPackThreshold(retries, e);
pList = scanPacks(pList);
continue SEARCH;
} catch (IOException e) {
@ -666,6 +680,15 @@ private void selectObjectRepresentation(PackWriter packer, ObjectToPack otp,
}
}
private int checkRescanPackThreshold(int retries, PackMismatchException e)
throws PackMismatchException {
if (retries++ > MAX_PACKLIST_RESCAN_ATTEMPTS) {
e.setPermanent(true);
throw e;
}
return retries;
}
private void handlePackError(IOException e, PackFile p) {
String warnTmpl = null;
int transientErrorCount = 0;