From f5f4bf0ad97f67ff56db18033c0a0795b722e96e Mon Sep 17 00:00:00 2001 From: Luca Milanesio Date: Fri, 11 Aug 2023 20:15:17 +0100 Subject: [PATCH] Do not exclude objects in locked packs from bitmap processing Packfiles having an equivalent .keep file are associated with in-flight pushes that haven't been completed, with potentially a set of git objects not yet referenced by a ref. If the Git client is not up-to-date, it may result in pushing a packfile, generating a .keep on the server, which may also contain existing commits due to the lack of Git protocol negotiation in the git-receive-pack. The Git protocol negotiation is the phase where the client and the server exchange the list of refs they have for trying to find a common base and minimise the amount of objects to be transferred. The repack phase in GC was previously skipping all objects that were contained in all packfiles having a .keep file associated (aka "locked packfiles"), which did not take into consideration the fact that excluding the existing commits would have resulted in the generation of an invalid bitmap file. The code for excluding the objects in the locked packfiles was written well before the bitmap was introduced, hence could not consider a use case that did not exist at that time. However, when the bitmap was introduced, the exclusion of locked packfiles was not changed, hence creating a potential problem. The issue went unnoticed for many years because the bitmap generation was disabled when JGit noticed any locked packfiles; however, the bitmaps are enabled again since Id722e68d9f , and the the issue is now visible and is impacting the GC repack phase. Introduce the '--pack-kept-objects' option in GC for including the objects contained in the locked packfiles during the repack phase, which is not an issue because of the following: - If there are any existing commits duplicated in the packfiles they will be just considered once anyway because the repack doesn't generate duplicates in the output packfile. - If there are any new commits that do not have any ref pointing to them, they will be automatically excluded from the output repacked packfile. The same identical solution is adopted in the C implementation of git in repack.c. Because the locked packfile is not pruned, any new commits not pointed by any refs will remain in the repository and there will not be any accidental pruning or object loss as it is today before this change. As a side-effect of this change, it is now potentially possible to still have duplicate BLOBs after GC when the keep packfile contained existing objects. However, it is way better to keep the duplication until the next GC phase rather than omitting existing objects from repacking and, therefore generating an invalid bitmap and incorrect packfile. Bug: 582292 Bug: 582455 Change-Id: Ide3445e652fcf256a7912f881cb898897c99b8f8 --- .../jgit/pgm/internal/CLIText.properties | 1 + .../src/org/eclipse/jgit/pgm/Gc.java | 6 ++ .../bin/.project | 28 +++++++ .../storage/file/GcKeepFilesTest.java | 76 +++++++++++++++++++ org.eclipse.jgit/.settings/.api_filters | 8 ++ .../jgit/api/GarbageCollectCommand.java | 16 ++++ .../jgit/internal/storage/file/GC.java | 14 +++- 7 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 org.eclipse.jgit.ssh.apache.agent/bin/.project diff --git a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties index 97450033e..ae10af2f6 100644 --- a/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties +++ b/org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties @@ -265,6 +265,7 @@ usage_MakeCacheTree=Show the current cache tree structure usage_Match=Only consider tags matching the given glob(7) pattern or patterns, excluding the "refs/tags/" prefix. usage_MergeBase=Find as good common ancestors as possible for a merge usage_MergesTwoDevelopmentHistories=Merges two development histories +usage_PackKeptObjects=Include objects in packs locked by a ".keep" file when repacking usage_PreserveOldPacks=Preserve old pack files by moving them into the preserved subdirectory instead of deleting them after repacking usage_PrunePreserved=Remove the preserved subdirectory containing previously preserved old pack files before repacking, and before preserving more old pack files usage_ReadDirCache= Read the DirCache 100 times diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Gc.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Gc.java index c87f0b6dc..35ac7a162 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Gc.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Gc.java @@ -27,6 +27,9 @@ class Gc extends TextBuiltin { @Option(name = "--prune-preserved", usage = "usage_PrunePreserved") private Boolean prunePreserved; + @Option(name = "--pack-kept-objects", usage = "usage_PackKeptObjects") + private Boolean packKeptObjects; + /** {@inheritDoc} */ @Override protected void run() { @@ -40,6 +43,9 @@ protected void run() { if (prunePreserved != null) { command.setPrunePreserved(prunePreserved.booleanValue()); } + if (packKeptObjects != null) { + command.setPackKeptObjects(packKeptObjects.booleanValue()); + } command.call(); } catch (GitAPIException e) { throw die(e.getMessage(), e); diff --git a/org.eclipse.jgit.ssh.apache.agent/bin/.project b/org.eclipse.jgit.ssh.apache.agent/bin/.project new file mode 100644 index 000000000..73358f4a6 --- /dev/null +++ b/org.eclipse.jgit.ssh.apache.agent/bin/.project @@ -0,0 +1,28 @@ + + + org.eclipse.jgit.ssh.apache.agent + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.pde.ManifestBuilder + + + + + org.eclipse.pde.SchemaBuilder + + + + + + org.eclipse.pde.PluginNature + org.eclipse.jdt.core.javanature + + diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcKeepFilesTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcKeepFilesTest.java index 67848ae7e..5919192e5 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcKeepFilesTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcKeepFilesTest.java @@ -12,6 +12,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.util.Iterator; @@ -19,9 +20,12 @@ import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry; import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.junit.TestRepository.BranchBuilder; +import org.eclipse.jgit.lib.ObjectId; import org.junit.Test; public class GcKeepFilesTest extends GcTestCase { + private static final int COMMIT_AND_TREE_OBJECTS = 2; + @Test public void testKeepFiles() throws Exception { BranchBuilder bb = tr.branch("refs/heads/master"); @@ -72,4 +76,76 @@ public void testKeepFiles() throws Exception { + e.toObjectId(), ind2.hasObject(e.toObjectId())); } + + @Test + public void testKeptObjectsAreIncluded() throws Exception { + BranchBuilder bb = tr.branch("refs/heads/master"); + ObjectId commitObjectInLockedPack = bb.commit().create().toObjectId(); + gc.gc(); + stats = gc.getStatistics(); + assertEquals(COMMIT_AND_TREE_OBJECTS, stats.numberOfPackedObjects); + assertEquals(1, stats.numberOfPackFiles); + assertTrue(getSinglePack().getPackFile().create(PackExt.KEEP).createNewFile()); + + bb.commit().create(); + gc.setPackKeptObjects(true); + gc.gc(); + stats = gc.getStatistics(); + assertEquals(2 * COMMIT_AND_TREE_OBJECTS + 1, + stats.numberOfPackedObjects); + assertEquals(2, stats.numberOfPackFiles); + + PackIndex lockedPackIdx = null; + PackIndex newPackIdx = null; + for (Pack pack : repo.getObjectDatabase().getPacks()) { + if (pack.getObjectCount() == COMMIT_AND_TREE_OBJECTS) { + lockedPackIdx = pack.getIndex(); + } else { + newPackIdx = pack.getIndex(); + } + } + assertNotNull(lockedPackIdx); + assertTrue(lockedPackIdx.hasObject(commitObjectInLockedPack)); + assertNotNull(newPackIdx); + assertTrue(newPackIdx.hasObject(commitObjectInLockedPack)); + } + + @Test + public void testKeptObjectsAreNotIncludedByDefault() throws Exception { + BranchBuilder bb = tr.branch("refs/heads/master"); + ObjectId commitObjectInLockedPack = bb.commit().create().toObjectId(); + gc.gc(); + stats = gc.getStatistics(); + assertEquals(COMMIT_AND_TREE_OBJECTS, stats.numberOfPackedObjects); + assertEquals(1, stats.numberOfPackFiles); + assertTrue(getSinglePack().getPackFile().create(PackExt.KEEP).createNewFile()); + + bb.commit().create(); + gc.gc(); + stats = gc.getStatistics(); + assertEquals(COMMIT_AND_TREE_OBJECTS + 1, stats.numberOfPackedObjects); + assertEquals(2, stats.numberOfPackFiles); + + PackIndex lockedPackIdx = null; + PackIndex newPackIdx = null; + for (Pack pack : repo.getObjectDatabase().getPacks()) { + if (pack.getObjectCount() == COMMIT_AND_TREE_OBJECTS) { + lockedPackIdx = pack.getIndex(); + } else { + newPackIdx = pack.getIndex(); + } + } + assertNotNull(lockedPackIdx); + assertTrue(lockedPackIdx.hasObject(commitObjectInLockedPack)); + assertNotNull(newPackIdx); + assertFalse(newPackIdx.hasObject(commitObjectInLockedPack)); + } + + private Pack getSinglePack() { + Iterator packIt = repo.getObjectDatabase().getPacks() + .iterator(); + Pack singlePack = packIt.next(); + assertFalse(packIt.hasNext()); + return singlePack; + } } diff --git a/org.eclipse.jgit/.settings/.api_filters b/org.eclipse.jgit/.settings/.api_filters index 1d3917173..24c6a3150 100644 --- a/org.eclipse.jgit/.settings/.api_filters +++ b/org.eclipse.jgit/.settings/.api_filters @@ -1,5 +1,13 @@ + + + + + + + + diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/api/GarbageCollectCommand.java b/org.eclipse.jgit/src/org/eclipse/jgit/api/GarbageCollectCommand.java index a2fbd411f..2e09d4a8e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/api/GarbageCollectCommand.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/api/GarbageCollectCommand.java @@ -62,6 +62,8 @@ public class GarbageCollectCommand extends GitCommand { private PackConfig pconfig; + private boolean packKeptObjects; + /** * Constructor for GarbageCollectCommand. * @@ -130,6 +132,19 @@ public GarbageCollectCommand setAggressive(boolean aggressive) { return this; } + /** + * Whether to include objects in `.keep` packs when repacking. + * + * @param packKeptObjects + * whether to include objects in `.keep` files when repacking. + * @return this instance + * @since 5.13.3 + */ + public GarbageCollectCommand setPackKeptObjects(boolean packKeptObjects) { + this.packKeptObjects = packKeptObjects; + return this; + } + /** * Whether to preserve old pack files instead of deleting them. * @@ -174,6 +189,7 @@ public Properties call() throws GitAPIException { gc.setProgressMonitor(monitor); if (this.expire != null) gc.setExpire(expire); + gc.setPackKeptObjects(packKeptObjects); try { gc.gc(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java index 4db922b2c..63edfa64b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java @@ -158,6 +158,8 @@ public static void setExecutor(ExecutorService e) { private Date packExpire; + private boolean packKeptObjects; + private PackConfig pconfig; /** @@ -839,8 +841,9 @@ public Collection repack() throws IOException { List excluded = new LinkedList<>(); for (Pack p : repo.getObjectDatabase().getPacks()) { checkCancelled(); - if (p.shouldBeKept()) + if (!packKeptObjects && p.shouldBeKept()) { excluded.add(p.getIndex()); + } } // Don't exclude tags that are also branch tips @@ -1307,6 +1310,15 @@ private void checkCancelled() throws CancelledException { } } + /** + * Define whether to include objects in `.keep` files when repacking. + * + * @param packKeptObjects Whether to include objects in `.keep` files when repacking. + */ + public void setPackKeptObjects(boolean packKeptObjects) { + this.packKeptObjects = packKeptObjects; + } + /** * A class holding statistical data for a FileRepository regarding how many * objects are stored as loose or packed objects