From 68b95afc706bdac78443f1b7c17c48bf57735f2d Mon Sep 17 00:00:00 2001 From: Gal Paikin Date: Thu, 19 Nov 2020 18:05:04 +0100 Subject: [PATCH 1/3] Add seekPastPrefix method to RefCursor This method will be used by the follow-up change. This useful if we want to go over all the changes after a specific ref. For example, the new method allows us to create a follow-up that would go over all the refs until we reach a specific ref (e.g refs/changes/), and then we use seekPastPrefix(refs/changes/) to read the rest of the refs, thus basically we return all refs except a specific prefix. When seeking past a prefix, the previous condition that created the RefCursor still applies. E.g, if the cursor was created by seekRefsWithPrefix, we can skip some refs but we will not return refs that are not starting with this prefix. Signed-off-by: Gal Paikin Change-Id: I2c02e89c877fe90da8619cb8a4a9a0c865f238ef --- .../storage/reftable/MergedReftableTest.java | 112 +++++++++++++ .../storage/reftable/ReftableTest.java | 158 ++++++++++++++++++ .../storage/reftable/MergedReftable.java | 18 ++ .../internal/storage/reftable/RefCursor.java | 13 ++ .../storage/reftable/ReftableReader.java | 26 +++ 5 files changed, 327 insertions(+) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/MergedReftableTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/MergedReftableTest.java index 0a03fc352..9aea3b4b2 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/MergedReftableTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/MergedReftableTest.java @@ -137,6 +137,118 @@ public void twoTableSeek() throws IOException { } } + @Test + public void twoTableSeekPastWithRefCursor() throws IOException { + List delta1 = Arrays.asList( + ref("refs/heads/apple", 1), + ref("refs/heads/master", 2)); + List delta2 = Arrays.asList( + ref("refs/heads/banana", 3), + ref("refs/heads/zzlast", 4)); + + 
MergedReftable mr = merge(write(delta1), write(delta2)); + try (RefCursor rc = mr.seekRefsWithPrefix("")) { + assertTrue(rc.next()); + assertEquals("refs/heads/apple", rc.getRef().getName()); + assertEquals(id(1), rc.getRef().getObjectId()); + + rc.seekPastPrefix("refs/heads/banana/"); + + assertTrue(rc.next()); + assertEquals("refs/heads/master", rc.getRef().getName()); + assertEquals(id(2), rc.getRef().getObjectId()); + + assertTrue(rc.next()); + assertEquals("refs/heads/zzlast", rc.getRef().getName()); + assertEquals(id(4), rc.getRef().getObjectId()); + + assertEquals(1, rc.getRef().getUpdateIndex()); + } + } + + @Test + public void oneTableSeekPastWithRefCursor() throws IOException { + List delta1 = Arrays.asList( + ref("refs/heads/apple", 1), + ref("refs/heads/master", 2)); + + MergedReftable mr = merge(write(delta1)); + try (RefCursor rc = mr.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/heads/apple"); + + assertTrue(rc.next()); + assertEquals("refs/heads/master", rc.getRef().getName()); + assertEquals(id(2), rc.getRef().getObjectId()); + + assertEquals(1, rc.getRef().getUpdateIndex()); + } + } + + @Test + public void seekPastToNonExistentPrefixToTheMiddle() throws IOException { + List delta1 = Arrays.asList( + ref("refs/heads/apple", 1), + ref("refs/heads/master", 2)); + List delta2 = Arrays.asList( + ref("refs/heads/banana", 3), + ref("refs/heads/zzlast", 4)); + + MergedReftable mr = merge(write(delta1), write(delta2)); + try (RefCursor rc = mr.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/heads/x"); + + assertTrue(rc.next()); + assertEquals("refs/heads/zzlast", rc.getRef().getName()); + assertEquals(id(4), rc.getRef().getObjectId()); + + assertEquals(1, rc.getRef().getUpdateIndex()); + } + } + + @Test + public void seekPastToNonExistentPrefixToTheEnd() throws IOException { + List delta1 = Arrays.asList( + ref("refs/heads/apple", 1), + ref("refs/heads/master", 2)); + List delta2 = Arrays.asList( + ref("refs/heads/banana", 3), + 
ref("refs/heads/zzlast", 4)); + + MergedReftable mr = merge(write(delta1), write(delta2)); + try (RefCursor rc = mr.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/heads/zzz"); + assertFalse(rc.next()); + } + } + + @Test + public void seekPastManyTimes() throws IOException { + List delta1 = Arrays.asList( + ref("refs/heads/apple", 1), + ref("refs/heads/master", 2)); + List delta2 = Arrays.asList( + ref("refs/heads/banana", 3), + ref("refs/heads/zzlast", 4)); + + MergedReftable mr = merge(write(delta1), write(delta2)); + try (RefCursor rc = mr.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/heads/apple"); + rc.seekPastPrefix("refs/heads/banana"); + rc.seekPastPrefix("refs/heads/master"); + rc.seekPastPrefix("refs/heads/zzlast"); + assertFalse(rc.next()); + } + } + + @Test + public void seekPastOnEmptyTable() throws IOException { + MergedReftable mr = merge(write(), write()); + try (RefCursor rc = mr.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/"); + assertFalse(rc.next()); + } + } + @Test public void twoTableById() throws IOException { List delta1 = Arrays.asList( diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java index 009914b35..56f881ec5 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/reftable/ReftableTest.java @@ -10,6 +10,7 @@ package org.eclipse.jgit.internal.storage.reftable; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.eclipse.jgit.lib.Constants.HEAD; import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; import static org.eclipse.jgit.lib.Constants.R_HEADS; @@ -49,8 +50,16 @@ import org.junit.Test; public class ReftableTest { + private static final byte[] LAST_UTF8_CHAR = new byte[] { + (byte)0x10, + (byte)0xFF, + (byte)0xFF}; + private static final 
String MASTER = "refs/heads/master"; private static final String NEXT = "refs/heads/next"; + private static final String AFTER_NEXT = "refs/heads/nextnext"; + private static final String LAST = "refs/heads/nextnextnext"; + private static final String NOT_REF_HEADS = "refs/zzz/zzz"; private static final String V1_0 = "refs/tags/v1.0"; private Stats stats; @@ -395,6 +404,135 @@ public void namespaceHeads() throws IOException { } } + @Test + public void seekPastRefWithRefCursor() throws IOException { + Ref exp = ref(MASTER, 1); + Ref next = ref(NEXT, 2); + Ref afterNext = ref(AFTER_NEXT, 3); + Ref afterNextNext = ref(LAST, 4); + ReftableReader t = read(write(exp, next, afterNext, afterNextNext)); + try (RefCursor rc = t.seekRefsWithPrefix("")) { + assertTrue(rc.next()); + assertEquals(MASTER, rc.getRef().getName()); + + rc.seekPastPrefix("refs/heads/next/"); + + assertTrue(rc.next()); + assertEquals(AFTER_NEXT, rc.getRef().getName()); + assertTrue(rc.next()); + assertEquals(LAST, rc.getRef().getName()); + + assertFalse(rc.next()); + } + } + + @Test + public void seekPastToNonExistentPrefixToTheMiddle() throws IOException { + Ref exp = ref(MASTER, 1); + Ref next = ref(NEXT, 2); + Ref afterNext = ref(AFTER_NEXT, 3); + Ref afterNextNext = ref(LAST, 4); + ReftableReader t = read(write(exp, next, afterNext, afterNextNext)); + try (RefCursor rc = t.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/heads/master_non_existent"); + + assertTrue(rc.next()); + assertEquals(NEXT, rc.getRef().getName()); + + assertTrue(rc.next()); + assertEquals(AFTER_NEXT, rc.getRef().getName()); + + assertTrue(rc.next()); + assertEquals(LAST, rc.getRef().getName()); + + assertFalse(rc.next()); + } + } + + @Test + public void seekPastToNonExistentPrefixToTheEnd() throws IOException { + Ref exp = ref(MASTER, 1); + Ref next = ref(NEXT, 2); + Ref afterNext = ref(AFTER_NEXT, 3); + Ref afterNextNext = ref(LAST, 4); + ReftableReader t = read(write(exp, next, afterNext, afterNextNext)); + try 
(RefCursor rc = t.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/heads/nextnon_existent_end"); + assertFalse(rc.next()); + } + } + + @Test + public void seekPastWithSeekRefsWithPrefix() throws IOException { + Ref exp = ref(MASTER, 1); + Ref next = ref(NEXT, 2); + Ref afterNext = ref(AFTER_NEXT, 3); + Ref afterNextNext = ref(LAST, 4); + Ref notRefsHeads = ref(NOT_REF_HEADS, 5); + ReftableReader t = read(write(exp, next, afterNext, afterNextNext, notRefsHeads)); + try (RefCursor rc = t.seekRefsWithPrefix("refs/heads/")) { + rc.seekPastPrefix("refs/heads/next/"); + assertTrue(rc.next()); + assertEquals(AFTER_NEXT, rc.getRef().getName()); + assertTrue(rc.next()); + assertEquals(LAST, rc.getRef().getName()); + + // NOT_REF_HEADS is next, but it's omitted because of + // seekRefsWithPrefix("refs/heads/"). + assertFalse(rc.next()); + } + } + + @Test + public void seekPastWithLotsOfRefs() throws IOException { + Ref[] refs = new Ref[500]; + for (int i = 1; i <= 500; i++) { + refs[i - 1] = ref(String.format("refs/%d", i), i); + } + ReftableReader t = read(write(refs)); + try (RefCursor rc = t.allRefs()) { + rc.seekPastPrefix("refs/3"); + assertTrue(rc.next()); + assertEquals("refs/4", rc.getRef().getName()); + assertTrue(rc.next()); + assertEquals("refs/40", rc.getRef().getName()); + + rc.seekPastPrefix("refs/8"); + assertTrue(rc.next()); + assertEquals("refs/9", rc.getRef().getName()); + assertTrue(rc.next()); + assertEquals("refs/90", rc.getRef().getName()); + assertTrue(rc.next()); + assertEquals("refs/91", rc.getRef().getName()); + } + } + + @Test + public void seekPastManyTimes() throws IOException { + Ref exp = ref(MASTER, 1); + Ref next = ref(NEXT, 2); + Ref afterNext = ref(AFTER_NEXT, 3); + Ref afterNextNext = ref(LAST, 4); + ReftableReader t = read(write(exp, next, afterNext, afterNextNext)); + + try (RefCursor rc = t.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/heads/master"); + rc.seekPastPrefix("refs/heads/next"); + 
rc.seekPastPrefix("refs/heads/nextnext"); + rc.seekPastPrefix("refs/heads/nextnextnext"); + assertFalse(rc.next()); + } + } + + @Test + public void seekPastOnEmptyTable() throws IOException { + ReftableReader t = read(write()); + try (RefCursor rc = t.seekRefsWithPrefix("")) { + rc.seekPastPrefix("refs/"); + assertFalse(rc.next()); + } + } + @Test public void indexScan() throws IOException { List refs = new ArrayList<>(); @@ -873,6 +1011,14 @@ public void byObjectIdOneRefWithIndex() throws IOException { } } + @Test + public void byObjectIdSkipPastPrefix() throws IOException { + ReftableReader t = read(write()); + try (RefCursor rc = t.byObjectId(id(2))) { + assertThrows(UnsupportedOperationException.class, () -> rc.seekPastPrefix("refs/heads/")); + } + } + @Test public void unpeeledDoesNotWrite() { try { @@ -883,6 +1029,18 @@ public void unpeeledDoesNotWrite() { } } + @Test + public void skipPastRefWithLastUTF8() throws IOException { + ReftableReader t = read(write(ref(String.format("refs/heads/%sbla", new String(LAST_UTF8_CHAR + , UTF_8)), 1))); + + try (RefCursor rc = t.allRefs()) { + rc.seekPastPrefix("refs/heads/"); + assertFalse(rc.next()); + } + } + + @Test public void nameTooLongDoesNotWrite() throws IOException { try { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/MergedReftable.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/MergedReftable.java index a78f4d24d..e210acf05 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/MergedReftable.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/MergedReftable.java @@ -11,6 +11,7 @@ package org.eclipse.jgit.internal.storage.reftable; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.PriorityQueue; @@ -215,6 +216,23 @@ public boolean next() throws IOException { } } + @Override + public void seekPastPrefix(String prefixName) throws IOException { + List entriesToAdd = 
new ArrayList<>(); + entriesToAdd.addAll(queue); + if (head != null) { + entriesToAdd.add(head); + } + + head = null; + queue.clear(); + + for(RefQueueEntry entry : entriesToAdd){ + entry.rc.seekPastPrefix(prefixName); + add(entry); + } + } + private RefQueueEntry poll() { RefQueueEntry e = head; if (e != null) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/RefCursor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/RefCursor.java index d96648eb5..5e2c35088 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/RefCursor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/RefCursor.java @@ -28,6 +28,19 @@ public abstract class RefCursor implements AutoCloseable { */ public abstract boolean next() throws IOException; + /** + * Seeks forward to the first ref record lexicographically beyond + * {@code prefixName} that doesn't start with {@code prefixName}. If there are + * no more results, skipping some refs won't add new results. E.g if we create a + * RefCursor that returns only results with a specific prefix, skipping that + * prefix won't give results that are not part of the original prefix. + * + * @param prefixName prefix that should be skipped. All previous refs before it + * will be skipped. + * @throws java.io.IOException references cannot be read. + */ + public abstract void seekPastPrefix(String prefixName) throws IOException; + /** * Get reference at the current position. 
* diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java index 095276f57..9e2ae9160 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableReader.java @@ -508,6 +508,21 @@ public boolean next() throws IOException { } } + @Override + public void seekPastPrefix(String prefixName) throws IOException { + initRefIndex(); + byte[] key = prefixName.getBytes(UTF_8); + ByteBuffer byteBuffer = ByteBuffer.allocate(key.length + 1); + byteBuffer.put(key); + // Add the representation of the last byte lexicographically. Based on how UTF_8 + // representation works, this byte will be bigger lexicographically than any + // UTF_8 character when translated into bytes, since 0xFF can never be a part of + // a UTF_8 string. + byteBuffer.put((byte) 0xFF); + + block = seek(REF_BLOCK_TYPE, byteBuffer.array(), refIndex, 0, refEnd); + } + @Override public Ref getRef() { return ref; @@ -681,6 +696,17 @@ public boolean next() throws IOException { } } + @Override + /** + * The implementation here would not be efficient complexity-wise since it + * expected that there are a small number of refs that match the same object id. + * In such case it's better to not even use this method (as the caller might + * expect it to be efficient). + */ + public void seekPastPrefix(String prefixName) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public Ref getRef() { return ref; From a6b90b7ec5c238692dc323e25ef927e4433edb1d Mon Sep 17 00:00:00 2001 From: Gal Paikin Date: Mon, 30 Nov 2020 15:57:06 +0100 Subject: [PATCH 2/3] Add getsRefsByPrefixWithSkips (excluding prefixes) to ReftableDatabase We sometimes want to get all the refs except specific prefixes, similarly to getRefsByPrefix that gets all the refs of a specific prefix. 
We now create a new method that gets all refs matching a prefix except a set of specific prefixes. One use-case is for Gerrit to be able to get all the refs except refs/changes; in Gerrit we often have lots of refs/changes, but very little other refs. Currently, to get all the refs except refs/changes we need to get all the refs and then filter the refs/changes, which is very inefficient. With this method, we can simply skip the unneeded prefix so that we don't have to go over all the elements. RefDirectory still uses the inefficient implementation, since there isn't a simple way to use Refcursor to achieve the efficient implementation (as done in ReftableDatabase). Signed-off-by: Gal Paikin Change-Id: I8c5db581acdeb6698e3d3a2abde8da32f70c854c --- .../RefsUnreadableInMemoryRepository.java | 11 +++ .../storage/file/FileReftableTest.java | 72 +++++++++++++++++++ .../storage/file/RefDirectoryTest.java | 20 ++++++ .../tst/org/eclipse/jgit/lib/RefTest.java | 59 +++++++++++++++ .../storage/dfs/DfsReftableDatabase.java | 7 ++ .../storage/file/FileReftableDatabase.java | 8 +++ .../storage/reftable/ReftableDatabase.java | 50 +++++++++++++ .../src/org/eclipse/jgit/lib/RefDatabase.java | 28 ++++++++ 8 files changed, 255 insertions(+) diff --git a/org.eclipse.jgit.http.test/src/org/eclipse/jgit/http/test/RefsUnreadableInMemoryRepository.java b/org.eclipse.jgit.http.test/src/org/eclipse/jgit/http/test/RefsUnreadableInMemoryRepository.java index 80cbe8738..4167b038e 100644 --- a/org.eclipse.jgit.http.test/src/org/eclipse/jgit/http/test/RefsUnreadableInMemoryRepository.java +++ b/org.eclipse.jgit.http.test/src/org/eclipse/jgit/http/test/RefsUnreadableInMemoryRepository.java @@ -83,6 +83,17 @@ public List getRefsByPrefix(String prefix) throws IOException { return super.getRefsByPrefix(prefix); } + /** {@inheritDoc} */ + @Override + public List getRefsByPrefixWithExclusions(String include, Set excludes) + throws IOException { + if (failing) { + throw new IOException("disk failed, 
no refs found"); + } + + return super.getRefsByPrefixWithExclusions(include, excludes); + } + /** {@inheritDoc} */ @Override public Set getTipsWithSha1(ObjectId id) throws IOException { diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileReftableTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileReftableTest.java index 33bacbe3e..15c9109ca 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileReftableTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileReftableTest.java @@ -28,14 +28,18 @@ import java.io.IOException; import java.security.SecureRandom; import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; import java.util.List; +import java.util.Set; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.lib.RefDatabase; import org.eclipse.jgit.lib.RefRename; import org.eclipse.jgit.lib.RefUpdate; import org.eclipse.jgit.lib.RefUpdate.Result; @@ -579,6 +583,64 @@ public void reftableRefsStorageClass() throws IOException { assertEquals(Ref.Storage.PACKED, b.getStorage()); } + @Test + public void testGetRefsExcludingPrefix() throws IOException { + Set prefixes = new HashSet<>(); + prefixes.add("refs/tags"); + // HEAD + 12 refs/heads are present here. 
+ List refs = + db.getRefDatabase().getRefsByPrefixWithExclusions(RefDatabase.ALL, prefixes); + assertEquals(13, refs.size()); + checkContainsRef(refs, db.exactRef("HEAD")); + checkContainsRef(refs, db.exactRef("refs/heads/a")); + for (Ref notInResult : db.getRefDatabase().getRefsByPrefix("refs/tags")) { + assertFalse(refs.contains(notInResult)); + } + } + + @Test + public void testGetRefsExcludingPrefixes() throws IOException { + Set exclude = new HashSet<>(); + exclude.add("refs/tags/"); + exclude.add("refs/heads/"); + List refs = db.getRefDatabase().getRefsByPrefixWithExclusions(RefDatabase.ALL, exclude); + assertEquals(1, refs.size()); + checkContainsRef(refs, db.exactRef("HEAD")); + } + + @Test + public void testGetRefsExcludingNonExistingPrefixes() throws IOException { + Set exclude = new HashSet<>(); + exclude.add("refs/tags/"); + exclude.add("refs/heads/"); + exclude.add("refs/nonexistent/"); + List refs = db.getRefDatabase().getRefsByPrefixWithExclusions(RefDatabase.ALL, exclude); + assertEquals(1, refs.size()); + checkContainsRef(refs, db.exactRef("HEAD")); + } + + @Test + public void testGetRefsWithPrefixExcludingPrefixes() throws IOException { + Set exclude = new HashSet<>(); + exclude.add("refs/heads/pa"); + String include = "refs/heads/p"; + List refs = db.getRefDatabase().getRefsByPrefixWithExclusions(include, exclude); + assertEquals(1, refs.size()); + checkContainsRef(refs, db.exactRef("refs/heads/prefix/a")); + } + + @Test + public void testGetRefsWithPrefixExcludingOverlappingPrefixes() throws IOException { + Set exclude = new HashSet<>(); + exclude.add("refs/heads/pa"); + exclude.add("refs/heads/"); + exclude.add("refs/heads/p"); + exclude.add("refs/tags/"); + List refs = db.getRefDatabase().getRefsByPrefixWithExclusions(RefDatabase.ALL, exclude); + assertEquals(1, refs.size()); + checkContainsRef(refs, db.exactRef("HEAD")); + } + private RefUpdate updateRef(String name) throws IOException { final RefUpdate ref = db.updateRef(name); 
ref.setNewObjectId(db.resolve(Constants.HEAD)); @@ -596,4 +658,14 @@ private void writeSymref(String src, String dst) throws IOException { fail("link " + src + " to " + dst); } } + + private static void checkContainsRef(Collection haystack, Ref needle) { + for (Ref ref : haystack) { + if (ref.getName().equals(needle.getName()) && + ref.getObjectId().equals(needle.getObjectId())) { + return; + } + } + fail("list " + haystack + " does not contain ref " + needle); + } } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/RefDirectoryTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/RefDirectoryTest.java index 97ef5993b..38c545ef5 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/RefDirectoryTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/RefDirectoryTest.java @@ -30,8 +30,10 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -352,6 +354,24 @@ public void testGetRefs_IgnoresGarbageRef4() throws IOException { assertEquals(A, c.getObjectId()); } + @Test + public void testGetRefs_ExcludingPrefixes() throws IOException { + writeLooseRef("refs/heads/A", A); + writeLooseRef("refs/heads/B", B); + writeLooseRef("refs/tags/tag", A); + writeLooseRef("refs/something/something", B); + writeLooseRef("refs/aaa/aaa", A); + + Set toExclude = new HashSet<>(); + toExclude.add("refs/aaa/"); + toExclude.add("refs/heads/"); + List refs = refdir.getRefsByPrefixWithExclusions(RefDatabase.ALL, toExclude); + + assertEquals(2, refs.size()); + assertTrue(refs.contains(refdir.exactRef("refs/tags/tag"))); + assertTrue(refs.contains(refdir.exactRef("refs/something/something"))); + } + @Test public void testFirstExactRef_IgnoresGarbageRef() throws IOException { 
writeLooseRef("refs/heads/A", A); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/RefTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/RefTest.java index 88d17ec15..7590048a7 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/RefTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/RefTest.java @@ -26,6 +26,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; @@ -317,6 +318,64 @@ public void testGetRefsByPrefixes() throws IOException { checkContainsRef(refs, db.exactRef("refs/tags/A")); } + @Test + public void testGetRefsExcludingPrefix() throws IOException { + Set exclude = new HashSet<>(); + exclude.add("refs/tags"); + // HEAD + 12 refs/heads are present here. + List refs = + db.getRefDatabase().getRefsByPrefixWithExclusions(RefDatabase.ALL, exclude); + assertEquals(13, refs.size()); + checkContainsRef(refs, db.exactRef("HEAD")); + checkContainsRef(refs, db.exactRef("refs/heads/a")); + for (Ref notInResult : db.getRefDatabase().getRefsByPrefix("refs/tags")) { + assertFalse(refs.contains(notInResult)); + } + } + + @Test + public void testGetRefsExcludingPrefixes() throws IOException { + Set exclude = new HashSet<>(); + exclude.add("refs/tags/"); + exclude.add("refs/heads/"); + List refs = db.getRefDatabase().getRefsByPrefixWithExclusions(RefDatabase.ALL, exclude); + assertEquals(1, refs.size()); + checkContainsRef(refs, db.exactRef("HEAD")); + } + + @Test + public void testGetRefsExcludingNonExistingPrefixes() throws IOException { + Set prefixes = new HashSet<>(); + prefixes.add("refs/tags/"); + prefixes.add("refs/heads/"); + prefixes.add("refs/nonexistent/"); + List refs = db.getRefDatabase().getRefsByPrefixWithExclusions(RefDatabase.ALL, prefixes); + assertEquals(1, refs.size()); + checkContainsRef(refs, db.exactRef("HEAD")); + } + + @Test + public void 
testGetRefsWithPrefixExcludingPrefixes() throws IOException { + Set exclude = new HashSet<>(); + exclude.add("refs/heads/pa"); + String include = "refs/heads/p"; + List refs = db.getRefDatabase().getRefsByPrefixWithExclusions(include, exclude); + assertEquals(1, refs.size()); + checkContainsRef(refs, db.exactRef("refs/heads/prefix/a")); + } + + @Test + public void testGetRefsWithPrefixExcludingOverlappingPrefixes() throws IOException { + Set exclude = new HashSet<>(); + exclude.add("refs/heads/pa"); + exclude.add("refs/heads/"); + exclude.add("refs/heads/p"); + exclude.add("refs/tags/"); + List refs = db.getRefDatabase().getRefsByPrefixWithExclusions(RefDatabase.ALL, exclude); + assertEquals(1, refs.size()); + checkContainsRef(refs, db.exactRef("HEAD")); + } + @Test public void testResolveTipSha1() throws IOException { ObjectId masterId = db.resolve("refs/heads/master"); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReftableDatabase.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReftableDatabase.java index 5561dc6a2..6c3b056ef 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReftableDatabase.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReftableDatabase.java @@ -176,6 +176,13 @@ public List getRefsByPrefix(String prefix) throws IOException { return reftableDatabase.getRefsByPrefix(prefix); } + /** {@inheritDoc} */ + @Override + public List getRefsByPrefixWithExclusions(String include, Set excludes) + throws IOException { + return reftableDatabase.getRefsByPrefixWithExclusions(include, excludes); + } + /** {@inheritDoc} */ @Override public Set getTipsWithSha1(ObjectId id) throws IOException { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileReftableDatabase.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileReftableDatabase.java index ad1e75312..a80fa837b 100644 --- 
a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileReftableDatabase.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileReftableDatabase.java @@ -21,6 +21,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -179,6 +180,13 @@ public Map getRefs(String prefix) throws IOException { RefList.emptyList()); } + /** {@inheritDoc} */ + @Override + public List getRefsByPrefixWithExclusions(String include, Set excludes) + throws IOException { + return reftableDatabase.getRefsByPrefixWithExclusions(include, excludes); + } + /** {@inheritDoc} */ @Override public List getAdditionalRefs() throws IOException { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableDatabase.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableDatabase.java index 4747be354..0c1682861 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableDatabase.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableDatabase.java @@ -14,10 +14,12 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.lib.ObjectId; @@ -265,6 +267,54 @@ public List getRefsByPrefix(String prefix) throws IOException { return Collections.unmodifiableList(all); } + /** + * Returns refs whose names start with a given prefix excluding all refs that + * start with one of the given prefixes. + * + * @param include string that names of refs should start with; may be empty. + * @param excludes strings that names of refs can't start with; may be empty. 
+ * @return immutable list of refs whose names start with {@code include} and + * none of the strings in {@code exclude}. + * @throws java.io.IOException the reference space cannot be accessed. + */ + public List getRefsByPrefixWithExclusions(String include, Set excludes) throws IOException { + if (excludes.isEmpty()) { + return getRefsByPrefix(include); + } + List results = new ArrayList<>(); + lock.lock(); + try { + Reftable table = reader(); + Iterator excludeIterator = + excludes.stream().sorted().collect(Collectors.toList()).iterator(); + String currentExclusion = excludeIterator.hasNext() ? excludeIterator.next() : null; + try (RefCursor rc = RefDatabase.ALL.equals(include) ? table.allRefs() : table.seekRefsWithPrefix(include)) { + while (rc.next()) { + Ref ref = table.resolve(rc.getRef()); + if (ref == null || ref.getObjectId() == null) { + continue; + } + // Skip prefixes that will never see since we are already further than those + // prefixes lexicographically. + while (excludeIterator.hasNext() && !ref.getName().startsWith(currentExclusion) + && ref.getName().compareTo(currentExclusion) > 0) { + currentExclusion = excludeIterator.next(); + } + + if (currentExclusion != null && ref.getName().startsWith(currentExclusion)) { + rc.seekPastPrefix(currentExclusion); + continue; + } + results.add(ref); + } + } + } finally { + lock.unlock(); + } + + return Collections.unmodifiableList(results); + } + /** * @return whether there is a fast SHA1 to ref map. * @throws IOException in case of I/O problems. 
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/RefDatabase.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/RefDatabase.java index 6832c9cd8..7b7bdebac 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/RefDatabase.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/RefDatabase.java @@ -21,6 +21,9 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + import org.eclipse.jgit.annotations.NonNull; import org.eclipse.jgit.annotations.Nullable; @@ -413,6 +416,31 @@ public List getRefsByPrefix(String prefix) throws IOException { return Collections.unmodifiableList(result); } + /** + * Returns refs whose names start with a given prefix excluding all refs that + * start with one of the given prefixes. + * + *

+ * The default implementation is not efficient. Implementors of {@link RefDatabase} + * should override this method directly if a better implementation is possible. + * + * @param include string that names of refs should start with; may be empty. + * @param excludes strings that names of refs can't start with; may be empty. + * @return immutable list of refs whose names start with {@code prefix} and none + * of the strings in {@code exclude}. + * @throws java.io.IOException the reference space cannot be accessed. + * @since 5.11 + */ + @NonNull + public List getRefsByPrefixWithExclusions(String include, Set excludes) + throws IOException { + Stream refs = getRefs(include).values().stream(); + for(String exclude: excludes) { + refs = refs.filter(r -> !r.getName().startsWith(exclude)); + } + return Collections.unmodifiableList(refs.collect(Collectors.toList())); + } + /** * Returns refs whose names start with one of the given prefixes. *

From 31e3cb4375f92e56f27b83c4583523c14a712b2d Mon Sep 17 00:00:00 2001 From: Gal Paikin Date: Mon, 7 Dec 2020 15:18:34 +0100 Subject: [PATCH 3/3] Compare getting all refs except specific refs with seek and with filter There are currently two ways to get all refs except a specific ref; we add two methods that perform both and compare the two different approaches. This change adds two methods that compare the two different approaches to such a query: 1. Get all the refs, and then filter by refs that don't start with the prefix (current approach). 2. Get all refs until encountering a ref that is part of the prefix we should exclude, skip using seekPastPrefix, and continue (new approach). This works since the refs are sorted. Specifically in Gerrit, we often have thousands of refs that are not refs/changes, and millions of refs/changes, hence the second approach should be much faster. In JGit in general it's still expected to provide a better result even if we're skipping a smaller chunk of the refs since the complexity here is O(log n) with a binary search, rather than O(number of skipped refs). We ran this benchmark on a big chunk of chromium/src's reftable. To run it, we first create the reftable: git ls-remote https://chromium.googlesource.com/chromium/src > lsr bazel build org.eclipse.jgit.pgm:jgit && rm -rf /tmp/reftable* && \ ./bazel-bin/org.eclipse.jgit.pgm/jgit debug-benchmark-reftable \ --test write_stack lsr /tmp/reftable Then, we actually test the created reftable. Note that we can't test all of them at once since there are multiple ones, but below is a good example. bazel build org.eclipse.jgit.pgm:jgit && \ ./bazel-bin/org.eclipse.jgit.pgm/jgit debug-benchmark-reftable \ --test get_refs_excluding_ref --ref refs/changes \ lsr /tmp/reftable/000000000001-0000001e0371.ref Result: total time the action took using seek: 36925 usec total time the action took using filter: 874382 usec number of refs that start with prefix: 4266.
number of refs that don't start with prefix: 1962695. Similarly for Android's biggest repository, platform/frameworks/base (still only partial result): total time the action took using seek: 9020 usec total time the action took using filter: 143166 usec number of refs that start with prefix: 296. number of refs that don't start with prefix: 60400. In conclusion, it's easy to see an improvement of a factor of 15-20x for large Gerrit repositories! Signed-off-by: Gal Paikin Change-Id: I36d9b63eb259804c774864429cf2c761cd099cc3 --- .../jgit/pgm/debug/BenchmarkReftable.java | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java index 630fac549..f23f4cf0e 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java @@ -23,7 +23,9 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; +import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; import org.eclipse.jgit.internal.storage.file.FileReftableStack; import org.eclipse.jgit.internal.storage.io.BlockSource; @@ -47,6 +49,7 @@ enum Test { SEEK_COLD, SEEK_HOT, BY_ID_COLD, BY_ID_HOT, WRITE_STACK, + GET_REFS_EXCLUDING_REF } @Option(name = "--tries") @@ -91,7 +94,11 @@ protected void run() throws Exception { case WRITE_STACK: writeStack(); break; - } + case GET_REFS_EXCLUDING_REF : + getRefsExcludingWithSeekPast(ref); + getRefsExcludingWithFilter(ref); + break; + } } private void printf(String fmt, Object... 
args) throws IOException { @@ -315,4 +322,49 @@ private void byIdHot(ObjectId id) throws Exception { printf("%12s %10d usec %9.1f usec/run %5d runs", "reftable", tot / 1000, (((double) tot) / tries) / 1000, tries); } + + @SuppressWarnings({"nls", "boxing"}) + private void getRefsExcludingWithFilter(String prefix) throws Exception { + long startTime = System.nanoTime(); + List allRefs = new ArrayList<>(); + try (FileInputStream in = new FileInputStream(reftablePath); + BlockSource src = BlockSource.from(in); + ReftableReader reader = new ReftableReader(src)) { + try (RefCursor rc = reader.allRefs()) { + while (rc.next()) { + allRefs.add(rc.getRef()); + } + } + } + int total = allRefs.size(); + allRefs = allRefs.stream().filter(r -> !r.getName().startsWith(prefix)).collect(Collectors.toList()); /* keep only the refs outside the excluded prefix */ + int notStartWithPrefix = allRefs.size(); + int startWithPrefix = total - notStartWithPrefix; + long totalTime = System.nanoTime() - startTime; + printf("total time the action took using filter: %10d usec", totalTime / 1000); + printf("number of refs that start with prefix: %d", startWithPrefix); + printf("number of refs that don't start with prefix: %d", notStartWithPrefix); + } + + @SuppressWarnings({"nls", "boxing"}) + private void getRefsExcludingWithSeekPast(String prefix) throws Exception { + long start = System.nanoTime(); + try (FileInputStream in = new FileInputStream(reftablePath); + BlockSource src = BlockSource.from(in); + ReftableReader reader = new ReftableReader(src)) { + try (RefCursor rc = reader.allRefs()) { + while (rc.next()) { + if (rc.getRef().getName().startsWith(prefix)) { + break; + } + } + rc.seekPastPrefix(prefix); + while (rc.next()) { + rc.getRef(); + } + } + } + long tot = System.nanoTime() - start; + printf("total time the action took using seek: %10d usec", tot / 1000); + } }