CommitGraphWriter: write changed-path filters
Add support for writing the BIDX and BDAT chunks of the commit graph file, as described in man gitformat-commit-graph(5). The ability to read such chunks will be added in a subsequent commit. This work is based on earlier work by Kyle Zhao (Ib863782af209f26381e3ca0a2c119b99e84b679c). Change-Id: Ic18e6f0eeec7da1e1ff31751aabda5e6952dbe6e Signed-off-by: kylezhao <kylezhao@tencent.com> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>motiejus-detectSymlinkSupport
parent
5dc63514d0
commit
49beb5ae51
|
@ -21,6 +21,10 @@ java_library(
|
|||
|
||||
java_library(
|
||||
name = "commons-codec",
|
||||
visibility = [
|
||||
"//org.eclipse.jgit:__pkg__",
|
||||
"//org.eclipse.jgit.test:__pkg__",
|
||||
],
|
||||
exports = ["@commons-codec//jar"],
|
||||
)
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ org.eclipse.jgit.pgm.debug.BenchmarkReftable
|
|||
org.eclipse.jgit.pgm.debug.DiffAlgorithms
|
||||
org.eclipse.jgit.pgm.debug.LfsStore
|
||||
org.eclipse.jgit.pgm.debug.MakeCacheTree
|
||||
org.eclipse.jgit.pgm.debug.ReadChangedPathFilter
|
||||
org.eclipse.jgit.pgm.debug.ReadDirCache
|
||||
org.eclipse.jgit.pgm.debug.ReadReftable
|
||||
org.eclipse.jgit.pgm.debug.RebuildCommitGraph
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (C) 2023, Google LLC
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Eclipse Distribution License v. 1.0 which is available at
|
||||
* https://www.eclipse.org/org/documents/edl-v10.php.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
package org.eclipse.jgit.pgm.debug;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.eclipse.jgit.pgm.Command;
|
||||
import org.eclipse.jgit.pgm.TextBuiltin;
|
||||
import org.eclipse.jgit.util.NB;
|
||||
import org.kohsuke.args4j.Argument;
|
||||
|
||||
/**
|
||||
* Prints the contents of the BDAT chunk from commit-graph file.
|
||||
* <p>
|
||||
* This is a debugging tool for changed path filter development.
|
||||
*/
|
||||
@Command
|
||||
class ReadChangedPathFilter extends TextBuiltin {
|
||||
|
||||
static final int CHUNK_ID_OID_FANOUT = 0x4f494446; /* "OIDF" */
|
||||
|
||||
static final int CHUNK_ID_BLOOM_FILTER_INDEX = 0x42494458; /* "BIDX" */
|
||||
|
||||
static final int CHUNK_ID_BLOOM_FILTER_DATA = 0x42444154; /* "BDAT" */
|
||||
|
||||
@Argument(index = 0)
|
||||
private String input;
|
||||
|
||||
static HashSet<String> changedPathStrings(byte[] data) {
|
||||
int oidf_offset = -1;
|
||||
int bidx_offset = -1;
|
||||
int bdat_offset = -1;
|
||||
for (int i = 8; i < data.length - 4; i += 12) {
|
||||
switch (NB.decodeInt32(data, i)) {
|
||||
case CHUNK_ID_OID_FANOUT:
|
||||
oidf_offset = (int) NB.decodeInt64(data, i + 4);
|
||||
break;
|
||||
case CHUNK_ID_BLOOM_FILTER_INDEX:
|
||||
bidx_offset = (int) NB.decodeInt64(data, i + 4);
|
||||
break;
|
||||
case CHUNK_ID_BLOOM_FILTER_DATA:
|
||||
bdat_offset = (int) NB.decodeInt64(data, i + 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
bdat_offset += 12; // skip version, hash count, bits per entry
|
||||
int commit_count = NB.decodeInt32(data, oidf_offset + 255 * 4);
|
||||
int[] changed_path_length_cumuls = new int[commit_count];
|
||||
for (int i = 0; i < commit_count; i++) {
|
||||
changed_path_length_cumuls[i] = NB.decodeInt32(data,
|
||||
bidx_offset + i * 4);
|
||||
}
|
||||
HashSet<String> changed_paths = new HashSet<>();
|
||||
for (int i = 0; i < commit_count; i++) {
|
||||
int prior_cumul = i == 0 ? 0 : changed_path_length_cumuls[i - 1];
|
||||
String changed_path = "";
|
||||
for (int j = prior_cumul; j < changed_path_length_cumuls[i]; j++) {
|
||||
changed_path += data[bdat_offset + j] + ",";
|
||||
}
|
||||
changed_paths.add(changed_path);
|
||||
}
|
||||
return changed_paths;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
protected void run() throws Exception {
|
||||
try (FileInputStream in = new FileInputStream(input)
|
||||
) {
|
||||
byte[] data = in.readAllBytes();
|
||||
outw.println(changedPathStrings(data).toString());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -77,6 +77,7 @@ java_library(
|
|||
resources = DATA,
|
||||
deps = [
|
||||
"//lib:assertj-core",
|
||||
"//lib:commons-codec",
|
||||
"//lib:junit",
|
||||
"//lib:mockito",
|
||||
"//lib:slf4j-simple",
|
||||
|
|
|
@ -10,19 +10,24 @@
|
|||
|
||||
package org.eclipse.jgit.internal.storage.commitgraph;
|
||||
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.containsInAnyOrder;
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.eclipse.jgit.dircache.DirCacheEntry;
|
||||
import org.eclipse.jgit.internal.storage.file.FileRepository;
|
||||
import org.eclipse.jgit.junit.RepositoryTestCase;
|
||||
import org.eclipse.jgit.junit.TestRepository;
|
||||
import org.eclipse.jgit.lib.NullProgressMonitor;
|
||||
import org.eclipse.jgit.lib.ObjectId;
|
||||
import org.eclipse.jgit.revwalk.RevBlob;
|
||||
import org.eclipse.jgit.revwalk.RevCommit;
|
||||
import org.eclipse.jgit.revwalk.RevWalk;
|
||||
import org.eclipse.jgit.util.NB;
|
||||
|
@ -76,11 +81,20 @@ public class CommitGraphWriterTest extends RepositoryTestCase {
|
|||
assertTrue(data.length > 0);
|
||||
byte[] headers = new byte[8];
|
||||
System.arraycopy(data, 0, headers, 0, 8);
|
||||
assertArrayEquals(new byte[] {'C', 'G', 'P', 'H', 1, 1, 4, 0}, headers);
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, NB.decodeInt32(data, 8));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, NB.decodeInt32(data, 20));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, NB.decodeInt32(data, 32));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST, NB.decodeInt32(data, 44));
|
||||
assertArrayEquals(new byte[] { 'C', 'G', 'P', 'H', 1, 1, 6, 0 },
|
||||
headers);
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT,
|
||||
NB.decodeInt32(data, 8));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP,
|
||||
NB.decodeInt32(data, 20));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA,
|
||||
NB.decodeInt32(data, 32));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST,
|
||||
NB.decodeInt32(data, 44));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX,
|
||||
NB.decodeInt32(data, 56));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA,
|
||||
NB.decodeInt32(data, 68));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -101,13 +115,208 @@ public class CommitGraphWriterTest extends RepositoryTestCase {
|
|||
assertTrue(data.length > 0);
|
||||
byte[] headers = new byte[8];
|
||||
System.arraycopy(data, 0, headers, 0, 8);
|
||||
assertArrayEquals(new byte[] {'C', 'G', 'P', 'H', 1, 1, 3, 0}, headers);
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, NB.decodeInt32(data, 8));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, NB.decodeInt32(data, 20));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, NB.decodeInt32(data, 32));
|
||||
assertArrayEquals(new byte[] { 'C', 'G', 'P', 'H', 1, 1, 5, 0 },
|
||||
headers);
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT,
|
||||
NB.decodeInt32(data, 8));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP,
|
||||
NB.decodeInt32(data, 20));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA,
|
||||
NB.decodeInt32(data, 32));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX,
|
||||
NB.decodeInt32(data, 44));
|
||||
assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA,
|
||||
NB.decodeInt32(data, 56));
|
||||
}
|
||||
|
||||
static HashSet<String> changedPathStrings(byte[] data) {
|
||||
int oidf_offset = -1;
|
||||
int bidx_offset = -1;
|
||||
int bdat_offset = -1;
|
||||
for (int i = 8; i < data.length - 4; i += 12) {
|
||||
switch (NB.decodeInt32(data, i)) {
|
||||
case CommitGraphConstants.CHUNK_ID_OID_FANOUT:
|
||||
oidf_offset = (int) NB.decodeInt64(data, i + 4);
|
||||
break;
|
||||
case CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX:
|
||||
bidx_offset = (int) NB.decodeInt64(data, i + 4);
|
||||
break;
|
||||
case CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA:
|
||||
bdat_offset = (int) NB.decodeInt64(data, i + 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
assertTrue(oidf_offset > 0);
|
||||
assertTrue(bidx_offset > 0);
|
||||
assertTrue(bdat_offset > 0);
|
||||
bdat_offset += 12; // skip version, hash count, bits per entry
|
||||
int commit_count = NB.decodeInt32(data, oidf_offset + 255 * 4);
|
||||
int[] changed_path_length_cumuls = new int[commit_count];
|
||||
for (int i = 0; i < commit_count; i++) {
|
||||
changed_path_length_cumuls[i] = NB.decodeInt32(data,
|
||||
bidx_offset + i * 4);
|
||||
}
|
||||
HashSet<String> changed_paths = new HashSet<>();
|
||||
for (int i = 0; i < commit_count; i++) {
|
||||
int prior_cumul = i == 0 ? 0 : changed_path_length_cumuls[i - 1];
|
||||
String changed_path = "";
|
||||
for (int j = prior_cumul; j < changed_path_length_cumuls[i]; j++) {
|
||||
changed_path += data[bdat_offset + j] + ",";
|
||||
}
|
||||
changed_paths.add(changed_path);
|
||||
}
|
||||
return changed_paths;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expected value generated using the following:
|
||||
*
|
||||
* <pre>
|
||||
* # apply into git-repo: https://lore.kernel.org/git/cover.1684790529.git.jonathantanmy@google.com/
|
||||
* (cd git-repo; make)
|
||||
* git-repo/bin-wrappers/git init tested
|
||||
* (cd tested; touch foo.txt; mkdir -p onedir/twodir; touch onedir/twodir/bar.txt)
|
||||
* git-repo/bin-wrappers/git -C tested add foo.txt onedir
|
||||
* git-repo/bin-wrappers/git -C tested commit -m first_commit
|
||||
* (cd tested; mv foo.txt foo-new.txt; mv onedir/twodir/bar.txt onedir/twodir/bar-new.txt)
|
||||
* git-repo/bin-wrappers/git -C tested add foo-new.txt onedir
|
||||
* git-repo/bin-wrappers/git -C tested commit -a -m second_commit
|
||||
* git-repo/bin-wrappers/git -C tested maintenance run
|
||||
* git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
|
||||
* (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
|
||||
* </pre>
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testChangedPathFilterRootAndNested() throws Exception {
|
||||
RevBlob emptyBlob = tr.blob(new byte[] {});
|
||||
RevCommit root = tr.commit(tr.tree(tr.file("foo.txt", emptyBlob),
|
||||
tr.file("onedir/twodir/bar.txt", emptyBlob)));
|
||||
RevCommit tip = tr.commit(tr.tree(tr.file("foo-new.txt", emptyBlob),
|
||||
tr.file("onedir/twodir/bar-new.txt", emptyBlob)), root);
|
||||
|
||||
Set<ObjectId> wants = Collections.singleton(tip);
|
||||
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
|
||||
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
|
||||
writer = new CommitGraphWriter(graphCommits);
|
||||
writer.write(m, os);
|
||||
|
||||
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
|
||||
assertThat(changedPaths, containsInAnyOrder(
|
||||
"109,-33,2,60,20,79,-11,116,",
|
||||
"119,69,63,-8,0,"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Expected value generated using the following:
|
||||
*
|
||||
* <pre>
|
||||
* git -C git-repo checkout todo get version number when it is merged
|
||||
* (cd git-repo; make)
|
||||
* git-repo/bin-wrappers/git init tested
|
||||
* (cd tested; mkdir -p onedir/twodir; touch onedir/twodir/a.txt; touch onedir/twodir/b.txt)
|
||||
* git-repo/bin-wrappers/git -C tested add onedir
|
||||
* git-repo/bin-wrappers/git -C tested commit -m first_commit
|
||||
* (cd tested; mv onedir/twodir/a.txt onedir/twodir/c.txt; mv onedir/twodir/b.txt onedir/twodir/d.txt)
|
||||
* git-repo/bin-wrappers/git -C tested add onedir
|
||||
* git-repo/bin-wrappers/git -C tested commit -a -m second_commit
|
||||
* git-repo/bin-wrappers/git -C tested maintenance run
|
||||
* git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
|
||||
* (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
|
||||
* </pre>
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testChangedPathFilterOverlappingNested() throws Exception {
|
||||
RevBlob emptyBlob = tr.blob(new byte[] {});
|
||||
RevCommit root = tr
|
||||
.commit(tr.tree(tr.file("onedir/twodir/a.txt", emptyBlob),
|
||||
tr.file("onedir/twodir/b.txt", emptyBlob)));
|
||||
RevCommit tip = tr
|
||||
.commit(tr.tree(tr.file("onedir/twodir/c.txt", emptyBlob),
|
||||
tr.file("onedir/twodir/d.txt", emptyBlob)), root);
|
||||
|
||||
Set<ObjectId> wants = Collections.singleton(tip);
|
||||
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
|
||||
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
|
||||
writer = new CommitGraphWriter(graphCommits);
|
||||
writer.write(m, os);
|
||||
|
||||
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
|
||||
assertThat(changedPaths, containsInAnyOrder("61,30,23,-24,1,",
|
||||
"-58,-51,-46,60,29,-121,113,90,"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Expected value generated using the following:
|
||||
*
|
||||
* <pre>
|
||||
* git -C git-repo checkout todo get version number when it is merged
|
||||
* (cd git-repo; make)
|
||||
* git-repo/bin-wrappers/git init tested
|
||||
* (cd tested; touch 你好)
|
||||
* git-repo/bin-wrappers/git -C tested add 你好
|
||||
* git-repo/bin-wrappers/git -C tested commit -m first_commit
|
||||
* git-repo/bin-wrappers/git -C tested maintenance run
|
||||
* git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
|
||||
* (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
|
||||
* </pre>
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testChangedPathFilterHighBit() throws Exception {
|
||||
RevBlob emptyBlob = tr.blob(new byte[] {});
|
||||
// tr.file encodes using UTF-8
|
||||
RevCommit root = tr.commit(tr.tree(tr.file("你好", emptyBlob)));
|
||||
|
||||
Set<ObjectId> wants = Collections.singleton(root);
|
||||
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
|
||||
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
|
||||
writer = new CommitGraphWriter(graphCommits);
|
||||
writer.write(m, os);
|
||||
|
||||
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
|
||||
assertThat(changedPaths, containsInAnyOrder("16,16,"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChangedPathFilterEmptyChange() throws Exception {
|
||||
RevCommit root = commit();
|
||||
|
||||
Set<ObjectId> wants = Collections.singleton(root);
|
||||
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
|
||||
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
|
||||
writer = new CommitGraphWriter(graphCommits);
|
||||
writer.write(m, os);
|
||||
|
||||
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
|
||||
assertThat(changedPaths, containsInAnyOrder("0,"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChangedPathFilterManyChanges() throws Exception {
|
||||
RevBlob emptyBlob = tr.blob(new byte[] {});
|
||||
DirCacheEntry[] entries = new DirCacheEntry[513];
|
||||
for (int i = 0; i < entries.length; i++) {
|
||||
entries[i] = tr.file(i + ".txt", emptyBlob);
|
||||
}
|
||||
|
||||
RevCommit root = tr.commit(tr.tree(entries));
|
||||
|
||||
Set<ObjectId> wants = Collections.singleton(root);
|
||||
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
|
||||
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
|
||||
writer = new CommitGraphWriter(graphCommits);
|
||||
writer.write(m, os);
|
||||
|
||||
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
|
||||
assertThat(changedPaths, containsInAnyOrder("-1,"));
|
||||
}
|
||||
|
||||
RevCommit commit(RevCommit... parents) throws Exception {
|
||||
return tr.commit(parents);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -20,6 +20,7 @@ java_library(
|
|||
resources = RESOURCES,
|
||||
deps = [
|
||||
":insecure_cipher_factory",
|
||||
"//lib:commons-codec",
|
||||
"//lib:javaewah",
|
||||
"//lib:slf4j-api",
|
||||
],
|
||||
|
|
|
@ -46,6 +46,12 @@
|
|||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
<version>1.15</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
* Copyright (C) 2023, Google LLC
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Eclipse Distribution License v. 1.0 which is available at
|
||||
* https://www.eclipse.org/org/documents/edl-v10.php.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
package org.eclipse.jgit.internal.storage.commitgraph;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.codec.digest.MurmurHash3;
|
||||
|
||||
/**
|
||||
* A changed path filter for a commit.
|
||||
*
|
||||
* @since 6.7
|
||||
*/
|
||||
public class ChangedPathFilter {
|
||||
/**
|
||||
* The number of times a path is hashed, as described in man
|
||||
* gitformat-commit-graph(5). The value of this constant is the only value
|
||||
* JGit currently supports.
|
||||
*/
|
||||
public static final int PATH_HASH_COUNT = 7;
|
||||
|
||||
/**
|
||||
* The minimum bits per entry, as described in man
|
||||
* gitformat-commit-graph(5). The value of this constant is the only value
|
||||
* JGit currently supports.
|
||||
*/
|
||||
public static final int BITS_PER_ENTRY = 10;
|
||||
|
||||
/**
|
||||
* Seed value as described in man gitformat-commit-graph(5).
|
||||
*/
|
||||
private static final int SEED1 = 0x293ae76f;
|
||||
|
||||
/**
|
||||
* Seed value as described in man gitformat-commit-graph(5).
|
||||
*/
|
||||
private static final int SEED2 = 0x7e646e2c;
|
||||
|
||||
/**
|
||||
* A filter that matches every path.
|
||||
*/
|
||||
public static final ChangedPathFilter FULL = new ChangedPathFilter(
|
||||
new byte[] { (byte) 0xff }, 0, 1);
|
||||
|
||||
private static final ChangedPathFilter EMPTY = new ChangedPathFilter(
|
||||
new byte[] { (byte) 0 }, 0, 1);
|
||||
|
||||
private final byte[] data;
|
||||
|
||||
private final int offset;
|
||||
|
||||
private final int length;
|
||||
|
||||
/**
|
||||
* Constructs a changed path filter.
|
||||
*
|
||||
* @param data
|
||||
* data (possibly read from a commit graph file)
|
||||
* @param offset
|
||||
* offset into data
|
||||
* @param length
|
||||
* length of data
|
||||
*/
|
||||
private ChangedPathFilter(byte[] data, int offset, int length) {
|
||||
this.data = data;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a filter that matches all given paths.
|
||||
* <p>
|
||||
* Because of the nature of Bloom filters, this filter may also match paths
|
||||
* not in the given set.
|
||||
*
|
||||
* @param paths
|
||||
* the paths that the filter must match
|
||||
* @return the corresponding filter
|
||||
*/
|
||||
public static ChangedPathFilter fromPaths(Set<ByteBuffer> paths) {
|
||||
if (paths.isEmpty()) {
|
||||
return EMPTY;
|
||||
}
|
||||
byte[] bloom = new byte[-Math
|
||||
.floorDiv(-paths.size() * ChangedPathFilter.BITS_PER_ENTRY, 8)];
|
||||
for (ByteBuffer path : paths) {
|
||||
add(bloom, path.array(), path.position(),
|
||||
path.limit() - path.position());
|
||||
}
|
||||
return new ChangedPathFilter(bloom, 0, bloom.length);
|
||||
}
|
||||
|
||||
private static void add(byte[] changedPathFilterData, byte[] path,
|
||||
int offset, int length) {
|
||||
|
||||
int hash0 = MurmurHash3.hash32x86(path, offset, length, SEED1);
|
||||
int hash1 = MurmurHash3.hash32x86(path, offset, length, SEED2);
|
||||
for (int i = 0; i < PATH_HASH_COUNT; i++) {
|
||||
int pos = Integer.remainderUnsigned(hash0 + i * hash1,
|
||||
changedPathFilterData.length * 8);
|
||||
changedPathFilterData[pos / 8] |= (byte) (1 << (pos % 8));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes this filter to the given stream.
|
||||
*
|
||||
* @param s
|
||||
* stream to write to
|
||||
*/
|
||||
public void writeTo(ByteArrayOutputStream s) {
|
||||
s.write(data, offset, length);
|
||||
}
|
||||
}
|
|
@ -25,6 +25,10 @@ class CommitGraphConstants {
|
|||
|
||||
static final int CHUNK_ID_EXTRA_EDGE_LIST = 0x45444745; /* "EDGE" */
|
||||
|
||||
static final int CHUNK_ID_BLOOM_FILTER_INDEX = 0x42494458; /* "BIDX" */
|
||||
|
||||
static final int CHUNK_ID_BLOOM_FILTER_DATA = 0x42444154; /* "BDAT" */
|
||||
|
||||
/**
|
||||
* First 4 bytes describe the chunk id. Value 0 is a terminating label.
|
||||
* Other 8 bytes provide the byte-offset in current file for chunk to start.
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
package org.eclipse.jgit.internal.storage.commitgraph;
|
||||
|
||||
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA;
|
||||
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX;
|
||||
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA;
|
||||
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST;
|
||||
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT;
|
||||
|
@ -24,21 +26,30 @@ import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_NOT_COMPUTED;
|
|||
import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_UNKNOWN;
|
||||
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InterruptedIOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Stack;
|
||||
|
||||
import org.eclipse.jgit.annotations.NonNull;
|
||||
import org.eclipse.jgit.errors.CorruptObjectException;
|
||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||
import org.eclipse.jgit.errors.MissingObjectException;
|
||||
import org.eclipse.jgit.internal.JGitText;
|
||||
import org.eclipse.jgit.internal.storage.io.CancellableDigestOutputStream;
|
||||
import org.eclipse.jgit.lib.ObjectId;
|
||||
import org.eclipse.jgit.lib.ObjectReader;
|
||||
import org.eclipse.jgit.lib.ProgressMonitor;
|
||||
import org.eclipse.jgit.revwalk.RevCommit;
|
||||
import org.eclipse.jgit.treewalk.EmptyTreeIterator;
|
||||
import org.eclipse.jgit.treewalk.TreeWalk;
|
||||
import org.eclipse.jgit.util.NB;
|
||||
|
||||
/**
|
||||
|
@ -56,6 +67,8 @@ public class CommitGraphWriter {
|
|||
|
||||
private static final int GENERATION_NUMBER_MAX = 0x3FFFFFFF;
|
||||
|
||||
private static final int MAX_CHANGED_PATHS = 512;
|
||||
|
||||
private final int hashsz;
|
||||
|
||||
private final GraphCommits graphCommits;
|
||||
|
@ -111,7 +124,8 @@ public class CommitGraphWriter {
|
|||
}
|
||||
}
|
||||
|
||||
private List<ChunkHeader> createChunks() {
|
||||
private List<ChunkHeader> createChunks() throws MissingObjectException,
|
||||
IncorrectObjectTypeException, CorruptObjectException, IOException {
|
||||
List<ChunkHeader> chunks = new ArrayList<>();
|
||||
chunks.add(new ChunkHeader(CHUNK_ID_OID_FANOUT, GRAPH_FANOUT_SIZE));
|
||||
chunks.add(new ChunkHeader(CHUNK_ID_OID_LOOKUP,
|
||||
|
@ -122,6 +136,11 @@ public class CommitGraphWriter {
|
|||
chunks.add(new ChunkHeader(CHUNK_ID_EXTRA_EDGE_LIST,
|
||||
graphCommits.getExtraEdgeCnt() * 4));
|
||||
}
|
||||
BloomFilterChunks bloomFilterChunks = computeBloomFilterChunks();
|
||||
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_INDEX,
|
||||
bloomFilterChunks.index));
|
||||
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_DATA,
|
||||
bloomFilterChunks.data));
|
||||
return chunks;
|
||||
}
|
||||
|
||||
|
@ -171,6 +190,14 @@ public class CommitGraphWriter {
|
|||
case CHUNK_ID_EXTRA_EDGE_LIST:
|
||||
writeExtraEdges(out);
|
||||
break;
|
||||
case CHUNK_ID_BLOOM_FILTER_INDEX:
|
||||
case CHUNK_ID_BLOOM_FILTER_DATA:
|
||||
if (!chunk.data.isPresent()) {
|
||||
throw new IllegalStateException(
|
||||
"data for this chunk must be precomputed"); //$NON-NLS-1$
|
||||
}
|
||||
chunk.data.get().writeTo(out);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -305,6 +332,72 @@ public class CommitGraphWriter {
|
|||
return generations;
|
||||
}
|
||||
|
||||
private static Optional<HashSet<ByteBuffer>> computeBloomFilterPaths(
|
||||
ObjectReader or, RevCommit cmit) throws MissingObjectException,
|
||||
IncorrectObjectTypeException, CorruptObjectException, IOException {
|
||||
HashSet<ByteBuffer> paths = new HashSet<>();
|
||||
try (TreeWalk walk = new TreeWalk(null, or)) {
|
||||
walk.setRecursive(true);
|
||||
if (cmit.getParentCount() == 0) {
|
||||
walk.addTree(new EmptyTreeIterator());
|
||||
} else {
|
||||
walk.addTree(cmit.getParent(0).getTree());
|
||||
}
|
||||
walk.addTree(cmit.getTree());
|
||||
while (walk.next()) {
|
||||
if (walk.idEqual(0, 1)) {
|
||||
continue;
|
||||
}
|
||||
byte[] rawPath = walk.getRawPath();
|
||||
paths.add(ByteBuffer.wrap(rawPath));
|
||||
for (int i = 0; i < rawPath.length; i++) {
|
||||
if (rawPath[i] == '/') {
|
||||
paths.add(ByteBuffer.wrap(rawPath, 0, i));
|
||||
}
|
||||
if (paths.size() > MAX_CHANGED_PATHS) {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return Optional.of(paths);
|
||||
}
|
||||
|
||||
private BloomFilterChunks computeBloomFilterChunks()
|
||||
throws MissingObjectException, IncorrectObjectTypeException,
|
||||
CorruptObjectException, IOException {
|
||||
|
||||
ByteArrayOutputStream index = new ByteArrayOutputStream();
|
||||
ByteArrayOutputStream data = new ByteArrayOutputStream();
|
||||
|
||||
// Allocate scratch buffer for converting integers into
|
||||
// big-endian bytes.
|
||||
byte[] scratch = new byte[4];
|
||||
|
||||
NB.encodeInt32(scratch, 0, 1); // version 1
|
||||
data.write(scratch);
|
||||
NB.encodeInt32(scratch, 0, ChangedPathFilter.PATH_HASH_COUNT);
|
||||
data.write(scratch);
|
||||
NB.encodeInt32(scratch, 0, ChangedPathFilter.BITS_PER_ENTRY);
|
||||
data.write(scratch);
|
||||
int dataHeaderSize = data.size();
|
||||
|
||||
for (RevCommit cmit : graphCommits) {
|
||||
Optional<HashSet<ByteBuffer>> paths = computeBloomFilterPaths(
|
||||
graphCommits.getObjectReader(), cmit);
|
||||
ChangedPathFilter cpf;
|
||||
if (paths.isEmpty()) {
|
||||
cpf = ChangedPathFilter.FULL;
|
||||
} else {
|
||||
cpf = ChangedPathFilter.fromPaths(paths.get());
|
||||
}
|
||||
cpf.writeTo(data);
|
||||
NB.encodeInt32(scratch, 0, data.size() - dataHeaderSize);
|
||||
index.write(scratch);
|
||||
}
|
||||
return new BloomFilterChunks(index, data);
|
||||
}
|
||||
|
||||
private void writeExtraEdges(CancellableDigestOutputStream out)
|
||||
throws IOException {
|
||||
byte[] tmp = new byte[4];
|
||||
|
@ -331,9 +424,30 @@ public class CommitGraphWriter {
|
|||
|
||||
final long size;
|
||||
|
||||
final Optional<ByteArrayOutputStream> data;
|
||||
|
||||
public ChunkHeader(int id, long size) {
|
||||
this.id = id;
|
||||
this.size = size;
|
||||
this.data = Optional.empty();
|
||||
}
|
||||
|
||||
ChunkHeader(int id, ByteArrayOutputStream data) {
|
||||
this.id = id;
|
||||
this.size = data.size();
|
||||
this.data = Optional.of(data);
|
||||
}
|
||||
}
|
||||
|
||||
private static class BloomFilterChunks {
|
||||
final ByteArrayOutputStream index;
|
||||
|
||||
final ByteArrayOutputStream data;
|
||||
|
||||
BloomFilterChunks(ByteArrayOutputStream index,
|
||||
ByteArrayOutputStream data) {
|
||||
this.index = index;
|
||||
this.data = data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.eclipse.jgit.lib.AnyObjectId;
|
|||
import org.eclipse.jgit.lib.Constants;
|
||||
import org.eclipse.jgit.lib.ObjectId;
|
||||
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
|
||||
import org.eclipse.jgit.lib.ObjectReader;
|
||||
import org.eclipse.jgit.lib.ProgressMonitor;
|
||||
import org.eclipse.jgit.revwalk.RevCommit;
|
||||
import org.eclipse.jgit.revwalk.RevObject;
|
||||
|
@ -79,7 +80,7 @@ public class GraphCommits implements Iterable<RevCommit> {
|
|||
commits.add(c);
|
||||
}
|
||||
pm.endTask();
|
||||
return new GraphCommits(commits);
|
||||
return new GraphCommits(commits, walk.getObjectReader());
|
||||
}
|
||||
|
||||
private final List<RevCommit> sortedCommits;
|
||||
|
@ -88,13 +89,17 @@ public class GraphCommits implements Iterable<RevCommit> {
|
|||
|
||||
private final int extraEdgeCnt;
|
||||
|
||||
private final ObjectReader objectReader;
|
||||
|
||||
/**
|
||||
* Initialize the GraphCommits.
|
||||
*
|
||||
* @param commits
|
||||
* list of commits with their headers already parsed.
|
||||
* @param objectReader
|
||||
* object reader
|
||||
*/
|
||||
private GraphCommits(List<RevCommit> commits) {
|
||||
private GraphCommits(List<RevCommit> commits, ObjectReader objectReader) {
|
||||
Collections.sort(commits); // sorted by name
|
||||
sortedCommits = commits;
|
||||
commitPosMap = new ObjectIdOwnerMap<>();
|
||||
|
@ -107,6 +112,7 @@ public class GraphCommits implements Iterable<RevCommit> {
|
|||
commitPosMap.add(new CommitWithPosition(c, i));
|
||||
}
|
||||
this.extraEdgeCnt = cnt;
|
||||
this.objectReader = objectReader;
|
||||
}
|
||||
|
||||
int getOidPosition(RevCommit c) throws MissingObjectException {
|
||||
|
@ -125,6 +131,10 @@ public class GraphCommits implements Iterable<RevCommit> {
|
|||
return sortedCommits.size();
|
||||
}
|
||||
|
||||
ObjectReader getObjectReader() {
|
||||
return objectReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<RevCommit> iterator() {
|
||||
return sortedCommits.iterator();
|
||||
|
|
Loading…
Reference in New Issue