CommitGraphWriter: write changed-path filters

Add support for writing the BIDX and BDAT chunks of the commit graph
file, as described in man gitformat-commit-graph(5). The ability to read
such chunks will be added in a subsequent commit.

This work is based on earlier work by Kyle Zhao
(Ib863782af209f26381e3ca0a2c119b99e84b679c).

Change-Id: Ic18e6f0eeec7da1e1ff31751aabda5e6952dbe6e
Signed-off-by: kylezhao <kylezhao@tencent.com>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
motiejus-detectSymlinkSupport
Jonathan Tan 2023-04-18 15:20:02 -07:00
parent 5dc63514d0
commit 49beb5ae51
11 changed files with 570 additions and 13 deletions

View File

@ -21,6 +21,10 @@ java_library(
java_library(
name = "commons-codec",
visibility = [
"//org.eclipse.jgit:__pkg__",
"//org.eclipse.jgit.test:__pkg__",
],
exports = ["@commons-codec//jar"],
)

View File

@ -46,6 +46,7 @@ org.eclipse.jgit.pgm.debug.BenchmarkReftable
org.eclipse.jgit.pgm.debug.DiffAlgorithms
org.eclipse.jgit.pgm.debug.LfsStore
org.eclipse.jgit.pgm.debug.MakeCacheTree
org.eclipse.jgit.pgm.debug.ReadChangedPathFilter
org.eclipse.jgit.pgm.debug.ReadDirCache
org.eclipse.jgit.pgm.debug.ReadReftable
org.eclipse.jgit.pgm.debug.RebuildCommitGraph

View File

@ -0,0 +1,83 @@
/*
* Copyright (C) 2023, Google LLC
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.pgm.debug;
import java.io.FileInputStream;
import java.util.HashSet;
import org.eclipse.jgit.pgm.Command;
import org.eclipse.jgit.pgm.TextBuiltin;
import org.eclipse.jgit.util.NB;
import org.kohsuke.args4j.Argument;
/**
* Prints the contents of the BDAT chunk from commit-graph file.
* <p>
* This is a debugging tool for changed path filter development.
*/
@Command
class ReadChangedPathFilter extends TextBuiltin {
static final int CHUNK_ID_OID_FANOUT = 0x4f494446; /* "OIDF" */
static final int CHUNK_ID_BLOOM_FILTER_INDEX = 0x42494458; /* "BIDX" */
static final int CHUNK_ID_BLOOM_FILTER_DATA = 0x42444154; /* "BDAT" */
@Argument(index = 0)
private String input;
static HashSet<String> changedPathStrings(byte[] data) {
int oidf_offset = -1;
int bidx_offset = -1;
int bdat_offset = -1;
for (int i = 8; i < data.length - 4; i += 12) {
switch (NB.decodeInt32(data, i)) {
case CHUNK_ID_OID_FANOUT:
oidf_offset = (int) NB.decodeInt64(data, i + 4);
break;
case CHUNK_ID_BLOOM_FILTER_INDEX:
bidx_offset = (int) NB.decodeInt64(data, i + 4);
break;
case CHUNK_ID_BLOOM_FILTER_DATA:
bdat_offset = (int) NB.decodeInt64(data, i + 4);
break;
}
}
bdat_offset += 12; // skip version, hash count, bits per entry
int commit_count = NB.decodeInt32(data, oidf_offset + 255 * 4);
int[] changed_path_length_cumuls = new int[commit_count];
for (int i = 0; i < commit_count; i++) {
changed_path_length_cumuls[i] = NB.decodeInt32(data,
bidx_offset + i * 4);
}
HashSet<String> changed_paths = new HashSet<>();
for (int i = 0; i < commit_count; i++) {
int prior_cumul = i == 0 ? 0 : changed_path_length_cumuls[i - 1];
String changed_path = "";
for (int j = prior_cumul; j < changed_path_length_cumuls[i]; j++) {
changed_path += data[bdat_offset + j] + ",";
}
changed_paths.add(changed_path);
}
return changed_paths;
}
/** {@inheritDoc} */
@Override
protected void run() throws Exception {
try (FileInputStream in = new FileInputStream(input)
) {
byte[] data = in.readAllBytes();
outw.println(changedPathStrings(data).toString());
}
}
}

View File

@ -77,6 +77,7 @@ java_library(
resources = DATA,
deps = [
"//lib:assertj-core",
"//lib:commons-codec",
"//lib:junit",
"//lib:mockito",
"//lib:slf4j-simple",

View File

@ -10,19 +10,24 @@
package org.eclipse.jgit.internal.storage.commitgraph;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayOutputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.eclipse.jgit.dircache.DirCacheEntry;
import org.eclipse.jgit.internal.storage.file.FileRepository;
import org.eclipse.jgit.junit.RepositoryTestCase;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.revwalk.RevBlob;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.util.NB;
@ -76,11 +81,20 @@ public class CommitGraphWriterTest extends RepositoryTestCase {
assertTrue(data.length > 0);
byte[] headers = new byte[8];
System.arraycopy(data, 0, headers, 0, 8);
assertArrayEquals(new byte[] {'C', 'G', 'P', 'H', 1, 1, 4, 0}, headers);
assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, NB.decodeInt32(data, 8));
assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, NB.decodeInt32(data, 20));
assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, NB.decodeInt32(data, 32));
assertEquals(CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST, NB.decodeInt32(data, 44));
assertArrayEquals(new byte[] { 'C', 'G', 'P', 'H', 1, 1, 6, 0 },
headers);
assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT,
NB.decodeInt32(data, 8));
assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP,
NB.decodeInt32(data, 20));
assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA,
NB.decodeInt32(data, 32));
assertEquals(CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST,
NB.decodeInt32(data, 44));
assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX,
NB.decodeInt32(data, 56));
assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA,
NB.decodeInt32(data, 68));
}
@Test
@ -101,13 +115,208 @@ public class CommitGraphWriterTest extends RepositoryTestCase {
assertTrue(data.length > 0);
byte[] headers = new byte[8];
System.arraycopy(data, 0, headers, 0, 8);
assertArrayEquals(new byte[] {'C', 'G', 'P', 'H', 1, 1, 3, 0}, headers);
assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, NB.decodeInt32(data, 8));
assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, NB.decodeInt32(data, 20));
assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, NB.decodeInt32(data, 32));
assertArrayEquals(new byte[] { 'C', 'G', 'P', 'H', 1, 1, 5, 0 },
headers);
assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT,
NB.decodeInt32(data, 8));
assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP,
NB.decodeInt32(data, 20));
assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA,
NB.decodeInt32(data, 32));
assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX,
NB.decodeInt32(data, 44));
assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA,
NB.decodeInt32(data, 56));
}
static HashSet<String> changedPathStrings(byte[] data) {
int oidf_offset = -1;
int bidx_offset = -1;
int bdat_offset = -1;
for (int i = 8; i < data.length - 4; i += 12) {
switch (NB.decodeInt32(data, i)) {
case CommitGraphConstants.CHUNK_ID_OID_FANOUT:
oidf_offset = (int) NB.decodeInt64(data, i + 4);
break;
case CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX:
bidx_offset = (int) NB.decodeInt64(data, i + 4);
break;
case CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA:
bdat_offset = (int) NB.decodeInt64(data, i + 4);
break;
}
}
assertTrue(oidf_offset > 0);
assertTrue(bidx_offset > 0);
assertTrue(bdat_offset > 0);
bdat_offset += 12; // skip version, hash count, bits per entry
int commit_count = NB.decodeInt32(data, oidf_offset + 255 * 4);
int[] changed_path_length_cumuls = new int[commit_count];
for (int i = 0; i < commit_count; i++) {
changed_path_length_cumuls[i] = NB.decodeInt32(data,
bidx_offset + i * 4);
}
HashSet<String> changed_paths = new HashSet<>();
for (int i = 0; i < commit_count; i++) {
int prior_cumul = i == 0 ? 0 : changed_path_length_cumuls[i - 1];
String changed_path = "";
for (int j = prior_cumul; j < changed_path_length_cumuls[i]; j++) {
changed_path += data[bdat_offset + j] + ",";
}
changed_paths.add(changed_path);
}
return changed_paths;
}
/**
* Expected value generated using the following:
*
* <pre>
* # apply into git-repo: https://lore.kernel.org/git/cover.1684790529.git.jonathantanmy@google.com/
* (cd git-repo; make)
* git-repo/bin-wrappers/git init tested
* (cd tested; touch foo.txt; mkdir -p onedir/twodir; touch onedir/twodir/bar.txt)
* git-repo/bin-wrappers/git -C tested add foo.txt onedir
* git-repo/bin-wrappers/git -C tested commit -m first_commit
* (cd tested; mv foo.txt foo-new.txt; mv onedir/twodir/bar.txt onedir/twodir/bar-new.txt)
* git-repo/bin-wrappers/git -C tested add foo-new.txt onedir
* git-repo/bin-wrappers/git -C tested commit -a -m second_commit
* git-repo/bin-wrappers/git -C tested maintenance run
* git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
* (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
* </pre>
*
* @throws Exception
*/
@Test
public void testChangedPathFilterRootAndNested() throws Exception {
RevBlob emptyBlob = tr.blob(new byte[] {});
RevCommit root = tr.commit(tr.tree(tr.file("foo.txt", emptyBlob),
tr.file("onedir/twodir/bar.txt", emptyBlob)));
RevCommit tip = tr.commit(tr.tree(tr.file("foo-new.txt", emptyBlob),
tr.file("onedir/twodir/bar-new.txt", emptyBlob)), root);
Set<ObjectId> wants = Collections.singleton(tip);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
assertThat(changedPaths, containsInAnyOrder(
"109,-33,2,60,20,79,-11,116,",
"119,69,63,-8,0,"));
}
/**
* Expected value generated using the following:
*
* <pre>
* git -C git-repo checkout todo get version number when it is merged
* (cd git-repo; make)
* git-repo/bin-wrappers/git init tested
* (cd tested; mkdir -p onedir/twodir; touch onedir/twodir/a.txt; touch onedir/twodir/b.txt)
* git-repo/bin-wrappers/git -C tested add onedir
* git-repo/bin-wrappers/git -C tested commit -m first_commit
* (cd tested; mv onedir/twodir/a.txt onedir/twodir/c.txt; mv onedir/twodir/b.txt onedir/twodir/d.txt)
* git-repo/bin-wrappers/git -C tested add onedir
* git-repo/bin-wrappers/git -C tested commit -a -m second_commit
* git-repo/bin-wrappers/git -C tested maintenance run
* git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
* (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
* </pre>
*
* @throws Exception
*/
@Test
public void testChangedPathFilterOverlappingNested() throws Exception {
RevBlob emptyBlob = tr.blob(new byte[] {});
RevCommit root = tr
.commit(tr.tree(tr.file("onedir/twodir/a.txt", emptyBlob),
tr.file("onedir/twodir/b.txt", emptyBlob)));
RevCommit tip = tr
.commit(tr.tree(tr.file("onedir/twodir/c.txt", emptyBlob),
tr.file("onedir/twodir/d.txt", emptyBlob)), root);
Set<ObjectId> wants = Collections.singleton(tip);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
assertThat(changedPaths, containsInAnyOrder("61,30,23,-24,1,",
"-58,-51,-46,60,29,-121,113,90,"));
}
/**
* Expected value generated using the following:
*
* <pre>
* git -C git-repo checkout todo get version number when it is merged
* (cd git-repo; make)
* git-repo/bin-wrappers/git init tested
* (cd tested; touch )
* git-repo/bin-wrappers/git -C tested add
* git-repo/bin-wrappers/git -C tested commit -m first_commit
* git-repo/bin-wrappers/git -C tested maintenance run
* git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
* (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
* </pre>
*
* @throws Exception
*/
@Test
public void testChangedPathFilterHighBit() throws Exception {
RevBlob emptyBlob = tr.blob(new byte[] {});
// tr.file encodes using UTF-8
RevCommit root = tr.commit(tr.tree(tr.file("你好", emptyBlob)));
Set<ObjectId> wants = Collections.singleton(root);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
assertThat(changedPaths, containsInAnyOrder("16,16,"));
}
@Test
public void testChangedPathFilterEmptyChange() throws Exception {
RevCommit root = commit();
Set<ObjectId> wants = Collections.singleton(root);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
assertThat(changedPaths, containsInAnyOrder("0,"));
}
@Test
public void testChangedPathFilterManyChanges() throws Exception {
RevBlob emptyBlob = tr.blob(new byte[] {});
DirCacheEntry[] entries = new DirCacheEntry[513];
for (int i = 0; i < entries.length; i++) {
entries[i] = tr.file(i + ".txt", emptyBlob);
}
RevCommit root = tr.commit(tr.tree(entries));
Set<ObjectId> wants = Collections.singleton(root);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
assertThat(changedPaths, containsInAnyOrder("-1,"));
}
RevCommit commit(RevCommit... parents) throws Exception {
return tr.commit(parents);
}
}
}

View File

@ -20,6 +20,7 @@ java_library(
resources = RESOURCES,
deps = [
":insecure_cipher_factory",
"//lib:commons-codec",
"//lib:javaewah",
"//lib:slf4j-api",
],

View File

@ -46,6 +46,12 @@
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.15</version>
</dependency>
</dependencies>
<build>

View File

@ -0,0 +1,124 @@
/*
* Copyright (C) 2023, Google LLC
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.commitgraph;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.util.Set;
import org.apache.commons.codec.digest.MurmurHash3;
/**
* A changed path filter for a commit.
*
* @since 6.7
*/
public class ChangedPathFilter {
/**
* The number of times a path is hashed, as described in man
* gitformat-commit-graph(5). The value of this constant is the only value
* JGit currently supports.
*/
public static final int PATH_HASH_COUNT = 7;
/**
* The minimum bits per entry, as described in man
* gitformat-commit-graph(5). The value of this constant is the only value
* JGit currently supports.
*/
public static final int BITS_PER_ENTRY = 10;
/**
* Seed value as described in man gitformat-commit-graph(5).
*/
private static final int SEED1 = 0x293ae76f;
/**
* Seed value as described in man gitformat-commit-graph(5).
*/
private static final int SEED2 = 0x7e646e2c;
/**
* A filter that matches every path.
*/
public static final ChangedPathFilter FULL = new ChangedPathFilter(
new byte[] { (byte) 0xff }, 0, 1);
private static final ChangedPathFilter EMPTY = new ChangedPathFilter(
new byte[] { (byte) 0 }, 0, 1);
private final byte[] data;
private final int offset;
private final int length;
/**
* Constructs a changed path filter.
*
* @param data
* data (possibly read from a commit graph file)
* @param offset
* offset into data
* @param length
* length of data
*/
private ChangedPathFilter(byte[] data, int offset, int length) {
this.data = data;
this.offset = offset;
this.length = length;
}
/**
* Returns a filter that matches all given paths.
* <p>
* Because of the nature of Bloom filters, this filter may also match paths
* not in the given set.
*
* @param paths
* the paths that the filter must match
* @return the corresponding filter
*/
public static ChangedPathFilter fromPaths(Set<ByteBuffer> paths) {
if (paths.isEmpty()) {
return EMPTY;
}
byte[] bloom = new byte[-Math
.floorDiv(-paths.size() * ChangedPathFilter.BITS_PER_ENTRY, 8)];
for (ByteBuffer path : paths) {
add(bloom, path.array(), path.position(),
path.limit() - path.position());
}
return new ChangedPathFilter(bloom, 0, bloom.length);
}
private static void add(byte[] changedPathFilterData, byte[] path,
int offset, int length) {
int hash0 = MurmurHash3.hash32x86(path, offset, length, SEED1);
int hash1 = MurmurHash3.hash32x86(path, offset, length, SEED2);
for (int i = 0; i < PATH_HASH_COUNT; i++) {
int pos = Integer.remainderUnsigned(hash0 + i * hash1,
changedPathFilterData.length * 8);
changedPathFilterData[pos / 8] |= (byte) (1 << (pos % 8));
}
}
/**
* Writes this filter to the given stream.
*
* @param s
* stream to write to
*/
public void writeTo(ByteArrayOutputStream s) {
s.write(data, offset, length);
}
}

View File

@ -25,6 +25,10 @@ class CommitGraphConstants {
static final int CHUNK_ID_EXTRA_EDGE_LIST = 0x45444745; /* "EDGE" */
static final int CHUNK_ID_BLOOM_FILTER_INDEX = 0x42494458; /* "BIDX" */
static final int CHUNK_ID_BLOOM_FILTER_DATA = 0x42444154; /* "BDAT" */
/**
* First 4 bytes describe the chunk id. Value 0 is a terminating label.
* Other 8 bytes provide the byte-offset in current file for chunk to start.

View File

@ -10,6 +10,8 @@
package org.eclipse.jgit.internal.storage.commitgraph;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT;
@ -24,21 +26,30 @@ import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_NOT_COMPUTED;
import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_UNKNOWN;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Stack;
import org.eclipse.jgit.annotations.NonNull;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.io.CancellableDigestOutputStream;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.treewalk.EmptyTreeIterator;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.util.NB;
/**
@ -56,6 +67,8 @@ public class CommitGraphWriter {
private static final int GENERATION_NUMBER_MAX = 0x3FFFFFFF;
private static final int MAX_CHANGED_PATHS = 512;
private final int hashsz;
private final GraphCommits graphCommits;
@ -111,7 +124,8 @@ public class CommitGraphWriter {
}
}
private List<ChunkHeader> createChunks() {
private List<ChunkHeader> createChunks() throws MissingObjectException,
IncorrectObjectTypeException, CorruptObjectException, IOException {
List<ChunkHeader> chunks = new ArrayList<>();
chunks.add(new ChunkHeader(CHUNK_ID_OID_FANOUT, GRAPH_FANOUT_SIZE));
chunks.add(new ChunkHeader(CHUNK_ID_OID_LOOKUP,
@ -122,6 +136,11 @@ public class CommitGraphWriter {
chunks.add(new ChunkHeader(CHUNK_ID_EXTRA_EDGE_LIST,
graphCommits.getExtraEdgeCnt() * 4));
}
BloomFilterChunks bloomFilterChunks = computeBloomFilterChunks();
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_INDEX,
bloomFilterChunks.index));
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_DATA,
bloomFilterChunks.data));
return chunks;
}
@ -171,6 +190,14 @@ public class CommitGraphWriter {
case CHUNK_ID_EXTRA_EDGE_LIST:
writeExtraEdges(out);
break;
case CHUNK_ID_BLOOM_FILTER_INDEX:
case CHUNK_ID_BLOOM_FILTER_DATA:
if (!chunk.data.isPresent()) {
throw new IllegalStateException(
"data for this chunk must be precomputed"); //$NON-NLS-1$
}
chunk.data.get().writeTo(out);
break;
}
}
}
@ -305,6 +332,72 @@ public class CommitGraphWriter {
return generations;
}
private static Optional<HashSet<ByteBuffer>> computeBloomFilterPaths(
ObjectReader or, RevCommit cmit) throws MissingObjectException,
IncorrectObjectTypeException, CorruptObjectException, IOException {
HashSet<ByteBuffer> paths = new HashSet<>();
try (TreeWalk walk = new TreeWalk(null, or)) {
walk.setRecursive(true);
if (cmit.getParentCount() == 0) {
walk.addTree(new EmptyTreeIterator());
} else {
walk.addTree(cmit.getParent(0).getTree());
}
walk.addTree(cmit.getTree());
while (walk.next()) {
if (walk.idEqual(0, 1)) {
continue;
}
byte[] rawPath = walk.getRawPath();
paths.add(ByteBuffer.wrap(rawPath));
for (int i = 0; i < rawPath.length; i++) {
if (rawPath[i] == '/') {
paths.add(ByteBuffer.wrap(rawPath, 0, i));
}
if (paths.size() > MAX_CHANGED_PATHS) {
return Optional.empty();
}
}
}
}
return Optional.of(paths);
}
private BloomFilterChunks computeBloomFilterChunks()
throws MissingObjectException, IncorrectObjectTypeException,
CorruptObjectException, IOException {
ByteArrayOutputStream index = new ByteArrayOutputStream();
ByteArrayOutputStream data = new ByteArrayOutputStream();
// Allocate scratch buffer for converting integers into
// big-endian bytes.
byte[] scratch = new byte[4];
NB.encodeInt32(scratch, 0, 1); // version 1
data.write(scratch);
NB.encodeInt32(scratch, 0, ChangedPathFilter.PATH_HASH_COUNT);
data.write(scratch);
NB.encodeInt32(scratch, 0, ChangedPathFilter.BITS_PER_ENTRY);
data.write(scratch);
int dataHeaderSize = data.size();
for (RevCommit cmit : graphCommits) {
Optional<HashSet<ByteBuffer>> paths = computeBloomFilterPaths(
graphCommits.getObjectReader(), cmit);
ChangedPathFilter cpf;
if (paths.isEmpty()) {
cpf = ChangedPathFilter.FULL;
} else {
cpf = ChangedPathFilter.fromPaths(paths.get());
}
cpf.writeTo(data);
NB.encodeInt32(scratch, 0, data.size() - dataHeaderSize);
index.write(scratch);
}
return new BloomFilterChunks(index, data);
}
private void writeExtraEdges(CancellableDigestOutputStream out)
throws IOException {
byte[] tmp = new byte[4];
@ -331,9 +424,30 @@ public class CommitGraphWriter {
final long size;
final Optional<ByteArrayOutputStream> data;
public ChunkHeader(int id, long size) {
this.id = id;
this.size = size;
this.data = Optional.empty();
}
ChunkHeader(int id, ByteArrayOutputStream data) {
this.id = id;
this.size = data.size();
this.data = Optional.of(data);
}
}
private static class BloomFilterChunks {
final ByteArrayOutputStream index;
final ByteArrayOutputStream data;
BloomFilterChunks(ByteArrayOutputStream index,
ByteArrayOutputStream data) {
this.index = index;
this.data = data;
}
}
}

View File

@ -23,6 +23,7 @@ import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject;
@ -79,7 +80,7 @@ public class GraphCommits implements Iterable<RevCommit> {
commits.add(c);
}
pm.endTask();
return new GraphCommits(commits);
return new GraphCommits(commits, walk.getObjectReader());
}
private final List<RevCommit> sortedCommits;
@ -88,13 +89,17 @@ public class GraphCommits implements Iterable<RevCommit> {
private final int extraEdgeCnt;
private final ObjectReader objectReader;
/**
* Initialize the GraphCommits.
*
* @param commits
* list of commits with their headers already parsed.
* @param objectReader
* object reader
*/
private GraphCommits(List<RevCommit> commits) {
private GraphCommits(List<RevCommit> commits, ObjectReader objectReader) {
Collections.sort(commits); // sorted by name
sortedCommits = commits;
commitPosMap = new ObjectIdOwnerMap<>();
@ -107,6 +112,7 @@ public class GraphCommits implements Iterable<RevCommit> {
commitPosMap.add(new CommitWithPosition(c, i));
}
this.extraEdgeCnt = cnt;
this.objectReader = objectReader;
}
int getOidPosition(RevCommit c) throws MissingObjectException {
@ -125,6 +131,10 @@ public class GraphCommits implements Iterable<RevCommit> {
return sortedCommits.size();
}
ObjectReader getObjectReader() {
return objectReader;
}
@Override
public Iterator<RevCommit> iterator() {
return sortedCommits.iterator();