CommitGraphLoader: read changed-path filters

As described in the parent commit, add support for reading the BIDX and
BDAT chunks of the commit graph file, as described in man gitformat-
commit-graph(5).

This work is based on earlier work by Kyle Zhao
(I160f6b022afaa842c331fb9a086974e49dced7b2).

Change-Id: I82e02e6a3a3b758e6bf9d7bbd2198f0ffe3a331b
Signed-off-by: kylezhao <kylezhao@tencent.com>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
This commit is contained in:
Jonathan Tan 2023-04-24 12:55:30 -07:00
parent 49beb5ae51
commit ff0f7c174f
7 changed files with 186 additions and 2 deletions

View File

@ -10,9 +10,12 @@
package org.eclipse.jgit.internal.storage.commitgraph;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_UNKNOWN;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@ -196,6 +199,27 @@ public void testGraphWithMerges() throws Exception {
assertEquals(getGenerationNumber(c8), 5);
}
@Test
public void testGraphComputeChangedPaths() throws Exception {
RevCommit a = tr.commit(tr.tree(tr.file("d/f", tr.blob("a"))));
RevCommit b = tr.commit(tr.tree(tr.file("d/f", tr.blob("a"))), a);
RevCommit c = tr.commit(tr.tree(tr.file("d/f", tr.blob("b"))), b);
writeAndReadCommitGraph(Collections.singleton(c));
ChangedPathFilter acpf = commitGraph
.getChangedPathFilter(commitGraph.findGraphPosition(a));
assertTrue(acpf.maybeContains("d".getBytes(UTF_8)));
assertTrue(acpf.maybeContains("d/f".getBytes(UTF_8)));
ChangedPathFilter bcpf = commitGraph
.getChangedPathFilter(commitGraph.findGraphPosition(b));
assertFalse(bcpf.maybeContains("d".getBytes(UTF_8)));
assertFalse(bcpf.maybeContains("d/f".getBytes(UTF_8)));
ChangedPathFilter ccpf = commitGraph
.getChangedPathFilter(commitGraph.findGraphPosition(c));
assertTrue(ccpf.maybeContains("d".getBytes(UTF_8)));
assertTrue(ccpf.maybeContains("d/f".getBytes(UTF_8)));
}
void writeAndReadCommitGraph(Set<ObjectId> wants) throws Exception {
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
try (RevWalk walk = new RevWalk(db)) {

View File

@ -100,6 +100,23 @@ public static ChangedPathFilter fromPaths(Set<ByteBuffer> paths) {
return new ChangedPathFilter(bloom, 0, bloom.length);
}
/**
* Returns a filter read from a file.
*
* @param data
* data (read from a commit graph file)
* @param offset
* offset into data
* @param length
* length of data
*
* @return the corresponding filter
*/
public static ChangedPathFilter fromFile(byte[] data, int offset,
int length) {
return new ChangedPathFilter(data, offset, length);
}
private static void add(byte[] changedPathFilterData, byte[] path,
int offset, int length) {
@ -112,6 +129,28 @@ private static void add(byte[] changedPathFilterData, byte[] path,
}
}
/**
* Checks if this changed path filter could contain path.
*
* @param path
* path to check existence of
* @return true if the filter could contain path, false if the filter
* definitely does not contain path
*/
public boolean maybeContains(byte[] path) {
int hash0 = MurmurHash3.hash32x86(path, 0, path.length, SEED1);
int hash1 = MurmurHash3.hash32x86(path, 0, path.length, SEED2);
int bloomFilterBits = length * 8;
for (int i = 0; i < PATH_HASH_COUNT; i++) {
int pos = Integer.remainderUnsigned(hash0 + i * hash1,
bloomFilterBits);
if ((data[offset + (pos / 8)] & (byte) (1 << (pos % 8))) == 0) {
return false;
}
}
return true;
}
/**
* Writes this filter to the given stream.
*

View File

@ -47,6 +47,11 @@ public ObjectId getObjectId(int graphPos) {
return null;
}
@Override
public ChangedPathFilter getChangedPathFilter(int graphPos) {
return null;
}
@Override
public long getCommitCnt() {
return 0;
@ -92,6 +97,15 @@ public long getCommitCnt() {
*/
ObjectId getObjectId(int graphPos);
/**
* Get the changed path filter of the object at the commit-graph position.
*
* @param graphPos
* the position in the commit-graph of the object.
* @return the bloom filter or null if it's not found.
*/
ChangedPathFilter getChangedPathFilter(int graphPos);
/**
* Obtain the total number of commits described by this commit-graph.
*

View File

@ -10,6 +10,8 @@
package org.eclipse.jgit.internal.storage.commitgraph;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT;
@ -35,6 +37,10 @@ class CommitGraphBuilder {
private byte[] extraList;
private byte[] bloomFilterIndex;
private byte[] bloomFilterData;
/** @return A builder of {@link CommitGraph}. */
static CommitGraphBuilder builder() {
return new CommitGraphBuilder(OBJECT_ID_LENGTH);
@ -72,6 +78,20 @@ CommitGraphBuilder addExtraList(byte[] buffer)
return this;
}
CommitGraphBuilder addBloomFilterIndex(byte[] buffer)
throws CommitGraphFormatException {
assertChunkNotSeenYet(bloomFilterIndex, CHUNK_ID_BLOOM_FILTER_INDEX);
bloomFilterIndex = buffer;
return this;
}
CommitGraphBuilder addBloomFilterData(byte[] buffer)
throws CommitGraphFormatException {
assertChunkNotSeenYet(bloomFilterData, CHUNK_ID_BLOOM_FILTER_DATA);
bloomFilterData = buffer;
return this;
}
CommitGraph build() throws CommitGraphFormatException {
assertChunkNotNull(oidFanout, CHUNK_ID_OID_FANOUT);
assertChunkNotNull(oidLookup, CHUNK_ID_OID_LOOKUP);
@ -81,7 +101,9 @@ CommitGraph build() throws CommitGraphFormatException {
oidLookup);
GraphCommitData commitDataChunk = new GraphCommitData(hashLength,
commitData, extraList);
return new CommitGraphV1(index, commitDataChunk);
GraphChangedPathFilterData cpfData = new GraphChangedPathFilterData(
bloomFilterIndex, bloomFilterData);
return new CommitGraphV1(index, commitDataChunk, cpfData);
}
private void assertChunkNotNull(Object object, int chunkId)

View File

@ -10,6 +10,8 @@
package org.eclipse.jgit.internal.storage.commitgraph;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT;
@ -164,6 +166,12 @@ public static CommitGraph read(InputStream fd)
case CHUNK_ID_EXTRA_EDGE_LIST:
builder.addExtraList(buffer);
break;
case CHUNK_ID_BLOOM_FILTER_INDEX:
builder.addBloomFilterIndex(buffer);
break;
case CHUNK_ID_BLOOM_FILTER_DATA:
builder.addBloomFilterData(buffer);
break;
default:
LOG.warn(MessageFormat.format(
JGitText.get().commitGraphChunkUnknown,

View File

@ -24,9 +24,13 @@ class CommitGraphV1 implements CommitGraph {
private final GraphCommitData commitData;
CommitGraphV1(GraphObjectIndex index, GraphCommitData commitData) {
private final GraphChangedPathFilterData cpfData;
CommitGraphV1(GraphObjectIndex index, GraphCommitData commitData,
GraphChangedPathFilterData cpfData) {
this.idx = index;
this.commitData = commitData;
this.cpfData = cpfData;
}
@Override
@ -47,6 +51,11 @@ public ObjectId getObjectId(int graphPos) {
return idx.getObjectId(graphPos);
}
@Override
public ChangedPathFilter getChangedPathFilter(int graphPos) {
return cpfData.getChangedPathFilter(graphPos);
}
@Override
public long getCommitCnt() {
return idx.getCommitCnt();

View File

@ -0,0 +1,68 @@
/*
* Copyright (C) 2023, Google LLC.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.commitgraph;
import org.eclipse.jgit.util.NB;
/**
* Represents the BIDX and BDAT data found in a commit graph file.
*/
class GraphChangedPathFilterData {
private static final int BIDX_BYTES_PER_ENTRY = 4;
private static final int BDAT_HEADER_BYTES = 12;
private final byte[] bloomFilterIndex;
private final byte[] bloomFilterData;
/**
* Initialize the GraphChangedPathFilterData.
*
* @param bloomFilterIndex
* content of BIDX chunk, if it exists
* @param bloomFilterData
* content of BDAT chunk, if it exists
*/
GraphChangedPathFilterData(byte[] bloomFilterIndex,
byte[] bloomFilterData) {
if ((bloomFilterIndex == null) != (bloomFilterData == null)) {
bloomFilterIndex = null;
bloomFilterData = null;
}
if (bloomFilterData != null
&& (NB.decodeUInt32(bloomFilterData,
4) != ChangedPathFilter.PATH_HASH_COUNT
|| NB.decodeUInt32(bloomFilterData,
8) != ChangedPathFilter.BITS_PER_ENTRY)) {
bloomFilterIndex = null;
bloomFilterData = null;
}
this.bloomFilterIndex = bloomFilterIndex;
this.bloomFilterData = bloomFilterData;
}
ChangedPathFilter getChangedPathFilter(int graphPos) {
if (bloomFilterIndex == null) {
return null;
}
int priorCumul = graphPos == 0 ? 0
: NB.decodeInt32(bloomFilterIndex,
graphPos * BIDX_BYTES_PER_ENTRY - BIDX_BYTES_PER_ENTRY);
int cumul = NB.decodeInt32(bloomFilterIndex, graphPos * BIDX_BYTES_PER_ENTRY);
return ChangedPathFilter.fromFile(bloomFilterData,
priorCumul + BDAT_HEADER_BYTES,
cumul - priorCumul);
}
}