CommitGraphWriter: add option for writing/using bloom filters

Currently, bloom filters are written and used without any way to turn
them off. Add a per-repo config variable to control whether bloom
filters are written. As for reading, add a JGit option to control this.
(A JGit option is used instead of a per-repo config variable as there is
usually no reason not to use the bloom filters if they are present, but
a global control to disable them is useful if there turns out to be an
issue with the implementation of bloom filters.)

The config that controls reading is the same as C Git, but the config
for writing is not: C Git has no config to control writing, but whether
bloom filters are written depends on whether bloom filters are already
present and what arguments are passed to "git commit-graph write". See
the manpage of "git commit-graph" for more information.

Change-Id: I1b7b25340387673506252b9260b22bfe147bde58
This commit is contained in:
Ronald Bhuleskar 2023-05-17 16:29:14 -07:00 committed by Jonathan Tan
parent 77aec62141
commit 3b77e33ad8
9 changed files with 238 additions and 20 deletions

View File

@ -19,18 +19,22 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.eclipse.jgit.errors.ConfigInvalidException;
import org.eclipse.jgit.internal.storage.file.FileRepository;
import org.eclipse.jgit.junit.RepositoryTestCase;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.junit.Before;
import org.junit.Test;
@ -48,6 +52,7 @@ public class CommitGraphTest extends RepositoryTestCase {
public void setUp() throws Exception {
super.setUp();
tr = new TestRepository<>(db, new RevWalk(db), mockSystemReader);
mockSystemReader.setJGitConfig(new MockConfig());
}
@Test
@ -224,7 +229,7 @@ void writeAndReadCommitGraph(Set<ObjectId> wants) throws Exception {
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
try (RevWalk walk = new RevWalk(db)) {
CommitGraphWriter writer = new CommitGraphWriter(
GraphCommits.fromWalk(m, wants, walk));
GraphCommits.fromWalk(m, wants, walk), true);
ByteArrayOutputStream os = new ByteArrayOutputStream();
writer.write(m, os);
InputStream inputStream = new ByteArrayInputStream(
@ -276,4 +281,41 @@ int getGenerationNumber(ObjectId id) {
RevCommit commit(RevCommit... parents) throws Exception {
return tr.commit(parents);
}
private static final class MockConfig extends FileBasedConfig {
private MockConfig() {
super(null, null);
}
@Override
public void load() throws IOException, ConfigInvalidException {
// Do nothing
}
@Override
public void save() throws IOException {
// Do nothing
}
@Override
public boolean isOutdated() {
return false;
}
@Override
public String toString() {
return "MockConfig";
}
@Override
public boolean getBoolean(final String section, final String name,
final boolean defaultValue) {
if (section.equals(ConfigConstants.CONFIG_COMMIT_GRAPH_SECTION)
&& name.equals(
ConfigConstants.CONFIG_KEY_READ_CHANGED_PATHS)) {
return true;
}
return defaultValue;
}
}
}

View File

@ -17,11 +17,13 @@
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.eclipse.jgit.dircache.DirCacheEntry;
import org.eclipse.jgit.errors.ConfigInvalidException;
import org.eclipse.jgit.internal.storage.file.FileRepository;
import org.eclipse.jgit.internal.storage.file.GC;
import org.eclipse.jgit.junit.RepositoryTestCase;
@ -32,6 +34,7 @@
import org.eclipse.jgit.revwalk.RevBlob;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.util.NB;
import org.junit.Before;
import org.junit.Test;
@ -53,6 +56,7 @@ public void setUp() throws Exception {
os = new ByteArrayOutputStream();
tr = new TestRepository<>(db, new RevWalk(db), mockSystemReader);
walk = new RevWalk(db);
mockSystemReader.setJGitConfig(new MockConfig());
}
@Test
@ -75,7 +79,7 @@ public void testWriterWithExtraEdgeList() throws Exception {
Set<ObjectId> wants = Collections.singleton(tip);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer = new CommitGraphWriter(graphCommits, true);
writer.write(m, os);
assertEquals(5, graphCommits.size());
@ -109,7 +113,7 @@ public void testWriterWithoutExtraEdgeList() throws Exception {
Set<ObjectId> wants = Collections.singleton(tip);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer = new CommitGraphWriter(graphCommits, true);
writer.write(m, os);
assertEquals(4, graphCommits.size());
@ -201,7 +205,7 @@ public void testChangedPathFilterRootAndNested() throws Exception {
Set<ObjectId> wants = Collections.singleton(tip);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer = new CommitGraphWriter(graphCommits, true);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
@ -243,7 +247,7 @@ public void testChangedPathFilterOverlappingNested() throws Exception {
Set<ObjectId> wants = Collections.singleton(tip);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer = new CommitGraphWriter(graphCommits, true);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
@ -277,7 +281,7 @@ public void testChangedPathFilterHighBit() throws Exception {
Set<ObjectId> wants = Collections.singleton(root);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer = new CommitGraphWriter(graphCommits, true);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
@ -291,7 +295,7 @@ public void testChangedPathFilterEmptyChange() throws Exception {
Set<ObjectId> wants = Collections.singleton(root);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer = new CommitGraphWriter(graphCommits, true);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
@ -311,7 +315,7 @@ public void testChangedPathFilterManyChanges() throws Exception {
Set<ObjectId> wants = Collections.singleton(root);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer = new CommitGraphWriter(graphCommits, true);
writer.write(m, os);
HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
@ -329,6 +333,8 @@ public void testReuseBloomFilters() throws Exception {
ConfigConstants.CONFIG_COMMIT_GRAPH, true);
db.getConfig().setBoolean(ConfigConstants.CONFIG_GC_SECTION, null,
ConfigConstants.CONFIG_KEY_WRITE_COMMIT_GRAPH, true);
db.getConfig().setBoolean(ConfigConstants.CONFIG_GC_SECTION, null,
ConfigConstants.CONFIG_KEY_WRITE_CHANGED_PATHS, true);
GC gc = new GC(db);
gc.gc().get();
@ -338,7 +344,7 @@ public void testReuseBloomFilters() throws Exception {
Set<ObjectId> wants = Collections.singleton(tip);
NullProgressMonitor m = NullProgressMonitor.INSTANCE;
GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
writer = new CommitGraphWriter(graphCommits);
writer = new CommitGraphWriter(graphCommits, true);
CommitGraphWriter.Stats stats = writer.write(m, os);
assertEquals(1, stats.getChangedPathFiltersReused());
@ -355,4 +361,41 @@ public void testReuseBloomFilters() throws Exception {
RevCommit commit(RevCommit... parents) throws Exception {
return tr.commit(parents);
}
private static final class MockConfig extends FileBasedConfig {
private MockConfig() {
super(null, null);
}
@Override
public void load() throws IOException, ConfigInvalidException {
// Do nothing
}
@Override
public void save() throws IOException {
// Do nothing
}
@Override
public boolean isOutdated() {
return false;
}
@Override
public String toString() {
return "MockConfig";
}
@Override
public boolean getBoolean(final String section, final String name,
final boolean defaultValue) {
if (section.equals(ConfigConstants.CONFIG_COMMIT_GRAPH_SECTION)
&& name.equals(
ConfigConstants.CONFIG_KEY_READ_CHANGED_PATHS)) {
return true;
}
return defaultValue;
}
}
}

View File

@ -27,6 +27,7 @@
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.diff.DiffConfig;
import org.eclipse.jgit.errors.ConfigInvalidException;
import org.eclipse.jgit.internal.storage.file.GC;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ConfigConstants;
@ -34,6 +35,7 @@
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.revwalk.filter.MessageRevFilter;
import org.eclipse.jgit.revwalk.filter.RevFilter;
import org.eclipse.jgit.storage.file.FileBasedConfig;
import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
import org.eclipse.jgit.treewalk.filter.PathFilter;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
@ -47,6 +49,7 @@ public class RevWalkCommitGraphTest extends RevWalkTestCase {
public void setUp() throws Exception {
super.setUp();
rw = new RevWalk(db);
mockSystemReader.setJGitConfig(new MockConfig());
}
@Test
@ -504,6 +507,8 @@ void enableAndWriteCommitGraph() throws Exception {
ConfigConstants.CONFIG_COMMIT_GRAPH, true);
db.getConfig().setBoolean(ConfigConstants.CONFIG_GC_SECTION, null,
ConfigConstants.CONFIG_KEY_WRITE_COMMIT_GRAPH, true);
db.getConfig().setBoolean(ConfigConstants.CONFIG_GC_SECTION, null,
ConfigConstants.CONFIG_KEY_WRITE_CHANGED_PATHS, true);
GC gc = new GC(db);
gc.gc().get();
}
@ -512,4 +517,41 @@ private void reinitializeRevWalk() {
rw.close();
rw = new RevWalk(db);
}
private static final class MockConfig extends FileBasedConfig {
private MockConfig() {
super(null, null);
}
@Override
public void load() throws IOException, ConfigInvalidException {
// Do nothing
}
@Override
public void save() throws IOException {
// Do nothing
}
@Override
public boolean isOutdated() {
return false;
}
@Override
public String toString() {
return "MockConfig";
}
@Override
public boolean getBoolean(final String section, final String name,
final boolean defaultValue) {
if (section.equals(ConfigConstants.CONFIG_COMMIT_GRAPH_SECTION)
&& name.equals(
ConfigConstants.CONFIG_KEY_READ_CHANGED_PATHS)) {
return true;
}
return defaultValue;
}
}
}

View File

@ -27,9 +27,12 @@
import java.util.ArrayList;
import java.util.List;
import org.eclipse.jgit.errors.ConfigInvalidException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.NB;
import org.eclipse.jgit.util.SystemReader;
import org.eclipse.jgit.util.io.SilentFileInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -139,6 +142,17 @@ public static CommitGraph read(InputStream fd)
chunks.add(new ChunkSegment(id, offset));
}
boolean readChangedPathFilters;
try {
readChangedPathFilters = SystemReader.getInstance()
.getJGitConfig()
.getBoolean(ConfigConstants.CONFIG_COMMIT_GRAPH_SECTION,
ConfigConstants.CONFIG_KEY_READ_CHANGED_PATHS, false);
} catch (ConfigInvalidException e) {
// Use the default value if, for some reason, the config couldn't be read.
readChangedPathFilters = false;
}
CommitGraphBuilder builder = CommitGraphBuilder.builder();
for (int i = 0; i < numberOfChunks; i++) {
long chunkOffset = chunks.get(i).offset;
@ -167,10 +181,14 @@ public static CommitGraph read(InputStream fd)
builder.addExtraList(buffer);
break;
case CHUNK_ID_BLOOM_FILTER_INDEX:
builder.addBloomFilterIndex(buffer);
if (readChangedPathFilters) {
builder.addBloomFilterIndex(buffer);
}
break;
case CHUNK_ID_BLOOM_FILTER_DATA:
builder.addBloomFilterData(buffer);
if (readChangedPathFilters) {
builder.addBloomFilterData(buffer);
}
break;
default:
LOG.warn(MessageFormat.format(

View File

@ -73,6 +73,8 @@ public class CommitGraphWriter {
private final GraphCommits graphCommits;
private final boolean generateChangedPathFilters;
/**
* Create commit-graph writer for these commits.
*
@ -80,8 +82,22 @@ public class CommitGraphWriter {
* the commits which will be writen to the commit-graph.
*/
public CommitGraphWriter(@NonNull GraphCommits graphCommits) {
this(graphCommits, false);
}
/**
* Create commit-graph writer for these commits.
*
* @param graphCommits
* the commits which will be writen to the commit-graph.
* @param generateChangedPathFilters
* whether changed path filters are generated
*/
public CommitGraphWriter(@NonNull GraphCommits graphCommits,
boolean generateChangedPathFilters) {
this.graphCommits = graphCommits;
this.hashsz = OBJECT_ID_LENGTH;
this.generateChangedPathFilters = generateChangedPathFilters;
}
/**
@ -140,11 +156,14 @@ private List<ChunkHeader> createChunks(Stats stats)
chunks.add(new ChunkHeader(CHUNK_ID_EXTRA_EDGE_LIST,
graphCommits.getExtraEdgeCnt() * 4));
}
BloomFilterChunks bloomFilterChunks = computeBloomFilterChunks(stats);
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_INDEX,
bloomFilterChunks.index));
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_DATA,
bloomFilterChunks.data));
if (generateChangedPathFilters) {
BloomFilterChunks bloomFilterChunks = computeBloomFilterChunks(
stats);
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_INDEX,
bloomFilterChunks.index));
chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_DATA,
bloomFilterChunks.data));
}
return chunks;
}

View File

@ -81,6 +81,8 @@ public class DfsGarbageCollector {
private ReftableConfig reftableConfig;
private boolean convertToReftable = true;
private boolean writeCommitGraph;
private boolean writeBloomFilter;
private boolean includeDeletes;
private long reftableInitialMinUpdateIndex = 1;
private long reftableInitialMaxUpdateIndex = 1;
@ -298,6 +300,20 @@ public DfsGarbageCollector setWriteCommitGraph(boolean enable) {
return this;
}
/**
* Toggle bloom filter generation.
* <p>
* False by default.
*
* @param enable
* Whether bloom filter generation is enabled
* @return {@code this}
*/
public DfsGarbageCollector setWriteBloomFilter(boolean enable) {
writeBloomFilter = enable;
return this;
}
/**
* Create a single new pack file containing all of the live objects.
* <p>
@ -774,7 +790,8 @@ private void writeCommitGraph(DfsPackDescription pack, ProgressMonitor pm)
RevWalk pool = new RevWalk(ctx)) {
GraphCommits gcs = GraphCommits.fromWalk(pm, allTips, pool);
CountingOutputStream cnt = new CountingOutputStream(out);
CommitGraphWriter writer = new CommitGraphWriter(gcs);
CommitGraphWriter writer = new CommitGraphWriter(gcs,
writeBloomFilter);
writer.write(pm, cnt);
pack.addFileExt(COMMIT_GRAPH);
pack.setFileSize(COMMIT_GRAPH, cnt.getCount());

View File

@ -108,7 +108,8 @@ GraphSnapshot refresh() {
// commit-graph file was not modified
return this;
}
return new GraphSnapshot(file, FileSnapshot.save(file), open(file));
return new GraphSnapshot(file, FileSnapshot.save(file),
open(file));
}
private static CommitGraph open(File file) {

View File

@ -135,6 +135,8 @@ public class GC {
private static final int DEFAULT_AUTOLIMIT = 6700;
private static final boolean DEFAULT_WRITE_BLOOM_FILTER = false;
private static final boolean DEFAULT_WRITE_COMMIT_GRAPH = false;
private static volatile ExecutorService executor;
@ -959,7 +961,8 @@ void writeCommitGraph(@NonNull Set<? extends ObjectId> wants)
File tmpFile = null;
try (RevWalk walk = new RevWalk(repo)) {
CommitGraphWriter writer = new CommitGraphWriter(
GraphCommits.fromWalk(pm, wants, walk));
GraphCommits.fromWalk(pm, wants, walk),
shouldWriteBloomFilter());
tmpFile = File.createTempFile("commit_", //$NON-NLS-1$
COMMIT_GRAPH.getTmpExtension(),
repo.getObjectDatabase().getInfoDirectory());
@ -1011,7 +1014,7 @@ private void deleteTempCommitGraph() {
/**
* If {@code true}, will rewrite the commit-graph file when gc is run.
*
* @return true if commit-graph should be writen. Default is {@code false}.
* @return true if commit-graph should be written. Default is {@code false}.
*/
boolean shouldWriteCommitGraphWhenGc() {
return repo.getConfig().getBoolean(ConfigConstants.CONFIG_GC_SECTION,
@ -1019,6 +1022,17 @@ boolean shouldWriteCommitGraphWhenGc() {
DEFAULT_WRITE_COMMIT_GRAPH);
}
/**
* If {@code true}, generates bloom filter in the commit-graph file.
*
* @return true if bloom filter should be written. Default is {@code false}.
*/
boolean shouldWriteBloomFilter() {
return repo.getConfig().getBoolean(ConfigConstants.CONFIG_GC_SECTION,
ConfigConstants.CONFIG_KEY_WRITE_CHANGED_PATHS,
DEFAULT_WRITE_BLOOM_FILTER);
}
private static boolean isHead(Ref ref) {
return ref.getName().startsWith(Constants.R_HEADS);
}

View File

@ -109,6 +109,7 @@ public final class ConfigConstants {
/**
* The "fetch" section
*
* @since 3.3
*/
public static final String CONFIG_FETCH_SECTION = "fetch";
@ -947,4 +948,25 @@ public final class ConfigConstants {
* @since 5.13.2
*/
public static final String CONFIG_KEY_PRUNE_PRESERVED = "prunepreserved";
/**
* The "commitGraph" section
*
* @since 6.7
*/
public static final String CONFIG_COMMIT_GRAPH_SECTION = "commitGraph";
/**
* The "writeChangedPaths" key
*
* @since 6.7
*/
public static final String CONFIG_KEY_WRITE_CHANGED_PATHS = "writeChangedPaths";
/**
* The "readChangedPaths" key
*
* @since 6.7
*/
public static final String CONFIG_KEY_READ_CHANGED_PATHS = "readChangedPaths";
}