jgit: Add DfsBundleWriter

DfsBundleWriter writes out the entire repository to a Git bundle file.
It includes every object stored in the repository's pack files by
concatenating those pack files as-is, without traversing the object
graph. This makes bundle creation fast and cheap, which is useful for
backing up a repository as-is.

Change-Id: Iee20e4b1ab45b2a178dde8c72093c0dd83f04805
Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
This commit is contained in:
Masaya Suzuki 2020-01-23 16:47:40 -08:00
parent 957419610a
commit 9d2055152c
4 changed files with 189 additions and 9 deletions

View File

@ -0,0 +1,85 @@
/*
* Copyright (c) 2020, Google LLC and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.dfs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.Set;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.transport.FetchResult;
import org.eclipse.jgit.transport.RefSpec;
import org.eclipse.jgit.transport.TransportBundleStream;
import org.eclipse.jgit.transport.URIish;
import org.junit.Before;
import org.junit.Test;
/**
 * Round-trip test for {@link DfsBundleWriter}: a bundle is produced from an
 * in-memory DFS repository, fetched into a second in-memory repository, and
 * the result is inspected.
 */
public class DfsBundleWriterTest {
	/** Helper used to create commits and refs in the source repository. */
	private TestRepository<InMemoryRepository> git;

	/** Source repository that gets written out as a bundle. */
	private InMemoryRepository repo;

	/**
	 * Create a fresh in-memory source repository for each test.
	 *
	 * @throws IOException
	 *             if the test repository cannot be set up.
	 */
	@Before
	public void setUp() throws IOException {
		InMemoryRepository source = new InMemoryRepository(
				new DfsRepositoryDescription("test"));
		git = new TestRepository<>(source);
		repo = git.getRepository();
	}

	/**
	 * Bundle a repository holding one branch plus one commit that no ref
	 * points at, then check that both arrive in the receiving repository.
	 *
	 * @throws Exception
	 *             on any failure while building, writing or fetching.
	 */
	@Test
	public void testRepo() throws Exception {
		RevCommit root = git.commit().message("0").create();
		RevCommit tip = git.commit().message("1").parent(root).create();
		git.update("master", tip);
		// Created after the ref update and never attached to any ref.
		RevCommit orphan = git.commit().message("0").create();

		byte[] bundleBytes = makeBundle();

		DfsRepositoryDescription copyDesc = new DfsRepositoryDescription(
				"copy");
		try (Repository target = new InMemoryRepository(copyDesc)) {
			fetchFromBundle(target, bundleBytes);

			Ref master = target.exactRef("refs/heads/master");
			assertNotNull(master);
			assertEquals(tip.toObjectId(), master.getObjectId());

			// The bundle carries objects that no ref reaches, too.
			assertTrue(target.getObjectDatabase().has(orphan));
		}
	}

	/**
	 * Serialize {@link #repo} into an in-memory bundle.
	 *
	 * @return the raw bundle bytes.
	 * @throws IOException
	 *             if writing the bundle fails.
	 */
	private byte[] makeBundle() throws IOException {
		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		DfsBundleWriter.writeEntireRepositoryAsBundle(
				NullProgressMonitor.INSTANCE, buffer, repo);
		return buffer.toByteArray();
	}

	/**
	 * Fetch all branches ({@code refs/heads/*}) from a bundle into the given
	 * repository.
	 *
	 * @param newRepo
	 *            repository receiving the fetched refs and objects.
	 * @param bundle
	 *            raw bundle bytes to read from.
	 * @return the result reported by the bundle transport.
	 * @throws Exception
	 *             if the URI is invalid or the fetch fails.
	 */
	private static FetchResult fetchFromBundle(Repository newRepo,
			byte[] bundle) throws Exception {
		URIish location = new URIish("in-memory://");
		ByteArrayInputStream bundleStream = new ByteArrayInputStream(bundle);
		Set<RefSpec> branchSpecs = Collections
				.singleton(new RefSpec("refs/heads/*:refs/heads/*"));
		try (TransportBundleStream transport = new TransportBundleStream(
				newRepo, location, bundleStream)) {
			return transport.fetch(NullProgressMonitor.INSTANCE, branchSpecs);
		}
	}
}

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2020, Google LLC and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.dfs;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.jgit.internal.storage.pack.CachedPack;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.transport.BundleWriter;
/** Writes {@link DfsRepository} to a Git bundle. */
/** Utility that exports a {@link DfsRepository} as a Git bundle. */
public class DfsBundleWriter {
	/**
	 * Dump a whole {@link DfsRepository} into a Git bundle.
	 * <p>
	 * All refs of the repository are included in the bundle, and every pack
	 * of its object database is handed to the {@link BundleWriter} to be
	 * added as-is. This method tries to avoid traversing the pack files as
	 * much as possible.
	 *
	 * @param pm
	 *            progress monitor
	 * @param os
	 *            stream receiving the Git bundle
	 * @param db
	 *            repository to export
	 * @throws IOException
	 *             thrown if an I/O error occurs, e.g. the output stream
	 *             throws one.
	 */
	public static void writeEntireRepositoryAsBundle(ProgressMonitor pm,
			OutputStream os, DfsRepository db) throws IOException {
		BundleWriter writer = new BundleWriter(db);
		db.getRefDatabase().getRefs().forEach(ref -> writer.include(ref));
		writer.addObjectsAsIs(wrapAllPacks(db));
		writer.writeBundle(pm, os);
	}

	/** Wrap every pack of the repository's object database as a cached pack. */
	private static List<CachedPack> wrapAllPacks(DfsRepository db)
			throws IOException {
		List<CachedPack> wrapped = new ArrayList<>();
		for (DfsPackFile pack : db.getObjectDatabase().getPacks()) {
			wrapped.add(new DfsCachedPack(pack));
		}
		return wrapped;
	}

	/** Static utility holder; never instantiated. */
	private DfsBundleWriter() {
	}
}

View File

@ -754,6 +754,19 @@ public void preparePack(@NonNull Iterator<RevObject> objectsSource)
}
}
/**
* Prepare the list of objects to be written to the pack stream.
*
* <p>
* PackWriter will concat and write out the specified packs as-is.
*
* <p>
* This overload only adds the given packs to this writer's collection of
* cached packs; it takes no progress monitor and no object ids.
*
* @param c
*            cached packs to be written.
*/
public void preparePack(Collection<? extends CachedPack> c) {
// NOTE(review): cachedPacks is declared outside this hunk; confirm its
// type and whether repeated calls are expected to accumulate packs.
cachedPacks.addAll(c);
}
/**
* Prepare the list of objects to be written to the pack stream.
* <p>

View File

@ -17,12 +17,16 @@
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.pack.CachedPack;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
@ -62,6 +66,8 @@ public class BundleWriter {
/** Object ids collected while including refs; passed to the pack writer as tag targets. */
private final Set<ObjectId> tagTargets;
/**
* Packs registered through {@link #addObjectsAsIs(Collection)}. When this
* list is non-empty, {@link #writeBundle} hands these packs to the pack
* writer instead of preparing the pack from the included and assumed
* objects.
*/
private final List<CachedPack> cachedPacks = new ArrayList<>();
private PackConfig packConfig;
/** Callback passed to the pack writer when the bundle is written. */
private ObjectCountCallback callback;
@ -149,6 +155,25 @@ else if (r.getObjectId() != null
tagTargets.add(r.getObjectId());
}
/**
 * Register existing packs whose objects go into the bundle file.
 *
 * <p>
 * Once packs have been registered this way, object traversal is disabled
 * and the specified pack files are saved directly to the Git bundle file.
 *
 * <p>
 * In contrast to {@link #include}, the refs recorded in the bundle are
 * left untouched. Objects in these packs end up in the bundle file even
 * when no ref reaches them.
 *
 * @param c
 *            packs to include
 */
public void addObjectsAsIs(Collection<? extends CachedPack> c) {
	this.cachedPacks.addAll(c);
}
/**
* Assume a commit is available on the recipient's side.
* <p>
@ -187,19 +212,24 @@ public void writeBundle(ProgressMonitor monitor, OutputStream os)
try (PackWriter packWriter = newPackWriter()) {
packWriter.setObjectCountCallback(callback);
final HashSet<ObjectId> inc = new HashSet<>();
final HashSet<ObjectId> exc = new HashSet<>();
inc.addAll(include.values());
for (RevCommit r : assume)
exc.add(r.getId());
packWriter.setIndexDisabled(true);
packWriter.setDeltaBaseAsOffset(true);
packWriter.setThin(!exc.isEmpty());
packWriter.setReuseValidatingObjects(false);
if (exc.isEmpty()) {
packWriter.setTagTargets(tagTargets);
if (cachedPacks.isEmpty()) {
HashSet<ObjectId> inc = new HashSet<>();
HashSet<ObjectId> exc = new HashSet<>();
inc.addAll(include.values());
for (RevCommit r : assume) {
exc.add(r.getId());
}
if (exc.isEmpty()) {
packWriter.setTagTargets(tagTargets);
}
packWriter.setThin(!exc.isEmpty());
packWriter.preparePack(monitor, inc, exc);
} else {
packWriter.preparePack(cachedPacks);
}
packWriter.preparePack(monitor, inc, exc);
final Writer w = new OutputStreamWriter(os, UTF_8);
w.write(TransportBundle.V2_BUNDLE_SIGNATURE);