GC: Pack RefTrees in their own pack

The RefTree graph needs to be quickly accessed to read references.
It is also a distinct graph, disconnected from the rest of the
repository. Store its commit and tree objects in their own pack.

Change-Id: Icbb735be8fa91ccbf0708ca3a219b364e11a6b83
This commit is contained in:
Shawn Pearce 2016-01-12 10:50:36 -08:00
parent 398d8e877f
commit 40051505d7
4 changed files with 197 additions and 33 deletions

View File

@ -44,18 +44,18 @@
package org.eclipse.jgit.internal.storage.dfs;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_TXN;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.lib.RefDatabase.ALL;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.eclipse.jgit.internal.JGitText;
@ -63,6 +63,7 @@
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.NullProgressMonitor;
@ -94,14 +95,11 @@ public class DfsGarbageCollector {
private long coalesceGarbageLimit = 50 << 20;
private Map<String, Ref> refsBefore;
private List<DfsPackFile> packsBefore;
private Set<ObjectId> allHeads;
private Set<ObjectId> nonHeads;
private Set<ObjectId> txnHeads;
private Set<ObjectId> tagTargets;
/**
@ -197,19 +195,22 @@ public boolean pack(ProgressMonitor pm) throws IOException {
refdb.refresh();
objdb.clearCache();
refsBefore = refdb.getRefs(ALL);
Collection<Ref> refsBefore = RefTreeNames.allRefs(refdb);
packsBefore = packsToRebuild();
if (packsBefore.isEmpty())
return true;
allHeads = new HashSet<ObjectId>();
nonHeads = new HashSet<ObjectId>();
txnHeads = new HashSet<ObjectId>();
tagTargets = new HashSet<ObjectId>();
for (Ref ref : refsBefore.values()) {
for (Ref ref : refsBefore) {
if (ref.isSymbolic() || ref.getObjectId() == null)
continue;
if (isHead(ref))
allHeads.add(ref.getObjectId());
else if (RefTreeNames.isRefTree(refdb, ref.getName()))
txnHeads.add(ref.getObjectId());
else
nonHeads.add(ref.getObjectId());
if (ref.getPeeledObjectId() != null)
@ -221,6 +222,7 @@ public boolean pack(ProgressMonitor pm) throws IOException {
try {
packHeads(pm);
packRest(pm);
packRefTreeGraph(pm);
packGarbage(pm);
objdb.commitPack(newPackDesc, toPrune());
rollback = false;
@ -276,12 +278,11 @@ private void packHeads(ProgressMonitor pm) throws IOException {
try (PackWriter pw = newPackWriter()) {
pw.setTagTargets(tagTargets);
pw.preparePack(pm, allHeads, Collections.<ObjectId> emptySet());
pw.preparePack(pm, allHeads, none());
if (0 < pw.getObjectCount())
writePack(GC, pw, pm);
}
}
private void packRest(ProgressMonitor pm) throws IOException {
if (nonHeads.isEmpty())
return;
@ -295,6 +296,23 @@ private void packRest(ProgressMonitor pm) throws IOException {
}
}
/**
 * Write the graph reachable from the collected RefTree heads
 * ({@code txnHeads}) into a pack of its own, tagged {@code GC_TXN}.
 * <p>
 * Every object set held in {@code newPackObj} is excluded from the
 * writer first (presumably the objects already written by the packs
 * produced earlier in this run -- verify against {@code writePack}).
 * Nothing is written when there are no RefTree heads or when the
 * writer ends up with no objects.
 *
 * @param pm
 *            progress monitor handed to the pack writer.
 * @throws IOException
 *             the pack cannot be prepared or written.
 */
private void packRefTreeGraph(ProgressMonitor pm) throws IOException {
	if (!txnHeads.isEmpty()) {
		try (PackWriter writer = newPackWriter()) {
			for (ObjectIdSet alreadyPacked : newPackObj) {
				writer.excludeObjects(alreadyPacked);
			}
			writer.preparePack(pm, txnHeads, none());
			if (writer.getObjectCount() > 0) {
				writePack(GC_TXN, writer, pm);
			}
		}
	}
}
/**
 * Shorthand for an empty set of object ids, used where a pack writer
 * call takes a set but there is nothing to pass.
 *
 * @return the shared empty set from {@link Collections#emptySet()}.
 */
private static Set<ObjectId> none() {
	Set<ObjectId> empty = Collections.emptySet();
	return empty;
}
private void packGarbage(ProgressMonitor pm) throws IOException {
// TODO(sop) This is ugly. The garbage pack needs to be deleted.
PackConfig cfg = new PackConfig(packConfig);

View File

@ -90,6 +90,13 @@ public static enum PackSource {
*/
GC(1),
/**
* RefTreeGraph pack was created by Git garbage collection.
*
* @see DfsGarbageCollector
*/
GC_TXN(1),
/**
* The pack was created by compacting multiple packs together.
* <p>

View File

@ -45,7 +45,6 @@
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.lib.RefDatabase.ALL;
import java.io.File;
import java.io.FileOutputStream;
@ -63,11 +62,9 @@
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
@ -80,6 +77,7 @@
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
@ -128,7 +126,7 @@ public class GC {
* difference between the current refs and the refs which existed during
* last {@link #repack()}.
*/
private Map<String, Ref> lastPackedRefs;
private Collection<Ref> lastPackedRefs;
/**
* Holds the starting time of the last repack() execution. This is needed in
@ -362,17 +360,20 @@ public void prune(Set<ObjectId> objectsToKeep) throws IOException,
// during last repack(). Only those refs will survive which have been
// added or modified since the last repack. Only these can save existing
// loose refs from being pruned.
Map<String, Ref> newRefs;
Collection<Ref> newRefs;
if (lastPackedRefs == null || lastPackedRefs.isEmpty())
newRefs = getAllRefs();
else {
newRefs = new HashMap<String, Ref>();
for (Iterator<Map.Entry<String, Ref>> i = getAllRefs().entrySet()
.iterator(); i.hasNext();) {
Entry<String, Ref> newEntry = i.next();
Ref old = lastPackedRefs.get(newEntry.getKey());
if (!equals(newEntry.getValue(), old))
newRefs.put(newEntry.getKey(), newEntry.getValue());
Map<String, Ref> last = new HashMap<>();
for (Ref r : lastPackedRefs) {
last.put(r.getName(), r);
}
newRefs = new ArrayList<>();
for (Ref r : getAllRefs()) {
Ref old = last.get(r.getName());
if (!equals(r, old)) {
newRefs.add(r);
}
}
}
@ -384,10 +385,10 @@ public void prune(Set<ObjectId> objectsToKeep) throws IOException,
// leave this method.
ObjectWalk w = new ObjectWalk(repo);
try {
for (Ref cr : newRefs.values())
for (Ref cr : newRefs)
w.markStart(w.parseAny(cr.getObjectId()));
if (lastPackedRefs != null)
for (Ref lpr : lastPackedRefs.values())
for (Ref lpr : lastPackedRefs)
w.markUninteresting(w.parseAny(lpr.getObjectId()));
removeReferenced(deletionCandidates, w);
} finally {
@ -405,11 +406,11 @@ public void prune(Set<ObjectId> objectsToKeep) throws IOException,
// additional reflog entries not handled during last repack()
ObjectWalk w = new ObjectWalk(repo);
try {
for (Ref ar : getAllRefs().values())
for (Ref ar : getAllRefs())
for (ObjectId id : listRefLogObjects(ar, lastRepackTime))
w.markStart(w.parseAny(id));
if (lastPackedRefs != null)
for (Ref lpr : lastPackedRefs.values())
for (Ref lpr : lastPackedRefs)
w.markUninteresting(w.parseAny(lpr.getObjectId()));
removeReferenced(deletionCandidates, w);
} finally {
@ -530,19 +531,23 @@ public Collection<PackFile> repack() throws IOException {
Collection<PackFile> toBeDeleted = repo.getObjectDatabase().getPacks();
long time = System.currentTimeMillis();
Map<String, Ref> refsBefore = getAllRefs();
Collection<Ref> refsBefore = getAllRefs();
Set<ObjectId> allHeads = new HashSet<ObjectId>();
Set<ObjectId> nonHeads = new HashSet<ObjectId>();
Set<ObjectId> txnHeads = new HashSet<ObjectId>();
Set<ObjectId> tagTargets = new HashSet<ObjectId>();
Set<ObjectId> indexObjects = listNonHEADIndexObjects();
RefDatabase refdb = repo.getRefDatabase();
for (Ref ref : refsBefore.values()) {
for (Ref ref : refsBefore) {
nonHeads.addAll(listRefLogObjects(ref, 0));
if (ref.isSymbolic() || ref.getObjectId() == null)
continue;
if (ref.getName().startsWith(Constants.R_HEADS))
allHeads.add(ref.getObjectId());
else if (RefTreeNames.isRefTree(refdb, ref.getName()))
txnHeads.add(ref.getObjectId());
else
nonHeads.add(ref.getObjectId());
if (ref.getPeeledObjectId() != null)
@ -572,6 +577,11 @@ public Collection<PackFile> repack() throws IOException {
if (rest != null)
ret.add(rest);
}
if (!txnHeads.isEmpty()) {
PackFile txn = writePack(txnHeads, null, null, excluded);
if (txn != null)
ret.add(txn);
}
try {
deleteOldPacks(toBeDeleted, ret);
} catch (ParseException e) {
@ -624,11 +634,16 @@ private Set<ObjectId> listRefLogObjects(Ref ref, long minTime) throws IOExceptio
* @return a collection of all refs, including the additional refs
* @throws IOException
*/
private Map<String, Ref> getAllRefs() throws IOException {
Map<String, Ref> ret = repo.getRefDatabase().getRefs(ALL);
for (Ref ref : repo.getRefDatabase().getAdditionalRefs())
ret.put(ref.getName(), ref);
return ret;
private Collection<Ref> getAllRefs() throws IOException {
Collection<Ref> refs = RefTreeNames.allRefs(repo.getRefDatabase());
List<Ref> addl = repo.getRefDatabase().getAdditionalRefs();
if (!addl.isEmpty()) {
List<Ref> all = new ArrayList<>(refs.size() + addl.size());
all.addAll(refs);
all.addAll(addl);
return all;
}
return refs;
}
/**

View File

@ -0,0 +1,124 @@
/*
* Copyright (C) 2016, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.internal.storage.reftree;
import static org.eclipse.jgit.lib.RefDatabase.ALL;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.RefDatabase;
/**
 * Static helpers that recognize RefTree reference names and enumerate
 * references for a {@link RefDatabase}; not instantiable.
 */
public class RefTreeNames {
	/**
	 * Suffix appended to {@link RefTreeDatabase#getTxnNamespace()} to form
	 * the subspace holding user data.
	 * <p>
	 * A {@link RefTreeDatabase}'s namespace may include a subspace (e.g.
	 * {@code "refs/txn/stage/"}) containing commit objects from the usual
	 * user portion of the repository (e.g. {@code "refs/heads/"}). Those
	 * should be packed by the garbage collector alongside other user
	 * content rather than with the RefTree, so names under this subspace
	 * are not reported as RefTrees.
	 */
	private static final String STAGE = "stage/"; //$NON-NLS-1$

	/**
	 * Determine if the reference is likely to be a RefTree.
	 * <p>
	 * Only a {@link RefTreeDatabase} can yield {@code true}: the name must
	 * either equal the database's committed transaction reference, or lie
	 * inside its transaction namespace but outside the {@code "stage/"}
	 * subspace of that namespace.
	 *
	 * @param refdb
	 *            database instance.
	 * @param ref
	 *            reference name.
	 * @return {@code true} if the reference is a RefTree.
	 */
	public static boolean isRefTree(RefDatabase refdb, String ref) {
		if (!(refdb instanceof RefTreeDatabase)) {
			return false;
		}
		RefTreeDatabase tree = (RefTreeDatabase) refdb;
		if (ref.equals(tree.getTxnCommitted())) {
			return true;
		}
		String ns = tree.getTxnNamespace();
		if (ns == null || !ref.startsWith(ns)) {
			return false;
		}
		// Names under the staging subspace carry user content, not RefTrees.
		return !ref.startsWith(ns + STAGE);
	}

	/**
	 * Snapshot all references from a RefTreeDatabase and its bootstrap.
	 * <p>
	 * For any other kind of database the values of {@code getRefs(ALL)}
	 * are returned directly. For a {@link RefTreeDatabase} the result is a
	 * new list holding the database's own references followed by those of
	 * its bootstrap database. No de-duplication is done, so there may be
	 * name conflicts with multiple {@link Ref} objects containing the same
	 * name in the returned collection.
	 *
	 * @param refdb
	 *            database instance.
	 * @return all known references.
	 * @throws IOException
	 *             references cannot be enumerated.
	 */
	public static Collection<Ref> allRefs(RefDatabase refdb)
			throws IOException {
		Collection<Ref> primary = refdb.getRefs(ALL).values();
		if (refdb instanceof RefTreeDatabase) {
			RefDatabase bootstrap = ((RefTreeDatabase) refdb).getBootstrap();
			Collection<Ref> extra = bootstrap.getRefs(ALL).values();
			List<Ref> merged = new ArrayList<>(primary.size() + extra.size());
			merged.addAll(primary);
			merged.addAll(extra);
			return merged;
		}
		return primary;
	}

	private RefTreeNames() {
		// Static utility holder; never instantiated.
	}
}