ObjectChecker: allow some objects to skip errors

Some ancient objects may be broken, but in a relatively harmless way.
Allow the ObjectChecker caller to whitelist specific objects that are
going to fail checks, but that a human has reviewed and decided are OK
enough to permit continued use.

This avoids needing to rewrite history to scrub the broken objects out.

Honor the git-core fsck.skipList configuration setting when receiving a
push or fetching from a remote repository.

Change-Id: I62bd7c0b0848981f73dd7c752860fd02794233a6
This commit is contained in:
Shawn Pearce 2015-12-29 15:52:16 -08:00
parent e3acf01748
commit fa7ce0e0f3
7 changed files with 765 additions and 930 deletions

View File

@ -822,7 +822,7 @@ public void fsck(RevObject... tips) throws MissingObjectException,
break;
final byte[] bin = db.open(o, o.getType()).getCachedBytes();
oc.checkCommit(bin);
oc.checkCommit(o, bin);
assertHash(o, bin);
}
@ -832,7 +832,7 @@ public void fsck(RevObject... tips) throws MissingObjectException,
break;
final byte[] bin = db.open(o, o.getType()).getCachedBytes();
oc.check(o.getType(), bin);
oc.check(o, o.getType(), bin);
assertHash(o, bin);
}
}

View File

@ -0,0 +1,106 @@
/*
* Copyright (C) 2015, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.internal.storage.file;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.MutableObjectId;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
import org.eclipse.jgit.lib.ObjectIdSet;
/**
 * Lazily loads a set of ObjectIds, one per line.
 * <p>
 * The source file is not opened until the first call to
 * {@link #contains(AnyObjectId)}. The parsed result is then retained and the
 * file is never read again by this instance. Loading is best-effort: IO
 * errors, blank lines, {@code #} comment lines and lines that are not a
 * valid object name are ignored rather than reported.
 * <p>
 * The lazy initialization is not synchronized.
 */
public class LazyObjectIdSetFile implements ObjectIdSet {
	private final File src;

	/** Parsed contents of {@link #src}; {@code null} until first use. */
	private ObjectIdOwnerMap<Entry> set;

	/**
	 * Create a new lazy set from a file.
	 *
	 * @param src
	 *            the source file. It is not accessed until the set is first
	 *            queried.
	 */
	public LazyObjectIdSetFile(File src) {
		this.src = src;
	}

	/**
	 * Test whether an object name is listed in the source file.
	 *
	 * @param objectId
	 *            the object name to look up.
	 * @return {@code true} if the name was successfully read from the file.
	 */
	@Override
	public boolean contains(AnyObjectId objectId) {
		if (set == null) {
			set = load();
		}
		return set.contains(objectId);
	}

	/**
	 * Read and parse the source file.
	 *
	 * @return the names that could be parsed; empty if the file could not be
	 *         opened, and possibly partial if reading failed midway.
	 */
	private ObjectIdOwnerMap<Entry> load() {
		ObjectIdOwnerMap<Entry> r = new ObjectIdOwnerMap<>();
		try (FileInputStream fin = new FileInputStream(src);
				Reader rin = new InputStreamReader(fin, UTF_8);
				BufferedReader br = new BufferedReader(rin)) {
			MutableObjectId id = new MutableObjectId();
			for (String line; (line = br.readLine()) != null;) {
				// Tolerate surrounding whitespace (e.g. a stray '\r' from
				// CRLF files), blank lines and '#' comments.
				String name = line.trim();
				if (name.isEmpty() || name.charAt(0) == '#') {
					continue;
				}
				try {
					id.fromString(name);
				} catch (IllegalArgumentException notAnObjectName) {
					// A malformed line must not make every later
					// contains() call fail; skip it like an IO error.
					continue;
				}
				if (!r.contains(id)) {
					// Entry copies the value, so the mutable id is reusable.
					r.add(new Entry(id));
				}
			}
		} catch (IOException e) {
			// Ignore IO errors accessing the lazy set.
		}
		return r;
	}

	/** Map entry carrying only the object name itself. */
	static class Entry extends ObjectIdOwnerMap.Entry {
		Entry(AnyObjectId id) {
			super(id);
		}
	}
}

View File

@ -44,6 +44,10 @@
package org.eclipse.jgit.lib;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
import static org.eclipse.jgit.lib.Constants.OBJ_TAG;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import static org.eclipse.jgit.util.RawParseUtils.match;
import static org.eclipse.jgit.util.RawParseUtils.nextLF;
import static org.eclipse.jgit.util.RawParseUtils.parseBase10;
@ -54,6 +58,7 @@
import java.util.Locale;
import java.util.Set;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.util.MutableInteger;
@ -99,15 +104,28 @@ public class ObjectChecker {
public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$
private final MutableObjectId tempId = new MutableObjectId();
private final MutableInteger ptrout = new MutableInteger();
private ObjectIdSet skipList;
private boolean allowZeroMode;
private boolean allowInvalidPersonIdent;
private boolean windows;
private boolean macosx;
/**
 * Configure the objects that are exempt from the tolerable checks.
 * <p>
 * Objects named by the given set are known to be malformed in a non-fatal
 * way and have been accepted anyway; the checker will not reject them for
 * those defects.
 *
 * @param objects
 *            names of the objects to be ignored by the checker, or
 *            {@code null} to exempt nothing.
 * @return {@code this}
 * @since 4.2
 */
public ObjectChecker setSkipList(@Nullable ObjectIdSet objects) {
	this.skipList = objects;
	return this;
}
/**
* Enable accepting leading zero mode in tree entries.
* <p>
@ -183,19 +201,40 @@ public ObjectChecker setSafeForMacOS(boolean mac) {
* @throws CorruptObjectException
* if an error is identified.
*/
public void check(final int objType, final byte[] raw)
public void check(int objType, byte[] raw)
throws CorruptObjectException {
check(idFor(objType, raw), objType, raw);
}
/**
* Check an object for parsing errors.
*
* @param id
* identity of the object being checked.
* @param objType
* type of the object. Must be a valid object type code in
* {@link Constants}.
* @param raw
* the raw data which comprises the object. This should be in the
* canonical format (that is the format used to generate the
* ObjectId of the object). The array is never modified.
* @throws CorruptObjectException
* if an error is identified.
* @since 4.2
*/
public void check(@Nullable AnyObjectId id, int objType, byte[] raw)
throws CorruptObjectException {
switch (objType) {
case Constants.OBJ_COMMIT:
checkCommit(raw);
case OBJ_COMMIT:
checkCommit(id, raw);
break;
case Constants.OBJ_TAG:
checkTag(raw);
case OBJ_TAG:
checkTag(id, raw);
break;
case Constants.OBJ_TREE:
checkTree(raw);
case OBJ_TREE:
checkTree(id, raw);
break;
case Constants.OBJ_BLOB:
case OBJ_BLOB:
checkBlob(raw);
break;
default:
@ -214,9 +253,9 @@ private int id(final byte[] raw, final int ptr) {
}
}
private int personIdent(final byte[] raw, int ptr) {
if (allowInvalidPersonIdent)
return nextLF(raw, ptr) - 1;
private int personIdent(byte[] raw, int ptr, @Nullable AnyObjectId id) {
if (allowInvalidPersonIdent || skip(id))
return nextLF(raw, ptr);
final int emailB = nextLF(raw, ptr, '<');
if (emailB == ptr || raw[emailB - 1] != '<')
@ -238,7 +277,11 @@ private int personIdent(final byte[] raw, int ptr) {
parseBase10(raw, ptr + 1, ptrout); // tz offset
if (ptr + 1 == ptrout.value)
return -1;
return ptrout.value;
ptr = ptrout.value;
if (raw[ptr++] == '\n')
return ptr;
return -1;
}
/**
@ -249,7 +292,23 @@ private int personIdent(final byte[] raw, int ptr) {
* @throws CorruptObjectException
* if any error was detected.
*/
public void checkCommit(final byte[] raw) throws CorruptObjectException {
public void checkCommit(byte[] raw) throws CorruptObjectException {
checkCommit(idFor(OBJ_COMMIT, raw), raw);
}
/**
* Check a commit for errors.
*
* @param id
* identity of the object being checked.
* @param raw
* the commit data. The array is never modified.
* @throws CorruptObjectException
* if any error was detected.
* @since 4.2
*/
public void checkCommit(@Nullable AnyObjectId id, byte[] raw)
throws CorruptObjectException {
int ptr = 0;
if ((ptr = match(raw, ptr, tree)) < 0)
@ -266,19 +325,27 @@ public void checkCommit(final byte[] raw) throws CorruptObjectException {
JGitText.get().corruptObjectInvalidParent);
}
if ((ptr = match(raw, ptr, author)) < 0)
int p = match(raw, ptr, author);
if (p > ptr) {
if ((ptr = personIdent(raw, p, id)) < 0) {
throw new CorruptObjectException(
JGitText.get().corruptObjectInvalidAuthor);
}
} else if (!skip(id)) {
throw new CorruptObjectException(
JGitText.get().corruptObjectNoAuthor);
if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n')
throw new CorruptObjectException(
JGitText.get().corruptObjectInvalidAuthor);
}
if ((ptr = match(raw, ptr, committer)) < 0)
p = match(raw, ptr, committer);
if (p > ptr) {
if ((ptr = personIdent(raw, p, id)) < 0) {
throw new CorruptObjectException(
JGitText.get().corruptObjectInvalidCommitter);
}
} else if (!skip(id)) {
throw new CorruptObjectException(
JGitText.get().corruptObjectNoCommitter);
if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n')
throw new CorruptObjectException(
JGitText.get().corruptObjectInvalidCommitter);
}
}
/**
@ -289,7 +356,23 @@ public void checkCommit(final byte[] raw) throws CorruptObjectException {
* @throws CorruptObjectException
* if any error was detected.
*/
public void checkTag(final byte[] raw) throws CorruptObjectException {
public void checkTag(byte[] raw) throws CorruptObjectException {
checkTag(idFor(OBJ_TAG, raw), raw);
}
/**
* Check an annotated tag for errors.
*
* @param id
* identity of the object being checked.
* @param raw
* the tag data. The array is never modified.
* @throws CorruptObjectException
* if any error was detected.
* @since 4.2
*/
public void checkTag(@Nullable AnyObjectId id, byte[] raw)
throws CorruptObjectException {
int ptr = 0;
if ((ptr = match(raw, ptr, object)) < 0)
@ -304,15 +387,16 @@ public void checkTag(final byte[] raw) throws CorruptObjectException {
JGitText.get().corruptObjectNoTypeHeader);
ptr = nextLF(raw, ptr);
if ((ptr = match(raw, ptr, tag)) < 0)
if (match(raw, ptr, tag) < 0 && !skip(id))
throw new CorruptObjectException(
JGitText.get().corruptObjectNoTagHeader);
ptr = nextLF(raw, ptr);
if ((ptr = match(raw, ptr, tagger)) > 0) {
if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n')
if ((ptr = personIdent(raw, ptr, id)) < 0) {
throw new CorruptObjectException(
JGitText.get().corruptObjectInvalidTagger);
}
}
}
@ -381,11 +465,28 @@ else if (cmp == 0)
* @throws CorruptObjectException
* if any error was detected.
*/
public void checkTree(final byte[] raw) throws CorruptObjectException {
public void checkTree(byte[] raw) throws CorruptObjectException {
checkTree(idFor(OBJ_TREE, raw), raw);
}
/**
* Check a canonical formatted tree for errors.
*
* @param id
* identity of the object being checked.
* @param raw
* the raw tree data. The array is never modified.
* @throws CorruptObjectException
* if any error was detected.
* @since 4.2
*/
public void checkTree(@Nullable AnyObjectId id, byte[] raw)
throws CorruptObjectException {
final int sz = raw.length;
int ptr = 0;
int lastNameB = 0, lastNameE = 0, lastMode = 0;
Set<String> normalized = windows || macosx
boolean skip = skip(id);
Set<String> normalized = !skip && (windows || macosx)
? new HashSet<String>()
: null;
@ -401,7 +502,7 @@ public void checkTree(final byte[] raw) throws CorruptObjectException {
if (c < '0' || c > '7')
throw new CorruptObjectException(
JGitText.get().corruptObjectInvalidModeChar);
if (thisMode == 0 && c == '0' && !allowZeroMode)
if (thisMode == 0 && c == '0' && !allowZeroMode && !skip)
throw new CorruptObjectException(
JGitText.get().corruptObjectInvalidModeStartsZero);
thisMode <<= 3;
@ -418,16 +519,16 @@ public void checkTree(final byte[] raw) throws CorruptObjectException {
if (ptr == sz || raw[ptr] != 0)
throw new CorruptObjectException(
JGitText.get().corruptObjectTruncatedInName);
checkPathSegment2(raw, thisNameB, ptr);
checkPathSegment2(raw, thisNameB, ptr, skip);
if (normalized != null) {
if (!normalized.add(normalize(raw, thisNameB, ptr)))
throw new CorruptObjectException(
JGitText.get().corruptObjectDuplicateEntryNames);
} else if (duplicateName(raw, thisNameB, ptr))
} else if (!skip && duplicateName(raw, thisNameB, ptr))
throw new CorruptObjectException(
JGitText.get().corruptObjectDuplicateEntryNames);
if (lastNameB != 0) {
if (!skip && lastNameB != 0) {
final int cmp = pathCompare(raw, lastNameB, lastNameE,
lastMode, thisNameB, ptr, thisMode);
if (cmp > 0)
@ -468,6 +569,19 @@ private int scanPathSegment(byte[] raw, int ptr, int end)
return ptr;
}
/**
 * Compute the name of an object so it can be matched against the skip list.
 * <p>
 * Hashing is skipped entirely when no skip list is configured, because the
 * name would never be consulted.
 *
 * @param objType
 *            type code of the object.
 * @param raw
 *            the raw object content to hash.
 * @return the object's name, or {@code null} if no skip list is set.
 */
@Nullable
private ObjectId idFor(int objType, byte[] raw) {
	if (skipList == null) {
		return null;
	}
	// Close the formatter instead of suppressing the resource warning.
	try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) {
		return fmt.idFor(objType, raw);
	}
}
/**
 * Decide whether the tolerable checks should be bypassed for an object.
 *
 * @param id
 *            name of the object being checked; may be {@code null} when
 *            the name is unknown.
 * @return {@code true} only if a skip list is configured, the name is
 *         known, and the skip list contains it.
 */
private boolean skip(@Nullable AnyObjectId id) {
	if (skipList == null || id == null) {
		return false;
	}
	return skipList.contains(id);
}
/**
* Check tree path entry for validity.
* <p>
@ -522,10 +636,10 @@ public void checkPathSegment(byte[] raw, int ptr, int end)
if (e < end && raw[e] == 0)
throw new CorruptObjectException(
JGitText.get().corruptObjectNameContainsNullByte);
checkPathSegment2(raw, ptr, end);
checkPathSegment2(raw, ptr, end, false);
}
private void checkPathSegment2(byte[] raw, int ptr, int end)
private void checkPathSegment2(byte[] raw, int ptr, int end, boolean skip)
throws CorruptObjectException {
if (ptr == end)
throw new CorruptObjectException(
@ -541,36 +655,38 @@ private void checkPathSegment2(byte[] raw, int ptr, int end)
JGitText.get().corruptObjectNameDotDot);
break;
case 4:
if (isGit(raw, ptr + 1))
if (!skip && isGit(raw, ptr + 1))
throw new CorruptObjectException(String.format(
JGitText.get().corruptObjectInvalidName,
RawParseUtils.decode(raw, ptr, end)));
break;
default:
if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end))
if (!skip && end - ptr > 4
&& isNormalizedGit(raw, ptr + 1, end))
throw new CorruptObjectException(String.format(
JGitText.get().corruptObjectInvalidName,
RawParseUtils.decode(raw, ptr, end)));
}
} else if (isGitTilde1(raw, ptr, end)) {
} else if (!skip && isGitTilde1(raw, ptr, end)) {
throw new CorruptObjectException(String.format(
JGitText.get().corruptObjectInvalidName,
RawParseUtils.decode(raw, ptr, end)));
}
if (macosx && isMacHFSGit(raw, ptr, end))
throw new CorruptObjectException(String.format(
JGitText.get().corruptObjectInvalidNameIgnorableUnicode,
RawParseUtils.decode(raw, ptr, end)));
if (windows) {
// Windows ignores space and dot at end of file name.
if (raw[end - 1] == ' ' || raw[end - 1] == '.')
if (!skip) {
if (macosx && isMacHFSGit(raw, ptr, end))
throw new CorruptObjectException(String.format(
JGitText.get().corruptObjectInvalidNameEnd,
Character.valueOf(((char) raw[end - 1]))));
if (end - ptr >= 3)
checkNotWindowsDevice(raw, ptr, end);
JGitText.get().corruptObjectInvalidNameIgnorableUnicode,
RawParseUtils.decode(raw, ptr, end)));
if (windows) {
// Windows ignores space and dot at end of file name.
if (raw[end - 1] == ' ' || raw[end - 1] == '.')
throw new CorruptObjectException(String.format(
JGitText.get().corruptObjectInvalidNameEnd,
Character.valueOf(((char) raw[end - 1]))));
if (end - ptr >= 3)
checkNotWindowsDevice(raw, ptr, end);
}
}
}

View File

@ -1049,7 +1049,7 @@ private void verifySafeObject(final AnyObjectId id, final int type,
final byte[] data) throws IOException {
if (objCheck != null) {
try {
objCheck.check(type, data);
objCheck.check(id, type, data);
} catch (CorruptObjectException e) {
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().invalidObject,

View File

@ -43,13 +43,16 @@
package org.eclipse.jgit.transport;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.internal.storage.file.LazyObjectIdSetFile;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.Config.SectionParser;
import org.eclipse.jgit.lib.ObjectChecker;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.util.SystemReader;
@ -68,6 +71,7 @@ public TransferConfig parse(final Config cfg) {
private final boolean fetchFsck;
private final boolean receiveFsck;
private final String fsckSkipList;
private final boolean allowLeadingZeroFileMode;
private final boolean allowInvalidPersonIdent;
private final boolean safeForWindows;
@ -84,6 +88,7 @@ public TransferConfig parse(final Config cfg) {
boolean fsck = rc.getBoolean("transfer", "fsckobjects", false); //$NON-NLS-1$ //$NON-NLS-2$
fetchFsck = rc.getBoolean("fetch", "fsckobjects", fsck); //$NON-NLS-1$ //$NON-NLS-2$
receiveFsck = rc.getBoolean("receive", "fsckobjects", fsck); //$NON-NLS-1$ //$NON-NLS-2$
fsckSkipList = rc.getString("fsck", null, "skipList"); //$NON-NLS-1$ //$NON-NLS-2$
allowLeadingZeroFileMode = rc.getBoolean("fsck", "allowLeadingZeroFileMode", false); //$NON-NLS-1$ //$NON-NLS-2$
allowInvalidPersonIdent = rc.getBoolean("fsck", "allowInvalidPersonIdent", false); //$NON-NLS-1$ //$NON-NLS-2$
safeForWindows = rc.getBoolean("fsck", "safeForWindows", //$NON-NLS-1$ //$NON-NLS-2$
@ -126,7 +131,15 @@ private ObjectChecker newObjectChecker(boolean check) {
.setAllowLeadingZeroFileMode(allowLeadingZeroFileMode)
.setAllowInvalidPersonIdent(allowInvalidPersonIdent)
.setSafeForWindows(safeForWindows)
.setSafeForMacOS(safeForMacOS);
.setSafeForMacOS(safeForMacOS)
.setSkipList(skipList());
}
/**
 * Build the skip list from the {@code fsck.skipList} setting.
 *
 * @return a lazily loaded set backed by the configured file, or
 *         {@code null} if the setting is absent or empty.
 */
private ObjectIdSet skipList() {
	if (fsckSkipList == null || fsckSkipList.isEmpty()) {
		return null;
	}
	File listFile = new File(fsckSkipList);
	return new LazyObjectIdSetFile(listFile);
}
/**

View File

@ -637,10 +637,11 @@ private void verifyAndInsertLooseObject(final AnyObjectId id,
final byte[] raw = uol.getCachedBytes();
if (objCheck != null) {
try {
objCheck.check(type, raw);
objCheck.check(id, type, raw);
} catch (CorruptObjectException e) {
throw new TransportException(MessageFormat.format(JGitText.get().transportExceptionInvalid
, Constants.typeString(type), id.name(), e.getMessage()));
throw new TransportException(MessageFormat.format(
JGitText.get().transportExceptionInvalid,
Constants.typeString(type), id.name(), e.getMessage()));
}
}