Defer object collision check until pack stream is done

Object collision check requires read from local storage which may be
slow. We already delay this check for blobs, this change will also delay
other objects until the pack stream is closed. In this way, there is no
readCurs call until the pack stream is closed.

Change-Id: I3c8c4720dd19a5f64f8c7ddf07d815ed6877b6aa
Signed-off-by: Zhen Chen <czhen@google.com>
This commit is contained in:
Zhen Chen 2017-06-08 16:55:26 -07:00
parent 0d20573d9c
commit 9a3e037726
1 changed files with 69 additions and 58 deletions

View File

@ -173,8 +173,8 @@ public static enum Source {
private LongMap<UnresolvedDelta> baseByPos;
/** Blobs whose contents need to be double-checked after indexing. */
private BlockList<PackedObjectInfo> deferredCheckBlobs;
/** Objects need to be double-checked for collision after indexing. */
private BlockList<PackedObjectInfo> collisionCheckObjs;
private MessageDigest packDigest;
@ -528,7 +528,7 @@ public PackLock parse(ProgressMonitor receiving, ProgressMonitor resolving)
entries = new PackedObjectInfo[(int) objectCount];
baseById = new ObjectIdOwnerMap<>();
baseByPos = new LongMap<>();
deferredCheckBlobs = new BlockList<>();
collisionCheckObjs = new BlockList<>();
receiving.beginTask(JGitText.get().receivingObjects,
(int) objectCount);
@ -545,8 +545,10 @@ public PackLock parse(ProgressMonitor receiving, ProgressMonitor resolving)
receiving.endTask();
}
if (!deferredCheckBlobs.isEmpty())
doDeferredCheckBlobs();
if (!collisionCheckObjs.isEmpty()) {
checkObjectCollision();
}
if (deltaCount > 0) {
if (resolving instanceof BatchingProgressMonitor) {
((BatchingProgressMonitor) resolving).setDelayStart(
@ -675,6 +677,9 @@ private void resolveDeltas(DeltaVisit visit, final int type,
objectDigest.digest(tempObjectId);
verifySafeObject(tempObjectId, type, visit.data);
if (isCheckObjectCollisions() && readCurs.has(tempObjectId)) {
checkObjectCollision(tempObjectId, type, visit.data);
}
PackedObjectInfo oe;
oe = newInfo(tempObjectId, visit.delta, visit.parent.id);
@ -1031,7 +1036,6 @@ private void whole(final long pos, final int type, final long sz)
objectDigest.update((byte) 0);
final byte[] data;
boolean checkContentLater = false;
if (type == Constants.OBJ_BLOB) {
byte[] readBuffer = buffer();
InputStream inf = inflate(Source.INPUT, sz);
@ -1045,10 +1049,7 @@ private void whole(final long pos, final int type, final long sz)
}
inf.close();
objectDigest.digest(tempObjectId);
checkContentLater = isCheckObjectCollisions()
&& readCurs.has(tempObjectId);
data = null;
} else {
data = inflateAndReturn(Source.INPUT, sz);
objectDigest.update(data);
@ -1062,8 +1063,10 @@ private void whole(final long pos, final int type, final long sz)
if (data != null)
onInflatedObjectData(obj, type, data);
addObjectAndTrack(obj);
if (checkContentLater)
deferredCheckBlobs.add(obj);
if (isCheckObjectCollisions()) {
collisionCheckObjs.add(obj);
}
}
private void verifySafeObject(final AnyObjectId id, final int type,
@ -1078,62 +1081,70 @@ private void verifySafeObject(final AnyObjectId id, final int type,
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().invalidObject,
Constants.typeString(type),
readCurs.abbreviate(id, 10).name(),
id.name(),
e.getMessage()), e);
}
}
if (isCheckObjectCollisions()) {
try {
final ObjectLoader ldr = readCurs.open(id, type);
final byte[] existingData = ldr.getCachedBytes(data.length);
if (!Arrays.equals(data, existingData)) {
throw new IOException(MessageFormat.format(
JGitText.get().collisionOn, id.name()));
}
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
}
}
}
private void doDeferredCheckBlobs() throws IOException {
private void checkObjectCollision() throws IOException {
for (PackedObjectInfo obj : collisionCheckObjs) {
if (!readCurs.has(obj)) {
continue;
}
checkObjectCollision(obj);
}
}
private void checkObjectCollision(PackedObjectInfo obj)
throws IOException {
ObjectTypeAndSize info = openDatabase(obj, new ObjectTypeAndSize());
final byte[] readBuffer = buffer();
final byte[] curBuffer = new byte[readBuffer.length];
ObjectTypeAndSize info = new ObjectTypeAndSize();
for (PackedObjectInfo obj : deferredCheckBlobs) {
info = openDatabase(obj, info);
if (info.type != Constants.OBJ_BLOB)
long sz = info.size;
InputStream pck = null;
try (ObjectStream cur = readCurs.open(obj, info.type).openStream()) {
if (cur.getSize() != sz) {
throw new IOException(MessageFormat.format(
JGitText.get().unknownObjectType,
Integer.valueOf(info.type)));
ObjectStream cur = readCurs.open(obj, info.type).openStream();
try {
long sz = info.size;
if (cur.getSize() != sz)
throw new IOException(MessageFormat.format(
JGitText.get().collisionOn, obj.name()));
InputStream pck = inflate(Source.DATABASE, sz);
while (0 < sz) {
int n = (int) Math.min(readBuffer.length, sz);
IO.readFully(cur, curBuffer, 0, n);
IO.readFully(pck, readBuffer, 0, n);
for (int i = 0; i < n; i++) {
if (curBuffer[i] != readBuffer[i])
throw new IOException(MessageFormat.format(JGitText
.get().collisionOn, obj.name()));
}
sz -= n;
}
pck.close();
} finally {
cur.close();
JGitText.get().collisionOn, obj.name()));
}
pck = inflate(Source.DATABASE, sz);
while (0 < sz) {
int n = (int) Math.min(readBuffer.length, sz);
IO.readFully(cur, curBuffer, 0, n);
IO.readFully(pck, readBuffer, 0, n);
for (int i = 0; i < n; i++) {
if (curBuffer[i] != readBuffer[i]) {
throw new IOException(MessageFormat.format(JGitText
.get().collisionOn, obj.name()));
}
}
sz -= n;
}
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
} finally {
if (pck != null) {
pck.close();
}
}
}
private void checkObjectCollision(AnyObjectId obj, int type, byte[] data)
throws IOException {
try {
final ObjectLoader ldr = readCurs.open(obj, type);
final byte[] existingData = ldr.getCachedBytes(data.length);
if (!Arrays.equals(data, existingData)) {
throw new IOException(MessageFormat.format(
JGitText.get().collisionOn, obj.name()));
}
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
}
}