PackWriter: Avoid CRC-32 validation when feeding IndexPack

There is no need to validate the object contents during
copyObjectAsIs if the result is going to be parsed by unpack-objects
or index-pack.  Both programs will compute the SHA-1 of the object,
and also validate most of the pack structure.  For git-daemon-like
servers, this work is already done on the client end of the
connection, so the server doesn't need to repeat that work itself.

Disable object validation for the 3 transport cases where we know
the remote side will handle object validation for us (push, bundle
creation, and upload pack).  This improves performance on the server
side by reducing the work that must be done.

Change-Id: Iabb78eec45898e4a17f7aab3fb94c004d8d69af6
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Shawn O. Pearce 2011-03-02 12:23:55 -08:00
parent bd6853e90a
commit 1b2062fe37
8 changed files with 76 additions and 26 deletions
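For context, a caller on the serving side uses the new option like this: once it knows the receiving unpack-objects or index-pack will re-verify every object anyway, it switches reuse validation off before preparing the pack. Below is a minimal sketch in Java, assuming the org.eclipse.jgit.storage.pack package layout of this era; the ServePack class, its serve method, and the parameter names are illustrative only and are not part of this change.

import java.io.IOException;
import java.io.OutputStream;
import java.util.Set;

import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackWriter;

class ServePack {
	/** Stream a pack to a peer whose index-pack re-verifies each object. */
	static void serve(PackConfig config, ObjectReader reader,
			Set<ObjectId> want, Set<ObjectId> have, OutputStream out)
			throws IOException {
		PackWriter pw = new PackWriter(config, reader);
		// The receiving index-pack recomputes every SHA-1 and checks most
		// of the pack structure itself, so skip the writer's CRC-32/zlib
		// re-validation of reused object representations.
		pw.setReuseValidatingObjects(false);
		pw.preparePack(NullProgressMonitor.INSTANCE, want, have);
		pw.writePack(NullProgressMonitor.INSTANCE,
				NullProgressMonitor.INSTANCE, out);
		// Resource cleanup (releasing the writer and reader) elided.
	}
}

The push, bundle-creation, and upload-pack call sites changed below follow this same pattern, flipping setReuseValidatingObjects(false) alongside the other pack options, while the default stays true so that local repacking still guards against propagating a corrupt object from one pack file to another.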

View File

@@ -107,7 +107,7 @@ public void select(ObjectToPack otp, StoredObjectRepresentation next) {
asis.selectObjectRepresentation(pw, NullProgressMonitor.INSTANCE,
Collections.singleton(target));
asis.copyObjectAsIs(new PackOutputStream(NullProgressMonitor.INSTANCE,
buf, pw), target);
buf, pw), target, true);
// At this point the object header has no delta information,
// because it was output as though it were a whole object.

View File

@@ -313,21 +313,21 @@ void copyPackAsIs(PackOutputStream out, WindowCursor curs)
}
final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
WindowCursor curs) throws IOException,
boolean validate, WindowCursor curs) throws IOException,
StoredObjectRepresentationNotAvailableException {
beginCopyAsIs(src);
try {
copyAsIs2(out, src, curs);
copyAsIs2(out, src, validate, curs);
} finally {
endCopyAsIs();
}
}
private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
WindowCursor curs) throws IOException,
boolean validate, WindowCursor curs) throws IOException,
StoredObjectRepresentationNotAvailableException {
final CRC32 crc1 = new CRC32();
final CRC32 crc2 = new CRC32();
final CRC32 crc1 = validate ? new CRC32() : null;
final CRC32 crc2 = validate ? new CRC32() : null;
final byte[] buf = out.getCopyBuffer();
// Rip apart the header so we can discover the size.
@@ -348,17 +348,23 @@ private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
do {
c = buf[headerCnt++] & 0xff;
} while ((c & 128) != 0);
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
if (validate) {
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}
} else if (typeCode == Constants.OBJ_REF_DELTA) {
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
if (validate) {
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}
readFully(src.offset + headerCnt, buf, 0, 20, curs);
crc1.update(buf, 0, 20);
crc2.update(buf, 0, 20);
if (validate) {
crc1.update(buf, 0, 20);
crc2.update(buf, 0, 20);
}
headerCnt += 20;
} else {
} else if (validate) {
crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt);
}
@@ -374,7 +380,7 @@ private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
try {
quickCopy = curs.quickCopy(this, dataOffset, dataLength);
if (idx().hasCRC32Support()) {
if (validate && idx().hasCRC32Support()) {
// Index has the CRC32 code cached, validate the object.
//
expectedCRC = idx().findCRC32(src);
@@ -397,7 +403,7 @@ private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
JGitText.get().objectAtHasBadZlibStream,
src.offset, getPackFile()));
}
} else {
} else if (validate) {
// We don't have a CRC32 code in the index, so compute it
// now while inflating the raw data to get zlib to tell us
// whether or not the data is safe.
@@ -427,6 +433,8 @@ private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
src.offset));
}
expectedCRC = crc1.getValue();
} else {
expectedCRC = -1;
}
} catch (DataFormatException dataFormat) {
setCorrupt(src.offset);
@@ -473,12 +481,13 @@ private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs);
crc2.update(buf, 0, n);
if (validate)
crc2.update(buf, 0, n);
out.write(buf, 0, n);
pos += n;
cnt -= n;
}
if (crc2.getValue() != expectedCRC) {
if (validate && crc2.getValue() != expectedCRC) {
throw new CorruptObjectException(MessageFormat.format(JGitText
.get().objectAtHasBadZlibStream, src.offset,
getPackFile()));

View File

@@ -143,10 +143,11 @@ public void selectObjectRepresentation(PackWriter packer,
}
}
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp)
throws IOException, StoredObjectRepresentationNotAvailableException {
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
boolean validate) throws IOException,
StoredObjectRepresentationNotAvailableException {
LocalObjectToPack src = (LocalObjectToPack) otp;
src.pack.copyAsIs(out, src, this);
src.pack.copyAsIs(out, src, validate, this);
}
public void writeObjects(PackOutputStream out, List<ObjectToPack> list)

View File

@@ -130,8 +130,9 @@ public void selectObjectRepresentation(PackWriter packer,
* reduce data locality for the reader, slowing down data access.
*
* Invoking {@link PackOutputStream#writeObject(ObjectToPack)} will cause
* {@link #copyObjectAsIs(PackOutputStream, ObjectToPack)} to be invoked
* recursively on {@code this} if the current object is scheduled for reuse.
* {@link #copyObjectAsIs(PackOutputStream, ObjectToPack, boolean)} to be
* invoked recursively on {@code this} if the current object is scheduled
* for reuse.
*
* @param out
* the stream to write each object to.
@@ -160,7 +161,11 @@ public void writeObjects(PackOutputStream out, List<ObjectToPack> list)
*
* <pre>
* MyToPack mtp = (MyToPack) otp;
* byte[] raw = validate(mtp); // throw SORNAE here, if at all
* byte[] raw;
* if (validate)
* raw = validate(mtp); // throw SORNAE here, if at all
* else
* raw = readFast(mtp);
* out.writeHeader(mtp, mtp.inflatedSize);
* out.write(raw);
* </pre>
@@ -169,6 +174,11 @@ public void writeObjects(PackOutputStream out, List<ObjectToPack> list)
* stream the object should be written to.
* @param otp
* the object's saved representation information.
* @param validate
* if true the representation must be validated and not be
* corrupt before being reused. If false, validation may be
* skipped as it will be performed elsewhere in the processing
* pipeline.
* @throws StoredObjectRepresentationNotAvailableException
* the previously selected representation is no longer
* available. If thrown before {@code out.writeHeader} the pack
@@ -179,8 +189,9 @@ public void writeObjects(PackOutputStream out, List<ObjectToPack> list)
* the stream's write method threw an exception. Packing will
* abort.
*/
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp)
throws IOException, StoredObjectRepresentationNotAvailableException;
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
boolean validate) throws IOException,
StoredObjectRepresentationNotAvailableException;
/**
* Obtain the available cached packs.

View File

@@ -177,6 +177,8 @@ public class PackWriter {
private boolean reuseDeltaCommits;
private boolean reuseValidate;
private boolean thin;
private boolean useCachedPacks;
@@ -245,6 +247,7 @@ public PackWriter(final PackConfig config, final ObjectReader reader) {
deltaBaseAsOffset = config.isDeltaBaseAsOffset();
reuseDeltas = config.isReuseDeltas();
reuseValidate = true; // be paranoid by default
stats = new Statistics();
}
@@ -298,6 +301,29 @@ public void setReuseDeltaCommits(boolean reuse) {
reuseDeltaCommits = reuse;
}
/**
* Check if the writer validates objects before copying them.
*
* @return true if validation is enabled; false if the reader will handle
* object validation as a side-effect of it consuming the output.
*/
public boolean isReuseValidatingObjects() {
return reuseValidate;
}
/**
* Enable (or disable) object validation during packing.
*
* @param validate
* if true the pack writer will validate an object before it is
* put into the output. This additional validation work may be
* necessary to avoid propagating corruption from one local pack
* file to another local pack file.
*/
public void setReuseValidatingObjects(boolean validate) {
reuseValidate = validate;
}
/** @return true if this writer is producing a thin pack. */
public boolean isThin() {
return thin;
@@ -1006,7 +1032,7 @@ void writeObject(PackOutputStream out, ObjectToPack otp) throws IOException {
while (otp.isReuseAsIs()) {
try {
reuseSupport.copyObjectAsIs(out, otp);
reuseSupport.copyObjectAsIs(out, otp, reuseValidate);
out.endObject();
otp.setCRC(out.getCRC32());
stats.reusedObjects++;

View File

@@ -260,6 +260,7 @@ private void writePack(final Map<String, RemoteRefUpdate> refUpdates,
writer.setUseCachedPacks(true);
writer.setThin(thinPack);
writer.setReuseValidatingObjects(false);
writer.setDeltaBaseAsOffset(capableOfsDelta);
writer.preparePack(monitor, newObjects, remoteObjects);
writer.writePack(monitor, monitor, out);

View File

@@ -202,6 +202,7 @@ public void writeBundle(ProgressMonitor monitor, OutputStream os)
exc.add(r.getId());
packWriter.setDeltaBaseAsOffset(true);
packWriter.setThin(exc.size() > 0);
packWriter.setReuseValidatingObjects(false);
if (exc.size() == 0)
packWriter.setTagTargets(tagTargets);
packWriter.preparePack(monitor, inc, exc);

View File

@@ -695,6 +695,7 @@ private void sendPack() throws IOException {
pw.setReuseDeltaCommits(true);
pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA));
pw.setThin(options.contains(OPTION_THIN_PACK));
pw.setReuseValidatingObjects(false);
if (commonBase.isEmpty()) {
Set<ObjectId> tagTargets = new HashSet<ObjectId>();