From f63ee965d4f6990a25628c1aac452f4cbf8a69dd Mon Sep 17 00:00:00 2001 From: Dave Borowitz <dborowitz@google.com> Date: Wed, 20 Dec 2017 12:16:40 -0500 Subject: [PATCH 1/2] ObjectInserter: Add warning about mixing read-back with writes Change-Id: Ib0460d3c7df315d86f9adca5f66a8fd4c39e4060 --- .../src/org/eclipse/jgit/lib/ObjectInserter.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java index 857ec9b2d..b2ffbe6f6 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java @@ -423,6 +423,13 @@ public abstract ObjectId insert(int objectType, long length, InputStream in) * <p>
* The returned reader should return this inserter instance from {@link * ObjectReader#getCreatedFromInserter()}. + * <p>
+ * Behavior is undefined if an insert method is called on the inserter in the + * middle of reading from an {@link ObjectStream} opened from this reader. For + * example, reading the remainder of the object may fail, or newly written + * data may even be corrupted. Interleaving whole object reads (including + * streaming reads) with inserts is fine, just not interleaving streaming + * partial object reads with inserts. * * @since 3.5 * @return reader for any object, including an object recently inserted by From 43ef5dabf12fee8877c6d8fa07b26be766dca1ee Mon Sep 17 00:00:00 2001 From: Dave Borowitz Date: Wed, 20 Dec 2017 12:36:03 -0500 Subject: [PATCH 2/2] PackInserter: Ensure objects are written at the end of the pack When interleaving reads and writes from an unflushed pack, we forgot to reset the file pointer back to the end of the file before writing more new objects. This had at least two unfortunate effects: * The pack data was potentially corrupt, since we could overwrite previous portions of the file willy-nilly. * The CountingOutputStream would report more bytes read than the size of the file, which stored the wrong PackedObjectInfo, which would cause EOFs during reading. We already had a test in PackInserterTest which was supposed to catch bugs like this, by interleaving reads and writes. Unfortunately, it didn't catch the bug, since as an implementation detail we always read a full buffer's worth of data from the file when inflating during readback. If the size of the file was less than the offset of the object we were reading back plus one buffer (8192 bytes), we would completely accidentally end up back in the right place in the file. So, add another test for this case where we read back a small object positioned before a large object. Before the fix, this test exhibited exactly the "Unexpected EOF" error reported at crbug.com/gerrit/7668. 
Change-Id: I74f08f3d5d9046781d59e5bd7c84916ff8225c3b --- .../storage/file/PackInserterTest.java | 52 ++++++- .../internal/storage/file/PackInserter.java | 129 ++++++++++++++---- 2 files changed, 148 insertions(+), 33 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java index 17f04c854..8596f74f8 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java @@ -67,6 +67,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Random; import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -440,6 +441,53 @@ public void readBackFallsBackToRepo() throws Exception { } } + @Test + public void readBackSmallObjectBeforeLargeObject() throws Exception { + WindowCacheConfig wcc = new WindowCacheConfig(); + wcc.setStreamFileThreshold(1024); + wcc.install(); + + ObjectId blobId1; + ObjectId blobId2; + ObjectId largeId; + byte[] blob1 = Constants.encode("blob1"); + byte[] blob2 = Constants.encode("blob2"); + byte[] largeBlob = newLargeBlob(); + try (PackInserter ins = newInserter()) { + assertThat(blob1.length, lessThan(ins.getBufferSize())); + assertThat(largeBlob.length, greaterThan(ins.getBufferSize())); + + blobId1 = ins.insert(OBJ_BLOB, blob1); + largeId = ins.insert(OBJ_BLOB, largeBlob); + + try (ObjectReader reader = ins.newReader()) { + // A previous bug did not reset the file pointer to EOF after reading + // back. We need to seek to something further back than a full buffer, + // since the read-back code eagerly reads a full buffer's worth of data + // from the file to pass to the inflater. 
If we seeked back just a small + // amount, this step would consume the rest of the file, so the file + // pointer would coincidentally end up back at EOF, hiding the bug. + assertBlob(reader, blobId1, blob1); + } + + blobId2 = ins.insert(OBJ_BLOB, blob2); + + try (ObjectReader reader = ins.newReader()) { + assertBlob(reader, blobId1, blob1); + assertBlob(reader, blobId2, blob2); + assertBlob(reader, largeId, largeBlob); + } + + ins.flush(); + } + + try (ObjectReader reader = db.newObjectReader()) { + assertBlob(reader, blobId1, blob1); + assertBlob(reader, blobId2, blob2); + assertBlob(reader, largeId, largeBlob); + } + } + private List listPacks() throws Exception { List fromOpenDb = listPacks(db); List reopened; @@ -470,9 +518,7 @@ private PackInserter newInserter() { private static byte[] newLargeBlob() { byte[] blob = new byte[10240]; - for (int i = 0; i < blob.length; i++) { - blob[i] = (byte) ('0' + (i % 10)); - } + new Random(0).nextBytes(blob); return blob; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java index dd83e251f..e6139fdc0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java @@ -52,6 +52,7 @@ import java.io.EOFException; import java.io.File; import java.io.FileOutputStream; +import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -254,7 +255,6 @@ public void flush() throws IOException { try { packHash = packOut.finishPack(); } finally { - packOut.close(); packOut = null; } @@ -359,6 +359,20 @@ private Inflater inflater() { return cachedInflater; } + /** + * Stream that writes to a pack file. + *

+ * Backed by two views of the same open file descriptor: a random-access file, + * and an output stream. Seeking in the file causes subsequent writes to the + * output stream to occur wherever the file pointer is pointing, so we need to + * take care to always seek to the end of the file before writing a new + * object. + * <p>
+ * Callers should always use {@link #seek(long)} to seek, rather than reaching + * into the file member. As long as this contract is followed, calls to {@link + * #write(byte[], int, int)} are guaranteed to write at the end of the file, + * even if there have been intermediate seeks. + */ private class PackStream extends OutputStream { final byte[] hdrBuf; final CRC32 crc32; @@ -368,6 +382,8 @@ private class PackStream extends OutputStream { private final CountingOutputStream out; private final Deflater deflater; + private boolean atEnd; + PackStream(File pack) throws IOException { file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$ out = new CountingOutputStream(new FileOutputStream(file.getFD())); @@ -375,12 +391,23 @@ private class PackStream extends OutputStream { compress = new DeflaterOutputStream(this, deflater, 8192); hdrBuf = new byte[32]; crc32 = new CRC32(); + atEnd = true; } long getOffset() { + // This value is accurate as long as we only ever write to the end of the + // file, and don't seek back to overwrite any previous segments. Although + // this is subtle, storing the stream counter this way is still preferable + // to returning file.length() here, as it avoids a syscall and possible + // IOException. return out.getCount(); } + void seek(long offset) throws IOException { + file.seek(offset); + atEnd = false; + } + void beginObject(int objectType, long length) throws IOException { crc32.reset(); deflater.reset(); @@ -409,34 +436,49 @@ public void write(final int b) throws IOException { @Override public void write(byte[] data, int off, int len) throws IOException { crc32.update(data, off, len); + if (!atEnd) { + file.seek(file.length()); + atEnd = true; + } out.write(data, off, len); } byte[] finishPack() throws IOException { - // Overwrite placeholder header with actual object count, then hash. - file.seek(0); - write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size())); + // Overwrite placeholder header with actual object count, then hash. 
This + // method intentionally uses direct seek/write calls rather than the + // wrappers which keep track of atEnd. This leaves atEnd, the file + // pointer, and out's counter in an inconsistent state; that's ok, since + // this method closes the file anyway. + try { + file.seek(0); + out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size())); - byte[] buf = buffer(); - SHA1 md = digest().reset(); - file.seek(0); - while (true) { - int r = file.read(buf); - if (r < 0) { - break; + byte[] buf = buffer(); + SHA1 md = digest().reset(); + file.seek(0); + while (true) { + int r = file.read(buf); + if (r < 0) { + break; + } + md.update(buf, 0, r); } - md.update(buf, 0, r); + byte[] packHash = md.digest(); + out.write(packHash, 0, packHash.length); + return packHash; + } finally { + close(); } - byte[] packHash = md.digest(); - out.write(packHash, 0, packHash.length); - return packHash; } @Override public void close() throws IOException { deflater.end(); - out.close(); - file.close(); + try { + out.close(); + } finally { + file.close(); + } } byte[] inflate(long filePos, int len) throws IOException, DataFormatException { @@ -467,7 +509,7 @@ byte[] inflate(long filePos, int len) throws IOException, DataFormatException { private int setInput(long filePos, Inflater inf, byte[] buf) throws IOException { if (file.getFilePointer() != filePos) { - file.seek(filePos); + seek(filePos); } int n = file.read(buf); if (n < 0) { @@ -528,9 +570,8 @@ public ObjectLoader open(AnyObjectId objectId, int typeHint) } byte[] buf = buffer(); - RandomAccessFile f = packOut.file; - f.seek(obj.getOffset()); - int cnt = f.read(buf, 0, 20); + packOut.seek(obj.getOffset()); + int cnt = packOut.file.read(buf, 0, 20); if (cnt <= 0) { throw new EOFException(JGitText.get().unexpectedEofInPack); } @@ -564,7 +605,7 @@ public ObjectLoader open(AnyObjectId objectId, int typeHint) return new ObjectLoader.SmallObject(type, data); } } - return new StreamLoader(f, type, sz, zpos); + return new 
StreamLoader(type, sz, zpos); } private byte[] inflate(PackedObjectInfo obj, long zpos, int sz) @@ -593,13 +634,11 @@ public void close() { } private class StreamLoader extends ObjectLoader { - private final RandomAccessFile file; private final int type; private final long size; private final long pos; - StreamLoader(RandomAccessFile file, int type, long size, long pos) { - this.file = file; + StreamLoader(int type, long size, long pos) { this.type = type; this.size = size; this.pos = pos; @@ -609,14 +648,44 @@ private class StreamLoader extends ObjectLoader { public ObjectStream openStream() throws MissingObjectException, IOException { int bufsz = buffer().length; - file.seek(pos); + packOut.seek(pos); + + InputStream fileStream = new FilterInputStream( + Channels.newInputStream(packOut.file.getChannel())) { + // atEnd was already set to false by the previous seek, but it's + // technically possible for a caller to call insert on the + // inserter in the middle of reading from this stream. Behavior is + // undefined in this case, so it would arguably be ok to ignore, + // but it's not hard to at least make an attempt to not corrupt + // the data. + @Override + public int read() throws IOException { + packOut.atEnd = false; + return super.read(); + } + + @Override + public int read(byte[] b) throws IOException { + packOut.atEnd = false; + return super.read(b); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + packOut.atEnd = false; + return super.read(b,off,len); + } + + @Override + public void close() { + // Never close underlying RandomAccessFile, which lasts the + // lifetime of the enclosing PackStream. + } + }; return new ObjectStream.Filter( type, size, new BufferedInputStream( - new InflaterInputStream( - Channels.newInputStream(packOut.file.getChannel()), - inflater(), bufsz), - bufsz)); + new InflaterInputStream(fileStream, inflater(), bufsz), bufsz)); } @Override