diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java index 814e148db..b782ce87f 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java @@ -67,6 +67,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Random; import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -440,6 +441,53 @@ public void readBackFallsBackToRepo() throws Exception { } } + @Test + public void readBackSmallObjectBeforeLargeObject() throws Exception { + WindowCacheConfig wcc = new WindowCacheConfig(); + wcc.setStreamFileThreshold(1024); + wcc.install(); + + ObjectId blobId1; + ObjectId blobId2; + ObjectId largeId; + byte[] blob1 = Constants.encode("blob1"); + byte[] blob2 = Constants.encode("blob2"); + byte[] largeBlob = newLargeBlob(); + try (PackInserter ins = newInserter()) { + assertThat(blob1.length, lessThan(ins.getBufferSize())); + assertThat(largeBlob.length, greaterThan(ins.getBufferSize())); + + blobId1 = ins.insert(OBJ_BLOB, blob1); + largeId = ins.insert(OBJ_BLOB, largeBlob); + + try (ObjectReader reader = ins.newReader()) { + // A previous bug did not reset the file pointer to EOF after reading + // back. We need to seek to something further back than a full buffer, + // since the read-back code eagerly reads a full buffer's worth of data + // from the file to pass to the inflater. If we seeked back just a small + // amount, this step would consume the rest of the file, so the file + // pointer would coincidentally end up back at EOF, hiding the bug. 
+ assertBlob(reader, blobId1, blob1); + } + + blobId2 = ins.insert(OBJ_BLOB, blob2); + + try (ObjectReader reader = ins.newReader()) { + assertBlob(reader, blobId1, blob1); + assertBlob(reader, blobId2, blob2); + assertBlob(reader, largeId, largeBlob); + } + + ins.flush(); + } + + try (ObjectReader reader = db.newObjectReader()) { + assertBlob(reader, blobId1, blob1); + assertBlob(reader, blobId2, blob2); + assertBlob(reader, largeId, largeBlob); + } + } + private List listPacks() throws Exception { List fromOpenDb = listPacks(db); List reopened; @@ -470,9 +518,7 @@ private PackInserter newInserter() { private static byte[] newLargeBlob() { byte[] blob = new byte[10240]; - for (int i = 0; i < blob.length; i++) { - blob[i] = (byte) ('0' + (i % 10)); - } + new Random(0).nextBytes(blob); return blob; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java index e60e29787..39cf5bd5d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java @@ -52,6 +52,7 @@ import java.io.EOFException; import java.io.File; import java.io.FileOutputStream; +import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -264,7 +265,6 @@ public void flush() throws IOException { try { packHash = packOut.finishPack(); } finally { - packOut.close(); packOut = null; } @@ -369,6 +369,20 @@ private Inflater inflater() { return cachedInflater; } + /** + * Stream that writes to a pack file. + *
<p>
+ * Backed by two views of the same open file descriptor: a random-access file, + * and an output stream. Seeking in the file causes subsequent writes to the + * output stream to occur wherever the file pointer is pointing, so we need to + * take care to always seek to the end of the file before writing a new + * object. + *
<p>
+ * Callers should always use {@link #seek(long)} to seek, rather than reaching + * into the file member. As long as this contract is followed, calls to {@link + * #write(byte[], int, int)} are guaranteed to write at the end of the file, + * even if there have been intermediate seeks. + */ private class PackStream extends OutputStream { final byte[] hdrBuf; final CRC32 crc32; @@ -378,6 +392,8 @@ private class PackStream extends OutputStream { private final CountingOutputStream out; private final Deflater deflater; + private boolean atEnd; + PackStream(File pack) throws IOException { file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$ out = new CountingOutputStream(new FileOutputStream(file.getFD())); @@ -385,12 +401,23 @@ private class PackStream extends OutputStream { compress = new DeflaterOutputStream(this, deflater, 8192); hdrBuf = new byte[32]; crc32 = new CRC32(); + atEnd = true; } long getOffset() { + // This value is accurate as long as we only ever write to the end of the + // file, and don't seek back to overwrite any previous segments. Although + // this is subtle, storing the stream counter this way is still preferable + // to returning file.length() here, as it avoids a syscall and possible + // IOException. return out.getCount(); } + void seek(long offset) throws IOException { + file.seek(offset); + atEnd = false; + } + void beginObject(int objectType, long length) throws IOException { crc32.reset(); deflater.reset(); @@ -419,34 +446,49 @@ public void write(final int b) throws IOException { @Override public void write(byte[] data, int off, int len) throws IOException { crc32.update(data, off, len); + if (!atEnd) { + file.seek(file.length()); + atEnd = true; + } out.write(data, off, len); } byte[] finishPack() throws IOException { - // Overwrite placeholder header with actual object count, then hash. - file.seek(0); - write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size())); + // Overwrite placeholder header with actual object count, then hash. 
This + // method intentionally uses direct seek/write calls rather than the + // wrappers which keep track of atEnd. This leaves atEnd, the file + // pointer, and out's counter in an inconsistent state; that's ok, since + // this method closes the file anyway. + try { + file.seek(0); + out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size())); - byte[] buf = buffer(); - SHA1 md = digest().reset(); - file.seek(0); - while (true) { - int r = file.read(buf); - if (r < 0) { - break; + byte[] buf = buffer(); + SHA1 md = digest().reset(); + file.seek(0); + while (true) { + int r = file.read(buf); + if (r < 0) { + break; + } + md.update(buf, 0, r); } - md.update(buf, 0, r); + byte[] packHash = md.digest(); + out.write(packHash, 0, packHash.length); + return packHash; + } finally { + close(); } - byte[] packHash = md.digest(); - out.write(packHash, 0, packHash.length); - return packHash; } @Override public void close() throws IOException { deflater.end(); - out.close(); - file.close(); + try { + out.close(); + } finally { + file.close(); + } } byte[] inflate(long filePos, int len) throws IOException, DataFormatException { @@ -477,7 +519,7 @@ byte[] inflate(long filePos, int len) throws IOException, DataFormatException { private int setInput(long filePos, Inflater inf, byte[] buf) throws IOException { if (file.getFilePointer() != filePos) { - file.seek(filePos); + seek(filePos); } int n = file.read(buf); if (n < 0) { @@ -538,9 +580,8 @@ public ObjectLoader open(AnyObjectId objectId, int typeHint) } byte[] buf = buffer(); - RandomAccessFile f = packOut.file; - f.seek(obj.getOffset()); - int cnt = f.read(buf, 0, 20); + packOut.seek(obj.getOffset()); + int cnt = packOut.file.read(buf, 0, 20); if (cnt <= 0) { throw new EOFException(JGitText.get().unexpectedEofInPack); } @@ -574,7 +615,7 @@ public ObjectLoader open(AnyObjectId objectId, int typeHint) return new ObjectLoader.SmallObject(type, data); } } - return new StreamLoader(f, type, sz, zpos); + return new 
StreamLoader(type, sz, zpos); } private byte[] inflate(PackedObjectInfo obj, long zpos, int sz) @@ -602,13 +643,11 @@ public void close() { } private class StreamLoader extends ObjectLoader { - private final RandomAccessFile file; private final int type; private final long size; private final long pos; - StreamLoader(RandomAccessFile file, int type, long size, long pos) { - this.file = file; + StreamLoader(int type, long size, long pos) { this.type = type; this.size = size; this.pos = pos; @@ -618,14 +657,44 @@ private class StreamLoader extends ObjectLoader { public ObjectStream openStream() throws MissingObjectException, IOException { int bufsz = buffer().length; - file.seek(pos); + packOut.seek(pos); + + InputStream fileStream = new FilterInputStream( + Channels.newInputStream(packOut.file.getChannel())) { + // atEnd was already set to false by the previous seek, but it's + // technically possible for a caller to call insert on the + // inserter in the middle of reading from this stream. Behavior is + // undefined in this case, so it would arguably be ok to ignore, + // but it's not hard to at least make an attempt to not corrupt + // the data. + @Override + public int read() throws IOException { + packOut.atEnd = false; + return super.read(); + } + + @Override + public int read(byte[] b) throws IOException { + packOut.atEnd = false; + return super.read(b); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + packOut.atEnd = false; + return super.read(b,off,len); + } + + @Override + public void close() { + // Never close underlying RandomAccessFile, which lasts the + // lifetime of the enclosing PackStream. 
+ } + }; return new ObjectStream.Filter( type, size, new BufferedInputStream( - new InflaterInputStream( - Channels.newInputStream(packOut.file.getChannel()), - inflater(), bufsz), - bufsz)); + new InflaterInputStream(fileStream, inflater(), bufsz), bufsz)); } @Override diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java index f334ff778..40522ba24 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java @@ -428,6 +428,13 @@ public abstract ObjectId insert(int objectType, long length, InputStream in) *
<p>
* The returned reader should return this inserter instance from {@link * ObjectReader#getCreatedFromInserter()}. + *
<p>
+ * Behavior is undefined if an insert method is called on the inserter in the + * middle of reading from an {@link ObjectStream} opened from this reader. For + * example, reading the remainder of the object may fail, or newly written + * data may even be corrupted. Interleaving whole object reads (including + * streaming reads) with inserts is fine, just not interleaving streaming + * partial object reads with inserts. * * @since 3.5 * @return reader for any object, including an object recently inserted by