Merge branch 'stable-4.9'

* stable-4.9: PackInserter: Ensure objects are written at the end of the pack ObjectInserter: Add warning about mixing read-back with writes Change-Id: I308e7c1c6b72e8d4d9b5d0f4f51e9815fc92d7d7
2017-12-20 14:39:11 -05:00 · 2017-12-20 14:39:11 -05:00 · c1a02f497a
parent 7f6aaa5768 43ef5dabf1
commit c1a02f497a
3 changed files with 155 additions and 33 deletions
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java
@ -67,6 +67,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Random;
 import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@ -440,6 +441,53 @@ public void readBackFallsBackToRepo() throws Exception {
 		}
 	}

+	@Test
+	public void readBackSmallObjectBeforeLargeObject() throws Exception {
+		WindowCacheConfig wcc = new WindowCacheConfig();
+		wcc.setStreamFileThreshold(1024);
+		wcc.install();
+
+		ObjectId blobId1;
+		ObjectId blobId2;
+		ObjectId largeId;
+		byte[] blob1 = Constants.encode("blob1");
+		byte[] blob2 = Constants.encode("blob2");
+		byte[] largeBlob = newLargeBlob();
+		try (PackInserter ins = newInserter()) {
+			assertThat(blob1.length, lessThan(ins.getBufferSize()));
+			assertThat(largeBlob.length, greaterThan(ins.getBufferSize()));
+
+			blobId1 = ins.insert(OBJ_BLOB, blob1);
+			largeId = ins.insert(OBJ_BLOB, largeBlob);
+
+			try (ObjectReader reader = ins.newReader()) {
+				// A previous bug did not reset the file pointer to EOF after reading
+				// back. We need to seek to something further back than a full buffer,
+				// since the read-back code eagerly reads a full buffer's worth of data
+				// from the file to pass to the inflater. If we seeked back just a small
+				// amount, this step would consume the rest of the file, so the file
+				// pointer would coincidentally end up back at EOF, hiding the bug.
+				assertBlob(reader, blobId1, blob1);
+			}
+
+			blobId2 = ins.insert(OBJ_BLOB, blob2);
+
+			try (ObjectReader reader = ins.newReader()) {
+				assertBlob(reader, blobId1, blob1);
+				assertBlob(reader, blobId2, blob2);
+				assertBlob(reader, largeId, largeBlob);
+			}
+
+			ins.flush();
+		}
+
+		try (ObjectReader reader = db.newObjectReader()) {
+				assertBlob(reader, blobId1, blob1);
+				assertBlob(reader, blobId2, blob2);
+				assertBlob(reader, largeId, largeBlob);
+		}
+	}
+
 	private List<PackFile> listPacks() throws Exception {
 		List<PackFile> fromOpenDb = listPacks(db);
 		List<PackFile> reopened;
@ -470,9 +518,7 @@ private PackInserter newInserter() {

 	private static byte[] newLargeBlob() {
 		byte[] blob = new byte[10240];
-		for (int i = 0; i < blob.length; i++) {
-			blob[i] = (byte) ('0' + (i % 10));
-		}
+		new Random(0).nextBytes(blob);
 		return blob;
 	}

--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java
@ -52,6 +52,7 @@
 import java.io.EOFException;
 import java.io.File;
 import java.io.FileOutputStream;
+import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
@ -264,7 +265,6 @@ public void flush() throws IOException {
 		try {
 			packHash = packOut.finishPack();
 		} finally {
-			packOut.close();
 			packOut = null;
 		}

@ -369,6 +369,20 @@ private Inflater inflater() {
 		return cachedInflater;
 	}

+	/**
+	 * Stream that writes to a pack file.
+	 * <p>
+	 * Backed by two views of the same open file descriptor: a random-access file,
+	 * and an output stream. Seeking in the file causes subsequent writes to the
+	 * output stream to occur wherever the file pointer is pointing, so we need to
+	 * take care to always seek to the end of the file before writing a new
+	 * object.
+	 * <p>
+	 * Callers should always use {@link #seek(long)} to seek, rather than reaching
+	 * into the file member. As long as this contract is followed, calls to {@link
+	 * #write(byte[], int, int)} are guaranteed to write at the end of the file,
+	 * even if there have been intermediate seeks.
+	 */
 	private class PackStream extends OutputStream {
 		final byte[] hdrBuf;
 		final CRC32 crc32;
@ -378,6 +392,8 @@ private class PackStream extends OutputStream {
 		private final CountingOutputStream out;
 		private final Deflater deflater;

+		private boolean atEnd;
+
 		PackStream(File pack) throws IOException {
 			file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$
 			out = new CountingOutputStream(new FileOutputStream(file.getFD()));
@ -385,12 +401,23 @@ private class PackStream extends OutputStream {
 			compress = new DeflaterOutputStream(this, deflater, 8192);
 			hdrBuf = new byte[32];
 			crc32 = new CRC32();
+			atEnd = true;
 		}

 		long getOffset() {
+			// This value is accurate as long as we only ever write to the end of the
+			// file, and don't seek back to overwrite any previous segments. Although
+			// this is subtle, storing the stream counter this way is still preferable
+			// to returning file.length() here, as it avoids a syscall and possible
+			// IOException.
 			return out.getCount();
 		}

+		void seek(long offset) throws IOException {
+			file.seek(offset);
+			atEnd = false;
+		}
+
 		void beginObject(int objectType, long length) throws IOException {
 			crc32.reset();
 			deflater.reset();
@ -419,34 +446,49 @@ public void write(final int b) throws IOException {
 		@Override
 		public void write(byte[] data, int off, int len) throws IOException {
 			crc32.update(data, off, len);
+			if (!atEnd) {
+				file.seek(file.length());
+				atEnd = true;
+			}
 			out.write(data, off, len);
 		}

 		byte[] finishPack() throws IOException {
-			// Overwrite placeholder header with actual object count, then hash.
-			file.seek(0);
-			write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));
+			// Overwrite placeholder header with actual object count, then hash. This
+			// method intentionally uses direct seek/write calls rather than the
+			// wrappers which keep track of atEnd. This leaves atEnd, the file
+			// pointer, and out's counter in an inconsistent state; that's ok, since
+			// this method closes the file anyway.
+			try {
+				file.seek(0);
+				out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));

-			byte[] buf = buffer();
-			SHA1 md = digest().reset();
-			file.seek(0);
-			while (true) {
-				int r = file.read(buf);
-				if (r < 0) {
-					break;
+				byte[] buf = buffer();
+				SHA1 md = digest().reset();
+				file.seek(0);
+				while (true) {
+					int r = file.read(buf);
+					if (r < 0) {
+						break;
+					}
+					md.update(buf, 0, r);
 				}
-				md.update(buf, 0, r);
+				byte[] packHash = md.digest();
+				out.write(packHash, 0, packHash.length);
+				return packHash;
+			} finally {
+				close();
 			}
-			byte[] packHash = md.digest();
-			out.write(packHash, 0, packHash.length);
-			return packHash;
 		}

 		@Override
 		public void close() throws IOException {
 			deflater.end();
-			out.close();
-			file.close();
+			try {
+				out.close();
+			} finally {
+				file.close();
+			}
 		}

 		byte[] inflate(long filePos, int len) throws IOException, DataFormatException {
@ -477,7 +519,7 @@ byte[] inflate(long filePos, int len) throws IOException, DataFormatException {
 		private int setInput(long filePos, Inflater inf, byte[] buf)
 				throws IOException {
 			if (file.getFilePointer() != filePos) {
-				file.seek(filePos);
+				seek(filePos);
 			}
 			int n = file.read(buf);
 			if (n < 0) {
@ -538,9 +580,8 @@ public ObjectLoader open(AnyObjectId objectId, int typeHint)
 			}

 			byte[] buf = buffer();
-			RandomAccessFile f = packOut.file;
-			f.seek(obj.getOffset());
-			int cnt = f.read(buf, 0, 20);
+			packOut.seek(obj.getOffset());
+			int cnt = packOut.file.read(buf, 0, 20);
 			if (cnt <= 0) {
 				throw new EOFException(JGitText.get().unexpectedEofInPack);
 			}
@ -574,7 +615,7 @@ public ObjectLoader open(AnyObjectId objectId, int typeHint)
 					return new ObjectLoader.SmallObject(type, data);
 				}
 			}
-			return new StreamLoader(f, type, sz, zpos);
+			return new StreamLoader(type, sz, zpos);
 		}

 		private byte[] inflate(PackedObjectInfo obj, long zpos, int sz)
@ -602,13 +643,11 @@ public void close() {
 		}

 		private class StreamLoader extends ObjectLoader {
-			private final RandomAccessFile file;
 			private final int type;
 			private final long size;
 			private final long pos;

-			StreamLoader(RandomAccessFile file, int type, long size, long pos) {
-				this.file = file;
+			StreamLoader(int type, long size, long pos) {
 				this.type = type;
 				this.size = size;
 				this.pos = pos;
@ -618,14 +657,44 @@ private class StreamLoader extends ObjectLoader {
 			public ObjectStream openStream()
 					throws MissingObjectException, IOException {
 				int bufsz = buffer().length;
-				file.seek(pos);
+				packOut.seek(pos);
+
+				InputStream fileStream = new FilterInputStream(
+						Channels.newInputStream(packOut.file.getChannel())) {
+							// atEnd was already set to false by the previous seek, but it's
+							// technically possible for a caller to call insert on the
+							// inserter in the middle of reading from this stream. Behavior is
+							// undefined in this case, so it would arguably be ok to ignore,
+							// but it's not hard to at least make an attempt to not corrupt
+							// the data.
+							@Override
+							public int read() throws IOException {
+								packOut.atEnd = false;
+								return super.read();
+							}
+
+							@Override
+							public int read(byte[] b) throws IOException {
+								packOut.atEnd = false;
+								return super.read(b);
+							}
+
+							@Override
+							public int read(byte[] b, int off, int len) throws IOException {
+								packOut.atEnd = false;
+								return super.read(b,off,len);
+							}
+
+							@Override
+							public void close() {
+								// Never close underlying RandomAccessFile, which lasts the
+								// lifetime of the enclosing PackStream.
+							}
+						};
 				return new ObjectStream.Filter(
 						type, size,
 						new BufferedInputStream(
-								new InflaterInputStream(
-										Channels.newInputStream(packOut.file.getChannel()),
-										inflater(), bufsz),
-								bufsz));
+								new InflaterInputStream(fileStream, inflater(), bufsz), bufsz));
 			}

 			@Override
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java
@ -428,6 +428,13 @@ public abstract ObjectId insert(int objectType, long length, InputStream in)
 	 * <p>
 	 * The returned reader should return this inserter instance from {@link
 	 * ObjectReader#getCreatedFromInserter()}.
+	 * <p>
+	 * Behavior is undefined if an insert method is called on the inserter in the
+	 * middle of reading from an {@link ObjectStream} opened from this reader. For
+	 * example, reading the remainder of the object may fail, or newly written
+	 * data may even be corrupted. Interleaving whole object reads (including
+	 * streaming reads) with inserts is fine, just not interleaving streaming
+	 * <em>partial</em> object reads with inserts.
 	 *
 	 * @since 3.5
 	 * @return reader for any object, including an object recently inserted by