Merge branch 'stable-4.9'
* stable-4.9:
  - PackInserter: Ensure objects are written at the end of the pack
  - ObjectInserter: Add warning about mixing read-back with writes

Change-Id: I308e7c1c6b72e8d4d9b5d0f4f51e9815fc92d7d7
This commit is contained in:
commit
c1a02f497a
|
@ -67,6 +67,7 @@
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
@ -440,6 +441,53 @@ public void readBackFallsBackToRepo() throws Exception {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readBackSmallObjectBeforeLargeObject() throws Exception {
|
||||
WindowCacheConfig wcc = new WindowCacheConfig();
|
||||
wcc.setStreamFileThreshold(1024);
|
||||
wcc.install();
|
||||
|
||||
ObjectId blobId1;
|
||||
ObjectId blobId2;
|
||||
ObjectId largeId;
|
||||
byte[] blob1 = Constants.encode("blob1");
|
||||
byte[] blob2 = Constants.encode("blob2");
|
||||
byte[] largeBlob = newLargeBlob();
|
||||
try (PackInserter ins = newInserter()) {
|
||||
assertThat(blob1.length, lessThan(ins.getBufferSize()));
|
||||
assertThat(largeBlob.length, greaterThan(ins.getBufferSize()));
|
||||
|
||||
blobId1 = ins.insert(OBJ_BLOB, blob1);
|
||||
largeId = ins.insert(OBJ_BLOB, largeBlob);
|
||||
|
||||
try (ObjectReader reader = ins.newReader()) {
|
||||
// A previous bug did not reset the file pointer to EOF after reading
|
||||
// back. We need to seek to something further back than a full buffer,
|
||||
// since the read-back code eagerly reads a full buffer's worth of data
|
||||
// from the file to pass to the inflater. If we seeked back just a small
|
||||
// amount, this step would consume the rest of the file, so the file
|
||||
// pointer would coincidentally end up back at EOF, hiding the bug.
|
||||
assertBlob(reader, blobId1, blob1);
|
||||
}
|
||||
|
||||
blobId2 = ins.insert(OBJ_BLOB, blob2);
|
||||
|
||||
try (ObjectReader reader = ins.newReader()) {
|
||||
assertBlob(reader, blobId1, blob1);
|
||||
assertBlob(reader, blobId2, blob2);
|
||||
assertBlob(reader, largeId, largeBlob);
|
||||
}
|
||||
|
||||
ins.flush();
|
||||
}
|
||||
|
||||
try (ObjectReader reader = db.newObjectReader()) {
|
||||
assertBlob(reader, blobId1, blob1);
|
||||
assertBlob(reader, blobId2, blob2);
|
||||
assertBlob(reader, largeId, largeBlob);
|
||||
}
|
||||
}
|
||||
|
||||
private List<PackFile> listPacks() throws Exception {
|
||||
List<PackFile> fromOpenDb = listPacks(db);
|
||||
List<PackFile> reopened;
|
||||
|
@ -470,9 +518,7 @@ private PackInserter newInserter() {
|
|||
|
||||
private static byte[] newLargeBlob() {
|
||||
byte[] blob = new byte[10240];
|
||||
for (int i = 0; i < blob.length; i++) {
|
||||
blob[i] = (byte) ('0' + (i % 10));
|
||||
}
|
||||
new Random(0).nextBytes(blob);
|
||||
return blob;
|
||||
}
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@
|
|||
import java.io.EOFException;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FilterInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
@ -264,7 +265,6 @@ public void flush() throws IOException {
|
|||
try {
|
||||
packHash = packOut.finishPack();
|
||||
} finally {
|
||||
packOut.close();
|
||||
packOut = null;
|
||||
}
|
||||
|
||||
|
@ -369,6 +369,20 @@ private Inflater inflater() {
|
|||
return cachedInflater;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream that writes to a pack file.
|
||||
* <p>
|
||||
* Backed by two views of the same open file descriptor: a random-access file,
|
||||
* and an output stream. Seeking in the file causes subsequent writes to the
|
||||
* output stream to occur wherever the file pointer is pointing, so we need to
|
||||
* take care to always seek to the end of the file before writing a new
|
||||
* object.
|
||||
* <p>
|
||||
* Callers should always use {@link #seek(long)} to seek, rather than reaching
|
||||
* into the file member. As long as this contract is followed, calls to {@link
|
||||
* #write(byte[], int, int)} are guaranteed to write at the end of the file,
|
||||
* even if there have been intermediate seeks.
|
||||
*/
|
||||
private class PackStream extends OutputStream {
|
||||
final byte[] hdrBuf;
|
||||
final CRC32 crc32;
|
||||
|
@ -378,6 +392,8 @@ private class PackStream extends OutputStream {
|
|||
private final CountingOutputStream out;
|
||||
private final Deflater deflater;
|
||||
|
||||
private boolean atEnd;
|
||||
|
||||
PackStream(File pack) throws IOException {
|
||||
file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$
|
||||
out = new CountingOutputStream(new FileOutputStream(file.getFD()));
|
||||
|
@ -385,12 +401,23 @@ private class PackStream extends OutputStream {
|
|||
compress = new DeflaterOutputStream(this, deflater, 8192);
|
||||
hdrBuf = new byte[32];
|
||||
crc32 = new CRC32();
|
||||
atEnd = true;
|
||||
}
|
||||
|
||||
long getOffset() {
|
||||
// This value is accurate as long as we only ever write to the end of the
|
||||
// file, and don't seek back to overwrite any previous segments. Although
|
||||
// this is subtle, storing the stream counter this way is still preferable
|
||||
// to returning file.length() here, as it avoids a syscall and possible
|
||||
// IOException.
|
||||
return out.getCount();
|
||||
}
|
||||
|
||||
void seek(long offset) throws IOException {
|
||||
file.seek(offset);
|
||||
atEnd = false;
|
||||
}
|
||||
|
||||
void beginObject(int objectType, long length) throws IOException {
|
||||
crc32.reset();
|
||||
deflater.reset();
|
||||
|
@ -419,34 +446,49 @@ public void write(final int b) throws IOException {
|
|||
@Override
|
||||
public void write(byte[] data, int off, int len) throws IOException {
|
||||
crc32.update(data, off, len);
|
||||
if (!atEnd) {
|
||||
file.seek(file.length());
|
||||
atEnd = true;
|
||||
}
|
||||
out.write(data, off, len);
|
||||
}
|
||||
|
||||
byte[] finishPack() throws IOException {
|
||||
// Overwrite placeholder header with actual object count, then hash.
|
||||
file.seek(0);
|
||||
write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));
|
||||
// Overwrite placeholder header with actual object count, then hash. This
|
||||
// method intentionally uses direct seek/write calls rather than the
|
||||
// wrappers which keep track of atEnd. This leaves atEnd, the file
|
||||
// pointer, and out's counter in an inconsistent state; that's ok, since
|
||||
// this method closes the file anyway.
|
||||
try {
|
||||
file.seek(0);
|
||||
out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));
|
||||
|
||||
byte[] buf = buffer();
|
||||
SHA1 md = digest().reset();
|
||||
file.seek(0);
|
||||
while (true) {
|
||||
int r = file.read(buf);
|
||||
if (r < 0) {
|
||||
break;
|
||||
byte[] buf = buffer();
|
||||
SHA1 md = digest().reset();
|
||||
file.seek(0);
|
||||
while (true) {
|
||||
int r = file.read(buf);
|
||||
if (r < 0) {
|
||||
break;
|
||||
}
|
||||
md.update(buf, 0, r);
|
||||
}
|
||||
md.update(buf, 0, r);
|
||||
byte[] packHash = md.digest();
|
||||
out.write(packHash, 0, packHash.length);
|
||||
return packHash;
|
||||
} finally {
|
||||
close();
|
||||
}
|
||||
byte[] packHash = md.digest();
|
||||
out.write(packHash, 0, packHash.length);
|
||||
return packHash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
deflater.end();
|
||||
out.close();
|
||||
file.close();
|
||||
try {
|
||||
out.close();
|
||||
} finally {
|
||||
file.close();
|
||||
}
|
||||
}
|
||||
|
||||
byte[] inflate(long filePos, int len) throws IOException, DataFormatException {
|
||||
|
@ -477,7 +519,7 @@ byte[] inflate(long filePos, int len) throws IOException, DataFormatException {
|
|||
private int setInput(long filePos, Inflater inf, byte[] buf)
|
||||
throws IOException {
|
||||
if (file.getFilePointer() != filePos) {
|
||||
file.seek(filePos);
|
||||
seek(filePos);
|
||||
}
|
||||
int n = file.read(buf);
|
||||
if (n < 0) {
|
||||
|
@ -538,9 +580,8 @@ public ObjectLoader open(AnyObjectId objectId, int typeHint)
|
|||
}
|
||||
|
||||
byte[] buf = buffer();
|
||||
RandomAccessFile f = packOut.file;
|
||||
f.seek(obj.getOffset());
|
||||
int cnt = f.read(buf, 0, 20);
|
||||
packOut.seek(obj.getOffset());
|
||||
int cnt = packOut.file.read(buf, 0, 20);
|
||||
if (cnt <= 0) {
|
||||
throw new EOFException(JGitText.get().unexpectedEofInPack);
|
||||
}
|
||||
|
@ -574,7 +615,7 @@ public ObjectLoader open(AnyObjectId objectId, int typeHint)
|
|||
return new ObjectLoader.SmallObject(type, data);
|
||||
}
|
||||
}
|
||||
return new StreamLoader(f, type, sz, zpos);
|
||||
return new StreamLoader(type, sz, zpos);
|
||||
}
|
||||
|
||||
private byte[] inflate(PackedObjectInfo obj, long zpos, int sz)
|
||||
|
@ -602,13 +643,11 @@ public void close() {
|
|||
}
|
||||
|
||||
private class StreamLoader extends ObjectLoader {
|
||||
private final RandomAccessFile file;
|
||||
private final int type;
|
||||
private final long size;
|
||||
private final long pos;
|
||||
|
||||
StreamLoader(RandomAccessFile file, int type, long size, long pos) {
|
||||
this.file = file;
|
||||
StreamLoader(int type, long size, long pos) {
|
||||
this.type = type;
|
||||
this.size = size;
|
||||
this.pos = pos;
|
||||
|
@ -618,14 +657,44 @@ private class StreamLoader extends ObjectLoader {
|
|||
public ObjectStream openStream()
|
||||
throws MissingObjectException, IOException {
|
||||
int bufsz = buffer().length;
|
||||
file.seek(pos);
|
||||
packOut.seek(pos);
|
||||
|
||||
InputStream fileStream = new FilterInputStream(
|
||||
Channels.newInputStream(packOut.file.getChannel())) {
|
||||
// atEnd was already set to false by the previous seek, but it's
|
||||
// technically possible for a caller to call insert on the
|
||||
// inserter in the middle of reading from this stream. Behavior is
|
||||
// undefined in this case, so it would arguably be ok to ignore,
|
||||
// but it's not hard to at least make an attempt to not corrupt
|
||||
// the data.
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
packOut.atEnd = false;
|
||||
return super.read();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte[] b) throws IOException {
|
||||
packOut.atEnd = false;
|
||||
return super.read(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte[] b, int off, int len) throws IOException {
|
||||
packOut.atEnd = false;
|
||||
return super.read(b,off,len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// Never close underlying RandomAccessFile, which lasts the
|
||||
// lifetime of the enclosing PackStream.
|
||||
}
|
||||
};
|
||||
return new ObjectStream.Filter(
|
||||
type, size,
|
||||
new BufferedInputStream(
|
||||
new InflaterInputStream(
|
||||
Channels.newInputStream(packOut.file.getChannel()),
|
||||
inflater(), bufsz),
|
||||
bufsz));
|
||||
new InflaterInputStream(fileStream, inflater(), bufsz), bufsz));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -428,6 +428,13 @@ public abstract ObjectId insert(int objectType, long length, InputStream in)
|
|||
* <p>
|
||||
* The returned reader should return this inserter instance from {@link
|
||||
* ObjectReader#getCreatedFromInserter()}.
|
||||
* <p>
|
||||
* Behavior is undefined if an insert method is called on the inserter in the
|
||||
* middle of reading from an {@link ObjectStream} opened from this reader. For
|
||||
* example, reading the remainder of the object may fail, or newly written
|
||||
* data may even be corrupted. Interleaving whole object reads (including
|
||||
* streaming reads) with inserts is fine, just not interleaving streaming
|
||||
* <em>partial</em> object reads with inserts.
|
||||
*
|
||||
* @since 3.5
|
||||
* @return reader for any object, including an object recently inserted by
|
||||
|
|
Loading…
Reference in New Issue