ApplyCommand: add a stream to apply a delta patch
Add a new BinaryDeltaInputStream that applies a delta provided by another InputStream to a given base. Because delta application needs random access to the base, the base itself cannot be yet another InputStream. But at least this enables streaming of the result. Add a simple test using delta hunks generated by C git. Bug: 371725 Change-Id: Ibd26fa2f49860737ad5c5387f7f4870d3e85e628 Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch> Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
This commit is contained in:
parent
2eb54afe6a
commit
0fe794a433
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Eclipse Distribution License v. 1.0 which is available at
|
||||
* https://www.eclipse.org/org/documents/edl-v10.php.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
package org.eclipse.jgit.util.io;
|
||||
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.zip.InflaterInputStream;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Crude tests for the {@link BinaryDeltaInputStream} using delta diffs
|
||||
* generated by C git.
|
||||
*/
|
||||
public class BinaryDeltaInputStreamTest {
|
||||
|
||||
private InputStream getBinaryHunk(String name) {
|
||||
return this.getClass().getResourceAsStream(name);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBinaryDelta() throws Exception {
|
||||
// Prepare our test data
|
||||
byte[] data = new byte[8192];
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
data[i] = (byte) (255 - (i % 256));
|
||||
}
|
||||
// Same, but with five 'x' inserted in the middle.
|
||||
int middle = data.length / 2;
|
||||
byte[] newData = new byte[data.length + 5];
|
||||
System.arraycopy(data, 0, newData, 0, middle);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
newData[middle + i] = 'x';
|
||||
}
|
||||
System.arraycopy(data, middle, newData, middle + 5, middle);
|
||||
// delta1.forward has the instructions
|
||||
// @formatter:off
|
||||
// COPY 0 4096
|
||||
// INSERT 5 xxxxx
|
||||
// COPY 0 4096
|
||||
// @formatter:on
|
||||
// Note that the way we built newData could be expressed as
|
||||
// @formatter:off
|
||||
// COPY 0 4096
|
||||
// INSERT 5 xxxxx
|
||||
// COPY 4096 4096
|
||||
// @formatter:on
|
||||
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
BinaryDeltaInputStream input = new BinaryDeltaInputStream(data,
|
||||
new InflaterInputStream(new BinaryHunkInputStream(
|
||||
getBinaryHunk("delta1.forward"))))) {
|
||||
byte[] buf = new byte[1024];
|
||||
int n;
|
||||
while ((n = input.read(buf)) >= 0) {
|
||||
out.write(buf, 0, n);
|
||||
}
|
||||
assertArrayEquals(newData, out.toByteArray());
|
||||
assertTrue(input.isFullyConsumed());
|
||||
}
|
||||
// delta1.reverse has the instructions
|
||||
// @formatter:off
|
||||
// COPY 0 4096
|
||||
// COPY 256 3840
|
||||
// COPY 256 256
|
||||
// @formatter:on
|
||||
// Note that there are alternatives, for instance
|
||||
// @formatter:off
|
||||
// COPY 0 4096
|
||||
// COPY 4101 4096
|
||||
// @formatter:on
|
||||
// or
|
||||
// @formatter:off
|
||||
// COPY 0 4096
|
||||
// COPY 0 4096
|
||||
// @formatter:on
|
||||
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
BinaryDeltaInputStream input = new BinaryDeltaInputStream(
|
||||
newData,
|
||||
new InflaterInputStream(new BinaryHunkInputStream(
|
||||
getBinaryHunk("delta1.reverse"))))) {
|
||||
long expectedSize = input.getExpectedResultSize();
|
||||
assertEquals(data.length, expectedSize);
|
||||
byte[] buf = new byte[1024];
|
||||
int n;
|
||||
while ((n = input.read(buf)) >= 0) {
|
||||
out.write(buf, 0, n);
|
||||
}
|
||||
assertArrayEquals(data, out.toByteArray());
|
||||
assertTrue(input.isFullyConsumed());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -43,6 +43,9 @@ base85overflow=Base-85 value overflow, does not fit into 32 bits: 0x{0}
|
|||
base85tooLong=Extra base-85 encoded data for output size of {0} bytes
|
||||
base85tooShort=Base-85 data decoded into less than {0} bytes
|
||||
baseLengthIncorrect=base length incorrect
|
||||
binaryDeltaBaseLengthMismatch=Binary delta base length does not match, expected {0}, got {1}
|
||||
binaryDeltaInvalidOffset=Binary delta offset + length too large: {0} + {1}
|
||||
binaryDeltaInvalidResultLength=Binary delta expected result length is negative
|
||||
binaryHunkDecodeError=Binary hunk, line {0}: invalid input
|
||||
binaryHunkInvalidLength=Binary hunk, line {0}: input corrupt; expected length byte, got 0x{1}
|
||||
binaryHunkLineTooShort=Binary hunk, line {0}: input ended prematurely
|
||||
|
|
|
@ -71,6 +71,9 @@ public static JGitText get() {
|
|||
/***/ public String base85tooLong;
|
||||
/***/ public String base85tooShort;
|
||||
/***/ public String baseLengthIncorrect;
|
||||
/***/ public String binaryDeltaBaseLengthMismatch;
|
||||
/***/ public String binaryDeltaInvalidOffset;
|
||||
/***/ public String binaryDeltaInvalidResultLength;
|
||||
/***/ public String binaryHunkDecodeError;
|
||||
/***/ public String binaryHunkInvalidLength;
|
||||
/***/ public String binaryHunkLineTooShort;
|
||||
|
|
|
@ -0,0 +1,206 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Eclipse Distribution License v. 1.0 which is available at
|
||||
* https://www.eclipse.org/org/documents/edl-v10.php.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
package org.eclipse.jgit.util.io;
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.StreamCorruptedException;
|
||||
import java.text.MessageFormat;
|
||||
|
||||
import org.eclipse.jgit.internal.JGitText;
|
||||
|
||||
/**
|
||||
* An {@link InputStream} that applies a binary delta to a base on the fly.
|
||||
* <p>
|
||||
* Delta application to a base needs random access to the base data. The delta
|
||||
* is expressed as a sequence of copy and insert instructions. A copy
|
||||
* instruction has the form "COPY fromOffset length" and says "copy length bytes
|
||||
* from the base, starting at offset fromOffset, to the result". An insert
|
||||
* instruction has the form "INSERT length" followed by length bytes and says
|
||||
* "copy the next length bytes from the delta to the result".
|
||||
* </p>
|
||||
* <p>
|
||||
* These instructions are generated using a content-defined chunking algorithm
|
||||
* (currently C git uses the standard Rabin variant; but there are others that
|
||||
* could be used) that identifies equal chunks. It is entirely possible that a
|
||||
* later copy instruction has a fromOffset that is before the fromOffset of an
|
||||
* earlier copy instruction.
|
||||
* </p>
|
||||
* <p>
|
||||
* This makes it impossible to stream the base.
|
||||
* </p>
|
||||
* <p>
|
||||
* JGit is limited to 2GB maximum size for the base since array indices are
|
||||
* signed 32bit values.
|
||||
*
|
||||
* @since 5.12
|
||||
*/
|
||||
public class BinaryDeltaInputStream extends InputStream {
|
||||
|
||||
private final byte[] base;
|
||||
|
||||
private final InputStream delta;
|
||||
|
||||
private long resultLength;
|
||||
|
||||
private long toDeliver = -1;
|
||||
|
||||
private int fromBase;
|
||||
|
||||
private int fromDelta;
|
||||
|
||||
private int baseOffset = -1;
|
||||
|
||||
/**
|
||||
* Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
|
||||
* to {@code base}.
|
||||
*
|
||||
* @param base
|
||||
* data to apply the delta to
|
||||
* @param delta
|
||||
* {@link InputStream} delivering the delta to apply
|
||||
*/
|
||||
public BinaryDeltaInputStream(byte[] base, InputStream delta) {
|
||||
this.base = base;
|
||||
this.delta = delta;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
int b = readNext();
|
||||
if (b >= 0) {
|
||||
toDeliver--;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
private void initialize() throws IOException {
|
||||
long baseSize = readVarInt(delta);
|
||||
if (baseSize > Integer.MAX_VALUE || baseSize < 0
|
||||
|| (int) baseSize != base.length) {
|
||||
throw new IOException(MessageFormat.format(
|
||||
JGitText.get().binaryDeltaBaseLengthMismatch,
|
||||
Integer.valueOf(base.length), Long.valueOf(baseSize)));
|
||||
}
|
||||
resultLength = readVarInt(delta);
|
||||
if (resultLength < 0) {
|
||||
throw new StreamCorruptedException(
|
||||
JGitText.get().binaryDeltaInvalidResultLength);
|
||||
}
|
||||
toDeliver = resultLength;
|
||||
baseOffset = 0;
|
||||
}
|
||||
|
||||
private int readNext() throws IOException {
|
||||
if (baseOffset < 0) {
|
||||
initialize();
|
||||
}
|
||||
if (fromBase > 0) {
|
||||
fromBase--;
|
||||
return base[baseOffset++] & 0xFF;
|
||||
} else if (fromDelta > 0) {
|
||||
fromDelta--;
|
||||
return delta.read();
|
||||
}
|
||||
int command = delta.read();
|
||||
if (command < 0) {
|
||||
return -1;
|
||||
}
|
||||
if ((command & 0x80) != 0) {
|
||||
// Decode offset and length to read from base
|
||||
long copyOffset = 0;
|
||||
for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
|
||||
if ((command & i) != 0) {
|
||||
copyOffset |= ((long) next(delta)) << shift;
|
||||
}
|
||||
}
|
||||
int copySize = 0;
|
||||
for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
|
||||
if ((command & i) != 0) {
|
||||
copySize |= next(delta) << shift;
|
||||
}
|
||||
}
|
||||
if (copySize == 0) {
|
||||
copySize = 0x10000;
|
||||
}
|
||||
if (copyOffset > base.length - copySize) {
|
||||
throw new StreamCorruptedException(MessageFormat.format(
|
||||
JGitText.get().binaryDeltaInvalidOffset,
|
||||
Long.valueOf(copyOffset), Integer.valueOf(copySize)));
|
||||
}
|
||||
baseOffset = (int) copyOffset;
|
||||
fromBase = copySize;
|
||||
return readNext();
|
||||
} else if (command != 0) {
|
||||
// The next 'command' bytes come from the delta
|
||||
fromDelta = command - 1;
|
||||
return delta.read();
|
||||
} else {
|
||||
// Zero is reserved
|
||||
throw new StreamCorruptedException(
|
||||
JGitText.get().unsupportedCommand0);
|
||||
}
|
||||
}
|
||||
|
||||
private int next(InputStream in) throws IOException {
|
||||
int b = in.read();
|
||||
if (b < 0) {
|
||||
throw new EOFException();
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
private long readVarInt(InputStream in) throws IOException {
|
||||
long val = 0;
|
||||
int shift = 0;
|
||||
int b;
|
||||
do {
|
||||
b = next(in);
|
||||
val |= ((long) (b & 0x7f)) << shift;
|
||||
shift += 7;
|
||||
} while ((b & 0x80) != 0);
|
||||
return val;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells the expected size of the final result.
|
||||
*
|
||||
* @return the size
|
||||
* @throws IOException
|
||||
* if the size cannot be determined from {@code delta}
|
||||
*/
|
||||
public long getExpectedResultSize() throws IOException {
|
||||
if (baseOffset < 0) {
|
||||
initialize();
|
||||
}
|
||||
return resultLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether the delta has been fully consumed, and the expected number
|
||||
* of bytes for the combined result have been read from this
|
||||
* {@link BinaryDeltaInputStream}.
|
||||
*
|
||||
* @return whether delta application was successful
|
||||
*/
|
||||
public boolean isFullyConsumed() {
|
||||
try {
|
||||
return toDeliver == 0 && delta.read() < 0;
|
||||
} catch (IOException e) {
|
||||
return toDeliver == 0;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
delta.close();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue