diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward new file mode 100644 index 000000000..878b167ae Binary files /dev/null and b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward differ diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse new file mode 100644 index 000000000..7ff7a08ad Binary files /dev/null and b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse differ diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java new file mode 100644 index 000000000..d9297fcd9 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2021 Thomas Wolf and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.util.io; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.util.zip.InflaterInputStream; + +import org.junit.Test; + +/** + * Crude tests for the {@link BinaryDeltaInputStream} using delta diffs + * generated by C git. + */ +public class BinaryDeltaInputStreamTest { + + private InputStream getBinaryHunk(String name) { + return this.getClass().getResourceAsStream(name); + } + + @Test + public void testBinaryDelta() throws Exception { + // Prepare our test data + byte[] data = new byte[8192]; + for (int i = 0; i < data.length; i++) { + data[i] = (byte) (255 - (i % 256)); + } + // Same, but with five 'x' inserted in the middle. + int middle = data.length / 2; + byte[] newData = new byte[data.length + 5]; + System.arraycopy(data, 0, newData, 0, middle); + for (int i = 0; i < 5; i++) { + newData[middle + i] = 'x'; + } + System.arraycopy(data, middle, newData, middle + 5, middle); + // delta1.forward has the instructions + // @formatter:off + // COPY 0 4096 + // INSERT 5 xxxxx + // COPY 0 4096 + // @formatter:on + // Note that the way we built newData could be expressed as + // @formatter:off + // COPY 0 4096 + // INSERT 5 xxxxx + // COPY 4096 4096 + // @formatter:on + try (ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryDeltaInputStream input = new BinaryDeltaInputStream(data, + new InflaterInputStream(new BinaryHunkInputStream( + getBinaryHunk("delta1.forward"))))) { + byte[] buf = new byte[1024]; + int n; + while ((n = input.read(buf)) >= 0) { + out.write(buf, 0, n); + } + assertArrayEquals(newData, out.toByteArray()); + assertTrue(input.isFullyConsumed()); + } + // delta1.reverse has the instructions + // @formatter:off + // COPY 0 4096 + // COPY 256 3840 + // COPY 256 256 + // @formatter:on + // Note that there are alternatives, for instance + // @formatter:off + // COPY 0 4096 + // COPY 4101 4096 + // @formatter:on + // or + // @formatter:off + // COPY 0 4096 + // COPY 0 4096 + // @formatter:on + try (ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryDeltaInputStream input = new BinaryDeltaInputStream( + newData, + new InflaterInputStream(new BinaryHunkInputStream( + getBinaryHunk("delta1.reverse"))))) { + long expectedSize = input.getExpectedResultSize(); + assertEquals(data.length, expectedSize); + byte[] buf = new byte[1024]; + int n; + while ((n = input.read(buf)) >= 0) { + out.write(buf, 0, n); + } + assertArrayEquals(data, out.toByteArray()); + assertTrue(input.isFullyConsumed()); + } + } +} diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties index f8c9ea0dc..8b6872d33 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties @@ -43,6 +43,9 @@ base85overflow=Base-85 value overflow, does not fit into 32 bits: 0x{0} base85tooLong=Extra base-85 encoded data for output size of {0} bytes base85tooShort=Base-85 data decoded into less than {0} bytes baseLengthIncorrect=base length incorrect +binaryDeltaBaseLengthMismatch=Binary delta base length does not match, expected {0}, got {1} +binaryDeltaInvalidOffset=Binary delta offset + length too large: {0} + {1} +binaryDeltaInvalidResultLength=Binary delta expected result length is negative binaryHunkDecodeError=Binary hunk, line {0}: invalid input binaryHunkInvalidLength=Binary hunk, line {0}: input corrupt; expected length byte, got 0x{1} binaryHunkLineTooShort=Binary hunk, line {0}: input ended prematurely diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java index 85b40cb6d..3b67b0f62 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java @@ -71,6 +71,9 @@ public static JGitText get() { /***/ public String base85tooLong; /***/ public String base85tooShort; /***/ public String baseLengthIncorrect; + /***/ public String binaryDeltaBaseLengthMismatch; + /***/ public String binaryDeltaInvalidOffset; + /***/ public String binaryDeltaInvalidResultLength; /***/ public String binaryHunkDecodeError; /***/ public String binaryHunkInvalidLength; /***/ public String binaryHunkLineTooShort; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java new file mode 100644 index 000000000..7f0d87f0d --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2021 Thomas Wolf and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.util.io; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.StreamCorruptedException; +import java.text.MessageFormat; + +import org.eclipse.jgit.internal.JGitText; + +/** + * An {@link InputStream} that applies a binary delta to a base on the fly. + *

+ * Delta application to a base needs random access to the base data. The delta + * is expressed as a sequence of copy and insert instructions. A copy + * instruction has the form "COPY fromOffset length" and says "copy length bytes + * from the base, starting at offset fromOffset, to the result". An insert + * instruction has the form "INSERT length" followed by length bytes and says + * "copy the next length bytes from the delta to the result". + *

+ *

+ * These instructions are generated using a content-defined chunking algorithm + * (currently C git uses the standard Rabin variant; but there are others that + * could be used) that identifies equal chunks. It is entirely possible that a + * later copy instruction has a fromOffset that is before the fromOffset of an + * earlier copy instruction. + *

+ *

+ * This makes it impossible to stream the base. + *

+ *

+ * JGit is limited to 2GB maximum size for the base since array indices are + * signed 32bit values. + * + * @since 5.12 + */ +public class BinaryDeltaInputStream extends InputStream { + + private final byte[] base; + + private final InputStream delta; + + private long resultLength; + + private long toDeliver = -1; + + private int fromBase; + + private int fromDelta; + + private int baseOffset = -1; + + /** + * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta} + * to {@code base}. + * + * @param base + * data to apply the delta to + * @param delta + * {@link InputStream} delivering the delta to apply + */ + public BinaryDeltaInputStream(byte[] base, InputStream delta) { + this.base = base; + this.delta = delta; + } + + @Override + public int read() throws IOException { + int b = readNext(); + if (b >= 0) { + toDeliver--; + } + return b; + } + + private void initialize() throws IOException { + long baseSize = readVarInt(delta); + if (baseSize > Integer.MAX_VALUE || baseSize < 0 + || (int) baseSize != base.length) { + throw new IOException(MessageFormat.format( + JGitText.get().binaryDeltaBaseLengthMismatch, + Integer.valueOf(base.length), Long.valueOf(baseSize))); + } + resultLength = readVarInt(delta); + if (resultLength < 0) { + throw new StreamCorruptedException( + JGitText.get().binaryDeltaInvalidResultLength); + } + toDeliver = resultLength; + baseOffset = 0; + } + + private int readNext() throws IOException { + if (baseOffset < 0) { + initialize(); + } + if (fromBase > 0) { + fromBase--; + return base[baseOffset++] & 0xFF; + } else if (fromDelta > 0) { + fromDelta--; + return delta.read(); + } + int command = delta.read(); + if (command < 0) { + return -1; + } + if ((command & 0x80) != 0) { + // Decode offset and length to read from base + long copyOffset = 0; + for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) { + if ((command & i) != 0) { + copyOffset |= ((long) next(delta)) << shift; + } + } + int copySize = 0; + for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) { + if ((command & i) != 0) { + copySize |= next(delta) << shift; + } + } + if (copySize == 0) { + copySize = 0x10000; + } + if (copyOffset > base.length - copySize) { + throw new StreamCorruptedException(MessageFormat.format( + JGitText.get().binaryDeltaInvalidOffset, + Long.valueOf(copyOffset), Integer.valueOf(copySize))); + } + baseOffset = (int) copyOffset; + fromBase = copySize; + return readNext(); + } else if (command != 0) { + // The next 'command' bytes come from the delta + fromDelta = command - 1; + return delta.read(); + } else { + // Zero is reserved + throw new StreamCorruptedException( + JGitText.get().unsupportedCommand0); + } + } + + private int next(InputStream in) throws IOException { + int b = in.read(); + if (b < 0) { + throw new EOFException(); + } + return b; + } + + private long readVarInt(InputStream in) throws IOException { + long val = 0; + int shift = 0; + int b; + do { + b = next(in); + val |= ((long) (b & 0x7f)) << shift; + shift += 7; + } while ((b & 0x80) != 0); + return val; + } + + /** + * Tells the expected size of the final result. + * + * @return the size + * @throws IOException + * if the size cannot be determined from {@code delta} + */ + public long getExpectedResultSize() throws IOException { + if (baseOffset < 0) { + initialize(); + } + return resultLength; + } + + /** + * Tells whether the delta has been fully consumed, and the expected number + * of bytes for the combined result have been read from this + * {@link BinaryDeltaInputStream}. + * + * @return whether delta application was successful + */ + public boolean isFullyConsumed() { + try { + return toDeliver == 0 && delta.read() < 0; + } catch (IOException e) { + return toDeliver == 0; + } + } + + @Override + public void close() throws IOException { + delta.close(); + } +}