ApplyCommand: use byte arrays for text patches, not strings
Instead of converting the patch bytes to strings apply the patch on byte level, like C git does. Converting the input lines and the hunk lines from bytes to strings and then applying the patch based on strings may give surprising results if a patch converts a text file from one encoding to another. Moreover, in the end we don't know which encoding to use to write the result. Previous code just wrote the result as UTF-8, which forcibly changed the encoding if the original input had some other encoding (even if the patch had the same non-UTF-8 encoding). It was also wrong if the input was UTF-8, and the patch should have changed the encoding to something else. So use ByteBuffers instead of Strings. This has the additional advantage that all these ByteBuffers can share the underlying byte arrays of the input and of the patch, so it also reduces memory consumption. Change-Id: I450975f2ba0e7d0bec8973e3113cc2e7aea187ee Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
This commit is contained in:
parent
10ac449911
commit
76b76a6048
|
@ -1,5 +1,6 @@
|
||||||
#Sat Dec 20 21:21:24 CET 2008
|
|
||||||
eclipse.preferences.version=1
|
eclipse.preferences.version=1
|
||||||
|
encoding//tst-rsrc/org/eclipse/jgit/diff/umlaut.patch=ISO-8859-1
|
||||||
|
encoding//tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage=ISO-8859-1
|
||||||
encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_BothISO88591.patch=ISO-8859-1
|
encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_BothISO88591.patch=ISO-8859-1
|
||||||
encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_Convert.patch=ISO-8859-1
|
encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_Convert.patch=ISO-8859-1
|
||||||
encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_DiffCc.patch=ISO-8859-1
|
encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_DiffCc.patch=ISO-8859-1
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -287,6 +287,14 @@ public void testBinaryLiteralAdd() throws Exception {
|
||||||
checkBinary("literal_add", false);
|
checkBinary("literal_add", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEncodingChange() throws Exception {
|
||||||
|
// This is a text patch that changes a file containing ÄÖÜ in UTF-8 to
|
||||||
|
// the same characters in ISO-8859-1. The patch file itself uses mixed
|
||||||
|
// encoding. Since checkFile() works with strings use the binary check.
|
||||||
|
checkBinary("umlaut", true);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAddA1() throws Exception {
|
public void testAddA1() throws Exception {
|
||||||
ApplyResult result = init("A1", false, true);
|
ApplyResult result = init("A1", false, true);
|
||||||
|
|
|
@ -10,7 +10,6 @@
|
||||||
package org.eclipse.jgit.api;
|
package org.eclipse.jgit.api;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.io.BufferedWriter;
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
@ -18,9 +17,7 @@
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.io.OutputStreamWriter;
|
import java.nio.ByteBuffer;
|
||||||
import java.io.Writer;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.StandardCopyOption;
|
import java.nio.file.StandardCopyOption;
|
||||||
import java.text.MessageFormat;
|
import java.text.MessageFormat;
|
||||||
|
@ -549,11 +546,11 @@ private void applyBinary(Repository repository, String path, File f,
|
||||||
private void applyText(Repository repository, String path, RawText rt,
|
private void applyText(Repository repository, String path, RawText rt,
|
||||||
File f, FileHeader fh, CheckoutMetadata checkOut)
|
File f, FileHeader fh, CheckoutMetadata checkOut)
|
||||||
throws IOException, PatchApplyException {
|
throws IOException, PatchApplyException {
|
||||||
List<String> oldLines = new ArrayList<>(rt.size());
|
List<ByteBuffer> oldLines = new ArrayList<>(rt.size());
|
||||||
for (int i = 0; i < rt.size(); i++) {
|
for (int i = 0; i < rt.size(); i++) {
|
||||||
oldLines.add(rt.getString(i));
|
oldLines.add(rt.getRawString(i));
|
||||||
}
|
}
|
||||||
List<String> newLines = new ArrayList<>(oldLines);
|
List<ByteBuffer> newLines = new ArrayList<>(oldLines);
|
||||||
int afterLastHunk = 0;
|
int afterLastHunk = 0;
|
||||||
int lineNumberShift = 0;
|
int lineNumberShift = 0;
|
||||||
int lastHunkNewLine = -1;
|
int lastHunkNewLine = -1;
|
||||||
|
@ -571,9 +568,9 @@ private void applyText(Repository repository, String path, RawText rt,
|
||||||
b.length);
|
b.length);
|
||||||
RawText hrt = new RawText(b);
|
RawText hrt = new RawText(b);
|
||||||
|
|
||||||
List<String> hunkLines = new ArrayList<>(hrt.size());
|
List<ByteBuffer> hunkLines = new ArrayList<>(hrt.size());
|
||||||
for (int i = 0; i < hrt.size(); i++) {
|
for (int i = 0; i < hrt.size(); i++) {
|
||||||
hunkLines.add(hrt.getString(i));
|
hunkLines.add(hrt.getRawString(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hh.getNewStartLine() == 0) {
|
if (hh.getNewStartLine() == 0) {
|
||||||
|
@ -642,8 +639,8 @@ && canApplyAt(hunkLines, newLines, 0)) {
|
||||||
lineNumberShift = applyAt - hh.getNewStartLine() + 1;
|
lineNumberShift = applyAt - hh.getNewStartLine() + 1;
|
||||||
int sz = hunkLines.size();
|
int sz = hunkLines.size();
|
||||||
for (int j = 1; j < sz; j++) {
|
for (int j = 1; j < sz; j++) {
|
||||||
String hunkLine = hunkLines.get(j);
|
ByteBuffer hunkLine = hunkLines.get(j);
|
||||||
switch (hunkLine.charAt(0)) {
|
switch (hunkLine.array()[hunkLine.position()]) {
|
||||||
case ' ':
|
case ' ':
|
||||||
applyAt++;
|
applyAt++;
|
||||||
break;
|
break;
|
||||||
|
@ -651,7 +648,7 @@ && canApplyAt(hunkLines, newLines, 0)) {
|
||||||
newLines.remove(applyAt);
|
newLines.remove(applyAt);
|
||||||
break;
|
break;
|
||||||
case '+':
|
case '+':
|
||||||
newLines.add(applyAt++, hunkLine.substring(1));
|
newLines.add(applyAt++, slice(hunkLine, 1));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
@ -660,28 +657,29 @@ && canApplyAt(hunkLines, newLines, 0)) {
|
||||||
afterLastHunk = applyAt;
|
afterLastHunk = applyAt;
|
||||||
}
|
}
|
||||||
if (!isNoNewlineAtEndOfFile(fh)) {
|
if (!isNoNewlineAtEndOfFile(fh)) {
|
||||||
newLines.add(""); //$NON-NLS-1$
|
newLines.add(null);
|
||||||
}
|
}
|
||||||
if (!rt.isMissingNewlineAtEnd()) {
|
if (!rt.isMissingNewlineAtEnd()) {
|
||||||
oldLines.add(""); //$NON-NLS-1$
|
oldLines.add(null);
|
||||||
}
|
}
|
||||||
if (!isChanged(oldLines, newLines)) {
|
if (oldLines.equals(newLines)) {
|
||||||
return; // Don't touch the file
|
return; // Unchanged; don't touch the file
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: forcing UTF-8 is a bit strange and may lead to re-coding if the
|
|
||||||
// input was some other encoding, but it's what previous versions of
|
|
||||||
// this code used. (Even earlier the code used the default encoding,
|
|
||||||
// which has the same problem.) Perhaps using bytes instead of Strings
|
|
||||||
// for the lines would be better.
|
|
||||||
TemporaryBuffer buffer = new TemporaryBuffer.LocalFile(null);
|
TemporaryBuffer buffer = new TemporaryBuffer.LocalFile(null);
|
||||||
try {
|
try {
|
||||||
try (Writer w = new BufferedWriter(
|
try (OutputStream out = buffer) {
|
||||||
new OutputStreamWriter(buffer, StandardCharsets.UTF_8))) {
|
for (Iterator<ByteBuffer> l = newLines.iterator(); l
|
||||||
for (Iterator<String> l = newLines.iterator(); l.hasNext();) {
|
.hasNext();) {
|
||||||
w.write(l.next());
|
ByteBuffer line = l.next();
|
||||||
|
if (line == null) {
|
||||||
|
// Must be the marker for the final newline
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out.write(line.array(), line.position(),
|
||||||
|
line.limit() - line.position());
|
||||||
if (l.hasNext()) {
|
if (l.hasNext()) {
|
||||||
w.write('\n');
|
out.write('\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -698,18 +696,18 @@ && canApplyAt(hunkLines, newLines, 0)) {
|
||||||
fh.getNewMode() == FileMode.EXECUTABLE_FILE);
|
fh.getNewMode() == FileMode.EXECUTABLE_FILE);
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean canApplyAt(List<String> hunkLines, List<String> newLines,
|
private boolean canApplyAt(List<ByteBuffer> hunkLines,
|
||||||
int line) {
|
List<ByteBuffer> newLines, int line) {
|
||||||
int sz = hunkLines.size();
|
int sz = hunkLines.size();
|
||||||
int limit = newLines.size();
|
int limit = newLines.size();
|
||||||
int pos = line;
|
int pos = line;
|
||||||
for (int j = 1; j < sz; j++) {
|
for (int j = 1; j < sz; j++) {
|
||||||
String hunkLine = hunkLines.get(j);
|
ByteBuffer hunkLine = hunkLines.get(j);
|
||||||
switch (hunkLine.charAt(0)) {
|
switch (hunkLine.array()[hunkLine.position()]) {
|
||||||
case ' ':
|
case ' ':
|
||||||
case '-':
|
case '-':
|
||||||
if (pos >= limit
|
if (pos >= limit
|
||||||
|| !newLines.get(pos).equals(hunkLine.substring(1))) {
|
|| !newLines.get(pos).equals(slice(hunkLine, 1))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
pos++;
|
pos++;
|
||||||
|
@ -721,13 +719,9 @@ private boolean canApplyAt(List<String> hunkLines, List<String> newLines,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isChanged(List<String> ol, List<String> nl) {
|
private ByteBuffer slice(ByteBuffer b, int off) {
|
||||||
if (ol.size() != nl.size())
|
int newOffset = b.position() + off;
|
||||||
return true;
|
return ByteBuffer.wrap(b.array(), newOffset, b.limit() - newOffset);
|
||||||
for (int i = 0; i < ol.size(); i++)
|
|
||||||
if (!ol.get(i).equals(nl.get(i)))
|
|
||||||
return true;
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isNoNewlineAtEndOfFile(FileHeader fh) {
|
private boolean isNoNewlineAtEndOfFile(FileHeader fh) {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2009, Google Inc.
|
* Copyright (C) 2009, Google Inc.
|
||||||
* Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de> and others
|
* Copyright (C) 2008-2021, Johannes E. Schindelin <johannes.schindelin@gmx.de> and others
|
||||||
*
|
*
|
||||||
* This program and the accompanying materials are made available under the
|
* This program and the accompanying materials are made available under the
|
||||||
* terms of the Eclipse Distribution License v. 1.0 which is available at
|
* terms of the Eclipse Distribution License v. 1.0 which is available at
|
||||||
|
@ -16,6 +16,7 @@
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
|
||||||
import org.eclipse.jgit.errors.BinaryBlobException;
|
import org.eclipse.jgit.errors.BinaryBlobException;
|
||||||
import org.eclipse.jgit.errors.LargeObjectException;
|
import org.eclipse.jgit.errors.LargeObjectException;
|
||||||
|
@ -164,6 +165,27 @@ public String getString(int i) {
|
||||||
return getString(i, i + 1, true);
|
return getString(i, i + 1, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the raw text for a single line.
|
||||||
|
*
|
||||||
|
* @param i
|
||||||
|
* index of the line to extract. Note this is 0-based, so line
|
||||||
|
* number 1 is actually index 0.
|
||||||
|
* @return the text for the line, without a trailing LF, as a
|
||||||
|
* {@link ByteBuffer} that is backed by a slice of the
|
||||||
|
* {@link #getRawContent() raw content}, with the buffer's position
|
||||||
|
* on the start of the line and the limit at the end.
|
||||||
|
* @since 5.12
|
||||||
|
*/
|
||||||
|
public ByteBuffer getRawString(int i) {
|
||||||
|
int s = getStart(i);
|
||||||
|
int e = getEnd(i);
|
||||||
|
if (e > 0 && content[e - 1] == '\n') {
|
||||||
|
e--;
|
||||||
|
}
|
||||||
|
return ByteBuffer.wrap(content, s, e - s);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the text for a region of lines.
|
* Get the text for a region of lines.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue