diff --git a/org.eclipse.jgit.test/.settings/org.eclipse.core.resources.prefs b/org.eclipse.jgit.test/.settings/org.eclipse.core.resources.prefs index 6a9621db1..cddb99d1d 100644 --- a/org.eclipse.jgit.test/.settings/org.eclipse.core.resources.prefs +++ b/org.eclipse.jgit.test/.settings/org.eclipse.core.resources.prefs @@ -1,5 +1,6 @@ -#Sat Dec 20 21:21:24 CET 2008 eclipse.preferences.version=1 +encoding//tst-rsrc/org/eclipse/jgit/diff/umlaut.patch=ISO-8859-1 +encoding//tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage=ISO-8859-1 encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_BothISO88591.patch=ISO-8859-1 encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_Convert.patch=ISO-8859-1 encoding//tst-rsrc/org/eclipse/jgit/patch/testGetText_DiffCc.patch=ISO-8859-1 diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut.patch b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut.patch new file mode 100644 index 000000000..7380dbed8 Binary files /dev/null and b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut.patch differ diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage new file mode 100644 index 000000000..557f72f51 Binary files /dev/null and b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PostImage differ diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PreImage b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PreImage new file mode 100644 index 000000000..003a05411 Binary files /dev/null and b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/diff/umlaut_PreImage differ diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ApplyCommandTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ApplyCommandTest.java index e9b8924e6..b997ac009 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ApplyCommandTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ApplyCommandTest.java @@ -287,6 +287,14 @@ public void testBinaryLiteralAdd() throws Exception { checkBinary("literal_add", false); } + @Test + public void testEncodingChange() throws Exception { + // This is a text patch that changes a file containing ÄÖÜ in UTF-8 to + // the same characters in ISO-8859-1. The patch file itself uses mixed + // encoding. Since checkFile() works with strings use the binary check. + checkBinary("umlaut", true); + } + @Test public void testAddA1() throws Exception { ApplyResult result = init("A1", false, true); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/api/ApplyCommand.java b/org.eclipse.jgit/src/org/eclipse/jgit/api/ApplyCommand.java index f7eba88ac..ec53412fc 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/api/ApplyCommand.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/api/ApplyCommand.java @@ -10,7 +10,6 @@ package org.eclipse.jgit.api; import java.io.BufferedInputStream; -import java.io.BufferedWriter; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; @@ -18,9 +17,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.nio.charset.StandardCharsets; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.StandardCopyOption; import java.text.MessageFormat; @@ -549,11 +546,11 @@ private void applyBinary(Repository repository, String path, File f, private void applyText(Repository repository, String path, RawText rt, File f, FileHeader fh, CheckoutMetadata checkOut) throws IOException, PatchApplyException { - List oldLines = new ArrayList<>(rt.size()); + List oldLines = new ArrayList<>(rt.size()); for (int i = 0; i < rt.size(); i++) { - oldLines.add(rt.getString(i)); + oldLines.add(rt.getRawString(i)); } - List newLines = new ArrayList<>(oldLines); + List newLines = new ArrayList<>(oldLines); int afterLastHunk = 0; int lineNumberShift = 0; int lastHunkNewLine = -1; @@ -571,9 +568,9 @@ private void applyText(Repository repository, String path, RawText rt, b.length); RawText hrt = new RawText(b); - List hunkLines = new ArrayList<>(hrt.size()); + List hunkLines = new ArrayList<>(hrt.size()); for (int i = 0; i < hrt.size(); i++) { - hunkLines.add(hrt.getString(i)); + hunkLines.add(hrt.getRawString(i)); } if (hh.getNewStartLine() == 0) { @@ -642,8 +639,8 @@ && canApplyAt(hunkLines, newLines, 0)) { lineNumberShift = applyAt - hh.getNewStartLine() + 1; int sz = hunkLines.size(); for (int j = 1; j < sz; j++) { - String hunkLine = hunkLines.get(j); - switch (hunkLine.charAt(0)) { + ByteBuffer hunkLine = hunkLines.get(j); + switch (hunkLine.array()[hunkLine.position()]) { case ' ': applyAt++; break; @@ -651,7 +648,7 @@ && canApplyAt(hunkLines, newLines, 0)) { newLines.remove(applyAt); break; case '+': - newLines.add(applyAt++, hunkLine.substring(1)); + newLines.add(applyAt++, slice(hunkLine, 1)); break; default: break; @@ -660,28 +657,29 @@ && canApplyAt(hunkLines, newLines, 0)) { afterLastHunk = applyAt; } if (!isNoNewlineAtEndOfFile(fh)) { - newLines.add(""); //$NON-NLS-1$ + newLines.add(null); } if (!rt.isMissingNewlineAtEnd()) { - oldLines.add(""); //$NON-NLS-1$ + oldLines.add(null); } - if (!isChanged(oldLines, newLines)) { - return; // Don't touch the file + if (oldLines.equals(newLines)) { + return; // Unchanged; don't touch the file } - // TODO: forcing UTF-8 is a bit strange and may lead to re-coding if the - // input was some other encoding, but it's what previous versions of - // this code used. (Even earlier the code used the default encoding, - // which has the same problem.) Perhaps using bytes instead of Strings - // for the lines would be better. TemporaryBuffer buffer = new TemporaryBuffer.LocalFile(null); try { - try (Writer w = new BufferedWriter( - new OutputStreamWriter(buffer, StandardCharsets.UTF_8))) { - for (Iterator l = newLines.iterator(); l.hasNext();) { - w.write(l.next()); + try (OutputStream out = buffer) { + for (Iterator l = newLines.iterator(); l + .hasNext();) { + ByteBuffer line = l.next(); + if (line == null) { + // Must be the marker for the final newline + break; + } + out.write(line.array(), line.position(), + line.limit() - line.position()); if (l.hasNext()) { - w.write('\n'); + out.write('\n'); } } } @@ -698,18 +696,18 @@ && canApplyAt(hunkLines, newLines, 0)) { fh.getNewMode() == FileMode.EXECUTABLE_FILE); } - private boolean canApplyAt(List hunkLines, List newLines, - int line) { + private boolean canApplyAt(List hunkLines, + List newLines, int line) { int sz = hunkLines.size(); int limit = newLines.size(); int pos = line; for (int j = 1; j < sz; j++) { - String hunkLine = hunkLines.get(j); - switch (hunkLine.charAt(0)) { + ByteBuffer hunkLine = hunkLines.get(j); + switch (hunkLine.array()[hunkLine.position()]) { case ' ': case '-': if (pos >= limit - || !newLines.get(pos).equals(hunkLine.substring(1))) { + || !newLines.get(pos).equals(slice(hunkLine, 1))) { return false; } pos++; @@ -721,13 +719,9 @@ private boolean canApplyAt(List hunkLines, List newLines, return true; } - private static boolean isChanged(List ol, List nl) { - if (ol.size() != nl.size()) - return true; - for (int i = 0; i < ol.size(); i++) - if (!ol.get(i).equals(nl.get(i))) - return true; - return false; + private ByteBuffer slice(ByteBuffer b, int off) { + int newOffset = b.position() + off; + return ByteBuffer.wrap(b.array(), newOffset, b.limit() - newOffset); } private boolean isNoNewlineAtEndOfFile(FileHeader fh) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 9f4b1fa49..d09da019d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -1,6 +1,6 @@ /* * Copyright (C) 2009, Google Inc. - * Copyright (C) 2008-2009, Johannes E. Schindelin and others + * Copyright (C) 2008-2021, Johannes E. Schindelin and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at @@ -16,6 +16,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; import org.eclipse.jgit.errors.BinaryBlobException; import org.eclipse.jgit.errors.LargeObjectException; @@ -164,6 +165,27 @@ public String getString(int i) { return getString(i, i + 1, true); } + /** + * Get the raw text for a single line. + * + * @param i + * index of the line to extract. Note this is 0-based, so line + * number 1 is actually index 0. + * @return the text for the line, without a trailing LF, as a + * {@link ByteBuffer} that is backed by a slice of the + * {@link #getRawContent() raw content}, with the buffer's position + * on the start of the line and the limit at the end. + * @since 5.12 + */ + public ByteBuffer getRawString(int i) { + int s = getStart(i); + int e = getEnd(i); + if (e > 0 && content[e - 1] == '\n') { + e--; + } + return ByteBuffer.wrap(content, s, e - s); + } + /** * Get the text for a region of lines. *