RevCommit: Better support invalid encoding headers
With this support we no longer need the "'utf8'" alias. UTF-8 will be automatically tried when the encoding header is not recognized, and it is used if the character sequence cleanly decodes as UTF-8. Modernize some of the references to use StandardCharsets.
Change-Id: I4c0c88750475560e1f2263180c4a98eb8febeca0
This commit is contained in:
parent
24cd8e170d
commit
31d92ace5b
|
@ -43,13 +43,18 @@
|
|||
|
||||
package org.eclipse.jgit.revwalk;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertSame;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.util.TimeZone;
|
||||
|
||||
import org.eclipse.jgit.junit.RepositoryTestCase;
|
||||
|
@ -303,6 +308,86 @@ public void testParse_explicit_bad_encoded2() throws Exception {
|
|||
assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParse_incorrectUtf8Name() throws Exception {
|
||||
ByteArrayOutputStream b = new ByteArrayOutputStream();
|
||||
b.write("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n"
|
||||
.getBytes(UTF_8));
|
||||
b.write("author au <a@example.com> 1218123387 +0700\n".getBytes(UTF_8));
|
||||
b.write("committer co <c@example.com> 1218123390 -0500\n"
|
||||
.getBytes(UTF_8));
|
||||
b.write("encoding 'utf8'\n".getBytes(UTF_8));
|
||||
b.write("\n".getBytes(UTF_8));
|
||||
b.write("Sm\u00f6rg\u00e5sbord\n".getBytes(UTF_8));
|
||||
|
||||
RevCommit c = new RevCommit(
|
||||
id("9473095c4cb2f12aefe1db8a355fe3fafba42f67"));
|
||||
c.parseCanonical(new RevWalk(db), b.toByteArray());
|
||||
assertEquals("'utf8'", c.getEncodingName());
|
||||
assertEquals("Sm\u00f6rg\u00e5sbord\n", c.getFullMessage());
|
||||
|
||||
try {
|
||||
c.getEncoding();
|
||||
fail("Expected " + IllegalCharsetNameException.class);
|
||||
} catch (IllegalCharsetNameException badName) {
|
||||
assertEquals("'utf8'", badName.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParse_illegalEncoding() throws Exception {
|
||||
ByteArrayOutputStream b = new ByteArrayOutputStream();
|
||||
b.write("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes(UTF_8));
|
||||
b.write("author au <a@example.com> 1218123387 +0700\n".getBytes(UTF_8));
|
||||
b.write("committer co <c@example.com> 1218123390 -0500\n".getBytes(UTF_8));
|
||||
b.write("encoding utf-8logoutputencoding=gbk\n".getBytes(UTF_8));
|
||||
b.write("\n".getBytes(UTF_8));
|
||||
b.write("message\n".getBytes(UTF_8));
|
||||
|
||||
RevCommit c = new RevCommit(
|
||||
id("9473095c4cb2f12aefe1db8a355fe3fafba42f67"));
|
||||
c.parseCanonical(new RevWalk(db), b.toByteArray());
|
||||
assertEquals("utf-8logoutputencoding=gbk", c.getEncodingName());
|
||||
assertEquals("message\n", c.getFullMessage());
|
||||
assertEquals("message", c.getShortMessage());
|
||||
assertTrue(c.getFooterLines().isEmpty());
|
||||
assertEquals("au", c.getAuthorIdent().getName());
|
||||
|
||||
try {
|
||||
c.getEncoding();
|
||||
fail("Expected " + IllegalCharsetNameException.class);
|
||||
} catch (IllegalCharsetNameException badName) {
|
||||
assertEquals("utf-8logoutputencoding=gbk", badName.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParse_unsupportedEncoding() throws Exception {
|
||||
ByteArrayOutputStream b = new ByteArrayOutputStream();
|
||||
b.write("tree 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes(UTF_8));
|
||||
b.write("author au <a@example.com> 1218123387 +0700\n".getBytes(UTF_8));
|
||||
b.write("committer co <c@example.com> 1218123390 -0500\n".getBytes(UTF_8));
|
||||
b.write("encoding it_IT.UTF8\n".getBytes(UTF_8));
|
||||
b.write("\n".getBytes(UTF_8));
|
||||
b.write("message\n".getBytes(UTF_8));
|
||||
|
||||
RevCommit c = new RevCommit(
|
||||
id("9473095c4cb2f12aefe1db8a355fe3fafba42f67"));
|
||||
c.parseCanonical(new RevWalk(db), b.toByteArray());
|
||||
assertEquals("it_IT.UTF8", c.getEncodingName());
|
||||
assertEquals("message\n", c.getFullMessage());
|
||||
assertEquals("message", c.getShortMessage());
|
||||
assertTrue(c.getFooterLines().isEmpty());
|
||||
assertEquals("au", c.getAuthorIdent().getName());
|
||||
|
||||
try {
|
||||
c.getEncoding();
|
||||
fail("Expected " + UnsupportedCharsetException.class);
|
||||
} catch (UnsupportedCharsetException badName) {
|
||||
assertEquals("it_IT.UTF8", badName.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParse_NoMessage() throws Exception {
|
||||
final String msg = "";
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
|
||||
package org.eclipse.jgit.revwalk;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
|
@ -361,6 +362,44 @@ public void testParse_explicit_bad_encoded2() throws Exception {
|
|||
assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParse_illegalEncoding() throws Exception {
|
||||
ByteArrayOutputStream b = new ByteArrayOutputStream();
|
||||
b.write("object 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes(UTF_8));
|
||||
b.write("type tree\n".getBytes(UTF_8));
|
||||
b.write("tag v1.0\n".getBytes(UTF_8));
|
||||
b.write("tagger t <t@example.com> 1218123387 +0700\n".getBytes(UTF_8));
|
||||
b.write("encoding utf-8logoutputencoding=gbk\n".getBytes(UTF_8));
|
||||
b.write("\n".getBytes(UTF_8));
|
||||
b.write("message\n".getBytes(UTF_8));
|
||||
|
||||
RevTag t = new RevTag(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67"));
|
||||
t.parseCanonical(new RevWalk(db), b.toByteArray());
|
||||
|
||||
assertEquals("t", t.getTaggerIdent().getName());
|
||||
assertEquals("message", t.getShortMessage());
|
||||
assertEquals("message\n", t.getFullMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParse_unsupportedEncoding() throws Exception {
|
||||
ByteArrayOutputStream b = new ByteArrayOutputStream();
|
||||
b.write("object 9788669ad918b6fcce64af8882fc9a81cb6aba67\n".getBytes(UTF_8));
|
||||
b.write("type tree\n".getBytes(UTF_8));
|
||||
b.write("tag v1.0\n".getBytes(UTF_8));
|
||||
b.write("tagger t <t@example.com> 1218123387 +0700\n".getBytes(UTF_8));
|
||||
b.write("encoding it_IT.UTF8\n".getBytes(UTF_8));
|
||||
b.write("\n".getBytes(UTF_8));
|
||||
b.write("message\n".getBytes(UTF_8));
|
||||
|
||||
RevTag t = new RevTag(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67"));
|
||||
t.parseCanonical(new RevWalk(db), b.toByteArray());
|
||||
|
||||
assertEquals("t", t.getTaggerIdent().getName());
|
||||
assertEquals("message", t.getShortMessage());
|
||||
assertEquals("message\n", t.getFullMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParse_NoMessage() throws Exception {
|
||||
final String msg = "";
|
||||
|
|
|
@ -44,12 +44,17 @@
|
|||
|
||||
package org.eclipse.jgit.revwalk;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.eclipse.jgit.annotations.Nullable;
|
||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||
import org.eclipse.jgit.errors.MissingObjectException;
|
||||
import org.eclipse.jgit.lib.AnyObjectId;
|
||||
|
@ -441,12 +446,12 @@ public final PersonIdent getCommitterIdent() {
|
|||
* @return decoded commit message as a string. Never null.
|
||||
*/
|
||||
public final String getFullMessage() {
|
||||
final byte[] raw = buffer;
|
||||
final int msgB = RawParseUtils.commitMessage(raw, 0);
|
||||
if (msgB < 0)
|
||||
byte[] raw = buffer;
|
||||
int msgB = RawParseUtils.commitMessage(raw, 0);
|
||||
if (msgB < 0) {
|
||||
return ""; //$NON-NLS-1$
|
||||
final Charset enc = RawParseUtils.parseEncoding(raw);
|
||||
return RawParseUtils.decode(enc, raw, msgB, raw.length);
|
||||
}
|
||||
return RawParseUtils.decode(guessEncoding(), raw, msgB, raw.length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -465,16 +470,17 @@ public final String getFullMessage() {
|
|||
* spanned multiple lines. Embedded LFs are converted to spaces.
|
||||
*/
|
||||
public final String getShortMessage() {
|
||||
final byte[] raw = buffer;
|
||||
final int msgB = RawParseUtils.commitMessage(raw, 0);
|
||||
if (msgB < 0)
|
||||
byte[] raw = buffer;
|
||||
int msgB = RawParseUtils.commitMessage(raw, 0);
|
||||
if (msgB < 0) {
|
||||
return ""; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
final Charset enc = RawParseUtils.parseEncoding(raw);
|
||||
final int msgE = RawParseUtils.endOfParagraph(raw, msgB);
|
||||
String str = RawParseUtils.decode(enc, raw, msgB, msgE);
|
||||
if (hasLF(raw, msgB, msgE))
|
||||
int msgE = RawParseUtils.endOfParagraph(raw, msgB);
|
||||
String str = RawParseUtils.decode(guessEncoding(), raw, msgB, msgE);
|
||||
if (hasLF(raw, msgB, msgE)) {
|
||||
str = StringUtils.replaceLineBreaksWithSpace(str);
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
|
@ -485,6 +491,23 @@ static boolean hasLF(final byte[] r, int b, final int e) {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the name of the encoding declared for the commit message buffer.
|
||||
* <p>
|
||||
* Locates the "encoding" header (if present) and returns its value. Due to
|
||||
* corruption in the wild this may be an invalid encoding name that is not
|
||||
* recognized by any character encoding library.
|
||||
* <p>
|
||||
* If no encoding header is present, null is returned.
|
||||
*
|
||||
* @return the name of the preferred encoding of {@link #getRawBuffer()}; or null.
|
||||
* @since 4.2
|
||||
*/
|
||||
@Nullable
|
||||
public final String getEncodingName() {
|
||||
return RawParseUtils.parseEncodingName(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the encoding of the commit message buffer.
|
||||
* <p>
|
||||
|
@ -492,14 +515,28 @@ static boolean hasLF(final byte[] r, int b, final int e) {
|
|||
* character set to apply to this buffer to evaluate its contents as
|
||||
* character data.
|
||||
* <p>
|
||||
* If no encoding header is present, {@link Constants#CHARSET} is assumed.
|
||||
* If no encoding header is present {@code UTF-8} is assumed.
|
||||
*
|
||||
* @return the preferred encoding of {@link #getRawBuffer()}.
|
||||
* @throws IllegalCharsetNameException
|
||||
* if the character set requested by the encoding header is
|
||||
* malformed and unsupportable.
|
||||
* @throws UnsupportedCharsetException
|
||||
* if the JRE does not support the character set requested by
|
||||
* the encoding header.
|
||||
*/
|
||||
public final Charset getEncoding() {
|
||||
return RawParseUtils.parseEncoding(buffer);
|
||||
}
|
||||
|
||||
private Charset guessEncoding() {
|
||||
try {
|
||||
return getEncoding();
|
||||
} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
|
||||
return UTF_8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the footer lines (e.g. "Signed-off-by") for machine processing.
|
||||
* <p>
|
||||
|
@ -529,7 +566,7 @@ public final List<FooterLine> getFooterLines() {
|
|||
|
||||
final int msgB = RawParseUtils.commitMessage(raw, 0);
|
||||
final ArrayList<FooterLine> r = new ArrayList<FooterLine>(4);
|
||||
final Charset enc = getEncoding();
|
||||
final Charset enc = guessEncoding();
|
||||
for (;;) {
|
||||
ptr = RawParseUtils.prevLF(raw, ptr);
|
||||
if (ptr <= msgB)
|
||||
|
|
|
@ -45,8 +45,12 @@
|
|||
|
||||
package org.eclipse.jgit.revwalk;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
|
||||
import org.eclipse.jgit.errors.CorruptObjectException;
|
||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
|
||||
|
@ -162,7 +166,7 @@ void parseCanonical(final RevWalk walk, final byte[] rawTag)
|
|||
|
||||
int p = pos.value += 4; // "tag "
|
||||
final int nameEnd = RawParseUtils.nextLF(rawTag, p) - 1;
|
||||
tagName = RawParseUtils.decode(Constants.CHARSET, rawTag, p, nameEnd);
|
||||
tagName = RawParseUtils.decode(UTF_8, rawTag, p, nameEnd);
|
||||
|
||||
if (walk.isRetainBody())
|
||||
buffer = rawTag;
|
||||
|
@ -207,12 +211,12 @@ public final PersonIdent getTaggerIdent() {
|
|||
* @return decoded tag message as a string. Never null.
|
||||
*/
|
||||
public final String getFullMessage() {
|
||||
final byte[] raw = buffer;
|
||||
final int msgB = RawParseUtils.tagMessage(raw, 0);
|
||||
if (msgB < 0)
|
||||
byte[] raw = buffer;
|
||||
int msgB = RawParseUtils.tagMessage(raw, 0);
|
||||
if (msgB < 0) {
|
||||
return ""; //$NON-NLS-1$
|
||||
final Charset enc = RawParseUtils.parseEncoding(raw);
|
||||
return RawParseUtils.decode(enc, raw, msgB, raw.length);
|
||||
}
|
||||
return RawParseUtils.decode(guessEncoding(), raw, msgB, raw.length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -231,19 +235,28 @@ public final String getFullMessage() {
|
|||
* multiple lines. Embedded LFs are converted to spaces.
|
||||
*/
|
||||
public final String getShortMessage() {
|
||||
final byte[] raw = buffer;
|
||||
final int msgB = RawParseUtils.tagMessage(raw, 0);
|
||||
if (msgB < 0)
|
||||
byte[] raw = buffer;
|
||||
int msgB = RawParseUtils.tagMessage(raw, 0);
|
||||
if (msgB < 0) {
|
||||
return ""; //$NON-NLS-1$
|
||||
}
|
||||
|
||||
final Charset enc = RawParseUtils.parseEncoding(raw);
|
||||
final int msgE = RawParseUtils.endOfParagraph(raw, msgB);
|
||||
String str = RawParseUtils.decode(enc, raw, msgB, msgE);
|
||||
if (RevCommit.hasLF(raw, msgB, msgE))
|
||||
int msgE = RawParseUtils.endOfParagraph(raw, msgB);
|
||||
String str = RawParseUtils.decode(guessEncoding(), raw, msgB, msgE);
|
||||
if (RevCommit.hasLF(raw, msgB, msgE)) {
|
||||
str = StringUtils.replaceLineBreaksWithSpace(str);
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
private Charset guessEncoding() {
|
||||
try {
|
||||
return RawParseUtils.parseEncoding(buffer);
|
||||
} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
|
||||
return UTF_8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a reference to the object this tag was placed on.
|
||||
* <p>
|
||||
|
|
|
@ -44,6 +44,8 @@
|
|||
|
||||
package org.eclipse.jgit.util;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.ISO_8859_1;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static org.eclipse.jgit.lib.ObjectChecker.author;
|
||||
import static org.eclipse.jgit.lib.ObjectChecker.committer;
|
||||
import static org.eclipse.jgit.lib.ObjectChecker.encoding;
|
||||
|
@ -60,6 +62,7 @@
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.eclipse.jgit.annotations.Nullable;
|
||||
import org.eclipse.jgit.lib.Constants;
|
||||
import org.eclipse.jgit.lib.PersonIdent;
|
||||
|
||||
|
@ -70,7 +73,7 @@ public final class RawParseUtils {
|
|||
*
|
||||
* @since 2.2
|
||||
*/
|
||||
public static final Charset UTF8_CHARSET = Charset.forName("UTF-8"); //$NON-NLS-1$
|
||||
public static final Charset UTF8_CHARSET = UTF_8;
|
||||
|
||||
private static final byte[] digits10;
|
||||
|
||||
|
@ -81,9 +84,9 @@ public final class RawParseUtils {
|
|||
private static final Map<String, Charset> encodingAliases;
|
||||
|
||||
static {
|
||||
encodingAliases = new HashMap<String, Charset>();
|
||||
encodingAliases.put("latin-1", Charset.forName("ISO-8859-1")); //$NON-NLS-1$ //$NON-NLS-2$
|
||||
encodingAliases.put("'utf8'", Charset.forName("UTF-8")); //$NON-NLS-1$ //$NON-NLS-2$
|
||||
encodingAliases = new HashMap<>();
|
||||
encodingAliases.put("latin-1", ISO_8859_1); //$NON-NLS-1$
|
||||
encodingAliases.put("iso-latin-1", ISO_8859_1); //$NON-NLS-1$
|
||||
|
||||
digits10 = new byte['9' + 1];
|
||||
Arrays.fill(digits10, (byte) -1);
|
||||
|
@ -671,6 +674,27 @@ public static final int encoding(final byte[] b, int ptr) {
|
|||
return match(b, ptr, encoding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the "encoding " header as a string.
|
||||
* <p>
|
||||
* Locates the "encoding " header (if present) and returns its value.
|
||||
*
|
||||
* @param b
|
||||
* buffer to scan.
|
||||
* @return the encoding header as specified in the commit; null if the
|
||||
* header was not present and a default encoding should be assumed by the caller.
|
||||
* @since 4.2
|
||||
*/
|
||||
@Nullable
|
||||
public static String parseEncodingName(final byte[] b) {
|
||||
int enc = encoding(b, 0);
|
||||
if (enc < 0) {
|
||||
return null;
|
||||
}
|
||||
int lf = nextLF(b, enc);
|
||||
return decode(UTF_8, b, enc, lf - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the "encoding " header into a character set reference.
|
||||
* <p>
|
||||
|
@ -678,29 +702,33 @@ public static final int encoding(final byte[] b, int ptr) {
|
|||
* {@link #encoding(byte[], int)} and then returns the proper character set
|
||||
* to apply to this buffer to evaluate its contents as character data.
|
||||
* <p>
|
||||
* If no encoding header is present, {@link Constants#CHARSET} is assumed.
|
||||
* If no encoding header is present {@code UTF-8} is assumed.
|
||||
*
|
||||
* @param b
|
||||
* buffer to scan.
|
||||
* @return the Java character set representation. Never null.
|
||||
* @throws IllegalCharsetNameException
|
||||
* if the character set requested by the encoding header is
|
||||
* malformed and unsupportable.
|
||||
* @throws UnsupportedCharsetException
|
||||
* if the JRE does not support the character set requested by
|
||||
* the encoding header.
|
||||
*/
|
||||
public static Charset parseEncoding(final byte[] b) {
|
||||
final int enc = encoding(b, 0);
|
||||
if (enc < 0)
|
||||
return Constants.CHARSET;
|
||||
final int lf = nextLF(b, enc);
|
||||
String decoded = decode(Constants.CHARSET, b, enc, lf - 1);
|
||||
String enc = parseEncodingName(b);
|
||||
if (enc == null) {
|
||||
return UTF_8;
|
||||
}
|
||||
|
||||
String name = enc.trim();
|
||||
try {
|
||||
return Charset.forName(decoded);
|
||||
} catch (IllegalCharsetNameException badName) {
|
||||
Charset aliased = charsetForAlias(decoded);
|
||||
if (aliased != null)
|
||||
return aliased;
|
||||
throw badName;
|
||||
} catch (UnsupportedCharsetException badName) {
|
||||
Charset aliased = charsetForAlias(decoded);
|
||||
if (aliased != null)
|
||||
return Charset.forName(name);
|
||||
} catch (IllegalCharsetNameException
|
||||
| UnsupportedCharsetException badName) {
|
||||
Charset aliased = charsetForAlias(name);
|
||||
if (aliased != null) {
|
||||
return aliased;
|
||||
}
|
||||
throw badName;
|
||||
}
|
||||
}
|
||||
|
@ -739,7 +767,15 @@ public static PersonIdent parsePersonIdent(final String in) {
|
|||
* parsed.
|
||||
*/
|
||||
public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
|
||||
final Charset cs = parseEncoding(raw);
|
||||
Charset cs;
|
||||
try {
|
||||
cs = parseEncoding(raw);
|
||||
} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
|
||||
// Assume UTF-8 for person identities, usually this is correct.
|
||||
// If not, decode() will fall back to the ISO-8859-1 encoding.
|
||||
cs = UTF_8;
|
||||
}
|
||||
|
||||
final int emailB = nextLF(raw, nameB, '<');
|
||||
final int emailE = nextLF(raw, emailB, '>');
|
||||
if (emailB >= raw.length || raw[emailB] == '\n' ||
|
||||
|
@ -887,7 +923,7 @@ public static String decode(final byte[] buffer) {
|
|||
*/
|
||||
public static String decode(final byte[] buffer, final int start,
|
||||
final int end) {
|
||||
return decode(Constants.CHARSET, buffer, start, end);
|
||||
return decode(UTF_8, buffer, start, end);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -961,23 +997,21 @@ public static String decode(final Charset cs, final byte[] buffer,
|
|||
public static String decodeNoFallback(final Charset cs,
|
||||
final byte[] buffer, final int start, final int end)
|
||||
throws CharacterCodingException {
|
||||
final ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start);
|
||||
ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start);
|
||||
b.mark();
|
||||
|
||||
// Try our built-in favorite. The assumption here is that
|
||||
// decoding will fail if the data is not actually encoded
|
||||
// using that encoder.
|
||||
//
|
||||
try {
|
||||
return decode(b, Constants.CHARSET);
|
||||
return decode(b, UTF_8);
|
||||
} catch (CharacterCodingException e) {
|
||||
b.reset();
|
||||
}
|
||||
|
||||
if (!cs.equals(Constants.CHARSET)) {
|
||||
if (!cs.equals(UTF_8)) {
|
||||
// Try the suggested encoding, it might be right since it was
|
||||
// provided by the caller.
|
||||
//
|
||||
try {
|
||||
return decode(b, cs);
|
||||
} catch (CharacterCodingException e) {
|
||||
|
@ -987,9 +1021,8 @@ public static String decodeNoFallback(final Charset cs,
|
|||
|
||||
// Try the default character set. A small group of people
|
||||
// might actually use the same (or very similar) locale.
|
||||
//
|
||||
final Charset defcs = Charset.defaultCharset();
|
||||
if (!defcs.equals(cs) && !defcs.equals(Constants.CHARSET)) {
|
||||
Charset defcs = Charset.defaultCharset();
|
||||
if (!defcs.equals(cs) && !defcs.equals(UTF_8)) {
|
||||
try {
|
||||
return decode(b, defcs);
|
||||
} catch (CharacterCodingException e) {
|
||||
|
|
Loading…
Reference in New Issue