Make Repository.normalizeBranchName less strict

This operation was added recently with the goal to provide some
way to auto-correct invalid user input, or to provide a correction
suggestion to the user -- EGit uses it now that way. But the initial
implementation was very restrictive; it removed all non-ASCII
characters and even slashes.

Understandably end users were not happy with that. Git has no such
restriction to ASCII-only; nor does JGit. Branch names should be
meaningful to the end user, and if a user-supplied branch name is
invalid for technical reasons, a "normalized" name should still
be meaningful to the user.

Rewrite to attempt a minimal fix such that the result will pass
isValidRefName.

* Replace all Unicode whitespace by underscore.
* Replace troublesome special characters by dash.
* Collapse sequences of underscores, dots, and dashes.
* Remove underscores, dots, and dashes following slashes, and
  collapse sequences of slashes.
* Strip leading and trailing sequences of slashes, dots, dashes,
  and underscores.
* Avoid the ".lock" extension.
* Avoid the Windows reserved device names.
* If input name is null return an empty String so callers don't need to
check for null.

This still allows branch names with single slashes as separators
between components, avoids some pitfalls that isValidRefName() tests
for, and leaves other character untouched and thus allows non-ASCII
branch names.

Also move the function from the bottom of the file up to where
isValidRefName is implemented.

Bug: 512508
Change-Id: Ia0576d9b2489162208c05e51c6d54e9f0c88c3a7
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
This commit is contained in:
Thomas Wolf 2017-02-23 22:49:43 +01:00 committed by Matthias Sohn
parent 45f62576de
commit 1f3e74ed9f
2 changed files with 170 additions and 128 deletions

View File

@ -44,6 +44,7 @@
package org.eclipse.jgit.lib;
import static org.eclipse.jgit.junit.Assert.assertEquals;
import static org.junit.Assert.assertEquals;
import org.eclipse.jgit.junit.MockSystemReader;
import org.eclipse.jgit.util.SystemReader;
@ -94,6 +95,25 @@ private static void assertInvalidOnWindows(final String name) {
}
}
private static void assertNormalized(final String name,
final String expected) {
SystemReader instance = SystemReader.getInstance();
try {
setUnixSystemReader();
String normalized = Repository.normalizeBranchName(name);
assertEquals("Normalization of " + name, expected, normalized);
assertEquals("\"" + normalized + "\"", true,
Repository.isValidRefName(Constants.R_HEADS + normalized));
setWindowsSystemReader();
normalized = Repository.normalizeBranchName(name);
assertEquals("Normalization of " + name, expected, normalized);
assertEquals("\"" + normalized + "\"", true,
Repository.isValidRefName(Constants.R_HEADS + normalized));
} finally {
SystemReader.setInstance(instance);
}
}
@Test
public void testEmptyString() {
assertValid(false, "");
@ -250,97 +270,56 @@ public void testWindowsReservedNames() {
@Test
public void testNormalizeBranchName() {
assertEquals(true, Repository.normalizeBranchName(null) == "");
assertEquals(true, Repository.normalizeBranchName("").equals(""));
assertNormalized("Bug 12345::::Hello World", "Bug_12345-Hello_World");
assertNormalized("Bug 12345 :::: Hello World", "Bug_12345_Hello_World");
assertNormalized("Bug 12345 :::: Hello::: World",
"Bug_12345_Hello-World");
assertNormalized(":::Bug 12345 - Hello World", "Bug_12345_Hello_World");
assertNormalized("---Bug 12345 - Hello World", "Bug_12345_Hello_World");
assertNormalized("Bug 12345 ---- Hello --- World",
"Bug_12345_Hello_World");
assertNormalized("Bug 12345 - Hello World!", "Bug_12345_Hello_World!");
assertNormalized("Bug 12345 : Hello World!", "Bug_12345_Hello_World!");
assertNormalized("Bug 12345 _ Hello World!", "Bug_12345_Hello_World!");
assertNormalized("Bug 12345 - Hello World!",
"Bug_12345_Hello_World!");
assertNormalized(" Bug 12345 - Hello World! ",
"Bug_12345_Hello_World!");
assertNormalized(" Bug 12345 - Hello World! ",
"Bug_12345_Hello_World!");
assertNormalized("Bug 12345 - Hello______ World!",
"Bug_12345_Hello_World!");
assertNormalized("_Bug 12345 - Hello World!", "Bug_12345_Hello_World!");
}
assertEquals(true,
Repository.normalizeBranchName("__@#$@#$@$____ _")
.equals(""));
@Test
public void testNormalizeWithSlashes() {
assertNormalized("foo/bar/baz", "foo/bar/baz");
assertNormalized("foo/bar.lock/baz.lock", "foo/bar_lock/baz_lock");
assertNormalized("foo/.git/.git~1/bar", "foo/git/git-1/bar");
assertNormalized(".foo/aux/con/com3.txt/com0/prn/lpt1",
"foo/+aux/+con/+com3.txt/com0/+prn/+lpt1");
assertNormalized("foo/../bar///.--ba...z", "foo/bar/ba.z");
}
assertEquals(true,
Repository.normalizeBranchName("~`!@#$%^&*()_+}]{[|\\\";?>.<,/")
.equals(""));
assertEquals(true,
Repository.normalizeBranchName("Bug 12345 :::: Hello World")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository.normalizeBranchName("Bug 12345 :::: Hello::: World")
.equals("Bug_12345-Hello-_World"));
assertEquals(true,
Repository.normalizeBranchName(":::Bug 12345 - Hello World")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository.normalizeBranchName("---Bug 12345 - Hello World")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository.normalizeBranchName("Bug 12345 ---- Hello --- World")
.equals("Bug_12345-Hello-World"));
assertEquals(true, Repository.normalizeBranchName(null) == null);
assertEquals(true,
Repository.normalizeBranchName("Bug 12345 - Hello World!")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository.normalizeBranchName("Bug 12345 : Hello World!")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository.normalizeBranchName("Bug 12345 _ Hello World!")
.equals("Bug_12345_Hello_World"));
assertEquals(true,
Repository
.normalizeBranchName("Bug 12345 - Hello World!")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository.normalizeBranchName(" Bug 12345 - Hello World! ")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository
.normalizeBranchName(" Bug 12345 - Hello World! ")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository
.normalizeBranchName(
"Bug 12345 - Hello______ World!")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository.normalizeBranchName("_Bug 12345 - Hello World!")
.equals("Bug_12345-Hello_World"));
assertEquals(true,
Repository
.normalizeBranchName(
"Bug 12345 - Hello Wo!@#$%^&*(rld {@")
.equals("Bug_12345-Hello_World_"));
assertEquals(true,
Repository.normalizeBranchName("Bug 1#$ 2345 - Hello World")
.equals("Bug_12345-Hello_World"));
@Test
public void testNormalizeWithUnicode() {
assertNormalized("f\u00f6\u00f6/.b\u00e0r/*[<>|^~/b\u00e9\\z",
"f\u00f6\u00f6/b\u00e0r/b\u00e9-z");
assertNormalized("\u5165\u53e3 entrance;/.\u51fa\u53e3_*ex*it*/",
"\u5165\u53e3_entrance;/\u51fa\u53e3_ex-it");
}
@Test
public void testNormalizeAlreadyValidRefName() {
assertEquals(true,
Repository.normalizeBranchName("refs/heads/m.a.s.t.e.r")
.equals("refs/heads/m.a.s.t.e.r"));
assertNormalized("refs/heads/m.a.s.t.e.r", "refs/heads/m.a.s.t.e.r");
assertNormalized("refs/tags/v1.0-20170223", "refs/tags/v1.0-20170223");
}
@Test
public void testNormalizeTrimmedUnicodeAlreadyValidRefName() {
assertEquals(true,
Repository.normalizeBranchName(" \u00e5ngstr\u00f6m\t")
.equals("\u00e5ngstr\u00f6m"));
assertNormalized(" \u00e5ngstr\u00f6m\t", "\u00e5ngstr\u00f6m");
}
}

View File

@ -66,6 +66,7 @@
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import org.eclipse.jgit.annotations.NonNull;
import org.eclipse.jgit.annotations.Nullable;
@ -111,6 +112,17 @@ public abstract class Repository implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(Repository.class);
private static final ListenerList globalListeners = new ListenerList();
/**
* Branch names containing slashes should not have a name component that is
* one of the reserved device names on Windows.
*
* @see #normalizeBranchName(String)
*/
private static final Pattern FORBIDDEN_BRANCH_NAME_COMPONENTS = Pattern
.compile(
"(^|/)(aux|com[1-9]|con|lpt[1-9]|nul|prn)(\\.[^/]*)?", //$NON-NLS-1$
Pattern.CASE_INSENSITIVE);
/** @return the global listener list observing all events in this JVM. */
public static ListenerList getGlobalListenerList() {
return globalListeners;
@ -923,7 +935,7 @@ public String toString() {
else
desc = getClass().getSimpleName() + "-" //$NON-NLS-1$
+ System.identityHashCode(this);
return "Repository[" + desc + "]"; //$NON-NLS-1$
return "Repository[" + desc + "]"; //$NON-NLS-1$ //$NON-NLS-2$
}
/**
@ -1332,13 +1344,106 @@ public static boolean isValidRefName(final String refName) {
return components > 1;
}
/**
* Normalizes the passed branch name into a possible valid branch name. The
* validity of the returned name should be checked by a subsequent call to
* {@link #isValidRefName(String)}.
* <p/>
* Future implementations of this method could be more restrictive or more
* lenient about the validity of specific characters in the returned name.
* <p/>
* The current implementation returns the trimmed input string if this is
* already a valid branch name. Otherwise it returns a trimmed string with
* special characters not allowed by {@link #isValidRefName(String)}
* replaced by hyphens ('-') and blanks replaced by underscores ('_').
* Leading and trailing slashes, dots, hyphens, and underscores are removed.
*
* @param name
* to normalize
*
* @return The normalized name or an empty String if it is {@code null} or
* empty.
* @since 4.7
* @see #isValidRefName(String)
*/
public static String normalizeBranchName(String name) {
if (name == null || name.isEmpty()) {
return ""; //$NON-NLS-1$
}
String result = name.trim();
String fullName = result.startsWith(Constants.R_HEADS) ? result
: Constants.R_HEADS + result;
if (isValidRefName(fullName)) {
return result;
}
// All Unicode blanks to underscore
result = result.replaceAll("(?:\\h|\\v)+", "_"); //$NON-NLS-1$ //$NON-NLS-2$
StringBuilder b = new StringBuilder();
char p = '/';
for (int i = 0, len = result.length(); i < len; i++) {
char c = result.charAt(i);
if (c < ' ' || c == 127) {
continue;
}
// Substitute a dash for problematic characters
switch (c) {
case '\\':
case '^':
case '~':
case ':':
case '?':
case '*':
case '[':
case '@':
case '<':
case '>':
case '|':
case '"':
c = '-';
break;
default:
break;
}
// Collapse multiple slashes, dashes, dots, underscores, and omit
// dashes, dots, and underscores following a slash.
switch (c) {
case '/':
if (p == '/') {
continue;
}
p = '/';
break;
case '.':
case '_':
case '-':
if (p == '/' || p == '-') {
continue;
}
p = '-';
break;
default:
p = c;
break;
}
b.append(c);
}
// Strip trailing special characters, and avoid the .lock extension
result = b.toString().replaceFirst("[/_.-]+$", "") //$NON-NLS-1$ //$NON-NLS-2$
.replaceAll("\\.lock($|/)", "_lock$1"); //$NON-NLS-1$ //$NON-NLS-2$
return FORBIDDEN_BRANCH_NAME_COMPONENTS.matcher(result)
.replaceAll("$1+$2$3"); //$NON-NLS-1$
}
/**
* Strip work dir and return normalized repository path.
*
* @param workDir Work dir
* @param file File whose path shall be stripped of its workdir
* @return normalized repository relative path or the empty
* string if the file is not relative to the work directory.
* @param workDir
* Work dir
* @param file
* File whose path shall be stripped of its workdir
* @return normalized repository relative path or the empty string if the
* file is not relative to the work directory.
*/
@NonNull
public static String stripWorkDir(File workDir, File file) {
@ -1889,46 +1994,4 @@ public Set<String> getRemoteNames() {
public void autoGC(ProgressMonitor monitor) {
// default does nothing
}
/**
* Normalizes the passed branch name into a possible valid branch name. The
* validity of the returned name should be checked by a subsequent call to
* {@link #isValidRefName(String)}.
* <p/>
* Future implementations of this method could be more restrictive or more
* lenient about the validity of specific characters in the returned name.
* <p/>
* The current implementation returns the trimmed input string if this is
* already a valid branch name. Otherwise it returns a trimmed string only
* containing word characters ([a-zA-Z_0-9]) and hyphens ('-'). Colons are
* replaced by hyphens. Repeating underscores and hyphens are replaced by a
* single occurrence. Underscores and hyphens at the beginning of the string
* are removed.
*
* @param name
* The name to normalize.
*
* @return The normalized String or null if null was passed.
* @since 4.7
* @see #isValidRefName(String)
*/
public static String normalizeBranchName(String name) {
if (name == null || name.length() == 0) {
return name;
}
String result = name.trim();
String fullName = result.startsWith(Constants.R_HEADS) ? result
: Constants.R_HEADS + result;
if (isValidRefName(fullName)) {
return result;
}
return result.replaceAll("\\s+([_:-])*?\\s+", "$1") //$NON-NLS-1$//$NON-NLS-2$
.replaceAll(":", "-") //$NON-NLS-1$//$NON-NLS-2$
.replaceAll("\\s+", "_") //$NON-NLS-1$//$NON-NLS-2$
.replaceAll("_{2,}", "_") //$NON-NLS-1$//$NON-NLS-2$
.replaceAll("-{2,}", "-") //$NON-NLS-1$//$NON-NLS-2$
.replaceAll("[^\\w-]", "") //$NON-NLS-1$ //$NON-NLS-2$
.replaceAll("^_+", "") //$NON-NLS-1$//$NON-NLS-2$
.replaceAll("^-+", ""); //$NON-NLS-1$//$NON-NLS-2$
}
}