Don't use java.util.regex for two simple wildcard cases

To improve ignore parser performance we can avoid using java.util.regex
code on simple wildcard patterns with leading or trailing asterisk. As
those patterns represent a majority of ignore rules, the index diff
performance can be drastically increased on huge repository with lot of
ignore rules.

Bug: 450466
Change-Id: I80428441cc8d5de5468813f841d89322413eed8b
Signed-off-by: Andrey Loskutov <loskutov@gmx.de>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
This commit is contained in:
Andrey Loskutov 2014-11-07 16:30:44 +01:00 committed by Matthias Sohn
parent a31db826d4
commit 2c940fa18a
7 changed files with 237 additions and 27 deletions

View File

@ -62,14 +62,14 @@
@RunWith(Parameterized.class)
public class FastIgnoreRuleTest {
@Parameters(name = "JGit? {0}")
@Parameters(name = "OldRule? {0}")
public static Iterable<Boolean[]> data() {
return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
{ Boolean.TRUE } });
}
@Parameter
public Boolean useJGitRule;
public Boolean useOldRule;
@Test
public void testSimpleCharClass() {
@ -388,11 +388,11 @@ public void testSegmentsDoNotMatch() {
@SuppressWarnings("boxing")
@Test
public void testWildmatch() {
if (useJGitRule)
if (useOldRule)
System.err
.println("IgnoreRule can't understand wildmatch rules, skipping testWildmatch!");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
assertMatched("**/a/b", "a/b", assume);
assertMatched("**/a/b", "c/a/b", assume);
assertMatched("**/a/b", "c/d/a/b", assume);
@ -428,11 +428,11 @@ public void testWildmatch() {
@SuppressWarnings("boxing")
@Test
public void testWildmatchDoNotMatch() {
if (useJGitRule)
if (useOldRule)
System.err
.println("IgnoreRule can't understand wildmatch rules, skipping testWildmatchDoNotMatch!");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
assertNotMatched("**/a/b", "a/c/b", assume);
assertNotMatched("!/**/*.zip", "c/a/b.zip", assume);
assertNotMatched("!**/*.zip", "c/a/b.zip", assume);
@ -542,7 +542,7 @@ public void assertNotMatched(String pattern, String path, Boolean... assume) {
*/
private boolean match(String pattern, String target) {
boolean isDirectory = target.endsWith("/");
if (useJGitRule.booleanValue()) {
if (useOldRule.booleanValue()) {
IgnoreRule r = new IgnoreRule(pattern);
// If speed of this test is ever an issue, we can use a presetRule
// field

View File

@ -60,14 +60,14 @@
@RunWith(Parameterized.class)
public class IgnoreMatcherParametrizedTest {
@Parameters(name = "JGit? {0}")
@Parameters(name = "OldRule? {0}")
public static Iterable<Boolean[]> data() {
return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
{ Boolean.TRUE } });
}
@Parameter
public Boolean useJGitRule;
public Boolean useOldRule;
@Test
public void testBasic() {
@ -355,7 +355,7 @@ public void assertNotMatched(String pattern, String target) {
*/
private boolean match(String pattern, String target) {
boolean isDirectory = target.endsWith("/");
if (useJGitRule.booleanValue()) {
if (useOldRule.booleanValue()) {
IgnoreRule r = new IgnoreRule(pattern);
// If speed of this test is ever an issue, we can use a presetRule
// field

View File

@ -58,19 +58,19 @@
@SuppressWarnings({ "deprecation", "boxing" })
public class IgnoreRuleSpecialCasesTest {
@Parameters(name = "JGit? {0}")
@Parameters(name = "OldRule? {0}")
public static Iterable<Boolean[]> data() {
return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
{ Boolean.TRUE } });
}
@Parameter
public Boolean useJGitRule;
public Boolean useOldRule;
private void assertMatch(final String pattern, final String input,
final boolean matchExpected, Boolean... assume) {
boolean assumeDir = input.endsWith("/");
if (useJGitRule.booleanValue()) {
if (useOldRule.booleanValue()) {
final IgnoreRule matcher = new IgnoreRule(pattern);
if (assume.length == 0 || !assume[0].booleanValue())
assertEquals(matchExpected, matcher.isMatch(input, assumeDir));
@ -88,7 +88,7 @@ private void assertMatch(final String pattern, final String input,
private void assertFileNameMatch(final String pattern, final String input,
final boolean matchExpected) {
boolean assumeDir = input.endsWith("/");
if (useJGitRule.booleanValue()) {
if (useOldRule.booleanValue()) {
final IgnoreRule matcher = new IgnoreRule(pattern);
assertEquals(matchExpected, matcher.isMatch(input, assumeDir));
} else {
@ -99,10 +99,10 @@ private void assertFileNameMatch(final String pattern, final String input,
@Test
public void testVerySimplePatternCase0() throws Exception {
if (useJGitRule)
if (useOldRule)
System.err
.println("IgnoreRule can't understand blank lines, skipping");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
assertMatch("", "", false, assume);
}
@ -800,9 +800,14 @@ public void testSpecialGroupCase8() throws Exception {
@Test
public void testSpecialGroupCase9() throws Exception {
if (useJGitRule)
assertMatch("][", "][", true);
}
@Test
public void testSpecialGroupCase10() throws Exception {
if (useOldRule)
System.err.println("IgnoreRule can't understand [[:], skipping");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
// Second bracket is threated literally, so both [ and : should match
assertMatch("[[:]", ":", true, assume);
assertMatch("[[:]", "[", true, assume);
@ -861,10 +866,10 @@ public void testEscapedBracket6() throws Exception {
@Test
public void testEscapedBackslash() throws Exception {
if (useJGitRule)
if (useOldRule)
System.err
.println("IgnoreRule can't understand escaped backslashes, skipping");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
// In Git CLI a\\b matches a\b file
assertMatch("a\\\\b", "a\\b", true, assume);
}
@ -899,4 +904,4 @@ public void testFilePathCase3() throws Exception {
assertFileNameMatch("a?b", "a\\b", true);
}
}
}

View File

@ -0,0 +1,83 @@
/*
* Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.ignore.internal;
/**
* Matcher for simple regex patterns starting with an asterisk, e.g. "*.tmp"
*
* @since 3.6
*/
public class LeadingAsteriskMatcher extends NameMatcher {
LeadingAsteriskMatcher(String pattern, Character pathSeparator, boolean dirOnly) {
super(pattern, pathSeparator, dirOnly);
if (subPattern.charAt(0) != '*')
throw new IllegalArgumentException(
"Pattern must have leading asterisk: " + pattern); //$NON-NLS-1$
}
public boolean matches(String segment, int startIncl, int endExcl,
boolean assumeDirectory) {
// faster local access, same as in string.indexOf()
String s = subPattern;
// we don't need to count '*' character itself
int subLength = s.length() - 1;
// simple /*/ pattern
if (subLength == 0)
return true;
if (subLength > (endExcl - startIncl))
return false;
for (int i = subLength, j = endExcl - 1; i > 0; i--, j--) {
char c1 = s.charAt(i);
char c2 = segment.charAt(j);
if (c1 != c2)
return false;
}
return true;
}
}

View File

@ -42,6 +42,7 @@
*/
package org.eclipse.jgit.ignore.internal;
import static org.eclipse.jgit.ignore.internal.Strings.checkWildCards;
import static org.eclipse.jgit.ignore.internal.Strings.count;
import static org.eclipse.jgit.ignore.internal.Strings.getPathSeparator;
import static org.eclipse.jgit.ignore.internal.Strings.isWildCard;
@ -52,6 +53,7 @@
import org.eclipse.jgit.errors.InvalidPatternException;
import org.eclipse.jgit.ignore.FastIgnoreRule;
import org.eclipse.jgit.ignore.internal.Strings.PatternState;
/**
* Matcher built by patterns consists of multiple path segments.
@ -132,9 +134,18 @@ private static IMatcher createNameMatcher0(String segment,
if (WildMatcher.WILDMATCH.equals(segment)
|| WildMatcher.WILDMATCH2.equals(segment))
return WILD;
if (isWildCard(segment))
PatternState state = checkWildCards(segment);
switch (state) {
case LEADING_ASTERISK_ONLY:
return new LeadingAsteriskMatcher(segment, pathSeparator, dirOnly);
case TRAILING_ASTERISK_ONLY:
return new TrailingAsteriskMatcher(segment, pathSeparator, dirOnly);
case COMPLEX:
return new WildCardMatcher(segment, pathSeparator, dirOnly);
return new NameMatcher(segment, pathSeparator, dirOnly);
default:
return new NameMatcher(segment, pathSeparator, dirOnly);
}
}
public boolean matches(String path, boolean assumeDirectory) {

View File

@ -132,10 +132,39 @@ else if (right == pattern.length() - 1)
}
static boolean isWildCard(String pattern) {
return pattern.indexOf('*') != -1 || pattern.indexOf('?') != -1
|| pattern.indexOf('[') != -1
// required to match escaped backslashes '\\\\'
|| pattern.indexOf('\\') != -1 || pattern.indexOf(']') != -1;
return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
}
private static boolean isComplexWildcard(String pattern) {
int idx1 = pattern.indexOf('[');
if (idx1 != -1) {
int idx2 = pattern.indexOf(']');
if (idx2 > idx1)
return true;
}
// required to match escaped backslashes '\\\\'
if (pattern.indexOf('?') != -1 || pattern.indexOf('\\') != -1)
return true;
return false;
}
static PatternState checkWildCards(String pattern) {
if (isComplexWildcard(pattern))
return PatternState.COMPLEX;
int startIdx = pattern.indexOf('*');
if (startIdx < 0)
return PatternState.NONE;
if (startIdx == pattern.length() - 1)
return PatternState.TRAILING_ASTERISK_ONLY;
if (pattern.lastIndexOf('*') == 0)
return PatternState.LEADING_ASTERISK_ONLY;
return PatternState.COMPLEX;
}
static enum PatternState {
LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
}
final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(

View File

@ -0,0 +1,82 @@
/*
* Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.ignore.internal;
/**
* Matcher for simple patterns ending with an asterisk, e.g. "Makefile.*"
*
* @since 3.6
*/
public class TrailingAsteriskMatcher extends NameMatcher {
TrailingAsteriskMatcher(String pattern, Character pathSeparator, boolean dirOnly) {
super(pattern, pathSeparator, dirOnly);
if (subPattern.charAt(subPattern.length() - 1) != '*')
throw new IllegalArgumentException(
"Pattern must have trailing asterisk: " + pattern); //$NON-NLS-1$
}
public boolean matches(String segment, int startIncl, int endExcl,
boolean assumeDirectory) {
// faster local access, same as in string.indexOf()
String s = subPattern;
// we don't need to count '*' character itself
int subLenth = s.length() - 1;
// simple /*/ pattern
if (subLenth == 0)
return true;
if (subLenth > (endExcl - startIncl))
return false;
for (int i = 0; i < subLenth; i++) {
char c1 = s.charAt(i);
char c2 = segment.charAt(i + startIncl);
if (c1 != c2)
return false;
}
return true;
}
}