Browse Source

Don't use java.util.regex for two simple wildcard cases

To improve ignore parser performance we can avoid using java.util.regex
code on simple wildcard patterns with leading or trailing asterisk. As
those patterns represent a majority of ignore rules, the index diff
performance can be drastically increased on huge repository with lot of
ignore rules.

Bug: 450466
Change-Id: I80428441cc8d5de5468813f841d89322413eed8b
Signed-off-by: Andrey Loskutov <loskutov@gmx.de>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
stable-3.6
Andrey Loskutov 10 years ago committed by Matthias Sohn
parent
commit
2c940fa18a
  1. 14
      org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/FastIgnoreRuleTest.java
  2. 6
      org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreMatcherParametrizedTest.java
  3. 27
      org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreRuleSpecialCasesTest.java
  4. 83
      org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java
  5. 15
      org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/PathMatcher.java
  6. 37
      org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/Strings.java
  7. 82
      org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java

14
org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/FastIgnoreRuleTest.java

@ -62,14 +62,14 @@ import org.junit.runners.Parameterized.Parameters;
@RunWith(Parameterized.class)
public class FastIgnoreRuleTest {
@Parameters(name = "JGit? {0}")
@Parameters(name = "OldRule? {0}")
public static Iterable<Boolean[]> data() {
return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
{ Boolean.TRUE } });
}
@Parameter
public Boolean useJGitRule;
public Boolean useOldRule;
@Test
public void testSimpleCharClass() {
@ -388,11 +388,11 @@ public class FastIgnoreRuleTest {
@SuppressWarnings("boxing")
@Test
public void testWildmatch() {
if (useJGitRule)
if (useOldRule)
System.err
.println("IgnoreRule can't understand wildmatch rules, skipping testWildmatch!");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
assertMatched("**/a/b", "a/b", assume);
assertMatched("**/a/b", "c/a/b", assume);
assertMatched("**/a/b", "c/d/a/b", assume);
@ -428,11 +428,11 @@ public class FastIgnoreRuleTest {
@SuppressWarnings("boxing")
@Test
public void testWildmatchDoNotMatch() {
if (useJGitRule)
if (useOldRule)
System.err
.println("IgnoreRule can't understand wildmatch rules, skipping testWildmatchDoNotMatch!");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
assertNotMatched("**/a/b", "a/c/b", assume);
assertNotMatched("!/**/*.zip", "c/a/b.zip", assume);
assertNotMatched("!**/*.zip", "c/a/b.zip", assume);
@ -542,7 +542,7 @@ public class FastIgnoreRuleTest {
*/
private boolean match(String pattern, String target) {
boolean isDirectory = target.endsWith("/");
if (useJGitRule.booleanValue()) {
if (useOldRule.booleanValue()) {
IgnoreRule r = new IgnoreRule(pattern);
// If speed of this test is ever an issue, we can use a presetRule
// field

6
org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreMatcherParametrizedTest.java

@ -60,14 +60,14 @@ import org.junit.runners.Parameterized.Parameters;
@RunWith(Parameterized.class)
public class IgnoreMatcherParametrizedTest {
@Parameters(name = "JGit? {0}")
@Parameters(name = "OldRule? {0}")
public static Iterable<Boolean[]> data() {
return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
{ Boolean.TRUE } });
}
@Parameter
public Boolean useJGitRule;
public Boolean useOldRule;
@Test
public void testBasic() {
@ -355,7 +355,7 @@ public class IgnoreMatcherParametrizedTest {
*/
private boolean match(String pattern, String target) {
boolean isDirectory = target.endsWith("/");
if (useJGitRule.booleanValue()) {
if (useOldRule.booleanValue()) {
IgnoreRule r = new IgnoreRule(pattern);
// If speed of this test is ever an issue, we can use a presetRule
// field

27
org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreRuleSpecialCasesTest.java

@ -58,19 +58,19 @@ import org.junit.runners.Parameterized.Parameters;
@SuppressWarnings({ "deprecation", "boxing" })
public class IgnoreRuleSpecialCasesTest {
@Parameters(name = "JGit? {0}")
@Parameters(name = "OldRule? {0}")
public static Iterable<Boolean[]> data() {
return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
{ Boolean.TRUE } });
}
@Parameter
public Boolean useJGitRule;
public Boolean useOldRule;
private void assertMatch(final String pattern, final String input,
final boolean matchExpected, Boolean... assume) {
boolean assumeDir = input.endsWith("/");
if (useJGitRule.booleanValue()) {
if (useOldRule.booleanValue()) {
final IgnoreRule matcher = new IgnoreRule(pattern);
if (assume.length == 0 || !assume[0].booleanValue())
assertEquals(matchExpected, matcher.isMatch(input, assumeDir));
@ -88,7 +88,7 @@ public class IgnoreRuleSpecialCasesTest {
private void assertFileNameMatch(final String pattern, final String input,
final boolean matchExpected) {
boolean assumeDir = input.endsWith("/");
if (useJGitRule.booleanValue()) {
if (useOldRule.booleanValue()) {
final IgnoreRule matcher = new IgnoreRule(pattern);
assertEquals(matchExpected, matcher.isMatch(input, assumeDir));
} else {
@ -99,10 +99,10 @@ public class IgnoreRuleSpecialCasesTest {
@Test
public void testVerySimplePatternCase0() throws Exception {
if (useJGitRule)
if (useOldRule)
System.err
.println("IgnoreRule can't understand blank lines, skipping");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
assertMatch("", "", false, assume);
}
@ -800,9 +800,14 @@ public class IgnoreRuleSpecialCasesTest {
@Test
public void testSpecialGroupCase9() throws Exception {
if (useJGitRule)
assertMatch("][", "][", true);
}
@Test
public void testSpecialGroupCase10() throws Exception {
if (useOldRule)
System.err.println("IgnoreRule can't understand [[:], skipping");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
// Second bracket is threated literally, so both [ and : should match
assertMatch("[[:]", ":", true, assume);
assertMatch("[[:]", "[", true, assume);
@ -861,10 +866,10 @@ public class IgnoreRuleSpecialCasesTest {
@Test
public void testEscapedBackslash() throws Exception {
if (useJGitRule)
if (useOldRule)
System.err
.println("IgnoreRule can't understand escaped backslashes, skipping");
Boolean assume = useJGitRule;
Boolean assume = useOldRule;
// In Git CLI a\\b matches a\b file
assertMatch("a\\\\b", "a\\b", true, assume);
}
@ -899,4 +904,4 @@ public class IgnoreRuleSpecialCasesTest {
assertFileNameMatch("a?b", "a\\b", true);
}
}
}

83
org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java

@ -0,0 +1,83 @@
/*
* Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.ignore.internal;
/**
* Matcher for simple regex patterns starting with an asterisk, e.g. "*.tmp"
*
* @since 3.6
*/
public class LeadingAsteriskMatcher extends NameMatcher {
LeadingAsteriskMatcher(String pattern, Character pathSeparator, boolean dirOnly) {
super(pattern, pathSeparator, dirOnly);
if (subPattern.charAt(0) != '*')
throw new IllegalArgumentException(
"Pattern must have leading asterisk: " + pattern); //$NON-NLS-1$
}
public boolean matches(String segment, int startIncl, int endExcl,
boolean assumeDirectory) {
// faster local access, same as in string.indexOf()
String s = subPattern;
// we don't need to count '*' character itself
int subLength = s.length() - 1;
// simple /*/ pattern
if (subLength == 0)
return true;
if (subLength > (endExcl - startIncl))
return false;
for (int i = subLength, j = endExcl - 1; i > 0; i--, j--) {
char c1 = s.charAt(i);
char c2 = segment.charAt(j);
if (c1 != c2)
return false;
}
return true;
}
}

15
org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/PathMatcher.java

@ -42,6 +42,7 @@
*/
package org.eclipse.jgit.ignore.internal;
import static org.eclipse.jgit.ignore.internal.Strings.checkWildCards;
import static org.eclipse.jgit.ignore.internal.Strings.count;
import static org.eclipse.jgit.ignore.internal.Strings.getPathSeparator;
import static org.eclipse.jgit.ignore.internal.Strings.isWildCard;
@ -52,6 +53,7 @@ import java.util.List;
import org.eclipse.jgit.errors.InvalidPatternException;
import org.eclipse.jgit.ignore.FastIgnoreRule;
import org.eclipse.jgit.ignore.internal.Strings.PatternState;
/**
* Matcher built by patterns consists of multiple path segments.
@ -132,9 +134,18 @@ public class PathMatcher extends AbstractMatcher {
if (WildMatcher.WILDMATCH.equals(segment)
|| WildMatcher.WILDMATCH2.equals(segment))
return WILD;
if (isWildCard(segment))
PatternState state = checkWildCards(segment);
switch (state) {
case LEADING_ASTERISK_ONLY:
return new LeadingAsteriskMatcher(segment, pathSeparator, dirOnly);
case TRAILING_ASTERISK_ONLY:
return new TrailingAsteriskMatcher(segment, pathSeparator, dirOnly);
case COMPLEX:
return new WildCardMatcher(segment, pathSeparator, dirOnly);
return new NameMatcher(segment, pathSeparator, dirOnly);
default:
return new NameMatcher(segment, pathSeparator, dirOnly);
}
}
public boolean matches(String path, boolean assumeDirectory) {

37
org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/Strings.java

@ -132,10 +132,39 @@ public class Strings {
}
static boolean isWildCard(String pattern) {
return pattern.indexOf('*') != -1 || pattern.indexOf('?') != -1
|| pattern.indexOf('[') != -1
// required to match escaped backslashes '\\\\'
|| pattern.indexOf('\\') != -1 || pattern.indexOf(']') != -1;
return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
}
private static boolean isComplexWildcard(String pattern) {
int idx1 = pattern.indexOf('[');
if (idx1 != -1) {
int idx2 = pattern.indexOf(']');
if (idx2 > idx1)
return true;
}
// required to match escaped backslashes '\\\\'
if (pattern.indexOf('?') != -1 || pattern.indexOf('\\') != -1)
return true;
return false;
}
static PatternState checkWildCards(String pattern) {
if (isComplexWildcard(pattern))
return PatternState.COMPLEX;
int startIdx = pattern.indexOf('*');
if (startIdx < 0)
return PatternState.NONE;
if (startIdx == pattern.length() - 1)
return PatternState.TRAILING_ASTERISK_ONLY;
if (pattern.lastIndexOf('*') == 0)
return PatternState.LEADING_ASTERISK_ONLY;
return PatternState.COMPLEX;
}
static enum PatternState {
LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
}
final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(

82
org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java

@ -0,0 +1,82 @@
/*
* Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.ignore.internal;
/**
* Matcher for simple patterns ending with an asterisk, e.g. "Makefile.*"
*
* @since 3.6
*/
public class TrailingAsteriskMatcher extends NameMatcher {
TrailingAsteriskMatcher(String pattern, Character pathSeparator, boolean dirOnly) {
super(pattern, pathSeparator, dirOnly);
if (subPattern.charAt(subPattern.length() - 1) != '*')
throw new IllegalArgumentException(
"Pattern must have trailing asterisk: " + pattern); //$NON-NLS-1$
}
public boolean matches(String segment, int startIncl, int endExcl,
boolean assumeDirectory) {
// faster local access, same as in string.indexOf()
String s = subPattern;
// we don't need to count '*' character itself
int subLenth = s.length() - 1;
// simple /*/ pattern
if (subLenth == 0)
return true;
if (subLenth > (endExcl - startIncl))
return false;
for (int i = 0; i < subLenth; i++) {
char c1 = s.charAt(i);
char c2 = segment.charAt(i + startIncl);
if (c1 != c2)
return false;
}
return true;
}
}
Loading…
Cancel
Save