Browse Source

Added support for whitespace ignoring

JGit did not have support for skipping whitespace when comparing
lines in RawText objects. I added a subclass of RawText that skips
whitespace in its equals and hashCode methods. I used a subclass
rather than adding functionality into RawText so that performance
would not be impacted by extra logic.

This class only supports ignoring all whitespace. Others will follow
that allow other forms of whitespace ignoring.

Change-Id: Ic2f79e85215e48d3fd53ec1b4ad13373dd183a4a
stable-0.9
Jeff Schumacher 15 years ago
parent
commit
543235b805
  1. 96
      org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java
  2. 108
      org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java
  3. 120
      org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java
  4. 116
      org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java

96
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java

@ -0,0 +1,96 @@
/*
* Copyright (C) 2009-2010, Google Inc.
* Copyright (C) 2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import org.eclipse.jgit.lib.Constants;
import junit.framework.TestCase;
/**
 * Exercises line equality of {@link RawTextIgnoreAllWhitespace}, both for
 * lines without any whitespace and for lines that differ only in whitespace.
 */
public class RawTextIgnoreAllWhitespaceTest extends TestCase {
	/**
	 * Lines that contain no whitespace must compare equal only when their
	 * bytes are identical.
	 */
	public void testEqualsWithoutWhitespace() {
		final RawText left = text("foo-a\nfoo-b\nfoo\n");
		final RawText right = text("foo-b\nfoo-c\nf\n");

		assertEquals(3, left.size());
		assertEquals(3, right.size());

		// "foo-a" versus "foo-b": different content
		assertFalse(left.equals(0, right, 0));
		assertFalse(right.equals(0, left, 0));

		// "foo-b" versus "foo-b": same content at different line indexes
		assertTrue(left.equals(1, right, 0));
		assertTrue(right.equals(0, left, 1));

		// "foo" versus "f": one line is a prefix of the other
		assertFalse(left.equals(2, right, 2));
		assertFalse(right.equals(2, left, 2));
	}

	/**
	 * Lines that differ only by whitespace must compare equal; lines that
	 * differ by a non-whitespace byte must not.
	 */
	public void testEqualsWithWhitespace() {
		final RawText left = text("foo-a\n \n a b c\na \n");
		final RawText right = text("foo-a b\n\nab c\na\n");

		// "foo-a" versus "foo-a b": the extra 'b' is significant
		assertFalse(left.equals(0, right, 0));
		assertFalse(right.equals(0, left, 0));

		// whitespace-only line versus empty line
		assertTrue(left.equals(1, right, 1));
		assertTrue(right.equals(1, left, 1));

		// leading and embedded whitespace is ignored
		assertTrue(left.equals(2, right, 2));
		assertTrue(right.equals(2, left, 2));

		// trailing whitespace is ignored
		assertTrue(left.equals(3, right, 3));
		assertTrue(right.equals(3, left, 3));
	}

	/** Build the sequence under test from an ASCII string. */
	private static RawText text(final String ascii) {
		return new RawTextIgnoreAllWhitespace(Constants.encodeASCII(ascii));
	}
}

108
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java

@ -0,0 +1,108 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util;
import java.io.UnsupportedEncodingException;
import junit.framework.TestCase;
import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
/**
 * Unit tests for the byte classification and trimming helpers in
 * {@link RawCharUtil}.
 */
public class RawCharUtilTest extends TestCase {
	/**
	 * Test method for {@link RawCharUtil#isWhitespace(byte)}.
	 * <p>
	 * Checks every possible byte value: only CR, LF, TAB and SPACE may be
	 * reported as whitespace.
	 */
	public void testIsWhitespace() {
		// Count with an int: a byte counter cannot express "<= 127" without
		// overflowing, and "< 127" would leave the value 127 untested.
		for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
			final byte c = (byte) i;
			switch (c) {
			case (byte) '\r':
			case (byte) '\n':
			case (byte) '\t':
			case (byte) ' ':
				assertTrue(isWhitespace(c));
				break;
			default:
				assertFalse(isWhitespace(c));
			}
		}
	}

	/**
	 * Test method for
	 * {@link RawCharUtil#trimTrailingWhitespace(byte[], int, int)}.
	 *
	 * @throws UnsupportedEncodingException
	 */
	public void testTrimTrailingWhitespace()
			throws UnsupportedEncodingException {
		assertEquals(0, trimTrailingWhitespace("".getBytes("US-ASCII"), 0, 0));
		assertEquals(0, trimTrailingWhitespace(" ".getBytes("US-ASCII"), 0, 1));
		assertEquals(1, trimTrailingWhitespace("a ".getBytes("US-ASCII"), 0, 2));
		assertEquals(2,
				trimTrailingWhitespace(" a ".getBytes("US-ASCII"), 0, 3));
		// two leading spaces, nothing to trim at the end
		assertEquals(3,
				trimTrailingWhitespace("  a".getBytes("US-ASCII"), 0, 3));
		// 9 bytes: two spaces, "test", three spaces; section starts at 't'
		assertEquals(6, trimTrailingWhitespace(
				"  test   ".getBytes("US-ASCII"), 2, 9));
	}

	/**
	 * Test method for
	 * {@link RawCharUtil#trimLeadingWhitespace(byte[], int, int)}.
	 *
	 * @throws UnsupportedEncodingException
	 */
	public void testTrimLeadingWhitespace() throws UnsupportedEncodingException {
		assertEquals(0, trimLeadingWhitespace("".getBytes("US-ASCII"), 0, 0));
		assertEquals(1, trimLeadingWhitespace(" ".getBytes("US-ASCII"), 0, 1));
		assertEquals(0, trimLeadingWhitespace("a ".getBytes("US-ASCII"), 0, 2));
		assertEquals(1, trimLeadingWhitespace(" a ".getBytes("US-ASCII"), 0, 3));
		// two leading spaces are skipped
		assertEquals(2, trimLeadingWhitespace("  a".getBytes("US-ASCII"), 0, 3));
		// 9 bytes: two spaces, "test", three spaces; section starts at 't'
		assertEquals(2, trimLeadingWhitespace("  test   ".getBytes("US-ASCII"),
				2, 9));
	}
}

120
org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java

@ -0,0 +1,120 @@
/*
* Copyright (C) 2009-2010, Google Inc.
* Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
/**
 * A {@link RawText} variant whose line comparison and line hashing skip every
 * whitespace byte.
 */
public class RawTextIgnoreAllWhitespace extends RawText {
	/**
	 * Create a new sequence from an existing content byte array.
	 * <p>
	 * The entire array (indexes 0 through length-1) is used as the content.
	 *
	 * @param input
	 *            the content array. The array is never modified, so passing
	 *            through cached arrays is safe.
	 */
	public RawTextIgnoreAllWhitespace(byte[] input) {
		super(input);
	}

	/**
	 * Compare line {@code i} of this sequence with line {@code j} of
	 * {@code other}, disregarding whitespace.
	 *
	 * @param i
	 *            line index within this sequence
	 * @param other
	 *            the sequence to compare against; it is cast to
	 *            {@link RawText}
	 * @param j
	 *            line index within {@code other}
	 * @return true if both lines hold the same non-whitespace bytes in the
	 *         same order
	 */
	@Override
	public boolean equals(final int i, final Sequence other, final int j) {
		final RawText that = (RawText) other;
		// Both indexes are shifted by one before the line and hash tables
		// are consulted.
		return equals(this, i + 1, that, j + 1);
	}

	/**
	 * Compare two table entries byte by byte while stepping over whitespace.
	 *
	 * @param a
	 *            first text
	 * @param ai
	 *            entry of {@code a} in its line and hash tables
	 * @param b
	 *            second text
	 * @param bi
	 *            entry of {@code b} in its line and hash tables
	 * @return true if the lines match once whitespace is disregarded
	 */
	private static boolean equals(final RawText a, final int ai,
			final RawText b, final int bi) {
		// Differing hashes rule out equality without touching the content.
		if (a.hashes.get(ai) != b.hashes.get(bi))
			return false;

		int aPtr = a.lines.get(ai);
		int bPtr = b.lines.get(bi);
		final int aLimit = a.lines.get(ai + 1);
		final int bLimit = b.lines.get(bi + 1);

		final byte[] aRaw = a.content;
		final byte[] bRaw = b.content;

		// After trimming, a non-empty range ends on a non-whitespace byte,
		// which is why the skip loops below may stop one short of the end.
		final int aEnd = trimTrailingWhitespace(aRaw, aPtr, aLimit);
		final int bEnd = trimTrailingWhitespace(bRaw, bPtr, bLimit);

		while (aPtr < aEnd && bPtr < bEnd) {
			while (aPtr < aEnd - 1 && isWhitespace(aRaw[aPtr]))
				aPtr++;
			while (bPtr < bEnd - 1 && isWhitespace(bRaw[bPtr]))
				bPtr++;

			if (aRaw[aPtr++] != bRaw[bPtr++])
				return false;
		}

		// Equal only if both lines were consumed completely.
		return aPtr == aEnd && bPtr == bEnd;
	}

	/**
	 * Hash a line from its non-whitespace bytes only.
	 *
	 * @param raw
	 *            the content buffer
	 * @param ptr
	 *            first byte of the line
	 * @param end
	 *            one past the last byte of the line
	 * @return the hash of the line's non-whitespace bytes
	 */
	@Override
	protected int hashLine(final byte[] raw, int ptr, final int end) {
		int hash = 5381;
		while (ptr < end) {
			final byte c = raw[ptr++];
			if (isWhitespace(c))
				continue;
			hash = (hash << 5) ^ (c & 0xff);
		}
		return hash;
	}
}

116
org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java

@ -0,0 +1,116 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util;
/**
 * Helper functions that classify and trim characters stored as raw bytes.
 * <p>
 * Characters are assumed to be 8-bit US-ASCII.
 */
public class RawCharUtil {
	/** Lookup table indexed by the unsigned byte value. */
	private static final boolean[] WHITESPACE = new boolean[256];

	static {
		for (final char ws : new char[] { '\r', '\n', '\t', ' ' })
			WHITESPACE[ws] = true;
	}

	/**
	 * Determine if an 8-bit US-ASCII encoded character represents whitespace.
	 * <p>
	 * Carriage return, line feed, horizontal tab and space are the only
	 * bytes treated as whitespace.
	 *
	 * @param c
	 *            the 8-bit US-ASCII encoded character
	 * @return true if c represents a whitespace character in 8-bit US-ASCII
	 */
	public static boolean isWhitespace(byte c) {
		// Mask to 0..255 so negative byte values index the table correctly.
		final int unsigned = c & 0xff;
		return WHITESPACE[unsigned];
	}

	/**
	 * Returns the new end point for the byte array passed in after trimming
	 * any trailing whitespace characters, as determined by the
	 * isWhitespace() function. start and end are assumed to be within the
	 * bounds of raw.
	 *
	 * @param raw
	 *            the byte array containing the portion to trim whitespace for
	 * @param start
	 *            the start of the section of bytes
	 * @param end
	 *            the end of the section of bytes
	 * @return the new end point
	 */
	public static int trimTrailingWhitespace(byte[] raw, int start, int end) {
		// Pull the end back while the byte just before it is whitespace.
		while (end > start && isWhitespace(raw[end - 1]))
			end--;
		return end;
	}

	/**
	 * Returns the new start point for the byte array passed in after
	 * trimming any leading whitespace characters, as determined by the
	 * isWhitespace() function. start and end are assumed to be within the
	 * bounds of raw.
	 *
	 * @param raw
	 *            the byte array containing the portion to trim whitespace for
	 * @param start
	 *            the start of the section of bytes
	 * @param end
	 *            the end of the section of bytes
	 * @return the new start point
	 */
	public static int trimLeadingWhitespace(byte[] raw, int start, int end) {
		int ptr = start;
		for (; ptr < end; ptr++) {
			if (!isWhitespace(raw[ptr]))
				break;
		}
		return ptr;
	}

	private RawCharUtil() {
		// This will never be called
	}
}
Loading…
Cancel
Save