diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java new file mode 100644 index 000000000..5e1a238a7 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespaceTest.java @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2009-2010, Google Inc. + * Copyright (C) 2009, Johannes E. Schindelin + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import org.eclipse.jgit.lib.Constants; + +import junit.framework.TestCase; + +public class RawTextIgnoreAllWhitespaceTest extends TestCase { + public void testEqualsWithoutWhitespace() { + final RawText a = new RawTextIgnoreAllWhitespace(Constants + .encodeASCII("foo-a\nfoo-b\nfoo\n")); + final RawText b = new RawTextIgnoreAllWhitespace(Constants + .encodeASCII("foo-b\nfoo-c\nf\n")); + + assertEquals(3, a.size()); + assertEquals(3, b.size()); + + // a[0] "foo-a" != b[0] "foo-b" + assertFalse(a.equals(0, b, 0)); + assertFalse(b.equals(0, a, 0)); + + // a[1] "foo-b" == b[0] "foo-b" + assertTrue(a.equals(1, b, 0)); + assertTrue(b.equals(0, a, 1)); + + // a[2] "foo" != b[2] "f" (a prefix is not a match) + assertFalse(a.equals(2, b, 2)); + assertFalse(b.equals(2, a, 2)); + } + + public void testEqualsWithWhitespace() { + final RawText a = new RawTextIgnoreAllWhitespace(Constants + .encodeASCII("foo-a\n \n a b c\na \n")); + final RawText b = new RawTextIgnoreAllWhitespace(Constants + .encodeASCII("foo-a b\n\nab c\na\n")); + + // a[0] "foo-a" != b[0] "foo-a b" (extra non-whitespace byte) + assertFalse(a.equals(0, b, 0)); + assertFalse(b.equals(0, a, 0)); + + // a[1] whitespace-only == b[1] empty + assertTrue(a.equals(1, b, 1)); + assertTrue(b.equals(1, a, 1)); + + // a[2] " a b c" == b[2] "ab c" (leading and inner whitespace ignored) + assertTrue(a.equals(2, b, 2)); + assertTrue(b.equals(2, a, 2)); + + // a[3] "a " == b[3] "a" (trailing whitespace ignored) + assertTrue(a.equals(3, b, 3)); + assertTrue(b.equals(3, a, 3)); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java 
b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java new file mode 100644 index 000000000..6747b26ad --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawCharUtilTest.java @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util; + +import java.io.UnsupportedEncodingException; + +import junit.framework.TestCase; +import static org.eclipse.jgit.util.RawCharUtil.isWhitespace; +import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; +import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace; + +public class RawCharUtilTest extends TestCase { + + /** + * Test method for {@link RawCharUtil#isWhitespace(byte)}. Covers byte values -128 through 126; 127 is not reached by the loop bound. + */ + public void testIsWhitespace() { + for (byte c = -128; c < 127; c++) { + switch (c) { + case (byte) '\r': + case (byte) '\n': + case (byte) '\t': + case (byte) ' ': + assertTrue(isWhitespace(c)); + break; + default: + assertFalse(isWhitespace(c)); + } + } + } + + /** + * Test method for + * {@link RawCharUtil#trimTrailingWhitespace(byte[], int, int)}. 
+ * Each literal is at least {@code end} bytes long so the trimmed range stays in bounds. + * @throws UnsupportedEncodingException + */ + public void testTrimTrailingWhitespace() + throws UnsupportedEncodingException { + assertEquals(0, trimTrailingWhitespace("".getBytes("US-ASCII"), 0, 0)); + assertEquals(0, trimTrailingWhitespace(" ".getBytes("US-ASCII"), 0, 1)); + assertEquals(1, trimTrailingWhitespace("a ".getBytes("US-ASCII"), 0, 2)); + assertEquals(2, + trimTrailingWhitespace(" a ".getBytes("US-ASCII"), 0, 3)); + assertEquals(3, + trimTrailingWhitespace("  a".getBytes("US-ASCII"), 0, 3)); + assertEquals(6, trimTrailingWhitespace( + "  test   ".getBytes("US-ASCII"), 2, 9)); + } + + /** + * Test method for + * {@link RawCharUtil#trimLeadingWhitespace(byte[], int, int)}. + * Each literal is at least {@code end} bytes long so the trimmed range stays in bounds. + * @throws UnsupportedEncodingException + */ + public void testTrimLeadingWhitespace() throws UnsupportedEncodingException { + assertEquals(0, trimLeadingWhitespace("".getBytes("US-ASCII"), 0, 0)); + assertEquals(1, trimLeadingWhitespace(" ".getBytes("US-ASCII"), 0, 1)); + assertEquals(0, trimLeadingWhitespace("a ".getBytes("US-ASCII"), 0, 2)); + assertEquals(1, trimLeadingWhitespace(" a ".getBytes("US-ASCII"), 0, 3)); + assertEquals(2, trimLeadingWhitespace("  a".getBytes("US-ASCII"), 0, 3)); + assertEquals(2, trimLeadingWhitespace("  test   ".getBytes("US-ASCII"), + 2, 9)); + } + +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java new file mode 100644 index 000000000..f72259605 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextIgnoreAllWhitespace.java @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2009-2010, Google Inc. + * Copyright (C) 2008-2009, Johannes E. Schindelin + * and other copyright owners as documented in the project's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.diff; + +import static org.eclipse.jgit.util.RawCharUtil.isWhitespace; +import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace; + +/** + * A version of {@link RawText} that ignores all whitespace when comparing and hashing lines. + */ +public class RawTextIgnoreAllWhitespace extends RawText { + + /** + * Create a new sequence from an existing content byte array. + *
<p>
+ * The entire array (indexes 0 through length-1) is used as the content. + * + * @param input + * the content array. The array is never modified, so passing + * through cached arrays is safe. + */ + public RawTextIgnoreAllWhitespace(byte[] input) { + super(input); + } + + @Override + public boolean equals(final int i, final Sequence other, final int j) { + return equals(this, i + 1, (RawText) other, j + 1); /* the lines and hashes lookups below are made with i + 1 and j + 1 */ + } + + private static boolean equals(final RawText a, final int ai, + final RawText b, final int bi) { + if (a.hashes.get(ai) != b.hashes.get(bi)) /* cheap reject on differing line hashes */ + return false; + + int as = a.lines.get(ai); + int bs = b.lines.get(bi); + int ae = a.lines.get(ai + 1); + int be = b.lines.get(bi + 1); + + ae = trimTrailingWhitespace(a.content, as, ae); /* after trimming, a non-empty range ends on a non-whitespace byte */ + be = trimTrailingWhitespace(b.content, bs, be); + + while (as < ae && bs < be) { + byte ac = a.content[as]; + byte bc = b.content[bs]; + + while (as < ae - 1 && isWhitespace(ac)) { /* skip whitespace in a */ + as++; + ac = a.content[as]; + } + + while (bs < be - 1 && isWhitespace(bc)) { /* skip whitespace in b */ + bs++; + bc = b.content[bs]; + } + + if (ac != bc) + return false; + + as++; + bs++; + } + + return as == ae && bs == be; /* equal only if both ranges were fully consumed */ + } + + @Override + protected int hashLine(final byte[] raw, int ptr, final int end) { + int hash = 5381; + for (; ptr < end; ptr++) { + byte c = raw[ptr]; + if (!isWhitespace(c)) /* whitespace bytes are left out of the hash */ + hash = (hash << 5) ^ (c & 0xff); + } + return hash; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java new file mode 100644 index 000000000..9b4e542ef --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawCharUtil.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util; + +/** + * Utility class for character functions on raw bytes. + *
<p>
+ * Characters are assumed to be 8-bit US-ASCII. + */ +public class RawCharUtil { + private static final boolean[] WHITESPACE = new boolean[256]; + + static { + WHITESPACE['\r'] = true; + WHITESPACE['\n'] = true; + WHITESPACE['\t'] = true; + WHITESPACE[' '] = true; + } + + /** + * Determine if an 8-bit US-ASCII encoded character represents whitespace + * + * @param c + * the 8-bit US-ASCII encoded character + * @return true if c is a carriage return, line feed, tab or space + */ + public static boolean isWhitespace(byte c) { + return WHITESPACE[c & 0xff]; + } + + /** + * Returns the new end point for the byte array passed in after trimming any + * trailing whitespace characters, as determined by the isWhitespace() + * function. start and end are assumed to be within the bounds of raw. + * + * @param raw + * the byte array containing the portion to trim whitespace for + * @param start + * the start of the section of bytes + * @param end + * the end of the section of bytes (exclusive) + * @return the new end point; equals start when the whole section is whitespace + */ + public static int trimTrailingWhitespace(byte[] raw, int start, int end) { + int ptr = end - 1; + while (start <= ptr && isWhitespace(raw[ptr])) + ptr--; + + return ptr + 1; + } + + /** + * Returns the new start point for the byte array passed in after trimming + * any leading whitespace characters, as determined by the isWhitespace() + * function. start and end are assumed to be within the bounds of raw. + * + * @param raw + * the byte array containing the portion to trim whitespace for + * @param start + * the start of the section of bytes + * @param end + * the end of the section of bytes (exclusive) + * @return the new start point; equals end when the whole section is whitespace + */ + public static int trimLeadingWhitespace(byte[] raw, int start, int end) { + while (start < end && isWhitespace(raw[start])) + start++; + + return start; + } + + private RawCharUtil() { + // This will never be called + } +}