diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index ec1ced62a..1aa76bc43 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -882,8 +882,8 @@ private FormatResult createFormatResult(DiffEntry ent) throws IOException, type = PatchType.BINARY; } else { - res.a = new RawText(comparator, aRaw); - res.b = new RawText(comparator, bRaw); + res.a = new RawText(aRaw); + res.b = new RawText(bRaw); editList = diff(res.a, res.b); type = PatchType.UNIFIED; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequence.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequence.java new file mode 100644 index 000000000..2a26700c1 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequence.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +/** + * Wraps a {@link Sequence} to assign hash codes to elements. + * + * This sequence acts as a proxy for the real sequence, caching element hash + * codes so they don't need to be recomputed each time. Sequences of this type + * must be used with a {@link HashedSequenceComparator}. + * + * To construct an instance of this type use {@link HashedSequencePair}. + * + * @param + * the base sequence type. + */ +public final class HashedSequence extends Sequence { + final S base; + + final int[] hashes; + + HashedSequence(S base, int[] hashes) { + this.base = base; + this.hashes = hashes; + } + + @Override + public int size() { + return base.size(); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequenceComparator.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequenceComparator.java new file mode 100644 index 000000000..2aa84e49e --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequenceComparator.java @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +/** + * Wrap another comparator for use with {@link HashedSequence}. + * + * This comparator acts as a proxy for the real comparator, evaluating the + * cached hash code before testing the underlying comparator's equality. + * Comparators of this type must be used with a {@link HashedSequence}. + * + * To construct an instance of this type use {@link HashedSequencePair}. + * + * @param + * the base sequence type. + */ +public final class HashedSequenceComparator extends + SequenceComparator> { + private final SequenceComparator cmp; + + HashedSequenceComparator(SequenceComparator cmp) { + this.cmp = cmp; + } + + @Override + public boolean equals(HashedSequence a, int ai, // + HashedSequence b, int bi) { + return a.hashes[ai] == b.hashes[bi] + && cmp.equals(a.base, ai, b.base, bi); + } + + @Override + public int hash(HashedSequence seq, int ptr) { + return seq.hashes[ptr]; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequencePair.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequencePair.java new file mode 100644 index 000000000..b72202ffc --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequencePair.java @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +/** + * Wraps two {@link Sequence} instances to cache their element hash codes. + * + * This pair wraps two sequences that contain cached hash codes for the input + * sequences. + * + * @param + * the base sequence type. + */ +public class HashedSequencePair { + private final SequenceComparator cmp; + + private final S baseA; + + private final S baseB; + + private HashedSequence cachedA; + + private HashedSequence cachedB; + + /** + * Construct a pair to provide fast hash codes. + * + * @param cmp + * the base comparator for the sequence elements. + * @param a + * the A sequence. + * @param b + * the B sequence. + */ + public HashedSequencePair(SequenceComparator cmp, S a, S b) { + this.cmp = cmp; + this.baseA = a; + this.baseB = b; + } + + /** @return obtain a comparator that uses the cached hash codes. */ + public HashedSequenceComparator getComparator() { + return new HashedSequenceComparator(cmp); + } + + /** @return wrapper around A that includes cached hash codes. */ + public HashedSequence getA() { + if (cachedA == null) + cachedA = wrap(baseA); + return cachedA; + } + + /** @return wrapper around B that includes cached hash codes. */ + public HashedSequence getB() { + if (cachedB == null) + cachedB = wrap(baseB); + return cachedB; + } + + private HashedSequence wrap(S base) { + final int end = base.size(); + final int[] hashes = new int[end]; + for (int ptr = 0; ptr < end; ptr++) + hashes[ptr] = cmp.hash(base, ptr); + return new HashedSequence(base, hashes); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java index 45abb58cd..3fad2c349 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java @@ -110,7 +110,26 @@ public class MyersDiff { public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() { public > EditList diff( C cmp, S a, S b) { - return new MyersDiff(cmp, a, b).getEdits(); + Edit region = new Edit(0, a.size(), 0, b.size()); + region = cmp.reduceCommonStartEnd(a, b, region); + + switch (region.getType()) { + case INSERT: + case DELETE: { + EditList r = new EditList(); + r.add(region); + return r; + } + + case REPLACE: + return new MyersDiff(cmp, a, b, region).getEdits(); + + case EMPTY: + return new EditList(); + + default: + throw new IllegalStateException(); + } } }; @@ -120,23 +139,31 @@ public > EditList di protected EditList edits; /** Comparison function for sequences. */ - protected SequenceComparator cmp; + protected HashedSequenceComparator> cmp; /** * The first text to be compared. Referred to as "Text A" in the comments */ - protected S a; + protected HashedSequence> a; /** * The second text to be compared. Referred to as "Text B" in the comments */ - protected S b; + protected HashedSequence> b; + + private MyersDiff(SequenceComparator cmp, S a, S b, Edit region) { + Subsequence as = Subsequence.a(a, region); + Subsequence bs = Subsequence.b(b, region); + + HashedSequencePair> pair = new HashedSequencePair>( + new SubsequenceComparator(cmp), as, bs); + + this.cmp = pair.getComparator(); + this.a = pair.getA(); + this.b = pair.getB(); - private MyersDiff(SequenceComparator cmp, S a, S b) { - this.cmp = cmp; - this.a = a; - this.b = b; calculateEdits(); + Subsequence.toBase(edits, as, bs); } /** @@ -538,8 +565,8 @@ public static void main(String[] args) { try { RawText a = new RawText(new java.io.File(args[0])); RawText b = new RawText(new java.io.File(args[1])); - MyersDiff diff = new MyersDiff(RawTextComparator.DEFAULT, a, b); - System.out.println(diff.getEdits().toString()); + EditList res = INSTANCE.diff(RawTextComparator.DEFAULT, a, b); + System.out.println(res.toString()); } catch (Exception e) { e.printStackTrace(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 07d28572a..7d0987450 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -75,9 +75,6 @@ public class RawText extends Sequence { /** Map of line number to starting position within {@link #content}. */ protected final IntList lines; - /** Hash code for each line, for fast equality elimination. */ - protected final int[] hashes; - /** * Create a new sequence from an existing content byte array. *

@@ -105,7 +102,6 @@ public RawText(final byte[] input) { public RawText(RawTextComparator cmp, byte[] input) { content = input; lines = RawParseUtils.lineMap(content, 0, content.length); - hashes = computeHashes(cmp); } /** @@ -170,16 +166,6 @@ public boolean isMissingNewlineAtEnd() { return content[end - 1] != '\n'; } - private int[] computeHashes(RawTextComparator cmp) { - final int[] r = new int[lines.size()]; - for (int lno = 1; lno < lines.size() - 1; lno++) { - final int ptr = lines.get(lno); - final int end = lines.get(lno + 1); - r[lno] = cmp.hashRegion(content, ptr, end); - } - return r; - } - /** * Determine heuristically whether a byte array represents binary (as * opposed to text) content. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java index 767bf61f7..a1e2e6cec 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java @@ -59,9 +59,6 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { ai++; bi++; - if (a.hashes[ai] != b.hashes[bi]) - return false; - int as = a.lines.get(ai); int bs = b.lines.get(bi); final int ae = a.lines.get(ai + 1); @@ -78,7 +75,7 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { } @Override - public int hashRegion(final byte[] raw, int ptr, final int end) { + protected int hashRegion(final byte[] raw, int ptr, final int end) { int hash = 5381; for (; ptr < end; ptr++) hash = (hash << 5) ^ (raw[ptr] & 0xff); @@ -93,9 +90,6 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { ai++; bi++; - if (a.hashes[ai] != b.hashes[bi]) - return false; - int as = a.lines.get(ai); int bs = b.lines.get(bi); int ae = a.lines.get(ai + 1); @@ -129,7 +123,7 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { } @Override - public int hashRegion(byte[] raw, int ptr, int end) { + protected int hashRegion(byte[] raw, int ptr, int end) { int hash = 5381; for (; ptr < end; ptr++) { byte c = raw[ptr]; @@ -147,9 +141,6 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { ai++; bi++; - if (a.hashes[ai] != b.hashes[bi]) - return false; - int as = a.lines.get(ai); int bs = b.lines.get(bi); int ae = a.lines.get(ai + 1); @@ -169,7 +160,7 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { } @Override - public int hashRegion(final byte[] raw, int ptr, int end) { + protected int hashRegion(final byte[] raw, int ptr, int end) { int hash = 5381; ptr = trimLeadingWhitespace(raw, ptr, end); for (; ptr < end; ptr++) { @@ -186,9 +177,6 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { ai++; bi++; - if (a.hashes[ai] != b.hashes[bi]) - return false; - int as = a.lines.get(ai); int bs = b.lines.get(bi); int ae = a.lines.get(ai + 1); @@ -208,7 +196,7 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { } @Override - public int hashRegion(final byte[] raw, int ptr, int end) { + protected int hashRegion(final byte[] raw, int ptr, int end) { int hash = 5381; end = trimTrailingWhitespace(raw, ptr, end); for (; ptr < end; ptr++) { @@ -225,9 +213,6 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { ai++; bi++; - if (a.hashes[ai] != b.hashes[bi]) - return false; - int as = a.lines.get(ai); int bs = b.lines.get(bi); int ae = a.lines.get(ai + 1); @@ -257,7 +242,7 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { } @Override - public int hashRegion(final byte[] raw, int ptr, int end) { + protected int hashRegion(final byte[] raw, int ptr, int end) { int hash = 5381; end = trimTrailingWhitespace(raw, ptr, end); while (ptr < end) { @@ -273,8 +258,10 @@ public int hashRegion(final byte[] raw, int ptr, int end) { }; @Override - public int hash(RawText seq, int ptr) { - return seq.hashes[ptr + 1]; + public int hash(RawText seq, int lno) { + final int begin = seq.lines.get(lno + 1); + final int end = seq.lines.get(lno + 2); + return hashRegion(seq.content, begin, end); } @Override @@ -347,5 +334,5 @@ private static int findReverseLine(IntList lines, int idx, int ptr) { * 1 past the last byte of the region. * @return hash code for the region [ptr, end) of raw. */ - public abstract int hashRegion(byte[] raw, int ptr, int end); + protected abstract int hashRegion(byte[] raw, int ptr, int end); }