Merge "Define LowLevelDiffAlgorithm to bypass re-hashing"

This commit is contained in:
Chris Aniszczyk 2010-10-11 17:18:05 -04:00 committed by Code Review
commit 7429a9a5aa
3 changed files with 125 additions and 29 deletions

View File

@ -89,7 +89,7 @@
* This implementation has an internal limitation that prevents it from handling
* sequences with more than 268,435,456 (2^28) elements.
*/
public class HistogramDiff extends DiffAlgorithm {
public class HistogramDiff extends LowLevelDiffAlgorithm {
/** Algorithm to use when there are too many element occurrences. */
private DiffAlgorithm fallback = MyersDiff.INSTANCE;
@ -127,11 +127,10 @@ public void setMaxChainLength(int maxLen) {
maxChainLength = maxLen;
}
public <S extends Sequence> EditList diffNonCommon(
SequenceComparator<? super S> cmp, S a, S b) {
State<S> s = new State<S>(new HashedSequencePair<S>(cmp, a, b));
s.diffReplace(new Edit(0, s.a.size(), 0, s.b.size()));
return s.edits;
public <S extends Sequence> void diffNonCommon(EditList edits,
HashedSequenceComparator<S> cmp, HashedSequence<S> a,
HashedSequence<S> b, Edit region) {
new State<S>(edits, cmp, a, b).diffReplace(region);
}
private class State<S extends Sequence> {
@ -144,11 +143,12 @@ private class State<S extends Sequence> {
/** Result edits we have determined that must be made to convert a to b. */
final EditList edits;
State(HashedSequencePair<S> p) {
this.cmp = p.getComparator();
this.a = p.getA();
this.b = p.getB();
this.edits = new EditList();
State(EditList edits, HashedSequenceComparator<S> cmp,
HashedSequence<S> a, HashedSequence<S> b) {
this.cmp = cmp;
this.a = a;
this.b = b;
this.edits = edits;
}
void diffReplace(Edit r) {
@ -167,6 +167,10 @@ void diffReplace(Edit r) {
diff(r.after(lcs));
}
} else if (fallback instanceof LowLevelDiffAlgorithm) {
LowLevelDiffAlgorithm fb = (LowLevelDiffAlgorithm) fallback;
fb.diffNonCommon(edits, cmp, a, b, r);
} else if (fallback != null) {
SubsequenceComparator<HashedSequence<S>> cs = subcmp();
Subsequence<HashedSequence<S>> as = Subsequence.a(a, r);

View File

@ -0,0 +1,92 @@
/*
* Copyright (C) 2010, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
/**
 * Diff algorithm base class that compares two sequences primarily through
 * their hash codes.
 * <p>
 * The generic entry point wraps both inputs in a {@link HashedSequencePair}
 * and hands the hashed views to the low-level overload, which subclasses
 * implement.
 */
public abstract class LowLevelDiffAlgorithm extends DiffAlgorithm {
	@Override
	public <S extends Sequence> EditList diffNonCommon(
			SequenceComparator<? super S> cmp, S a, S b) {
		HashedSequencePair<S> pair = new HashedSequencePair<S>(cmp, a, b);
		final HashedSequenceComparator<S> hashedCmp = pair.getComparator();
		final HashedSequence<S> hashedA = pair.getA();
		final HashedSequence<S> hashedB = pair.getB();
		// Only the extracted parts are used from here on; clear the local
		// reference to the pair itself (presumably so it is not kept
		// reachable while the diff runs).
		pair = null;

		final EditList result = new EditList();
		// The region handed down spans the whole of both sequences.
		diffNonCommon(result, hashedCmp, hashedA, hashedB,
				new Edit(0, a.size(), 0, b.size()));
		return result;
	}

	/**
	 * Compute the edits for one region of two hashed sequences.
	 *
	 * Callers should only use this method once the two sequences are known
	 * to share no common leading or trailing elements. That trimming is
	 * normally done by
	 * {@link #diff(SequenceComparator, Sequence, Sequence)}, which reaches
	 * this method using {@link Subsequence}s.
	 *
	 * @param <S>
	 *            type of sequence being compared.
	 * @param edits
	 *            result list; edits found for the region are appended to it.
	 * @param cmp
	 *            the comparator supplying the element equivalence function.
	 * @param a
	 *            the first (old, or pre-image) sequence. Resulting edits
	 *            reference its indexes on the 'A' side:
	 *            {@link Edit#getBeginA()}, {@link Edit#getEndA()}.
	 * @param b
	 *            the second (new, or post-image) sequence. Resulting edits
	 *            reference its indexes on the 'B' side:
	 *            {@link Edit#getBeginB()}, {@link Edit#getEndB()}.
	 * @param region
	 *            the portion of the two sequences to compare.
	 */
	public abstract <S extends Sequence> void diffNonCommon(EditList edits,
			HashedSequenceComparator<S> cmp, HashedSequence<S> a,
			HashedSequence<S> b, Edit region);
}

View File

@ -107,15 +107,18 @@
*/
public class MyersDiff<S extends Sequence> {
/** Singleton instance of MyersDiff. */
public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() {
public <S extends Sequence> EditList diffNonCommon(
SequenceComparator<? super S> cmp, S a, S b) {
return new MyersDiff<S>(cmp, a, b).edits;
public static final DiffAlgorithm INSTANCE = new LowLevelDiffAlgorithm() {
@Override
public <S extends Sequence> void diffNonCommon(EditList edits,
HashedSequenceComparator<S> cmp, HashedSequence<S> a,
HashedSequence<S> b, Edit region) {
new MyersDiff<S>(edits, cmp, a, b, region);
}
};
/**
* The list of edits found during the last call to {@link #calculateEdits()}
* The list of edits found during the last call to
* {@link #calculateEdits(Edit)}
*/
protected EditList edits;
@ -132,15 +135,13 @@ public <S extends Sequence> EditList diffNonCommon(
*/
protected HashedSequence<S> b;
private MyersDiff(SequenceComparator<? super S> cmp, S a, S b) {
HashedSequencePair<S> pair;
pair = new HashedSequencePair<S>(cmp, a, b);
this.cmp = pair.getComparator();
this.a = pair.getA();
this.b = pair.getB();
calculateEdits();
private MyersDiff(EditList edits, HashedSequenceComparator<S> cmp,
HashedSequence<S> a, HashedSequence<S> b, Edit region) {
this.edits = edits;
this.cmp = cmp;
this.a = a;
this.b = b;
calculateEdits(region);
}
// TODO: use ThreadLocal for future multi-threaded operations
@ -149,11 +150,10 @@ private MyersDiff(SequenceComparator<? super S> cmp, S a, S b) {
/**
* Entry point into the algorithm this class is all about. This method computes the
* differences between A and B in the form of a list of edits.
* @param r portion of the sequences to examine.
*/
protected void calculateEdits() {
edits = new EditList();
middle.initialize(0, a.size(), 0, b.size());
private void calculateEdits(Edit r) {
middle.initialize(r.beginA, r.endA, r.beginB, r.endB);
if (middle.beginA >= middle.endA &&
middle.beginB >= middle.endB)
return;