Define LowLevelDiffAlgorithm to bypass re-hashing
When passing to a fallback algorithm, we can avoid creating a new copy of the hash codes for each sequence by passing in the hashed sequences directly. This makes it cheaper to switch from HistogramDiff down to MyersDiff in a single pass.

Change-Id: Ibf2e81be57c083862eeb134279aed676653bf9b5
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
parent
4fc50df97d
commit
1bd24a23f9
|
@ -89,7 +89,7 @@
|
||||||
* This implementation has an internal limitation that prevents it from handling
|
* This implementation has an internal limitation that prevents it from handling
|
||||||
* sequences with more than 268,435,456 (2^28) elements.
|
* sequences with more than 268,435,456 (2^28) elements.
|
||||||
*/
|
*/
|
||||||
public class HistogramDiff extends DiffAlgorithm {
|
public class HistogramDiff extends LowLevelDiffAlgorithm {
|
||||||
/** Algorithm to use when there are too many element occurrences. */
|
/** Algorithm to use when there are too many element occurrences. */
|
||||||
private DiffAlgorithm fallback = MyersDiff.INSTANCE;
|
private DiffAlgorithm fallback = MyersDiff.INSTANCE;
|
||||||
|
|
||||||
|
@ -127,11 +127,10 @@ public void setMaxChainLength(int maxLen) {
|
||||||
maxChainLength = maxLen;
|
maxChainLength = maxLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
public <S extends Sequence> EditList diffNonCommon(
|
public <S extends Sequence> void diffNonCommon(EditList edits,
|
||||||
SequenceComparator<? super S> cmp, S a, S b) {
|
HashedSequenceComparator<S> cmp, HashedSequence<S> a,
|
||||||
State<S> s = new State<S>(new HashedSequencePair<S>(cmp, a, b));
|
HashedSequence<S> b, Edit region) {
|
||||||
s.diffReplace(new Edit(0, s.a.size(), 0, s.b.size()));
|
new State<S>(edits, cmp, a, b).diffReplace(region);
|
||||||
return s.edits;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private class State<S extends Sequence> {
|
private class State<S extends Sequence> {
|
||||||
|
@ -144,11 +143,12 @@ private class State<S extends Sequence> {
|
||||||
/** Result edits we have determined that must be made to convert a to b. */
|
/** Result edits we have determined that must be made to convert a to b. */
|
||||||
final EditList edits;
|
final EditList edits;
|
||||||
|
|
||||||
State(HashedSequencePair<S> p) {
|
State(EditList edits, HashedSequenceComparator<S> cmp,
|
||||||
this.cmp = p.getComparator();
|
HashedSequence<S> a, HashedSequence<S> b) {
|
||||||
this.a = p.getA();
|
this.cmp = cmp;
|
||||||
this.b = p.getB();
|
this.a = a;
|
||||||
this.edits = new EditList();
|
this.b = b;
|
||||||
|
this.edits = edits;
|
||||||
}
|
}
|
||||||
|
|
||||||
void diffReplace(Edit r) {
|
void diffReplace(Edit r) {
|
||||||
|
@ -167,6 +167,10 @@ void diffReplace(Edit r) {
|
||||||
diff(r.after(lcs));
|
diff(r.after(lcs));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else if (fallback instanceof LowLevelDiffAlgorithm) {
|
||||||
|
LowLevelDiffAlgorithm fb = (LowLevelDiffAlgorithm) fallback;
|
||||||
|
fb.diffNonCommon(edits, cmp, a, b, r);
|
||||||
|
|
||||||
} else if (fallback != null) {
|
} else if (fallback != null) {
|
||||||
SubsequenceComparator<HashedSequence<S>> cs = subcmp();
|
SubsequenceComparator<HashedSequence<S>> cs = subcmp();
|
||||||
Subsequence<HashedSequence<S>> as = Subsequence.a(a, r);
|
Subsequence<HashedSequence<S>> as = Subsequence.a(a, r);
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2010, Google Inc.
|
||||||
|
* and other copyright owners as documented in the project's IP log.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available
|
||||||
|
* under the terms of the Eclipse Distribution License v1.0 which
|
||||||
|
* accompanies this distribution, is reproduced below, and is
|
||||||
|
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||||
|
*
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or
|
||||||
|
* without modification, are permitted provided that the following
|
||||||
|
* conditions are met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above
|
||||||
|
* copyright notice, this list of conditions and the following
|
||||||
|
* disclaimer in the documentation and/or other materials provided
|
||||||
|
* with the distribution.
|
||||||
|
*
|
||||||
|
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
||||||
|
* names of its contributors may be used to endorse or promote
|
||||||
|
* products derived from this software without specific prior
|
||||||
|
* written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||||
|
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||||
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.eclipse.jgit.diff;
|
||||||
|
|
||||||
|
/** Compares two sequences primarily based upon hash codes. */
|
||||||
|
public abstract class LowLevelDiffAlgorithm extends DiffAlgorithm {
|
||||||
|
@Override
|
||||||
|
public <S extends Sequence> EditList diffNonCommon(
|
||||||
|
SequenceComparator<? super S> cmp, S a, S b) {
|
||||||
|
HashedSequencePair<S> p = new HashedSequencePair<S>(cmp, a, b);
|
||||||
|
HashedSequenceComparator<S> hc = p.getComparator();
|
||||||
|
HashedSequence<S> ha = p.getA();
|
||||||
|
HashedSequence<S> hb = p.getB();
|
||||||
|
p = null;
|
||||||
|
|
||||||
|
EditList res = new EditList();
|
||||||
|
Edit region = new Edit(0, a.size(), 0, b.size());
|
||||||
|
diffNonCommon(res, hc, ha, hb, region);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare two sequences and identify a list of edits between them.
|
||||||
|
*
|
||||||
|
* This method should be invoked only after the two sequences have been
|
||||||
|
* proven to have no common starting or ending elements. The expected
|
||||||
|
* elimination of common starting and ending elements is automatically
|
||||||
|
* performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
|
||||||
|
* method, which invokes this method using {@link Subsequence}s.
|
||||||
|
*
|
||||||
|
* @param <S>
|
||||||
|
* type of sequence being compared.
|
||||||
|
* @param edits
|
||||||
|
* result list to append the region's edits onto.
|
||||||
|
* @param cmp
|
||||||
|
* the comparator supplying the element equivalence function.
|
||||||
|
* @param a
|
||||||
|
* the first (also known as old or pre-image) sequence. Edits
|
||||||
|
* returned by this algorithm will reference indexes using the
|
||||||
|
* 'A' side: {@link Edit#getBeginA()}, {@link Edit#getEndA()}.
|
||||||
|
* @param b
|
||||||
|
* the second (also known as new or post-image) sequence. Edits
|
||||||
|
* returned by this algorithm will reference indexes using the
|
||||||
|
* 'B' side: {@link Edit#getBeginB()}, {@link Edit#getEndB()}.
|
||||||
|
* @param region
|
||||||
|
* the region being compared within the two sequences.
|
||||||
|
*/
|
||||||
|
public abstract <S extends Sequence> void diffNonCommon(EditList edits,
|
||||||
|
HashedSequenceComparator<S> cmp, HashedSequence<S> a,
|
||||||
|
HashedSequence<S> b, Edit region);
|
||||||
|
}
|
|
@ -107,15 +107,18 @@
|
||||||
*/
|
*/
|
||||||
public class MyersDiff<S extends Sequence> {
|
public class MyersDiff<S extends Sequence> {
|
||||||
/** Singleton instance of MyersDiff. */
|
/** Singleton instance of MyersDiff. */
|
||||||
public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() {
|
public static final DiffAlgorithm INSTANCE = new LowLevelDiffAlgorithm() {
|
||||||
public <S extends Sequence> EditList diffNonCommon(
|
@Override
|
||||||
SequenceComparator<? super S> cmp, S a, S b) {
|
public <S extends Sequence> void diffNonCommon(EditList edits,
|
||||||
return new MyersDiff<S>(cmp, a, b).edits;
|
HashedSequenceComparator<S> cmp, HashedSequence<S> a,
|
||||||
|
HashedSequence<S> b, Edit region) {
|
||||||
|
new MyersDiff<S>(edits, cmp, a, b, region);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The list of edits found during the last call to {@link #calculateEdits()}
|
* The list of edits found during the last call to
|
||||||
|
* {@link #calculateEdits(Edit)}
|
||||||
*/
|
*/
|
||||||
protected EditList edits;
|
protected EditList edits;
|
||||||
|
|
||||||
|
@ -132,15 +135,13 @@ public <S extends Sequence> EditList diffNonCommon(
|
||||||
*/
|
*/
|
||||||
protected HashedSequence<S> b;
|
protected HashedSequence<S> b;
|
||||||
|
|
||||||
private MyersDiff(SequenceComparator<? super S> cmp, S a, S b) {
|
private MyersDiff(EditList edits, HashedSequenceComparator<S> cmp,
|
||||||
HashedSequencePair<S> pair;
|
HashedSequence<S> a, HashedSequence<S> b, Edit region) {
|
||||||
|
this.edits = edits;
|
||||||
pair = new HashedSequencePair<S>(cmp, a, b);
|
this.cmp = cmp;
|
||||||
this.cmp = pair.getComparator();
|
this.a = a;
|
||||||
this.a = pair.getA();
|
this.b = b;
|
||||||
this.b = pair.getB();
|
calculateEdits(region);
|
||||||
|
|
||||||
calculateEdits();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: use ThreadLocal for future multi-threaded operations
|
// TODO: use ThreadLocal for future multi-threaded operations
|
||||||
|
@ -149,11 +150,10 @@ private MyersDiff(SequenceComparator<? super S> cmp, S a, S b) {
|
||||||
/**
|
/**
|
||||||
* Entrypoint into the algorithm this class is all about. This method triggers that the
|
* Entrypoint into the algorithm this class is all about. This method triggers that the
|
||||||
* differences between A and B are calculated in form of a list of edits.
|
* differences between A and B are calculated in form of a list of edits.
|
||||||
|
* @param r portion of the sequences to examine.
|
||||||
*/
|
*/
|
||||||
protected void calculateEdits() {
|
private void calculateEdits(Edit r) {
|
||||||
edits = new EditList();
|
middle.initialize(r.beginA, r.endA, r.beginB, r.endB);
|
||||||
|
|
||||||
middle.initialize(0, a.size(), 0, b.size());
|
|
||||||
if (middle.beginA >= middle.endA &&
|
if (middle.beginA >= middle.endA &&
|
||||||
middle.beginB >= middle.endB)
|
middle.beginB >= middle.endB)
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Reference in New Issue