From 857d68d17329637dde8b143945e61d90d374311f Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Tue, 21 Sep 2010 08:12:51 -0700 Subject: [PATCH] Perform common start/end elimination by default for DiffAlgorithm As it turns out, every single diff algorithm we might try to implement can benfit from using the SequenceComparator's native concept of the simple reduceCommonStartEnd() step. For most inputs, there can be a significant number of elements that can be removed from the space the DiffAlgorithm needs to consider, which will reduce the overall running time for the final solution. Pool this logic inside of DiffAlgorithm itself as a default, but permit a specific algorithm to override it when necessary. Convert MyersDiff to use this reduction to reduce the space it needs to search, making it perform slightly better on common inputs. Change-Id: I14004d771117e4a4ab2a02cace8deaeda9814bc1 Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/diff/DiffAlgorithm.java | 61 +++++++++++-- .../src/org/eclipse/jgit/diff/EditList.java | 24 ++++++ .../src/org/eclipse/jgit/diff/MyersDiff.java | 52 +++--------- .../org/eclipse/jgit/diff/PatienceDiff.java | 85 +++++++------------ 4 files changed, 122 insertions(+), 100 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java index 94cb0b60f..2fa89cc58 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java @@ -52,14 +52,12 @@ * algorithms may support parameterization, in which case the caller can create * a unique instance per thread. */ -public interface DiffAlgorithm { +public abstract class DiffAlgorithm { /** * Compare two sequences and identify a list of edits between them. - * + * * @param * type of sequence being compared. - * @param - * type of comparator to evaluate the sequence elements. * @param cmp * the comparator supplying the element equivalence function. * @param a @@ -74,6 +72,57 @@ public interface DiffAlgorithm { * sequences are identical according to {@code cmp}'s rules. The * result list is never null. */ - public > EditList diff( - C cmp, S a, S b); + public EditList diff( + SequenceComparator cmp, S a, S b) { + Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b)); + + switch (region.getType()) { + case INSERT: + case DELETE: + return EditList.singleton(region); + + case REPLACE: { + SubsequenceComparator cs = new SubsequenceComparator(cmp); + Subsequence as = Subsequence.a(a, region); + Subsequence bs = Subsequence.b(b, region); + return Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs); + } + + case EMPTY: + return new EditList(0); + + default: + throw new IllegalStateException(); + } + } + + private static Edit coverEdit(S a, S b) { + return new Edit(0, a.size(), 0, b.size()); + } + + /** + * Compare two sequences and identify a list of edits between them. + * + * This method should be invoked only after the two sequences have been + * proven to have no common starting or ending elements. The expected + * elimination of common starting and ending elements is automatically + * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)} + * method, which invokes this method using {@link Subsequence}s. + * + * @param + * type of sequence being compared. + * @param cmp + * the comparator supplying the element equivalence function. + * @param a + * the first (also known as old or pre-image) sequence. Edits + * returned by this algorithm will reference indexes using the + * 'A' side: {@link Edit#getBeginA()}, {@link Edit#getEndA()}. + * @param b + * the second (also known as new or post-image) sequence. Edits + * returned by this algorithm will reference indexes using the + * 'B' side: {@link Edit#getBeginB()}, {@link Edit#getEndB()}. + * @return a modifiable edit list comparing the two sequences. + */ + public abstract EditList diffNonCommon( + SequenceComparator cmp, S a, S b); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/EditList.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/EditList.java index 85a539644..a8088207f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/EditList.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/EditList.java @@ -48,6 +48,19 @@ /** Specialized list of {@link Edit}s in a document. */ public class EditList extends AbstractList { + /** + * Construct an edit list containing a single edit. + * + * @param edit + * the edit to return in the list. + * @return list containing only {@code edit}. + */ + public static EditList singleton(Edit edit) { + EditList res = new EditList(1); + res.add(edit); + return res; + } + private final ArrayList container; /** Create a new, empty edit list. */ @@ -55,6 +68,17 @@ public EditList() { container = new ArrayList(); } + /** + * Create an empty edit list with the specified capacity. + * + * @param capacity + * the initial capacity of the edit list. If additional edits are + * added to the list, it will be grown to support them. + */ + public EditList(int capacity) { + container = new ArrayList(capacity); + } + @Override public int size() { return container.size(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java index 3fad2c349..821d06be2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java @@ -108,28 +108,9 @@ public class MyersDiff { /** Singleton instance of MyersDiff. */ public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() { - public > EditList diff( - C cmp, S a, S b) { - Edit region = new Edit(0, a.size(), 0, b.size()); - region = cmp.reduceCommonStartEnd(a, b, region); - - switch (region.getType()) { - case INSERT: - case DELETE: { - EditList r = new EditList(); - r.add(region); - return r; - } - - case REPLACE: - return new MyersDiff(cmp, a, b, region).getEdits(); - - case EMPTY: - return new EditList(); - - default: - throw new IllegalStateException(); - } + public EditList diffNonCommon( + SequenceComparator cmp, S a, S b) { + return new MyersDiff(cmp, a, b).edits; } }; @@ -139,38 +120,27 @@ public > EditList di protected EditList edits; /** Comparison function for sequences. */ - protected HashedSequenceComparator> cmp; + protected HashedSequenceComparator cmp; /** * The first text to be compared. Referred to as "Text A" in the comments */ - protected HashedSequence> a; + protected HashedSequence a; /** * The second text to be compared. Referred to as "Text B" in the comments */ - protected HashedSequence> b; + protected HashedSequence b; - private MyersDiff(SequenceComparator cmp, S a, S b, Edit region) { - Subsequence as = Subsequence.a(a, region); - Subsequence bs = Subsequence.b(b, region); - - HashedSequencePair> pair = new HashedSequencePair>( - new SubsequenceComparator(cmp), as, bs); + private MyersDiff(SequenceComparator cmp, S a, S b) { + HashedSequencePair pair; + pair = new HashedSequencePair(cmp, a, b); this.cmp = pair.getComparator(); this.a = pair.getA(); this.b = pair.getB(); calculateEdits(); - Subsequence.toBase(edits, as, bs); - } - - /** - * @return the list of edits found during the last call to {@link #calculateEdits()} - */ - public EditList getEdits() { - return edits; } // TODO: use ThreadLocal for future multi-threaded operations @@ -565,8 +535,8 @@ public static void main(String[] args) { try { RawText a = new RawText(new java.io.File(args[0])); RawText b = new RawText(new java.io.File(args[1])); - EditList res = INSTANCE.diff(RawTextComparator.DEFAULT, a, b); - System.out.println(res.toString()); + EditList r = INSTANCE.diff(RawTextComparator.DEFAULT, a, b); + System.out.println(r.toString()); } catch (Exception e) { e.printStackTrace(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiff.java index 44e1f79bb..571a498ae 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiff.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiff.java @@ -99,7 +99,7 @@ * by the prior step 2 or 5. * */ -public class PatienceDiff implements DiffAlgorithm { +public class PatienceDiff extends DiffAlgorithm { /** Algorithm we use when there are no common unique lines in a region. */ private DiffAlgorithm fallback; @@ -114,38 +114,10 @@ public void setFallbackAlgorithm(DiffAlgorithm alg) { fallback = alg; } - public > EditList diff( - C cmp, S a, S b) { - Edit region = new Edit(0, a.size(), 0, b.size()); - region = cmp.reduceCommonStartEnd(a, b, region); - - switch (region.getType()) { - case INSERT: - case DELETE: { - EditList r = new EditList(); - r.add(region); - return r; - } - - case REPLACE: { - SubsequenceComparator cs = new SubsequenceComparator(cmp); - Subsequence as = Subsequence.a(a, region); - Subsequence bs = Subsequence.b(b, region); - return Subsequence.toBase(diffImpl(cs, as, bs), as, bs); - } - - case EMPTY: - return new EditList(); - - default: - throw new IllegalStateException(); - } - } - - private > EditList diffImpl( - C cmp, S a, S b) { + public EditList diffNonCommon( + SequenceComparator cmp, S a, S b) { State s = new State(new HashedSequencePair(cmp, a, b)); - s.diff(new Edit(0, s.a.size(), 0, s.b.size()), null, 0, 0); + s.diffReplace(new Edit(0, s.a.size(), 0, s.b.size()), null, 0, 0); return s.edits; } @@ -166,25 +138,12 @@ private class State { this.edits = new EditList(); } - private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) { - switch (r.getType()) { - case INSERT: - case DELETE: - edits.add(r); - return; - - case REPLACE: - break; - - case EMPTY: - default: - throw new IllegalStateException(); - } - + void diffReplace(Edit r, long[] pCommon, int pIdx, int pEnd) { PatienceDiffIndex p; + Edit lcs; p = new PatienceDiffIndex(cmp, a, b, r, pCommon, pIdx, pEnd); - Edit lcs = p.findLongestCommonSequence(); + lcs = p.findLongestCommonSequence(); if (lcs != null) { pCommon = p.nCommon; @@ -196,20 +155,40 @@ private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) { diff(r.after(lcs), pCommon, pIdx + 1, pEnd); } else if (fallback != null) { - p = null; pCommon = null; + p = null; - SubsequenceComparator> cs; - cs = new SubsequenceComparator>(cmp); - + SubsequenceComparator> cs = subcmp(); Subsequence> as = Subsequence.a(a, r); Subsequence> bs = Subsequence.b(b, r); - EditList res = fallback.diff(cs, as, bs); + + EditList res = fallback.diffNonCommon(cs, as, bs); edits.addAll(Subsequence.toBase(res, as, bs)); } else { edits.add(r); } } + + private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) { + switch (r.getType()) { + case INSERT: + case DELETE: + edits.add(r); + break; + + case REPLACE: + diffReplace(r, pCommon, pIdx, pEnd); + break; + + case EMPTY: + default: + throw new IllegalStateException(); + } + } + + private SubsequenceComparator> subcmp() { + return new SubsequenceComparator>(cmp); + } } }