Make diff locations more consistent
DiffAlgorithms can return different edit locations for inserts or deletes, if they can be "shifted" up or down repeating blocks of lines. This causes the 3-way merge to apply both edits, resulting in incorrectly removing or duplicating lines. Augment an existing "tidy-up" stage in DiffAlgorithm to move all shiftable edits (not just the last INSERT edit) to a consistent location, and add test cases for previously incorrect merges. Bug: 514095 Change-Id: I5fe150a2fc04e1cdb012d22609d86df16dfb0b7e Signed-off-by: KB Sriram <kbsriram@google.com>
This commit is contained in:
parent
61e336475d
commit
4a985f5aa8
|
@ -195,6 +195,32 @@ public void testEdit_InsertNearCommonTail() {
|
|||
assertEquals(2, r.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEdit_DeleteNearCommonTail() {
|
||||
EditList r = diff(t("aCq}nD}nb"), t("aq}nb"));
|
||||
assertEquals(new Edit(1, 2, 1, 1), r.get(0));
|
||||
assertEquals(new Edit(5, 8, 4, 4), r.get(1));
|
||||
assertEquals(2, r.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEdit_DeleteNearCommonCenter() {
|
||||
EditList r = diff(t("abcd123123uvwxpq"), t("aBcd123uvwxPq"));
|
||||
assertEquals(new Edit(1, 2, 1, 2), r.get(0));
|
||||
assertEquals(new Edit(7, 10, 7, 7), r.get(1));
|
||||
assertEquals(new Edit(14, 15, 11, 12), r.get(2));
|
||||
assertEquals(3, r.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEdit_InsertNearCommonCenter() {
|
||||
EditList r = diff(t("aBcd123uvwxPq"), t("abcd123123uvwxpq"));
|
||||
assertEquals(new Edit(1, 2, 1, 2), r.get(0));
|
||||
assertEquals(new Edit(7, 7, 7, 10), r.get(1));
|
||||
assertEquals(new Edit(11, 12, 14, 15), r.get(2));
|
||||
assertEquals(3, r.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEdit_LinuxBug() {
|
||||
EditList r = diff(t("a{bcdE}z"), t("a{0bcdEE}z"));
|
||||
|
|
|
@ -131,6 +131,45 @@ public void testTwoComplicatedModifications() throws IOException {
|
|||
merge("abcdefghij", "aZZZZfZhZj", "abYdYYYYiY"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge two modifications with a shared delete at the end. The underlying
|
||||
* diff algorithm has to provide consistent edit results to get the expected
|
||||
* merge result.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testTwoModificationsWithSharedDelete() throws IOException {
|
||||
assertEquals(t("Cb}n}"),
|
||||
merge("ab}n}n}", "ab}n}", "Cb}n}"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge modifications with a shared insert in the middle. The
|
||||
* underlying diff algorithm has to provide consistent edit
|
||||
* results to get the expected merge result.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testModificationsWithMiddleInsert() throws IOException {
|
||||
assertEquals(t("aBcd123123uvwxPq"),
|
||||
merge("abcd123uvwxpq", "aBcd123123uvwxPq", "abcd123123uvwxpq"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge modifications with a shared delete in the middle. The
|
||||
* underlying diff algorithm has to provide consistent edit
|
||||
* results to get the expected merge result.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testModificationsWithMiddleDelete() throws IOException {
|
||||
assertEquals(t("Abz}z123Q"),
|
||||
merge("abz}z}z123q", "Abz}z123Q", "abz}z123q"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test a conflicting region at the very start of the text.
|
||||
*
|
||||
|
|
|
@ -121,23 +121,7 @@ public <S extends Sequence> EditList diff(
|
|||
Subsequence<S> as = Subsequence.a(a, region);
|
||||
Subsequence<S> bs = Subsequence.b(b, region);
|
||||
EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
|
||||
|
||||
// The last insertion may need to be shifted later if it
|
||||
// inserts elements that were previously reduced out as
|
||||
// common at the end.
|
||||
//
|
||||
Edit last = e.get(e.size() - 1);
|
||||
if (last.getType() == Edit.Type.INSERT) {
|
||||
while (last.endB < b.size()
|
||||
&& cmp.equals(b, last.beginB, b, last.endB)) {
|
||||
last.beginA++;
|
||||
last.endA++;
|
||||
last.beginB++;
|
||||
last.endB++;
|
||||
}
|
||||
}
|
||||
|
||||
return e;
|
||||
return normalize(cmp, e, a, b);
|
||||
}
|
||||
|
||||
case EMPTY:
|
||||
|
@ -152,6 +136,107 @@ private static <S extends Sequence> Edit coverEdit(S a, S b) {
|
|||
return new Edit(0, a.size(), 0, b.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Reorganize an {@link EditList} for better diff consistency.
|
||||
* <p>
|
||||
* {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
|
||||
* {@link Edit.Type#DELETE} edits that can be "shifted". For
|
||||
* example, the deleted section
|
||||
* <pre>
|
||||
* -a
|
||||
* -b
|
||||
* -c
|
||||
* a
|
||||
* b
|
||||
* c
|
||||
* </pre>
|
||||
* can be shifted down by 1, 2 or 3 locations.
|
||||
* <p>
|
||||
* To avoid later merge issues, we shift such edits to a
|
||||
* consistent location. {@code normalize} uses a simple strategy of
|
||||
* shifting such edits to their latest possible location.
|
||||
* <p>
|
||||
* This strategy may not always produce an aesthetically pleasing
|
||||
* diff. For instance, it works well with
|
||||
* <pre>
|
||||
* function1 {
|
||||
* ...
|
||||
* }
|
||||
*
|
||||
* +function2 {
|
||||
* + ...
|
||||
* +}
|
||||
* +
|
||||
* function3 {
|
||||
* ...
|
||||
* }
|
||||
* </pre>
|
||||
* but less so for
|
||||
* <pre>
|
||||
* #
|
||||
* # comment1
|
||||
* #
|
||||
* function1() {
|
||||
* }
|
||||
*
|
||||
* #
|
||||
* +# comment3
|
||||
* +#
|
||||
* +function3() {
|
||||
* +}
|
||||
* +
|
||||
* +#
|
||||
* # comment2
|
||||
* #
|
||||
* function2() {
|
||||
* }
|
||||
* </pre>
|
||||
* <a href="https://github.com/mhagger/diff-slider-tools">More
|
||||
* sophisticated strategies</a> are possible, say by calculating a
|
||||
* suitable "aesthetic cost" for each possible position and using
|
||||
* the lowest cost, but {@code normalize} just shifts edits
|
||||
* to the end as much as possible.
|
||||
*
|
||||
* @param <S>
|
||||
* type of sequence being compared.
|
||||
* @param cmp
|
||||
* the comparator supplying the element equivalence function.
|
||||
* @param e
|
||||
* a modifiable edit list comparing the provided sequences.
|
||||
* @param a
|
||||
* the first (also known as old or pre-image) sequence.
|
||||
* @param b
|
||||
* the second (also known as new or post-image) sequence.
|
||||
* @return a modifiable edit list with edit regions shifted to their
|
||||
* latest possible location. The result list is never null.
|
||||
* @since 4.7
|
||||
*/
|
||||
private static <S extends Sequence> EditList normalize(
|
||||
SequenceComparator<? super S> cmp, EditList e, S a, S b) {
|
||||
Edit prev = null;
|
||||
for (int i = e.size() - 1; i >= 0; i--) {
|
||||
Edit cur = e.get(i);
|
||||
Edit.Type curType = cur.getType();
|
||||
|
||||
int maxA = (prev == null) ? a.size() : prev.beginA;
|
||||
int maxB = (prev == null) ? b.size() : prev.beginB;
|
||||
|
||||
if (curType == Edit.Type.INSERT) {
|
||||
while (cur.endA < maxA && cur.endB < maxB
|
||||
&& cmp.equals(b, cur.beginB, b, cur.endB)) {
|
||||
cur.shift(1);
|
||||
}
|
||||
} else if (curType == Edit.Type.DELETE) {
|
||||
while (cur.endA < maxA && cur.endB < maxB
|
||||
&& cmp.equals(a, cur.beginA, a, cur.endA)) {
|
||||
cur.shift(1);
|
||||
}
|
||||
}
|
||||
prev = cur;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare two sequences and identify a list of edits between them.
|
||||
*
|
||||
|
|
|
@ -169,6 +169,20 @@ public final int getLengthB() {
|
|||
return endB - beginB;
|
||||
}
|
||||
|
||||
/**
|
||||
* Move the edit region by the specified amount.
|
||||
* @param amount
|
||||
* the region is shifted by this amount, and can be
|
||||
* positive or negative.
|
||||
* @since 4.7
|
||||
*/
|
||||
public final void shift(int amount) {
|
||||
beginA += amount;
|
||||
endA += amount;
|
||||
beginB += amount;
|
||||
endB += amount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new edit representing the region before cut.
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue