From 9c056fcace49f4c111cad8a49399a113fb083f30 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 Sep 2009 12:46:31 +0200 Subject: [PATCH 01/10] Add set to IntList Some applications may wish to modify an int list. Bug: 291083 Eclipse-CQ: 3559 Change-Id: Iea871443ec661230aec92397229f1eda6c74216f Signed-off-by: Johannes Schindelin Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/util/IntListTest.java | 21 +++++++++++++++++++ .../src/org/eclipse/jgit/util/IntList.java | 18 ++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/IntListTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/IntListTest.java index ecabeeea5..b8d5bd102 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/IntListTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/IntListTest.java @@ -150,6 +150,27 @@ public void testClear() { } } + public void testSet() { + final IntList i = new IntList(); + i.add(1); + assertEquals(1, i.size()); + assertEquals(1, i.get(0)); + + i.set(0, 5); + assertEquals(5, i.get(0)); + + try { + i.set(5, 5); + fail("accepted set of 5 beyond end of list"); + } catch (ArrayIndexOutOfBoundsException e){ + assertTrue(true); + } + + i.set(1, 2); + assertEquals(2, i.size()); + assertEquals(2, i.get(1)); + } + public void testToString() { final IntList i = new IntList(); i.add(1); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/IntList.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/IntList.java index 510f2a4db..510032eeb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/IntList.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/IntList.java @@ -1,5 +1,6 @@ /* * Copyright (C) 2008, Google Inc. + * Copyright (C) 2009, Johannes Schindelin * and other copyright owners as documented in the project's IP log. * * This program and the accompanying materials are made available @@ -99,6 +100,23 @@ public void add(final int n) { entries[count++] = n; } + /** + * Assign an entry in the list. + * + * @param index + * index to set, must be in the range [0, {@link #size()}). + * @param n + * value to store at the position. + */ + public void set(final int index, final int n) { + if (count < index) + throw new ArrayIndexOutOfBoundsException(index); + else if (count == index) + add(n); + else + entries[index] = n; + } + /** * Pad the list with entries. * From d8a3b4eb310fd8f160c858ddb5c1160a29a14546 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 Sep 2009 12:46:41 +0200 Subject: [PATCH 02/10] Add Myers' algorithm to generate diff scripts Myers' algorithm is the standard way to generate diff scripts in an efficient manner (especially memory-wise). The source contains extensive documentation about the principal ideas of the algorithm. Bug: 291083 Eclipse-CQ: 3559 Change-Id: Id873e7c2c204692cb4a1a05d24b733154b9ed49c Signed-off-by: Johannes Schindelin Signed-off-by: Shawn O. Pearce --- .../src/org/eclipse/jgit/diff/MyersDiff.java | 469 ++++++++++++++++++ 1 file changed, 469 insertions(+) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java new file mode 100644 index 000000000..795075d28 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java @@ -0,0 +1,469 @@ +/* + * Copyright (C) 2008-2009, Johannes E. Schindelin + * Copyright (C) 2009, Johannes Schindelin + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.eclipse.jgit.util.IntList; + +public class MyersDiff { + protected EditList edits; + protected Sequence a, b; + + public MyersDiff(Sequence a, Sequence b) { + this.a = a; + this.b = b; + calculateEdits(); + } + + public EditList getEdits() { + return edits; + } + + // TODO: use ThreadLocal for future multi-threaded operations + MiddleEdit middle = new MiddleEdit(); + + protected void calculateEdits() { + edits = new EditList(); + + middle.initialize(0, a.size(), 0, b.size()); + if (middle.beginA >= middle.endA && + middle.beginB >= middle.endB) + return; + + calculateEdits(middle.beginA, middle.endA, + middle.beginB, middle.endB); + } + + protected void calculateEdits(int beginA, int endA, + int beginB, int endB) { + Edit edit = middle.calculate(beginA, endA, beginB, endB); + + if (beginA < edit.beginA || beginB < edit.beginB) { + int k = edit.beginB - edit.beginA; + int x = middle.backward.snake(k, edit.beginA); + calculateEdits(beginA, x, beginB, k + x); + } + + if (edit.getType() != Edit.Type.EMPTY) + edits.add(edits.size(), edit); + + // after middle + if (endA > edit.endA || endB > edit.endB) { + int k = edit.endB - edit.endA; + int x = middle.forward.snake(k, edit.endA); + calculateEdits(x, endA, k + x, endB); + } + } + + /** + * A class to help bisecting the sequences a and b to find minimal + * edit paths. + * + * As the arrays are reused for space efficiency, you will need one + * instance per thread. + * + * The entry function is the calculate() method. + */ + class MiddleEdit { + void initialize(int beginA, int endA, int beginB, int endB) { + this.beginA = beginA; this.endA = endA; + this.beginB = beginB; this.endB = endB; + + // strip common parts on either end + int k = beginB - beginA; + this.beginA = forward.snake(k, beginA); + this.beginB = k + this.beginA; + + k = endB - endA; + this.endA = backward.snake(k, endA); + this.endB = k + this.endA; + } + + /* + * This function calculates the "middle" Edit of the shortest + * edit path between the given subsequences of a and b. + * + * Once a forward path and a backward path meet, we found the + * middle part. From the last snake end point on both of them, + * we construct the Edit. + * + * It is assumed that there is at least one edit in the range. + */ + // TODO: measure speed impact when this is synchronized + Edit calculate(int beginA, int endA, int beginB, int endB) { + if (beginA == endA || beginB == endB) + return new Edit(beginA, endA, beginB, endB); + this.beginA = beginA; this.endA = endA; + this.beginB = beginB; this.endB = endB; + + /* + * Following the conventions in Myers' paper, "k" is + * the difference between the index into "b" and the + * index into "a". + */ + int minK = beginB - endA; + int maxK = endB - beginA; + + forward.initialize(beginB - beginA, beginA, minK, maxK); + backward.initialize(endB - endA, endA, minK, maxK); + + for (int d = 1; ; d++) + if (forward.calculate(d) || + backward.calculate(d)) + return edit; + } + + /* + * For each d, we need to hold the d-paths for the diagonals + * k = -d, -d + 2, ..., d - 2, d. These are stored in the + * forward (and backward) array. + * + * As we allow subsequences, too, this needs some refinement: + * the forward paths start on the diagonal forwardK = + * beginB - beginA, and backward paths start on the diagonal + * backwardK = endB - endA. + * + * So, we need to hold the forward d-paths for the diagonals + * k = forwardK - d, forwardK - d + 2, ..., forwardK + d and + * the analogue for the backward d-paths. This means that + * we can turn (k, d) into the forward array index using this + * formula: + * + * i = (d + k - forwardK) / 2 + * + * There is a further complication: the edit paths should not + * leave the specified subsequences, so k is bounded by + * minK = beginB - endA and maxK = endB - beginA. However, + * (k - forwardK) _must_ be odd whenever d is odd, and it + * _must_ be even when d is even. + * + * The values in the "forward" and "backward" arrays are + * positions ("x") in the sequence a, to get the corresponding + * positions ("y") in the sequence b, you have to calculate + * the appropriate k and then y: + * + * k = forwardK - d + i * 2 + * y = k + x + * + * (substitute backwardK for forwardK if you want to get the + * y position for an entry in the "backward" array. + */ + EditPaths forward = new ForwardEditPaths(); + EditPaths backward = new BackwardEditPaths(); + + /* Some variables which are shared between methods */ + protected int beginA, endA, beginB, endB; + protected Edit edit; + + abstract class EditPaths { + private IntList x = new IntList(); + private IntList snake = new IntList(); + int beginK, endK, middleK; + int prevBeginK, prevEndK; + /* if we hit one end early, no need to look further */ + int minK, maxK; // TODO: better explanation + + final int getIndex(int d, int k) { +// TODO: remove +if (((d + k - middleK) % 2) == 1) + throw new RuntimeException("odd: " + d + " + " + k + " - " + middleK); + return (d + k - middleK) / 2; + } + + final int getX(int d, int k) { +// TODO: remove +if (k < beginK || k > endK) + throw new RuntimeException("k " + k + " not in " + beginK + " - " + endK); + return x.get(getIndex(d, k)); + } + + final int getSnake(int d, int k) { +// TODO: remove +if (k < beginK || k > endK) + throw new RuntimeException("k " + k + " not in " + beginK + " - " + endK); + return snake.get(getIndex(d, k)); + } + + private int forceKIntoRange(int k) { + /* if k is odd, so must be the result */ + if (k < minK) + return minK + ((k ^ minK) & 1); + else if (k > maxK) + return maxK - ((k ^ maxK) & 1); + return k; + } + + void initialize(int k, int x, int minK, int maxK) { + this.minK = minK; + this.maxK = maxK; + beginK = endK = middleK = k; + this.x.clear(); + this.x.add(x); + snake.clear(); + snake.add(newSnake(k, x)); + } + + abstract int snake(int k, int x); + abstract int getLeft(int x); + abstract int getRight(int x); + abstract boolean isBetter(int left, int right); + abstract void adjustMinMaxK(final int k, final int x); + abstract boolean meets(int d, int k, int x, int snake); + + final int newSnake(int k, int x) { + int y = k + x; + return x + (endA + 1) * y; + } + + final int snake2x(int snake) { + return snake % (endA + 1); + } + + final int snake2y(int snake) { + return snake / (endA + 1); + } + + final boolean makeEdit(int snake1, int snake2) { + int x1 = snake2x(snake1), x2 = snake2x(snake2); + int y1 = snake2y(snake1), y2 = snake2y(snake2); + /* + * Check for incompatible partial edit paths: + * when there are ambiguities, we might have + * hit incompatible (i.e. non-overlapping) + * forward/backward paths. + * + * In that case, just pretend that we have + * an empty edit at the end of one snake; this + * will force a decision which path to take + * in the next recursion step. + */ + if (x1 > x2 || y1 > y2) { + x1 = x2; + y1 = y2; + } + edit = new Edit(x1, x2, y1, y2); + return true; + } + + boolean calculate(int d) { + prevBeginK = beginK; + prevEndK = endK; + beginK = forceKIntoRange(middleK - d); + endK = forceKIntoRange(middleK + d); + // TODO: handle i more efficiently + // TODO: walk snake(k, getX(d, k)) only once per (d, k) + // TODO: move end points out of the loop to avoid conditionals inside the loop + // go backwards so that we can avoid temp vars + for (int k = endK; k >= beginK; k -= 2) { + int left = -1, right = -1; + int leftSnake = -1, rightSnake = -1; + // TODO: refactor into its own function + if (k > prevBeginK) { + int i = getIndex(d - 1, k - 1); + left = x.get(i); + int end = snake(k - 1, left); + leftSnake = left != end ? + newSnake(k - 1, end) : + snake.get(i); + if (meets(d, k - 1, end, leftSnake)) + return true; + left = getLeft(end); + } + if (k < prevEndK) { + int i = getIndex(d - 1, k + 1); + right = x.get(i); + int end = snake(k + 1, right); + rightSnake = right != end ? + newSnake(k + 1, end) : + snake.get(i); + if (meets(d, k + 1, end, rightSnake)) + return true; + right = getRight(end); + } + int newX, newSnake; + if (k >= prevEndK || + (k > prevBeginK && + isBetter(left, right))) { + newX = left; + newSnake = leftSnake; + } + else { + newX = right; + newSnake = rightSnake; + } + if (meets(d, k, newX, newSnake)) + return true; + adjustMinMaxK(k, newX); + int i = getIndex(d, k); + x.set(i, newX); + snake.set(i, newSnake); + } + return false; + } + } + + class ForwardEditPaths extends EditPaths { + final int snake(int k, int x) { + for (; x < endA && k + x < endB; x++) + if (!a.equals(x, b, k + x)) + break; + return x; + } + + final int getLeft(final int x) { + return x; + } + + final int getRight(final int x) { + return x + 1; + } + + final boolean isBetter(final int left, final int right) { + return left > right; + } + + final void adjustMinMaxK(final int k, final int x) { + if (x >= endA || k + x >= endB) { + if (k > backward.middleK) + maxK = k; + else + minK = k; + } + } + + final boolean meets(int d, int k, int x, int snake) { + if (k < backward.beginK || k > backward.endK) + return false; + // TODO: move out of loop + if (((d - 1 + k - backward.middleK) % 2) == 1) + return false; + if (x < backward.getX(d - 1, k)) + return false; + makeEdit(snake, backward.getSnake(d - 1, k)); + return true; + } + } + + class BackwardEditPaths extends EditPaths { + final int snake(int k, int x) { + for (; x > beginA && k + x > beginB; x--) + if (!a.equals(x - 1, b, k + x - 1)) + break; + return x; + } + + final int getLeft(final int x) { + return x - 1; + } + + final int getRight(final int x) { + return x; + } + + final boolean isBetter(final int left, final int right) { + return left < right; + } + + final void adjustMinMaxK(final int k, final int x) { + if (x <= beginA || k + x <= beginB) { + if (k > forward.middleK) + maxK = k; + else + minK = k; + } + } + + final boolean meets(int d, int k, int x, int snake) { + if (k < forward.beginK || k > forward.endK) + return false; + // TODO: move out of loop + if (((d + k - forward.middleK) % 2) == 1) + return false; + if (x > forward.getX(d, k)) + return false; + makeEdit(forward.getSnake(d, k), snake); + return true; + } + } + } + + // debugging (TODO: remove) + public void print(Sequence s, int begin, int end) { + RawText raw = (RawText)s; + try { + while (begin < end) { + System.err.print("" + begin + ": "); + raw.writeLine(System.err, begin++); + System.err.println(""); + } + } catch (Exception e) { e.printStackTrace(); } + } + + public void print(int beginA, int endA, int beginB, int endB) { + System.err.println("<<<<<<"); + print(a, beginA, endA); + System.err.println("======"); + print(b, beginB, endB); + System.err.println(">>>>>>"); + } + + public static void main(String[] args) { + if (args.length != 2) { + System.err.println("Need 2 arguments"); + System.exit(1); + } + try { + RawText a = new RawText(new java.io.File(args[0])); + RawText b = new RawText(new java.io.File(args[1])); + MyersDiff diff = new MyersDiff(a, b); + System.out.println(diff.getEdits().toString()); + } catch (Exception e) { + e.printStackTrace(); + } + } +} From 1a03c864a6c82ed76ebfe9504d983f5741d75032 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 Sep 2009 12:46:51 +0200 Subject: [PATCH 03/10] Add a test class for Myers' diff algorithm Bug: 291083 Eclipse-CQ: 3559 Change-Id: I5a10946637438052e7596489b9f9de3a0c6b2066 Signed-off-by: Johannes Schindelin Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/diff/MyersDiffTest.java | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffTest.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffTest.java new file mode 100644 index 000000000..c91348ffe --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2009, Johannes E. Schindelin + * Copyright (C) 2009, Johannes Schindelin + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import junit.framework.TestCase; + +public class MyersDiffTest extends TestCase { + public void testAtEnd() { + assertDiff("HELLO", "HELL", " -4,1 +4,0"); + } + + public void testAtStart() { + assertDiff("Git", "JGit", " -0,0 +0,1"); + } + + public void testSimple() { + assertDiff("HELLO WORLD", "LOW", + " -0,3 +0,0 -5,1 +2,0 -7,4 +3,0"); + // is ambiguous, could be this, too: + // " -0,2 +0,0 -3,1 +1,0 -5,1 +2,0 -7,4 +3,0" + } + + public void assertDiff(String a, String b, String edits) { + MyersDiff diff = new MyersDiff(toCharArray(a), toCharArray(b)); + assertEquals(edits, toString(diff.getEdits())); + } + + private static String toString(EditList list) { + StringBuilder builder = new StringBuilder(); + for (Edit e : list) + builder.append(" -" + e.beginA + + "," + (e.endA - e.beginA) + + " +" + e.beginB + "," + (e.endB - e.beginB)); + return builder.toString(); + } + + private static CharArray toCharArray(String s) { + return new CharArray(s); + } + + protected static String toString(Sequence seq, int begin, int end) { + CharArray a = (CharArray)seq; + return new String(a.array, begin, end - begin); + } + + protected static String toString(CharArray a, CharArray b, + int x, int k) { + return "(" + x + "," + (k + x) + + (x < 0 ? '<' : + (x >= a.array.length ? + '>' : a.array[x])) + + (k + x < 0 ? '<' : + (k + x >= b.array.length ? + '>' : b.array[k + x])) + + ")"; + } + + private static class CharArray implements Sequence { + char[] array; + public CharArray(String s) { array = s.toCharArray(); } + public int size() { return array.length; } + public boolean equals(int i, Sequence other, int j) { + CharArray o = (CharArray)other; + return array[i] == o.array[j]; + } + } +} From e34865b88b02bb5937f8ac815fe065ebe8e9d47b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 Sep 2009 12:47:01 +0200 Subject: [PATCH 04/10] Prepare RawText for diff-index and diff-files Bug: 291083 Eclipse-CQ: 3559 Change-Id: Ia02f346a96b5f1e24f8bc9676bd428b968a41222 Signed-off-by: Johannes Schindelin Signed-off-by: Shawn O. Pearce --- .../src/org/eclipse/jgit/diff/RawText.java | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 61a3ef41c..0bbbb1991 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -44,6 +44,8 @@ package org.eclipse.jgit.diff; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStream; @@ -87,6 +89,18 @@ public RawText(final byte[] input) { hashes = computeHashes(); } + /** + * Create a new sequence from a file. + *

+ * The entire file contents are used. + * + * @param file + * the text file. + */ + public RawText(File file) throws IOException { + this(readFile(file)); + } + public int size() { // The line map is always 2 entries larger than the number of lines in // the file. Index 0 is padded out/unused. The last index is the total @@ -187,4 +201,16 @@ protected int hashLine(final byte[] raw, int ptr, final int end) { hash = (hash << 5) ^ (raw[ptr] & 0xff); return hash; } -} \ No newline at end of file + + private static byte[] readFile(File file) throws IOException { + byte[] result = new byte[(int)file.length()]; + FileInputStream in = new FileInputStream(file); + for (int off = 0; off < result.length; ) { + int read = in.read(result, off, result.length - off); + if (read < 0) + throw new IOException("Early EOF"); + off += read; + } + return result; + } +} From a3aa43ecb164d13679b9ee33fb8626c82dea7f8c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 Sep 2009 12:47:08 +0200 Subject: [PATCH 05/10] Add the "jgit diff" command This commit contains fixes provided by Christian Halstrick. Bug: 291083 Eclipse-CQ: 3559 Change-Id: If8d187e70a51f31040f99098489e513791dc6415 Signed-off-by: Johannes Schindelin Signed-off-by: Shawn O. Pearce --- .../services/org.eclipse.jgit.pgm.TextBuiltin | 1 + .../src/org/eclipse/jgit/pgm/Diff.java | 140 ++++++++++++++++++ .../org/eclipse/jgit/diff/DiffFormatter.java | 2 +- 3 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java diff --git a/org.eclipse.jgit.pgm/src/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin b/org.eclipse.jgit.pgm/src/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin index 6c2a65306..de63adbbc 100644 --- a/org.eclipse.jgit.pgm/src/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin +++ b/org.eclipse.jgit.pgm/src/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin @@ -2,6 +2,7 @@ org.eclipse.jgit.pgm.AmazonS3Client org.eclipse.jgit.pgm.Branch org.eclipse.jgit.pgm.Clone org.eclipse.jgit.pgm.Daemon +org.eclipse.jgit.pgm.Diff org.eclipse.jgit.pgm.DiffTree org.eclipse.jgit.pgm.Fetch org.eclipse.jgit.pgm.Glog diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java new file mode 100644 index 000000000..3b327efd7 --- /dev/null +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2009, Johannes E. Schindelin + * Copyright (C) 2009, Johannes Schindelin + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.pgm; + +import java.io.IOException; +import java.io.PrintStream; + +import java.util.ArrayList; +import java.util.List; + +import org.kohsuke.args4j.Argument; +import org.kohsuke.args4j.ExampleMode; +import org.kohsuke.args4j.Option; + +import org.eclipse.jgit.diff.DiffFormatter; +import org.eclipse.jgit.diff.MyersDiff; +import org.eclipse.jgit.diff.RawText; + +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.Ref; + +import org.eclipse.jgit.pgm.opt.CmdLineParser; +import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler; + +import org.eclipse.jgit.treewalk.AbstractTreeIterator; +import org.eclipse.jgit.treewalk.TreeWalk; + +import org.eclipse.jgit.treewalk.filter.AndTreeFilter; +import org.eclipse.jgit.treewalk.filter.TreeFilter; + +@Command(common = true, usage = "Show diffs") +class Diff extends TextBuiltin { + @Argument(index = 0, metaVar = "tree-ish", required = true) + void tree_0(final AbstractTreeIterator c) { + trees.add(c); + } + + @Argument(index = 1, metaVar = "tree-ish", required = true) + private final List trees = new ArrayList(); + + @Option(name = "--", metaVar = "path", multiValued = true, handler = PathTreeFilterHandler.class) + private TreeFilter pathFilter = TreeFilter.ALL; + + private DiffFormatter fmt = new DiffFormatter(); + + @Override + protected void run() throws Exception { + final TreeWalk walk = new TreeWalk(db); + walk.reset(); + walk.setRecursive(true); + for (final AbstractTreeIterator i : trees) + walk.addTree(i); + walk.setFilter(AndTreeFilter.create(TreeFilter.ANY_DIFF, pathFilter)); + + final int nTree = walk.getTreeCount(); + while (walk.next()) + outputDiff(System.out, walk.getPathString(), + walk.getObjectId(0), walk.getFileMode(0), + walk.getObjectId(1), walk.getFileMode(1)); + } + + protected void outputDiff(PrintStream out, String path, + ObjectId id1, FileMode mode1, ObjectId id2, FileMode mode2) throws IOException { + String name1 = "a/" + path; + String name2 = "b/" + path; + out.println("diff --git " + name1 + " " + name2); + boolean isNew=false; + boolean isDelete=false; + if (id1.equals(id1.zeroId())) { + out.println("new file mode " + mode2); + isNew=true; + } else if (id2.equals(id2.zeroId())) { + out.println("deleted file mode " + mode1); + isDelete=true; + } else if (!mode1.equals(mode2)) { + out.println("old mode " + mode1); + out.println("new mode " + mode2); + } + out.println("index " + id1.abbreviate(db, 7).name() + + ".." + id2.abbreviate(db, 7).name() + + (mode1.equals(mode2) ? " " + mode1 : "")); + out.println("--- " + (isNew ? "/dev/null" : name1)); + out.println("+++ " + (isDelete ? "/dev/null" : name2)); + RawText a = getRawText(id1); + RawText b = getRawText(id2); + MyersDiff diff = new MyersDiff(a, b); + fmt.formatEdits(out, a, b, diff.getEdits()); + } + + private RawText getRawText(ObjectId id) throws IOException { + if (id.equals(id.zeroId())) + return new RawText(new byte[] { }); + return new RawText(db.openBlob(id).getCachedBytes()); + } +} + diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index 639ed77ee..a4edb3f2b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -114,7 +114,7 @@ public void format(final OutputStream out, final FileHeader head, formatEdits(out, a, b, head.toEditList()); } - private void formatEdits(final OutputStream out, final RawText a, + public void formatEdits(final OutputStream out, final RawText a, final RawText b, final EditList edits) throws IOException { for (int curIdx = 0; curIdx < edits.size();) { Edit curEdit = edits.get(curIdx); From 6549af4591e84dd872ce6a92f6ea638770bd9696 Mon Sep 17 00:00:00 2001 From: Christian Halstrick Date: Tue, 6 Oct 2009 11:16:31 +0200 Subject: [PATCH 06/10] Fix some warnings regarding unnecessary imports and accessing static methods My Galileo eclipse was complaining about unneeded import statements and how static methods have been accessed in Diff.java. There was also one method call which could be removed because he had no sideeffects and the return value was not used. I fixed this so that there are no warnings anymore. Signed-off-by: Christian Halstrick Change-Id: I48d7e01536aab1524140d72af574e3fd7149cd23 --- .../src/org/eclipse/jgit/pgm/Diff.java | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java index 3b327efd7..24bcdcc61 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java @@ -1,4 +1,5 @@ /* + * Copyright (C) 2009, Christian Halstrick * Copyright (C) 2009, Johannes E. Schindelin * Copyright (C) 2009, Johannes Schindelin * and other copyright owners as documented in the project's IP log. @@ -46,29 +47,20 @@ import java.io.IOException; import java.io.PrintStream; - import java.util.ArrayList; import java.util.List; import org.kohsuke.args4j.Argument; -import org.kohsuke.args4j.ExampleMode; import org.kohsuke.args4j.Option; import org.eclipse.jgit.diff.DiffFormatter; import org.eclipse.jgit.diff.MyersDiff; import org.eclipse.jgit.diff.RawText; - -import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; -import org.eclipse.jgit.lib.Ref; - -import org.eclipse.jgit.pgm.opt.CmdLineParser; import org.eclipse.jgit.pgm.opt.PathTreeFilterHandler; - import org.eclipse.jgit.treewalk.AbstractTreeIterator; import org.eclipse.jgit.treewalk.TreeWalk; - import org.eclipse.jgit.treewalk.filter.AndTreeFilter; import org.eclipse.jgit.treewalk.filter.TreeFilter; @@ -96,7 +88,6 @@ protected void run() throws Exception { walk.addTree(i); walk.setFilter(AndTreeFilter.create(TreeFilter.ANY_DIFF, pathFilter)); - final int nTree = walk.getTreeCount(); while (walk.next()) outputDiff(System.out, walk.getPathString(), walk.getObjectId(0), walk.getFileMode(0), @@ -110,10 +101,10 @@ protected void outputDiff(PrintStream out, String path, out.println("diff --git " + name1 + " " + name2); boolean isNew=false; boolean isDelete=false; - if (id1.equals(id1.zeroId())) { + if (id1.equals(ObjectId.zeroId())) { out.println("new file mode " + mode2); isNew=true; - } else if (id2.equals(id2.zeroId())) { + } else if (id2.equals(ObjectId.zeroId())) { out.println("deleted file mode " + mode1); isDelete=true; } else if (!mode1.equals(mode2)) { @@ -132,7 +123,7 @@ protected void outputDiff(PrintStream out, String path, } private RawText getRawText(ObjectId id) throws IOException { - if (id.equals(id.zeroId())) + if (id.equals(ObjectId.zeroId())) return new RawText(new byte[] { }); return new RawText(db.openBlob(id).getCachedBytes()); } From b3e4ac2622891ec50d1b56d61585263fadcf9160 Mon Sep 17 00:00:00 2001 From: Christian Halstrick Date: Tue, 6 Oct 2009 16:21:39 +0200 Subject: [PATCH 07/10] Fixed MyersDiff to be able to handle more than 100k MyersDiff was crashing with ArrayIndexOutOfBoundsException when diffing huge files. This was because the snake data (begin and end position while processing chunks of the text) was sequeezed into a single int. But with longer texts to diff these values exceeded the range of java int values. It is fixed by squeezing the two ints into a long and by adding the LongList helper class. Change-Id: Iacb9082e1b076e994d1486aa8e512342ad7432b3 Signed-off-by: Christian Halstrick --- .../src/org/eclipse/jgit/diff/MyersDiff.java | 35 ++-- .../src/org/eclipse/jgit/util/LongList.java | 152 ++++++++++++++++++ 2 files changed, 171 insertions(+), 16 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java index 795075d28..a1c485aa0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java @@ -49,6 +49,7 @@ import java.util.List; import org.eclipse.jgit.util.IntList; +import org.eclipse.jgit.util.LongList; public class MyersDiff { protected EditList edits; @@ -202,7 +203,7 @@ Edit calculate(int beginA, int endA, int beginB, int endB) { abstract class EditPaths { private IntList x = new IntList(); - private IntList snake = new IntList(); + private LongList snake = new LongList(); int beginK, endK, middleK; int prevBeginK, prevEndK; /* if we hit one end early, no need to look further */ @@ -222,7 +223,7 @@ final int getX(int d, int k) { return x.get(getIndex(d, k)); } - final int getSnake(int d, int k) { + final long getSnake(int d, int k) { // TODO: remove if (k < beginK || k > endK) throw new RuntimeException("k " + k + " not in " + beginK + " - " + endK); @@ -253,22 +254,23 @@ void initialize(int k, int x, int minK, int maxK) { abstract int getRight(int x); abstract boolean isBetter(int left, int right); abstract void adjustMinMaxK(final int k, final int x); - abstract boolean meets(int d, int k, int x, int snake); + abstract boolean meets(int d, int k, int x, long snake); - final int newSnake(int k, int x) { - int y = k + x; - return x + (endA + 1) * y; + final long newSnake(int k, int x) { + long y = k + x; + long ret = ((long) x) << 32; + return ret | y; } - final int snake2x(int snake) { - return snake % (endA + 1); + final int snake2x(long snake) { + return (int) (snake >>> 32); } - final int snake2y(int snake) { - return snake / (endA + 1); + final int snake2y(long snake) { + return (int) snake; } - final boolean makeEdit(int snake1, int snake2) { + final boolean makeEdit(long snake1, long snake2) { int x1 = snake2x(snake1), x2 = snake2x(snake2); int y1 = snake2y(snake1), y2 = snake2y(snake2); /* @@ -301,7 +303,7 @@ boolean calculate(int d) { // go backwards so that we can avoid temp vars for (int k = endK; k >= beginK; k -= 2) { int left = -1, right = -1; - int leftSnake = -1, rightSnake = -1; + long leftSnake = -1L, rightSnake = -1L; // TODO: refactor into its own function if (k > prevBeginK) { int i = getIndex(d - 1, k - 1); @@ -325,7 +327,8 @@ boolean calculate(int d) { return true; right = getRight(end); } - int newX, newSnake; + int newX; + long newSnake; if (k >= prevEndK || (k > prevBeginK && isBetter(left, right))) { @@ -376,7 +379,7 @@ final void adjustMinMaxK(final int k, final int x) { } } - final boolean meets(int d, int k, int x, int snake) { + final boolean meets(int d, int k, int x, long snake) { if (k < backward.beginK || k > backward.endK) return false; // TODO: move out of loop @@ -418,7 +421,7 @@ final void adjustMinMaxK(final int k, final int x) { } } - final boolean meets(int d, int k, int x, int snake) { + final boolean meets(int d, int k, int x, long snake) { if (k < forward.beginK || k > forward.endK) return false; // TODO: move out of loop @@ -466,4 +469,4 @@ public static void main(String[] args) { e.printStackTrace(); } } -} +} \ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java new file mode 100644 index 000000000..26608bb2a --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * Copyright (C) 2009, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util; + +/** A more efficient List using a primitive long array. */ +public class LongList { + private long[] entries; + + private int count; + + /** Create an empty list with a default capacity. */ + public LongList() { + this(10); + } + + /** + * Create an empty list with the specified capacity. + * + * @param capacity + * number of entries the list can initially hold. + */ + public LongList(final int capacity) { + entries = new long[capacity]; + } + + /** @return number of entries in this list */ + public int size() { + return count; + } + + /** + * @param i + * index to read, must be in the range [0, {@link #size()}). + * @return the number at the specified index + * @throws ArrayIndexOutOfBoundsException + * the index outside the valid range + */ + public long get(final int i) { + if (count <= i) + throw new ArrayIndexOutOfBoundsException(i); + return entries[i]; + } + + /** Empty this list */ + public void clear() { + count = 0; + } + + /** + * Add an entry to the end of the list. + * + * @param n + * the number to add. + */ + public void add(final long n) { + if (count == entries.length) + grow(); + entries[count++] = n; + } + + /** + * Assign an entry in the list. + * + * @param index + * index to set, must be in the range [0, {@link #size()}). + * @param n + * value to store at the position. + */ + public void set(final int index, final long n) { + if (count < index) + throw new ArrayIndexOutOfBoundsException(index); + else if (count == index) + add(n); + else + entries[index] = n; + } + + /** + * Pad the list with entries. + * + * @param toIndex + * index position to stop filling at. 0 inserts no filler. 1 + * ensures the list has a size of 1, adding val if + * the list is currently empty. + * @param val + * value to insert into padded positions. + */ + public void fillTo(int toIndex, final long val) { + while (count < toIndex) + add(val); + } + + private void grow() { + final long[] n = new long[(entries.length + 16) * 3 / 2]; + System.arraycopy(entries, 0, n, 0, count); + entries = n; + } + + public String toString() { + final StringBuilder r = new StringBuilder(); + r.append('['); + for (int i = 0; i < count; i++) { + if (i > 0) + r.append(", "); + r.append(entries[i]); + } + r.append(']'); + return r.toString(); + } +} From 2484ad6fe033fd811fc25245a1eb0c2fdda20d91 Mon Sep 17 00:00:00 2001 From: Christian Halstrick Date: Fri, 9 Oct 2009 16:18:59 +0200 Subject: [PATCH 08/10] Add javadoc comments, remove unused code, shift comments to correct place This change only fixes warnings of the eclipse build regarding missing javadocs. Some comments where just missing, so they have been added. Other comments where at the wrong (from eclipse point of view) place, so eclipse was complaining. Also two method which existed for debugging purposes have been removed to get rid of Eclipse warngins about unused code. --- .../org/eclipse/jgit/diff/DiffFormatter.java | 8 ++ .../src/org/eclipse/jgit/diff/MyersDiff.java | 113 ++++++++++++++---- .../src/org/eclipse/jgit/diff/RawText.java | 1 + 3 files changed, 97 insertions(+), 25 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java index a4edb3f2b..115d9baff 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java @@ -114,6 +114,14 @@ public void format(final OutputStream out, final FileHeader head, formatEdits(out, a, b, head.toEditList()); } + /** + * Formats a list of edits in unified diff format + * @param out where the unified diff is written to + * @param a the text A which was compared + * @param b the text B which was compared + * @param edits some differences which have been calculated between A and B + * @throws IOException + */ public void formatEdits(final OutputStream out, final RawText a, final RawText b, final EditList edits) throws IOException { for (int curIdx = 0; curIdx < edits.size();) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java index a1c485aa0..055729961 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java @@ -44,23 +44,92 @@ package org.eclipse.jgit.diff; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - import org.eclipse.jgit.util.IntList; import org.eclipse.jgit.util.LongList; +/** + * Diff algorithm, based on "An O(ND) Difference Algorithm and its + * Variations", by Eugene Myers. + * + * The basic idea is to put the line numbers of text A as columns ("x") and the + * lines of text B as rows ("y"). Now you try to find the shortest "edit path" + * from the upper left corner to the lower right corner, where you can + * always go horizontally or vertically, but diagonally from (x,y) to + * (x+1,y+1) only if line x in text A is identical to line y in text B. + * + * Myers' fundamental concept is the "furthest reaching D-path on diagonal k": + * a D-path is an edit path starting at the upper left corner and containing + * exactly D non-diagonal elements ("differences"). The furthest reaching + * D-path on diagonal k is the one that contains the most (diagonal) elements + * which ends on diagonal k (where k = y - x). + * + * Example: + * + * H E L L O W O R L D + * ____ + * L \___ + * O \___ + * W \________ + * + * Since every D-path has exactly D horizontal or vertical elements, it can + * only end on the diagonals -D, -D+2, ..., D-2, D. + * + * Since every furthest reaching D-path contains at least one furthest + * reaching (D-1)-path (except for D=0), we can construct them recursively. + * + * Since we are really interested in the shortest edit path, we can start + * looking for a 0-path, then a 1-path, and so on, until we find a path that + * ends in the lower right corner. + * + * To save space, we do not need to store all paths (which has quadratic space + * requirements), but generate the D-paths simultaneously from both sides. + * When the ends meet, we will have found "the middle" of the path. From the + * end points of that diagonal part, we can generate the rest recursively. + * + * This only requires linear space. + * + * The overall (runtime) complexity is + * + * O(N * D^2 + 2 * N/2 * (D/2)^2 + 4 * N/4 * (D/4)^2 + ...) + * = O(N * D^2 * 5 / 4) = O(N * D^2), + * + * (With each step, we have to find the middle parts of twice as many regions + * as before, but the regions (as well as the D) are halved.) + * + * So the overall runtime complexity stays the same with linear space, + * albeit with a larger constant factor. + */ public class MyersDiff { + /** + * The list of edits found during the last call to {@link #calculateEdits()} + */ protected EditList edits; - protected Sequence a, b; + /** + * The first text to be compared. Referred to as "Text A" in the comments + */ + protected Sequence a; + + /** + * The second text to be compared. Referred to as "Text B" in the comments + */ + protected Sequence b; + + /** + * The only constructor + * + * @param a the text A which should be compared + * @param b the text B which should be compared + */ public MyersDiff(Sequence a, Sequence b) { this.a = a; this.b = b; calculateEdits(); } + /** + * @return the list of edits found during the last call to {@link #calculateEdits()} + */ public EditList getEdits() { return edits; } @@ -68,6 +137,10 @@ public EditList getEdits() { // TODO: use ThreadLocal for future multi-threaded operations MiddleEdit middle = new MiddleEdit(); + /** + * Entrypoint into the algorithm this class is all about. This method triggers that the + * differences between A and B are calculated in form of a list of edits. + */ protected void calculateEdits() { edits = new EditList(); @@ -80,6 +153,13 @@ protected void calculateEdits() { middle.beginB, middle.endB); } + /** + * Calculates the differences between a given part of A against another given part of B + * @param beginA start of the part of A which should be compared (0<=beginA>>>>>"); - } - + /** + * @param args two filenames specifying the contents to be diffed + */ public static void main(String[] args) { if (args.length != 2) { System.err.println("Need 2 arguments"); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 0bbbb1991..9a206af19 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -96,6 +96,7 @@ public RawText(final byte[] input) { * * @param file * the text file. + * @throws IOException if Exceptions occur while reading the file */ public RawText(File file) throws IOException { this(readFile(file)); From b0772d7a5c74d68a31697ebc971438faa7a132f5 Mon Sep 17 00:00:00 2001 From: Christian Halstrick Date: Mon, 19 Oct 2009 17:10:10 +0200 Subject: [PATCH 09/10] Add performance tests for MyersDiff Add some tests which make sure that the diff algorithm really behaves in the promised O(N*D) manner. This tests compute diffs between multiple big chunks of data, measure time for computing the diffs and fail if the measured times are off O(N*D) by more than a factor 10 Signed-off-by: Christian Halstrick Change-Id: I8e1e0be60299472828718371b231f1d8a9dc21a7 Signed-off-by: Robin Rosenberg --- .../jgit/diff/DiffTestDataGenerator.java | 90 ++++++++ .../jgit/diff/MyersDiffPerformanceTest.java | 196 ++++++++++++++++++ .../eclipse/jgit/util/CPUTimeStopWatch.java | 111 ++++++++++ 3 files changed, 397 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java new file mode 100644 index 000000000..c40311214 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +public class DiffTestDataGenerator { + /** + * Generate sequence of characters in ascending order. The first character + * is a space. All subsequent characters have an ASCII code one greater then + * the ASCII code of the preceding character. On exception: the character + * following which follows '~' is again a ' '. + * + * @param len + * length of the String to be returned + * @return the sequence of characters as String + */ + public static String generateSequence(int len) { + return generateSequence(len, 0, 0); + } + + /** + * Generate sequence of characters similar to the one returned by + * {@link #generateSequence(int)}. But this time in each chunk of + * characters the last characters are left out. By + * calling this method twice with two different prime skipPeriod values and + * short skipLength values you create test data which is similar to what + * programmers do to their source code - huge files with only few + * insertions/deletions/changes. + * + * @param len + * length of the String to be returned + * @param skipPeriod + * @param skipLength + * @return the sequence of characters as String + */ + public static String generateSequence(int len, int skipPeriod, + int skipLength) { + StringBuilder text = new StringBuilder(len); + int skipStart = skipPeriod - skipLength; + int skippedChars = 0; + for (int i = 0; i - skippedChars < len; ++i) { + if (skipPeriod == 0 || i % skipPeriod < skipStart) { + text.append((char) (32 + i % 95)); + } else { + skippedChars++; + } + } + return text.toString(); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java new file mode 100644 index 000000000..fe63e3d18 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import junit.framework.TestCase; + +import org.eclipse.jgit.util.CPUTimeStopWatch; + +/** + * Test cases for the performance of the diff implementation. The tests test + * that the performance of the MyersDiff algorithm is really O(N*D). Means the + * time for computing the diff between a and b should depend on the product of + * a.length+b.length and the number of found differences. The tests compute + * diffs between chunks of different length, measure the needed time and check + * that time/(N*D) does not differ more than a certain factor (currently 10) + */ +public class MyersDiffPerformanceTest extends TestCase { + private static final long longTaskBoundary = 5000000000L; + + private static final int minCPUTimerTicks = 10; + + private static final int maxFactor = 15; + + private CPUTimeStopWatch stopwatch=CPUTimeStopWatch.createInstance(); + + public class PerfData { + private NumberFormat fmt = new DecimalFormat("#.##E0"); + + public long runningTime; + + public long D; + + public long N; + + private double p1 = -1; + + private double p2 = -1; + + public double perf1() { + if (p1 < 0) + p1 = runningTime / ((double) N * D); + return p1; + } + + public double perf2() { + if (p2 < 0) + p2 = runningTime / ((double) N * D * D); + return p2; + } + + public String toString() { + return ("diffing " + N / 2 + " bytes took " + runningTime + + " ns. N=" + N + ", D=" + D + ", time/(N*D):" + + fmt.format(perf1()) + ", time/(N*D^2):" + fmt + .format(perf2())); + } + } + + public static Comparator getComparator(final int whichPerf) { + return new Comparator() { + public int compare(PerfData o1, PerfData o2) { + double p1 = (whichPerf == 1) ? o1.perf1() : o1.perf2(); + double p2 = (whichPerf == 1) ? o2.perf1() : o2.perf2(); + return (p1 < p2) ? -1 : (p1 > p2) ? 1 : 0; + } + }; + } + + public void test() { + if (stopwatch!=null) { + List perfData = new LinkedList(); + perfData.add(test(10000)); + perfData.add(test(20000)); + perfData.add(test(50000)); + perfData.add(test(80000)); + perfData.add(test(99999)); + perfData.add(test(999999)); + + Comparator c = getComparator(1); + double factor = Collections.max(perfData, c).perf1() + / Collections.min(perfData, c).perf1(); + assertTrue( + "minimun and maximum of performance-index t/(N*D) differed too much. Measured factor of " + + factor + + " (maxFactor=" + + maxFactor + + "). Perfdata=<" + perfData.toString() + ">", + factor < maxFactor); + } + } + + /** + * Tests the performance of MyersDiff for texts which are similar (not + * random data). The CPU time is measured and returned. Because of bad + * accuracy of CPU time information the diffs are repeated. During each + * repetition the interim CPU time is checked. The diff operation is + * repeated until we have seen the CPU time clock changed its value at least + * {@link #minCPUTimerTicks} times. + * + * @param characters + * the size of the diffed character sequences. + * @return performance data + */ + private PerfData test(int characters) { + PerfData ret = new PerfData(); + String a = DiffTestDataGenerator.generateSequence(characters, 971, 3); + String b = DiffTestDataGenerator.generateSequence(characters, 1621, 5); + CharArray ac = new CharArray(a); + CharArray bc = new CharArray(b); + MyersDiff myersDiff = null; + int cpuTimeChanges = 0; + long lastReadout = 0; + long interimTime = 0; + int repetitions = 0; + stopwatch.start(); + while (cpuTimeChanges < minCPUTimerTicks && interimTime < longTaskBoundary) { + myersDiff = new MyersDiff(ac, bc); + repetitions++; + interimTime = stopwatch.readout(); + if (interimTime != lastReadout) { + cpuTimeChanges++; + lastReadout = interimTime; + } + } + ret.runningTime = stopwatch.stop() / repetitions; + ret.N = (ac.size() + bc.size()); + ret.D = myersDiff.getEdits().size(); + + return ret; + } + + private static class CharArray implements Sequence { + private final char[] array; + + public CharArray(String s) { + array = s.toCharArray(); + } + + public int size() { + return array.length; + } + + public boolean equals(int i, Sequence other, int j) { + CharArray o = (CharArray) other; + return array[i] == o.array[j]; + } + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java new file mode 100644 index 000000000..55e51f710 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util; + +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadMXBean; + +/** + * A simple stopwatch which measures elapsed CPU time of the current thread. CPU + * time is the time spent on executing your own code plus the time spent on + * executing operating system calls triggered by your application. + *

+ * This stopwatch needs a VM which supports getting CPU Time information for the + * current thread. The static method createInstance() will take care to return + * only a new instance of this class if the VM is capable of returning CPU time. + */ +public class CPUTimeStopWatch { + private long start; + + private static ThreadMXBean mxBean=ManagementFactory.getThreadMXBean(); + + /** + * use this method instead of the constructor to be sure that the underlying + * VM provides all features needed by this class. + * + * @return a new instance of {@link #CPUTimeStopWatch()} or + * null if the VM does not support getting CPU time + * information + */ + public static CPUTimeStopWatch createInstance() { + return mxBean.isCurrentThreadCpuTimeSupported() ? new CPUTimeStopWatch() + : null; + } + + /** + * Starts the stopwatch. If the stopwatch is already started this will + * restart the stopwatch. + */ + public void start() { + start = mxBean.getCurrentThreadCpuTime(); + } + + /** + * Stops the stopwatch and return the elapsed CPU time in nanoseconds. + * Should be called only on started stopwatches. + * + * @return the elapsed CPU time in nanoseconds. When called on non-started + * stopwatches (either because {@link #start()} was never called or + * {@link #stop()} was called after the last call to + * {@link #start()}) this method will return 0. + */ + public long stop() { + long cpuTime = readout(); + start = 0; + return cpuTime; + } + + /** + * Return the elapsed CPU time in nanoseconds. In contrast to + * {@link #stop()} the stopwatch will continue to run after this call. + * + * @return the elapsed CPU time in nanoseconds. When called on non-started + * stopwatches (either because {@link #start()} was never called or + * {@link #stop()} was called after the last call to + * {@link #start()}) this method will return 0. + */ + public long readout() { + return (start == 0) ? 0 : mxBean.getCurrentThreadCpuTime() - start; + } +} From 6d930cd572463ede104126ac68a84bc283a1aa3d Mon Sep 17 00:00:00 2001 From: Christian Halstrick Date: Thu, 3 Dec 2009 16:36:14 +0100 Subject: [PATCH 10/10] Add file content merge algorithm Adds the file content merge alorithm and tests for merge to jgit. The merge algorithm: - Gets as input parameters the common base, the two new contents called "ours" and "theirs". - Computes the Edits from base to ours and from base to theirs with the help of MyersDiff. - Iterates over the edits. - Independent edits from ours or from theirs will just be applied to the result. - For conflicting edits we first harmonize the ranges of the edits so that in the end we have exactly two edits starting and ending at the same points in the common base. Then we write the two conclicting contents into the result stream. Change-Id: I411862393e7bf416b6f33ca55ec5af608ff4663 Signed-off-by: Christian Halstrick [sp: Fixed up two awkard comments in documentation.] Signed-off-by: Shawn O. Pearce --- .../jgit/merge/MergeAlgorithmTest.java | 187 ++++++++++++++ .../eclipse/jgit/merge/MergeAlgorithm.java | 230 ++++++++++++++++++ .../org/eclipse/jgit/merge/MergeChunk.java | 142 +++++++++++ .../eclipse/jgit/merge/MergeFormatter.java | 155 ++++++++++++ .../org/eclipse/jgit/merge/MergeResult.java | 161 ++++++++++++ 5 files changed, 875 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/merge/MergeAlgorithmTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeAlgorithm.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeChunk.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeFormatter.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeResult.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/merge/MergeAlgorithmTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/merge/MergeAlgorithmTest.java new file mode 100644 index 000000000..d12fae7c1 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/merge/MergeAlgorithmTest.java @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.merge; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import junit.framework.TestCase; + +import org.eclipse.jgit.diff.RawText; +import org.eclipse.jgit.lib.Constants; + +public class MergeAlgorithmTest extends TestCase { + MergeFormatter fmt=new MergeFormatter(); + + // the texts which are used in this merge-tests are constructed by + // concatenating fixed chunks of text defined by the String constants + // A..Y. The common base text is always the text A+B+C+D+E+F+G+H+I+J. + // The two texts being merged are constructed by deleting some chunks + // or inserting new chunks. Some of the chunks are one-liners, others + // contain more than one line. + private static final String A = "aaa\n"; + private static final String B = "bbbbb\nbb\nbbb\n"; + private static final String C = "c\n"; + private static final String D = "dd\n"; + private static final String E = "ee\n"; + private static final String F = "fff\nff\n"; + private static final String G = "gg\n"; + private static final String H = "h\nhhh\nhh\n"; + private static final String I = "iiii\n"; + private static final String J = "jj\n"; + private static final String Z = "zzz\n"; + private static final String Y = "y\n"; + + // constants which define how conflict-regions are expected to be reported. + private static final String XXX_0 = "<<<<<<< O\n"; + private static final String XXX_1 = "=======\n"; + private static final String XXX_2 = ">>>>>>> T\n"; + + // the common base from which all merges texts derive from + String base=A+B+C+D+E+F+G+H+I+J; + + // the following constants define the merged texts. The name of the + // constants describe how they are created out of the common base. E.g. + // the constant named replace_XYZ_by_MNO stands for the text which is + // created from common base by replacing first chunk X by chunk M, then + // Y by N and then Z by O. + String replace_C_by_Z=A+B+Z+D+E+F+G+H+I+J; + String replace_A_by_Y=Y+B+C+D+E+F+G+H+I+J; + String replace_A_by_Z=Z+B+C+D+E+F+G+H+I+J; + String replace_J_by_Y=A+B+C+D+E+F+G+H+I+Y; + String replace_J_by_Z=A+B+C+D+E+F+G+H+I+Z; + String replace_BC_by_ZZ=A+Z+Z+D+E+F+G+H+I+J; + String replace_BCD_by_ZZZ=A+Z+Z+Z+E+F+G+H+I+J; + String replace_BD_by_ZZ=A+Z+C+Z+E+F+G+H+I+J; + String replace_BCDEGI_by_ZZZZZZ=A+Z+Z+Z+Z+F+Z+H+Z+J; + String replace_CEFGHJ_by_YYYYYY=A+B+Y+D+Y+Y+Y+Y+I+Y; + String replace_BDE_by_ZZY=A+Z+C+Z+Y+F+G+H+I+J; + + /** + * Check for a conflict where the second text was changed similar to the + * first one, but the second texts modification covers one more line. + * + * @throws IOException + */ + public void testTwoConflictingModifications() throws IOException { + assertEquals(A + XXX_0 + B + Z + XXX_1 + Z + Z + XXX_2 + D + E + F + G + + H + I + J, + merge(base, replace_C_by_Z, replace_BC_by_ZZ)); + } + + /** + * Test a case where we have three consecutive chunks. The first text + * modifies all three chunks. The second text modifies the first and the + * last chunk. This should be reported as one conflicting region. + * + * @throws IOException + */ + public void testOneAgainstTwoConflictingModifications() throws IOException { + assertEquals(A + XXX_0 + Z + Z + Z + XXX_1 + Z + C + Z + XXX_2 + E + F + + G + H + I + J, + merge(base, replace_BCD_by_ZZZ, replace_BD_by_ZZ)); + } + + /** + * Test a merge where only the second text contains modifications. Expect as + * merge result the second text. + * + * @throws IOException + */ + public void testNoAgainstOneModification() throws IOException { + assertEquals(replace_BD_by_ZZ.toString(), + merge(base, base, replace_BD_by_ZZ)); + } + + /** + * Both texts contain modifications but not on the same chunks. Expect a + * non-conflict merge result. + * + * @throws IOException + */ + public void testTwoNonConflictingModifications() throws IOException { + assertEquals(Y + B + Z + D + E + F + G + H + I + J, + merge(base, replace_C_by_Z, replace_A_by_Y)); + } + + /** + * Merge two complicated modifications. The merge algorithm has to extend + * and combine conflicting regions to get to the expected merge result. + * + * @throws IOException + */ + public void testTwoComplicatedModifications() throws IOException { + assertEquals(A + XXX_0 + Z + Z + Z + Z + F + Z + H + XXX_1 + B + Y + D + + Y + Y + Y + Y + XXX_2 + Z + Y, + merge(base, + replace_BCDEGI_by_ZZZZZZ, + replace_CEFGHJ_by_YYYYYY)); + } + + /** + * Test a conflicting region at the very start of the text. + * + * @throws IOException + */ + public void testConflictAtStart() throws IOException { + assertEquals(XXX_0 + Z + XXX_1 + Y + XXX_2 + B + C + D + E + F + G + H + + I + J, merge(base, replace_A_by_Z, replace_A_by_Y)); + } + + /** + * Test a conflicting region at the very end of the text. + * + * @throws IOException + */ + public void testConflictAtEnd() throws IOException { + assertEquals(A+B+C+D+E+F+G+H+I+XXX_0+Z+XXX_1+Y+XXX_2, merge(base, replace_J_by_Z, replace_J_by_Y)); + } + + private String merge(String commonBase, String ours, String theirs) throws IOException { + MergeResult r=MergeAlgorithm.merge(new RawText(Constants.encode(commonBase)), new RawText(Constants.encode(ours)), new RawText(Constants.encode(theirs))); + ByteArrayOutputStream bo=new ByteArrayOutputStream(50); + fmt.formatMerge(bo, r, "B", "O", "T", Constants.CHARACTER_ENCODING); + return new String(bo.toByteArray(), Constants.CHARACTER_ENCODING); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeAlgorithm.java b/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeAlgorithm.java new file mode 100644 index 000000000..deae82e76 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeAlgorithm.java @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.merge; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.eclipse.jgit.diff.Edit; +import org.eclipse.jgit.diff.EditList; +import org.eclipse.jgit.diff.MyersDiff; +import org.eclipse.jgit.diff.Sequence; +import org.eclipse.jgit.merge.MergeChunk.ConflictState; + +/** + * Provides the merge algorithm which does a three-way merge on content provided + * as RawText. Makes use of {@link MyersDiff} to compute the diffs. + */ +public final class MergeAlgorithm { + + /** + * Since this class provides only static methods I add a private default + * constructor to prevent instantiation. + */ + private MergeAlgorithm() { + } + + // An special edit which acts as a sentinel value by marking the end the + // list of edits + private final static Edit END_EDIT = new Edit(Integer.MAX_VALUE, + Integer.MAX_VALUE); + + /** + * Does the three way merge between a common base and two sequences. + * + * @param base the common base sequence + * @param ours the first sequence to be merged + * @param theirs the second sequence to be merged + * @return the resulting content + */ + public static MergeResult merge(Sequence base, Sequence ours, + Sequence theirs) { + List sequences = new ArrayList(3); + sequences.add(base); + sequences.add(ours); + sequences.add(theirs); + MergeResult result = new MergeResult(sequences); + EditList oursEdits = new MyersDiff(base, ours).getEdits(); + Iterator baseToOurs = oursEdits.iterator(); + EditList theirsEdits = new MyersDiff(base, theirs).getEdits(); + Iterator baseToTheirs = theirsEdits.iterator(); + int current = 0; // points to the next line (first line is 0) of base + // which was not handled yet + Edit oursEdit = nextEdit(baseToOurs); + Edit theirsEdit = nextEdit(baseToTheirs); + + // iterate over all edits from base to ours and from base to theirs + // leave the loop when there are no edits more for ours or for theirs + // (or both) + while (theirsEdit != END_EDIT || oursEdit != END_EDIT) { + if (oursEdit.getEndA() <= theirsEdit.getBeginA()) { + // something was changed in ours not overlapping with any change + // from theirs. First add the common part in front of the edit + // then the edit. + if (current != oursEdit.getBeginA()) { + result.add(0, current, oursEdit.getBeginA(), + ConflictState.NO_CONFLICT); + } + result.add(1, oursEdit.getBeginB(), oursEdit.getEndB(), + ConflictState.NO_CONFLICT); + current = oursEdit.getEndA(); + oursEdit = nextEdit(baseToOurs); + } else if (theirsEdit.getEndA() <= oursEdit.getBeginA()) { + // something was changed in theirs not overlapping with any + // from ours. First add the common part in front of the edit + // then the edit. + if (current != theirsEdit.getBeginA()) { + result.add(0, current, theirsEdit.getBeginA(), + ConflictState.NO_CONFLICT); + } + result.add(2, theirsEdit.getBeginB(), theirsEdit.getEndB(), + ConflictState.NO_CONFLICT); + current = theirsEdit.getEndA(); + theirsEdit = nextEdit(baseToTheirs); + } else { + // here we found a real overlapping modification + + // if there is a common part in front of the conflict add it + if (oursEdit.getBeginA() != current + && theirsEdit.getBeginA() != current) { + result.add(0, current, Math.min(oursEdit.getBeginA(), + theirsEdit.getBeginA()), ConflictState.NO_CONFLICT); + } + + // set some initial values for the ranges in A and B which we + // want to handle + int oursBeginB = oursEdit.getBeginB(); + int theirsBeginB = theirsEdit.getBeginB(); + // harmonize the start of the ranges in A and B + if (oursEdit.getBeginA() < theirsEdit.getBeginA()) { + theirsBeginB -= theirsEdit.getBeginA() + - oursEdit.getBeginA(); + } else { + oursBeginB -= oursEdit.getBeginA() - theirsEdit.getBeginA(); + } + + // combine edits: + // Maybe an Edit on one side corresponds to multiple Edits on + // the other side. Then we have to combine the Edits of the + // other side - so in the end we can merge together two single + // edits. + // + // It is important to notice that this combining will extend the + // ranges of our conflict always downwards (towards the end of + // the content). The starts of the conflicting ranges in ours + // and theirs are not touched here. + // + // This combining is an iterative process: after we have + // combined some edits we have to do the check again. The + // combined edits could now correspond to multiple edits on the + // other side. + // + // Example: when this combining algorithm works on the following + // edits + // oursEdits=((0-5,0-5),(6-8,6-8),(10-11,10-11)) and + // theirsEdits=((0-1,0-1),(2-3,2-3),(5-7,5-7)) + // it will merge them into + // oursEdits=((0-8,0-8),(10-11,10-11)) and + // theirsEdits=((0-7,0-7)) + // + // Since the only interesting thing to us is how in ours and + // theirs the end of the conflicting range is changing we let + // oursEdit and theirsEdit point to the last conflicting edit + Edit nextOursEdit = nextEdit(baseToOurs); + Edit nextTheirsEdit = nextEdit(baseToTheirs); + for (;;) { + if (oursEdit.getEndA() > nextTheirsEdit.getBeginA()) { + theirsEdit = nextTheirsEdit; + nextTheirsEdit = nextEdit(baseToTheirs); + } else if (theirsEdit.getEndA() > nextOursEdit.getBeginA()) { + oursEdit = nextOursEdit; + nextOursEdit = nextEdit(baseToOurs); + } else { + break; + } + } + + // harmonize the end of the ranges in A and B + int oursEndB = oursEdit.getEndB(); + int theirsEndB = theirsEdit.getEndB(); + if (oursEdit.getEndA() < theirsEdit.getEndA()) { + oursEndB += theirsEdit.getEndA() - oursEdit.getEndA(); + } else { + theirsEndB += oursEdit.getEndA() - theirsEdit.getEndA(); + } + + // Add the conflict + result.add(1, oursBeginB, oursEndB, + ConflictState.FIRST_CONFLICTING_RANGE); + result.add(2, theirsBeginB, theirsEndB, + ConflictState.NEXT_CONFLICTING_RANGE); + + current = Math.max(oursEdit.getEndA(), theirsEdit.getEndA()); + oursEdit = nextOursEdit; + theirsEdit = nextTheirsEdit; + } + } + // maybe we have a common part behind the last edit: copy it to the + // result + if (current < base.size()) { + result.add(0, current, base.size(), ConflictState.NO_CONFLICT); + } + return result; + } + + /** + * Helper method which returns the next Edit for an Iterator over Edits. + * When there are no more edits left this method will return the constant + * END_EDIT. + * + * @param it + * the iterator for which the next edit should be returned + * @return the next edit from the iterator or END_EDIT if there no more + * edits + */ + private static Edit nextEdit(Iterator it) { + return (it.hasNext() ? it.next() : END_EDIT); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeChunk.java b/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeChunk.java new file mode 100644 index 000000000..2b2cf3384 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeChunk.java @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.merge; + +/** + * One chunk from a merge result. Each chunk contains a range from a + * single sequence. In case of conflicts multiple chunks are reported for one + * conflict. The conflictState tells when conflicts start and end. + */ +public class MergeChunk { + /** + * A state telling whether a MergeChunk belongs to a conflict or not. The + * first chunk of a conflict is reported with a special state to be able to + * distinguish the border between two consecutive conflicts + */ + public enum ConflictState { + /** + * This chunk does not belong to a conflict + */ + NO_CONFLICT, + + /** + * This chunk does belong to a conflict and is the first one of the + * conflicting chunks + */ + FIRST_CONFLICTING_RANGE, + + /** + * This chunk does belong to a conflict but is not the first one of the + * conflicting chunks. It's a subsequent one. + */ + NEXT_CONFLICTING_RANGE + }; + + private final int sequenceIndex; + + private final int begin; + + private final int end; + + private final ConflictState conflictState; + + /** + * Creates a new empty MergeChunk + * + * @param sequenceIndex + * determines to which sequence this chunks belongs to. Same as + * in {@link MergeResult#add(int, int, int, ConflictState)} + * @param begin + * the first element from the specified sequence which should be + * included in the merge result. Indexes start with 0. + * @param end + * specifies the end of the range to be added. The element this + * index points to is the first element which not added to the + * merge result. All elements between begin (including begin) and + * this element are added. + * @param conflictState + * the state of this chunk. See {@link ConflictState} + */ + protected MergeChunk(int sequenceIndex, int begin, int end, + ConflictState conflictState) { + this.sequenceIndex = sequenceIndex; + this.begin = begin; + this.end = end; + this.conflictState = conflictState; + } + + /** + * @return the index of the sequence to which sequence this chunks belongs + * to. Same as in + * {@link MergeResult#add(int, int, int, ConflictState)} + */ + public int getSequenceIndex() { + return sequenceIndex; + } + + /** + * @return the first element from the specified sequence which should be + * included in the merge result. Indexes start with 0. + */ + public int getBegin() { + return begin; + } + + /** + * @return the end of the range of this chunk. The element this index + * points to is the first element which not added to the merge + * result. All elements between begin (including begin) and this + * element are added. + */ + public int getEnd() { + return end; + } + + /** + * @return the state of this chunk. See {@link ConflictState} + */ + public ConflictState getConflictState() { + return conflictState; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeFormatter.java new file mode 100644 index 000000000..2ff3e9b65 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeFormatter.java @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.merge; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.jgit.diff.RawText; +import org.eclipse.jgit.merge.MergeChunk.ConflictState; + +/** + * A class to convert merge results into a Git conformant textual presentation + */ +public class MergeFormatter { + /** + * Formats the results of a merge of {@link RawText} objects in a Git + * conformant way. This method also assumes that the {@link RawText} objects + * being merged are line oriented files which use LF as delimiter. This + * method will also use LF to separate chunks and conflict metadata, + * therefore it fits only to texts that are LF-separated lines. + * + * @param out + * the outputstream where to write the textual presentation + * @param res + * the merge result which should be presented + * @param seqName + * When a conflict is reported each conflicting range will get a + * name. This name is following the "<<<<<<< " or ">>>>>>> " + * conflict markers. The names for the sequences are given in + * this list + * @param charsetName + * the name of the characterSet used when writing conflict + * metadata + * @throws IOException + */ + public void formatMerge(OutputStream out, MergeResult res, + List seqName, String charsetName) throws IOException { + String lastConflictingName = null; // is set to non-null whenever we are + // in a conflict + boolean threeWayMerge = (res.getSequences().size() == 3); + for (MergeChunk chunk : res) { + RawText seq = (RawText) res.getSequences().get( + chunk.getSequenceIndex()); + if (lastConflictingName != null + && chunk.getConflictState() != ConflictState.NEXT_CONFLICTING_RANGE) { + // found the end of an conflict + out.write((">>>>>>> " + lastConflictingName + "\n").getBytes(charsetName)); + lastConflictingName = null; + } + if (chunk.getConflictState() == ConflictState.FIRST_CONFLICTING_RANGE) { + // found the start of an conflict + out.write(("<<<<<<< " + seqName.get(chunk.getSequenceIndex()) + + "\n").getBytes(charsetName)); + lastConflictingName = seqName.get(chunk.getSequenceIndex()); + } else if (chunk.getConflictState() == ConflictState.NEXT_CONFLICTING_RANGE) { + // found another conflicting chunk + + /* + * In case of a non-three-way merge I'll add the name of the + * conflicting chunk behind the equal signs. I also append the + * name of the last conflicting chunk after the ending + * greater-than signs. If somebody knows a better notation to + * present non-three-way merges - feel free to correct here. + */ + lastConflictingName = seqName.get(chunk.getSequenceIndex()); + out.write((threeWayMerge ? "=======\n" : "======= " + + lastConflictingName + "\n").getBytes(charsetName)); + } + // the lines with conflict-metadata are written. Now write the chunk + for (int i = chunk.getBegin(); i < chunk.getEnd(); i++) { + seq.writeLine(out, i); + out.write('\n'); + } + } + // one possible leftover: if the merge result ended with a conflict we + // have to close the last conflict here + if (lastConflictingName != null) { + out.write((">>>>>>> " + lastConflictingName + "\n").getBytes(charsetName)); + } + } + + /** + * Formats the results of a merge of exactly two {@link RawText} objects in + * a Git conformant way. This convenience method accepts the names for the + * three sequences (base and the two merged sequences) as explicit + * parameters and doesn't require the caller to specify a List + * + * @param out + * the {@link OutputStream} where to write the textual + * presentation + * @param res + * the merge result which should be presented + * @param baseName + * the name ranges from the base should get + * @param oursName + * the name ranges from ours should get + * @param theirsName + * the name ranges from theirs should get + * @param charsetName + * the name of the characterSet used when writing conflict + * metadata + * @throws IOException + */ + public void formatMerge(OutputStream out, MergeResult res, String baseName, + String oursName, String theirsName, String charsetName) throws IOException { + List names = new ArrayList(3); + names.add(baseName); + names.add(oursName); + names.add(theirsName); + formatMerge(out, res, names, charsetName); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeResult.java b/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeResult.java new file mode 100644 index 000000000..0da487bc6 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/merge/MergeResult.java @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.merge; + +import java.util.Iterator; +import java.util.List; + +import org.eclipse.jgit.diff.Sequence; +import org.eclipse.jgit.merge.MergeChunk.ConflictState; +import org.eclipse.jgit.util.IntList; + +/** + * The result of merging a number of {@link Sequence} objects. These sequences + * have one common predecessor sequence. The result of a merge is a list of + * MergeChunks. Each MergeChunk contains either a range (a subsequence) from + * one of the merged sequences, a range from the common predecessor or a + * conflicting range from one of the merged sequences. A conflict will be + * reported as multiple chunks, one for each conflicting range. The first chunk + * for a conflict is marked specially to distinguish the border between two + * consecutive conflicts. + *

+ * This class does not know anything about how to present the merge result to + * the end-user. MergeFormatters have to be used to construct something human + * readable. + */ +public class MergeResult implements Iterable { + private final List sequences; + + private final IntList chunks = new IntList(); + + private boolean containsConflicts = false; + + /** + * Creates a new empty MergeResult + * + * @param sequences + * contains the common predecessor sequence at position 0 + * followed by the merged sequences. This list should not be + * modified anymore during the lifetime of this {@link MergeResult}. + */ + public MergeResult(List sequences) { + this.sequences = sequences; + } + + /** + * Adds a new range from one of the merged sequences or from the common + * predecessor. This method can add conflicting and non-conflicting ranges + * controlled by the conflictState parameter + * + * @param srcIdx + * determines from which sequence this range comes. An index of + * x specifies the x+1 element in the list of sequences + * specified to the constructor + * @param begin + * the first element from the specified sequence which should be + * included in the merge result. Indexes start with 0. + * @param end + * specifies the end of the range to be added. The element this + * index points to is the first element which not added to the + * merge result. All elements between begin (including begin) and + * this element are added. + * @param conflictState + * when set to NO_CONLICT a non-conflicting range is added. + * This will end implicitly all open conflicts added before. + */ + public void add(int srcIdx, int begin, int end, ConflictState conflictState) { + chunks.add(conflictState.ordinal()); + chunks.add(srcIdx); + chunks.add(begin); + chunks.add(end); + if (conflictState != ConflictState.NO_CONFLICT) + containsConflicts = true; + } + + /** + * Returns the common predecessor sequence and the merged sequence in one + * list. The common predecessor is is the first element in the list + * + * @return the common predecessor at position 0 followed by the merged + * sequences. + */ + public List getSequences() { + return sequences; + } + + private static final ConflictState[] states = ConflictState.values(); + + /** + * @return an iterator over the MergeChunks. The iterator does not support + * the remove operation + */ + public Iterator iterator() { + return new Iterator() { + int idx; + + public boolean hasNext() { + return (idx < chunks.size()); + } + + public MergeChunk next() { + ConflictState state = states[chunks.get(idx++)]; + int srcIdx = chunks.get(idx++); + int begin = chunks.get(idx++); + int end = chunks.get(idx++); + return new MergeChunk(srcIdx, begin, end, state); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * @return true if this merge result contains conflicts + */ + public boolean containsConflicts() { + return containsConflicts; + } +}