From 4a985f5aa858a71561d55c9eb3ce007768c4d0c0 Mon Sep 17 00:00:00 2001 From: KB Sriram Date: Wed, 22 Mar 2017 11:42:38 -0700 Subject: [PATCH] Make diff locations more consistent DiffAlgorithms can return different edit locations for inserts or deletes, if they can be "shifted" up or down repeating blocks of lines. This causes the 3-way merge to apply both edits, resulting in incorrectly removing or duplicating lines. Augment an existing "tidy-up" stage in DiffAlgorithm to move all shiftable edits (not just the last INSERT edit) to a consistent location, and add test cases for previously incorrect merges. Bug: 514095 Change-Id: I5fe150a2fc04e1cdb012d22609d86df16dfb0b7e Signed-off-by: KB Sriram --- .../jgit/diff/AbstractDiffTestCase.java | 26 ++++ .../jgit/merge/MergeAlgorithmTest.java | 39 ++++++ .../org/eclipse/jgit/diff/DiffAlgorithm.java | 119 +++++++++++++++--- .../src/org/eclipse/jgit/diff/Edit.java | 14 +++ 4 files changed, 181 insertions(+), 17 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractDiffTestCase.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractDiffTestCase.java index b8f8dcb5a..4130b7ee5 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractDiffTestCase.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractDiffTestCase.java @@ -195,6 +195,32 @@ public abstract class AbstractDiffTestCase { assertEquals(2, r.size()); } + @Test + public void testEdit_DeleteNearCommonTail() { + EditList r = diff(t("aCq}nD}nb"), t("aq}nb")); + assertEquals(new Edit(1, 2, 1, 1), r.get(0)); + assertEquals(new Edit(5, 8, 4, 4), r.get(1)); + assertEquals(2, r.size()); + } + + @Test + public void testEdit_DeleteNearCommonCenter() { + EditList r = diff(t("abcd123123uvwxpq"), t("aBcd123uvwxPq")); + assertEquals(new Edit(1, 2, 1, 2), r.get(0)); + assertEquals(new Edit(7, 10, 7, 7), r.get(1)); + assertEquals(new Edit(14, 15, 11, 12), r.get(2)); + assertEquals(3, r.size()); + } + + @Test + public void testEdit_InsertNearCommonCenter() { + EditList r = diff(t("aBcd123uvwxPq"), t("abcd123123uvwxpq")); + assertEquals(new Edit(1, 2, 1, 2), r.get(0)); + assertEquals(new Edit(7, 7, 7, 10), r.get(1)); + assertEquals(new Edit(11, 12, 14, 15), r.get(2)); + assertEquals(3, r.size()); + } + @Test public void testEdit_LinuxBug() { EditList r = diff(t("a{bcdE}z"), t("a{0bcdEE}z")); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/merge/MergeAlgorithmTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/merge/MergeAlgorithmTest.java index d4a3d62da..5af62b670 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/merge/MergeAlgorithmTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/merge/MergeAlgorithmTest.java @@ -131,6 +131,45 @@ public class MergeAlgorithmTest { merge("abcdefghij", "aZZZZfZhZj", "abYdYYYYiY")); } + /** + * Merge two modifications with a shared delete at the end. The underlying + * diff algorithm has to provide consistent edit results to get the expected + * merge result. + * + * @throws IOException + */ + @Test + public void testTwoModificationsWithSharedDelete() throws IOException { + assertEquals(t("Cb}n}"), + merge("ab}n}n}", "ab}n}", "Cb}n}")); + } + + /** + * Merge modifications with a shared insert in the middle. The + * underlying diff algorithm has to provide consistent edit + * results to get the expected merge result. + * + * @throws IOException + */ + @Test + public void testModificationsWithMiddleInsert() throws IOException { + assertEquals(t("aBcd123123uvwxPq"), + merge("abcd123uvwxpq", "aBcd123123uvwxPq", "abcd123123uvwxpq")); + } + + /** + * Merge modifications with a shared delete in the middle. The + * underlying diff algorithm has to provide consistent edit + * results to get the expected merge result. + * + * @throws IOException + */ + @Test + public void testModificationsWithMiddleDelete() throws IOException { + assertEquals(t("Abz}z123Q"), + merge("abz}z}z123q", "Abz}z123Q", "abz}z123q")); + } + /** * Test a conflicting region at the very start of the text. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java index bd6e5c859..5f01366c4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java @@ -121,23 +121,7 @@ public abstract class DiffAlgorithm { Subsequence as = Subsequence.a(a, region); Subsequence bs = Subsequence.b(b, region); EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs); - - // The last insertion may need to be shifted later if it - // inserts elements that were previously reduced out as - // common at the end. - // - Edit last = e.get(e.size() - 1); - if (last.getType() == Edit.Type.INSERT) { - while (last.endB < b.size() - && cmp.equals(b, last.beginB, b, last.endB)) { - last.beginA++; - last.endA++; - last.beginB++; - last.endB++; - } - } - - return e; + return normalize(cmp, e, a, b); } case EMPTY: @@ -152,6 +136,107 @@ public abstract class DiffAlgorithm { return new Edit(0, a.size(), 0, b.size()); } + /** + * Reorganize an {@link EditList} for better diff consistency. + *

+ * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or + * {@link Edit.Type#DELETE} edits that can be "shifted". For + * example, the deleted section + *

+	 * -a
+	 * -b
+	 * -c
+	 *  a
+	 *  b
+	 *  c
+	 * 
+ * can be shifted down by 1, 2 or 3 locations. + *

+ * To avoid later merge issues, we shift such edits to a + * consistent location. {@code normalize} uses a simple strategy of + * shifting such edits to their latest possible location. + *

+ * This strategy may not always produce an aesthetically pleasing + * diff. For instance, it works well with + *

+	 *  function1 {
+	 *   ...
+	 *  }
+	 *
+	 * +function2 {
+	 * + ...
+	 * +}
+	 * +
+	 * function3 {
+	 * ...
+	 * }
+	 * 
+ * but less so for + *
+	 *  #
+	 *  # comment1
+	 *  #
+	 *  function1() {
+	 *  }
+	 *
+	 *  #
+	 * +# comment3
+	 * +#
+	 * +function3() {
+	 * +}
+	 * +
+	 * +#
+	 *  # comment2
+	 *  #
+	 *  function2() {
+	 *  }
+	 * 
+ * More + * sophisticated strategies are possible, say by calculating a + * suitable "aesthetic cost" for each possible position and using + * the lowest cost, but {@code normalize} just shifts edits + * to the end as much as possible. + * + * @param + * type of sequence being compared. + * @param cmp + * the comparator supplying the element equivalence function. + * @param e + * a modifiable edit list comparing the provided sequences. + * @param a + * the first (also known as old or pre-image) sequence. + * @param b + * the second (also known as new or post-image) sequence. + * @return a modifiable edit list with edit regions shifted to their + * latest possible location. The result list is never null. + * @since 4.7 + */ + private static EditList normalize( + SequenceComparator cmp, EditList e, S a, S b) { + Edit prev = null; + for (int i = e.size() - 1; i >= 0; i--) { + Edit cur = e.get(i); + Edit.Type curType = cur.getType(); + + int maxA = (prev == null) ? a.size() : prev.beginA; + int maxB = (prev == null) ? b.size() : prev.beginB; + + if (curType == Edit.Type.INSERT) { + while (cur.endA < maxA && cur.endB < maxB + && cmp.equals(b, cur.beginB, b, cur.endB)) { + cur.shift(1); + } + } else if (curType == Edit.Type.DELETE) { + while (cur.endA < maxA && cur.endB < maxB + && cmp.equals(a, cur.beginA, a, cur.endA)) { + cur.shift(1); + } + } + prev = cur; + } + return e; + } + /** * Compare two sequences and identify a list of edits between them. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/Edit.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/Edit.java index 7eecd1351..3ac46ea12 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/Edit.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/Edit.java @@ -169,6 +169,20 @@ public class Edit { return endB - beginB; } + /** + * Move the edit region by the specified amount. + * @param amount + * the region is shifted by this amount, and can be + * positive or negative. + * @since 4.7 + */ + public final void shift(int amount) { + beginA += amount; + endA += amount; + beginB += amount; + endB += amount; + } + /** * Construct a new edit representing the region before cut. *