Browse Source

Perform common start/end elimination by default for DiffAlgorithm

As it turns out, every single diff algorithm we might try to
implement can benefit from using the SequenceComparator's native
concept of the simple reduceCommonStartEnd() step.  For most inputs,
there can be a significant number of elements that can be removed
from the space the DiffAlgorithm needs to consider, which will
reduce the overall running time for the final solution.

Pool this logic inside of DiffAlgorithm itself as a default, but
permit a specific algorithm to override it when necessary.

Convert MyersDiff to use this reduction to reduce the space it
needs to search, making it perform slightly better on common inputs.

Change-Id: I14004d771117e4a4ab2a02cace8deaeda9814bc1
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.10
Shawn O. Pearce 14 years ago
parent
commit
857d68d173
  1. 61
      org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java
  2. 24
      org.eclipse.jgit/src/org/eclipse/jgit/diff/EditList.java
  3. 52
      org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
  4. 85
      org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiff.java

61
org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java

@ -52,14 +52,12 @@ package org.eclipse.jgit.diff;
* algorithms may support parameterization, in which case the caller can create * algorithms may support parameterization, in which case the caller can create
* a unique instance per thread. * a unique instance per thread.
*/ */
public interface DiffAlgorithm { public abstract class DiffAlgorithm {
/** /**
* Compare two sequences and identify a list of edits between them. * Compare two sequences and identify a list of edits between them.
* *
* @param <S> * @param <S>
* type of sequence being compared. * type of sequence being compared.
* @param <C>
* type of comparator to evaluate the sequence elements.
* @param cmp * @param cmp
* the comparator supplying the element equivalence function. * the comparator supplying the element equivalence function.
* @param a * @param a
@ -74,6 +72,57 @@ public interface DiffAlgorithm {
* sequences are identical according to {@code cmp}'s rules. The * sequences are identical according to {@code cmp}'s rules. The
* result list is never null. * result list is never null.
*/ */
public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff( public <S extends Sequence> EditList diff(
C cmp, S a, S b); SequenceComparator<? super S> cmp, S a, S b) {
Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
switch (region.getType()) {
case INSERT:
case DELETE:
return EditList.singleton(region);
case REPLACE: {
SubsequenceComparator<S> cs = new SubsequenceComparator<S>(cmp);
Subsequence<S> as = Subsequence.a(a, region);
Subsequence<S> bs = Subsequence.b(b, region);
return Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
}
case EMPTY:
return new EditList(0);
default:
throw new IllegalStateException();
}
}
private static <S extends Sequence> Edit coverEdit(S a, S b) {
return new Edit(0, a.size(), 0, b.size());
}
/**
* Compare two sequences and identify a list of edits between them.
*
* This method should be invoked only after the two sequences have been
* proven to have no common starting or ending elements. The expected
* elimination of common starting and ending elements is automatically
* performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
* method, which invokes this method using {@link Subsequence}s.
*
* @param <S>
* type of sequence being compared.
* @param cmp
* the comparator supplying the element equivalence function.
* @param a
* the first (also known as old or pre-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'A' side: {@link Edit#getBeginA()}, {@link Edit#getEndA()}.
* @param b
* the second (also known as new or post-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'B' side: {@link Edit#getBeginB()}, {@link Edit#getEndB()}.
* @return a modifiable edit list comparing the two sequences.
*/
public abstract <S extends Sequence> EditList diffNonCommon(
SequenceComparator<? super S> cmp, S a, S b);
} }

24
org.eclipse.jgit/src/org/eclipse/jgit/diff/EditList.java

@ -48,6 +48,19 @@ import java.util.ArrayList;
/** Specialized list of {@link Edit}s in a document. */ /** Specialized list of {@link Edit}s in a document. */
public class EditList extends AbstractList<Edit> { public class EditList extends AbstractList<Edit> {
/**
* Construct an edit list containing a single edit.
*
* @param edit
* the edit to return in the list.
* @return list containing only {@code edit}.
*/
public static EditList singleton(Edit edit) {
EditList res = new EditList(1);
res.add(edit);
return res;
}
private final ArrayList<Edit> container; private final ArrayList<Edit> container;
/** Create a new, empty edit list. */ /** Create a new, empty edit list. */
@ -55,6 +68,17 @@ public class EditList extends AbstractList<Edit> {
container = new ArrayList<Edit>(); container = new ArrayList<Edit>();
} }
/**
* Create an empty edit list with the specified capacity.
*
* @param capacity
* the initial capacity of the edit list. If additional edits are
* added to the list, it will be grown to support them.
*/
public EditList(int capacity) {
container = new ArrayList<Edit>(capacity);
}
@Override @Override
public int size() { public int size() {
return container.size(); return container.size();

52
org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java

@ -108,28 +108,9 @@ import org.eclipse.jgit.util.LongList;
public class MyersDiff<S extends Sequence> { public class MyersDiff<S extends Sequence> {
/** Singleton instance of MyersDiff. */ /** Singleton instance of MyersDiff. */
public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() { public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() {
public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff( public <S extends Sequence> EditList diffNonCommon(
C cmp, S a, S b) { SequenceComparator<? super S> cmp, S a, S b) {
Edit region = new Edit(0, a.size(), 0, b.size()); return new MyersDiff<S>(cmp, a, b).edits;
region = cmp.reduceCommonStartEnd(a, b, region);
switch (region.getType()) {
case INSERT:
case DELETE: {
EditList r = new EditList();
r.add(region);
return r;
}
case REPLACE:
return new MyersDiff<S>(cmp, a, b, region).getEdits();
case EMPTY:
return new EditList();
default:
throw new IllegalStateException();
}
} }
}; };
@ -139,38 +120,27 @@ public class MyersDiff<S extends Sequence> {
protected EditList edits; protected EditList edits;
/** Comparison function for sequences. */ /** Comparison function for sequences. */
protected HashedSequenceComparator<Subsequence<S>> cmp; protected HashedSequenceComparator<S> cmp;
/** /**
* The first text to be compared. Referred to as "Text A" in the comments * The first text to be compared. Referred to as "Text A" in the comments
*/ */
protected HashedSequence<Subsequence<S>> a; protected HashedSequence<S> a;
/** /**
* The second text to be compared. Referred to as "Text B" in the comments * The second text to be compared. Referred to as "Text B" in the comments
*/ */
protected HashedSequence<Subsequence<S>> b; protected HashedSequence<S> b;
private MyersDiff(SequenceComparator<? super S> cmp, S a, S b, Edit region) { private MyersDiff(SequenceComparator<? super S> cmp, S a, S b) {
Subsequence<S> as = Subsequence.a(a, region); HashedSequencePair<S> pair;
Subsequence<S> bs = Subsequence.b(b, region);
HashedSequencePair<Subsequence<S>> pair = new HashedSequencePair<Subsequence<S>>(
new SubsequenceComparator<S>(cmp), as, bs);
pair = new HashedSequencePair<S>(cmp, a, b);
this.cmp = pair.getComparator(); this.cmp = pair.getComparator();
this.a = pair.getA(); this.a = pair.getA();
this.b = pair.getB(); this.b = pair.getB();
calculateEdits(); calculateEdits();
Subsequence.toBase(edits, as, bs);
}
/**
* @return the list of edits found during the last call to {@link #calculateEdits()}
*/
public EditList getEdits() {
return edits;
} }
// TODO: use ThreadLocal for future multi-threaded operations // TODO: use ThreadLocal for future multi-threaded operations
@ -565,8 +535,8 @@ if (k < beginK || k > endK)
try { try {
RawText a = new RawText(new java.io.File(args[0])); RawText a = new RawText(new java.io.File(args[0]));
RawText b = new RawText(new java.io.File(args[1])); RawText b = new RawText(new java.io.File(args[1]));
EditList res = INSTANCE.diff(RawTextComparator.DEFAULT, a, b); EditList r = INSTANCE.diff(RawTextComparator.DEFAULT, a, b);
System.out.println(res.toString()); System.out.println(r.toString());
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }

85
org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiff.java

@ -99,7 +99,7 @@ package org.eclipse.jgit.diff;
* by the prior step 2 or 5.</li> * by the prior step 2 or 5.</li>
* </ol> * </ol>
*/ */
public class PatienceDiff implements DiffAlgorithm { public class PatienceDiff extends DiffAlgorithm {
/** Algorithm we use when there are no common unique lines in a region. */ /** Algorithm we use when there are no common unique lines in a region. */
private DiffAlgorithm fallback; private DiffAlgorithm fallback;
@ -114,38 +114,10 @@ public class PatienceDiff implements DiffAlgorithm {
fallback = alg; fallback = alg;
} }
public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff( public <S extends Sequence> EditList diffNonCommon(
C cmp, S a, S b) { SequenceComparator<? super S> cmp, S a, S b) {
Edit region = new Edit(0, a.size(), 0, b.size());
region = cmp.reduceCommonStartEnd(a, b, region);
switch (region.getType()) {
case INSERT:
case DELETE: {
EditList r = new EditList();
r.add(region);
return r;
}
case REPLACE: {
SubsequenceComparator<S> cs = new SubsequenceComparator<S>(cmp);
Subsequence<S> as = Subsequence.a(a, region);
Subsequence<S> bs = Subsequence.b(b, region);
return Subsequence.toBase(diffImpl(cs, as, bs), as, bs);
}
case EMPTY:
return new EditList();
default:
throw new IllegalStateException();
}
}
private <S extends Sequence, C extends SequenceComparator<? super S>> EditList diffImpl(
C cmp, S a, S b) {
State<S> s = new State<S>(new HashedSequencePair<S>(cmp, a, b)); State<S> s = new State<S>(new HashedSequencePair<S>(cmp, a, b));
s.diff(new Edit(0, s.a.size(), 0, s.b.size()), null, 0, 0); s.diffReplace(new Edit(0, s.a.size(), 0, s.b.size()), null, 0, 0);
return s.edits; return s.edits;
} }
@ -166,25 +138,12 @@ public class PatienceDiff implements DiffAlgorithm {
this.edits = new EditList(); this.edits = new EditList();
} }
private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) { void diffReplace(Edit r, long[] pCommon, int pIdx, int pEnd) {
switch (r.getType()) {
case INSERT:
case DELETE:
edits.add(r);
return;
case REPLACE:
break;
case EMPTY:
default:
throw new IllegalStateException();
}
PatienceDiffIndex<S> p; PatienceDiffIndex<S> p;
Edit lcs;
p = new PatienceDiffIndex<S>(cmp, a, b, r, pCommon, pIdx, pEnd); p = new PatienceDiffIndex<S>(cmp, a, b, r, pCommon, pIdx, pEnd);
Edit lcs = p.findLongestCommonSequence(); lcs = p.findLongestCommonSequence();
if (lcs != null) { if (lcs != null) {
pCommon = p.nCommon; pCommon = p.nCommon;
@ -196,20 +155,40 @@ public class PatienceDiff implements DiffAlgorithm {
diff(r.after(lcs), pCommon, pIdx + 1, pEnd); diff(r.after(lcs), pCommon, pIdx + 1, pEnd);
} else if (fallback != null) { } else if (fallback != null) {
p = null;
pCommon = null; pCommon = null;
p = null;
SubsequenceComparator<HashedSequence<S>> cs; SubsequenceComparator<HashedSequence<S>> cs = subcmp();
cs = new SubsequenceComparator<HashedSequence<S>>(cmp);
Subsequence<HashedSequence<S>> as = Subsequence.a(a, r); Subsequence<HashedSequence<S>> as = Subsequence.a(a, r);
Subsequence<HashedSequence<S>> bs = Subsequence.b(b, r); Subsequence<HashedSequence<S>> bs = Subsequence.b(b, r);
EditList res = fallback.diff(cs, as, bs);
EditList res = fallback.diffNonCommon(cs, as, bs);
edits.addAll(Subsequence.toBase(res, as, bs)); edits.addAll(Subsequence.toBase(res, as, bs));
} else { } else {
edits.add(r); edits.add(r);
} }
} }
private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) {
switch (r.getType()) {
case INSERT:
case DELETE:
edits.add(r);
break;
case REPLACE:
diffReplace(r, pCommon, pIdx, pEnd);
break;
case EMPTY:
default:
throw new IllegalStateException();
}
}
private SubsequenceComparator<HashedSequence<S>> subcmp() {
return new SubsequenceComparator<HashedSequence<S>>(cmp);
}
} }
} }

Loading…
Cancel
Save