Browse Source

Fixed MyersDiff to be able to handle more than 100k

MyersDiff was crashing with ArrayIndexOutOfBoundsException when
diffing huge files. This was because the snake data (begin and
end position while processing chunks of the text) was squeezed
into a single int, and with longer texts to diff these values
exceeded the range of Java int values. It is fixed by squeezing
the two ints into a long and by adding the LongList helper
class.

Change-Id: Iacb9082e1b076e994d1486aa8e512342ad7432b3
Signed-off-by: Christian Halstrick <christian.halstrick@sap.com>
stable-0.7
Christian Halstrick 15 years ago
parent
commit
b3e4ac2622
  1. 33
      org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
  2. 152
      org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java

33
org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java

@ -49,6 +49,7 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import org.eclipse.jgit.util.IntList; import org.eclipse.jgit.util.IntList;
import org.eclipse.jgit.util.LongList;
public class MyersDiff { public class MyersDiff {
protected EditList edits; protected EditList edits;
@ -202,7 +203,7 @@ public class MyersDiff {
abstract class EditPaths { abstract class EditPaths {
private IntList x = new IntList(); private IntList x = new IntList();
private IntList snake = new IntList(); private LongList snake = new LongList();
int beginK, endK, middleK; int beginK, endK, middleK;
int prevBeginK, prevEndK; int prevBeginK, prevEndK;
/* if we hit one end early, no need to look further */ /* if we hit one end early, no need to look further */
@ -222,7 +223,7 @@ if (k < beginK || k > endK)
return x.get(getIndex(d, k)); return x.get(getIndex(d, k));
} }
final int getSnake(int d, int k) { final long getSnake(int d, int k) {
// TODO: remove // TODO: remove
if (k < beginK || k > endK) if (k < beginK || k > endK)
throw new RuntimeException("k " + k + " not in " + beginK + " - " + endK); throw new RuntimeException("k " + k + " not in " + beginK + " - " + endK);
@ -253,22 +254,23 @@ if (k < beginK || k > endK)
abstract int getRight(int x); abstract int getRight(int x);
abstract boolean isBetter(int left, int right); abstract boolean isBetter(int left, int right);
abstract void adjustMinMaxK(final int k, final int x); abstract void adjustMinMaxK(final int k, final int x);
abstract boolean meets(int d, int k, int x, int snake); abstract boolean meets(int d, int k, int x, long snake);
final int newSnake(int k, int x) { final long newSnake(int k, int x) {
int y = k + x; long y = k + x;
return x + (endA + 1) * y; long ret = ((long) x) << 32;
return ret | y;
} }
final int snake2x(int snake) { final int snake2x(long snake) {
return snake % (endA + 1); return (int) (snake >>> 32);
} }
final int snake2y(int snake) { final int snake2y(long snake) {
return snake / (endA + 1); return (int) snake;
} }
final boolean makeEdit(int snake1, int snake2) { final boolean makeEdit(long snake1, long snake2) {
int x1 = snake2x(snake1), x2 = snake2x(snake2); int x1 = snake2x(snake1), x2 = snake2x(snake2);
int y1 = snake2y(snake1), y2 = snake2y(snake2); int y1 = snake2y(snake1), y2 = snake2y(snake2);
/* /*
@ -301,7 +303,7 @@ if (k < beginK || k > endK)
// go backwards so that we can avoid temp vars // go backwards so that we can avoid temp vars
for (int k = endK; k >= beginK; k -= 2) { for (int k = endK; k >= beginK; k -= 2) {
int left = -1, right = -1; int left = -1, right = -1;
int leftSnake = -1, rightSnake = -1; long leftSnake = -1L, rightSnake = -1L;
// TODO: refactor into its own function // TODO: refactor into its own function
if (k > prevBeginK) { if (k > prevBeginK) {
int i = getIndex(d - 1, k - 1); int i = getIndex(d - 1, k - 1);
@ -325,7 +327,8 @@ if (k < beginK || k > endK)
return true; return true;
right = getRight(end); right = getRight(end);
} }
int newX, newSnake; int newX;
long newSnake;
if (k >= prevEndK || if (k >= prevEndK ||
(k > prevBeginK && (k > prevBeginK &&
isBetter(left, right))) { isBetter(left, right))) {
@ -376,7 +379,7 @@ if (k < beginK || k > endK)
} }
} }
final boolean meets(int d, int k, int x, int snake) { final boolean meets(int d, int k, int x, long snake) {
if (k < backward.beginK || k > backward.endK) if (k < backward.beginK || k > backward.endK)
return false; return false;
// TODO: move out of loop // TODO: move out of loop
@ -418,7 +421,7 @@ if (k < beginK || k > endK)
} }
} }
final boolean meets(int d, int k, int x, int snake) { final boolean meets(int d, int k, int x, long snake) {
if (k < forward.beginK || k > forward.endK) if (k < forward.beginK || k > forward.endK)
return false; return false;
// TODO: move out of loop // TODO: move out of loop

152
org.eclipse.jgit/src/org/eclipse/jgit/util/LongList.java

@ -0,0 +1,152 @@
/*
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
* Copyright (C) 2009, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util;
/**
 * A more efficient List&lt;Long&gt; using a primitive long array.
 * <p>
 * Entries are stored in a backing array that is enlarged on demand. The list
 * performs no synchronization of its own.
 */
public class LongList {
	/**
	 * Largest backing array this list will try to allocate. A few slots below
	 * {@link Integer#MAX_VALUE} because some VMs cannot allocate arrays of the
	 * full int range.
	 */
	private static final int MAX_CAPACITY = Integer.MAX_VALUE - 8;

	/** Backing storage; only the first {@link #count} slots are valid. */
	private long[] entries;

	/** Number of valid entries in {@link #entries}. */
	private int count;

	/** Create an empty list with a default capacity. */
	public LongList() {
		this(10);
	}

	/**
	 * Create an empty list with the specified capacity.
	 *
	 * @param capacity
	 *            number of entries the list can initially hold.
	 */
	public LongList(final int capacity) {
		entries = new long[capacity];
	}

	/** @return number of entries in this list */
	public int size() {
		return count;
	}

	/**
	 * @param i
	 *            index to read, must be in the range [0, {@link #size()}).
	 * @return the number at the specified index
	 * @throws ArrayIndexOutOfBoundsException
	 *             the index outside the valid range
	 */
	public long get(final int i) {
		if (i < 0 || count <= i)
			throw new ArrayIndexOutOfBoundsException(i);
		return entries[i];
	}

	/** Empty this list */
	public void clear() {
		// Stale values stay in the array; they are unreachable through
		// get() and are overwritten by later add/set/fillTo calls.
		count = 0;
	}

	/**
	 * Add an entry to the end of the list.
	 *
	 * @param n
	 *            the number to add.
	 */
	public void add(final long n) {
		if (count == entries.length)
			grow();
		entries[count++] = n;
	}

	/**
	 * Assign an entry in the list.
	 *
	 * @param index
	 *            index to set, must be in the range [0, {@link #size()}]. An
	 *            index equal to {@link #size()} appends the value to the end
	 *            of the list.
	 * @param n
	 *            value to store at the position.
	 * @throws ArrayIndexOutOfBoundsException
	 *             the index is negative or greater than {@link #size()}
	 */
	public void set(final int index, final long n) {
		if (index < 0 || count < index)
			throw new ArrayIndexOutOfBoundsException(index);
		else if (count == index)
			add(n);
		else
			entries[index] = n;
	}

	/**
	 * Pad the list with entries.
	 *
	 * @param toIndex
	 *            index position to stop filling at. 0 inserts no filler. 1
	 *            ensures the list has a size of 1, adding <code>val</code> if
	 *            the list is currently empty.
	 * @param val
	 *            value to insert into padded positions.
	 */
	public void fillTo(int toIndex, final long val) {
		if (toIndex <= count)
			return;
		while (entries.length < toIndex)
			grow();
		// Slots in [count, toIndex) may hold stale data left by clear(),
		// so they are always overwritten.
		java.util.Arrays.fill(entries, count, toIndex, val);
		count = toIndex;
	}

	/**
	 * Enlarge the backing array, keeping the current entries.
	 * <p>
	 * The new size is computed with long arithmetic so it cannot wrap around
	 * to a negative value for very large lists; it is capped at
	 * {@link #MAX_CAPACITY}.
	 */
	private void grow() {
		final int oldCapacity = entries.length;
		if (oldCapacity >= MAX_CAPACITY)
			throw new OutOfMemoryError("LongList cannot grow beyond " + MAX_CAPACITY + " entries");
		final long wanted = ((long) oldCapacity + 16) * 3 / 2;
		final int newCapacity = (int) Math.min(wanted, (long) MAX_CAPACITY);
		final long[] n = new long[newCapacity];
		System.arraycopy(entries, 0, n, 0, count);
		entries = n;
	}

	/** @return the entries formatted as <code>[a, b, c]</code> */
	@Override
	public String toString() {
		final StringBuilder r = new StringBuilder();
		r.append('[');
		for (int i = 0; i < count; i++) {
			if (i > 0)
				r.append(", ");
			r.append(entries[i]);
		}
		r.append(']');
		return r.toString();
	}
}
Loading…
Cancel
Save