diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java index 7e42e5358..1da5828b3 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java @@ -48,10 +48,11 @@ import java.io.IOException; import junit.framework.TestCase; +import org.eclipse.jgit.diff.SimilarityIndex.TableFullException; import org.eclipse.jgit.lib.Constants; public class SimilarityIndexTest extends TestCase { - public void testIndexingSmallObject() { + public void testIndexingSmallObject() throws TableFullException { SimilarityIndex si = hash("" // + "A\n" // + "B\n" // @@ -70,7 +71,8 @@ public class SimilarityIndexTest extends TestCase { assertEquals(2, si.count(si.findIndex(key_D))); } - public void testIndexingLargeObject() throws IOException { + public void testIndexingLargeObject() throws IOException, + TableFullException { byte[] in = ("" // + "A\n" // + "B\n" // @@ -81,7 +83,7 @@ public class SimilarityIndexTest extends TestCase { assertEquals(2, si.size()); } - public void testCommonScore_SameFiles() { + public void testCommonScore_SameFiles() throws TableFullException { String text = "" // + "A\n" // + "B\n" // @@ -96,21 +98,22 @@ public class SimilarityIndexTest extends TestCase { assertEquals(100, dst.score(src, 100)); } - public void testCommonScore_EmptyFiles() { + public void testCommonScore_EmptyFiles() throws TableFullException { SimilarityIndex src = hash(""); SimilarityIndex dst = hash(""); assertEquals(0, src.common(dst)); assertEquals(0, dst.common(src)); } - public void testCommonScore_TotallyDifferentFiles() { + public void testCommonScore_TotallyDifferentFiles() + throws TableFullException { SimilarityIndex src = hash("A\n"); SimilarityIndex dst = hash("D\n"); assertEquals(0, src.common(dst)); assertEquals(0, dst.common(src)); } - public void 
testCommonScore_SimiliarBy75() { + public void testCommonScore_SimiliarBy75() throws TableFullException { SimilarityIndex src = hash("A\nB\nC\nD\n"); SimilarityIndex dst = hash("A\nB\nC\nQ\n"); assertEquals(6, src.common(dst)); @@ -120,10 +123,11 @@ public class SimilarityIndexTest extends TestCase { assertEquals(75, dst.score(src, 100)); } - private static SimilarityIndex hash(String text) { + private static SimilarityIndex hash(String text) throws TableFullException { SimilarityIndex src = new SimilarityIndex() { @Override - void hash(byte[] raw, int ptr, final int end) { + void hash(byte[] raw, int ptr, final int end) + throws TableFullException { while (ptr < end) { int hash = raw[ptr] & 0xff; int start = ptr; @@ -143,7 +147,7 @@ public class SimilarityIndexTest extends TestCase { return src; } - private static int keyFor(String line) { + private static int keyFor(String line) throws TableFullException { SimilarityIndex si = hash(line); assertEquals("single line scored", 1, si.size()); return si.key(0); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java index 66218f640..9d9a96d8d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java @@ -57,6 +57,7 @@ import java.util.List; import org.eclipse.jgit.JGitText; import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.diff.SimilarityIndex.TableFullException; import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.NullProgressMonitor; @@ -445,14 +446,23 @@ public class RenameDetector { private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d) throws IOException { - SimilarityIndex src = new SimilarityIndex(); - src.hash(reader.open(OLD, d)); - src.sort(); - - SimilarityIndex dst = new SimilarityIndex(); - dst.hash(reader.open(NEW, d)); - dst.sort(); 
- return src.score(dst, 100); + try { + SimilarityIndex src = new SimilarityIndex(); + src.hash(reader.open(OLD, d)); + src.sort(); + + SimilarityIndex dst = new SimilarityIndex(); + dst.hash(reader.open(NEW, d)); + dst.sort(); + return src.score(dst, 100); + } catch (TableFullException tableFull) { + // If either table overflowed while being constructed, don't allow + // the pair to be broken. Returning 1 higher than breakScore will + // ensure its not similar, but not quite dissimilar enough to break. + // + overRenameLimit = true; + return breakScore + 1; + } } private void findContentRenames(ContentSource.Pair reader, @@ -468,6 +478,7 @@ public class RenameDetector { d = new SimilarityRenameDetector(reader, deleted, added); d.setRenameScore(getRenameScore()); d.compute(pm); + overRenameLimit |= d.isTableOverflow(); deleted = d.getLeftOverSources(); added = d.getLeftOverDestinations(); entries.addAll(d.getMatches()); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java index 853132589..045300613 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java @@ -65,8 +65,8 @@ import org.eclipse.jgit.lib.ObjectStream; * file are discovered. */ class SimilarityIndex { - /** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */ - private static final int MAX_HASH_BITS = 17; + /** A special {@link TableFullException} used in place of OutOfMemoryError. */ + private static final TableFullException TABLE_FULL_OUT_OF_MEMORY = new TableFullException(); /** * Shift to apply before storing a key. @@ -82,14 +82,17 @@ class SimilarityIndex { /** Number of non-zero entries in {@link #idHash}. */ private int idSize; + /** {@link #idSize} that triggers {@link #idHash} to double in size. */ + private int idGrowAt; + /** * Pairings of content keys and counters. *
* Slots in the table are actually two ints wedged into a single long. The
- * upper {@link #MAX_HASH_BITS} bits stores the content key, and the
- * remaining lower bits stores the number of bytes associated with that key.
- * Empty slots are denoted by 0, which cannot occur because the count cannot
- * be 0. Values can only be positive, which we enforce during key addition.
+ * upper 32 bits store the content key, and the remaining lower bits store
+ * the number of bytes associated with that key. Empty slots are denoted by
+ * 0, which cannot occur because the count cannot be 0. Values can only be
+ * positive, which we enforce during key addition.
*/
private long[] idHash;
@@ -99,6 +102,7 @@ class SimilarityIndex {
SimilarityIndex() {
idHashBits = 8;
idHash = new long[1 << idHashBits];
+ idGrowAt = growAt(idHashBits);
}
long getFileSize() {
@@ -109,7 +113,8 @@ class SimilarityIndex {
fileSize = size;
}
- void hash(ObjectLoader obj) throws MissingObjectException, IOException {
+ void hash(ObjectLoader obj) throws MissingObjectException, IOException,
+ TableFullException {
if (obj.isLarge()) {
ObjectStream in = obj.openStream();
try {
@@ -125,7 +130,7 @@ class SimilarityIndex {
}
}
- void hash(byte[] raw, int ptr, final int end) {
+ void hash(byte[] raw, int ptr, final int end) throws TableFullException {
while (ptr < end) {
int hash = 5381;
int start = ptr;
@@ -141,7 +146,8 @@ class SimilarityIndex {
}
}
- void hash(InputStream in, long remaining) throws IOException {
+ void hash(InputStream in, long remaining) throws IOException,
+ TableFullException {
byte[] buf = new byte[4096];
int ptr = 0;
int cnt = 0;
@@ -268,7 +274,7 @@ class SimilarityIndex {
return (idHash.length - idSize) + idx;
}
- void add(int key, int cnt) {
+ void add(int key, int cnt) throws TableFullException {
key = (key * 0x9e370001) >>> 1; // Mix bits and ensure not negative.
int j = slot(key);
@@ -276,7 +282,7 @@ class SimilarityIndex {
long v = idHash[j];
if (v == 0) {
// Empty slot in the table, store here.
- if (shouldGrow()) {
+ if (idGrowAt <= idSize) {
grow();
j = slot(key);
continue;
@@ -304,16 +310,26 @@ class SimilarityIndex {
return key >>> (31 - idHashBits);
}
- private boolean shouldGrow() {
- return idHashBits < MAX_HASH_BITS && idHash.length <= idSize * 2;
+ private static int growAt(int idHashBits) { // entry count at which a table of (1 << idHashBits) slots must double
+ return (int) ((1L << idHashBits) * (idHashBits - 3) / idHashBits); // long math: the int product overflows once idHashBits reaches 27
}
- private void grow() {
+ private void grow() throws TableFullException {
+ if (idHashBits == 30)
+ throw new TableFullException();
+
long[] oldHash = idHash;
int oldSize = idHash.length;
idHashBits++;
- idHash = new long[1 << idHashBits];
+ idGrowAt = growAt(idHashBits);
+
+ try {
+ idHash = new long[1 << idHashBits];
+ } catch (OutOfMemoryError noMemory) {
+ throw TABLE_FULL_OUT_OF_MEMORY;
+ }
+
for (int i = 0; i < oldSize; i++) {
long v = oldHash[i];
if (v != 0) {
@@ -333,4 +349,8 @@ class SimilarityIndex {
private static int countOf(long v) {
return (int) v;
}
+
+ static class TableFullException extends Exception { // checked signal from grow(): idHashBits already at 30, or allocating the larger table hit OutOfMemoryError
+ private static final long serialVersionUID = 1L;
+ }
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
index 3075c223a..89e71e666 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
@@ -53,6 +53,7 @@ import java.util.List;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
+import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ProgressMonitor;
@@ -110,6 +111,9 @@ class SimilarityRenameDetector {
/** Score a pair must exceed to be considered a rename. */
private int renameScore = 60;
+ /** Set if any {@link SimilarityIndex.TableFullException} occurs. */
+ private boolean tableOverflow;
+
private List