SimilarityRenameDetector: Only attempt to index large files once

If a file fails to index the first time the loop encounters it, the file is likely to fail to index again on the next row. Rather than wasting a huge amount of CPU to index it again and fail, remember which destination files failed to index and skip over them on each subsequent row. Because this condition is very unlikely, avoid allocating the BitSet until it's actually needed. This keeps the memory usage unaffected for the common case. Change-Id: I93509b28b61a9bba8f681a7b4df4c6127bca2a09 Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
14 years ago · 918e6e20f0
1 changed file with 10 additions and 0 deletions
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
@@ -49,6 +49,7 @@ import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.BitSet;
 import java.util.List;

 import org.eclipse.jgit.JGitText;
@@ -216,6 +217,7 @@ class SimilarityRenameDetector {

 		long[] srcSizes = new long[srcs.size()];
 		long[] dstSizes = new long[dsts.size()];
+		BitSet dstTooLarge = null;

 		// Init the size arrays to some value that indicates that we haven't
 		// calculated the size yet. Since sizes cannot be negative, -1 will work
@@ -255,6 +257,11 @@ class SimilarityRenameDetector {
 					continue;
 				}

+				if (dstTooLarge != null && dstTooLarge.get(dstIdx)) {
+					pm.update(1);
+					continue;
+				}
+
 				long srcSize = srcSizes[srcIdx];
 				if (srcSize < 0) {
 					srcSize = size(OLD, srcEnt);
@@ -279,6 +286,9 @@ class SimilarityRenameDetector {
 				try {
 					d = hash(NEW, dstEnt);
 				} catch (TableFullException tableFull) {
+					if (dstTooLarge == null)
+						dstTooLarge = new BitSet(dsts.size());
+					dstTooLarge.set(dstIdx);
 					tableOverflow = true;
 					pm.update(1);
 					continue;