Browse Source

Added file size based rename detection optimization

Prior to this change, pairs of files whose sizes differed so much that
they could not possibly share enough content to be detected as a
rename were still having their scores calculated. I added an
optimization to skip such pairs. For example, if the rename detection
threshold is 60%, the larger file is 200kb, and the smaller file is
50kb, the pair cannot be counted as a rename, since the smaller file
is too small to hold 60% of the larger file's content
(200 * 0.6 = 120, and 120 > 50).

Change-Id: Icd8315412d5de6292839778e7cea7fe6f061b0fc
stable-0.9
Jeff Schumacher 15 years ago
parent
commit
64b9458640
  1. 32
      org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java

32
org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java

@ -205,6 +205,14 @@ class SimilarityRenameDetector {
// //
matrix = new long[srcs.size() * dsts.size()]; matrix = new long[srcs.size() * dsts.size()];
long[] srcSizes = new long[srcs.size()];
long[] dstSizes = new long[dsts.size()];
// Init the size arrays to some value that indicates that we haven't
// calculated the size yet. Since sizes cannot be negative, -1 will work
Arrays.fill(srcSizes, -1);
Arrays.fill(dstSizes, -1);
// Consider each pair of files, if the score is above the minimum // Consider each pair of files, if the score is above the minimum
// threshold we need record that scoring in the matrix so we can // threshold we need record that scoring in the matrix so we can
// later find the best matches. // later find the best matches.
@ -231,6 +239,26 @@ class SimilarityRenameDetector {
continue; continue;
} }
long srcSize = srcSizes[srcIdx];
if (srcSize < 0) {
srcSize = size(srcEnt.oldId.toObjectId());
srcSizes[srcIdx] = srcSize;
}
long dstSize = dstSizes[dstIdx];
if (dstSize < 0) {
dstSize = size(dstEnt.newId.toObjectId());
dstSizes[dstIdx] = dstSize;
}
long max = Math.max(srcSize, dstSize);
long min = Math.min(srcSize, dstSize);
if (min * 100 / max < renameScore) {
// Cannot possibly match, as the file sizes are so different
pm.update(1);
continue;
}
SimilarityIndex d = hash(dstEnt.newId.toObjectId()); SimilarityIndex d = hash(dstEnt.newId.toObjectId());
int score = s.score(d); int score = s.score(d);
@ -259,6 +287,10 @@ class SimilarityRenameDetector {
return r; return r;
} }
/**
 * Look up the size of the object with the given id.
 * <p>
 * The object is opened through {@code repo} and the value of its
 * {@code getSize()} is returned unchanged. The scoring loop calls this
 * lazily for each source and destination entry and caches the result, so
 * that pairs whose sizes differ too much can be skipped before hashing.
 *
 * @param objectId
 *            id of the object to measure.
 * @return the size reported for the opened object (presumably a byte
 *         count -- confirm against the loader API).
 * @throws IOException
 *             declared by this method; presumably raised when the object
 *             cannot be opened through {@code repo}.
 */
private long size(ObjectId objectId) throws IOException {
// NOTE(review): the result of openObject is dereferenced without a null
// check -- confirm it cannot return null for a missing object.
return repo.openObject(objectId).getSize();
}
private static int score(long value) { private static int score(long value) {
return (int) (value >>> SCORE_SHIFT); return (int) (value >>> SCORE_SHIFT);
} }

Loading…
Cancel
Save