Browse Source

Rename detection should canonicalize line endings

Native Git canonicalizes line endings when detecting
renames; more specifically, it replaces CRLF with LF.
See hash_chars in diffcore-delta.c.

Bug: 449545
Change-Id: Iec2aab12ae9e67074cccb7fbd4d9defe176a0130
Signed-off-by: Marc Strapetz <marc.strapetz@syntevo.com>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
stable-3.6
Marc Strapetz 10 years ago committed by Matthias Sohn
parent
commit
1cb5668441
  1. 62
      org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java
  2. 62
      org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java

62
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java

@ -83,7 +83,7 @@ public class SimilarityIndexTest {
+ "B\n" // + "B\n" //
+ "B\n").getBytes("UTF-8"); + "B\n").getBytes("UTF-8");
SimilarityIndex si = new SimilarityIndex(); SimilarityIndex si = new SimilarityIndex();
si.hash(new ByteArrayInputStream(in), in.length); si.hash(new ByteArrayInputStream(in), in.length, false);
assertEquals(2, si.size()); assertEquals(2, si.size());
} }
@ -103,6 +103,48 @@ public class SimilarityIndexTest {
assertEquals(100, dst.score(src, 100)); assertEquals(100, dst.score(src, 100));
} }
@Test
public void testCommonScore_SameFiles_CR_canonicalization()
throws TableFullException {
String text = "" //
+ "A\r\n" //
+ "B\r\n" //
+ "D\r\n" //
+ "B\r\n";
SimilarityIndex src = hash(text);
SimilarityIndex dst = hash(text.replace("\r", ""));
assertEquals(8, src.common(dst));
assertEquals(8, dst.common(src));
assertEquals(100, src.score(dst, 100));
assertEquals(100, dst.score(src, 100));
}
@Test
public void testCommonScoreLargeObject_SameFiles_CR_canonicalization()
throws TableFullException, IOException {
String text = "" //
+ "A\r\n" //
+ "B\r\n" //
+ "D\r\n" //
+ "B\r\n";
SimilarityIndex src = new SimilarityIndex();
byte[] bytes1 = text.getBytes("UTF-8");
src.hash(new ByteArrayInputStream(bytes1), bytes1.length, true);
src.sort();
SimilarityIndex dst = new SimilarityIndex();
byte[] bytes2 = text.replace("\r", "").getBytes("UTF-8");
dst.hash(new ByteArrayInputStream(bytes2), bytes2.length, true);
dst.sort();
assertEquals(8, src.common(dst));
assertEquals(8, dst.common(src));
assertEquals(100, src.score(dst, 100));
assertEquals(100, dst.score(src, 100));
}
@Test @Test
public void testCommonScore_EmptyFiles() throws TableFullException { public void testCommonScore_EmptyFiles() throws TableFullException {
SimilarityIndex src = hash(""); SimilarityIndex src = hash("");
@ -132,24 +174,8 @@ public class SimilarityIndexTest {
} }
private static SimilarityIndex hash(String text) throws TableFullException { private static SimilarityIndex hash(String text) throws TableFullException {
SimilarityIndex src = new SimilarityIndex() { SimilarityIndex src = new SimilarityIndex();
@Override
void hash(byte[] raw, int ptr, final int end)
throws TableFullException {
while (ptr < end) {
int hash = raw[ptr] & 0xff;
int start = ptr;
do {
int c = raw[ptr++] & 0xff;
if (c == '\n')
break;
} while (ptr < end && ptr - start < 64);
add(hash, ptr - start);
}
}
};
byte[] raw = Constants.encode(text); byte[] raw = Constants.encode(text);
src.setFileSize(raw.length);
src.hash(raw, 0, raw.length); src.hash(raw, 0, raw.length);
src.sort(); src.sort();
return src; return src;

62
org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java

@ -79,8 +79,11 @@ class SimilarityIndex {
/** Maximum value of the count field, also mask to extract the count. */ /** Maximum value of the count field, also mask to extract the count. */
private static final long MAX_COUNT = (1L << KEY_SHIFT) - 1; private static final long MAX_COUNT = (1L << KEY_SHIFT) - 1;
/** Total size of the file we hashed into the structure. */ /**
private long fileSize; * Total amount of bytes hashed into the structure, including \n. This is
* usually the size of the file minus number of CRLF encounters.
*/
private long hashedCnt;
/** Number of non-zero entries in {@link #idHash}. */ /** Number of non-zero entries in {@link #idHash}. */
private int idSize; private int idSize;
@ -108,48 +111,59 @@ class SimilarityIndex {
idGrowAt = growAt(idHashBits); idGrowAt = growAt(idHashBits);
} }
long getFileSize() { void hash(ObjectLoader obj) throws MissingObjectException, IOException,
return fileSize; TableFullException {
if (obj.isLarge()) {
hashLargeObject(obj);
} else {
byte[] raw = obj.getCachedBytes();
hash(raw, 0, raw.length);
} }
void setFileSize(long size) {
fileSize = size;
} }
void hash(ObjectLoader obj) throws MissingObjectException, IOException, private void hashLargeObject(ObjectLoader obj) throws IOException,
TableFullException { TableFullException {
if (obj.isLarge()) { ObjectStream in1 = obj.openStream();
ObjectStream in = obj.openStream(); boolean text;
try { try {
setFileSize(in.getSize()); text = !RawText.isBinary(in1);
hash(in, fileSize);
} finally { } finally {
in.close(); in1.close();
} }
} else {
byte[] raw = obj.getCachedBytes(); ObjectStream in2 = obj.openStream();
setFileSize(raw.length); try {
hash(raw, 0, raw.length); hash(in2, in2.getSize(), text);
} finally {
in2.close();
} }
} }
void hash(byte[] raw, int ptr, final int end) throws TableFullException { void hash(byte[] raw, int ptr, final int end) throws TableFullException {
final boolean text = !RawText.isBinary(raw);
hashedCnt = 0;
while (ptr < end) { while (ptr < end) {
int hash = 5381; int hash = 5381;
int blockHashedCnt = 0;
int start = ptr; int start = ptr;
// Hash one line, or one block, whichever occurs first. // Hash one line, or one block, whichever occurs first.
do { do {
int c = raw[ptr++] & 0xff; int c = raw[ptr++] & 0xff;
// Ignore CR in CRLF sequence if text
if (text && c == '\r' && ptr < end && raw[ptr] == '\n')
continue;
blockHashedCnt++;
if (c == '\n') if (c == '\n')
break; break;
hash = (hash << 5) + hash + c; hash = (hash << 5) + hash + c;
} while (ptr < end && ptr - start < 64); } while (ptr < end && ptr - start < 64);
add(hash, ptr - start); hashedCnt += blockHashedCnt;
add(hash, blockHashedCnt);
} }
} }
void hash(InputStream in, long remaining) throws IOException, void hash(InputStream in, long remaining, boolean text) throws IOException,
TableFullException { TableFullException {
byte[] buf = new byte[4096]; byte[] buf = new byte[4096];
int ptr = 0; int ptr = 0;
@ -157,6 +171,7 @@ class SimilarityIndex {
while (0 < remaining) { while (0 < remaining) {
int hash = 5381; int hash = 5381;
int blockHashedCnt = 0;
// Hash one line, or one block, whichever occurs first. // Hash one line, or one block, whichever occurs first.
int n = 0; int n = 0;
@ -170,11 +185,16 @@ class SimilarityIndex {
n++; n++;
int c = buf[ptr++] & 0xff; int c = buf[ptr++] & 0xff;
// Ignore CR in CRLF sequence if text
if (text && c == '\r' && ptr < cnt && buf[ptr] == '\n')
continue;
blockHashedCnt++;
if (c == '\n') if (c == '\n')
break; break;
hash = (hash << 5) + hash + c; hash = (hash << 5) + hash + c;
} while (n < 64 && n < remaining); } while (n < 64 && n < remaining);
add(hash, n); hashedCnt += blockHashedCnt;
add(hash, blockHashedCnt);
remaining -= n; remaining -= n;
} }
} }
@ -193,7 +213,7 @@ class SimilarityIndex {
} }
int score(SimilarityIndex dst, int maxScore) { int score(SimilarityIndex dst, int maxScore) {
long max = Math.max(fileSize, dst.fileSize); long max = Math.max(hashedCnt, dst.hashedCnt);
if (max == 0) if (max == 0)
return maxScore; return maxScore;
return (int) ((common(dst) * maxScore) / max); return (int) ((common(dst) * maxScore) / max);

Loading…
Cancel
Save