Browse Source
* js/rename: Implemented file path based tie breaking to exact rename detection Added more test cases for RenameDetector Added very small optimization to exact rename detection Fixed Misleading Javadoc Added file path similarity to scoring metric in rename detection Fixed potential div by zero bug Added file size based rename detection optimization Create FileHeader from DiffEntry log: Implement --follow Cache the diff configuration section log: Add whitespace ignore options Format submodule links during differences Redo DiffFormatter API to be easier to use log, diff: Add rename detection support Implement similarity based rename detection Added a preliminary version of rename detection Refactored code out of FileHeader to facilitate rename detectionstable-0.9
Shawn O. Pearce
15 years ago
30 changed files with 3341 additions and 307 deletions
@ -0,0 +1,186 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import org.eclipse.jgit.diff.DiffEntry.ChangeType; |
||||
import org.eclipse.jgit.junit.TestRepository; |
||||
import org.eclipse.jgit.lib.FileMode; |
||||
import org.eclipse.jgit.lib.ObjectId; |
||||
import org.eclipse.jgit.lib.RepositoryTestCase; |
||||
import org.eclipse.jgit.patch.FileHeader; |
||||
import org.eclipse.jgit.patch.HunkHeader; |
||||
import org.eclipse.jgit.util.RawParseUtils; |
||||
import org.eclipse.jgit.util.io.DisabledOutputStream; |
||||
|
||||
public class DiffFormatterTest extends RepositoryTestCase { |
||||
private static final String DIFF = "diff --git "; |
||||
|
||||
private static final String REGULAR_FILE = "100644"; |
||||
|
||||
private static final String GITLINK = "160000"; |
||||
|
||||
private static final String PATH_A = "src/a"; |
||||
|
||||
private static final String PATH_B = "src/b"; |
||||
|
||||
private DiffFormatter df; |
||||
|
||||
private TestRepository testDb; |
||||
|
||||
@Override |
||||
public void setUp() throws Exception { |
||||
super.setUp(); |
||||
testDb = new TestRepository(db); |
||||
df = new DiffFormatter(DisabledOutputStream.INSTANCE); |
||||
df.setRepository(db); |
||||
} |
||||
|
||||
public void testCreateFileHeader_Modify() throws Exception { |
||||
ObjectId adId = blob("a\nd\n"); |
||||
ObjectId abcdId = blob("a\nb\nc\nd\n"); |
||||
|
||||
String diffHeader = makeDiffHeader(PATH_A, PATH_A, adId, abcdId); |
||||
|
||||
DiffEntry ad = DiffEntry.delete(PATH_A, adId); |
||||
DiffEntry abcd = DiffEntry.add(PATH_A, abcdId); |
||||
|
||||
DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); |
||||
|
||||
FileHeader fh = df.createFileHeader(mod); |
||||
|
||||
assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); |
||||
assertEquals(0, fh.getStartOffset()); |
||||
assertEquals(fh.getBuffer().length, fh.getEndOffset()); |
||||
assertEquals(FileHeader.PatchType.UNIFIED, fh.getPatchType()); |
||||
|
||||
assertEquals(1, fh.getHunks().size()); |
||||
|
||||
HunkHeader hh = fh.getHunks().get(0); |
||||
assertEquals(1, hh.toEditList().size()); |
||||
|
||||
EditList el = hh.toEditList(); |
||||
assertEquals(1, el.size()); |
||||
|
||||
Edit e = el.get(0); |
||||
assertEquals(1, e.getBeginA()); |
||||
assertEquals(1, e.getEndA()); |
||||
assertEquals(1, e.getBeginB()); |
||||
assertEquals(3, e.getEndB()); |
||||
assertEquals(Edit.Type.INSERT, e.getType()); |
||||
} |
||||
|
||||
public void testCreateFileHeader_Binary() throws Exception { |
||||
ObjectId adId = blob("a\nd\n"); |
||||
ObjectId binId = blob("a\nb\nc\n\0\0\0\0d\n"); |
||||
|
||||
String diffHeader = makeDiffHeader(PATH_A, PATH_B, adId, binId) |
||||
+ "Binary files differ\n"; |
||||
|
||||
DiffEntry ad = DiffEntry.delete(PATH_A, adId); |
||||
DiffEntry abcd = DiffEntry.add(PATH_B, binId); |
||||
|
||||
DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); |
||||
|
||||
FileHeader fh = df.createFileHeader(mod); |
||||
|
||||
assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); |
||||
assertEquals(FileHeader.PatchType.BINARY, fh.getPatchType()); |
||||
|
||||
assertEquals(1, fh.getHunks().size()); |
||||
|
||||
HunkHeader hh = fh.getHunks().get(0); |
||||
assertEquals(0, hh.toEditList().size()); |
||||
} |
||||
|
||||
public void testCreateFileHeader_GitLink() throws Exception { |
||||
ObjectId aId = blob("a\n"); |
||||
ObjectId bId = blob("b\n"); |
||||
|
||||
String diffHeader = makeDiffHeaderModeChange(PATH_A, PATH_A, aId, bId, |
||||
GITLINK, REGULAR_FILE) |
||||
+ "-Subproject commit " + aId.name() + "\n"; |
||||
|
||||
DiffEntry ad = DiffEntry.delete(PATH_A, aId); |
||||
ad.oldMode = FileMode.GITLINK; |
||||
DiffEntry abcd = DiffEntry.add(PATH_A, bId); |
||||
|
||||
DiffEntry mod = DiffEntry.pair(ChangeType.MODIFY, ad, abcd, 0); |
||||
|
||||
FileHeader fh = df.createFileHeader(mod); |
||||
|
||||
assertEquals(diffHeader, RawParseUtils.decode(fh.getBuffer())); |
||||
|
||||
assertEquals(1, fh.getHunks().size()); |
||||
|
||||
HunkHeader hh = fh.getHunks().get(0); |
||||
assertEquals(0, hh.toEditList().size()); |
||||
} |
||||
|
||||
private String makeDiffHeader(String pathA, String pathB, ObjectId aId, |
||||
ObjectId bId) { |
||||
String a = aId.abbreviate(db).name(); |
||||
String b = bId.abbreviate(db).name(); |
||||
return DIFF + "a/" + pathA + " " + "b/" + pathB + "\n" + //
|
||||
"index " + a + ".." + b + " " + REGULAR_FILE + "\n" + //
|
||||
"--- a/" + pathA + "\n" + //
|
||||
"+++ b/" + pathB + "\n"; |
||||
} |
||||
|
||||
private String makeDiffHeaderModeChange(String pathA, String pathB, |
||||
ObjectId aId, ObjectId bId, String modeA, String modeB) { |
||||
String a = aId.abbreviate(db).name(); |
||||
String b = bId.abbreviate(db).name(); |
||||
return DIFF + "a/" + pathA + " " + "b/" + pathB + "\n" + //
|
||||
"old mode " + modeA + "\n" + //
|
||||
"new mode " + modeB + "\n" + //
|
||||
"index " + a + ".." + b + "\n" + //
|
||||
"--- a/" + pathA + "\n" + //
|
||||
"+++ b/" + pathB + "\n"; |
||||
} |
||||
|
||||
private ObjectId blob(String content) throws Exception { |
||||
return testDb.blob(content).copy(); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,446 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import java.util.List; |
||||
|
||||
import org.eclipse.jgit.diff.DiffEntry.ChangeType; |
||||
import org.eclipse.jgit.junit.TestRepository; |
||||
import org.eclipse.jgit.lib.AbbreviatedObjectId; |
||||
import org.eclipse.jgit.lib.FileMode; |
||||
import org.eclipse.jgit.lib.ObjectId; |
||||
import org.eclipse.jgit.lib.RepositoryTestCase; |
||||
|
||||
public class RenameDetectorTest extends RepositoryTestCase { |
||||
private static final String PATH_A = "src/A"; |
||||
private static final String PATH_B = "src/B"; |
||||
private static final String PATH_H = "src/H"; |
||||
private static final String PATH_Q = "src/Q"; |
||||
|
||||
private RenameDetector rd; |
||||
|
||||
private TestRepository testDb; |
||||
|
||||
@Override |
||||
public void setUp() throws Exception { |
||||
super.setUp(); |
||||
testDb = new TestRepository(db); |
||||
rd = new RenameDetector(db); |
||||
} |
||||
|
||||
public void testExactRename_OneRename() throws Exception { |
||||
ObjectId foo = blob("foo"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, foo); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, foo); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(1, entries.size()); |
||||
assertRename(b, a, 100, entries.get(0)); |
||||
} |
||||
|
||||
public void testExactRename_OneRenameOneModify() throws Exception { |
||||
ObjectId foo = blob("foo"); |
||||
ObjectId bar = blob("bar"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, foo); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, foo); |
||||
|
||||
DiffEntry c = DiffEntry.modify(PATH_H); |
||||
c.newId = c.oldId = AbbreviatedObjectId.fromObjectId(bar); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
rd.add(c); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(2, entries.size()); |
||||
assertRename(b, a, 100, entries.get(0)); |
||||
assertSame(c, entries.get(1)); |
||||
} |
||||
|
||||
public void testExactRename_ManyRenames() throws Exception { |
||||
ObjectId foo = blob("foo"); |
||||
ObjectId bar = blob("bar"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, foo); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, foo); |
||||
|
||||
DiffEntry c = DiffEntry.add(PATH_H, bar); |
||||
DiffEntry d = DiffEntry.delete(PATH_B, bar); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
rd.add(c); |
||||
rd.add(d); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(2, entries.size()); |
||||
assertRename(b, a, 100, entries.get(0)); |
||||
assertRename(d, c, 100, entries.get(1)); |
||||
} |
||||
|
||||
public void testExactRename_MultipleIdenticalDeletes() throws Exception { |
||||
ObjectId foo = blob("foo"); |
||||
|
||||
DiffEntry a = DiffEntry.delete(PATH_A, foo); |
||||
DiffEntry b = DiffEntry.delete(PATH_B, foo); |
||||
|
||||
DiffEntry c = DiffEntry.delete(PATH_H, foo); |
||||
DiffEntry d = DiffEntry.add(PATH_Q, foo); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
rd.add(c); |
||||
rd.add(d); |
||||
|
||||
// Pairs the add with the first delete added
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(3, entries.size()); |
||||
assertEquals(b, entries.get(0)); |
||||
assertEquals(c, entries.get(1)); |
||||
assertRename(a, d, 100, entries.get(2)); |
||||
} |
||||
|
||||
public void testExactRename_PathBreaksTie() throws Exception { |
||||
ObjectId foo = blob("foo"); |
||||
|
||||
DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo); |
||||
DiffEntry b = DiffEntry.delete("src/com/foo/b.java", foo); |
||||
|
||||
DiffEntry c = DiffEntry.add("c.txt", foo); |
||||
DiffEntry d = DiffEntry.delete("d.txt", foo); |
||||
|
||||
// Add out of order to avoid first-match succeeding
|
||||
rd.add(a); |
||||
rd.add(d); |
||||
rd.add(b); |
||||
rd.add(c); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(2, entries.size()); |
||||
assertRename(d, c, 100, entries.get(0)); |
||||
assertRename(b, a, 100, entries.get(1)); |
||||
} |
||||
|
||||
public void testExactRename_OneDeleteManyAdds() throws Exception { |
||||
ObjectId foo = blob("foo"); |
||||
|
||||
DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo); |
||||
DiffEntry b = DiffEntry.add("src/com/foo/b.java", foo); |
||||
DiffEntry c = DiffEntry.add("c.txt", foo); |
||||
|
||||
DiffEntry d = DiffEntry.delete("d.txt", foo); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
rd.add(c); |
||||
rd.add(d); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(3, entries.size()); |
||||
assertRename(d, c, 100, entries.get(0)); |
||||
assertCopy(d, a, 100, entries.get(1)); |
||||
assertCopy(d, b, 100, entries.get(2)); |
||||
} |
||||
|
||||
public void testInexactRename_OnePair() throws Exception { |
||||
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); |
||||
ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(1, entries.size()); |
||||
assertRename(b, a, 66, entries.get(0)); |
||||
} |
||||
|
||||
public void testInexactRename_OneRenameTwoUnrelatedFiles() throws Exception { |
||||
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); |
||||
ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); |
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); |
||||
|
||||
ObjectId cId = blob("some\nsort\nof\ntext\n"); |
||||
ObjectId dId = blob("completely\nunrelated\ntext\n"); |
||||
DiffEntry c = DiffEntry.add(PATH_B, cId); |
||||
DiffEntry d = DiffEntry.delete(PATH_H, dId); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
rd.add(c); |
||||
rd.add(d); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(3, entries.size()); |
||||
assertRename(b, a, 66, entries.get(0)); |
||||
assertSame(c, entries.get(1)); |
||||
assertSame(d, entries.get(2)); |
||||
} |
||||
|
||||
public void testInexactRename_LastByteDifferent() throws Exception { |
||||
ObjectId aId = blob("foo\nbar\na"); |
||||
ObjectId bId = blob("foo\nbar\nb"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(1, entries.size()); |
||||
assertRename(b, a, 88, entries.get(0)); |
||||
} |
||||
|
||||
public void testInexactRename_NewlinesOnly() throws Exception { |
||||
ObjectId aId = blob("\n\n\n"); |
||||
ObjectId bId = blob("\n\n\n\n"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(1, entries.size()); |
||||
assertRename(b, a, 74, entries.get(0)); |
||||
} |
||||
|
||||
public void testInexactRenames_OnePair2() throws Exception { |
||||
ObjectId aId = blob("ab\nab\nab\nac\nad\nae\n"); |
||||
ObjectId bId = blob("ac\nab\nab\nab\naa\na0\na1\n"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
rd.setRenameScore(50); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(1, entries.size()); |
||||
assertRename(b, a, 57, entries.get(0)); |
||||
} |
||||
|
||||
public void testNoRenames_SingleByteFiles() throws Exception { |
||||
ObjectId aId = blob("a"); |
||||
ObjectId bId = blob("b"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(2, entries.size()); |
||||
assertSame(a, entries.get(0)); |
||||
assertSame(b, entries.get(1)); |
||||
} |
||||
|
||||
public void testNoRenames_EmptyFile1() throws Exception { |
||||
ObjectId aId = blob(""); |
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
|
||||
rd.add(a); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(1, entries.size()); |
||||
assertSame(a, entries.get(0)); |
||||
} |
||||
|
||||
public void testNoRenames_EmptyFile2() throws Exception { |
||||
ObjectId aId = blob(""); |
||||
ObjectId bId = blob("blah"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(2, entries.size()); |
||||
assertSame(a, entries.get(0)); |
||||
assertSame(b, entries.get(1)); |
||||
} |
||||
|
||||
public void testNoRenames_SymlinkAndFile() throws Exception { |
||||
ObjectId aId = blob("src/dest"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, aId); |
||||
b.oldMode = FileMode.SYMLINK; |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(2, entries.size()); |
||||
assertSame(a, entries.get(0)); |
||||
assertSame(b, entries.get(1)); |
||||
} |
||||
|
||||
public void testNoRenames_GitlinkAndFile() throws Exception { |
||||
ObjectId aId = blob("src/dest"); |
||||
|
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_Q, aId); |
||||
b.oldMode = FileMode.GITLINK; |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(2, entries.size()); |
||||
assertSame(a, entries.get(0)); |
||||
assertSame(b, entries.get(1)); |
||||
} |
||||
|
||||
public void testNoRenames_SymlinkAndFileSamePath() throws Exception { |
||||
ObjectId aId = blob("src/dest"); |
||||
|
||||
DiffEntry a = DiffEntry.delete(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.add(PATH_A, aId); |
||||
a.oldMode = FileMode.SYMLINK; |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
|
||||
// Deletes should be first
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(2, entries.size()); |
||||
assertSame(a, entries.get(0)); |
||||
assertSame(b, entries.get(1)); |
||||
} |
||||
|
||||
public void testSetRenameScore_IllegalArgs() throws Exception { |
||||
try { |
||||
rd.setRenameScore(-1); |
||||
fail(); |
||||
} catch (IllegalArgumentException e) { |
||||
// pass
|
||||
} |
||||
|
||||
try { |
||||
rd.setRenameScore(101); |
||||
fail(); |
||||
} catch (IllegalArgumentException e) { |
||||
// pass
|
||||
} |
||||
} |
||||
|
||||
public void testRenameLimit() throws Exception { |
||||
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); |
||||
ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); |
||||
DiffEntry a = DiffEntry.add(PATH_A, aId); |
||||
DiffEntry b = DiffEntry.delete(PATH_B, bId); |
||||
|
||||
ObjectId cId = blob("a\nb\nc\nd\n"); |
||||
ObjectId dId = blob("a\nb\nc\n"); |
||||
DiffEntry c = DiffEntry.add(PATH_H, cId); |
||||
DiffEntry d = DiffEntry.delete(PATH_Q, dId); |
||||
|
||||
rd.add(a); |
||||
rd.add(b); |
||||
rd.add(c); |
||||
rd.add(d); |
||||
|
||||
rd.setRenameLimit(1); |
||||
|
||||
assertTrue(rd.isOverRenameLimit()); |
||||
|
||||
List<DiffEntry> entries = rd.compute(); |
||||
assertEquals(4, entries.size()); |
||||
assertSame(a, entries.get(0)); |
||||
assertSame(b, entries.get(1)); |
||||
assertSame(c, entries.get(2)); |
||||
assertSame(d, entries.get(3)); |
||||
} |
||||
|
||||
private ObjectId blob(String content) throws Exception { |
||||
return testDb.blob(content).copy(); |
||||
} |
||||
|
||||
private static void assertRename(DiffEntry o, DiffEntry n, int score, |
||||
DiffEntry rename) { |
||||
assertEquals(ChangeType.RENAME, rename.getChangeType()); |
||||
|
||||
assertEquals(o.getOldName(), rename.getOldName()); |
||||
assertEquals(n.getNewName(), rename.getNewName()); |
||||
|
||||
assertEquals(o.getOldMode(), rename.getOldMode()); |
||||
assertEquals(n.getNewMode(), rename.getNewMode()); |
||||
|
||||
assertEquals(o.getOldId(), rename.getOldId()); |
||||
assertEquals(n.getNewId(), rename.getNewId()); |
||||
|
||||
assertEquals(score, rename.getScore()); |
||||
} |
||||
|
||||
private static void assertCopy(DiffEntry o, DiffEntry n, int score, |
||||
DiffEntry copy) { |
||||
assertEquals(ChangeType.COPY, copy.getChangeType()); |
||||
|
||||
assertEquals(o.getOldName(), copy.getOldName()); |
||||
assertEquals(n.getNewName(), copy.getNewName()); |
||||
|
||||
assertEquals(o.getOldMode(), copy.getOldMode()); |
||||
assertEquals(n.getNewMode(), copy.getNewMode()); |
||||
|
||||
assertEquals(o.getOldId(), copy.getOldId()); |
||||
assertEquals(n.getNewId(), copy.getNewId()); |
||||
|
||||
assertEquals(score, copy.getScore()); |
||||
} |
||||
} |
@ -0,0 +1,137 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import junit.framework.TestCase; |
||||
|
||||
import org.eclipse.jgit.lib.Constants; |
||||
|
||||
public class SimilarityIndexTest extends TestCase { |
||||
public void testIndexing() { |
||||
SimilarityIndex si = hash("" //
|
||||
+ "A\n" //
|
||||
+ "B\n" //
|
||||
+ "D\n" //
|
||||
+ "B\n" //
|
||||
); |
||||
|
||||
int key_A = keyFor("A\n"); |
||||
int key_B = keyFor("B\n"); |
||||
int key_D = keyFor("D\n"); |
||||
assertTrue(key_A != key_B && key_A != key_D && key_B != key_D); |
||||
|
||||
assertEquals(3, si.size()); |
||||
assertEquals(2, si.count(si.findIndex(key_A))); |
||||
assertEquals(4, si.count(si.findIndex(key_B))); |
||||
assertEquals(2, si.count(si.findIndex(key_D))); |
||||
} |
||||
|
||||
public void testCommonScore_SameFiles() { |
||||
String text = "" //
|
||||
+ "A\n" //
|
||||
+ "B\n" //
|
||||
+ "D\n" //
|
||||
+ "B\n"; |
||||
SimilarityIndex src = hash(text); |
||||
SimilarityIndex dst = hash(text); |
||||
assertEquals(8, src.common(dst)); |
||||
assertEquals(8, dst.common(src)); |
||||
|
||||
assertEquals(100, src.score(dst, 100)); |
||||
assertEquals(100, dst.score(src, 100)); |
||||
} |
||||
|
||||
public void testCommonScore_EmptyFiles() { |
||||
SimilarityIndex src = hash(""); |
||||
SimilarityIndex dst = hash(""); |
||||
assertEquals(0, src.common(dst)); |
||||
assertEquals(0, dst.common(src)); |
||||
} |
||||
|
||||
public void testCommonScore_TotallyDifferentFiles() { |
||||
SimilarityIndex src = hash("A\n"); |
||||
SimilarityIndex dst = hash("D\n"); |
||||
assertEquals(0, src.common(dst)); |
||||
assertEquals(0, dst.common(src)); |
||||
} |
||||
|
||||
public void testCommonScore_SimiliarBy75() { |
||||
SimilarityIndex src = hash("A\nB\nC\nD\n"); |
||||
SimilarityIndex dst = hash("A\nB\nC\nQ\n"); |
||||
assertEquals(6, src.common(dst)); |
||||
assertEquals(6, dst.common(src)); |
||||
|
||||
assertEquals(75, src.score(dst, 100)); |
||||
assertEquals(75, dst.score(src, 100)); |
||||
} |
||||
|
||||
private static SimilarityIndex hash(String text) { |
||||
SimilarityIndex src = new SimilarityIndex() { |
||||
@Override |
||||
void hash(byte[] raw, int ptr, final int end) { |
||||
while (ptr < end) { |
||||
int hash = raw[ptr] & 0xff; |
||||
int start = ptr; |
||||
do { |
||||
int c = raw[ptr++] & 0xff; |
||||
if (c == '\n') |
||||
break; |
||||
} while (ptr < end && ptr - start < 64); |
||||
add(hash, ptr - start); |
||||
} |
||||
} |
||||
}; |
||||
byte[] raw = Constants.encode(text); |
||||
src.setFileSize(raw.length); |
||||
src.hash(raw, 0, raw.length); |
||||
src.sort(); |
||||
return src; |
||||
} |
||||
|
||||
private static int keyFor(String line) { |
||||
SimilarityIndex si = hash(line); |
||||
assertEquals("single line scored", 1, si.size()); |
||||
return si.key(0); |
||||
} |
||||
} |
@ -0,0 +1,68 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import org.eclipse.jgit.lib.Config; |
||||
import org.eclipse.jgit.lib.Config.SectionParser; |
||||
|
||||
/** Keeps track of diff related configuration options. */ |
||||
public class DiffConfig { |
||||
/** Key for {@link Config#get(SectionParser)}. */ |
||||
public static final Config.SectionParser<DiffConfig> KEY = new SectionParser<DiffConfig>() { |
||||
public DiffConfig parse(final Config cfg) { |
||||
return new DiffConfig(cfg); |
||||
} |
||||
}; |
||||
|
||||
private final int renameLimit; |
||||
|
||||
private DiffConfig(final Config rc) { |
||||
renameLimit = rc.getInt("diff", "renamelimit", 200); |
||||
} |
||||
|
||||
/** @return limit on number of paths to perform inexact rename detection. */ |
||||
public int getRenameLimit() { |
||||
return renameLimit; |
||||
} |
||||
} |
@ -0,0 +1,345 @@
|
||||
/* |
||||
* Copyright (C) 2008-2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import java.io.IOException; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.List; |
||||
|
||||
import org.eclipse.jgit.lib.AbbreviatedObjectId; |
||||
import org.eclipse.jgit.lib.AnyObjectId; |
||||
import org.eclipse.jgit.lib.FileMode; |
||||
import org.eclipse.jgit.lib.MutableObjectId; |
||||
import org.eclipse.jgit.lib.ObjectId; |
||||
import org.eclipse.jgit.treewalk.TreeWalk; |
||||
|
||||
/** A value class representing a change to a file */ |
||||
public class DiffEntry { |
||||
private static final AbbreviatedObjectId A_ZERO = AbbreviatedObjectId |
||||
.fromObjectId(ObjectId.zeroId()); |
||||
|
||||
/** Magical file name used for file adds or deletes. */ |
||||
public static final String DEV_NULL = "/dev/null"; |
||||
|
||||
/** General type of change a single file-level patch describes. */ |
||||
public static enum ChangeType { |
||||
/** Add a new file to the project */ |
||||
ADD, |
||||
|
||||
/** Modify an existing file in the project (content and/or mode) */ |
||||
MODIFY, |
||||
|
||||
/** Delete an existing file from the project */ |
||||
DELETE, |
||||
|
||||
/** Rename an existing file to a new location */ |
||||
RENAME, |
||||
|
||||
/** Copy an existing file to a new location, keeping the original */ |
||||
COPY; |
||||
} |
||||
|
||||
/** |
||||
* Create an empty DiffEntry |
||||
*/ |
||||
protected DiffEntry(){ |
||||
// reduce the visibility of the default constructor
|
||||
} |
||||
|
||||
/** |
||||
* Convert the TreeWalk into DiffEntry headers. |
||||
* |
||||
* @param walk |
||||
* the TreeWalk to walk through. Must have exactly two trees. |
||||
* @return headers describing the changed files. |
||||
* @throws IOException |
||||
* the repository cannot be accessed. |
||||
*/ |
||||
public static List<DiffEntry> scan(TreeWalk walk) throws IOException { |
||||
List<DiffEntry> r = new ArrayList<DiffEntry>(); |
||||
MutableObjectId idBuf = new MutableObjectId(); |
||||
while (walk.next()) { |
||||
DiffEntry entry = new DiffEntry(); |
||||
|
||||
walk.getObjectId(idBuf, 0); |
||||
entry.oldId = AbbreviatedObjectId.fromObjectId(idBuf); |
||||
|
||||
walk.getObjectId(idBuf, 1); |
||||
entry.newId = AbbreviatedObjectId.fromObjectId(idBuf); |
||||
|
||||
entry.oldMode = walk.getFileMode(0); |
||||
entry.newMode = walk.getFileMode(1); |
||||
entry.newName = entry.oldName = walk.getPathString(); |
||||
|
||||
if (entry.oldMode == FileMode.MISSING) { |
||||
entry.oldName = DiffEntry.DEV_NULL; |
||||
entry.changeType = ChangeType.ADD; |
||||
r.add(entry); |
||||
|
||||
} else if (entry.newMode == FileMode.MISSING) { |
||||
entry.newName = DiffEntry.DEV_NULL; |
||||
entry.changeType = ChangeType.DELETE; |
||||
r.add(entry); |
||||
|
||||
} else { |
||||
entry.changeType = ChangeType.MODIFY; |
||||
if (RenameDetector.sameType(entry.oldMode, entry.newMode)) |
||||
r.add(entry); |
||||
else |
||||
r.addAll(breakModify(entry)); |
||||
} |
||||
} |
||||
return r; |
||||
} |
||||
|
||||
static DiffEntry add(String path, AnyObjectId id) { |
||||
DiffEntry e = new DiffEntry(); |
||||
e.oldId = A_ZERO; |
||||
e.oldMode = FileMode.MISSING; |
||||
e.oldName = DEV_NULL; |
||||
|
||||
e.newId = AbbreviatedObjectId.fromObjectId(id); |
||||
e.newMode = FileMode.REGULAR_FILE; |
||||
e.newName = path; |
||||
e.changeType = ChangeType.ADD; |
||||
return e; |
||||
} |
||||
|
||||
static DiffEntry delete(String path, AnyObjectId id) { |
||||
DiffEntry e = new DiffEntry(); |
||||
e.oldId = AbbreviatedObjectId.fromObjectId(id); |
||||
e.oldMode = FileMode.REGULAR_FILE; |
||||
e.oldName = path; |
||||
|
||||
e.newId = A_ZERO; |
||||
e.newMode = FileMode.MISSING; |
||||
e.newName = DEV_NULL; |
||||
e.changeType = ChangeType.DELETE; |
||||
return e; |
||||
} |
||||
|
||||
static DiffEntry modify(String path) { |
||||
DiffEntry e = new DiffEntry(); |
||||
e.oldMode = FileMode.REGULAR_FILE; |
||||
e.oldName = path; |
||||
|
||||
e.newMode = FileMode.REGULAR_FILE; |
||||
e.newName = path; |
||||
e.changeType = ChangeType.MODIFY; |
||||
return e; |
||||
} |
||||
|
||||
static List<DiffEntry> breakModify(DiffEntry entry) { |
||||
DiffEntry del = new DiffEntry(); |
||||
del.oldId = entry.getOldId(); |
||||
del.oldMode = entry.getOldMode(); |
||||
del.oldName = entry.getOldName(); |
||||
|
||||
del.newId = A_ZERO; |
||||
del.newMode = FileMode.MISSING; |
||||
del.newName = DiffEntry.DEV_NULL; |
||||
del.changeType = ChangeType.DELETE; |
||||
|
||||
DiffEntry add = new DiffEntry(); |
||||
add.oldId = A_ZERO; |
||||
add.oldMode = FileMode.MISSING; |
||||
add.oldName = DiffEntry.DEV_NULL; |
||||
|
||||
add.newId = entry.getNewId(); |
||||
add.newMode = entry.getNewMode(); |
||||
add.newName = entry.getNewName(); |
||||
add.changeType = ChangeType.ADD; |
||||
return Arrays.asList(del, add); |
||||
} |
||||
|
||||
static DiffEntry pair(ChangeType changeType, DiffEntry src, DiffEntry dst, |
||||
int score) { |
||||
DiffEntry r = new DiffEntry(); |
||||
|
||||
r.oldId = src.oldId; |
||||
r.oldMode = src.oldMode; |
||||
r.oldName = src.oldName; |
||||
|
||||
r.newId = dst.newId; |
||||
r.newMode = dst.newMode; |
||||
r.newName = dst.newName; |
||||
|
||||
r.changeType = changeType; |
||||
r.score = score; |
||||
|
||||
return r; |
||||
} |
||||
|
||||
/** File name of the old (pre-image). */ |
||||
protected String oldName; |
||||
|
||||
/** File name of the new (post-image). */ |
||||
protected String newName; |
||||
|
||||
/** Old mode of the file, if described by the patch, else null. */ |
||||
protected FileMode oldMode; |
||||
|
||||
/** New mode of the file, if described by the patch, else null. */ |
||||
protected FileMode newMode; |
||||
|
||||
/** General type of change indicated by the patch. */ |
||||
protected ChangeType changeType; |
||||
|
||||
/** Similarity score if {@link #changeType} is a copy or rename. */ |
||||
protected int score; |
||||
|
||||
/** ObjectId listed on the index line for the old (pre-image) */ |
||||
protected AbbreviatedObjectId oldId; |
||||
|
||||
/** ObjectId listed on the index line for the new (post-image) */ |
||||
protected AbbreviatedObjectId newId; |
||||
|
||||
/** |
||||
* Get the old name associated with this file. |
||||
* <p> |
||||
* The meaning of the old name can differ depending on the semantic meaning |
||||
* of this patch: |
||||
* <ul> |
||||
* <li><i>file add</i>: always <code>/dev/null</code></li> |
||||
* <li><i>file modify</i>: always {@link #getNewName()}</li> |
||||
* <li><i>file delete</i>: always the file being deleted</li> |
||||
* <li><i>file copy</i>: source file the copy originates from</li> |
||||
* <li><i>file rename</i>: source file the rename originates from</li> |
||||
* </ul> |
||||
* |
||||
* @return old name for this file. |
||||
*/ |
||||
public String getOldName() { |
||||
return oldName; |
||||
} |
||||
|
||||
/** |
||||
* Get the new name associated with this file. |
||||
* <p> |
||||
* The meaning of the new name can differ depending on the semantic meaning |
||||
* of this patch: |
||||
* <ul> |
||||
* <li><i>file add</i>: always the file being created</li> |
||||
* <li><i>file modify</i>: always {@link #getOldName()}</li> |
||||
* <li><i>file delete</i>: always <code>/dev/null</code></li> |
||||
* <li><i>file copy</i>: destination file the copy ends up at</li> |
||||
* <li><i>file rename</i>: destination file the rename ends up at/li> |
||||
* </ul> |
||||
* |
||||
* @return new name for this file. |
||||
*/ |
||||
public String getNewName() { |
||||
return newName; |
||||
} |
||||
|
||||
/** @return the old file mode, if described in the patch */ |
||||
public FileMode getOldMode() { |
||||
return oldMode; |
||||
} |
||||
|
||||
/** @return the new file mode, if described in the patch */ |
||||
public FileMode getNewMode() { |
||||
return newMode; |
||||
} |
||||
|
||||
/** @return the type of change this patch makes on {@link #getNewName()} */ |
||||
public ChangeType getChangeType() { |
||||
return changeType; |
||||
} |
||||
|
||||
/** |
||||
* @return similarity score between {@link #getOldName()} and |
||||
* {@link #getNewName()} if {@link #getChangeType()} is |
||||
* {@link ChangeType#COPY} or {@link ChangeType#RENAME}. |
||||
*/ |
||||
public int getScore() { |
||||
return score; |
||||
} |
||||
|
||||
/** |
||||
* Get the old object id from the <code>index</code>. |
||||
* |
||||
* @return the object id; null if there is no index line |
||||
*/ |
||||
public AbbreviatedObjectId getOldId() { |
||||
return oldId; |
||||
} |
||||
|
||||
/** |
||||
* Get the new object id from the <code>index</code>. |
||||
* |
||||
* @return the object id; null if there is no index line |
||||
*/ |
||||
public AbbreviatedObjectId getNewId() { |
||||
return newId; |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
StringBuilder buf = new StringBuilder(); |
||||
buf.append("DiffEntry["); |
||||
buf.append(changeType); |
||||
buf.append(" "); |
||||
switch (changeType) { |
||||
case ADD: |
||||
buf.append(newName); |
||||
break; |
||||
case COPY: |
||||
buf.append(oldName + "->" + newName); |
||||
break; |
||||
case DELETE: |
||||
buf.append(oldName); |
||||
break; |
||||
case MODIFY: |
||||
buf.append(oldName); |
||||
break; |
||||
case RENAME: |
||||
buf.append(oldName + "->" + newName); |
||||
break; |
||||
} |
||||
buf.append("]"); |
||||
return buf.toString(); |
||||
} |
||||
} |
@ -0,0 +1,536 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import java.io.IOException; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.Collection; |
||||
import java.util.Collections; |
||||
import java.util.Comparator; |
||||
import java.util.HashMap; |
||||
import java.util.List; |
||||
|
||||
import org.eclipse.jgit.JGitText; |
||||
import org.eclipse.jgit.diff.DiffEntry.ChangeType; |
||||
import org.eclipse.jgit.lib.AbbreviatedObjectId; |
||||
import org.eclipse.jgit.lib.FileMode; |
||||
import org.eclipse.jgit.lib.NullProgressMonitor; |
||||
import org.eclipse.jgit.lib.ProgressMonitor; |
||||
import org.eclipse.jgit.lib.Repository; |
||||
|
||||
/** Detect and resolve object renames. */ |
||||
public class RenameDetector { |
||||
private static final int EXACT_RENAME_SCORE = 100; |
||||
|
||||
private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() { |
||||
public int compare(DiffEntry a, DiffEntry b) { |
||||
int cmp = nameOf(a).compareTo(nameOf(b)); |
||||
if (cmp == 0) |
||||
cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType()); |
||||
return cmp; |
||||
} |
||||
|
||||
private String nameOf(DiffEntry ent) { |
||||
// Sort by the new name, unless the change is a delete. On
|
||||
// deletes the new name is /dev/null, so we sort instead by
|
||||
// the old name.
|
||||
//
|
||||
if (ent.changeType == ChangeType.DELETE) |
||||
return ent.oldName; |
||||
return ent.newName; |
||||
} |
||||
|
||||
private int sortOf(ChangeType changeType) { |
||||
// Sort deletes before adds so that a major type change for
|
||||
// a file path (such as symlink to regular file) will first
|
||||
// remove the path, then add it back with the new type.
|
||||
//
|
||||
switch (changeType) { |
||||
case DELETE: |
||||
return 1; |
||||
case ADD: |
||||
return 2; |
||||
default: |
||||
return 10; |
||||
} |
||||
} |
||||
}; |
||||
|
||||
private final List<DiffEntry> entries = new ArrayList<DiffEntry>(); |
||||
|
||||
private List<DiffEntry> deleted = new ArrayList<DiffEntry>(); |
||||
|
||||
private List<DiffEntry> added = new ArrayList<DiffEntry>(); |
||||
|
||||
private boolean done; |
||||
|
||||
private final Repository repo; |
||||
|
||||
/** Similarity score required to pair an add/delete as a rename. */ |
||||
private int renameScore = 60; |
||||
|
||||
/** Limit in the number of files to consider for renames. */ |
||||
private int renameLimit; |
||||
|
||||
/** Set if the number of adds or deletes was over the limit. */ |
||||
private boolean overRenameLimit; |
||||
|
||||
/** |
||||
* Create a new rename detector for the given repository |
||||
* |
||||
* @param repo |
||||
* the repository to use for rename detection |
||||
*/ |
||||
public RenameDetector(Repository repo) { |
||||
this.repo = repo; |
||||
|
||||
DiffConfig cfg = repo.getConfig().get(DiffConfig.KEY); |
||||
renameLimit = cfg.getRenameLimit(); |
||||
} |
||||
|
||||
/** |
||||
* @return minimum score required to pair an add/delete as a rename. The |
||||
* score ranges are within the bounds of (0, 100). |
||||
*/ |
||||
public int getRenameScore() { |
||||
return renameScore; |
||||
} |
||||
|
||||
/** |
||||
* Set the minimum score required to pair an add/delete as a rename. |
||||
* <p> |
||||
* When comparing two files together their score must be greater than or |
||||
* equal to the rename score for them to be considered a rename match. The |
||||
* score is computed based on content similarity, so a score of 60 implies |
||||
* that approximately 60% of the bytes in the files are identical. |
||||
* |
||||
* @param score |
||||
* new rename score, must be within [0, 100]. |
||||
* @throws IllegalArgumentException |
||||
* the score was not within [0, 100]. |
||||
*/ |
||||
public void setRenameScore(int score) { |
||||
if (score < 0 || score > 100) |
||||
throw new IllegalArgumentException( |
||||
JGitText.get().similarityScoreMustBeWithinBounds); |
||||
renameScore = score; |
||||
} |
||||
|
||||
/** @return limit on number of paths to perform inexact rename detection. */ |
||||
public int getRenameLimit() { |
||||
return renameLimit; |
||||
} |
||||
|
||||
/** |
||||
* Set the limit on the number of files to perform inexact rename detection. |
||||
* <p> |
||||
* The rename detector has to build a square matrix of the rename limit on |
||||
* each side, then perform that many file compares to determine similarity. |
||||
* If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix |
||||
* must be allocated, and 1,000,000 file compares may need to be performed. |
||||
* |
||||
* @param limit |
||||
* new file limit. |
||||
*/ |
||||
public void setRenameLimit(int limit) { |
||||
renameLimit = limit; |
||||
} |
||||
|
||||
/** |
||||
* Check if the detector is over the rename limit. |
||||
* <p> |
||||
* This method can be invoked either before or after {@code getEntries} has |
||||
* been used to perform rename detection. |
||||
* |
||||
* @return true if the detector has more file additions or removals than the |
||||
* rename limit is currently set to. In such configurations the |
||||
* detector will skip expensive computation. |
||||
*/ |
||||
public boolean isOverRenameLimit() { |
||||
if (done) |
||||
return overRenameLimit; |
||||
int cnt = Math.max(added.size(), deleted.size()); |
||||
return getRenameLimit() != 0 && getRenameLimit() < cnt; |
||||
} |
||||
|
||||
/** |
||||
* Add entries to be considered for rename detection. |
||||
* |
||||
* @param entriesToAdd |
||||
* one or more entries to add. |
||||
* @throws IllegalStateException |
||||
* if {@code getEntries} was already invoked. |
||||
*/ |
||||
public void addAll(Collection<DiffEntry> entriesToAdd) { |
||||
if (done) |
||||
throw new IllegalStateException(JGitText.get().renamesAlreadyFound); |
||||
|
||||
for (DiffEntry entry : entriesToAdd) { |
||||
switch (entry.getChangeType()) { |
||||
case ADD: |
||||
added.add(entry); |
||||
break; |
||||
|
||||
case DELETE: |
||||
deleted.add(entry); |
||||
break; |
||||
|
||||
case MODIFY: |
||||
if (sameType(entry.getOldMode(), entry.getNewMode())) |
||||
entries.add(entry); |
||||
else |
||||
entries.addAll(DiffEntry.breakModify(entry)); |
||||
break; |
||||
|
||||
case COPY: |
||||
case RENAME: |
||||
default: |
||||
entriesToAdd.add(entry); |
||||
} |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Add an entry to be considered for rename detection. |
||||
* |
||||
* @param entry |
||||
* to add. |
||||
* @throws IllegalStateException |
||||
* if {@code getEntries} was already invoked. |
||||
*/ |
||||
public void add(DiffEntry entry) { |
||||
addAll(Collections.singletonList(entry)); |
||||
} |
||||
|
||||
/** |
||||
* Detect renames in the current file set. |
||||
* <p> |
||||
* This convenience function runs without a progress monitor. |
||||
* |
||||
* @return an unmodifiable list of {@link DiffEntry}s representing all files |
||||
* that have been changed. |
||||
* @throws IOException |
||||
* file contents cannot be read from the repository. |
||||
*/ |
||||
public List<DiffEntry> compute() throws IOException { |
||||
return compute(NullProgressMonitor.INSTANCE); |
||||
} |
||||
|
||||
/** |
||||
* Detect renames in the current file set. |
||||
* |
||||
* @param pm |
||||
* report progress during the detection phases. |
||||
* @return an unmodifiable list of {@link DiffEntry}s representing all files |
||||
* that have been changed. |
||||
* @throws IOException |
||||
* file contents cannot be read from the repository. |
||||
*/ |
||||
public List<DiffEntry> compute(ProgressMonitor pm) throws IOException { |
||||
if (!done) { |
||||
done = true; |
||||
|
||||
if (pm == null) |
||||
pm = NullProgressMonitor.INSTANCE; |
||||
findExactRenames(pm); |
||||
findContentRenames(pm); |
||||
|
||||
entries.addAll(added); |
||||
added = null; |
||||
|
||||
entries.addAll(deleted); |
||||
deleted = null; |
||||
|
||||
Collections.sort(entries, DIFF_COMPARATOR); |
||||
} |
||||
return Collections.unmodifiableList(entries); |
||||
} |
||||
|
||||
private void findContentRenames(ProgressMonitor pm) throws IOException { |
||||
int cnt = Math.max(added.size(), deleted.size()); |
||||
if (cnt == 0) |
||||
return; |
||||
|
||||
if (getRenameLimit() == 0 || cnt <= getRenameLimit()) { |
||||
SimilarityRenameDetector d; |
||||
|
||||
d = new SimilarityRenameDetector(repo, deleted, added); |
||||
d.setRenameScore(getRenameScore()); |
||||
d.compute(pm); |
||||
deleted = d.getLeftOverSources(); |
||||
added = d.getLeftOverDestinations(); |
||||
entries.addAll(d.getMatches()); |
||||
} else { |
||||
overRenameLimit = true; |
||||
} |
||||
} |
||||
|
||||
@SuppressWarnings("unchecked") |
||||
private void findExactRenames(ProgressMonitor pm) { |
||||
if (added.isEmpty() || deleted.isEmpty()) |
||||
return; |
||||
|
||||
pm.beginTask(JGitText.get().renamesFindingExact, //
|
||||
added.size() + added.size() + deleted.size() |
||||
+ added.size() * deleted.size()); |
||||
|
||||
HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm); |
||||
HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm); |
||||
|
||||
ArrayList<DiffEntry> uniqueAdds = new ArrayList<DiffEntry>(added.size()); |
||||
ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<List<DiffEntry>>(); |
||||
|
||||
for (Object o : addedMap.values()) { |
||||
if (o instanceof DiffEntry) |
||||
uniqueAdds.add((DiffEntry) o); |
||||
else |
||||
nonUniqueAdds.add((List<DiffEntry>) o); |
||||
} |
||||
|
||||
ArrayList<DiffEntry> left = new ArrayList<DiffEntry>(added.size()); |
||||
|
||||
for (DiffEntry a : uniqueAdds) { |
||||
Object del = deletedMap.get(a.newId); |
||||
if (del instanceof DiffEntry) { |
||||
// We have one add to one delete: pair them if they are the same
|
||||
// type
|
||||
DiffEntry e = (DiffEntry) del; |
||||
if (sameType(e.oldMode, a.newMode)) { |
||||
e.changeType = ChangeType.RENAME; |
||||
entries.add(exactRename(e, a)); |
||||
} else { |
||||
left.add(a); |
||||
} |
||||
} else if (del != null) { |
||||
// We have one add to many deletes: find the delete with the
|
||||
// same type and closest name to the add, then pair them
|
||||
List<DiffEntry> list = (List<DiffEntry>) del; |
||||
DiffEntry best = bestPathMatch(a, list); |
||||
if (best != null) { |
||||
best.changeType = ChangeType.RENAME; |
||||
entries.add(exactRename(best, a)); |
||||
} else { |
||||
left.add(a); |
||||
} |
||||
} else { |
||||
left.add(a); |
||||
} |
||||
pm.update(1); |
||||
} |
||||
|
||||
for (List<DiffEntry> adds : nonUniqueAdds) { |
||||
Object o = deletedMap.get(adds.get(0).newId); |
||||
if (o instanceof DiffEntry) { |
||||
// We have many adds to one delete: find the add with the same
|
||||
// type and closest name to the delete, then pair them. Mark the
|
||||
// rest as copies of the delete.
|
||||
DiffEntry d = (DiffEntry) o; |
||||
DiffEntry best = bestPathMatch(d, adds); |
||||
if (best != null) { |
||||
d.changeType = ChangeType.RENAME; |
||||
entries.add(exactRename(d, best)); |
||||
for (DiffEntry a : adds) { |
||||
if (a != best) { |
||||
if (sameType(d.oldMode, a.newMode)) { |
||||
entries.add(exactCopy(d, a)); |
||||
} else { |
||||
left.add(a); |
||||
} |
||||
} |
||||
} |
||||
} else { |
||||
left.addAll(adds); |
||||
} |
||||
} else { |
||||
// We have many adds to many deletes: score all the adds against
|
||||
// all the deletes by path name, take the best matches, pair
|
||||
// them as renames, then call the rest copies
|
||||
List<DiffEntry> dels = (List<DiffEntry>) o; |
||||
long[] matrix = new long[dels.size() * adds.size()]; |
||||
int mNext = 0; |
||||
for (int addIdx = 0; addIdx < adds.size(); addIdx++) { |
||||
String addedName = adds.get(addIdx).newName; |
||||
|
||||
for (int delIdx = 0; delIdx < dels.size(); delIdx++) { |
||||
String deletedName = dels.get(delIdx).oldName; |
||||
|
||||
int score = SimilarityRenameDetector.nameScore(addedName, deletedName); |
||||
matrix[mNext] = SimilarityRenameDetector.encode(score, addIdx, delIdx); |
||||
mNext++; |
||||
} |
||||
} |
||||
|
||||
Arrays.sort(matrix); |
||||
|
||||
for (--mNext; mNext >= 0; mNext--) { |
||||
long ent = matrix[mNext]; |
||||
int delIdx = SimilarityRenameDetector.srcFile(ent); |
||||
int addIdx = SimilarityRenameDetector.dstFile(ent); |
||||
DiffEntry d = dels.get(delIdx); |
||||
DiffEntry a = adds.get(addIdx); |
||||
|
||||
if (a == null) { |
||||
pm.update(1); |
||||
continue; // was already matched earlier
|
||||
} |
||||
|
||||
ChangeType type; |
||||
if (d.changeType == ChangeType.DELETE) { |
||||
// First use of this source file. Tag it as a rename so we
|
||||
// later know it is already been used as a rename, other
|
||||
// matches (if any) will claim themselves as copies instead.
|
||||
//
|
||||
d.changeType = ChangeType.RENAME; |
||||
type = ChangeType.RENAME; |
||||
} else { |
||||
type = ChangeType.COPY; |
||||
} |
||||
|
||||
entries.add(DiffEntry.pair(type, d, a, 100)); |
||||
adds.set(addIdx, null); // Claim the destination was matched.
|
||||
pm.update(1); |
||||
} |
||||
} |
||||
} |
||||
added = left; |
||||
|
||||
deleted = new ArrayList<DiffEntry>(deletedMap.size()); |
||||
for (Object o : deletedMap.values()) { |
||||
if (o instanceof DiffEntry) { |
||||
DiffEntry e = (DiffEntry) o; |
||||
if (e.changeType == ChangeType.DELETE) |
||||
deleted.add(e); |
||||
} else { |
||||
List<DiffEntry> list = (List<DiffEntry>) o; |
||||
for (DiffEntry e : list) { |
||||
if (e.changeType == ChangeType.DELETE) |
||||
deleted.add(e); |
||||
} |
||||
} |
||||
} |
||||
pm.endTask(); |
||||
} |
||||
|
||||
/** |
||||
* Find the best match by file path for a given DiffEntry from a list of |
||||
* DiffEntrys. The returned DiffEntry will be of the same type as <src>. If |
||||
* no DiffEntry can be found that has the same type, this method will return |
||||
* null. |
||||
* |
||||
* @param src |
||||
* the DiffEntry to try to find a match for |
||||
* @param list |
||||
* a list of DiffEntrys to search through |
||||
* @return the DiffEntry from <list> who's file path best matches <src> |
||||
*/ |
||||
private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) { |
||||
DiffEntry best = null; |
||||
int score = -1; |
||||
|
||||
for (DiffEntry d : list) { |
||||
if (sameType(mode(d), mode(src))) { |
||||
int tmp = SimilarityRenameDetector |
||||
.nameScore(path(d), path(src)); |
||||
if (tmp > score) { |
||||
best = d; |
||||
score = tmp; |
||||
} |
||||
} |
||||
} |
||||
|
||||
return best; |
||||
} |
||||
|
||||
@SuppressWarnings("unchecked") |
||||
private HashMap<AbbreviatedObjectId, Object> populateMap( |
||||
List<DiffEntry> diffEntries, ProgressMonitor pm) { |
||||
HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>(); |
||||
for (DiffEntry de : diffEntries) { |
||||
Object old = map.put(id(de), de); |
||||
if (old instanceof DiffEntry) { |
||||
ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2); |
||||
list.add((DiffEntry) old); |
||||
list.add(de); |
||||
map.put(id(de), list); |
||||
} else if (old != null) { |
||||
// Must be a list of DiffEntries
|
||||
((List<DiffEntry>) old).add(de); |
||||
map.put(id(de), old); |
||||
} |
||||
pm.update(1); |
||||
} |
||||
return map; |
||||
} |
||||
|
||||
private static String path(DiffEntry de) { |
||||
return de.changeType == ChangeType.DELETE ? de.oldName : de.newName; |
||||
} |
||||
|
||||
private static FileMode mode(DiffEntry de) { |
||||
return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode; |
||||
} |
||||
|
||||
private static AbbreviatedObjectId id(DiffEntry de) { |
||||
return de.changeType == ChangeType.DELETE ? de.oldId : de.newId; |
||||
} |
||||
|
||||
static boolean sameType(FileMode a, FileMode b) { |
||||
// Files have to be of the same type in order to rename them.
|
||||
// We would never want to rename a file to a gitlink, or a
|
||||
// symlink to a file.
|
||||
//
|
||||
int aType = a.getBits() & FileMode.TYPE_MASK; |
||||
int bType = b.getBits() & FileMode.TYPE_MASK; |
||||
return aType == bType; |
||||
} |
||||
|
||||
private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) { |
||||
return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE); |
||||
} |
||||
|
||||
private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) { |
||||
return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE); |
||||
} |
||||
} |
@ -0,0 +1,297 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import java.util.Arrays; |
||||
|
||||
import org.eclipse.jgit.lib.ObjectLoader; |
||||
|
||||
/** |
||||
* Index structure of lines/blocks in one file. |
||||
* <p> |
||||
* This structure can be used to compute an approximation of the similarity |
||||
* between two files. The index is used by {@link SimilarityRenameDetector} to |
||||
* compute scores between files. |
||||
* <p> |
||||
* To save space in memory, this index uses a space efficient encoding which |
||||
* will not exceed 1 MiB per instance. The index starts out at a smaller size |
||||
* (closer to 2 KiB), but may grow as more distinct blocks within the scanned |
||||
* file are discovered. |
||||
*/ |
||||
class SimilarityIndex { |
||||
/** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */ |
||||
private static final int MAX_HASH_BITS = 17; |
||||
|
||||
/** The {@link #idHash} table will not grow bigger than this, ever. */ |
||||
private static final int MAX_HASH_SIZE = 1 << MAX_HASH_BITS; |
||||
|
||||
/** Prime just before {@link #MAX_HASH_SIZE}. */ |
||||
private static final int P = 131071; |
||||
|
||||
/** |
||||
* Shift to apply before storing a key. |
||||
* <p> |
||||
* Within the 64 bit table record space, we leave the highest bit unset so |
||||
* all values are positive, and we need {@link #MAX_HASH_BITS} bits for the |
||||
* keys. The lower 32 bits are used to count bytes impacted. |
||||
*/ |
||||
private static final int KEY_SHIFT = 64 - 1 - MAX_HASH_BITS; |
||||
|
||||
/** Total size of the file we hashed into the structure. */ |
||||
private long fileSize; |
||||
|
||||
/** Number of non-zero entries in {@link #idHash}. */ |
||||
private int idSize; |
||||
|
||||
/** |
||||
* Pairings of content keys and counters. |
||||
* <p> |
||||
* Slots in the table are actually two ints wedged into a single long. The |
||||
* upper {@link #MAX_HASH_BITS} bits stores the content key, and the |
||||
* remaining lower bits stores the number of bytes associated with that key. |
||||
* Empty slots are denoted by 0, which cannot occur because the count cannot |
||||
* be 0. Values can only be positive, which we enforce during key addition. |
||||
*/ |
||||
private long[] idHash; |
||||
|
||||
SimilarityIndex() { |
||||
idHash = new long[256]; |
||||
} |
||||
|
||||
long getFileSize() { |
||||
return fileSize; |
||||
} |
||||
|
||||
void setFileSize(long size) { |
||||
fileSize = size; |
||||
} |
||||
|
||||
void hash(ObjectLoader obj) { |
||||
byte[] raw = obj.getCachedBytes(); |
||||
setFileSize(raw.length); |
||||
hash(raw, 0, raw.length); |
||||
} |
||||
|
||||
void hash(byte[] raw, int ptr, final int end) { |
||||
while (ptr < end) { |
||||
int hash = 5381; |
||||
int start = ptr; |
||||
|
||||
// Hash one line, or one block, whichever occurs first.
|
||||
do { |
||||
int c = raw[ptr++] & 0xff; |
||||
if (c == '\n') |
||||
break; |
||||
hash = (hash << 5) ^ c; |
||||
} while (ptr < end && ptr - start < 64); |
||||
add(hash, ptr - start); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Sort the internal table so it can be used for efficient scoring. |
||||
* <p> |
||||
* Once sorted, additional lines/blocks cannot be added to the index. |
||||
*/ |
||||
void sort() { |
||||
// Sort the array. All of the empty space will wind up at the front,
|
||||
// because we forced all of the keys to always be positive. Later
|
||||
// we only work with the back half of the array.
|
||||
//
|
||||
Arrays.sort(idHash); |
||||
} |
||||
|
||||
int score(SimilarityIndex dst, int maxScore) { |
||||
long max = Math.max(fileSize, dst.fileSize); |
||||
if (max == 0) |
||||
return maxScore; |
||||
return (int) ((common(dst) * maxScore) / max); |
||||
} |
||||
|
||||
int common(SimilarityIndex dst) { |
||||
return common(this, dst); |
||||
} |
||||
|
||||
private static int common(SimilarityIndex src, SimilarityIndex dst) { |
||||
int srcIdx = src.packedIndex(0); |
||||
int dstIdx = dst.packedIndex(0); |
||||
long[] srcHash = src.idHash; |
||||
long[] dstHash = dst.idHash; |
||||
return common(srcHash, srcIdx, dstHash, dstIdx); |
||||
} |
||||
|
||||
private static int common(long[] srcHash, int srcIdx, //
|
||||
long[] dstHash, int dstIdx) { |
||||
if (srcIdx == srcHash.length || dstIdx == dstHash.length) |
||||
return 0; |
||||
|
||||
int common = 0; |
||||
int srcKey = keyOf(srcHash[srcIdx]); |
||||
int dstKey = keyOf(dstHash[dstIdx]); |
||||
|
||||
for (;;) { |
||||
if (srcKey == dstKey) { |
||||
common += countOf(dstHash[dstIdx]); |
||||
|
||||
if (++srcIdx == srcHash.length) |
||||
break; |
||||
srcKey = keyOf(srcHash[srcIdx]); |
||||
|
||||
if (++dstIdx == dstHash.length) |
||||
break; |
||||
dstKey = keyOf(dstHash[dstIdx]); |
||||
|
||||
} else if (srcKey < dstKey) { |
||||
// Regions of src which do not appear in dst.
|
||||
if (++srcIdx == srcHash.length) |
||||
break; |
||||
srcKey = keyOf(srcHash[srcIdx]); |
||||
|
||||
} else /* if (srcKey > dstKey) */{ |
||||
// Regions of dst which do not appear in dst.
|
||||
if (++dstIdx == dstHash.length) |
||||
break; |
||||
dstKey = keyOf(dstHash[dstIdx]); |
||||
} |
||||
} |
||||
|
||||
return common; |
||||
} |
||||
|
||||
// Testing only
|
||||
int size() { |
||||
return idSize; |
||||
} |
||||
|
||||
// Testing only
|
||||
int key(int idx) { |
||||
return keyOf(idHash[packedIndex(idx)]); |
||||
} |
||||
|
||||
// Testing only
|
||||
long count(int idx) { |
||||
return countOf(idHash[packedIndex(idx)]); |
||||
} |
||||
|
||||
// Brute force approach only for testing.
|
||||
int findIndex(int key) { |
||||
for (int i = 0; i < idSize; i++) |
||||
if (key(i) == key) |
||||
return i; |
||||
return -1; |
||||
} |
||||
|
||||
private int packedIndex(int idx) { |
||||
return (idHash.length - idSize) + idx; |
||||
} |
||||
|
||||
void add(int key, int cnt) { |
||||
key = hash(key); |
||||
int j = slot(key); |
||||
for (;;) { |
||||
long v = idHash[j]; |
||||
if (v == 0) { |
||||
// Empty slot in the table, store here.
|
||||
if (shouldGrow()) { |
||||
grow(); |
||||
j = slot(key); |
||||
continue; |
||||
} |
||||
idHash[j] = (((long) key) << KEY_SHIFT) | cnt; |
||||
idSize++; |
||||
return; |
||||
|
||||
} else if (keyOf(v) == key) { |
||||
// Same key, increment the counter.
|
||||
idHash[j] = v + cnt; |
||||
return; |
||||
|
||||
} else if (++j >= idHash.length) { |
||||
j = 0; |
||||
} |
||||
} |
||||
} |
||||
|
||||
private static int hash(int key) { |
||||
// Make the key fit into our table. Since we have a maximum size
|
||||
// that we cap the table at, all keys get squashed before going
|
||||
// into the table. This prevents overflow.
|
||||
//
|
||||
return (key >>> 1) % P; |
||||
} |
||||
|
||||
private int slot(int key) { |
||||
return key % idHash.length; |
||||
} |
||||
|
||||
private boolean shouldGrow() { |
||||
int n = idHash.length; |
||||
return n < MAX_HASH_SIZE && n <= idSize * 2; |
||||
} |
||||
|
||||
private void grow() { |
||||
long[] oldHash = idHash; |
||||
int oldSize = idHash.length; |
||||
|
||||
idHash = new long[2 * oldSize]; |
||||
for (int i = 0; i < oldSize; i++) { |
||||
long v = oldHash[i]; |
||||
if (v != 0) { |
||||
int j = slot(keyOf(v)); |
||||
while (idHash[j] != 0) |
||||
if (++j >= idHash.length) |
||||
j = 0; |
||||
idHash[j] = v; |
||||
} |
||||
} |
||||
} |
||||
|
||||
private static int keyOf(long v) { |
||||
return (int) (v >>> KEY_SHIFT); |
||||
} |
||||
|
||||
private static int countOf(long v) { |
||||
return (int) v; |
||||
} |
||||
} |
@ -0,0 +1,381 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import java.io.IOException; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.List; |
||||
|
||||
import org.eclipse.jgit.JGitText; |
||||
import org.eclipse.jgit.diff.DiffEntry.ChangeType; |
||||
import org.eclipse.jgit.lib.FileMode; |
||||
import org.eclipse.jgit.lib.NullProgressMonitor; |
||||
import org.eclipse.jgit.lib.ObjectId; |
||||
import org.eclipse.jgit.lib.ProgressMonitor; |
||||
import org.eclipse.jgit.lib.Repository; |
||||
|
||||
class SimilarityRenameDetector { |
||||
/** |
||||
* Number of bits we need to express an index into src or dst list. |
||||
* <p> |
||||
* This must be 28, giving us a limit of 2^28 entries in either list, which |
||||
* is an insane limit of 536,870,912 file names being considered in a single |
||||
* rename pass. The other 8 bits are used to store the score, while staying |
||||
* under 127 so the long doesn't go negative. |
||||
*/ |
||||
private static final int BITS_PER_INDEX = 28; |
||||
|
||||
private static final int INDEX_MASK = (1 << BITS_PER_INDEX) - 1; |
||||
|
||||
private static final int SCORE_SHIFT = 2 * BITS_PER_INDEX; |
||||
|
||||
private final Repository repo; |
||||
|
||||
/** |
||||
* All sources to consider for copies or renames. |
||||
* <p> |
||||
* A source is typically a {@link ChangeType#DELETE} change, but could be |
||||
* another type when trying to perform copy detection concurrently with |
||||
* rename detection. |
||||
*/ |
||||
private List<DiffEntry> srcs; |
||||
|
||||
/** |
||||
* All destinations to consider looking for a rename. |
||||
* <p> |
||||
* A destination is typically an {@link ChangeType#ADD}, as the name has |
||||
* just come into existence, and we want to discover where its initial |
||||
* content came from. |
||||
*/ |
||||
private List<DiffEntry> dsts; |
||||
|
||||
/** |
||||
* Matrix of all examined file pairs, and their scores. |
||||
* <p> |
||||
* The upper 8 bits of each long stores the score, but the score is bounded |
||||
* to be in the range (0, 128] so that the highest bit is never set, and all |
||||
* entries are therefore positive. |
||||
* <p> |
||||
* List indexes to an element of {@link #srcs} and {@link #dsts} are encoded |
||||
* as the lower two groups of 28 bits, respectively, but the encoding is |
||||
* inverted, so that 0 is expressed as {@code (1 << 28) - 1}. This sorts |
||||
* lower list indices later in the matrix, giving precedence to files whose |
||||
* names sort earlier in the tree. |
||||
*/ |
||||
private long[] matrix; |
||||
|
||||
/** Score a pair must exceed to be considered a rename. */ |
||||
private int renameScore = 60; |
||||
|
||||
private List<DiffEntry> out; |
||||
|
||||
SimilarityRenameDetector(Repository repo, List<DiffEntry> srcs, |
||||
List<DiffEntry> dsts) { |
||||
this.repo = repo; |
||||
this.srcs = srcs; |
||||
this.dsts = dsts; |
||||
} |
||||
|
||||
void setRenameScore(int score) { |
||||
renameScore = score; |
||||
} |
||||
|
||||
void compute(ProgressMonitor pm) throws IOException { |
||||
if (pm == null) |
||||
pm = NullProgressMonitor.INSTANCE; |
||||
|
||||
pm.beginTask(JGitText.get().renamesFindingByContent, //
|
||||
2 * srcs.size() * dsts.size()); |
||||
|
||||
int mNext = buildMatrix(pm); |
||||
out = new ArrayList<DiffEntry>(Math.min(mNext, dsts.size())); |
||||
|
||||
// Match rename pairs on a first come, first serve basis until
|
||||
// we have looked at everything that is above our minimum score.
|
||||
//
|
||||
for (--mNext; mNext >= 0; mNext--) { |
||||
long ent = matrix[mNext]; |
||||
int sIdx = srcFile(ent); |
||||
int dIdx = dstFile(ent); |
||||
DiffEntry s = srcs.get(sIdx); |
||||
DiffEntry d = dsts.get(dIdx); |
||||
|
||||
if (d == null) { |
||||
pm.update(1); |
||||
continue; // was already matched earlier
|
||||
} |
||||
|
||||
ChangeType type; |
||||
if (s.changeType == ChangeType.DELETE) { |
||||
// First use of this source file. Tag it as a rename so we
|
||||
// later know it is already been used as a rename, other
|
||||
// matches (if any) will claim themselves as copies instead.
|
||||
//
|
||||
s.changeType = ChangeType.RENAME; |
||||
type = ChangeType.RENAME; |
||||
} else { |
||||
type = ChangeType.COPY; |
||||
} |
||||
|
||||
out.add(DiffEntry.pair(type, s, d, score(ent))); |
||||
dsts.set(dIdx, null); // Claim the destination was matched.
|
||||
pm.update(1); |
||||
} |
||||
|
||||
srcs = compactSrcList(srcs); |
||||
dsts = compactDstList(dsts); |
||||
pm.endTask(); |
||||
} |
||||
|
||||
List<DiffEntry> getMatches() { |
||||
return out; |
||||
} |
||||
|
||||
List<DiffEntry> getLeftOverSources() { |
||||
return srcs; |
||||
} |
||||
|
||||
List<DiffEntry> getLeftOverDestinations() { |
||||
return dsts; |
||||
} |
||||
|
||||
private static List<DiffEntry> compactSrcList(List<DiffEntry> in) { |
||||
ArrayList<DiffEntry> r = new ArrayList<DiffEntry>(in.size()); |
||||
for (DiffEntry e : in) { |
||||
if (e.changeType == ChangeType.DELETE) |
||||
r.add(e); |
||||
} |
||||
return r; |
||||
} |
||||
|
||||
private static List<DiffEntry> compactDstList(List<DiffEntry> in) { |
||||
ArrayList<DiffEntry> r = new ArrayList<DiffEntry>(in.size()); |
||||
for (DiffEntry e : in) { |
||||
if (e != null) |
||||
r.add(e); |
||||
} |
||||
return r; |
||||
} |
||||
|
||||
private int buildMatrix(ProgressMonitor pm) throws IOException { |
||||
// Allocate for the worst-case scenario where every pair has a
|
||||
// score that we need to consider. We might not need that many.
|
||||
//
|
||||
matrix = new long[srcs.size() * dsts.size()]; |
||||
|
||||
long[] srcSizes = new long[srcs.size()]; |
||||
long[] dstSizes = new long[dsts.size()]; |
||||
|
||||
// Init the size arrays to some value that indicates that we haven't
|
||||
// calculated the size yet. Since sizes cannot be negative, -1 will work
|
||||
Arrays.fill(srcSizes, -1); |
||||
Arrays.fill(dstSizes, -1); |
||||
|
||||
// Consider each pair of files, if the score is above the minimum
|
||||
// threshold we need record that scoring in the matrix so we can
|
||||
// later find the best matches.
|
||||
//
|
||||
int mNext = 0; |
||||
for (int srcIdx = 0; srcIdx < srcs.size(); srcIdx++) { |
||||
DiffEntry srcEnt = srcs.get(srcIdx); |
||||
if (!isFile(srcEnt.oldMode)) { |
||||
pm.update(dsts.size()); |
||||
continue; |
||||
} |
||||
|
||||
SimilarityIndex s = hash(srcEnt.oldId.toObjectId()); |
||||
for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) { |
||||
DiffEntry dstEnt = dsts.get(dstIdx); |
||||
|
||||
if (!isFile(dstEnt.newMode)) { |
||||
pm.update(1); |
||||
continue; |
||||
} |
||||
|
||||
if (!RenameDetector.sameType(srcEnt.oldMode, dstEnt.newMode)) { |
||||
pm.update(1); |
||||
continue; |
||||
} |
||||
|
||||
long srcSize = srcSizes[srcIdx]; |
||||
if (srcSize < 0) { |
||||
srcSize = size(srcEnt.oldId.toObjectId()); |
||||
srcSizes[srcIdx] = srcSize; |
||||
} |
||||
|
||||
long dstSize = dstSizes[dstIdx]; |
||||
if (dstSize < 0) { |
||||
dstSize = size(dstEnt.newId.toObjectId()); |
||||
dstSizes[dstIdx] = dstSize; |
||||
} |
||||
|
||||
long max = Math.max(srcSize, dstSize); |
||||
long min = Math.min(srcSize, dstSize); |
||||
if (min * 100 / max < renameScore) { |
||||
// Cannot possibly match, as the file sizes are so different
|
||||
pm.update(1); |
||||
continue; |
||||
} |
||||
|
||||
SimilarityIndex d = hash(dstEnt.newId.toObjectId()); |
||||
int contentScore = s.score(d, 10000); |
||||
|
||||
// nameScore returns a value between 0 and 100, but we want it
|
||||
// to be in the same range as the content score. This allows it
|
||||
// to be dropped into the pretty formula for the final score.
|
||||
int nameScore = nameScore(srcEnt.oldName, dstEnt.newName) * 100; |
||||
|
||||
int score = (contentScore * 99 + nameScore * 1) / 10000; |
||||
|
||||
if (score < renameScore) { |
||||
pm.update(1); |
||||
continue; |
||||
} |
||||
|
||||
matrix[mNext++] = encode(score, srcIdx, dstIdx); |
||||
pm.update(1); |
||||
} |
||||
} |
||||
|
||||
// Sort everything in the range we populated, which might be the
|
||||
// entire matrix, or just a smaller slice if we had some bad low
|
||||
// scoring pairs.
|
||||
//
|
||||
Arrays.sort(matrix, 0, mNext); |
||||
return mNext; |
||||
} |
||||
|
||||
static int nameScore(String a, String b) { |
||||
int aDirLen = a.lastIndexOf("/") + 1; |
||||
int bDirLen = b.lastIndexOf("/") + 1; |
||||
|
||||
int dirMin = Math.min(aDirLen, bDirLen); |
||||
int dirMax = Math.max(aDirLen, bDirLen); |
||||
|
||||
final int dirScoreLtr; |
||||
final int dirScoreRtl; |
||||
|
||||
if (dirMax == 0) { |
||||
dirScoreLtr = 100; |
||||
dirScoreRtl = 100; |
||||
} else { |
||||
int dirSim = 0; |
||||
for (; dirSim < dirMin; dirSim++) { |
||||
if (a.charAt(dirSim) != b.charAt(dirSim)) |
||||
break; |
||||
} |
||||
dirScoreLtr = (dirSim * 100) / dirMax; |
||||
|
||||
if (dirScoreLtr == 100) { |
||||
dirScoreRtl = 100; |
||||
} else { |
||||
for (dirSim = 0; dirSim < dirMin; dirSim++) { |
||||
if (a.charAt(aDirLen - 1 - dirSim) != b.charAt(bDirLen - 1 |
||||
- dirSim)) |
||||
break; |
||||
} |
||||
dirScoreRtl = (dirSim * 100) / dirMax; |
||||
} |
||||
} |
||||
|
||||
int fileMin = Math.min(a.length() - aDirLen, b.length() - bDirLen); |
||||
int fileMax = Math.max(a.length() - aDirLen, b.length() - bDirLen); |
||||
|
||||
int fileSim = 0; |
||||
for (; fileSim < fileMin; fileSim++) { |
||||
if (a.charAt(a.length() - 1 - fileSim) != b.charAt(b.length() - 1 |
||||
- fileSim)) |
||||
break; |
||||
} |
||||
int fileScore = (fileSim * 100) / fileMax; |
||||
|
||||
return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100; |
||||
} |
||||
|
||||
private SimilarityIndex hash(ObjectId objectId) throws IOException { |
||||
SimilarityIndex r = new SimilarityIndex(); |
||||
r.hash(repo.openObject(objectId)); |
||||
r.sort(); |
||||
return r; |
||||
} |
||||
|
||||
private long size(ObjectId objectId) throws IOException { |
||||
return repo.openObject(objectId).getSize(); |
||||
} |
||||
|
||||
private static int score(long value) { |
||||
return (int) (value >>> SCORE_SHIFT); |
||||
} |
||||
|
||||
static int srcFile(long value) { |
||||
return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK); |
||||
} |
||||
|
||||
static int dstFile(long value) { |
||||
return decodeFile(((int) value) & INDEX_MASK); |
||||
} |
||||
|
||||
static long encode(int score, int srcIdx, int dstIdx) { |
||||
return (((long) score) << SCORE_SHIFT) //
|
||||
| (encodeFile(srcIdx) << BITS_PER_INDEX) //
|
||||
| encodeFile(dstIdx); |
||||
} |
||||
|
||||
private static long encodeFile(int idx) { |
||||
// We invert the index so that the first file in the list sorts
|
||||
// later in the table. This permits us to break ties favoring
|
||||
// earlier names over later ones.
|
||||
//
|
||||
return INDEX_MASK - idx; |
||||
} |
||||
|
||||
private static int decodeFile(int v) { |
||||
return INDEX_MASK - v; |
||||
} |
||||
|
||||
private static boolean isFile(FileMode mode) { |
||||
return (mode.getBits() & FileMode.TYPE_MASK) == FileMode.TYPE_FILE; |
||||
} |
||||
} |
@ -0,0 +1,120 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.revwalk; |
||||
|
||||
import java.io.IOException; |
||||
|
||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException; |
||||
import org.eclipse.jgit.errors.MissingObjectException; |
||||
import org.eclipse.jgit.treewalk.TreeWalk; |
||||
import org.eclipse.jgit.treewalk.filter.PathFilter; |
||||
import org.eclipse.jgit.treewalk.filter.TreeFilter; |
||||
|
||||
/** |
||||
* Updates the internal path filter to follow copy/renames. |
||||
* <p> |
||||
* This is a special filter that performs {@code AND(path, ANY_DIFF)}, but also |
||||
* triggers rename detection so that the path node is updated to include a prior |
||||
* file name as the RevWalk traverses history. |
||||
* <p> |
||||
* Results with this filter are unpredictable if the path being followed is a |
||||
* subdirectory. |
||||
*/ |
||||
public class FollowFilter extends TreeFilter { |
||||
/** |
||||
* Create a new tree filter for a user supplied path. |
||||
* <p> |
||||
* Path strings are relative to the root of the repository. If the user's |
||||
* input should be assumed relative to a subdirectory of the repository the |
||||
* caller must prepend the subdirectory's path prior to creating the filter. |
||||
* <p> |
||||
* Path strings use '/' to delimit directories on all platforms. |
||||
* |
||||
* @param path |
||||
* the path to filter on. Must not be the empty string. All |
||||
* trailing '/' characters will be trimmed before string's length |
||||
* is checked or is used as part of the constructed filter. |
||||
* @return a new filter for the requested path. |
||||
* @throws IllegalArgumentException |
||||
* the path supplied was the empty string. |
||||
*/ |
||||
public static FollowFilter create(String path) { |
||||
return new FollowFilter(PathFilter.create(path)); |
||||
} |
||||
|
||||
private final PathFilter path; |
||||
|
||||
FollowFilter(final PathFilter path) { |
||||
this.path = path; |
||||
} |
||||
|
||||
/** @return the path this filter matches. */ |
||||
public String getPath() { |
||||
return path.getPath(); |
||||
} |
||||
|
||||
@Override |
||||
public boolean include(final TreeWalk walker) |
||||
throws MissingObjectException, IncorrectObjectTypeException, |
||||
IOException { |
||||
return path.include(walker) && ANY_DIFF.include(walker); |
||||
} |
||||
|
||||
@Override |
||||
public boolean shouldBeRecursive() { |
||||
return path.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive(); |
||||
} |
||||
|
||||
@Override |
||||
public TreeFilter clone() { |
||||
return new FollowFilter(path.clone()); |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return "(FOLLOW(" + path.toString() + ")" //
|
||||
+ " AND " //
|
||||
+ ANY_DIFF.toString() + ")"; |
||||
} |
||||
} |
Loading…
Reference in new issue