Browse Source
When working on a difference algorithm's implementation, its generally more important to care about how it behaves on real-world inputs than it does on fake inputs created for unit test cases. Run each implementation against a number of real-world repositories, looking at changes between files in each commit. This gives a better picture of how a particular algorithm performs. This test suite run against JGit and linux-2.6 with the current available algorithms shows HistogramDiff always out-performs MyersDiff, and by a wide margin on the linux-2.6 sources. As HistogramDiff has similar output properties as PatienceDiff, the resulting edits are probably also more human-readable. These test results show that HistogramDiff is a good choice for the default implementation, and also show that PatienceDiff isn't worth keeping. jgit: start at baa83ae 2686 files, 760 commits N= 3 min lines, 3016 max lines Algorithm Time(ns) ( Time(ns) on Time(ns) on ) ( N=3 N=3016 ) --------------------------------------------------------------------- histogram_myers 314652100 ( 3900 298100 ) histogram 315973000 ( 3800 302100 ) patience 774724900 ( 4500 347900 ) patience_histogram_myers 786332800 ( 3700 351200 ) myers 819359300 ( 4100 379100 ) patience_myers 843416700 ( 3800 348000 ) linux-2.6.git: start at 85a3318 4001 files, 2680 commits N= 2 min lines, 39098 max lines Algorithm Time(ns) ( Time(ns) on Time(ns) on ) ( N=2 N=39098 ) --------------------------------------------------------------------- histogram_myers 1229870000 ( 5900 2642700 ) histogram 1235654100 ( 6000 2695400 ) patience 3856546000 ( 5900 2627700 ) patience_histogram_myers 3866728100 ( 7000 2624000 ) patience_myers 4004875300 ( 8000 2651700 ) myers 9794679000 ( 7200 2716200 ) Change-Id: I2502684d31f7851e720356820d04d8cf767f7229 Signed-off-by: Shawn O. Pearce <spearce@spearce.org>stable-0.10
Shawn O. Pearce
14 years ago
committed by
Chris Aniszczyk
2 changed files with 401 additions and 0 deletions
@ -0,0 +1,400 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.pgm.debug; |
||||
|
||||
import java.io.File; |
||||
import java.lang.management.ManagementFactory; |
||||
import java.lang.management.ThreadMXBean; |
||||
import java.lang.reflect.Field; |
||||
import java.util.ArrayList; |
||||
import java.util.Collections; |
||||
import java.util.Comparator; |
||||
import java.util.List; |
||||
|
||||
import org.eclipse.jgit.diff.DiffAlgorithm; |
||||
import org.eclipse.jgit.diff.HistogramDiff; |
||||
import org.eclipse.jgit.diff.MyersDiff; |
||||
import org.eclipse.jgit.diff.PatienceDiff; |
||||
import org.eclipse.jgit.diff.RawText; |
||||
import org.eclipse.jgit.diff.RawTextComparator; |
||||
import org.eclipse.jgit.errors.LargeObjectException; |
||||
import org.eclipse.jgit.lib.AbbreviatedObjectId; |
||||
import org.eclipse.jgit.lib.AnyObjectId; |
||||
import org.eclipse.jgit.lib.Constants; |
||||
import org.eclipse.jgit.lib.FileMode; |
||||
import org.eclipse.jgit.lib.MutableObjectId; |
||||
import org.eclipse.jgit.lib.ObjectId; |
||||
import org.eclipse.jgit.lib.ObjectReader; |
||||
import org.eclipse.jgit.lib.Repository; |
||||
import org.eclipse.jgit.lib.RepositoryBuilder; |
||||
import org.eclipse.jgit.lib.RepositoryCache; |
||||
import org.eclipse.jgit.pgm.CLIText; |
||||
import org.eclipse.jgit.pgm.TextBuiltin; |
||||
import org.eclipse.jgit.revwalk.RevCommit; |
||||
import org.eclipse.jgit.revwalk.RevWalk; |
||||
import org.eclipse.jgit.treewalk.TreeWalk; |
||||
import org.eclipse.jgit.treewalk.filter.TreeFilter; |
||||
import org.eclipse.jgit.util.FS; |
||||
import org.kohsuke.args4j.Option; |
||||
|
||||
class DiffAlgorithms extends TextBuiltin { |
||||
|
||||
final Algorithm myers = new Algorithm() { |
||||
DiffAlgorithm create() { |
||||
return MyersDiff.INSTANCE; |
||||
} |
||||
}; |
||||
|
||||
final Algorithm histogram = new Algorithm() { |
||||
DiffAlgorithm create() { |
||||
HistogramDiff d = new HistogramDiff(); |
||||
d.setFallbackAlgorithm(null); |
||||
return d; |
||||
} |
||||
}; |
||||
|
||||
final Algorithm histogram_myers = new Algorithm() { |
||||
DiffAlgorithm create() { |
||||
HistogramDiff d = new HistogramDiff(); |
||||
d.setFallbackAlgorithm(MyersDiff.INSTANCE); |
||||
return d; |
||||
} |
||||
}; |
||||
|
||||
final Algorithm patience = new Algorithm() { |
||||
DiffAlgorithm create() { |
||||
PatienceDiff d = new PatienceDiff(); |
||||
d.setFallbackAlgorithm(null); |
||||
return d; |
||||
} |
||||
}; |
||||
|
||||
final Algorithm patience_myers = new Algorithm() { |
||||
DiffAlgorithm create() { |
||||
PatienceDiff d = new PatienceDiff(); |
||||
d.setFallbackAlgorithm(MyersDiff.INSTANCE); |
||||
return d; |
||||
} |
||||
}; |
||||
|
||||
final Algorithm patience_histogram_myers = new Algorithm() { |
||||
DiffAlgorithm create() { |
||||
HistogramDiff d2 = new HistogramDiff(); |
||||
d2.setFallbackAlgorithm(MyersDiff.INSTANCE); |
||||
PatienceDiff d1 = new PatienceDiff(); |
||||
d1.setFallbackAlgorithm(d2); |
||||
return d1; |
||||
} |
||||
}; |
||||
|
||||
// -----------------------------------------------------------------------
|
||||
//
|
||||
// Implementation of the suite lives below this line.
|
||||
//
|
||||
//
|
||||
|
||||
@Option(name = "--algorithm", multiValued = true, metaVar = "NAME", usage = "Enable algorithm(s)") |
||||
List<String> algorithms = new ArrayList<String>(); |
||||
|
||||
@Option(name = "--text-limit", metaVar = "LIMIT", usage = "Maximum size in KiB to scan per file revision") |
||||
int textLimit = 15 * 1024; // 15 MiB as later we do * 1024.
|
||||
|
||||
@Option(name = "--repository", aliases = { "-r" }, multiValued = true, metaVar = "GIT_DIR", usage = "Repository to scan") |
||||
List<File> gitDirs = new ArrayList<File>(); |
||||
|
||||
@Option(name = "--count", metaVar = "LIMIT", usage = "Number of file revisions to be compared") |
||||
int count = 0; // unlimited
|
||||
|
||||
private final RawTextComparator cmp = RawTextComparator.DEFAULT; |
||||
|
||||
private ThreadMXBean mxBean; |
||||
|
||||
@Override |
||||
protected boolean requiresRepository() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
protected void run() throws Exception { |
||||
mxBean = ManagementFactory.getThreadMXBean(); |
||||
if (!mxBean.isCurrentThreadCpuTimeSupported()) |
||||
throw die("Current thread CPU time not supported on this JRE"); |
||||
|
||||
if (gitDirs.isEmpty()) { |
||||
RepositoryBuilder rb = new RepositoryBuilder() //
|
||||
.setGitDir(gitdir) //
|
||||
.readEnvironment() //
|
||||
.findGitDir(); |
||||
if (rb.getGitDir() == null) |
||||
throw die(CLIText.get().cantFindGitDirectory); |
||||
gitDirs.add(rb.getGitDir()); |
||||
} |
||||
|
||||
for (File dir : gitDirs) { |
||||
RepositoryBuilder rb = new RepositoryBuilder(); |
||||
if (RepositoryCache.FileKey.isGitRepository(dir, FS.DETECTED)) |
||||
rb.setGitDir(dir); |
||||
else |
||||
rb.findGitDir(dir); |
||||
|
||||
Repository db = rb.build(); |
||||
try { |
||||
run(db); |
||||
} finally { |
||||
db.close(); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private void run(Repository db) throws Exception { |
||||
List<Test> all = init(); |
||||
|
||||
long files = 0; |
||||
int commits = 0; |
||||
int minN = Integer.MAX_VALUE; |
||||
int maxN = 0; |
||||
|
||||
AbbreviatedObjectId startId; |
||||
ObjectReader or = db.newObjectReader(); |
||||
try { |
||||
final MutableObjectId id = new MutableObjectId(); |
||||
RevWalk rw = new RevWalk(or); |
||||
TreeWalk tw = new TreeWalk(or); |
||||
tw.setFilter(TreeFilter.ANY_DIFF); |
||||
tw.setRecursive(true); |
||||
|
||||
ObjectId start = db.resolve(Constants.HEAD); |
||||
startId = or.abbreviate(start); |
||||
rw.markStart(rw.parseCommit(start)); |
||||
for (;;) { |
||||
final RevCommit c = rw.next(); |
||||
if (c == null) |
||||
break; |
||||
commits++; |
||||
if (c.getParentCount() != 1) |
||||
continue; |
||||
|
||||
RevCommit p = c.getParent(0); |
||||
rw.parseHeaders(p); |
||||
tw.reset(new AnyObjectId[] { p.getTree(), c.getTree() }); |
||||
while (tw.next()) { |
||||
if (!isFile(tw, 0) || !isFile(tw, 1)) |
||||
continue; |
||||
|
||||
byte[] raw0; |
||||
try { |
||||
tw.getObjectId(id, 0); |
||||
raw0 = or.open(id).getCachedBytes(textLimit * 1024); |
||||
} catch (LargeObjectException tooBig) { |
||||
continue; |
||||
} |
||||
if (RawText.isBinary(raw0)) |
||||
continue; |
||||
|
||||
byte[] raw1; |
||||
try { |
||||
tw.getObjectId(id, 1); |
||||
raw1 = or.open(id).getCachedBytes(textLimit * 1024); |
||||
} catch (LargeObjectException tooBig) { |
||||
continue; |
||||
} |
||||
if (RawText.isBinary(raw1)) |
||||
continue; |
||||
|
||||
RawText txt0 = new RawText(raw0); |
||||
RawText txt1 = new RawText(raw1); |
||||
|
||||
minN = Math.min(minN, txt0.size() + txt1.size()); |
||||
maxN = Math.max(maxN, txt0.size() + txt1.size()); |
||||
|
||||
for (Test test : all) |
||||
testOne(test, txt0, txt1); |
||||
files++; |
||||
} |
||||
if (count > 0 && files > count) |
||||
break; |
||||
} |
||||
} finally { |
||||
or.release(); |
||||
} |
||||
|
||||
Collections.sort(all, new Comparator<Test>() { |
||||
public int compare(Test a, Test b) { |
||||
int cmp = Long.signum(a.runningTimeNanos - b.runningTimeNanos); |
||||
if (cmp == 0) |
||||
cmp = a.algorithm.name.compareTo(b.algorithm.name); |
||||
return cmp; |
||||
} |
||||
}); |
||||
|
||||
if (db.getDirectory() != null) { |
||||
String name = db.getDirectory().getName(); |
||||
File parent = db.getDirectory().getParentFile(); |
||||
if (name.equals(Constants.DOT_GIT_EXT) && parent != null) |
||||
name = parent.getName(); |
||||
out.println(name + ": start at " + startId.name()); |
||||
} |
||||
|
||||
out.format(" %12d files, %8d commits\n", files, commits); |
||||
out.format(" N=%10d min lines, %8d max lines\n", minN, maxN); |
||||
|
||||
out.format("%-25s %12s ( %12s %12s )\n", //
|
||||
"Algorithm", "Time(ns)", "Time(ns) on", "Time(ns) on"); |
||||
out.format("%-25s %12s ( %12s %12s )\n", //
|
||||
"", "", "N=" + minN, "N=" + maxN); |
||||
out.println("-----------------------------------------------------" |
||||
+ "----------------"); |
||||
|
||||
for (Test test : all) { |
||||
out.format("%-25s %12d ( %12d %12d )", //
|
||||
test.algorithm.name, //
|
||||
test.runningTimeNanos, //
|
||||
test.minN.runningTimeNanos, //
|
||||
test.maxN.runningTimeNanos); |
||||
out.println(); |
||||
} |
||||
out.println(); |
||||
out.flush(); |
||||
} |
||||
|
||||
private static boolean isFile(TreeWalk tw, int ithTree) { |
||||
FileMode fm = tw.getFileMode(ithTree); |
||||
return FileMode.REGULAR_FILE.equals(fm) |
||||
|| FileMode.EXECUTABLE_FILE.equals(fm); |
||||
} |
||||
|
||||
private static final int minCPUTimerTicks = 10; |
||||
|
||||
private void testOne(Test test, RawText a, RawText b) { |
||||
final DiffAlgorithm da = test.algorithm.create(); |
||||
int cpuTimeChanges = 0; |
||||
int cnt = 0; |
||||
|
||||
final long startTime = mxBean.getCurrentThreadCpuTime(); |
||||
long lastTime = startTime; |
||||
while (cpuTimeChanges < minCPUTimerTicks) { |
||||
da.diff(cmp, a, b); |
||||
cnt++; |
||||
|
||||
long interimTime = mxBean.getCurrentThreadCpuTime(); |
||||
if (interimTime != lastTime) { |
||||
cpuTimeChanges++; |
||||
lastTime = interimTime; |
||||
} |
||||
} |
||||
final long stopTime = mxBean.getCurrentThreadCpuTime(); |
||||
final long runTime = (stopTime - startTime) / cnt; |
||||
|
||||
test.runningTimeNanos += runTime; |
||||
|
||||
if (test.minN == null || a.size() + b.size() < test.minN.n) { |
||||
test.minN = new Run(); |
||||
test.minN.n = a.size() + b.size(); |
||||
test.minN.runningTimeNanos = runTime; |
||||
} |
||||
|
||||
if (test.maxN == null || a.size() + b.size() > test.maxN.n) { |
||||
test.maxN = new Run(); |
||||
test.maxN.n = a.size() + b.size(); |
||||
test.maxN.runningTimeNanos = runTime; |
||||
} |
||||
} |
||||
|
||||
private List<Test> init() { |
||||
List<Test> all = new ArrayList<Test>(); |
||||
|
||||
try { |
||||
for (Field f : DiffAlgorithms.class.getDeclaredFields()) { |
||||
if (f.getType() == Algorithm.class) { |
||||
f.setAccessible(true); |
||||
Algorithm alg = (Algorithm) f.get(this); |
||||
alg.name = f.getName(); |
||||
if (included(alg.name, algorithms)) { |
||||
Test test = new Test(); |
||||
test.algorithm = alg; |
||||
all.add(test); |
||||
} |
||||
} |
||||
} |
||||
} catch (IllegalArgumentException e) { |
||||
throw die("Cannot determine names", e); |
||||
} catch (IllegalAccessException e) { |
||||
throw die("Cannot determine names", e); |
||||
} |
||||
|
||||
return all; |
||||
} |
||||
|
||||
private static boolean included(String name, List<String> want) { |
||||
if (want.isEmpty()) |
||||
return true; |
||||
for (String s : want) { |
||||
if (s.equalsIgnoreCase(name)) |
||||
return true; |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
private static abstract class Algorithm { |
||||
String name; |
||||
|
||||
abstract DiffAlgorithm create(); |
||||
} |
||||
|
||||
private static class Test { |
||||
Algorithm algorithm; |
||||
|
||||
long runningTimeNanos; |
||||
|
||||
Run minN; |
||||
|
||||
Run maxN; |
||||
} |
||||
|
||||
private static class Run { |
||||
int n; |
||||
|
||||
long runningTimeNanos; |
||||
} |
||||
} |
Loading…
Reference in new issue