Browse Source
Add some tests which make sure that the diff algorithm really behaves in the promised O(N*D) manner. These tests compute diffs between multiple big chunks of data, measure the time needed for computing the diffs and fail if the measured times are off O(N*D) by more than a factor of 10. Signed-off-by: Christian Halstrick <christian.halstrick@sap.com> Change-Id: I8e1e0be60299472828718371b231f1d8a9dc21a7 Signed-off-by: Robin Rosenberg <robin.rosenberg@dewire.com> stable-0.7
Christian Halstrick
15 years ago
committed by
Robin Rosenberg
3 changed files with 397 additions and 0 deletions
@ -0,0 +1,90 @@
|
||||
/* |
||||
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com> |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
public class DiffTestDataGenerator {
	/**
	 * Generate a sequence of characters in ascending order. The first
	 * character is a space. Every subsequent character has an ASCII code one
	 * greater than the ASCII code of the preceding character. One exception:
	 * the character which follows '~' is again a ' '.
	 *
	 * @param len
	 *            length of the String to be returned, must not be negative
	 * @return the sequence of characters as String
	 * @throws IllegalArgumentException
	 *             if <code>len</code> is negative
	 */
	public static String generateSequence(int len) {
		return generateSequence(len, 0, 0);
	}

	/**
	 * Generate a sequence of characters similar to the one returned by
	 * {@link #generateSequence(int)}. But this time in each chunk of
	 * <code>skipPeriod</code> characters the last <code>skipLength</code>
	 * characters are left out. By calling this method twice with two
	 * different prime skipPeriod values and short skipLength values you create
	 * test data which is similar to what programmers do to their source code -
	 * huge files with only few insertions/deletions/changes.
	 *
	 * @param len
	 *            length of the String to be returned, must not be negative
	 * @param skipPeriod
	 *            size of the chunks in which characters are left out;
	 *            <code>0</code> disables skipping (then
	 *            <code>skipLength</code> is ignored), must not be negative
	 * @param skipLength
	 *            number of characters left out at the end of each chunk; if
	 *            <code>skipPeriod</code> is positive this has to be in the
	 *            range <code>0 &lt;= skipLength &lt; skipPeriod</code>
	 * @return the sequence of characters as String
	 * @throws IllegalArgumentException
	 *             if one of the arguments is outside of the documented range
	 */
	public static String generateSequence(int len, int skipPeriod,
			int skipLength) {
		if (len < 0) {
			throw new IllegalArgumentException(
					"len must not be negative: " + len);
		}
		if (skipPeriod < 0) {
			throw new IllegalArgumentException(
					"skipPeriod must not be negative: " + skipPeriod);
		}
		// If a whole chunk (or more) is skipped, no character would ever be
		// appended and the loop below could not make any progress.
		if (skipPeriod > 0 && (skipLength < 0 || skipLength >= skipPeriod)) {
			throw new IllegalArgumentException("skipLength must be in [0, "
					+ skipPeriod + "): " + skipLength);
		}

		StringBuilder text = new StringBuilder(len);
		// index inside a chunk from which on characters are left out
		int skipStart = skipPeriod - skipLength;
		int skippedChars = 0;
		// i counts generated positions, i - skippedChars the appended ones
		for (int i = 0; i - skippedChars < len; ++i) {
			if (skipPeriod == 0 || i % skipPeriod < skipStart) {
				// cycle through the 95 printable ASCII characters ' '..'~'
				text.append((char) (32 + i % 95));
			} else {
				skippedChars++;
			}
		}
		return text.toString();
	}
}
@ -0,0 +1,196 @@
|
||||
/* |
||||
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com> |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.diff; |
||||
|
||||
import java.text.DecimalFormat; |
||||
import java.text.NumberFormat; |
||||
import java.util.Collections; |
||||
import java.util.Comparator; |
||||
import java.util.LinkedList; |
||||
import java.util.List; |
||||
|
||||
import junit.framework.TestCase; |
||||
|
||||
import org.eclipse.jgit.util.CPUTimeStopWatch; |
||||
|
||||
/** |
||||
* Test cases for the performance of the diff implementation. The tests test |
||||
* that the performance of the MyersDiff algorithm is really O(N*D). Means the |
||||
* time for computing the diff between a and b should depend on the product of |
||||
* a.length+b.length and the number of found differences. The tests compute |
||||
* diffs between chunks of different length, measure the needed time and check |
||||
* that time/(N*D) does not differ more than a certain factor (currently 10) |
||||
*/ |
||||
public class MyersDiffPerformanceTest extends TestCase { |
||||
private static final long longTaskBoundary = 5000000000L; |
||||
|
||||
private static final int minCPUTimerTicks = 10; |
||||
|
||||
private static final int maxFactor = 15; |
||||
|
||||
private CPUTimeStopWatch stopwatch=CPUTimeStopWatch.createInstance(); |
||||
|
||||
public class PerfData { |
||||
private NumberFormat fmt = new DecimalFormat("#.##E0"); |
||||
|
||||
public long runningTime; |
||||
|
||||
public long D; |
||||
|
||||
public long N; |
||||
|
||||
private double p1 = -1; |
||||
|
||||
private double p2 = -1; |
||||
|
||||
public double perf1() { |
||||
if (p1 < 0) |
||||
p1 = runningTime / ((double) N * D); |
||||
return p1; |
||||
} |
||||
|
||||
public double perf2() { |
||||
if (p2 < 0) |
||||
p2 = runningTime / ((double) N * D * D); |
||||
return p2; |
||||
} |
||||
|
||||
public String toString() { |
||||
return ("diffing " + N / 2 + " bytes took " + runningTime |
||||
+ " ns. N=" + N + ", D=" + D + ", time/(N*D):" |
||||
+ fmt.format(perf1()) + ", time/(N*D^2):" + fmt |
||||
.format(perf2())); |
||||
} |
||||
} |
||||
|
||||
public static Comparator<PerfData> getComparator(final int whichPerf) { |
||||
return new Comparator<PerfData>() { |
||||
public int compare(PerfData o1, PerfData o2) { |
||||
double p1 = (whichPerf == 1) ? o1.perf1() : o1.perf2(); |
||||
double p2 = (whichPerf == 1) ? o2.perf1() : o2.perf2(); |
||||
return (p1 < p2) ? -1 : (p1 > p2) ? 1 : 0; |
||||
} |
||||
}; |
||||
} |
||||
|
||||
public void test() { |
||||
if (stopwatch!=null) { |
||||
List<PerfData> perfData = new LinkedList<PerfData>(); |
||||
perfData.add(test(10000)); |
||||
perfData.add(test(20000)); |
||||
perfData.add(test(50000)); |
||||
perfData.add(test(80000)); |
||||
perfData.add(test(99999)); |
||||
perfData.add(test(999999)); |
||||
|
||||
Comparator<PerfData> c = getComparator(1); |
||||
double factor = Collections.max(perfData, c).perf1() |
||||
/ Collections.min(perfData, c).perf1(); |
||||
assertTrue( |
||||
"minimun and maximum of performance-index t/(N*D) differed too much. Measured factor of " |
||||
+ factor |
||||
+ " (maxFactor=" |
||||
+ maxFactor |
||||
+ "). Perfdata=<" + perfData.toString() + ">", |
||||
factor < maxFactor); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Tests the performance of MyersDiff for texts which are similar (not |
||||
* random data). The CPU time is measured and returned. Because of bad |
||||
* accuracy of CPU time information the diffs are repeated. During each |
||||
* repetition the interim CPU time is checked. The diff operation is |
||||
* repeated until we have seen the CPU time clock changed its value at least |
||||
* {@link #minCPUTimerTicks} times. |
||||
* |
||||
* @param characters |
||||
* the size of the diffed character sequences. |
||||
* @return performance data |
||||
*/ |
||||
private PerfData test(int characters) { |
||||
PerfData ret = new PerfData(); |
||||
String a = DiffTestDataGenerator.generateSequence(characters, 971, 3); |
||||
String b = DiffTestDataGenerator.generateSequence(characters, 1621, 5); |
||||
CharArray ac = new CharArray(a); |
||||
CharArray bc = new CharArray(b); |
||||
MyersDiff myersDiff = null; |
||||
int cpuTimeChanges = 0; |
||||
long lastReadout = 0; |
||||
long interimTime = 0; |
||||
int repetitions = 0; |
||||
stopwatch.start(); |
||||
while (cpuTimeChanges < minCPUTimerTicks && interimTime < longTaskBoundary) { |
||||
myersDiff = new MyersDiff(ac, bc); |
||||
repetitions++; |
||||
interimTime = stopwatch.readout(); |
||||
if (interimTime != lastReadout) { |
||||
cpuTimeChanges++; |
||||
lastReadout = interimTime; |
||||
} |
||||
} |
||||
ret.runningTime = stopwatch.stop() / repetitions; |
||||
ret.N = (ac.size() + bc.size()); |
||||
ret.D = myersDiff.getEdits().size(); |
||||
|
||||
return ret; |
||||
} |
||||
|
||||
private static class CharArray implements Sequence { |
||||
private final char[] array; |
||||
|
||||
public CharArray(String s) { |
||||
array = s.toCharArray(); |
||||
} |
||||
|
||||
public int size() { |
||||
return array.length; |
||||
} |
||||
|
||||
public boolean equals(int i, Sequence other, int j) { |
||||
CharArray o = (CharArray) other; |
||||
return array[i] == o.array[j]; |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,111 @@
|
||||
/* |
||||
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com> |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.util; |
||||
|
||||
import java.lang.management.ManagementFactory; |
||||
import java.lang.management.ThreadMXBean; |
||||
|
||||
/** |
||||
* A simple stopwatch which measures elapsed CPU time of the current thread. CPU |
||||
* time is the time spent on executing your own code plus the time spent on |
||||
* executing operating system calls triggered by your application. |
||||
* <p> |
||||
* This stopwatch needs a VM which supports getting CPU Time information for the |
||||
* current thread. The static method createInstance() will take care to return |
||||
* only a new instance of this class if the VM is capable of returning CPU time. |
||||
*/ |
||||
public class CPUTimeStopWatch {
	private static final ThreadMXBean mxBean = ManagementFactory
			.getThreadMXBean();

	/** CPU time of the current thread at the last call to {@link #start()} */
	private long start;

	/**
	 * <code>true</code> between a call to {@link #start()} and the next call
	 * to {@link #stop()}. Kept separately because 0 is a valid value for
	 * {@link #start} and therefore can't be used to mark a stopped watch.
	 */
	private boolean running;

	/**
	 * use this method instead of the constructor to be sure that the underlying
	 * VM provides all features needed by this class.
	 *
	 * @return a new instance of {@link CPUTimeStopWatch} or <code>null</code>
	 *         if the VM does not support getting CPU time information for
	 *         the current thread or if CPU time measurement is disabled
	 */
	public static CPUTimeStopWatch createInstance() {
		// When CPU time measurement is supported but disabled the VM reports
		// -1 as CPU time, which is useless for measuring.
		if (mxBean.isCurrentThreadCpuTimeSupported()
				&& mxBean.isThreadCpuTimeEnabled()) {
			return new CPUTimeStopWatch();
		}
		return null;
	}

	/**
	 * Starts the stopwatch. If the stopwatch is already started this will
	 * restart the stopwatch.
	 */
	public void start() {
		start = mxBean.getCurrentThreadCpuTime();
		running = true;
	}

	/**
	 * Stops the stopwatch and return the elapsed CPU time in nanoseconds.
	 * Should be called only on started stopwatches.
	 *
	 * @return the elapsed CPU time in nanoseconds. When called on non-started
	 *         stopwatches (either because {@link #start()} was never called or
	 *         {@link #stop()} was called after the last call to
	 *         {@link #start()}) this method will return 0.
	 */
	public long stop() {
		long cpuTime = readout();
		running = false;
		return cpuTime;
	}

	/**
	 * Return the elapsed CPU time in nanoseconds. In contrast to
	 * {@link #stop()} the stopwatch will continue to run after this call.
	 *
	 * @return the elapsed CPU time in nanoseconds. When called on non-started
	 *         stopwatches (either because {@link #start()} was never called or
	 *         {@link #stop()} was called after the last call to
	 *         {@link #start()}) this method will return 0.
	 */
	public long readout() {
		return running ? mxBean.getCurrentThreadCpuTime() - start : 0;
	}
}
Loading…
Reference in new issue