Browse Source

Add performance tests for MyersDiff

Add some tests which make sure that the diff algorithm really behaves in the
promised O(N*D) manner. These tests compute diffs between multiple big chunks
of data, measure the time needed to compute the diffs, and fail if the measured
times are off O(N*D) by more than a factor of 10.

Signed-off-by: Christian Halstrick <christian.halstrick@sap.com>
Change-Id: I8e1e0be60299472828718371b231f1d8a9dc21a7
Signed-off-by: Robin Rosenberg <robin.rosenberg@dewire.com>
stable-0.7
Christian Halstrick 15 years ago committed by Robin Rosenberg
parent
commit
b0772d7a5c
  1. 90
      org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java
  2. 196
      org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java
  3. 111
      org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java

90
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java

@ -0,0 +1,90 @@
/*
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
public class DiffTestDataGenerator {
    /**
     * Generate a sequence of characters in ascending order. The first
     * character is a space. All subsequent characters have an ASCII code one
     * greater than the ASCII code of the preceding character. One exception:
     * the character which follows '~' is again a ' '.
     *
     * @param len
     *            length of the String to be returned
     * @return the sequence of characters as String
     * @throws IllegalArgumentException
     *             if {@code len} is negative
     */
    public static String generateSequence(int len) {
        return generateSequence(len, 0, 0);
    }

    /**
     * Generate a sequence of characters similar to the one returned by
     * {@link #generateSequence(int)}. But this time in each chunk of
     * {@code skipPeriod} characters the last {@code skipLength} characters
     * are left out. By calling this method twice with two different prime
     * skipPeriod values and short skipLength values you create test data
     * which is similar to what programmers do to their source code - huge
     * files with only few insertions/deletions/changes.
     *
     * @param len
     *            length of the String to be returned
     * @param skipPeriod
     *            size of the chunks in which characters are left out;
     *            {@code 0} disables skipping
     * @param skipLength
     *            number of characters left out at the end of each chunk; must
     *            be smaller than {@code skipPeriod} when skipping is enabled
     * @return the sequence of characters as String
     * @throws IllegalArgumentException
     *             if {@code len} or {@code skipPeriod} is negative, or if
     *             {@code skipPeriod} is positive and {@code skipLength} is
     *             not smaller than it (every character would be skipped and
     *             the requested length could never be reached)
     */
    public static String generateSequence(int len, int skipPeriod,
            int skipLength) {
        if (len < 0)
            throw new IllegalArgumentException(
                    "len must not be negative: " + len);
        if (skipPeriod < 0)
            throw new IllegalArgumentException(
                    "skipPeriod must not be negative: " + skipPeriod);
        if (skipPeriod > 0 && skipLength >= skipPeriod)
            throw new IllegalArgumentException("skipLength (" + skipLength
                    + ") must be smaller than skipPeriod (" + skipPeriod
                    + ")");

        StringBuilder text = new StringBuilder(len);
        // Position inside a chunk from which characters are dropped.
        int skipStart = skipPeriod - skipLength;
        // i walks the unskipped sequence; the loop ends once enough
        // characters have actually been emitted.
        for (int i = 0; text.length() < len; ++i) {
            if (skipPeriod == 0 || i % skipPeriod < skipStart) {
                // 95 printable ASCII characters, ' ' (32) up to '~' (126).
                text.append((char) (32 + i % 95));
            }
        }
        return text.toString();
    }
}

196
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java

@ -0,0 +1,196 @@
/*
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.diff;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import junit.framework.TestCase;
import org.eclipse.jgit.util.CPUTimeStopWatch;
/**
 * Test cases for the performance of the diff implementation. The tests check
 * that the performance of the MyersDiff algorithm is really O(N*D). This means
 * the time for computing the diff between a and b should depend on the product
 * of a.length+b.length and the number of found differences. The tests compute
 * diffs between chunks of different length, measure the needed time and check
 * that the largest and the smallest time/(N*D) value do not differ by more
 * than a certain factor (see <code>maxFactor</code>, currently 15).
 */
public class MyersDiffPerformanceTest extends TestCase {
    /**
     * Upper bound (in nanoseconds of CPU time, i.e. 5 seconds) after which a
     * single measurement stops repeating the diff.
     */
    private static final long longTaskBoundary = 5000000000L;

    /**
     * Number of times the CPU time readout has to change before a
     * measurement is considered good enough.
     */
    private static final int minCPUTimerTicks = 10;

    /**
     * Largest tolerated ratio between the maximum and the minimum measured
     * time/(N*D) value.
     */
    private static final int maxFactor = 15;

    /**
     * Stopwatch used for all measurements; <code>null</code> when
     * {@link CPUTimeStopWatch#createInstance()} could not provide one, in
     * which case {@link #test()} does nothing.
     */
    private CPUTimeStopWatch stopwatch = CPUTimeStopWatch.createInstance();

    /**
     * Result of one measurement: the averaged running time together with the
     * problem size N and the number of edits D.
     */
    public class PerfData {
        private NumberFormat fmt = new DecimalFormat("#.##E0");

        /** averaged CPU time of a single diff run, in nanoseconds */
        public long runningTime;

        /** number of edits reported by the diff */
        public long D;

        /** sum of the sizes of both diffed sequences */
        public long N;

        // Lazily computed values; a negative value means "not computed yet".
        private double p1 = -1;

        private double p2 = -1;

        /**
         * @return runningTime/(N*D), computed on first use and then cached
         */
        public double perf1() {
            if (p1 < 0)
                p1 = runningTime / ((double) N * D);
            return p1;
        }

        /**
         * @return runningTime/(N*D*D), computed on first use and then cached
         */
        public double perf2() {
            if (p2 < 0)
                p2 = runningTime / ((double) N * D * D);
            return p2;
        }

        @Override
        public String toString() {
            return ("diffing " + N / 2 + " bytes took " + runningTime
                    + " ns. N=" + N + ", D=" + D + ", time/(N*D):"
                    + fmt.format(perf1()) + ", time/(N*D^2):" + fmt
                    .format(perf2()));
        }
    }

    /**
     * Creates a comparator ordering {@link PerfData} by one of its
     * performance indexes.
     *
     * @param whichPerf
     *            <code>1</code> to compare by {@link PerfData#perf1()}, any
     *            other value to compare by {@link PerfData#perf2()}
     * @return the comparator
     */
    public static Comparator<PerfData> getComparator(final int whichPerf) {
        return new Comparator<PerfData>() {
            public int compare(PerfData o1, PerfData o2) {
                double p1 = (whichPerf == 1) ? o1.perf1() : o1.perf2();
                double p2 = (whichPerf == 1) ? o2.perf1() : o2.perf2();
                // Double.compare gives a consistent total order even for NaN
                // (e.g. 0/0 when runningTime and D are both 0), which the
                // plain < / > comparison does not.
                return Double.compare(p1, p2);
            }
        };
    }

    /**
     * Measures diffs of several sizes and asserts that the ratio between the
     * largest and the smallest time/(N*D) value stays below
     * <code>maxFactor</code>. Does nothing if no stopwatch is available.
     */
    public void test() {
        if (stopwatch!=null) {
            List<PerfData> perfData = new LinkedList<PerfData>();
            perfData.add(test(10000));
            perfData.add(test(20000));
            perfData.add(test(50000));
            perfData.add(test(80000));
            perfData.add(test(99999));
            perfData.add(test(999999));
            Comparator<PerfData> c = getComparator(1);
            double factor = Collections.max(perfData, c).perf1()
                    / Collections.min(perfData, c).perf1();
            assertTrue(
                    "minimun and maximum of performance-index t/(N*D) differed too much. Measured factor of "
                            + factor
                            + " (maxFactor="
                            + maxFactor
                            + "). Perfdata=<" + perfData.toString() + ">",
                    factor < maxFactor);
        }
    }

    /**
     * Tests the performance of MyersDiff for texts which are similar (not
     * random data). The CPU time is measured and returned. Because of bad
     * accuracy of CPU time information the diffs are repeated. During each
     * repetition the interim CPU time is checked. The diff operation is
     * repeated until we have seen the CPU time clock change its value at
     * least {@link #minCPUTimerTicks} times, or until the accumulated CPU
     * time reaches {@link #longTaskBoundary}.
     *
     * @param characters
     *            the size of the diffed character sequences.
     * @return performance data
     */
    private PerfData test(int characters) {
        PerfData ret = new PerfData();
        String a = DiffTestDataGenerator.generateSequence(characters, 971, 3);
        String b = DiffTestDataGenerator.generateSequence(characters, 1621, 5);
        CharArray ac = new CharArray(a);
        CharArray bc = new CharArray(b);
        MyersDiff myersDiff = null;
        int cpuTimeChanges = 0;
        long lastReadout = 0;
        long interimTime = 0;
        int repetitions = 0;
        stopwatch.start();
        while (cpuTimeChanges < minCPUTimerTicks && interimTime < longTaskBoundary) {
            myersDiff = new MyersDiff(ac, bc);
            repetitions++;
            interimTime = stopwatch.readout();
            // Count how often the readout actually changed between runs.
            if (interimTime != lastReadout) {
                cpuTimeChanges++;
                lastReadout = interimTime;
            }
        }
        // Average over all repetitions; the loop body ran at least once.
        ret.runningTime = stopwatch.stop() / repetitions;
        ret.N = (ac.size() + bc.size());
        ret.D = myersDiff.getEdits().size();
        return ret;
    }

    /**
     * A {@link Sequence} whose elements are the characters of a String.
     */
    private static class CharArray implements Sequence {
        private final char[] array;

        public CharArray(String s) {
            array = s.toCharArray();
        }

        public int size() {
            return array.length;
        }

        public boolean equals(int i, Sequence other, int j) {
            // Only ever compared against another CharArray in this test.
            CharArray o = (CharArray) other;
            return array[i] == o.array[j];
        }
    }
}

111
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java

@ -0,0 +1,111 @@
/*
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadMXBean;
/**
 * A simple stopwatch which measures elapsed CPU time of the current thread. CPU
 * time is the time spent on executing your own code plus the time spent on
 * executing operating system calls triggered by your application.
 * <p>
 * This stopwatch needs a VM which supports getting CPU time information for the
 * current thread and has CPU time measurement enabled. The static method
 * {@link #createInstance()} takes care to return a new instance of this class
 * only if the VM is capable of returning CPU time.
 */
public class CPUTimeStopWatch {
    private static final ThreadMXBean mxBean = ManagementFactory
            .getThreadMXBean();

    /** CPU time of the current thread at the last call to {@link #start()}. */
    private long start;

    /**
     * Whether the stopwatch is currently running. Tracked explicitly so that
     * a start value of 0 is not mistaken for "never started".
     */
    private boolean running;

    /**
     * Use this method instead of the constructor to be sure that the
     * underlying VM provides all features needed by this class.
     *
     * @return a new instance of {@link CPUTimeStopWatch} or
     *         <code>null</code> if the VM does not support getting CPU time
     *         information for the current thread or has CPU time measurement
     *         disabled
     */
    public static CPUTimeStopWatch createInstance() {
        // Support alone is not enough: with measurement disabled
        // getCurrentThreadCpuTime() only ever returns -1.
        boolean usable = mxBean.isCurrentThreadCpuTimeSupported()
                && mxBean.isThreadCpuTimeEnabled();
        return usable ? new CPUTimeStopWatch() : null;
    }

    /**
     * Starts the stopwatch. If the stopwatch is already started this will
     * restart the stopwatch.
     */
    public void start() {
        start = mxBean.getCurrentThreadCpuTime();
        running = true;
    }

    /**
     * Stops the stopwatch and returns the elapsed CPU time in nanoseconds.
     * Should be called only on started stopwatches.
     *
     * @return the elapsed CPU time in nanoseconds. When called on non-started
     *         stopwatches (either because {@link #start()} was never called or
     *         {@link #stop()} was called after the last call to
     *         {@link #start()}) this method will return 0.
     */
    public long stop() {
        long cpuTime = readout();
        running = false;
        return cpuTime;
    }

    /**
     * Returns the elapsed CPU time in nanoseconds. In contrast to
     * {@link #stop()} the stopwatch will continue to run after this call.
     *
     * @return the elapsed CPU time in nanoseconds. When called on non-started
     *         stopwatches (either because {@link #start()} was never called or
     *         {@link #stop()} was called after the last call to
     *         {@link #start()}) this method will return 0.
     */
    public long readout() {
        return running ? mxBean.getCurrentThreadCpuTime() - start : 0;
    }
}
Loading…
Cancel
Save