From d9b224aeb658dfbdb10e75c7366d0583a95dfa78 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Fri, 29 Oct 2010 18:04:10 -0700 Subject: [PATCH] Delete DiffPerformanceTest This test isn't that useful. The better way to evaluate diff algorithm performance is to run `jgit debug-diff-algorithms` over real-world repositories, such as linux-2.6.git. Whenever we modify an algorithm we should manually verify that its runtime performance doesn't get any worse than it already is. Change-Id: I0beed3a5a8a537c958a5a6438a1283f97fa2097a Signed-off-by: Shawn O. Pearce --- .../jgit/diff/DiffPerformanceTest.java | 213 ------------------ .../jgit/diff/DiffTestDataGenerator.java | 93 -------- .../eclipse/jgit/diff/HistogramDiffTest.java | 40 +--- .../org/eclipse/jgit/diff/MyersDiffTest.java | 12 - 4 files changed, 6 insertions(+), 352 deletions(-) delete mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffPerformanceTest.java delete mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffPerformanceTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffPerformanceTest.java deleted file mode 100644 index 17c586f1e..000000000 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffPerformanceTest.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (C) 2009, Christian Halstrick - * and other copyright owners as documented in the project's IP log. - * - * This program and the accompanying materials are made available - * under the terms of the Eclipse Distribution License v1.0 which - * accompanies this distribution, is reproduced below, and is - * available at http://www.eclipse.org/org/documents/edl-v10.php - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * - Neither the name of the Eclipse Foundation, Inc. nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -package org.eclipse.jgit.diff; - -import java.text.DecimalFormat; -import java.text.NumberFormat; -import java.util.Collections; -import java.util.Comparator; -import java.util.LinkedList; -import java.util.List; - -import junit.framework.TestCase; - -import org.eclipse.jgit.util.CPUTimeStopWatch; - -/** - * Test cases for the performance of the diff implementation. The tests test - * that the performance of the MyersDiff algorithm is really O(N*D). Means the - * time for computing the diff between a and b should depend on the product of - * a.length+b.length and the number of found differences. The tests compute - * diffs between chunks of different length, measure the needed time and check - * that time/(N*D) does not differ more than a certain factor. - */ -public class DiffPerformanceTest extends TestCase { - private static final long longTaskBoundary = 5000000000L; - - private static final int minCPUTimerTicks = 10; - - private static final int maxFactor = 15; - - private CPUTimeStopWatch stopwatch=CPUTimeStopWatch.createInstance(); - - public class PerfData { - private NumberFormat fmt = new DecimalFormat("#.##E0"); - - public long runningTime; - - public long D; - - public long N; - - private double p1 = -1; - - private double p2 = -1; - - public double perf1() { - if (p1 < 0) - p1 = runningTime / ((double) N * D); - return p1; - } - - public double perf2() { - if (p2 < 0) - p2 = runningTime / ((double) N * D * D); - return p2; - } - - public String toString() { - return ("diffing " + N / 2 + " bytes took " + runningTime - + " ns. N=" + N + ", D=" + D + ", time/(N*D):" - + fmt.format(perf1()) + ", time/(N*D^2):" + fmt -.format(perf2()) + "\n"); - } - } - - public static Comparator getComparator(final int whichPerf) { - return new Comparator() { - public int compare(PerfData o1, PerfData o2) { - double p1 = (whichPerf == 1) ? o1.perf1() : o1.perf2(); - double p2 = (whichPerf == 1) ? o2.perf1() : o2.perf2(); - return (p1 < p2) ? -1 : (p1 > p2) ? 1 : 0; - } - }; - } - - public void test() { - if (stopwatch!=null) { - // run some tests without recording to let JIT do its optimization - test(10000); - test(20000); - test(10000); - test(20000); - - List perfData = new LinkedList(); - perfData.add(test(10000)); - perfData.add(test(20000)); - perfData.add(test(40000)); - perfData.add(test(80000)); - perfData.add(test(160000)); - perfData.add(test(320000)); - perfData.add(test(640000)); - perfData.add(test(1280000)); - - Comparator c = getComparator(1); - double factor = Collections.max(perfData, c).perf1() - / Collections.min(perfData, c).perf1(); - assertTrue( - "minimun and maximum of performance-index t/(N*D) differed too much. Measured factor of " - + factor - + " (maxFactor=" - + maxFactor - + "). Perfdata=<" + perfData.toString() + ">", - factor < maxFactor); - } - } - - /** - * Tests the performance of MyersDiff for texts which are similar (not - * random data). The CPU time is measured and returned. Because of bad - * accuracy of CPU time information the diffs are repeated. During each - * repetition the interim CPU time is checked. The diff operation is - * repeated until we have seen the CPU time clock changed its value at least - * {@link #minCPUTimerTicks} times. - * - * @param characters - * the size of the diffed character sequences. - * @return performance data - */ - private PerfData test(int characters) { - PerfData ret = new PerfData(); - String a = DiffTestDataGenerator.generateSequence(characters, 971, 3); - String b = DiffTestDataGenerator.generateSequence(characters, 1621, 5); - CharArray ac = new CharArray(a); - CharArray bc = new CharArray(b); - CharCmp cmp = new CharCmp(); - int D = 0; - int cpuTimeChanges = 0; - long lastReadout = 0; - long interimTime = 0; - int repetitions = 0; - stopwatch.start(); - while (cpuTimeChanges < minCPUTimerTicks && interimTime < longTaskBoundary) { - D = MyersDiff.INSTANCE.diff(cmp, ac, bc).size(); - repetitions++; - interimTime = stopwatch.readout(); - if (interimTime != lastReadout) { - cpuTimeChanges++; - lastReadout = interimTime; - } - } - ret.runningTime = stopwatch.stop() / repetitions; - ret.N = ac.size() + bc.size(); - ret.D = D; - - return ret; - } - - static class CharArray extends Sequence { - final char[] array; - - public CharArray(String s) { - array = s.toCharArray(); - } - - @Override - public int size() { - return array.length; - } - } - - static class CharCmp extends SequenceComparator { - @Override - public boolean equals(CharArray a, int ai, CharArray b, int bi) { - return a.array[ai] == b.array[bi]; - } - - @Override - public int hash(CharArray seq, int ptr) { - return seq.array[ptr]; - } - } -} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java deleted file mode 100644 index 91d757bcc..000000000 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) 2009, Christian Halstrick - * and other copyright owners as documented in the project's IP log. - * - * This program and the accompanying materials are made available - * under the terms of the Eclipse Distribution License v1.0 which - * accompanies this distribution, is reproduced below, and is - * available at http://www.eclipse.org/org/documents/edl-v10.php - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * - Neither the name of the Eclipse Foundation, Inc. nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -package org.eclipse.jgit.diff; - -public class DiffTestDataGenerator { - /** - * Generate sequence of characters in ascending order. The first character - * is a space. All subsequent characters have an ASCII code one greater then - * the ASCII code of the preceding character. On exception: the character - * following which follows '~' is again a ' '. - * - * @param len - * length of the String to be returned - * @return the sequence of characters as String - */ - public static String generateSequence(int len) { - return generateSequence(len, 0, 0); - } - - /** - * Generate sequence of characters similar to the one returned by - * {@link #generateSequence(int)}. But this time in each chunk of - * characters the last characters are left out. By - * calling this method twice with two different prime skipPeriod values and - * short skipLength values you create test data which is similar to what - * programmers do to their source code - huge files with only few - * insertions/deletions/changes. - * - * @param len - * length of the String to be returned - * @param skipPeriod - * @param skipLength - * @return the sequence of characters as String - */ - public static String generateSequence(int len, int skipPeriod, - int skipLength) { - StringBuilder text = new StringBuilder(len); - int skipStart = skipPeriod - skipLength; - int skippedChars = 0; - int block = 0; - for (int i = 0; i - skippedChars < len; ++i) { - if ((i % skipPeriod) == 1) - text.append((char) (256 + block++)); - else if (skipPeriod == 0 || i % skipPeriod < skipStart) { - text.append((char) (32 + i % 95)); - } else { - skippedChars++; - } - } - return text.toString(); - } -} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/HistogramDiffTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/HistogramDiffTest.java index f9353af17..85740e1f8 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/HistogramDiffTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/HistogramDiffTest.java @@ -43,8 +43,6 @@ package org.eclipse.jgit.diff; -import org.eclipse.jgit.diff.DiffPerformanceTest.CharArray; -import org.eclipse.jgit.diff.DiffPerformanceTest.CharCmp; public class HistogramDiffTest extends AbstractDiffTestCase { @Override @@ -116,47 +114,21 @@ public class HistogramDiffTest extends AbstractDiffTestCase { public void testFallbackToMyersDiff() { HistogramDiff hd = new HistogramDiff(); - hd.setMaxChainLength(64); + hd.setMaxChainLength(4); - String a = DiffTestDataGenerator.generateSequence(40000, 971, 3); - String b = DiffTestDataGenerator.generateSequence(40000, 1621, 5); - CharCmp cmp = new CharCmp(); - CharArray ac = new CharArray(a); - CharArray bc = new CharArray(b); + RawTextComparator cmp = RawTextComparator.DEFAULT; + RawText ac = t("bbbbb"); + RawText bc = t("AbCbDbEFbZ"); EditList r; // Without fallback our results are limited due to collisions. hd.setFallbackAlgorithm(null); r = hd.diff(cmp, ac, bc); - assertEquals(70, r.size()); + assertEquals(1, r.size()); // Results go up when we add a fallback for the high collision regions. hd.setFallbackAlgorithm(MyersDiff.INSTANCE); r = hd.diff(cmp, ac, bc); - assertEquals(73, r.size()); - - // But they still differ from Myers due to the way we did early steps. - EditList myersResult = MyersDiff.INSTANCE.diff(cmp, ac, bc); - assertFalse("Not same as Myers", myersResult.equals(r)); - } - - public void testPerformanceTestDeltaLength() { - HistogramDiff hd = new HistogramDiff(); - hd.setFallbackAlgorithm(null); - - String a = DiffTestDataGenerator.generateSequence(40000, 971, 3); - String b = DiffTestDataGenerator.generateSequence(40000, 1621, 5); - CharCmp cmp = new CharCmp(); - CharArray ac = new CharArray(a); - CharArray bc = new CharArray(b); - EditList r; - - hd.setMaxChainLength(64); - r = hd.diff(cmp, ac, bc); - assertEquals(70, r.size()); - - hd.setMaxChainLength(176); - r = hd.diff(cmp, ac, bc); - assertEquals(72, r.size()); + assertEquals(5, r.size()); } } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffTest.java index 896b0fbcc..4df2c5a40 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffTest.java @@ -43,21 +43,9 @@ package org.eclipse.jgit.diff; -import org.eclipse.jgit.diff.DiffPerformanceTest.CharArray; -import org.eclipse.jgit.diff.DiffPerformanceTest.CharCmp; - public class MyersDiffTest extends AbstractDiffTestCase { @Override protected DiffAlgorithm algorithm() { return MyersDiff.INSTANCE; } - - public void testPerformanceTestDeltaLength() { - String a = DiffTestDataGenerator.generateSequence(40000, 971, 3); - String b = DiffTestDataGenerator.generateSequence(40000, 1621, 5); - CharArray ac = new CharArray(a); - CharArray bc = new CharArray(b); - EditList r = algorithm().diff(new CharCmp(), ac, bc); - assertEquals(131, r.size()); - } }