From b0772d7a5c74d68a31697ebc971438faa7a132f5 Mon Sep 17 00:00:00 2001 From: Christian Halstrick Date: Mon, 19 Oct 2009 17:10:10 +0200 Subject: [PATCH] Add performance tests for MyersDiff Add some tests which make sure that the diff algorithm really behaves in the promised O(N*D) manner. This tests compute diffs between multiple big chunks of data, measure time for computing the diffs and fail if the measured times are off O(N*D) by more than a factor 10 Signed-off-by: Christian Halstrick Change-Id: I8e1e0be60299472828718371b231f1d8a9dc21a7 Signed-off-by: Robin Rosenberg --- .../jgit/diff/DiffTestDataGenerator.java | 90 ++++++++ .../jgit/diff/MyersDiffPerformanceTest.java | 196 ++++++++++++++++++ .../eclipse/jgit/util/CPUTimeStopWatch.java | 111 ++++++++++ 3 files changed, 397 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java new file mode 100644 index 000000000..c40311214 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. 
/*
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Generates deterministic character sequences that serve as input data for
 * diff tests. The patch declares this class in package
 * {@code org.eclipse.jgit.diff}.
 */
public class DiffTestDataGenerator {
    /** First character of the generated cycle (ASCII 32). */
    private static final int FIRST_CHAR = ' ';

    /** Number of characters in one cycle: ' ' (32) up to and including '~' (126). */
    private static final int CYCLE_LENGTH = '~' - ' ' + 1;

    /**
     * Generate a sequence of characters in ascending order. The first
     * character is a space. All subsequent characters have an ASCII code one
     * greater than the ASCII code of the preceding character. One exception:
     * the character which follows '~' is again a ' '.
     *
     * @param len
     *            length of the String to be returned
     * @return the sequence of characters as String
     */
    public static String generateSequence(int len) {
        return generateSequence(len, 0, 0);
    }

    /**
     * Generate a sequence of characters similar to the one returned by
     * {@link #generateSequence(int)}. But this time in each chunk of
     * {@code skipPeriod} characters the last {@code skipLength} characters
     * are left out. By calling this method twice with two different prime
     * skipPeriod values and short skipLength values you create test data
     * which is similar to what programmers do to their source code - huge
     * files with only few insertions/deletions/changes.
     *
     * @param len
     *            length of the String to be returned
     * @param skipPeriod
     *            length of the chunks in which characters are left out;
     *            {@code 0} disables skipping
     * @param skipLength
     *            number of characters left out at the end of each chunk;
     *            ignored when {@code skipPeriod} is {@code 0}
     * @return the sequence of characters as String
     * @throws IllegalArgumentException
     *             if characters are requested ({@code len > 0}), skipping is
     *             enabled and {@code skipLength >= skipPeriod}, because then
     *             every position would be skipped and the sequence could
     *             never be filled
     */
    public static String generateSequence(int len, int skipPeriod,
            int skipLength) {
        StringBuilder text = new StringBuilder(len);
        // Positions whose offset inside a chunk is below skipStart are kept.
        int skipStart = skipPeriod - skipLength;
        if (len > 0 && skipPeriod != 0 && skipStart <= 0) {
            // Without this guard the loop below would spin without ever
            // appending until the int counter overflows.
            throw new IllegalArgumentException("skipLength (" + skipLength
                    + ") must be smaller than skipPeriod (" + skipPeriod
                    + ")");
        }
        int skippedChars = 0;
        // i counts positions of the unskipped sequence; i - skippedChars is
        // the number of characters emitted so far.
        for (int i = 0; i - skippedChars < len; ++i) {
            if (skipPeriod == 0 || i % skipPeriod < skipStart) {
                text.append((char) (FIRST_CHAR + i % CYCLE_LENGTH));
            } else {
                skippedChars++;
            }
        }
        return text.toString();
    }
}

/*
 * Patch metadata and opening of the license header of the next file in this
 * patch, carried over from the collapsed source line:
 *
 * diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java
 * new file mode 100644
 * index 000000000..fe63e3d18
 * --- /dev/null
 * +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java
 * @@ -0,0 +1,196 @@
 *
 * Copyright (C) 2009, Christian Halstrick
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 */
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.diff; + +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import junit.framework.TestCase; + +import org.eclipse.jgit.util.CPUTimeStopWatch; + +/** + * Test cases for the performance of the diff implementation. 
The tests test + * that the performance of the MyersDiff algorithm is really O(N*D). Means the + * time for computing the diff between a and b should depend on the product of + * a.length+b.length and the number of found differences. The tests compute + * diffs between chunks of different length, measure the needed time and check + * that time/(N*D) does not differ more than a certain factor (currently 10) + */ +public class MyersDiffPerformanceTest extends TestCase { + private static final long longTaskBoundary = 5000000000L; + + private static final int minCPUTimerTicks = 10; + + private static final int maxFactor = 15; + + private CPUTimeStopWatch stopwatch=CPUTimeStopWatch.createInstance(); + + public class PerfData { + private NumberFormat fmt = new DecimalFormat("#.##E0"); + + public long runningTime; + + public long D; + + public long N; + + private double p1 = -1; + + private double p2 = -1; + + public double perf1() { + if (p1 < 0) + p1 = runningTime / ((double) N * D); + return p1; + } + + public double perf2() { + if (p2 < 0) + p2 = runningTime / ((double) N * D * D); + return p2; + } + + public String toString() { + return ("diffing " + N / 2 + " bytes took " + runningTime + + " ns. N=" + N + ", D=" + D + ", time/(N*D):" + + fmt.format(perf1()) + ", time/(N*D^2):" + fmt + .format(perf2())); + } + } + + public static Comparator getComparator(final int whichPerf) { + return new Comparator() { + public int compare(PerfData o1, PerfData o2) { + double p1 = (whichPerf == 1) ? o1.perf1() : o1.perf2(); + double p2 = (whichPerf == 1) ? o2.perf1() : o2.perf2(); + return (p1 < p2) ? -1 : (p1 > p2) ? 
1 : 0; + } + }; + } + + public void test() { + if (stopwatch!=null) { + List perfData = new LinkedList(); + perfData.add(test(10000)); + perfData.add(test(20000)); + perfData.add(test(50000)); + perfData.add(test(80000)); + perfData.add(test(99999)); + perfData.add(test(999999)); + + Comparator c = getComparator(1); + double factor = Collections.max(perfData, c).perf1() + / Collections.min(perfData, c).perf1(); + assertTrue( + "minimun and maximum of performance-index t/(N*D) differed too much. Measured factor of " + + factor + + " (maxFactor=" + + maxFactor + + "). Perfdata=<" + perfData.toString() + ">", + factor < maxFactor); + } + } + + /** + * Tests the performance of MyersDiff for texts which are similar (not + * random data). The CPU time is measured and returned. Because of bad + * accuracy of CPU time information the diffs are repeated. During each + * repetition the interim CPU time is checked. The diff operation is + * repeated until we have seen the CPU time clock changed its value at least + * {@link #minCPUTimerTicks} times. + * + * @param characters + * the size of the diffed character sequences. 
+ * @return performance data + */ + private PerfData test(int characters) { + PerfData ret = new PerfData(); + String a = DiffTestDataGenerator.generateSequence(characters, 971, 3); + String b = DiffTestDataGenerator.generateSequence(characters, 1621, 5); + CharArray ac = new CharArray(a); + CharArray bc = new CharArray(b); + MyersDiff myersDiff = null; + int cpuTimeChanges = 0; + long lastReadout = 0; + long interimTime = 0; + int repetitions = 0; + stopwatch.start(); + while (cpuTimeChanges < minCPUTimerTicks && interimTime < longTaskBoundary) { + myersDiff = new MyersDiff(ac, bc); + repetitions++; + interimTime = stopwatch.readout(); + if (interimTime != lastReadout) { + cpuTimeChanges++; + lastReadout = interimTime; + } + } + ret.runningTime = stopwatch.stop() / repetitions; + ret.N = (ac.size() + bc.size()); + ret.D = myersDiff.getEdits().size(); + + return ret; + } + + private static class CharArray implements Sequence { + private final char[] array; + + public CharArray(String s) { + array = s.toCharArray(); + } + + public int size() { + return array.length; + } + + public boolean equals(int i, Sequence other, int j) { + CharArray o = (CharArray) other; + return array[i] == o.array[j]; + } + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java new file mode 100644 index 000000000..55e51f710 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2009, Christian Halstrick + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util; + +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadMXBean; + +/** + * A simple stopwatch which measures elapsed CPU time of the current thread. CPU + * time is the time spent on executing your own code plus the time spent on + * executing operating system calls triggered by your application. + *

/**
 * Stopwatch for the CPU time consumed by the current thread. It needs a VM
 * which supports getting CPU time information for the current thread and has
 * that measurement enabled. The static method {@link #createInstance()} takes
 * care to return a new instance of this class only if the VM is capable of
 * returning CPU time.
 */
public class CPUTimeStopWatch {
    private static final ThreadMXBean mxBean = ManagementFactory
            .getThreadMXBean();

    /** CPU time of the current thread at the last call to start(). */
    private long start;

    /**
     * Whether the stopwatch is started. Kept separately from {@link #start}
     * because 0 is a legitimate CPU time value and must not be mistaken for
     * "not started".
     */
    private boolean running;

    /**
     * Use this method instead of the constructor to be sure that the
     * underlying VM provides all features needed by this class.
     *
     * @return a new instance of {@link #CPUTimeStopWatch()} or
     *         <code>null</code> if the VM does not support getting CPU time
     *         information or has CPU time measurement disabled
     */
    public static CPUTimeStopWatch createInstance() {
        // A VM with measurement disabled reports -1 for every readout, which
        // would make all elapsed times 0; treat that like "not supported".
        boolean available = mxBean.isCurrentThreadCpuTimeSupported()
                && mxBean.isThreadCpuTimeEnabled();
        return available ? new CPUTimeStopWatch() : null;
    }

    /**
     * Starts the stopwatch. If the stopwatch is already started this will
     * restart the stopwatch.
     */
    public void start() {
        start = mxBean.getCurrentThreadCpuTime();
        running = true;
    }

    /**
     * Stops the stopwatch and returns the elapsed CPU time in nanoseconds.
     * Should be called only on started stopwatches.
     *
     * @return the elapsed CPU time in nanoseconds. When called on non-started
     *         stopwatches (either because {@link #start()} was never called
     *         or {@link #stop()} was called after the last call to
     *         {@link #start()}) this method will return 0.
     */
    public long stop() {
        long cpuTime = readout();
        running = false;
        return cpuTime;
    }

    /**
     * Returns the elapsed CPU time in nanoseconds. In contrast to
     * {@link #stop()} the stopwatch will continue to run after this call.
     *
     * @return the elapsed CPU time in nanoseconds. When called on non-started
     *         stopwatches (either because {@link #start()} was never called
     *         or {@link #stop()} was called after the last call to
     *         {@link #start()}) this method will return 0.
     */
    public long readout() {
        return running ? mxBean.getCurrentThreadCpuTime() - start : 0;
    }
}