Add performance tests for MyersDiff
Add some tests which make sure that the diff algorithm really behaves in the promised O(N*D) manner. These tests compute diffs between multiple big chunks of data, measure the time needed for computing the diffs, and fail if the measured times deviate from O(N*D) by more than a factor of 10. Signed-off-by: Christian Halstrick <christian.halstrick@sap.com> Change-Id: I8e1e0be60299472828718371b231f1d8a9dc21a7 Signed-off-by: Robin Rosenberg <robin.rosenberg@dewire.com>
This commit is contained in:
parent
2484ad6fe0
commit
b0772d7a5c
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
|
||||
* and other copyright owners as documented in the project's IP log.
|
||||
*
|
||||
* This program and the accompanying materials are made available
|
||||
* under the terms of the Eclipse Distribution License v1.0 which
|
||||
* accompanies this distribution, is reproduced below, and is
|
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
||||
* names of its contributors may be used to endorse or promote
|
||||
* products derived from this software without specific prior
|
||||
* written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.eclipse.jgit.diff;
|
||||
|
||||
public class DiffTestDataGenerator {
    /**
     * Generate a sequence of characters in ascending order. The first
     * character is a space. Every subsequent character has an ASCII code one
     * greater than the ASCII code of the preceding character. One exception:
     * the character which follows '~' is again a ' '.
     *
     * @param len
     *            length of the String to be returned
     * @return the sequence of characters as String
     */
    public static String generateSequence(int len) {
        return generateSequence(len, 0, 0);
    }

    /**
     * Generate a sequence of characters similar to the one returned by
     * {@link #generateSequence(int)}. But this time in each chunk of
     * &lt;skipPeriod&gt; characters the last &lt;skipLength&gt; characters are
     * left out. By calling this method twice with two different prime
     * skipPeriod values and short skipLength values you create test data which
     * is similar to what programmers do to their source code - huge files with
     * only few insertions/deletions/changes.
     *
     * @param len
     *            length of the String to be returned
     * @param skipPeriod
     *            size of the chunks in which characters are left out; 0
     *            disables skipping (skipLength is then ignored)
     * @param skipLength
     *            number of characters left out at the end of each chunk
     * @return the sequence of characters as String
     * @throws IllegalArgumentException
     *             if a non-empty sequence is requested, skipping is enabled
     *             and <code>skipPeriod - skipLength</code> is not positive, so
     *             that every character of every chunk would be left out
     */
    public static String generateSequence(int len, int skipPeriod,
            int skipLength) {
        // Index inside a chunk from which on characters are dropped.
        int skipStart = skipPeriod - skipLength;

        // With skipStart <= 0 no character is ever appended, so the loop
        // below could not make progress towards the requested length.
        if (len > 0 && skipPeriod != 0 && skipStart <= 0)
            throw new IllegalArgumentException("skipPeriod (" + skipPeriod
                    + ") must be greater than skipLength (" + skipLength
                    + ") to produce any characters");

        StringBuilder text = new StringBuilder(len);
        int skippedChars = 0;
        // i counts generated positions; only the non-skipped ones are
        // appended, so (i - skippedChars) is the current output length.
        for (int i = 0; i - skippedChars < len; ++i) {
            if (skipPeriod == 0 || i % skipPeriod < skipStart) {
                // Cycle through the 95 characters ' ' (32) .. '~' (126).
                text.append((char) (32 + i % 95));
            } else {
                skippedChars++;
            }
        }
        return text.toString();
    }
}
|
|
@ -0,0 +1,196 @@
|
|||
/*
|
||||
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
|
||||
* and other copyright owners as documented in the project's IP log.
|
||||
*
|
||||
* This program and the accompanying materials are made available
|
||||
* under the terms of the Eclipse Distribution License v1.0 which
|
||||
* accompanies this distribution, is reproduced below, and is
|
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
||||
* names of its contributors may be used to endorse or promote
|
||||
* products derived from this software without specific prior
|
||||
* written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.eclipse.jgit.diff;
|
||||
|
||||
import java.text.DecimalFormat;
|
||||
import java.text.NumberFormat;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.eclipse.jgit.util.CPUTimeStopWatch;
|
||||
|
||||
/**
|
||||
* Test cases for the performance of the diff implementation. The tests test
|
||||
* that the performance of the MyersDiff algorithm is really O(N*D). Means the
|
||||
* time for computing the diff between a and b should depend on the product of
|
||||
* a.length+b.length and the number of found differences. The tests compute
|
||||
* diffs between chunks of different length, measure the needed time and check
|
||||
* that time/(N*D) does not differ more than a certain factor (currently 10)
|
||||
*/
|
||||
public class MyersDiffPerformanceTest extends TestCase {
|
||||
private static final long longTaskBoundary = 5000000000L;
|
||||
|
||||
private static final int minCPUTimerTicks = 10;
|
||||
|
||||
private static final int maxFactor = 15;
|
||||
|
||||
private CPUTimeStopWatch stopwatch=CPUTimeStopWatch.createInstance();
|
||||
|
||||
public class PerfData {
|
||||
private NumberFormat fmt = new DecimalFormat("#.##E0");
|
||||
|
||||
public long runningTime;
|
||||
|
||||
public long D;
|
||||
|
||||
public long N;
|
||||
|
||||
private double p1 = -1;
|
||||
|
||||
private double p2 = -1;
|
||||
|
||||
public double perf1() {
|
||||
if (p1 < 0)
|
||||
p1 = runningTime / ((double) N * D);
|
||||
return p1;
|
||||
}
|
||||
|
||||
public double perf2() {
|
||||
if (p2 < 0)
|
||||
p2 = runningTime / ((double) N * D * D);
|
||||
return p2;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return ("diffing " + N / 2 + " bytes took " + runningTime
|
||||
+ " ns. N=" + N + ", D=" + D + ", time/(N*D):"
|
||||
+ fmt.format(perf1()) + ", time/(N*D^2):" + fmt
|
||||
.format(perf2()));
|
||||
}
|
||||
}
|
||||
|
||||
public static Comparator<PerfData> getComparator(final int whichPerf) {
|
||||
return new Comparator<PerfData>() {
|
||||
public int compare(PerfData o1, PerfData o2) {
|
||||
double p1 = (whichPerf == 1) ? o1.perf1() : o1.perf2();
|
||||
double p2 = (whichPerf == 1) ? o2.perf1() : o2.perf2();
|
||||
return (p1 < p2) ? -1 : (p1 > p2) ? 1 : 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public void test() {
|
||||
if (stopwatch!=null) {
|
||||
List<PerfData> perfData = new LinkedList<PerfData>();
|
||||
perfData.add(test(10000));
|
||||
perfData.add(test(20000));
|
||||
perfData.add(test(50000));
|
||||
perfData.add(test(80000));
|
||||
perfData.add(test(99999));
|
||||
perfData.add(test(999999));
|
||||
|
||||
Comparator<PerfData> c = getComparator(1);
|
||||
double factor = Collections.max(perfData, c).perf1()
|
||||
/ Collections.min(perfData, c).perf1();
|
||||
assertTrue(
|
||||
"minimun and maximum of performance-index t/(N*D) differed too much. Measured factor of "
|
||||
+ factor
|
||||
+ " (maxFactor="
|
||||
+ maxFactor
|
||||
+ "). Perfdata=<" + perfData.toString() + ">",
|
||||
factor < maxFactor);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the performance of MyersDiff for texts which are similar (not
|
||||
* random data). The CPU time is measured and returned. Because of bad
|
||||
* accuracy of CPU time information the diffs are repeated. During each
|
||||
* repetition the interim CPU time is checked. The diff operation is
|
||||
* repeated until we have seen the CPU time clock changed its value at least
|
||||
* {@link #minCPUTimerTicks} times.
|
||||
*
|
||||
* @param characters
|
||||
* the size of the diffed character sequences.
|
||||
* @return performance data
|
||||
*/
|
||||
private PerfData test(int characters) {
|
||||
PerfData ret = new PerfData();
|
||||
String a = DiffTestDataGenerator.generateSequence(characters, 971, 3);
|
||||
String b = DiffTestDataGenerator.generateSequence(characters, 1621, 5);
|
||||
CharArray ac = new CharArray(a);
|
||||
CharArray bc = new CharArray(b);
|
||||
MyersDiff myersDiff = null;
|
||||
int cpuTimeChanges = 0;
|
||||
long lastReadout = 0;
|
||||
long interimTime = 0;
|
||||
int repetitions = 0;
|
||||
stopwatch.start();
|
||||
while (cpuTimeChanges < minCPUTimerTicks && interimTime < longTaskBoundary) {
|
||||
myersDiff = new MyersDiff(ac, bc);
|
||||
repetitions++;
|
||||
interimTime = stopwatch.readout();
|
||||
if (interimTime != lastReadout) {
|
||||
cpuTimeChanges++;
|
||||
lastReadout = interimTime;
|
||||
}
|
||||
}
|
||||
ret.runningTime = stopwatch.stop() / repetitions;
|
||||
ret.N = (ac.size() + bc.size());
|
||||
ret.D = myersDiff.getEdits().size();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
private static class CharArray implements Sequence {
|
||||
private final char[] array;
|
||||
|
||||
public CharArray(String s) {
|
||||
array = s.toCharArray();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return array.length;
|
||||
}
|
||||
|
||||
public boolean equals(int i, Sequence other, int j) {
|
||||
CharArray o = (CharArray) other;
|
||||
return array[i] == o.array[j];
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
|
||||
* and other copyright owners as documented in the project's IP log.
|
||||
*
|
||||
* This program and the accompanying materials are made available
|
||||
* under the terms of the Eclipse Distribution License v1.0 which
|
||||
* accompanies this distribution, is reproduced below, and is
|
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
*
|
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
||||
* names of its contributors may be used to endorse or promote
|
||||
* products derived from this software without specific prior
|
||||
* written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.eclipse.jgit.util;
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
|
||||
/**
 * A simple stopwatch which measures elapsed CPU time of the current thread. CPU
 * time is the time spent on executing your own code plus the time spent on
 * executing operating system calls triggered by your application.
 * <p>
 * This stopwatch needs a VM which supports getting CPU Time information for the
 * current thread. The static method createInstance() will take care to return
 * only a new instance of this class if the VM is capable of returning CPU time.
 */
public class CPUTimeStopWatch {
    /** CPU time of the current thread at the last call to {@link #start()}. */
    private long start;

    /** Whether the stopwatch has been started and not yet stopped. */
    private boolean running;

    private static final ThreadMXBean mxBean = ManagementFactory
            .getThreadMXBean();

    /**
     * use this method instead of the constructor to be sure that the underlying
     * VM provides all features needed by this class.
     *
     * @return a new instance of {@link CPUTimeStopWatch} or <code>null</code>
     *         if the VM does not support getting CPU time information for the
     *         current thread or if CPU time measurement is disabled
     */
    public static CPUTimeStopWatch createInstance() {
        // "Supported" alone is not sufficient: while measurement is disabled
        // the VM reports -1 as CPU time, so every readout would be 0.
        if (mxBean.isCurrentThreadCpuTimeSupported()
                && mxBean.isThreadCpuTimeEnabled())
            return new CPUTimeStopWatch();
        return null;
    }

    /**
     * Starts the stopwatch. If the stopwatch is already started this will
     * restart the stopwatch.
     */
    public void start() {
        start = mxBean.getCurrentThreadCpuTime();
        running = true;
    }

    /**
     * Stops the stopwatch and return the elapsed CPU time in nanoseconds.
     * Should be called only on started stopwatches.
     *
     * @return the elapsed CPU time in nanoseconds. When called on non-started
     *         stopwatches (either because {@link #start()} was never called or
     *         {@link #stop()} was called after the last call to
     *         {@link #start()}) this method will return 0.
     */
    public long stop() {
        long cpuTime = readout();
        running = false;
        start = 0;
        return cpuTime;
    }

    /**
     * Return the elapsed CPU time in nanoseconds. In contrast to
     * {@link #stop()} the stopwatch will continue to run after this call.
     *
     * @return the elapsed CPU time in nanoseconds. When called on non-started
     *         stopwatches (either because {@link #start()} was never called or
     *         {@link #stop()} was called after the last call to
     *         {@link #start()}) this method will return 0.
     */
    public long readout() {
        return running ? mxBean.getCurrentThreadCpuTime() - start : 0;
    }
}
|
Loading…
Reference in New Issue