diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/HistogramDiffTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/HistogramDiffTest.java
new file mode 100644
index 000000000..07ca80b5f
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/HistogramDiffTest.java
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.diff;
+
+import org.eclipse.jgit.diff.DiffPerformanceTest.CharArray;
+import org.eclipse.jgit.diff.DiffPerformanceTest.CharCmp;
+
+/** Unit tests for {@link HistogramDiff}. */
+public class HistogramDiffTest extends AbstractDiffTestCase {
+	@Override
+	protected HistogramDiff algorithm() {
+		HistogramDiff hd = new HistogramDiff();
+		hd.setFallbackAlgorithm(null);
+		return hd;
+	}
+
+	public void testEdit_NoUniqueMiddleSide_FlipBlocks() {
+		EditList r = diff(t("aRRSSz"), t("aSSRRz"));
+		assertEquals(2, r.size());
+		assertEquals(new Edit(1, 3, 1, 1), r.get(0)); // DELETE "RR"
+		assertEquals(new Edit(5, 5, 3, 5), r.get(1)); // INSERT "RR"
+	}
+
+	public void testEdit_NoUniqueMiddleSide_Insert2() {
+		EditList r = diff(t("aRSz"), t("aRRSSz"));
+		assertEquals(1, r.size());
+		assertEquals(new Edit(2, 2, 2, 4), r.get(0));
+	}
+
+	public void testEdit_NoUniqueMiddleSide_FlipAndExpand() {
+		EditList r = diff(t("aRSz"), t("aSSRRz"));
+		assertEquals(2, r.size());
+		assertEquals(new Edit(1, 2, 1, 1), r.get(0)); // DELETE "R"
+		assertEquals(new Edit(3, 3, 2, 5), r.get(1)); // INSERT "SRR"
+	}
+
+	public void testExceedsChainLenght_DuringScanOfA() {
+		HistogramDiff hd = new HistogramDiff();
+		hd.setFallbackAlgorithm(null);
+		hd.setMaxChainLength(3);
+
+		// Force every element into one hash chain so that the four distinct
+		// elements of A overflow the chain limit of 3 while A is scanned.
+		SequenceComparator<RawText> cmp = new SequenceComparator<RawText>() {
+			@Override
+			public boolean equals(RawText a, int ai, RawText b, int bi) {
+				return RawTextComparator.DEFAULT.equals(a, ai, b, bi);
+			}
+
+			@Override
+			public int hash(RawText a, int ai) {
+				return 1;
+			}
+		};
+
+		EditList r = hd.diff(cmp, t("RabS"), t("QabT"));
+		assertEquals(1, r.size());
+		assertEquals(new Edit(0, 4, 0, 4), r.get(0));
+	}
+
+	public void testExceedsChainLenght_DuringScanOfB() {
+		HistogramDiff hd = new HistogramDiff();
+		hd.setFallbackAlgorithm(null);
+		hd.setMaxChainLength(1);
+
+		// "a" is the only common element, but it occurs twice in A, which
+		// is more than the chain limit of 1 allows to be used as an LCS.
+		EditList r = hd.diff(RawTextComparator.DEFAULT, t("RaaS"), t("QaaT"));
+		assertEquals(1, r.size());
+		assertEquals(new Edit(0, 4, 0, 4), r.get(0));
+	}
+
+	public void testFallbackToMyersDiff() {
+		HistogramDiff hd = new HistogramDiff();
+		hd.setMaxChainLength(64);
+
+		String a = DiffTestDataGenerator.generateSequence(40000, 971, 3);
+		String b = DiffTestDataGenerator.generateSequence(40000, 1621, 5);
+		CharCmp cmp = new CharCmp();
+		CharArray ac = new CharArray(a);
+		CharArray bc = new CharArray(b);
+		EditList r;
+
+		// Without fallback our results are limited due to collisions.
+		hd.setFallbackAlgorithm(null);
+		r = hd.diff(cmp, ac, bc);
+		assertEquals(70, r.size());
+
+		// Results go up when we add a fallback for the high collision regions.
+		hd.setFallbackAlgorithm(MyersDiff.INSTANCE);
+		r = hd.diff(cmp, ac, bc);
+		assertEquals(73, r.size());
+
+		// But they still differ from Myers due to the way we did early steps.
+		EditList myersResult = MyersDiff.INSTANCE.diff(cmp, ac, bc);
+		assertFalse("Not same as Myers", myersResult.equals(r));
+	}
+
+	public void testPerformanceTestDeltaLength() {
+		HistogramDiff hd = new HistogramDiff();
+		hd.setFallbackAlgorithm(null);
+
+		String a = DiffTestDataGenerator.generateSequence(40000, 971, 3);
+		String b = DiffTestDataGenerator.generateSequence(40000, 1621, 5);
+		CharCmp cmp = new CharCmp();
+		CharArray ac = new CharArray(a);
+		CharArray bc = new CharArray(b);
+		EditList r;
+
+		hd.setMaxChainLength(64);
+		r = hd.diff(cmp, ac, bc);
+		assertEquals(70, r.size());
+
+		hd.setMaxChainLength(176);
+		r = hd.diff(cmp, ac, bc);
+		assertEquals(72, r.size());
+	}
+}
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
index dff68bad9..b6336283c 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
@@ -336,6 +336,7 @@ repositoryState_rebaseWithMerge=Rebase w/merge
 requiredHashFunctionNotAvailable=Required hash function {0} not available.
 resolvingDeltas=Resolving deltas
 searchForReuse=Finding sources
+sequenceTooLargeForDiffAlgorithm=Sequence too large for difference algorithm.
 serviceNotPermitted={0} not permitted
 shortCompressedStreamAt=Short compressed stream at {0}
 shortReadOfBlock=Short read of block.
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
index 2194d67e4..bb6ffe184 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
@@ -396,6 +396,7 @@ public static JGitText get() {
 	/***/ public String requiredHashFunctionNotAvailable;
 	/***/ public String resolvingDeltas;
 	/***/ public String searchForReuse;
+	/***/ public String sequenceTooLargeForDiffAlgorithm;
 	/***/ public String serviceNotPermitted;
 	/***/ public String shortCompressedStreamAt;
 	/***/ public String shortReadOfBlock;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/HistogramDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HistogramDiff.java
new file mode 100644
index 000000000..23c0cd12e
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HistogramDiff.java
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.diff;
+
+/**
+ * An extended form of Bram Cohen's patience diff algorithm.
+ *
+ * This implementation was derived by using the 4 rules that are outlined in
+ * Bram Cohen's blog,
+ * and then was further extended to support low-occurrence common elements.
+ *
+ * The basic idea of the algorithm is to create a histogram of occurrences for
+ * each element of sequence A. Each element of sequence B is then considered in
+ * turn. If the element also exists in sequence A, and has a lower occurrence
+ * count, the positions are considered as a candidate for the longest common
+ * subsequence (LCS). After scanning of B is complete the LCS that has the
+ * lowest number of occurrences is chosen as a split point. The region is split
+ * around the LCS, and the algorithm is recursively applied to the sections
+ * before and after the LCS.
+ *
+ * By always selecting a LCS position with the lowest occurrence count, this
+ * algorithm behaves exactly like Bram Cohen's patience diff whenever there is a
+ * unique common element available between the two sequences. When no unique
+ * elements exist, the lowest occurrence element is chosen instead. This offers
+ * more readable diffs than simply falling back on the standard Myers' O(ND)
+ * algorithm would produce.
+ *
+ * To prevent the algorithm from having an O(N^2) running time, an upper limit
+ * on the number of unique elements in a histogram bucket is configured by
+ * {@link #setMaxChainLength(int)}. If sequence A has more than this many
+ * elements that hash into the same hash bucket, the algorithm passes the region
+ * to {@link #setFallbackAlgorithm(DiffAlgorithm)}. If no fallback algorithm is
+ * configured, the region is emitted as a replace edit.
+ *
+ * During scanning of sequence B, any element of A that occurs more than
+ * {@link #setMaxChainLength(int)} times is never considered for an LCS match
+ * position, even if it is common between the two sequences. This limits the
+ * number of locations in sequence A that must be considered to find the LCS,
+ * and helps maintain a lower running time bound.
+ *
+ * So long as {@link #setMaxChainLength(int)} is a small constant (such as 64),
+ * the algorithm runs in O(N * D) time, where N is the sum of the input lengths
+ * and D is the number of edits in the resulting EditList. If the supplied
+ * {@link SequenceComparator} has a good hash function, this implementation
+ * typically out-performs {@link MyersDiff}, even though its theoretical running
+ * time is the same.
+ *
+ * This implementation has an internal limitation that prevents it from handling
+ * sequences with more than 268,435,456 (2^28) elements.
+ */
+public class HistogramDiff extends DiffAlgorithm {
+	/** Algorithm to use when there are too many element occurrences. */
+	private DiffAlgorithm fallback = MyersDiff.INSTANCE;
+
+	/**
+	 * Maximum number of positions to consider for a given element hash.
+	 *
+	 * All elements with the same hash are stored into a single chain. The chain
+	 * size is capped to ensure search is linear time at O(len_A + len_B) rather
+	 * than quadratic at O(len_A * len_B).
+	 */
+	private int maxChainLength = 64;
+
+	/**
+	 * Set the algorithm used when there are too many element occurrences.
+	 *
+	 * @param alg
+	 *            the secondary algorithm. If null the region will be denoted as
+	 *            a single REPLACE block.
+	 */
+	public void setFallbackAlgorithm(DiffAlgorithm alg) {
+		fallback = alg;
+	}
+
+	/**
+	 * Maximum number of positions to consider for a given element hash.
+	 *
+	 * All elements with the same hash are stored into a single chain. The chain
+	 * size is capped to ensure search is linear time at O(len_A + len_B) rather
+	 * than quadratic at O(len_A * len_B).
+	 *
+	 * @param maxLen
+	 *            new maximum length.
+	 */
+	public void setMaxChainLength(int maxLen) {
+		maxChainLength = maxLen;
+	}
+
+	@Override
+	public <S extends Sequence> EditList diffNonCommon(
+			SequenceComparator<? super S> cmp, S a, S b) {
+		State<S> s = new State<S>(new HashedSequencePair<S>(cmp, a, b));
+		s.diffReplace(new Edit(0, s.a.size(), 0, s.b.size()));
+		return s.edits;
+	}
+
+	/**
+	 * Working state of a single diff invocation.
+	 *
+	 * @param <S>
+	 *            type of the base sequence.
+	 */
+	private class State<S extends Sequence> {
+		private final HashedSequenceComparator<S> cmp;
+
+		private final HashedSequence<S> a;
+
+		private final HashedSequence<S> b;
+
+		/** Result edits we have determined that must be made to convert a to b. */
+		final EditList edits;
+
+		State(HashedSequencePair<S> p) {
+			this.cmp = p.getComparator();
+			this.a = p.getA();
+			this.b = p.getB();
+			this.edits = new EditList();
+		}
+
+		/**
+		 * Split a REPLACE region around its LCS, or emit/delegate it whole.
+		 *
+		 * @param r
+		 *            region of both sequences that still needs to be compared.
+		 */
+		void diffReplace(Edit r) {
+			Edit lcs = new HistogramDiffIndex<S>(maxChainLength, cmp, a, b, r)
+					.findLongestCommonSequence();
+			if (lcs != null) {
+				// If we were given an edit, we can prove a result here.
+				//
+				if (lcs.isEmpty()) {
+					// An empty edit indicates there is nothing in common.
+					// Replace the entire region.
+					//
+					edits.add(r);
+				} else {
+					diff(r.before(lcs));
+					diff(r.after(lcs));
+				}
+
+			} else if (fallback != null) {
+				SubsequenceComparator<HashedSequence<S>> cs = subcmp();
+				Subsequence<HashedSequence<S>> as = Subsequence.a(a, r);
+				Subsequence<HashedSequence<S>> bs = Subsequence.b(b, r);
+
+				EditList res = fallback.diffNonCommon(cs, as, bs);
+				edits.addAll(Subsequence.toBase(res, as, bs));
+
+			} else {
+				edits.add(r);
+			}
+		}
+
+		/**
+		 * Record a trivial edit, or keep splitting a REPLACE region.
+		 *
+		 * @param r
+		 *            non-empty region left over on one side of an LCS.
+		 */
+		private void diff(Edit r) {
+			switch (r.getType()) {
+			case INSERT:
+			case DELETE:
+				edits.add(r);
+				break;
+
+			case REPLACE:
+				diffReplace(r);
+				break;
+
+			case EMPTY:
+			default:
+				throw new IllegalStateException();
+			}
+		}
+
+		private SubsequenceComparator<HashedSequence<S>> subcmp() {
+			return new SubsequenceComparator<HashedSequence<S>>(cmp);
+		}
+	}
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/HistogramDiffIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HistogramDiffIndex.java
new file mode 100644
index 000000000..5284b3b79
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HistogramDiffIndex.java
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.diff;
+
+import org.eclipse.jgit.JGitText;
+
+/**
+ * Support {@link HistogramDiff} by computing occurrence counts of elements.
+ *
+ * Each element in the range being considered is put into a hash table, tracking
+ * the number of times that distinct element appears in the sequence. Once all
+ * elements have been inserted from sequence A, each element of sequence B is
+ * probed in the hash table and the longest common subsequence with the lowest
+ * occurrence count in A is used as the result.
+ *
+ * @param <S>
+ *            type of the base sequence.
+ */
+final class HistogramDiffIndex<S extends Sequence> {
+	private static final int REC_NEXT_SHIFT = 28 + 8;
+
+	private static final int REC_PTR_SHIFT = 8;
+
+	private static final int REC_PTR_MASK = (1 << 28) - 1;
+
+	private static final int REC_CNT_MASK = (1 << 8) - 1;
+
+	private static final int MAX_PTR = REC_PTR_MASK;
+
+	private static final int MAX_CNT = (1 << 8) - 1;
+
+	private final int maxChainLength;
+
+	private final HashedSequenceComparator<S> cmp;
+
+	private final HashedSequence<S> a;
+
+	private final HashedSequence<S> b;
+
+	private final Edit region;
+
+	/** Keyed by {@link #hash(HashedSequence, int)} for {@link #recs} index. */
+	private final int[] table;
+
+	/** Number of low bits to discard from a key to index {@link #table}. */
+	private final int keyShift;
+
+	/**
+	 * Describes a unique element in sequence A.
+	 *
+	 * The records in this table are actually 3-tuples of:
+	 * <ul>
+	 * <li>index of next record in this table that has same hash code</li>
+	 * <li>index of first element in this occurrence chain</li>
+	 * <li>occurrence count for this element (length of locs list)</li>
+	 * </ul>
+	 *
+	 * The occurrence count is capped at {@link #MAX_CNT}, as the field is only
+	 * a few bits wide. Elements that occur more frequently will have their
+	 * count capped.
+	 */
+	private long[] recs;
+
+	/** Number of elements in {@link #recs}; also is the unique element count. */
+	private int recCnt;
+
+	/**
+	 * For {@code ptr}, {@code next[ptr - nextShift]} has subsequent index.
+	 *
+	 * For the sequence element {@code ptr}, the value stored at location
+	 * {@code next[ptr - nextShift]} is the next occurrence of the exact same
+	 * element in the sequence.
+	 *
+	 * Chains always run from the lowest index to the largest index. Therefore
+	 * the array will store {@code next[1] = 2}, but never {@code next[2] = 1}.
+	 * This allows a chain to terminate with {@code 0}, as {@code 0} would never
+	 * be a valid next element.
+	 *
+	 * The array is sized to be {@code region.getLengthA()} and element indexes
+	 * are converted to array indexes by subtracting {@link #nextShift}, which
+	 * is just a cached version of {@code region.beginA}.
+	 */
+	private final int[] next;
+
+	/** Value to subtract from element indexes to key {@link #next} array. */
+	private final int nextShift;
+
+	private Edit lcs;
+
+	private int cnt;
+
+	private boolean hasCommon;
+
+	HistogramDiffIndex(int maxChainLength, HashedSequenceComparator<S> cmp,
+			HashedSequence<S> a, HashedSequence<S> b, Edit r) {
+		this.maxChainLength = maxChainLength;
+		this.cmp = cmp;
+		this.a = a;
+		this.b = b;
+		this.region = r;
+
+		if (region.endA >= MAX_PTR)
+			throw new IllegalArgumentException(
+					JGitText.get().sequenceTooLargeForDiffAlgorithm);
+
+		final int sz = r.getLengthA();
+		final int tableBits = tableBits(sz);
+		table = new int[1 << tableBits];
+		keyShift = 32 - tableBits;
+		nextShift = r.beginA;
+
+		recs = new long[Math.max(4, sz >>> 3)];
+		next = new int[sz];
+	}
+
+	/**
+	 * Find the longest common sequence with the fewest occurrences in A.
+	 *
+	 * @return the matched region; an empty edit if nothing is in common; or
+	 *         null if a hash chain grew too long or the common elements
+	 *         occur too often in A, so the caller should use its fallback.
+	 */
+	Edit findLongestCommonSequence() {
+		if (!scanA())
+			return null;
+
+		lcs = new Edit(0, 0);
+		cnt = maxChainLength + 1;
+
+		for (int bPtr = region.beginB; bPtr < region.endB;)
+			bPtr = tryLongestCommonSequence(bPtr);
+
+		return hasCommon && maxChainLength < cnt ? null : lcs;
+	}
+
+	/**
+	 * Insert the elements of A within the region into the hash table.
+	 *
+	 * @return false if a distinct element hashed into an already full chain.
+	 */
+	private boolean scanA() {
+		// Scan the elements backwards, inserting them into the hash table
+		// as we go. Going in reverse places the earliest occurrence of any
+		// element at the start of the chain, so we consider earlier matches
+		// before later matches.
+		//
+		SCAN: for (int ptr = region.endA - 1; region.beginA <= ptr; ptr--) {
+			final int tIdx = hash(a, ptr);
+
+			int chainLen = 0;
+			for (int rIdx = table[tIdx]; rIdx != 0;) {
+				final long rec = recs[rIdx];
+				if (cmp.equals(a, recPtr(rec), a, ptr)) {
+					// ptr is identical to another element. Insert it onto
+					// the front of the existing element chain.
+					//
+					int newCnt = recCnt(rec) + 1;
+					if (MAX_CNT < newCnt)
+						newCnt = MAX_CNT;
+					recs[rIdx] = recCreate(recNext(rec), ptr, newCnt);
+					next[ptr - nextShift] = recPtr(rec);
+					continue SCAN;
+				}
+
+				rIdx = recNext(rec);
+				chainLen++;
+			}
+
+			if (chainLen == maxChainLength)
+				return false;
+
+			// This is the first time we have ever seen this particular
+			// element in the sequence. Construct a new chain for it.
+			//
+			final int rIdx = ++recCnt;
+			if (rIdx == recs.length) {
+				int sz = Math.min(recs.length << 1, 1 + region.getLengthA());
+				long[] n = new long[sz];
+				System.arraycopy(recs, 0, n, 0, recs.length);
+				recs = n;
+			}
+
+			recs[rIdx] = recCreate(table[tIdx], ptr, 1);
+			table[tIdx] = rIdx;
+		}
+		return true;
+	}
+
+	/**
+	 * Try to improve the current LCS using the element of B at {@code bPtr}.
+	 *
+	 * @param bPtr
+	 *            position in B to look up in the table built from A.
+	 * @return next position of B to consider, past any match just examined.
+	 */
+	private int tryLongestCommonSequence(final int bPtr) {
+		int bNext = bPtr + 1;
+		int rIdx = table[hash(b, bPtr)];
+		for (long rec; rIdx != 0; rIdx = recNext(rec)) {
+			rec = recs[rIdx];
+
+			// If there are more occurrences in A, don't use this chain.
+			if (recCnt(rec) > cnt) {
+				if (!hasCommon)
+					hasCommon = cmp.equals(a, recPtr(rec), b, bPtr);
+				continue;
+			}
+
+			int as = recPtr(rec);
+			if (!cmp.equals(a, as, b, bPtr))
+				continue;
+
+			hasCommon = true;
+			TRY_LOCATIONS: for (;;) {
+				int np = next[as - nextShift];
+				int bs = bPtr;
+				int ae = as + 1;
+				int be = bs + 1;
+
+				while (region.beginA < as && region.beginB < bs
+						&& cmp.equals(a, as - 1, b, bs - 1)) {
+					as--;
+					bs--;
+				}
+				while (ae < region.endA && be < region.endB
+						&& cmp.equals(a, ae, b, be)) {
+					ae++;
+					be++;
+				}
+
+				if (bNext < be)
+					bNext = be;
+				if (lcs.getLengthA() < ae - as || recCnt(rec) < cnt) {
+					// If this region is the longest, or there are fewer
+					// occurrences of it in A, it's now our LCS.
+					//
+					lcs.beginA = as;
+					lcs.beginB = bs;
+					lcs.endA = ae;
+					lcs.endB = be;
+					cnt = recCnt(rec);
+				}
+
+				// Because we added elements in reverse order index 0
+				// cannot possibly be the next position. It's the first
+				// element of the sequence and thus would have been the
+				// value of as at the start of the TRY_LOCATIONS loop.
+				//
+				if (np == 0)
+					break TRY_LOCATIONS;
+
+				while (np < ae) {
+					// The next location to consider was actually within
+					// the LCS we examined above. Don't reconsider it.
+					//
+					np = next[np - nextShift];
+					if (np == 0)
+						break TRY_LOCATIONS;
+				}
+
+				as = np;
+			}
+		}
+		return bNext;
+	}
+
+	private int hash(HashedSequence<S> s, int idx) {
+		return (cmp.hash(s, idx) * 0x9e370001 /* mix bits */) >>> keyShift;
+	}
+
+	private static long recCreate(int next, int ptr, int cnt) {
+		return ((long) next << REC_NEXT_SHIFT) //
+				| ((long) ptr << REC_PTR_SHIFT) //
+				| cnt;
+	}
+
+	private static int recNext(long rec) {
+		return (int) (rec >>> REC_NEXT_SHIFT);
+	}
+
+	private static int recPtr(long rec) {
+		return ((int) (rec >>> REC_PTR_SHIFT)) & REC_PTR_MASK;
+	}
+
+	private static int recCnt(long rec) {
+		return ((int) rec) & REC_CNT_MASK;
+	}
+
+	private static int tableBits(final int sz) {
+		int bits = 31 - Integer.numberOfLeadingZeros(sz);
+		if (bits == 0)
+			bits = 1;
+		if (1 << bits < sz)
+			bits++;
+		return bits;
+	}
+}