diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java
index da21c483b..27cf9252e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java
@@ -91,10 +91,11 @@ final class PatienceDiffIndex {
/** 1 past the last valid entry in {@link #pCommon}. */
private final int pEnd;
- /** Keyed by {@code cmp.hash() & tableMask} to yield an entry offset. */
+ /** Keyed by {@link #hash(HashedSequence, int)} to get an entry offset. */
private final int[] table;
- private final int tableMask;
+ /** Number of low bits to discard from a key to index {@link #table}. */
+ private final int keyShift;
// To save memory the buckets for hash chains are stored in correlated
// arrays. This permits us to get 3 values per entry, without paying
@@ -158,8 +159,9 @@ final class PatienceDiffIndex {
this.pEnd = pCnt;
final int sz = region.getLengthB();
- table = new int[tableSize(sz)];
- tableMask = table.length - 1;
+ final int tableBits = tableBits(sz);
+ table = new int[1 << tableBits];
+ keyShift = 32 - tableBits;
// As we insert elements we preincrement so that 0 is never a
// valid entry. Therefore we have to allocate one extra space.
@@ -187,7 +189,7 @@ private void scanB() {
final int end = region.endB;
int pIdx = pBegin;
SCAN: while (ptr < end) {
- final int tIdx = cmp.hash(b, ptr) & tableMask;
+ final int tIdx = hash(b, ptr);
if (pIdx < pEnd) {
final long priorRec = pCommon[pIdx];
@@ -244,7 +246,7 @@ private void scanA() {
final int end = region.endA;
int pLast = pBegin - 1;
SCAN: while (ptr < end) {
- final int tIdx = cmp.hash(a, ptr) & tableMask;
+ final int tIdx = hash(a, ptr);
for (int eIdx = table[tIdx]; eIdx != 0; eIdx = next[eIdx]) {
final long rec = ptrs[eIdx];
@@ -391,6 +393,10 @@ Edit findLongestCommonSequence() {
return lcs;
}
+ private int hash(HashedSequence s, int idx) {
+ return (cmp.hash(s, idx) * 0x9e370001 /* mix bits */) >>> keyShift;
+ }
+
private static boolean isDuplicate(long rec) {
return (((int) rec) & DUPLICATE_MASK) != 0;
}
@@ -407,11 +413,12 @@ private static int bOf(long rec) {
return (int) (rec >>> B_SHIFT);
}
- private static int tableSize(final int worstCaseBlockCnt) {
- int shift = 32 - Integer.numberOfLeadingZeros(worstCaseBlockCnt);
- int sz = 1 << (shift - 1);
- if (sz < worstCaseBlockCnt)
- sz <<= 1;
- return sz;
+ private static int tableBits(final int sz) {
+ int bits = 31 - Integer.numberOfLeadingZeros(sz);
+ if (bits == 0)
+ bits = 1;
+ if (1 << bits < sz)
+ bits++;
+ return bits;
}
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
index a1e2e6cec..f9cf376ab 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
@@ -78,7 +78,7 @@ public boolean equals(RawText a, int ai, RawText b, int bi) {
protected int hashRegion(final byte[] raw, int ptr, final int end) {
int hash = 5381;
for (; ptr < end; ptr++)
- hash = (hash << 5) ^ (raw[ptr] & 0xff);
+ hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
return hash;
}
};
@@ -128,7 +128,7 @@ protected int hashRegion(byte[] raw, int ptr, int end) {
for (; ptr < end; ptr++) {
byte c = raw[ptr];
if (!isWhitespace(c))
- hash = (hash << 5) ^ (c & 0xff);
+ hash = ((hash << 5) + hash) + (c & 0xff);
}
return hash;
}
@@ -163,9 +163,8 @@ public boolean equals(RawText a, int ai, RawText b, int bi) {
protected int hashRegion(final byte[] raw, int ptr, int end) {
int hash = 5381;
ptr = trimLeadingWhitespace(raw, ptr, end);
- for (; ptr < end; ptr++) {
- hash = (hash << 5) ^ (raw[ptr] & 0xff);
- }
+ for (; ptr < end; ptr++)
+ hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
return hash;
}
};
@@ -199,9 +198,8 @@ public boolean equals(RawText a, int ai, RawText b, int bi) {
protected int hashRegion(final byte[] raw, int ptr, int end) {
int hash = 5381;
end = trimTrailingWhitespace(raw, ptr, end);
- for (; ptr < end; ptr++) {
- hash = (hash << 5) ^ (raw[ptr] & 0xff);
- }
+ for (; ptr < end; ptr++)
+ hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
return hash;
}
};
@@ -247,7 +245,7 @@ protected int hashRegion(final byte[] raw, int ptr, int end) {
end = trimTrailingWhitespace(raw, ptr, end);
while (ptr < end) {
byte c = raw[ptr];
- hash = (hash << 5) ^ (c & 0xff);
+ hash = ((hash << 5) + hash) + (c & 0xff);
if (isWhitespace(c))
ptr = trimLeadingWhitespace(raw, ptr, end);
else
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
index 39bcebb4c..6627268e4 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
@@ -68,20 +68,13 @@ class SimilarityIndex {
/** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */
private static final int MAX_HASH_BITS = 17;
- /** The {@link #idHash} table will not grow bigger than this, ever. */
- private static final int MAX_HASH_SIZE = 1 << MAX_HASH_BITS;
-
- /** Prime just before {@link #MAX_HASH_SIZE}. */
- private static final int P = 131071;
-
/**
* Shift to apply before storing a key.
*
* Within the 64 bit table record space, we leave the highest bit unset so - * all values are positive, and we need {@link #MAX_HASH_BITS} bits for the - * keys. The lower 32 bits are used to count bytes impacted. + * all values are positive. The lower 32 bits to count bytes. */ - private static final int KEY_SHIFT = 64 - 1 - MAX_HASH_BITS; + private static final int KEY_SHIFT = 32; /** Total size of the file we hashed into the structure. */ private long fileSize; @@ -100,8 +93,12 @@ class SimilarityIndex { */ private long[] idHash; + /** {@code idHash.length == 1 << idHashBits}. */ + private int idHashBits; + SimilarityIndex() { - idHash = new long[256]; + idHashBits = 8; + idHash = new long[1 << idHashBits]; } long getFileSize() { @@ -138,7 +135,7 @@ void hash(byte[] raw, int ptr, final int end) { int c = raw[ptr++] & 0xff; if (c == '\n') break; - hash = (hash << 5) ^ c; + hash = (hash << 5) + hash + c; } while (ptr < end && ptr - start < 64); add(hash, ptr - start); } @@ -166,7 +163,7 @@ void hash(InputStream in, long remaining) throws IOException { int c = buf[ptr++] & 0xff; if (c == '\n') break; - hash = (hash << 5) ^ c; + hash = (hash << 5) + hash + c; } while (n < 64 && n < remaining); add(hash, n); remaining -= n; @@ -272,7 +269,8 @@ private int packedIndex(int idx) { } void add(int key, int cnt) { - key = hash(key); + key = (key * 0x9e370001) >>> 1; // Mix bits and ensure not negative. + int j = slot(key); for (;;) { long v = idHash[j]; @@ -298,28 +296,24 @@ void add(int key, int cnt) { } } - private static int hash(int key) { - // Make the key fit into our table. Since we have a maximum size - // that we cap the table at, all keys get squashed before going - // into the table. This prevents overflow. - // - return (key >>> 1) % P; - } - private int slot(int key) { - return key % idHash.length; + // We use 31 - idHashBits because the upper bit was already forced + // to be 0 and we want the remaining high bits to be used as the + // table slot. + // + return key >>> (31 - idHashBits); } private boolean shouldGrow() { - int n = idHash.length; - return n < MAX_HASH_SIZE && n <= idSize * 2; + return idHashBits < MAX_HASH_BITS && idHash.length <= idSize * 2; } private void grow() { long[] oldHash = idHash; int oldSize = idHash.length; - idHash = new long[2 * oldSize]; + idHashBits++; + idHash = new long[1 << idHashBits]; for (int i = 0; i < oldSize; i++) { long v = oldHash[i]; if (v != 0) {