diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java index da21c483b..27cf9252e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java @@ -91,10 +91,11 @@ final class PatienceDiffIndex { /** 1 past the last valid entry in {@link #pCommon}. */ private final int pEnd; - /** Keyed by {@code cmp.hash() & tableMask} to yield an entry offset. */ + /** Keyed by {@link #hash(HashedSequence, int)} to get an entry offset. */ private final int[] table; - private final int tableMask; + /** Number of low bits to discard from a key to index {@link #table}. */ + private final int keyShift; // To save memory the buckets for hash chains are stored in correlated // arrays. This permits us to get 3 values per entry, without paying @@ -158,8 +159,9 @@ final class PatienceDiffIndex { this.pEnd = pCnt; final int sz = region.getLengthB(); - table = new int[tableSize(sz)]; - tableMask = table.length - 1; + final int tableBits = tableBits(sz); + table = new int[1 << tableBits]; + keyShift = 32 - tableBits; // As we insert elements we preincrement so that 0 is never a // valid entry. Therefore we have to allocate one extra space. 
@@ -187,7 +189,7 @@ private void scanB() { final int end = region.endB; int pIdx = pBegin; SCAN: while (ptr < end) { - final int tIdx = cmp.hash(b, ptr) & tableMask; + final int tIdx = hash(b, ptr); if (pIdx < pEnd) { final long priorRec = pCommon[pIdx]; @@ -244,7 +246,7 @@ private void scanA() { final int end = region.endA; int pLast = pBegin - 1; SCAN: while (ptr < end) { - final int tIdx = cmp.hash(a, ptr) & tableMask; + final int tIdx = hash(a, ptr); for (int eIdx = table[tIdx]; eIdx != 0; eIdx = next[eIdx]) { final long rec = ptrs[eIdx]; @@ -391,6 +393,10 @@ Edit findLongestCommonSequence() { return lcs; } + private int hash(HashedSequence s, int idx) { + return (cmp.hash(s, idx) * 0x9e370001 /* mix bits */) >>> keyShift; + } + private static boolean isDuplicate(long rec) { return (((int) rec) & DUPLICATE_MASK) != 0; } @@ -407,11 +413,12 @@ private static int bOf(long rec) { return (int) (rec >>> B_SHIFT); } - private static int tableSize(final int worstCaseBlockCnt) { - int shift = 32 - Integer.numberOfLeadingZeros(worstCaseBlockCnt); - int sz = 1 << (shift - 1); - if (sz < worstCaseBlockCnt) - sz <<= 1; - return sz; + private static int tableBits(final int sz) { + int bits = 31 - Integer.numberOfLeadingZeros(sz); + if (bits == 0) + bits = 1; + if (1 << bits < sz) + bits++; + return bits; } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java index a1e2e6cec..f9cf376ab 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java @@ -78,7 +78,7 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { protected int hashRegion(final byte[] raw, int ptr, final int end) { int hash = 5381; for (; ptr < end; ptr++) - hash = (hash << 5) ^ (raw[ptr] & 0xff); + hash = ((hash << 5) + hash) + (raw[ptr] & 0xff); return hash; } }; @@ -128,7 +128,7 @@ protected int hashRegion(byte[] 
raw, int ptr, int end) { for (; ptr < end; ptr++) { byte c = raw[ptr]; if (!isWhitespace(c)) - hash = (hash << 5) ^ (c & 0xff); + hash = ((hash << 5) + hash) + (c & 0xff); } return hash; } @@ -163,9 +163,8 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { protected int hashRegion(final byte[] raw, int ptr, int end) { int hash = 5381; ptr = trimLeadingWhitespace(raw, ptr, end); - for (; ptr < end; ptr++) { - hash = (hash << 5) ^ (raw[ptr] & 0xff); - } + for (; ptr < end; ptr++) + hash = ((hash << 5) + hash) + (raw[ptr] & 0xff); return hash; } }; @@ -199,9 +198,8 @@ public boolean equals(RawText a, int ai, RawText b, int bi) { protected int hashRegion(final byte[] raw, int ptr, int end) { int hash = 5381; end = trimTrailingWhitespace(raw, ptr, end); - for (; ptr < end; ptr++) { - hash = (hash << 5) ^ (raw[ptr] & 0xff); - } + for (; ptr < end; ptr++) + hash = ((hash << 5) + hash) + (raw[ptr] & 0xff); return hash; } }; @@ -247,7 +245,7 @@ protected int hashRegion(final byte[] raw, int ptr, int end) { end = trimTrailingWhitespace(raw, ptr, end); while (ptr < end) { byte c = raw[ptr]; - hash = (hash << 5) ^ (c & 0xff); + hash = ((hash << 5) + hash) + (c & 0xff); if (isWhitespace(c)) ptr = trimLeadingWhitespace(raw, ptr, end); else diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java index 39bcebb4c..6627268e4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java @@ -68,20 +68,13 @@ class SimilarityIndex { /** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */ private static final int MAX_HASH_BITS = 17; - /** The {@link #idHash} table will not grow bigger than this, ever. */ - private static final int MAX_HASH_SIZE = 1 << MAX_HASH_BITS; - - /** Prime just before {@link #MAX_HASH_SIZE}. 
*/ - private static final int P = 131071; - /** * Shift to apply before storing a key. *

 * Within the 64 bit table record space, we leave the highest bit unset so - * all values are positive, and we need {@link #MAX_HASH_BITS} bits for the - * keys. The lower 32 bits are used to count bytes impacted. + * all values are positive. The lower 32 bits are used to count bytes. */ - private static final int KEY_SHIFT = 64 - 1 - MAX_HASH_BITS; + private static final int KEY_SHIFT = 32; /** Total size of the file we hashed into the structure. */ private long fileSize; @@ -100,8 +93,12 @@ class SimilarityIndex { */ private long[] idHash; + /** {@code idHash.length == 1 << idHashBits}. */ + private int idHashBits; + SimilarityIndex() { - idHash = new long[256]; + idHashBits = 8; + idHash = new long[1 << idHashBits]; } long getFileSize() { @@ -138,7 +135,7 @@ void hash(byte[] raw, int ptr, final int end) { int c = raw[ptr++] & 0xff; if (c == '\n') break; - hash = (hash << 5) ^ c; + hash = (hash << 5) + hash + c; } while (ptr < end && ptr - start < 64); add(hash, ptr - start); } @@ -166,7 +163,7 @@ void hash(InputStream in, long remaining) throws IOException { int c = buf[ptr++] & 0xff; if (c == '\n') break; - hash = (hash << 5) ^ c; + hash = (hash << 5) + hash + c; } while (n < 64 && n < remaining); add(hash, n); remaining -= n; @@ -272,7 +269,8 @@ private int packedIndex(int idx) { } void add(int key, int cnt) { - key = hash(key); + key = (key * 0x9e370001) >>> 1; // Mix bits and ensure not negative. + int j = slot(key); for (;;) { long v = idHash[j]; @@ -298,28 +296,24 @@ void add(int key, int cnt) { } } - private static int hash(int key) { - // Make the key fit into our table. Since we have a maximum size - // that we cap the table at, all keys get squashed before going - // into the table. This prevents overflow. - // - return (key >>> 1) % P; - } - private int slot(int key) { - return key % idHash.length; + // We use 31 - idHashBits because the upper bit was already forced + // to be 0 and we want the remaining high bits to be used as the + // table slot. 
+ // + return key >>> (31 - idHashBits); } private boolean shouldGrow() { - int n = idHash.length; - return n < MAX_HASH_SIZE && n <= idSize * 2; + return idHashBits < MAX_HASH_BITS && idHash.length <= idSize * 2; } private void grow() { long[] oldHash = idHash; int oldSize = idHash.length; - idHash = new long[2 * oldSize]; + idHashBits++; + idHash = new long[1 << idHashBits]; for (int i = 0; i < oldSize; i++) { long v = oldHash[i]; if (v != 0) {