From 982f5d1bf184a2edab726f3c77f09a1157059e35 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Fri, 24 Feb 2017 14:57:20 -0800 Subject: [PATCH] Pure Java SHA-1 This implementation is derived straight from the description written in RFC 3174. On Mac OS X with Java 1.8.0_91 it offers similar throughput as MessageDigest SHA-1: system 239.75 MiB/s system 244.71 MiB/s system 245.00 MiB/s system 244.92 MiB/s sha1 234.08 MiB/s sha1 244.50 MiB/s sha1 242.99 MiB/s sha1 241.73 MiB/s This is the fastest implementation I could come up with. Common SHA-1 implementation tricks such as unrolling loops creates a method too large for the JIT to effectively optimize, resulting in lower overall hashing throughput. Using a preprocessor to perform the register renaming of A-E also didn't help, as again the method was too large for the JIT to effectively optimize. Fortunately the fastest version is a naive, straight-forward implementation very close to the description in RFC 3174. Change-Id: I228b05c4a294ca2ad51386cf0e47978c68e1aa42 --- org.eclipse.jgit.test/META-INF/MANIFEST.MF | 1 + .../org/eclipse/jgit/util/sha1/SHA1Test.java | 112 ++++++++ org.eclipse.jgit/META-INF/MANIFEST.MF | 1 + .../src/org/eclipse/jgit/util/sha1/SHA1.java | 262 ++++++++++++++++++ 4 files changed, 376 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/util/sha1/SHA1Test.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/util/sha1/SHA1.java diff --git a/org.eclipse.jgit.test/META-INF/MANIFEST.MF b/org.eclipse.jgit.test/META-INF/MANIFEST.MF index a8a70df69..fca6eec3f 100644 --- a/org.eclipse.jgit.test/META-INF/MANIFEST.MF +++ b/org.eclipse.jgit.test/META-INF/MANIFEST.MF @@ -49,6 +49,7 @@ Import-Package: com.googlecode.javaewah;version="[1.1.6,2.0.0)", org.eclipse.jgit.treewalk.filter;version="[4.7.0,4.8.0)", org.eclipse.jgit.util;version="[4.7.0,4.8.0)", org.eclipse.jgit.util.io;version="[4.7.0,4.8.0)", + org.eclipse.jgit.util.sha1;version="[4.7.0,4.8.0)", org.junit;version="[4.4.0,5.0.0)", org.junit.experimental.theories;version="[4.4.0,5.0.0)", org.junit.rules;version="[4.11.0,5.0.0)", diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/sha1/SHA1Test.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/sha1/SHA1Test.java new file mode 100644 index 000000000..c53080455 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/sha1/SHA1Test.java @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util.sha1; + +import static org.junit.Assert.assertEquals; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +import org.eclipse.jgit.lib.ObjectId; +import org.junit.Test; + +public class SHA1Test { + private static final String TEST1 = "abc"; + + private static final String TEST2a = "abcdbcdecdefdefgefghfghighijhi"; + private static final String TEST2b = "jkijkljklmklmnlmnomnopnopq"; + private static final String TEST2 = TEST2a + TEST2b; + + @Test + public void test0() throws NoSuchAlgorithmException { + ObjectId exp = ObjectId + .fromString("da39a3ee5e6b4b0d3255bfef95601890afd80709"); + + MessageDigest m = MessageDigest.getInstance("SHA-1"); + m.update(new byte[] {}); + ObjectId m1 = ObjectId.fromRaw(m.digest()); + + SHA1 s = SHA1.newInstance(); + s.update(new byte[] {}); + ObjectId s1 = ObjectId.fromRaw(s.digest()); + + assertEquals(m1, s1); + assertEquals(exp, s1); + } + + @Test + public void test1() throws NoSuchAlgorithmException { + ObjectId exp = ObjectId + .fromString("a9993e364706816aba3e25717850c26c9cd0d89d"); + + MessageDigest m = MessageDigest.getInstance("SHA-1"); + m.update(TEST1.getBytes(StandardCharsets.UTF_8)); + ObjectId m1 = ObjectId.fromRaw(m.digest()); + + SHA1 s = SHA1.newInstance(); + s.update(TEST1.getBytes(StandardCharsets.UTF_8)); + ObjectId s1 = ObjectId.fromRaw(s.digest()); + + assertEquals(m1, s1); + assertEquals(exp, s1); + } + + @Test + public void test2() throws NoSuchAlgorithmException { + ObjectId exp = ObjectId + .fromString("84983e441c3bd26ebaae4aa1f95129e5e54670f1"); + + MessageDigest m = MessageDigest.getInstance("SHA-1"); + m.update(TEST2.getBytes(StandardCharsets.UTF_8)); + ObjectId m1 = ObjectId.fromRaw(m.digest()); + + SHA1 s = SHA1.newInstance(); + s.update(TEST2.getBytes(StandardCharsets.UTF_8)); + ObjectId s1 = ObjectId.fromRaw(s.digest()); + + assertEquals(m1, s1); + assertEquals(exp, s1); + } +} diff --git a/org.eclipse.jgit/META-INF/MANIFEST.MF b/org.eclipse.jgit/META-INF/MANIFEST.MF index 8b7862820..33c331b4a 100644 --- a/org.eclipse.jgit/META-INF/MANIFEST.MF +++ b/org.eclipse.jgit/META-INF/MANIFEST.MF @@ -138,6 +138,7 @@ Export-Package: org.eclipse.jgit.annotations;version="4.7.0", org.eclipse.jgit.storage.file, org.ietf.jgss", org.eclipse.jgit.util.io;version="4.7.0", + org.eclipse.jgit.util.sha1;version="4.7.0", org.eclipse.jgit.util.time;version="4.7.0" Bundle-RequiredExecutionEnvironment: JavaSE-1.8 Import-Package: com.googlecode.javaewah;version="[1.1.6,2.0.0)", diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/sha1/SHA1.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/sha1/SHA1.java new file mode 100644 index 000000000..625029d3a --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/sha1/SHA1.java @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2017, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util.sha1; + +import java.util.Arrays; + +import org.eclipse.jgit.util.NB; + +/** + * Pure Java implementation of SHA-1 from FIPS 180-1 / RFC 3174. + * + *

+ * See RFC 3174. + * + * @since 4.7 + */ +public class SHA1 { + /** @return a new context to compute a SHA-1 hash of data. */ + public static SHA1 newInstance() { + return new SHA1(); + } + + // Magic initialization constants defined by FIPS180. + private int h0 = 0x67452301; + private int h1 = 0xEFCDAB89; + private int h2 = 0x98BADCFE; + private int h3 = 0x10325476; + private int h4 = 0xC3D2E1F0; + private final int[] w = new int[80]; + + /** Buffer to accumulate partial blocks to 64 byte alignment. */ + private final byte[] buffer = new byte[64]; + + /** Total number of bytes in the message. */ + private long length; + + private SHA1() { + } + + /** + * Update the digest computation by adding a byte. + * + * @param b + */ + public void update(byte b) { + int bufferLen = (int) (length & 63); + length++; + buffer[bufferLen] = b; + if (bufferLen == 63) { + compress(buffer, 0); + } + } + + /** + * Update the digest computation by adding bytes to the message. + * + * @param in + * input array of bytes. + */ + public void update(byte[] in) { + update(in, 0, in.length); + } + + /** + * Update the digest computation by adding bytes to the message. + * + * @param in + * input array of bytes. + * @param p + * offset to start at from {@code in}. + * @param len + * number of bytes to hash. + */ + public void update(byte[] in, int p, int len) { + // SHA-1 compress can only process whole 64 byte blocks. + // Hold partial updates in buffer, whose length is the low bits. + int bufferLen = (int) (length & 63); + length += len; + + if (bufferLen > 0) { + int n = Math.min(64 - bufferLen, len); + System.arraycopy(in, p, buffer, bufferLen, n); + p += n; + len -= n; + if (bufferLen + n < 64) { + return; + } + compress(buffer, 0); + } + while (len >= 64) { + compress(in, p); + p += 64; + len -= 64; + } + if (len > 0) { + System.arraycopy(in, p, buffer, 0, len); + } + } + + private void compress(byte[] block, int p) { + // Method 1 from RFC 3174 section 6.1. + // Method 2 (circular queue of 16 words) is slower. + int a = h0, b = h1, c = h2, d = h3, e = h4; + + // Round 1: 0 <= t <= 15 comes from the input block. + for (int t = 0; t < 16; t++) { + int temp = NB.decodeInt32(block, p + (t << 2)); + w[t] = temp; + temp += ((a << 5) | (a >>> 27)) // S^5(A) + + (((c ^ d) & b) ^ d) // f: 0 <= t <= 19 + + e + 0x5A827999; + e = d; + d = c; + c = (b << 30) | (b >>> 2); // S^30(B) + b = a; + a = temp; + } + + // RFC 3174 6.1.b, extend state vector to 80 words. + for (int t = 16; t < 80; t++) { + int x = w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16]; + w[t] = (x << 1) | (x >>> 31); // S^1(...) + } + + // Round 1: tail + for (int t = 16; t < 20; t++) { + int temp = ((a << 5) | (a >>> 27)) // S^5(A) + + (((c ^ d) & b) ^ d) // f: 0 <= t <= 19 + + e + w[t] + 0x5A827999; + e = d; + d = c; + c = (b << 30) | (b >>> 2); // S^30(B) + b = a; + a = temp; + } + + // Round 2 + for (int t = 20; t < 40; t++) { + int temp = ((a << 5) | (a >>> 27)) // S^5(A) + + (b ^ c ^ d) // f: 20 <= t <= 39 + + e + w[t] + 0x6ED9EBA1; + e = d; + d = c; + c = (b << 30) | (b >>> 2); // S^30(B) + b = a; + a = temp; + } + + // Round 3 + for (int t = 40; t < 60; t++) { + int temp = ((a << 5) | (a >>> 27)) // S^5(A) + + ((b & c) | (d & (b | c))) // f: 40 <= t <= 59 + + e + w[t] + 0x8F1BBCDC; + e = d; + d = c; + c = (b << 30) | (b >>> 2); // S^30(B) + b = a; + a = temp; + } + + // Round 4 + for (int t = 60; t < 80; t++) { + int temp = ((a << 5) | (a >>> 27)) // S^5(A) + + (b ^ c ^ d) // f: 60 <= t <= 79 + + e + w[t] + 0xCA62C1D6; + e = d; + d = c; + c = (b << 30) | (b >>> 2); // S^30(B) + b = a; + a = temp; + } + + h0 += a; + h1 += b; + h2 += c; + h3 += d; + h4 += e; + } + + private void finish() { + int bufferLen = (int) (length & 63); + if (bufferLen > 55) { + // Last block is too small; pad, compress, pad another block. + buffer[bufferLen++] = (byte) 0x80; + Arrays.fill(buffer, bufferLen, 64, (byte) 0); + compress(buffer, 0); + Arrays.fill(buffer, 0, 56, (byte) 0); + } else { + // Last block can hold padding and length. + buffer[bufferLen++] = (byte) 0x80; + Arrays.fill(buffer, bufferLen, 56, (byte) 0); + } + + // SHA-1 appends the length of the message in bits after the + // padding block (above). Here length is in bytes. Multiply by + // 8 by shifting by 3 as part of storing the 64 bit byte length + // into the two words expected in the trailer. + NB.encodeInt32(buffer, 56, (int) (length >>> (32 - 3))); + NB.encodeInt32(buffer, 60, (int) (length << 3)); + compress(buffer, 0); + } + + /** + * Finish the digest and return the resulting hash. + *

+ * Once {@code digest()} is called, this instance should be discarded. + * + * @return the bytes for the resulting hash. + */ + public byte[] digest() { + finish(); + + byte[] b = new byte[20]; + NB.encodeInt32(b, 0, h0); + NB.encodeInt32(b, 4, h1); + NB.encodeInt32(b, 8, h2); + NB.encodeInt32(b, 12, h3); + NB.encodeInt32(b, 16, h4); + return b; + } +}