commit 0e2f002a7b45acb5ed62365b9290b09912e5c709 (tree)
parent 10a99f8f64bf7cf336c990b90c72d81f5f767b4d
Author: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 13 Sep 2023 18:22:56 -0400
Merge pull request #17046 from tiehuis/improve-hash-tests
improve std/hash test coverage
Diffstat:
10 files changed, 202 insertions(+), 112 deletions(-)
diff --git a/lib/std/hash/adler.zig b/lib/std/hash/adler.zig
@@ -3,7 +3,7 @@
// https://tools.ietf.org/html/rfc1950#section-9
// https://github.com/madler/zlib/blob/master/adler32.c
-const std = @import("../std.zig");
+const std = @import("std");
const testing = std.testing;
pub const Adler32 = struct {
@@ -126,3 +126,9 @@ test "adler32 very long with variation" {
try testing.expectEqual(@as(u32, 0x5af38d6e), std.hash.Adler32.hash(long[0..]));
}
+
+const verify = @import("verify.zig");
+
+// Checks that Adler32's incremental update()/final() results agree with its one-shot hash().
+test "adler32 iterative" {
+ try verify.iterativeApi(Adler32);
+}
diff --git a/lib/std/hash/cityhash.zig b/lib/std/hash/cityhash.zig
@@ -342,64 +342,35 @@ pub const CityHash64 = struct {
}
};
-fn SMHasherTest(comptime hash_fn: anytype) u32 {
- const HashResult = @typeInfo(@TypeOf(hash_fn)).Fn.return_type.?;
-
- var key: [256]u8 = undefined;
- var hashes_bytes: [256 * @sizeOf(HashResult)]u8 = undefined;
-
- @memset(&key, 0);
- @memset(&hashes_bytes, 0);
-
- var i: u32 = 0;
- while (i < 256) : (i += 1) {
- key[i] = @as(u8, @intCast(i));
-
- var h: HashResult = hash_fn(key[0..i], 256 - i);
-
- // comptime can't really do reinterpret casting yet,
- // so we need to write the bytes manually.
- for (hashes_bytes[i * @sizeOf(HashResult) ..][0..@sizeOf(HashResult)]) |*byte| {
- byte.* = @as(u8, @truncate(h));
- h = h >> 8;
- }
- }
-
- return @as(u32, @truncate(hash_fn(&hashes_bytes, 0)));
-}
-
fn CityHash32hashIgnoreSeed(str: []const u8, seed: u32) u32 {
_ = seed;
return CityHash32.hash(str);
}
+const verify = @import("verify.zig");
+
test "cityhash32" {
const Test = struct {
- fn doTest() !void {
- // Note: SMHasher doesn't provide a 32bit version of the algorithm.
- // Note: The implementation was verified against the Google Abseil version.
- try std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed), 0x68254F81);
- try std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed), 0x68254F81);
+ fn do() !void {
+ // SMHasher doesn't provide a 32bit version of the algorithm.
+ // The implementation was verified against the Google Abseil version.
+ try std.testing.expectEqual(verify.smhasher(CityHash32hashIgnoreSeed), 0x68254F81);
}
};
- try Test.doTest();
- // TODO This is uncommented to prevent OOM on the CI server. Re-enable this test
- // case once we ship stage2.
- //@setEvalBranchQuota(50000);
- //comptime Test.doTest();
+ try Test.do();
+ @setEvalBranchQuota(75000);
+ try comptime Test.do();
}
test "cityhash64" {
const Test = struct {
- fn doTest() !void {
- // Note: This is not compliant with the SMHasher implementation of CityHash64!
- // Note: The implementation was verified against the Google Abseil version.
- try std.testing.expectEqual(SMHasherTest(CityHash64.hashWithSeed), 0x5FABC5C5);
+ fn do() !void {
+ // This is not compliant with the SMHasher implementation of CityHash64!
+ // The implementation was verified against the Google Abseil version.
+ try std.testing.expectEqual(verify.smhasher(CityHash64.hashWithSeed), 0x5FABC5C5);
}
};
- try Test.doTest();
- // TODO This is uncommented to prevent OOM on the CI server. Re-enable this test
- // case once we ship stage2.
- //@setEvalBranchQuota(50000);
- //comptime Test.doTest();
+ try Test.do();
+ @setEvalBranchQuota(75000);
+ try comptime Test.do();
}
diff --git a/lib/std/hash/crc.zig b/lib/std/hash/crc.zig
@@ -5,7 +5,7 @@
// - Crc32SmallWithPoly uses only 64 bytes of memory but is slower. Be aware that this is
// still moderately fast just slow relative to the slicing approach.
-const std = @import("../std.zig");
+const std = @import("std");
const builtin = @import("builtin");
const debug = std.debug;
const testing = std.testing;
@@ -194,6 +194,8 @@ pub fn Crc32WithPoly(comptime poly: Polynomial) type {
};
}
+const verify = @import("verify.zig");
+
test "crc32 ieee" {
const Crc32Ieee = Crc32WithPoly(.IEEE);
@@ -210,6 +212,10 @@ test "crc32 castagnoli" {
try testing.expect(Crc32Castagnoli.hash("abc") == 0x364b3fb7);
}
+// Checks that the IEEE Crc32WithPoly's incremental update()/final() results agree with its
+// one-shot hash().
+test "crc32 iterative" {
+ try verify.iterativeApi(Crc32WithPoly(.IEEE));
+}
+
// half-byte lookup table implementation.
pub fn Crc32SmallWithPoly(comptime poly: Polynomial) type {
return struct {
@@ -258,6 +264,10 @@ pub fn Crc32SmallWithPoly(comptime poly: Polynomial) type {
};
}
+// Checks that the IEEE Crc32SmallWithPoly's incremental update()/final() results agree with
+// its one-shot hash().
+test "small crc32 iterative" {
+ try verify.iterativeApi(Crc32SmallWithPoly(.IEEE));
+}
+
test "small crc32 ieee" {
const Crc32Ieee = Crc32SmallWithPoly(.IEEE);
diff --git a/lib/std/hash/crc/catalog_test.zig b/lib/std/hash/crc/catalog_test.zig
@@ -1,6 +1,6 @@
//! This file is auto-generated by tools/update_crc_catalog.zig.
-const std = @import("../../std.zig");
+const std = @import("std");
const testing = std.testing;
const catalog = @import("catalog.zig");
diff --git a/lib/std/hash/fnv.zig b/lib/std/hash/fnv.zig
@@ -4,7 +4,7 @@
//
// https://tools.ietf.org/html/draft-eastlake-fnv-14
-const std = @import("../std.zig");
+const std = @import("std");
const testing = std.testing;
pub const Fnv1a_32 = Fnv1a(u32, 0x01000193, 0x811c9dc5);
@@ -40,19 +40,24 @@ fn Fnv1a(comptime T: type, comptime prime: T, comptime offset: T) type {
};
}
+const verify = @import("verify.zig");
+
test "fnv1a-32" {
try testing.expect(Fnv1a_32.hash("") == 0x811c9dc5);
try testing.expect(Fnv1a_32.hash("a") == 0xe40c292c);
try testing.expect(Fnv1a_32.hash("foobar") == 0xbf9cf968);
+ try verify.iterativeApi(Fnv1a_32);
}
test "fnv1a-64" {
try testing.expect(Fnv1a_64.hash("") == 0xcbf29ce484222325);
try testing.expect(Fnv1a_64.hash("a") == 0xaf63dc4c8601ec8c);
try testing.expect(Fnv1a_64.hash("foobar") == 0x85944171f73967e8);
+ try verify.iterativeApi(Fnv1a_64);
}
test "fnv1a-128" {
try testing.expect(Fnv1a_128.hash("") == 0x6c62272e07bb014262b821756295c58d);
try testing.expect(Fnv1a_128.hash("a") == 0xd228cb696f1a8caf78912b704e4a8964);
+ try verify.iterativeApi(Fnv1a_128);
}
diff --git a/lib/std/hash/murmur.zig b/lib/std/hash/murmur.zig
@@ -279,26 +279,9 @@ pub const Murmur3_32 = struct {
}
};
-fn SMHasherTest(comptime hash_fn: anytype, comptime hashbits: u32) u32 {
- const hashbytes = hashbits / 8;
- var key: [256]u8 = [1]u8{0} ** 256;
- var hashes: [hashbytes * 256]u8 = [1]u8{0} ** (hashbytes * 256);
-
- var i: u32 = 0;
- while (i < 256) : (i += 1) {
- key[i] = @as(u8, @truncate(i));
-
- var h = hash_fn(key[0..i], 256 - i);
- if (native_endian == .Big)
- h = @byteSwap(h);
- @memcpy(hashes[i * hashbytes ..][0..hashbytes], @as([*]u8, @ptrCast(&h)));
- }
-
- return @as(u32, @truncate(hash_fn(&hashes, 0)));
-}
+const verify = @import("verify.zig");
test "murmur2_32" {
- try testing.expectEqual(SMHasherTest(Murmur2_32.hashWithSeed, 32), 0x27864C1E);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;
@@ -311,8 +294,18 @@ test "murmur2_32" {
try testing.expectEqual(Murmur2_32.hash(@as([*]u8, @ptrCast(&v1le))[0..8]), Murmur2_32.hashUint64(v1));
}
+// Checks Murmur2_32.hashWithSeed against its expected verification code as computed by
+// verify.smhasher, first at runtime and then again at compile time.
+test "murmur2_32 smhasher" {
+ const Test = struct {
+ fn do() !void {
+ try testing.expectEqual(verify.smhasher(Murmur2_32.hashWithSeed), 0x27864C1E);
+ }
+ };
+ try Test.do();
+ // Raise the compile-time branch quota before the comptime evaluation below.
+ @setEvalBranchQuota(30000);
+ try comptime Test.do();
+}
+
test "murmur2_64" {
- try std.testing.expectEqual(SMHasherTest(Murmur2_64.hashWithSeed, 64), 0x1F0D3804);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;
@@ -325,8 +318,18 @@ test "murmur2_64" {
try testing.expectEqual(Murmur2_64.hash(@as([*]u8, @ptrCast(&v1le))[0..8]), Murmur2_64.hashUint64(v1));
}
+// Checks Murmur2_64.hashWithSeed against its expected verification code as computed by
+// verify.smhasher, first at runtime and then again at compile time.
+test "murmur2_64 smhasher" {
+    const Test = struct {
+        fn do() !void {
+            try std.testing.expectEqual(verify.smhasher(Murmur2_64.hashWithSeed), 0x1F0D3804);
+        }
+    };
+    try Test.do();
+    // Raise the compile-time branch quota before the comptime evaluation below.
+    @setEvalBranchQuota(30000);
+    try comptime Test.do();
+}
+
test "murmur3_32" {
- try std.testing.expectEqual(SMHasherTest(Murmur3_32.hashWithSeed, 32), 0xB0F57EE3);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;
@@ -338,3 +341,14 @@ test "murmur3_32" {
try testing.expectEqual(Murmur3_32.hash(@as([*]u8, @ptrCast(&v0le))[0..4]), Murmur3_32.hashUint32(v0));
try testing.expectEqual(Murmur3_32.hash(@as([*]u8, @ptrCast(&v1le))[0..8]), Murmur3_32.hashUint64(v1));
}
+
+// Checks Murmur3_32.hashWithSeed against its expected verification code as computed by
+// verify.smhasher, first at runtime and then again at compile time.
+test "murmur3_32 smhasher" {
+    const Test = struct {
+        fn do() !void {
+            try std.testing.expectEqual(verify.smhasher(Murmur3_32.hashWithSeed), 0xB0F57EE3);
+        }
+    };
+    try Test.do();
+    // Raise the compile-time branch quota before the comptime evaluation below.
+    @setEvalBranchQuota(30000);
+    try comptime Test.do();
+}
diff --git a/lib/std/hash/verify.zig b/lib/std/hash/verify.zig
@@ -0,0 +1,62 @@
+const std = @import("std");
+
+// Calls `hash_fn` on `buf`, passing `seed` only when the function accepts one.
+//
+// The call shape is chosen from the function's parameter list:
+// - more than one parameter, the first being an integer: `hash_fn(seed, buf)`
+// - more than one parameter otherwise: `hash_fn(buf, seed)`
+// - a single parameter: `hash_fn(buf)`, and `seed` is ignored
+//
+// `seed` is converted with `@intCast` to the type the seed parameter expects.
+// Returns whatever `hash_fn` returns.
+fn hashMaybeSeed(comptime hash_fn: anytype, seed: anytype, buf: []const u8) @typeInfo(@TypeOf(hash_fn)).Fn.return_type.? {
+ const HashFn = @typeInfo(@TypeOf(hash_fn)).Fn;
+ if (HashFn.params.len > 1) {
+ // An integer first parameter is taken to be the seed; otherwise the seed comes second.
+ if (@typeInfo(HashFn.params[0].type.?) == .Int) {
+ return hash_fn(@intCast(seed), buf);
+ } else {
+ return hash_fn(buf, @intCast(seed));
+ }
+ } else {
+ return hash_fn(buf);
+ }
+}
+
+// Constructs a `Hash` through `Hash.init`, forwarding `seed` (converted with `@intCast`) only
+// when `init` takes exactly one parameter. Otherwise `init` is called with no arguments and
+// `seed` is ignored.
+fn initMaybeSeed(comptime Hash: anytype, seed: anytype) Hash {
+ const HashFn = @typeInfo(@TypeOf(Hash.init)).Fn;
+ if (HashFn.params.len == 1) {
+ return Hash.init(@intCast(seed));
+ } else {
+ return Hash.init();
+ }
+}
+
+// Returns a verification code, the same as used by SMHasher.
+//
+// For every key length N in 0..255, hashes the key {0, 1, ..., N-1} using 256-N as the seed
+// and stores each result in little-endian byte order. The concatenated results are then hashed
+// with seed 0, and the low 32 bits of that final hash (its first four bytes when laid out as
+// little-endian) are the verification code.
+//
+// `hash_fn` may take its seed before or after the input, or take no seed at all.
+pub fn smhasher(comptime hash_fn: anytype) u32 {
+ const HashFnTy = @typeInfo(@TypeOf(hash_fn)).Fn;
+ const HashResult = HashFnTy.return_type.?;
+ const hash_size = @sizeOf(HashResult);
+
+ // `buf` holds the growing key; `buf_all` holds one hash result per key length.
+ var buf: [256]u8 = undefined;
+ var buf_all: [256 * hash_size]u8 = undefined;
+
+ for (0..256) |i| {
+ // Byte `i` is written now but only becomes part of the key on the next iteration,
+ // since the key hashed here is `buf[0..i]`.
+ buf[i] = @intCast(i);
+ const h = hashMaybeSeed(hash_fn, 256 - i, buf[0..i]);
+ std.mem.writeIntLittle(HashResult, buf_all[i * hash_size ..][0..hash_size], h);
+ }
+
+ return @truncate(hashMaybeSeed(hash_fn, 0, buf_all[0..]));
+}
+
+// Checks that `Hash`'s incremental API agrees with its one-shot `Hash.hash` function.
+//
+// A single hasher is fed an all-zero buffer in chunks of 1, 2, ..., 31 bytes. After each chunk,
+// `final()` is called twice and compared against the direct hash of everything fed so far.
+//
+// Returns `error.IterativeHashWasNotIdempotent` when the two `final()` calls disagree, and
+// `error.IterativeHashDidNotMatchDirect` when the incremental result differs from the direct hash.
+pub fn iterativeApi(comptime Hash: anytype) !void {
+ // Sum(1..32) = 528
+ // (the loop below runs i = 1..31, so it consumes 496 bytes; 528 leaves room to spare)
+ var buf: [528]u8 = [_]u8{0} ** 528;
+ var len: usize = 0;
+ const seed = 0;
+
+ // The hasher is created once and never reset, so it accumulates across iterations.
+ var hasher = initMaybeSeed(Hash, seed);
+ for (1..32) |i| {
+ // Direct hash of the whole prefix that the hasher will have seen after this update.
+ const r = hashMaybeSeed(Hash.hash, seed, buf[0 .. len + i]);
+ hasher.update(buf[len..][0..i]);
+ const f1 = hasher.final();
+ const f2 = hasher.final();
+ if (f1 != f2) return error.IterativeHashWasNotIdempotent;
+ if (f1 != r) return error.IterativeHashDidNotMatchDirect;
+ len += i;
+ }
+}
diff --git a/lib/std/hash/wyhash.zig b/lib/std/hash/wyhash.zig
@@ -66,7 +66,7 @@ pub const Wyhash = struct {
}
pub fn final(self: *Wyhash) u64 {
- var input = self.buf[0..self.buf_len];
+ var input: []const u8 = self.buf[0..self.buf_len];
var newSelf = self.shallowCopy(); // ensure idempotency
if (self.total_len <= 16) {
@@ -196,6 +196,7 @@ pub const Wyhash = struct {
}
};
+const verify = @import("verify.zig");
const expectEqual = std.testing.expectEqual;
const TestVector = struct {
@@ -229,51 +230,26 @@ test "test vectors at comptime" {
}
}
-test "test vectors streaming" {
- const step = 5;
-
- for (vectors) |e| {
- var wh = Wyhash.init(e.seed);
- var i: usize = 0;
- while (i < e.input.len) : (i += step) {
- const len = if (i + step > e.input.len) e.input.len - i else step;
- wh.update(e.input[i..][0..len]);
+test "smhasher" {
+ const Test = struct {
+ fn do() !void {
+ try expectEqual(verify.smhasher(Wyhash.hash), 0xBD5E840C);
}
- try expectEqual(e.expected, wh.final());
- }
-}
-
-test "test ensure idempotent final call" {
- const e: TestVector = .{ .seed = 6, .expected = 0xc39cab13b115aad3, .input = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" };
- var wh = Wyhash.init(e.seed);
- wh.update(e.input);
-
- for (0..10) |_| {
- try expectEqual(e.expected, wh.final());
- }
+ };
+ try Test.do();
+ @setEvalBranchQuota(50000);
+ try comptime Test.do();
}
-test "iterative non-divisible update" {
- var buf: [8192]u8 = undefined;
- for (&buf, 0..) |*e, i| {
- e.* = @as(u8, @truncate(i));
- }
-
- const seed = 0x128dad08f;
-
- var end: usize = 32;
- while (end < buf.len) : (end += 32) {
- const non_iterative_hash = Wyhash.hash(seed, buf[0..end]);
-
- var wy = Wyhash.init(seed);
- var i: usize = 0;
- while (i < end) : (i += 33) {
- wy.update(buf[i..@min(i + 33, end)]);
+test "iterative api" {
+ const Test = struct {
+ fn do() !void {
+ try verify.iterativeApi(Wyhash);
}
- const iterative_hash = wy.final();
-
- try std.testing.expectEqual(iterative_hash, non_iterative_hash);
- }
+ };
+ try Test.do();
+ @setEvalBranchQuota(50000);
+ try comptime Test.do();
}
test "iterative maintains last sixteen" {
diff --git a/lib/std/hash/xxhash.zig b/lib/std/hash/xxhash.zig
@@ -438,6 +438,8 @@ fn validateType(comptime T: type) void {
}
}
+const verify = @import("verify.zig");
+
fn testExpect(comptime H: type, seed: anytype, input: []const u8, expected: u64) !void {
try expectEqual(expected, H.hash(0, input));
@@ -457,6 +459,28 @@ test "xxhash64" {
try testExpect(H, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 0xe04a477f19ee145d);
}
+// Checks XxHash64.hash against its expected verification code as computed by
+// verify.smhasher, first at runtime and then again at compile time.
+test "xxhash64 smhasher" {
+    const Test = struct {
+        fn do() !void {
+            try expectEqual(verify.smhasher(XxHash64.hash), 0x024B7CF4);
+        }
+    };
+    try Test.do();
+    // Raise the compile-time branch quota before the comptime evaluation below.
+    @setEvalBranchQuota(75000);
+    // Written as `try comptime`, matching the other hash tests touched by this change.
+    try comptime Test.do();
+}
+
+// Checks that XxHash64's incremental update()/final() results agree with its one-shot hash(),
+// first at runtime and then again at compile time.
+test "xxhash64 iterative api" {
+    const Test = struct {
+        fn do() !void {
+            try verify.iterativeApi(XxHash64);
+        }
+    };
+    try Test.do();
+    // Raise the compile-time branch quota before the comptime evaluation below.
+    @setEvalBranchQuota(30000);
+    // Written as `try comptime`, matching the other hash tests touched by this change.
+    try comptime Test.do();
+}
+
test "xxhash32" {
const H = XxHash32;
@@ -468,3 +492,25 @@ test "xxhash32" {
try testExpect(H, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 0x9c285e64);
try testExpect(H, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 0x9c05f475);
}
+
+// Checks XxHash32.hash against its expected verification code as computed by
+// verify.smhasher, first at runtime and then again at compile time.
+test "xxhash32 smhasher" {
+    const Test = struct {
+        fn do() !void {
+            try expectEqual(verify.smhasher(XxHash32.hash), 0xBA88B743);
+        }
+    };
+    try Test.do();
+    // Raise the compile-time branch quota before the comptime evaluation below.
+    @setEvalBranchQuota(75000);
+    // Written as `try comptime`, matching the other hash tests touched by this change.
+    try comptime Test.do();
+}
+
+// Checks that XxHash32's incremental update()/final() results agree with its one-shot hash(),
+// first at runtime and then again at compile time.
+test "xxhash32 iterative api" {
+    const Test = struct {
+        fn do() !void {
+            try verify.iterativeApi(XxHash32);
+        }
+    };
+    try Test.do();
+    // Raise the compile-time branch quota before the comptime evaluation below.
+    @setEvalBranchQuota(30000);
+    // Written as `try comptime`, matching the other hash tests touched by this change.
+    try comptime Test.do();
+}
diff --git a/tools/update_crc_catalog.zig b/tools/update_crc_catalog.zig
@@ -55,7 +55,7 @@ pub fn main() anyerror!void {
try test_writer.writeAll(
\\//! This file is auto-generated by tools/update_crc_catalog.zig.
\\
- \\const std = @import("../../std.zig");
+ \\const std = @import("std");
\\const testing = std.testing;
\\const catalog = @import("catalog.zig");
\\