Files
zig/stage0/wyhash_test.zig
Motiejus 8dd28c9a92 stage0: add wyhash and replace boost hash combine in InternPool
Port Zig's std.hash.Wyhash to C (same secret constants, CONDOM=0 mum)
and replace ipHashCombine (boost golden ratio) with Wyhash in ipHashKey.
This aligns the C InternPool's hashing strategy with upstream Zig, which
uses Wyhash for all key hashing including NamespaceType keys.

Tests verify C and Zig Wyhash produce identical results for all standard
test vectors, streaming in various chunk sizes, autoHash equivalence for
u32/u64, and a large 8KB buffer.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-01 17:45:50 +00:00

190 lines
5.6 KiB
Zig

const std = @import("std");
const testing = std.testing;
const c = @cImport({
@cInclude("wyhash.h");
});
const ZigWyhash = std.hash.Wyhash;
/// One known-answer case: hashing `input` with `seed` must produce `expected`.
const TestVector = struct {
    /// Bytes fed to the hash.
    input: []const u8,
    /// Seed the hash is initialised with.
    seed: u64,
    /// Digest the hash must return for this seed/input pair.
    expected: u64,
};
// Known-answer table: each entry pairs a seed and an input string with the
// digest expected for that combination. Inputs range from the empty string up
// to an 80-character digit string.
// From lib/std/hash/wyhash.zig lines 209-216
const vectors = [_]TestVector{
.{ .seed = 0, .expected = 0x409638ee2bde459, .input = "" },
.{ .seed = 1, .expected = 0xa8412d091b5fe0a9, .input = "a" },
.{ .seed = 2, .expected = 0x32dd92e4b2915153, .input = "abc" },
.{ .seed = 3, .expected = 0x8619124089a3a16b, .input = "message digest" },
.{ .seed = 4, .expected = 0x7a43afb61d7f5f40, .input = "abcdefghijklmnopqrstuvwxyz" },
.{ .seed = 5, .expected = 0xff42329b90e50d58, .input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" },
.{ .seed = 6, .expected = 0xc39cab13b115aad3, .input = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
};
test "one-shot hash matches Zig Wyhash" {
    // For every reference vector the C one-shot digest must equal both the
    // recorded expected value and what Zig's std.hash.Wyhash computes.
    for (vectors) |vec| {
        const from_c = c.wyhash_hash(vec.seed, vec.input.ptr, vec.input.len);
        const from_zig = ZigWyhash.hash(vec.seed, vec.input);

        try testing.expectEqual(vec.expected, from_c);
        try testing.expectEqual(from_zig, from_c);
    }
}
test "streaming matches one-shot" {
    for (vectors) |vec| {
        const direct = c.wyhash_hash(vec.seed, vec.input.ptr, vec.input.len);

        // Streaming API, with the entire input delivered in a single update.
        var state: c.Wyhash = undefined;
        c.wyhash_init(&state, vec.seed);
        c.wyhash_update(&state, vec.input.ptr, vec.input.len);
        const streamed = c.wyhash_final(&state);

        try testing.expectEqual(direct, streamed);
    }
}
test "streaming in chunks matches one-shot" {
    const input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    const seed: u64 = 5;
    const expected: u64 = 0xff42329b90e50d58;

    // Chunk sizes, tried in this order:
    //   1  - byte by byte
    //   7  - small pieces that do not divide the input length evenly
    //   48 - the block-boundary size
    const chunk_sizes = [_]usize{ 1, 7, 48 };

    for (chunk_sizes) |step| {
        var state: c.Wyhash = undefined;
        c.wyhash_init(&state, seed);

        // Consume the input front to back; the final piece may be shorter.
        var rest: []const u8 = input;
        while (rest.len != 0) {
            const take: usize = @min(step, rest.len);
            c.wyhash_update(&state, rest.ptr, take);
            rest = rest[take..];
        }

        try testing.expectEqual(expected, c.wyhash_final(&state));
    }
}
test "final is idempotent" {
    const msg = "hello";

    var state: c.Wyhash = undefined;
    c.wyhash_init(&state, 42);
    c.wyhash_update(&state, msg, msg.len);

    // Two back-to-back finalizations, with no update in between, must agree.
    const digest_a = c.wyhash_final(&state);
    const digest_b = c.wyhash_final(&state);
    try testing.expectEqual(digest_a, digest_b);
}
test "autoHash equivalence u32" {
    const seed: u64 = 0;
    const val: u32 = 42;

    // Reference digest: std.hash.autoHash over the same u32 value.
    // NOTE(review): autoHash hashes the integer's in-memory bytes, which are
    // little-endian only on little-endian hosts; whether wyhash_update_u32
    // matches on big-endian targets is not visible here - confirm.
    var zig_state = ZigWyhash.init(seed);
    std.hash.autoHash(&zig_state, val);
    const want = zig_state.final();

    // C port: dedicated u32 update helper.
    var c_state: c.Wyhash = undefined;
    c.wyhash_init(&c_state, seed);
    c.wyhash_update_u32(&c_state, val);
    const got = c.wyhash_final(&c_state);

    try testing.expectEqual(want, got);
}
test "autoHash equivalence u64" {
    const seed: u64 = 0;
    const val: u64 = 0xDEADBEEFCAFEBABE;

    // Reference digest: std.hash.autoHash over the same u64 value.
    var zig_state = ZigWyhash.init(seed);
    std.hash.autoHash(&zig_state, val);
    const want = zig_state.final();

    // C port: dedicated u64 update helper.
    var c_state: c.Wyhash = undefined;
    c.wyhash_init(&c_state, seed);
    c.wyhash_update_u64(&c_state, val);
    const got = c.wyhash_final(&c_state);

    try testing.expectEqual(want, got);
}
test "large buffer (>4k) matches Zig Wyhash" {
    // Deterministic 8192-byte pattern: byte k holds k truncated to 8 bits.
    var buf: [8192]u8 = undefined;
    for (0..buf.len) |k| {
        buf[k] = @truncate(k);
    }

    const seed: u64 = 0x12345678;
    const zig_result = ZigWyhash.hash(seed, &buf);

    // One-shot C hash over the whole buffer.
    const c_oneshot = c.wyhash_hash(seed, &buf, buf.len);
    try testing.expectEqual(zig_result, c_oneshot);

    // Streaming in 100-byte pieces (not a multiple of 48), driving the C and
    // Zig streaming hashers in lockstep with identical slices.
    {
        var c_state: c.Wyhash = undefined;
        c.wyhash_init(&c_state, seed);
        var zig_state = ZigWyhash.init(seed);

        var offset: usize = 0;
        while (offset < buf.len) {
            const take: usize = @min(100, buf.len - offset);
            const piece = buf[offset..][0..take];
            c.wyhash_update(&c_state, &buf[offset], take);
            zig_state.update(piece);
            offset += take;
        }

        try testing.expectEqual(zig_state.final(), c.wyhash_final(&c_state));
    }

    // Streaming one byte per update call (stress test), checked against the
    // Zig one-shot digest computed above.
    {
        var c_state: c.Wyhash = undefined;
        c.wyhash_init(&c_state, seed);
        for (0..buf.len) |k| {
            c.wyhash_update(&c_state, &buf[k], 1);
        }
        try testing.expectEqual(zig_result, c.wyhash_final(&c_state));
    }
}
test "streaming maintains last sixteen" {
    // Port of the test from lib/std/hash/wyhash.zig
    //
    // The input is 48 'Z' bytes followed by an 18-byte tail. Trimming 0..16
    // bytes off the end gives payload lengths 66 down to 50, i.e. tails of
    // 18 down to 2 bytes after the first 48.
    const input = "Z" ** 48 ++ "01234567890abcdefg";
    const seed: u64 = 0;
    for (0..17) |i| {
        const payload = input[0 .. input.len - i];

        // Independent reference from Zig's implementation. Comparing the C
        // one-shot and streaming paths only with each other would let a
        // tail-handling bug common to both go unnoticed.
        const reference = ZigWyhash.hash(seed, payload);

        const oneshot = c.wyhash_hash(seed, payload.ptr, payload.len);

        var h: c.Wyhash = undefined;
        c.wyhash_init(&h, seed);
        c.wyhash_update(&h, payload.ptr, payload.len);
        const streaming = c.wyhash_final(&h);

        try testing.expectEqual(reference, oneshot);
        try testing.expectEqual(oneshot, streaming);
    }
}