From 4daa6fd38a1adbf718d360b51d7e3671dccd61b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?=
Date: Fri, 18 Mar 2022 06:17:52 +0100
Subject: [PATCH] sections: store offsets in indices

---
 src/sections.zig | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/src/sections.zig b/src/sections.zig
index fa18182..b3b710c 100644
--- a/src/sections.zig
+++ b/src/sections.zig
@@ -344,12 +344,15 @@ pub fn groupsSection(
     };
 }
 
-// creates a bdz index using packed_mphf. buf[bdz_search(key)] = index(keys, key)
+// creates a bdz index using packed_mphf.
+// hash = bdz_search(packed_mphf, keys[i]);
+// result[hash] = idx2offset[i];
 pub fn bdzIdx(
     comptime T: type,
     allocator: Allocator,
     packed_mphf: []const u8,
     keys: []const T,
+    idx2offset: []const u32,
 ) error{OutOfMemory}![]const u32 {
     const search_fn = comptime blk: {
         switch (T) {
@@ -361,7 +364,7 @@ pub fn bdzIdx(
     assert(keys.len <= math.maxInt(u32));
     var result = try allocator.alloc(u32, keys.len);
     for (keys) |key, i|
-        result[search_fn(packed_mphf, key)] = @intCast(u32, i);
+        result[search_fn(packed_mphf, key)] = idx2offset[i];
     return result;
 }
 
@@ -441,17 +444,16 @@ pub const AllSections = struct {
         var groups = try groupsSection(allocator, corpus, group_members.idx2offset);
         errdefer groups.deinit(allocator);
 
-        // TODO: these indices must point to the *offsets*, not the indices in "users"
-        var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids);
+        var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset);
         errdefer allocator.free(idx_gid2group);
 
-        var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames);
+        var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset);
         errdefer allocator.free(idx_groupname2group);
 
-        var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids);
+        var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset);
         errdefer allocator.free(idx_uid2user);
 
-        var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames);
+        var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset);
         errdefer allocator.free(idx_name2user);
 
         return AllSections{
@@ -694,21 +696,23 @@ test "users compare function" {
     try testing.expect(!cmpUser({}, bb, b));
 }
 
+const hash_offsets = &[_]u32{ 0, 10, 20, 30 };
+
 fn expectUsedHashes(allocator: Allocator, arr: []const u32) !void {
-    var used = try allocator.alloc(bool, arr.len);
-    defer allocator.free(used);
-    mem.set(bool, used, false);
+    var used = AutoHashMap(u32, void).init(allocator);
+    defer used.deinit();
+
     for (arr) |elem|
-        used[arr[elem]] = true;
-    for (used) |item|
-        try testing.expect(item);
+        try used.putNoClobber(elem, {});
+    for (hash_offsets) |item|
+        try testing.expect(used.get(item) != null);
 }
 
 test "bdzIdx on u32" {
     const keys = [_]u32{ 42, 1, 2, 3 };
     const mphf = try cmph.packU32(testing.allocator, keys[0..]);
     defer testing.allocator.free(mphf);
-    var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..]);
+    var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..], hash_offsets);
     defer testing.allocator.free(result);
     try expectUsedHashes(testing.allocator, result);
 }
@@ -717,7 +721,7 @@ test "bdzIdx on str" {
     const keys = [_][]const u8{ "42", "1", "2", "3" };
     const mphf = try cmph.packStr(testing.allocator, keys[0..]);
     defer testing.allocator.free(mphf);
-    var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..]);
+    var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..], hash_offsets);
     defer testing.allocator.free(result);
     try expectUsedHashes(testing.allocator, result);
 }