sections: store offsets in indices

This commit is contained in:
Motiejus Jakštys 2022-03-18 06:17:52 +01:00 committed by Motiejus Jakštys
parent a426c46efa
commit 4daa6fd38a

View File

@@ -344,12 +344,15 @@ pub fn groupsSection(
};
}
// creates a bdz index using packed_mphf. buf[bdz_search(key)] = index(keys, key)
// creates a bdz index using packed_mphf.
// hash = bdz_search(packed_mphf, keys[i]);
// result[hash] = idx2offset[i];
pub fn bdzIdx(
comptime T: type,
allocator: Allocator,
packed_mphf: []const u8,
keys: []const T,
idx2offset: []const u32,
) error{OutOfMemory}![]const u32 {
const search_fn = comptime blk: {
switch (T) {
@@ -361,7 +364,7 @@ pub fn bdzIdx(
assert(keys.len <= math.maxInt(u32));
var result = try allocator.alloc(u32, keys.len);
for (keys) |key, i|
result[search_fn(packed_mphf, key)] = @intCast(u32, i);
result[search_fn(packed_mphf, key)] = idx2offset[i];
return result;
}
@@ -441,17 +444,16 @@ pub const AllSections = struct {
var groups = try groupsSection(allocator, corpus, group_members.idx2offset);
errdefer groups.deinit(allocator);
// TODO: these indices must point to the *offsets*, not the indices in "users"
var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids);
var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset);
errdefer allocator.free(idx_gid2group);
var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames);
var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset);
errdefer allocator.free(idx_groupname2group);
var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids);
var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset);
errdefer allocator.free(idx_uid2user);
var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames);
var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset);
errdefer allocator.free(idx_name2user);
return AllSections{
@@ -694,21 +696,23 @@ test "users compare function" {
try testing.expect(!cmpUser({}, bb, b));
}
const hash_offsets = &[_]u32{ 0, 10, 20, 30 };
fn expectUsedHashes(allocator: Allocator, arr: []const u32) !void {
var used = try allocator.alloc(bool, arr.len);
defer allocator.free(used);
mem.set(bool, used, false);
var used = AutoHashMap(u32, void).init(allocator);
defer used.deinit();
for (arr) |elem|
used[arr[elem]] = true;
for (used) |item|
try testing.expect(item);
try used.putNoClobber(elem, {});
for (hash_offsets) |item|
try testing.expect(used.get(item) != null);
}
test "bdzIdx on u32" {
const keys = [_]u32{ 42, 1, 2, 3 };
const mphf = try cmph.packU32(testing.allocator, keys[0..]);
defer testing.allocator.free(mphf);
var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..]);
var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..], hash_offsets);
defer testing.allocator.free(result);
try expectUsedHashes(testing.allocator, result);
}
@@ -717,7 +721,7 @@ test "bdzIdx on str" {
const keys = [_][]const u8{ "42", "1", "2", "3" };
const mphf = try cmph.packStr(testing.allocator, keys[0..]);
defer testing.allocator.free(mphf);
var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..]);
var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..], hash_offsets);
defer testing.allocator.free(result);
try expectUsedHashes(testing.allocator, result);
}