1
Fork 0

sections: store offsets in indices

This commit is contained in:
Motiejus Jakštys 2022-03-18 06:17:52 +01:00 committed by Motiejus Jakštys
parent a426c46efa
commit 4daa6fd38a
1 changed file with 19 additions and 15 deletions

View File

@ -344,12 +344,15 @@ pub fn groupsSection(
}; };
} }
// creates a bdz index using packed_mphf. buf[bdz_search(key)] = index(keys, key) // creates a bdz index using packed_mphf.
// hash = bdz_search(packed_mphf, keys[i]);
// result[hash] = idx2offset[i];
pub fn bdzIdx( pub fn bdzIdx(
comptime T: type, comptime T: type,
allocator: Allocator, allocator: Allocator,
packed_mphf: []const u8, packed_mphf: []const u8,
keys: []const T, keys: []const T,
idx2offset: []const u32,
) error{OutOfMemory}![]const u32 { ) error{OutOfMemory}![]const u32 {
const search_fn = comptime blk: { const search_fn = comptime blk: {
switch (T) { switch (T) {
@ -361,7 +364,7 @@ pub fn bdzIdx(
assert(keys.len <= math.maxInt(u32)); assert(keys.len <= math.maxInt(u32));
var result = try allocator.alloc(u32, keys.len); var result = try allocator.alloc(u32, keys.len);
for (keys) |key, i| for (keys) |key, i|
result[search_fn(packed_mphf, key)] = @intCast(u32, i); result[search_fn(packed_mphf, key)] = idx2offset[i];
return result; return result;
} }
@ -441,17 +444,16 @@ pub const AllSections = struct {
var groups = try groupsSection(allocator, corpus, group_members.idx2offset); var groups = try groupsSection(allocator, corpus, group_members.idx2offset);
errdefer groups.deinit(allocator); errdefer groups.deinit(allocator);
// TODO: these indices must point to the *offsets*, not the indices in "users" var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset);
var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids);
errdefer allocator.free(idx_gid2group); errdefer allocator.free(idx_gid2group);
var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames); var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset);
errdefer allocator.free(idx_groupname2group); errdefer allocator.free(idx_groupname2group);
var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids); var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset);
errdefer allocator.free(idx_uid2user); errdefer allocator.free(idx_uid2user);
var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames); var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset);
errdefer allocator.free(idx_name2user); errdefer allocator.free(idx_name2user);
return AllSections{ return AllSections{
@ -694,21 +696,23 @@ test "users compare function" {
try testing.expect(!cmpUser({}, bb, b)); try testing.expect(!cmpUser({}, bb, b));
} }
const hash_offsets = &[_]u32{ 0, 10, 20, 30 };
fn expectUsedHashes(allocator: Allocator, arr: []const u32) !void { fn expectUsedHashes(allocator: Allocator, arr: []const u32) !void {
var used = try allocator.alloc(bool, arr.len); var used = AutoHashMap(u32, void).init(allocator);
defer allocator.free(used); defer used.deinit();
mem.set(bool, used, false);
for (arr) |elem| for (arr) |elem|
used[arr[elem]] = true; try used.putNoClobber(elem, {});
for (used) |item| for (hash_offsets) |item|
try testing.expect(item); try testing.expect(used.get(item) != null);
} }
test "bdzIdx on u32" { test "bdzIdx on u32" {
const keys = [_]u32{ 42, 1, 2, 3 }; const keys = [_]u32{ 42, 1, 2, 3 };
const mphf = try cmph.packU32(testing.allocator, keys[0..]); const mphf = try cmph.packU32(testing.allocator, keys[0..]);
defer testing.allocator.free(mphf); defer testing.allocator.free(mphf);
var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..]); var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..], hash_offsets);
defer testing.allocator.free(result); defer testing.allocator.free(result);
try expectUsedHashes(testing.allocator, result); try expectUsedHashes(testing.allocator, result);
} }
@ -717,7 +721,7 @@ test "bdzIdx on str" {
const keys = [_][]const u8{ "42", "1", "2", "3" }; const keys = [_][]const u8{ "42", "1", "2", "3" };
const mphf = try cmph.packStr(testing.allocator, keys[0..]); const mphf = try cmph.packStr(testing.allocator, keys[0..]);
defer testing.allocator.free(mphf); defer testing.allocator.free(mphf);
var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..]); var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..], hash_offsets);
defer testing.allocator.free(result); defer testing.allocator.free(result);
try expectUsedHashes(testing.allocator, result); try expectUsedHashes(testing.allocator, result);
} }