1
Fork 0

bdz/cmph nits

main
Motiejus Jakštys 2022-03-15 06:35:48 +02:00 committed by Motiejus Jakštys
parent 6bf3e10eed
commit 92ee170d54
3 changed files with 31 additions and 45 deletions

View File

@ -361,7 +361,6 @@ idx_gid2group len(group)*4 bdz->offset Groups
idx_groupname2group len(group)*4 bdz->offset Groups
idx_uid2user len(user)*4 bdz->offset Users
idx_name2user len(user)*4 bdz->offset Users
idx_username2gids len(user)*4 bdz->offset UserGids
shellIndex len(shells)*2 shell index array
shellBlob <= 4032 shell data blob (max 63*64 bytes)
groups ? packed Group entries (8b padding)

View File

@ -35,7 +35,7 @@ pub fn pack(allocator: Allocator, input: [][*:0]const u8) Error![]const u8 {
}
// perfect-hash a list of numbers and return the packed MPHF (minimal perfect hash function)
pub fn pack_u32(allocator: Allocator, numbers: []const u32) Error![]const u8 {
pub fn packU32(allocator: Allocator, numbers: []const u32) Error![]const u8 {
var keys: [][6]u8 = try allocator.alloc([6]u8, numbers.len);
defer allocator.free(keys);
for (numbers) |n, i|
@ -49,7 +49,7 @@ pub fn pack_u32(allocator: Allocator, numbers: []const u32) Error![]const u8 {
}
// perfect-hash a list of strings and return the packed MPHF (minimal perfect hash function)
pub fn pack_str(allocator: Allocator, strings: []const []const u8) Error![]const u8 {
pub fn packStr(allocator: Allocator, strings: []const []const u8) Error![]const u8 {
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
var keys = try arena.allocator().alloc([*:0]const u8, strings.len);
@ -119,7 +119,7 @@ test "unzeroZ" {
test "pack u32" {
const keys = &[_]u32{ 42, 1, math.maxInt(u32), 2 };
const packed_mphf = try pack_u32(testing.allocator, keys);
const packed_mphf = try packU32(testing.allocator, keys);
defer testing.allocator.free(packed_mphf);
var hashes: [keys.len]u32 = undefined;
for (keys) |key, i| {
@ -132,7 +132,7 @@ test "pack u32" {
test "pack str" {
const keys = &[_][]const u8{ "foo", "bar", "baz", "1", "2", "3" };
const packed_mphf = try pack_str(testing.allocator, keys[0..]);
const packed_mphf = try packStr(testing.allocator, keys[0..]);
defer testing.allocator.free(packed_mphf);
var hashes: [keys.len]u32 = undefined;
for (keys) |key, i| {

View File

@ -33,8 +33,8 @@ const Corpus = struct {
groups: []Group,
// columnar users and groups of the above
usersMulti: MultiArrayList(User),
groupsMulti: MultiArrayList(Group),
usersm: MultiArrayList(User),
groupsm: MultiArrayList(Group),
name2user: StringHashMap(u32),
name2group: StringHashMap(u32),
@ -60,25 +60,25 @@ const Corpus = struct {
sort.sort(User, users, {}, cmpUser);
sort.sort(Group, groups, {}, cmpGroup);
var usersMulti = MultiArrayList(User){};
try usersMulti.ensureTotalCapacity(allocator, users.len);
var usersm = MultiArrayList(User){};
try usersm.ensureTotalCapacity(allocator, users.len);
for (users) |user|
usersMulti.appendAssumeCapacity(user);
var groupsMulti = MultiArrayList(Group){};
try groupsMulti.ensureTotalCapacity(allocator, groups.len);
usersm.appendAssumeCapacity(user);
var groupsm = MultiArrayList(Group){};
try groupsm.ensureTotalCapacity(allocator, groups.len);
for (groups) |group|
groupsMulti.appendAssumeCapacity(group);
groupsm.appendAssumeCapacity(group);
var name2user = StringHashMap(u32).init(allocator);
var name2group = StringHashMap(u32).init(allocator);
for (usersMulti.items(.name)) |name, i| {
for (usersm.items(.name)) |name, i| {
var res1 = try name2user.getOrPut(name);
if (res1.found_existing)
return error.Duplicate;
res1.value_ptr.* = @intCast(u32, i);
}
for (groupsMulti.items(.name)) |name, i| {
for (groupsm.items(.name)) |name, i| {
var res1 = try name2group.getOrPut(name);
if (res1.found_existing)
return error.Duplicate;
@ -93,7 +93,7 @@ const Corpus = struct {
defer baseAllocator.free(user2groups);
mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){});
for (groupsMulti.items(.members)) |group_members, i| {
for (groupsm.items(.members)) |group_members, i| {
var members = try allocator.alloc(u32, group_members.count());
members.len = 0;
@ -123,8 +123,8 @@ const Corpus = struct {
.arena = arena,
.users = users,
.groups = groups,
.usersMulti = usersMulti,
.groupsMulti = groupsMulti,
.usersm = usersm,
.groupsm = groupsm,
.name2user = name2user,
.name2group = name2group,
.group2users = group2users,
@ -138,33 +138,12 @@ const Corpus = struct {
}
};
pub fn bdzGid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
return try cmph.pack_u32(allocator, corpus.groupsMulti.items(.gid));
}
pub fn bdzGroupname(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
return try cmph.pack_str(allocator, corpus.groupsMulti.items(.name));
}
pub fn bdzUid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
return try cmph.pack_u32(allocator, corpus.usersMulti.items(.uid));
}
pub fn bdzUsername(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
return try cmph.pack_str(allocator, corpus.usersMulti.items(.name));
}
// TODO(motiejus) there are a few problems:
// - memory management for shell sections is a mess and needs to be simplified.
// - the shell module should accept a list of shells and return two slices
// (allocated with a given allocator). There is too much dancing around
// here.
pub fn shellSections(
allocator: Allocator,
corpus: *const Corpus,
) error{ OutOfMemory, Overflow }!ShellSections {
var popcon = shellImport.ShellWriter.init(allocator);
for (corpus.usersMulti.items(.shell)) |shell|
for (corpus.usersm.items(.shell)) |shell|
try popcon.put(shell);
return popcon.toOwnedSections(shellImport.max_shells);
}
@ -396,15 +375,19 @@ pub const AllSections = struct {
user_gids: UserGids,
group_members: GroupMembers,
groups: GroupsSection,
idx_gid2group: []const u32,
idx_groupname2group: []const u32,
idx_uid2user: []const u32,
idx_name2user: []const u32,
pub fn init(
allocator: Allocator,
corpus: *const Corpus,
) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections {
const bdz_gid = try bdzGid(allocator, corpus);
const bdz_groupname = try bdzGroupname(allocator, corpus);
const bdz_uid = try bdzUid(allocator, corpus);
const bdz_username = try bdzUsername(allocator, corpus);
const bdz_gid = try cmph.packU32(allocator, corpus.groupsm.items(.gid));
const bdz_groupname = try cmph.packStr(allocator, corpus.groupsm.items(.name));
const bdz_uid = try cmph.packU32(allocator, corpus.usersm.items(.uid));
const bdz_username = try cmph.packStr(allocator, corpus.usersm.items(.name));
const shell_sections = try shellSections(allocator, corpus);
const user_gids = try userGids(allocator, corpus);
const users = try usersSection(
@ -440,6 +423,10 @@ pub const AllSections = struct {
.users = users,
.group_members = group_members,
.groups = groups,
.idx_gid2group = undefined,
.idx_groupname2group = undefined,
.idx_uid2user = undefined,
.idx_name2user = undefined,
};
}
@ -633,7 +620,7 @@ test "pack gids" {
var corpus = try testCorpus(allocator);
defer corpus.deinit();
const cmph_gid = try cmph.pack_u32(allocator, corpus.groupsMulti.items(.gid));
const cmph_gid = try cmph.packU32(allocator, corpus.groupsm.items(.gid));
defer allocator.free(cmph_gid);
const k1 = bdz.search_u32(cmph_gid, 0);