From 92ee170d54356372b771767e82f133446b4a0307 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Tue, 15 Mar 2022 06:35:48 +0200 Subject: [PATCH] bdz/cmph nits --- README.md | 1 - src/cmph.zig | 8 +++--- src/sections.zig | 67 +++++++++++++++++++----------------------------- 3 files changed, 31 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index adbeb42..39c50dc 100644 --- a/README.md +++ b/README.md @@ -361,7 +361,6 @@ idx_gid2group len(group)*4 bdz->offset Groups idx_groupname2group len(group)*4 bdz->offset Groups idx_uid2user len(user)*4 bdz->offset Users idx_name2user len(user)*4 bdz->offset Users -idx_username2gids len(user)*4 bdz->offset UserGids shellIndex len(shells)*2 shell index array shellBlob <= 4032 shell data blob (max 63*64 bytes) groups ? packed Group entries (8b padding) diff --git a/src/cmph.zig b/src/cmph.zig index a1e966e..28a2951 100644 --- a/src/cmph.zig +++ b/src/cmph.zig @@ -35,7 +35,7 @@ pub fn pack(allocator: Allocator, input: [][*:0]const u8) Error![]const u8 { } // perfect-hash a list of numbers and return the packed mphf -pub fn pack_u32(allocator: Allocator, numbers: []const u32) Error![]const u8 { +pub fn packU32(allocator: Allocator, numbers: []const u32) Error![]const u8 { var keys: [][6]u8 = try allocator.alloc([6]u8, numbers.len); defer allocator.free(keys); for (numbers) |n, i| @@ -49,7 +49,7 @@ pub fn pack_u32(allocator: Allocator, numbers: []const u32) Error![]const u8 { } // perfect-hash a list of strings and return the packed mphf -pub fn pack_str(allocator: Allocator, strings: []const []const u8) Error![]const u8 { +pub fn packStr(allocator: Allocator, strings: []const []const u8) Error![]const u8 { var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); var keys = try arena.allocator().alloc([*:0]const u8, strings.len); @@ -119,7 +119,7 @@ test "unzeroZ" { test "pack u32" { const keys = &[_]u32{ 42, 1, math.maxInt(u32), 2 }; - const packed_mphf = try pack_u32(testing.allocator, keys); + const packed_mphf = try packU32(testing.allocator, keys); defer testing.allocator.free(packed_mphf); var hashes: [keys.len]u32 = undefined; for (keys) |key, i| { @@ -132,7 +132,7 @@ test "pack u32" { test "pack str" { const keys = &[_][]const u8{ "foo", "bar", "baz", "1", "2", "3" }; - const packed_mphf = try pack_str(testing.allocator, keys[0..]); + const packed_mphf = try packStr(testing.allocator, keys[0..]); defer testing.allocator.free(packed_mphf); var hashes: [keys.len]u32 = undefined; for (keys) |key, i| { diff --git a/src/sections.zig b/src/sections.zig index 87f7539..79db780 100644 --- a/src/sections.zig +++ b/src/sections.zig @@ -33,8 +33,8 @@ const Corpus = struct { groups: []Group, // columnar users and groups of the above - usersMulti: MultiArrayList(User), - groupsMulti: MultiArrayList(Group), + usersm: MultiArrayList(User), + groupsm: MultiArrayList(Group), name2user: StringHashMap(u32), name2group: StringHashMap(u32), @@ -60,25 +60,25 @@ const Corpus = struct { sort.sort(User, users, {}, cmpUser); sort.sort(Group, groups, {}, cmpGroup); - var usersMulti = MultiArrayList(User){}; - try usersMulti.ensureTotalCapacity(allocator, users.len); + var usersm = MultiArrayList(User){}; + try usersm.ensureTotalCapacity(allocator, users.len); for (users) |user| - usersMulti.appendAssumeCapacity(user); - var groupsMulti = MultiArrayList(Group){}; - try groupsMulti.ensureTotalCapacity(allocator, groups.len); + usersm.appendAssumeCapacity(user); + var groupsm = MultiArrayList(Group){}; + try groupsm.ensureTotalCapacity(allocator, groups.len); for (groups) |group| - groupsMulti.appendAssumeCapacity(group); + groupsm.appendAssumeCapacity(group); var name2user = StringHashMap(u32).init(allocator); var name2group = StringHashMap(u32).init(allocator); - for (usersMulti.items(.name)) |name, i| { + for (usersm.items(.name)) |name, i| { var res1 = try name2user.getOrPut(name); if (res1.found_existing) return error.Duplicate; res1.value_ptr.* = @intCast(u32, i); } - for (groupsMulti.items(.name)) |name, i| { + for (groupsm.items(.name)) |name, i| { var res1 = try name2group.getOrPut(name); if (res1.found_existing) return error.Duplicate; @@ -93,7 +93,7 @@ const Corpus = struct { defer baseAllocator.free(user2groups); mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){}); - for (groupsMulti.items(.members)) |group_members, i| { + for (groupsm.items(.members)) |group_members, i| { var members = try allocator.alloc(u32, group_members.count()); members.len = 0; @@ -123,8 +123,8 @@ const Corpus = struct { .arena = arena, .users = users, .groups = groups, - .usersMulti = usersMulti, - .groupsMulti = groupsMulti, + .usersm = usersm, + .groupsm = groupsm, .name2user = name2user, .name2group = name2group, .group2users = group2users, @@ -138,33 +138,12 @@ const Corpus = struct { } }; -pub fn bdzGid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 { - return try cmph.pack_u32(allocator, corpus.groupsMulti.items(.gid)); -} - -pub fn bdzGroupname(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 { - return try cmph.pack_str(allocator, corpus.groupsMulti.items(.name)); -} - -pub fn bdzUid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 { - return try cmph.pack_u32(allocator, corpus.usersMulti.items(.uid)); -} - -pub fn bdzUsername(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 { - return try cmph.pack_str(allocator, corpus.usersMulti.items(.name)); -} - -// TODO(motiejus) there are a few problems: -// - memory management for shell sections is a mess. Make it easier by ... -// - shell module should accept a list of shells and spit out two slices -// (allocated with a given allocator). There is too much dancing around -// here. pub fn shellSections( allocator: Allocator, corpus: *const Corpus, ) error{ OutOfMemory, Overflow }!ShellSections { var popcon = shellImport.ShellWriter.init(allocator); - for (corpus.usersMulti.items(.shell)) |shell| + for (corpus.usersm.items(.shell)) |shell| try popcon.put(shell); return popcon.toOwnedSections(shellImport.max_shells); } @@ -396,15 +375,19 @@ pub const AllSections = struct { user_gids: UserGids, group_members: GroupMembers, groups: GroupsSection, + idx_gid2group: []const u32, + idx_groupname2group: []const u32, + idx_uid2user: []const u32, + idx_name2user: []const u32, pub fn init( allocator: Allocator, corpus: *const Corpus, ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections { - const bdz_gid = try bdzGid(allocator, corpus); - const bdz_groupname = try bdzGroupname(allocator, corpus); - const bdz_uid = try bdzUid(allocator, corpus); - const bdz_username = try bdzUsername(allocator, corpus); + const bdz_gid = try cmph.packU32(allocator, corpus.groupsm.items(.gid)); + const bdz_groupname = try cmph.packStr(allocator, corpus.groupsm.items(.name)); + const bdz_uid = try cmph.packU32(allocator, corpus.usersm.items(.uid)); + const bdz_username = try cmph.packStr(allocator, corpus.usersm.items(.name)); const shell_sections = try shellSections(allocator, corpus); const user_gids = try userGids(allocator, corpus); const users = try usersSection( @@ -440,6 +423,10 @@ pub const AllSections = struct { .users = users, .group_members = group_members, .groups = groups, + .idx_gid2group = undefined, + .idx_groupname2group = undefined, + .idx_uid2user = undefined, + .idx_name2user = undefined, }; } @@ -633,7 +620,7 @@ test "pack gids" { var corpus = try testCorpus(allocator); defer corpus.deinit(); - const cmph_gid = try cmph.pack_u32(allocator, corpus.groupsMulti.items(.gid)); + const cmph_gid = try cmph.packU32(allocator, corpus.groupsm.items(.gid)); defer allocator.free(cmph_gid); const k1 = bdz.search_u32(cmph_gid, 0);