From 249cdb1a31a8967c65e793c322e1f2e310e954c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Tue, 15 Mar 2022 10:07:05 +0200 Subject: [PATCH] remove corpus.users and corpus.groups These use cases are now fully replaced with MultiArrayList --- README.md | 2 +- src/sections.zig | 127 +++++++++++++++++++++++++++-------------------- 2 files changed, 74 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 39c50dc..6d6e6c4 100644 --- a/README.md +++ b/README.md @@ -377,7 +377,7 @@ Section creation order: 1. ✅ Users. Requires `userGids` and shell. 1. ✅ Groupmembers. Requires Users. 1. ✅ Groups. Requires Groupmembers. -1. `idx_*`. Requires offsets to Groups and Users. +1. ✅ `idx_*`. Requires offsets to Groups and Users. 1. Header. [git-subtrac]: https://apenwarr.ca/log/20191109 diff --git a/src/sections.zig b/src/sections.zig index f1b8ae4..eb59819 100644 --- a/src/sections.zig +++ b/src/sections.zig @@ -28,13 +28,9 @@ const Corpus = struct { arena: std.heap.ArenaAllocator, // sorted by name, by unicode codepoint - users: []User, + users: MultiArrayList(User), // sorted by gid - groups: []Group, - - // columnar users and groups of the above - usersm: MultiArrayList(User), - groupsm: MultiArrayList(Group), + groups: MultiArrayList(Group), name2user: StringHashMap(u32), name2group: StringHashMap(u32), @@ -50,35 +46,35 @@ const Corpus = struct { var allocator = arena.allocator(); errdefer arena.deinit(); - var users = try allocator.alloc(User, usersConst.len); - var groups = try allocator.alloc(Group, groupsConst.len); + var users_arr = try allocator.alloc(User, usersConst.len); + var groups_arr = try allocator.alloc(Group, groupsConst.len); for (usersConst) |*user, i| - users[i] = try user.clone(allocator); + users_arr[i] = try user.clone(allocator); for (groupsConst) |*group, i| - groups[i] = try group.clone(allocator); + groups_arr[i] = try group.clone(allocator); - sort.sort(User, users, {}, cmpUser); - sort.sort(Group, groups, {}, cmpGroup); + sort.sort(User, users_arr, {}, cmpUser); + sort.sort(Group, groups_arr, {}, cmpGroup); - var usersm = MultiArrayList(User){}; - try usersm.ensureTotalCapacity(allocator, users.len); - for (users) |user| - usersm.appendAssumeCapacity(user); - var groupsm = MultiArrayList(Group){}; - try groupsm.ensureTotalCapacity(allocator, groups.len); - for (groups) |group| - groupsm.appendAssumeCapacity(group); + var users = MultiArrayList(User){}; + try users.ensureTotalCapacity(allocator, users_arr.len); + for (users_arr) |user| + users.appendAssumeCapacity(user); + var groups = MultiArrayList(Group){}; + try groups.ensureTotalCapacity(allocator, groups_arr.len); + for (groups_arr) |group| + groups.appendAssumeCapacity(group); var name2user = StringHashMap(u32).init(allocator); var name2group = StringHashMap(u32).init(allocator); - for (usersm.items(.name)) |name, i| { + for (users.items(.name)) |name, i| { var res1 = try name2user.getOrPut(name); if (res1.found_existing) return error.Duplicate; res1.value_ptr.* = @intCast(u32, i); } - for (groupsm.items(.name)) |name, i| { + for (groups.items(.name)) |name, i| { var res1 = try name2group.getOrPut(name); if (res1.found_existing) return error.Duplicate; @@ -93,7 +89,7 @@ const Corpus = struct { defer baseAllocator.free(user2groups); mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){}); - for (groupsm.items(.members)) |group_members, i| { + for (groups.items(.members)) |group_members, i| { var members = try allocator.alloc(u32, group_members.count()); members.len = 0; @@ -123,8 +119,6 @@ const Corpus = struct { .arena = arena, .users = users, .groups = groups, - .usersm = usersm, - .groupsm = groupsm, .name2user = name2user, .name2group = name2group, .group2users = group2users, @@ -143,7 +137,7 @@ pub fn shellSections( corpus: *const Corpus, ) error{ OutOfMemory, Overflow }!ShellSections { var popcon = shellImport.ShellWriter.init(allocator); - for (corpus.usersm.items(.shell)) |shell| + for (corpus.users.items(.shell)) |shell| try popcon.put(shell); return popcon.toOwnedSections(shellImport.max_shells); } @@ -187,8 +181,9 @@ pub fn userGids( idx2offset[user_idx] = blob.items.len; scratch = try allocator.realloc(scratch, usergroups.len); scratch.len = usergroups.len; + const corpusGids = corpus.groups.items(.gid); for (usergroups) |group_idx, i| - scratch[i] = corpus.groups[group_idx].gid; + scratch[i] = corpusGids[group_idx]; compress.deltaCompress(u32, scratch) catch |err| switch (err) { error.NotSorted => unreachable, }; @@ -227,7 +222,10 @@ pub fn usersSection( // 8 bytes. 24 is an optimistic lower bound for an average record size. var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len); errdefer blob.deinit(); - for (corpus.users) |user, i| { + var i: usize = 0; + while (i < corpus.users.len) : (i += 1) { + // TODO: this is inefficient; it's calling `.slice()` on every iteration + const user = corpus.users.get(i); const user_offset = try math.cast(u32, blob.items.len); std.debug.assert(user_offset & 7 == 0); idx2offset[i] = user_offset; @@ -321,7 +319,10 @@ pub fn groupsSection( var blob = try ArrayList(u8).initCapacity(allocator, 8 * corpus.groups.len); errdefer blob.deinit(); - for (corpus.groups) |group, i| { + var i: usize = 0; + while (i < corpus.groups.len) : (i += 1) { + // TODO: this is inefficient; it's calling `.slice()` on every iteration + const group = corpus.groups.get(i); const group_offset = try math.cast(u32, blob.items.len); std.debug.assert(group_offset & 7 == 0); idx2offset[i] = group_offset; @@ -407,10 +408,10 @@ pub const AllSections = struct { allocator: Allocator, corpus: *const Corpus, ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections { - const bdz_gid = try cmph.packU32(allocator, corpus.groupsm.items(.gid)); - const bdz_groupname = try cmph.packStr(allocator, corpus.groupsm.items(.name)); - const bdz_uid = try cmph.packU32(allocator, corpus.usersm.items(.uid)); - const bdz_username = try cmph.packStr(allocator, corpus.usersm.items(.name)); + const bdz_gid = try cmph.packU32(allocator, corpus.groups.items(.gid)); + const bdz_groupname = try cmph.packStr(allocator, corpus.groups.items(.name)); + const bdz_uid = try cmph.packU32(allocator, corpus.users.items(.uid)); + const bdz_username = try cmph.packStr(allocator, corpus.users.items(.name)); const shell_sections = try shellSections(allocator, corpus); const user_gids = try userGids(allocator, corpus); const users = try usersSection( @@ -438,13 +439,25 @@ pub const AllSections = struct { u32, allocator, bdz_gid, - corpus.groupsm.items(.gid), + corpus.groups.items(.gid), ); var idx_groupname2group = try bdzIdx( []const u8, allocator, - bdz_gid, - corpus.groupsm.items(.name), + bdz_groupname, + corpus.groups.items(.name), + ); + var idx_uid2user = try bdzIdx( + u32, + allocator, + bdz_uid, + corpus.users.items(.uid), + ); + var idx_name2user = try bdzIdx( + []const u8, + allocator, + bdz_username, + corpus.users.items(.name), ); return AllSections{ @@ -461,8 +474,8 @@ pub const AllSections = struct { .groups = groups, .idx_gid2group = idx_gid2group, .idx_groupname2group = idx_groupname2group, - .idx_uid2user = undefined, - .idx_name2user = undefined, + .idx_uid2user = idx_uid2user, + .idx_name2user = idx_name2user, }; } @@ -478,6 +491,8 @@ pub const AllSections = struct { self.groups.deinit(self.allocator); self.allocator.free(self.idx_gid2group); self.allocator.free(self.idx_groupname2group); + self.allocator.free(self.idx_uid2user); + self.allocator.free(self.idx_name2user); self.* = undefined; } }; @@ -559,18 +574,20 @@ test "test corpus" { const svc_bar = 2; const vidmantas = 3; - try testing.expectEqualStrings(corpus.users[name_name].name, "Name" ** 8); - try testing.expectEqualStrings(corpus.users[nobody].name, "nobody"); - try testing.expectEqualStrings(corpus.users[svc_bar].name, "svc-bar"); - try testing.expectEqualStrings(corpus.users[vidmantas].name, "vidmantas"); + const usernames = corpus.users.items(.name); + try testing.expectEqualStrings(usernames[name_name], "Name" ** 8); + try testing.expectEqualStrings(usernames[nobody], "nobody"); + try testing.expectEqualStrings(usernames[svc_bar], "svc-bar"); + try testing.expectEqualStrings(usernames[vidmantas], "vidmantas"); const g_service_account = 0; const g_vidmantas = 1; const g_all = 2; - try testing.expectEqualStrings(corpus.groups[g_service_account].name, "service-account"); - try testing.expectEqualStrings(corpus.groups[g_vidmantas].name, "vidmantas"); - try testing.expectEqualStrings(corpus.groups[g_all].name, "all"); + const groupnames = corpus.groups.items(.name); + try testing.expectEqualStrings(groupnames[g_service_account], "service-account"); + try testing.expectEqualStrings(groupnames[g_vidmantas], "vidmantas"); + try testing.expectEqualStrings(groupnames[g_all], "all"); try testing.expectEqual(corpus.name2user.get("404"), null); try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas); @@ -616,13 +633,14 @@ test "test groups, group members and users" { ); i = 0; while (i < corpus.users.len) : (i += 1) { - const user = (try it.next()).?; - try testing.expectEqual(corpus.users[i].uid, user.uid()); - try testing.expectEqual(corpus.users[i].gid, user.gid()); - try testing.expectEqualStrings(corpus.users[i].name, user.name()); - try testing.expectEqualStrings(corpus.users[i].gecos, user.gecos()); - try testing.expectEqualStrings(corpus.users[i].home, user.home()); - try testing.expectEqualStrings(corpus.users[i].shell, user.shell(sections.shell_reader)); + const got = (try it.next()).?; + const user = corpus.users.get(i); + try testing.expectEqual(user.uid, got.uid()); + try testing.expectEqual(user.gid, got.gid()); + try testing.expectEqualStrings(user.name, got.name()); + try testing.expectEqualStrings(user.gecos, got.gecos()); + try testing.expectEqualStrings(user.home, got.home()); + try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader)); } } @@ -646,8 +664,9 @@ test "userGids" { var it = compress.DeltaDecompressionIterator(&vit); try testing.expectEqual(it.remaining(), groups.len); var i: u64 = 0; + const corpusGids = corpus.groups.items(.gid); while (try it.next()) |gid| : (i += 1) { - try testing.expectEqual(gid, corpus.groups[groups[i]].gid); + try testing.expectEqual(gid, corpusGids[groups[i]]); } try testing.expectEqual(i, groups.len); } @@ -658,7 +677,7 @@ test "pack gids" { var corpus = try testCorpus(allocator); defer corpus.deinit(); - const cmph_gid = try cmph.packU32(allocator, corpus.groupsm.items(.gid)); + const cmph_gid = try cmph.packU32(allocator, corpus.groups.items(.gid)); defer allocator.free(cmph_gid); const k1 = bdz.search_u32(cmph_gid, 0);