From 9bf0a3568994ffaf3a4f7e3c108cbb5273ae3bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Mon, 7 Mar 2022 06:09:20 +0200 Subject: [PATCH] add a struct for all sections --- README.md | 4 +- src/sections.zig | 310 ++++++++++++++++++++++++++++++----------------- 2 files changed, 199 insertions(+), 115 deletions(-) diff --git a/README.md b/README.md index bf32377..3244329 100644 --- a/README.md +++ b/README.md @@ -364,8 +364,8 @@ shellIndex len(shells)*2 shell index array shellBlob <= 4032 shell data blob (max 63*64 bytes) groups ? packed Group entries (8b padding) users ? packed User entries (8b padding) -groupMembers ? per-group memberlist (no padding) -userGids ? per-user gidlist entries (8b padding) +groupMembers ? per-group varint memberlist (no padding) +userGids ? per-user varint gidlist (8b padding) ``` Section creation order: diff --git a/src/sections.zig b/src/sections.zig index caae4c3..020b6bb 100644 --- a/src/sections.zig +++ b/src/sections.zig @@ -1,5 +1,6 @@ const std = @import("std"); const fmt = std.fmt; +const mem = std.mem; const math = std.math; const sort = std.sort; const unicode = std.unicode; @@ -35,12 +36,12 @@ const Corpus = struct { groupsMulti: MultiArrayList(Group), // pointing to `users` and `groups` slices above. - name2user: StringHashMap(*const User), - uid2user: AutoHashMap(u32, *const User), - name2group: StringHashMap(*const Group), - gid2group: AutoHashMap(u32, *const Group), - groupname2users: StringHashMap([]*const User), - username2groups: StringHashMap([]*const Group), + name2user: StringHashMap(usize), + uid2user: AutoHashMap(u32, usize), + name2group: StringHashMap(usize), + gid2group: AutoHashMap(u32, usize), + groupname2users: StringHashMap([]usize), + username2groups: StringHashMap([]usize), pub fn init( baseAllocator: Allocator, @@ -70,60 +71,60 @@ const Corpus = struct { for (groups) |group| groupsMulti.appendAssumeCapacity(group); - var name2user = StringHashMap(*const User).init(allocator); - var uid2user = AutoHashMap(u32, *const User).init(allocator); - var name2group = StringHashMap(*const Group).init(allocator); - var gid2group = AutoHashMap(u32, *const Group).init(allocator); - for (users) |*user| { + var name2user = StringHashMap(usize).init(allocator); + var uid2user = AutoHashMap(u32, usize).init(allocator); + var name2group = StringHashMap(usize).init(allocator); + var gid2group = AutoHashMap(u32, usize).init(allocator); + for (users) |*user, i| { var res1 = try name2user.getOrPut(user.name); if (res1.found_existing) return error.Duplicate; - res1.value_ptr.* = user; + res1.value_ptr.* = i; var res2 = try uid2user.getOrPut(user.uid); if (res2.found_existing) return error.Duplicate; - res2.value_ptr.* = user; + res2.value_ptr.* = i; } - for (groups) |*group| { + for (groups) |*group, i| { var res1 = try name2group.getOrPut(group.name); if (res1.found_existing) return error.Duplicate; - res1.value_ptr.* = group; + res1.value_ptr.* = i; var res2 = try gid2group.getOrPut(group.gid); if (res2.found_existing) return error.Duplicate; - res2.value_ptr.* = group; + res2.value_ptr.* = i; } - var groupname2users = StringHashMap([]*const User).init(allocator); + var groupname2users = StringHashMap([]usize).init(allocator); // uses baseAllocator, because it will be freed before // returning from this function. This keeps the arena clean. var username2groups = StringHashMap( - ArrayListUnmanaged(*const Group), + ArrayListUnmanaged(usize), ).init(baseAllocator); defer username2groups.deinit(); - for (groups) |*group| { - var members = try allocator.alloc(*const User, group.members.count()); + for (groups) |*group, i| { + var members = try allocator.alloc(usize, group.members.count()); members.len = 0; var it = group.members.iterator(); while (it.next()) |memberName| { - if (name2user.get(memberName.*)) |user| { + if (name2user.get(memberName.*)) |idx| { members.len += 1; - members[members.len - 1] = user; + members[members.len - 1] = idx; } else { return error.NotFound; } var groupsOfMember = try username2groups.getOrPut(memberName.*); if (!groupsOfMember.found_existing) - groupsOfMember.value_ptr.* = ArrayListUnmanaged(*const Group){}; - try groupsOfMember.value_ptr.*.append(allocator, group); + groupsOfMember.value_ptr.* = ArrayListUnmanaged(usize){}; + try groupsOfMember.value_ptr.*.append(allocator, i); } var result = try groupname2users.getOrPut(group.name); @@ -134,14 +135,14 @@ const Corpus = struct { var it1 = groupname2users.valueIterator(); while (it1.next()) |groupUsers| { - sort.sort(*const User, groupUsers.*, {}, cmpUserPtr); + sort.sort(usize, groupUsers.*, {}, comptime sort.asc(usize)); } var it2 = username2groups.valueIterator(); while (it2.next()) |userGroups| - sort.sort(*const Group, userGroups.items, {}, cmpGroupPtr); + sort.sort(usize, userGroups.items, {}, comptime sort.asc(usize)); - var username2groups_final = StringHashMap([]*const Group).init(allocator); + var username2groups_final = StringHashMap([]usize).init(allocator); var it = username2groups.iterator(); while (it.next()) |elem| { const username = elem.key_ptr.*; @@ -203,17 +204,17 @@ pub fn shellSections( } pub const UserGids = struct { - // username -> offset in blob - name2offset: StringHashMap(u32), + // user index -> offset in blob + idx2offset: []const u32, // compressed user gids blob. A blob contains N <= users.len items, // an item is: // len: varint // gid: [varint]varint, // ... and the gid list is delta-compressed. - blob: []u8, + blob: []const u8, pub fn deinit(self: *UserGids, allocator: Allocator) void { - self.name2offset.deinit(); + allocator.free(self.idx2offset); allocator.free(self.blob); self.* = undefined; } @@ -227,8 +228,8 @@ pub fn userGids( ) error{ OutOfMemory, Overflow }!UserGids { var blob = ArrayList(u8).init(allocator); errdefer blob.deinit(); - var name2offset = StringHashMap(u32).init(allocator); - errdefer name2offset.deinit(); + var idx2offset = try allocator.alloc(u32, corpus.users.len); + errdefer allocator.free(idx2offset); // zero'th entry is empty, so groupless users can refer to it. try compress.appendUvarint(&blob, 0); @@ -236,13 +237,15 @@ pub fn userGids( var scratch = try allocator.alloc(u32, 256); defer allocator.free(scratch); - for (corpus.users) |user| { + for (corpus.users) |user, user_idx| { if (corpus.username2groups.get(user.name)) |usergroups| { - try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len)); + const userOffset = try math.cast(u32, blob.items.len); + std.debug.assert(userOffset & 7 == 0); + idx2offset[user_idx] = userOffset; scratch = try allocator.realloc(scratch, usergroups.len); scratch.len = usergroups.len; - for (usergroups) |group, i| - scratch[i] = group.gid; + for (usergroups) |group_idx, i| + scratch[i] = corpus.groups[group_idx].gid; compress.deltaCompress(u32, scratch) catch |err| switch (err) { error.NotSorted => unreachable, }; @@ -251,60 +254,98 @@ pub fn userGids( try compress.appendUvarint(&blob, gid); try pad.arrayList(&blob, userGidsPaddingBits); } else { - try name2offset.putNoClobber(user.name, 0); + idx2offset[user_idx] = 0; } } return UserGids{ - .name2offset = name2offset, + .idx2offset = idx2offset, .blob = blob.toOwnedSlice(), }; } +pub const UsersSection = struct { + // user index -> offset in blob + idx2offset: []const u32, + blob: []const u8, + + pub fn deinit(self: *UsersSection, allocator: Allocator) void { + allocator.free(self.idx2offset); + allocator.free(self.blob); + self.* = undefined; + } +}; + pub fn usersSection( allocator: Allocator, corpus: *const Corpus, gids: *const UserGids, shells: *const ShellSections, -) error{ OutOfMemory, Overflow, InvalidRecord }![]const u8 { +) error{ OutOfMemory, Overflow, InvalidRecord }!UsersSection { + var idx2offset = try allocator.alloc(u32, corpus.users.len); + errdefer allocator.free(idx2offset); // as of writing each user takes 15 bytes + strings + padding, padded to // 8 bytes. 24 is an optimistic lower bound for an average record size. - var buf = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len); - for (corpus.users) |user| { - const offset = gids.name2offset.get(user.name).?; - std.debug.assert(offset & 7 == 0); + var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len); + errdefer blob.deinit(); + for (corpus.users) |user, i| { + const userOffset = try math.cast(u32, blob.items.len); + const gidOffset = gids.idx2offset[i]; + std.debug.assert(userOffset & 7 == 0); + std.debug.assert(gidOffset & 7 == 0); + idx2offset[i] = userOffset; try userImport.PackedUserHash.packTo( - &buf, + &blob, user, - @truncate(u29, @shrExact(offset, 3)), + @truncate(u29, @shrExact(gidOffset, 3)), shells.indices, ); } - return buf.toOwnedSlice(); + return UsersSection{ + .idx2offset = idx2offset, + .blob = blob.toOwnedSlice(), + }; } +pub const GroupMembers = struct { + // group index to it's offset in blob + idx2offset: []const u32, + blob: []const u8, + + pub fn deinit(self: *GroupMembers, allocator: Allocator) void { + allocator.free(self.idx2offset); + allocator.free(self.blob); + self.* = undefined; + } +}; + pub fn groupMembers( allocator: Allocator, corpus: *const Corpus, -) error{OutOfMemory}!void { - var buf: [compress.maxVarintLen64]u8 = undefined; - var offsets = ArrayListUnmanaged(usize).initCapacity( - allocator, - corpus.groups.len, - ); - var bytes = ArrayList(u8).init(allocator); - var offset: usize = 0; + user2offset: []const u32, +) error{OutOfMemory}!GroupMembers { + var idx2offset = try allocator.alloc(u32, corpus.groups.len); + errdefer allocator.free(idx2offset); + var blob = ArrayList(u8).init(allocator); + errdefer blob.deinit(); + // zero'th entry is empty, so empty groups can refer to it + try compress.appendUvarint(&blob, 0); for (corpus.groups) |group, i| { - offsets[i] = offset; const users = corpus.groupname2users.get(group.name).?; - const len = compress.putVarint(&buf, users.len); - offset += len; - try bytes.appendSlice(buf[0..len]); - for (users) |user| { - // TODO: offset into the User's record - _ = user; + if (users.len == 0) { + idx2offset[i] = 0; + continue; } + + idx2offset[i] = blob.len; + compress.appendUvarint(&blob, users.len); + for (users) |userIdx| + compress.appendUvarint(&blob, user2offset[userIdx]); } + return GroupMembers{ + .idx2offset = idx2offset, + .blob = blob.toOwnedSlice(), + }; } // cmpUser compares two users for sorting. By username's utf8 codepoints, ascending. @@ -327,17 +368,70 @@ fn cmpUser(_: void, a: User, b: User) bool { return true; } -fn cmpUserPtr(context: void, a: *const User, b: *const User) bool { - return cmpUser(context, a.*, b.*); -} - fn cmpGroup(_: void, a: Group, b: Group) bool { return a.gid < b.gid; } -fn cmpGroupPtr(context: void, a: *const Group, b: *const Group) bool { - return cmpGroup(context, a.*, b.*); -} +pub const AllSections = struct { + allocator: Allocator, + + bdz_gid: []const u8, + bdz_groupname: []const u8, + bdz_uid: []const u8, + bdz_username: []const u8, + users: UsersSection, + + shell_sections: ShellSections, + shell_index: []const u8, + shell_blob: []const u8, + + user_gids: UserGids, + user_gids_b: []const u8, + + pub fn init( + allocator: Allocator, + corpus: *const Corpus, + ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections { + const bdz_gid = try bdzGid(allocator, corpus); + const bdz_groupname = try bdzGroupname(allocator, corpus); + const bdz_uid = try bdzUid(allocator, corpus); + const bdz_username = try bdzUsername(allocator, corpus); + const shell_sections = try shellSections(allocator, corpus); + const shell_index = shell_sections.index; + const shell_blob = shell_sections.blob; + const user_gids = try userGids(allocator, corpus); + const users = try usersSection( + allocator, + corpus, + &user_gids, + &shell_sections, + ); + return AllSections{ + .allocator = allocator, + .bdz_gid = bdz_gid, + .bdz_groupname = bdz_groupname, + .bdz_uid = bdz_uid, + .bdz_username = bdz_username, + .shell_sections = shell_sections, + .shell_index = mem.sliceAsBytes(shell_index.constSlice()), + .shell_blob = mem.sliceAsBytes(shell_blob.constSlice()), + .user_gids = user_gids, + .user_gids_b = user_gids.blob, + .users = users, + }; + } + + pub fn deinit(self: *AllSections) void { + self.allocator.free(self.bdz_gid); + self.allocator.free(self.bdz_groupname); + self.allocator.free(self.bdz_uid); + self.allocator.free(self.bdz_username); + self.shell_sections.deinit(); + self.user_gids.deinit(self.allocator); + self.users.deinit(self.allocator); + self.* = undefined; + } +}; const testing = std.testing; @@ -411,30 +505,43 @@ test "test corpus" { var corpus = try testCorpus(testing.allocator); defer corpus.deinit(); - try testing.expectEqualStrings(corpus.users[0].name, "Name" ** 8); - try testing.expectEqualStrings(corpus.users[1].name, "nobody"); - try testing.expectEqualStrings(corpus.users[2].name, "svc-bar"); - try testing.expectEqualStrings(corpus.users[3].name, "vidmantas"); + const name_name = 0; + const nobody = 1; + const svc_bar = 2; + const vidmantas = 3; + + try testing.expectEqualStrings(corpus.users[name_name].name, "Name" ** 8); + try testing.expectEqualStrings(corpus.users[nobody].name, "nobody"); + try testing.expectEqualStrings(corpus.users[svc_bar].name, "svc-bar"); + try testing.expectEqualStrings(corpus.users[vidmantas].name, "vidmantas"); + + const g_service_account = 0; + const g_vidmantas = 1; + const g_all = 2; + + try testing.expectEqualStrings(corpus.groups[g_service_account].name, "service-account"); + try testing.expectEqualStrings(corpus.groups[g_vidmantas].name, "vidmantas"); + try testing.expectEqualStrings(corpus.groups[g_all].name, "all"); try testing.expectEqual(corpus.name2user.get("404"), null); - try testing.expectEqual(corpus.name2user.get("vidmantas").?.uid, 128); + try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas); try testing.expectEqual(corpus.uid2user.get(42), null); - try testing.expectEqual(corpus.uid2user.get(128).?.gid, 128); + try testing.expectEqual(corpus.uid2user.get(128).?, vidmantas); try testing.expectEqual(corpus.name2group.get("404"), null); - try testing.expectEqual(corpus.name2group.get("vidmantas").?.gid, 128); + try testing.expectEqual(corpus.name2group.get("vidmantas").?, g_vidmantas); try testing.expectEqual(corpus.gid2group.get(42), null); - try testing.expectEqual(corpus.gid2group.get(128).?.gid, 128); + try testing.expectEqual(corpus.gid2group.get(128).?, g_vidmantas); const membersOfAll = corpus.groupname2users.get("all").?; - try testing.expectEqualStrings(membersOfAll[0].name, "Name" ** 8); - try testing.expectEqualStrings(membersOfAll[1].name, "svc-bar"); - try testing.expectEqualStrings(membersOfAll[2].name, "vidmantas"); + try testing.expectEqual(membersOfAll[0], name_name); + try testing.expectEqual(membersOfAll[1], svc_bar); + try testing.expectEqual(membersOfAll[2], vidmantas); try testing.expectEqual(corpus.groupname2users.get("404"), null); const groupsOfVidmantas = corpus.username2groups.get("vidmantas").?; - try testing.expectEqual(groupsOfVidmantas[0].gid, 0); - try testing.expectEqual(groupsOfVidmantas[1].gid, 128); - try testing.expectEqual(groupsOfVidmantas[2].gid, 9999); + try testing.expectEqual(groupsOfVidmantas[0], g_service_account); + try testing.expectEqual(groupsOfVidmantas[1], g_vidmantas); + try testing.expectEqual(groupsOfVidmantas[2], g_all); try testing.expectEqual(corpus.username2groups.get("nobody"), null); try testing.expectEqual(corpus.username2groups.get("doesnotexist"), null); } @@ -444,31 +551,8 @@ test "test sections" { var corpus = try testCorpus(allocator); defer corpus.deinit(); - const bdz_gid = try bdzGid(allocator, &corpus); - defer allocator.free(bdz_gid); - - const bdz_groupname = try bdzGroupname(allocator, &corpus); - defer allocator.free(bdz_groupname); - - const bdz_uid = try bdzUid(allocator, &corpus); - defer allocator.free(bdz_uid); - - const bdz_username = try bdzUsername(allocator, &corpus); - defer allocator.free(bdz_username); - - var shell_sections = try shellSections(allocator, &corpus); - defer shell_sections.deinit(); - - var user_gids = try userGids(allocator, &corpus); - defer user_gids.deinit(allocator); - - var users_section = try usersSection( - allocator, - &corpus, - &user_gids, - &shell_sections, - ); - defer allocator.free(users_section); + var all = try AllSections.init(allocator, &corpus); + defer all.deinit(); } test "userGids" { @@ -479,19 +563,19 @@ test "userGids" { var user_gids = try userGids(allocator, &corpus); defer user_gids.deinit(allocator); - for (corpus.users) |user| { + for (corpus.users) |user, userIdx| { const groups = corpus.username2groups.get(user.name); - const offset = user_gids.name2offset.get(user.name); + const offset = user_gids.idx2offset[userIdx]; if (groups == null) { - try testing.expect(offset.? == 0); + try testing.expect(offset == 0); continue; } - var vit = try compress.VarintSliceIterator(user_gids.blob[offset.?..]); + var vit = try compress.VarintSliceIterator(user_gids.blob[offset..]); var it = compress.DeltaDecompressionIterator(&vit); try testing.expectEqual(it.remaining(), groups.?.len); var i: usize = 0; while (try it.next()) |gid| : (i += 1) { - try testing.expectEqual(gid, groups.?[i].gid); + try testing.expectEqual(gid, corpus.groups[groups.?[i]].gid); } } }