From c343bf64fb3d5859e9e33d95e9fa88c068b00359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 8 Apr 2022 16:08:03 +0300 Subject: [PATCH] Revert "wip groupmembers is 2-alloc away." This reverts commit 7c41cbabe7a248b2c6b24aa68131ceab8d423aa4. Not doing this now. --- README.md | 29 +++++++++------------- lib/DB.zig | 63 +++++++++++++++--------------------------------- lib/compress.zig | 6 ----- 3 files changed, 31 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 8ac92d4..a6d7081 100644 --- a/README.md +++ b/README.md @@ -274,25 +274,20 @@ Similarly, when user's groups are resolved in (2), they are not always necessary (i.e. not part of `struct user*`), therefore the memberships themselves are stored out of bound. -`groupmembers` and `additional_gids` store group and user memberships -respectively. Membership IDs are packed — not necessitating random access, thus -suitable for compression. +`groupmembers` and `additional_gids` store group and user memberships respectively. +Membership IDs are packed — not necessitating random access, thus suitable for +compression. -- `groupmembers` consists of a number X followed by a list of offsets to User - records, because `getgr*_r` returns pointers to membernames, thus a name has - to be immediately resolvable. -- `additional_gids` is a list of gids, because `initgroups_dyn` (and friends) - returns an array of gids. +- `groupmembers` is a list of pointers (offsets) to User records, because + `getgr*_r` returns pointers to membernames, thus a name has to be immediately + resolvable. +- `additional_gids` is a list of gids, because `initgroups_dyn` (and friends) returns + an array of gids. -Each entry of `groupmembers` and `additional_gids` starts with a varint N, -which is the number of upcoming elements. Then depending on the type: - -- **additional_gids** stores N delta-compressed varints. These varints - correspond to a list of gids. -- **groupmembers** stores a total length of the member names, followed by N, - followed by N offsets. These are byte-offsets to the User records in the - `users` section. Having the length of membernames saves some CPU cycles when - decompressing the members in the hot path. +Each entry of `groupmembers` and `additional_gids` starts with a varint N, which is +the number of upcoming elements, followed by N delta-compressed varints. These +N delta-compressed varints are sorted the same way entries in `users` (in +`groupmembers`) and `groups`. Indices ------- diff --git a/lib/DB.zig b/lib/DB.zig index a63167a..58d7020 100644 --- a/lib/DB.zig +++ b/lib/DB.zig @@ -218,38 +218,26 @@ pub fn fromBytes(buf: []align(8) const u8) InvalidHeader!DB { return result; } -const GroupMemberNames = struct { - arr: [:null]const ?[*:0]const u8, - - pub fn deinit(self: *GroupMemberNames, allocator: Allocator) void { - allocator.free(self.arr[0]); - allocator.free(self.arr); - } -}; - // returns a list of group member names starting at the given offset of // groupmembers blob. fn groupMemberNames( self: *const DB, allocator: Allocator, offset: u64, -) error{OutOfMemory}!GroupMemberNames { - const v = compress.uvarintMust(self.groupmembers[offset..]); - const total_members_len = v.value; - offset += v.bytes_read; +) error{OutOfMemory}![:null]const [*:0]const u8 { var vit = compress.VarintSliceIteratorMust(self.groupmembers[offset..]); - const num_members = vit.remaining; - if (num_members == 0) return null; + if (vit.remaining == 0) return null; + const total_members_len = vit.nextMust().?; // TODO (zig 0.10+) make result type sentinel-aware and stop - // the terminating-null-pointer-dancing. - var result = try allocator.alloc(?[:0]const u8, num_members + 1); + // the terminating-null-pointer-dances. + var result = try allocator.alloc(?[:0]const u8, vit.remaining + 1); errdefer allocator.free(result); - result.len = num_members + 1; - result[num_members] = null; - result.len = num_members; + result.len = vit.remaining + 1; + result[result.len] = null; + result.len = vit.remaining; - var buf = try allocator.alloc(u8, total_members_len + num_members); + var buf = try allocator.alloc(u8, total_members_len + vit.remaining); errdefer allocator.free(buf); var it = compress.DeltaCompressionIterator(&vit); var i: usize = 0; @@ -262,31 +250,28 @@ fn groupMemberNames( buf[buf.len - 1] = 0; result[i] = buf[old_len..buf.len]; } - return GroupMemberNames{ .arr = result }; + return result; } // getgrtnam returns a Group entry by name. The Group must be // deinit'ed by caller. -fn getgrnam( - self: *const DB, - allocator: Allocator, - name: []const u8, -) error{OutOfMemory}!?Group { - const idx = bdz.search(self.bdz_groupname, name); +fn getgrnam(self: *const DB, allocator: Allocator, name: []const u8) ?Group { + const idx = bdz.search(self.bdz_groupname); const offset = self.idx_groupname2group[idx]; const group = PackedGroup.fromBytes(self.groups[offset..]).group; - if (!mem.eql(u8, name, group.name())) return null; + if (!mem.eql(name, group.name())) return null; - var members = try self.groupMemberNames(allocator, group.members_offset); - errdefer members.deinit(allocator); - - const namez = try allocator.dupeZ(u8, name); + const namez = allocator.dupeZ(u8, name); errdefer allocator.free(namez); + // this operation is the last in the function, so it doesn't have + // a complex errdefer to deallocate it if something later fails. + const members = try groupMemberNames(allocator, group.members_offset); + return Group{ .name = namez, .gid = group.gid(), - .members = members.arr, + .members = members, }; } @@ -431,13 +416,6 @@ fn groupMembers( compress.deltaCompress(u32, scratch.items) catch |err| switch (err) { error.NotSorted => unreachable, }; - const total_members_len = blk: { - var sum: u32 = 0; - for (members) |user_idx| - sum += @intCast(u32, corpus.users.get(user_idx).name.len); - break :blk sum; - }; - try compress.appendUvarint(&blob, total_members_len); try compress.appendUvarint(&blob, members.len); for (scratch.items) |elem| try compress.appendUvarint(&blob, elem); @@ -612,9 +590,6 @@ test "high-level API" { var db = try DB.fromCorpus(allocator, &corpus); defer db.deinit(allocator); - - const all = try db.getgrnam(allocator, "all"); - _ = all; } test "additionalGids" { diff --git a/lib/compress.zig b/lib/compress.zig index 322eb8c..908d8e6 100644 --- a/lib/compress.zig +++ b/lib/compress.zig @@ -80,12 +80,6 @@ pub fn uvarint(buf: []const u8) error{Overflow}!Varint { }; } -pub fn uvarintMust(buf: []const u8) Varint { - return uvarint(buf) catch |err| switch (err) { - error.Overflow => unreachable, - }; -} - // https://golang.org/pkg/encoding/binary/#PutUvarint pub fn putUvarint(buf: []u8, x: u64) usize { var i: usize = 0;