From 7c41cbabe7a248b2c6b24aa68131ceab8d423aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 8 Apr 2022 15:05:56 +0300 Subject: [PATCH] wip groupmembers is 2-alloc away. --- README.md | 29 +++++++++++++--------- lib/DB.zig | 63 +++++++++++++++++++++++++++++++++--------------- lib/compress.zig | 6 +++++ 3 files changed, 67 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index a6d7081..8ac92d4 100644 --- a/README.md +++ b/README.md @@ -274,20 +274,25 @@ Similarly, when user's groups are resolved in (2), they are not always necessary (i.e. not part of `struct user*`), therefore the memberships themselves are stored out of bound. -`groupmembers` and `additional_gids` store group and user memberships respectively. -Membership IDs are packed — not necessitating random access, thus suitable for -compression. +`groupmembers` and `additional_gids` store group and user memberships +respectively. Membership IDs are packed — not necessitating random access, thus +suitable for compression. -- `groupmembers` is a list of pointers (offsets) to User records, because - `getgr*_r` returns pointers to membernames, thus a name has to be immediately - resolvable. -- `additional_gids` is a list of gids, because `initgroups_dyn` (and friends) returns - an array of gids. +- `groupmembers` consists of a number X followed by a list of offsets to User + records, because `getgr*_r` returns pointers to membernames, thus a name has + to be immediately resolvable. +- `additional_gids` is a list of gids, because `initgroups_dyn` (and friends) + returns an array of gids. -Each entry of `groupmembers` and `additional_gids` starts with a varint N, which is -the number of upcoming elements, followed by N delta-compressed varints. These -N delta-compressed varints are sorted the same way entries in `users` (in -`groupmembers`) and `groups`. +Each entry of `groupmembers` and `additional_gids` starts with a varint N, +which is the number of upcoming elements. Then depending on the type: + +- **additional_gids** stores N delta-compressed varints. These varints + correspond to a list of gids. +- **groupmembers** stores a total length of the member names, followed by N, + followed by N offsets. These are byte-offsets to the User records in the + `users` section. Having the length of membernames saves some CPU cycles when + decompressing the members in the hot path. Indices ------- diff --git a/lib/DB.zig b/lib/DB.zig index 58d7020..a63167a 100644 --- a/lib/DB.zig +++ b/lib/DB.zig @@ -218,26 +218,38 @@ pub fn fromBytes(buf: []align(8) const u8) InvalidHeader!DB { return result; } +const GroupMemberNames = struct { + arr: [:null]const ?[*:0]const u8, + + pub fn deinit(self: *GroupMemberNames, allocator: Allocator) void { + allocator.free(self.arr[0]); + allocator.free(self.arr); + } +}; + // returns a list of group member names starting at the given offset of // groupmembers blob. fn groupMemberNames( self: *const DB, allocator: Allocator, offset: u64, -) error{OutOfMemory}![:null]const [*:0]const u8 { +) error{OutOfMemory}!GroupMemberNames { + const v = compress.uvarintMust(self.groupmembers[offset..]); + const total_members_len = v.value; + offset += v.bytes_read; var vit = compress.VarintSliceIteratorMust(self.groupmembers[offset..]); - if (vit.remaining == 0) return null; - const total_members_len = vit.nextMust().?; + const num_members = vit.remaining; + if (num_members == 0) return null; // TODO (zig 0.10+) make result type sentinel-aware and stop - // the terminating-null-pointer-dances. - var result = try allocator.alloc(?[:0]const u8, vit.remaining + 1); + // the terminating-null-pointer-dancing. + var result = try allocator.alloc(?[:0]const u8, num_members + 1); errdefer allocator.free(result); - result.len = vit.remaining + 1; - result[result.len] = null; - result.len = vit.remaining; + result.len = num_members + 1; + result[num_members] = null; + result.len = num_members; - var buf = try allocator.alloc(u8, total_members_len + vit.remaining); + var buf = try allocator.alloc(u8, total_members_len + num_members); errdefer allocator.free(buf); var it = compress.DeltaCompressionIterator(&vit); var i: usize = 0; @@ -250,28 +262,31 @@ fn groupMemberNames( buf[buf.len - 1] = 0; result[i] = buf[old_len..buf.len]; } - return result; + return GroupMemberNames{ .arr = result }; } // getgrtnam returns a Group entry by name. The Group must be // deinit'ed by caller. -fn getgrnam(self: *const DB, allocator: Allocator, name: []const u8) ?Group { - const idx = bdz.search(self.bdz_groupname); +fn getgrnam( + self: *const DB, + allocator: Allocator, + name: []const u8, +) error{OutOfMemory}!?Group { + const idx = bdz.search(self.bdz_groupname, name); const offset = self.idx_groupname2group[idx]; const group = PackedGroup.fromBytes(self.groups[offset..]).group; - if (!mem.eql(name, group.name())) return null; + if (!mem.eql(u8, name, group.name())) return null; - const namez = allocator.dupeZ(u8, name); + var members = try self.groupMemberNames(allocator, group.members_offset); + errdefer members.deinit(allocator); + + const namez = try allocator.dupeZ(u8, name); errdefer allocator.free(namez); - // this operation is the last in the function, so it doesn't have - // a complex errdefer to deallocate it if something later fails. - const members = try groupMemberNames(allocator, group.members_offset); - return Group{ .name = namez, .gid = group.gid(), - .members = members, + .members = members.arr, }; } @@ -416,6 +431,13 @@ fn groupMembers( compress.deltaCompress(u32, scratch.items) catch |err| switch (err) { error.NotSorted => unreachable, }; + const total_members_len = blk: { + var sum: u32 = 0; + for (members) |user_idx| + sum += @intCast(u32, corpus.users.get(user_idx).name.len); + break :blk sum; + }; + try compress.appendUvarint(&blob, total_members_len); try compress.appendUvarint(&blob, members.len); for (scratch.items) |elem| try compress.appendUvarint(&blob, elem); @@ -590,6 +612,9 @@ test "high-level API" { var db = try DB.fromCorpus(allocator, &corpus); defer db.deinit(allocator); + + const all = try db.getgrnam(allocator, "all"); + _ = all; } test "additionalGids" { diff --git a/lib/compress.zig b/lib/compress.zig index 908d8e6..322eb8c 100644 --- a/lib/compress.zig +++ b/lib/compress.zig @@ -80,6 +80,12 @@ pub fn uvarint(buf: []const u8) error{Overflow}!Varint { }; } +pub fn uvarintMust(buf: []const u8) Varint { + return uvarint(buf) catch |err| switch (err) { + error.Overflow => unreachable, + }; +} + // https://golang.org/pkg/encoding/binary/#PutUvarint pub fn putUvarint(buf: []u8, x: u64) usize { var i: usize = 0;