From 64bd5df6333b2fe7794d9a9e963c4068605b6380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Mon, 18 Apr 2022 19:35:26 +0300 Subject: [PATCH] replace member strlen with getgr_max and getpw_max This way we can use buffers in both Zig and C, without the performance or size penalty. --- README.md | 32 ++++++++------- lib/CGroup.zig | 2 +- lib/DB.zig | 108 ++++++++++++++++++------------------------------- lib/User.zig | 6 +-- 4 files changed, 60 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index 8ac92d4..7de904b 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ A known implementation runs id(1) at ~250 rps sequentially on ~20k users and To better reason about the trade-offs, it is useful to understand how `id(1)` is implemented, in rough terms: -- lookup user by name ([`getpwent_r(3)`][getpwent_r]). +- lookup user by name ([`getpwent_r(3)`][getpwent]). - get all gids for the user ([`getgrouplist(3)`][getgrouplist]). Note: it is actually using `initgroups_dyn`, accepts a uid, and is very poorly documented. @@ -154,6 +154,9 @@ OFFSET TYPE NAME DESCRIPTION 40 u64 nblocks_users 48 u64 nblocks_groupmembers 56 u64 nblocks_additional_gids + 64 u64 getgr_max + 72 u64 getpw_max + 80 [48]u8 padding ``` `magic` is 0xf09fa4b7, and `version` must be `0`. All integers are @@ -165,6 +168,10 @@ number of bytes. However, interpreting `[2]u6` with `xxd(1)` is harder than interpreting `[2]u8`. Therefore we are using the space we have to make these integers byte-wide. +`getgr_max` and `getpw_max` is a hint for the caller of `getgr*` and +`getpw*`-family calls. This is the recommended size of the buffer, so the +caller does not receive `ENOMEM`. + Primitive types --------------- @@ -279,20 +286,17 @@ respectively. Membership IDs are packed — not necessitating random access, thu suitable for compression. - `groupmembers` consists of a number X followed by a list of offsets to User - records, because `getgr*_r` returns pointers to membernames, thus a name has - to be immediately resolvable. + records, because `getgr*` returns pointers to membernames, thus a name has to + be immediately resolvable. - `additional_gids` is a list of gids, because `initgroups_dyn` (and friends) returns an array of gids. Each entry of `groupmembers` and `additional_gids` starts with a varint N, -which is the number of upcoming elements. Then depending on the type: +which is the number of upcoming elements. Then N delta-compressed varints, +which are: -- **additional_gids** stores N delta-compressed varints. These varints - correspond to a list of gids. -- **groupmembers** stores a total length of the member names, followed by N, - followed by N offsets. These are byte-offsets to the User records in the - `users` section. Having the length of membernames saves some CPU cycles when - decompressing the members in the hot path. +- **additional_gids** a list of gids. +- **groupmembers** byte-offsets to the User records in the `users` section. Indices ------- @@ -339,7 +343,7 @@ Each section is padded to 64 bytes. ``` SECTION SIZE DESCRIPTION -header 64 see "Turbonss header" section +header 128 see "Turbonss header" section bdz_gid ? bdz(gid) bdz_groupname ? bdz(groupname) bdz_uid ? bdz(uid) @@ -365,7 +369,7 @@ Section creation order: 1. ✅ `groupmembers` requires `users`. 1. ✅ `groups` requires `groupmembers`. 1. ✅ `idx_*`. requires offsets to `groups` and `users`. -1. Header. +1. ✅ Header. [git-subtrac]: https://apenwarr.ca/log/20191109 [cmph]: http://cmph.sourceforge.net/ @@ -374,6 +378,6 @@ Section creation order: [data-oriented-design]: https://media.handmade-seattle.com/practical-data-oriented-design/ [getpwnam_r]: https://linux.die.net/man/3/getpwnam_r [varint]: https://developers.google.com/protocol-buffers/docs/encoding#varints -[getpwent_r]: https://www.man7.org/linux/man-pages/man3/getpwent_r.3.html +[getpwent]: https://www.man7.org/linux/man-pages/man3/getpwent_r.3.html [getgrouplist]: https://www.man7.org/linux/man-pages/man3/getgrouplist.3.html -[getgrid_r]: https://www.man7.org/linux/man-pages/man3/getgrid_r.3.html +[getgrid]: https://www.man7.org/linux/man-pages/man3/getgrid_r.3.html diff --git a/lib/CGroup.zig b/lib/CGroup.zig index 8c16efc..d0c9e70 100644 --- a/lib/CGroup.zig +++ b/lib/CGroup.zig @@ -4,4 +4,4 @@ gid: u32, name: [:0]const u8, // Should be a sentinel-terminated array. // https://github.com/ziglang/zig/issues/7517 -members: []const ?[*:0]const u8, +members: []align(1) const ?[*:0]const u8, diff --git a/lib/DB.zig b/lib/DB.zig index 36e3c33..796f226 100644 --- a/lib/DB.zig +++ b/lib/DB.zig @@ -222,86 +222,63 @@ pub fn fromBytes(buf: []align(8) const u8) InvalidHeader!DB { return result; } -const GroupMemberNames = struct { - _buf: []u8, - arr: []const ?[*:0]const u8, - - pub fn deinit(self: *GroupMemberNames, allocator: Allocator) void { - if (self._buf.len == 0) return; - allocator.free(self._buf); - allocator.free(self.arr); - } -}; - -// returns a list of group member names starting at the given offset of -// groupmembers blob. -fn groupMemberNames( +// dumps PackedGroup to []u8 and returns a CGroup. +fn getGroup( self: *const DB, - allocator: Allocator, - offset: u64, -) error{OutOfMemory}!GroupMemberNames { - const v = compress.uvarintMust(self.groupmembers[offset..]); - const total_members_len = v.value; - const offset2 = offset + v.bytes_read; - var vit = compress.VarintSliceIteratorMust(self.groupmembers[offset2..]); + group: PackedGroup, + buf: *[]u8, +) error{OutOfMemory}!CGroup { + const members_slice = self.groupmembers[group.members_offset..]; + var vit = compress.VarintSliceIteratorMust(members_slice); const num_members = vit.remaining; - if (num_members == 0) - return GroupMemberNames{ - ._buf = &[0]u8{}, - .arr = &[1]?[*:0]const u8{null}, - }; - // TODO (zig 0.10+) make result type sentinel-aware and stop - // the terminating-null-pointer-dancing. - var arr = try allocator.alloc(?[*:0]const u8, num_members + 1); - errdefer allocator.free(arr); - arr.len = num_members + 1; - arr[num_members] = null; - arr.len = num_members; - - var buf = std.ArrayList(u8).init(allocator); - errdefer buf.deinit(); - // +num_members are for sentinel zeroes - try buf.ensureTotalCapacity(total_members_len + num_members); + const ptr_end = @sizeOf(?[*:0]const u8) * (num_members + 1); + if (ptr_end > buf.len) return error.OutOfMemory; + var member_ptrs = mem.bytesAsSlice(?[*:0]const u8, buf.*[0..ptr_end]); + member_ptrs[member_ptrs.len - 1] = null; + var buf_offset: usize = ptr_end; var it = compress.DeltaDecompressionIterator(&vit); var i: usize = 0; while (it.nextMust()) |member_offset| : (i += 1) { const entry = PackedUser.fromBytes(self.users[member_offset << 3 ..]); - const start = buf.items.len; + const start = buf_offset; const name = entry.user.name(); - buf.appendSliceAssumeCapacity(name); - buf.appendAssumeCapacity(0); + if (buf_offset + name.len + 1 > buf.len) return error.OutOfMemory; + mem.copy(u8, buf.*[buf_offset..], name); + buf_offset += name.len; + buf.*[buf_offset] = 0; + buf_offset += 1; + // TODO: arr[i] = buf[...] triggers a bug in zig pre-0.10 - const terminated = buf.items[start .. buf.items.len - 1 :0]; - arr[i] = terminated; + const terminated = buf.*[start .. buf_offset - 1 :0]; + member_ptrs[i] = terminated; } - return GroupMemberNames{ ._buf = buf.toOwnedSlice(), .arr = arr }; + + const name = group.name(); + if (buf_offset + name.len + 1 > buf.len) return error.OutOfMemory; + mem.copy(u8, buf.*[buf_offset..], name); + buf.*[buf_offset + name.len] = 0; + + return CGroup{ + .name = buf.*[buf_offset .. buf_offset + name.len :0], + .gid = group.gid(), + .members = member_ptrs, + }; } // getgrtnam returns a Group entry by name. The Group must be // deinit'ed by caller. fn getgrnam( self: *const DB, - allocator: Allocator, name: []const u8, + buf: *[]u8, ) error{OutOfMemory}!?CGroup { const idx = bdz.search(self.bdz_groupname, name); const offset = self.idx_groupname2group[idx]; const group = PackedGroup.fromBytes(self.groups[offset << 3 ..]).group; if (!mem.eql(u8, name, group.name())) return null; - - var members = try self.groupMemberNames(allocator, group.members_offset); - errdefer members.deinit(allocator); - - const namez = try allocator.dupeZ(u8, name); - errdefer allocator.free(namez); - - return CGroup{ - .name = namez, - .gid = group.gid(), - .members = members.arr, - }; + return try self.getGroup(group, buf); } fn shellSections( @@ -445,13 +422,6 @@ fn groupMembers( compress.deltaCompress(u32, scratch.items) catch |err| switch (err) { error.NotSorted => unreachable, }; - const total_members_len = blk: { - var sum: usize = 0; - for (members) |user_idx| - sum += corpus.users.get(user_idx).name.len; - break :blk @intCast(u32, sum); - }; - try compress.appendUvarint(&blob, total_members_len); try compress.appendUvarint(&blob, members.len); for (scratch.items) |elem| try compress.appendUvarint(&blob, elem); @@ -628,19 +598,19 @@ test "high-level API" { var db = try DB.fromCorpus(allocator, &corpus); defer db.deinit(allocator); - var arena = ArenaAllocator.init(allocator); - defer arena.deinit(); // TODO: should accept a buffer instead of an allocator instead. - const all = try db.getgrnam(arena.allocator(), "all"); + var buf = try allocator.alloc(u8, db.header.getgr_max); + defer allocator.free(buf); + const all = try db.getgrnam("all", &buf); try testing.expect(all != null); try testing.expectEqual(all.?.gid, 9999); try testing.expectEqualStrings(all.?.name, "all"); - const members = mem.sliceTo(all.?.members, null); - try testing.expectEqual(members.len, 4); + const members = all.?.members; try testing.expectEqualStrings(mem.sliceTo(members[0].?, 0), "Name" ** 8); try testing.expectEqualStrings(mem.sliceTo(members[1].?, 0), "root"); try testing.expectEqualStrings(mem.sliceTo(members[2].?, 0), "svc-bar"); try testing.expectEqualStrings(mem.sliceTo(members[3].?, 0), "vidmantas"); + try testing.expectEqual(members[4], null); } test "additionalGids" { diff --git a/lib/User.zig b/lib/User.zig index adab1be..db099ae 100644 --- a/lib/User.zig +++ b/lib/User.zig @@ -104,11 +104,9 @@ fn strlen(self: *const User) usize { } // length of all string-data fields, assuming they are zero-terminated. -// Includes one character for "password". +// Does not include password, since that's always static 'x\00'. pub fn strlenZ(self: *const User) usize { - return self.strlen() + - 4 + // '\0' of name, gecos, home and shell - 2; // password: 'x\0' + return self.strlen() + 4; // '\0' of name, gecos, home and shell } pub fn deinit(self: *User, allocator: Allocator) void {