wip groupmembers is 2-alloc away.

This commit is contained in:
Motiejus Jakštys 2022-04-08 15:05:56 +03:00 committed by Motiejus Jakštys
parent 0766175915
commit 7c41cbabe7
3 changed files with 67 additions and 31 deletions

View File

@ -274,20 +274,25 @@ Similarly, when user's groups are resolved in (2), they are not always necessary
(i.e. not part of `struct user*`), therefore the memberships themselves are (i.e. not part of `struct user*`), therefore the memberships themselves are
stored out of band. stored out of band.
`groupmembers` and `additional_gids` store group and user memberships respectively. `groupmembers` and `additional_gids` store group and user memberships
Membership IDs are packed — not necessitating random access, thus suitable for respectively. Membership IDs are packed — not necessitating random access, thus
compression. suitable for compression.
- `groupmembers` is a list of pointers (offsets) to User records, because - `groupmembers` consists of a number X followed by a list of offsets to User
`getgr*_r` returns pointers to membernames, thus a name has to be immediately records, because `getgr*_r` returns pointers to membernames, thus a name has
resolvable. to be immediately resolvable.
- `additional_gids` is a list of gids, because `initgroups_dyn` (and friends) returns - `additional_gids` is a list of gids, because `initgroups_dyn` (and friends)
an array of gids. returns an array of gids.
Each entry of `groupmembers` and `additional_gids` starts with a varint N, which is Each entry of `groupmembers` and `additional_gids` starts with a varint N,
the number of upcoming elements, followed by N delta-compressed varints. These which is the number of upcoming elements. Then depending on the type:
N delta-compressed varints are sorted the same way entries in `users` (in
`groupmembers`) and `groups`. - **additional_gids** stores N delta-compressed varints. These varints
correspond to a list of gids.
- **groupmembers** stores the total length of the member names first, followed
by N, followed by N offsets. These are byte offsets to the User records in the
`users` section. Storing the total length of the member names up front saves
some CPU cycles when decompressing the members in the hot path.
Indices Indices
------- -------

View File

@ -218,26 +218,38 @@ pub fn fromBytes(buf: []align(8) const u8) InvalidHeader!DB {
return result; return result;
} }
const GroupMemberNames = struct {
arr: [:null]const ?[*:0]const u8,
pub fn deinit(self: *GroupMemberNames, allocator: Allocator) void {
allocator.free(self.arr[0]);
allocator.free(self.arr);
}
};
// returns a list of group member names starting at the given offset of // returns a list of group member names starting at the given offset of
// groupmembers blob. // groupmembers blob.
fn groupMemberNames( fn groupMemberNames(
self: *const DB, self: *const DB,
allocator: Allocator, allocator: Allocator,
offset: u64, offset: u64,
) error{OutOfMemory}![:null]const [*:0]const u8 { ) error{OutOfMemory}!GroupMemberNames {
const v = compress.uvarintMust(self.groupmembers[offset..]);
const total_members_len = v.value;
offset += v.bytes_read;
var vit = compress.VarintSliceIteratorMust(self.groupmembers[offset..]); var vit = compress.VarintSliceIteratorMust(self.groupmembers[offset..]);
if (vit.remaining == 0) return null; const num_members = vit.remaining;
const total_members_len = vit.nextMust().?; if (num_members == 0) return null;
// TODO (zig 0.10+) make result type sentinel-aware and stop // TODO (zig 0.10+) make result type sentinel-aware and stop
// the terminating-null-pointer-dances. // the terminating-null-pointer-dancing.
var result = try allocator.alloc(?[:0]const u8, vit.remaining + 1); var result = try allocator.alloc(?[:0]const u8, num_members + 1);
errdefer allocator.free(result); errdefer allocator.free(result);
result.len = vit.remaining + 1; result.len = num_members + 1;
result[result.len] = null; result[num_members] = null;
result.len = vit.remaining; result.len = num_members;
var buf = try allocator.alloc(u8, total_members_len + vit.remaining); var buf = try allocator.alloc(u8, total_members_len + num_members);
errdefer allocator.free(buf); errdefer allocator.free(buf);
var it = compress.DeltaCompressionIterator(&vit); var it = compress.DeltaCompressionIterator(&vit);
var i: usize = 0; var i: usize = 0;
@ -250,28 +262,31 @@ fn groupMemberNames(
buf[buf.len - 1] = 0; buf[buf.len - 1] = 0;
result[i] = buf[old_len..buf.len]; result[i] = buf[old_len..buf.len];
} }
return result; return GroupMemberNames{ .arr = result };
} }
// getgrnam returns a Group entry by name. The Group must be // getgrnam returns a Group entry by name. The Group must be
// deinit'ed by caller. // deinit'ed by caller.
fn getgrnam(self: *const DB, allocator: Allocator, name: []const u8) ?Group { fn getgrnam(
const idx = bdz.search(self.bdz_groupname); self: *const DB,
allocator: Allocator,
name: []const u8,
) error{OutOfMemory}!?Group {
const idx = bdz.search(self.bdz_groupname, name);
const offset = self.idx_groupname2group[idx]; const offset = self.idx_groupname2group[idx];
const group = PackedGroup.fromBytes(self.groups[offset..]).group; const group = PackedGroup.fromBytes(self.groups[offset..]).group;
if (!mem.eql(name, group.name())) return null; if (!mem.eql(u8, name, group.name())) return null;
const namez = allocator.dupeZ(u8, name); var members = try self.groupMemberNames(allocator, group.members_offset);
errdefer members.deinit(allocator);
const namez = try allocator.dupeZ(u8, name);
errdefer allocator.free(namez); errdefer allocator.free(namez);
// this operation is the last in the function, so it doesn't have
// a complex errdefer to deallocate it if something later fails.
const members = try groupMemberNames(allocator, group.members_offset);
return Group{ return Group{
.name = namez, .name = namez,
.gid = group.gid(), .gid = group.gid(),
.members = members, .members = members.arr,
}; };
} }
@ -416,6 +431,13 @@ fn groupMembers(
compress.deltaCompress(u32, scratch.items) catch |err| switch (err) { compress.deltaCompress(u32, scratch.items) catch |err| switch (err) {
error.NotSorted => unreachable, error.NotSorted => unreachable,
}; };
const total_members_len = blk: {
var sum: u32 = 0;
for (members) |user_idx|
sum += @intCast(u32, corpus.users.get(user_idx).name.len);
break :blk sum;
};
try compress.appendUvarint(&blob, total_members_len);
try compress.appendUvarint(&blob, members.len); try compress.appendUvarint(&blob, members.len);
for (scratch.items) |elem| for (scratch.items) |elem|
try compress.appendUvarint(&blob, elem); try compress.appendUvarint(&blob, elem);
@ -590,6 +612,9 @@ test "high-level API" {
var db = try DB.fromCorpus(allocator, &corpus); var db = try DB.fromCorpus(allocator, &corpus);
defer db.deinit(allocator); defer db.deinit(allocator);
const all = try db.getgrnam(allocator, "all");
_ = all;
} }
test "additionalGids" { test "additionalGids" {

View File

@ -80,6 +80,12 @@ pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
}; };
} }
pub fn uvarintMust(buf: []const u8) Varint {
return uvarint(buf) catch |err| switch (err) {
error.Overflow => unreachable,
};
}
// https://golang.org/pkg/encoding/binary/#PutUvarint // https://golang.org/pkg/encoding/binary/#PutUvarint
pub fn putUvarint(buf: []u8, x: u64) usize { pub fn putUvarint(buf: []u8, x: u64) usize {
var i: usize = 0; var i: usize = 0;