user packing

This commit is contained in:
Motiejus Jakštys 2022-02-19 11:35:29 +02:00 committed by Motiejus Jakštys
parent 93c6a1c12a
commit 13b75e8046
3 changed files with 118 additions and 50 deletions

View File

@ -67,7 +67,7 @@ Tight packing places some constraints on the underlying data:
- Maximum database size: 4GB. - Maximum database size: 4GB.
- Permitted length of username and groupname: 1-32 bytes. - Permitted length of username and groupname: 1-32 bytes.
- Permitted length of shell and homedir: 1-64 bytes. - Permitted length of shell and home: 1-64 bytes.
- Permitted comment ("gecos") length: 0-255 bytes. - Permitted comment ("gecos") length: 0-255 bytes.
- User name, groupname and gecos must be utf8-encoded. - User name, groupname and gecos must be utf8-encoded.
@ -219,11 +219,11 @@ const User = struct {
// pointer to a separate structure that contains a list of gids // pointer to a separate structure that contains a list of gids
additional_gids_offset: u29, additional_gids_offset: u29,
// shell is a different story, documented elsewhere. // shell is a different story, documented elsewhere.
shell_here: u1, shell_here: bool,
shell_len_or_idx: u6, shell_len_or_idx: u6,
homedir_len: u6, home_len: u6,
username_is_a_suffix: u1, name_is_a_suffix: bool,
username_offset_or_len: u5, name_len: u5,
gecos_len: u8, gecos_len: u8,
// a variable-sized array that will be stored immediately after this // a variable-sized array that will be stored immediately after this
// struct. // struct.
@ -232,27 +232,27 @@ const User = struct {
``` ```
`stringdata` contains a few string entries: `stringdata` contains a few string entries:
- homedir. - home.
- username. - name.
- gecos. - gecos.
- shell (optional). - shell (optional).
First byte of the homedir is stored right after the `gecos_len` field, and its First byte of the home is stored right after the `gecos_len` field, and its
length is `homedir_len`. The same logic applies to all the `stringdata` fields: length is `home_len`. The same logic applies to all the `stringdata` fields:
there is a way to calculate their relative position from the length of the there is a way to calculate their relative position from the length of the
fields before them. fields before them.
Additionally, two optimizations for special fields are made: Additionally, two optimizations for special fields are made:
- shells are often shared across different users, see the "Shells" section. - shells are often shared across different users, see the "Shells" section.
- username is frequently a suffix of the homedir. For example, `/home/motiejus` - name is frequently a suffix of the home. For example, `/home/motiejus`
and `motiejus`. In which case storing both username and homedir strings is and `motiejus`. In which case storing both name and home strings is
wasteful. For those cases, username has two options: wasteful. For those cases, name has two options:
1. `username_is_a_suffix=true`: username is a suffix of the home dir. In that 1. `name_is_a_suffix=true`: name is a suffix of the home dir. In that
case, the username starts at the `username_offset_or_len`'th byte of the case, the name starts at the `home_len - name_len`'th
homedir, and ends at the same place as the homedir. byte of the home, and ends at the same place as the home.
2. `username_is_a_suffix=false`: username is stored separately. In that case, 2. `name_is_a_suffix=false`: name is stored separately. In that case,
it begins one byte after homedir, and its length is it begins one byte after home, and its length is
`username_offset_or_len`. `name_len`.
Shells Shells
------ ------
@ -315,8 +315,7 @@ const AdditionalGids = PackedList;
An entry in `members` field points to the offset into a respective `User` or An entry in `members` field points to the offset into a respective `User` or
`Group` entry (number of bytes relative to the first entry of the type). `Group` entry (number of bytes relative to the first entry of the type).
`members` in `PackedList` is sorted by the name (`username` or `groupname`) of `members` in `PackedList` are sorted the same way as in the input.
the record it is pointing to.
A packed list is a list of varints. A packed list is a list of varints.

View File

@ -41,7 +41,7 @@ pub const ShellWriter = struct {
const ShellSections = struct { const ShellSections = struct {
index: BoundedArray(ShellIndex, MaxShells), index: BoundedArray(ShellIndex, MaxShells),
blob: BoundedArray(u8, MaxShells * MaxShellLen), blob: BoundedArray(u8, MaxShells * MaxShellLen),
indices: StringHashMap(u10), indices: StringHashMap(u6),
// initializes and populates shell sections. All strings are copied, // initializes and populates shell sections. All strings are copied,
// nothing is owned. // nothing is owned.
@ -52,10 +52,10 @@ pub const ShellWriter = struct {
var self = ShellSections{ var self = ShellSections{
.index = try BoundedArray(ShellIndex, MaxShells).init(shells.len), .index = try BoundedArray(ShellIndex, MaxShells).init(shells.len),
.blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0), .blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0),
.indices = StringHashMap(u10).init(allocator), .indices = StringHashMap(u6).init(allocator),
}; };
var fullOffset: u12 = 0; var fullOffset: u12 = 0;
var idx: u10 = 0; var idx: u6 = 0;
while (idx < shells.len) { while (idx < shells.len) {
const len = @intCast(u6, shells.get(idx).len); const len = @intCast(u6, shells.get(idx).len);
try self.blob.appendSlice(shells.get(idx)); try self.blob.appendSlice(shells.get(idx));
@ -90,7 +90,7 @@ pub const ShellWriter = struct {
self.* = undefined; self.* = undefined;
} }
// Returns the value stored for `shell` in the `indices` map, or null when the
// map has no entry for that string. The right-hand revision narrows the
// returned index type from u10 to u6.
pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u10 { pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u6 {
return self.indices.get(shell); return self.indices.get(shell);
} }
}; };

View File

@ -1,17 +1,18 @@
const std = @import("std"); const std = @import("std");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const cast = std.math.cast;
// Size of the PackedUser header as reported by @sizeOf.
pub const PackedUserSize = @sizeOf(PackedUser); pub const PackedUserSize = @sizeOf(PackedUser);
// Fixed-width header of one packed user record. The declared field widths add
// up to 120 bits (32 + 32 + 29 + 1 + 6 + 6 + 1 + 5 + 8) in both revisions
// shown on each line; the right-hand revision turns the two 1-bit flags into
// `bool` and renames the homedir/username fields to home/name.
pub const PackedUser = packed struct { pub const PackedUser = packed struct {
uid: u32, uid: u32,
gid: u32, gid: u32,
// fromUser fills this with maxInt(u29) as a placeholder for a second pass.
additional_gids_offset: u29, additional_gids_offset: u29,
// fromUser sets shell_here=false and stores the index returned by
// shellIndexFn in shell_len_or_idx; otherwise shell_here=true and
// shell_len_or_idx holds the shell length minus one.
shell_here: u1, shell_here: bool,
shell_len_or_idx: u6, shell_len_or_idx: u6,
// fromUser stores the home length minus one.
homedir_len: u6, home_len: u6,
// Set by fromUser when the home string ends with the name.
username_is_a_suffix: u1, name_is_a_suffix: bool,
// fromUser stores the name length minus one.
username_offset_or_len: u5, name_len: u5,
// fromUser stores the gecos length as-is (may be zero).
gecos_len: u8, gecos_len: u8,
}; };
@ -31,7 +32,7 @@ pub const UserWriter = struct {
// string and returns its index in the global shell section. Passing a // string and returns its index in the global shell section. Passing a
// function makes tests easier, and removes the Shell dependency of this // function makes tests easier, and removes the Shell dependency of this
// module. // module.
const shellIndexFnType = fn ([]const u8) ?u10; const shellIndexFnType = fn ([]const u8) ?u6;
allocator: Allocator, allocator: Allocator,
shellIndexFn: shellIndexFnType, shellIndexFn: shellIndexFnType,
@ -42,30 +43,65 @@ pub const UserWriter = struct {
}; };
} }
const fromUserErr = std.mem.Allocator.Error || error{InvalidRecord};

/// Serializes `user` into a newly allocated byte blob: the `PackedUser`
/// header followed by the string data in the order home, name, gecos, shell.
/// The name is omitted when it is a suffix of home; the shell is omitted when
/// `shellIndexFn` returns an index for it.
///
/// The blob is allocated with `self.allocator`; the caller frees it.
///
/// Returns `error.InvalidRecord` when home, name or shell is empty, or when a
/// string does not fit its length field (home and shell: 64 bytes, name: 32
/// bytes, gecos: 255 bytes). Returns `error.OutOfMemory` when the allocation
/// fails.
pub fn fromUser(self: *UserWriter, user: User) fromUserErr![]const u8 {
    // home, name and shell are encoded as `len - 1`, so an empty string has
    // no encoding. Rejecting it here also keeps the subtractions below from
    // underflowing.
    if (user.home.len == 0 or user.name.len == 0 or user.shell.len == 0) {
        return error.InvalidRecord;
    }
    const home_len = std.math.cast(u6, user.home.len - 1) catch return error.InvalidRecord;
    const name_len = std.math.cast(u5, user.name.len - 1) catch return error.InvalidRecord;
    const shell_len = std.math.cast(u6, user.shell.len - 1) catch return error.InvalidRecord;
    const gecos_len = std.math.cast(u8, user.gecos.len) catch return error.InvalidRecord;

    const name_is_a_suffix = std.mem.endsWith(u8, user.home, user.name);
    const shell_idx = self.shellIndexFn(user.shell);

    const puser = PackedUser{
        .uid = @as(u32, user.uid),
        .gid = @as(u32, user.gid),
        .additional_gids_offset = std.math.maxInt(u29), // needs second pass
        .shell_here = shell_idx == null,
        .shell_len_or_idx = shell_idx orelse shell_len,
        .home_len = home_len,
        .name_is_a_suffix = name_is_a_suffix,
        .name_len = name_len,
        .gecos_len = gecos_len,
    };

    // Strings that are not stored inline contribute an empty slice, so the
    // layout below stays a plain concatenation.
    const stored_name: []const u8 = if (name_is_a_suffix) "" else user.name;
    const stored_shell: []const u8 = if (shell_idx == null) user.shell else "";

    // `asBytes` yields @sizeOf(PackedUser) bytes, i.e. PackedUserSize.
    const header: []const u8 = std.mem.asBytes(&puser);
    const parts = [_][]const u8{
        header,
        user.home,
        stored_name,
        user.gecos,
        stored_shell,
    };

    // Size the blob from the real byte lengths (not the minus-one encoded
    // fields) and append each part after the previous one.
    const blob = try std.mem.concat(self.allocator, u8, &parts);
    return blob;
}
}; };
@ -74,3 +110,36 @@ const testing = std.testing;
// Checks that the bit size of PackedUser is a multiple of 8, i.e. the packed
// header occupies a whole number of bytes.
test "PackedUser is byte-aligned" { test "PackedUser is byte-aligned" {
try testing.expectEqual(0, @rem(@bitSizeOf(PackedUser), 8)); try testing.expectEqual(0, @rem(@bitSizeOf(PackedUser), 8));
} }
/// Test stand-in for the shell index lookup: knows exactly two shells and
/// reports every other string as absent.
fn testShellIndex(shell: []const u8) ?u6 {
    if (std.mem.eql(u8, shell, "/bin/bash")) return 0;
    if (std.mem.eql(u8, shell, "/bin/zsh")) return 1;
    return null;
}
// Builds blobs for two users and checks basic size properties:
// - user1: name is a suffix of home and the shell is known to testShellIndex.
// - user2: name is not a suffix of home and the shell is unknown, so both are
//   stored inline.
test "construct PackedUser blob" {
    var writer = UserWriter.init(testing.allocator, testShellIndex);
    const user1 = User{
        .uid = 1000,
        .gid = 1000,
        .name = "vidmantas",
        .gecos = "Vidmantas Kaminskas",
        .home = "/home/vidmantas",
        .shell = "/bin/bash",
    };
    const user2 = User{
        .uid = 1001,
        .gid = 1001,
        .name = "svc-foo",
        .gecos = "Service Account",
        .home = "/home/service1",
        .shell = "/usr/bin/nologin",
    };

    // Pair every allocation with its free immediately, so that a failure in
    // the second call does not leak the first blob.
    const puser1 = try writer.fromUser(user1);
    defer testing.allocator.free(puser1);
    const puser2 = try writer.fromUser(user2);
    defer testing.allocator.free(puser2);

    // Every blob carries at least the fixed-size header.
    try testing.expect(puser1.len >= PackedUserSize);
    try testing.expect(puser2.len >= PackedUserSize);

    // user2 additionally stores its name and shell inline, which outweighs
    // its shorter home and gecos.
    try testing.expect(puser2.len > puser1.len);

    // A 33-byte name does not fit the 5-bit name length field.
    var too_long = user1;
    too_long.name = "a" ** 33;
    try testing.expectError(error.InvalidRecord, writer.fromUser(too_long));
}