Fork 0

user packing

Motiejus Jakštys 2022-02-19 11:35:29 +02:00 committed by Motiejus Jakštys
parent 93c6a1c12a
commit 13b75e8046
3 changed files with 118 additions and 50 deletions

View File

@ -67,7 +67,7 @@ Tight packing places some constraints on the underlying data:
- Maximum database size: 4GB.
- Permitted length of username and groupname: 1-32 bytes.
- Permitted length of shell and homedir: 1-64 bytes.
- Permitted length of shell and home: 1-64 bytes.
- Permitted comment ("gecos") length: 0-255 bytes.
- User name, groupname and gecos must be utf8-encoded.
@ -219,11 +219,11 @@ const User = struct {
// pointer to a separate structure that contains a list of gids
additional_gids_offset: u29,
// shell is a different story, documented elsewhere.
shell_here: u1,
shell_here: bool,
shell_len_or_idx: u6,
homedir_len: u6,
username_is_a_suffix: u1,
username_offset_or_len: u5,
home_len: u6,
name_is_a_suffix: bool,
name_len: u5,
gecos_len: u8,
// a variable-sized array that will be stored immediately after this
// struct.
@ -232,27 +232,27 @@ const User = struct {
`stringdata` contains a few string entries:
- homedir.
- username.
- home.
- name.
- gecos.
- shell (optional).
The first byte of the homedir is stored right after the `gecos_len` field, and its
length is `homedir_len`. The same logic applies to all the `stringdata` fields:
The first byte of the home is stored right after the `gecos_len` field, and its
length is `home_len`. The same logic applies to all the `stringdata` fields:
there is a way to calculate their relative position from the length of the
fields before them.
Additionally, two optimizations for special fields are made:
- shells are often shared across different users, see the "Shells" section.
- username is frequently a suffix of the homedir. For example, `/home/motiejus`
and `motiejus`. In that case, storing both username and homedir strings is
wasteful. For those cases, username has two options:
1. `username_is_a_suffix=true`: username is a suffix of the home dir. In that
case, the username starts at the `username_offset_or_len`'th byte of the
homedir, and ends at the same place as the homedir.
2. `username_is_a_suffix=false`: username is stored separately. In that case,
it begins one byte after homedir, and its length is
- name is frequently a suffix of the home. For example, `/home/motiejus`
and `motiejus`. In that case, storing both name and home strings is
wasteful. For those cases, name has two options:
1. `name_is_a_suffix=true`: name is a suffix of the home dir. In that
case, the name starts at the `home_len - name_len`'th
byte of the home, and ends at the same place as the home.
2. `name_is_a_suffix=false`: name is stored separately. In that case,
it begins one byte after home, and its length is
@ -315,8 +315,7 @@ const AdditionalGids = PackedList;
An entry in `members` field points to the offset into a respective `User` or
`Group` entry (number of bytes relative to the first entry of the type).
`members` in `PackedList` is sorted by the name (`username` or `groupname`) of
the record it is pointing to.
`members` in `PackedList` are sorted the same way as in the input.
A packed list is a list of varints.

View File

@ -41,7 +41,7 @@ pub const ShellWriter = struct {
const ShellSections = struct {
index: BoundedArray(ShellIndex, MaxShells),
blob: BoundedArray(u8, MaxShells * MaxShellLen),
indices: StringHashMap(u10),
indices: StringHashMap(u6),
// initializes and populates shell sections. All strings are copied,
// nothing is owned.
@ -52,10 +52,10 @@ pub const ShellWriter = struct {
var self = ShellSections{
.index = try BoundedArray(ShellIndex, MaxShells).init(shells.len),
.blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0),
.indices = StringHashMap(u10).init(allocator),
.indices = StringHashMap(u6).init(allocator),
var fullOffset: u12 = 0;
var idx: u10 = 0;
var idx: u6 = 0;
while (idx < shells.len) {
const len = @intCast(u6, shells.get(idx).len);
try self.blob.appendSlice(shells.get(idx));
@ -90,7 +90,7 @@ pub const ShellWriter = struct {
self.* = undefined;
pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u10 {
pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u6 {
return self.indices.get(shell);

View File

@ -1,17 +1,18 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const cast = std.math.cast;
pub const PackedUserSize = @sizeOf(PackedUser);
pub const PackedUser = packed struct {
uid: u32,
gid: u32,
additional_gids_offset: u29,
shell_here: u1,
shell_here: bool,
shell_len_or_idx: u6,
homedir_len: u6,
username_is_a_suffix: u1,
username_offset_or_len: u5,
home_len: u6,
name_is_a_suffix: bool,
name_len: u5,
gecos_len: u8,
@ -31,7 +32,7 @@ pub const UserWriter = struct {
// string and returns its index in the global shell section. Passing a
// function makes tests easier, and removes the Shell dependency of this
// module.
const shellIndexFnType = fn ([]const u8) ?u10;
const shellIndexFnType = fn ([]const u8) ?u6;
allocator: Allocator,
shellIndexFn: shellIndexFnType,
@ -42,30 +43,65 @@ pub const UserWriter = struct {
pub fn fromUser(self: *UserWriter, user: User) !PackedUser {
var shell_here: u1 = undefined;
var shell_len_or_idx: u6 = undefined;
if (self.shellIndexFn(user.shell)) |idx| {
shell_here = false;
shell_len_or_idx = idx;
} else {
shell_here = true;
shell_len_or_idx = user.shell.len;
const fromUserErr = std.mem.Allocator.Error || error{InvalidRecord};
pub fn fromUser(self: *UserWriter, user: User) fromUserErr![]const u8 {
const home_len = std.math.cast(u6, user.home.len - 1) catch return error.InvalidRecord;
const name_len = cast(u5, user.name.len - 1) catch return error.InvalidRecord;
const shell_len = cast(u6, user.shell.len - 1) catch return error.InvalidRecord;
const gecos_len = cast(u8, user.gecos.len) catch return error.InvalidRecord;
var bindata_len: u32 = home_len;
var puser = PackedUser{
.uid = user.uid,
.gid = user.gid,
.additional_gids_offset = 0, // second pass
.shell_here = shell_here,
.shell_len_or_idx = shell_len_or_idx,
.homedir_len = undefined,
.username_is_a_suffix = undefined,
.username_offset_or_len = undefined,
.gecos_len = undefined,
.uid = @as(u32, user.uid),
.gid = @as(u32, user.gid),
.additional_gids_offset = std.math.maxInt(u29), // needs second pass
.shell_here = undefined,
.shell_len_or_idx = undefined,
.home_len = home_len,
.name_is_a_suffix = undefined,
.name_len = name_len,
.gecos_len = gecos_len,
return puser;
if (std.mem.endsWith(u8, user.home, user.name)) {
puser.name_is_a_suffix = true;
} else {
puser.name_is_a_suffix = false;
bindata_len += name_len;
bindata_len += gecos_len;
if (self.shellIndexFn(user.shell)) |idx| {
puser.shell_here = false;
puser.shell_len_or_idx = idx;
} else {
puser.shell_here = true;
puser.shell_len_or_idx = shell_len;
bindata_len += shell_len;
var result = try self.allocator.alloc(u8, PackedUserSize + bindata_len);
const userPointer = @ptrCast([*]const u8, &puser);
var i: u32 = 0;
while (i < PackedUserSize) {
result[i] = userPointer[i];
i += 1;
std.mem.copy(u8, result, user.home);
if (!puser.name_is_a_suffix) {
std.mem.copy(u8, result, user.name);
std.mem.copy(u8, result, user.gecos);
if (puser.shell_here) {
std.mem.copy(u8, result, user.shell);
return result;
@ -74,3 +110,36 @@ const testing = std.testing;
test "PackedUser is byte-aligned" {
try testing.expectEqual(0, @rem(@bitSizeOf(PackedUser), 8));
fn testShellIndex(shell: []const u8) ?u6 {
if (std.mem.eql(u8, shell, "/bin/bash")) {
return 0;
} else if (std.mem.eql(u8, shell, "/bin/zsh")) {
return 1;
return null;
test "construct PackedUser blob" {
var writer = UserWriter.init(testing.allocator, testShellIndex);
const user1 = User{
.uid = 1000,
.gid = 1000,
.name = "vidmantas",
.gecos = "Vidmantas Kaminskas",
.home = "/home/vidmantas",
.shell = "/bin/bash",
const user2 = User{
.uid = 1001,
.gid = 1001,
.name = "svc-foo",
.gecos = "Service Account",
.home = "/home/service1",
.shell = "/usr/bin/nologin",
const puser1 = try writer.fromUser(user1);
const puser2 = try writer.fromUser(user2);
defer testing.allocator.free(puser1);
defer testing.allocator.free(puser2);