From 13b75e804689dca371119893e0834726d81629ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Sat, 19 Feb 2022 11:35:29 +0200 Subject: [PATCH] user packing --- README.md | 41 +++++++++-------- src/shell.zig | 8 ++-- src/user.zig | 119 +++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 118 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index d80fb19..6a31cef 100644 --- a/README.md +++ b/README.md @@ -67,9 +67,9 @@ Tight packing places some constraints on the underlying data: - Maximum database size: 4GB. - Permitted length of username and groupname: 1-32 bytes. -- Permitted length of shell and homedir: 1-64 bytes. +- Permitted length of shell and home: 1-64 bytes. - Permitted comment ("gecos") length: 0-255 bytes. -- Username, groupname and gecos must be utf8-encoded. +- User name, groupname and gecos must be utf8-encoded. Checking out and building ------------------------- @@ -219,11 +219,11 @@ const User = struct { // pointer to a separate structure that contains a list of gids additional_gids_offset: u29, // shell is a different story, documented elsewhere. - shell_here: u1, + shell_here: bool, shell_len_or_idx: u6, - homedir_len: u6, - username_is_a_suffix: u1, - username_offset_or_len: u5, + home_len: u6, + name_is_a_suffix: bool, + name_len: u5, gecos_len: u8, // a variable-sized array that will be stored immediately after this // struct. @@ -232,27 +232,27 @@ const User = struct { ``` `stringdata` contains a few string entries: -- homedir. -- username. +- home. +- name. - gecos. - shell (optional). -First byte of the homedir is stored right after the `gecos_len` field, and it's -length is `homedir_len`. The same logic applies to all the `stringdata` fields: +First byte of the home is stored right after the `gecos_len` field, and its +length is `home_len`. 
The same logic applies to all the `stringdata` fields: there is a way to calculate their relative position from the length of the fields before them. Additionally, two optimizations for special fields are made: - shells are often shared across different users, see the "Shells" section. -- username is frequently a suffix of the homedir. For example, `/home/motiejus` - and `motiejus`. In which case storing both username and homedir strings is - wasteful. For that cases, username has two options: - 1. `username_is_a_suffix=true`: username is a suffix of the home dir. In that - case, the username starts at the `username_offset_or_len`'th byte of the - homedir, and ends at the same place as the homedir. - 2. `username_is_a_suffix=false`: username is stored separately. In that case, - it begins one byte after homedir, and it's length is - `username_offset_or_len`. +- name is frequently a suffix of the home. For example, `/home/motiejus` + and `motiejus`. In which case storing both name and home strings is + wasteful. For those cases, name has two options: + 1. `name_is_a_suffix=true`: name is a suffix of the home dir. In that + case, the name starts at the `home_len - name_len`'th + byte of the home, and ends at the same place as the home. + 2. `name_is_a_suffix=false`: name is stored separately. In that case, + it begins one byte after home, and its length is + `name_len`. Shells ------ @@ -315,8 +315,7 @@ const AdditionalGids = PackedList; An entry in `members` field points to the offset into a respective `User` or `Group` entry (number of bytes relative to the first entry of the type). -`members` in `PackedList` is sorted by the name (`username` or `groupname`) of -the record it is pointing to. +`members` in `PackedList` are sorted the same way as in the input. A packed list is a list of varints. 
diff --git a/src/shell.zig b/src/shell.zig index 2d1c5b8..0534d6d 100644 --- a/src/shell.zig +++ b/src/shell.zig @@ -41,7 +41,7 @@ pub const ShellWriter = struct { const ShellSections = struct { index: BoundedArray(ShellIndex, MaxShells), blob: BoundedArray(u8, MaxShells * MaxShellLen), - indices: StringHashMap(u10), + indices: StringHashMap(u6), // initializes and populates shell sections. All strings are copied, // nothing is owned. @@ -52,10 +52,10 @@ pub const ShellWriter = struct { var self = ShellSections{ .index = try BoundedArray(ShellIndex, MaxShells).init(shells.len), .blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0), - .indices = StringHashMap(u10).init(allocator), + .indices = StringHashMap(u6).init(allocator), }; var fullOffset: u12 = 0; - var idx: u10 = 0; + var idx: u6 = 0; while (idx < shells.len) { const len = @intCast(u6, shells.get(idx).len); try self.blob.appendSlice(shells.get(idx)); @@ -90,7 +90,7 @@ pub const ShellWriter = struct { self.* = undefined; } - pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u10 { + pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u6 { return self.indices.get(shell); } }; diff --git a/src/user.zig b/src/user.zig index d4f264f..c02b4b2 100644 --- a/src/user.zig +++ b/src/user.zig @@ -1,17 +1,18 @@ const std = @import("std"); const Allocator = std.mem.Allocator; +const cast = std.math.cast; pub const PackedUserSize = @sizeOf(PackedUser); pub const PackedUser = packed struct { uid: u32, gid: u32, additional_gids_offset: u29, - shell_here: u1, + shell_here: bool, shell_len_or_idx: u6, - homedir_len: u6, - username_is_a_suffix: u1, - username_offset_or_len: u5, + home_len: u6, + name_is_a_suffix: bool, + name_len: u5, gecos_len: u8, }; @@ -31,7 +32,7 @@ pub const UserWriter = struct { // string and returns it's index in the global shell section. Passing a // function makes tests easier, and removes the Shell dependency of this // module. 
-    const shellIndexFnType = fn ([]const u8) ?u10;
+    const shellIndexFnType = fn ([]const u8) ?u6;
 
     allocator: Allocator,
     shellIndexFn: shellIndexFnType,
@@ -42,30 +43,65 @@ pub const UserWriter = struct {
         };
     }
 
-    pub fn fromUser(self: *UserWriter, user: User) !PackedUser {
-        var shell_here: u1 = undefined;
-        var shell_len_or_idx: u6 = undefined;
-        if (self.shellIndexFn(user.shell)) |idx| {
-            shell_here = false;
-            shell_len_or_idx = idx;
-        } else {
-            shell_here = true;
-            shell_len_or_idx = user.shell.len;
-        }
+    const fromUserErr = std.mem.Allocator.Error || error{InvalidRecord};
+
+    // fromUser serializes a User into a newly allocated blob: the
+    // PackedUser header, then home, name (unless it is a suffix of home),
+    // gecos and shell (unless the shell section has it). home, name and
+    // shell lengths are stored as len-1. Returns error.InvalidRecord when
+    // a field is empty or too long for its length field. Caller owns the
+    // returned slice.
+    pub fn fromUser(self: *UserWriter, user: User) fromUserErr![]const u8 {
+        // Empty strings would make "len - 1" underflow; reject them first.
+        if (user.home.len == 0 or user.name.len == 0 or user.shell.len == 0) {
+            return error.InvalidRecord;
+        }
+        const home_len = cast(u6, user.home.len - 1) catch return error.InvalidRecord;
+        const name_len = cast(u5, user.name.len - 1) catch return error.InvalidRecord;
+        const shell_len = cast(u6, user.shell.len - 1) catch return error.InvalidRecord;
+        const gecos_len = cast(u8, user.gecos.len) catch return error.InvalidRecord;
 
         var puser = PackedUser{
-            .uid = user.uid,
-            .gid = user.gid,
-            .additional_gids_offset = 0, // second pass
-            .shell_here = shell_here,
-            .shell_len_or_idx = shell_len_or_idx,
-            .homedir_len = undefined,
-            .username_is_a_suffix = undefined,
-            .username_offset_or_len = undefined,
-            .gecos_len = undefined,
+            .uid = @as(u32, user.uid),
+            .gid = @as(u32, user.gid),
+            .additional_gids_offset = std.math.maxInt(u29), // needs second pass
+            .shell_here = true,
+            .shell_len_or_idx = shell_len,
+            .home_len = home_len,
+            .name_is_a_suffix = std.mem.endsWith(u8, user.home, user.name),
+            .name_len = name_len,
+            .gecos_len = gecos_len,
         };
 
-        return puser;
+        if (self.shellIndexFn(user.shell)) |idx| {
+            puser.shell_here = false;
+            puser.shell_len_or_idx = idx;
+        }
+
+        // Size the blob by the real string lengths, not by the len-1 values
+        // stored in the header.
+        var bindata_len: usize = user.home.len + user.gecos.len;
+        if (!puser.name_is_a_suffix) bindata_len += user.name.len;
+        if (puser.shell_here) bindata_len += user.shell.len;
+
+        const result = try self.allocator.alloc(u8, PackedUserSize + bindata_len);
+
+        // Header first; each string goes right after the previous one.
+        std.mem.copy(u8, result, std.mem.asBytes(&puser));
+        var offset: usize = PackedUserSize;
+        std.mem.copy(u8, result[offset..], user.home);
+        offset += user.home.len;
+        if (!puser.name_is_a_suffix) {
+            std.mem.copy(u8, result[offset..], user.name);
+            offset += user.name.len;
+        }
+        std.mem.copy(u8, result[offset..], user.gecos);
+        offset += user.gecos.len;
+        if (puser.shell_here) {
+            std.mem.copy(u8, result[offset..], user.shell);
+        }
+
+        return result;
     }
 };
 
@@ -74,3 +110,36 @@ const testing = std.testing;
 test "PackedUser is byte-aligned" {
     try testing.expectEqual(0, @rem(@bitSizeOf(PackedUser), 8));
 }
+
+fn testShellIndex(shell: []const u8) ?u6 {
+    if (std.mem.eql(u8, shell, "/bin/bash")) return 0;
+    if (std.mem.eql(u8, shell, "/bin/zsh")) return 1;
+    return null;
+}
+
+test "construct PackedUser blob" {
+    var writer = UserWriter.init(testing.allocator, testShellIndex);
+    const user1 = User{
+        .uid = 1000,
+        .gid = 1000,
+        .name = "vidmantas",
+        .gecos = "Vidmantas Kaminskas",
+        .home = "/home/vidmantas",
+        .shell = "/bin/bash",
+    };
+    const user2 = User{
+        .uid = 1001,
+        .gid = 1001,
+        .name = "svc-foo",
+        .gecos = "Service Account",
+        .home = "/home/service1",
+        .shell = "/usr/bin/nologin",
+    };
+    const puser1 = try writer.fromUser(user1);
+    defer testing.allocator.free(puser1);
+    const puser2 = try writer.fromUser(user2);
+    defer testing.allocator.free(puser2);
+    try testing.expectEqual(PackedUserSize + user1.home.len + user1.gecos.len, puser1.len);
+    try testing.expectEqualStrings(user1.home, puser1[PackedUserSize..][0..user1.home.len]);
+    try testing.expectEqualStrings(user2.shell, puser2[puser2.len - user2.shell.len ..]);
+}