user packing
This commit is contained in:
parent
93c6a1c12a
commit
13b75e8046
39
README.md
39
README.md
|
@ -67,7 +67,7 @@ Tight packing places some constraints on the underlying data:
|
|||
|
||||
- Maximum database size: 4GB.
|
||||
- Permitted length of username and groupname: 1-32 bytes.
|
||||
- Permitted length of shell and homedir: 1-64 bytes.
|
||||
- Permitted length of shell and home: 1-64 bytes.
|
||||
- Permitted comment ("gecos") length: 0-255 bytes.
|
||||
- Username, groupname and gecos must be UTF-8 encoded.
|
||||
|
||||
|
@ -219,11 +219,11 @@ const User = struct {
|
|||
// pointer to a separate structure that contains a list of gids
|
||||
additional_gids_offset: u29,
|
||||
// shell is a different story, documented elsewhere.
|
||||
shell_here: u1,
|
||||
shell_here: bool,
|
||||
shell_len_or_idx: u6,
|
||||
homedir_len: u6,
|
||||
username_is_a_suffix: u1,
|
||||
username_offset_or_len: u5,
|
||||
home_len: u6,
|
||||
name_is_a_suffix: bool,
|
||||
name_len: u5,
|
||||
gecos_len: u8,
|
||||
// a variable-sized array that will be stored immediately after this
|
||||
// struct.
|
||||
|
@ -232,27 +232,27 @@ const User = struct {
|
|||
```
|
||||
|
||||
`stringdata` contains a few string entries:
|
||||
- homedir.
|
||||
- username.
|
||||
- home.
|
||||
- name.
|
||||
- gecos.
|
||||
- shell (optional).
|
||||
|
||||
First byte of the homedir is stored right after the `gecos_len` field, and its
|
||||
length is `homedir_len`. The same logic applies to all the `stringdata` fields:
|
||||
First byte of the home is stored right after the `gecos_len` field, and its
|
||||
length is `home_len`. The same logic applies to all the `stringdata` fields:
|
||||
there is a way to calculate their relative position from the length of the
|
||||
fields before them.
|
||||
|
||||
Additionally, two optimizations for special fields are made:
|
||||
- shells are often shared across different users, see the "Shells" section.
|
||||
- username is frequently a suffix of the homedir. For example, `/home/motiejus`
|
||||
and `motiejus`. In which case storing both username and homedir strings is
|
||||
wasteful. For those cases, username has two options:
|
||||
1. `username_is_a_suffix=true`: username is a suffix of the home dir. In that
|
||||
case, the username starts at the `username_offset_or_len`'th byte of the
|
||||
homedir, and ends at the same place as the homedir.
|
||||
2. `username_is_a_suffix=false`: username is stored separately. In that case,
|
||||
it begins one byte after homedir, and its length is
|
||||
`username_offset_or_len`.
|
||||
- name is frequently a suffix of the home. For example, `/home/motiejus`
|
||||
and `motiejus`. In which case storing both name and home strings is
|
||||
wasteful. For those cases, name has two options:
|
||||
1. `name_is_a_suffix=true`: name is a suffix of the home dir. In that
|
||||
case, the name starts at the `home_len - name_len`'th
|
||||
byte of the home, and ends at the same place as the home.
|
||||
2. `name_is_a_suffix=false`: name is stored separately. In that case,
|
||||
it begins one byte after home, and its length is
|
||||
`name_len`.
|
||||
|
||||
Shells
|
||||
------
|
||||
|
@ -315,8 +315,7 @@ const AdditionalGids = PackedList;
|
|||
|
||||
An entry in `members` field points to the offset into a respective `User` or
|
||||
`Group` entry (number of bytes relative to the first entry of the type).
|
||||
`members` in `PackedList` is sorted by the name (`username` or `groupname`) of
|
||||
the record it is pointing to.
|
||||
`members` in `PackedList` are sorted the same way as in the input.
|
||||
|
||||
A packed list is a list of varints.
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ pub const ShellWriter = struct {
|
|||
const ShellSections = struct {
|
||||
index: BoundedArray(ShellIndex, MaxShells),
|
||||
blob: BoundedArray(u8, MaxShells * MaxShellLen),
|
||||
indices: StringHashMap(u10),
|
||||
indices: StringHashMap(u6),
|
||||
|
||||
// initializes and populates shell sections. All strings are copied,
|
||||
// nothing is owned.
|
||||
|
@ -52,10 +52,10 @@ pub const ShellWriter = struct {
|
|||
var self = ShellSections{
|
||||
.index = try BoundedArray(ShellIndex, MaxShells).init(shells.len),
|
||||
.blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0),
|
||||
.indices = StringHashMap(u10).init(allocator),
|
||||
.indices = StringHashMap(u6).init(allocator),
|
||||
};
|
||||
var fullOffset: u12 = 0;
|
||||
var idx: u10 = 0;
|
||||
var idx: u6 = 0;
|
||||
while (idx < shells.len) {
|
||||
const len = @intCast(u6, shells.get(idx).len);
|
||||
try self.blob.appendSlice(shells.get(idx));
|
||||
|
@ -90,7 +90,7 @@ pub const ShellWriter = struct {
|
|||
self.* = undefined;
|
||||
}
|
||||
|
||||
pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u10 {
|
||||
pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u6 {
|
||||
return self.indices.get(shell);
|
||||
}
|
||||
};
|
||||
|
|
119
src/user.zig
119
src/user.zig
|
@ -1,17 +1,18 @@
|
|||
const std = @import("std");
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
const cast = std.math.cast;
|
||||
|
||||
pub const PackedUserSize = @sizeOf(PackedUser);
|
||||
pub const PackedUser = packed struct {
|
||||
uid: u32,
|
||||
gid: u32,
|
||||
additional_gids_offset: u29,
|
||||
shell_here: u1,
|
||||
shell_here: bool,
|
||||
shell_len_or_idx: u6,
|
||||
homedir_len: u6,
|
||||
username_is_a_suffix: u1,
|
||||
username_offset_or_len: u5,
|
||||
home_len: u6,
|
||||
name_is_a_suffix: bool,
|
||||
name_len: u5,
|
||||
gecos_len: u8,
|
||||
};
|
||||
|
||||
|
@ -31,7 +32,7 @@ pub const UserWriter = struct {
|
|||
// string and returns its index in the global shell section. Passing a
|
||||
// function makes tests easier, and removes the Shell dependency of this
|
||||
// module.
|
||||
const shellIndexFnType = fn ([]const u8) ?u10;
|
||||
const shellIndexFnType = fn ([]const u8) ?u6;
|
||||
allocator: Allocator,
|
||||
shellIndexFn: shellIndexFnType,
|
||||
|
||||
|
@ -42,30 +43,65 @@ pub const UserWriter = struct {
|
|||
};
|
||||
}
|
||||
|
||||
pub fn fromUser(self: *UserWriter, user: User) !PackedUser {
|
||||
var shell_here: u1 = undefined;
|
||||
var shell_len_or_idx: u6 = undefined;
|
||||
if (self.shellIndexFn(user.shell)) |idx| {
|
||||
shell_here = false;
|
||||
shell_len_or_idx = idx;
|
||||
} else {
|
||||
shell_here = true;
|
||||
shell_len_or_idx = user.shell.len;
|
||||
}
|
||||
const fromUserErr = std.mem.Allocator.Error || error{InvalidRecord};
|
||||
|
||||
pub fn fromUser(self: *UserWriter, user: User) fromUserErr![]const u8 {
|
||||
const home_len = std.math.cast(u6, user.home.len - 1) catch return error.InvalidRecord;
|
||||
const name_len = cast(u5, user.name.len - 1) catch return error.InvalidRecord;
|
||||
const shell_len = cast(u6, user.shell.len - 1) catch return error.InvalidRecord;
|
||||
const gecos_len = cast(u8, user.gecos.len) catch return error.InvalidRecord;
|
||||
var bindata_len: u32 = home_len;
|
||||
|
||||
var puser = PackedUser{
|
||||
.uid = user.uid,
|
||||
.gid = user.gid,
|
||||
.additional_gids_offset = 0, // second pass
|
||||
.shell_here = shell_here,
|
||||
.shell_len_or_idx = shell_len_or_idx,
|
||||
.homedir_len = undefined,
|
||||
.username_is_a_suffix = undefined,
|
||||
.username_offset_or_len = undefined,
|
||||
.gecos_len = undefined,
|
||||
.uid = @as(u32, user.uid),
|
||||
.gid = @as(u32, user.gid),
|
||||
.additional_gids_offset = std.math.maxInt(u29), // needs second pass
|
||||
.shell_here = undefined,
|
||||
.shell_len_or_idx = undefined,
|
||||
.home_len = home_len,
|
||||
.name_is_a_suffix = undefined,
|
||||
.name_len = name_len,
|
||||
.gecos_len = gecos_len,
|
||||
};
|
||||
|
||||
return puser;
|
||||
if (std.mem.endsWith(u8, user.home, user.name)) {
|
||||
puser.name_is_a_suffix = true;
|
||||
} else {
|
||||
puser.name_is_a_suffix = false;
|
||||
bindata_len += name_len;
|
||||
}
|
||||
|
||||
bindata_len += gecos_len;
|
||||
|
||||
if (self.shellIndexFn(user.shell)) |idx| {
|
||||
puser.shell_here = false;
|
||||
puser.shell_len_or_idx = idx;
|
||||
} else {
|
||||
puser.shell_here = true;
|
||||
puser.shell_len_or_idx = shell_len;
|
||||
bindata_len += shell_len;
|
||||
}
|
||||
|
||||
var result = try self.allocator.alloc(u8, PackedUserSize + bindata_len);
|
||||
|
||||
const userPointer = @ptrCast([*]const u8, &puser);
|
||||
{
|
||||
var i: u32 = 0;
|
||||
while (i < PackedUserSize) {
|
||||
result[i] = userPointer[i];
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
std.mem.copy(u8, result, user.home);
|
||||
if (!puser.name_is_a_suffix) {
|
||||
std.mem.copy(u8, result, user.name);
|
||||
}
|
||||
std.mem.copy(u8, result, user.gecos);
|
||||
if (puser.shell_here) {
|
||||
std.mem.copy(u8, result, user.shell);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -74,3 +110,36 @@ const testing = std.testing;
|
|||
test "PackedUser is byte-aligned" {
|
||||
try testing.expectEqual(0, @rem(@bitSizeOf(PackedUser), 8));
|
||||
}
|
||||
|
||||
fn testShellIndex(shell: []const u8) ?u6 {
|
||||
if (std.mem.eql(u8, shell, "/bin/bash")) {
|
||||
return 0;
|
||||
} else if (std.mem.eql(u8, shell, "/bin/zsh")) {
|
||||
return 1;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
test "construct PackedUser blob" {
|
||||
var writer = UserWriter.init(testing.allocator, testShellIndex);
|
||||
const user1 = User{
|
||||
.uid = 1000,
|
||||
.gid = 1000,
|
||||
.name = "vidmantas",
|
||||
.gecos = "Vidmantas Kaminskas",
|
||||
.home = "/home/vidmantas",
|
||||
.shell = "/bin/bash",
|
||||
};
|
||||
const user2 = User{
|
||||
.uid = 1001,
|
||||
.gid = 1001,
|
||||
.name = "svc-foo",
|
||||
.gecos = "Service Account",
|
||||
.home = "/home/service1",
|
||||
.shell = "/usr/bin/nologin",
|
||||
};
|
||||
const puser1 = try writer.fromUser(user1);
|
||||
const puser2 = try writer.fromUser(user2);
|
||||
defer testing.allocator.free(puser1);
|
||||
defer testing.allocator.free(puser2);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue