//! turbonss/src/DB.zig — packed NSS database: section layout, serialization
//! (iov/fromBytes) and passwd/group lookup entry points.
const std = @import("std");
const os = std.os;
const mem = std.mem;
const math = std.math;
const meta = std.meta;
const sort = std.sort;
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ArrayListAligned = std.ArrayListAligned;
const AutoHashMap = std.AutoHashMap;
const BoundedArray = std.BoundedArray;
const ErrCtx = @import("ErrCtx.zig");
const Corpus = @import("Corpus.zig");
const compress = @import("compress.zig");
const Group = @import("Group.zig");
const CGroup = Group.CGroup;
const PackedGroup = @import("PackedGroup.zig");
const GroupStored = PackedGroup.GroupStored;
const User = @import("User.zig");
const CUser = User.CUser;
const PackedUser = @import("PackedUser.zig");
const ShellSections = @import("shell.zig").ShellWriter.ShellSections;
const ShellReader = @import("shell.zig").ShellReader;
const ShellWriter = @import("shell.zig").ShellWriter;
const InvalidHeader = @import("header.zig").Invalid;
const Header = @import("header.zig").Header;
const max_shells = @import("shell.zig").max_shells;
const section_length_bits = @import("header.zig").section_length_bits;
const section_length = @import("header.zig").section_length;
const cmph = @import("cmph.zig");
const bdz = @import("bdz.zig");
const zeroes = &[_]u8{0} ** section_length;
const DB = @This();
// All sections, as they end up in the DB. Order is important.
// Serialized by `iov()` in declaration order, each padded to `section_length`.
header: *const Header,
// Packed bdz minimal-perfect-hash functions, one per lookup key.
bdz_gid: []align(8) const u8,
bdz_groupname: []align(8) const u8,
bdz_uid: []align(8) const u8,
bdz_username: []align(8) const u8,
// bdz hash -> (record byte offset >> 3) into the groups/users sections.
idx_gid2group: []align(8) const u32,
idx_groupname2group: []align(8) const u32,
idx_uid2user: []align(8) const u32,
idx_name2user: []align(8) const u32,
// Deduplicated shells: index table plus the string blob it points into.
shell_index: []align(8) const u16,
shell_blob: []align(8) const u8,
// Packed group and user records; each record is 8-byte aligned.
groups: []align(8) const u8,
users: []align(8) const u8,
// Per-group member lists and per-user supplementary gids (varint blobs).
groupmembers: []align(8) const u8,
additional_gids: []align(8) const u8,
// Builds a complete DB from an in-memory corpus: constructs the bdz
// minimal-perfect-hash functions, packs every section, and fills the header.
// The returned DB owns all of its buffers; release them with deinit().
pub fn fromCorpus(
    allocator: Allocator,
    corpus: *const Corpus,
    err: *ErrCtx,
) error{ OutOfMemory, InvalidRecord, TooMany }!DB {
    // err is accepted for interface stability but not used here yet.
    _ = err;
    const gids = corpus.groups.items(.gid);
    const gnames = corpus.groups.items(.name);
    const uids = corpus.users.items(.uid);
    const unames = corpus.users.items(.name);
    // Perfect-hash functions for the four lookup keys.
    const bdz_gid = try cmph.packU32(allocator, gids);
    errdefer allocator.free(bdz_gid);
    const bdz_groupname = try cmph.packStr(allocator, gnames);
    errdefer allocator.free(bdz_groupname);
    const bdz_uid = try cmph.packU32(allocator, uids);
    errdefer allocator.free(bdz_uid);
    const bdz_username = try cmph.packStr(allocator, unames);
    errdefer allocator.free(bdz_username);
    var shell = try shellSections(allocator, corpus);
    defer shell.deinit();
    // Copy the bounded shell arrays into heap slices owned by the DB.
    const shell_index = try allocator.alignedAlloc(u16, 8, shell.index.len);
    mem.copy(u16, shell_index, shell.index.constSlice());
    errdefer allocator.free(shell_index);
    const shell_blob = try allocator.alignedAlloc(u8, 8, shell.blob.len);
    mem.copy(u8, shell_blob, shell.blob.constSlice());
    errdefer allocator.free(shell_blob);
    // For each packed section: the blob's ownership moves into the result
    // (errdefer), while the intermediate idx2offset tables are always freed
    // before returning (defer).
    const additional_gids = try additionalGids(allocator, corpus);
    errdefer allocator.free(additional_gids.blob);
    defer allocator.free(additional_gids.idx2offset);
    const users = try usersSection(allocator, corpus, &additional_gids, &shell);
    errdefer allocator.free(users.blob);
    defer allocator.free(users.idx2offset);
    const groupmembers = try groupMembers(allocator, corpus, users.idx2offset);
    errdefer allocator.free(groupmembers.blob);
    defer allocator.free(groupmembers.idx2offset);
    const groups = try groupsSection(allocator, corpus, groupmembers.idx2offset);
    errdefer allocator.free(groups.blob);
    defer allocator.free(groups.idx2offset);
    // hash -> record-offset tables for the four lookup keys.
    const idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset);
    errdefer allocator.free(idx_gid2group);
    const idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset);
    errdefer allocator.free(idx_groupname2group);
    const idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset);
    errdefer allocator.free(idx_uid2user);
    const idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset);
    errdefer allocator.free(idx_name2user);
    const header = try allocator.create(Header);
    errdefer allocator.destroy(header);
    header.* = Header{
        .nblocks_shell_blob = nblocks(u8, shell.blob.constSlice()),
        .num_shells = shell.len,
        .num_groups = groups.len,
        .num_users = users.len,
        .nblocks_bdz_gid = nblocks(u32, bdz_gid),
        .nblocks_bdz_groupname = nblocks(u32, bdz_groupname),
        .nblocks_bdz_uid = nblocks(u32, bdz_uid),
        .nblocks_bdz_username = nblocks(u32, bdz_username),
        .nblocks_groups = nblocks(u64, groups.blob),
        .nblocks_users = nblocks(u64, users.blob),
        .nblocks_groupmembers = nblocks(u64, groupmembers.blob),
        .nblocks_additional_gids = nblocks(u64, additional_gids.blob),
        .getgr_bufsize = corpus.getgr_bufsize,
        .getpw_bufsize = corpus.getpw_bufsize,
    };
    return DB{
        .header = header,
        .bdz_gid = bdz_gid,
        .bdz_groupname = bdz_groupname,
        .bdz_uid = bdz_uid,
        .bdz_username = bdz_username,
        .idx_gid2group = idx_gid2group,
        .idx_groupname2group = idx_groupname2group,
        .idx_uid2user = idx_uid2user,
        .idx_name2user = idx_name2user,
        .shell_index = shell_index,
        .shell_blob = shell_blob,
        .groups = groups.blob,
        .users = users.blob,
        .groupmembers = groupmembers.blob,
        .additional_gids = additional_gids.blob,
    };
}
// Size of the scratch buffer callers must pass to getgrnam/getgrgid.
pub fn getgrBufsize(self: *const DB) usize {
    const hdr = self.header;
    return hdr.getgr_bufsize;
}
// Size of the scratch buffer callers must pass to getpwnam/getpwuid.
pub fn getpwBufsize(self: *const DB) usize {
    const hdr = self.header;
    return hdr.getpw_bufsize;
}
// Releases every allocation owned by this DB.
// NOTE(review): intended for DBs built by fromCorpus; a DB produced by
// fromBytes aliases the caller's buffer — confirm it is never deinit'ed.
pub fn deinit(self: *DB, allocator: Allocator) void {
    // Walk the fields in declaration order: the header is a single-item
    // pointer (destroy), every other section is a slice (free).
    inline for (DB_fields) |field| {
        const value = @field(self, field.name);
        switch (@TypeOf(value)) {
            *const Header => allocator.destroy(value),
            else => allocator.free(value),
        }
    }
    self.* = undefined;
}
// Field list of DB, used wherever sections are iterated in serialized order.
const DB_fields = meta.fields(DB);
// Returns an iovec array describing the serialized DB: each field's raw
// bytes followed, when needed, by zero padding up to the next section
// boundary — hence at most two iovecs per field.
pub fn iov(self: *align(8) const DB) BoundedArray(os.iovec_const, DB_fields.len * 2) {
    var result = BoundedArray(os.iovec_const, DB_fields.len * 2).init(0) catch unreachable;
    inline for (DB_fields) |field| {
        // Refuse to serialize types whose in-memory layout is unspecified.
        comptime assertDefinedLayout(field.type);
        const value = @field(self, field.name);
        // The header is a single struct pointer; all other fields are slices.
        const bytes: []align(8) const u8 = switch (@TypeOf(value)) {
            *const Header => mem.asBytes(value),
            else => mem.sliceAsBytes(value),
        };
        result.appendAssumeCapacity(os.iovec_const{
            .iov_base = bytes.ptr,
            .iov_len = bytes.len,
        });
        // Pad to the section boundary using the shared zero buffer.
        const padding = mem.alignForward(bytes.len, section_length) - bytes.len;
        if (padding != 0)
            result.appendAssumeCapacity(.{
                .iov_base = zeroes,
                .iov_len = padding,
            });
    }
    return result;
}
// One u64 per DB field; used for both per-section lengths and offsets
// (both expressed in blocks of `section_length` bytes). Field names must
// mirror DB's field names exactly — fieldOffsets() asserts this.
pub const DBNumbers = struct {
    header: u64,
    bdz_gid: u64,
    bdz_groupname: u64,
    bdz_uid: u64,
    bdz_username: u64,
    idx_gid2group: u64,
    idx_groupname2group: u64,
    idx_uid2user: u64,
    idx_name2user: u64,
    shell_index: u64,
    shell_blob: u64,
    groups: u64,
    users: u64,
    groupmembers: u64,
    additional_gids: u64,
};
// in blocks
// Derives every section's length from the header: some lengths are stored
// directly, the index/shell tables are computed from element counts
// (4 bytes per u32 entry, 2 bytes per u16 entry).
pub fn fieldLengths(header: *const Header) DBNumbers {
    return DBNumbers{
        .header = comptime DB.nblocks_n(u64, @sizeOf(Header)),
        .bdz_gid = header.nblocks_bdz_gid,
        .bdz_groupname = header.nblocks_bdz_groupname,
        .bdz_uid = header.nblocks_bdz_uid,
        .bdz_username = header.nblocks_bdz_username,
        .idx_gid2group = nblocks_n(u32, header.num_groups * 4),
        .idx_groupname2group = nblocks_n(u32, header.num_groups * 4),
        .idx_uid2user = nblocks_n(u32, header.num_users * 4),
        .idx_name2user = nblocks_n(u32, header.num_users * 4),
        .shell_index = nblocks_n(u16, header.num_shells * 2),
        .shell_blob = header.nblocks_shell_blob,
        .groups = header.nblocks_groups,
        .users = header.nblocks_users,
        .groupmembers = header.nblocks_groupmembers,
        .additional_gids = header.nblocks_additional_gids,
    };
}
// in blocks
// Prefix-sums the per-section lengths into per-section start offsets,
// in DB field declaration order. The comptime assert guarantees DBNumbers'
// field names stay in lockstep with DB's.
pub fn fieldOffsets(lengths: DBNumbers) DBNumbers {
    var result: DBNumbers = undefined;
    result.header = 0;
    var offset = comptime nblocks_n(u64, @sizeOf(Header));
    // skipping header (so starting with index 1)
    inline for (DB_fields[1..], meta.fields(DBNumbers)[1..]) |db_field, dbn_field| {
        assert(mem.eql(u8, db_field.name, dbn_field.name));
        @field(result, db_field.name) = offset;
        offset += @field(lengths, db_field.name);
    }
    return result;
}
// Reinterprets a serialized DB buffer as a DB. The returned DB's slices
// (including the header pointer) point into `buf`; nothing is copied, so
// `buf` must outlive the result.
pub fn fromBytes(buf: []align(8) const u8) InvalidHeader!DB {
    const header = try Header.fromBytes(buf[0..@sizeOf(Header)]);
    const lengths = fieldLengths(header);
    const offsets = fieldOffsets(lengths);
    var result: DB = undefined;
    result.header = header;
    // Skip the header field (index 0); slice every other section out of buf,
    // converting block offsets/lengths back to byte ranges.
    inline for (DB_fields[1..]) |field| {
        const start_block = @field(offsets, field.name);
        const end = (start_block + @field(lengths, field.name)) << section_length_bits;
        const start = start_block << section_length_bits;
        const slice_type = meta.Child(field.type);
        const value = mem.bytesAsSlice(slice_type, @alignCast(8, buf[start..end]));
        @field(result, field.name) = value;
    }
    return result;
}
// Writes a CGroup for `group` into buf with an empty member list.
// buf layout: one null pointer (the gr_mem terminator), then the
// NUL-terminated group name.
pub fn packCGroupNoMembers(group: *const PackedGroup, buf: []u8) error{BufferTooSmall}!CGroup {
    // First word in buf will be a pointer to null.
    const name_start = @sizeOf(?[*:0]const u8);
    if (name_start > buf.len) return error.BufferTooSmall;
    var member_ptrs = mem.bytesAsSlice(?[*:0]const u8, buf[0..name_start]);
    member_ptrs[0] = null;
    // write name
    const name = group.name();
    if (name_start + name.len + 1 > buf.len) return error.BufferTooSmall;
    mem.copy(u8, buf[name_start..], name);
    buf[name_start + name.len] = 0;
    return CGroup{
        .gr_name = buf[name_start .. name_start + name.len :0].ptr,
        .gr_gid = group.gid(),
        // TODO: how can we use bytesAsSlice in a way that does not need
        // this ugly ptrCast?
        .gr_mem = @ptrCast([*:null]align(1) const ?[*:0]const u8, member_ptrs.ptr),
    };
}
// The pointer to `arr` gets changed on second iteration, and I don't know why.
// Leaving this for better times.
// NOTE(review): `it` stores the address of `vit`; once GroupMembersIter is
// returned by value from groupMembersIter(), that address refers to the
// callee's dead stack frame — likely the cause of the oddity described
// above. Confirm against compress.zig's DeltaDecompressionIterator.
const GroupMembersIter = struct {
    // Underlying varint iterator over the raw member bytes.
    vit: compress.VarintSliceIterator,
    // Delta-decompression layered on top of `vit`.
    it: compress.DeltaDecompressionIterator,
    // Number of members in this list (captured before iteration starts).
    total: usize,
    // The raw encoded slice this iterator walks.
    arr: []const u8,
    pub fn nextMust(self: *GroupMembersIter) ?u64 {
        return self.it.nextMust();
    }
};
// Constructs an iterator over one group's member list; `members_slice`
// starts at the group's offset inside the groupmembers blob.
pub fn groupMembersIter(members_slice: []const u8) GroupMembersIter {
    var vit = compress.varintSliceIteratorMust(members_slice);
    var it = compress.deltaDecompressionIterator(&vit);
    return GroupMembersIter{
        .arr = members_slice,
        .vit = vit,
        .it = it,
        .total = vit.remaining,
    };
}
// dumps PackedGroup to []u8 and returns a CGroup.
// buf layout: (num_members + 1) member pointers with a trailing null, then
// each member's NUL-terminated username, then the group's own name.
pub fn packCGroup(self: *const DB, group: *const PackedGroup, buf: []u8) error{BufferTooSmall}!CGroup {
    const members_slice = self.groupmembers[group.members_offset..];
    var it = groupMembersIter(members_slice);
    const num_members = it.total;
    const ptr_end = @sizeOf(?[*:0]const u8) * (num_members + 1);
    if (ptr_end > buf.len)
        return error.BufferTooSmall;
    var member_ptrs = mem.bytesAsSlice(?[*:0]const u8, buf[0..ptr_end]);
    member_ptrs[member_ptrs.len - 1] = null;
    var buf_offset: usize = ptr_end;
    var i: usize = 0;
    while (it.nextMust()) |member_offset| : (i += 1) {
        // Stored offsets are (byte offset >> 3): records are 8-byte aligned.
        const entry = PackedUser.fromBytes(@alignCast(8, self.users[member_offset << 3 ..]));
        const start = buf_offset;
        const name = entry.user.name();
        if (buf_offset + name.len + 1 > buf.len)
            return error.BufferTooSmall;
        mem.copy(u8, buf[buf_offset..], name);
        buf_offset += name.len;
        buf[buf_offset] = 0;
        buf_offset += 1;
        member_ptrs[i] = buf[start .. buf_offset - 1 :0];
    }
    // The group's own name goes after the last member name.
    const name = group.name();
    if (buf_offset + name.len + 1 > buf.len) return error.BufferTooSmall;
    mem.copy(u8, buf[buf_offset..], name);
    buf[buf_offset + name.len] = 0;
    return CGroup{
        .gr_name = buf[buf_offset .. buf_offset + name.len :0].ptr,
        .gr_gid = group.gid(),
        // TODO: how can we use bytesAsSlice in a way that does not need
        // this ugly ptrCast?
        .gr_mem = @ptrCast([*:null]align(1) const ?[*:0]const u8, member_ptrs.ptr),
    };
}
// Looks up a packed group record by name; null when absent.
pub fn getGroupByName(self: *const DB, name: []const u8) ?PackedGroup {
    // bdz returns an arbitrary hash for keys outside the original set; the
    // bounds check and the final name comparison reject such misses.
    const hash = bdz.search(self.bdz_groupname, name);
    if (hash >= self.header.num_groups) return null;
    const group_offset = self.idx_groupname2group[hash];
    const candidate = PackedGroup.fromBytes(@alignCast(8, self.groups[group_offset << 3 ..])).group;
    return if (mem.eql(u8, name, candidate.name())) candidate else null;
}
// Looks up a packed group record by gid; null when absent.
pub fn getGroupByGid(self: *const DB, gid: u32) ?PackedGroup {
    // Reject bdz misses via the bounds check and the final gid comparison.
    const hash = bdz.search_u32(self.bdz_gid, gid);
    if (hash >= self.header.num_groups) return null;
    const group_offset = self.idx_gid2group[hash];
    const candidate = PackedGroup.fromBytes(@alignCast(8, self.groups[group_offset << 3 ..])).group;
    return if (candidate.gid() == gid) candidate else null;
}
// get a CGroup entry by name. Returns null when the group does not exist;
// when omit_members is set, the member list is left empty.
pub fn getgrnam(
    self: *const DB,
    name: []const u8,
    buf: []u8,
    omit_members: bool,
) error{BufferTooSmall}!?CGroup {
    const group = self.getGroupByName(name) orelse return null;
    if (omit_members) return try packCGroupNoMembers(&group, buf);
    return try self.packCGroup(&group, buf);
}
// get a CGroup entry by its gid. Returns null when the group does not
// exist; when omit_members is set, the member list is left empty.
pub fn getgrgid(
    self: *const DB,
    gid: u32,
    buf: []u8,
    omit_members: bool,
) error{BufferTooSmall}!?CGroup {
    const group = self.getGroupByGid(gid) orelse return null;
    if (omit_members) return try packCGroupNoMembers(&group, buf);
    return try self.packCGroup(&group, buf);
}
// Copies `str` into buf at *offset, NUL-terminates it, advances *offset
// past the terminator, and returns a sentinel-terminated pointer to the
// copy. Callers must have verified buf has room for str.len + 1 bytes.
fn pushStr(str: []const u8, buf: []u8, offset: *usize) [*:0]const u8 {
    const begin = offset.*;
    const end = begin + str.len;
    mem.copy(u8, buf[begin..], str);
    buf[end] = 0;
    offset.* = end + 1;
    return buf[begin..end :0].ptr;
}
// Constructs a ShellReader view over this DB's shell sections.
pub fn shellReader(self: *const DB) ShellReader {
    const index = self.shell_index;
    const blob = self.shell_blob;
    return ShellReader{ .index = index, .blob = blob };
}
// Serializes a PackedUser into buf as a CUser: four NUL-terminated strings
// (name, gecos, home, shell) laid out back-to-back. The total size is
// validated up front so the pushStr calls below cannot overrun buf.
pub fn writeUser(self: *const DB, user: PackedUser, buf: []u8) error{BufferTooSmall}!CUser {
    const name = user.name();
    const gecos = user.gecos();
    const home = user.home();
    const shell = user.shell(self.shellReader());
    const required = name.len + gecos.len + home.len + shell.len + 4;
    if (required > buf.len) return error.BufferTooSmall;
    var cursor: usize = 0;
    const c_name = pushStr(name, buf, &cursor);
    const c_gecos = pushStr(gecos, buf, &cursor);
    const c_home = pushStr(home, buf, &cursor);
    const c_shell = pushStr(shell, buf, &cursor);
    return CUser{
        .pw_name = c_name,
        .pw_uid = user.uid(),
        .pw_gid = user.gid(),
        .pw_gecos = c_gecos,
        .pw_dir = c_home,
        .pw_shell = c_shell,
    };
}
// Looks up a packed user record by username; null when absent.
pub fn getUserByName(self: *const DB, name: []const u8) ?PackedUser {
    // bdz may return any hash for a key outside the original set; the
    // bounds check and the final name comparison reject such misses.
    const hash = bdz.search(self.bdz_username, name);
    if (hash >= self.header.num_users) return null;
    const user_offset = self.idx_name2user[hash];
    const candidate = PackedUser.fromBytes(@alignCast(8, self.users[user_offset << 3 ..])).user;
    return if (mem.eql(u8, name, candidate.name())) candidate else null;
}
// get a CUser entry by name; null when the user does not exist.
pub fn getpwnam(self: *const DB, name: []const u8, buf: []u8) error{BufferTooSmall}!?CUser {
    if (self.getUserByName(name)) |user|
        return try self.writeUser(user, buf);
    return null;
}
// Looks up a packed user record by uid; null when absent.
pub fn getUserByUid(self: *const DB, uid: u32) ?PackedUser {
    // Reject bdz misses via the bounds check and the final uid comparison.
    const hash = bdz.search_u32(self.bdz_uid, uid);
    if (hash >= self.header.num_users) return null;
    const user_offset = self.idx_uid2user[hash];
    const candidate = PackedUser.fromBytes(@alignCast(8, self.users[user_offset << 3 ..])).user;
    return if (candidate.uid() == uid) candidate else null;
}
// get a CUser entry by uid; null when the user does not exist.
pub fn getpwuid(self: *const DB, uid: u32, buf: []u8) error{BufferTooSmall}!?CUser {
    if (self.getUserByUid(uid)) |user|
        return try self.writeUser(user, buf);
    return null;
}
// Feeds every user's shell into a ShellWriter and returns the resulting
// index/blob sections (at most max_shells distinct shells).
fn shellSections(
    allocator: Allocator,
    corpus: *const Corpus,
) error{OutOfMemory}!ShellSections {
    var writer = ShellWriter.init(allocator);
    errdefer writer.deinit();
    const shells = corpus.users.items(.shell);
    for (shells) |sh|
        try writer.put(sh);
    return writer.toOwnedSections(max_shells);
}
// Result of additionalGids(): every user's supplementary gid list packed
// into one blob.
const AdditionalGids = struct {
    // user index -> offset in blob
    idx2offset: []align(8) const u64,
    // compressed user gids blob. A blob contains N <= users.len items,
    // an item is:
    // len: varint
    // gid: [varint]varint,
    // ... and the gid list is delta-compressed.
    blob: []align(8) const u8,
};
// Packs every user's supplementary gids into a delta-varint-compressed blob
// and returns, per user index, the offset of that user's list in the blob.
// Caller owns both returned slices.
fn additionalGids(
    allocator: Allocator,
    corpus: *const Corpus,
) error{OutOfMemory}!AdditionalGids {
    var blob = ArrayListAligned(u8, 8).init(allocator);
    errdefer blob.deinit();
    var idx2offset = try allocator.alloc(u64, corpus.users.len);
    errdefer allocator.free(idx2offset);
    // zero'th entry is empty, so groupless users can refer to it.
    try compress.appendUvarint(&blob, 0);
    // Scratch buffer for one user's gid list, grown on demand. Its len is
    // never shrunk: the previous code set `scratch.len = usergroups.len`,
    // which made the later free() run with the wrong (smaller) length and
    // made the growth check below compare against the shrunken length.
    var scratch = try allocator.alignedAlloc(u32, 8, 256);
    defer allocator.free(scratch);
    const corpusGids = corpus.groups.items(.gid);
    for (corpus.user2groups, idx2offset) |usergroups, *u_idx2offset| {
        if (usergroups.len == 0) {
            // No supplementary groups: point at the shared empty entry.
            u_idx2offset.* = 0;
            continue;
        }
        u_idx2offset.* = blob.items.len;
        if (scratch.len < usergroups.len) {
            // Allocate the replacement before freeing the old buffer: if
            // allocation fails, `scratch` stays valid for the defer above.
            const larger = try allocator.alignedAlloc(u32, 8, usergroups.len);
            allocator.free(scratch);
            scratch = larger;
        }
        // View of exactly usergroups.len elements; scratch.len stays intact.
        const gids = scratch[0..usergroups.len];
        for (usergroups, gids) |group_idx, *gid|
            gid.* = corpusGids[group_idx];
        compress.deltaCompress(u32, gids) catch |err| switch (err) {
            // Group lists are expected pre-sorted (matches the original's
            // unreachable); TODO confirm against the corpus builder.
            error.NotSorted => unreachable,
        };
        try compress.appendUvarint(&blob, usergroups.len);
        for (gids) |gid|
            try compress.appendUvarint(&blob, gid);
    }
    return AdditionalGids{
        .idx2offset = idx2offset,
        .blob = try blob.toOwnedSlice(),
    };
}
// Result of usersSection(): all user records packed into one blob.
const UsersSection = struct {
    // number of users in this section
    len: u32,
    // user index -> offset in blob
    // (stored as byte offset >> 3; records are 8-byte aligned)
    idx2offset: []align(8) const u32,
    blob: []align(8) const u8,
};
// Packs all user records into a single 8-byte-aligned blob and records,
// per user index, the record's byte offset divided by 8.
fn usersSection(
    allocator: Allocator,
    corpus: *const Corpus,
    gids: *const AdditionalGids,
    shells: *const ShellSections,
) error{ OutOfMemory, InvalidRecord, TooMany }!UsersSection {
    var idx2offset = try allocator.alignedAlloc(u32, 8, corpus.users.len);
    errdefer allocator.free(idx2offset);
    // as of writing each user takes 12 bytes + blobs + padding, padded to
    // 8 bytes. 24 is an optimistic lower bound for an average record size.
    var blob = try ArrayListAligned(u8, 8).initCapacity(allocator, 24 * corpus.users.len);
    errdefer blob.deinit();
    const users = corpus.users.slice();
    for (0..users.len) |i| {
        // NOTE(review): users.get(i) gathers every field on each iteration;
        // iterating the individual field slices directly may be cheaper.
        const user = users.get(i);
        // Offsets are stored >> 3 in a u32, so they must fit u35.
        const user_offset = math.cast(u35, blob.items.len) orelse return error.TooMany;
        assert(user_offset & 7 == 0);
        idx2offset[i] = @truncate(u32, user_offset >> 3);
        try PackedUser.packTo(
            &blob,
            user,
            gids.idx2offset[i],
            shells.shell2idx,
        );
        // Pad the record to the next 8-byte boundary.
        try blob.appendNTimes(0, mem.alignForward(blob.items.len, 8) - blob.items.len);
    }
    return UsersSection{
        .len = @intCast(u32, users.len),
        .idx2offset = idx2offset,
        .blob = try blob.toOwnedSlice(),
    };
}
// Result of groupMembers(): every group's member list packed into one blob.
const GroupMembers = struct {
    // group index to it's offset in blob
    idx2offset: []const u64,
    // members are delta-varint encoded byte-offsets to the user struct
    blob: []align(8) const u8,
};
// Packs every group's member list: a varint count followed by the members'
// delta-compressed user offsets (user2offset as built by usersSection).
fn groupMembers(
    allocator: Allocator,
    corpus: *const Corpus,
    user2offset: []const u32,
) error{OutOfMemory}!GroupMembers {
    var idx2offset = try allocator.alloc(u64, corpus.groups.len);
    errdefer allocator.free(idx2offset);
    var blob = ArrayListAligned(u8, 8).init(allocator);
    errdefer blob.deinit();
    // zero'th entry is empty, so empty groups can refer to it
    try compress.appendUvarint(&blob, 0);
    // Scratch list for one group's member offsets; reused across groups.
    var scratch = try ArrayListAligned(u32, 8).initCapacity(allocator, 1024);
    defer scratch.deinit();
    for (corpus.group2users, idx2offset) |members, *idx2offset_g| {
        if (members.len == 0) {
            idx2offset_g.* = 0;
            continue;
        }
        idx2offset_g.* = blob.items.len;
        try scratch.ensureTotalCapacity(members.len);
        scratch.items.len = members.len;
        for (members, scratch.items) |user_idx, *scratch_i|
            scratch_i.* = user2offset[user_idx];
        // NotSorted is treated as impossible: member offsets are expected
        // ascending — TODO confirm against the corpus builder.
        compress.deltaCompress(u32, scratch.items) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };
        try compress.appendUvarint(&blob, members.len);
        for (scratch.items) |elem|
            try compress.appendUvarint(&blob, elem);
    }
    return GroupMembers{
        .idx2offset = idx2offset,
        .blob = try blob.toOwnedSlice(),
    };
}
// Result of groupsSection(): all group records packed into one blob.
const GroupsSection = struct {
    // number of groups in this section
    len: u32,
    // group index -> offset in blob
    // (stored as byte offset >> 3; records are 8-byte aligned)
    idx2offset: []align(8) const u32,
    blob: []align(8) const u8,
};
// Packs all group records into a single 8-byte-aligned blob and records,
// per group index, the record's byte offset divided by 8.
fn groupsSection(
    allocator: Allocator,
    corpus: *const Corpus,
    members_offset: []const u64,
) error{ OutOfMemory, InvalidRecord }!GroupsSection {
    var idx2offset = try allocator.alignedAlloc(u32, 8, corpus.groups.len);
    errdefer allocator.free(idx2offset);
    // 8 bytes is an optimistic lower bound for an average packed group.
    var blob = try ArrayListAligned(u8, 8).initCapacity(allocator, 8 * corpus.groups.len);
    errdefer blob.deinit();
    for (
        corpus.groups.items(.gid),
        corpus.groups.items(.name),
        idx2offset,
        members_offset,
    ) |gid, name, *idx2offset_i, members_offset_i| {
        // Records are 8-byte aligned, so offsets are stored divided by 8.
        const group_offset = @intCast(u32, blob.items.len);
        assert(group_offset & 7 == 0);
        idx2offset_i.* = @truncate(u32, group_offset >> 3);
        const group_stored = GroupStored{
            .gid = gid,
            .name = name,
            .members_offset = members_offset_i,
        };
        try PackedGroup.packTo(&blob, group_stored);
        // Pad the record to the next 8-byte boundary.
        try blob.appendNTimes(0, mem.alignForward(blob.items.len, 8) - blob.items.len);
    }
    return GroupsSection{
        .len = @intCast(u32, corpus.groups.len),
        .idx2offset = idx2offset,
        .blob = try blob.toOwnedSlice(),
    };
}
// Returns an iterator over a user's supplementary gids starting at
// `offset` within the additional_gids blob.
// FIXME(review): `vit` is a stack local and deltaDecompressionIterator
// captures its address, so the returned iterator appears to outlive the
// frame it points into — same hazard as GroupMembersIter above; confirm
// against compress.zig.
pub fn userGids(self: *const DB, offset: u64) compress.DeltaDecompressionIterator {
    var vit = compress.varintSliceIteratorMust(self.additional_gids[offset..]);
    return compress.deltaDecompressionIterator(&vit);
}
// creates a bdz index using packed_mphf.
// hash = bdz_search(packed_mphf, keys[i]);
// result[hash] = idx2offset[i];
// keys and idx2offset are parallel slices; the result maps every key's
// perfect-hash value to that key's record offset.
fn bdzIdx(
    comptime T: type,
    allocator: Allocator,
    packed_mphf: []const u8,
    keys: []const T,
    idx2offset: []const u32,
) error{OutOfMemory}![]align(8) const u32 {
    // Pick the search routine matching the key type at compile time.
    const search_fn = comptime switch (T) {
        u32 => bdz.search_u32,
        []const u8 => bdz.search,
        else => @compileError("unexpected type " ++ @typeName(T)),
    };
    assert(keys.len <= math.maxInt(u32));
    var result = try allocator.alignedAlloc(u32, 8, keys.len);
    errdefer allocator.free(result);
    for (keys, idx2offset) |key, idx2offset_i|
        result[search_fn(packed_mphf, key)] = idx2offset_i;
    return result;
}
// nblocks_n returns how many blocks a given number of bytes will take
// when rounded up to a multiple of section_length. The intermediate type B
// is T's bit width plus section_length_bits, so the aligned byte count
// cannot overflow before the final shift back down.
pub fn nblocks_n(comptime T: type, nbytes: usize) T {
    const B = switch (T) {
        u8 => u14,
        u16 => u22,
        u32 => u38,
        u64 => u70,
        else => @compileError("unsupported type " ++ @typeName(T)),
    };
    const upper = @intCast(B, mem.alignForward(nbytes, section_length));
    assert(upper & (section_length - 1) == 0);
    return @truncate(T, upper >> section_length_bits);
}
// nblocks returns how many blocks a particular slice will take.
// Thin wrapper over nblocks_n using the slice's byte length.
fn nblocks(comptime T: type, arr: []const u8) T {
    return nblocks_n(T, arr.len);
}
// Compile-time check that T's in-memory layout is fully specified, so it
// is safe to serialize raw bytes of it (used by iov()). Auto-layout
// structs are rejected; containers are checked recursively.
fn assertDefinedLayout(comptime T: type) void {
    return switch (T) {
        u4, u8, u16, u32, u64 => {},
        else => switch (@typeInfo(T)) {
            .Array => assertDefinedLayout(meta.Elem(T)),
            .Pointer => |info| assertDefinedLayout(info.child),
            .Enum => assertDefinedLayout(meta.Tag(T)),
            .Struct => {
                if (meta.containerLayout(T) == .Auto)
                    @compileError("layout of " ++ @typeName(T) ++ " is undefined");
                for (meta.fields(T)) |field|
                    assertDefinedLayout(field.type);
            },
            else => @compileError("unexpected type " ++ @typeName(T)),
        },
    };
}
const testing = std.testing;
// Round-trips a DB: writev() into a memfd, mmap it back, fromBytes(), and
// check that both views agree on counts and index sections.
test "DB read/write via iovec" {
    const allocator = testing.allocator;
    var corpus = try Corpus.testCorpus(allocator);
    defer corpus.deinit();
    var errc = ErrCtx{};
    var db = try DB.fromCorpus(allocator, &corpus, &errc);
    defer db.deinit(allocator);
    const fd = try os.memfd_create("test_turbonss_db", 0);
    defer os.close(fd);
    const len = try os.writev(fd, db.iov().constSlice());
    // note: buf is never munmap'ed; fine for a short-lived test process.
    const buf = try os.mmap(null, len, os.PROT.READ, os.MAP.SHARED, fd, 0);
    const db2 = try fromBytes(buf);
    try testing.expectEqual(corpus.groups.len, db.header.num_groups);
    try testing.expectEqual(corpus.users.len, db.header.num_users);
    try testing.expectEqual(db.header.num_groups, db2.header.num_groups);
    try testing.expectEqual(db.header.num_users, db2.header.num_users);
    const num_groups = db2.header.num_groups;
    const num_users = db2.header.num_users;
    // db2's index slices are section-padded, hence the re-slicing.
    try testing.expectEqualSlices(u32, db.idx_gid2group, db2.idx_gid2group[0..num_groups]);
    try testing.expectEqualSlices(u32, db.idx_uid2user, db2.idx_uid2user[0..num_users]);
    try testing.expectEqualStrings("", errc.unwrap().constSlice());
}
// Exercises group lookups: hits, misses, omit_members, and the
// BufferTooSmall path when the buffer is one byte short.
test "DB getgrnam/getgrgid" {
    var corpus = try Corpus.testCorpus(testing.allocator);
    defer corpus.deinit();
    var errc = ErrCtx{};
    var db = try DB.fromCorpus(testing.allocator, &corpus, &errc);
    defer db.deinit(testing.allocator);
    var buf = try testing.allocator.alignedAlloc(u8, 8, db.getgrBufsize());
    defer testing.allocator.free(buf);
    {
        try testing.expectEqual(try db.getgrnam("doesnotexist", buf, false), null);
        const all = (try db.getgrnam("all", buf, false)).?;
        try testing.expectEqual(all.gr_gid, 9999);
        try testing.expectEqualStrings(all.gr_name[0..4], "all\x00");
        const members = all.gr_mem;
        try testing.expectEqualStrings(mem.sliceTo(members[0].?, 0), "Name" ** 8);
        try testing.expectEqualStrings(mem.sliceTo(members[1].?, 0), "root");
        try testing.expectEqualStrings(mem.sliceTo(members[2].?, 0), "svc-bar");
        try testing.expectEqualStrings(mem.sliceTo(members[3].?, 0), "vidmantas");
        try testing.expectEqual(members[4], null);
    }
    {
        // omit_members: same group, but gr_mem starts with the terminator.
        const all = (try db.getgrnam("all", buf, true)).?;
        try testing.expectEqual(all.gr_gid, 9999);
        try testing.expectEqualStrings(all.gr_name[0..4], "all\x00");
        try testing.expectEqual(all.gr_mem[0], null);
    }
    {
        try testing.expectEqual(try db.getgrgid(42, buf, false), null);
        const all = (try db.getgrgid(9999, buf, false)).?;
        try testing.expectEqual(all.gr_gid, 9999);
        try testing.expectEqualStrings(all.gr_name[0..3], "all");
    }
    try testing.expectEqualStrings("", errc.unwrap().constSlice());
    // A buffer one byte smaller than getgrBufsize() must be rejected.
    _ = try db.getgrnam("all", buf, false);
    buf.len -= 1;
    try testing.expectError(error.BufferTooSmall, db.getgrnam("all", buf, false));
}
// Exercises user lookups: hits, misses, and the BufferTooSmall path when
// the buffer is one byte short of the longest entry.
test "DB getpwnam/getpwuid" {
    var corpus = try Corpus.testCorpus(testing.allocator);
    defer corpus.deinit();
    var errc = ErrCtx{};
    var db = try DB.fromCorpus(testing.allocator, &corpus, &errc);
    defer db.deinit(testing.allocator);
    var buf = try testing.allocator.alignedAlloc(u8, 8, db.getpwBufsize());
    defer testing.allocator.free(buf);
    {
        try testing.expectEqual(try db.getpwnam("doesnotexist", buf), null);
        const vidmantas = (try db.getpwnam("vidmantas", buf)).?;
        try testing.expectEqual(vidmantas.pw_uid, 128);
        try testing.expectEqual(vidmantas.pw_gid, 128);
        try testing.expectEqualStrings(vidmantas.pw_name[0..10], "vidmantas\x00");
        try testing.expectEqualStrings(vidmantas.pw_gecos[0..20], "Vidmantas Kaminskas\x00");
        try testing.expectEqualStrings(vidmantas.pw_dir[0..16], "/home/vidmantas\x00");
    }
    {
        try testing.expectEqual(try db.getpwuid(123456, buf), null);
        const vidmantas = (try db.getpwuid(128, buf)).?;
        try testing.expectEqual(vidmantas.pw_uid, 128);
        try testing.expectEqual(vidmantas.pw_gid, 128);
        try testing.expectEqualStrings(vidmantas.pw_name[0..10], "vidmantas\x00");
    }
    try testing.expectEqualStrings("", errc.unwrap().constSlice());
    // The longest username must fit exactly; one byte less must fail.
    const long = try db.getpwnam("Name" ** 8, buf);
    try testing.expectEqualStrings(long.?.pw_name[0..33], "Name" ** 8 ++ "\x00");
    buf.len -= 1;
    try testing.expectError(error.BufferTooSmall, db.getpwnam("Name" ** 8, buf));
}
// Decodes every user's compressed gid list back out of the blob and
// compares it against the corpus' group membership.
test "DB additionalGids" {
    const allocator = testing.allocator;
    var corpus = try Corpus.testCorpus(allocator);
    defer corpus.deinit();
    var additional_gids = try additionalGids(allocator, &corpus);
    defer allocator.free(additional_gids.idx2offset);
    defer allocator.free(additional_gids.blob);
    for (0..corpus.users.len) |user_idx| {
        const groups = corpus.user2groups[user_idx];
        const offset = additional_gids.idx2offset[user_idx];
        if (groups.len == 0) {
            // Groupless users must point at the shared empty entry.
            try testing.expect(offset == 0);
            continue;
        }
        var vit = try compress.varintSliceIterator(additional_gids.blob[offset..]);
        var it = compress.deltaDecompressionIterator(&vit);
        try testing.expectEqual(it.remaining(), groups.len);
        var i: u64 = 0;
        const corpusGids = corpus.groups.items(.gid);
        while (try it.next()) |gid| : (i += 1) {
            try testing.expectEqual(gid, corpusGids[groups[i]]);
        }
        try testing.expectEqual(i, groups.len);
    }
}
// Checks that the packed mphf is minimal and perfect for the test gids:
// the four hashes are exactly {0,1,2,3} with no collisions.
test "DB pack gids" {
    const allocator = testing.allocator;
    var corpus = try Corpus.testCorpus(allocator);
    defer corpus.deinit();
    const cmph_gid = try cmph.packU32(allocator, corpus.groups.items(.gid));
    defer allocator.free(cmph_gid);
    const k1 = bdz.search_u32(cmph_gid, 0);
    const k2 = bdz.search_u32(cmph_gid, 128);
    const k3 = bdz.search_u32(cmph_gid, 9999);
    const k4 = bdz.search_u32(cmph_gid, 100000);
    var hashes = [_]u32{ k1, k2, k3, k4 };
    sort.heap(u32, &hashes, {}, comptime sort.asc(u32));
    for (hashes, 0..) |hash, i|
        try testing.expectEqual(i, hash);
}
// Fixed offsets fed to bdzIdx in the tests below.
const hash_offsets = &[_]u32{ 0, 10, 20, 30 };
// Asserts that `arr` contains every value of hash_offsets (in any order)
// and, via putNoClobber, that arr has no duplicates.
fn expectUsedHashes(allocator: Allocator, arr: []const u32) !void {
    var used = AutoHashMap(u32, void).init(allocator);
    defer used.deinit();
    for (arr) |elem|
        try used.putNoClobber(elem, {});
    for (hash_offsets) |item|
        try testing.expect(used.get(item) != null);
}
// bdzIdx over u32 keys: every offset must appear exactly once in the result.
test "DB bdzIdx on u32" {
    const keys = [_]u32{ 42, 1, 2, 3 };
    const mphf = try cmph.packU32(testing.allocator, keys[0..]);
    defer testing.allocator.free(mphf);
    var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..], hash_offsets);
    defer testing.allocator.free(result);
    try expectUsedHashes(testing.allocator, result);
}
// bdzIdx over string keys: every offset must appear exactly once in the result.
test "DB bdzIdx on str" {
    const keys = [_][]const u8{ "42", "1", "2", "3" };
    const mphf = try cmph.packStr(testing.allocator, keys[0..]);
    defer testing.allocator.free(mphf);
    var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..], hash_offsets);
    defer testing.allocator.free(result);
    try expectUsedHashes(testing.allocator, result);
}
// Table-driven check of nblocks for several result types; the 63/64/65
// boundary cases imply section_length == 64.
test "DB nblocks" {
    const tests = .{
        .{ 0, &[_]u8{} },
        .{ 1, &[_]u8{ 1, 2, 42 } },
        .{ 1, &[_]u8{1} ** 63 },
        .{ 1, &[_]u8{1} ** 64 },
        .{ 2, &[_]u8{1} ** 65 },
        .{ 255, &[_]u8{1} ** (255 * 64) },
    };
    inline for (tests) |tt| {
        try testing.expectEqual(nblocks(u8, tt[1]), tt[0]);
        try testing.expectEqual(nblocks(u32, tt[1]), tt[0]);
        try testing.expectEqual(nblocks(u64, tt[1]), tt[0]);
    }
}