From 347f0a13927a00b8b2da79786582e13145bfe111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 23 Mar 2022 22:14:48 +0200 Subject: [PATCH] Write the file Also move DB and Corpus to their own files while doing that. --- README.md | 2 +- lib/Corpus.zig | 284 ++++++++++++++++ lib/DB.zig | 604 ++++++++++++++++++++++++++++++++++ lib/header.zig | 22 +- lib/sections.zig | 840 ----------------------------------------------- lib/shell.zig | 2 - lib/so.zig | 16 +- lib/test_all.zig | 3 +- 8 files changed, 908 insertions(+), 865 deletions(-) create mode 100644 lib/Corpus.zig create mode 100644 lib/DB.zig delete mode 100644 lib/sections.zig diff --git a/README.md b/README.md index 49bdcb9..a6d7081 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,7 @@ The turbonss header looks like this: OFFSET TYPE NAME DESCRIPTION 0 [4]u8 magic f0 9f a4 b7 4 u8 version 0 - 5 u8 bigendian 0 for little-endian, 1 for big-endian + 5 u8 endian 0 for little, 1 for big 6 u8 nblocks_shell_blob max value: 63 7 u8 num_shells max value: 63 8 u32 num_groups number of group entries diff --git a/lib/Corpus.zig b/lib/Corpus.zig new file mode 100644 index 0000000..d071069 --- /dev/null +++ b/lib/Corpus.zig @@ -0,0 +1,284 @@ +const std = @import("std"); +const mem = std.mem; +const math = std.math; +const sort = std.sort; +const unicode = std.unicode; +const Allocator = std.mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; +const StringHashMap = std.StringHashMap; +const MultiArrayList = std.MultiArrayList; +const ArrayListUnmanaged = std.ArrayListUnmanaged; + +const User = @import("user.zig").User; +const Group = @import("group.zig").Group; + +pub const Corpus = @This(); + +arena: ArenaAllocator, + +// sorted by name, by unicode codepoint +users: MultiArrayList(User), +// sorted by gid +groups: MultiArrayList(Group), + +name2user: StringHashMap(u32), +name2group: StringHashMap(u32), +group2users: []const []const u32, +user2groups: []const []const u32, + +pub fn init( + baseAllocator: Allocator, + usersConst: []const User, + groupsConst: []const Group, +) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound, TooMany }!Corpus { + if (usersConst.len >= math.maxInt(u32)) return error.TooMany; + if (groupsConst.len >= math.maxInt(u32)) return error.TooMany; + + var arena = ArenaAllocator.init(baseAllocator); + var allocator = arena.allocator(); + errdefer arena.deinit(); + + var users_arr = try allocator.alloc(User, usersConst.len); + var groups_arr = try allocator.alloc(Group, groupsConst.len); + for (usersConst) |*user, i| + users_arr[i] = try user.clone(allocator); + for (groupsConst) |*group, i| + groups_arr[i] = try group.clone(allocator); + + sort.sort(User, users_arr, {}, cmpUser); + sort.sort(Group, groups_arr, {}, cmpGroup); + + var users = MultiArrayList(User){}; + try users.ensureTotalCapacity(allocator, users_arr.len); + for (users_arr) |user| + users.appendAssumeCapacity(user); + var groups = MultiArrayList(Group){}; + try groups.ensureTotalCapacity(allocator, groups_arr.len); + for (groups_arr) |group| + groups.appendAssumeCapacity(group); + + var name2user = StringHashMap(u32).init(allocator); + var name2group = StringHashMap(u32).init(allocator); + for (users.items(.name)) |name, i| { + var res1 = try name2user.getOrPut(name); + if (res1.found_existing) + return error.Duplicate; + res1.value_ptr.* = @intCast(u32, i); + } + + for (groups.items(.name)) |name, i| { + var res1 = try name2group.getOrPut(name); + if (res1.found_existing) + return error.Duplicate; + res1.value_ptr.* = @intCast(u32, i); + } + + var group2users = try allocator.alloc([]u32, groups.len); + + // uses baseAllocator, because it will be freed before + // returning from this function. This keeps the arena clean. + var user2groups = try baseAllocator.alloc(ArrayListUnmanaged(u32), users.len); + defer baseAllocator.free(user2groups); + mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){}); + + for (groups.items(.members)) |groupmembers, i| { + var members = try allocator.alloc(u32, groupmembers.count()); + members.len = 0; + + var it = groupmembers.iterator(); + while (it.next()) |member_name| { + if (name2user.get(member_name.*)) |user_idx| { + members.len += 1; + members[members.len - 1] = user_idx; + try user2groups[user_idx].append(allocator, @intCast(u32, i)); + } else return error.NotFound; + } + + group2users[i] = members; + } + + for (group2users) |*groupusers| + sort.sort(u32, groupusers.*, {}, comptime sort.asc(u32)); + + var user2groups_final = try allocator.alloc([]const u32, users.len); + user2groups_final.len = users.len; + for (user2groups) |*usergroups, i| { + sort.sort(u32, usergroups.items, {}, comptime sort.asc(u32)); + user2groups_final[i] = usergroups.toOwnedSlice(allocator); + } + + return Corpus{ + .arena = arena, + .users = users, + .groups = groups, + .name2user = name2user, + .name2group = name2group, + .group2users = group2users, + .user2groups = user2groups_final, + }; +} + +pub fn deinit(self: *Corpus) void { + self.arena.deinit(); + self.* = undefined; +} + +// cmpUser compares two users for sorting. By username's utf8 codepoints, ascending. +fn cmpUser(_: void, a: User, b: User) bool { + var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator(); + var utf8_b = (unicode.Utf8View.init(b.name) catch unreachable).iterator(); + while (utf8_a.nextCodepoint()) |codepoint_a| { + if (utf8_b.nextCodepoint()) |codepoint_b| { + if (codepoint_a == codepoint_b) { + continue; + } else return codepoint_a < codepoint_b; + } + + // a is a prefix of b. It is thus shorter. + return false; + } + // b is a prefix of a + return true; +} + +fn cmpGroup(_: void, a: Group, b: Group) bool { + return a.gid < b.gid; +} + +fn testUser(name: []const u8) User { + var result = mem.zeroes(User); + result.name = name; + return result; +} + +const testing = std.testing; +const someMembers = @import("group.zig").someMembers; + +test "users compare function" { + const a = testUser("a"); + const b = testUser("b"); + const bb = testUser("bb"); + try testing.expect(cmpUser({}, a, b)); + try testing.expect(!cmpUser({}, b, a)); + try testing.expect(cmpUser({}, a, bb)); + try testing.expect(!cmpUser({}, bb, a)); + try testing.expect(cmpUser({}, b, bb)); + try testing.expect(!cmpUser({}, bb, b)); +} + +pub fn testCorpus(allocator: Allocator) !Corpus { + const users = [_]User{ User{ + .uid = 0, + .gid = 0, + .name = "root", + .gecos = "", + .home = "/root", + .shell = "/bin/bash", + }, User{ + .uid = 128, + .gid = 128, + .name = "vidmantas", + .gecos = "Vidmantas Kaminskas", + .home = "/home/vidmantas", + .shell = "/bin/bash", + }, User{ + .uid = 1000, + .gid = math.maxInt(u32), + .name = "Name" ** 8, + .gecos = "Gecos" ** 51, + .home = "Home" ** 16, + .shell = "She.LllL" ** 8, + }, User{ + .uid = 100000, + .gid = 1002, + .name = "svc-bar", + .gecos = "", + .home = "/", + .shell = "/", + }, User{ + .uid = 65534, + .gid = 65534, + .name = "nobody", + .gecos = "nobody", + .home = "/nonexistent", + .shell = "/usr/sbin/nologin", + } }; + + var members0 = try someMembers( + allocator, + &[_][]const u8{"root"}, + ); + defer members0.deinit(); + + var members1 = try someMembers( + allocator, + &[_][]const u8{"vidmantas"}, + ); + defer members1.deinit(); + + var members2 = try someMembers( + allocator, + &[_][]const u8{ "svc-bar", "vidmantas" }, + ); + defer members2.deinit(); + + var members3 = try someMembers( + allocator, + &[_][]const u8{ "svc-bar", "Name" ** 8, "vidmantas", "root" }, + ); + defer members3.deinit(); + + const groups = [_]Group{ + Group{ .gid = 0, .name = "root", .members = members0 }, + Group{ .gid = 128, .name = "vidmantas", .members = members1 }, + Group{ .gid = 9999, .name = "all", .members = members3 }, + Group{ .gid = 100000, .name = "service-account", .members = members2 }, + }; + + return try Corpus.init(allocator, users[0..], groups[0..]); +} + +test "test corpus" { + var corpus = try testCorpus(testing.allocator); + defer corpus.deinit(); + + const name_name = 0; + const nobody = 1; + const root = 2; + const svc_bar = 3; + const vidmantas = 4; + + const usernames = corpus.users.items(.name); + try testing.expectEqualStrings(usernames[name_name], "Name" ** 8); + try testing.expectEqualStrings(usernames[nobody], "nobody"); + try testing.expectEqualStrings(usernames[root], "root"); + try testing.expectEqualStrings(usernames[svc_bar], "svc-bar"); + try testing.expectEqualStrings(usernames[vidmantas], "vidmantas"); + + const g_root = 0; + const g_vidmantas = 1; + const g_all = 2; + const g_service_account = 3; + + const groupnames = corpus.groups.items(.name); + try testing.expectEqualStrings(groupnames[g_root], "root"); + try testing.expectEqualStrings(groupnames[g_service_account], "service-account"); + try testing.expectEqualStrings(groupnames[g_vidmantas], "vidmantas"); + try testing.expectEqualStrings(groupnames[g_all], "all"); + + try testing.expectEqual(corpus.name2user.get("404"), null); + try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas); + try testing.expectEqual(corpus.name2group.get("404"), null); + try testing.expectEqual(corpus.name2group.get("vidmantas").?, g_vidmantas); + + const membersOfAll = corpus.group2users[g_all]; + try testing.expectEqual(membersOfAll[0], name_name); + try testing.expectEqual(membersOfAll[1], root); + try testing.expectEqual(membersOfAll[2], svc_bar); + try testing.expectEqual(membersOfAll[3], vidmantas); + + const groupsOfVidmantas = corpus.user2groups[vidmantas]; + try testing.expectEqual(groupsOfVidmantas[0], g_vidmantas); + try testing.expectEqual(groupsOfVidmantas[1], g_all); + try testing.expectEqual(groupsOfVidmantas[2], g_service_account); +} diff --git a/lib/DB.zig b/lib/DB.zig new file mode 100644 index 0000000..196e483 --- /dev/null +++ b/lib/DB.zig @@ -0,0 +1,604 @@ +const std = @import("std"); +const os = std.os; +const mem = std.mem; +const math = std.math; +const meta = std.meta; +const sort = std.sort; +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const AutoHashMap = std.AutoHashMap; +const BoundedArray = std.BoundedArray; + +const Corpus = @import("Corpus.zig"); +const pad = @import("padding.zig"); +const compress = @import("compress.zig"); +const PackedUser = @import("user.zig").PackedUser; +const User = @import("user.zig").User; +const Group = @import("group.zig").Group; +const GroupStored = @import("group.zig").GroupStored; +const PackedGroup = @import("group.zig").PackedGroup; +const ShellSections = @import("shell.zig").ShellWriter.ShellSections; +const ShellReader = @import("shell.zig").ShellReader; +const ShellWriter = @import("shell.zig").ShellWriter; +const Header = @import("header.zig").Header; +const max_shells = @import("shell.zig").max_shells; +const section_length_bits = @import("header.zig").section_length_bits; +const section_length = @import("header.zig").section_length; +const cmph = @import("cmph.zig"); +const bdz = @import("bdz.zig"); + +const zeroes = &[_]u8{0} ** section_length; + +const DB = @This(); +// All sections, as they end up in the DB. Order is important. +header: Header, +bdz_gid: []const u8, +bdz_groupname: []const u8, +bdz_uid: []const u8, +bdz_username: []const u8, +idx_gid2group: []const u32, +idx_groupname2group: []const u32, +idx_uid2user: []const u32, +idx_name2user: []const u32, +shell_index: []const u16, +shell_blob: []const u8, +groups: []const u8, +users: []const u8, +groupmembers: []const u8, +additional_gids: []const u8, + +pub fn fromCorpus( + allocator: Allocator, + corpus: *const Corpus, +) error{ OutOfMemory, InvalidRecord, TooMany }!DB { + const gids = corpus.groups.items(.gid); + const gnames = corpus.groups.items(.name); + const uids = corpus.users.items(.uid); + const unames = corpus.users.items(.name); + + var bdz_gid = try cmph.packU32(allocator, gids); + errdefer allocator.free(bdz_gid); + + var bdz_groupname = try cmph.packStr(allocator, gnames); + errdefer allocator.free(bdz_groupname); + + var bdz_uid = try cmph.packU32(allocator, uids); + errdefer allocator.free(bdz_uid); + + const bdz_username = try cmph.packStr(allocator, unames); + errdefer allocator.free(bdz_username); + + var shell = try shellSections(allocator, corpus); + defer shell.deinit(); + + var additional_gids = try additionalGids(allocator, corpus); + errdefer allocator.free(additional_gids.blob); + + var users = try usersSection(allocator, corpus, &additional_gids, &shell); + allocator.free(additional_gids.idx2offset); + errdefer allocator.free(users.blob); + + var groupmembers = try groupMembers(allocator, corpus, users.idx2offset); + errdefer allocator.free(groupmembers.blob); + + var groups = try groupsSection(allocator, corpus, groupmembers.idx2offset); + allocator.free(groupmembers.idx2offset); + errdefer allocator.free(groups.blob); + + var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset); + errdefer allocator.free(idx_gid2group); + + var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset); + allocator.free(groups.idx2offset); + errdefer allocator.free(idx_groupname2group); + + var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset); + errdefer allocator.free(idx_uid2user); + + var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset); + allocator.free(users.idx2offset); + errdefer allocator.free(idx_name2user); + + const header = Header{ + .nblocks_shell_blob = nblocks(u8, shell.blob.constSlice()), + .num_shells = shell.len, + .num_groups = groups.len, + .num_users = users.len, + .nblocks_bdz_gid = nblocks(u32, bdz_gid), + .nblocks_bdz_groupname = nblocks(u32, bdz_groupname), + .nblocks_bdz_uid = nblocks(u32, bdz_uid), + .nblocks_bdz_username = nblocks(u32, bdz_username), + .nblocks_groups = nblocks(u64, groups.blob), + .nblocks_users = nblocks(u64, users.blob), + .nblocks_groupmembers = nblocks(u64, groupmembers.blob), + .nblocks_additional_gids = nblocks(u64, additional_gids.blob), + }; + + return DB{ + .header = header, + .bdz_gid = bdz_gid, + .bdz_groupname = bdz_groupname, + .bdz_uid = bdz_uid, + .bdz_username = bdz_username, + .idx_gid2group = idx_gid2group, + .idx_groupname2group = idx_groupname2group, + .idx_uid2user = idx_uid2user, + .idx_name2user = idx_name2user, + .shell_index = shell.index.constSlice(), + .shell_blob = shell.blob.constSlice(), + .groups = groups.blob, + .users = users.blob, + .groupmembers = groupmembers.blob, + .additional_gids = additional_gids.blob, + }; +} + +const DB_fields = meta.fields(DB); +pub fn iov(self: *const DB) BoundedArray(os.iovec_const, DB_fields.len * 2) { + var result = BoundedArray(os.iovec_const, DB_fields.len * 2).init(0) catch unreachable; + inline for (DB_fields) |field| { + comptime assertDefinedLayout(field.field_type); + const value = @field(self, field.name); + const bytes: []const u8 = switch (@TypeOf(value)) { + Header => mem.asBytes(&value), + else => mem.sliceAsBytes(value), + }; + result.appendAssumeCapacity(os.iovec_const{ + .iov_base = bytes.ptr, + .iov_len = bytes.len, + }); + const padding = pad.until(usize, section_length_bits, bytes.len); + if (padding != 0) + result.appendAssumeCapacity(.{ + .iov_base = zeroes, + .iov_len = padding, + }); + } + + return result; +} + +pub fn fromBytes(buf: []const u8) Header.Invalid!DB { + const header = try Header.fromBytes(buf); + const lengths = .{ + .{ "bdz_gid", header.nblocks_bdz_gid }, + .{ "bdz_groupname", header.nblocks_bdz_groupname }, + .{ "bdz_uid", header.nblocks_bdz_uid }, + .{ "bdz_username", header.nblocks_bdz_username }, + .{ "idx_gid2group", nblocks_n(header.num_groups * 4) }, + .{ "idx_groupname2group", nblocks_n(header.num_groups * 4) }, + .{ "idx_uid2user", nblocks_n(header.num_users * 4) }, + .{ "idx_name2user", nblocks_n(header.num_users * 4) }, + .{ "shell_index", nblocks_n(header.num_shells * 2) }, + .{ "shell_blob", header.nblocks_shell_blob }, + .{ "groups", header.nblocks_groups }, + .{ "users", header.nblocks_users }, + .{ "groupmembers", header.nblocks_groupmembers }, + .{ "additional_gids", header.nblocks_additional_gids }, + }; + + var result: DB = undefined; + result.header = header; + var offset = comptime nblocks_n(usize, @sizeOf(Header)); + comptime assert(DB_fields[0].name == "header"); + inline for (DB_fields[1..]) |field, i| { + assert(lengths[i][0] == field.name); + + const start = offset << 6; + const end = (offset + lengths[i][1]) << 6; + const value = mem.bytesAsValue(field.field_type, buf[start..end]); + @field(result, field.name) = value; + offset += lengths[i][1]; + } + + return result; +} + +pub fn deinit(self: *DB, allocator: Allocator) void { + allocator.free(self.bdz_gid); + allocator.free(self.bdz_groupname); + allocator.free(self.bdz_uid); + allocator.free(self.bdz_username); + allocator.free(self.idx_gid2group); + allocator.free(self.idx_groupname2group); + allocator.free(self.idx_uid2user); + allocator.free(self.idx_name2user); + allocator.free(self.groups); + allocator.free(self.users); + allocator.free(self.groupmembers); + allocator.free(self.additional_gids); + self.* = undefined; +} + +fn shellSections( + allocator: Allocator, + corpus: *const Corpus, +) error{OutOfMemory}!ShellSections { + var popcon = ShellWriter.init(allocator); + for (corpus.users.items(.shell)) |shell| + try popcon.put(shell); + return popcon.toOwnedSections(max_shells); +} + +const AdditionalGids = struct { + // user index -> offset in blob + idx2offset: []const u64, + // compressed user gids blob. A blob contains N <= users.len items, + // an item is: + // len: varint + // gid: [varint]varint, + // ... and the gid list is delta-compressed. + blob: []const u8, +}; + +fn additionalGids( + allocator: Allocator, + corpus: *const Corpus, +) error{OutOfMemory}!AdditionalGids { + var blob = ArrayList(u8).init(allocator); + errdefer blob.deinit(); + var idx2offset = try allocator.alloc(u64, corpus.users.len); + errdefer allocator.free(idx2offset); + + // zero'th entry is empty, so groupless users can refer to it. + try compress.appendUvarint(&blob, 0); + + var scratch = try allocator.alloc(u32, 256); + defer allocator.free(scratch); + for (corpus.user2groups) |usergroups, user_idx| { + if (usergroups.len == 0) { + idx2offset[user_idx] = 0; + continue; + } + idx2offset[user_idx] = blob.items.len; + scratch = try allocator.realloc(scratch, usergroups.len); + scratch.len = usergroups.len; + const corpusGids = corpus.groups.items(.gid); + for (usergroups) |group_idx, i| + scratch[i] = corpusGids[group_idx]; + compress.deltaCompress(u32, scratch) catch |err| switch (err) { + error.NotSorted => unreachable, + }; + try compress.appendUvarint(&blob, usergroups.len); + for (scratch) |gid| + try compress.appendUvarint(&blob, gid); + } + + return AdditionalGids{ + .idx2offset = idx2offset, + .blob = blob.toOwnedSlice(), + }; +} + +const UsersSection = struct { + // number of users in this section + len: u32, + // user index -> offset in blob + idx2offset: []const u32, + blob: []const u8, +}; + +fn usersSection( + allocator: Allocator, + corpus: *const Corpus, + gids: *const AdditionalGids, + shells: *const ShellSections, +) error{ OutOfMemory, InvalidRecord, TooMany }!UsersSection { + var idx2offset = try allocator.alloc(u32, corpus.users.len); + errdefer allocator.free(idx2offset); + // as of writing each user takes 12 bytes + blobs + padding, padded to + // 8 bytes. 24 is an optimistic lower bound for an average record size. + var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len); + errdefer blob.deinit(); + var i: usize = 0; + while (i < corpus.users.len) : (i += 1) { + // TODO: this is inefficient by calling `.slice()` on every iteration + const user = corpus.users.get(i); + const user_offset = math.cast(u35, blob.items.len) catch |err| switch (err) { + error.Overflow => return error.TooMany, + }; + assert(user_offset & 7 == 0); + idx2offset[i] = @truncate(u32, user_offset >> 3); + try PackedUser.packTo( + &blob, + user, + gids.idx2offset[i], + shells.shell2idx, + ); + try pad.arrayList(&blob, PackedUser.alignment_bits); + } + return UsersSection{ + .len = @intCast(u32, corpus.users.len), + .idx2offset = idx2offset, + .blob = blob.toOwnedSlice(), + }; +} + +const GroupMembers = struct { + // group index to it's offset in blob + idx2offset: []const u64, + // members are delta-varint encoded byte-offsets to the user struct + blob: []const u8, +}; + +fn groupMembers( + allocator: Allocator, + corpus: *const Corpus, + user2offset: []const u32, +) error{OutOfMemory}!GroupMembers { + var idx2offset = try allocator.alloc(u64, corpus.groups.len); + errdefer allocator.free(idx2offset); + var blob = ArrayList(u8).init(allocator); + errdefer blob.deinit(); + // zero'th entry is empty, so empty groups can refer to it + try compress.appendUvarint(&blob, 0); + + var scratch = try ArrayList(u32).initCapacity(allocator, 1024); + defer scratch.deinit(); + + for (corpus.group2users) |members, group_idx| { + if (members.len == 0) { + idx2offset[group_idx] = 0; + continue; + } + + idx2offset[group_idx] = blob.items.len; + try scratch.ensureTotalCapacity(members.len); + scratch.items.len = members.len; + for (members) |user_idx, i| + scratch.items[i] = user2offset[user_idx]; + + compress.deltaCompress(u32, scratch.items) catch |err| switch (err) { + error.NotSorted => unreachable, + }; + try compress.appendUvarint(&blob, members.len); + for (scratch.items) |elem| + try compress.appendUvarint(&blob, elem); + } + return GroupMembers{ + .idx2offset = idx2offset, + .blob = blob.toOwnedSlice(), + }; +} + +const GroupsSection = struct { + // number of groups in this section + len: u32, + // group index -> offset in blob + idx2offset: []const u32, + blob: []const u8, +}; + +fn groupsSection( + allocator: Allocator, + corpus: *const Corpus, + members_offset: []const u64, +) error{ OutOfMemory, InvalidRecord }!GroupsSection { + var idx2offset = try allocator.alloc(u32, corpus.groups.len); + errdefer allocator.free(idx2offset); + + var blob = try ArrayList(u8).initCapacity(allocator, 8 * corpus.groups.len); + errdefer blob.deinit(); + + var i: usize = 0; + while (i < corpus.groups.len) : (i += 1) { + // TODO: this is inefficient; it's calling `.slice()` on every iteration + const group = corpus.groups.get(i); + const group_offset = @intCast(u32, blob.items.len); + assert(group_offset & 7 == 0); + idx2offset[i] = @truncate(u32, group_offset >> 3); + const group_stored = GroupStored{ + .gid = group.gid, + .name = group.name, + .members_offset = members_offset[i], + }; + try PackedGroup.packTo(&blob, group_stored); + try pad.arrayList(&blob, PackedGroup.alignment_bits); + } + + return GroupsSection{ + .len = @intCast(u32, corpus.groups.len), + .idx2offset = idx2offset, + .blob = blob.toOwnedSlice(), + }; +} + +// creates a bdz index using packed_mphf. +// hash = bdz_search(packed_mphf, keys[i]); +// result[hash] = idx2offset[i]; +fn bdzIdx( + comptime T: type, + allocator: Allocator, + packed_mphf: []const u8, + keys: []const T, + idx2offset: []const u32, +) error{OutOfMemory}![]const u32 { + const search_fn = comptime blk: { + switch (T) { + u32 => break :blk bdz.search_u32, + []const u8 => break :blk bdz.search, + else => unreachable, + } + }; + assert(keys.len <= math.maxInt(u32)); + var result = try allocator.alloc(u32, keys.len); + for (keys) |key, i| + result[search_fn(packed_mphf, key)] = idx2offset[i]; + return result; +} + +// nblocks_n returns how many blocks a given number of bytes will take +fn nblocks_n(comptime T: type, nbytes: usize) T { + const B = switch (T) { + u8 => u14, + u32 => u38, + u64 => u70, + else => @compileError("only u8, u32 and u64 are supported"), + }; + const upper = pad.roundUp(B, section_length_bits, @intCast(B, nbytes)); + assert(upper & (section_length - 1) == 0); + return @truncate(T, upper >> 6); +} + +// nblocks returns how many blocks a particular slice will take. +fn nblocks(comptime T: type, arr: []const u8) T { + return nblocks_n(T, arr.len); +} + +fn assertDefinedLayout(comptime T: type) void { + return switch (T) { + u8, u16, u32, u64 => {}, + else => switch (@typeInfo(T)) { + .Array, .Pointer => assertDefinedLayout(meta.Elem(T)), + .Enum => assertDefinedLayout(meta.Tag(T)), + .Struct => { + if (meta.containerLayout(T) == .Auto) + @compileError("layout of " ++ @typeName(T) ++ " is undefined"); + for (meta.fields(T)) |field| + assertDefinedLayout(field.field_type); + }, + else => @compileError("unexpected type " ++ @typeName(T)), + }, + }; +} + +const testing = std.testing; + +test "test groups, group members and users" { + const allocator = testing.allocator; + var corpus = try Corpus.testCorpus(allocator); + defer corpus.deinit(); + + var db = try DB.fromCorpus(allocator, &corpus); + defer db.deinit(allocator); + + // TODO: replace with an integration test when high-level + // reader API is present + //const blob = sections.groupmembers.blob; + //var i: usize = 0; + //while (i < corpus.groups.len) : (i += 1) { + //const offset = sections.groupmembers.idx2offset[i]; + //var vit = try compress.VarintSliceIterator(blob[offset..]); + //var it = compress.DeltaDecompressionIterator(&vit); + //for (corpus.group2users[i]) |user_idx| { + // const got_user_offset = (try it.next()).?; + // const want_user_offset = sections.users.idx2offset[user_idx]; + // try testing.expectEqual(got_user_offset, want_user_offset); + //} + //try testing.expectEqual(it.next(), null); + //} + + //var it = PackedUser.iterator(sections.users.blob, sections.shell_reader); + //i = 0; + //while (i < corpus.users.len) : (i += 1) { + // const got = (try it.next()).?; + // const user = corpus.users.get(i); + // try testing.expectEqual(user.uid, got.uid()); + // try testing.expectEqual(user.gid, got.gid()); + // try testing.expectEqualStrings(user.name, got.name()); + // try testing.expectEqualStrings(user.gecos, got.gecos()); + // try testing.expectEqualStrings(user.home, got.home()); + // try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader)); + //} + + const fd = try os.memfd_create("test_turbonss_db", 0); + defer os.close(fd); + + const written = try os.writev(fd, db.iov().constSlice()); + try testing.expect(written != 0); +} + +test "additionalGids" { + const allocator = testing.allocator; + var corpus = try Corpus.testCorpus(allocator); + defer corpus.deinit(); + + var additional_gids = try additionalGids(allocator, &corpus); + defer allocator.free(additional_gids.idx2offset); + defer allocator.free(additional_gids.blob); + + var user_idx: usize = 0; + while (user_idx < corpus.users.len) : (user_idx += 1) { + const groups = corpus.user2groups[user_idx]; + const offset = additional_gids.idx2offset[user_idx]; + if (groups.len == 0) { + try testing.expect(offset == 0); + continue; + } + var vit = try compress.VarintSliceIterator(additional_gids.blob[offset..]); + var it = compress.DeltaDecompressionIterator(&vit); + try testing.expectEqual(it.remaining(), groups.len); + var i: u64 = 0; + const corpusGids = corpus.groups.items(.gid); + while (try it.next()) |gid| : (i += 1) { + try testing.expectEqual(gid, corpusGids[groups[i]]); + } + try testing.expectEqual(i, groups.len); + } +} + +test "pack gids" { + const allocator = testing.allocator; + var corpus = try Corpus.testCorpus(allocator); + defer corpus.deinit(); + + const cmph_gid = try cmph.packU32(allocator, corpus.groups.items(.gid)); + defer allocator.free(cmph_gid); + + const k1 = bdz.search_u32(cmph_gid, 0); + const k2 = bdz.search_u32(cmph_gid, 128); + const k3 = bdz.search_u32(cmph_gid, 9999); + const k4 = bdz.search_u32(cmph_gid, 100000); + var hashes = &[_]u32{ k1, k2, k3, k4 }; + sort.sort(u32, hashes, {}, comptime sort.asc(u32)); + for (hashes) |hash, i| + try testing.expectEqual(i, hash); +} + +const hash_offsets = &[_]u32{ 0, 10, 20, 30 }; + +fn expectUsedHashes(allocator: Allocator, arr: []const u32) !void { + var used = AutoHashMap(u32, void).init(allocator); + defer used.deinit(); + + for (arr) |elem| + try used.putNoClobber(elem, {}); + for (hash_offsets) |item| + try testing.expect(used.get(item) != null); +} + +test "bdzIdx on u32" { + const keys = [_]u32{ 42, 1, 2, 3 }; + const mphf = try cmph.packU32(testing.allocator, keys[0..]); + defer testing.allocator.free(mphf); + var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..], hash_offsets); + defer testing.allocator.free(result); + try expectUsedHashes(testing.allocator, result); +} + +test "bdzIdx on str" { + const keys = [_][]const u8{ "42", "1", "2", "3" }; + const mphf = try cmph.packStr(testing.allocator, keys[0..]); + defer testing.allocator.free(mphf); + var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..], hash_offsets); + defer testing.allocator.free(result); + try expectUsedHashes(testing.allocator, result); +} + +test "nblocks" { + const tests = .{ + .{ 0, &[_]u8{} }, + .{ 1, &[_]u8{ 1, 2, 42 } }, + .{ 1, &[_]u8{1} ** 63 }, + .{ 1, &[_]u8{1} ** 64 }, + .{ 2, &[_]u8{1} ** 65 }, + .{ 255, &[_]u8{1} ** (255 * 64) }, + }; + + inline for (tests) |tt| { + try testing.expectEqual(nblocks(u8, tt[1]), tt[0]); + try testing.expectEqual(nblocks(u32, tt[1]), tt[0]); + try testing.expectEqual(nblocks(u64, tt[1]), tt[0]); + } +} diff --git a/lib/header.zig b/lib/header.zig index be4c180..5abc896 100644 --- a/lib/header.zig +++ b/lib/header.zig @@ -1,9 +1,9 @@ const std = @import("std"); -const native_endian = @import("builtin").target.cpu.arch.endian(); const mem = std.mem; -const max_shells = @import("shell.zig").max_shells; +const math = std.math; +const native_endian = @import("builtin").target.cpu.arch.endian(); -const header_size = @sizeOf(Header); +const max_shells = @import("shell.zig").max_shells; const magic = [4]u8{ 0xf0, 0x9f, 0xa4, 0xb7 }; const version = 0; @@ -22,11 +22,10 @@ const Endian = enum(u8) { pub const section_length_bits = 6; pub const section_length = 1 << section_length_bits; -pub const InvalidHeader = error{ +pub const Invalid = error{ InvalidMagic, InvalidVersion, InvalidEndianess, - TooManyShells, }; pub const Header = packed struct { @@ -46,7 +45,7 @@ pub const Header = packed struct { nblocks_groupmembers: u64, nblocks_additional_gids: u64, - pub fn fromBytes(blob: []const u8) InvalidHeader!Header { + pub fn fromBytes(blob: []const u8) Invalid!Header { const self = mem.bytesAsValue(Header, blob); if (!mem.eql(magic, blob[0..4])) @@ -58,15 +57,8 @@ pub const Header = packed struct { if (self.endian != Endian.native()) return error.InvalidEndianess; - if (self.num_shells > max_shells) - return error.TooManyShells; - return self; } - - pub fn asBytes(self: *const Header) []const u8 { - return mem.asBytes(self); - } }; const testing = std.testing; @@ -75,6 +67,10 @@ test "Section length is a power of two" { try testing.expect(std.math.isPowerOfTwo(section_length)); } +test "Header fits into a section" { + try testing.expect(@sizeOf(Header) <= section_length); +} + test "bit header size is equal to @sizeOf(Header)" { try testing.expectEqual(@sizeOf(Header) * 8, @bitSizeOf(Header)); } diff --git a/lib/sections.zig b/lib/sections.zig deleted file mode 100644 index 032481b..0000000 --- a/lib/sections.zig +++ /dev/null @@ -1,840 +0,0 @@ -const std = @import("std"); -const os = std.os; -const fmt = std.fmt; -const mem = std.mem; -const math = std.math; -const meta = std.meta; -const sort = std.sort; -const assert = std.debug.assert; -const unicode = std.unicode; -const Allocator = std.mem.Allocator; -const ArenaAllocator = std.heap.ArenaAllocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; -const ArrayList = std.ArrayList; -const MultiArrayList = std.MultiArrayList; -const StringHashMap = std.StringHashMap; -const AutoHashMap = std.AutoHashMap; -const BufSet = std.BufSet; -const BoundedArray = std.BoundedArray; - -const pad = @import("padding.zig"); -const compress = @import("compress.zig"); -const PackedUser = @import("user.zig").PackedUser; -const User = @import("user.zig").User; -const Group = @import("group.zig").Group; -const GroupStored = @import("group.zig").GroupStored; -const PackedGroup = @import("group.zig").PackedGroup; -const ShellSections = @import("shell.zig").ShellWriter.ShellSections; -const ShellReader = @import("shell.zig").ShellReader; -const ShellWriter = @import("shell.zig").ShellWriter; -const Header = @import("header.zig").Header; -const max_shells = @import("shell.zig").max_shells; -const section_length_bits = @import("header.zig").section_length_bits; -const section_length = @import("header.zig").section_length; -const cmph = @import("cmph.zig"); -const bdz = @import("bdz.zig"); - -const zeroes = &[_]u8{0} ** section_length; - -const Corpus = struct { - arena: ArenaAllocator, - - // sorted by name, by unicode codepoint - users: MultiArrayList(User), - // sorted by gid - groups: MultiArrayList(Group), - - name2user: StringHashMap(u32), - name2group: StringHashMap(u32), - group2users: []const []const u32, - user2groups: []const []const u32, - - pub fn init( - baseAllocator: Allocator, - usersConst: []const User, - groupsConst: []const Group, - ) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound, TooMany }!Corpus { - if (usersConst.len >= math.maxInt(u32)) return error.TooMany; - if (groupsConst.len >= math.maxInt(u32)) return error.TooMany; - - var arena = ArenaAllocator.init(baseAllocator); - var allocator = arena.allocator(); - errdefer arena.deinit(); - - var users_arr = try allocator.alloc(User, usersConst.len); - var groups_arr = try allocator.alloc(Group, groupsConst.len); - for (usersConst) |*user, i| - users_arr[i] = try user.clone(allocator); - for (groupsConst) |*group, i| - groups_arr[i] = try group.clone(allocator); - - sort.sort(User, users_arr, {}, cmpUser); - sort.sort(Group, groups_arr, {}, cmpGroup); - - var users = MultiArrayList(User){}; - try users.ensureTotalCapacity(allocator, users_arr.len); - for (users_arr) |user| - users.appendAssumeCapacity(user); - var groups = MultiArrayList(Group){}; - try groups.ensureTotalCapacity(allocator, groups_arr.len); - for (groups_arr) |group| - groups.appendAssumeCapacity(group); - - var name2user = StringHashMap(u32).init(allocator); - var name2group = StringHashMap(u32).init(allocator); - for (users.items(.name)) |name, i| { - var res1 = try name2user.getOrPut(name); - if (res1.found_existing) - return error.Duplicate; - res1.value_ptr.* = @intCast(u32, i); - } - - for (groups.items(.name)) |name, i| { - var res1 = try name2group.getOrPut(name); - if (res1.found_existing) - return error.Duplicate; - res1.value_ptr.* = @intCast(u32, i); - } - - var group2users = try allocator.alloc([]u32, groups.len); - - // uses baseAllocator, because it will be freed before - // returning from this function. This keeps the arena clean. - var user2groups = try baseAllocator.alloc(ArrayListUnmanaged(u32), users.len); - defer baseAllocator.free(user2groups); - mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){}); - - for (groups.items(.members)) |groupmembers, i| { - var members = try allocator.alloc(u32, groupmembers.count()); - members.len = 0; - - var it = groupmembers.iterator(); - while (it.next()) |member_name| { - if (name2user.get(member_name.*)) |user_idx| { - members.len += 1; - members[members.len - 1] = user_idx; - try user2groups[user_idx].append(allocator, @intCast(u32, i)); - } else return error.NotFound; - } - - group2users[i] = members; - } - - for (group2users) |*groupusers| - sort.sort(u32, groupusers.*, {}, comptime sort.asc(u32)); - - var user2groups_final = try allocator.alloc([]const u32, users.len); - user2groups_final.len = users.len; - for (user2groups) |*usergroups, i| { - sort.sort(u32, usergroups.items, {}, comptime sort.asc(u32)); - user2groups_final[i] = usergroups.toOwnedSlice(allocator); - } - - return Corpus{ - .arena = arena, - .users = users, - .groups = groups, - .name2user = name2user, - .name2group = name2group, - .group2users = group2users, - .user2groups = user2groups_final, - }; - } - - pub fn deinit(self: *Corpus) void { - self.arena.deinit(); - self.* = undefined; - } -}; - -pub fn shellSections( - allocator: Allocator, - corpus: *const Corpus, -) error{OutOfMemory}!ShellSections { - var popcon = ShellWriter.init(allocator); - for (corpus.users.items(.shell)) |shell| - try popcon.put(shell); - return popcon.toOwnedSections(max_shells); -} - -pub const AdditionalGids = struct { - // user index -> offset in blob - idx2offset: []const u64, - // compressed user gids blob. A blob contains N <= users.len items, - // an item is: - // len: varint - // gid: [varint]varint, - // ... and the gid list is delta-compressed. - blob: []const u8, - - pub fn deinit(self: *AdditionalGids, allocator: Allocator) void { - allocator.free(self.idx2offset); - allocator.free(self.blob); - self.* = undefined; - } -}; - -pub fn additionalGids( - allocator: Allocator, - corpus: *const Corpus, -) error{OutOfMemory}!AdditionalGids { - var blob = ArrayList(u8).init(allocator); - errdefer blob.deinit(); - var idx2offset = try allocator.alloc(u64, corpus.users.len); - errdefer allocator.free(idx2offset); - - // zero'th entry is empty, so groupless users can refer to it. - try compress.appendUvarint(&blob, 0); - - var scratch = try allocator.alloc(u32, 256); - defer allocator.free(scratch); - for (corpus.user2groups) |usergroups, user_idx| { - if (usergroups.len == 0) { - idx2offset[user_idx] = 0; - continue; - } - idx2offset[user_idx] = blob.items.len; - scratch = try allocator.realloc(scratch, usergroups.len); - scratch.len = usergroups.len; - const corpusGids = corpus.groups.items(.gid); - for (usergroups) |group_idx, i| - scratch[i] = corpusGids[group_idx]; - compress.deltaCompress(u32, scratch) catch |err| switch (err) { - error.NotSorted => unreachable, - }; - try compress.appendUvarint(&blob, usergroups.len); - for (scratch) |gid| - try compress.appendUvarint(&blob, gid); - } - - return AdditionalGids{ - .idx2offset = idx2offset, - .blob = blob.toOwnedSlice(), - }; -} - -pub const UsersSection = struct { - // number of users in this section - len: u32, - // user index -> offset in blob - idx2offset: []const u32, - blob: []const u8, - - pub fn deinit(self: *UsersSection, allocator: Allocator) void { - allocator.free(self.idx2offset); - allocator.free(self.blob); - self.* = undefined; - } -}; - -pub fn usersSection( - allocator: Allocator, - corpus: *const Corpus, - gids: *const AdditionalGids, - shells: *const ShellSections, -) error{ OutOfMemory, InvalidRecord, TooMany }!UsersSection { - var idx2offset = try allocator.alloc(u32, corpus.users.len); - errdefer allocator.free(idx2offset); - // as of writing each user takes 12 bytes + blobs + padding, padded to - // 8 bytes. 24 is an optimistic lower bound for an average record size. - var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len); - errdefer blob.deinit(); - var i: usize = 0; - while (i < corpus.users.len) : (i += 1) { - // TODO: this is inefficient by calling `.slice()` on every iteration - const user = corpus.users.get(i); - const user_offset = math.cast(u35, blob.items.len) catch |err| switch (err) { - error.Overflow => return error.TooMany, - }; - assert(user_offset & 7 == 0); - idx2offset[i] = @truncate(u32, user_offset >> 3); - try PackedUser.packTo( - &blob, - user, - gids.idx2offset[i], - shells.shell2idx, - ); - try pad.arrayList(&blob, PackedUser.alignment_bits); - } - return UsersSection{ - .len = @intCast(u32, corpus.users.len), - .idx2offset = idx2offset, - .blob = blob.toOwnedSlice(), - }; -} - -pub const GroupMembers = struct { - // group index to it's offset in blob - idx2offset: []const u64, - // members are delta-varint encoded byte-offsets to the user struct - blob: []const u8, - - pub fn deinit(self: *GroupMembers, allocator: Allocator) void { - allocator.free(self.idx2offset); - allocator.free(self.blob); - self.* = undefined; - } -}; - -pub fn groupMembers( - allocator: Allocator, - corpus: *const Corpus, - user2offset: []const u32, -) error{OutOfMemory}!GroupMembers { - var idx2offset = try allocator.alloc(u64, corpus.groups.len); - errdefer allocator.free(idx2offset); - var blob = ArrayList(u8).init(allocator); - errdefer blob.deinit(); - // zero'th entry is empty, so empty groups can refer to it - try compress.appendUvarint(&blob, 0); - - var scratch = try ArrayList(u32).initCapacity(allocator, 1024); - defer scratch.deinit(); - - for (corpus.group2users) |members, group_idx| { - if (members.len == 0) { - idx2offset[group_idx] = 0; - continue; - } - - idx2offset[group_idx] = blob.items.len; - try scratch.ensureTotalCapacity(members.len); - scratch.items.len = members.len; - for (members) |user_idx, i| - scratch.items[i] = user2offset[user_idx]; - - compress.deltaCompress(u32, scratch.items) catch |err| switch (err) { - error.NotSorted => unreachable, - }; - try compress.appendUvarint(&blob, members.len); - for (scratch.items) |elem| - try compress.appendUvarint(&blob, elem); - } - return GroupMembers{ - .idx2offset = idx2offset, - .blob = blob.toOwnedSlice(), - }; -} - -pub const GroupsSection = struct { - // number of groups in this section - len: u32, - // group index -> offset in blob - idx2offset: []const u32, - blob: []const u8, - - pub fn deinit(self: *GroupsSection, allocator: Allocator) void { - allocator.free(self.idx2offset); - allocator.free(self.blob); - self.* = undefined; - } -}; - -pub fn groupsSection( - allocator: Allocator, - corpus: *const Corpus, - members_offset: []const u64, -) error{ OutOfMemory, InvalidRecord }!GroupsSection { - var idx2offset = try allocator.alloc(u32, corpus.groups.len); - errdefer allocator.free(idx2offset); - - var blob = try ArrayList(u8).initCapacity(allocator, 8 * corpus.groups.len); - errdefer blob.deinit(); - - var i: usize = 0; - while (i < corpus.groups.len) : (i += 1) { - // TODO: this is inefficient; it's calling `.slice()` on every iteration - const group = corpus.groups.get(i); - const group_offset = @intCast(u32, blob.items.len); - assert(group_offset & 7 == 0); - idx2offset[i] = @truncate(u32, group_offset >> 3); - const group_stored = GroupStored{ - .gid = group.gid, - .name = group.name, - .members_offset = members_offset[i], - }; - try PackedGroup.packTo(&blob, group_stored); - try pad.arrayList(&blob, PackedGroup.alignment_bits); - } - - return GroupsSection{ - .len = @intCast(u32, corpus.groups.len), - .idx2offset = idx2offset, - .blob = blob.toOwnedSlice(), - }; -} - -// creates a bdz index using packed_mphf. -// hash = bdz_search(packed_mphf, keys[i]); -// result[hash] = idx2offset[i]; -pub fn bdzIdx( - comptime T: type, - allocator: Allocator, - packed_mphf: []const u8, - keys: []const T, - idx2offset: []const u32, -) error{OutOfMemory}![]const u32 { - const search_fn = comptime blk: { - switch (T) { - u32 => break :blk bdz.search_u32, - []const u8 => break :blk bdz.search, - else => unreachable, - } - }; - assert(keys.len <= math.maxInt(u32)); - var result = try allocator.alloc(u32, keys.len); - for (keys) |key, i| - result[search_fn(packed_mphf, key)] = idx2offset[i]; - return result; -} - -// cmpUser compares two users for sorting. By username's utf8 codepoints, ascending. -fn cmpUser(_: void, a: User, b: User) bool { - var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator(); - var utf8_b = (unicode.Utf8View.init(b.name) catch unreachable).iterator(); - while (utf8_a.nextCodepoint()) |codepoint_a| { - if (utf8_b.nextCodepoint()) |codepoint_b| { - if (codepoint_a == codepoint_b) { - continue; - } else return codepoint_a < codepoint_b; - } - - // a is a prefix of b. It is thus shorter. - return false; - } - // b is a prefix of a - return true; -} - -fn cmpGroup(_: void, a: Group, b: Group) bool { - return a.gid < b.gid; -} - -// nblocks returns how many blocks a particular slice will take. -fn nblocks(comptime T: type, arr: []const u8) T { - const B = switch (T) { - u8 => u14, - u32 => u38, - u64 => u70, - else => @compileError("only u8, u32 and u64 are supported"), - }; - const upper = pad.roundUp(B, section_length_bits, @intCast(B, arr.len)); - assert(upper & (section_length - 1) == 0); - return @truncate(T, upper >> 6); -} - -pub const DB = struct { - // All sections, as they end up in the DB. Order is important. - header: []const u8, - bdz_gid: []const u8, - bdz_groupname: []const u8, - bdz_uid: []const u8, - bdz_username: []const u8, - idx_gid2group: []const u32, - idx_groupname2group: []const u32, - idx_uid2user: []const u32, - idx_name2user: []const u32, - shell_index: []const u16, - shell_blob: []const u8, - groups: []const u8, - users: []const u8, - groupmembers: []const u8, - additional_gids: []const u8, - - pub fn fromCorpus( - allocator: Allocator, - corpus: *const Corpus, - ) error{ OutOfMemory, InvalidRecord, TooMany }!DB { - const gids = corpus.groups.items(.gid); - const gnames = corpus.groups.items(.name); - const uids = corpus.users.items(.uid); - const unames = corpus.users.items(.name); - - var bdz_gid = try cmph.packU32(allocator, gids); - errdefer allocator.free(bdz_gid); - - var bdz_groupname = try cmph.packStr(allocator, gnames); - errdefer allocator.free(bdz_groupname); - - var bdz_uid = try cmph.packU32(allocator, uids); - errdefer allocator.free(bdz_uid); - - const bdz_username = try cmph.packStr(allocator, unames); - errdefer allocator.free(bdz_username); - - var shell = try shellSections(allocator, corpus); - defer shell.deinit(); - - var additional_gids = try additionalGids(allocator, corpus); - errdefer allocator.free(additional_gids.blob); - - var users = try usersSection(allocator, corpus, &additional_gids, &shell); - allocator.free(additional_gids.idx2offset); - errdefer allocator.free(users.blob); - - var groupmembers = try groupMembers(allocator, corpus, users.idx2offset); - errdefer allocator.free(groupmembers.blob); - - var groups = try groupsSection(allocator, corpus, groupmembers.idx2offset); - allocator.free(groupmembers.idx2offset); - errdefer allocator.free(groups.blob); - - var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset); - errdefer allocator.free(idx_gid2group); - - var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset); - allocator.free(groups.idx2offset); - errdefer allocator.free(idx_groupname2group); - - var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset); - errdefer allocator.free(idx_uid2user); - - var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset); - allocator.free(users.idx2offset); - errdefer allocator.free(idx_name2user); - - const header = Header{ - .nblocks_shell_blob = nblocks(u8, shell.blob.constSlice()), - .num_shells = shell.len, - .num_groups = groups.len, - .num_users = users.len, - .nblocks_bdz_gid = nblocks(u32, bdz_gid), - .nblocks_bdz_groupname = nblocks(u32, bdz_groupname), - .nblocks_bdz_uid = nblocks(u32, bdz_uid), - .nblocks_bdz_username = nblocks(u32, bdz_username), - .nblocks_groups = nblocks(u64, groups.blob), - .nblocks_users = nblocks(u64, users.blob), - .nblocks_groupmembers = nblocks(u64, groupmembers.blob), - .nblocks_additional_gids = nblocks(u64, additional_gids.blob), - }; - - return DB{ - .header = header.asBytes(), - .bdz_gid = bdz_gid, - .bdz_groupname = bdz_groupname, - .bdz_uid = bdz_uid, - .bdz_username = bdz_username, - .idx_gid2group = idx_gid2group, - .idx_groupname2group = idx_groupname2group, - .idx_uid2user = idx_uid2user, - .idx_name2user = idx_name2user, - .shell_index = shell.index.constSlice(), - .shell_blob = shell.blob.constSlice(), - .groups = groups.blob, - .users = users.blob, - .groupmembers = groupmembers.blob, - .additional_gids = additional_gids.blob, - }; - } - - pub fn iov(self: *const DB) error{OutOfMemory}![]const os.iovec_const { - const fields = comptime meta.fieldNames(DB); - var result = BoundedArray(os.iovec_const, fields.len * 2).init(0) catch |err| switch (err) { - error.Overflow => unreachable, - }; - - inline for (fields) |fname| { - const bytes = mem.sliceAsBytes(@field(self, fname)); - result.appendAssumeCapacity(os.iovec_const{ - .iov_base = bytes.ptr, - .iov_len = bytes.len, - }); - const padding = pad.until(usize, section_length_bits, bytes.len); - if (padding != 0) - result.appendAssumeCapacity(.{ - .iov_base = zeroes, - .iov_len = padding, - }); - } - - return result.constSlice(); - } - - pub fn deinit(self: *DB, allocator: Allocator) void { - allocator.free(self.bdz_gid); - allocator.free(self.bdz_groupname); - allocator.free(self.bdz_uid); - allocator.free(self.bdz_username); - allocator.free(self.idx_gid2group); - allocator.free(self.idx_groupname2group); - allocator.free(self.idx_uid2user); - allocator.free(self.idx_name2user); - allocator.free(self.groups); - allocator.free(self.users); - allocator.free(self.groupmembers); - allocator.free(self.additional_gids); - self.* = undefined; - } -}; - -const testing = std.testing; -const someMembers = @import("group.zig").someMembers; - -fn testCorpus(allocator: Allocator) !Corpus { - const users = [_]User{ User{ - .uid = 0, - .gid = 0, - .name = "root", - .gecos = "", - .home = "/root", - .shell = "/bin/bash", - }, User{ - .uid = 128, - .gid = 128, - .name = "vidmantas", - .gecos = "Vidmantas Kaminskas", - .home = "/home/vidmantas", - .shell = "/bin/bash", - }, User{ - .uid = 1000, - .gid = math.maxInt(u32), - .name = "Name" ** 8, - .gecos = "Gecos" ** 51, - .home = "Home" ** 16, - .shell = "She.LllL" ** 8, - }, User{ - .uid = 100000, - .gid = 1002, - .name = "svc-bar", - .gecos = "", - .home = "/", - .shell = "/", - }, User{ - .uid = 65534, - .gid = 65534, - .name = "nobody", - .gecos = "nobody", - .home = "/nonexistent", - .shell = "/usr/sbin/nologin", - } }; - - var members0 = try someMembers( - allocator, - &[_][]const u8{"root"}, - ); - defer members0.deinit(); - - var members1 = try someMembers( - allocator, - &[_][]const u8{"vidmantas"}, - ); - defer members1.deinit(); - - var members2 = try someMembers( - allocator, - &[_][]const u8{ "svc-bar", "vidmantas" }, - ); - defer members2.deinit(); - - var members3 = try someMembers( - allocator, - &[_][]const u8{ "svc-bar", "Name" ** 8, "vidmantas", "root" }, - ); - defer members3.deinit(); - - const groups = [_]Group{ - Group{ .gid = 0, .name = "root", .members = members0 }, - Group{ .gid = 128, .name = "vidmantas", .members = members1 }, - Group{ .gid = 9999, .name = "all", .members = members3 }, - Group{ .gid = 100000, .name = "service-account", .members = members2 }, - }; - - return try Corpus.init(allocator, users[0..], groups[0..]); -} - -test "test corpus" { - var corpus = try testCorpus(testing.allocator); - defer corpus.deinit(); - - const name_name = 0; - const nobody = 1; - const root = 2; - const svc_bar = 3; - const vidmantas = 4; - - const usernames = corpus.users.items(.name); - try testing.expectEqualStrings(usernames[name_name], "Name" ** 8); - try testing.expectEqualStrings(usernames[nobody], "nobody"); - try testing.expectEqualStrings(usernames[root], "root"); - try testing.expectEqualStrings(usernames[svc_bar], "svc-bar"); - try testing.expectEqualStrings(usernames[vidmantas], "vidmantas"); - - const g_root = 0; - const g_vidmantas = 1; - const g_all = 2; - const g_service_account = 3; - - const groupnames = corpus.groups.items(.name); - try testing.expectEqualStrings(groupnames[g_root], "root"); - try testing.expectEqualStrings(groupnames[g_service_account], "service-account"); - try testing.expectEqualStrings(groupnames[g_vidmantas], "vidmantas"); - try testing.expectEqualStrings(groupnames[g_all], "all"); - - try testing.expectEqual(corpus.name2user.get("404"), null); - try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas); - try testing.expectEqual(corpus.name2group.get("404"), null); - try testing.expectEqual(corpus.name2group.get("vidmantas").?, g_vidmantas); - - const membersOfAll = corpus.group2users[g_all]; - try testing.expectEqual(membersOfAll[0], name_name); - try testing.expectEqual(membersOfAll[1], root); - try testing.expectEqual(membersOfAll[2], svc_bar); - try testing.expectEqual(membersOfAll[3], vidmantas); - - const groupsOfVidmantas = corpus.user2groups[vidmantas]; - try testing.expectEqual(groupsOfVidmantas[0], g_vidmantas); - try testing.expectEqual(groupsOfVidmantas[1], g_all); - try testing.expectEqual(groupsOfVidmantas[2], g_service_account); -} - -test "test groups, group members and users" { - const allocator = testing.allocator; - var corpus = try testCorpus(allocator); - defer corpus.deinit(); - - var db = try DB.fromCorpus(allocator, &corpus); - defer db.deinit(allocator); - - // TODO: replace with an integration test when high-level - // reader API is present - //const blob = sections.groupmembers.blob; - //var i: usize = 0; - //while (i < corpus.groups.len) : (i += 1) { - //const offset = sections.groupmembers.idx2offset[i]; - //var vit = try compress.VarintSliceIterator(blob[offset..]); - //var it = compress.DeltaDecompressionIterator(&vit); - //for (corpus.group2users[i]) |user_idx| { - // const got_user_offset = (try it.next()).?; - // const want_user_offset = sections.users.idx2offset[user_idx]; - // try testing.expectEqual(got_user_offset, want_user_offset); - //} - //try testing.expectEqual(it.next(), null); - //} - - //var it = PackedUser.iterator(sections.users.blob, sections.shell_reader); - //i = 0; - //while (i < corpus.users.len) : (i += 1) { - // const got = (try it.next()).?; - // const user = corpus.users.get(i); - // try testing.expectEqual(user.uid, got.uid()); - // try testing.expectEqual(user.gid, got.gid()); - // try testing.expectEqualStrings(user.name, got.name()); - // try testing.expectEqualStrings(user.gecos, got.gecos()); - // try testing.expectEqualStrings(user.home, got.home()); - // try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader)); - //} - - var iovec = try db.iov(); - _ = iovec; -} - -test "additionalGids" { - const allocator = testing.allocator; - var corpus = try testCorpus(allocator); - defer corpus.deinit(); - - var additional_gids = try additionalGids(allocator, &corpus); - defer additional_gids.deinit(allocator); - - var user_idx: usize = 0; - while (user_idx < corpus.users.len) : (user_idx += 1) { - const groups = corpus.user2groups[user_idx]; - const offset = additional_gids.idx2offset[user_idx]; - if (groups.len == 0) { - try testing.expect(offset == 0); - continue; - } - var vit = try compress.VarintSliceIterator(additional_gids.blob[offset..]); - var it = compress.DeltaDecompressionIterator(&vit); - try testing.expectEqual(it.remaining(), groups.len); - var i: u64 = 0; - const corpusGids = corpus.groups.items(.gid); - while (try it.next()) |gid| : (i += 1) { - try testing.expectEqual(gid, corpusGids[groups[i]]); - } - try testing.expectEqual(i, groups.len); - } -} - -test "pack gids" { - const allocator = testing.allocator; - var corpus = try testCorpus(allocator); - defer corpus.deinit(); - - const cmph_gid = try cmph.packU32(allocator, corpus.groups.items(.gid)); - defer allocator.free(cmph_gid); - - const k1 = bdz.search_u32(cmph_gid, 0); - const k2 = bdz.search_u32(cmph_gid, 128); - const k3 = bdz.search_u32(cmph_gid, 9999); - const k4 = bdz.search_u32(cmph_gid, 100000); - var hashes = &[_]u32{ k1, k2, k3, k4 }; - sort.sort(u32, hashes, {}, comptime sort.asc(u32)); - for (hashes) |hash, i| - try testing.expectEqual(i, hash); -} - -fn testUser(name: []const u8) User { - var result = mem.zeroes(User); - result.name = name; - return result; -} - -test "users compare function" { - const a = testUser("a"); - const b = testUser("b"); - const bb = testUser("bb"); - try testing.expect(cmpUser({}, a, b)); - try testing.expect(!cmpUser({}, b, a)); - try testing.expect(cmpUser({}, a, bb)); - try testing.expect(!cmpUser({}, bb, a)); - try testing.expect(cmpUser({}, b, bb)); - try testing.expect(!cmpUser({}, bb, b)); -} - -const hash_offsets = &[_]u32{ 0, 10, 20, 30 }; - -fn expectUsedHashes(allocator: Allocator, arr: []const u32) !void { - var used = AutoHashMap(u32, void).init(allocator); - defer used.deinit(); - - for (arr) |elem| - try used.putNoClobber(elem, {}); - for (hash_offsets) |item| - try testing.expect(used.get(item) != null); -} - -test "bdzIdx on u32" { - const keys = [_]u32{ 42, 1, 2, 3 }; - const mphf = try cmph.packU32(testing.allocator, keys[0..]); - defer testing.allocator.free(mphf); - var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..], hash_offsets); - defer testing.allocator.free(result); - try expectUsedHashes(testing.allocator, result); -} - -test "bdzIdx on str" { - const keys = [_][]const u8{ "42", "1", "2", "3" }; - const mphf = try cmph.packStr(testing.allocator, keys[0..]); - defer testing.allocator.free(mphf); - var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..], hash_offsets); - defer testing.allocator.free(result); - try expectUsedHashes(testing.allocator, result); -} - -test "nblocks" { - const tests = .{ - .{ 0, &[_]u8{} }, - .{ 1, &[_]u8{ 1, 2, 42 } }, - .{ 1, &[_]u8{1} ** 63 }, - .{ 1, &[_]u8{1} ** 64 }, - .{ 2, &[_]u8{1} ** 65 }, - .{ 255, &[_]u8{1} ** (255 * 64) }, - }; - - inline for (tests) |tt| { - try testing.expectEqual(nblocks(u8, tt[1]), tt[0]); - try testing.expectEqual(nblocks(u32, tt[1]), tt[0]); - try testing.expectEqual(nblocks(u64, tt[1]), tt[0]); - } -} diff --git a/lib/shell.zig b/lib/shell.zig index a25dead..6f33f23 100644 --- a/lib/shell.zig +++ b/lib/shell.zig @@ -1,10 +1,8 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const PriorityDequeue = std.PriorityDequeue; -const StringArrayHashMap = std.StringArrayHashMap; const StringHashMap = std.StringHashMap; const BoundedArray = std.BoundedArray; -const StringContext = std.hash_map.StringContext; const assert = std.debug.assert; pub const max_shells = 255; diff --git a/lib/so.zig b/lib/so.zig index 093564c..a5c31c5 100644 --- a/lib/so.zig +++ b/lib/so.zig @@ -1,19 +1,19 @@ const Passwd = extern struct { // zig fmt: off - pw_name: [*:0]u8, // username - pw_passwd: [*:0]const u8, // user password, always '*' - pw_uid: u32, // user ID - pw_gid: u32, // group ID - pw_gecos: [*:0]const u8, // user information - pw_dir: [*:0]const u8, // home directory - pw_shell: [*:0]const u8, // shell program + pw_name: [*:0]u8, // username + pw_passwd: [*:0]const u8 = "*", // user password, always '*' + pw_uid: u32, // user ID + pw_gid: u32, // group ID + pw_gecos: [*:0]const u8, // user information + pw_dir: [*:0]const u8, // home directory + pw_shell: [*:0]const u8, // shell program // zig fmt: on }; const Group = extern struct { // zig fmt: off gr_name: [*:0]u8, // group name - gr_passwd: [*:0]u8, // group password, always '*' + gr_passwd: [*:0]u8 = "*", // group password, always '*' gr_gid: u32, // group ID gr_mem: [*:0][*:0] const u8, // NULL-terminated array of pointers to group members // zig fmt: off diff --git a/lib/test_all.zig b/lib/test_all.zig index 9914af7..66930c8 100644 --- a/lib/test_all.zig +++ b/lib/test_all.zig @@ -1,7 +1,8 @@ test "turbonss test suite" { _ = @import("header.zig"); _ = @import("so.zig"); - _ = @import("sections.zig"); + _ = @import("DB.zig"); + _ = @import("Corpus.zig"); _ = @import("shell.zig"); _ = @import("user.zig"); _ = @import("group.zig");