const std = @import("std"); const os = std.os; const mem = std.mem; const math = std.math; const meta = std.meta; const sort = std.sort; const assert = std.debug.assert; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; const AutoHashMap = std.AutoHashMap; const BoundedArray = std.BoundedArray; const ArenaAllocator = std.heap.ArenaAllocator; const Corpus = @import("Corpus.zig"); const pad = @import("padding.zig"); const compress = @import("compress.zig"); const PackedUser = @import("PackedUser.zig"); const User = @import("User.zig"); const CGroup = @import("CGroup.zig"); const Group = @import("Group.zig"); const PackedGroup = @import("PackedGroup.zig"); const GroupStored = PackedGroup.GroupStored; const ShellSections = @import("shell.zig").ShellWriter.ShellSections; const ShellReader = @import("shell.zig").ShellReader; const ShellWriter = @import("shell.zig").ShellWriter; const InvalidHeader = @import("header.zig").Invalid; const Header = @import("header.zig").Header; const max_shells = @import("shell.zig").max_shells; const section_length_bits = @import("header.zig").section_length_bits; const section_length = @import("header.zig").section_length; const cmph = @import("cmph.zig"); const bdz = @import("bdz.zig"); const zeroes = &[_]u8{0} ** section_length; const DB = @This(); // All sections, as they end up in the DB. Order is important. 
// The sections of the database. iov() and fromBytes() both walk these fields
// in declaration order, so reordering them changes the serialized layout.
header: *const Header,
bdz_gid: []const u8,
bdz_groupname: []const u8,
bdz_uid: []const u8,
bdz_username: []const u8,
idx_gid2group: []const u32,
idx_groupname2group: []const u32,
idx_uid2user: []const u32,
idx_name2user: []const u32,
shell_index: []const u16,
shell_blob: []const u8,
groups: []const u8,
users: []const u8,
groupmembers: []const u8,
additional_gids: []const u8,

// fromCorpus builds every section of the database from the corpus.
//
// The header and all section slices of the returned DB are allocated with
// `allocator`; release them with `deinit`, passing the same allocator.
// Intermediate index tables (the various idx2offset slices) are freed before
// returning.
pub fn fromCorpus(
    allocator: Allocator,
    corpus: *const Corpus,
) error{ OutOfMemory, InvalidRecord, TooMany }!DB {
    const gids = corpus.groups.items(.gid);
    const gnames = corpus.groups.items(.name);
    const uids = corpus.users.items(.uid);
    const unames = corpus.users.items(.name);

    const bdz_gid = try cmph.packU32(allocator, gids);
    errdefer allocator.free(bdz_gid);

    const bdz_groupname = try cmph.packStr(allocator, gnames);
    errdefer allocator.free(bdz_groupname);

    const bdz_uid = try cmph.packU32(allocator, uids);
    errdefer allocator.free(bdz_uid);

    const bdz_username = try cmph.packStr(allocator, unames);
    errdefer allocator.free(bdz_username);

    var shell = try shellSections(allocator, corpus);
    defer shell.deinit();

    // `shell` is released when this function returns, so the DB must own
    // copies of the shell sections rather than slices pointing into `shell`.
    const shell_index = try allocator.dupe(u16, shell.index.constSlice());
    errdefer allocator.free(shell_index);

    const shell_blob = try allocator.dupe(u8, shell.blob.constSlice());
    errdefer allocator.free(shell_blob);

    const additional_gids = try additionalGids(allocator, corpus);
    errdefer allocator.free(additional_gids.blob);
    defer allocator.free(additional_gids.idx2offset);

    const users = try usersSection(allocator, corpus, &additional_gids, &shell);
    errdefer allocator.free(users.blob);
    defer allocator.free(users.idx2offset);

    const groupmembers = try groupMembers(allocator, corpus, users.idx2offset);
    errdefer allocator.free(groupmembers.blob);
    defer allocator.free(groupmembers.idx2offset);

    const groups = try groupsSection(allocator, corpus, groupmembers.idx2offset);
    errdefer allocator.free(groups.blob);
    defer allocator.free(groups.idx2offset);

    const idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset);
    errdefer allocator.free(idx_gid2group);

    const idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset);
    errdefer allocator.free(idx_groupname2group);

    const idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset);
    errdefer allocator.free(idx_uid2user);

    const idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset);
    errdefer allocator.free(idx_name2user);

    const header = try allocator.create(Header);
    errdefer allocator.destroy(header);

    header.* = Header{
        .nblocks_shell_blob = nblocks(u8, shell_blob),
        .num_shells = shell.len,
        .num_groups = groups.len,
        .num_users = users.len,
        .nblocks_bdz_gid = nblocks(u32, bdz_gid),
        .nblocks_bdz_groupname = nblocks(u32, bdz_groupname),
        .nblocks_bdz_uid = nblocks(u32, bdz_uid),
        .nblocks_bdz_username = nblocks(u32, bdz_username),
        .nblocks_groups = nblocks(u64, groups.blob),
        .nblocks_users = nblocks(u64, users.blob),
        .nblocks_groupmembers = nblocks(u64, groupmembers.blob),
        .nblocks_additional_gids = nblocks(u64, additional_gids.blob),
    };

    return DB{
        .header = header,
        .bdz_gid = bdz_gid,
        .bdz_groupname = bdz_groupname,
        .bdz_uid = bdz_uid,
        .bdz_username = bdz_username,
        .idx_gid2group = idx_gid2group,
        .idx_groupname2group = idx_groupname2group,
        .idx_uid2user = idx_uid2user,
        .idx_name2user = idx_name2user,
        .shell_index = shell_index,
        .shell_blob = shell_blob,
        .groups = groups.blob,
        .users = users.blob,
        .groupmembers = groupmembers.blob,
        .additional_gids = additional_gids.blob,
    };
}

// deinit releases the header and every section of a DB that was built with
// fromCorpus, using the allocator that built it. A DB returned by fromBytes
// only borrows slices of the caller's buffer and must not be passed here.
pub fn deinit(self: *DB, allocator: Allocator) void {
    allocator.destroy(self.header);
    allocator.free(self.bdz_gid);
    allocator.free(self.bdz_groupname);
    allocator.free(self.bdz_uid);
    allocator.free(self.bdz_username);
    allocator.free(self.idx_gid2group);
    allocator.free(self.idx_groupname2group);
    allocator.free(self.idx_uid2user);
    allocator.free(self.idx_name2user);
    allocator.free(self.shell_index);
    allocator.free(self.shell_blob);
    allocator.free(self.groups);
    allocator.free(self.users);
    allocator.free(self.groupmembers);
    allocator.free(self.additional_gids);
    self.* = undefined;
}

const DB_fields = meta.fields(DB);

// iov returns the sections as an I/O vector, in field declaration order.
// Each section is followed by an entry of zero bytes whenever pad.until
// reports that padding is needed; hence the capacity of two entries per field.
// The entries point into `self`, so the result must not outlive the DB.
pub fn iov(self: *const DB) BoundedArray(os.iovec_const, DB_fields.len * 2) {
    var result = BoundedArray(os.iovec_const, DB_fields.len * 2).init(0) catch unreachable;
    inline for (DB_fields) |field| {
        comptime assertDefinedLayout(field.field_type);
        const value = @field(self, field.name);
        // the header is a single-item pointer, everything else is a slice
        const bytes: []const u8 = switch (@TypeOf(value)) {
            *const Header => mem.asBytes(value),
            else => mem.sliceAsBytes(value),
        };
        result.appendAssumeCapacity(os.iovec_const{
            .iov_base = bytes.ptr,
            .iov_len = bytes.len,
        });
        const padding = pad.until(usize, section_length_bits, bytes.len);
        if (padding != 0)
            result.appendAssumeCapacity(.{
                .iov_base = zeroes,
                .iov_len = padding,
            });
    }
    return result;
}

// fromBytes interprets `buf` as a serialized DB. The returned DB borrows
// from `buf`: nothing is copied and nothing needs to be freed.
//
// NOTE(review): neither `buf.len >= @sizeOf(Header)` nor the section ends
// computed from the header are checked against `buf.len` here; confirm that
// callers (or Header.fromBytes) guarantee a complete buffer.
pub fn fromBytes(buf: []align(8) const u8) InvalidHeader!DB {
    const header = try Header.fromBytes(buf[0..@sizeOf(Header)]);

    // At first the tuple below had field names too, but moved it to comments,
    // because it segfaulted. https://github.com/ziglang/zig/issues/3915 and
    // https://paste.sr.ht/~motiejus/2830736e796801517c1fa8639be6615cd56ada27
    const lengths = .{
        header.nblocks_bdz_gid, // bdz_gid
        header.nblocks_bdz_groupname, // bdz_groupname
        header.nblocks_bdz_uid, // bdz_uid
        header.nblocks_bdz_username, // bdz_username
        nblocks_n(u32, header.num_groups * 4), // idx_gid2group
        nblocks_n(u32, header.num_groups * 4), // idx_groupname2group
        nblocks_n(u32, header.num_users * 4), // idx_uid2user
        nblocks_n(u32, header.num_users * 4), // idx_name2user
        nblocks_n(u16, header.num_shells * 2), // shell_index
        header.nblocks_shell_blob, // shell_blob
        header.nblocks_groups, // groups
        header.nblocks_users, // users
        header.nblocks_groupmembers, // groupmembers
        header.nblocks_additional_gids, // additional_gids
    };

    var result: DB = undefined;
    result.header = header;
    // `offset` counts blocks, not bytes; sections start after the header.
    var offset = comptime nblocks_n(u64, @sizeOf(Header));
    comptime assert(mem.eql(u8, DB_fields[0].name, "header"));
    inline for (DB_fields[1..]) |field, i| {
        const start = offset << section_length_bits;
        const end = (offset + lengths[i]) << section_length_bits;
        const slice_type = meta.Child(field.field_type);
        const value = mem.bytesAsSlice(slice_type, buf[start..end]);
        @field(result, field.name) = value;
        offset += lengths[i];
    }

    return result;
}

// GroupMemberNames is the result of groupMemberNames: `arr` holds pointers
// into `_buf`, which stores the zero-terminated member names back to back.
const GroupMemberNames = struct {
    _buf: []u8,
    arr: []const ?[*:0]const u8,

    // deinit frees what groupMemberNames allocated. An empty `_buf` marks the
    // statically allocated "no members" result, which owns nothing.
    pub fn deinit(self: *GroupMemberNames, allocator: Allocator) void {
        if (self._buf.len == 0) return;
        allocator.free(self._buf);
        // The allocation is one element longer than `arr`: it also holds the
        // terminating null pointer, and must be freed at its full length.
        allocator.free(self.arr.ptr[0 .. self.arr.len + 1]);
    }
};

// returns a list of group member names starting at the given offset of
// groupmembers blob. The list is followed in memory by a null pointer.
// Caller releases the result with GroupMemberNames.deinit.
fn groupMemberNames(
    self: *const DB,
    allocator: Allocator,
    offset: u64,
) error{OutOfMemory}!GroupMemberNames {
    const v = compress.uvarintMust(self.groupmembers[offset..]);
    const total_members_len = v.value;
    const offset2 = offset + v.bytes_read;

    var vit = compress.VarintSliceIteratorMust(self.groupmembers[offset2..]);
    const num_members = vit.remaining;
    // note: in this case `arr` has length 1 and its only element is the null
    // terminator itself.
    if (num_members == 0)
        return GroupMemberNames{
            ._buf = &[0]u8{},
            .arr = &[1]?[*:0]const u8{null},
        };

    // TODO (zig 0.10+) make result type sentinel-aware and stop
    // the terminating-null-pointer-dancing.
    var arr_full = try allocator.alloc(?[*:0]const u8, num_members + 1);
    errdefer allocator.free(arr_full);
    arr_full[num_members] = null;
    // callers see only the names; the null terminator sits right after them
    const arr = arr_full[0..num_members];

    var buf = std.ArrayList(u8).init(allocator);
    errdefer buf.deinit();
    // +num_members are for sentinel zeroes
    try buf.ensureTotalCapacity(total_members_len + num_members);

    var it = compress.DeltaDecompressionIterator(&vit);
    var i: usize = 0;
    while (it.nextMust()) |member_offset| : (i += 1) {
        const entry = PackedUser.fromBytes(self.users[member_offset << 3 ..]);
        const start = buf.items.len;
        const name = entry.user.name();
        buf.appendSliceAssumeCapacity(name);
        buf.appendAssumeCapacity(0);
        // TODO: arr[i] = buf[...] triggers a bug in zig pre-0.10
        const terminated = buf.items[start .. buf.items.len - 1 :0];
        arr[i] = terminated;
    }

    return GroupMemberNames{ ._buf = buf.toOwnedSlice(), .arr = arr };
}

// getgrnam returns a Group entry by name.
// The Group must be deinit'ed by caller.
//
// Returns null when no group is called `name`. The returned name is a
// zero-terminated copy of `name` allocated with `allocator`.
fn getgrnam(
    self: *const DB,
    allocator: Allocator,
    name: []const u8,
) error{OutOfMemory}!?CGroup {
    const idx = bdz.search(self.bdz_groupname, name);
    // `name` comes from the caller and need not be one of the indexed keys.
    // NOTE(review): presumably bdz.search may then return a value outside
    // [0, num_groups) -- confirm against bdz.zig. Guard the lookup either way.
    if (idx >= self.header.num_groups) return null;
    const offset = self.idx_groupname2group[idx];
    const group = PackedGroup.fromBytes(self.groups[offset << 3 ..]).group;
    // a hash hit is not proof of identity: compare the stored name
    if (!mem.eql(u8, name, group.name())) return null;

    var members = try self.groupMemberNames(allocator, group.members_offset);
    errdefer members.deinit(allocator);

    const namez = try allocator.dupeZ(u8, name);

    return CGroup{
        .name = namez,
        .gid = group.gid(),
        .members = members.arr,
    };
}

// shellSections feeds the shell of every user into a ShellWriter and returns
// the sections it produces for at most `max_shells` shells.
fn shellSections(
    allocator: Allocator,
    corpus: *const Corpus,
) error{OutOfMemory}!ShellSections {
    var popcon = ShellWriter.init(allocator);
    for (corpus.users.items(.shell)) |shell|
        try popcon.put(shell);
    return popcon.toOwnedSections(max_shells);
}

const AdditionalGids = struct {
    // user index -> offset in blob
    idx2offset: []const u64,
    // compressed user gids blob. A blob contains N <= users.len items,
    // an item is:
    //   len: varint
    //   gid: [varint]varint,
    // ... and the gid list is delta-compressed.
    blob: []const u8,
};

// additionalGids builds the blob of per-user group id lists. Both slices of
// the result are allocated with `allocator` and owned by the caller.
fn additionalGids(
    allocator: Allocator,
    corpus: *const Corpus,
) error{OutOfMemory}!AdditionalGids {
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();
    var idx2offset = try allocator.alloc(u64, corpus.users.len);
    errdefer allocator.free(idx2offset);

    // zero'th entry is empty, so groupless users can refer to it.
    try compress.appendUvarint(&blob, 0);

    // `scratch` is reassigned by realloc below; the defer frees whatever it
    // points to when the function exits.
    var scratch = try allocator.alloc(u32, 256);
    defer allocator.free(scratch);
    const corpus_gids = corpus.groups.items(.gid);
    for (corpus.user2groups) |usergroups, user_idx| {
        if (usergroups.len == 0) {
            idx2offset[user_idx] = 0;
            continue;
        }
        idx2offset[user_idx] = blob.items.len;
        scratch = try allocator.realloc(scratch, usergroups.len);
        for (usergroups) |group_idx, i|
            scratch[i] = corpus_gids[group_idx];
        compress.deltaCompress(u32, scratch) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };
        try compress.appendUvarint(&blob, usergroups.len);
        for (scratch) |gid|
            try compress.appendUvarint(&blob, gid);
    }

    return AdditionalGids{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}

const UsersSection = struct {
    // number of users in this section
    len: u32,
    // user index -> offset in blob
    idx2offset: []const u32,
    blob: []const u8,
};

// usersSection packs every user of the corpus into one blob. Offsets stored
// in idx2offset are in units of 8 bytes (byte offset >> 3). Both slices of the
// result are allocated with `allocator` and owned by the caller.
fn usersSection(
    allocator: Allocator,
    corpus: *const Corpus,
    gids: *const AdditionalGids,
    shells: *const ShellSections,
) error{ OutOfMemory, InvalidRecord, TooMany }!UsersSection {
    var idx2offset = try allocator.alloc(u32, corpus.users.len);
    errdefer allocator.free(idx2offset);
    // as of writing each user takes 12 bytes + blobs + padding, padded to
    // 8 bytes. 24 is an optimistic lower bound for an average record size.
    var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len);
    errdefer blob.deinit();
    var i: usize = 0;
    while (i < corpus.users.len) : (i += 1) {
        // TODO: this is inefficient by calling `.slice()` on every iteration
        const user = corpus.users.get(i);
        // u35 = 32 bits of stored offset + the 3 bits dropped by the shift
        const user_offset = math.cast(u35, blob.items.len) catch |err| switch (err) {
            error.Overflow => return error.TooMany,
        };
        assert(user_offset & 7 == 0);
        idx2offset[i] = @truncate(u32, user_offset >> 3);
        try PackedUser.packTo(
            &blob,
            user,
            gids.idx2offset[i],
            shells.shell2idx,
        );
        try pad.arrayList(&blob, PackedUser.alignment_bits);
    }
    return UsersSection{
        .len = @intCast(u32, corpus.users.len),
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}

const GroupMembers = struct {
    // group index to its offset in blob
    idx2offset: []const u64,
    // members are delta-varint encoded byte-offsets to the user struct
    blob: []const u8,
};

// groupMembers builds the blob of per-group member lists. Every non-empty
// entry is: total length of the member names, number of members, then the
// delta-compressed user offsets taken from `user2offset`. Both slices of the
// result are allocated with `allocator` and owned by the caller.
fn groupMembers(
    allocator: Allocator,
    corpus: *const Corpus,
    user2offset: []const u32,
) error{OutOfMemory}!GroupMembers {
    var idx2offset = try allocator.alloc(u64, corpus.groups.len);
    errdefer allocator.free(idx2offset);
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();
    // zero'th entry is empty, so empty groups can refer to it
    try compress.appendUvarint(&blob, 0);

    var scratch = try ArrayList(u32).initCapacity(allocator, 1024);
    defer scratch.deinit();

    for (corpus.group2users) |members, group_idx| {
        if (members.len == 0) {
            idx2offset[group_idx] = 0;
            continue;
        }

        idx2offset[group_idx] = blob.items.len;
        try scratch.ensureTotalCapacity(members.len);
        scratch.items.len = members.len;
        for (members) |user_idx, i|
            scratch.items[i] = user2offset[user_idx];
        compress.deltaCompress(u32, scratch.items) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };
        const total_members_len = blk: {
            var sum: usize = 0;
            for (members) |user_idx|
                sum += corpus.users.get(user_idx).name.len;
            break :blk @intCast(u32, sum);
        };
        try compress.appendUvarint(&blob, total_members_len);
        try compress.appendUvarint(&blob, members.len);
        for (scratch.items) |elem|
            try compress.appendUvarint(&blob, elem);
    }
    return GroupMembers{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}

const GroupsSection = struct {
    // number of groups in this section
    len: u32,
    // group index -> offset in blob
    idx2offset: []const u32,
    blob: []const u8,
};

// groupsSection packs every group of the corpus into one blob. Offsets stored
// in idx2offset are in units of 8 bytes (byte offset >> 3). `members_offset`
// maps a group index to the offset of its member list. Both slices of the
// result are allocated with `allocator` and owned by the caller.
fn groupsSection(
    allocator: Allocator,
    corpus: *const Corpus,
    members_offset: []const u64,
) error{ OutOfMemory, InvalidRecord }!GroupsSection {
    var idx2offset = try allocator.alloc(u32, corpus.groups.len);
    errdefer allocator.free(idx2offset);

    var blob = try ArrayList(u8).initCapacity(allocator, 8 * corpus.groups.len);
    errdefer blob.deinit();

    var i: usize = 0;
    while (i < corpus.groups.len) : (i += 1) {
        // TODO: this is inefficient; it's calling `.slice()` on every iteration
        const group = corpus.groups.get(i);
        const group_offset = @intCast(u32, blob.items.len);
        assert(group_offset & 7 == 0);
        idx2offset[i] = @truncate(u32, group_offset >> 3);
        const group_stored = GroupStored{
            .gid = group.gid,
            .name = group.name,
            .members_offset = members_offset[i],
        };
        try PackedGroup.packTo(&blob, group_stored);
        try pad.arrayList(&blob, PackedGroup.alignment_bits);
    }

    return GroupsSection{
        .len = @intCast(u32, corpus.groups.len),
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}

// creates a bdz index using packed_mphf.
//   hash = bdz_search(packed_mphf, keys[i]);
//   result[hash] = idx2offset[i];
// Only u32 and []const u8 keys are supported. The result has keys.len
// entries, is allocated with `allocator` and is owned by the caller.
fn bdzIdx(
    comptime T: type,
    allocator: Allocator,
    packed_mphf: []const u8,
    keys: []const T,
    idx2offset: []const u32,
) error{OutOfMemory}![]const u32 {
    const search_fn = switch (T) {
        u32 => bdz.search_u32,
        []const u8 => bdz.search,
        else => @compileError("got " ++ @typeName(T) ++ ", only u32 and []const u8 keys are supported"),
    };
    assert(keys.len <= math.maxInt(u32));
    var result = try allocator.alloc(u32, keys.len);
    errdefer allocator.free(result);
    for (keys) |key, i|
        result[search_fn(packed_mphf, key)] = idx2offset[i];
    return result;
}

// nblocks_n returns how many blocks a given number of bytes will take.
// A block is `section_length` (1 << section_length_bits) bytes. The rounding
// is done in an integer that is wider than T, so that rounding up cannot
// overflow before the shift back down to a block count.
fn nblocks_n(comptime T: type, nbytes: usize) T {
    const B = switch (T) {
        u8 => u14,
        u16 => u22,
        u32 => u38,
        u64 => u70,
        else => @compileError("got " ++ @typeName(T) ++ ", only u8, u16, u32 and u64 are supported"),
    };
    const upper = pad.roundUp(B, section_length_bits, @intCast(B, nbytes));
    assert(upper & (section_length - 1) == 0);
    return @truncate(T, upper >> section_length_bits);
}

// nblocks returns how many blocks a particular slice will take.
fn nblocks(comptime T: type, arr: []const u8) T {
    return nblocks_n(T, arr.len);
}

// assertDefinedLayout fails compilation unless T is built only from u8, u16,
// u32 and u64, reached through arrays, pointers, enums (via their tag type)
// and structs whose container layout is not `.Auto`.
fn assertDefinedLayout(comptime T: type) void {
    return switch (T) {
        u8, u16, u32, u64 => {},
        else => switch (@typeInfo(T)) {
            .Array => assertDefinedLayout(meta.Elem(T)),
            .Pointer => |info| assertDefinedLayout(info.child),
            .Enum => assertDefinedLayout(meta.Tag(T)),
            .Struct => {
                if (meta.containerLayout(T) == .Auto)
                    @compileError("layout of " ++ @typeName(T) ++ " is undefined");
                for (meta.fields(T)) |field|
                    assertDefinedLayout(field.field_type);
            },
            else => @compileError("unexpected type " ++ @typeName(T)),
        },
    };
}

const testing = std.testing;

test "read/write via iovec" {
    const allocator = testing.allocator;
    var corpus = try Corpus.testCorpus(allocator);
    defer corpus.deinit();

    var db = try DB.fromCorpus(allocator, &corpus);
    defer db.deinit(allocator);

    // TODO: replace with an integration test when high-level
    // reader API is present
    //const blob = sections.groupmembers.blob;
    //var i: usize = 0;
    //while (i < corpus.groups.len) : (i += 1) {
    //const offset = sections.groupmembers.idx2offset[i];
    //var vit = try compress.VarintSliceIterator(blob[offset..]);
    //var it = compress.DeltaDecompressionIterator(&vit);
    //for (corpus.group2users[i]) |user_idx| {
    //    const got_user_offset = (try it.next()).?;
    //    const want_user_offset = sections.users.idx2offset[user_idx];
    //    try testing.expectEqual(got_user_offset, want_user_offset);
    //}
    //try testing.expectEqual(it.next(), null);
    //}

    //var it = PackedUser.iterator(sections.users.blob, sections.shell_reader);
    //i = 0;
    //while (i < corpus.users.len) : (i += 1) {
    //    const got = (try it.next()).?;
    //    const user = corpus.users.get(i);
    //    try testing.expectEqual(user.uid, got.uid());
    //    try testing.expectEqual(user.gid, got.gid());
    //    try testing.expectEqualStrings(user.name, got.name());
    //    try testing.expectEqualStrings(user.gecos, got.gecos());
    //    try testing.expectEqualStrings(user.home, got.home());
    //    try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader));
    //}

    const fd = try os.memfd_create("test_turbonss_db", 0);
    defer os.close(fd);

    const len = try os.writev(fd, db.iov().constSlice());
    const buf = try os.mmap(null, len, os.PROT.READ, os.MAP.SHARED, fd, 0);
    // db2 borrows from the mapping; it is unmapped only when the test ends
    defer os.munmap(buf);
    const db2 = try fromBytes(buf);

    try testing.expectEqual(corpus.groups.len, db.header.num_groups);
    try testing.expectEqual(corpus.users.len, db.header.num_users);
    try testing.expectEqual(db.header.num_groups, db2.header.num_groups);
    try testing.expectEqual(db.header.num_users, db2.header.num_users);
    const num_groups = db2.header.num_groups;
    const num_users = db2.header.num_users;
    // sections read back from bytes are padded to whole blocks, so compare
    // only the meaningful prefix
    try testing.expectEqualSlices(u32, db.idx_gid2group, db2.idx_gid2group[0..num_groups]);
    try testing.expectEqualSlices(u32, db.idx_uid2user, db2.idx_uid2user[0..num_users]);
}

test "high-level API" {
    const allocator = testing.allocator;
    var corpus = try Corpus.testCorpus(allocator);
    defer corpus.deinit();
    var db = try DB.fromCorpus(allocator, &corpus);
    defer db.deinit(allocator);

    // everything getgrnam allocates is released with the arena
    var arena = ArenaAllocator.init(allocator);
    defer arena.deinit();
    const all = try db.getgrnam(arena.allocator(), "all");
    try testing.expect(all != null);
    try testing.expectEqual(all.?.gid, 9999);
    try testing.expectEqualStrings(all.?.name, "all");
}

test "additionalGids" {
    const allocator = testing.allocator;
    var corpus = try Corpus.testCorpus(allocator);
    defer corpus.deinit();

    var additional_gids = try additionalGids(allocator, &corpus);
    defer allocator.free(additional_gids.idx2offset);
    defer allocator.free(additional_gids.blob);

    var user_idx: usize = 0;
    while (user_idx < corpus.users.len) : (user_idx += 1) {
        const groups = corpus.user2groups[user_idx];
        const offset = additional_gids.idx2offset[user_idx];
        if (groups.len == 0) {
            try testing.expect(offset == 0);
            continue;
        }
        var vit = try compress.VarintSliceIterator(additional_gids.blob[offset..]);
        var it = compress.DeltaDecompressionIterator(&vit);
        try testing.expectEqual(it.remaining(), groups.len);
        var i: u64 = 0;
        const corpusGids = corpus.groups.items(.gid);
        while (try it.next()) |gid| : (i += 1) {
            try testing.expectEqual(gid, corpusGids[groups[i]]);
        }
        try testing.expectEqual(i, groups.len);
    }
}

test "pack gids" {
    const allocator = testing.allocator;
    var corpus = try Corpus.testCorpus(allocator);
    defer corpus.deinit();

    const cmph_gid = try cmph.packU32(allocator, corpus.groups.items(.gid));
    defer allocator.free(cmph_gid);

    const k1 = bdz.search_u32(cmph_gid, 0);
    const k2 = bdz.search_u32(cmph_gid, 128);
    const k3 = bdz.search_u32(cmph_gid, 9999);
    const k4 = bdz.search_u32(cmph_gid, 100000);
    // once sorted, the four hashes are expected to be exactly 0, 1, 2, 3
    var hashes = &[_]u32{ k1, k2, k3, k4 };
    sort.sort(u32, hashes, {}, comptime sort.asc(u32));
    for (hashes) |hash, i|
        try testing.expectEqual(i, hash);
}

const hash_offsets = &[_]u32{ 0, 10, 20, 30 };

// expectUsedHashes checks that `arr` has no duplicates and contains every
// value of `hash_offsets`.
fn expectUsedHashes(allocator: Allocator, arr: []const u32) !void {
    var used = AutoHashMap(u32, void).init(allocator);
    defer used.deinit();

    for (arr) |elem|
        try used.putNoClobber(elem, {});
    for (hash_offsets) |item|
        try testing.expect(used.get(item) != null);
}

test "bdzIdx on u32" {
    const keys = [_]u32{ 42, 1, 2, 3 };
    const mphf = try cmph.packU32(testing.allocator, keys[0..]);
    defer testing.allocator.free(mphf);
    var result = try bdzIdx(u32, testing.allocator, mphf, keys[0..], hash_offsets);
    defer testing.allocator.free(result);
    try expectUsedHashes(testing.allocator, result);
}

test "bdzIdx on str" {
    const keys = [_][]const u8{ "42", "1", "2", "3" };
    const mphf = try cmph.packStr(testing.allocator, keys[0..]);
    defer testing.allocator.free(mphf);
    var result = try bdzIdx([]const u8, testing.allocator, mphf, keys[0..], hash_offsets);
    defer testing.allocator.free(result);
    try expectUsedHashes(testing.allocator, result);
}

test "nblocks" {
    const tests = .{
        .{ 0, &[_]u8{} },
        .{ 1, &[_]u8{ 1, 2, 42 } },
        .{ 1, &[_]u8{1} ** 63 },
        .{ 1, &[_]u8{1} ** 64 },
        .{ 2, &[_]u8{1} ** 65 },
        .{ 255, &[_]u8{1} ** (255 * 64) },
    };

    inline for (tests) |tt| {
        try testing.expectEqual(nblocks(u8, tt[1]), tt[0]);
        try testing.expectEqual(nblocks(u32, tt[1]), tt[0]);
        try testing.expectEqual(nblocks(u64, tt[1]), tt[0]);
    }
}