const std = @import("std");
const fmt = std.fmt;
const mem = std.mem;
const math = std.math;
const sort = std.sort;
const unicode = std.unicode;
const Allocator = std.mem.Allocator;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const ArrayList = std.ArrayList;
const MultiArrayList = std.MultiArrayList;
const StringHashMap = std.StringHashMap;
const AutoHashMap = std.AutoHashMap;
const BufSet = std.BufSet;

const pad = @import("padding.zig");
const compress = @import("compress.zig");
const shellImport = @import("shell.zig");
const userImport = @import("user.zig");
const groupImport = @import("group.zig");
const cmph = @import("cmph.zig");
const bdz = @import("bdz.zig");
const User = userImport.User;
const Group = groupImport.Group;
const ShellSections = shellImport.ShellWriter.ShellSections;
const ShellReader = shellImport.ShellReader;

// Corpus is an in-memory, cross-indexed copy of the input users and groups.
// Everything it references is allocated from its own arena and released at
// once by `deinit`.
const Corpus = struct {
    arena: std.heap.ArenaAllocator,

    // sorted by name, by unicode codepoint
    users: MultiArrayList(User),
    // sorted by gid
    groups: MultiArrayList(Group),

    // name -> index into `users` / `groups`
    name2user: StringHashMap(u32),
    name2group: StringHashMap(u32),
    // group index -> ascending user indices of its members
    group2users: []const []const u32,
    // user index -> ascending group indices the user is a member of
    user2groups: []const []const u32,

    // Clones and sorts the given users and groups, then builds the lookup
    // tables.
    //
    // Returns error.Duplicate when two users (or two groups) share a name,
    // and error.NotFound when a group lists a member that is not a known
    // user. On any error everything allocated so far is released.
    pub fn init(
        baseAllocator: Allocator,
        usersConst: []const User,
        groupsConst: []const Group,
    ) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound }!Corpus {
        var arena = std.heap.ArenaAllocator.init(baseAllocator);
        // NOTE(review): `allocator` is obtained from the local `arena`, and
        // the hash maps below are initialized with it, while `arena` itself
        // is copied into the returned struct. Presumably the maps must not
        // grow after `init` returns -- confirm.
        var allocator = arena.allocator();
        errdefer arena.deinit();

        var users_arr = try allocator.alloc(User, usersConst.len);
        var groups_arr = try allocator.alloc(Group, groupsConst.len);

        for (usersConst) |*user, i|
            users_arr[i] = try user.clone(allocator);
        for (groupsConst) |*group, i|
            groups_arr[i] = try group.clone(allocator);

        sort.sort(User, users_arr, {}, cmpUser);
        sort.sort(Group, groups_arr, {}, cmpGroup);

        var users = MultiArrayList(User){};
        try users.ensureTotalCapacity(allocator, users_arr.len);
        for (users_arr) |user|
            users.appendAssumeCapacity(user);

        var groups = MultiArrayList(Group){};
        try groups.ensureTotalCapacity(allocator, groups_arr.len);
        for (groups_arr) |group|
            groups.appendAssumeCapacity(group);

        var name2user = StringHashMap(u32).init(allocator);
        var name2group = StringHashMap(u32).init(allocator);
        for (users.items(.name)) |name, i| {
            var res1 = try name2user.getOrPut(name);
            if (res1.found_existing)
                return error.Duplicate;
            res1.value_ptr.* = @intCast(u32, i);
        }

        for (groups.items(.name)) |name, i| {
            var res1 = try name2group.getOrPut(name);
            if (res1.found_existing)
                return error.Duplicate;
            res1.value_ptr.* = @intCast(u32, i);
        }

        var group2users = try allocator.alloc([]u32, groups.len);

        // uses baseAllocator, because it will be freed before
        // returning from this function. This keeps the arena clean.
        var user2groups = try baseAllocator.alloc(ArrayListUnmanaged(u32), users.len);
        defer baseAllocator.free(user2groups);
        mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){});

        for (groups.items(.members)) |group_members, i| {
            // allocate room for every member up front, then grow the
            // visible length as members get resolved.
            var members = try allocator.alloc(u32, group_members.count());
            members.len = 0;

            var it = group_members.iterator();
            while (it.next()) |memberName| {
                if (name2user.get(memberName.*)) |user_idx| {
                    members.len += 1;
                    members[members.len - 1] = user_idx;
                    try user2groups[user_idx].append(allocator, @intCast(u32, i));
                } else return error.NotFound;
            }

            group2users[i] = members;
        }

        for (group2users) |*groupusers|
            sort.sort(u32, groupusers.*, {}, comptime sort.asc(u32));

        var user2groups_final = try allocator.alloc([]const u32, users.len);
        for (user2groups) |*usergroups, i| {
            sort.sort(u32, usergroups.items, {}, comptime sort.asc(u32));
            user2groups_final[i] = usergroups.toOwnedSlice(allocator);
        }

        return Corpus{
            .arena = arena,
            .users = users,
            .groups = groups,
            .name2user = name2user,
            .name2group = name2group,
            .group2users = group2users,
            .user2groups = user2groups_final,
        };
    }

    // Releases the arena, and with it everything the corpus references.
    pub fn deinit(self: *Corpus) void {
        self.arena.deinit();
        self.* = undefined;
    }
};

// Feeds the shell of every user in the corpus into a ShellWriter and returns
// the resulting sections, limited to `shellImport.max_shells`.
pub fn shellSections(
    allocator: Allocator,
    corpus: *const Corpus,
) error{ OutOfMemory, Overflow }!ShellSections {
    // NOTE(review): if `put` fails, `popcon` is not released here; whether
    // ShellWriter needs an explicit cleanup is not visible from this file.
    var popcon = shellImport.ShellWriter.init(allocator);
    for (corpus.users.items(.shell)) |shell|
        try popcon.put(shell);
    return popcon.toOwnedSections(shellImport.max_shells);
}

pub const UserGids = struct {
    // user index -> offset in blob
    idx2offset: []const u64,
    // compressed user gids blob. A blob contains N <= users.len items,
    // an item is:
    //   len: varint
    //   gid: [varint]varint,
    // ... and the gid list is delta-compressed.
    blob: []const u8,

    // Frees both slices using `allocator`, which must be the one passed
    // to `userGids`.
    pub fn deinit(self: *UserGids, allocator: Allocator) void {
        allocator.free(self.idx2offset);
        allocator.free(self.blob);
        self.* = undefined;
    }
};

// Builds the per-user supplementary gid lists. Caller owns the result and
// releases it with `UserGids.deinit`.
pub fn userGids(
    allocator: Allocator,
    corpus: *const Corpus,
) error{ OutOfMemory, Overflow }!UserGids {
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();
    var idx2offset = try allocator.alloc(u64, corpus.users.len);
    errdefer allocator.free(idx2offset);

    // zero'th entry is empty, so groupless users can refer to it.
    try compress.appendUvarint(&blob, 0);

    // reusable buffer for the gids of one user; it only ever grows.
    var scratch = try ArrayList(u32).initCapacity(allocator, 256);
    defer scratch.deinit();

    const corpusGids = corpus.groups.items(.gid);
    for (corpus.user2groups) |usergroups, user_idx| {
        if (usergroups.len == 0) {
            idx2offset[user_idx] = 0;
            continue;
        }
        idx2offset[user_idx] = blob.items.len;
        try scratch.ensureTotalCapacity(usergroups.len);
        scratch.items.len = usergroups.len;
        for (usergroups) |group_idx, i|
            scratch.items[i] = corpusGids[group_idx];
        // group indices are ascending and groups are sorted by gid,
        // so the gids are ascending too.
        compress.deltaCompress(u32, scratch.items) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };
        try compress.appendUvarint(&blob, usergroups.len);
        for (scratch.items) |gid|
            try compress.appendUvarint(&blob, gid);
    }

    return UserGids{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}

pub const UsersSection = struct {
    // user index -> offset in blob
    idx2offset: []const u32,
    // packed user records, each padded to PackedUser.alignment_bits
    blob: []const u8,

    // Frees both slices using `allocator`, which must be the one passed
    // to `usersSection`.
    pub fn deinit(self: *UsersSection, allocator: Allocator) void {
        allocator.free(self.idx2offset);
        allocator.free(self.blob);
        self.* = undefined;
    }
};

// Packs every user of the corpus into a single blob and records where each
// record starts. Caller owns the result and releases it with
// `UsersSection.deinit`.
pub fn usersSection(
    allocator: Allocator,
    corpus: *const Corpus,
    gids: *const UserGids,
    shells: *const ShellSections,
) error{ OutOfMemory, Overflow, InvalidRecord }!UsersSection {
    var idx2offset = try allocator.alloc(u32, corpus.users.len);
    errdefer allocator.free(idx2offset);
    // as of writing each user takes 12 bytes + blobs + padding, padded to
    // 8 bytes. 24 is an optimistic lower bound for an average record size.
    var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len);
    errdefer blob.deinit();
    var i: usize = 0;
    while (i < corpus.users.len) : (i += 1) {
        // TODO: this is inefficient; it's calling `.slice()` on every iteration
        const user = corpus.users.get(i);
        const user_offset = try math.cast(u32, blob.items.len);
        // every record must start on an 8-byte boundary
        std.debug.assert(user_offset & 7 == 0);
        idx2offset[i] = user_offset;
        try userImport.PackedUser.packTo(
            &blob,
            user,
            gids.idx2offset[i],
            shells.indices,
        );
        try pad.arrayList(&blob, userImport.PackedUser.alignment_bits);
    }
    return UsersSection{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}

pub const GroupMembers = struct {
    // group index to its offset in blob
    idx2offset: []const u64,
    // members are delta-varint encoded byte-offsets to the user struct
    blob: []const u8,

    // Frees both slices using `allocator`, which must be the one passed
    // to `groupMembers`.
    pub fn deinit(self: *GroupMembers, allocator: Allocator) void {
        allocator.free(self.idx2offset);
        allocator.free(self.blob);
        self.* = undefined;
    }
};

// Encodes the member list of every group as delta-compressed varints of
// `user2offset[user_idx]`. Caller owns the result and releases it with
// `GroupMembers.deinit`.
pub fn groupMembers(
    allocator: Allocator,
    corpus: *const Corpus,
    user2offset: []const u32,
) error{OutOfMemory}!GroupMembers {
    var idx2offset = try allocator.alloc(u64, corpus.groups.len);
    errdefer allocator.free(idx2offset);
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();
    // zero'th entry is empty, so empty groups can refer to it
    try compress.appendUvarint(&blob, 0);

    var scratch = try ArrayList(u32).initCapacity(allocator, 1024);
    defer scratch.deinit();

    for (corpus.group2users) |members, group_idx| {
        if (members.len == 0) {
            idx2offset[group_idx] = 0;
            continue;
        }

        idx2offset[group_idx] = blob.items.len;
        try scratch.ensureTotalCapacity(members.len);
        scratch.items.len = members.len;
        for (members) |user_idx, i|
            scratch.items[i] = user2offset[user_idx];

        compress.deltaCompress(u32, scratch.items) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };
        try compress.appendUvarint(&blob, members.len);
        for (scratch.items) |elem|
            try compress.appendUvarint(&blob, elem);
    }
    return GroupMembers{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}

pub const GroupsSection = struct {
    // group index -> offset in blob
    idx2offset: []const u32,
    // packed group records, each padded to PackedGroup.alignment_bits
    blob: []const u8,

    // Frees both slices using `allocator`, which must be the one passed
    // to `groupsSection`.
    pub fn deinit(self: *GroupsSection, allocator: Allocator) void {
        allocator.free(self.idx2offset);
        allocator.free(self.blob);
        self.* = undefined;
    }
};

// Packs every group of the corpus into a single blob and records where each
// record starts. `members_offset[i]` is stored in the record of group `i`.
// Caller owns the result and releases it with `GroupsSection.deinit`.
pub fn groupsSection(
    allocator: Allocator,
    corpus: *const Corpus,
    members_offset: []const u64,
) error{ OutOfMemory, Overflow, InvalidRecord }!GroupsSection {
    var idx2offset = try allocator.alloc(u32, corpus.groups.len);
    errdefer allocator.free(idx2offset);

    var blob = try ArrayList(u8).initCapacity(allocator, 8 * corpus.groups.len);
    errdefer blob.deinit();

    var i: usize = 0;
    while (i < corpus.groups.len) : (i += 1) {
        // TODO: this is inefficient; it's calling `.slice()` on every iteration
        const group = corpus.groups.get(i);
        const group_offset = try math.cast(u32, blob.items.len);
        // every record must start on an 8-byte boundary
        std.debug.assert(group_offset & 7 == 0);
        idx2offset[i] = group_offset;
        const group_stored = groupImport.GroupStored{
            .gid = group.gid,
            .name = group.name,
            .members_offset = members_offset[i],
        };
        try groupImport.PackedGroup.packTo(&blob, group_stored);
        try pad.arrayList(&blob, groupImport.PackedGroup.alignment_bits);
    }

    return GroupsSection{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}

// creates a bdz index using packed_mphf. buf[bdz_search(key)] = index(keys, key)
// Only `u32` and `[]const u8` keys are supported. Caller owns the returned
// slice.
pub fn bdzIdx(
    comptime T: type,
    allocator: Allocator,
    packed_mphf: []const u8,
    keys: []const T,
) error{OutOfMemory}![]const u32 {
    const search_fn = comptime blk: {
        switch (T) {
            u32 => break :blk bdz.search_u32,
            []const u8 => break :blk bdz.search,
            else => unreachable,
        }
    };

    // key positions are stored as u32
    std.debug.assert(keys.len <= math.maxInt(u32));
    var result = try allocator.alloc(u32, keys.len);
    for (keys) |key, i| {
        const hash = search_fn(packed_mphf, key);
        result[hash] = @intCast(u32, i);
    }

    return result;
}

// cmpUser compares two users for sorting. By username's utf8 codepoints, ascending.
// It is a strict "less than": two equal names compare as false both ways.
// Both names must be valid utf8.
fn cmpUser(_: void, a: User, b: User) bool {
    var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator();
    var utf8_b = (unicode.Utf8View.init(b.name) catch unreachable).iterator();
    while (utf8_a.nextCodepoint()) |codepoint_a| {
        if (utf8_b.nextCodepoint()) |codepoint_b| {
            if (codepoint_a == codepoint_b) {
                continue;
            } else return codepoint_a < codepoint_b;
        }

        // b ran out first: b is a strict prefix of a, so a sorts after b.
        return false;
    }

    // a ran out. If b still has codepoints, a is a strict prefix of b and
    // sorts first; otherwise the names are equal and a is not less than b.
    return utf8_b.nextCodepoint() != null;
}

// cmpGroup compares two groups for sorting. By gid, ascending.
fn cmpGroup(_: void, a: Group, b: Group) bool {
    return a.gid < b.gid;
}

// AllSections bundles every section and index built from a corpus, together
// with the allocator that owns them.
pub const AllSections = struct {
    allocator: Allocator,

    bdz_gid: []const u8,
    bdz_groupname: []const u8,
    bdz_uid: []const u8,
    bdz_username: []const u8,
    users: UsersSection,
    shell_sections: ShellSections,
    shell_reader: ShellReader,
    user_gids: UserGids,
    group_members: GroupMembers,
    groups: GroupsSection,
    idx_gid2group: []const u32,
    idx_groupname2group: []const u32,
    idx_uid2user: []const u32,
    idx_name2user: []const u32,

    // Builds all sections from `corpus`. On failure, everything that was
    // already built is released before the error is returned. On success
    // the caller releases the result with `deinit`.
    pub fn init(
        allocator: Allocator,
        corpus: *const Corpus,
    ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections {
        const bdz_gid = try cmph.packU32(allocator, corpus.groups.items(.gid));
        errdefer allocator.free(bdz_gid);
        const bdz_groupname = try cmph.packStr(allocator, corpus.groups.items(.name));
        errdefer allocator.free(bdz_groupname);
        const bdz_uid = try cmph.packU32(allocator, corpus.users.items(.uid));
        errdefer allocator.free(bdz_uid);
        const bdz_username = try cmph.packStr(allocator, corpus.users.items(.name));
        errdefer allocator.free(bdz_username);

        // the sections are `var` so that their `deinit` (which takes a
        // mutable pointer) can be called on the error path.
        var shell_sections = try shellSections(allocator, corpus);
        errdefer shell_sections.deinit();
        var user_gids = try userGids(allocator, corpus);
        errdefer user_gids.deinit(allocator);
        var users = try usersSection(
            allocator,
            corpus,
            &user_gids,
            &shell_sections,
        );
        errdefer users.deinit(allocator);
        var group_members = try groupMembers(
            allocator,
            corpus,
            users.idx2offset,
        );
        errdefer group_members.deinit(allocator);
        var groups = try groupsSection(
            allocator,
            corpus,
            group_members.idx2offset,
        );
        errdefer groups.deinit(allocator);

        // NOTE(review): the reader is built from slices of the local
        // `shell_sections`, which is then copied into the returned struct.
        // If `index`/`blob` keep their bytes inline, these slices do not
        // follow the copy -- confirm against shell.zig.
        const shell_reader = shellImport.ShellReader.init(
            mem.sliceAsBytes(shell_sections.index.constSlice()),
            mem.sliceAsBytes(shell_sections.blob.constSlice()),
        );

        const idx_gid2group = try bdzIdx(
            u32,
            allocator,
            bdz_gid,
            corpus.groups.items(.gid),
        );
        errdefer allocator.free(idx_gid2group);
        const idx_groupname2group = try bdzIdx(
            []const u8,
            allocator,
            bdz_groupname,
            corpus.groups.items(.name),
        );
        errdefer allocator.free(idx_groupname2group);
        const idx_uid2user = try bdzIdx(
            u32,
            allocator,
            bdz_uid,
            corpus.users.items(.uid),
        );
        errdefer allocator.free(idx_uid2user);
        // last fallible step: nothing can fail after it, so no errdefer.
        const idx_name2user = try bdzIdx(
            []const u8,
            allocator,
            bdz_username,
            corpus.users.items(.name),
        );

        return AllSections{
            .allocator = allocator,
            .bdz_gid = bdz_gid,
            .bdz_groupname = bdz_groupname,
            .bdz_uid = bdz_uid,
            .bdz_username = bdz_username,
            .shell_sections = shell_sections,
            .shell_reader = shell_reader,
            .user_gids = user_gids,
            .users = users,
            .group_members = group_members,
            .groups = groups,
            .idx_gid2group = idx_gid2group,
            .idx_groupname2group = idx_groupname2group,
            .idx_uid2user = idx_uid2user,
            .idx_name2user = idx_name2user,
        };
    }

    // Releases every section and index with the allocator given to `init`.
    pub fn deinit(self: *AllSections) void {
        self.allocator.free(self.bdz_gid);
        self.allocator.free(self.bdz_groupname);
        self.allocator.free(self.bdz_uid);
        self.allocator.free(self.bdz_username);
        self.shell_sections.deinit();
        self.user_gids.deinit(self.allocator);
        self.users.deinit(self.allocator);
        self.group_members.deinit(self.allocator);
        self.groups.deinit(self.allocator);
        self.allocator.free(self.idx_gid2group);
        self.allocator.free(self.idx_groupname2group);
        self.allocator.free(self.idx_uid2user);
        self.allocator.free(self.idx_name2user);
        self.* = undefined;
    }
};

const testing = std.testing;

// testCorpus builds a small corpus of 4 users and 3 groups for the tests
// below. Caller must `deinit` the result.
fn testCorpus(allocator: Allocator) !Corpus {
    const users = [_]User{
        User{
            .uid = 128,
            .gid = 128,
            .name = "vidmantas",
            .gecos = "Vidmantas Kaminskas",
            .home = "/home/vidmantas",
            .shell = "/bin/bash",
        },
        User{
            .uid = 0,
            .gid = math.maxInt(u32),
            .name = "Name" ** 8,
            .gecos = "Gecos" ** 51,
            .home = "Home" ** 16,
            .shell = "She.LllL" ** 8,
        },
        User{
            .uid = 1002,
            .gid = 1002,
            .name = "svc-bar",
            .gecos = "",
            .home = "/",
            .shell = "/",
        },
        User{
            .uid = 65534,
            .gid = 65534,
            .name = "nobody",
            .gecos = "nobody",
            .home = "/nonexistent",
            .shell = "/usr/sbin/nologin",
        },
    };

    var members1 = try groupImport.someMembers(
        allocator,
        &[_][]const u8{"vidmantas"},
    );
    defer members1.deinit();

    var members2 = try groupImport.someMembers(
        allocator,
        &[_][]const u8{ "svc-bar", "vidmantas" },
    );
    defer members2.deinit();

    var members3 = try groupImport.someMembers(
        allocator,
        &[_][]const u8{ "svc-bar", "Name" ** 8, "vidmantas" },
    );
    defer members3.deinit();

    const groups = [_]Group{
        Group{
            .gid = 128,
            .name = "vidmantas",
            .members = members1,
        },
        Group{
            .gid = 9999,
            .name = "all",
            .members = members3,
        },
        Group{
            .gid = 0,
            .name = "service-account",
            .members = members2,
        },
    };

    return try Corpus.init(allocator, users[0..], groups[0..]);
}

test "test corpus" {
    var corpus = try testCorpus(testing.allocator);
    defer corpus.deinit();

    // expected user indices after sorting by name
    const name_name = 0;
    const nobody = 1;
    const svc_bar = 2;
    const vidmantas = 3;

    const usernames = corpus.users.items(.name);
    try testing.expectEqualStrings(usernames[name_name], "Name" ** 8);
    try testing.expectEqualStrings(usernames[nobody], "nobody");
    try testing.expectEqualStrings(usernames[svc_bar], "svc-bar");
    try testing.expectEqualStrings(usernames[vidmantas], "vidmantas");

    // expected group indices after sorting by gid
    const g_service_account = 0;
    const g_vidmantas = 1;
    const g_all = 2;

    const groupnames = corpus.groups.items(.name);
    try testing.expectEqualStrings(groupnames[g_service_account], "service-account");
    try testing.expectEqualStrings(groupnames[g_vidmantas], "vidmantas");
    try testing.expectEqualStrings(groupnames[g_all], "all");

    try testing.expectEqual(corpus.name2user.get("404"), null);
    try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas);
    try testing.expectEqual(corpus.name2group.get("404"), null);
    try testing.expectEqual(corpus.name2group.get("vidmantas").?, g_vidmantas);

    const membersOfAll = corpus.group2users[g_all];
    try testing.expectEqual(membersOfAll[0], name_name);
    try testing.expectEqual(membersOfAll[1], svc_bar);
    try testing.expectEqual(membersOfAll[2], vidmantas);

    const groupsOfVidmantas = corpus.user2groups[vidmantas];
    try testing.expectEqual(groupsOfVidmantas[0], g_service_account);
    try testing.expectEqual(groupsOfVidmantas[1], g_vidmantas);
    try testing.expectEqual(groupsOfVidmantas[2], g_all);
}

test "test groups, group members and users" {
    const allocator = testing.allocator;
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

    var sections = try AllSections.init(allocator, &corpus);
    defer sections.deinit();

    const blob = sections.group_members.blob;
    var i: usize = 0;
    while (i < corpus.groups.len) : (i += 1) {
        const offset = sections.group_members.idx2offset[i];
        var vit = try compress.VarintSliceIterator(blob[offset..]);
        var it = compress.DeltaDecompressionIterator(&vit);
        for (corpus.group2users[i]) |user_idx| {
            const got_user_offset = (try it.next()).?;
            const want_user_offset = sections.users.idx2offset[user_idx];
            try testing.expectEqual(got_user_offset, want_user_offset);
        }
        try testing.expectEqual(it.next(), null);
    }

    var it = userImport.PackedUser.iterator(
        sections.users.blob,
        sections.shell_reader,
    );
    i = 0;
    while (i < corpus.users.len) : (i += 1) {
        const got = (try it.next()).?;
        const user = corpus.users.get(i);
        try testing.expectEqual(user.uid, got.uid());
        try testing.expectEqual(user.gid, got.gid());
        try testing.expectEqualStrings(user.name, got.name());
        try testing.expectEqualStrings(user.gecos, got.gecos());
        try testing.expectEqualStrings(user.home, got.home());
        try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader));
    }
}

test "userGids" {
    const allocator = testing.allocator;
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

    var user_gids = try userGids(allocator, &corpus);
    defer user_gids.deinit(allocator);

    var user_idx: usize = 0;
    while (user_idx < corpus.users.len) : (user_idx += 1) {
        const groups = corpus.user2groups[user_idx];
        const offset = user_gids.idx2offset[user_idx];
        if (groups.len == 0) {
            try testing.expect(offset == 0);
            continue;
        }
        var vit = try compress.VarintSliceIterator(user_gids.blob[offset..]);
        var it = compress.DeltaDecompressionIterator(&vit);
        try testing.expectEqual(it.remaining(), groups.len);
        var i: u64 = 0;
        const corpusGids = corpus.groups.items(.gid);
        while (try it.next()) |gid| : (i += 1) {
            try testing.expectEqual(gid, corpusGids[groups[i]]);
        }
        try testing.expectEqual(i, groups.len);
    }
}

test "pack gids" {
    const allocator = testing.allocator;
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

    const cmph_gid = try cmph.packU32(allocator, corpus.groups.items(.gid));
    defer allocator.free(cmph_gid);

    const k1 = bdz.search_u32(cmph_gid, 0);
    const k2 = bdz.search_u32(cmph_gid, 128);
    const k3 = bdz.search_u32(cmph_gid, 9999);
    var hashes = &[_]u32{ k1, k2, k3 };
    sort.sort(u32, hashes, {}, comptime sort.asc(u32));
    for (hashes) |hash, i|
        try testing.expectEqual(i, hash);
}

// testUser returns a zeroed User that only has its name set.
fn testUser(name: []const u8) User {
    var result = std.mem.zeroes(User);
    result.name = name;
    return result;
}

test "users compare function" {
    const a = testUser("a");
    const b = testUser("b");
    const bb = testUser("bb");
    try testing.expect(cmpUser({}, a, b));
    try testing.expect(!cmpUser({}, b, a));
    try testing.expect(cmpUser({}, a, bb));
    try testing.expect(!cmpUser({}, bb, a));
    try testing.expect(cmpUser({}, b, bb));
    try testing.expect(!cmpUser({}, bb, b));
    // equal names are never "less than" each other
    try testing.expect(!cmpUser({}, a, a));
    try testing.expect(!cmpUser({}, bb, bb));
}

test "bdzIdx" {
    const allocator = testing.allocator;
    const k_u32 = [_]u32{ 42, 1, 2, 3 };
    const k_str = [_][]const u8{ "42", "1", "2", "3" };

    const mphf_str = try cmph.packStr(allocator, k_str[0..]);
    const mphf_u32 = try cmph.packU32(allocator, k_u32[0..]);
    defer allocator.free(mphf_str);
    defer allocator.free(mphf_u32);

    {
        var result = try bdzIdx(u32, allocator, mphf_u32, k_u32[0..]);
        defer allocator.free(result);
        var used = [_]bool{false} ** 4;
        for (result) |elem|
            used[result[elem]] = true;
        for (used) |item|
            try testing.expect(item);
    }

    {
        var result = try bdzIdx([]const u8, allocator, mphf_str, k_str[0..]);
        defer allocator.free(result);
        var used = [_]bool{false} ** 4;
        for (result) |elem|
            used[result[elem]] = true;
        for (used) |item|
            try testing.expect(item);
    }
}