remove slice-based corpus.users and corpus.groups

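The slice-based fields are now fully replaced by their MultiArrayList counterparts (formerly `usersm` and `groupsm`), which take over the `users` and `groups` names.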
This commit is contained in:
Motiejus Jakštys 2022-03-15 10:07:05 +02:00 committed by Motiejus Jakštys
parent 0a0559824a
commit 249cdb1a31
2 changed files with 74 additions and 55 deletions

View File

@ -377,7 +377,7 @@ Section creation order:
1. ✅ Users. Requires `userGids` and shell. 1. ✅ Users. Requires `userGids` and shell.
1. ✅ Groupmembers. Requires Users. 1. ✅ Groupmembers. Requires Users.
1. ✅ Groups. Requires Groupmembers. 1. ✅ Groups. Requires Groupmembers.
1. `idx_*`. Requires offsets to Groups and Users. 1. `idx_*`. Requires offsets to Groups and Users.
1. Header. 1. Header.
[git-subtrac]: https://apenwarr.ca/log/20191109 [git-subtrac]: https://apenwarr.ca/log/20191109

View File

@ -28,13 +28,9 @@ const Corpus = struct {
arena: std.heap.ArenaAllocator, arena: std.heap.ArenaAllocator,
// sorted by name, by unicode codepoint // sorted by name, by unicode codepoint
users: []User, users: MultiArrayList(User),
// sorted by gid // sorted by gid
groups: []Group, groups: MultiArrayList(Group),
// columnar users and groups of the above
usersm: MultiArrayList(User),
groupsm: MultiArrayList(Group),
name2user: StringHashMap(u32), name2user: StringHashMap(u32),
name2group: StringHashMap(u32), name2group: StringHashMap(u32),
@ -50,35 +46,35 @@ const Corpus = struct {
var allocator = arena.allocator(); var allocator = arena.allocator();
errdefer arena.deinit(); errdefer arena.deinit();
var users = try allocator.alloc(User, usersConst.len); var users_arr = try allocator.alloc(User, usersConst.len);
var groups = try allocator.alloc(Group, groupsConst.len); var groups_arr = try allocator.alloc(Group, groupsConst.len);
for (usersConst) |*user, i| for (usersConst) |*user, i|
users[i] = try user.clone(allocator); users_arr[i] = try user.clone(allocator);
for (groupsConst) |*group, i| for (groupsConst) |*group, i|
groups[i] = try group.clone(allocator); groups_arr[i] = try group.clone(allocator);
sort.sort(User, users, {}, cmpUser); sort.sort(User, users_arr, {}, cmpUser);
sort.sort(Group, groups, {}, cmpGroup); sort.sort(Group, groups_arr, {}, cmpGroup);
var usersm = MultiArrayList(User){}; var users = MultiArrayList(User){};
try usersm.ensureTotalCapacity(allocator, users.len); try users.ensureTotalCapacity(allocator, users_arr.len);
for (users) |user| for (users_arr) |user|
usersm.appendAssumeCapacity(user); users.appendAssumeCapacity(user);
var groupsm = MultiArrayList(Group){}; var groups = MultiArrayList(Group){};
try groupsm.ensureTotalCapacity(allocator, groups.len); try groups.ensureTotalCapacity(allocator, groups_arr.len);
for (groups) |group| for (groups_arr) |group|
groupsm.appendAssumeCapacity(group); groups.appendAssumeCapacity(group);
var name2user = StringHashMap(u32).init(allocator); var name2user = StringHashMap(u32).init(allocator);
var name2group = StringHashMap(u32).init(allocator); var name2group = StringHashMap(u32).init(allocator);
for (usersm.items(.name)) |name, i| { for (users.items(.name)) |name, i| {
var res1 = try name2user.getOrPut(name); var res1 = try name2user.getOrPut(name);
if (res1.found_existing) if (res1.found_existing)
return error.Duplicate; return error.Duplicate;
res1.value_ptr.* = @intCast(u32, i); res1.value_ptr.* = @intCast(u32, i);
} }
for (groupsm.items(.name)) |name, i| { for (groups.items(.name)) |name, i| {
var res1 = try name2group.getOrPut(name); var res1 = try name2group.getOrPut(name);
if (res1.found_existing) if (res1.found_existing)
return error.Duplicate; return error.Duplicate;
@ -93,7 +89,7 @@ const Corpus = struct {
defer baseAllocator.free(user2groups); defer baseAllocator.free(user2groups);
mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){}); mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){});
for (groupsm.items(.members)) |group_members, i| { for (groups.items(.members)) |group_members, i| {
var members = try allocator.alloc(u32, group_members.count()); var members = try allocator.alloc(u32, group_members.count());
members.len = 0; members.len = 0;
@ -123,8 +119,6 @@ const Corpus = struct {
.arena = arena, .arena = arena,
.users = users, .users = users,
.groups = groups, .groups = groups,
.usersm = usersm,
.groupsm = groupsm,
.name2user = name2user, .name2user = name2user,
.name2group = name2group, .name2group = name2group,
.group2users = group2users, .group2users = group2users,
@ -143,7 +137,7 @@ pub fn shellSections(
corpus: *const Corpus, corpus: *const Corpus,
) error{ OutOfMemory, Overflow }!ShellSections { ) error{ OutOfMemory, Overflow }!ShellSections {
var popcon = shellImport.ShellWriter.init(allocator); var popcon = shellImport.ShellWriter.init(allocator);
for (corpus.usersm.items(.shell)) |shell| for (corpus.users.items(.shell)) |shell|
try popcon.put(shell); try popcon.put(shell);
return popcon.toOwnedSections(shellImport.max_shells); return popcon.toOwnedSections(shellImport.max_shells);
} }
@ -187,8 +181,9 @@ pub fn userGids(
idx2offset[user_idx] = blob.items.len; idx2offset[user_idx] = blob.items.len;
scratch = try allocator.realloc(scratch, usergroups.len); scratch = try allocator.realloc(scratch, usergroups.len);
scratch.len = usergroups.len; scratch.len = usergroups.len;
const corpusGids = corpus.groups.items(.gid);
for (usergroups) |group_idx, i| for (usergroups) |group_idx, i|
scratch[i] = corpus.groups[group_idx].gid; scratch[i] = corpusGids[group_idx];
compress.deltaCompress(u32, scratch) catch |err| switch (err) { compress.deltaCompress(u32, scratch) catch |err| switch (err) {
error.NotSorted => unreachable, error.NotSorted => unreachable,
}; };
@ -227,7 +222,10 @@ pub fn usersSection(
// 8 bytes. 24 is an optimistic lower bound for an average record size. // 8 bytes. 24 is an optimistic lower bound for an average record size.
var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len); var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len);
errdefer blob.deinit(); errdefer blob.deinit();
for (corpus.users) |user, i| { var i: usize = 0;
while (i < corpus.users.len) : (i += 1) {
// TODO: this is inefficient; it's calling `.slice()` on every iteration
const user = corpus.users.get(i);
const user_offset = try math.cast(u32, blob.items.len); const user_offset = try math.cast(u32, blob.items.len);
std.debug.assert(user_offset & 7 == 0); std.debug.assert(user_offset & 7 == 0);
idx2offset[i] = user_offset; idx2offset[i] = user_offset;
@ -321,7 +319,10 @@ pub fn groupsSection(
var blob = try ArrayList(u8).initCapacity(allocator, 8 * corpus.groups.len); var blob = try ArrayList(u8).initCapacity(allocator, 8 * corpus.groups.len);
errdefer blob.deinit(); errdefer blob.deinit();
for (corpus.groups) |group, i| { var i: usize = 0;
while (i < corpus.groups.len) : (i += 1) {
// TODO: this is inefficient; it's calling `.slice()` on every iteration
const group = corpus.groups.get(i);
const group_offset = try math.cast(u32, blob.items.len); const group_offset = try math.cast(u32, blob.items.len);
std.debug.assert(group_offset & 7 == 0); std.debug.assert(group_offset & 7 == 0);
idx2offset[i] = group_offset; idx2offset[i] = group_offset;
@ -407,10 +408,10 @@ pub const AllSections = struct {
allocator: Allocator, allocator: Allocator,
corpus: *const Corpus, corpus: *const Corpus,
) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections { ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections {
const bdz_gid = try cmph.packU32(allocator, corpus.groupsm.items(.gid)); const bdz_gid = try cmph.packU32(allocator, corpus.groups.items(.gid));
const bdz_groupname = try cmph.packStr(allocator, corpus.groupsm.items(.name)); const bdz_groupname = try cmph.packStr(allocator, corpus.groups.items(.name));
const bdz_uid = try cmph.packU32(allocator, corpus.usersm.items(.uid)); const bdz_uid = try cmph.packU32(allocator, corpus.users.items(.uid));
const bdz_username = try cmph.packStr(allocator, corpus.usersm.items(.name)); const bdz_username = try cmph.packStr(allocator, corpus.users.items(.name));
const shell_sections = try shellSections(allocator, corpus); const shell_sections = try shellSections(allocator, corpus);
const user_gids = try userGids(allocator, corpus); const user_gids = try userGids(allocator, corpus);
const users = try usersSection( const users = try usersSection(
@ -438,13 +439,25 @@ pub const AllSections = struct {
u32, u32,
allocator, allocator,
bdz_gid, bdz_gid,
corpus.groupsm.items(.gid), corpus.groups.items(.gid),
); );
var idx_groupname2group = try bdzIdx( var idx_groupname2group = try bdzIdx(
[]const u8, []const u8,
allocator, allocator,
bdz_gid, bdz_groupname,
corpus.groupsm.items(.name), corpus.groups.items(.name),
);
var idx_uid2user = try bdzIdx(
u32,
allocator,
bdz_uid,
corpus.users.items(.uid),
);
var idx_name2user = try bdzIdx(
[]const u8,
allocator,
bdz_username,
corpus.users.items(.name),
); );
return AllSections{ return AllSections{
@ -461,8 +474,8 @@ pub const AllSections = struct {
.groups = groups, .groups = groups,
.idx_gid2group = idx_gid2group, .idx_gid2group = idx_gid2group,
.idx_groupname2group = idx_groupname2group, .idx_groupname2group = idx_groupname2group,
.idx_uid2user = undefined, .idx_uid2user = idx_uid2user,
.idx_name2user = undefined, .idx_name2user = idx_name2user,
}; };
} }
@ -478,6 +491,8 @@ pub const AllSections = struct {
self.groups.deinit(self.allocator); self.groups.deinit(self.allocator);
self.allocator.free(self.idx_gid2group); self.allocator.free(self.idx_gid2group);
self.allocator.free(self.idx_groupname2group); self.allocator.free(self.idx_groupname2group);
self.allocator.free(self.idx_uid2user);
self.allocator.free(self.idx_name2user);
self.* = undefined; self.* = undefined;
} }
}; };
@ -559,18 +574,20 @@ test "test corpus" {
const svc_bar = 2; const svc_bar = 2;
const vidmantas = 3; const vidmantas = 3;
try testing.expectEqualStrings(corpus.users[name_name].name, "Name" ** 8); const usernames = corpus.users.items(.name);
try testing.expectEqualStrings(corpus.users[nobody].name, "nobody"); try testing.expectEqualStrings(usernames[name_name], "Name" ** 8);
try testing.expectEqualStrings(corpus.users[svc_bar].name, "svc-bar"); try testing.expectEqualStrings(usernames[nobody], "nobody");
try testing.expectEqualStrings(corpus.users[vidmantas].name, "vidmantas"); try testing.expectEqualStrings(usernames[svc_bar], "svc-bar");
try testing.expectEqualStrings(usernames[vidmantas], "vidmantas");
const g_service_account = 0; const g_service_account = 0;
const g_vidmantas = 1; const g_vidmantas = 1;
const g_all = 2; const g_all = 2;
try testing.expectEqualStrings(corpus.groups[g_service_account].name, "service-account"); const groupnames = corpus.groups.items(.name);
try testing.expectEqualStrings(corpus.groups[g_vidmantas].name, "vidmantas"); try testing.expectEqualStrings(groupnames[g_service_account], "service-account");
try testing.expectEqualStrings(corpus.groups[g_all].name, "all"); try testing.expectEqualStrings(groupnames[g_vidmantas], "vidmantas");
try testing.expectEqualStrings(groupnames[g_all], "all");
try testing.expectEqual(corpus.name2user.get("404"), null); try testing.expectEqual(corpus.name2user.get("404"), null);
try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas); try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas);
@ -616,13 +633,14 @@ test "test groups, group members and users" {
); );
i = 0; i = 0;
while (i < corpus.users.len) : (i += 1) { while (i < corpus.users.len) : (i += 1) {
const user = (try it.next()).?; const got = (try it.next()).?;
try testing.expectEqual(corpus.users[i].uid, user.uid()); const user = corpus.users.get(i);
try testing.expectEqual(corpus.users[i].gid, user.gid()); try testing.expectEqual(user.uid, got.uid());
try testing.expectEqualStrings(corpus.users[i].name, user.name()); try testing.expectEqual(user.gid, got.gid());
try testing.expectEqualStrings(corpus.users[i].gecos, user.gecos()); try testing.expectEqualStrings(user.name, got.name());
try testing.expectEqualStrings(corpus.users[i].home, user.home()); try testing.expectEqualStrings(user.gecos, got.gecos());
try testing.expectEqualStrings(corpus.users[i].shell, user.shell(sections.shell_reader)); try testing.expectEqualStrings(user.home, got.home());
try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader));
} }
} }
@ -646,8 +664,9 @@ test "userGids" {
var it = compress.DeltaDecompressionIterator(&vit); var it = compress.DeltaDecompressionIterator(&vit);
try testing.expectEqual(it.remaining(), groups.len); try testing.expectEqual(it.remaining(), groups.len);
var i: u64 = 0; var i: u64 = 0;
const corpusGids = corpus.groups.items(.gid);
while (try it.next()) |gid| : (i += 1) { while (try it.next()) |gid| : (i += 1) {
try testing.expectEqual(gid, corpus.groups[groups[i]].gid); try testing.expectEqual(gid, corpusGids[groups[i]]);
} }
try testing.expectEqual(i, groups.len); try testing.expectEqual(i, groups.len);
} }
@ -658,7 +677,7 @@ test "pack gids" {
var corpus = try testCorpus(allocator); var corpus = try testCorpus(allocator);
defer corpus.deinit(); defer corpus.deinit();
const cmph_gid = try cmph.packU32(allocator, corpus.groupsm.items(.gid)); const cmph_gid = try cmph.packU32(allocator, corpus.groups.items(.gid));
defer allocator.free(cmph_gid); defer allocator.free(cmph_gid);
const k1 = bdz.search_u32(cmph_gid, 0); const k1 = bdz.search_u32(cmph_gid, 0);