add a struct for all sections

This commit is contained in:
Motiejus Jakštys 2022-03-07 06:09:20 +02:00 committed by Motiejus Jakštys
parent 24d984d712
commit 9bf0a35689
2 changed files with 199 additions and 115 deletions

View File

@ -364,8 +364,8 @@ shellIndex len(shells)*2 shell index array
shellBlob <= 4032 shell data blob (max 63*64 bytes) shellBlob <= 4032 shell data blob (max 63*64 bytes)
groups ? packed Group entries (8b padding) groups ? packed Group entries (8b padding)
users ? packed User entries (8b padding) users ? packed User entries (8b padding)
groupMembers ? per-group memberlist (no padding) groupMembers ? per-group varint memberlist (no padding)
userGids ? per-user gidlist entries (8b padding) userGids ? per-user varint gidlist (8b padding)
``` ```
Section creation order: Section creation order:

View File

@ -1,5 +1,6 @@
const std = @import("std"); const std = @import("std");
const fmt = std.fmt; const fmt = std.fmt;
const mem = std.mem;
const math = std.math; const math = std.math;
const sort = std.sort; const sort = std.sort;
const unicode = std.unicode; const unicode = std.unicode;
@ -35,12 +36,12 @@ const Corpus = struct {
groupsMulti: MultiArrayList(Group), groupsMulti: MultiArrayList(Group),
// pointing to `users` and `groups` slices above. // pointing to `users` and `groups` slices above.
name2user: StringHashMap(*const User), name2user: StringHashMap(usize),
uid2user: AutoHashMap(u32, *const User), uid2user: AutoHashMap(u32, usize),
name2group: StringHashMap(*const Group), name2group: StringHashMap(usize),
gid2group: AutoHashMap(u32, *const Group), gid2group: AutoHashMap(u32, usize),
groupname2users: StringHashMap([]*const User), groupname2users: StringHashMap([]usize),
username2groups: StringHashMap([]*const Group), username2groups: StringHashMap([]usize),
pub fn init( pub fn init(
baseAllocator: Allocator, baseAllocator: Allocator,
@ -70,60 +71,60 @@ const Corpus = struct {
for (groups) |group| for (groups) |group|
groupsMulti.appendAssumeCapacity(group); groupsMulti.appendAssumeCapacity(group);
var name2user = StringHashMap(*const User).init(allocator); var name2user = StringHashMap(usize).init(allocator);
var uid2user = AutoHashMap(u32, *const User).init(allocator); var uid2user = AutoHashMap(u32, usize).init(allocator);
var name2group = StringHashMap(*const Group).init(allocator); var name2group = StringHashMap(usize).init(allocator);
var gid2group = AutoHashMap(u32, *const Group).init(allocator); var gid2group = AutoHashMap(u32, usize).init(allocator);
for (users) |*user| { for (users) |*user, i| {
var res1 = try name2user.getOrPut(user.name); var res1 = try name2user.getOrPut(user.name);
if (res1.found_existing) if (res1.found_existing)
return error.Duplicate; return error.Duplicate;
res1.value_ptr.* = user; res1.value_ptr.* = i;
var res2 = try uid2user.getOrPut(user.uid); var res2 = try uid2user.getOrPut(user.uid);
if (res2.found_existing) if (res2.found_existing)
return error.Duplicate; return error.Duplicate;
res2.value_ptr.* = user; res2.value_ptr.* = i;
} }
for (groups) |*group| { for (groups) |*group, i| {
var res1 = try name2group.getOrPut(group.name); var res1 = try name2group.getOrPut(group.name);
if (res1.found_existing) if (res1.found_existing)
return error.Duplicate; return error.Duplicate;
res1.value_ptr.* = group; res1.value_ptr.* = i;
var res2 = try gid2group.getOrPut(group.gid); var res2 = try gid2group.getOrPut(group.gid);
if (res2.found_existing) if (res2.found_existing)
return error.Duplicate; return error.Duplicate;
res2.value_ptr.* = group; res2.value_ptr.* = i;
} }
var groupname2users = StringHashMap([]*const User).init(allocator); var groupname2users = StringHashMap([]usize).init(allocator);
// uses baseAllocator, because it will be freed before // uses baseAllocator, because it will be freed before
// returning from this function. This keeps the arena clean. // returning from this function. This keeps the arena clean.
var username2groups = StringHashMap( var username2groups = StringHashMap(
ArrayListUnmanaged(*const Group), ArrayListUnmanaged(usize),
).init(baseAllocator); ).init(baseAllocator);
defer username2groups.deinit(); defer username2groups.deinit();
for (groups) |*group| { for (groups) |*group, i| {
var members = try allocator.alloc(*const User, group.members.count()); var members = try allocator.alloc(usize, group.members.count());
members.len = 0; members.len = 0;
var it = group.members.iterator(); var it = group.members.iterator();
while (it.next()) |memberName| { while (it.next()) |memberName| {
if (name2user.get(memberName.*)) |user| { if (name2user.get(memberName.*)) |idx| {
members.len += 1; members.len += 1;
members[members.len - 1] = user; members[members.len - 1] = idx;
} else { } else {
return error.NotFound; return error.NotFound;
} }
var groupsOfMember = try username2groups.getOrPut(memberName.*); var groupsOfMember = try username2groups.getOrPut(memberName.*);
if (!groupsOfMember.found_existing) if (!groupsOfMember.found_existing)
groupsOfMember.value_ptr.* = ArrayListUnmanaged(*const Group){}; groupsOfMember.value_ptr.* = ArrayListUnmanaged(usize){};
try groupsOfMember.value_ptr.*.append(allocator, group); try groupsOfMember.value_ptr.*.append(allocator, i);
} }
var result = try groupname2users.getOrPut(group.name); var result = try groupname2users.getOrPut(group.name);
@ -134,14 +135,14 @@ const Corpus = struct {
var it1 = groupname2users.valueIterator(); var it1 = groupname2users.valueIterator();
while (it1.next()) |groupUsers| { while (it1.next()) |groupUsers| {
sort.sort(*const User, groupUsers.*, {}, cmpUserPtr); sort.sort(usize, groupUsers.*, {}, comptime sort.asc(usize));
} }
var it2 = username2groups.valueIterator(); var it2 = username2groups.valueIterator();
while (it2.next()) |userGroups| while (it2.next()) |userGroups|
sort.sort(*const Group, userGroups.items, {}, cmpGroupPtr); sort.sort(usize, userGroups.items, {}, comptime sort.asc(usize));
var username2groups_final = StringHashMap([]*const Group).init(allocator); var username2groups_final = StringHashMap([]usize).init(allocator);
var it = username2groups.iterator(); var it = username2groups.iterator();
while (it.next()) |elem| { while (it.next()) |elem| {
const username = elem.key_ptr.*; const username = elem.key_ptr.*;
@ -203,17 +204,17 @@ pub fn shellSections(
} }
pub const UserGids = struct { pub const UserGids = struct {
// username -> offset in blob // user index -> offset in blob
name2offset: StringHashMap(u32), idx2offset: []const u32,
// compressed user gids blob. A blob contains N <= users.len items, // compressed user gids blob. A blob contains N <= users.len items,
// an item is: // an item is:
// len: varint // len: varint
// gid: [varint]varint, // gid: [varint]varint,
// ... and the gid list is delta-compressed. // ... and the gid list is delta-compressed.
blob: []u8, blob: []const u8,
pub fn deinit(self: *UserGids, allocator: Allocator) void { pub fn deinit(self: *UserGids, allocator: Allocator) void {
self.name2offset.deinit(); allocator.free(self.idx2offset);
allocator.free(self.blob); allocator.free(self.blob);
self.* = undefined; self.* = undefined;
} }
@ -227,8 +228,8 @@ pub fn userGids(
) error{ OutOfMemory, Overflow }!UserGids { ) error{ OutOfMemory, Overflow }!UserGids {
var blob = ArrayList(u8).init(allocator); var blob = ArrayList(u8).init(allocator);
errdefer blob.deinit(); errdefer blob.deinit();
var name2offset = StringHashMap(u32).init(allocator); var idx2offset = try allocator.alloc(u32, corpus.users.len);
errdefer name2offset.deinit(); errdefer allocator.free(idx2offset);
// zero'th entry is empty, so groupless users can refer to it. // zero'th entry is empty, so groupless users can refer to it.
try compress.appendUvarint(&blob, 0); try compress.appendUvarint(&blob, 0);
@ -236,13 +237,15 @@ pub fn userGids(
var scratch = try allocator.alloc(u32, 256); var scratch = try allocator.alloc(u32, 256);
defer allocator.free(scratch); defer allocator.free(scratch);
for (corpus.users) |user| { for (corpus.users) |user, user_idx| {
if (corpus.username2groups.get(user.name)) |usergroups| { if (corpus.username2groups.get(user.name)) |usergroups| {
try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len)); const userOffset = try math.cast(u32, blob.items.len);
std.debug.assert(userOffset & 7 == 0);
idx2offset[user_idx] = userOffset;
scratch = try allocator.realloc(scratch, usergroups.len); scratch = try allocator.realloc(scratch, usergroups.len);
scratch.len = usergroups.len; scratch.len = usergroups.len;
for (usergroups) |group, i| for (usergroups) |group_idx, i|
scratch[i] = group.gid; scratch[i] = corpus.groups[group_idx].gid;
compress.deltaCompress(u32, scratch) catch |err| switch (err) { compress.deltaCompress(u32, scratch) catch |err| switch (err) {
error.NotSorted => unreachable, error.NotSorted => unreachable,
}; };
@ -251,60 +254,98 @@ pub fn userGids(
try compress.appendUvarint(&blob, gid); try compress.appendUvarint(&blob, gid);
try pad.arrayList(&blob, userGidsPaddingBits); try pad.arrayList(&blob, userGidsPaddingBits);
} else { } else {
try name2offset.putNoClobber(user.name, 0); idx2offset[user_idx] = 0;
} }
} }
return UserGids{ return UserGids{
.name2offset = name2offset, .idx2offset = idx2offset,
.blob = blob.toOwnedSlice(), .blob = blob.toOwnedSlice(),
}; };
} }
/// Result of `usersSection`: the packed user records plus a lookup table
/// from user index to the position of that user's record.
pub const UsersSection = struct {
    /// idx2offset[i] is the byte offset inside `blob` at which the record
    /// of the user with index `i` begins.
    idx2offset: []const u32,
    /// All user records, back to back.
    blob: []const u8,

    /// Releases both slices through `allocator` and leaves `self` undefined,
    /// so any later use of the value is caught in safe build modes.
    pub fn deinit(self: *UsersSection, allocator: Allocator) void {
        const offsets = self.idx2offset;
        const bytes = self.blob;
        self.* = undefined;
        allocator.free(bytes);
        allocator.free(offsets);
    }
};
// NOTE: this listing is a two-column diff rendering. Where a line holds two
// statements, the first is the previous revision and the second is the
// current one. The comments added here describe the current revision.
//
// usersSection packs every user of `corpus`, in corpus order, into one byte
// blob and records where each record starts.
//
// Parameters:
//   allocator - allocates the offset table and the blob.
//   corpus    - source of the users; users are visited by index.
//   gids      - gids.idx2offset[i] is the position of user i's gid list.
//   shells    - shells.indices is handed to the record packer unchanged.
//
// Returns a UsersSection that owns both slices; release it with
// UsersSection.deinit and the same allocator.
//
// Errors: OutOfMemory from allocation, Overflow when the blob length no
// longer fits in u32, plus whatever PackedUserHash.packTo reports.
pub fn usersSection( pub fn usersSection(
allocator: Allocator, allocator: Allocator,
corpus: *const Corpus, corpus: *const Corpus,
gids: *const UserGids, gids: *const UserGids,
shells: *const ShellSections, shells: *const ShellSections,
) error{ OutOfMemory, Overflow, InvalidRecord }![]const u8 { ) error{ OutOfMemory, Overflow, InvalidRecord }!UsersSection {
var idx2offset = try allocator.alloc(u32, corpus.users.len);
errdefer allocator.free(idx2offset);
// as of writing each user takes 15 bytes + strings + padding, padded to // as of writing each user takes 15 bytes + strings + padding, padded to
// 8 bytes. 24 is an optimistic lower bound for an average record size. // 8 bytes. 24 is an optimistic lower bound for an average record size.
var buf = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len); var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len);
for (corpus.users) |user| { errdefer blob.deinit();
const offset = gids.name2offset.get(user.name).?; for (corpus.users) |user, i| {
// The record of user i starts at the current end of the blob.
std.debug.assert(offset & 7 == 0); const userOffset = try math.cast(u32, blob.items.len);
const gidOffset = gids.idx2offset[i];
// Both offsets must be multiples of 8: the gid offset is stored divided
// by 8 below, and @shrExact requires that no set bits are shifted out.
std.debug.assert(userOffset & 7 == 0);
std.debug.assert(gidOffset & 7 == 0);
idx2offset[i] = userOffset;
try userImport.PackedUserHash.packTo( try userImport.PackedUserHash.packTo(
&buf, &blob,
user, user,
@truncate(u29, @shrExact(offset, 3)), @truncate(u29, @shrExact(gidOffset, 3)),
shells.indices, shells.indices,
); );
} }
return buf.toOwnedSlice(); return UsersSection{
.idx2offset = idx2offset,
.blob = blob.toOwnedSlice(),
};
} }
/// Result of `groupMembers`: the encoded member lists of all groups plus a
/// lookup table from group index to the position of that group's list.
pub const GroupMembers = struct {
    /// idx2offset[i] is the byte offset inside `blob` of the member list of
    /// the group with index `i`.
    idx2offset: []const u32,
    /// All member lists, back to back.
    blob: []const u8,

    /// Releases both slices through `allocator` and leaves `self` undefined,
    /// so any later use of the value is caught in safe build modes.
    pub fn deinit(self: *GroupMembers, allocator: Allocator) void {
        const offsets = self.idx2offset;
        const bytes = self.blob;
        self.* = undefined;
        allocator.free(bytes);
        allocator.free(offsets);
    }
};
/// Builds the per-group member lists.
///
/// Blob layout: offset 0 holds a single varint 0 (an empty list) that every
/// memberless group points at. Each non-empty group then contributes
///   count: varint
///   count x varint, the `user2offset` value of each member
/// in the order stored in `corpus.groupname2users`.
///
/// Parameters:
///   allocator   - allocates the offset table and the blob.
///   corpus      - source of the groups and of the group -> user-index lists.
///   user2offset - indexed by user index; the value written for each member
///                 (presumably `UsersSection.idx2offset` — confirm at the
///                 call site).
///
/// Returns a GroupMembers that owns both slices; release it with
/// GroupMembers.deinit and the same allocator.
///
/// Errors: OutOfMemory from allocation; Overflow when the blob grows past
/// what a u32 offset can address. The error set is wider than the
/// `error{OutOfMemory}` originally declared: the checked offset cast needs
/// it, and the previous body could not be compiled once referenced
/// (`blob.len` is not an ArrayList field, and two error unions were
/// discarded), so no working caller can depend on the narrower set.
pub fn groupMembers(
    allocator: Allocator,
    corpus: *const Corpus,
    user2offset: []const u32,
) error{ OutOfMemory, Overflow }!GroupMembers {
    var idx2offset = try allocator.alloc(u32, corpus.groups.len);
    errdefer allocator.free(idx2offset);
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();

    // zero'th entry is empty, so empty groups can refer to it
    try compress.appendUvarint(&blob, 0);

    for (corpus.groups) |group, i| {
        // Corpus.init stores a member list under every group's name, so the
        // lookup is expected to succeed for any group taken from the corpus.
        const users = corpus.groupname2users.get(group.name).?;
        if (users.len == 0) {
            idx2offset[i] = 0;
            continue;
        }

        // ArrayList exposes its length as `items.len`; the cast is checked
        // because offsets are stored as u32.
        idx2offset[i] = try math.cast(u32, blob.items.len);
        try compress.appendUvarint(&blob, users.len);
        for (users) |userIdx|
            try compress.appendUvarint(&blob, user2offset[userIdx]);
    }

    return GroupMembers{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}
// cmpUser compares two users for sorting. By username's utf8 codepoints, ascending. // cmpUser compares two users for sorting. By username's utf8 codepoints, ascending.
@ -327,17 +368,70 @@ fn cmpUser(_: void, a: User, b: User) bool {
return true; return true;
} }
// cmpUserPtr is the pointer flavour of cmpUser: it dereferences both
// operands and forwards them, together with the context, to cmpUser.
fn cmpUserPtr(context: void, a: *const User, b: *const User) bool {
return cmpUser(context, a.*, b.*);
}
// cmpGroup compares two groups for sorting. By gid, ascending: it reports
// true only when a's gid is strictly smaller than b's. The first (context)
// argument is ignored.
fn cmpGroup(_: void, a: Group, b: Group) bool { fn cmpGroup(_: void, a: Group, b: Group) bool {
return a.gid < b.gid; return a.gid < b.gid;
} }
/// Every section derived from one Corpus, bundled into a single owner.
///
/// Ownership: `deinit` releases everything with the allocator given to
/// `init`. `user_gids_b` is a view of `user_gids.blob` and is released
/// together with `user_gids`, not separately.
pub const AllSections = struct {
    allocator: Allocator,

    bdz_gid: []const u8,
    bdz_groupname: []const u8,
    bdz_uid: []const u8,
    bdz_username: []const u8,
    users: UsersSection,
    shell_sections: ShellSections,
    // Heap copies of shell_sections.index / shell_sections.blob as raw bytes.
    shell_index: []const u8,
    shell_blob: []const u8,
    user_gids: UserGids,
    // Alias of user_gids.blob.
    user_gids_b: []const u8,

    /// Builds all sections for `corpus`.
    ///
    /// On failure every section built so far is released before the error
    /// is returned, so nothing leaks on the error path.
    pub fn init(
        allocator: Allocator,
        corpus: *const Corpus,
    ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections {
        const bdz_gid = try bdzGid(allocator, corpus);
        errdefer allocator.free(bdz_gid);
        const bdz_groupname = try bdzGroupname(allocator, corpus);
        errdefer allocator.free(bdz_groupname);
        const bdz_uid = try bdzUid(allocator, corpus);
        errdefer allocator.free(bdz_uid);
        const bdz_username = try bdzUsername(allocator, corpus);
        errdefer allocator.free(bdz_username);

        var shell_sections = try shellSections(allocator, corpus);
        errdefer shell_sections.deinit();

        // NOTE(review): `constSlice()` suggests ShellSections keeps index and
        // blob inline (BoundedArray-style) — confirm. A slice taken from this
        // function's local would then point into storage that disappears on
        // return, so the bytes are copied to the heap instead.
        const shell_index = try allocator.dupe(
            u8,
            mem.sliceAsBytes(shell_sections.index.constSlice()),
        );
        errdefer allocator.free(shell_index);
        const shell_blob = try allocator.dupe(
            u8,
            mem.sliceAsBytes(shell_sections.blob.constSlice()),
        );
        errdefer allocator.free(shell_blob);

        var user_gids = try userGids(allocator, corpus);
        errdefer user_gids.deinit(allocator);

        // Last fallible step: nothing after it can fail, so `users` needs no
        // errdefer of its own.
        const users = try usersSection(
            allocator,
            corpus,
            &user_gids,
            &shell_sections,
        );

        return AllSections{
            .allocator = allocator,
            .bdz_gid = bdz_gid,
            .bdz_groupname = bdz_groupname,
            .bdz_uid = bdz_uid,
            .bdz_username = bdz_username,
            .shell_sections = shell_sections,
            .shell_index = shell_index,
            .shell_blob = shell_blob,
            .user_gids = user_gids,
            .user_gids_b = user_gids.blob,
            .users = users,
        };
    }

    /// Releases every section and leaves `self` undefined.
    pub fn deinit(self: *AllSections) void {
        self.allocator.free(self.bdz_gid);
        self.allocator.free(self.bdz_groupname);
        self.allocator.free(self.bdz_uid);
        self.allocator.free(self.bdz_username);
        self.allocator.free(self.shell_index);
        self.allocator.free(self.shell_blob);
        self.shell_sections.deinit();
        // Also releases the memory user_gids_b points at.
        self.user_gids.deinit(self.allocator);
        self.users.deinit(self.allocator);
        self.* = undefined;
    }
};
const testing = std.testing; const testing = std.testing;
@ -411,30 +505,43 @@ test "test corpus" {
var corpus = try testCorpus(testing.allocator); var corpus = try testCorpus(testing.allocator);
defer corpus.deinit(); defer corpus.deinit();
try testing.expectEqualStrings(corpus.users[0].name, "Name" ** 8); const name_name = 0;
try testing.expectEqualStrings(corpus.users[1].name, "nobody"); const nobody = 1;
try testing.expectEqualStrings(corpus.users[2].name, "svc-bar"); const svc_bar = 2;
try testing.expectEqualStrings(corpus.users[3].name, "vidmantas"); const vidmantas = 3;
try testing.expectEqualStrings(corpus.users[name_name].name, "Name" ** 8);
try testing.expectEqualStrings(corpus.users[nobody].name, "nobody");
try testing.expectEqualStrings(corpus.users[svc_bar].name, "svc-bar");
try testing.expectEqualStrings(corpus.users[vidmantas].name, "vidmantas");
const g_service_account = 0;
const g_vidmantas = 1;
const g_all = 2;
try testing.expectEqualStrings(corpus.groups[g_service_account].name, "service-account");
try testing.expectEqualStrings(corpus.groups[g_vidmantas].name, "vidmantas");
try testing.expectEqualStrings(corpus.groups[g_all].name, "all");
try testing.expectEqual(corpus.name2user.get("404"), null); try testing.expectEqual(corpus.name2user.get("404"), null);
try testing.expectEqual(corpus.name2user.get("vidmantas").?.uid, 128); try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas);
try testing.expectEqual(corpus.uid2user.get(42), null); try testing.expectEqual(corpus.uid2user.get(42), null);
try testing.expectEqual(corpus.uid2user.get(128).?.gid, 128); try testing.expectEqual(corpus.uid2user.get(128).?, vidmantas);
try testing.expectEqual(corpus.name2group.get("404"), null); try testing.expectEqual(corpus.name2group.get("404"), null);
try testing.expectEqual(corpus.name2group.get("vidmantas").?.gid, 128); try testing.expectEqual(corpus.name2group.get("vidmantas").?, g_vidmantas);
try testing.expectEqual(corpus.gid2group.get(42), null); try testing.expectEqual(corpus.gid2group.get(42), null);
try testing.expectEqual(corpus.gid2group.get(128).?.gid, 128); try testing.expectEqual(corpus.gid2group.get(128).?, g_vidmantas);
const membersOfAll = corpus.groupname2users.get("all").?; const membersOfAll = corpus.groupname2users.get("all").?;
try testing.expectEqualStrings(membersOfAll[0].name, "Name" ** 8); try testing.expectEqual(membersOfAll[0], name_name);
try testing.expectEqualStrings(membersOfAll[1].name, "svc-bar"); try testing.expectEqual(membersOfAll[1], svc_bar);
try testing.expectEqualStrings(membersOfAll[2].name, "vidmantas"); try testing.expectEqual(membersOfAll[2], vidmantas);
try testing.expectEqual(corpus.groupname2users.get("404"), null); try testing.expectEqual(corpus.groupname2users.get("404"), null);
const groupsOfVidmantas = corpus.username2groups.get("vidmantas").?; const groupsOfVidmantas = corpus.username2groups.get("vidmantas").?;
try testing.expectEqual(groupsOfVidmantas[0].gid, 0); try testing.expectEqual(groupsOfVidmantas[0], g_service_account);
try testing.expectEqual(groupsOfVidmantas[1].gid, 128); try testing.expectEqual(groupsOfVidmantas[1], g_vidmantas);
try testing.expectEqual(groupsOfVidmantas[2].gid, 9999); try testing.expectEqual(groupsOfVidmantas[2], g_all);
try testing.expectEqual(corpus.username2groups.get("nobody"), null); try testing.expectEqual(corpus.username2groups.get("nobody"), null);
try testing.expectEqual(corpus.username2groups.get("doesnotexist"), null); try testing.expectEqual(corpus.username2groups.get("doesnotexist"), null);
} }
@ -444,31 +551,8 @@ test "test sections" {
var corpus = try testCorpus(allocator); var corpus = try testCorpus(allocator);
defer corpus.deinit(); defer corpus.deinit();
const bdz_gid = try bdzGid(allocator, &corpus); var all = try AllSections.init(allocator, &corpus);
defer allocator.free(bdz_gid); defer all.deinit();
const bdz_groupname = try bdzGroupname(allocator, &corpus);
defer allocator.free(bdz_groupname);
const bdz_uid = try bdzUid(allocator, &corpus);
defer allocator.free(bdz_uid);
const bdz_username = try bdzUsername(allocator, &corpus);
defer allocator.free(bdz_username);
var shell_sections = try shellSections(allocator, &corpus);
defer shell_sections.deinit();
var user_gids = try userGids(allocator, &corpus);
defer user_gids.deinit(allocator);
var users_section = try usersSection(
allocator,
&corpus,
&user_gids,
&shell_sections,
);
defer allocator.free(users_section);
} }
test "userGids" { test "userGids" {
@ -479,19 +563,19 @@ test "userGids" {
var user_gids = try userGids(allocator, &corpus); var user_gids = try userGids(allocator, &corpus);
defer user_gids.deinit(allocator); defer user_gids.deinit(allocator);
for (corpus.users) |user| { for (corpus.users) |user, userIdx| {
const groups = corpus.username2groups.get(user.name); const groups = corpus.username2groups.get(user.name);
const offset = user_gids.name2offset.get(user.name); const offset = user_gids.idx2offset[userIdx];
if (groups == null) { if (groups == null) {
try testing.expect(offset.? == 0); try testing.expect(offset == 0);
continue; continue;
} }
var vit = try compress.VarintSliceIterator(user_gids.blob[offset.?..]); var vit = try compress.VarintSliceIterator(user_gids.blob[offset..]);
var it = compress.DeltaDecompressionIterator(&vit); var it = compress.DeltaDecompressionIterator(&vit);
try testing.expectEqual(it.remaining(), groups.?.len); try testing.expectEqual(it.remaining(), groups.?.len);
var i: usize = 0; var i: usize = 0;
while (try it.next()) |gid| : (i += 1) { while (try it.next()) |gid| : (i += 1) {
try testing.expectEqual(gid, groups.?[i].gid); try testing.expectEqual(gid, corpus.groups[groups.?[i]].gid);
} }
} }
} }