1
Fork 0

add a struct for all sections

This commit is contained in:
Motiejus Jakštys 2022-03-07 06:09:20 +02:00 committed by Motiejus Jakštys
parent 24d984d712
commit 9bf0a35689
2 changed files with 199 additions and 115 deletions

View File

@ -364,8 +364,8 @@ shellIndex len(shells)*2 shell index array
shellBlob <= 4032 shell data blob (max 63*64 bytes)
groups ? packed Group entries (8b padding)
users ? packed User entries (8b padding)
groupMembers ? per-group memberlist (no padding)
userGids ? per-user gidlist entries (8b padding)
groupMembers ? per-group varint memberlist (no padding)
userGids ? per-user varint gidlist (8b padding)
```
Section creation order:

View File

@ -1,5 +1,6 @@
const std = @import("std");
const fmt = std.fmt;
const mem = std.mem;
const math = std.math;
const sort = std.sort;
const unicode = std.unicode;
@ -35,12 +36,12 @@ const Corpus = struct {
groupsMulti: MultiArrayList(Group),
// pointing to `users` and `groups` slices above.
name2user: StringHashMap(*const User),
uid2user: AutoHashMap(u32, *const User),
name2group: StringHashMap(*const Group),
gid2group: AutoHashMap(u32, *const Group),
groupname2users: StringHashMap([]*const User),
username2groups: StringHashMap([]*const Group),
name2user: StringHashMap(usize),
uid2user: AutoHashMap(u32, usize),
name2group: StringHashMap(usize),
gid2group: AutoHashMap(u32, usize),
groupname2users: StringHashMap([]usize),
username2groups: StringHashMap([]usize),
pub fn init(
baseAllocator: Allocator,
@ -70,60 +71,60 @@ const Corpus = struct {
for (groups) |group|
groupsMulti.appendAssumeCapacity(group);
var name2user = StringHashMap(*const User).init(allocator);
var uid2user = AutoHashMap(u32, *const User).init(allocator);
var name2group = StringHashMap(*const Group).init(allocator);
var gid2group = AutoHashMap(u32, *const Group).init(allocator);
for (users) |*user| {
var name2user = StringHashMap(usize).init(allocator);
var uid2user = AutoHashMap(u32, usize).init(allocator);
var name2group = StringHashMap(usize).init(allocator);
var gid2group = AutoHashMap(u32, usize).init(allocator);
for (users) |*user, i| {
var res1 = try name2user.getOrPut(user.name);
if (res1.found_existing)
return error.Duplicate;
res1.value_ptr.* = user;
res1.value_ptr.* = i;
var res2 = try uid2user.getOrPut(user.uid);
if (res2.found_existing)
return error.Duplicate;
res2.value_ptr.* = user;
res2.value_ptr.* = i;
}
for (groups) |*group| {
for (groups) |*group, i| {
var res1 = try name2group.getOrPut(group.name);
if (res1.found_existing)
return error.Duplicate;
res1.value_ptr.* = group;
res1.value_ptr.* = i;
var res2 = try gid2group.getOrPut(group.gid);
if (res2.found_existing)
return error.Duplicate;
res2.value_ptr.* = group;
res2.value_ptr.* = i;
}
var groupname2users = StringHashMap([]*const User).init(allocator);
var groupname2users = StringHashMap([]usize).init(allocator);
// uses baseAllocator, because it will be freed before
// returning from this function. This keeps the arena clean.
var username2groups = StringHashMap(
ArrayListUnmanaged(*const Group),
ArrayListUnmanaged(usize),
).init(baseAllocator);
defer username2groups.deinit();
for (groups) |*group| {
var members = try allocator.alloc(*const User, group.members.count());
for (groups) |*group, i| {
var members = try allocator.alloc(usize, group.members.count());
members.len = 0;
var it = group.members.iterator();
while (it.next()) |memberName| {
if (name2user.get(memberName.*)) |user| {
if (name2user.get(memberName.*)) |idx| {
members.len += 1;
members[members.len - 1] = user;
members[members.len - 1] = idx;
} else {
return error.NotFound;
}
var groupsOfMember = try username2groups.getOrPut(memberName.*);
if (!groupsOfMember.found_existing)
groupsOfMember.value_ptr.* = ArrayListUnmanaged(*const Group){};
try groupsOfMember.value_ptr.*.append(allocator, group);
groupsOfMember.value_ptr.* = ArrayListUnmanaged(usize){};
try groupsOfMember.value_ptr.*.append(allocator, i);
}
var result = try groupname2users.getOrPut(group.name);
@ -134,14 +135,14 @@ const Corpus = struct {
var it1 = groupname2users.valueIterator();
while (it1.next()) |groupUsers| {
sort.sort(*const User, groupUsers.*, {}, cmpUserPtr);
sort.sort(usize, groupUsers.*, {}, comptime sort.asc(usize));
}
var it2 = username2groups.valueIterator();
while (it2.next()) |userGroups|
sort.sort(*const Group, userGroups.items, {}, cmpGroupPtr);
sort.sort(usize, userGroups.items, {}, comptime sort.asc(usize));
var username2groups_final = StringHashMap([]*const Group).init(allocator);
var username2groups_final = StringHashMap([]usize).init(allocator);
var it = username2groups.iterator();
while (it.next()) |elem| {
const username = elem.key_ptr.*;
@ -203,17 +204,17 @@ pub fn shellSections(
}
pub const UserGids = struct {
// username -> offset in blob
name2offset: StringHashMap(u32),
// user index -> offset in blob
idx2offset: []const u32,
// compressed user gids blob. A blob contains N <= users.len items,
// an item is:
// len: varint
// gid: [varint]varint,
// ... and the gid list is delta-compressed.
blob: []u8,
blob: []const u8,
pub fn deinit(self: *UserGids, allocator: Allocator) void {
self.name2offset.deinit();
allocator.free(self.idx2offset);
allocator.free(self.blob);
self.* = undefined;
}
@ -227,8 +228,8 @@ pub fn userGids(
) error{ OutOfMemory, Overflow }!UserGids {
var blob = ArrayList(u8).init(allocator);
errdefer blob.deinit();
var name2offset = StringHashMap(u32).init(allocator);
errdefer name2offset.deinit();
var idx2offset = try allocator.alloc(u32, corpus.users.len);
errdefer allocator.free(idx2offset);
// zero'th entry is empty, so groupless users can refer to it.
try compress.appendUvarint(&blob, 0);
@ -236,13 +237,15 @@ pub fn userGids(
var scratch = try allocator.alloc(u32, 256);
defer allocator.free(scratch);
for (corpus.users) |user| {
for (corpus.users) |user, user_idx| {
if (corpus.username2groups.get(user.name)) |usergroups| {
try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len));
const userOffset = try math.cast(u32, blob.items.len);
std.debug.assert(userOffset & 7 == 0);
idx2offset[user_idx] = userOffset;
scratch = try allocator.realloc(scratch, usergroups.len);
scratch.len = usergroups.len;
for (usergroups) |group, i|
scratch[i] = group.gid;
for (usergroups) |group_idx, i|
scratch[i] = corpus.groups[group_idx].gid;
compress.deltaCompress(u32, scratch) catch |err| switch (err) {
error.NotSorted => unreachable,
};
@ -251,60 +254,98 @@ pub fn userGids(
try compress.appendUvarint(&blob, gid);
try pad.arrayList(&blob, userGidsPaddingBits);
} else {
try name2offset.putNoClobber(user.name, 0);
idx2offset[user_idx] = 0;
}
}
return UserGids{
.name2offset = name2offset,
.idx2offset = idx2offset,
.blob = blob.toOwnedSlice(),
};
}
/// Output of `usersSection`: a byte blob plus a per-user offset table into it.
pub const UsersSection = struct {
    /// Indexed by user index; each value is an offset into `blob`.
    idx2offset: []const u32,
    /// Serialized user records that `idx2offset` points into.
    blob: []const u8,

    /// Frees both slices with `allocator`, then sets `self` to `undefined`.
    /// `allocator` must be the one the slices were allocated with.
    pub fn deinit(self: *UsersSection, allocator: Allocator) void {
        // Runs last, after both slices have been handed back.
        defer self.* = undefined;
        const offsets = self.idx2offset;
        const bytes = self.blob;
        allocator.free(offsets);
        allocator.free(bytes);
    }
};
/// Builds the packed users section: one record per entry of `corpus.users`,
/// appended in corpus order with `userImport.PackedUserHash.packTo`.
///
/// `gids` supplies, for every user, the offset of that user's gid list;
/// `shells.indices` is forwarded to the packer unchanged.
pub fn usersSection(
allocator: Allocator,
corpus: *const Corpus,
gids: *const UserGids,
shells: *const ShellSections,
) error{ OutOfMemory, Overflow, InvalidRecord }![]const u8 {
) error{ OutOfMemory, Overflow, InvalidRecord }!UsersSection {
// One slot per user: where that user's record starts in the blob.
var idx2offset = try allocator.alloc(u32, corpus.users.len);
errdefer allocator.free(idx2offset);
// as of writing each user takes 15 bytes + strings + padding, padded to
// 8 bytes. 24 is an optimistic lower bound for an average record size.
var buf = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len);
for (corpus.users) |user| {
const offset = gids.name2offset.get(user.name).?;
std.debug.assert(offset & 7 == 0);
var blob = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len);
errdefer blob.deinit();
for (corpus.users) |user, i| {
// The record for this user begins at the current end of the blob.
const userOffset = try math.cast(u32, blob.items.len);
const gidOffset = gids.idx2offset[i];
// Both offsets are expected to be multiples of 8.
std.debug.assert(userOffset & 7 == 0);
std.debug.assert(gidOffset & 7 == 0);
idx2offset[i] = userOffset;
try userImport.PackedUserHash.packTo(
&buf,
&blob,
user,
// The gid-list offset is passed in units of 8 bytes (low 3 bits dropped), as a u29.
@truncate(u29, @shrExact(offset, 3)),
@truncate(u29, @shrExact(gidOffset, 3)),
shells.indices,
);
}
return buf.toOwnedSlice();
return UsersSection{
.idx2offset = idx2offset,
.blob = blob.toOwnedSlice(),
};
}
/// Output of `groupMembers`: a byte blob plus a per-group offset table into it.
pub const GroupMembers = struct {
    /// Indexed by group index; each value is that group's offset into `blob`.
    idx2offset: []const u32,
    /// Serialized member lists that `idx2offset` points into.
    blob: []const u8,

    /// Frees both slices with `allocator`, then sets `self` to `undefined`.
    /// `allocator` must be the one the slices were allocated with.
    pub fn deinit(self: *GroupMembers, allocator: Allocator) void {
        // Runs last, after both slices have been handed back.
        defer self.* = undefined;
        const offsets = self.idx2offset;
        const bytes = self.blob;
        allocator.free(offsets);
        allocator.free(bytes);
    }
};
/// Serializes the member list of every group in `corpus.groups` into one blob.
///
/// A non-empty group is written as `len: varint` followed by `len` varints,
/// one per member, each holding `user2offset[member_index]`. `user2offset`
/// is indexed by user index, i.e. by the values stored in
/// `corpus.groupname2users`.
///
/// In the result, `idx2offset[i]` is the position in `blob` where group `i`
/// starts. Groups without members all point at offset 0, where a single
/// empty list (`len == 0`) is stored.
///
/// The caller owns the result and releases it with `GroupMembers.deinit`,
/// passing the same `allocator`.
///
/// Errors: `OutOfMemory` on allocation failure; `Overflow` if the blob grows
/// past what a `u32` offset can address.
pub fn groupMembers(
    allocator: Allocator,
    corpus: *const Corpus,
    user2offset: []const u32,
) error{ OutOfMemory, Overflow }!GroupMembers {
    const idx2offset = try allocator.alloc(u32, corpus.groups.len);
    errdefer allocator.free(idx2offset);
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();

    // zeroth entry is empty, so empty groups can refer to it
    try compress.appendUvarint(&blob, 0);

    for (corpus.groups) |group, i| {
        const users = corpus.groupname2users.get(group.name).?;
        if (users.len == 0) {
            idx2offset[i] = 0;
            continue;
        }
        // An ArrayList's length lives in `.items.len`; it is a usize, so
        // narrow it with a checked cast instead of assigning it directly.
        idx2offset[i] = try math.cast(u32, blob.items.len);
        try compress.appendUvarint(&blob, users.len);
        for (users) |user_idx|
            try compress.appendUvarint(&blob, user2offset[user_idx]);
    }

    return GroupMembers{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}
// cmpUser compares two users for sorting. By username's utf8 codepoints, ascending.
@ -327,17 +368,70 @@ fn cmpUser(_: void, a: User, b: User) bool {
return true;
}
/// Pointer adapter for `cmpUser`: compares the users that `a` and `b` point to.
fn cmpUserPtr(context: void, a: *const User, b: *const User) bool {
    const lhs: User = a.*;
    const rhs: User = b.*;
    return cmpUser(context, lhs, rhs);
}
/// Orders groups by ascending gid: true when `a` sorts strictly before `b`.
fn cmpGroup(_: void, a: Group, b: Group) bool {
    const lhs = a.gid;
    const rhs = b.gid;
    return rhs > lhs;
}
/// Pointer adapter for `cmpGroup`: compares the groups that `a` and `b` point to.
fn cmpGroupPtr(context: void, a: *const Group, b: *const Group) bool {
    const lhs: Group = a.*;
    const rhs: Group = b.*;
    return cmpGroup(context, lhs, rhs);
}
/// Every section built from a `Corpus`, bundled with the allocator that owns
/// them so a single `deinit` releases everything.
pub const AllSections = struct {
    /// Allocator used by `init`; `deinit` frees through it.
    allocator: Allocator,

    bdz_gid: []const u8,
    bdz_groupname: []const u8,
    bdz_uid: []const u8,
    bdz_username: []const u8,
    users: UsersSection,
    shell_sections: ShellSections,
    /// Owned byte copy of `shell_sections.index`.
    shell_index: []const u8,
    /// Owned byte copy of `shell_sections.blob`.
    shell_blob: []const u8,
    user_gids: UserGids,
    /// Alias of `user_gids.blob`; not freed separately.
    user_gids_b: []const u8,

    /// Builds all sections from `corpus`.
    ///
    /// If any step fails, everything built so far is released before the
    /// error is returned. On success the caller must call `deinit`.
    pub fn init(
        allocator: Allocator,
        corpus: *const Corpus,
    ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections {
        const bdz_gid = try bdzGid(allocator, corpus);
        errdefer allocator.free(bdz_gid);

        const bdz_groupname = try bdzGroupname(allocator, corpus);
        errdefer allocator.free(bdz_groupname);

        const bdz_uid = try bdzUid(allocator, corpus);
        errdefer allocator.free(bdz_uid);

        const bdz_username = try bdzUsername(allocator, corpus);
        errdefer allocator.free(bdz_username);

        var shell_sections = try shellSections(allocator, corpus);
        errdefer shell_sections.deinit();

        // `constSlice()` yields a view into the value it is called on. Taking
        // it from a function-local copy and returning it would leave the
        // slice pointing at this stack frame (assuming `index`/`blob` store
        // their bytes inline, as std.BoundedArray does). Copy the bytes to
        // the heap so the returned slices stay valid.
        const shell_index = try allocator.dupe(
            u8,
            mem.sliceAsBytes(shell_sections.index.constSlice()),
        );
        errdefer allocator.free(shell_index);

        const shell_blob = try allocator.dupe(
            u8,
            mem.sliceAsBytes(shell_sections.blob.constSlice()),
        );
        errdefer allocator.free(shell_blob);

        var user_gids = try userGids(allocator, corpus);
        errdefer user_gids.deinit(allocator);

        // Last fallible step, so `users` needs no errdefer of its own.
        const users = try usersSection(
            allocator,
            corpus,
            &user_gids,
            &shell_sections,
        );

        return AllSections{
            .allocator = allocator,
            .bdz_gid = bdz_gid,
            .bdz_groupname = bdz_groupname,
            .bdz_uid = bdz_uid,
            .bdz_username = bdz_username,
            .shell_sections = shell_sections,
            .shell_index = shell_index,
            .shell_blob = shell_blob,
            .user_gids = user_gids,
            .user_gids_b = user_gids.blob,
            .users = users,
        };
    }

    /// Releases every section and sets `self` to `undefined`.
    pub fn deinit(self: *AllSections) void {
        self.allocator.free(self.bdz_gid);
        self.allocator.free(self.bdz_groupname);
        self.allocator.free(self.bdz_uid);
        self.allocator.free(self.bdz_username);
        self.allocator.free(self.shell_index);
        self.allocator.free(self.shell_blob);
        self.shell_sections.deinit();
        // Also releases the memory `user_gids_b` aliases.
        self.user_gids.deinit(self.allocator);
        self.users.deinit(self.allocator);
        self.* = undefined;
    }
};
const testing = std.testing;
@ -411,30 +505,43 @@ test "test corpus" {
var corpus = try testCorpus(testing.allocator);
defer corpus.deinit();
try testing.expectEqualStrings(corpus.users[0].name, "Name" ** 8);
try testing.expectEqualStrings(corpus.users[1].name, "nobody");
try testing.expectEqualStrings(corpus.users[2].name, "svc-bar");
try testing.expectEqualStrings(corpus.users[3].name, "vidmantas");
const name_name = 0;
const nobody = 1;
const svc_bar = 2;
const vidmantas = 3;
try testing.expectEqualStrings(corpus.users[name_name].name, "Name" ** 8);
try testing.expectEqualStrings(corpus.users[nobody].name, "nobody");
try testing.expectEqualStrings(corpus.users[svc_bar].name, "svc-bar");
try testing.expectEqualStrings(corpus.users[vidmantas].name, "vidmantas");
const g_service_account = 0;
const g_vidmantas = 1;
const g_all = 2;
try testing.expectEqualStrings(corpus.groups[g_service_account].name, "service-account");
try testing.expectEqualStrings(corpus.groups[g_vidmantas].name, "vidmantas");
try testing.expectEqualStrings(corpus.groups[g_all].name, "all");
try testing.expectEqual(corpus.name2user.get("404"), null);
try testing.expectEqual(corpus.name2user.get("vidmantas").?.uid, 128);
try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas);
try testing.expectEqual(corpus.uid2user.get(42), null);
try testing.expectEqual(corpus.uid2user.get(128).?.gid, 128);
try testing.expectEqual(corpus.uid2user.get(128).?, vidmantas);
try testing.expectEqual(corpus.name2group.get("404"), null);
try testing.expectEqual(corpus.name2group.get("vidmantas").?.gid, 128);
try testing.expectEqual(corpus.name2group.get("vidmantas").?, g_vidmantas);
try testing.expectEqual(corpus.gid2group.get(42), null);
try testing.expectEqual(corpus.gid2group.get(128).?.gid, 128);
try testing.expectEqual(corpus.gid2group.get(128).?, g_vidmantas);
const membersOfAll = corpus.groupname2users.get("all").?;
try testing.expectEqualStrings(membersOfAll[0].name, "Name" ** 8);
try testing.expectEqualStrings(membersOfAll[1].name, "svc-bar");
try testing.expectEqualStrings(membersOfAll[2].name, "vidmantas");
try testing.expectEqual(membersOfAll[0], name_name);
try testing.expectEqual(membersOfAll[1], svc_bar);
try testing.expectEqual(membersOfAll[2], vidmantas);
try testing.expectEqual(corpus.groupname2users.get("404"), null);
const groupsOfVidmantas = corpus.username2groups.get("vidmantas").?;
try testing.expectEqual(groupsOfVidmantas[0].gid, 0);
try testing.expectEqual(groupsOfVidmantas[1].gid, 128);
try testing.expectEqual(groupsOfVidmantas[2].gid, 9999);
try testing.expectEqual(groupsOfVidmantas[0], g_service_account);
try testing.expectEqual(groupsOfVidmantas[1], g_vidmantas);
try testing.expectEqual(groupsOfVidmantas[2], g_all);
try testing.expectEqual(corpus.username2groups.get("nobody"), null);
try testing.expectEqual(corpus.username2groups.get("doesnotexist"), null);
}
@ -444,31 +551,8 @@ test "test sections" {
var corpus = try testCorpus(allocator);
defer corpus.deinit();
const bdz_gid = try bdzGid(allocator, &corpus);
defer allocator.free(bdz_gid);
const bdz_groupname = try bdzGroupname(allocator, &corpus);
defer allocator.free(bdz_groupname);
const bdz_uid = try bdzUid(allocator, &corpus);
defer allocator.free(bdz_uid);
const bdz_username = try bdzUsername(allocator, &corpus);
defer allocator.free(bdz_username);
var shell_sections = try shellSections(allocator, &corpus);
defer shell_sections.deinit();
var user_gids = try userGids(allocator, &corpus);
defer user_gids.deinit(allocator);
var users_section = try usersSection(
allocator,
&corpus,
&user_gids,
&shell_sections,
);
defer allocator.free(users_section);
var all = try AllSections.init(allocator, &corpus);
defer all.deinit();
}
test "userGids" {
@ -479,19 +563,19 @@ test "userGids" {
var user_gids = try userGids(allocator, &corpus);
defer user_gids.deinit(allocator);
for (corpus.users) |user| {
for (corpus.users) |user, userIdx| {
const groups = corpus.username2groups.get(user.name);
const offset = user_gids.name2offset.get(user.name);
const offset = user_gids.idx2offset[userIdx];
if (groups == null) {
try testing.expect(offset.? == 0);
try testing.expect(offset == 0);
continue;
}
var vit = try compress.VarintSliceIterator(user_gids.blob[offset.?..]);
var vit = try compress.VarintSliceIterator(user_gids.blob[offset..]);
var it = compress.DeltaDecompressionIterator(&vit);
try testing.expectEqual(it.remaining(), groups.?.len);
var i: usize = 0;
while (try it.next()) |gid| : (i += 1) {
try testing.expectEqual(gid, groups.?[i].gid);
try testing.expectEqual(gid, corpus.groups[groups.?[i]].gid);
}
}
}