1
Fork 0
turbonss/src/sections.zig

583 lines
19 KiB
Zig

const std = @import("std");
const fmt = std.fmt;
const mem = std.mem;
const math = std.math;
const sort = std.sort;
const unicode = std.unicode;
const Allocator = std.mem.Allocator;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const ArrayList = std.ArrayList;
const MultiArrayList = std.MultiArrayList;
const StringHashMap = std.StringHashMap;
const AutoHashMap = std.AutoHashMap;
const BufSet = std.BufSet;
const pad = @import("padding.zig");
const compress = @import("compress.zig");
const shellImport = @import("shell.zig");
const userImport = @import("user.zig");
const groupImport = @import("group.zig");
const cmph = @import("cmph.zig");
const bdz = @import("bdz.zig");
const User = userImport.User;
const Group = groupImport.Group;
const ShellSections = shellImport.ShellWriter.ShellSections;
/// An indexed, arena-owned snapshot of users and groups.
///
/// Everything reachable from a Corpus is allocated from `arena`; a single
/// `deinit` call releases all of it.
const Corpus = struct {
    arena: std.heap.ArenaAllocator,

    // sorted by name, by unicode codepoint
    users: []User,
    // sorted by gid
    groups: []Group,

    // columnar copies of `users` and `groups`, in the same order
    usersMulti: MultiArrayList(User),
    groupsMulti: MultiArrayList(Group),

    // name -> index into `users` / `groups`
    name2user: StringHashMap(u32),
    name2group: StringHashMap(u32),

    // group index -> ascending user indices of its members
    group2users: []const []const u32,
    // user index -> ascending group indices the user is a member of
    user2groups: []const []const u32,

    /// Clones the given users and groups into a fresh arena (backed by
    /// `baseAllocator`), sorts them, and builds all lookup tables.
    ///
    /// Errors:
    /// - `error.Duplicate`: two users, or two groups, share a name.
    /// - `error.NotFound`: a group lists a member that is not a known user.
    /// - `error.OutOfMemory`, `error.InvalidUtf8`: part of the declared error
    ///   set; propagated from allocation and from the `clone` calls.
    ///
    /// On any error the arena is torn down, so nothing leaks.
    pub fn init(
        baseAllocator: Allocator,
        usersConst: []const User,
        groupsConst: []const Group,
    ) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound }!Corpus {
        var arena = std.heap.ArenaAllocator.init(baseAllocator);
        errdefer arena.deinit();
        // NOTE(review): this handle (and the copies kept inside the hash maps
        // below) presumably refers to the local `arena` variable, not to the
        // copy stored in the returned Corpus. Confirm before inserting into
        // the maps after init() has returned.
        const allocator = arena.allocator();

        var users = try allocator.alloc(User, usersConst.len);
        var groups = try allocator.alloc(Group, groupsConst.len);
        for (usersConst) |*src, i| {
            users[i] = try src.clone(allocator);
        }
        for (groupsConst) |*src, i| {
            groups[i] = try src.clone(allocator);
        }
        sort.sort(User, users, {}, cmpUser);
        sort.sort(Group, groups, {}, cmpGroup);

        // Columnar mirrors of the sorted slices.
        var usersMulti = MultiArrayList(User){};
        try usersMulti.ensureTotalCapacity(allocator, users.len);
        for (users) |user| {
            usersMulti.appendAssumeCapacity(user);
        }
        var groupsMulti = MultiArrayList(Group){};
        try groupsMulti.ensureTotalCapacity(allocator, groups.len);
        for (groups) |group| {
            groupsMulti.appendAssumeCapacity(group);
        }

        // Name indices; a name seen twice is rejected.
        var name2user = StringHashMap(u32).init(allocator);
        for (usersMulti.items(.name)) |name, i| {
            const slot = try name2user.getOrPut(name);
            if (slot.found_existing) return error.Duplicate;
            slot.value_ptr.* = @intCast(u32, i);
        }
        var name2group = StringHashMap(u32).init(allocator);
        for (groupsMulti.items(.name)) |name, i| {
            const slot = try name2group.getOrPut(name);
            if (slot.found_existing) return error.Duplicate;
            slot.value_ptr.* = @intCast(u32, i);
        }

        var group2users = try allocator.alloc([]u32, groups.len);

        // The outer array of per-user lists is temporary, so it comes from
        // baseAllocator and is released before returning; this keeps it out
        // of the arena. The lists' contents themselves live in the arena.
        var pending = try baseAllocator.alloc(ArrayListUnmanaged(u32), users.len);
        defer baseAllocator.free(pending);
        for (pending) |*list| {
            list.* = ArrayListUnmanaged(u32){};
        }

        for (groupsMulti.items(.members)) |group_members, group_idx| {
            const members = try allocator.alloc(u32, group_members.count());
            var filled: usize = 0;
            var it = group_members.iterator();
            while (it.next()) |member_name| {
                const user_idx = name2user.get(member_name.*) orelse
                    return error.NotFound;
                members[filled] = user_idx;
                filled += 1;
                try pending[user_idx].append(allocator, @intCast(u32, group_idx));
            }
            group2users[group_idx] = members[0..filled];
        }

        for (group2users) |member_indices| {
            sort.sort(u32, member_indices, {}, comptime sort.asc(u32));
        }

        var user2groups = try allocator.alloc([]const u32, users.len);
        for (pending) |*list, i| {
            sort.sort(u32, list.items, {}, comptime sort.asc(u32));
            user2groups[i] = list.toOwnedSlice(allocator);
        }

        return Corpus{
            .arena = arena,
            .users = users,
            .groups = groups,
            .usersMulti = usersMulti,
            .groupsMulti = groupsMulti,
            .name2user = name2user,
            .name2group = name2group,
            .group2users = group2users,
            .user2groups = user2groups,
        };
    }

    /// Frees the arena (and with it every field) and poisons the struct.
    pub fn deinit(self: *Corpus) void {
        self.arena.deinit();
        self.* = undefined;
    }
};
/// Packs the corpus' group ids with `cmph.pack_u32`.
/// The result is allocated with `allocator`; the caller frees it.
pub fn bdzGid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
    const gids = corpus.groupsMulti.items(.gid);
    return try cmph.pack_u32(allocator, gids);
}
/// Packs the corpus' group names with `cmph.pack_str`.
/// The result is allocated with `allocator`; the caller frees it.
pub fn bdzGroupname(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
    const group_names = corpus.groupsMulti.items(.name);
    return try cmph.pack_str(allocator, group_names);
}
/// Packs the corpus' user ids with `cmph.pack_u32`.
/// The result is allocated with `allocator`; the caller frees it.
pub fn bdzUid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
    const uids = corpus.usersMulti.items(.uid);
    return try cmph.pack_u32(allocator, uids);
}
/// Packs the corpus' user names with `cmph.pack_str`.
/// The result is allocated with `allocator`; the caller frees it.
pub fn bdzUsername(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
    const user_names = corpus.usersMulti.items(.name);
    return try cmph.pack_str(allocator, user_names);
}
// TODO(motiejus) there are a few problems:
// - memory management for shell sections is a mess. Make it easier by ...
// - shell module should accept a list of shells and spit out two slices
// (allocated with a given allocator). There is too much dancing around
// here.
/// Feeds every user's shell into a `ShellWriter` and returns the sections
/// it produces, limited to `shellImport.max_shells`.
pub fn shellSections(
    allocator: Allocator,
    corpus: *const Corpus,
) error{ OutOfMemory, Overflow }!ShellSections {
    var writer = shellImport.ShellWriter.init(allocator);
    // NOTE(review): if `put` fails, `writer` is abandoned without cleanup.
    // Whether that leaks depends on ShellWriter, which is not visible here.
    const shells = corpus.usersMulti.items(.shell);
    for (shells) |shell| {
        try writer.put(shell);
    }
    return writer.toOwnedSections(shellImport.max_shells);
}
/// Per-user supplementary group ids, stored as one compressed blob.
pub const UserGids = struct {
    // Maps a user index to the byte offset of that user's item in `blob`.
    idx2offset: []const u64,

    // Compressed gid lists. The blob holds N <= users.len items; one item is
    //   len: varint
    //   gid: [len]varint   (delta-compressed)
    blob: []const u8,

    /// Releases both slices with `allocator` and poisons the struct.
    pub fn deinit(self: *UserGids, allocator: Allocator) void {
        allocator.free(self.blob);
        allocator.free(self.idx2offset);
        self.* = undefined;
    }
};
/// Builds the `UserGids` section for `corpus`.
///
/// For every user that belongs to at least one group, the gids of those
/// groups are delta-compressed and appended to the blob as
/// `len, gid...` varints. Users without groups all point at offset 0.
/// Both returned slices are allocated with `allocator`; release them with
/// `UserGids.deinit`.
pub fn userGids(
    allocator: Allocator,
    corpus: *const Corpus,
) error{ OutOfMemory, Overflow }!UserGids {
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();
    var idx2offset = try allocator.alloc(u64, corpus.users.len);
    errdefer allocator.free(idx2offset);

    // The entry at offset 0 is an empty list that groupless users share.
    try compress.appendUvarint(&blob, 0);

    // Reused work buffer; resized to each user's group count below.
    var scratch = try allocator.alloc(u32, 256);
    defer allocator.free(scratch);

    for (corpus.user2groups) |usergroups, user_idx| {
        const group_count = usergroups.len;
        idx2offset[user_idx] = if (group_count == 0) 0 else blob.items.len;
        if (group_count == 0) continue;

        scratch = try allocator.realloc(scratch, group_count);
        for (usergroups) |group_idx, i| {
            scratch[i] = corpus.groups[group_idx].gid;
        }
        // deltaCompress rejects unsorted input with NotSorted; this code
        // treats that as impossible for the gids gathered above.
        compress.deltaCompress(u32, scratch) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };

        try compress.appendUvarint(&blob, group_count);
        for (scratch) |delta| {
            try compress.appendUvarint(&blob, delta);
        }
    }

    return UserGids{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}
/// Packed user records plus an index into them.
pub const UsersSection = struct {
    // Maps a user index to the byte offset of that user's record in `blob`.
    idx2offset: []const u32,
    // Concatenated packed user records.
    blob: []const u8,

    /// Releases both slices with `allocator` and poisons the struct.
    pub fn deinit(self: *UsersSection, allocator: Allocator) void {
        allocator.free(self.blob);
        allocator.free(self.idx2offset);
        self.* = undefined;
    }
};
/// Packs every user of `corpus` into one blob, in corpus order.
///
/// `gids.idx2offset[i]` and `shells.indices` are handed to
/// `PackedUserHash.packTo` for user `i`. Each record is expected to start
/// at an offset that is a multiple of 8 (asserted). Returns
/// `error.Overflow` when the blob offset no longer fits in a u32.
/// Both returned slices are allocated with `allocator`; release them with
/// `UsersSection.deinit`.
pub fn usersSection(
    allocator: Allocator,
    corpus: *const Corpus,
    gids: *const UserGids,
    shells: *const ShellSections,
) error{ OutOfMemory, Overflow, InvalidRecord }!UsersSection {
    const user_count = corpus.users.len;

    var idx2offset = try allocator.alloc(u32, user_count);
    errdefer allocator.free(idx2offset);

    // As of writing a user takes 15 bytes + strings + padding, padded to 8
    // bytes, so 24 bytes is an optimistic lower bound for the average record.
    var blob = try ArrayList(u8).initCapacity(allocator, 24 * user_count);
    errdefer blob.deinit();

    for (corpus.users) |user, i| {
        const record_offset = try math.cast(u32, blob.items.len);
        std.debug.assert(record_offset % 8 == 0);
        idx2offset[i] = record_offset;
        try userImport.PackedUserHash.packTo(
            &blob,
            user,
            gids.idx2offset[i],
            shells.indices,
        );
    }

    return UsersSection{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}
/// Per-group member lists, stored as one blob.
pub const GroupMembers = struct {
    // Maps a group index to the byte offset of its member list in `blob`.
    idx2offset: []const u64,
    // Concatenated member lists.
    blob: []const u8,

    /// Releases both slices with `allocator` and poisons the struct.
    pub fn deinit(self: *GroupMembers, allocator: Allocator) void {
        allocator.free(self.blob);
        allocator.free(self.idx2offset);
        self.* = undefined;
    }
};
/// Builds the `GroupMembers` section for `corpus`.
///
/// For every non-empty group the blob receives
///   len: varint
///   offsets: [len]varint
/// where `offsets` are the members' entries of `user2offset`,
/// delta-compressed. Empty groups all point at offset 0, which holds an
/// empty list.
///
/// `user2offset` maps a user index to that user's offset. It must have an
/// entry for every user of `corpus`, and must be ascending in user index:
/// member lists are sorted by user index, and `deltaCompress` rejecting the
/// mapped offsets as unsorted is treated as unreachable.
///
/// Both returned slices are allocated with `allocator`; release them with
/// `GroupMembers.deinit`.
pub fn groupMembers(
    allocator: Allocator,
    corpus: *const Corpus,
    user2offset: []const u32,
) error{OutOfMemory}!GroupMembers {
    var idx2offset = try allocator.alloc(u64, corpus.groups.len);
    errdefer allocator.free(idx2offset);
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();

    // zero'th entry is empty, so empty groups can refer to it
    try compress.appendUvarint(&blob, 0);

    // Reused work buffer; resized to each group's member count below.
    var scratch = try allocator.alloc(u32, 256);
    defer allocator.free(scratch);

    for (corpus.group2users) |members, group_idx| {
        if (members.len == 0) {
            idx2offset[group_idx] = 0;
            continue;
        }

        // Record where this group's list starts. Without this the entry
        // would be returned uninitialized.
        idx2offset[group_idx] = blob.items.len;

        // Translate user indices to offsets *before* delta-compressing:
        // once compressed, the values are differences and can no longer be
        // used to index `user2offset`.
        scratch = try allocator.realloc(scratch, members.len);
        for (members) |user_idx, i| {
            scratch[i] = user2offset[user_idx];
        }
        compress.deltaCompress(u32, scratch) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };

        try compress.appendUvarint(&blob, members.len);
        for (scratch) |delta| {
            try compress.appendUvarint(&blob, delta);
        }
    }

    return GroupMembers{
        .idx2offset = idx2offset,
        .blob = blob.toOwnedSlice(),
    };
}
// cmpUser is the "less than" predicate used to sort users: it orders them by
// the unicode codepoints of their names, ascending. A name that is a proper
// prefix of another sorts first; equal names are not "less than" each other,
// which keeps the ordering strict as sorting requires.
//
// Both names must be valid UTF-8; invalid input is treated as unreachable.
fn cmpUser(_: void, a: User, b: User) bool {
    var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator();
    var utf8_b = (unicode.Utf8View.init(b.name) catch unreachable).iterator();
    while (utf8_a.nextCodepoint()) |codepoint_a| {
        // b ran out first: b is a proper prefix of a, so a sorts after b.
        const codepoint_b = utf8_b.nextCodepoint() orelse return false;
        if (codepoint_a != codepoint_b)
            return codepoint_a < codepoint_b;
    }
    // a ran out. a sorts before b only if b still has codepoints left (a is
    // a proper prefix of b); if b is exhausted too, the names are equal.
    return utf8_b.nextCodepoint() != null;
}
// cmpGroup is the "less than" predicate used to sort groups: by gid, ascending.
fn cmpGroup(_: void, a: Group, b: Group) bool {
    return b.gid > a.gid;
}
/// Every section derived from a Corpus, owned together.
///
/// All memory is allocated with `allocator` and released by `deinit`.
pub const AllSections = struct {
    allocator: Allocator,
    bdz_gid: []const u8,
    bdz_groupname: []const u8,
    bdz_uid: []const u8,
    bdz_username: []const u8,
    users: UsersSection,
    shell_sections: ShellSections,
    // Heap copies of the raw bytes of shell_sections.index / .blob.
    shell_index: []const u8,
    shell_blob: []const u8,
    user_gids: UserGids,
    // Not built yet: left undefined by init() and not touched by deinit().
    group_members: GroupMembers,

    /// Builds all sections from `corpus`.
    ///
    /// If any step fails, everything built so far is released before the
    /// error is returned.
    pub fn init(
        allocator: Allocator,
        corpus: *const Corpus,
    ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections {
        const bdz_gid = try bdzGid(allocator, corpus);
        errdefer allocator.free(bdz_gid);
        const bdz_groupname = try bdzGroupname(allocator, corpus);
        errdefer allocator.free(bdz_groupname);
        const bdz_uid = try bdzUid(allocator, corpus);
        errdefer allocator.free(bdz_uid);
        const bdz_username = try bdzUsername(allocator, corpus);
        errdefer allocator.free(bdz_username);

        var shell_sections = try shellSections(allocator, corpus);
        errdefer shell_sections.deinit();

        // `index` and `blob` are stored by value inside ShellSections, and
        // constSlice() presumably returns a view into that inline storage
        // (as std.BoundedArray does). A slice taken from a local here would
        // dangle as soon as init() returns, so the bytes are duplicated onto
        // the heap and owned by AllSections instead.
        const shell_index = try allocator.dupe(
            u8,
            mem.sliceAsBytes(shell_sections.index.constSlice()),
        );
        errdefer allocator.free(shell_index);
        const shell_blob = try allocator.dupe(
            u8,
            mem.sliceAsBytes(shell_sections.blob.constSlice()),
        );
        errdefer allocator.free(shell_blob);

        var user_gids = try userGids(allocator, corpus);
        errdefer user_gids.deinit(allocator);

        // Last fallible step, so `users` itself needs no errdefer.
        const users = try usersSection(
            allocator,
            corpus,
            &user_gids,
            &shell_sections,
        );

        // TODO: group members are not wired in yet.
        //const group_members = try groupMembers(
        //    allocator,
        //    corpus,
        //    users.idx2offset,
        //);

        return AllSections{
            .allocator = allocator,
            .bdz_gid = bdz_gid,
            .bdz_groupname = bdz_groupname,
            .bdz_uid = bdz_uid,
            .bdz_username = bdz_username,
            .shell_sections = shell_sections,
            .shell_index = shell_index,
            .shell_blob = shell_blob,
            .user_gids = user_gids,
            .users = users,
            //.group_members = group_members,
            .group_members = undefined,
        };
    }

    /// Releases every section built by init() and poisons the struct.
    pub fn deinit(self: *AllSections) void {
        self.allocator.free(self.bdz_gid);
        self.allocator.free(self.bdz_groupname);
        self.allocator.free(self.bdz_uid);
        self.allocator.free(self.bdz_username);
        self.allocator.free(self.shell_index);
        self.allocator.free(self.shell_blob);
        self.shell_sections.deinit();
        self.user_gids.deinit(self.allocator);
        self.users.deinit(self.allocator);
        self.* = undefined;
    }
};
const testing = std.testing;
// testCorpus builds a small fixture: four users (given out of name order)
// and three groups (given out of gid order). The caller owns the result and
// must call deinit() on it.
fn testCorpus(allocator: Allocator) !Corpus {
    const long_name = "Name" ** 8;

    const users = [_]User{
        User{
            .uid = 128,
            .gid = 128,
            .name = "vidmantas",
            .gecos = "Vidmantas Kaminskas",
            .home = "/home/vidmantas",
            .shell = "/bin/bash",
        },
        User{
            .uid = 0,
            .gid = math.maxInt(u32),
            .name = long_name,
            .gecos = "Gecos" ** 51,
            .home = "Home" ** 16,
            .shell = "She.LllL" ** 8,
        },
        User{
            .uid = 1002,
            .gid = 1002,
            .name = "svc-bar",
            .gecos = "",
            .home = "/",
            .shell = "/",
        },
        User{
            .uid = 65534,
            .gid = 65534,
            .name = "nobody",
            .gecos = "nobody",
            .home = "/nonexistent",
            .shell = "/usr/sbin/nologin",
        },
    };

    // The member sets are released when this function returns.
    var only_vidmantas = try groupImport.someMembers(
        allocator,
        &[_][]const u8{"vidmantas"},
    );
    defer only_vidmantas.deinit();

    var service_members = try groupImport.someMembers(
        allocator,
        &[_][]const u8{ "svc-bar", "vidmantas" },
    );
    defer service_members.deinit();

    var all_members = try groupImport.someMembers(
        allocator,
        &[_][]const u8{ "svc-bar", long_name, "vidmantas" },
    );
    defer all_members.deinit();

    const groups = [_]Group{
        Group{
            .gid = 128,
            .name = "vidmantas",
            .members = only_vidmantas,
        },
        Group{
            .gid = 9999,
            .name = "all",
            .members = all_members,
        },
        Group{
            .gid = 0,
            .name = "service-account",
            .members = service_members,
        },
    };

    return try Corpus.init(allocator, users[0..], groups[0..]);
}
// Checks the sort order of the fixture and every lookup table built on it.
test "test corpus" {
    var corpus = try testCorpus(testing.allocator);
    defer corpus.deinit();

    // Expected user positions after sorting by name.
    const name_name = 0;
    const nobody = 1;
    const svc_bar = 2;
    const vidmantas = 3;
    const user_names = [_][]const u8{ "Name" ** 8, "nobody", "svc-bar", "vidmantas" };
    for (user_names) |want, i| {
        try testing.expectEqualStrings(corpus.users[i].name, want);
    }

    // Expected group positions after sorting by gid.
    const g_service_account = 0;
    const g_vidmantas = 1;
    const g_all = 2;
    const group_names = [_][]const u8{ "service-account", "vidmantas", "all" };
    for (group_names) |want, i| {
        try testing.expectEqualStrings(corpus.groups[i].name, want);
    }

    // Name lookups.
    try testing.expectEqual(corpus.name2user.get("404"), null);
    try testing.expectEqual(corpus.name2user.get("vidmantas").?, vidmantas);
    try testing.expectEqual(corpus.name2group.get("404"), null);
    try testing.expectEqual(corpus.name2group.get("vidmantas").?, g_vidmantas);

    // Members of "all", as ascending user indices.
    const all_users = [_]u32{ name_name, svc_bar, vidmantas };
    const members_of_all = corpus.group2users[g_all];
    for (all_users) |want, i| {
        try testing.expectEqual(members_of_all[i], want);
    }

    // Groups of "vidmantas", as ascending group indices.
    const vidmantas_groups = [_]u32{ g_service_account, g_vidmantas, g_all };
    const groups_of_vidmantas = corpus.user2groups[vidmantas];
    for (vidmantas_groups) |want, i| {
        try testing.expectEqual(groups_of_vidmantas[i], want);
    }
}
// Smoke test: all sections can be built from the fixture and torn down
// again under the testing allocator.
test "test sections" {
    var corpus = try testCorpus(testing.allocator);
    defer corpus.deinit();

    var sections = try AllSections.init(testing.allocator, &corpus);
    defer sections.deinit();
}
// Decodes every user's gid list from the blob and compares it with the
// gids of the groups recorded in the corpus.
test "userGids" {
    const allocator = testing.allocator;
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();
    var user_gids = try userGids(allocator, &corpus);
    defer user_gids.deinit(allocator);

    for (corpus.users) |_, user_idx| {
        const group_indices = corpus.user2groups[user_idx];
        const offset = user_gids.idx2offset[user_idx];

        // Groupless users share the empty entry at offset 0.
        if (group_indices.len == 0) {
            try testing.expect(offset == 0);
            continue;
        }

        var varints = try compress.VarintSliceIterator(user_gids.blob[offset..]);
        var decoded = compress.DeltaDecompressionIterator(&varints);
        try testing.expectEqual(decoded.remaining(), group_indices.len);

        var pos: usize = 0;
        while (try decoded.next()) |gid| : (pos += 1) {
            const group = corpus.groups[group_indices[pos]];
            try testing.expectEqual(gid, group.gid);
        }
    }
}
// Packs the fixture's gids and checks that looking up the three gids
// yields, once sorted, exactly the values 0, 1 and 2.
test "pack gids" {
    const allocator = testing.allocator;
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

    const gids = corpus.groupsMulti.items(.gid);
    const cmph_gid = try cmph.pack_u32(allocator, gids);
    defer allocator.free(cmph_gid);

    // A named local array, so it can be sorted in place.
    var hashes = [_]u32{
        bdz.search_u32(cmph_gid, 0),
        bdz.search_u32(cmph_gid, 128),
        bdz.search_u32(cmph_gid, 9999),
    };
    sort.sort(u32, hashes[0..], {}, comptime sort.asc(u32));

    for (hashes) |hash, i| {
        try testing.expectEqual(i, hash);
    }
}
// testUser returns an all-zero User with only `name` filled in.
fn testUser(name: []const u8) User {
    var user = mem.zeroes(User);
    user.name = name;
    return user;
}
// Table of name pairs and the expected result of cmpUser(lhs, rhs).
test "users compare function" {
    const a = testUser("a");
    const b = testUser("b");
    const bb = testUser("bb");

    const Case = struct {
        lhs: User,
        rhs: User,
        less: bool,
    };
    const cases = [_]Case{
        Case{ .lhs = a, .rhs = b, .less = true },
        Case{ .lhs = b, .rhs = a, .less = false },
        Case{ .lhs = a, .rhs = bb, .less = true },
        Case{ .lhs = bb, .rhs = a, .less = false },
        Case{ .lhs = b, .rhs = bb, .less = true },
        Case{ .lhs = bb, .rhs = b, .less = false },
    };
    for (cases) |case| {
        try testing.expect(cmpUser({}, case.lhs, case.rhs) == case.less);
    }
}