1
Fork 0
turbonss/src/Corpus.zig

339 lines
11 KiB
Zig

const std = @import("std");
const mem = std.mem;
const math = std.math;
const sort = std.sort;
const unicode = std.unicode;
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const AutoHashMap = std.AutoHashMap;
const StringHashMap = std.StringHashMap;
const MultiArrayList = std.MultiArrayList;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const User = @import("User.zig");
const Group = @import("Group.zig");
const ErrCtx = @import("ErrCtx.zig");
// This file is a struct: the fields below are the fields of Corpus.
pub const Corpus = @This();
// Arena that init allocates the cloned users/groups, lookup maps and
// membership tables from; deinit releases it.
arena: ArenaAllocator,
// sorted by name, by unicode codepoint
users: MultiArrayList(User),
// sorted by gid
groups: MultiArrayList(Group),
// user name -> index into `users`
name2user: StringHashMap(u32),
// group name -> index into `groups`
name2group: StringHashMap(u32),
// indexed by group index: ascending indices (into `users`) of its members
group2users: []const []const u32,
// indexed by user index: ascending indices (into `groups`) of the groups
// that list the user as a member
user2groups: []const []const u32,
// largest Group.strlenZ() seen across all groups
getgr_bufsize: usize,
// largest User.strlenZ() seen across all users
getpw_bufsize: usize,
/// Builds a Corpus out of unsorted users and groups.
///
/// Users are cloned in name order (by unicode codepoint, a name sorts before
/// any longer name it is a prefix of) and groups are cloned in gid order.
/// Everything the returned Corpus references is allocated from an arena on
/// top of `baseAllocator`; release it with `deinit`. `baseAllocator` is also
/// used directly for scratch buffers that are freed before returning.
///
/// Human-readable details for some failures are recorded in `err`.
///
/// Errors:
/// - `TooMany`: user or group count does not fit the u32 indices.
/// - `InvalidUtf8`: a user name is not valid UTF-8.
/// - `Duplicate`: a user name, group name, uid or gid occurs more than once.
/// - `NotFound`: a group lists a member that is not a known user.
/// - `OutOfMemory`: any allocation failed.
///
/// NOTE(review): `name2user` and `name2group` are created with an allocator
/// that refers to the function-local arena, while the returned Corpus holds
/// a copy of that arena. They look safe to read, but should not be grown
/// after init returns -- confirm before adding entries later.
pub fn init(
    baseAllocator: Allocator,
    usersConst: []const User,
    groupsConst: []const Group,
    err: *ErrCtx,
) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound, TooMany }!Corpus {
    if (usersConst.len >= math.maxInt(u32)) return error.TooMany;
    if (groupsConst.len >= math.maxInt(u32)) return error.TooMany;

    var users = MultiArrayList(User){};
    var groups = MultiArrayList(Group){};
    var getgr_bufsize: usize = 0;
    var getpw_bufsize: usize = 0;

    var arena = ArenaAllocator.init(baseAllocator);
    const allocator = arena.allocator();
    errdefer arena.deinit();

    const NameIdx = struct {
        name: []const u8,
        idx: usize,
    };
    const GidIdx = struct {
        gid: u32,
        idx: usize,
    };
    const Compare = struct {
        // Strict "less than" by unicode codepoint.
        fn name(_: void, a: NameIdx, b: NameIdx) bool {
            // Names are validated before sorting, so the views cannot fail.
            var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator();
            var utf8_b = (unicode.Utf8View.init(b.name) catch unreachable).iterator();
            while (utf8_a.nextCodepoint()) |codepoint_a| {
                // b ran out first: b is a proper prefix of a, so a sorts
                // after b.
                const codepoint_b = utf8_b.nextCodepoint() orelse return false;
                if (codepoint_a != codepoint_b) return codepoint_a < codepoint_b;
            }
            // a ran out. It sorts first only when b still has codepoints
            // left (a is a proper prefix of b); equal names are not "less".
            return utf8_b.nextCodepoint() != null;
        }
        fn gid(_: void, a: GidIdx, b: GidIdx) bool {
            return a.gid < b.gid;
        }
    };

    // TODO: replace with MultiArrayList sort when
    // https://github.com/ziglang/zig/issues/11117 is done. As of writing it
    // was quite a bit slower.
    {
        const name_idx = try baseAllocator.alloc(NameIdx, usersConst.len);
        defer baseAllocator.free(name_idx);
        for (usersConst, 0..) |user, i| {
            // The comparator assumes valid UTF-8; reject bad names up front
            // instead of reaching `unreachable` while sorting.
            if (!unicode.utf8ValidateSlice(user.name))
                return err.returnf(
                    "user name '{s}' is not valid UTF-8",
                    .{user.name},
                    error.InvalidUtf8,
                );
            name_idx[i] = NameIdx{ .name = user.name, .idx = i };
        }
        sort.sort(NameIdx, name_idx, {}, Compare.name);

        try users.ensureTotalCapacity(allocator, usersConst.len);
        for (name_idx) |entry| {
            const user = try usersConst[entry.idx].clone(allocator);
            users.appendAssumeCapacity(user);
            getpw_bufsize = math.max(getpw_bufsize, user.strlenZ());
        }
    }

    {
        const gid_idx = try baseAllocator.alloc(GidIdx, groupsConst.len);
        defer baseAllocator.free(gid_idx);
        for (groupsConst, 0..) |group, i|
            gid_idx[i] = GidIdx{ .gid = group.gid, .idx = i };
        sort.sort(GidIdx, gid_idx, {}, Compare.gid);

        try groups.ensureTotalCapacity(allocator, groupsConst.len);
        for (gid_idx) |entry| {
            const group = try groupsConst[entry.idx].clone(allocator);
            groups.appendAssumeCapacity(group);
            getgr_bufsize = math.max(getgr_bufsize, group.strlenZ());
        }
    }

    // verify whatever comes to cmph are unique: user names
    var name2user = StringHashMap(u32).init(allocator);
    for (users.items(.name), 0..) |name, i| {
        const result = try name2user.getOrPut(name);
        if (result.found_existing)
            return error.Duplicate;
        result.value_ptr.* = @intCast(u32, i);
    }

    // verify whatever comes to cmph are unique: group names
    var name2group = StringHashMap(u32).init(allocator);
    for (groups.items(.name), 0..) |name, i| {
        const result = try name2group.getOrPut(name);
        if (result.found_existing)
            return error.Duplicate;
        result.value_ptr.* = @intCast(u32, i);
    }

    // verify whatever comes to cmph are unique: gids
    {
        // Groups are sorted by gid, so duplicates are adjacent. Starting at
        // index 1 also covers the zero-group and single-group cases.
        const gids = groups.items(.gid);
        var i: usize = 1;
        while (i < gids.len) : (i += 1) {
            if (gids[i] == gids[i - 1])
                return err.returnf("duplicate gid {d}", .{gids[i]}, error.Duplicate);
        }
    }

    // verify whatever comes to cmph are unique: uids
    {
        var uid_map = AutoHashMap(u32, void).init(allocator);
        defer uid_map.deinit();
        for (users.items(.uid)) |uid| {
            const result = try uid_map.getOrPut(uid);
            if (result.found_existing)
                return err.returnf("duplicate uid {d}", .{uid}, error.Duplicate);
        }
    }

    var group2users = try allocator.alloc([]u32, groups.len);

    // The outer slice uses baseAllocator, because it will be freed before
    // returning from this function. The per-user lists themselves grow in
    // the arena and are handed over to the result below.
    var user2groups = try baseAllocator.alloc(ArrayListUnmanaged(u32), users.len);
    defer baseAllocator.free(user2groups);
    mem.set(ArrayListUnmanaged(u32), user2groups, ArrayListUnmanaged(u32){});

    for (groups.items(.members), groups.items(.name), 0..) |groupmembers, name, i| {
        // Allocate for the worst case, then grow `len` as members resolve.
        var members = try allocator.alloc(u32, groupmembers.len);
        members.len = 0;

        for (groupmembers) |member_name| {
            if (name2user.get(member_name)) |user_idx| {
                // If user is in a group of their primary gid, the right thing
                // to do is to skip the membership. This is what classical tools
                // do (look at your etc/passwd), but not what Ours Truly nss
                // implementation does. However, since there is at least one
                // known implementation that does not do this, push the
                // responsibility to the validator (and keep the code
                // commented).
                // if (users.items(.gid)[user_idx] == groups.items(.gid)[i])
                //     continue;
                members.len += 1;
                members[members.len - 1] = user_idx;
                try user2groups[user_idx].append(allocator, @intCast(u32, i));
            } else {
                return err.returnf(
                    "user '{s}' not found, member of group '{s}'",
                    .{ member_name, name },
                    error.NotFound,
                );
            }
        }

        group2users[i] = members;
    }

    for (group2users) |*groupusers|
        sort.sort(u32, groupusers.*, {}, comptime sort.asc(u32));

    var user2groups_final = try allocator.alloc([]const u32, users.len);
    for (user2groups, user2groups_final) |*usergroups, *user2groups_final_i| {
        sort.sort(u32, usergroups.items, {}, comptime sort.asc(u32));
        user2groups_final_i.* = try usergroups.toOwnedSlice(allocator);
    }

    return Corpus{
        .arena = arena,
        .users = users,
        .groups = groups,
        .name2user = name2user,
        .name2group = name2group,
        .group2users = group2users,
        .user2groups = user2groups_final,
        .getgr_bufsize = getgr_bufsize,
        .getpw_bufsize = getpw_bufsize,
    };
}
/// Frees the arena held by this Corpus and then poisons the struct, so any
/// later use of `self` reads `undefined`.
pub fn deinit(self: *Corpus) void {
    // Runs after the arena has been released.
    defer self.* = undefined;
    self.arena.deinit();
}
/// Returns a User whose fields are all zeroed except `name`, which is set to
/// the given slice (not copied).
fn testUser(name: []const u8) User {
    var user: User = mem.zeroes(User);
    user.name = name;
    return user;
}
const testing = std.testing;
const someMembers = @import("Group.zig").someMembers;
/// Builds a small fixture Corpus with five users and four groups.
///
/// The temporary groups are created with `allocator` and released before
/// returning. The caller owns the returned Corpus and must call `deinit`
/// on it.
pub fn testCorpus(allocator: Allocator) !Corpus {
    const users = [_]User{ User{
        .uid = 0,
        .gid = 0,
        .name = "root",
        .gecos = "",
        .home = "/root",
        .shell = "/bin/bash",
    }, User{
        .uid = 128,
        .gid = 128,
        .name = "vidmantas",
        .gecos = "Vidmantas Kaminskas",
        .home = "/home/vidmantas",
        .shell = "/bin/bash",
    }, User{
        .uid = 1000,
        .gid = math.maxInt(u32),
        .name = "Name" ** 8,
        .gecos = "Gecos" ** 51,
        .home = "/Hom" ** 16,
        .shell = "/She.Lll" ** 8,
    }, User{
        .uid = 100000,
        .gid = 1002,
        .name = "svc-bar",
        .gecos = "",
        .home = "/",
        .shell = "/",
    }, User{
        .uid = 65534,
        .gid = 65534,
        .name = "nobody",
        .gecos = "nobody",
        .home = "/nonexistent",
        .shell = "/usr/sbin/nologin",
    } };

    // Each defer is registered right after its init, so an allocation
    // failure in a later Group.init does not leak the earlier groups.
    var group0 = try Group.init(allocator, 0, "root", &[_][]const u8{});
    defer group0.deinit(allocator);
    var group1 = try Group.init(allocator, 128, "vidmantas", &[_][]const u8{});
    defer group1.deinit(allocator);
    var group2 = try Group.init(
        allocator,
        9999,
        "all",
        &[_][]const u8{ "svc-bar", "Name" ** 8, "vidmantas", "root" },
    );
    defer group2.deinit(allocator);
    var group3 = try Group.init(
        allocator,
        100000,
        "service-group",
        &[_][]const u8{ "vidmantas", "root" },
    );
    defer group3.deinit(allocator);

    const groups = [_]Group{ group0, group1, group2, group3 };
    var errc = ErrCtx{};
    var result = try Corpus.init(allocator, users[0..], groups[0..], &errc);
    // If the check below fails, do not leak the freshly built corpus.
    errdefer result.deinit();
    try testing.expectEqualStrings("", errc.unwrap().constSlice());
    return result;
}
// Checks the fixture from testCorpus: user/group ordering, name lookups and
// both membership tables. Expected values are passed first so that failure
// output labels them correctly.
test "corpus smoke test" {
    var corpus = try testCorpus(testing.allocator);
    defer corpus.deinit();

    // Indices into corpus.users (sorted by name).
    const name_name = 0;
    const nobody = 1;
    const root = 2;
    const svc_bar = 3;
    const vidmantas = 4;

    const usernames = corpus.users.items(.name);
    try testing.expectEqual(@as(usize, 5), usernames.len);
    try testing.expectEqualStrings("Name" ** 8, usernames[name_name]);
    try testing.expectEqualStrings("nobody", usernames[nobody]);
    try testing.expectEqualStrings("root", usernames[root]);
    try testing.expectEqualStrings("svc-bar", usernames[svc_bar]);
    try testing.expectEqualStrings("vidmantas", usernames[vidmantas]);

    // Indices into corpus.groups (sorted by gid).
    const g_root = 0;
    const g_vidmantas = 1;
    const g_all = 2;
    const g_service_account = 3;

    const groupnames = corpus.groups.items(.name);
    try testing.expectEqual(@as(usize, 4), groupnames.len);
    try testing.expectEqualStrings("root", groupnames[g_root]);
    try testing.expectEqualStrings("service-group", groupnames[g_service_account]);
    try testing.expectEqualStrings("vidmantas", groupnames[g_vidmantas]);
    try testing.expectEqualStrings("all", groupnames[g_all]);

    try testing.expectEqual(@as(?u32, null), corpus.name2user.get("404"));
    try testing.expectEqual(@as(?u32, vidmantas), corpus.name2user.get("vidmantas"));
    try testing.expectEqual(@as(?u32, null), corpus.name2group.get("404"));
    try testing.expectEqual(@as(?u32, g_vidmantas), corpus.name2group.get("vidmantas"));

    // Comparing whole slices also verifies their lengths.
    try testing.expectEqualSlices(
        u32,
        &[_]u32{ name_name, root, svc_bar, vidmantas },
        corpus.group2users[g_all],
    );
    try testing.expectEqualSlices(
        u32,
        &[_]u32{ g_all, g_service_account },
        corpus.user2groups[vidmantas],
    );
}