1
Fork 0

get rid of Corpus struct

it had some non-useful internal state
This commit is contained in:
Motiejus Jakštys 2022-03-04 11:01:19 +02:00 committed by Motiejus Jakštys
parent b81072f726
commit 87c424aab9
1 changed files with 124 additions and 131 deletions

View File

@ -173,133 +173,130 @@ const Corpus = struct {
}
};
pub const Sections = struct {
allocator: Allocator,
corpus: *const Corpus,
/// Packs the `gid` column of `corpus.groupsMulti` using `cmph.pack_u32`.
/// The result is produced with `allocator`; the caller releases it with
/// `allocator.free`.
pub fn bdzGid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
    const gids = corpus.groupsMulti.items(.gid);
    return try cmph.pack_u32(allocator, gids);
}
/// Builds a `Sections` value that stores `allocator` and keeps a pointer to
/// `corpus`. Nothing is allocated or copied here.
pub fn init(allocator: Allocator, corpus: *const Corpus) Sections {
    const sections = Sections{
        .corpus = corpus,
        .allocator = allocator,
    };
    return sections;
}
/// Packs the `name` column of `corpus.groupsMulti` using `cmph.pack_str`.
/// The result is produced with `allocator`; the caller releases it with
/// `allocator.free`.
pub fn bdzGroupname(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
    const group_names = corpus.groupsMulti.items(.name);
    return try cmph.pack_str(allocator, group_names);
}
// Result type declared for `groupMembers`.
// NOTE(review): the visible `groupMembers` implementation is unfinished; it
// records one offset per group and appends only the varint-encoded member
// count to the byte buffer. Confirm the final layout before relying on it.
pub const GroupMembers = struct {
// offset noted for each group before its data is appended to `bytes`
offsets: []const usize,
// concatenated varint-encoded group data
bytes: []const u8,
};
/// Packs the `uid` column of `corpus.usersMulti` using `cmph.pack_u32`.
/// The result is produced with `allocator`; the caller releases it with
/// `allocator.free`.
pub fn bdzUid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
    const uids = corpus.usersMulti.items(.uid);
    return try cmph.pack_u32(allocator, uids);
}
/// Packs the `gid` column of the corpus' `groupsMulti` using
/// `cmph.pack_u32` and the allocator stored in `self`.
pub fn bdzGid(self: *const Sections) cmph.Error![]const u8 {
    const gids = self.corpus.groupsMulti.items(.gid);
    return try cmph.pack_u32(self.allocator, gids);
}
/// Packs the `name` column of `corpus.usersMulti` using `cmph.pack_str`.
/// The result is produced with `allocator`; the caller releases it with
/// `allocator.free`.
pub fn bdzUsername(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
    const user_names = corpus.usersMulti.items(.name);
    return try cmph.pack_str(allocator, user_names);
}
/// Packs the `name` column of the corpus' `groupsMulti` using
/// `cmph.pack_str` and the allocator stored in `self`.
pub fn bdzGroupname(self: *const Sections) cmph.Error![]const u8 {
    const group_names = self.corpus.groupsMulti.items(.name);
    return try cmph.pack_str(self.allocator, group_names);
}
pub const ShellSections = struct {
index: []const u8,
blob: []const u8,
/// Packs the `uid` column of the corpus' `usersMulti` using
/// `cmph.pack_u32` and the allocator stored in `self`.
pub fn bdzUid(self: *const Sections) cmph.Error![]const u8 {
    const uids = self.corpus.usersMulti.items(.uid);
    return try cmph.pack_u32(self.allocator, uids);
}
/// Packs the `name` column of the corpus' `usersMulti` using
/// `cmph.pack_str` and the allocator stored in `self`.
pub fn bdzUsername(self: *const Sections) cmph.Error![]const u8 {
    const user_names = self.corpus.usersMulti.items(.name);
    return try cmph.pack_str(self.allocator, user_names);
}
// Result of `shellSections`: two separately allocated byte slices copied
// out of the shell writer's sections.
pub const ShellSections = struct {
// copy of the bytes returned by the writer's `sectionIndex()`
index: []const u8,
// copy of the bytes returned by the writer's `sectionBlob()`
blob: []const u8,
};
// TODO(motiejus) there are a few problems:
// - memory management for shell sections is a mess. Make it easier by ...
// - shell module should accept a list of shells and spit out two slices
// (allocated with a given allocator). There is too much dancing around
// here.
const shellSectionsErr = Allocator.Error || error{Overflow};
/// Feeds the `shell` of every user in the corpus into a
/// `shellImport.ShellWriter`, asks it for at most `shellImport.max_shells`
/// sections, and returns copies of the resulting index and blob.
///
/// Both returned slices are allocated with `self.allocator` and must be
/// freed by the caller. On error nothing is left allocated.
pub fn shellSections(self: *const Sections) shellSectionsErr!ShellSections {
    var popcon = shellImport.ShellWriter.init(self.allocator);
    defer popcon.deinit();
    for (self.corpus.usersMulti.items(.shell)) |shell| {
        try popcon.put(shell);
    }
    var sections = try popcon.toOwnedSections(shellImport.max_shells);
    defer sections.deinit();

    const index = try self.allocator.dupe(u8, sections.sectionIndex());
    // If copying the blob fails, the index copy must not be leaked.
    errdefer self.allocator.free(index);
    const blob = try self.allocator.dupe(u8, sections.sectionBlob());

    return ShellSections{
        .index = index,
        .blob = blob,
    };
}
// Result of `userGids`: a compressed blob of per-user gid lists plus a map
// telling where each user's list starts.
pub const UserGids = struct {
// username -> byte offset of that user's item in `blob`. Keys are the
// `user.name` slices handed to the map; the map stores the slices as given.
name2offset: StringHashMap(u32),
// compressed user gids blob. A blob contains N <= users.len items
// (users without a group list are skipped); an item is:
// len: varint
// gid: [varint]varint,
// ... and the gid list is delta-compressed.
blob: []u8,
};
const userGidsErr = Allocator.Error || error{Overflow};
/// Builds the compressed gid blob for every user that has an entry in
/// `corpus.username2groups`.
///
/// For each such user the blob receives a uvarint group count followed by
/// the delta-compressed gids as uvarints; `name2offset` maps the user's name
/// to the byte offset where that item starts. Users without an entry are
/// skipped.
///
/// The returned map and blob are allocated with `self.allocator` and owned
/// by the caller. Map keys are the corpus' `user.name` slices.
/// Returns `error.Overflow` when a blob offset does not fit in `u32`.
pub fn userGids(self: *const Sections) userGidsErr!UserGids {
    // Scratch memory for the per-user temporaries; dropped on return.
    var arena = std.heap.ArenaAllocator.init(self.allocator);
    defer arena.deinit();

    var blob = ArrayList(u8).init(self.allocator);
    // Any `try` below may fail; release the partial results in that case.
    errdefer blob.deinit();
    var name2offset = StringHashMap(u32).init(self.allocator);
    errdefer name2offset.deinit();

    for (self.corpus.users) |user| {
        const usergroups = self.corpus.username2groups.get(user.name) orelse continue;

        try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len));

        var userBlob = try ArrayList(u8).initCapacity(arena.allocator(), usergroups.len * 2);
        const deltaCompressedGids = try arena.allocator().alloc(u32, usergroups.len);
        for (usergroups) |group, i| {
            deltaCompressedGids[i] = group.gid;
        }
        // NotSorted is treated as impossible here: this assumes each user's
        // group list is already ordered by gid (TODO confirm in Corpus).
        compress.deltaCompress(u32, deltaCompressedGids) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };

        try compress.appendUvarint(&userBlob, usergroups.len);
        for (deltaCompressedGids) |gid| {
            try compress.appendUvarint(&userBlob, gid);
        }
        // userBlob lives in the arena, so copying its items is enough.
        try blob.appendSlice(userBlob.items);
    }

    return UserGids{
        .name2offset = name2offset,
        .blob = blob.toOwnedSlice(),
    };
}
pub fn groupMembers(self: *const Sections) Allocator.Error!GroupMembers {
var buf: [compress.maxVarintLen64]u8 = undefined;
var offsets = ArrayListUnmanaged(usize).initCapacity(
self.allocator,
self.corpus.groups.len,
);
var bytes = ArrayList(u8).init(self.allocator);
var offset: usize = 0;
for (self.corpus.groups) |group, i| {
offsets[i] = offset;
const users = self.corpus.groupname2users.get(group.name).?;
const len = compress.putVarint(&buf, users.len);
offset += len;
try bytes.appendSlice(buf[0..len]);
for (users) |user| {
// TODO: offset into the User's record
_ = user;
}
}
/// Frees both slices with `allocator` and then invalidates `self`.
/// `allocator` has to be the one the slices were allocated with.
pub fn deinit(self: *ShellSections, allocator: Allocator) void {
    allocator.free(self.blob);
    allocator.free(self.index);
    // Poison the value so accidental reuse is easier to notice.
    self.* = undefined;
}
};
// TODO(motiejus) there are a few problems:
// - memory management for shell sections is a mess. Make it easier by ...
// - shell module should accept a list of shells and spit out two slices
// (allocated with a given allocator). There is too much dancing around
// here.
const shellSectionsErr = Allocator.Error || error{Overflow};
/// Feeds the `shell` of every user in `corpus.usersMulti` into a
/// `shellImport.ShellWriter`, asks it for at most `shellImport.max_shells`
/// sections, and returns copies of the resulting index and blob.
///
/// Both returned slices are allocated with `allocator` and owned by the
/// caller. On error nothing is left allocated.
pub fn shellSections(
    allocator: Allocator,
    corpus: *const Corpus,
) shellSectionsErr!ShellSections {
    var popcon = shellImport.ShellWriter.init(allocator);
    defer popcon.deinit();
    for (corpus.usersMulti.items(.shell)) |shell| {
        try popcon.put(shell);
    }
    var sections = try popcon.toOwnedSections(shellImport.max_shells);
    defer sections.deinit();

    const index = try allocator.dupe(u8, sections.sectionIndex());
    // If copying the blob fails, the index copy must not be leaked.
    errdefer allocator.free(index);
    const blob = try allocator.dupe(u8, sections.sectionBlob());

    return ShellSections{
        .index = index,
        .blob = blob,
    };
}
/// Compressed per-user gid lists together with a lookup table telling where
/// each user's list begins.
pub const UserGids = struct {
    // username -> byte offset of that user's item in `blob`
    name2offset: StringHashMap(u32),
    // Compressed user gids blob holding N <= users.len items. Each item is:
    //   len: varint
    //   gid: [varint]varint
    // where the gid list is delta-compressed.
    blob: []u8,

    /// Releases the blob (with `allocator`) and the map, then invalidates
    /// `self`. `allocator` has to be the one `blob` was allocated with.
    pub fn deinit(self: *UserGids, allocator: Allocator) void {
        allocator.free(self.blob);
        self.name2offset.deinit();
        self.* = undefined;
    }
};
const userGidsErr = Allocator.Error || error{Overflow};
/// Builds the compressed gid blob for every user that has an entry in
/// `corpus.username2groups`.
///
/// For each such user the blob receives a uvarint group count followed by
/// the delta-compressed gids as uvarints; `name2offset` maps the user's name
/// to the byte offset where that item starts. Users without an entry are
/// skipped.
///
/// The returned map and blob are allocated with `allocator` and owned by the
/// caller. Map keys are the corpus' `user.name` slices.
/// Returns `error.Overflow` when a blob offset does not fit in `u32`.
pub fn userGids(allocator: Allocator, corpus: *const Corpus) userGidsErr!UserGids {
    // Scratch memory for the per-user gid arrays; dropped on return.
    var arena = std.heap.ArenaAllocator.init(allocator);
    defer arena.deinit();

    var blob = ArrayList(u8).init(allocator);
    // Any `try` below may fail; release the partial results in that case.
    errdefer blob.deinit();
    var name2offset = StringHashMap(u32).init(allocator);
    errdefer name2offset.deinit();

    for (corpus.users) |user| {
        const usergroups = corpus.username2groups.get(user.name) orelse continue;

        try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len));

        const deltaCompressedGids = try arena.allocator().alloc(u32, usergroups.len);
        for (usergroups) |group, i| {
            deltaCompressedGids[i] = group.gid;
        }
        // NotSorted is treated as impossible here: this assumes each user's
        // group list is already ordered by gid (TODO confirm in Corpus).
        compress.deltaCompress(u32, deltaCompressedGids) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };

        try compress.appendUvarint(&blob, usergroups.len);
        for (deltaCompressedGids) |gid| {
            try compress.appendUvarint(&blob, gid);
        }
    }

    return UserGids{
        .name2offset = name2offset,
        .blob = blob.toOwnedSlice(),
    };
}
/// Work in progress: walks `corpus.groups`, noting a running byte offset per
/// group and appending each group's varint-encoded member count to a byte
/// buffer. Member records are not written yet (see the TODO in the loop).
///
/// Nothing is returned, so both scratch buffers are released before the
/// function exits.
pub fn groupMembers(allocator: Allocator, corpus: *const Corpus) Allocator.Error!void {
    var buf: [compress.maxVarintLen64]u8 = undefined;
    // initCapacity yields an error union, so it has to be unwrapped.
    var offsets = try ArrayListUnmanaged(usize).initCapacity(
        allocator,
        corpus.groups.len,
    );
    defer offsets.deinit(allocator);
    var bytes = ArrayList(u8).init(allocator);
    defer bytes.deinit();

    var offset: usize = 0;
    for (corpus.groups) |group| {
        // One slot per group was reserved above, so this cannot allocate.
        offsets.appendAssumeCapacity(offset);
        // NOTE(review): `.?` assumes every group has an entry in
        // `groupname2users` — confirm against how Corpus is built.
        const users = corpus.groupname2users.get(group.name).?;
        const len = compress.putVarint(&buf, users.len);
        offset += len;
        try bytes.appendSlice(buf[0..len]);
        for (users) |user| {
            // TODO: offset into the User's record
            _ = user;
        }
    }
}
// cmpUser compares two users for sorting. By username's utf8 codepoints, ascending.
fn cmpUser(_: void, a: User, b: User) bool {
var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator();
@ -437,27 +434,23 @@ test "test sections" {
var corpus = try testCorpus(allocator);
defer corpus.deinit();
var sections = Sections.init(allocator, &corpus);
const bdz_gid = try sections.bdzGid();
const bdz_gid = try bdzGid(allocator, &corpus);
defer allocator.free(bdz_gid);
const bdz_groupname = try sections.bdzGroupname();
const bdz_groupname = try bdzGroupname(allocator, &corpus);
defer allocator.free(bdz_groupname);
const bdz_uid = try sections.bdzUid();
const bdz_uid = try bdzUid(allocator, &corpus);
defer allocator.free(bdz_uid);
const bdz_username = try sections.bdzUsername();
const bdz_username = try bdzUsername(allocator, &corpus);
defer allocator.free(bdz_username);
const shell_sections = try sections.shellSections();
defer allocator.free(shell_sections.index);
defer allocator.free(shell_sections.blob);
var shell_sections = try shellSections(allocator, &corpus);
defer shell_sections.deinit(allocator);
var user_gids = try sections.userGids();
defer user_gids.name2offset.deinit();
defer allocator.free(user_gids.blob);
var user_gids = try userGids(allocator, &corpus);
defer user_gids.deinit(allocator);
}
test "pack gids" {