get rid of Sections struct

it had some non-useful internal state
This commit is contained in:
Motiejus Jakštys 2022-03-04 11:01:19 +02:00 committed by Motiejus Jakštys
parent b81072f726
commit 87c424aab9

View File

@ -173,133 +173,130 @@ const Corpus = struct {
} }
}; };
pub const Sections = struct { pub fn bdzGid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
allocator: Allocator, return try cmph.pack_u32(allocator, corpus.groupsMulti.items(.gid));
corpus: *const Corpus, }
pub fn init(allocator: Allocator, corpus: *const Corpus) Sections { pub fn bdzGroupname(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
return Sections{ return try cmph.pack_str(allocator, corpus.groupsMulti.items(.name));
.allocator = allocator, }
.corpus = corpus,
};
}
pub const GroupMembers = struct { pub fn bdzUid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
offsets: []const usize, return try cmph.pack_u32(allocator, corpus.usersMulti.items(.uid));
bytes: []const u8, }
};
pub fn bdzGid(self: *const Sections) cmph.Error![]const u8 { pub fn bdzUsername(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 {
return try cmph.pack_u32(self.allocator, self.corpus.groupsMulti.items(.gid)); return try cmph.pack_str(allocator, corpus.usersMulti.items(.name));
} }
pub fn bdzGroupname(self: *const Sections) cmph.Error![]const u8 { pub const ShellSections = struct {
return try cmph.pack_str(self.allocator, self.corpus.groupsMulti.items(.name)); index: []const u8,
} blob: []const u8,
pub fn bdzUid(self: *const Sections) cmph.Error![]const u8 { pub fn deinit(self: *ShellSections, allocator: Allocator) void {
return try cmph.pack_u32(self.allocator, self.corpus.usersMulti.items(.uid)); allocator.free(self.index);
} allocator.free(self.blob);
self.* = undefined;
/// Packs the `name` column of `corpus.usersMulti` with `cmph.pack_str`,
/// allocating the result with `self.allocator`.
pub fn bdzUsername(self: *const Sections) cmph.Error![]const u8 {
    const usernames = self.corpus.usersMulti.items(.name);
    return cmph.pack_str(self.allocator, usernames);
}
// Pair of byte sections returned by `shellSections`.
pub const ShellSections = struct {
// copy of the shell writer's `sectionIndex()` bytes
index: []const u8,
// copy of the shell writer's `sectionBlob()` bytes
blob: []const u8,
};
// TODO(motiejus) there are a few problems:
// - memory management for shell sections is a mess. Make it easier by ...
// - shell module should accept a list of shells and spit out two slices
// (allocated with a given allocator). There is too much dancing around
// here.
// Errors that `shellSections` can return.
const shellSectionsErr = Allocator.Error || error{Overflow};

/// Builds the shell sections for all users in the corpus.
///
/// Every user's shell is fed into a `shellImport.ShellWriter`; the resulting
/// index and blob are then copied with `self.allocator`, so both slices of
/// the returned `ShellSections` were allocated by that allocator.
pub fn shellSections(self: *const Sections) shellSectionsErr!ShellSections {
    var popcon = shellImport.ShellWriter.init(self.allocator);
    defer popcon.deinit();
    for (self.corpus.usersMulti.items(.shell)) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(shellImport.max_shells);
    defer sections.deinit();

    const index = try self.allocator.dupe(u8, sections.sectionIndex());
    // If duplicating the blob fails, the index copy must not leak.
    errdefer self.allocator.free(index);
    const blob = try self.allocator.dupe(u8, sections.sectionBlob());

    return ShellSections{
        .index = index,
        .blob = blob,
    };
}
// Result of `userGids`: a compressed gid blob plus a lookup table into it.
pub const UserGids = struct {
// username -> offset of that user's item in `blob`
name2offset: StringHashMap(u32),
// compressed user gids blob. A blob contains N <= users.len items,
// an item is:
// len: varint
// gid: [varint]varint,
// ... and the gid list is delta-compressed.
blob: []u8,
};
// Errors that `userGids` can return.
const userGidsErr = Allocator.Error || error{Overflow};

/// Builds the compressed per-user gid blob and the username -> offset table.
///
/// Users without an entry in `corpus.username2groups` are skipped. For every
/// other user the blob receives a uvarint group count followed by the
/// delta-compressed gids as uvarints. Returns `error.Overflow` if a blob
/// offset does not fit in `u32`.
///
/// The returned map and blob are allocated with `self.allocator`. The map
/// keys are the `user.name` slices taken from the corpus, not copies.
pub fn userGids(self: *const Sections) userGidsErr!UserGids {
    // Scratch space for the per-user gid arrays; released in one go.
    var arena = std.heap.ArenaAllocator.init(self.allocator);
    defer arena.deinit();

    var blob = ArrayList(u8).init(self.allocator);
    // Both results must be released if anything below fails.
    errdefer blob.deinit();
    var name2offset = StringHashMap(u32).init(self.allocator);
    errdefer name2offset.deinit();

    for (self.corpus.users) |user| {
        const usergroups = self.corpus.username2groups.get(user.name) orelse continue;

        try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len));

        const deltaCompressedGids = try arena.allocator().alloc(u32, usergroups.len);
        for (usergroups) |group, i| {
            deltaCompressedGids[i] = group.gid;
        }
        // NOTE(review): treats NotSorted as impossible, i.e. presumes each
        // user's groups are already ordered by gid; confirm against the
        // corpus builder.
        compress.deltaCompress(u32, deltaCompressedGids) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };

        // Write straight into the result; no per-user staging buffer needed.
        try compress.appendUvarint(&blob, usergroups.len);
        for (deltaCompressedGids) |gid| {
            try compress.appendUvarint(&blob, gid);
        }
    }

    return UserGids{
        .name2offset = name2offset,
        .blob = blob.toOwnedSlice(),
    };
}
pub fn groupMembers(self: *const Sections) Allocator.Error!GroupMembers {
var buf: [compress.maxVarintLen64]u8 = undefined;
var offsets = ArrayListUnmanaged(usize).initCapacity(
self.allocator,
self.corpus.groups.len,
);
var bytes = ArrayList(u8).init(self.allocator);
var offset: usize = 0;
for (self.corpus.groups) |group, i| {
offsets[i] = offset;
const users = self.corpus.groupname2users.get(group.name).?;
const len = compress.putVarint(&buf, users.len);
offset += len;
try bytes.appendSlice(buf[0..len]);
for (users) |user| {
// TODO: offset into the User's record
_ = user;
}
}
} }
}; };
// TODO(motiejus) there are a few problems:
// - memory management for shell sections is a mess. Make it easier by ...
// - shell module should accept a list of shells and spit out two slices
// (allocated with a given allocator). There is too much dancing around
// here.
// Errors that `shellSections` can return.
const shellSectionsErr = Allocator.Error || error{Overflow};

/// Builds the shell sections for all users in `corpus`.
///
/// Every user's shell is fed into a `shellImport.ShellWriter`; the resulting
/// index and blob are then copied with `allocator`. Release the result with
/// `ShellSections.deinit` using the same allocator.
pub fn shellSections(
    allocator: Allocator,
    corpus: *const Corpus,
) shellSectionsErr!ShellSections {
    var popcon = shellImport.ShellWriter.init(allocator);
    defer popcon.deinit();
    for (corpus.usersMulti.items(.shell)) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(shellImport.max_shells);
    defer sections.deinit();

    const index = try allocator.dupe(u8, sections.sectionIndex());
    // If duplicating the blob fails, the index copy must not leak.
    errdefer allocator.free(index);
    const blob = try allocator.dupe(u8, sections.sectionBlob());

    return ShellSections{
        .index = index,
        .blob = blob,
    };
}
/// Result of `userGids`: a compressed gid blob plus a lookup table into it.
pub const UserGids = struct {
    /// Maps a username to the offset of that user's item in `blob`.
    name2offset: StringHashMap(u32),
    /// Compressed user gids blob. It holds N <= users.len items; each item is
    ///   len: varint
    ///   gid: [varint]varint
    /// where the gid list is delta-compressed.
    blob: []u8,

    /// Frees `blob` with `allocator`, releases the map, and leaves `self`
    /// undefined so it cannot be reused by accident.
    pub fn deinit(self: *UserGids, allocator: Allocator) void {
        allocator.free(self.blob);
        self.name2offset.deinit();
        self.* = undefined;
    }
};
// Errors that `userGids` can return.
const userGidsErr = Allocator.Error || error{Overflow};

/// Builds the compressed per-user gid blob and the username -> offset table.
///
/// Users without an entry in `corpus.username2groups` are skipped. For every
/// other user the blob receives a uvarint group count followed by the
/// delta-compressed gids as uvarints. Returns `error.Overflow` if a blob
/// offset does not fit in `u32`.
///
/// The returned map and blob are allocated with `allocator`; release them
/// with `UserGids.deinit`. The map keys are the `user.name` slices taken
/// from `corpus`, not copies.
pub fn userGids(allocator: Allocator, corpus: *const Corpus) userGidsErr!UserGids {
    // Scratch space for the per-user gid arrays; released in one go.
    var arena = std.heap.ArenaAllocator.init(allocator);
    defer arena.deinit();

    var blob = ArrayList(u8).init(allocator);
    // Both results must be released if anything below fails.
    errdefer blob.deinit();
    var name2offset = StringHashMap(u32).init(allocator);
    errdefer name2offset.deinit();

    for (corpus.users) |user| {
        const usergroups = corpus.username2groups.get(user.name) orelse continue;

        try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len));

        const deltaCompressedGids = try arena.allocator().alloc(u32, usergroups.len);
        for (usergroups) |group, i| {
            deltaCompressedGids[i] = group.gid;
        }
        // NOTE(review): treats NotSorted as impossible, i.e. presumes each
        // user's groups are already ordered by gid; confirm against the
        // corpus builder.
        compress.deltaCompress(u32, deltaCompressedGids) catch |err| switch (err) {
            error.NotSorted => unreachable,
        };

        try compress.appendUvarint(&blob, usergroups.len);
        for (deltaCompressedGids) |gid| {
            try compress.appendUvarint(&blob, gid);
        }
    }

    return UserGids{
        .name2offset = name2offset,
        .blob = blob.toOwnedSlice(),
    };
}
/// Work in progress: walks every group in `corpus`, encodes the group's
/// member count with `compress.putVarint`, and records the byte offset at
/// which each group's entry starts.
///
/// Nothing is returned yet, so the temporary buffers are released before
/// returning.
pub fn groupMembers(allocator: Allocator, corpus: *const Corpus) Allocator.Error!void {
    var buf: [compress.maxVarintLen64]u8 = undefined;

    // One offset per group; capacity is reserved up front so the appends
    // inside the loop cannot fail.
    var offsets = try ArrayListUnmanaged(usize).initCapacity(
        allocator,
        corpus.groups.len,
    );
    defer offsets.deinit(allocator);
    var bytes = ArrayList(u8).init(allocator);
    defer bytes.deinit();

    var offset: usize = 0;
    for (corpus.groups) |group| {
        offsets.appendAssumeCapacity(offset);
        // NOTE(review): `.?` presumes every group has an entry in
        // `groupname2users`, including groups without members; confirm.
        const users = corpus.groupname2users.get(group.name).?;
        const len = compress.putVarint(&buf, users.len);
        offset += len;
        try bytes.appendSlice(buf[0..len]);
        for (users) |user| {
            // TODO: offset into the User's record
            _ = user;
        }
    }
}
// cmpUser compares two users for sorting. By username's utf8 codepoints, ascending. // cmpUser compares two users for sorting. By username's utf8 codepoints, ascending.
fn cmpUser(_: void, a: User, b: User) bool { fn cmpUser(_: void, a: User, b: User) bool {
var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator(); var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator();
@ -437,27 +434,23 @@ test "test sections" {
var corpus = try testCorpus(allocator); var corpus = try testCorpus(allocator);
defer corpus.deinit(); defer corpus.deinit();
var sections = Sections.init(allocator, &corpus); const bdz_gid = try bdzGid(allocator, &corpus);
const bdz_gid = try sections.bdzGid();
defer allocator.free(bdz_gid); defer allocator.free(bdz_gid);
const bdz_groupname = try sections.bdzGroupname(); const bdz_groupname = try bdzGroupname(allocator, &corpus);
defer allocator.free(bdz_groupname); defer allocator.free(bdz_groupname);
const bdz_uid = try sections.bdzUid(); const bdz_uid = try bdzUid(allocator, &corpus);
defer allocator.free(bdz_uid); defer allocator.free(bdz_uid);
const bdz_username = try sections.bdzUsername(); const bdz_username = try bdzUsername(allocator, &corpus);
defer allocator.free(bdz_username); defer allocator.free(bdz_username);
const shell_sections = try sections.shellSections(); var shell_sections = try shellSections(allocator, &corpus);
defer allocator.free(shell_sections.index); defer shell_sections.deinit(allocator);
defer allocator.free(shell_sections.blob);
var user_gids = try sections.userGids(); var user_gids = try userGids(allocator, &corpus);
defer user_gids.name2offset.deinit(); defer user_gids.deinit(allocator);
defer allocator.free(user_gids.blob);
} }
test "pack gids" { test "pack gids" {