1
Fork 0

packing shell sections

This commit is contained in:
Motiejus Jakštys 2022-03-03 18:05:46 +02:00 committed by Motiejus Jakštys
parent e1bdb6c529
commit a4e3e08f5f
4 changed files with 104 additions and 37 deletions

View File

@ -354,24 +354,24 @@ STATUS SECTION SIZE DESCRIPTION
✅ bdz_gid ? bdz(gid) ✅ bdz_gid ? bdz(gid)
✅ bdz_groupname ? bdz(groupname) ✅ bdz_groupname ? bdz(groupname)
✅ bdz_uid ? bdz(uid) ✅ bdz_uid ? bdz(uid)
✅ bdz_name ? bdz(username) ✅ bdz_username ? bdz(username)
idx_gid2group len(group)*29/8 bdz->offset Groups idx_gid2group len(group)*29/8 bdz->offset Groups
idx_groupname2group len(group)*29/8 bdz->offset Groups idx_groupname2group len(group)*29/8 bdz->offset Groups
idx_uid2user len(user)*29/8 bdz->offset Users idx_uid2user len(user)*29/8 bdz->offset Users
idx_name2user len(user)*29/8 bdz->offset Users idx_name2user len(user)*29/8 bdz->offset Users
idx_username2gids len(user)*29/8 bdz->offset UserGids idx_username2gids len(user)*29/8 bdz->offset UserGids
ShellIndex len(shells)*2 shell index array shellIndex len(shells)*2 shell index array
ShellBlob <= 4032 shell data blob (max 63*64 bytes) shellBlob <= 4032 shell data blob (max 63*64 bytes)
Groups ? packed Group entries (8b padding) groups ? packed Group entries (8b padding)
Users ? packed User entries (8b padding) users ? packed User entries (8b padding)
Groupmembers ? per-group memberlist (no padding) groupMembers ? per-group memberlist (no padding)
UserGids ? per-user gidlist entries (8b padding) userGids ? per-user gidlist entries (8b padding)
``` ```
Section creation order: Section creation order:
1. `bdz_*`. No dependencies. 1. `bdz_*`. No dependencies.
1. ShellIndex, ShellBlob. No dependencies. 1. `shellIndex`, `shellBlob`. No dependencies.
1. UserGids. No dependencies. 1. UserGids. No dependencies.
1. Users, but without `additional_gids_offset`. No dependencies. 1. Users, but without `additional_gids_offset`. No dependencies.
1. Groupmembers. Depends on Users, ex. `additional_gids_offset`. 1. Groupmembers. Depends on Users, ex. `additional_gids_offset`.

View File

@ -43,7 +43,7 @@ const Header = packed struct {
if (self.bom != Bom) { if (self.bom != Bom) {
return error.InvalidBom; return error.InvalidBom;
} }
if (self.num_shells > shell.MaxShells) { if (self.num_shells > shell.max_shells) {
return error.TooManyShells; return error.TooManyShells;
} }
@ -112,7 +112,7 @@ test "header pack, unpack and validation" {
{ {
var header = goodHeader; var header = goodHeader;
header.num_shells = shell.MaxShells + 1; header.num_shells = shell.max_shells + 1;
try testing.expectError(error.TooManyShells, Header.init(header.asArray())); try testing.expectError(error.TooManyShells, Header.init(header.asArray()));
} }

View File

@ -13,6 +13,7 @@ const BufSet = std.BufSet;
const pad = @import("padding.zig"); const pad = @import("padding.zig");
const compress = @import("compress.zig"); const compress = @import("compress.zig");
const shellImport = @import("shell.zig");
const userImport = @import("user.zig"); const userImport = @import("user.zig");
const groupImport = @import("group.zig"); const groupImport = @import("group.zig");
const cmph = @import("cmph.zig"); const cmph = @import("cmph.zig");
@ -185,9 +186,49 @@ pub const Sections = struct {
bytes: []const u8, bytes: []const u8,
}; };
const groupMembersErr = error{Overflow} || Allocator.Error; pub fn bdzGid(self: *const Sections) cmph.Error![]const u8 {
return try cmph.pack_u32(self.allocator, self.corpus.groupsMulti.items(.gid));
}
pub fn groupMembers(self: *const Sections) groupMembersErr!GroupMembers { pub fn bdzGroupname(self: *const Sections) cmph.Error![]const u8 {
return try cmph.pack_str(self.allocator, self.corpus.groupsMulti.items(.name));
}
/// Packs the uids of all users in the corpus with `cmph.pack_u32`.
/// The result is allocated with `self.allocator`; the caller frees it.
pub fn bdzUid(self: *const Sections) cmph.Error![]const u8 {
    const uids = self.corpus.usersMulti.items(.uid);
    const packed_bytes = try cmph.pack_u32(self.allocator, uids);
    return packed_bytes;
}
/// Packs the names of all users in the corpus with `cmph.pack_str`.
/// The result is allocated with `self.allocator`; the caller frees it.
pub fn bdzUsername(self: *const Sections) cmph.Error![]const u8 {
    const usernames = self.corpus.usersMulti.items(.name);
    const packed_bytes = try cmph.pack_str(self.allocator, usernames);
    return packed_bytes;
}
// ShellSections bundles the two byte slices returned by
// `Sections.shellSections`. Both slices are duplicated with the Sections
// allocator, and the caller frees each of them.
pub const ShellSections = struct {
// bytes copied from the shell writer's `sectionIndex()`.
index: []const u8,
// bytes copied from the shell writer's `sectionBlob()`.
blob: []const u8,
};
// TODO(motiejus) there are a few problems:
// - memory management for shell sections is a mess. Make it easier by ...
// - shell module should accept a list of shells and spit out two slices
// (allocated with a given allocator). There is too much dancing around
// here.
const shellSectionsErr = Allocator.Error || error{Overflow};

/// Feeds every user's shell into a `ShellWriter`, asks it for at most
/// `shellImport.max_shells` sections, and returns copies of the resulting
/// index and blob sections.
///
/// Both returned slices are allocated with `self.allocator` and are owned by
/// the caller, who must free `index` and `blob` separately. On error nothing
/// is leaked: the intermediate writer and sections are always released, and
/// the `index` copy is freed if duplicating `blob` fails.
pub fn shellSections(self: *const Sections) shellSectionsErr!ShellSections {
    var popcon = shellImport.ShellWriter.init(self.allocator);
    defer popcon.deinit();
    for (self.corpus.usersMulti.items(.shell)) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(shellImport.max_shells);
    defer sections.deinit();

    // Duplicate the slices one at a time so the first copy can be released
    // if the second allocation fails.
    const index = try self.allocator.dupe(u8, sections.sectionIndex());
    errdefer self.allocator.free(index);
    const blob = try self.allocator.dupe(u8, sections.sectionBlob());

    return ShellSections{
        .index = index,
        .blob = blob,
    };
}
pub fn groupMembers(self: *const Sections) Allocator.Error!GroupMembers {
var buf: [compress.maxVarintLen64]u8 = undefined; var buf: [compress.maxVarintLen64]u8 = undefined;
var offsets = ArrayListUnmanaged(usize).initCapacity( var offsets = ArrayListUnmanaged(usize).initCapacity(
self.allocator, self.allocator,
@ -198,7 +239,7 @@ pub const Sections = struct {
for (self.corpus.groups) |group, i| { for (self.corpus.groups) |group, i| {
offsets[i] = offset; offsets[i] = offset;
const users = self.corpus.groupname2users.get(group.name).?; const users = self.corpus.groupname2users.get(group.name).?;
const len = try compress.putVarint(&buf, users.len); const len = compress.putVarint(&buf, users.len);
offset += len; offset += len;
try bytes.appendSlice(buf[0..len]); try bytes.appendSlice(buf[0..len]);
for (users) |user| { for (users) |user| {
@ -332,6 +373,30 @@ test "test corpus" {
try testing.expectEqual(corpus.username2groups.get("404"), null); try testing.expectEqual(corpus.username2groups.get("404"), null);
} }
// Smoke test: builds every section that Sections can currently produce from
// the test corpus and frees it again; testing.allocator reports any leak.
test "test sections" {
    const allocator = testing.allocator;
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

    const sections = Sections.init(allocator, &corpus);

    const gid_section = try sections.bdzGid();
    defer allocator.free(gid_section);

    const groupname_section = try sections.bdzGroupname();
    defer allocator.free(groupname_section);

    const uid_section = try sections.bdzUid();
    defer allocator.free(uid_section);

    const username_section = try sections.bdzUsername();
    defer allocator.free(username_section);

    const shell_sections = try sections.shellSections();
    defer allocator.free(shell_sections.index);
    defer allocator.free(shell_sections.blob);
}
test "pack gids" { test "pack gids" {
const allocator = testing.allocator; const allocator = testing.allocator;
var corpus = try testCorpus(allocator); var corpus = try testCorpus(allocator);

View File

@ -7,11 +7,22 @@ const StringHashMap = std.StringHashMap;
const BoundedArray = std.BoundedArray; const BoundedArray = std.BoundedArray;
const StringContext = std.hash_map.StringContext; const StringContext = std.hash_map.StringContext;
// MaxShells is the maximum number of "popular" shells. // max_shells is the maximum number of "popular" shells.
pub const MaxShells = 63; pub const max_shells = 63;
pub const MaxShellLen = 64; pub const max_shell_len = 64;
const ShellAlignment = 2; // bits const ShellAlignment = 2; // bits
// ShellIndex is an index to the shell strings. A shell can be up to 64 bytes
// (1<<6) and there are at most 63 shells ((1<<6)-1), so a byte offset into
// the blob stays below 1<<12. To make the location resolvable in 10 bits, all
// shells are padded to 4 bytes (see ShellAlignment).
// The actual shell length is len+1: we don't allow empty shells, and the real
// length of the shell is 1-64 bytes.
const ShellIndex = packed struct {
// location of the shell in the blob; presumably counted in 4-byte units
// given the padding described above -- TODO confirm against the writer.
offset: u10,
// stored as (real length - 1), so 0..63 encodes 1..64 bytes.
len: u6,
};
// ShellReader interprets "Shell Index" and "Shell Blob" sections. // ShellReader interprets "Shell Index" and "Shell Blob" sections.
pub const ShellReader = struct { pub const ShellReader = struct {
sectionIndex: []const ShellIndex, sectionIndex: []const ShellIndex,
@ -44,19 +55,20 @@ pub const ShellWriter = struct {
}; };
const ShellSections = struct { const ShellSections = struct {
index: BoundedArray(ShellIndex, MaxShells), index: BoundedArray(ShellIndex, max_shells),
blob: BoundedArray(u8, MaxShells * MaxShellLen), blob: BoundedArray(u8, max_shells * max_shell_len),
indices: StringHashMap(u6), indices: StringHashMap(u6),
// initializes and populates shell sections. All strings are copied, // initializes and populates shell sections. All strings are copied,
// nothing is owned. // nothing is owned.
pub const initErr = Allocator.Error || error{Overflow};
pub fn init( pub fn init(
allocator: Allocator, allocator: Allocator,
shells: BoundedArray([]const u8, MaxShells), shells: BoundedArray([]const u8, max_shells),
) !ShellSections { ) initErr!ShellSections {
var self = ShellSections{ var self = ShellSections{
.index = try BoundedArray(ShellIndex, MaxShells).init(shells.len), .index = try BoundedArray(ShellIndex, max_shells).init(shells.len),
.blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0), .blob = try BoundedArray(u8, max_shells * max_shell_len).init(0),
.indices = StringHashMap(u6).init(allocator), .indices = StringHashMap(u6).init(allocator),
}; };
var fullOffset: u12 = 0; var fullOffset: u12 = 0;
@ -132,7 +144,8 @@ pub const ShellWriter = struct {
// toOwnedSections returns the analyzed ShellSections. Resets the shell // toOwnedSections returns the analyzed ShellSections. Resets the shell
// popularity contest. ShellSections memory is allocated by the ShellWriter // popularity contest. ShellSections memory is allocated by the ShellWriter
// allocator, and must be deInit'ed by the caller. // allocator, and must be deInit'ed by the caller.
pub fn toOwnedSections(self: *ShellWriter, limit: u10) !ShellSections { const toOwnedSectionsErr = Allocator.Error || error{Overflow};
pub fn toOwnedSections(self: *ShellWriter, limit: u10) toOwnedSectionsErr!ShellSections {
var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {}); var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
defer deque.deinit(); defer deque.deinit();
@ -145,7 +158,7 @@ pub const ShellWriter = struct {
} }
const total = std.math.min(deque.count(), limit); const total = std.math.min(deque.count(), limit);
var topShells = try BoundedArray([]const u8, MaxShells).init(total); var topShells = try BoundedArray([]const u8, max_shells).init(total);
var i: u32 = 0; var i: u32 = 0;
while (i < total) : (i += 1) { while (i < total) : (i += 1) {
@ -161,17 +174,6 @@ pub const ShellWriter = struct {
} }
}; };
// ShellIndex is an index to the shell strings. As shell can be up to 64 bytes
// (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset
// is 1<<12. To make location resolvable in 10 bits, all shells will be padded
// to 4 bytes.
// The actual shell length is len+1: we don't allow empty shells, and the real
// length of the shell is 1-64 bytes.
const ShellIndex = packed struct {
offset: u10,
len: u6,
};
const testing = std.testing; const testing = std.testing;
test "basic shellpopcon" { test "basic shellpopcon" {
@ -192,7 +194,7 @@ test "basic shellpopcon" {
try popcon.put(shell); try popcon.put(shell);
} }
var sections = try popcon.toOwnedSections(MaxShells); var sections = try popcon.toOwnedSections(max_shells);
defer sections.deinit(); defer sections.deinit();
try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify