packing shell sections
This commit is contained in:
parent
e1bdb6c529
commit
a4e3e08f5f
18
README.md
18
README.md
@ -354,24 +354,24 @@ STATUS SECTION SIZE DESCRIPTION
|
||||
✅ bdz_gid ? bdz(gid)
|
||||
✅ bdz_groupname ? bdz(groupname)
|
||||
✅ bdz_uid ? bdz(uid)
|
||||
✅ bdz_name ? bdz(username)
|
||||
✅ bdz_username ? bdz(username)
|
||||
idx_gid2group len(group)*29/8 bdz->offset Groups
|
||||
idx_groupname2group len(group)*29/8 bdz->offset Groups
|
||||
idx_uid2user len(user)*29/8 bdz->offset Users
|
||||
idx_name2user len(user)*29/8 bdz->offset Users
|
||||
idx_username2gids len(user)*29/8 bdz->offset UserGids
|
||||
✅ ShellIndex len(shells)*2 shell index array
|
||||
✅ ShellBlob <= 4032 shell data blob (max 63*64 bytes)
|
||||
✅ Groups ? packed Group entries (8b padding)
|
||||
✅ Users ? packed User entries (8b padding)
|
||||
Groupmembers ? per-group memberlist (no padding)
|
||||
UserGids ? per-user gidlist entries (8b padding)
|
||||
✅ shellIndex len(shells)*2 shell index array
|
||||
✅ shellBlob <= 4032 shell data blob (max 63*64 bytes)
|
||||
✅ groups ? packed Group entries (8b padding)
|
||||
✅ users ? packed User entries (8b padding)
|
||||
groupMembers ? per-group memberlist (no padding)
|
||||
userGids ? per-user gidlist entries (8b padding)
|
||||
```
|
||||
|
||||
Section creation order:
|
||||
|
||||
1. `bdz_*`. No dependencies.
|
||||
1. ShellIndex, ShellBlob. No dependencies.
|
||||
1. ✅ `bdz_*`. No dependencies.
|
||||
1. ✅ `shellIndex`, `shellBlob`. No dependencies.
|
||||
1. UserGids. No dependencies.
|
||||
1. Users, but without `additional_gids_offset`. No dependencies.
|
||||
1. Groupmembers. Depends on Users, ex. `additional_gids_offset`.
|
||||
|
@ -43,7 +43,7 @@ const Header = packed struct {
|
||||
if (self.bom != Bom) {
|
||||
return error.InvalidBom;
|
||||
}
|
||||
if (self.num_shells > shell.MaxShells) {
|
||||
if (self.num_shells > shell.max_shells) {
|
||||
return error.TooManyShells;
|
||||
}
|
||||
|
||||
@ -112,7 +112,7 @@ test "header pack, unpack and validation" {
|
||||
|
||||
{
|
||||
var header = goodHeader;
|
||||
header.num_shells = shell.MaxShells + 1;
|
||||
header.num_shells = shell.max_shells + 1;
|
||||
try testing.expectError(error.TooManyShells, Header.init(header.asArray()));
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,7 @@ const BufSet = std.BufSet;
|
||||
|
||||
const pad = @import("padding.zig");
|
||||
const compress = @import("compress.zig");
|
||||
const shellImport = @import("shell.zig");
|
||||
const userImport = @import("user.zig");
|
||||
const groupImport = @import("group.zig");
|
||||
const cmph = @import("cmph.zig");
|
||||
@ -185,9 +186,49 @@ pub const Sections = struct {
|
||||
bytes: []const u8,
|
||||
};
|
||||
|
||||
const groupMembersErr = error{Overflow} || Allocator.Error;
|
||||
// bdzGid packs the gids of every group in the corpus with cmph.pack_u32
// and hands back the resulting bytes (the README's `bdz_gid` section).
// The slice is allocated with self.allocator; the caller frees it.
pub fn bdzGid(self: *const Sections) cmph.Error![]const u8 {
    const gids = self.corpus.groupsMulti.items(.gid);
    return cmph.pack_u32(self.allocator, gids);
}
|
||||
|
||||
pub fn groupMembers(self: *const Sections) groupMembersErr!GroupMembers {
|
||||
// bdzGroupname packs the names of every group in the corpus with
// cmph.pack_str and hands back the resulting bytes (the README's
// `bdz_groupname` section). The slice is allocated with self.allocator;
// the caller frees it.
pub fn bdzGroupname(self: *const Sections) cmph.Error![]const u8 {
    const group_names = self.corpus.groupsMulti.items(.name);
    return cmph.pack_str(self.allocator, group_names);
}
|
||||
|
||||
// bdzUid packs the uids of every user in the corpus with cmph.pack_u32
// and hands back the resulting bytes (the README's `bdz_uid` section).
// The slice is allocated with self.allocator; the caller frees it.
pub fn bdzUid(self: *const Sections) cmph.Error![]const u8 {
    const uids = self.corpus.usersMulti.items(.uid);
    return cmph.pack_u32(self.allocator, uids);
}
|
||||
|
||||
// bdzUsername packs the names of every user in the corpus with
// cmph.pack_str and hands back the resulting bytes (the README's
// `bdz_username` section). The slice is allocated with self.allocator;
// the caller frees it.
pub fn bdzUsername(self: *const Sections) cmph.Error![]const u8 {
    const user_names = self.corpus.usersMulti.items(.name);
    return cmph.pack_str(self.allocator, user_names);
}
|
||||
|
||||
// ShellSections is the result of Sections.shellSections: the two byte
// slices that make up the shell-related sections. Both slices are
// produced with the Sections allocator and are freed by the caller.
pub const ShellSections = struct {
|
||||
// bytes of the shell index section.
index: []const u8,
|
||||
// bytes of the shell blob section.
blob: []const u8,
|
||||
};
|
||||
|
||||
// TODO(motiejus) there are a few problems:
|
||||
// - memory management for shell sections is a mess. Make it easier by ...
|
||||
// - shell module should accept a list of shells and spit out two slices
|
||||
// (allocated with a given allocator). There is too much dancing around
|
||||
// here.
|
||||
const shellSectionsErr = Allocator.Error || error{Overflow};
|
||||
// shellSections runs the shell popularity contest over the shells of all
// users in the corpus and returns copies of the resulting index and blob
// sections.
//
// Both returned slices are allocated with self.allocator and must be
// freed by the caller. On error nothing is leaked: the intermediate
// writer and sections are released by `defer`, and the already-copied
// index is released by `errdefer` if copying the blob fails.
pub fn shellSections(self: *const Sections) shellSectionsErr!ShellSections {
    var popcon = shellImport.ShellWriter.init(self.allocator);
    defer popcon.deinit();
    for (self.corpus.usersMulti.items(.shell)) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(shellImport.max_shells);
    defer sections.deinit();

    // `sections` is torn down on return, so the caller gets its own copies.
    const index = try self.allocator.dupe(u8, sections.sectionIndex());
    // Without this, a failed blob allocation below would leak `index`.
    errdefer self.allocator.free(index);
    const blob = try self.allocator.dupe(u8, sections.sectionBlob());

    return ShellSections{
        .index = index,
        .blob = blob,
    };
}
|
||||
|
||||
pub fn groupMembers(self: *const Sections) Allocator.Error!GroupMembers {
|
||||
var buf: [compress.maxVarintLen64]u8 = undefined;
|
||||
var offsets = ArrayListUnmanaged(usize).initCapacity(
|
||||
self.allocator,
|
||||
@ -198,7 +239,7 @@ pub const Sections = struct {
|
||||
for (self.corpus.groups) |group, i| {
|
||||
offsets[i] = offset;
|
||||
const users = self.corpus.groupname2users.get(group.name).?;
|
||||
const len = try compress.putVarint(&buf, users.len);
|
||||
const len = compress.putVarint(&buf, users.len);
|
||||
offset += len;
|
||||
try bytes.appendSlice(buf[0..len]);
|
||||
for (users) |user| {
|
||||
@ -332,6 +373,30 @@ test "test corpus" {
|
||||
try testing.expectEqual(corpus.username2groups.get("404"), null);
|
||||
}
|
||||
|
||||
// Smoke test: builds every section implemented so far from the test
// corpus and frees it again. testing.allocator reports any leak.
test "test sections" {
    const allocator = testing.allocator;

    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

    const sections = Sections.init(allocator, &corpus);

    // bdz_* sections.
    const bdz_gid = try sections.bdzGid();
    defer allocator.free(bdz_gid);
    const bdz_groupname = try sections.bdzGroupname();
    defer allocator.free(bdz_groupname);
    const bdz_uid = try sections.bdzUid();
    defer allocator.free(bdz_uid);
    const bdz_username = try sections.bdzUsername();
    defer allocator.free(bdz_username);

    // shell index + blob.
    const shell_sections = try sections.shellSections();
    defer allocator.free(shell_sections.index);
    defer allocator.free(shell_sections.blob);
}
|
||||
|
||||
test "pack gids" {
|
||||
const allocator = testing.allocator;
|
||||
var corpus = try testCorpus(allocator);
|
||||
|
@ -7,11 +7,22 @@ const StringHashMap = std.StringHashMap;
|
||||
const BoundedArray = std.BoundedArray;
|
||||
const StringContext = std.hash_map.StringContext;
|
||||
|
||||
// MaxShells is the maximum number of "popular" shells.
|
||||
pub const MaxShells = 63;
|
||||
pub const MaxShellLen = 64;
|
||||
// max_shells is the maximum number of "popular" shells.
|
||||
pub const max_shells = 63;
|
||||
pub const max_shell_len = 64;
|
||||
const ShellAlignment = 2; // bits
|
||||
|
||||
// ShellIndex is an index to the shell strings. As shell can be up to 64 bytes
|
||||
// (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset
|
||||
// is 1<<12. To make location resolvable in 10 bits, all shells will be padded
|
||||
// to 4 bytes.
|
||||
// The actual shell length is len+1: we don't allow empty shells, and the real
|
||||
// length of the shell is 1-64 bytes.
|
||||
// One 16-bit index entry: where a shell string lives and how long it is.
const ShellIndex = packed struct {
|
||||
// location of the shell; shells are padded to 4 bytes so that the
// location fits in 10 bits.
offset: u10,
|
||||
// stored length; the actual shell length is len+1 (1-64 bytes), since
// empty shells are not allowed.
len: u6,
|
||||
};
|
||||
|
||||
// ShellReader interprets "Shell Index" and "Shell Blob" sections.
|
||||
pub const ShellReader = struct {
|
||||
sectionIndex: []const ShellIndex,
|
||||
@ -44,19 +55,20 @@ pub const ShellWriter = struct {
|
||||
};
|
||||
|
||||
const ShellSections = struct {
|
||||
index: BoundedArray(ShellIndex, MaxShells),
|
||||
blob: BoundedArray(u8, MaxShells * MaxShellLen),
|
||||
index: BoundedArray(ShellIndex, max_shells),
|
||||
blob: BoundedArray(u8, max_shells * max_shell_len),
|
||||
indices: StringHashMap(u6),
|
||||
|
||||
// initializes and populates shell sections. All strings are copied,
|
||||
// nothing is owned.
|
||||
pub const initErr = Allocator.Error || error{Overflow};
|
||||
pub fn init(
|
||||
allocator: Allocator,
|
||||
shells: BoundedArray([]const u8, MaxShells),
|
||||
) !ShellSections {
|
||||
shells: BoundedArray([]const u8, max_shells),
|
||||
) initErr!ShellSections {
|
||||
var self = ShellSections{
|
||||
.index = try BoundedArray(ShellIndex, MaxShells).init(shells.len),
|
||||
.blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0),
|
||||
.index = try BoundedArray(ShellIndex, max_shells).init(shells.len),
|
||||
.blob = try BoundedArray(u8, max_shells * max_shell_len).init(0),
|
||||
.indices = StringHashMap(u6).init(allocator),
|
||||
};
|
||||
var fullOffset: u12 = 0;
|
||||
@ -132,7 +144,8 @@ pub const ShellWriter = struct {
|
||||
// toOwnedSections returns the analyzed ShellSections. Resets the shell
|
||||
// popularity contest. ShellSections memory is allocated by the ShellWriter
|
||||
// allocator, and must be deInit'ed by the caller.
|
||||
pub fn toOwnedSections(self: *ShellWriter, limit: u10) !ShellSections {
|
||||
const toOwnedSectionsErr = Allocator.Error || error{Overflow};
|
||||
pub fn toOwnedSections(self: *ShellWriter, limit: u10) toOwnedSectionsErr!ShellSections {
|
||||
var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
|
||||
defer deque.deinit();
|
||||
|
||||
@ -145,7 +158,7 @@ pub const ShellWriter = struct {
|
||||
}
|
||||
|
||||
const total = std.math.min(deque.count(), limit);
|
||||
var topShells = try BoundedArray([]const u8, MaxShells).init(total);
|
||||
var topShells = try BoundedArray([]const u8, max_shells).init(total);
|
||||
|
||||
var i: u32 = 0;
|
||||
while (i < total) : (i += 1) {
|
||||
@ -161,17 +174,6 @@ pub const ShellWriter = struct {
|
||||
}
|
||||
};
|
||||
|
||||
// ShellIndex is an index to the shell strings. As shell can be up to 64 bytes
|
||||
// (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset
|
||||
// is 1<<12. To make location resolvable in 10 bits, all shells will be padded
|
||||
// to 4 bytes.
|
||||
// The actual shell length is len+1: we don't allow empty shells, and the real
|
||||
// length of the shell is 1-64 bytes.
|
||||
const ShellIndex = packed struct {
|
||||
offset: u10,
|
||||
len: u6,
|
||||
};
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
test "basic shellpopcon" {
|
||||
@ -192,7 +194,7 @@ test "basic shellpopcon" {
|
||||
try popcon.put(shell);
|
||||
}
|
||||
|
||||
var sections = try popcon.toOwnedSections(MaxShells);
|
||||
var sections = try popcon.toOwnedSections(max_shells);
|
||||
defer sections.deinit();
|
||||
try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user