packing shell sections
This commit is contained in:
parent
e1bdb6c529
commit
a4e3e08f5f
18
README.md
18
README.md
|
@ -354,24 +354,24 @@ STATUS SECTION SIZE DESCRIPTION
|
||||||
✅ bdz_gid ? bdz(gid)
|
✅ bdz_gid ? bdz(gid)
|
||||||
✅ bdz_groupname ? bdz(groupname)
|
✅ bdz_groupname ? bdz(groupname)
|
||||||
✅ bdz_uid ? bdz(uid)
|
✅ bdz_uid ? bdz(uid)
|
||||||
✅ bdz_name ? bdz(username)
|
✅ bdz_username ? bdz(username)
|
||||||
idx_gid2group len(group)*29/8 bdz->offset Groups
|
idx_gid2group len(group)*29/8 bdz->offset Groups
|
||||||
idx_groupname2group len(group)*29/8 bdz->offset Groups
|
idx_groupname2group len(group)*29/8 bdz->offset Groups
|
||||||
idx_uid2user len(user)*29/8 bdz->offset Users
|
idx_uid2user len(user)*29/8 bdz->offset Users
|
||||||
idx_name2user len(user)*29/8 bdz->offset Users
|
idx_name2user len(user)*29/8 bdz->offset Users
|
||||||
idx_username2gids len(user)*29/8 bdz->offset UserGids
|
idx_username2gids len(user)*29/8 bdz->offset UserGids
|
||||||
✅ ShellIndex len(shells)*2 shell index array
|
✅ shellIndex len(shells)*2 shell index array
|
||||||
✅ ShellBlob <= 4032 shell data blob (max 63*64 bytes)
|
✅ shellBlob <= 4032 shell data blob (max 63*64 bytes)
|
||||||
✅ Groups ? packed Group entries (8b padding)
|
✅ groups ? packed Group entries (8b padding)
|
||||||
✅ Users ? packed User entries (8b padding)
|
✅ users ? packed User entries (8b padding)
|
||||||
Groupmembers ? per-group memberlist (no padding)
|
groupMembers ? per-group memberlist (no padding)
|
||||||
UserGids ? per-user gidlist entries (8b padding)
|
userGids ? per-user gidlist entries (8b padding)
|
||||||
```
|
```
|
||||||
|
|
||||||
Section creation order:
|
Section creation order:
|
||||||
|
|
||||||
1. `bdz_*`. No depdendencies.
|
1. ✅ `bdz_*`. No dependencies.
|
||||||
1. ShellIndex, ShellBlob. No dependencies.
|
1. ✅ `shellIndex`, `shellBlob`. No dependencies.
|
||||||
1. UserGids. No dependencies.
|
1. UserGids. No dependencies.
|
||||||
1. Users, but without `additional_gids_offset`. No dependencies.
|
1. Users, but without `additional_gids_offset`. No dependencies.
|
||||||
1. Groupmembers. Depends on Users, ex. `additional_gids_offset`.
|
1. Groupmembers. Depends on Users, ex. `additional_gids_offset`.
|
||||||
|
|
|
@ -43,7 +43,7 @@ const Header = packed struct {
|
||||||
if (self.bom != Bom) {
|
if (self.bom != Bom) {
|
||||||
return error.InvalidBom;
|
return error.InvalidBom;
|
||||||
}
|
}
|
||||||
if (self.num_shells > shell.MaxShells) {
|
if (self.num_shells > shell.max_shells) {
|
||||||
return error.TooManyShells;
|
return error.TooManyShells;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,7 +112,7 @@ test "header pack, unpack and validation" {
|
||||||
|
|
||||||
{
|
{
|
||||||
var header = goodHeader;
|
var header = goodHeader;
|
||||||
header.num_shells = shell.MaxShells + 1;
|
header.num_shells = shell.max_shells + 1;
|
||||||
try testing.expectError(error.TooManyShells, Header.init(header.asArray()));
|
try testing.expectError(error.TooManyShells, Header.init(header.asArray()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ const BufSet = std.BufSet;
|
||||||
|
|
||||||
const pad = @import("padding.zig");
|
const pad = @import("padding.zig");
|
||||||
const compress = @import("compress.zig");
|
const compress = @import("compress.zig");
|
||||||
|
const shellImport = @import("shell.zig");
|
||||||
const userImport = @import("user.zig");
|
const userImport = @import("user.zig");
|
||||||
const groupImport = @import("group.zig");
|
const groupImport = @import("group.zig");
|
||||||
const cmph = @import("cmph.zig");
|
const cmph = @import("cmph.zig");
|
||||||
|
@ -185,9 +186,49 @@ pub const Sections = struct {
|
||||||
bytes: []const u8,
|
bytes: []const u8,
|
||||||
};
|
};
|
||||||
|
|
||||||
const groupMembersErr = error{Overflow} || Allocator.Error;
|
pub fn bdzGid(self: *const Sections) cmph.Error![]const u8 {
|
||||||
|
return try cmph.pack_u32(self.allocator, self.corpus.groupsMulti.items(.gid));
|
||||||
|
}
|
||||||
|
|
||||||
pub fn groupMembers(self: *const Sections) groupMembersErr!GroupMembers {
|
pub fn bdzGroupname(self: *const Sections) cmph.Error![]const u8 {
|
||||||
|
return try cmph.pack_str(self.allocator, self.corpus.groupsMulti.items(.name));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn bdzUid(self: *const Sections) cmph.Error![]const u8 {
|
||||||
|
return try cmph.pack_u32(self.allocator, self.corpus.usersMulti.items(.uid));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn bdzUsername(self: *const Sections) cmph.Error![]const u8 {
|
||||||
|
return try cmph.pack_str(self.allocator, self.corpus.usersMulti.items(.name));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const ShellSections = struct {
|
||||||
|
index: []const u8,
|
||||||
|
blob: []const u8,
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO(motiejus) there are a few problems:
|
||||||
|
// - memory management for shell sections is a mess. Make it easier by ...
|
||||||
|
// - shell module should accept a list of shells and spit out two slices
|
||||||
|
// (allocated with a given allocator). There is too much dancing around
|
||||||
|
// here.
|
||||||
|
const shellSectionsErr = Allocator.Error || error{Overflow};
|
||||||
|
pub fn shellSections(self: *const Sections) shellSectionsErr!ShellSections {
|
||||||
|
var popcon = shellImport.ShellWriter.init(self.allocator);
|
||||||
|
defer popcon.deinit();
|
||||||
|
for (self.corpus.usersMulti.items(.shell)) |shell| {
|
||||||
|
try popcon.put(shell);
|
||||||
|
}
|
||||||
|
var sections = try popcon.toOwnedSections(shellImport.max_shells);
|
||||||
|
defer sections.deinit();
|
||||||
|
|
||||||
|
return ShellSections{
|
||||||
|
.index = try self.allocator.dupe(u8, sections.sectionIndex()),
|
||||||
|
.blob = try self.allocator.dupe(u8, sections.sectionBlob()),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn groupMembers(self: *const Sections) Allocator.Error!GroupMembers {
|
||||||
var buf: [compress.maxVarintLen64]u8 = undefined;
|
var buf: [compress.maxVarintLen64]u8 = undefined;
|
||||||
var offsets = ArrayListUnmanaged(usize).initCapacity(
|
var offsets = ArrayListUnmanaged(usize).initCapacity(
|
||||||
self.allocator,
|
self.allocator,
|
||||||
|
@ -198,7 +239,7 @@ pub const Sections = struct {
|
||||||
for (self.corpus.groups) |group, i| {
|
for (self.corpus.groups) |group, i| {
|
||||||
offsets[i] = offset;
|
offsets[i] = offset;
|
||||||
const users = self.corpus.groupname2users.get(group.name).?;
|
const users = self.corpus.groupname2users.get(group.name).?;
|
||||||
const len = try compress.putVarint(&buf, users.len);
|
const len = compress.putVarint(&buf, users.len);
|
||||||
offset += len;
|
offset += len;
|
||||||
try bytes.appendSlice(buf[0..len]);
|
try bytes.appendSlice(buf[0..len]);
|
||||||
for (users) |user| {
|
for (users) |user| {
|
||||||
|
@ -332,6 +373,30 @@ test "test corpus" {
|
||||||
try testing.expectEqual(corpus.username2groups.get("404"), null);
|
try testing.expectEqual(corpus.username2groups.get("404"), null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "test sections" {
|
||||||
|
const allocator = testing.allocator;
|
||||||
|
var corpus = try testCorpus(allocator);
|
||||||
|
defer corpus.deinit();
|
||||||
|
|
||||||
|
var sections = Sections.init(allocator, &corpus);
|
||||||
|
|
||||||
|
const bdz_gid = try sections.bdzGid();
|
||||||
|
defer allocator.free(bdz_gid);
|
||||||
|
|
||||||
|
const bdz_groupname = try sections.bdzGroupname();
|
||||||
|
defer allocator.free(bdz_groupname);
|
||||||
|
|
||||||
|
const bdz_uid = try sections.bdzUid();
|
||||||
|
defer allocator.free(bdz_uid);
|
||||||
|
|
||||||
|
const bdz_username = try sections.bdzUsername();
|
||||||
|
defer allocator.free(bdz_username);
|
||||||
|
|
||||||
|
const shellSections = try sections.shellSections();
|
||||||
|
defer allocator.free(shellSections.index);
|
||||||
|
defer allocator.free(shellSections.blob);
|
||||||
|
}
|
||||||
|
|
||||||
test "pack gids" {
|
test "pack gids" {
|
||||||
const allocator = testing.allocator;
|
const allocator = testing.allocator;
|
||||||
var corpus = try testCorpus(allocator);
|
var corpus = try testCorpus(allocator);
|
||||||
|
|
|
@ -7,11 +7,22 @@ const StringHashMap = std.StringHashMap;
|
||||||
const BoundedArray = std.BoundedArray;
|
const BoundedArray = std.BoundedArray;
|
||||||
const StringContext = std.hash_map.StringContext;
|
const StringContext = std.hash_map.StringContext;
|
||||||
|
|
||||||
// MaxShells is the maximum number of "popular" shells.
|
// max_shells is the maximum number of "popular" shells.
|
||||||
pub const MaxShells = 63;
|
pub const max_shells = 63;
|
||||||
pub const MaxShellLen = 64;
|
pub const max_shell_len = 64;
|
||||||
const ShellAlignment = 2; // bits
|
const ShellAlignment = 2; // bits
|
||||||
|
|
||||||
|
// ShellIndex is an index to the shell strings. As shell can be up to 64 bytes
|
||||||
|
// (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset
|
||||||
|
// is 1<<12. To make location resolvable in 10 bits, all shells will be padded
|
||||||
|
// to 4 bytes.
|
||||||
|
// The actual shell length is len+1: we don't allow empty shells, and the real
|
||||||
|
// length of the shell is 1-64 bytes.
|
||||||
|
const ShellIndex = packed struct {
|
||||||
|
offset: u10,
|
||||||
|
len: u6,
|
||||||
|
};
|
||||||
|
|
||||||
// ShellReader interprets "Shell Index" and "Shell Blob" sections.
|
// ShellReader interprets "Shell Index" and "Shell Blob" sections.
|
||||||
pub const ShellReader = struct {
|
pub const ShellReader = struct {
|
||||||
sectionIndex: []const ShellIndex,
|
sectionIndex: []const ShellIndex,
|
||||||
|
@ -44,19 +55,20 @@ pub const ShellWriter = struct {
|
||||||
};
|
};
|
||||||
|
|
||||||
const ShellSections = struct {
|
const ShellSections = struct {
|
||||||
index: BoundedArray(ShellIndex, MaxShells),
|
index: BoundedArray(ShellIndex, max_shells),
|
||||||
blob: BoundedArray(u8, MaxShells * MaxShellLen),
|
blob: BoundedArray(u8, max_shells * max_shell_len),
|
||||||
indices: StringHashMap(u6),
|
indices: StringHashMap(u6),
|
||||||
|
|
||||||
// initializes and populates shell sections. All strings are copied,
|
// initializes and populates shell sections. All strings are copied,
|
||||||
// nothing is owned.
|
// nothing is owned.
|
||||||
|
pub const initErr = Allocator.Error || error{Overflow};
|
||||||
pub fn init(
|
pub fn init(
|
||||||
allocator: Allocator,
|
allocator: Allocator,
|
||||||
shells: BoundedArray([]const u8, MaxShells),
|
shells: BoundedArray([]const u8, max_shells),
|
||||||
) !ShellSections {
|
) initErr!ShellSections {
|
||||||
var self = ShellSections{
|
var self = ShellSections{
|
||||||
.index = try BoundedArray(ShellIndex, MaxShells).init(shells.len),
|
.index = try BoundedArray(ShellIndex, max_shells).init(shells.len),
|
||||||
.blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0),
|
.blob = try BoundedArray(u8, max_shells * max_shell_len).init(0),
|
||||||
.indices = StringHashMap(u6).init(allocator),
|
.indices = StringHashMap(u6).init(allocator),
|
||||||
};
|
};
|
||||||
var fullOffset: u12 = 0;
|
var fullOffset: u12 = 0;
|
||||||
|
@ -132,7 +144,8 @@ pub const ShellWriter = struct {
|
||||||
// toOwnedSections returns the analyzed ShellSections. Resets the shell
|
// toOwnedSections returns the analyzed ShellSections. Resets the shell
|
||||||
// popularity contest. ShellSections memory is allocated by the ShellWriter
|
// popularity contest. ShellSections memory is allocated by the ShellWriter
|
||||||
// allocator, and must be deInit'ed by the caller.
|
// allocator, and must be released by the caller with deinit().
|
||||||
pub fn toOwnedSections(self: *ShellWriter, limit: u10) !ShellSections {
|
const toOwnedSectionsErr = Allocator.Error || error{Overflow};
|
||||||
|
pub fn toOwnedSections(self: *ShellWriter, limit: u10) toOwnedSectionsErr!ShellSections {
|
||||||
var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
|
var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
|
||||||
defer deque.deinit();
|
defer deque.deinit();
|
||||||
|
|
||||||
|
@ -145,7 +158,7 @@ pub const ShellWriter = struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
const total = std.math.min(deque.count(), limit);
|
const total = std.math.min(deque.count(), limit);
|
||||||
var topShells = try BoundedArray([]const u8, MaxShells).init(total);
|
var topShells = try BoundedArray([]const u8, max_shells).init(total);
|
||||||
|
|
||||||
var i: u32 = 0;
|
var i: u32 = 0;
|
||||||
while (i < total) : (i += 1) {
|
while (i < total) : (i += 1) {
|
||||||
|
@ -161,17 +174,6 @@ pub const ShellWriter = struct {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// ShellIndex is an index to the shell strings. As shell can be up to 64 bytes
|
|
||||||
// (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset
|
|
||||||
// is 1<<12. To make location resolvable in 10 bits, all shells will be padded
|
|
||||||
// to 4 bytes.
|
|
||||||
// The actual shell length is len+1: we don't allow empty shells, and the real
|
|
||||||
// length of the shell is 1-64 bytes.
|
|
||||||
const ShellIndex = packed struct {
|
|
||||||
offset: u10,
|
|
||||||
len: u6,
|
|
||||||
};
|
|
||||||
|
|
||||||
const testing = std.testing;
|
const testing = std.testing;
|
||||||
|
|
||||||
test "basic shellpopcon" {
|
test "basic shellpopcon" {
|
||||||
|
@ -192,7 +194,7 @@ test "basic shellpopcon" {
|
||||||
try popcon.put(shell);
|
try popcon.put(shell);
|
||||||
}
|
}
|
||||||
|
|
||||||
var sections = try popcon.toOwnedSections(MaxShells);
|
var sections = try popcon.toOwnedSections(max_shells);
|
||||||
defer sections.deinit();
|
defer sections.deinit();
|
||||||
try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify
|
try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue