From a4e3e08f5f15047d6f9f3404b2b76925ad2e2426 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Thu, 3 Mar 2022 18:05:46 +0200 Subject: [PATCH] packing shell sections --- README.md | 18 ++++++------ src/header.zig | 4 +-- src/sections.zig | 71 ++++++++++++++++++++++++++++++++++++++++++++++-- src/shell.zig | 48 ++++++++++++++++---------------- 4 files changed, 104 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index d8652fd..d6ec01b 100644 --- a/README.md +++ b/README.md @@ -354,24 +354,24 @@ STATUS SECTION SIZE DESCRIPTION ✅ bdz_gid ? bdz(gid) ✅ bdz_groupname ? bdz(groupname) ✅ bdz_uid ? bdz(uid) -✅ bdz_name ? bdz(username) +✅ bdz_username ? bdz(username) idx_gid2group len(group)*29/8 bdz->offset Groups idx_groupname2group len(group)*29/8 bdz->offset Groups idx_uid2user len(user)*29/8 bdz->offset Users idx_name2user len(user)*29/8 bdz->offset Users idx_username2gids len(user)*29/8 bdz->offset UserGids -✅ ShellIndex len(shells)*2 shell index array -✅ ShellBlob <= 4032 shell data blob (max 63*64 bytes) -✅ Groups ? packed Group entries (8b padding) -✅ Users ? packed User entries (8b padding) - Groupmembers ? per-group memberlist (no padding) - UserGids ? per-user gidlist entries (8b padding) +✅ shellIndex len(shells)*2 shell index array +✅ shellBlob <= 4032 shell data blob (max 63*64 bytes) +✅ groups ? packed Group entries (8b padding) +✅ users ? packed User entries (8b padding) + groupMembers ? per-group memberlist (no padding) + userGids ? per-user gidlist entries (8b padding) ``` Section creation order: -1. `bdz_*`. No depdendencies. -1. ShellIndex, ShellBlob. No dependencies. +1. ✅ `bdz_*`. No dependencies. +1. ✅ `shellIndex`, `shellBlob`. No dependencies. 1. UserGids. No dependencies. 1. Users, but without `additional_gids_offset`. No dependencies. 1. Groupmembers. Depends on Users, ex. `additional_gids_offset`. 
diff --git a/src/header.zig b/src/header.zig index a141730..2eb49e9 100644 --- a/src/header.zig +++ b/src/header.zig @@ -43,7 +43,7 @@ const Header = packed struct { if (self.bom != Bom) { return error.InvalidBom; } - if (self.num_shells > shell.MaxShells) { + if (self.num_shells > shell.max_shells) { return error.TooManyShells; } @@ -112,7 +112,7 @@ test "header pack, unpack and validation" { { var header = goodHeader; - header.num_shells = shell.MaxShells + 1; + header.num_shells = shell.max_shells + 1; try testing.expectError(error.TooManyShells, Header.init(header.asArray())); } diff --git a/src/sections.zig b/src/sections.zig index 806e672..9c4c012 100644 --- a/src/sections.zig +++ b/src/sections.zig @@ -13,6 +13,7 @@ const BufSet = std.BufSet; const pad = @import("padding.zig"); const compress = @import("compress.zig"); +const shellImport = @import("shell.zig"); const userImport = @import("user.zig"); const groupImport = @import("group.zig"); const cmph = @import("cmph.zig"); @@ -185,9 +186,49 @@ pub const Sections = struct { bytes: []const u8, }; - const groupMembersErr = error{Overflow} || Allocator.Error; + pub fn bdzGid(self: *const Sections) cmph.Error![]const u8 { + return try cmph.pack_u32(self.allocator, self.corpus.groupsMulti.items(.gid)); + } - pub fn groupMembers(self: *const Sections) groupMembersErr!GroupMembers { + pub fn bdzGroupname(self: *const Sections) cmph.Error![]const u8 { + return try cmph.pack_str(self.allocator, self.corpus.groupsMulti.items(.name)); + } + + pub fn bdzUid(self: *const Sections) cmph.Error![]const u8 { + return try cmph.pack_u32(self.allocator, self.corpus.usersMulti.items(.uid)); + } + + pub fn bdzUsername(self: *const Sections) cmph.Error![]const u8 { + return try cmph.pack_str(self.allocator, self.corpus.usersMulti.items(.name)); + } + + pub const ShellSections = struct { + index: []const u8, + blob: []const u8, + }; + + // TODO(motiejus) there are a few problems: + // - memory management for shell sections is a 
mess. Make it easier by ... + // - shell module should accept a list of shells and spit out two slices + // (allocated with a given allocator). There is too much dancing around + // here. + const shellSectionsErr = Allocator.Error || error{Overflow}; + pub fn shellSections(self: *const Sections) shellSectionsErr!ShellSections { + var popcon = shellImport.ShellWriter.init(self.allocator); + defer popcon.deinit(); + for (self.corpus.usersMulti.items(.shell)) |shell| { + try popcon.put(shell); + } + var sections = try popcon.toOwnedSections(shellImport.max_shells); + defer sections.deinit(); + // Caller owns both slices; release the index copy if the blob copy fails. + const index = try self.allocator.dupe(u8, sections.sectionIndex()); + errdefer self.allocator.free(index); + const blob = try self.allocator.dupe(u8, sections.sectionBlob()); + return ShellSections{ .index = index, .blob = blob }; + } + + pub fn groupMembers(self: *const Sections) Allocator.Error!GroupMembers { var buf: [compress.maxVarintLen64]u8 = undefined; var offsets = ArrayListUnmanaged(usize).initCapacity( self.allocator, @@ -198,7 +239,7 @@ pub const Sections = struct { for (self.corpus.groups) |group, i| { offsets[i] = offset; const users = self.corpus.groupname2users.get(group.name).?; - const len = try compress.putVarint(&buf, users.len); + const len = compress.putVarint(&buf, users.len); offset += len; try bytes.appendSlice(buf[0..len]); for (users) |user| { @@ -332,6 +373,30 @@ test "test corpus" { try testing.expectEqual(corpus.username2groups.get("404"), null); } +test "test sections" { + const allocator = testing.allocator; + var corpus = try testCorpus(allocator); + defer corpus.deinit(); + + var sections = Sections.init(allocator, &corpus); + + const bdz_gid = try sections.bdzGid(); + defer allocator.free(bdz_gid); + + const bdz_groupname = try sections.bdzGroupname(); + defer allocator.free(bdz_groupname); + + const bdz_uid = try sections.bdzUid(); + defer allocator.free(bdz_uid); + + const bdz_username = try sections.bdzUsername(); + defer allocator.free(bdz_username); + + const shellSections = try sections.shellSections(); + defer 
allocator.free(shellSections.index); + defer allocator.free(shellSections.blob); +} + test "pack gids" { const allocator = testing.allocator; var corpus = try testCorpus(allocator); diff --git a/src/shell.zig b/src/shell.zig index aaa591f..da54250 100644 --- a/src/shell.zig +++ b/src/shell.zig @@ -7,11 +7,22 @@ const StringHashMap = std.StringHashMap; const BoundedArray = std.BoundedArray; const StringContext = std.hash_map.StringContext; -// MaxShells is the maximum number of "popular" shells. -pub const MaxShells = 63; -pub const MaxShellLen = 64; +// max_shells is the maximum number of "popular" shells. +pub const max_shells = 63; +pub const max_shell_len = 64; const ShellAlignment = 2; // bits +// ShellIndex is an index to the shell strings. As shell can be up to 64 bytes +// (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset +// is 1<<12. To make location resolvable in 10 bits, all shells will be padded +// to 4 bytes. +// The actual shell length is len+1: we don't allow empty shells, and the real +// length of the shell is 1-64 bytes. +const ShellIndex = packed struct { + offset: u10, + len: u6, +}; + // ShellReader interprets "Shell Index" and "Shell Blob" sections. pub const ShellReader = struct { sectionIndex: []const ShellIndex, @@ -44,19 +55,20 @@ pub const ShellWriter = struct { }; const ShellSections = struct { - index: BoundedArray(ShellIndex, MaxShells), - blob: BoundedArray(u8, MaxShells * MaxShellLen), + index: BoundedArray(ShellIndex, max_shells), + blob: BoundedArray(u8, max_shells * max_shell_len), indices: StringHashMap(u6), // initializes and populates shell sections. All strings are copied, // nothing is owned. 
+ pub const initErr = Allocator.Error || error{Overflow}; pub fn init( allocator: Allocator, - shells: BoundedArray([]const u8, MaxShells), - ) !ShellSections { + shells: BoundedArray([]const u8, max_shells), + ) initErr!ShellSections { var self = ShellSections{ - .index = try BoundedArray(ShellIndex, MaxShells).init(shells.len), - .blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0), + .index = try BoundedArray(ShellIndex, max_shells).init(shells.len), + .blob = try BoundedArray(u8, max_shells * max_shell_len).init(0), .indices = StringHashMap(u6).init(allocator), }; var fullOffset: u12 = 0; @@ -132,7 +144,8 @@ pub const ShellWriter = struct { // toOwnedSections returns the analyzed ShellSections. Resets the shell // popularity contest. ShellSections memory is allocated by the ShellWriter // allocator, and must be deInit'ed by the caller. - pub fn toOwnedSections(self: *ShellWriter, limit: u10) !ShellSections { + const toOwnedSectionsErr = Allocator.Error || error{Overflow}; + pub fn toOwnedSections(self: *ShellWriter, limit: u10) toOwnedSectionsErr!ShellSections { var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {}); defer deque.deinit(); @@ -145,7 +158,7 @@ pub const ShellWriter = struct { } const total = std.math.min(deque.count(), limit); - var topShells = try BoundedArray([]const u8, MaxShells).init(total); + var topShells = try BoundedArray([]const u8, max_shells).init(total); var i: u32 = 0; while (i < total) : (i += 1) { @@ -161,17 +174,6 @@ pub const ShellWriter = struct { } }; -// ShellIndex is an index to the shell strings. As shell can be up to 64 bytes -// (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset -// is 1<<12. To make location resolvable in 10 bits, all shells will be padded -// to 4 bytes. -// The actual shell length is len+1: we don't allow empty shells, and the real -// length of the shell is 1-64 bytes. 
-const ShellIndex = packed struct { - offset: u10, - len: u6, -}; - const testing = std.testing; test "basic shellpopcon" { @@ -192,7 +194,7 @@ test "basic shellpopcon" { try popcon.put(shell); } - var sections = try popcon.toOwnedSections(MaxShells); + var sections = try popcon.toOwnedSections(max_shells); defer sections.deinit(); try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify