From 87c424aab9ca9e415b58a14f2081d1fa2d025ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 4 Mar 2022 11:01:19 +0200 Subject: [PATCH] get rid of Sections struct it had some non-useful internal state --- src/sections.zig | 255 +++++++++++++++++++++++------------------------ 1 file changed, 124 insertions(+), 131 deletions(-) diff --git a/src/sections.zig b/src/sections.zig index 6fa1e04..93ee1da 100644 --- a/src/sections.zig +++ b/src/sections.zig @@ -173,133 +173,130 @@ const Corpus = struct { } }; -pub const Sections = struct { - allocator: Allocator, - corpus: *const Corpus, +pub fn bdzGid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 { + return try cmph.pack_u32(allocator, corpus.groupsMulti.items(.gid)); +} - pub fn init(allocator: Allocator, corpus: *const Corpus) Sections { - return Sections{ - .allocator = allocator, - .corpus = corpus, - }; - } +pub fn bdzGroupname(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 { + return try cmph.pack_str(allocator, corpus.groupsMulti.items(.name)); +} - pub const GroupMembers = struct { - offsets: []const usize, - bytes: []const u8, - }; +pub fn bdzUid(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 { + return try cmph.pack_u32(allocator, corpus.usersMulti.items(.uid)); +} - pub fn bdzGid(self: *const Sections) cmph.Error![]const u8 { - return try cmph.pack_u32(self.allocator, self.corpus.groupsMulti.items(.gid)); - } +pub fn bdzUsername(allocator: Allocator, corpus: *const Corpus) cmph.Error![]const u8 { + return try cmph.pack_str(allocator, corpus.usersMulti.items(.name)); +} - pub fn bdzGroupname(self: *const Sections) cmph.Error![]const u8 { - return try cmph.pack_str(self.allocator, self.corpus.groupsMulti.items(.name)); - } +pub const ShellSections = struct { + index: []const u8, + blob: []const u8, - pub fn bdzUid(self: *const Sections) cmph.Error![]const u8 { - return try cmph.pack_u32(self.allocator, 
self.corpus.usersMulti.items(.uid)); - } - - pub fn bdzUsername(self: *const Sections) cmph.Error![]const u8 { - return try cmph.pack_str(self.allocator, self.corpus.usersMulti.items(.name)); - } - - pub const ShellSections = struct { - index: []const u8, - blob: []const u8, - }; - - // TODO(motiejus) there are a few problems: - // - memory management for shell sections is a mess. Make it easier by ... - // - shell module should accept a list of shells and spit out two slices - // (allocated with a given allocator). There is too much dancing around - // here. - const shellSectionsErr = Allocator.Error || error{Overflow}; - pub fn shellSections(self: *const Sections) shellSectionsErr!ShellSections { - var popcon = shellImport.ShellWriter.init(self.allocator); - defer popcon.deinit(); - for (self.corpus.usersMulti.items(.shell)) |shell| { - try popcon.put(shell); - } - var sections = try popcon.toOwnedSections(shellImport.max_shells); - defer sections.deinit(); - - return ShellSections{ - .index = try self.allocator.dupe(u8, sections.sectionIndex()), - .blob = try self.allocator.dupe(u8, sections.sectionBlob()), - }; - } - - pub const UserGids = struct { - // username -> offset in blob - name2offset: StringHashMap(u32), - // compressed user gids blob. A blob contains N <= users.len items, - // an item is: - // len: varint - // gid: [varint]varint, - // ... and the gid list is delta-compressed. 
- blob: []u8, - }; - - const userGidsErr = Allocator.Error || error{Overflow}; - pub fn userGids(self: *const Sections) userGidsErr!UserGids { - var arena = std.heap.ArenaAllocator.init(self.allocator); - defer arena.deinit(); - var blob = ArrayList(u8).init(self.allocator); - - var name2offset = StringHashMap(u32).init(self.allocator); - for (self.corpus.users) |user| { - const usergroups_maybe = self.corpus.username2groups.get(user.name); - if (usergroups_maybe == null) - continue; - const usergroups = usergroups_maybe.?; - - try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len)); - var userBlob = try ArrayList(u8).initCapacity(arena.allocator(), usergroups.len * 2); - var deltaCompressedGids = try arena.allocator().alloc(u32, usergroups.len); - deltaCompressedGids.len = usergroups.len; - for (usergroups) |group, i| { - deltaCompressedGids[i] = group.gid; - } - compress.deltaCompress(u32, deltaCompressedGids) catch |err| switch (err) { - error.NotSorted => unreachable, - }; - try compress.appendUvarint(&userBlob, usergroups.len); - for (deltaCompressedGids) |gid| { - try compress.appendUvarint(&userBlob, gid); - } - try blob.appendSlice(userBlob.toOwnedSlice()); - } - - return UserGids{ - .name2offset = name2offset, - .blob = blob.toOwnedSlice(), - }; - } - - pub fn groupMembers(self: *const Sections) Allocator.Error!GroupMembers { - var buf: [compress.maxVarintLen64]u8 = undefined; - var offsets = ArrayListUnmanaged(usize).initCapacity( - self.allocator, - self.corpus.groups.len, - ); - var bytes = ArrayList(u8).init(self.allocator); - var offset: usize = 0; - for (self.corpus.groups) |group, i| { - offsets[i] = offset; - const users = self.corpus.groupname2users.get(group.name).?; - const len = compress.putVarint(&buf, users.len); - offset += len; - try bytes.appendSlice(buf[0..len]); - for (users) |user| { - // TODO: offset into the User's record - _ = user; - } - } + pub fn deinit(self: *ShellSections, allocator: Allocator) void { + 
allocator.free(self.index); + allocator.free(self.blob); + self.* = undefined; } }; +// TODO(motiejus) there are a few problems: +// - memory management for shell sections is a mess. Make it easier by ... +// - shell module should accept a list of shells and spit out two slices +// (allocated with a given allocator). There is too much dancing around +// here. +const shellSectionsErr = Allocator.Error || error{Overflow}; +pub fn shellSections( + allocator: Allocator, + corpus: *const Corpus, +) shellSectionsErr!ShellSections { + var popcon = shellImport.ShellWriter.init(allocator); + defer popcon.deinit(); + for (corpus.usersMulti.items(.shell)) |shell| { + try popcon.put(shell); + } + var sections = try popcon.toOwnedSections(shellImport.max_shells); + defer sections.deinit(); + + return ShellSections{ + .index = try allocator.dupe(u8, sections.sectionIndex()), + .blob = try allocator.dupe(u8, sections.sectionBlob()), + }; +} + +pub const UserGids = struct { + // username -> offset in blob + name2offset: StringHashMap(u32), + // compressed user gids blob. A blob contains N <= users.len items, + // an item is: + // len: varint + // gid: [varint]varint, + // ... and the gid list is delta-compressed. 
+ blob: []u8, + + pub fn deinit(self: *UserGids, allocator: Allocator) void { + self.name2offset.deinit(); + allocator.free(self.blob); + self.* = undefined; + } +}; + +const userGidsErr = Allocator.Error || error{Overflow}; +pub fn userGids(allocator: Allocator, corpus: *const Corpus) userGidsErr!UserGids { + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + var blob = ArrayList(u8).init(allocator); + + var name2offset = StringHashMap(u32).init(allocator); + for (corpus.users) |user| { + const usergroups_maybe = corpus.username2groups.get(user.name); + if (usergroups_maybe == null) + continue; + const usergroups = usergroups_maybe.?; + + try name2offset.putNoClobber(user.name, try math.cast(u32, blob.items.len)); + var deltaCompressedGids = try arena.allocator().alloc(u32, usergroups.len); + + deltaCompressedGids.len = usergroups.len; + for (usergroups) |group, i| { + deltaCompressedGids[i] = group.gid; + } + compress.deltaCompress(u32, deltaCompressedGids) catch |err| switch (err) { + error.NotSorted => unreachable, + }; + try compress.appendUvarint(&blob, usergroups.len); + for (deltaCompressedGids) |gid| { + try compress.appendUvarint(&blob, gid); + } + } + + return UserGids{ + .name2offset = name2offset, + .blob = blob.toOwnedSlice(), + }; +} + +pub fn groupMembers(allocator: Allocator, corpus: *const Corpus) Allocator.Error!void { + var buf: [compress.maxVarintLen64]u8 = undefined; + var offsets = ArrayListUnmanaged(usize).initCapacity( + allocator, + corpus.groups.len, + ); + var bytes = ArrayList(u8).init(allocator); + var offset: usize = 0; + for (corpus.groups) |group, i| { + offsets[i] = offset; + const users = corpus.groupname2users.get(group.name).?; + const len = compress.putVarint(&buf, users.len); + offset += len; + try bytes.appendSlice(buf[0..len]); + for (users) |user| { + // TODO: offset into the User's record + _ = user; + } + } +} + // cmpUser compares two users for sorting. 
By username's utf8 codepoints, ascending. fn cmpUser(_: void, a: User, b: User) bool { var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator(); @@ -437,27 +434,23 @@ test "test sections" { var corpus = try testCorpus(allocator); defer corpus.deinit(); - var sections = Sections.init(allocator, &corpus); - - const bdz_gid = try sections.bdzGid(); + const bdz_gid = try bdzGid(allocator, &corpus); defer allocator.free(bdz_gid); - const bdz_groupname = try sections.bdzGroupname(); + const bdz_groupname = try bdzGroupname(allocator, &corpus); defer allocator.free(bdz_groupname); - const bdz_uid = try sections.bdzUid(); + const bdz_uid = try bdzUid(allocator, &corpus); defer allocator.free(bdz_uid); - const bdz_username = try sections.bdzUsername(); + const bdz_username = try bdzUsername(allocator, &corpus); defer allocator.free(bdz_username); - const shell_sections = try sections.shellSections(); - defer allocator.free(shell_sections.index); - defer allocator.free(shell_sections.blob); + var shell_sections = try shellSections(allocator, &corpus); + defer shell_sections.deinit(allocator); - var user_gids = try sections.userGids(); - defer user_gids.name2offset.deinit(); - defer allocator.free(user_gids.blob); + var user_gids = try userGids(allocator, &corpus); + defer user_gids.deinit(allocator); } test "pack gids" {