From 4cc655de24bcbefe7395af1bcba022fd0074a242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 9 Mar 2022 07:04:33 +0200 Subject: [PATCH] store members_offset as a varint --- README.md | 14 ++++++-------- src/group.zig | 44 +++++++++++++++++++++++--------------------- src/sections.zig | 40 +++++++++++++++++++++++++++------------- src/user.zig | 6 +----- 4 files changed, 57 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 5f9dd5c..5da60e2 100644 --- a/README.md +++ b/README.md @@ -187,11 +187,9 @@ the beginning of the section. ``` const PackedGroup = packed struct { gid: u32, - // index to a separate structure with a list of members. - members_offset: u32, groupname_len: u8, // max is 32, but have too much space here. - // a groupname_len-sized string - groupname []u8; + // varint members_offset + (groupname_len-1)-length string + groupdata []u8; } pub const PackedUser = packed struct { @@ -206,11 +204,11 @@ pub const PackedUser = packed struct { gecos_len: u11, // pseudocode: variable-sized array that will be stored immediately after // this struct. - stringdata []u8; + userdata []u8; } ``` -`stringdata` contains a few string entries: +`userdata` contains a few entries: - home. - name (optional). - gecos. @@ -368,8 +366,8 @@ shellIndex len(shells)*2 shell index array shellBlob <= 4032 shell data blob (max 63*64 bytes) groups ? packed Group entries (8b padding) users ? packed User entries (8b padding) -groupMembers ? per-group varint memberlist (no padding) -userGids ? per-user varint gidlist (no padding) +groupMembers ? per-group delta varint memberlist (no padding) +userGids ? per-user delta varint gidlist (no padding) ``` Section creation order: diff --git a/src/group.zig b/src/group.zig index ea59b3c..1f854e8 100644 --- a/src/group.zig +++ b/src/group.zig @@ -2,6 +2,7 @@ const std = @import("std"); const pad = @import("padding.zig"); const validate = @import("validate.zig"); +const compress = @import("compress.zig"); const InvalidRecord = validate.InvalidRecord; const mem = std.mem; @@ -33,15 +34,14 @@ pub const Group = struct { const GroupStored = struct { gid: u32, name: []const u8, - members_offset: u32, + members_offset: u64, }; const PackedGroup = struct { - const alignmentBits = 3; + const alignment_bits = 3; const Inner = packed struct { gid: u32, - members_offset: u32, groupname_len: u8, pub fn groupnameLen(self: *const Inner) usize { @@ -51,31 +51,35 @@ const PackedGroup = struct { inner: *const Inner, groupdata: []const u8, + members_offset: u64, pub const Entry = struct { group: PackedGroup, next: ?[]const u8, }; - pub fn fromBytes(bytes: []const u8) Entry { + pub fn fromBytes(bytes: []const u8) error{Overflow}!Entry { const inner = mem.bytesAsValue(Inner, bytes[0..@sizeOf(Inner)]); - const endBlob = @sizeOf(Inner) + inner.groupnameLen(); - const nextStart = pad.roundUp(usize, alignmentBits, endBlob); + const start_blob = @sizeOf(Inner); + const end_strings = @sizeOf(Inner) + inner.groupnameLen(); + const members_offset = try compress.uvarint(bytes[end_strings..]); + const end_blob = end_strings + members_offset.bytes_read; + const next_start = pad.roundUp(usize, alignment_bits, end_blob); var next: ?[]const u8 = null; - if (nextStart < bytes.len) - next = bytes[nextStart..]; + if (next_start < bytes.len) + next = bytes[next_start..]; return Entry{ .group = PackedGroup{ .inner = inner, - .groupdata = bytes[@sizeOf(Inner)..endBlob], + .groupdata = bytes[start_blob..end_strings], + .members_offset = members_offset.value, }, .next = next, }; } - const packErr = validate.InvalidRecord || Allocator.Error; fn validateUtf8(s: []const u8) InvalidRecord!void { if (!std.unicode.utf8ValidateSlice(s)) return error.InvalidRecord; @@ -84,9 +88,9 @@ const PackedGroup = struct { pub const Iterator = struct { section: ?[]const u8, - pub fn next(it: *Iterator) ?PackedGroup { + pub fn next(it: *Iterator) error{Overflow}!?PackedGroup { if (it.section) |section| { - const entry = fromBytes(section); + const entry = try fromBytes(section); it.section = entry.next; return entry.group; } @@ -102,32 +106,30 @@ const PackedGroup = struct { return self.inner.gid; } - pub fn membersOffset(self: *const PackedGroup) u32 { - return self.inner.members_offset; + pub fn membersOffset(self: *const PackedGroup) u64 { + return self.members_offset; } pub fn name(self: *const PackedGroup) []const u8 { return self.groupdata; } + const packErr = validate.InvalidRecord || Allocator.Error || error{Overflow}; pub fn packTo( arr: *ArrayList(u8), group: GroupStored, ) packErr!void { const groupname_len = try validate.downCast(u5, group.name.len - 1); - try validate.utf8(group.name); - const inner = Inner{ .gid = group.gid, - .members_offset = group.members_offset, .groupname_len = groupname_len, }; try arr.*.appendSlice(mem.asBytes(&inner)); try arr.*.appendSlice(group.name); - - try pad.arrayList(arr, alignmentBits); + try compress.appendUvarint(arr, group.members_offset); + try pad.arrayList(arr, alignment_bits); } }; @@ -163,7 +165,7 @@ test "construct PackedGroups" { .{ .gid = std.math.maxInt(u32), .name = "Name" ** 8, // 32 - .members_offset = std.math.maxInt(u32), + .members_offset = std.math.maxInt(u64), }, }; @@ -173,7 +175,7 @@ test "construct PackedGroups" { var i: u29 = 0; var it = PackedGroup.iterator(buf.items); - while (it.next()) |group| : (i += 1) { + while (try it.next()) |group| : (i += 1) { try testing.expectEqual(groups[i].gid, group.gid()); try testing.expectEqualStrings(groups[i].name, group.name()); try testing.expectEqual(groups[i].members_offset, group.membersOffset()); diff --git a/src/sections.zig b/src/sections.zig index f503807..4b3c349 100644 --- a/src/sections.zig +++ b/src/sections.zig @@ -269,7 +269,7 @@ pub fn usersSection( pub const GroupMembers = struct { // group index to it's offset in blob - idx2offset: []const u32, + idx2offset: []const u64, blob: []const u8, pub fn deinit(self: *GroupMembers, allocator: Allocator) void { @@ -284,22 +284,32 @@ pub fn groupMembers( corpus: *const Corpus, user2offset: []const u32, ) error{OutOfMemory}!GroupMembers { - var idx2offset = try allocator.alloc(u32, corpus.groups.len); + var idx2offset = try allocator.alloc(u64, corpus.groups.len); errdefer allocator.free(idx2offset); var blob = ArrayList(u8).init(allocator); errdefer blob.deinit(); // zero'th entry is empty, so empty groups can refer to it try compress.appendUvarint(&blob, 0); - for (corpus.groupsMulti.items(.group)) |group_users, i| { - if (group_users.len == 0) { - idx2offset[i] = 0; + + var scratch = try allocator.alloc(u32, 256); + defer allocator.free(scratch); + + for (corpus.group2users) |members, group_idx| { + if (members.len == 0) { + idx2offset[group_idx] = 0; continue; } - idx2offset[i] = blob.len; - compress.appendUvarint(&blob, group_users.len); - for (group_users) |userIdx| - compress.appendUvarint(&blob, user2offset[userIdx]); + scratch = try allocator.realloc(scratch, members.len); + scratch.len = members.len; + mem.copy(u32, scratch, members); + + compress.deltaCompress(u32, scratch) catch |err| switch (err) { + error.NotSorted => unreachable, + }; + try compress.appendUvarint(&blob, members.len); + for (scratch) |user_idx| + try compress.appendUvarint(&blob, user2offset[user_idx]); } return GroupMembers{ .idx2offset = idx2offset, @@ -339,13 +349,11 @@ pub const AllSections = struct { bdz_uid: []const u8, bdz_username: []const u8, users: UsersSection, - shell_sections: ShellSections, shell_index: []const u8, shell_blob: []const u8, - user_gids: UserGids, - user_gids_b: []const u8, + group_members: GroupMembers, pub fn init( allocator: Allocator, @@ -365,6 +373,11 @@ pub const AllSections = struct { &user_gids, &shell_sections, ); + //const group_members = try groupMembers( + // allocator, + // corpus, + // users.idx2offset, + //); return AllSections{ .allocator = allocator, .bdz_gid = bdz_gid, @@ -375,8 +388,9 @@ pub const AllSections = struct { .shell_index = mem.sliceAsBytes(shell_index.constSlice()), .shell_blob = mem.sliceAsBytes(shell_blob.constSlice()), .user_gids = user_gids, - .user_gids_b = user_gids.blob, .users = users, + //.group_members = group_members, + .group_members = undefined, }; } diff --git a/src/user.zig b/src/user.zig index 91ec653..ea1f68b 100644 --- a/src/user.zig +++ b/src/user.zig @@ -163,11 +163,7 @@ fn packedUser(comptime ShellIndexType: type) type { // - will not return the 'next' slice. // - cannot throw an Overflow error. pub fn fromBytes(bytes: []const u8) error{Overflow}!Entry { - const inner = mem.bytesAsValue( - Inner, - bytes[0..@sizeOf(Inner)], - ); - + const inner = mem.bytesAsValue(Inner, bytes[0..@sizeOf(Inner)]); const start_blob = @sizeOf(Inner); const end_strings = start_blob + inner.stringLength(); const gids_offset = try compress.uvarint(bytes[end_strings..]);