store members_offset as a varint

This commit is contained in:
Motiejus Jakštys 2022-03-09 07:04:33 +02:00 committed by Motiejus Jakštys
parent ba56ff8d3b
commit 4cc655de24
4 changed files with 57 additions and 47 deletions

View File

@ -187,11 +187,9 @@ the beginning of the section.
``` ```
const PackedGroup = packed struct { const PackedGroup = packed struct {
gid: u32, gid: u32,
// index to a separate structure with a list of members.
members_offset: u32,
groupname_len: u8, // max is 32, but have too much space here. groupname_len: u8, // max is 32, but have too much space here.
// a groupname_len-sized string // (groupname_len+1)-length string, followed by varint members_offset
groupname []u8; groupdata []u8;
} }
pub const PackedUser = packed struct { pub const PackedUser = packed struct {
@ -206,11 +204,11 @@ pub const PackedUser = packed struct {
gecos_len: u11, gecos_len: u11,
// pseudocode: variable-sized array that will be stored immediately after // pseudocode: variable-sized array that will be stored immediately after
// this struct. // this struct.
stringdata []u8; userdata []u8;
} }
``` ```
`stringdata` contains a few string entries: `userdata` contains a few entries:
- home. - home.
- name (optional). - name (optional).
- gecos. - gecos.
@ -368,8 +366,8 @@ shellIndex len(shells)*2 shell index array
shellBlob <= 4032 shell data blob (max 63*64 bytes) shellBlob <= 4032 shell data blob (max 63*64 bytes)
groups ? packed Group entries (8b padding) groups ? packed Group entries (8b padding)
users ? packed User entries (8b padding) users ? packed User entries (8b padding)
groupMembers ? per-group varint memberlist (no padding) groupMembers ? per-group delta varint memberlist (no padding)
userGids ? per-user varint gidlist (no padding) userGids ? per-user delta varint gidlist (no padding)
``` ```
Section creation order: Section creation order:

View File

@ -2,6 +2,7 @@ const std = @import("std");
const pad = @import("padding.zig"); const pad = @import("padding.zig");
const validate = @import("validate.zig"); const validate = @import("validate.zig");
const compress = @import("compress.zig");
const InvalidRecord = validate.InvalidRecord; const InvalidRecord = validate.InvalidRecord;
const mem = std.mem; const mem = std.mem;
@ -33,15 +34,14 @@ pub const Group = struct {
const GroupStored = struct { const GroupStored = struct {
gid: u32, gid: u32,
name: []const u8, name: []const u8,
members_offset: u32, members_offset: u64,
}; };
const PackedGroup = struct { const PackedGroup = struct {
const alignmentBits = 3; const alignment_bits = 3;
const Inner = packed struct { const Inner = packed struct {
gid: u32, gid: u32,
members_offset: u32,
groupname_len: u8, groupname_len: u8,
pub fn groupnameLen(self: *const Inner) usize { pub fn groupnameLen(self: *const Inner) usize {
@ -51,31 +51,35 @@ const PackedGroup = struct {
inner: *const Inner, inner: *const Inner,
groupdata: []const u8, groupdata: []const u8,
members_offset: u64,
pub const Entry = struct { pub const Entry = struct {
group: PackedGroup, group: PackedGroup,
next: ?[]const u8, next: ?[]const u8,
}; };
pub fn fromBytes(bytes: []const u8) Entry { pub fn fromBytes(bytes: []const u8) error{Overflow}!Entry {
const inner = mem.bytesAsValue(Inner, bytes[0..@sizeOf(Inner)]); const inner = mem.bytesAsValue(Inner, bytes[0..@sizeOf(Inner)]);
const endBlob = @sizeOf(Inner) + inner.groupnameLen(); const start_blob = @sizeOf(Inner);
const nextStart = pad.roundUp(usize, alignmentBits, endBlob); const end_strings = @sizeOf(Inner) + inner.groupnameLen();
const members_offset = try compress.uvarint(bytes[end_strings..]);
const end_blob = end_strings + members_offset.bytes_read;
const next_start = pad.roundUp(usize, alignment_bits, end_blob);
var next: ?[]const u8 = null; var next: ?[]const u8 = null;
if (nextStart < bytes.len) if (next_start < bytes.len)
next = bytes[nextStart..]; next = bytes[next_start..];
return Entry{ return Entry{
.group = PackedGroup{ .group = PackedGroup{
.inner = inner, .inner = inner,
.groupdata = bytes[@sizeOf(Inner)..endBlob], .groupdata = bytes[start_blob..end_strings],
.members_offset = members_offset.value,
}, },
.next = next, .next = next,
}; };
} }
const packErr = validate.InvalidRecord || Allocator.Error;
fn validateUtf8(s: []const u8) InvalidRecord!void { fn validateUtf8(s: []const u8) InvalidRecord!void {
if (!std.unicode.utf8ValidateSlice(s)) if (!std.unicode.utf8ValidateSlice(s))
return error.InvalidRecord; return error.InvalidRecord;
@ -84,9 +88,9 @@ const PackedGroup = struct {
pub const Iterator = struct { pub const Iterator = struct {
section: ?[]const u8, section: ?[]const u8,
pub fn next(it: *Iterator) ?PackedGroup { pub fn next(it: *Iterator) error{Overflow}!?PackedGroup {
if (it.section) |section| { if (it.section) |section| {
const entry = fromBytes(section); const entry = try fromBytes(section);
it.section = entry.next; it.section = entry.next;
return entry.group; return entry.group;
} }
@ -102,32 +106,30 @@ const PackedGroup = struct {
return self.inner.gid; return self.inner.gid;
} }
pub fn membersOffset(self: *const PackedGroup) u32 { pub fn membersOffset(self: *const PackedGroup) u64 {
return self.inner.members_offset; return self.members_offset;
} }
pub fn name(self: *const PackedGroup) []const u8 { pub fn name(self: *const PackedGroup) []const u8 {
return self.groupdata; return self.groupdata;
} }
const packErr = validate.InvalidRecord || Allocator.Error || error{Overflow};
pub fn packTo( pub fn packTo(
arr: *ArrayList(u8), arr: *ArrayList(u8),
group: GroupStored, group: GroupStored,
) packErr!void { ) packErr!void {
const groupname_len = try validate.downCast(u5, group.name.len - 1); const groupname_len = try validate.downCast(u5, group.name.len - 1);
try validate.utf8(group.name); try validate.utf8(group.name);
const inner = Inner{ const inner = Inner{
.gid = group.gid, .gid = group.gid,
.members_offset = group.members_offset,
.groupname_len = groupname_len, .groupname_len = groupname_len,
}; };
try arr.*.appendSlice(mem.asBytes(&inner)); try arr.*.appendSlice(mem.asBytes(&inner));
try arr.*.appendSlice(group.name); try arr.*.appendSlice(group.name);
try compress.appendUvarint(arr, group.members_offset);
try pad.arrayList(arr, alignmentBits); try pad.arrayList(arr, alignment_bits);
} }
}; };
@ -163,7 +165,7 @@ test "construct PackedGroups" {
.{ .{
.gid = std.math.maxInt(u32), .gid = std.math.maxInt(u32),
.name = "Name" ** 8, // 32 .name = "Name" ** 8, // 32
.members_offset = std.math.maxInt(u32), .members_offset = std.math.maxInt(u64),
}, },
}; };
@ -173,7 +175,7 @@ test "construct PackedGroups" {
var i: u29 = 0; var i: u29 = 0;
var it = PackedGroup.iterator(buf.items); var it = PackedGroup.iterator(buf.items);
while (it.next()) |group| : (i += 1) { while (try it.next()) |group| : (i += 1) {
try testing.expectEqual(groups[i].gid, group.gid()); try testing.expectEqual(groups[i].gid, group.gid());
try testing.expectEqualStrings(groups[i].name, group.name()); try testing.expectEqualStrings(groups[i].name, group.name());
try testing.expectEqual(groups[i].members_offset, group.membersOffset()); try testing.expectEqual(groups[i].members_offset, group.membersOffset());

View File

@ -269,7 +269,7 @@ pub fn usersSection(
pub const GroupMembers = struct { pub const GroupMembers = struct {
// group index to its offset in blob // group index to its offset in blob
idx2offset: []const u32, idx2offset: []const u64,
blob: []const u8, blob: []const u8,
pub fn deinit(self: *GroupMembers, allocator: Allocator) void { pub fn deinit(self: *GroupMembers, allocator: Allocator) void {
@ -284,22 +284,32 @@ pub fn groupMembers(
corpus: *const Corpus, corpus: *const Corpus,
user2offset: []const u32, user2offset: []const u32,
) error{OutOfMemory}!GroupMembers { ) error{OutOfMemory}!GroupMembers {
var idx2offset = try allocator.alloc(u32, corpus.groups.len); var idx2offset = try allocator.alloc(u64, corpus.groups.len);
errdefer allocator.free(idx2offset); errdefer allocator.free(idx2offset);
var blob = ArrayList(u8).init(allocator); var blob = ArrayList(u8).init(allocator);
errdefer blob.deinit(); errdefer blob.deinit();
// zeroth entry is empty, so empty groups can refer to it // zeroth entry is empty, so empty groups can refer to it
try compress.appendUvarint(&blob, 0); try compress.appendUvarint(&blob, 0);
for (corpus.groupsMulti.items(.group)) |group_users, i| {
if (group_users.len == 0) { var scratch = try allocator.alloc(u32, 256);
idx2offset[i] = 0; defer allocator.free(scratch);
for (corpus.group2users) |members, group_idx| {
if (members.len == 0) {
idx2offset[group_idx] = 0;
continue; continue;
} }
idx2offset[i] = blob.len; scratch = try allocator.realloc(scratch, members.len);
compress.appendUvarint(&blob, group_users.len); scratch.len = members.len;
for (group_users) |userIdx| mem.copy(u32, scratch, members);
compress.appendUvarint(&blob, user2offset[userIdx]);
compress.deltaCompress(u32, scratch) catch |err| switch (err) {
error.NotSorted => unreachable,
};
try compress.appendUvarint(&blob, members.len);
for (scratch) |user_idx|
try compress.appendUvarint(&blob, user2offset[user_idx]);
} }
return GroupMembers{ return GroupMembers{
.idx2offset = idx2offset, .idx2offset = idx2offset,
@ -339,13 +349,11 @@ pub const AllSections = struct {
bdz_uid: []const u8, bdz_uid: []const u8,
bdz_username: []const u8, bdz_username: []const u8,
users: UsersSection, users: UsersSection,
shell_sections: ShellSections, shell_sections: ShellSections,
shell_index: []const u8, shell_index: []const u8,
shell_blob: []const u8, shell_blob: []const u8,
user_gids: UserGids, user_gids: UserGids,
user_gids_b: []const u8, group_members: GroupMembers,
pub fn init( pub fn init(
allocator: Allocator, allocator: Allocator,
@ -365,6 +373,11 @@ pub const AllSections = struct {
&user_gids, &user_gids,
&shell_sections, &shell_sections,
); );
//const group_members = try groupMembers(
// allocator,
// corpus,
// users.idx2offset,
//);
return AllSections{ return AllSections{
.allocator = allocator, .allocator = allocator,
.bdz_gid = bdz_gid, .bdz_gid = bdz_gid,
@ -375,8 +388,9 @@ pub const AllSections = struct {
.shell_index = mem.sliceAsBytes(shell_index.constSlice()), .shell_index = mem.sliceAsBytes(shell_index.constSlice()),
.shell_blob = mem.sliceAsBytes(shell_blob.constSlice()), .shell_blob = mem.sliceAsBytes(shell_blob.constSlice()),
.user_gids = user_gids, .user_gids = user_gids,
.user_gids_b = user_gids.blob,
.users = users, .users = users,
//.group_members = group_members,
.group_members = undefined,
}; };
} }

View File

@ -163,11 +163,7 @@ fn packedUser(comptime ShellIndexType: type) type {
// - will not return the 'next' slice. // - will not return the 'next' slice.
// - cannot throw an Overflow error. // - cannot throw an Overflow error.
pub fn fromBytes(bytes: []const u8) error{Overflow}!Entry { pub fn fromBytes(bytes: []const u8) error{Overflow}!Entry {
const inner = mem.bytesAsValue( const inner = mem.bytesAsValue(Inner, bytes[0..@sizeOf(Inner)]);
Inner,
bytes[0..@sizeOf(Inner)],
);
const start_blob = @sizeOf(Inner); const start_blob = @sizeOf(Inner);
const end_strings = start_blob + inner.stringLength(); const end_strings = start_blob + inner.stringLength();
const gids_offset = try compress.uvarint(bytes[end_strings..]); const gids_offset = try compress.uvarint(bytes[end_strings..]);