store members_offset as a varint

This commit is contained in:
Motiejus Jakštys 2022-03-09 07:04:33 +02:00 committed by Motiejus Jakštys
parent ba56ff8d3b
commit 4cc655de24
4 changed files with 57 additions and 47 deletions

View File

@ -187,11 +187,9 @@ the beginning of the section.
```
const PackedGroup = packed struct {
gid: u32,
// index to a separate structure with a list of members.
members_offset: u32,
groupname_len: u8, // max is 32, but have too much space here.
// a groupname_len-sized string
groupname []u8;
// (groupname_len+1)-byte groupname string, followed by varint members_offset
groupdata []u8;
}
pub const PackedUser = packed struct {
@ -206,11 +204,11 @@ pub const PackedUser = packed struct {
gecos_len: u11,
// pseudocode: variable-sized array that will be stored immediately after
// this struct.
stringdata []u8;
userdata []u8;
}
```
`stringdata` contains a few string entries:
`userdata` contains a few entries:
- home.
- name (optional).
- gecos.
@ -368,8 +366,8 @@ shellIndex len(shells)*2 shell index array
shellBlob <= 4032 shell data blob (max 63*64 bytes)
groups ? packed Group entries (8b padding)
users ? packed User entries (8b padding)
groupMembers ? per-group varint memberlist (no padding)
userGids ? per-user varint gidlist (no padding)
groupMembers ? per-group delta varint memberlist (no padding)
userGids ? per-user delta varint gidlist (no padding)
```
Section creation order:

View File

@ -2,6 +2,7 @@ const std = @import("std");
const pad = @import("padding.zig");
const validate = @import("validate.zig");
const compress = @import("compress.zig");
const InvalidRecord = validate.InvalidRecord;
const mem = std.mem;
@ -33,15 +34,14 @@ pub const Group = struct {
const GroupStored = struct {
gid: u32,
name: []const u8,
members_offset: u32,
members_offset: u64,
};
const PackedGroup = struct {
const alignmentBits = 3;
const alignment_bits = 3;
const Inner = packed struct {
gid: u32,
members_offset: u32,
groupname_len: u8,
pub fn groupnameLen(self: *const Inner) usize {
@ -51,31 +51,35 @@ const PackedGroup = struct {
inner: *const Inner,
groupdata: []const u8,
members_offset: u64,
pub const Entry = struct {
group: PackedGroup,
next: ?[]const u8,
};
pub fn fromBytes(bytes: []const u8) Entry {
pub fn fromBytes(bytes: []const u8) error{Overflow}!Entry {
const inner = mem.bytesAsValue(Inner, bytes[0..@sizeOf(Inner)]);
const endBlob = @sizeOf(Inner) + inner.groupnameLen();
const nextStart = pad.roundUp(usize, alignmentBits, endBlob);
const start_blob = @sizeOf(Inner);
const end_strings = @sizeOf(Inner) + inner.groupnameLen();
const members_offset = try compress.uvarint(bytes[end_strings..]);
const end_blob = end_strings + members_offset.bytes_read;
const next_start = pad.roundUp(usize, alignment_bits, end_blob);
var next: ?[]const u8 = null;
if (nextStart < bytes.len)
next = bytes[nextStart..];
if (next_start < bytes.len)
next = bytes[next_start..];
return Entry{
.group = PackedGroup{
.inner = inner,
.groupdata = bytes[@sizeOf(Inner)..endBlob],
.groupdata = bytes[start_blob..end_strings],
.members_offset = members_offset.value,
},
.next = next,
};
}
const packErr = validate.InvalidRecord || Allocator.Error;
fn validateUtf8(s: []const u8) InvalidRecord!void {
if (!std.unicode.utf8ValidateSlice(s))
return error.InvalidRecord;
@ -84,9 +88,9 @@ const PackedGroup = struct {
pub const Iterator = struct {
section: ?[]const u8,
pub fn next(it: *Iterator) ?PackedGroup {
pub fn next(it: *Iterator) error{Overflow}!?PackedGroup {
if (it.section) |section| {
const entry = fromBytes(section);
const entry = try fromBytes(section);
it.section = entry.next;
return entry.group;
}
@ -102,32 +106,30 @@ const PackedGroup = struct {
return self.inner.gid;
}
pub fn membersOffset(self: *const PackedGroup) u32 {
return self.inner.members_offset;
pub fn membersOffset(self: *const PackedGroup) u64 {
return self.members_offset;
}
pub fn name(self: *const PackedGroup) []const u8 {
return self.groupdata;
}
const packErr = validate.InvalidRecord || Allocator.Error || error{Overflow};
pub fn packTo(
arr: *ArrayList(u8),
group: GroupStored,
) packErr!void {
const groupname_len = try validate.downCast(u5, group.name.len - 1);
try validate.utf8(group.name);
const inner = Inner{
.gid = group.gid,
.members_offset = group.members_offset,
.groupname_len = groupname_len,
};
try arr.*.appendSlice(mem.asBytes(&inner));
try arr.*.appendSlice(group.name);
try pad.arrayList(arr, alignmentBits);
try compress.appendUvarint(arr, group.members_offset);
try pad.arrayList(arr, alignment_bits);
}
};
@ -163,7 +165,7 @@ test "construct PackedGroups" {
.{
.gid = std.math.maxInt(u32),
.name = "Name" ** 8, // 32
.members_offset = std.math.maxInt(u32),
.members_offset = std.math.maxInt(u64),
},
};
@ -173,7 +175,7 @@ test "construct PackedGroups" {
var i: u29 = 0;
var it = PackedGroup.iterator(buf.items);
while (it.next()) |group| : (i += 1) {
while (try it.next()) |group| : (i += 1) {
try testing.expectEqual(groups[i].gid, group.gid());
try testing.expectEqualStrings(groups[i].name, group.name());
try testing.expectEqual(groups[i].members_offset, group.membersOffset());

View File

@ -269,7 +269,7 @@ pub fn usersSection(
pub const GroupMembers = struct {
// group index to its offset in blob
idx2offset: []const u32,
idx2offset: []const u64,
blob: []const u8,
pub fn deinit(self: *GroupMembers, allocator: Allocator) void {
@ -284,22 +284,32 @@ pub fn groupMembers(
corpus: *const Corpus,
user2offset: []const u32,
) error{OutOfMemory}!GroupMembers {
var idx2offset = try allocator.alloc(u32, corpus.groups.len);
var idx2offset = try allocator.alloc(u64, corpus.groups.len);
errdefer allocator.free(idx2offset);
var blob = ArrayList(u8).init(allocator);
errdefer blob.deinit();
// zeroth entry is empty, so empty groups can refer to it
try compress.appendUvarint(&blob, 0);
for (corpus.groupsMulti.items(.group)) |group_users, i| {
if (group_users.len == 0) {
idx2offset[i] = 0;
var scratch = try allocator.alloc(u32, 256);
defer allocator.free(scratch);
for (corpus.group2users) |members, group_idx| {
if (members.len == 0) {
idx2offset[group_idx] = 0;
continue;
}
idx2offset[i] = blob.len;
compress.appendUvarint(&blob, group_users.len);
for (group_users) |userIdx|
compress.appendUvarint(&blob, user2offset[userIdx]);
scratch = try allocator.realloc(scratch, members.len);
scratch.len = members.len;
mem.copy(u32, scratch, members);
compress.deltaCompress(u32, scratch) catch |err| switch (err) {
error.NotSorted => unreachable,
};
try compress.appendUvarint(&blob, members.len);
for (scratch) |user_idx|
try compress.appendUvarint(&blob, user2offset[user_idx]);
}
return GroupMembers{
.idx2offset = idx2offset,
@ -339,13 +349,11 @@ pub const AllSections = struct {
bdz_uid: []const u8,
bdz_username: []const u8,
users: UsersSection,
shell_sections: ShellSections,
shell_index: []const u8,
shell_blob: []const u8,
user_gids: UserGids,
user_gids_b: []const u8,
group_members: GroupMembers,
pub fn init(
allocator: Allocator,
@ -365,6 +373,11 @@ pub const AllSections = struct {
&user_gids,
&shell_sections,
);
//const group_members = try groupMembers(
// allocator,
// corpus,
// users.idx2offset,
//);
return AllSections{
.allocator = allocator,
.bdz_gid = bdz_gid,
@ -375,8 +388,9 @@ pub const AllSections = struct {
.shell_index = mem.sliceAsBytes(shell_index.constSlice()),
.shell_blob = mem.sliceAsBytes(shell_blob.constSlice()),
.user_gids = user_gids,
.user_gids_b = user_gids.blob,
.users = users,
//.group_members = group_members,
.group_members = undefined,
};
}

View File

@ -163,11 +163,7 @@ fn packedUser(comptime ShellIndexType: type) type {
// - will not return the 'next' slice.
// - cannot throw an Overflow error.
pub fn fromBytes(bytes: []const u8) error{Overflow}!Entry {
const inner = mem.bytesAsValue(
Inner,
bytes[0..@sizeOf(Inner)],
);
const inner = mem.bytesAsValue(Inner, bytes[0..@sizeOf(Inner)]);
const start_blob = @sizeOf(Inner);
const end_strings = start_blob + inner.stringLength();
const gids_offset = try compress.uvarint(bytes[end_strings..]);