offsets are u64

This commit is contained in:
Motiejus Jakštys 2022-03-08 20:44:32 +02:00 committed by Motiejus Jakštys
parent d2ace3e125
commit fc469acbf9
3 changed files with 66 additions and 71 deletions

View File

@ -197,13 +197,13 @@ const PackedGroup = packed struct {
pub const PackedUser = packed struct { pub const PackedUser = packed struct {
uid: u32, uid: u32,
gid: u32, gid: u32,
additional_gids_offset: u29, padding: u2 = 0,
shell_here: bool,
shell_len_or_idx: u6, shell_len_or_idx: u6,
home_len: u6, shell_here: bool,
name_is_a_suffix: bool, name_is_a_suffix: bool,
home_len: u6,
name_len: u5, name_len: u5,
gecos_len: u8, gecos_len: u11,
// pseudocode: variable-sized array that will be stored immediately after // pseudocode: variable-sized array that will be stored immediately after
// this struct. // this struct.
stringdata []u8; stringdata []u8;
@ -215,6 +215,7 @@ pub const PackedUser = packed struct {
- name (optional). - name (optional).
- gecos. - gecos.
- shell (optional). - shell (optional).
- `additional_gids_offset`: varint.
First byte of home is stored right after the `gecos_len` field, and its First byte of home is stored right after the `gecos_len` field, and its
length is `home_len`. The same logic applies to all the `stringdata` fields: length is `home_len`. The same logic applies to all the `stringdata` fields:
@ -232,6 +233,9 @@ Additionally, there are two "easy" optimizations:
2. `name_is_a_suffix=false`: name begins one byte after home, and its length 2. `name_is_a_suffix=false`: name begins one byte after home, and its length
is `name_len`. is `name_len`.
The last field, `additional_gids_offset`, which is needed least frequently,
is stored at the end.
Shells Shells
------ ------

View File

@ -35,10 +35,10 @@ const Corpus = struct {
usersMulti: MultiArrayList(User), usersMulti: MultiArrayList(User),
groupsMulti: MultiArrayList(Group), groupsMulti: MultiArrayList(Group),
name2user: StringHashMap(usize), name2user: StringHashMap(u64),
name2group: StringHashMap(usize), name2group: StringHashMap(u64),
groupname2users: StringHashMap([]usize), groupname2users: StringHashMap([]u64),
username2groups: StringHashMap([]usize), username2groups: StringHashMap([]u64),
pub fn init( pub fn init(
baseAllocator: Allocator, baseAllocator: Allocator,
@ -68,8 +68,8 @@ const Corpus = struct {
for (groups) |group| for (groups) |group|
groupsMulti.appendAssumeCapacity(group); groupsMulti.appendAssumeCapacity(group);
var name2user = StringHashMap(usize).init(allocator); var name2user = StringHashMap(u64).init(allocator);
var name2group = StringHashMap(usize).init(allocator); var name2group = StringHashMap(u64).init(allocator);
for (users) |*user, i| { for (users) |*user, i| {
var res1 = try name2user.getOrPut(user.name); var res1 = try name2user.getOrPut(user.name);
if (res1.found_existing) if (res1.found_existing)
@ -84,17 +84,17 @@ const Corpus = struct {
res1.value_ptr.* = i; res1.value_ptr.* = i;
} }
var groupname2users = StringHashMap([]usize).init(allocator); var groupname2users = StringHashMap([]u64).init(allocator);
// uses baseAllocator, because it will be freed before // uses baseAllocator, because it will be freed before
// returning from this function. This keeps the arena clean. // returning from this function. This keeps the arena clean.
var username2groups = StringHashMap( var username2groups = StringHashMap(
ArrayListUnmanaged(usize), ArrayListUnmanaged(u64),
).init(baseAllocator); ).init(baseAllocator);
defer username2groups.deinit(); defer username2groups.deinit();
for (groups) |*group, i| { for (groups) |*group, i| {
var members = try allocator.alloc(usize, group.members.count()); var members = try allocator.alloc(u64, group.members.count());
members.len = 0; members.len = 0;
var it = group.members.iterator(); var it = group.members.iterator();
@ -108,7 +108,7 @@ const Corpus = struct {
var groupsOfMember = try username2groups.getOrPut(memberName.*); var groupsOfMember = try username2groups.getOrPut(memberName.*);
if (!groupsOfMember.found_existing) if (!groupsOfMember.found_existing)
groupsOfMember.value_ptr.* = ArrayListUnmanaged(usize){}; groupsOfMember.value_ptr.* = ArrayListUnmanaged(u64){};
try groupsOfMember.value_ptr.*.append(allocator, i); try groupsOfMember.value_ptr.*.append(allocator, i);
} }
@ -120,14 +120,14 @@ const Corpus = struct {
var it1 = groupname2users.valueIterator(); var it1 = groupname2users.valueIterator();
while (it1.next()) |groupUsers| { while (it1.next()) |groupUsers| {
sort.sort(usize, groupUsers.*, {}, comptime sort.asc(usize)); sort.sort(u64, groupUsers.*, {}, comptime sort.asc(u64));
} }
var it2 = username2groups.valueIterator(); var it2 = username2groups.valueIterator();
while (it2.next()) |userGroups| while (it2.next()) |userGroups|
sort.sort(usize, userGroups.items, {}, comptime sort.asc(usize)); sort.sort(u64, userGroups.items, {}, comptime sort.asc(u64));
var username2groups_final = StringHashMap([]usize).init(allocator); var username2groups_final = StringHashMap([]u64).init(allocator);
var it = username2groups.iterator(); var it = username2groups.iterator();
while (it.next()) |elem| { while (it.next()) |elem| {
const username = elem.key_ptr.*; const username = elem.key_ptr.*;
@ -188,7 +188,7 @@ pub fn shellSections(
pub const UserGids = struct { pub const UserGids = struct {
// user index -> offset in blob // user index -> offset in blob
idx2offset: []const u32, idx2offset: []const u64,
// compressed user gids blob. A blob contains N <= users.len items, // compressed user gids blob. A blob contains N <= users.len items,
// an item is: // an item is:
// len: varint // len: varint
@ -211,7 +211,7 @@ pub fn userGids(
) error{ OutOfMemory, Overflow }!UserGids { ) error{ OutOfMemory, Overflow }!UserGids {
var blob = ArrayList(u8).init(allocator); var blob = ArrayList(u8).init(allocator);
errdefer blob.deinit(); errdefer blob.deinit();
var idx2offset = try allocator.alloc(u32, corpus.users.len); var idx2offset = try allocator.alloc(u64, corpus.users.len);
errdefer allocator.free(idx2offset); errdefer allocator.free(idx2offset);
// zero'th entry is empty, so groupless users can refer to it. // zero'th entry is empty, so groupless users can refer to it.
@ -222,9 +222,7 @@ pub fn userGids(
defer allocator.free(scratch); defer allocator.free(scratch);
for (corpus.users) |user, user_idx| { for (corpus.users) |user, user_idx| {
if (corpus.username2groups.get(user.name)) |usergroups| { if (corpus.username2groups.get(user.name)) |usergroups| {
const userOffset = try math.cast(u32, blob.items.len); idx2offset[user_idx] = blob.items.len;
std.debug.assert(userOffset & 7 == 0);
idx2offset[user_idx] = userOffset;
scratch = try allocator.realloc(scratch, usergroups.len); scratch = try allocator.realloc(scratch, usergroups.len);
scratch.len = usergroups.len; scratch.len = usergroups.len;
for (usergroups) |group_idx, i| for (usergroups) |group_idx, i|
@ -552,7 +550,7 @@ test "userGids" {
var vit = try compress.VarintSliceIterator(user_gids.blob[offset..]); var vit = try compress.VarintSliceIterator(user_gids.blob[offset..]);
var it = compress.DeltaDecompressionIterator(&vit); var it = compress.DeltaDecompressionIterator(&vit);
try testing.expectEqual(it.remaining(), groups.?.len); try testing.expectEqual(it.remaining(), groups.?.len);
var i: usize = 0; var i: u64 = 0;
while (try it.next()) |gid| : (i += 1) { while (try it.next()) |gid| : (i += 1) {
try testing.expectEqual(gid, corpus.groups[groups.?[i]].gid); try testing.expectEqual(gid, corpus.groups[groups.?[i]].gid);
} }

View File

@ -2,6 +2,7 @@ const std = @import("std");
const pad = @import("padding.zig"); const pad = @import("padding.zig");
const validate = @import("validate.zig"); const validate = @import("validate.zig");
const compress = @import("compress.zig");
const InvalidRecord = validate.InvalidRecord; const InvalidRecord = validate.InvalidRecord;
const assert = std.debug.assert; const assert = std.debug.assert;
@ -85,17 +86,16 @@ fn packedUser(comptime ShellIndexType: type) type {
const alignmentBits = 3; const alignmentBits = 3;
const InnerSize = @divExact(@bitSizeOf(Inner), 8);
const Inner = packed struct { const Inner = packed struct {
uid: u32, uid: u32,
gid: u32, gid: u32,
additional_gids_offset: u29, padding: u2 = 0,
shell_len_or_idx: u6, shell_len_or_idx: u6,
shell_here: bool, shell_here: bool,
home_len: u6,
name_is_a_suffix: bool, name_is_a_suffix: bool,
home_len: u6,
name_len: u5, name_len: u5,
gecos_len: u8, gecos_len: u11,
fn homeLen(self: *const Inner) usize { fn homeLen(self: *const Inner) usize {
return @as(u32, self.home_len) + 1; return @as(u32, self.home_len) + 1;
@ -135,8 +135,8 @@ fn packedUser(comptime ShellIndexType: type) type {
return @as(u32, self.shell_len_or_idx) + 1; return @as(u32, self.shell_len_or_idx) + 1;
} }
// blobLength returns the length of the blob storing string values. // stringLength returns the length of the blob storing string values.
fn blobLength(self: *const Inner) usize { fn stringLength(self: *const Inner) usize {
var result: usize = self.homeLen() + self.gecosLen(); var result: usize = self.homeLen() + self.gecosLen();
if (!self.name_is_a_suffix) if (!self.name_is_a_suffix)
result += self.nameLen(); result += self.nameLen();
@ -150,25 +150,30 @@ fn packedUser(comptime ShellIndexType: type) type {
// field. Both of those fields are pointers to "our representation" of // field. Both of those fields are pointers to "our representation" of
// that field. // that field.
inner: *const Inner, inner: *const Inner,
userdata: []const u8, bytes: []const u8,
additional_gids_offset: u64,
pub const Entry = struct { pub const Entry = struct {
user: Self, user: Self,
next: ?[]const u8, next: ?[]const u8,
}; };
pub fn fromBytes(bytes: []const u8) Entry { // TODO(motiejus) provide a way to return an entry without decoding the
// additional_gids_offset:
// - will not return the 'next' slice.
// - cannot throw an Overflow error.
pub fn fromBytes(bytes: []const u8) error{Overflow}!Entry {
const inner = mem.bytesAsValue( const inner = mem.bytesAsValue(
Inner, Inner,
// Should use InnerSize instead of sizeOf, see
// https://github.com/ziglang/zig/issues/10958
bytes[0..@sizeOf(Inner)], bytes[0..@sizeOf(Inner)],
); );
const startBlob = InnerSize; const start_blob = @sizeOf(Inner);
const endBlob = startBlob + inner.blobLength(); const end_strings = start_blob + inner.stringLength();
const gids_offset = try compress.uvarint(bytes[end_strings..]);
const end_blob = end_strings + gids_offset.bytes_read;
const nextStart = pad.roundUp(usize, alignmentBits, endBlob); const nextStart = pad.roundUp(usize, alignmentBits, end_blob);
var next: ?[]const u8 = null; var next: ?[]const u8 = null;
if (nextStart < bytes.len) if (nextStart < bytes.len)
next = bytes[nextStart..]; next = bytes[nextStart..];
@ -176,7 +181,8 @@ fn packedUser(comptime ShellIndexType: type) type {
return Entry{ return Entry{
.user = Self{ .user = Self{
.inner = inner, .inner = inner,
.userdata = bytes[startBlob..endBlob], .bytes = bytes[start_blob..end_blob],
.additional_gids_offset = gids_offset.value,
}, },
.next = next, .next = next,
}; };
@ -186,9 +192,9 @@ fn packedUser(comptime ShellIndexType: type) type {
section: ?[]const u8, section: ?[]const u8,
shellIndex: Idx2ShellProto, shellIndex: Idx2ShellProto,
pub fn next(it: *Iterator) ?Self { pub fn next(it: *Iterator) error{Overflow}!?Self {
if (it.section) |section| { if (it.section) |section| {
const entry = Self.fromBytes(section); const entry = try Self.fromBytes(section);
it.section = entry.next; it.section = entry.next;
return entry.user; return entry.user;
} }
@ -203,13 +209,12 @@ fn packedUser(comptime ShellIndexType: type) type {
// packTo packs the User record and copies it to the given byte slice. // packTo packs the User record and copies it to the given byte slice.
// The slice must have at least maxRecordSize() bytes available. The // The slice must have at least maxRecordSize() bytes available. The
// slice is passed as a pointer, so it can be mutated. // slice is passed as a pointer, so it can be mutated.
const packErr = InvalidRecord || Allocator.Error;
pub fn packTo( pub fn packTo(
arr: *ArrayList(u8), arr: *ArrayList(u8),
user: User, user: User,
additional_gids_offset: u29, additional_gids_offset: usize,
idxFn: ShellIndexType, idxFn: ShellIndexType,
) packErr!void { ) error{ InvalidRecord, OutOfMemory }!void {
// function arguments are consts. We need to mutate the underlying // function arguments are consts. We need to mutate the underlying
// slice, so passing it via pointer instead. // slice, so passing it via pointer instead.
const home_len = try validate.downCast(u6, user.home.len - 1); const home_len = try validate.downCast(u6, user.home.len - 1);
@ -225,7 +230,6 @@ fn packedUser(comptime ShellIndexType: type) type {
const inner = Inner{ const inner = Inner{
.uid = user.uid, .uid = user.uid,
.gid = user.gid, .gid = user.gid,
.additional_gids_offset = additional_gids_offset,
.shell_here = idxFn.get(user.shell) == null, .shell_here = idxFn.get(user.shell) == null,
.shell_len_or_idx = idxFn.get(user.shell) orelse shell_len, .shell_len_or_idx = idxFn.get(user.shell) orelse shell_len,
.home_len = home_len, .home_len = home_len,
@ -235,9 +239,9 @@ fn packedUser(comptime ShellIndexType: type) type {
}; };
const innerBytes = mem.asBytes(&inner); const innerBytes = mem.asBytes(&inner);
// innerBytes.len is longer than InnerSize. We want to copy // innerBytes.len is longer than @sizeOf(Inner). We want to copy
// only the InnerSize-number of bytes. // only the @sizeOf(Inner)-number of bytes.
try arr.*.appendSlice(innerBytes[0..InnerSize]); try arr.*.appendSlice(innerBytes[0..@sizeOf(Inner)]);
try arr.*.appendSlice(user.home); try arr.*.appendSlice(user.home);
if (!inner.name_is_a_suffix) if (!inner.name_is_a_suffix)
@ -245,14 +249,15 @@ fn packedUser(comptime ShellIndexType: type) type {
try arr.*.appendSlice(user.gecos); try arr.*.appendSlice(user.gecos);
if (inner.shell_here) if (inner.shell_here)
try arr.*.appendSlice(user.shell); try arr.*.appendSlice(user.shell);
try compress.appendUvarint(arr, additional_gids_offset);
try pad.arrayList(arr, alignmentBits); try pad.arrayList(arr, alignmentBits);
} }
// maxSize is the maximum number of bytes a PackedUser can take // maxSize is the maximum number of bytes a PackedUser can take
// (struct + userdata). // (struct + strings).
pub fn maxSize() usize { pub fn maxSize() usize {
comptime { comptime {
const unpadded = InnerSize + const unpadded = @sizeOf(Inner) +
math.maxInt(u6) + 1 + // home math.maxInt(u6) + 1 + // home
math.maxInt(u5) + 1 + // name math.maxInt(u5) + 1 + // name
math.maxInt(u6) + 1 + // shell math.maxInt(u6) + 1 + // shell
@ -269,55 +274,43 @@ fn packedUser(comptime ShellIndexType: type) type {
return self.inner.gid; return self.inner.gid;
} }
pub fn additionalGidsOffset(self: Self) u29 { pub fn additionalGidsOffset(self: Self) u64 {
return self.inner.additional_gids_offset; return self.additional_gids_offset;
} }
pub fn home(self: Self) []const u8 { pub fn home(self: Self) []const u8 {
return self.userdata[0..self.inner.homeLen()]; return self.bytes[0..self.inner.homeLen()];
} }
pub fn name(self: Self) []const u8 { pub fn name(self: Self) []const u8 {
const name_pos = self.inner.nameStart(); const name_pos = self.inner.nameStart();
const name_len = self.inner.nameLen(); const name_len = self.inner.nameLen();
return self.userdata[name_pos .. name_pos + name_len]; return self.bytes[name_pos .. name_pos + name_len];
} }
pub fn gecos(self: Self) []const u8 { pub fn gecos(self: Self) []const u8 {
const gecos_pos = self.inner.gecosStart(); const gecos_pos = self.inner.gecosStart();
const gecos_len = self.inner.gecosLen(); const gecos_len = self.inner.gecosLen();
return self.userdata[gecos_pos .. gecos_pos + gecos_len]; return self.bytes[gecos_pos .. gecos_pos + gecos_len];
} }
pub fn shell(self: Self, idxFn: Idx2ShellProto) []const u8 { pub fn shell(self: Self, idxFn: Idx2ShellProto) []const u8 {
if (self.inner.shell_here) { if (self.inner.shell_here) {
const shell_pos = self.inner.maybeShellStart(); const shell_pos = self.inner.maybeShellStart();
const shell_len = self.inner.shellLen(); const shell_len = self.inner.shellLen();
return self.userdata[shell_pos .. shell_pos + shell_len]; return self.bytes[shell_pos .. shell_pos + shell_len];
} }
return idxFn(self.inner.shell_len_or_idx); return idxFn(self.inner.shell_len_or_idx);
} }
// mutable only: this function will refuse to compile otherwise.
pub fn setAdditionalGidsOffset(self: Self, new: u29) void {
self.inner.additional_gids_offset = new;
}
}; };
} }
const testing = std.testing; const testing = std.testing;
test "PackedUser internal and external alignment" { test "PackedUser internal and external alignment" {
// External padding (alignmentBits) must be higher or equal to
// the "internal" PackedUser alignment. By aligning PackedUser we are also
// working around https://github.com/ziglang/zig/issues/10958 ; PackedUser
// cannot be converted from/to [@bitSizeOf(PackedUser)/8]u8;
// asBytes/bytesAsValue use @sizeOf, which is larger. Now we are putting no
// more than 1, but it probably could be higher.
try testing.expectEqual( try testing.expectEqual(
8, @sizeOf(PackedUserHash.Inner) * 8,
@sizeOf(PackedUserHash.Inner) * 8 - @bitSizeOf(PackedUserHash.Inner),
@bitSizeOf(PackedUserHash.Inner),
); );
} }
@ -376,15 +369,15 @@ test "construct PackedUser section" {
.shell = "/", .shell = "/",
} }; } };
for (users) |user| for (users) |user|
try PackedUserTest.packTo(&buf, user, math.maxInt(u29), TestShellIndex{}); try PackedUserTest.packTo(&buf, user, math.maxInt(u64), TestShellIndex{});
var i: u29 = 0; var i: u29 = 0;
var it1 = PackedUserTest.iterator(buf.items, testShell); var it1 = PackedUserTest.iterator(buf.items, testShell);
while (it1.next()) |user| : (i += 1) { while (try it1.next()) |user| : (i += 1) {
try testing.expectEqual(users[i].uid, user.uid()); try testing.expectEqual(users[i].uid, user.uid());
try testing.expectEqual(users[i].gid, user.gid()); try testing.expectEqual(users[i].gid, user.gid());
try testing.expectEqual( try testing.expectEqual(
@as(u29, math.maxInt(u29)), @as(u64, math.maxInt(u64)),
user.additionalGidsOffset(), user.additionalGidsOffset(),
); );
try testing.expectEqualStrings(users[i].name, user.name()); try testing.expectEqualStrings(users[i].name, user.name());