rewrite shells

- Shell is up to 256 bytes long.
- Store up to 255 shells in the Shells area.
- Remove padding from the User struct.
This commit is contained in:
2022-03-17 16:50:41 +01:00
committed by Motiejus Jakštys
parent 85552c1302
commit 4e36d7850e
6 changed files with 81 additions and 134 deletions

View File

@@ -42,10 +42,11 @@ pub const PackedGroup = struct {
const Inner = packed struct {
gid: u32,
groupname_len: u8,
padding: u3 = 0,
groupname_len: u5,
pub fn groupnameLen(self: *const Inner) usize {
return self.groupname_len + 1;
return @as(usize, self.groupname_len) + 1;
}
};
@@ -120,13 +121,9 @@ pub const PackedGroup = struct {
group: GroupStored,
) packErr!void {
std.debug.assert(arr.items.len & 7 == 0);
const groupname_len = try validate.downCast(u5, group.name.len - 1);
try validate.utf8(group.name);
const inner = Inner{
.gid = group.gid,
.groupname_len = groupname_len,
};
const len = try validate.downCast(u5, group.name.len - 1);
const inner = Inner{ .gid = group.gid, .groupname_len = len };
try arr.*.appendSlice(mem.asBytes(&inner));
try arr.*.appendSlice(group.name);
try compress.appendUvarint(arr, group.members_offset);

View File

@@ -110,12 +110,6 @@ test "header pack, unpack and validation" {
try testing.expectError(error.InvalidBom, Header.init(header.asArray()));
}
{
var header = goodHeader;
header.num_shells = shell.max_shells + 1;
try testing.expectError(error.TooManyShells, Header.init(header.asArray()));
}
{
var header = goodHeader;
header.offset_bdz_uid2user = 65;

View File

@@ -234,7 +234,7 @@ pub fn usersSection(
&blob,
user,
gids.idx2offset[i],
shells.indices,
shells.shell2idx,
);
try pad.arrayList(&blob, userImport.PackedUser.alignment_bits);
}
@@ -439,6 +439,7 @@ pub const AllSections = struct {
var groups = try groupsSection(allocator, corpus, group_members.idx2offset);
errdefer groups.deinit(allocator);
// TODO: these indices must point to the *offsets*, not the indices in "users"
var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids);
errdefer allocator.free(idx_gid2group);

View File

@@ -1,5 +1,4 @@
const std = @import("std");
const pad = @import("padding.zig");
const Allocator = std.mem.Allocator;
const PriorityDequeue = std.PriorityDequeue;
const StringArrayHashMap = std.StringArrayHashMap;
@@ -7,40 +6,24 @@ const StringHashMap = std.StringHashMap;
const BoundedArray = std.BoundedArray;
const StringContext = std.hash_map.StringContext;
// max_shells is the maximum number of "popular" shells.
pub const max_shells = 63;
pub const max_shell_len = 64;
pub const shell_alignment_bits = 2; // bits
// ShellIndex is an index to the shell strings. As shell can be up to 64 bytes
// (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset
// is 1<<12. To make location resolvable in 10 bits, all shells will be padded
// to 4 bytes.
// The actual shell length is len+1: we don't allow empty shells, and the real
// length of the shell is 1-64 bytes.
pub const ShellIndex = packed struct {
offset: u10,
len: u6,
};
pub const max_shells = 255;
pub const max_shell_len = 256;
// ShellReader interprets "Shell Index" and "Shell Blob" sections.
pub const ShellReader = struct {
section_index: []const ShellIndex,
section_blob: []const u8,
index: []const u16,
blob: []const u8,
pub fn init(index: []const u8, blob: []const u8) ShellReader {
pub fn init(index: []align(2) const u8, blob: []const u8) ShellReader {
return ShellReader{
.section_index = std.mem.bytesAsSlice(ShellIndex, index),
.section_blob = blob,
.index = std.mem.bytesAsSlice(u16, index),
.blob = blob,
};
}
// get returns a shell at the given index.
pub fn get(self: *const ShellReader, idx: u6) []const u8 {
const shell_index = self.section_index[idx];
const start = shell_index.offset << 2;
const end = start + shell_index.len + 1;
return self.section_blob[start..end];
pub fn get(self: *const ShellReader, idx: u8) []const u8 {
return self.blob[self.index[idx]..self.index[idx + 1]];
}
};
@@ -55,45 +38,42 @@ pub const ShellWriter = struct {
};
pub const ShellSections = struct {
index: BoundedArray(ShellIndex, max_shells),
blob: BoundedArray(u8, max_shells * max_shell_len),
indices: StringHashMap(u6),
// index maps the i'th shell to its start offset in blob. The i'th shell
// ends just before offset index[i+1].
index: BoundedArray(u16, max_shells),
// blob holds the shells concatenated back to back. index has one more
// record than there are shells: the last record points to the end of the
// blob, so the length of the last shell can be calculated from the index array.
blob: BoundedArray(u8, (max_shells + 1) * max_shell_len),
// shell2idx helps translate a shell (string) to its index.
shell2idx: StringHashMap(u8),
// initializes and populates shell sections. All strings are copied,
// nothing is owned.
pub const initErr = Allocator.Error || error{Overflow};
pub fn init(
allocator: Allocator,
shells: BoundedArray([]const u8, max_shells),
) initErr!ShellSections {
) error{ Overflow, OutOfMemory }!ShellSections {
var self = ShellSections{
.index = try BoundedArray(ShellIndex, max_shells).init(shells.len),
.blob = try BoundedArray(u8, max_shells * max_shell_len).init(0),
.indices = StringHashMap(u6).init(allocator),
.index = try BoundedArray(u16, max_shells).init(shells.len),
.blob = try BoundedArray(u8, (max_shells + 1) * max_shell_len).init(0),
.shell2idx = StringHashMap(u8).init(allocator),
};
errdefer self.indices.deinit();
var full_offset: u12 = 0;
var idx: u6 = 0;
while (idx < shells.len) : (idx += 1) {
const len = try std.math.cast(u6, shells.get(idx).len);
try self.blob.appendSlice(shells.get(idx));
const our_shell = self.blob.constSlice()[full_offset .. full_offset + len];
try self.indices.put(our_shell, idx);
std.debug.assert(full_offset & 3 == 0);
self.index.set(idx, ShellIndex{
.offset = try std.math.cast(u10, full_offset >> 2),
.len = len - 1,
});
if (shells.len == 0) return self;
full_offset += len;
const padding = pad.roundUpPadding(u12, shell_alignment_bits, full_offset);
full_offset += padding;
try self.blob.appendNTimes(0, padding);
errdefer self.shell2idx.deinit();
for (shells.constSlice()) |shell, idx| {
const idx8 = @intCast(u8, idx);
const offset = @intCast(u16, self.blob.len);
try self.blob.appendSlice(shell);
try self.shell2idx.put(self.blob.constSlice()[offset..], idx8);
self.index.set(idx8, offset);
}
try self.index.append(@intCast(u8, self.blob.len));
return self;
}
pub fn section_index(self: *const ShellSections) []const u8 {
pub fn section_index(self: *const ShellSections) []align(2) const u8 {
return std.mem.sliceAsBytes(self.index.constSlice());
}
@@ -102,12 +82,12 @@ pub const ShellWriter = struct {
}
pub fn deinit(self: *ShellSections) void {
self.indices.deinit();
self.shell2idx.deinit();
self.* = undefined;
}
pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u6 {
return self.indices.get(shell);
pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u8 {
return self.shell2idx.get(shell);
}
};
@@ -143,8 +123,10 @@ pub const ShellWriter = struct {
// toOwnedSections returns the analyzed ShellSections. Resets the shell
// popularity contest. ShellSections memory is allocated by the ShellWriter
// allocator, and must be deInit'ed by the caller.
const toOwnedSectionsErr = Allocator.Error || error{Overflow};
pub fn toOwnedSections(self: *ShellWriter, limit: u10) toOwnedSectionsErr!ShellSections {
pub fn toOwnedSections(
self: *ShellWriter,
limit: u10,
) error{ Overflow, OutOfMemory }!ShellSections {
var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
defer deque.deinit();
@@ -164,9 +146,8 @@ pub const ShellWriter = struct {
topShells.set(i, deque.removeMax().shell);
const result = ShellSections.init(self.allocator, topShells);
const allocator = self.allocator;
self.deinit();
self.* = init(allocator);
self.* = init(self.allocator);
return result;
}
};
@@ -192,16 +173,13 @@ test "basic shellpopcon" {
var sections = try popcon.toOwnedSections(max_shells);
defer sections.deinit();
try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify
try testing.expectEqual(sections.index.len, 4); // all but "nobody" qualify
try testing.expectEqual(sections.getIndex(long).?, 0);
try testing.expectEqual(sections.getIndex(zsh).?, 1);
try testing.expectEqual(sections.getIndex(bash).?, 2);
try testing.expectEqual(sections.getIndex(nobody), null);
try testing.expectEqual(
sections.section_blob().len,
pad.roundUp(u12, 2, bash.len) + pad.roundUp(u12, 2, zsh.len) + pad.roundUp(u12, 2, long.len),
);
try testing.expectEqual(sections.section_blob().len, bash.len + zsh.len + long.len);
const shellReader = ShellReader.init(
sections.section_index(),
@@ -211,5 +189,5 @@ test "basic shellpopcon" {
try testing.expectEqualStrings(shellReader.get(1), zsh);
try testing.expectEqualStrings(shellReader.get(2), bash);
try testing.expectEqual(shellReader.section_index.len, 3);
try testing.expectEqual(shellReader.index.len, 4);
}

View File

@@ -13,10 +13,6 @@ const Allocator = mem.Allocator;
const ArrayList = std.ArrayList;
const StringHashMap = std.StringHashMap;
// Idx2ShellProto is a function prototype that, given a shell's index (in
// global shell section), will return a shell string. Matches ShellReader.get.
const Idx2ShellProto = fn (u6) []const u8;
// User is a convenient public struct for record construction and
// serialization.
pub const User = struct {
@@ -65,21 +61,6 @@ pub const User = struct {
}
};
pub fn Shell2Index(T: type) type {
return struct {
const Self = @This();
data: T,
pub fn init(data: T) Self {
return Self{ .data = data };
}
pub fn get(self: *const Self, str: []const u8) ?u6 {
return self.data.get(str);
}
};
}
pub const PackedUser = struct {
const Self = @This();
@@ -88,8 +69,7 @@ pub const PackedUser = struct {
const Inner = packed struct {
uid: u32,
gid: u32,
padding: u2 = 0,
shell_len_or_idx: u6,
shell_len_or_idx: u8,
shell_here: bool,
name_is_a_suffix: bool,
home_len: u6,
@@ -204,14 +184,14 @@ pub const PackedUser = struct {
arr: *ArrayList(u8),
user: User,
additional_gids_offset: u64,
idxFn: StringHashMap(u6),
idxFn: StringHashMap(u8),
) error{ InvalidRecord, OutOfMemory }!void {
std.debug.assert(arr.items.len & 7 == 0);
// function arguments are consts. We need to mutate the underlying
// slice, so passing it via pointer instead.
const home_len = try validate.downCast(u6, user.home.len - 1);
const name_len = try validate.downCast(u5, user.name.len - 1);
const shell_len = try validate.downCast(u6, user.shell.len - 1);
const shell_len = try validate.downCast(u8, user.shell.len - 1);
const gecos_len = try validate.downCast(u8, user.gecos.len);
try validate.utf8(user.home);
@@ -289,19 +269,16 @@ test "PackedUser internal and external alignment" {
);
}
fn testShellIndex(allocator: Allocator) StringHashMap(u6) {
var result = StringHashMap(u6).init(allocator);
fn testShellIndex(allocator: Allocator) StringHashMap(u8) {
var result = StringHashMap(u8).init(allocator);
result.put("/bin/bash", 0) catch unreachable;
result.put("/bin/zsh", 1) catch unreachable;
return result;
}
const test_shell_reader = shellImport.ShellReader{
.section_blob = "/bin/bash.../bin/zsh",
.section_index = &[_]shellImport.ShellIndex{
shellImport.ShellIndex{ .offset = 0, .len = 9 - 1 },
shellImport.ShellIndex{ .offset = 12 >> 2, .len = 8 - 1 },
},
.blob = "/bin/bash/bin/zsh",
.index = &[_]u16{ 0, 9, 17 },
};
test "construct PackedUser section" {
@@ -328,7 +305,7 @@ test "construct PackedUser section" {
.name = "Name" ** 8,
.gecos = "Gecos" ** 51,
.home = "Home" ** 16,
.shell = "She.LllL" ** 8,
.shell = "She.LllL" ** 32,
}, User{
.uid = 1002,
.gid = 1002,