Fork 0

220 lines
7.7 KiB
Raw Normal View History

2022-02-15 10:49:03 +02:00
const std = @import("std");
2022-02-19 18:18:14 +02:00
const pad = @import("padding.zig");
2022-02-15 10:49:03 +02:00
const Allocator = std.mem.Allocator;
const PriorityDequeue = std.PriorityDequeue;
const StringArrayHashMap = std.StringArrayHashMap;
const StringHashMap = std.StringHashMap;
const BoundedArray = std.BoundedArray;
2022-02-17 11:04:32 +02:00
const StringContext = std.hash_map.StringContext;
2022-02-15 10:49:03 +02:00
2022-03-03 18:05:46 +02:00
// max_shells is the maximum number of "popular" shells.
pub const max_shells = 63;
// max_shell_len is the maximum length of a single shell, in bytes.
pub const max_shell_len = 64;
2022-02-20 09:10:25 +02:00
// ShellAlignment is the number of alignment bits passed to
// pad.roundUpPadding when laying out the shell blob; shell offsets are
// stored shifted right by the same amount (i.e. in units of 4 bytes).
const ShellAlignment = 2; // bits
2022-02-17 06:38:54 +02:00
2022-03-03 18:05:46 +02:00
// ShellIndex locates one shell inside the shell blob. A shell can be up to
// 64 bytes (1<<6) and there can be up to 63 shells (1<<6-1), so a byte
// offset into the blob always stays below 1<<12. To make the location fit in
// 10 bits, every shell is padded to a multiple of 4 bytes and the offset is
// stored divided by 4.
// The actual shell length is len+1: we don't allow empty shells, and the real
// length of the shell is 1-64 bytes.
const ShellIndex = packed struct {
    // offset is the shell's byte position in the blob, shifted right by 2.
    offset: u10,
    // len is the shell length minus one.
    len: u6,
2022-02-17 06:38:54 +02:00
// ShellReader interprets "Shell Index" and "Shell Blob" sections.
pub const ShellReader = struct {
    // sectionIndex is the index section reinterpreted as ShellIndex records.
    sectionIndex: []const ShellIndex,
    // sectionBlob is the raw, padded shell string storage.
    sectionBlob: []const u8,

    // init wraps the two sections without copying them; both slices must
    // outlive the returned reader.
    pub fn init(index: []const u8, blob: []const u8) ShellReader {
        return ShellReader{
            .sectionIndex = std.mem.bytesAsSlice(ShellIndex, index),
            .sectionBlob = blob,
        };
    }

    // get returns a shell at the given index.
    pub fn get(self: *const ShellReader, idx: u6) []const u8 {
        const shellIndex = self.sectionIndex[idx];
        // Widen before shifting: `<<` on a u10 silently discards the top
        // bits, which would corrupt every byte offset of 1024 or more.
        const start = @as(usize, shellIndex.offset) << ShellAlignment;
        // The stored length is the real length minus one.
        const end = start + shellIndex.len + 1;
        return self.sectionBlob[start..end];
    }
};
2022-02-15 10:49:03 +02:00
2022-02-17 06:38:54 +02:00
// ShellWriter is a shell popularity contest: collect shells and return the
// popular ones, sorted by score. score := len(shell) * number_of_occurrences.
2022-02-18 07:42:43 +02:00
pub const ShellWriter = struct {
2022-02-15 10:49:03 +02:00
// counts maps each shell seen so far to the number of times it was put.
counts: std.StringHashMap(u32),
// allocator is used to duplicate the keys stored in `counts` and for
// temporary storage while ranking shells.
allocator: Allocator,
2022-02-19 15:50:13 +02:00
// KV pairs a shell with its popularity score; it is the element type of the
// priority queue used to rank shells.
const KV = struct {
    // shell is the candidate shell string.
    shell: []const u8,
    // score is len(shell) multiplied by the number of occurrences.
    score: u64,
2022-02-15 10:49:03 +02:00
// ShellSections holds the serializable "Shell Index" and "Shell Blob"
// sections plus a lookup table from shell string to index.
const ShellSections = struct {
    // index has one ShellIndex record per shell, in input order.
    index: BoundedArray(ShellIndex, max_shells),
    // blob stores the shell strings back to back, each followed by zero
    // padding as dictated by ShellAlignment.
    blob: BoundedArray(u8, max_shells * max_shell_len),
    // indices maps a shell string to its position in `index`. The keys are
    // heap copies owned by this struct and released in deinit.
    indices: StringHashMap(u6),

    pub const initErr = Allocator.Error || error{Overflow};

    // init populates the shell sections from `shells`. All strings are
    // copied; the caller keeps ownership of `shells`. The result must be
    // released with deinit.
    //
    // Returns error.Overflow when a shell is empty or longer than
    // max_shell_len bytes, or when the blob capacity is exceeded.
    pub fn init(
        allocator: Allocator,
        shells: BoundedArray([]const u8, max_shells),
    ) initErr!ShellSections {
        var self = ShellSections{
            .index = try BoundedArray(ShellIndex, max_shells).init(shells.len),
            .blob = try BoundedArray(u8, max_shells * max_shell_len).init(0),
            .indices = StringHashMap(u6).init(allocator),
        };
        // Release the map (and the keys inserted so far) on any failure.
        errdefer self.deinit();

        var fullOffset: u12 = 0;
        var idx: u6 = 0;
        while (idx < shells.len) : (idx += 1) {
            const shell = shells.get(idx);
            // The stored length is len-1 in 6 bits, so only 1..64 bytes can
            // be represented. A u6 cannot hold 64, hence the u7 below.
            if (shell.len == 0 or shell.len > max_shell_len)
                return error.Overflow;
            const len = @intCast(u7, shell.len);

            try self.blob.appendSlice(shell);

            // Keys are heap copies. Slices into `self.blob` would point into
            // this function's local `self` and dangle once it is returned
            // by value.
            if (self.indices.getPtr(shell)) |existing| {
                existing.* = idx;
            } else {
                const key = try allocator.dupe(u8, shell);
                errdefer allocator.free(key);
                try self.indices.putNoClobber(key, idx);
            }

            self.index.set(idx, ShellIndex{
                .offset = @intCast(u10, fullOffset >> ShellAlignment),
                .len = @intCast(u6, len - 1),
            });

            fullOffset += len;
            const padding = pad.roundUpPadding(u12, ShellAlignment, fullOffset);
            fullOffset += padding;
            try self.blob.appendNTimes(0, padding);
        }
        return self;
    }

    // sectionIndex returns the index section as raw bytes.
    pub fn sectionIndex(self: *const ShellSections) []const u8 {
        return std.mem.sliceAsBytes(self.index.constSlice());
    }

    // sectionBlob returns the blob section as raw bytes.
    pub fn sectionBlob(self: *const ShellSections) []const u8 {
        return self.blob.constSlice();
    }

    // deinit frees the lookup table and its keys, then poisons the struct.
    pub fn deinit(self: *ShellSections) void {
        const allocator = self.indices.allocator;
        var it = self.indices.keyIterator();
        while (it.next()) |key_ptr| {
            allocator.free(key_ptr.*);
        }
        self.indices.deinit();
        self.* = undefined;
    }

    // getIndex returns the index of `shell`, or null if it is not present.
    pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u6 {
        return self.indices.get(shell);
    }
};
2022-02-15 10:49:03 +02:00
2022-02-17 06:38:54 +02:00
// init returns an empty popularity contest whose bookkeeping is backed by
// `allocator`.
pub fn init(allocator: Allocator) ShellWriter {
    const empty_counts = std.StringHashMap(u32).init(allocator);
    return ShellWriter{
        .counts = empty_counts,
        .allocator = allocator,
    };
}
2022-02-17 06:38:54 +02:00
// deinit releases every key duplicated by put, then the map itself, and
// finally poisons the struct so that accidental reuse is caught.
pub fn deinit(self: *ShellWriter) void {
    var it = self.counts.keyIterator();
    while (it.next()) |key_ptr| {
        self.allocator.free(key_ptr.*);
    }
    self.counts.deinit();
    self.* = undefined;
}
2022-02-17 06:38:54 +02:00
// put records one occurrence of `shell`. The string is copied on first
// sight, so the caller may reuse or free `shell` afterwards.
pub fn put(self: *ShellWriter, shell: []const u8) !void {
    if (self.counts.getPtr(shell)) |count| {
        count.* += 1;
        return;
    }
    // Duplicate the key before touching the map: reserving a slot first and
    // failing the copy afterwards would leave an entry with an undefined
    // key behind.
    const owned = try self.allocator.dupe(u8, shell);
    errdefer self.allocator.free(owned);
    try self.counts.putNoClobber(owned, 1);
}
2022-02-15 10:49:03 +02:00
// cmpShells orders two candidates by their score; the context is unused.
fn cmpShells(context: void, a: KV, b: KV) std.math.Order {
    _ = context;
    if (a.score < b.score) return .lt;
    if (a.score > b.score) return .gt;
    return .eq;
}
2022-02-16 11:21:51 +02:00
// toOwnedSections returns the analyzed ShellSections. Resets the shell
// popularity contest. ShellSections memory is allocated by the ShellWriter
// allocator, and must be deInit'ed by the caller.
//
// At most `limit` shells are returned, highest score first. Shells that
// were put only once are ignored. Returns error.Overflow when more than
// max_shells shells would be selected.
const toOwnedSectionsErr = Allocator.Error || error{Overflow};
pub fn toOwnedSections(self: *ShellWriter, limit: u10) toOwnedSectionsErr!ShellSections {
    var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
    defer deque.deinit();

    var it = self.counts.iterator();
    while (it.next()) |entry| {
        // A shell used by a single entry is not worth an index slot.
        if (entry.value_ptr.* == 1)
            continue;

        const score = entry.key_ptr.*.len * entry.value_ptr.*;
        try deque.add(KV{ .shell = entry.key_ptr.*, .score = score });
    }

    const total = std.math.min(deque.count(), limit);
    var topShells = try BoundedArray([]const u8, max_shells).init(total);

    var i: u32 = 0;
    while (i < total) : (i += 1) {
        const elem = deque.removeMax().shell;
        topShells.set(i, elem);
    }

    // ShellSections.init copies the strings, so the keys borrowed from
    // `self.counts` may be released right after it returns.
    const result = ShellSections.init(self.allocator, topShells);
    const allocator = self.allocator;

    // Free the old map and its keys before starting over; overwriting
    // `self` without this would leak them.
    self.deinit();
    self.* = init(allocator);
    return result;
}
2022-02-15 10:49:03 +02:00
2022-02-17 11:16:30 +02:00
const testing = std.testing;
2022-02-16 11:21:51 +02:00
// Feeds a handful of shells through ShellWriter and checks that the
// resulting sections rank them by score and round-trip through ShellReader.
test "basic shellpopcon" {
    var popcon = ShellWriter.init(testing.allocator);
    defer popcon.deinit();

    const bash = "/bin/bash"; // 9 chars
    const zsh = "/bin/zsh"; // 8 chars
    const long = "/bin/very-long-shell-name-ought-to-be-first"; // 43 chars
    const nobody = "/bin/nobody"; // only 1 instance, ought to ignore
    const input = [_][]const u8{
        zsh, zsh, zsh, zsh, // zsh score 8*4=32
        bash, bash, bash, nobody, // bash score 3*9=27
        long, long, // long score 2*43=86
    };

    for (input) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(max_shells);
    defer sections.deinit();
    try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify

    try testing.expectEqual(sections.getIndex(long).?, 0);
    try testing.expectEqual(sections.getIndex(zsh).?, 1);
    try testing.expectEqual(sections.getIndex(bash).?, 2);
    try testing.expectEqual(sections.getIndex(nobody), null);

    // The blob holds every qualifying shell rounded up to the padding unit.
    try testing.expectEqual(
        sections.sectionBlob().len,
        pad.roundUp(u12, 2, bash.len) + pad.roundUp(u12, 2, zsh.len) + pad.roundUp(u12, 2, long.len),
    );

    const shellReader = ShellReader.init(
        sections.sectionIndex(),
        sections.sectionBlob(),
    );
    try testing.expectEqualStrings(shellReader.get(0), long);
    try testing.expectEqualStrings(shellReader.get(1), zsh);
    try testing.expectEqualStrings(shellReader.get(2), bash);

    try testing.expectEqual(shellReader.sectionIndex.len, 3);
}
2022-02-17 06:38:54 +02:00