2022-02-15 10:49:03 +02:00
|
|
|
const std = @import("std");
|
|
|
|
const Allocator = std.mem.Allocator;
|
|
|
|
const PriorityDequeue = std.PriorityDequeue;
|
|
|
|
const StringArrayHashMap = std.StringArrayHashMap;
|
|
|
|
const StringHashMap = std.StringHashMap;
|
|
|
|
const BoundedArray = std.BoundedArray;
|
2022-02-17 11:04:32 +02:00
|
|
|
const StringContext = std.hash_map.StringContext;
|
2022-02-15 10:49:03 +02:00
|
|
|
|
2022-02-17 06:38:54 +02:00
|
|
|
// MaxShells is the maximum number of "popular" shells: (1 << 6) - 1 = 63.
pub const MaxShells = (1 << 6) - 1;
// MaxShellLen is the maximum length of a single shell in bytes: 1 << 6 = 64.
pub const MaxShellLen = 1 << 6;
|
2022-02-17 06:38:54 +02:00
|
|
|
|
2022-02-15 10:49:03 +02:00
|
|
|
// ShellIndex locates one shell inside the shell blob. A shell is at most 64
// bytes long (1<<6) and there are at most 63 shells ((1<<6)-1), so every byte
// offset into the blob is below 1<<12. To make the offset fit in 10 bits,
// every shell is padded to a multiple of 4 bytes and the offset is stored
// divided by 4.
//
// Empty shells are not allowed, so the stored `len` is the real length minus
// one: the 6-bit values 0..63 stand for real lengths of 1..64 bytes.
//
// The struct is packed so that it occupies exactly 16 bits with a defined
// field order. A plain struct has no guaranteed layout, which matters because
// slices of ShellIndex are reinterpreted as raw bytes elsewhere in this file.
const ShellIndex = packed struct {
    // byte offset of the shell within the blob, divided by 4.
    offset: u10,
    // real shell length minus one.
    len: u6,
};
|
|
|
|
|
2022-02-17 06:38:54 +02:00
|
|
|
// ShellReader interprets "Shell Index" and "Shell Blob" sections.
pub const ShellReader = struct {
    sectionIndex: []const ShellIndex,
    sectionBlob: []const u8,

    // init wraps the two sections. Nothing is copied: the reader only keeps
    // the slices it was given, so both must outlive it.
    pub fn init(index: []const u8, blob: []const u8) ShellReader {
        return ShellReader{
            // NOTE(review): @bitCast appears to reinterpret the slice header
            // as-is, i.e. `index.len` becomes the number of ShellIndex
            // entries rather than a byte count. Confirm this matches what the
            // producer of `index` hands over.
            .sectionIndex = @bitCast([]const ShellIndex, index),
            .sectionBlob = blob,
        };
    }

    // get returns a shell at the given index.
    pub fn get(self: *const ShellReader, idx: u10) []const u8 {
        const shellIndex = self.sectionIndex[idx];
        // The stored offset is in units of 4 bytes. Widen it before shifting:
        // shifting inside u10 would drop the two top bits for any shell
        // located at byte 1024 or later, and adding the length in u10 could
        // overflow.
        const start = @as(usize, shellIndex.offset) << 2;
        // The stored length is the real length minus one.
        const end = start + shellIndex.len + 1;
        return self.sectionBlob[start..end];
    }
};
|
2022-02-15 10:49:03 +02:00
|
|
|
|
2022-02-17 06:38:54 +02:00
|
|
|
// ShellWriter is a shell popularity contest: collect shells and return the
// popular ones, sorted by score. score := len(shell) * number_of_shells.
pub const ShellWriter = struct {
    // number of times each shell was put(); keys are owned copies.
    counts: std.StringHashMap(u32),
    allocator: Allocator,

    const KV = struct { shell: []const u8, score: u32 };

    const ShellSections = struct {
        // one entry per shell, in the order the shells were given to init.
        index: BoundedArray(ShellIndex, MaxShells),
        // the shells themselves, each padded with zeroes to a multiple of 4.
        blob: BoundedArray(u8, MaxShells * MaxShellLen),
        // shell -> position in `index`. Keys are heap copies owned by this
        // struct: `blob` is stored inline, so it moves whenever the struct is
        // copied (including when init returns) and slices into it would
        // dangle.
        indices: StringHashMap(u10),

        // initializes and populates shell sections. All strings are copied;
        // the caller keeps ownership of `shells`. Returns
        // error.InvalidShellLength if a shell is empty or longer than
        // MaxShellLen, and propagates allocation/capacity errors. On error
        // nothing is leaked.
        pub fn init(
            allocator: Allocator,
            shells: BoundedArray([]const u8, MaxShells),
        ) !ShellSections {
            var self = ShellSections{
                .index = try BoundedArray(ShellIndex, MaxShells).init(shells.len),
                .blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0),
                .indices = StringHashMap(u10).init(allocator),
            };
            errdefer self.deinit();

            var fullOffset: u12 = 0;
            var idx: u10 = 0;
            while (idx < shells.len) : (idx += 1) {
                const shell = shells.get(idx);
                if (shell.len == 0 or shell.len > MaxShellLen) {
                    return error.InvalidShellLength;
                }
                // 1..64; kept wider than u6 because 64 does not fit in u6.
                const len = @intCast(u12, shell.len);

                try self.blob.appendSlice(shell);
                const padding = roundUp4Padding(fullOffset + len);
                try self.blob.appendNTimes(0, padding);

                // Register the lookup key last: it is the only allocation in
                // this iteration, so after this point nothing can fail and
                // the errdefer above never sees a half-registered key.
                const key = try allocator.dupe(u8, shell);
                const entry = self.indices.getOrPut(key) catch |err| {
                    allocator.free(key);
                    return err;
                };
                // A repeated shell keeps its first key; the newest position
                // wins, as with a plain put().
                if (entry.found_existing) allocator.free(key);
                entry.value_ptr.* = idx;

                self.index.set(idx, ShellIndex{
                    .offset = @intCast(u10, fullOffset >> 2),
                    .len = @intCast(u6, len - 1),
                });

                fullOffset += len + padding;
            }
            return self;
        }

        // sectionIndex returns the index section viewed as bytes.
        pub fn sectionIndex(self: *const ShellSections) []const u8 {
            // NOTE(review): @bitCast appears to keep the slice length as-is,
            // so the returned slice would have one byte per ShellIndex entry
            // rather than @sizeOf(ShellIndex) bytes per entry. Confirm before
            // writing this section out.
            return @bitCast([]const u8, self.index.constSlice());
        }

        // sectionBlob returns the padded shell strings.
        pub fn sectionBlob(self: *const ShellSections) []const u8 {
            return self.blob.constSlice();
        }

        // deinit frees the lookup keys and the lookup table.
        pub fn deinit(self: *ShellSections) void {
            const allocator = self.indices.allocator;
            var it = self.indices.keyIterator();
            while (it.next()) |key_ptr| {
                allocator.free(key_ptr.*);
            }
            self.indices.deinit();
            self.* = undefined;
        }

        // getIndex returns the position of the shell in the index section,
        // or null if the shell is not one of the stored shells.
        pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u10 {
            return self.indices.get(shell);
        }
    };

    pub fn init(allocator: Allocator) ShellWriter {
        return ShellWriter{
            .counts = std.StringHashMap(u32).init(allocator),
            .allocator = allocator,
        };
    }

    // deinit frees the owned shell copies and the counting table.
    pub fn deinit(self: *ShellWriter) void {
        var it = self.counts.keyIterator();
        while (it.next()) |key_ptr| {
            self.allocator.free(key_ptr.*);
        }
        self.counts.deinit();
        self.* = undefined;
    }

    // put records one occurrence of the shell. The string is copied on first
    // sight, so the caller keeps ownership of `shell`.
    pub fn put(self: *ShellWriter, shell: []const u8) !void {
        if (self.counts.getPtr(shell)) |count| {
            count.* += 1;
            return;
        }
        // Copy first and insert second: if either step fails, the table
        // never holds an entry whose key is not a valid owned string.
        const owned = try self.allocator.dupe(u8, shell);
        errdefer self.allocator.free(owned);
        try self.counts.putNoClobber(owned, 1);
    }

    // cmpShells orders by score; equal scores are ordered by the shell bytes
    // so that the outcome does not depend on hash map iteration order.
    fn cmpShells(context: void, a: KV, b: KV) std.math.Order {
        _ = context;
        const byScore = std.math.order(a.score, b.score);
        if (byScore != .eq) return byScore;
        return std.mem.order(u8, b.shell, a.shell);
    }

    // toOwnedSections returns the analyzed ShellSections holding at most
    // `limit` highest-scoring shells. Shells seen only once, and shells that
    // cannot be indexed (empty or longer than MaxShellLen), do not take part.
    // Resets the shell popularity contest, unless building the list of top
    // shells fails. ShellSections memory is allocated by the ShellWriter
    // allocator, and must be deinit'ed by the caller.
    pub fn toOwnedSections(self: *ShellWriter, limit: u10) !ShellSections {
        var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
        defer deque.deinit();

        var it = self.counts.iterator();
        while (it.next()) |entry| {
            const shell = entry.key_ptr.*;
            const count = entry.value_ptr.*;
            if (count == 1) {
                continue;
            }
            if (shell.len == 0 or shell.len > MaxShellLen) {
                continue;
            }
            // Saturate rather than overflow for absurdly large counts.
            const score = std.math.mul(u32, @intCast(u32, shell.len), count) catch
                std.math.maxInt(u32);
            try deque.add(KV{ .shell = shell, .score = score });
        }

        const total = std.math.min(deque.count(), limit);
        // Fails if more than MaxShells shells were requested and available.
        var topShells = try BoundedArray([]const u8, MaxShells).init(total);

        var i: u32 = 0;
        while (i < total) : (i += 1) {
            topShells.set(i, deque.removeMax().shell);
        }

        // Not `try`: the writer must be reset whether or not this succeeds.
        // The shells are copied by ShellSections.init before the keys they
        // point to are freed below.
        const result = ShellSections.init(self.allocator, topShells);
        const allocator = self.allocator;
        self.deinit();
        self.* = init(allocator);
        return result;
    }
};
|
|
|
|
|
2022-02-17 06:38:54 +02:00
|
|
|
// roundUp4Padding returns the number of bytes (0..3) that must be added to n
// to reach the next multiple of 4; 0 when n already is a multiple of 4.
// Only the two lowest bits of n are used, so no intermediate value can
// overflow u12, even for n close to the u12 maximum.
inline fn roundUp4Padding(n: u12) u12 {
    return (4 - (n & 3)) & 3;
}
|
|
|
|
|
2022-02-17 11:16:30 +02:00
|
|
|
const testing = std.testing;
|
|
|
|
|
2022-02-16 11:21:51 +02:00
|
|
|
// Feeds a handful of shells to ShellWriter, then checks which ones make it
// into the sections, in which order, and that ShellReader reads them back.
test "basic shellpopcon" {
    var popcon = ShellWriter.init(testing.allocator);
    defer popcon.deinit();

    const bash = "/bin/bash"; // 9 chars
    const zsh = "/bin/zsh"; // 8 chars
    const nobody = "/bin/nobody"; // only 1 instance, ought to ignore
    const long = "/bin/very-long-shell-name-ought-to-be-first"; // 43 chars
    const input = [_][]const u8{
        zsh, zsh, zsh, zsh, // zsh score 8*4=32
        bash, bash, bash, nobody, // bash score 9*3=27
        long, long, // long score 43*2=86
    };

    for (input) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(MaxShells);
    defer sections.deinit();
    // expected value goes first so that failure messages read correctly.
    try testing.expectEqual(@as(usize, 3), sections.index.len); // all but "nobody" qualify

    try testing.expectEqual(@as(?u10, 0), sections.getIndex(long));
    try testing.expectEqual(@as(?u10, 1), sections.getIndex(zsh));
    try testing.expectEqual(@as(?u10, 2), sections.getIndex(bash));
    try testing.expectEqual(@as(?u10, null), sections.getIndex(nobody));

    const shellReader = ShellReader.init(
        sections.sectionIndex(),
        sections.sectionBlob(),
    );
    try testing.expectEqualStrings(long, shellReader.get(0));
    try testing.expectEqualStrings(zsh, shellReader.get(1));
    try testing.expectEqualStrings(bash, shellReader.get(2));
}
|
2022-02-16 11:48:53 +02:00
|
|
|
|
2022-02-17 06:38:54 +02:00
|
|
|
// Checks roundUp4Padding around multiples of 4, near zero and near the top
// of the u12 range.
test "padding" {
    // each case is { input, expected padding }.
    const cases = [_][2]u12{
        .{ 0, 0 },
        .{ 1, 3 },
        .{ 2, 2 },
        .{ 3, 1 },
        .{ 4, 0 },
        .{ 40, 0 },
        .{ 41, 3 },
        .{ 42, 2 },
        .{ 43, 1 },
        .{ 44, 0 },
        .{ 4091, 1 },
        .{ 4092, 0 },
    };
    for (cases) |case| {
        // expected value goes first so that failure messages read correctly.
        try testing.expectEqual(case[1], roundUp4Padding(case[0]));
    }
}
|