turbonss/lib/shell.zig

198 lines
7.0 KiB
Zig
Raw Normal View History

2022-02-15 10:49:03 +02:00
const std = @import("std");
const Allocator = std.mem.Allocator;
const PriorityDequeue = std.PriorityDequeue;
const StringHashMap = std.StringHashMap;
const BoundedArray = std.BoundedArray;
const assert = std.debug.assert;
2022-02-15 10:49:03 +02:00
// max_shells is the upper bound on the number of shells that fit in one
// ShellSections; it sizes the bounded arrays below and caps the `limit`
// accepted by ShellWriter.toOwnedSections.
pub const max_shells = 255;
// max_shell_len is the per-shell byte budget used to size the shell blob
// ((max_shells + 1) * max_shell_len bytes in total).
pub const max_shell_len = 256;
2022-03-03 18:05:46 +02:00
2022-02-17 06:38:54 +02:00
// ShellReader is a read-only view over a "Shell Index" section and a
// "Shell Blob" section. It borrows both slices and owns nothing.
pub const ShellReader = struct {
    // index holds u16 offsets into blob; shell i spans index[i]..index[i+1].
    index: []const u16,
    // blob holds the shell strings the offsets point into.
    blob: []const u8,

    // init reinterprets the raw index section bytes as u16 offsets and pairs
    // them with the blob.
    pub fn init(index: []align(2) const u8, blob: []const u8) ShellReader {
        const offsets = std.mem.bytesAsSlice(u16, index);
        return ShellReader{ .index = offsets, .blob = blob };
    }

    // get returns the shell stored at position idx. The returned slice points
    // into blob.
    pub fn get(self: *const ShellReader, idx: u8) []const u8 {
        const start = self.index[idx];
        const end = self.index[idx + 1];
        return self.blob[start..end];
    }
};
2022-02-15 10:49:03 +02:00
2022-02-17 06:38:54 +02:00
// ShellWriter is a shell popularity contest: collect shells and return the
// popular ones, sorted by score. score := len(shell) * number_of_shells.
pub const ShellWriter = struct {
    // counts maps a shell to the number of times it was put(). Keys are heap
    // copies owned by this struct and released in deinit.
    counts: std.StringHashMap(u32),
    // allocator backs the owned keys of counts and the sections returned by
    // toOwnedSections.
    allocator: Allocator,

    // KV is a scored shell: the element type of the priority queue used in
    // toOwnedSections.
    const KV = struct {
        shell: []const u8,
        score: u64,
    };

    // ShellSections holds the "Shell Index" and "Shell Blob" sections plus a
    // lookup table from a shell to its index.
    pub const ShellSections = struct {
        // len is the number of shells in this section.
        len: u8,
        // index holds len+1 offsets into blob: shell i occupies
        // blob[index[i]..index[i+1]]. The extra trailing offset is the end of
        // the blob, so the length of the last shell can be calculated. index
        // is empty when there are no shells.
        index: BoundedArray(u16, max_shells + 1),
        // blob is all shells concatenated back to back, without separators.
        blob: BoundedArray(u8, (max_shells + 1) * max_shell_len),
        // shell2idx helps translate a shell (string) to its index. Keys are
        // heap copies owned by this struct. They must not point into blob:
        // blob is stored inline, so its address changes whenever the struct
        // is copied (e.g. when it is returned by value).
        shell2idx: StringHashMap(u8),

        // init populates the shell sections from `shells`, in the given
        // order. All strings are copied; the input is not retained. The
        // combined length of the shells must fit in blob. Release the result
        // with deinit. Returns error.OutOfMemory if the lookup table cannot
        // be allocated.
        pub fn init(
            allocator: Allocator,
            shells: BoundedArray([]const u8, max_shells),
        ) error{OutOfMemory}!ShellSections {
            assert(shells.len <= max_shells);
            var self = ShellSections{
                .len = @intCast(u8, shells.len),
                .index = BoundedArray(u16, max_shells + 1).init(shells.len) catch unreachable,
                .blob = BoundedArray(u8, (max_shells + 1) * max_shell_len).init(0) catch unreachable,
                .shell2idx = StringHashMap(u8).init(allocator),
            };
            if (shells.len == 0) return self;

            // Frees the keys inserted so far, then the table itself.
            errdefer self.deinit();
            for (shells.constSlice()) |shell, idx| {
                const idx8 = @intCast(u8, idx);
                const offset = @intCast(u16, self.blob.len);
                self.blob.appendSliceAssumeCapacity(shell);
                self.index.set(idx8, offset);

                if (self.shell2idx.getPtr(shell)) |existing| {
                    // Duplicate input: the last occurrence wins and the key
                    // that is already owned is reused.
                    existing.* = idx8;
                } else {
                    const key = try allocator.dupe(u8, shell);
                    errdefer allocator.free(key);
                    try self.shell2idx.putNoClobber(key, idx8);
                }
            }
            // Trailing end-of-blob offset. It is a u16 like every other index
            // entry; the blob routinely exceeds 255 bytes.
            self.index.appendAssumeCapacity(@intCast(u16, self.blob.len));
            return self;
        }

        // section_index returns the index as raw bytes ("Shell Index").
        pub fn section_index(self: *const ShellSections) []align(2) const u8 {
            return std.mem.sliceAsBytes(self.index.constSlice());
        }

        // section_blob returns the concatenated shells ("Shell Blob").
        pub fn section_blob(self: *const ShellSections) []const u8 {
            return self.blob.constSlice();
        }

        // deinit releases the lookup table and its keys, and invalidates
        // self.
        pub fn deinit(self: *ShellSections) void {
            const allocator = self.shell2idx.allocator;
            var it = self.shell2idx.keyIterator();
            while (it.next()) |key_ptr|
                allocator.free(key_ptr.*);
            self.shell2idx.deinit();
            self.* = undefined;
        }

        // getIndex returns the index of the given shell, or null if the
        // shell is not part of these sections.
        pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u8 {
            return self.shell2idx.get(shell);
        }
    };

    // init returns an empty popularity contest that allocates from
    // `allocator`.
    pub fn init(allocator: Allocator) ShellWriter {
        return ShellWriter{
            .counts = std.StringHashMap(u32).init(allocator),
            .allocator = allocator,
        };
    }

    // deinit frees all collected shells and invalidates self.
    pub fn deinit(self: *ShellWriter) void {
        var it = self.counts.keyIterator();
        while (it.next()) |key_ptr|
            self.allocator.free(key_ptr.*);
        self.counts.deinit();
        self.* = undefined;
    }

    // put records one occurrence of `shell`. The string is copied on first
    // sight, so the caller keeps ownership of `shell`.
    pub fn put(self: *ShellWriter, shell: []const u8) !void {
        if (self.counts.getPtr(shell)) |count| {
            count.* += 1;
            return;
        }
        // Copy the key before touching the map, so that a failed allocation
        // never leaves a half-initialized entry behind.
        const owned = try self.allocator.dupe(u8, shell);
        errdefer self.allocator.free(owned);
        try self.counts.putNoClobber(owned, 1);
    }

    // cmpShells orders scored shells by ascending score.
    fn cmpShells(_: void, a: KV, b: KV) std.math.Order {
        return std.math.order(a.score, b.score);
    }

    // toOwnedSections returns the analyzed ShellSections holding at most
    // `limit` shells, highest score first. Shells seen only once are ignored.
    // Resets the shell popularity contest. ShellSections memory is allocated
    // by the ShellWriter allocator, and must be deinit'ed by the caller.
    pub fn toOwnedSections(self: *ShellWriter, limit: u10) error{OutOfMemory}!ShellSections {
        assert(limit <= max_shells);

        var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
        defer deque.deinit();

        var it = self.counts.iterator();
        while (it.next()) |entry| {
            if (entry.value_ptr.* == 1)
                continue;
            const score = @as(u64, entry.key_ptr.*.len) * entry.value_ptr.*;
            try deque.add(KV{ .shell = entry.key_ptr.*, .score = score });
        }

        const total = std.math.min(deque.count(), limit);
        var top_shells = BoundedArray([]const u8, max_shells).init(total) catch |err| switch (err) {
            error.Overflow => unreachable,
        };

        var i: u32 = 0;
        while (i < total) : (i += 1)
            top_shells.set(i, deque.removeMax().shell);

        // deinit() overwrites self with undefined, so take the allocator out
        // first. top_shells borrows keys from counts, which is why the
        // sections are built before the reset.
        const allocator = self.allocator;
        const result = ShellSections.init(allocator, top_shells);
        self.deinit();
        self.* = init(allocator);
        return result;
    }
};
2022-02-17 11:16:30 +02:00
const testing = std.testing;
2022-02-16 11:21:51 +02:00
// Feeds a handful of shells through ShellWriter and checks that the resulting
// sections are ordered by score and can be read back with ShellReader.
test "basic shellpopcon" {
    var popcon = ShellWriter.init(testing.allocator);
    // toOwnedSections re-initializes popcon, so this is a no-op on success
    // and frees the collected shells if the test bails out early.
    defer popcon.deinit();

    const bash = "/bin/bash"; // 9 chars
    const zsh = "/bin/zsh"; // 8 chars
    const long = "/bin/very-long-shell-name-ought-to-be-first"; // 43 chars
    const nobody = "/bin/nobody"; // only 1 instance, ought to ignore
    const input = [_][]const u8{
        zsh, zsh, zsh, zsh, // zsh score 8*4=32
        bash, bash, bash, nobody, // bash score 3*9=27
        long, long, // long score 2*43=86
    };
    for (input) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(max_shells);
    defer sections.deinit();
    // 3 qualifying shells (all but "nobody") + 1 end-of-blob offset.
    try testing.expectEqual(@as(usize, 4), sections.index.len);

    try testing.expectEqual(@as(u8, 0), sections.getIndex(long).?);
    try testing.expectEqual(@as(u8, 1), sections.getIndex(zsh).?);
    try testing.expectEqual(@as(u8, 2), sections.getIndex(bash).?);
    try testing.expectEqual(@as(?u8, null), sections.getIndex(nobody));
    try testing.expectEqual(bash.len + zsh.len + long.len, sections.section_blob().len);

    const reader = ShellReader.init(
        sections.section_index(),
        sections.section_blob(),
    );
    try testing.expectEqualStrings(long, reader.get(0));
    try testing.expectEqualStrings(zsh, reader.get(1));
    try testing.expectEqualStrings(bash, reader.get(2));
    try testing.expectEqual(@as(usize, 4), reader.index.len);
}