// Listing metadata (from the source viewer): 192 lines, 6.9 KiB, Zig.
const std = @import("std");
|
|
const Allocator = std.mem.Allocator;
|
|
const PriorityDequeue = std.PriorityDequeue;
|
|
const StringHashMap = std.StringHashMap;
|
|
const BoundedArray = std.BoundedArray;
|
|
const assert = std.debug.assert;
|
|
|
|
/// Upper bound on the number of shells in one section. It is the capacity of
/// the shell arrays below and is asserted by `ShellSections.init` and
/// `ShellWriter.toOwnedSections`.
pub const max_shells = 255;
|
|
/// Per-shell byte budget used to size the blob buffer, which holds
/// `(max_shells + 1) * max_shell_len` bytes.
/// NOTE(review): presumably the maximum length of a single shell string;
/// nothing in this file checks individual shells against it -- confirm.
pub const max_shell_len = 256;
|
|
|
|
/// ShellReader decodes a "Shell Index" / "Shell Blob" section pair. Both
/// slices are borrowed; nothing is copied or allocated.
pub const ShellReader = struct {
    /// Offsets into `blob`, viewed as u16s over the raw index bytes. Entries
    /// `i` and `i + 1` delimit the i'th shell.
    index: []align(8) const u16,
    /// The shell strings, addressed through `index`.
    blob: []const u8,

    /// Wraps the raw section bytes. `index` is reinterpreted in place as a
    /// slice of u16 offsets.
    pub fn init(index: []align(8) const u8, blob: []const u8) ShellReader {
        const offsets = std.mem.bytesAsSlice(u16, index);
        return .{ .index = offsets, .blob = blob };
    }

    /// Returns the shell stored at position `idx`: the bytes of `blob`
    /// between `index[idx]` and `index[idx + 1]`.
    pub fn get(self: *const ShellReader, idx: u8) []const u8 {
        const begin = self.index[idx];
        const end = self.index[idx + 1];
        return self.blob[begin..end];
    }
};
|
|
|
|
/// ShellWriter is a shell popularity contest: collect shells and return the
/// popular ones, sorted by score. score := len(shell) * number_of_shells.
pub const ShellWriter = struct {
    /// Number of times each shell was `put`. Keys are copies allocated with
    /// `allocator` and released in `deinit`.
    counts: std.StringHashMap(u32),
    allocator: Allocator,

    /// A candidate shell with its popularity score.
    const KV = struct {
        shell: []const u8,
        score: u64,
    };

    pub const ShellSections = struct {
        /// len is the number of shells in this section.
        len: u8,
        /// index maps the i'th shell to its offset in blob; the i'th shell
        /// ends right before index[i+1]. A non-empty section holds `len + 1`
        /// entries: the last one is the blob length, so the length of the
        /// last shell can be calculated from the index alone. An empty
        /// section has an empty index.
        index: BoundedArray(u16, max_shells + 1),
        /// blob is all shells concatenated in index order.
        blob: BoundedArray(u8, (max_shells + 1) * max_shell_len),
        /// shell2idx helps translate a shell (string) to its index. Keys are
        /// copies owned by this struct and are freed in `deinit`.
        shell2idx: StringHashMap(u8),

        /// Initializes and populates shell sections. All strings are copied,
        /// so `shells` does not need to outlive this call. The caller must
        /// call `deinit` on the result.
        ///
        /// Returns error.OutOfMemory if a lookup key or the lookup table
        /// cannot be allocated; nothing is leaked in that case.
        pub fn init(
            allocator: Allocator,
            shells: BoundedArray([]const u8, max_shells),
        ) error{OutOfMemory}!ShellSections {
            assert(shells.len <= max_shells);
            var self = ShellSections{
                .len = @intCast(u8, shells.len),
                .index = BoundedArray(u16, max_shells + 1).init(shells.len) catch unreachable,
                .blob = BoundedArray(u8, (max_shells + 1) * max_shell_len).init(0) catch unreachable,
                .shell2idx = StringHashMap(u8).init(allocator),
            };
            if (shells.len == 0) return self;

            // Releases the keys inserted so far as well as the table.
            errdefer self.deinit();
            for (shells.constSlice(), 0..) |shell, idx| {
                const idx8 = @intCast(u8, idx);
                const offset = @intCast(u16, self.blob.len);
                self.blob.appendSliceAssumeCapacity(shell);
                self.index.set(idx8, offset);

                // A repeated shell keeps its first key and takes the latest
                // index, as a plain `put` would.
                if (self.shell2idx.getPtr(shell)) |existing| {
                    existing.* = idx8;
                    continue;
                }

                // The key must not point into `self.blob`: `self` is
                // returned by value, so such a slice would dangle as soon as
                // this function returns.
                const key = try allocator.dupe(u8, shell);
                errdefer allocator.free(key);
                try self.shell2idx.putNoClobber(key, idx8);
            }
            // Terminal entry: the end offset of the last shell. Offsets are
            // u16, the same width as every other index entry.
            self.index.appendAssumeCapacity(@intCast(u16, self.blob.len));
            return self;
        }

        /// Frees the lookup keys and the lookup table, then invalidates
        /// `self`.
        pub fn deinit(self: *ShellSections) void {
            var it = self.shell2idx.keyIterator();
            while (it.next()) |key_ptr|
                self.shell2idx.allocator.free(key_ptr.*);
            self.shell2idx.deinit();
            self.* = undefined;
        }

        /// Returns the index of `shell`, or null if it is not part of this
        /// section.
        pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u8 {
            return self.shell2idx.get(shell);
        }
    };

    /// Creates an empty popularity contest. Nothing is allocated until the
    /// first `put`.
    pub fn init(allocator: Allocator) ShellWriter {
        return ShellWriter{
            .counts = std.StringHashMap(u32).init(allocator),
            .allocator = allocator,
        };
    }

    /// Frees every recorded shell and the count table, then invalidates
    /// `self`.
    pub fn deinit(self: *ShellWriter) void {
        var it = self.counts.keyIterator();
        while (it.next()) |key_ptr|
            self.counts.allocator.free(key_ptr.*);
        self.counts.deinit();
        self.* = undefined;
    }

    /// Records one occurrence of `shell`. The string is copied the first
    /// time it is seen, so the caller keeps ownership of `shell`.
    ///
    /// On error.OutOfMemory the writer is left unchanged.
    pub fn put(self: *ShellWriter, shell: []const u8) error{OutOfMemory}!void {
        if (self.counts.getPtr(shell)) |count| {
            count.* += 1;
            return;
        }
        // Copy the key before touching the table: a failed allocation must
        // not leave a slot with an undefined key behind, since `deinit`
        // frees every key it finds.
        const owned = try self.allocator.dupe(u8, shell);
        errdefer self.allocator.free(owned);
        try self.counts.putNoClobber(owned, 1);
    }

    /// Orders candidates by score; the priority dequeue uses it to hand out
    /// the highest-scoring shell first.
    fn cmpShells(_: void, a: KV, b: KV) std.math.Order {
        return std.math.order(a.score, b.score);
    }

    /// toOwnedSections returns the analyzed ShellSections holding at most
    /// `limit` shells (`limit` must not exceed `max_shells`), highest score
    /// first. Shells seen only once are ignored. Resets the shell popularity
    /// contest. ShellSections memory is allocated by the ShellWriter
    /// allocator, and must be deinit'ed by the caller.
    pub fn toOwnedSections(self: *ShellWriter, limit: u10) error{OutOfMemory}!ShellSections {
        assert(limit <= max_shells);

        var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
        defer deque.deinit();

        var it = self.counts.iterator();
        while (it.next()) |entry| {
            if (entry.value_ptr.* == 1)
                continue;
            // Widen before multiplying so the product is computed in u64 on
            // every target.
            const score = @as(u64, entry.key_ptr.*.len) * entry.value_ptr.*;
            try deque.add(KV{ .shell = entry.key_ptr.*, .score = score });
        }

        const total = std.math.min(deque.count(), limit);
        var top_shells = BoundedArray([]const u8, max_shells).init(total) catch unreachable;

        var i: u32 = 0;
        while (i < total) : (i += 1)
            top_shells.set(i, deque.removeMax().shell);

        // `deinit` sets `self.*` to undefined, so the allocator has to be
        // saved before the reset. The sections are built first because
        // `top_shells` still points at keys owned by `counts`.
        const allocator = self.allocator;
        const result = ShellSections.init(allocator, top_shells);
        self.deinit();
        self.* = init(allocator);
        return result;
    }
};
|
|
|
|
const testing = std.testing;
|
|
|
|
// End-to-end check: feed shells to the popularity contest, verify which ones
// qualify and in which order, then read them back through ShellReader.
test "shell basic shellpopcon" {
    var popcon = ShellWriter.init(testing.allocator);
    // toOwnedSections re-initializes the writer, so deinit remains valid
    // after it; this also releases the keys if an earlier step fails.
    defer popcon.deinit();

    const bash = "/bin/bash"; // 9 chars
    const zsh = "/bin/zsh"; // 8 chars
    const long = "/bin/very-long-shell-name-ought-to-be-first"; // 43 chars
    const nobody = "/bin/nobody"; // only 1 instance, ought to ignore
    const input = [_][]const u8{
        zsh, zsh, zsh, zsh, // zsh score 8*4=32
        bash, bash, bash, nobody, // bash score 3*9=27
        long, long, // long score 2*43=86
    };

    for (input) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(max_shells);
    defer sections.deinit();
    // All but "nobody" qualify: 3 shells plus the terminal index entry.
    try testing.expectEqual(@as(usize, 4), sections.index.len);

    try testing.expectEqual(@as(u8, 0), sections.getIndex(long).?);
    try testing.expectEqual(@as(u8, 1), sections.getIndex(zsh).?);
    try testing.expectEqual(@as(u8, 2), sections.getIndex(bash).?);
    try testing.expectEqual(@as(?u8, null), sections.getIndex(nobody));
    try testing.expectEqual(
        @as(usize, bash.len + zsh.len + long.len),
        sections.blob.constSlice().len,
    );

    // copying section_index until https://github.com/ziglang/zig/pull/14580
    var section_index: [max_shells]u16 align(8) = undefined;
    for (sections.index.constSlice(), 0..) |elem, i|
        section_index[i] = elem;
    const shell_reader = ShellReader.init(
        std.mem.sliceAsBytes(section_index[0..sections.index.len]),
        sections.blob.constSlice(),
    );
    try testing.expectEqualStrings(long, shell_reader.get(0));
    try testing.expectEqualStrings(zsh, shell_reader.get(1));
    try testing.expectEqualStrings(bash, shell_reader.get(2));

    try testing.expectEqual(@as(usize, 4), shell_reader.index.len);
}
|