1
Fork 0
turbonss/src/shellpop.zig

184 lines
6.4 KiB
Zig

const std = @import("std");
const Allocator = std.mem.Allocator;
const PriorityDequeue = std.PriorityDequeue;
const StringArrayHashMap = std.StringArrayHashMap;
const StringHashMap = std.StringHashMap;
const BoundedArray = std.BoundedArray;
const testing = std.testing;
// ShellIndex locates one shell string inside the shell blob.
//
// A shell can be up to 64 bytes (1<<6) and there are at most 63 shells
// ((1<<6)-1), so a byte offset into the blob stays below 1<<12. To make the
// location representable in 10 bits, every shell is padded to a multiple of
// 4 bytes and the offset is stored divided by 4.
const ShellIndex = struct {
// Byte offset of the shell in the blob, shifted right by 2 (i.e. in units
// of 4 bytes). Shift left by 2 to recover the byte offset.
offset: u10,
// Length of the shell in bytes, excluding padding.
len: u6,
};
// MaxShells is the maximum number of "popular" shells.
const MaxShells = 63;
// MaxShellLen is the number of blob bytes reserved per shell; the blob is
// sized as MaxShells * MaxShellLen.
// NOTE(review): ShellIndex.len is a u6, which holds at most 63, so a shell of
// exactly 64 bytes cannot be represented there -- confirm the intended limit.
const MaxShellLen = 64;
// ShellPopcon is a shell popularity contest: collect shells and return the
// popular ones, sorted by score. score := len(shell) * number_of_shells.
const ShellPopcon = struct {
    // Number of times each shell was passed to put(). Keys are private copies
    // allocated with `allocator` and freed in deinit().
    counts: std.StringHashMap(u32),
    allocator: Allocator,

    const Self = @This();
    const KV = struct { shell: []const u8, score: u32 };

    // ShellSections is the result of the contest:
    //   - blob:    the shell strings back to back, each padded with zero
    //              bytes to a multiple of 4;
    //   - index:   for the i'th shell, its (offset >> 2, len) within blob;
    //   - indices: shell string -> position in index.
    const ShellSections = struct {
        index: BoundedArray(ShellIndex, MaxShells),
        blob: BoundedArray(u8, MaxShells * MaxShellLen),
        // Keys are copies owned by this struct (allocated with the map's
        // allocator). They deliberately do not point into `blob`: the struct
        // is returned by value, so pointers into its own storage would dangle
        // as soon as it is copied or moved.
        indices: StringHashMap(u10),

        // Initializes and populates shell sections. All strings are copied,
        // nothing passed in is owned. The result must be released with
        // deinit().
        //
        // Returns error.ShellTooLong if a shell does not fit ShellIndex.len,
        // error.OutOfMemory if a copy or the map cannot be allocated, and
        // error.Overflow if the blob capacity is exceeded.
        pub fn init(
            allocator: Allocator,
            shells: BoundedArray([]const u8, MaxShells),
        ) !ShellSections {
            var self = ShellSections{
                .index = try BoundedArray(ShellIndex, MaxShells).init(shells.len),
                .blob = try BoundedArray(u8, MaxShells * MaxShellLen).init(0),
                .indices = StringHashMap(u10).init(allocator),
            };
            // From here on the map (and the keys it owns) must not leak when
            // a later step fails.
            errdefer self.deinit();

            // Byte offset in blob where the next shell starts; always a
            // multiple of 4.
            var fullOffset: u12 = 0;
            var idx: u10 = 0;
            while (idx < shells.len) : (idx += 1) {
                const shell = shells.get(idx);
                if (shell.len > std.math.maxInt(u6)) return error.ShellTooLong;
                const len = @intCast(u6, shell.len);

                try self.blob.appendSlice(shell);

                // Insert with the caller's slice first, then swap in our own
                // copy. A shell that is already present keeps its original
                // key, so nothing is duplicated or leaked.
                const gop = try self.indices.getOrPut(shell);
                if (!gop.found_existing) {
                    gop.key_ptr.* = allocator.dupe(u8, shell) catch |err| {
                        // Drop the entry that still refers to caller memory
                        // so deinit() does not try to free it.
                        _ = self.indices.remove(shell);
                        return err;
                    };
                }
                gop.value_ptr.* = idx;

                self.index.set(idx, ShellIndex{
                    .offset = @intCast(u10, fullOffset >> 2),
                    .len = len,
                });

                // Pad with zeroes so that the next shell starts at an offset
                // divisible by 4.
                const end = fullOffset + len;
                const padded = (end + 3) & ~@as(u12, 3);
                try self.blob.appendNTimes(0, padded - end);
                fullOffset = padded;
            }
            return self;
        }

        // Frees the lookup map together with the keys it owns and
        // invalidates the struct.
        pub fn deinit(self: *ShellSections) void {
            var it = self.indices.keyIterator();
            while (it.next()) |key_ptr| {
                self.indices.allocator.free(key_ptr.*);
            }
            self.indices.deinit();
            self.* = undefined;
        }

        // Returns the position of `shell` in `index`, or null if the shell
        // is not part of the sections.
        pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u10 {
            return self.indices.get(shell);
        }
    };

    // Creates an empty contest; all memory is taken from `allocator`.
    pub fn init(allocator: Allocator) Self {
        return Self{
            .counts = std.StringHashMap(u32).init(allocator),
            .allocator = allocator,
        };
    }

    // Frees the collected shells and invalidates the struct.
    pub fn deinit(self: *Self) void {
        var it = self.counts.keyIterator();
        while (it.next()) |key_ptr| {
            self.allocator.free(key_ptr.*);
        }
        self.counts.deinit();
        self.* = undefined;
    }

    // Registers one occurrence of `shell`. The string is copied on first
    // sight, so the caller keeps ownership of `shell`.
    pub fn put(self: *Self, shell: []const u8) !void {
        // TODO getOrPutAdapted may be more elegant, not sure which
        // context to pass.
        if (self.counts.getPtr(shell)) |count| {
            count.* += 1;
            return;
        }
        const ourShell = try self.allocator.dupe(u8, shell);
        // Do not leak the copy if the map fails to grow.
        errdefer self.allocator.free(ourShell);
        try self.counts.put(ourShell, 1);
    }

    // Orders candidates by score only.
    fn cmpShells(context: void, a: KV, b: KV) std.math.Order {
        _ = context;
        return std.math.order(a.score, b.score);
    }

    // toOwnedSections returns the analyzed ShellSections holding at most
    // `limit` shells, highest score first. Shells seen only once are ignored.
    //
    // ShellSections memory is allocated by the ShellPopcon allocator, and
    // must be deinit'ed by the caller. The contest is reset once building
    // the sections has been attempted (whether or not that succeeded); an
    // error before that point leaves the contest untouched.
    pub fn toOwnedSections(self: *Self, limit: u10) !ShellSections {
        var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
        defer deque.deinit();

        var it = self.counts.iterator();
        while (it.next()) |entry| {
            // A shell used by a single user is not worth a slot.
            if (entry.value_ptr.* == 1) {
                continue;
            }
            const score = @truncate(u32, entry.key_ptr.*.len) * entry.value_ptr.*;
            try deque.add(KV{ .shell = entry.key_ptr.*, .score = score });
        }

        const total = std.math.min(deque.count(), limit);
        var topShells = try BoundedArray([]const u8, MaxShells).init(total);
        var i: u32 = 0;
        while (i < total) : (i += 1) {
            const elem: []const u8 = deque.removeMax().shell;
            topShells.set(i, elem);
        }

        // topShells still points into self.counts' keys; ShellSections.init
        // copies everything it needs, so the keys may be freed afterwards.
        const result = ShellSections.init(self.allocator, topShells);
        const allocator = self.allocator;
        self.deinit();
        self.* = init(allocator);
        return result;
    }
};
// Feeds a handful of shells into the contest and checks the ranking, the
// lookup by name and the bytes stored in the blob.
test "basic shellpopcon" {
    var popcon = ShellPopcon.init(testing.allocator);
    defer popcon.deinit();

    const bash = "/bin/bash"; // 9 chars
    const zsh = "/bin/zsh"; // 8 chars
    const nobody = "/bin/nobody"; // only 1 instance, ought to ignore
    const long = "/bin/very-long-shell-name-ought-to-be-first"; // 43 chars
    const input = [_][]const u8{
        zsh, zsh, zsh, zsh, // zsh score 8*4=32
        bash, bash, bash, nobody, // bash score 3*9=27
        long, long, // long score 2*43=86
    };
    for (input) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(MaxShells);
    defer sections.deinit();

    // All but "nobody" qualify.
    try testing.expectEqual(@as(usize, 3), sections.index.constSlice().len);

    // Highest score first.
    try testing.expectEqual(@as(?u10, 0), sections.getIndex(long));
    try testing.expectEqual(@as(?u10, 1), sections.getIndex(zsh));
    try testing.expectEqual(@as(?u10, 2), sections.getIndex(bash));
    try testing.expectEqual(@as(?u10, null), sections.getIndex(nobody));

    // The index entry must lead back to the shell's bytes in the blob. Widen
    // the offset before shifting: it is stored divided by 4 in a u10 and the
    // byte offset does not fit that type in general.
    const entry = sections.index.get(sections.getIndex(zsh).?);
    const start = @as(usize, entry.offset) << 2;
    const got = sections.blob.constSlice()[start .. start + entry.len];
    try testing.expectEqualStrings(zsh, got);
}