const std = @import("std");
const Allocator = std.mem.Allocator;
const PriorityDequeue = std.PriorityDequeue;
const StringHashMap = std.StringHashMap;
const BoundedArray = std.BoundedArray;
const assert = std.debug.assert;

pub const max_shells = 255;
pub const max_shell_len = 256;

// ShellReader interprets "Shell Index" and "Shell Blob" sections.
//
// index holds u16 offsets into blob; shell i occupies
// blob[index[i]..index[i + 1]], so index carries one more entry than there
// are shells.
pub const ShellReader = struct {
    index: []align(8) const u16,
    blob: []const u8,

    // init reinterprets the raw index bytes as u16 offsets. Neither slice is
    // copied; both must outlive the reader.
    pub fn init(index: []align(8) const u8, blob: []const u8) ShellReader {
        return ShellReader{
            .index = std.mem.bytesAsSlice(u16, index),
            .blob = blob,
        };
    }

    // get returns a shell at the given index. idx must be smaller than the
    // number of shells (index.len - 1).
    pub fn get(self: *const ShellReader, idx: u8) []const u8 {
        const start = self.index[idx];
        // Widen before adding one so the arithmetic is not done in u8.
        const end = self.index[@as(usize, idx) + 1];
        return self.blob[start..end];
    }
};

// ShellWriter is a shell popularity contest: collect shells and return the
// popular ones, sorted by score. score := len(shell) * number_of_shells.
pub const ShellWriter = struct {
    // counts maps a shell (key owned by the writer) to the number of times it
    // was put.
    counts: std.StringHashMap(u32),
    allocator: Allocator,

    const KV = struct {
        shell: []const u8,
        score: u64,
    };

    pub const ShellSections = struct {
        // len is the number of shells in this section.
        len: u8,
        // index points the i'th shell to its offset in blob. When there is
        // at least one shell, index holds `len + 1` records: the last record
        // points to the end of the blob, so the length of the last shell can
        // be calculated from the index array.
        index: BoundedArray(u16, max_shells + 1),
        // blob is all shells concatenated, without separators.
        blob: BoundedArray(u8, (max_shells + 1) * max_shell_len),
        // shell2idx helps translate a shell (string) to its index. Keys are
        // heap copies released by deinit.
        shell2idx: StringHashMap(u8),

        // initializes and populates shell sections. All strings are copied
        // (into blob and into the shell2idx keys); the input slices are not
        // retained. The total length of all shells must fit in a u16.
        // Release the result with deinit.
        pub fn init(
            allocator: Allocator,
            shells: BoundedArray([]const u8, max_shells),
        ) error{OutOfMemory}!ShellSections {
            assert(shells.len <= max_shells);
            var self = ShellSections{
                .len = @intCast(u8, shells.len),
                .index = BoundedArray(u16, max_shells + 1).init(shells.len) catch unreachable,
                .blob = BoundedArray(u8, (max_shells + 1) * max_shell_len).init(0) catch unreachable,
                .shell2idx = StringHashMap(u8).init(allocator),
            };
            if (shells.len == 0) return self;

            errdefer freeShell2idx(&self.shell2idx);
            for (shells.constSlice(), 0..) |shell, idx| {
                const idx8 = @intCast(u8, idx);
                const offset = @intCast(u16, self.blob.len);
                self.blob.appendSliceAssumeCapacity(shell);
                self.index.set(idx8, offset);

                // The map key must not point into self.blob: self is returned
                // by value, so such a slice would dangle into this function's
                // stack frame. Use a heap copy instead.
                const key = try allocator.dupe(u8, shell);
                const res = self.shell2idx.getOrPut(key) catch |err| {
                    allocator.free(key);
                    return err;
                };
                // Duplicate shell: the map keeps its first key, drop ours.
                if (res.found_existing) allocator.free(key);
                res.value_ptr.* = idx8;
            }
            // End sentinel. Offsets are u16; the blob may exceed 255 bytes.
            self.index.appendAssumeCapacity(@intCast(u16, self.blob.len));
            return self;
        }

        pub fn deinit(self: *ShellSections) void {
            freeShell2idx(&self.shell2idx);
            self.* = undefined;
        }

        // getIndex returns the index of the given shell, or null when the
        // shell is not part of the sections.
        pub fn getIndex(self: *const ShellSections, shell: []const u8) ?u8 {
            return self.shell2idx.get(shell);
        }

        // freeShell2idx frees every key of the map, then the map itself.
        fn freeShell2idx(map: *StringHashMap(u8)) void {
            var it = map.keyIterator();
            while (it.next()) |key_ptr| map.allocator.free(key_ptr.*);
            map.deinit();
        }
    };

    pub fn init(allocator: Allocator) ShellWriter {
        return ShellWriter{
            .counts = std.StringHashMap(u32).init(allocator),
            .allocator = allocator,
        };
    }

    // deinit frees all collected shells. The writer must not be used
    // afterwards.
    pub fn deinit(self: *ShellWriter) void {
        var it = self.counts.keyIterator();
        while (it.next()) |key_ptr| self.allocator.free(key_ptr.*);
        self.counts.deinit();
        self.* = undefined;
    }

    // put registers one occurrence of shell. The string is copied on first
    // sight; the caller keeps ownership of the argument.
    pub fn put(self: *ShellWriter, shell: []const u8) !void {
        const res = try self.counts.getOrPut(shell);
        if (res.found_existing) {
            res.value_ptr.* += 1;
            return;
        }
        // The new entry currently holds the caller's slice as its key. Swap
        // in an owned copy, or drop the entry again if that fails, so deinit
        // never frees memory the writer does not own.
        res.key_ptr.* = self.allocator.dupe(u8, shell) catch |err| {
            _ = self.counts.remove(shell);
            return err;
        };
        res.value_ptr.* = 1;
    }

    fn cmpShells(_: void, a: KV, b: KV) std.math.Order {
        return std.math.order(a.score, b.score);
    }

    // toOwnedSections returns the analyzed ShellSections: at most `limit`
    // shells that were put more than once, highest score first. Resets the
    // shell popularity contest. ShellSections memory is allocated by the
    // ShellWriter allocator, and must be deinit'ed by the caller.
    pub fn toOwnedSections(self: *ShellWriter, limit: u10) error{OutOfMemory}!ShellSections {
        assert(limit <= max_shells);
        // Keep a copy: deinit() below sets self.* to undefined, so
        // self.allocator must not be read after it.
        const allocator = self.allocator;

        var deque = PriorityDequeue(KV, void, cmpShells).init(allocator, {});
        defer deque.deinit();

        var it = self.counts.iterator();
        while (it.next()) |entry| {
            // Shells seen only once do not qualify.
            if (entry.value_ptr.* == 1) continue;
            const score = @as(u64, entry.key_ptr.*.len) * entry.value_ptr.*;
            try deque.add(KV{ .shell = entry.key_ptr.*, .score = score });
        }

        const total = std.math.min(deque.count(), limit);
        var top_shells = BoundedArray([]const u8, max_shells).init(total) catch unreachable;
        var i: u32 = 0;
        while (i < total) : (i += 1)
            top_shells.set(i, deque.removeMax().shell);

        // top_shells borrows keys from self.counts; ShellSections.init copies
        // them before they are freed by deinit.
        const result = ShellSections.init(allocator, top_shells);
        self.deinit();
        self.* = init(allocator);
        return result;
    }
};

const testing = std.testing;

test "shell basic shellpopcon" {
    var popcon = ShellWriter.init(testing.allocator);
    defer popcon.deinit();

    const bash = "/bin/bash"; // 9 chars
    const zsh = "/bin/zsh"; // 8 chars
    const long = "/bin/very-long-shell-name-ought-to-be-first"; // 43 chars
    const nobody = "/bin/nobody"; // only 1 instance, ought to ignore

    const input = [_][]const u8{
        zsh, zsh, zsh, zsh, // zsh score 8*4=32
        bash, bash, bash, nobody, // bash score 3*9=27
        long, long, // long score 2*43=86
    };

    for (input) |shell| {
        try popcon.put(shell);
    }

    var sections = try popcon.toOwnedSections(max_shells);
    defer sections.deinit();
    // 3 qualifying shells (all but "nobody") plus the end sentinel.
    try testing.expectEqual(sections.index.len, 4);
    try testing.expectEqual(sections.getIndex(long).?, 0);
    try testing.expectEqual(sections.getIndex(zsh).?, 1);
    try testing.expectEqual(sections.getIndex(bash).?, 2);
    try testing.expectEqual(sections.getIndex(nobody), null);
    try testing.expectEqual(sections.blob.constSlice().len, bash.len + zsh.len + long.len);

    // copying section_index until https://github.com/ziglang/zig/pull/14580
    var section_index: [max_shells + 1]u16 align(8) = undefined;
    for (sections.index.constSlice(), 0..) |elem, i|
        section_index[i] = elem;
    const shellReader = ShellReader.init(
        std.mem.sliceAsBytes(section_index[0..sections.index.len]),
        sections.blob.constSlice(),
    );
    try testing.expectEqualStrings(shellReader.get(0), long);
    try testing.expectEqualStrings(shellReader.get(1), zsh);
    try testing.expectEqualStrings(shellReader.get(2), bash);

    try testing.expectEqual(shellReader.index.len, 4);

    // The writer is reset, not destroyed, by toOwnedSections.
    try popcon.put(zsh);
    try testing.expectEqual(popcon.counts.count(), 1);
}

test "shell sections at max_shells with a blob larger than 255 bytes" {
    var popcon = ShellWriter.init(testing.allocator);
    defer popcon.deinit();

    var buf: [32]u8 = undefined;
    var i: usize = 0;
    while (i < max_shells) : (i += 1) {
        // 14 chars each, e.g. "/bin/shell-007"
        const name = try std.fmt.bufPrint(&buf, "/bin/shell-{d:0>3}", .{i});
        try popcon.put(name);
        try popcon.put(name);
    }

    var sections = try popcon.toOwnedSections(max_shells);
    defer sections.deinit();

    try testing.expectEqual(sections.len, max_shells);
    try testing.expectEqual(sections.index.len, max_shells + 1);
    try testing.expectEqual(sections.blob.len, max_shells * 14);
    try testing.expectEqual(sections.index.get(max_shells), max_shells * 14);
    try testing.expect(sections.getIndex("/bin/shell-000") != null);
    try testing.expect(sections.getIndex("/bin/shell-254") != null);
}