make Shell Writer work.

This commit is contained in:
Motiejus Jakštys 2022-02-17 06:38:54 +02:00 committed by Motiejus Jakštys
parent e2bc4e6094
commit d9c8e69440
2 changed files with 85 additions and 29 deletions

View File

@ -6,25 +6,47 @@ const StringHashMap = std.StringHashMap;
const BoundedArray = std.BoundedArray;
const testing = std.testing;
// MaxShells is the maximum number of "popular" shells.
const MaxShells = 63;
const MaxShellLen = 64;
// ShellIndex locates one shell inside the "Shell Blob" section.
//
// A shell is 1-64 bytes long (MaxShellLen) and at most 63 shells are stored
// (MaxShells), so a byte offset into the blob needs up to 12 bits. Every
// shell is padded to a multiple of 4 bytes, which lets the offset be stored
// divided by 4 and therefore fit in 10 bits.
//
// Empty shells are not allowed, so the stored len is the real length minus
// one: 0-63 encodes 1-64 bytes.
//
// NOTE(review): this is a plain struct (neither packed nor extern), so its
// field layout is not guaranteed, yet ShellReader.init reinterprets raw
// section bytes as ShellIndex values. Confirm the layout is the intended one.
const ShellIndex = struct {
// Byte offset of the shell within the blob, divided by 4.
offset: u10,
// Length of the shell in bytes, minus one.
len: u6,
};
// MaxShells is the maximum number of "popular" shells.
const MaxShells = 63;
const MaxShellLen = 64;
// ShellReader interprets "Shell Index" and "Shell Blob" sections.
const ShellReader = struct {
sectionIndex: []const ShellIndex,
sectionBlob: []const u8,
// ShellPopcon is a shell popularity contest: collect shells and return the
pub fn init(index: []const u8, blob: []const u8) ShellReader {
    // Wrap the raw "Shell Index" and "Shell Blob" section bytes in a reader.
    // Both slices are stored as-is (no copy is made here).
    //
    // NOTE(review): the index bytes are reinterpreted with @bitCast on the
    // slice itself; presumably the slice length is carried over unchanged
    // rather than divided by @sizeOf(ShellIndex). Verify against the writer.
    const entries = @bitCast([]const ShellIndex, index);
    return ShellReader{ .sectionIndex = entries, .sectionBlob = blob };
}
// get returns the shell stored at the given index.
//
// idx selects an entry of the index section. The entry's offset is stored
// divided by 4 and its len is stored minus one, so both are decoded here
// before slicing the blob. The returned slice points into the blob; nothing
// is copied.
pub fn get(self: *ShellReader, idx: u10) []const u8 {
    const entry = self.sectionIndex[idx];
    // Widen before decoding: shifting a u10 left by 2 stays a u10 and would
    // silently drop the two top bits for byte offsets >= 1024, and adding the
    // length in u10 could overflow near the end of the blob.
    const start = @as(usize, entry.offset) << 2;
    const end = start + @as(usize, entry.len) + 1;
    return self.sectionBlob[start..end];
}
};
// ShellWriter is a shell popularity contest: collect shells and return the
// popular ones, sorted by score. score := len(shell) * number_of_shells.
const ShellPopcon = struct {
const ShellWriter = struct {
counts: std.StringHashMap(u32),
allocator: Allocator,
const Self = @This();
const KV = struct { shell: []const u8, score: u32 };
const ShellSections = struct {
@ -52,18 +74,28 @@ const ShellPopcon = struct {
try self.indices.put(ourShell, idx);
self.index.set(idx, ShellIndex{
.offset = @intCast(u10, fullOffset >> 2),
.len = len,
.len = len - 1,
});
// Add padding to make offset divisible by 4.
const padding = (fullOffset + 3) & ~@intCast(u12, 3);
fullOffset += len + padding;
fullOffset += len;
const padding = roundUp4Padding(fullOffset);
fullOffset += padding;
//const stderr = std.io.getStdErr().writer();
//try stderr.print("\n", .{});
try self.blob.appendNTimes(0, padding);
idx += 1;
}
return self;
}
pub fn sectionIndex(self: *ShellSections) []const u8 {
    // Expose the collected index entries as raw bytes, without copying.
    //
    // NOTE(review): @bitCast is applied to the slice value itself, so the
    // byte slice presumably keeps the entry count as its length instead of
    // count * @sizeOf(ShellIndex). Confirm the result covers all index bytes.
    const entries = self.index.constSlice();
    return @bitCast([]const u8, entries);
}
pub fn sectionBlob(self: *ShellSections) []const u8 {
    // Read-only view of the blob bytes accumulated so far; the slice is
    // whatever self.blob.constSlice() yields.
    const bytes = self.blob.constSlice();
    return bytes;
}
pub fn deinit(self: *ShellSections) void {
self.indices.deinit();
self.* = undefined;
@ -74,14 +106,14 @@ const ShellPopcon = struct {
}
};
pub fn init(allocator: Allocator) Self {
return Self{
pub fn init(allocator: Allocator) ShellWriter {
return ShellWriter{
.counts = std.StringHashMap(u32).init(allocator),
.allocator = allocator,
};
}
pub fn deinit(self: *Self) void {
pub fn deinit(self: *ShellWriter) void {
var it = self.counts.keyIterator();
while (it.next()) |key_ptr| {
self.counts.allocator.free(key_ptr.*);
@ -90,7 +122,7 @@ const ShellPopcon = struct {
self.* = undefined;
}
pub fn put(self: *Self, shell: []const u8) !void {
pub fn put(self: *ShellWriter, shell: []const u8) !void {
// TODO getOrPutAdapted may be more elegant, not sure which
// context to pass.
if (self.counts.getPtr(shell)) |ptr| {
@ -108,9 +140,9 @@ const ShellPopcon = struct {
}
// toOwnedSections returns the analyzed ShellSections. Resets the shell
// popularity contest. ShellSections memory is allocated by the ShellPopcon
// popularity contest. ShellSections memory is allocated by the ShellWriter
// allocator, and must be deInit'ed by the caller.
pub fn toOwnedSections(self: *Self, limit: u10) !ShellSections {
pub fn toOwnedSections(self: *ShellWriter, limit: u10) !ShellSections {
var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
defer deque.deinit();
@ -141,8 +173,14 @@ const ShellPopcon = struct {
}
};
// roundUp4Padding returns how many bytes must be added to n to reach the
// next multiple of 4 (0 when n is already a multiple of 4). The result is
// always in the range 0-3.
//
// The padding is derived from the two low bits of n only, so the computation
// cannot overflow even for n in 4093..4095, where n + 3 does not fit in u12.
inline fn roundUp4Padding(n: u12) u12 {
    return (4 - (n & 3)) & 3;
}
test "basic shellpopcon" {
var popcon = ShellPopcon.init(testing.allocator);
var popcon = ShellWriter.init(testing.allocator);
defer popcon.deinit();
const bash = "/bin/bash"; // 9 chars
@ -163,21 +201,39 @@ test "basic shellpopcon" {
defer sections.deinit();
try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify
// TODO(motiejus): reverse the arguments: first should be "expected".
try testing.expectEqual(sections.getIndex(long).?, 0);
try testing.expectEqual(sections.getIndex(zsh).?, 1);
try testing.expectEqual(sections.getIndex(bash).?, 2);
try testing.expectEqual(sections.getIndex(nobody), null);
const idx = sections.getIndex(zsh).?;
const start = sections.index.get(idx).offset << 2;
const end = start + sections.index.get(idx).len;
const got = sections.blob.constSlice()[start..end];
var shellReader = ShellReader.init(
sections.sectionIndex(),
sections.sectionBlob(),
);
try testing.expectEqualStrings(shellReader.get(0), long);
try testing.expectEqualStrings(shellReader.get(1), zsh);
try testing.expectEqualStrings(shellReader.get(2), bash);
const stderr = std.io.getStdErr().writer();
try stderr.print("\n", .{});
try stderr.print("gotLong: {s}\n", .{got});
try stderr.print(" long: {s}\n", .{zsh});
try testing.expectEqual(got, zsh);
for ([_][]const u8{ long, zsh, bash }) |shell| {
const idx = sections.getIndex(shell).?;
const start = sections.index.get(idx).offset << 2;
const end = start + sections.index.get(idx).len + 1;
const got = sections.blob.constSlice()[start..end];
try testing.expectEqualStrings(got, shell);
}
}
test "padding" {
    // Each case pairs an input with the padding roundUp4Padding must report:
    // small values, values around 40, and values near the top of the u12
    // range.
    const Case = struct { n: u12, padding: u12 };
    const cases = [_]Case{
        .{ .n = 0, .padding = 0 },
        .{ .n = 1, .padding = 3 },
        .{ .n = 2, .padding = 2 },
        .{ .n = 3, .padding = 1 },
        .{ .n = 4, .padding = 0 },
        .{ .n = 40, .padding = 0 },
        .{ .n = 41, .padding = 3 },
        .{ .n = 42, .padding = 2 },
        .{ .n = 43, .padding = 1 },
        .{ .n = 44, .padding = 0 },
        .{ .n = 4091, .padding = 1 },
        .{ .n = 4092, .padding = 0 },
    };
    for (cases) |case| {
        try testing.expectEqual(roundUp4Padding(case.n), case.padding);
    }
}

View File

@ -1,4 +1,4 @@
// Root test block: it references each listed source file through @import,
// presumably so that the test blocks declared in those files are compiled
// into this test run.
// NOTE(review): both "shellpop.zig" and "shell.zig" are listed here; this
// looks like the two sides of a file rename. Confirm which file exists.
test "turbonss test suite" {
_ = @import("main.zig");
_ = @import("shellpop.zig");
_ = @import("shell.zig");
}