make Shell Writer work.
This commit is contained in:
parent
e2bc4e6094
commit
d9c8e69440
@ -6,25 +6,47 @@ const StringHashMap = std.StringHashMap;
|
||||
const BoundedArray = std.BoundedArray;
|
||||
const testing = std.testing;
|
||||
|
||||
// MaxShells is the maximum number of "popular" shells.
|
||||
const MaxShells = 63;
|
||||
// MaxShellLen is the maximum length of a single shell, in bytes.
const MaxShellLen = 64;
|
||||
|
||||
// ShellIndex locates one shell string inside the "Shell Blob" section.
//
// A shell is at most 64 bytes (1<<6) and there are at most 63 shells
// ((1<<6)-1), so a byte offset into the blob is always below 1<<12. Every
// shell is padded to a multiple of 4 bytes, which lets the offset be stored
// divided by 4 and therefore fit in 10 bits.
//
// Empty shells are not allowed, so `len` stores the real length minus one:
// the real length of a shell is len+1, i.e. 1-64 bytes.
//
// The struct is `packed` because index entries are reinterpreted to and from
// raw bytes; a plain struct has no guaranteed field layout or size.
const ShellIndex = packed struct {
    // Byte offset of the shell in the blob, divided by 4.
    offset: u10,
    // Length of the shell in bytes, minus one.
    len: u6,
};
|
||||
|
||||
// MaxShells is the maximum number of "popular" shells.
|
||||
const MaxShells = 63;
|
||||
const MaxShellLen = 64;
|
||||
// ShellReader interprets "Shell Index" and "Shell Blob" sections.
|
||||
const ShellReader = struct {
    // Index entries; entry i describes where shell i lives in sectionBlob.
    sectionIndex: []const ShellIndex,
    // Raw shell bytes; entries address it in units of 4 bytes.
    sectionBlob: []const u8,

    // init wraps the two sections. Both slices are stored as given (nothing
    // is copied), so they must stay valid for as long as the reader is used.
    pub fn init(index: []const u8, blob: []const u8) ShellReader {
        return ShellReader{
            // NOTE(review): the byte slice is reinterpreted as-is; confirm
            // that its length is meant to be an entry count rather than a
            // byte count (std.mem.bytesAsSlice would convert the length).
            .sectionIndex = @bitCast([]const ShellIndex, index),
            .sectionBlob = blob,
        };
    }

    // get returns the shell at the given index as a slice into sectionBlob.
    pub fn get(self: *ShellReader, idx: u10) []const u8 {
        const entry = self.sectionIndex[idx];
        // Widen before shifting: `offset << 2` evaluated in u10 silently
        // drops the two top bits once the byte offset reaches 1024, and
        // `start + len + 1` could overflow u10 as well.
        const start = @as(usize, entry.offset) << 2;
        // The stored length is the real length minus one.
        const end = start + @as(usize, entry.len) + 1;
        return self.sectionBlob[start..end];
    }
};
|
||||
|
||||
// ShellWriter is a shell popularity contest: collect shells and return the
|
||||
// popular ones, sorted by score. score := len(shell) * number_of_shells.
|
||||
const ShellPopcon = struct {
|
||||
const ShellWriter = struct {
|
||||
counts: std.StringHashMap(u32),
|
||||
allocator: Allocator,
|
||||
const Self = @This();
|
||||
const KV = struct { shell: []const u8, score: u32 };
|
||||
|
||||
const ShellSections = struct {
|
||||
@ -52,18 +74,28 @@ const ShellPopcon = struct {
|
||||
try self.indices.put(ourShell, idx);
|
||||
self.index.set(idx, ShellIndex{
|
||||
.offset = @intCast(u10, fullOffset >> 2),
|
||||
.len = len,
|
||||
.len = len - 1,
|
||||
});
|
||||
|
||||
// Add padding to make offset divisible by 4.
|
||||
const padding = (fullOffset + 3) & ~@intCast(u12, 3);
|
||||
fullOffset += len + padding;
|
||||
fullOffset += len;
|
||||
const padding = roundUp4Padding(fullOffset);
|
||||
fullOffset += padding;
|
||||
//const stderr = std.io.getStdErr().writer();
|
||||
//try stderr.print("\n", .{});
|
||||
try self.blob.appendNTimes(0, padding);
|
||||
idx += 1;
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
// sectionIndex returns the index entries reinterpreted as a byte slice.
pub fn sectionIndex(self: *ShellSections) []const u8 {
    const entries = self.index.constSlice();
    return @bitCast([]const u8, entries);
}
|
||||
|
||||
// sectionBlob returns the contents of `blob` as a read-only byte slice.
pub fn sectionBlob(self: *ShellSections) []const u8 {
    const bytes = self.blob.constSlice();
    return bytes;
}
|
||||
|
||||
pub fn deinit(self: *ShellSections) void {
|
||||
self.indices.deinit();
|
||||
self.* = undefined;
|
||||
@ -74,14 +106,14 @@ const ShellPopcon = struct {
|
||||
}
|
||||
};
|
||||
|
||||
pub fn init(allocator: Allocator) Self {
|
||||
return Self{
|
||||
// init returns a ShellWriter with an empty count map backed by `allocator`;
// the same allocator is kept in the `allocator` field.
pub fn init(allocator: Allocator) ShellWriter {
    const counts = std.StringHashMap(u32).init(allocator);
    return ShellWriter{
        .allocator = allocator,
        .counts = counts,
    };
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
pub fn deinit(self: *ShellWriter) void {
|
||||
var it = self.counts.keyIterator();
|
||||
while (it.next()) |key_ptr| {
|
||||
self.counts.allocator.free(key_ptr.*);
|
||||
@ -90,7 +122,7 @@ const ShellPopcon = struct {
|
||||
self.* = undefined;
|
||||
}
|
||||
|
||||
pub fn put(self: *Self, shell: []const u8) !void {
|
||||
pub fn put(self: *ShellWriter, shell: []const u8) !void {
|
||||
// TODO getOrPutAdapted may be more elegant, not sure which
|
||||
// context to pass.
|
||||
if (self.counts.getPtr(shell)) |ptr| {
|
||||
@ -108,9 +140,9 @@ const ShellPopcon = struct {
|
||||
}
|
||||
|
||||
// toOwnedSections returns the analyzed ShellSections. Resets the shell
|
||||
// popularity contest. ShellSections memory is allocated by the ShellPopcon
|
||||
// popularity contest. ShellSections memory is allocated by the ShellWriter
|
||||
// allocator, and must be deInit'ed by the caller.
|
||||
pub fn toOwnedSections(self: *Self, limit: u10) !ShellSections {
|
||||
pub fn toOwnedSections(self: *ShellWriter, limit: u10) !ShellSections {
|
||||
var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
|
||||
defer deque.deinit();
|
||||
|
||||
@ -141,8 +173,14 @@ const ShellPopcon = struct {
|
||||
}
|
||||
};
|
||||
|
||||
// roundUp4Padding returns the number of bytes that must be added to n to
// reach the next multiple of 4 (0 when n is already a multiple of 4).
//
// The result is derived from the two low bits of n only, so no intermediate
// value can overflow u12 -- `n + 3` would for n > 4092.
inline fn roundUp4Padding(n: u12) u12 {
    return (4 - (n & 3)) & 3;
}
|
||||
|
||||
test "basic shellpopcon" {
|
||||
var popcon = ShellPopcon.init(testing.allocator);
|
||||
var popcon = ShellWriter.init(testing.allocator);
|
||||
defer popcon.deinit();
|
||||
|
||||
const bash = "/bin/bash"; // 9 chars
|
||||
@ -163,21 +201,39 @@ test "basic shellpopcon" {
|
||||
defer sections.deinit();
|
||||
try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify
|
||||
|
||||
// TODO(motiejus): reverse the arguments: first should be "expected".
|
||||
try testing.expectEqual(sections.getIndex(long).?, 0);
|
||||
try testing.expectEqual(sections.getIndex(zsh).?, 1);
|
||||
try testing.expectEqual(sections.getIndex(bash).?, 2);
|
||||
try testing.expectEqual(sections.getIndex(nobody), null);
|
||||
|
||||
const idx = sections.getIndex(zsh).?;
|
||||
var shellReader = ShellReader.init(
|
||||
sections.sectionIndex(),
|
||||
sections.sectionBlob(),
|
||||
);
|
||||
try testing.expectEqualStrings(shellReader.get(0), long);
|
||||
try testing.expectEqualStrings(shellReader.get(1), zsh);
|
||||
try testing.expectEqualStrings(shellReader.get(2), bash);
|
||||
|
||||
for ([_][]const u8{ long, zsh, bash }) |shell| {
|
||||
const idx = sections.getIndex(shell).?;
|
||||
const start = sections.index.get(idx).offset << 2;
|
||||
const end = start + sections.index.get(idx).len;
|
||||
const end = start + sections.index.get(idx).len + 1;
|
||||
const got = sections.blob.constSlice()[start..end];
|
||||
|
||||
const stderr = std.io.getStdErr().writer();
|
||||
try stderr.print("\n", .{});
|
||||
|
||||
try stderr.print("gotLong: {s}\n", .{got});
|
||||
try stderr.print(" long: {s}\n", .{zsh});
|
||||
try testing.expectEqual(got, zsh);
|
||||
try testing.expectEqualStrings(got, shell);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks roundUp4Padding against a table of inputs around multiples of 4,
// including values close to the top of the u12 range.
test "padding" {
    const Case = struct { n: u12, want: u12 };
    const cases = [_]Case{
        .{ .n = 0, .want = 0 },
        .{ .n = 1, .want = 3 },
        .{ .n = 2, .want = 2 },
        .{ .n = 3, .want = 1 },
        .{ .n = 4, .want = 0 },
        .{ .n = 40, .want = 0 },
        .{ .n = 41, .want = 3 },
        .{ .n = 42, .want = 2 },
        .{ .n = 43, .want = 1 },
        .{ .n = 44, .want = 0 },
        .{ .n = 4091, .want = 1 },
        .{ .n = 4092, .want = 0 },
    };
    for (cases) |case| {
        // expectEqual takes the expected value first.
        try testing.expectEqual(case.want, roundUp4Padding(case.n));
    }
}
|
@ -1,4 +1,4 @@
|
||||
// Aggregate test block: references each module below by importing it.
// NOTE(review): presumably this is what makes the `test` blocks in those
// files run -- confirm against the build configuration.
test "turbonss test suite" {
|
||||
_ = @import("main.zig");
|
||||
_ = @import("shellpop.zig");
|
||||
_ = @import("shell.zig");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user