From d9c8e694407893633bd7b4f5af6a84c2065fd1f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?=
Date: Thu, 17 Feb 2022 06:38:54 +0200
Subject: [PATCH] make Shell Writer work.

---
Review notes (placed after the "---" marker, so not part of the commit message):
 * ShellReader.get: the u10 offset is widened before "<< 2"; shifting inside
   u10 drops the high bits once the blob offset reaches 1024.
 * ShellReader.init and ShellSections.sectionIndex: std.mem.bytesAsSlice and
   std.mem.sliceAsBytes replace @bitCast on slice values, so the slice length
   is converted between bytes and ShellIndex items.
 * roundUp4Padding: computed without "n + 3", which overflows u12 for n > 4092.
 * Newly added assertions pass the expected value first.
 * Only "+" lines were edited and hunk sizes are unchanged; the post-image
   blob id on the "index" line below was not regenerated.

 src/{shellpop.zig => shell.zig} | 112 ++++++++++++++++++++++++--------
 src/test_main.zig               |   2 +-
 2 files changed, 85 insertions(+), 29 deletions(-)
 rename src/{shellpop.zig => shell.zig} (61%)

diff --git a/src/shellpop.zig b/src/shell.zig
similarity index 61%
rename from src/shellpop.zig
rename to src/shell.zig
index 6c79651..7ed0da5 100644
--- a/src/shellpop.zig
+++ b/src/shell.zig
@@ -6,25 +6,47 @@ const StringHashMap = std.StringHashMap;
 const BoundedArray = std.BoundedArray;
 const testing = std.testing;
 
+// MaxShells is the maximum number of "popular" shells.
+const MaxShells = 63;
+const MaxShellLen = 64;
+
 // ShellIndex is an index to the shell strings. As shell can be up to 64 bytes
 // (1<<6), maximum number of shells is 63 (1<<6-1), the maximum location offset
 // is 1<<12. To make location resolvable in 10 bits, all shells will be padded
 // to 4 bytes.
+// The actual shell length is len+1: we don't allow empty shells, and the real
+// length of the shell is 1-64 bytes.
 const ShellIndex = struct {
     offset: u10,
     len: u6,
 };
 
-// MaxShells is the maximum number of "popular" shells.
-const MaxShells = 63;
-const MaxShellLen = 64;
+// ShellReader interprets "Shell Index" and "Shell Blob" sections.
+const ShellReader = struct {
+    sectionIndex: []align(1) const ShellIndex, // view over caller-owned bytes
+    sectionBlob: []const u8,
 
-// ShellPopcon is a shell popularity contest: collect shells and return the
+    pub fn init(index: []const u8, blob: []const u8) ShellReader {
+        return ShellReader{
+            .sectionIndex = std.mem.bytesAsSlice(ShellIndex, index), // bytes -> items
+            .sectionBlob = blob,
+        };
+    }
+
+    // get returns the shell that the idx-th ShellIndex entry points to.
+    pub fn get(self: *const ShellReader, idx: u10) []const u8 {
+        const shellIndex = self.sectionIndex[idx];
+        const start = @as(usize, shellIndex.offset) << 2; // widen: a u10 shift drops high bits
+        const end = start + shellIndex.len + 1; // stored len is the real length minus one
+        return self.sectionBlob[start..end];
+    }
+};
+
+// ShellWriter is a shell popularity contest: collect shells and return the
 // popular ones, sorted by score. score := len(shell) * number_of_shells.
-const ShellPopcon = struct {
+const ShellWriter = struct {
     counts: std.StringHashMap(u32),
     allocator: Allocator,
-    const Self = @This();
     const KV = struct { shell: []const u8, score: u32 };
 
     const ShellSections = struct {
@@ -52,18 +74,28 @@ const ShellPopcon = struct {
                 try self.indices.put(ourShell, idx);
                 self.index.set(idx, ShellIndex{
                     .offset = @intCast(u10, fullOffset >> 2),
-                    .len = len,
+                    .len = len - 1,
                 });
 
-                // Padd padding to make offset divisible by 4.
-                const padding = (fullOffset + 3) & ~@intCast(u12, 3);
-                fullOffset += len + padding;
+                fullOffset += len;
+                const padding = roundUp4Padding(fullOffset);
+                fullOffset += padding;
+                // Zero-pad the blob so that the next shell starts at a multiple
+                // of 4; ShellIndex.offset stores that position divided by 4.
                 try self.blob.appendNTimes(0, padding);
                 idx += 1;
             }
             return self;
         }
 
+        pub fn sectionIndex(self: *const ShellSections) []const u8 {
+            return std.mem.sliceAsBytes(self.index.constSlice()); // items -> bytes
+        }
+
+        pub fn sectionBlob(self: *const ShellSections) []const u8 {
+            return self.blob.constSlice();
+        }
+
         pub fn deinit(self: *ShellSections) void {
             self.indices.deinit();
             self.* = undefined;
@@ -74,14 +106,14 @@ const ShellPopcon = struct {
         }
     };
 
-    pub fn init(allocator: Allocator) Self {
-        return Self{
+    pub fn init(allocator: Allocator) ShellWriter {
+        return ShellWriter{
             .counts = std.StringHashMap(u32).init(allocator),
             .allocator = allocator,
         };
     }
 
-    pub fn deinit(self: *Self) void {
+    pub fn deinit(self: *ShellWriter) void {
         var it = self.counts.keyIterator();
         while (it.next()) |key_ptr| {
             self.counts.allocator.free(key_ptr.*);
@@ -90,7 +122,7 @@ const ShellPopcon = struct {
         self.* = undefined;
     }
 
-    pub fn put(self: *Self, shell: []const u8) !void {
+    pub fn put(self: *ShellWriter, shell: []const u8) !void {
         // TODO getOrPutAdapted may be more elegant, not sure which
         // context to pass.
         if (self.counts.getPtr(shell)) |ptr| {
@@ -108,9 +140,9 @@ const ShellPopcon = struct {
     }
 
     // toOwnedSections returns the analyzed ShellSections. Resets the shell
-    // popularity contest. ShellSections memory is allocated by the ShellPopcon
+    // popularity contest. ShellSections memory is allocated by the ShellWriter
     // allocator, and must be deInit'ed by the caller.
-    pub fn toOwnedSections(self: *Self, limit: u10) !ShellSections {
+    pub fn toOwnedSections(self: *ShellWriter, limit: u10) !ShellSections {
         var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
         defer deque.deinit();
 
@@ -141,8 +173,14 @@ const ShellPopcon = struct {
     }
 };
 
+// roundUp4Padding returns how many bytes must be added to n to reach the
+// next multiple of 4 (0 when n is already a multiple of 4).
+inline fn roundUp4Padding(n: u12) u12 {
+    return (4 - (n & 3)) & 3; // avoids n + 3, which overflows u12 for n > 4092
+}
+
 test "basic shellpopcon" {
-    var popcon = ShellPopcon.init(testing.allocator);
+    var popcon = ShellWriter.init(testing.allocator);
     defer popcon.deinit();
 
     const bash = "/bin/bash"; // 9 chars
@@ -163,21 +201,39 @@ test "basic shellpopcon" {
     defer sections.deinit();
 
     try testing.expectEqual(sections.index.len, 3); // all but "nobody" qualify
-    // TODO(motiejus): reverse the arguments: first should be "expected".
     try testing.expectEqual(sections.getIndex(long).?, 0);
     try testing.expectEqual(sections.getIndex(zsh).?, 1);
     try testing.expectEqual(sections.getIndex(bash).?, 2);
     try testing.expectEqual(sections.getIndex(nobody), null);
 
-    const idx = sections.getIndex(zsh).?;
-    const start = sections.index.get(idx).offset << 2;
-    const end = start + sections.index.get(idx).len;
-    const got = sections.blob.constSlice()[start..end];
+    const shellReader = ShellReader.init(
+        sections.sectionIndex(),
+        sections.sectionBlob(),
+    );
+    try testing.expectEqualStrings(long, shellReader.get(0));
+    try testing.expectEqualStrings(zsh, shellReader.get(1));
+    try testing.expectEqualStrings(bash, shellReader.get(2));
 
-    const stderr = std.io.getStdErr().writer();
-    try stderr.print("\n", .{});
-
-    try stderr.print("gotLong: {s}\n", .{got});
-    try stderr.print(" long: {s}\n", .{zsh});
-    try testing.expectEqual(got, zsh);
+    for ([_][]const u8{ long, zsh, bash }) |shell| {
+        const idx = sections.getIndex(shell).?;
+        const start = @as(usize, sections.index.get(idx).offset) << 2;
+        const end = start + sections.index.get(idx).len + 1;
+        const got = sections.blob.constSlice()[start..end];
+        try testing.expectEqualStrings(shell, got);
+    }
+}
+
+test "padding" {
+    try testing.expectEqual(@as(u12, 0), roundUp4Padding(0));
+    try testing.expectEqual(@as(u12, 3), roundUp4Padding(1));
+    try testing.expectEqual(@as(u12, 2), roundUp4Padding(2));
+    try testing.expectEqual(@as(u12, 1), roundUp4Padding(3));
+    try testing.expectEqual(@as(u12, 0), roundUp4Padding(4));
+    try testing.expectEqual(@as(u12, 3), roundUp4Padding(41));
+    try testing.expectEqual(@as(u12, 2), roundUp4Padding(42));
+    try testing.expectEqual(@as(u12, 1), roundUp4Padding(43));
+    try testing.expectEqual(@as(u12, 0), roundUp4Padding(44));
+    try testing.expectEqual(@as(u12, 1), roundUp4Padding(4091));
+    try testing.expectEqual(@as(u12, 0), roundUp4Padding(4092));
+    try testing.expectEqual(@as(u12, 1), roundUp4Padding(4095));
 }
diff --git a/src/test_main.zig b/src/test_main.zig
index 92f8977..76b625e 100644
--- a/src/test_main.zig
+++ b/src/test_main.zig
@@ -1,4 +1,4 @@
 test "turbonss test suite" {
     _ = @import("main.zig");
-    _ = @import("shellpop.zig");
+    _ = @import("shell.zig");
 }