shellpop skeleton
This commit is contained in:
parent
ce882b9086
commit
f584642cca
44
README.md
44
README.md
|
@ -66,10 +66,10 @@ consumed heap space for each separate turbonss instance will be minimal.
|
|||
Tight packing places some constraints on the underlying data:
|
||||
|
||||
- Maximum database size: 4GB.
|
||||
- Maximum length of username and groupname: 32 bytes.
|
||||
- Maximum length of shell and homedir: 64 bytes.
|
||||
- Maximum comment ("gecos") length: 256 bytes.
|
||||
- Username and groupname must be utf8-encoded.
|
||||
- Permitted length of username and groupname: 1-32 bytes.
|
||||
- Permitted length of shell and homedir: 1-64 bytes.
|
||||
- Permitted comment ("gecos") length: 0-255 bytes.
|
||||
- Username, groupname and gecos must be utf8-encoded.
|
||||
|
||||
Checking out and building
|
||||
-------------------------
|
||||
|
@ -156,7 +156,7 @@ OFFSET TYPE NAME DESCRIPTION
|
|||
0 [4]u8 magic always 0xf09fa4b7
|
||||
4 u8 version now `0`
|
||||
5 u16 bom 0x1234
|
||||
7 u8 padding
|
||||
7 u6 num_shells max value: 63
|
||||
8 u32 num_users number of passwd entries
|
||||
12 u32 num_groups number of group entries
|
||||
16 u32 offset_cmph_uid2user
|
||||
|
@ -165,9 +165,8 @@ OFFSET TYPE NAME DESCRIPTION
|
|||
28 u32 offset_idx offset to the first idx_ section
|
||||
32 u32 offset_groups
|
||||
36 u32 offset_users
|
||||
40 u32 offset_shells
|
||||
44 u32 offset_groupmembers
|
||||
48 u32 offset_additional_gids
|
||||
40 u32 offset_groupmembers
|
||||
44 u32 offset_additional_gids
|
||||
```
|
||||
|
||||
`magic` is 0xf09fa4b7, and `version` must be `0`. All integers are
|
||||
|
@ -255,15 +254,25 @@ few examples: `/bin/bash`, `/usr/bin/nologin`, `/bin/zsh` among others.
|
|||
Therefore, "shells" have an optimization: they can be pointed to in the
|
||||
external list, or reside among the user's data.
|
||||
|
||||
64 (1>>6) most popular shells (i.e. referred to by at least two User entries)
|
||||
are stored externally in "Shells" area. The less popular ones are stored with
|
||||
63 most popular shells (i.e. referred to by at least two User entries) are
|
||||
stored externally in "Shells" area. The less popular ones are stored with
|
||||
userdata.
|
||||
|
||||
The `shell_here=true` bit signifies that the shell is stored with userdata.
|
||||
`false` means it is stored in the `Shells` section. If the shell is stored
|
||||
"here", it is the first element in `stringdata`, and it's length is
|
||||
`shell_len_or_place`. If it is stored externally, the latter variable points
|
||||
to it's index in the external storage.
|
||||
There are two "Shells" areas: the index and the blob. The index is a list of
|
||||
structs which point to a location in the "blob" area:
|
||||
|
||||
```
|
||||
const ShellIndex = struct {
|
||||
offset: u10,
|
||||
len: u6,
|
||||
};
|
||||
```
|
||||
|
||||
In the user's struct the `shell_here=true` bit signifies that the shell is
|
||||
stored with userdata. `false` means it is stored in the `Shells` section. If
|
||||
the shell is stored "here", it is the first element in `stringdata`, and its
|
||||
length is `shell_len_or_place`. If it is stored externally, the latter variable
|
||||
points to its index in the ShellIndex area.
|
||||
|
||||
Shells in the external storage are sorted by their weight, which is
|
||||
`length*frequency`.
|
||||
|
@ -315,7 +324,7 @@ Each section is padded to 64 bytes.
|
|||
|
||||
```
|
||||
SECTION SIZE DESCRIPTION
|
||||
Header 52 see "Turbonss header" section
|
||||
Header 48 see "Turbonss header" section
|
||||
cmph_gid2group ? gid->group cmph
|
||||
cmph_uid2user ? uid->user cmph
|
||||
cmph_groupname2group ? groupname->group cmph
|
||||
|
@ -324,9 +333,10 @@ idx_gid2group len(group)*4*29/32 cmph->offset gid2group
|
|||
idx_groupname2group len(group)*4*29/32 cmph->offset groupname2group
|
||||
idx_uid2user len(user)*4*29/32 cmph->offset uid2user
|
||||
idx_username2user len(user)*4*29/32 cmph->offset username2user
|
||||
ShellIndex len(shells)*2 Shell index array
|
||||
ShellBlob <= 4032 Shell data blob (max 63*64 bytes)
|
||||
Groups ? packed Group entries (8b padding)
|
||||
Users ? packed User entries (8b padding)
|
||||
Shells ? See "Shells" section
|
||||
groupmembers ? per-group memberlist (32b padding)
|
||||
additional_gids ? per-user grouplist (8b padding)
|
||||
```
|
||||
|
|
|
@ -68,7 +68,7 @@ pub fn build(b: *zbs.Builder) void {
|
|||
exe.install();
|
||||
|
||||
{
|
||||
const turbonss_test = b.addTest("src/main.zig");
|
||||
const turbonss_test = b.addTest("src/test_main.zig");
|
||||
addCmphDeps(turbonss_test, cmph);
|
||||
const test_step = b.step("test", "Run the tests");
|
||||
test_step.dependOn(&turbonss_test.step);
|
||||
|
|
|
@ -14,7 +14,7 @@ pub fn main() !void {}
|
|||
test "simple cmph usage" {
|
||||
var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
||||
const arena = arena_instance.allocator();
|
||||
const stderr = std.io.getStdErr().writer();
|
||||
const stdout = std.io.getStdOut().writer();
|
||||
|
||||
var vector = std.ArrayList([*:0]const u8).init(arena);
|
||||
try vector.appendSlice(&.{
|
||||
|
@ -50,10 +50,10 @@ test "simple cmph usage" {
|
|||
hash = c.cmph_load(mphf_fd) orelse unreachable;
|
||||
defer c.cmph_destroy(hash);
|
||||
|
||||
try stderr.print("\n", .{});
|
||||
try stdout.print("\n", .{});
|
||||
for (vector.items) |key| {
|
||||
var id = c.cmph_search(hash, key, @truncate(c_uint, c.strlen(key)));
|
||||
try stderr.print("key: {s}, id: {d}\n", .{ key, id });
|
||||
try stdout.print("key: {s}, id: {d}\n", .{ key, id });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,153 @@
|
|||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const PriorityDequeue = std.PriorityDequeue;
|
||||
const StringArrayHashMap = std.StringArrayHashMap;
|
||||
const StringHashMap = std.StringHashMap;
|
||||
const BoundedArray = std.BoundedArray;
|
||||
const testing = std.testing;
|
||||
|
||||
// ShellIndex is one entry of the index into the shell strings. A shell can be
// up to 64 bytes (1<<6) and there are at most 63 shells ((1<<6)-1), so a byte
// location stays below 1<<12. To make the location resolvable in 10 bits, all
// shells are padded to 4 bytes.
//
// The struct is packed so that one entry occupies exactly 16 bits (u10 + u6):
// the on-disk index is sized as len(shells)*2 bytes, and a non-packed struct
// has no guaranteed field layout or size.
const ShellIndex = packed struct {
    // Location of the shell in the blob; presumably counted in 4-byte units,
    // given the padding described above -- TODO confirm once the writer exists.
    offset: u10,
    // Length of the shell string.
    // NOTE(review): u6 holds 0..63 while shells may be 64 bytes long; confirm
    // the intended encoding (e.g. len-1).
    len: u6,
};
|
||||
|
||||
// MaxShells is the maximum number of "popular" shells.
|
||||
const MaxShells = 63;
|
||||
|
||||
// ShellPopcon is a shell popularity contest: it counts how many times each
// shell was seen and reports the popular ones ordered by score, where
// score := len(shell) * number_of_occurrences.
//
// Every distinct shell handed to `put` is copied with `allocator`; the copies
// are owned by ShellPopcon and released in `deinit`.
const ShellPopcon = struct {
    // Occurrence counter per shell. Keys are copies owned by this struct.
    counts: std.StringHashMap(u32),
    // Allocator used for the key copies and for all temporary/returned data.
    allocator: Allocator,

    const Self = @This();

    // A candidate shell together with its popularity score.
    const KV = struct {
        shell: []const u8,
        // u64 so that len * count cannot overflow for any u32 count.
        score: u64,
    };

    const ShellSections = struct {
        index: []ShellIndex,
        blob: []const u8,

        offsets: StringHashMap(u10),

        // Returns the offset recorded for `shell`, or null if the shell is
        // not part of the sections.
        pub fn getOffset(self: *ShellSections, shell: []const u8) ?u10 {
            return self.offsets.get(shell);
        }

        // Initializes ShellSections. Intended contract: all strings are
        // copied, nothing is owned.
        //
        // TODO: this is still a skeleton. `shells` is not consumed yet; the
        // returned value has an empty index, an empty blob and an empty
        // offsets map, so that callers get a well-formed value instead of
        // undefined memory.
        pub fn init(allocator: Allocator, shells: BoundedArray([]const u8, MaxShells)) ShellSections {
            _ = shells;
            return ShellSections{
                .index = &[_]ShellIndex{},
                .blob = "",
                .offsets = StringHashMap(u10).init(allocator),
            };
        }

        // Releases the offsets map. `index` and `blob` are not heap-allocated
        // by the current `init`, so they are not freed here.
        pub fn deinit(self: *ShellSections) void {
            self.offsets.deinit();
            self.* = undefined;
        }
    };

    // Creates an empty popularity contest backed by `allocator`.
    pub fn init(allocator: Allocator) Self {
        return Self{
            .counts = std.StringHashMap(u32).init(allocator),
            .allocator = allocator,
        };
    }

    // Frees all copied shell strings and the counter map.
    pub fn deinit(self: *Self) void {
        var it = self.counts.keyIterator();
        while (it.next()) |key_ptr| {
            self.allocator.free(key_ptr.*);
        }
        self.counts.deinit();
        self.* = undefined;
    }

    // Records one occurrence of `shell`. The string is copied the first time
    // it is seen, so the caller keeps ownership of `shell`.
    pub fn put(self: *Self, shell: []const u8) !void {
        if (self.counts.getPtr(shell)) |count| {
            count.* += 1;
            return;
        }

        const owned = try self.allocator.dupe(u8, shell);
        // Do not leak the copy if growing the map fails.
        errdefer self.allocator.free(owned);
        try self.counts.put(owned, 1);
    }

    // Orders by score. Equal scores are ordered by reverse lexicographic
    // shell name, so that repeated removeMax() calls yield ties in ascending
    // name order regardless of hash map iteration order.
    fn cmpShells(context: void, a: KV, b: KV) std.math.Order {
        _ = context;
        const by_score = std.math.order(a.score, b.score);
        if (by_score != .eq) return by_score;
        return std.mem.order(u8, b.shell, a.shell);
    }

    // Returns up to `limit` popular shells, most popular first. A shell is a
    // candidate only if it was seen at least twice.
    //
    // The keys of the returned map are the shells in rank order. They point
    // into memory owned by this ShellPopcon and are valid until `deinit`.
    // The caller must call deinit() on the returned map.
    pub fn top(self: *Self, limit: u32) !StringArrayHashMap(void) {
        var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
        defer deque.deinit();

        var it = self.counts.iterator();
        while (it.next()) |entry| {
            const count = entry.value_ptr.*;
            if (count < 2) continue;
            const shell = entry.key_ptr.*;
            try deque.add(KV{ .shell = shell, .score = @as(u64, shell.len) * count });
        }

        var result = StringArrayHashMap(void).init(self.allocator);
        errdefer result.deinit();

        var remaining: usize = if (deque.count() < limit) deque.count() else limit;
        while (remaining > 0) : (remaining -= 1) {
            try result.put(deque.removeMax().shell, {});
        }
        return result;
    }

    // Builds the shell sections from the (at most) `limit` most popular
    // shells. `limit` is capped at MaxShells. Returns an error if an
    // allocation fails.
    pub fn getSections(self: *Self, limit: u32) !ShellSections {
        const capped: u32 = if (limit > MaxShells) MaxShells else limit;

        var popular = try self.top(capped);
        defer popular.deinit();

        var shells = try BoundedArray([]const u8, MaxShells).init(0);
        try shells.appendSlice(popular.keys());

        return ShellSections.init(self.allocator, shells);
    }
};
|
||||
|
||||
// Two slices with the same bytes must compare equal by content.
// expectEqual on slices compares pointer and length, which only passes when
// the compiler happens to deduplicate the literals; expectEqualStrings
// compares the bytes.
test "[]u8 comparison" {
    const s1: []const u8 = "/bin/bash";
    const s2: []const u8 = "/bin/bash";
    try testing.expectEqualStrings(s1, s2);
}
|
||||
|
||||
// Feeds a handful of shells into ShellPopcon and checks that the two
// highest-scoring ones come back in rank order.
test "basic shellpop" {
    var popcon = ShellPopcon.init(testing.allocator);
    defer popcon.deinit();

    // score = 9 * 3 = 27
    try popcon.put("/bin/bash");
    try popcon.put("/bin/bash");
    try popcon.put("/bin/bash");
    // score = 8 * 4 = 32
    try popcon.put("/bin/zsh");
    try popcon.put("/bin/zsh");
    try popcon.put("/bin/zsh");
    try popcon.put("/bin/zsh");
    // seen only once
    try popcon.put("/bin/nobody");
    // score = 43 * 2 = 86
    try popcon.put("/bin/very-long-shell-name-ought-to-be-first");
    try popcon.put("/bin/very-long-shell-name-ought-to-be-first");

    var topshells = try popcon.top(2);
    defer topshells.deinit();
    const shell_strings = topshells.keys();

    try testing.expectEqual(@as(usize, 2), shell_strings.len);
    // Strings are compared by content; expectEqual would compare slice
    // pointers and fail for equal bytes stored at different addresses.
    try testing.expectEqualStrings("/bin/very-long-shell-name-ought-to-be-first", shell_strings[0]);
    try testing.expectEqualStrings("/bin/zsh", shell_strings[1]);
}
|
|
@ -0,0 +1,4 @@
|
|||
// Test entry point: importing a file from inside a test block makes the test
// runner pick up the tests declared in that file.
test "turbonss test suite" {
    // cmph smoke test
    _ = @import("main.zig");
    // shell popularity contest
    _ = @import("shellpop.zig");
}
|
|
@ -0,0 +1,34 @@
|
|||
const std = @import("std");
|
||||
|
||||
// DB is the in-memory collection of all users and groups.
// NOTE(review): the map keys are presumably user/group names -- confirm
// against the code that fills these maps.
const DB = struct {
    // User entries, looked up by string key.
    users: std.StringHashMap(User),
    // Group entries, looked up by string key.
    groups: std.StringHashMap(Group),
};
|
||||
|
||||
// Group is a single group entry in unpacked form.
const Group = struct {
    // Numeric group id.
    gid: u32,
    // Group name.
    name: []const u8,
    // Set of strings describing the members (presumably user names -- TODO
    // confirm).
    members: std.BufSet,
};
|
||||
|
||||
// User is a single passwd entry in unpacked form.
const User = struct {
    // Numeric user id.
    uid: u32,
    // Numeric id of the user's group.
    gid: u32,
    // User name.
    name: []const u8,
    // Comment ("gecos") field.
    gecos: []const u8,
    // Home directory.
    home: []const u8,
    // Login shell.
    shell: []const u8,
    // Set of strings describing the user's groups (presumably group names --
    // TODO confirm).
    groups: std.BufSet,
};
|
||||
|
||||
// PackedUser is the fixed-size, bit-packed part of a user record.
// The fields add up to 120 bits.
const PackedUser = packed struct {
    uid: u32,
    gid: u32,
    // Offset related to the user's additional gids (presumably into the
    // additional_gids section -- TODO confirm).
    additional_gids_offset: u29,
    // 1: the shell is stored with the user's data; 0: it is stored in the
    // external shells section.
    shell_here: u1,
    // When shell_here is set: length of the shell stored with the user.
    // Otherwise: index of the shell in the external shell index.
    shell_len_or_place: u6,
    homedir_len: u6,
    // NOTE(review): the semantics of the next two fields are not visible in
    // this file; presumably the username may be stored as a suffix of another
    // string -- confirm.
    username_is_a_suffix: u1,
    username_offset_or_len: u5,
    // Length of the comment ("gecos") field, 0-255 bytes.
    gecos_len: u8,
};
|
Loading…
Reference in New Issue