shellpop skeleton
This commit is contained in:
parent
ce882b9086
commit
f584642cca
44
README.md
44
README.md
|
@ -66,10 +66,10 @@ consumed heap space for each separate turbonss instance will be minimal.
|
||||||
Tight packing places some constraints on the underlying data:
|
Tight packing places some constraints on the underlying data:
|
||||||
|
|
||||||
- Maximum database size: 4GB.
|
- Maximum database size: 4GB.
|
||||||
- Maximum length of username and groupname: 32 bytes.
|
- Permitted length of username and groupname: 1-32 bytes.
|
||||||
- Maximum length of shell and homedir: 64 bytes.
|
- Permitted length of shell and homedir: 1-64 bytes.
|
||||||
- Maximum comment ("gecos") length: 256 bytes.
|
- Permitted comment ("gecos") length: 0-255 bytes.
|
||||||
- Username and groupname must be utf8-encoded.
|
- Username, groupname and gecos must be utf8-encoded.
|
||||||
|
|
||||||
Checking out and building
|
Checking out and building
|
||||||
-------------------------
|
-------------------------
|
||||||
|
@ -156,7 +156,7 @@ OFFSET TYPE NAME DESCRIPTION
|
||||||
0 [4]u8 magic always 0xf09fa4b7
|
0 [4]u8 magic always 0xf09fa4b7
|
||||||
4 u8 version now `0`
|
4 u8 version now `0`
|
||||||
5 u16 bom 0x1234
|
5 u16 bom 0x1234
|
||||||
7 u8 padding
|
7 u6 num_shells max value: 63
|
||||||
8 u32 num_users number of passwd entries
|
8 u32 num_users number of passwd entries
|
||||||
12 u32 num_groups number of group entries
|
12 u32 num_groups number of group entries
|
||||||
16 u32 offset_cmph_uid2user
|
16 u32 offset_cmph_uid2user
|
||||||
|
@ -165,9 +165,8 @@ OFFSET TYPE NAME DESCRIPTION
|
||||||
28 u32 offset_idx offset to the first idx_ section
|
28 u32 offset_idx offset to the first idx_ section
|
||||||
32 u32 offset_groups
|
32 u32 offset_groups
|
||||||
36 u32 offset_users
|
36 u32 offset_users
|
||||||
40 u32 offset_shells
|
40 u32 offset_groupmembers
|
||||||
44 u32 offset_groupmembers
|
44 u32 offset_additional_gids
|
||||||
48 u32 offset_additional_gids
|
|
||||||
```
|
```
|
||||||
|
|
||||||
`magic` is 0xf09fa4b7, and `version` must be `0`. All integers are
|
`magic` is 0xf09fa4b7, and `version` must be `0`. All integers are
|
||||||
|
@ -255,15 +254,25 @@ few examples: `/bin/bash`, `/usr/bin/nologin`, `/bin/zsh` among others.
|
||||||
Therefore, "shells" have an optimization: they can be pointed to in the
|
Therefore, "shells" have an optimization: they can be pointed to in the
|
||||||
external list, or reside among the user's data.
|
external list, or reside among the user's data.
|
||||||
|
|
||||||
64 (1>>6) most popular shells (i.e. referred to by at least two User entries)
|
63 most popular shells (i.e. referred to by at least two User entries) are
|
||||||
are stored externally in "Shells" area. The less popular ones are stored with
|
stored externally in "Shells" area. The less popular ones are stored with
|
||||||
userdata.
|
userdata.
|
||||||
|
|
||||||
The `shell_here=true` bit signifies that the shell is stored with userdata.
|
There are two "Shells" areas: the index and the blob. The index is a list of
|
||||||
`false` means it is stored in the `Shells` section. If the shell is stored
|
structs which point to a location in the "blob" area:
|
||||||
"here", it is the first element in `stringdata`, and it's length is
|
|
||||||
`shell_len_or_place`. If it is stored externally, the latter variable points
|
```
|
||||||
to it's index in the external storage.
|
const ShellIndex = struct {
|
||||||
|
offset: u10,
|
||||||
|
len: u6,
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
In the user's struct the `shell_here=true` bit signifies that the shell is
|
||||||
|
stored with userdata. `false` means it is stored in the `Shells` section. If
|
||||||
|
the shell is stored "here", it is the first element in `stringdata`, and its
|
||||||
|
length is `shell_len_or_place`. If it is stored externally, the latter variable
|
||||||
|
points to its index in the ShellIndex area.
|
||||||
|
|
||||||
Shells in the external storage are sorted by their weight, which is
|
Shells in the external storage are sorted by their weight, which is
|
||||||
`length*frequency`.
|
`length*frequency`.
|
||||||
|
@ -315,7 +324,7 @@ Each section is padded to 64 bytes.
|
||||||
|
|
||||||
```
|
```
|
||||||
SECTION SIZE DESCRIPTION
|
SECTION SIZE DESCRIPTION
|
||||||
Header 52 see "Turbonss header" section
|
Header 48 see "Turbonss header" section
|
||||||
cmph_gid2group ? gid->group cmph
|
cmph_gid2group ? gid->group cmph
|
||||||
cmph_uid2user ? uid->user cmph
|
cmph_uid2user ? uid->user cmph
|
||||||
cmph_groupname2group ? groupname->group cmph
|
cmph_groupname2group ? groupname->group cmph
|
||||||
|
@ -324,9 +333,10 @@ idx_gid2group len(group)*4*29/32 cmph->offset gid2group
|
||||||
idx_groupname2group len(group)*4*29/32 cmph->offset groupname2group
|
idx_groupname2group len(group)*4*29/32 cmph->offset groupname2group
|
||||||
idx_uid2user len(user)*4*29/32 cmph->offset uid2user
|
idx_uid2user len(user)*4*29/32 cmph->offset uid2user
|
||||||
idx_username2user len(user)*4*29/32 cmph->offset username2user
|
idx_username2user len(user)*4*29/32 cmph->offset username2user
|
||||||
|
ShellIndex len(shells)*2 Shell index array
|
||||||
|
ShellBlob <= 4032 Shell data blob (max 63*64 bytes)
|
||||||
Groups ? packed Group entries (8b padding)
|
Groups ? packed Group entries (8b padding)
|
||||||
Users ? packed User entries (8b padding)
|
Users ? packed User entries (8b padding)
|
||||||
Shells ? See "Shells" section
|
|
||||||
groupmembers ? per-group memberlist (32b padding)
|
groupmembers ? per-group memberlist (32b padding)
|
||||||
additional_gids ? per-user grouplist (8b padding)
|
additional_gids ? per-user grouplist (8b padding)
|
||||||
```
|
```
|
||||||
|
|
|
@ -68,7 +68,7 @@ pub fn build(b: *zbs.Builder) void {
|
||||||
exe.install();
|
exe.install();
|
||||||
|
|
||||||
{
|
{
|
||||||
const turbonss_test = b.addTest("src/main.zig");
|
const turbonss_test = b.addTest("src/test_main.zig");
|
||||||
addCmphDeps(turbonss_test, cmph);
|
addCmphDeps(turbonss_test, cmph);
|
||||||
const test_step = b.step("test", "Run the tests");
|
const test_step = b.step("test", "Run the tests");
|
||||||
test_step.dependOn(&turbonss_test.step);
|
test_step.dependOn(&turbonss_test.step);
|
||||||
|
|
|
@ -14,7 +14,7 @@ pub fn main() !void {}
|
||||||
test "simple cmph usage" {
|
test "simple cmph usage" {
|
||||||
var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
||||||
const arena = arena_instance.allocator();
|
const arena = arena_instance.allocator();
|
||||||
const stderr = std.io.getStdErr().writer();
|
const stdout = std.io.getStdOut().writer();
|
||||||
|
|
||||||
var vector = std.ArrayList([*:0]const u8).init(arena);
|
var vector = std.ArrayList([*:0]const u8).init(arena);
|
||||||
try vector.appendSlice(&.{
|
try vector.appendSlice(&.{
|
||||||
|
@ -50,10 +50,10 @@ test "simple cmph usage" {
|
||||||
hash = c.cmph_load(mphf_fd) orelse unreachable;
|
hash = c.cmph_load(mphf_fd) orelse unreachable;
|
||||||
defer c.cmph_destroy(hash);
|
defer c.cmph_destroy(hash);
|
||||||
|
|
||||||
try stderr.print("\n", .{});
|
try stdout.print("\n", .{});
|
||||||
for (vector.items) |key| {
|
for (vector.items) |key| {
|
||||||
var id = c.cmph_search(hash, key, @truncate(c_uint, c.strlen(key)));
|
var id = c.cmph_search(hash, key, @truncate(c_uint, c.strlen(key)));
|
||||||
try stderr.print("key: {s}, id: {d}\n", .{ key, id });
|
try stdout.print("key: {s}, id: {d}\n", .{ key, id });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,153 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const Allocator = std.mem.Allocator;
|
||||||
|
const PriorityDequeue = std.PriorityDequeue;
|
||||||
|
const StringArrayHashMap = std.StringArrayHashMap;
|
||||||
|
const StringHashMap = std.StringHashMap;
|
||||||
|
const BoundedArray = std.BoundedArray;
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
// ShellIndex is one entry of the shell index; it locates a shell string in
|
||||||
|
// the shell blob. A shell can be up to 64 bytes (1<<6) and there are at most
|
||||||
|
// 63 shells ((1<<6)-1), so byte offsets into the blob stay below 1<<12. To
|
||||||
|
// make the location fit in 10 bits, all shells are padded to 4 bytes.
// NOTE(review): `offset` is then presumably counted in 4-byte units, and
// `len` (u6, max 63) cannot hold 64 directly, so it presumably stores
// length-1 -- confirm both against the code that reads/writes the index.
|
||||||
|
const ShellIndex = struct {
|
||||||
|
offset: u10,
|
||||||
|
len: u6,
|
||||||
|
};
|
||||||
|
|
||||||
|
// MaxShells is the maximum number of "popular" shells.
|
||||||
|
const MaxShells = 63;
|
||||||
|
|
||||||
|
// ShellPopcon is a shell popularity contest: collect shells and return the
|
||||||
|
// popular ones, sorted by score. score := len(shell) * times_shell_was_put.
|
||||||
|
// String values are copied, the returned slice of shells is allocated
|
||||||
|
// using an allocator.
|
||||||
|
const ShellPopcon = struct {
|
||||||
|
counts: std.StringHashMap(u32),
|
||||||
|
allocator: Allocator,
|
||||||
|
const Self = @This();
|
||||||
|
const KV = struct {
|
||||||
|
shell: []const u8,
|
||||||
|
score: u32,
|
||||||
|
};
|
||||||
|
|
||||||
|
// ShellSections holds the two "Shells" areas (the index and the blob) together
// with a lookup table from a shell string to its u10 value.
const ShellSections = struct {
    index: []ShellIndex,
    blob: []const u8,

    offsets: StringHashMap(u10),

    // getOffset returns the value stored for `shell` in `offsets`, or null
    // when the shell is not present in the table.
    pub fn getOffset(self: *ShellSections, shell: []const u8) ?u10 {
        return self.offsets.get(shell);
    }

    // init creates a ShellSections. The intended contract (per the original
    // note) is that all strings are copied and nothing passed in is owned.
    //
    // This is still a skeleton: `shells` is not consumed yet, so the result
    // has an empty index, an empty blob and an empty `offsets` table that is
    // backed by `allocator`. The previous body assigned to an undeclared
    // `self` and returned no value, which cannot compile.
    pub fn init(allocator: Allocator, shells: BoundedArray([]const u8, MaxShells)) ShellSections {
        // TODO: fill index, blob and offsets from `shells`.
        _ = shells;
        return ShellSections{
            .index = &[_]ShellIndex{},
            .blob = "",
            .offsets = StringHashMap(u10).init(allocator),
        };
    }
};
|
||||||
|
|
||||||
|
// init returns an empty ShellPopcon. `allocator` backs both the counting map
// and the copies of the shell strings made by `put`.
pub fn init(allocator: Allocator) Self {
    const empty_counts = std.StringHashMap(u32).init(allocator);
    return .{
        .allocator = allocator,
        .counts = empty_counts,
    };
}
|
||||||
|
|
||||||
|
// deinit frees every key stored in `counts` (the keys are the copies made by
// `put`), releases the map itself and invalidates `self`.
pub fn deinit(self: *Self) void {
    const key_allocator = self.counts.allocator;
    var keys = self.counts.keyIterator();
    while (keys.next()) |shell_ptr| key_allocator.free(shell_ptr.*);

    self.counts.deinit();
    self.* = undefined;
}
|
||||||
|
|
||||||
|
// put records one occurrence of `shell`.
//
// A shell that was seen before only has its counter incremented. A new shell
// is copied with `self.allocator` (the caller keeps ownership of `shell`) and
// inserted with a count of 1. Returns an error if the copy or the map
// insertion fails to allocate.
pub fn put(self: *Self, shell: []const u8) !void {
    // TODO getOrPutAdapted may be more elegant, not sure which
    // context to pass.
    if (self.counts.getPtr(shell)) |count| {
        count.* += 1;
        return;
    }

    const owned = try self.allocator.dupe(u8, shell);
    // The map owns the key only after a successful insert; without this the
    // copy would leak when `counts.put` fails.
    errdefer self.allocator.free(owned);
    try self.counts.put(owned, 1);
}
|
||||||
|
|
||||||
|
// cmpShells orders two KV entries by their score; the context is unused.
fn cmpShells(_: void, a: KV, b: KV) std.math.Order {
    return std.math.order(a.score, b.score);
}
|
||||||
|
|
||||||
|
// getSections picks the most popular shells and builds ShellSections from
// them.
//
// Shells that were put only once are skipped. The remaining ones are ranked
// by score = len(shell) * count, highest first, and at most
// min(limit, MaxShells) of them are passed to ShellSections.init. The slices
// handed over still point at keys owned by `self.counts`.
//
// Returns an error when the priority queue cannot allocate. The return type
// is therefore an error union; the previous non-error signature could not
// compile because the body already used `try`.
pub fn getSections(self: *Self, limit: u32) !ShellSections {
    var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
    defer deque.deinit();

    var it = self.counts.iterator();
    while (it.next()) |entry| {
        if (entry.value_ptr.* == 1) {
            continue;
        }
        const score = @truncate(u32, entry.key_ptr.*.len) * entry.value_ptr.*;
        try deque.add(KV{ .shell = entry.key_ptr.*, .score = score });
    }

    // ShellSections.init takes a BoundedArray of at most MaxShells entries,
    // so the caller-provided limit is capped accordingly.
    var topShells = try BoundedArray([]const u8, MaxShells).init(0);
    var remaining: u32 = if (limit < MaxShells) limit else MaxShells;
    while (remaining > 0) : (remaining -= 1) {
        const best = deque.removeMaxOrNull() orelse break;
        topShells.appendAssumeCapacity(best.shell);
    }

    return ShellSections.init(self.allocator, topShells);
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Two slices with the same bytes must compare equal by content.
// testing.expectEqual on slices compares pointer and length only, so it
// passes here just when both literals happen to share storage;
// expectEqualStrings compares the bytes themselves.
test "[]u8 comparison" {
    const s1: []const u8 = "/bin/bash";
    const s2: []const u8 = "/bin/bash";
    try testing.expectEqualStrings(s1, s2);
}
|
||||||
|
|
||||||
|
// Feeds a handful of shells into ShellPopcon and expects the two highest
// scoring ones back, best first. Scores are len(shell) * count:
//   /bin/very-long-shell-name-ought-to-be-first: 43 * 2 = 86
//   /bin/zsh:                                     8 * 4 = 32
//   /bin/bash:                                    9 * 3 = 27
//   /bin/nobody: put once, so not a candidate.
test "basic shellpop" {
    var popcon = ShellPopcon.init(testing.allocator);
    defer popcon.deinit();

    try popcon.put("/bin/bash");
    try popcon.put("/bin/bash");
    try popcon.put("/bin/bash");
    try popcon.put("/bin/zsh");
    try popcon.put("/bin/zsh");
    try popcon.put("/bin/zsh");
    try popcon.put("/bin/zsh");
    try popcon.put("/bin/nobody");
    try popcon.put("/bin/very-long-shell-name-ought-to-be-first");
    try popcon.put("/bin/very-long-shell-name-ought-to-be-first");

    const stderr = std.io.getStdErr().writer();

    // NOTE(review): ShellPopcon declares no `top` method in this file (only
    // `getSections`); this call needs to be ported once that API settles.
    var topshells = try popcon.top(2);
    defer topshells.deinit();
    var shellStrings = topshells.keys();
    // The expected value goes first and needs an explicit type: a runtime
    // usize cannot coerce to the comptime_int of a bare literal.
    try testing.expectEqual(@as(usize, 2), shellStrings.len);

    try stderr.print("\n", .{});
    try stderr.print("0th type: {s}\n", .{@typeName(@TypeOf(shellStrings[0]))});
    try stderr.print("1st type: {s}\n", .{@typeName(@TypeOf(shellStrings[1]))});
    try stderr.print("0th: {s}, len: {d}\n", .{ shellStrings[0], shellStrings[0].len });
    try stderr.print("0ww: /bin/very-long-shell-name-ought-to-be-first\n", .{});
    try stderr.print("1st: {s}, len: {d}\n", .{ shellStrings[1], shellStrings[1].len });
    try stderr.print("1ww: /bin/zsh\n", .{});

    // Compare contents: the stored keys are heap copies, so a pointer-based
    // expectEqual against a string literal can never succeed.
    try testing.expectEqualStrings("/bin/very-long-shell-name-ought-to-be-first", shellStrings[0]);
    try testing.expectEqualStrings("/bin/zsh", shellStrings[1]);
}
|
|
@ -0,0 +1,4 @@
|
||||||
|
// Test entry point: references main.zig and shellpop.zig from a test block so
// that both files become part of this test build.
// NOTE(review): presumably this file is the src/test_main.zig root that
// build.zig passes to b.addTest -- confirm.
test "turbonss test suite" {
|
||||||
|
_ = @import("main.zig");
|
||||||
|
_ = @import("shellpop.zig");
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
// DB is the in-memory set of users and groups, each kept in a string-keyed
// hash map.
// NOTE(review): the keys are presumably the user/group names -- confirm.
const DB = struct {
|
||||||
|
users: std.StringHashMap(User),
|
||||||
|
groups: std.StringHashMap(Group),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Group is a single group entry: its numeric gid, its name and a string set
// of members.
// NOTE(review): `members` presumably holds usernames -- confirm.
const Group = struct {
|
||||||
|
gid: u32,
|
||||||
|
name: []const u8,
|
||||||
|
members: std.BufSet,
|
||||||
|
};
|
||||||
|
|
||||||
|
// User is a single, unpacked user entry: numeric uid and gid, the string
// fields (name, gecos, home, shell) and a string set of groups.
// NOTE(review): `groups` presumably holds the names of the user's additional
// groups -- confirm.
const User = struct {
|
||||||
|
uid: u32,
|
||||||
|
gid: u32,
|
||||||
|
name: []const u8,
|
||||||
|
gecos: []const u8,
|
||||||
|
home: []const u8,
|
||||||
|
shell: []const u8,
|
||||||
|
groups: std.BufSet,
|
||||||
|
};
|
||||||
|
|
||||||
|
// PackedUser is the bit-packed form of a user entry. The field widths add up
// to 120 bits (32+32+29+1+6+6+1+5+8).
const PackedUser = packed struct {
|
||||||
|
uid: u32,
|
||||||
|
gid: u32,
|
||||||
|
additional_gids_offset: u29,
|
||||||
|
// Per the README: 1 means the shell is stored with the user's data, 0 means
// it lives in the external Shells section.
shell_here: u1,
|
||||||
|
// Per the README: the shell's length when shell_here is set, otherwise its
// index in the ShellIndex area.
shell_len_or_place: u6,
|
||||||
|
// NOTE(review): the README permits homedirs of 1-64 bytes while u6 holds
// 0-63, so this presumably stores length-1 -- confirm.
homedir_len: u6,
|
||||||
|
username_is_a_suffix: u1,
|
||||||
|
// NOTE(review): presumably an offset when username_is_a_suffix is set and a
// length otherwise -- confirm against the packing code.
username_offset_or_len: u5,
|
||||||
|
// u8 covers the README's permitted gecos length of 0-255 bytes.
gecos_len: u8,
|
||||||
|
};
|
Loading…
Reference in New Issue