From 40deb3f0be4e4f3a9a6b10c9d2117cec24d69954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Mon, 28 Feb 2022 06:14:00 +0200 Subject: [PATCH] preparing for bringing this all together --- README.md | 37 +++++++++++++++++++++++++------------ src/compress.zig | 7 +++---- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 322629b..b81420f 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,11 @@ Tight packing places some constraints on the underlying data: - Permitted comment ("gecos") length: 0-255 bytes. - User name, groupname, gecos and shell must be utf8-encoded. +Sorting is stable. In v0: +- Groups are sorted by gid, ascending. +- Users are sorted by their name, ascending by the unicode codepoints + (locale-independent). + Checking out and building ------------------------- @@ -182,10 +187,9 @@ the beginning of the section. ``` const PackedGroup = packed struct { gid: u32, - // index to a separate structure with a list of members. The memberlist is - // 2^5-byte aligned (32b), this is an index there. - members_offset: u27, - groupname_len: u5, + // index to a separate structure with a list of members. + members_offset: u32, + groupname_len: u8, // max is 32, but have too much space here. // a groupname_len-sized string groupname []u8; } @@ -281,7 +285,7 @@ Similarly, when user's groups are resolved in (2), they are not always necessary (i.e. not part of `struct user*`), therefore the memberships themselves are stored out of bound. -`Groupmembers` and `Username2gids` store group and user memberships +`Groupmembers` and `UserGids` store group and user memberships respectively. Membership IDs are used in their entirety — not necessitating random access, thus suitable for tight packing and varint encoding. @@ -290,7 +294,7 @@ random access, thus suitable for tight packing and varint encoding. - For each user — a list of gids, because `initgroups_dyn` (and friends) returns an array of gids. 
-An entry of `Groupmembers` and `Username2gids` looks like this piece of +An entry of `Groupmembers` and `UserGids` looks like this piece of pseudo-code: ``` @@ -299,7 +303,7 @@ const PackedList = struct { Members: [Length]varint, } const Groupmembers = PackedList; -const Username2gids = PackedList; +const UserGids = PackedList; ``` Indices @@ -355,15 +359,24 @@ STATUS SECTION SIZE DESCRIPTION idx_groupname2group len(group)*29/8 bdz->offset Groups idx_uid2user len(user)*29/8 bdz->offset Users idx_name2user len(user)*29/8 bdz->offset Users - idx_username2gids len(user)*29/8 bdz->offset Username2gids -✅ ShellIndex len(shells)*2 Shell index array -✅ ShellBlob <= 4032 Shell data blob (max 63*64 bytes) + idx_username2gids len(user)*29/8 bdz->offset UserGids +✅ ShellIndex len(shells)*2 shell index array +✅ ShellBlob <= 4032 shell data blob (max 63*64 bytes) ✅ Groups ? packed Group entries (8b padding) ✅ Users ? packed User entries (8b padding) - Groupmembers ? per-group memberlist (32b padding) - Username2gids ? Per-user gidlist entries (8b padding) + Groupmembers ? per-group memberlist (no padding) + UserGids ? per-user gidlist entries (8b padding) ``` +Section creation order: + +1. Groupmembers, UserGids. No dependencies. +2. ShellIndex, ShellBlob. No dependencies. +3. `bdz_*`. No dependencies. +4. Groups. Requires Groupmembers. +5. Users. Requires Groupmembers and ShellIndex. +6. `idx_*`. Requires offsets to Groups and Users. + [git-subtrac]: https://apenwarr.ca/log/20191109 [cmph]: http://cmph.sourceforge.net/ [id]: https://linux.die.net/man/1/id diff --git a/src/compress.zig b/src/compress.zig index 7368251..4677867 100644 --- a/src/compress.zig +++ b/src/compress.zig @@ -1,9 +1,8 @@ // // varint64 []const u8 variants // -// Thanks to https://github.com/gsquire/zig-snappy/blob/master/snappy.zig and golang's -// varint implementation. - +// Thanks to https://github.com/gsquire/zig-snappy/blob/master/snappy.zig and +// golang's varint implementation. 
const std = @import("std"); // compresses a strictly incrementing sorted slice of integers using delta @@ -96,7 +95,7 @@ pub const Varint = struct { bytesRead: usize, }; -const maxVarintLen64 = 10; +pub const maxVarintLen64 = 10; // https://golang.org/pkg/encoding/binary/#Uvarint pub fn uvarint(buf: []const u8) error{Overflow}!Varint {