test and README tiny updates

2022-02-18 17:24:22 +02:00
parent 80ec63b6a4
commit 411fb91fd1
2 changed files with 41 additions and 36 deletions
--- a/README.md
+++ b/README.md
@@ -92,7 +92,10 @@ $ zig build test

 Other commands will be documented as they are implemented.

-This project uses [git subtrac][git-subtrac] for managing dependencies.
+This project uses [git subtrac][git-subtrac] for managing dependencies. They
+work just like regular submodules, except all the refs of the submodules are in
+this repository. Repeat after me: all the submodules are in this repository.
+So if you have a copy of this repo, dependencies will not disappear.

 remarks on `id(1)`
 ------------------
@@ -131,12 +134,12 @@ understand which operations need to be fast; in order of importance:
 3. lookup groupname -> group.
 4. lookup username -> user.

-These indices can use perfect hashing like [cmph][cmph]: a perfect hash hashes
-a list of bytes to a sequential list of integers. Perfect hashing algorithms
-require some space, and take some time to calculate ("hashing duration"). I've
-tested BDZ, which hashes [][]u8 to a sequential list of integers (not
-preserving order) and CHM, preserves order. BDZ accepts an optional argument `3
-<= b <= 10`.
+These indices can use perfect hashing like [bdz from cmph][cmph]: a perfect
+hash hashes a list of bytes to a sequential list of integers. Perfect hashing
+algorithms require some space, and take some time to calculate ("hashing
+duration"). I've tested BDZ, which hashes [][]u8 to a sequential list of
+integers (not preserving order) and CHM, which preserves order. BDZ accepts an
+optional argument `3 <= b <= 10`.

 * BDZ algorithm requires (b=3, 900KB, b=7, 338KB, b=10, 306KB) for 1M values.
 * Latency to resolve 1M keys: (170ms, 180ms, 230ms, respectively).
@@ -159,9 +162,9 @@ OFFSET     TYPE     NAME                          DESCRIPTION
             u8     num_shells                    max value: 63. Padding is strange on little endian.
   8        u32     num_users                     number of passwd entries
  12        u32     num_groups                    number of group entries
-  16        u32     offset_cmph_uid2user
-  20        u32     offset_cmph_groupname2group
-  24        u32     offset_cmph_username2user
+  16        u32     offset_bdz_uid2user
+  20        u32     offset_bdz_groupname2group
+  24        u32     offset_bdz_username2user
  28        u32     offset_idx                    offset to the first idx_ section
  32        u32     offset_groups
  36        u32     offset_users
@@ -182,7 +185,7 @@ all `offset_*` values could be `u26`. As `u32` is easier to visualize with xxd,
 and the header block fits to 64 bytes anyway, we are keeping them as u32 now.

 Sections whose lengths can be calculated do not have a corresponding `offset_*`
-header field. For example, `cmph_gid2group` comes immediately after the header,
+header field. For example, `bdz_gid2group` comes immediately after the header,
 and `idx_groupname2group` comes after `idx_gid2group`, whose offset is
 `offset_idx`, and size can be calculated.

@@ -330,14 +333,14 @@ Each section is padded to 64 bytes.
 ```
 SECTION               SIZE                 DESCRIPTION
 Header                48                   see "Turbonss header" section
-cmph_gid2group        ?                    gid->group cmph
-cmph_uid2user         ?                    uid->user cmph
-cmph_groupname2group  ?                    groupname->group cmph
-cmph_username2user    ?                    username->user cmph
-idx_gid2group         len(group)*4*29/32   cmph->offset gid2group
-idx_groupname2group   len(group)*4*29/32   cmph->offset groupname2group
-idx_uid2user          len(user)*4*29/32    cmph->offset uid2user
-idx_username2user     len(user)*4*29/32    cmph->offset username2user
+bdz_gid2group         ?                    gid->group bdz
+bdz_uid2user          ?                    uid->user bdz
+bdz_groupname2group   ?                    groupname->group bdz
+bdz_username2user     ?                    username->user bdz
+idx_gid2group         len(group)*4*29/32   bdz->offset gid2group
+idx_groupname2group   len(group)*4*29/32   bdz->offset groupname2group
+idx_uid2user          len(user)*4*29/32    bdz->offset uid2user
+idx_username2user     len(user)*4*29/32    bdz->offset username2user
 ShellIndex            len(shells)*2        Shell index array
 ShellBlob             <= 4032              Shell data blob (max 63*64 bytes)
 Groups                ?                    packed Group entries (8b padding)
@@ -346,7 +349,7 @@ groupmembers          ?                    per-group memberlist (32b padding)
 additional_gids       ?                    per-user grouplist (8b padding)
 ```

-[git-subtrac]: https://github.com/apenwarr/git-subtrac/
+[git-subtrac]: https://apenwarr.ca/log/20191109
 [cmph]: http://cmph.sourceforge.net/
 [id]: https://linux.die.net/man/1/id
 [nsswitch]: https://linux.die.net/man/5/nsswitch.conf
--- a/src/header.zig
+++ b/src/header.zig
@@ -8,7 +8,7 @@ const Bom = 0x1234;

 pub const SectionLength = 64;

-const HeaderError = error{
+const InvalidHeader = error{
    InvalidMagic,
    InvalidVersion,
    InvalidBom,
@@ -23,16 +23,16 @@ const Header = packed struct {
    num_shells: u8,
    num_users: u32,
    num_groups: u32,
-    offset_cmph_uid2user: u32,
-    offset_cmph_groupname2group: u32,
-    offset_cmph_username2user: u32,
+    offset_bdz_uid2user: u32,
+    offset_bdz_groupname2group: u32,
+    offset_bdz_username2user: u32,
    offset_idx: u32,
    offset_groups: u32,
    offset_users: u32,
    offset_groupmembers: u32,
    offset_additional_gids: u32,

-    pub fn init(blob: [HeaderSize]u8) HeaderError!Header {
+    pub fn init(blob: [HeaderSize]u8) InvalidHeader!Header {
        const self = @bitCast(Header, blob);
        for (Magic) |item, index| {
            if (self.magic[index] != item) return error.InvalidMagic;
@@ -48,9 +48,9 @@ const Header = packed struct {
        }

        const offsets = [_]u32{
-            self.offset_cmph_uid2user,
-            self.offset_cmph_groupname2group,
-            self.offset_cmph_username2user,
+            self.offset_bdz_uid2user,
+            self.offset_bdz_groupname2group,
+            self.offset_bdz_username2user,
            self.offset_idx,
            self.offset_groups,
            self.offset_users,
@@ -73,12 +73,15 @@ const Header = packed struct {

 const testing = std.testing;

-test "constants and types are reasonable" {
+test "header is byte-aligned" {
    try testing.expectEqual(HeaderSize * 8, @bitSizeOf(Header));
+}
+
+test "Section length is a power of two" {
    try testing.expect(std.math.isPowerOfTwo(SectionLength));
 }

-test "header pack and unpack" {
+test "header pack, unpack and validation" {
    const goodHeader = Header{
        .magic = Magic,
        .version = Version,
@@ -86,9 +89,9 @@ test "header pack and unpack" {
        .num_shells = 0,
        .num_users = 0,
        .num_groups = 0,
-        .offset_cmph_uid2user = 0,
-        .offset_cmph_groupname2group = 0,
-        .offset_cmph_username2user = 0,
+        .offset_bdz_uid2user = 0,
+        .offset_bdz_groupname2group = 0,
+        .offset_bdz_username2user = 0,
        .offset_idx = 0,
        .offset_groups = 0,
        .offset_users = 0,
@@ -96,8 +99,7 @@ test "header pack and unpack" {
        .offset_additional_gids = 0,
    };

-    const blob = goodHeader.asArray();
-    const gotHeader = try Header.init(blob);
+    const gotHeader = try Header.init(goodHeader.asArray());
    try testing.expectEqual(goodHeader, gotHeader);

    {
@@ -120,7 +122,7 @@ test "header pack and unpack" {

    {
        var header = goodHeader;
-        header.offset_cmph_uid2user = 65;
+        header.offset_bdz_uid2user = 65;
        try testing.expectError(error.InvalidOffset, Header.init(header.asArray()));
    }
 }