diff --git a/README.md b/README.md index d3b18da..7df9838 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,10 @@ $ zig build test Other commands will be documented as they are implemented. -This project uses [git subtrac][git-subtrac] for managing dependencies. +This project uses [git subtrac][git-subtrac] for managing dependencies. They +work just like regular submodules, except all the refs of the submodules are in +this repository. Repeat after me: all the submodules are in this repository. +So if you have a copy of this repo, dependencies will not disappear. remarks on `id(1)` ------------------ @@ -131,12 +134,12 @@ understand which operations need to be fast; in order of importance: 3. lookup groupname -> group. 4. lookup username -> user. -These indices can use perfect hashing like [cmph][cmph]: a perfect hash hashes -a list of bytes to a sequential list of integers. Perfect hashing algorithms -require some space, and take some time to calculate ("hashing duration"). I've -tested BDZ, which hashes [][]u8 to a sequential list of integers (not -preserving order) and CHM, preserves order. BDZ accepts an optional argument `3 -<= b <= 10`. +These indices can use perfect hashing like [bdz from cmph][cmph]: a perfect +hash hashes a list of bytes to a sequential list of integers. Perfect hashing +algorithms require some space, and take some time to calculate ("hashing +duration"). I've tested BDZ, which hashes [][]u8 to a sequential list of +integers (not preserving order) and CHM, which preserves order. BDZ accepts an +optional argument `3 <= b <= 10`. * BDZ algorithm requires (b=3, 900KB, b=7, 338KB, b=10, 306KB) for 1M values. * Latency to resolve 1M keys: (170ms, 180ms, 230ms, respectively). @@ -159,9 +162,9 @@ OFFSET TYPE NAME DESCRIPTION u8 num_shells max value: 63. Padding is strange on little endian. 
8 u32 num_users number of passwd entries 12 u32 num_groups number of group entries - 16 u32 offset_cmph_uid2user - 20 u32 offset_cmph_groupname2group - 24 u32 offset_cmph_username2user + 16 u32 offset_bdz_uid2user + 20 u32 offset_bdz_groupname2group + 24 u32 offset_bdz_username2user 28 u32 offset_idx offset to the first idx_ section 32 u32 offset_groups 36 u32 offset_users @@ -182,7 +185,7 @@ all `offset_*` values could be `u26`. As `u32` is easier to visualize with xxd, and the header block fits to 64 bytes anyway, we are keeping them as u32 now. Sections whose lengths can be calculated do not have a corresponding `offset_*` -header field. For example, `cmph_gid2group` comes immediately after the header, +header field. For example, `bdz_gid2group` comes immediately after the header, and `idx_groupname2group` comes after `idx_gid2group`, whose offset is `offset_idx`, and size can be calculated. @@ -330,14 +333,14 @@ Each section is padded to 64 bytes. ``` SECTION SIZE DESCRIPTION Header 48 see "Turbonss header" section -cmph_gid2group ? gid->group cmph -cmph_uid2user ? uid->user cmph -cmph_groupname2group ? groupname->group cmph -cmph_username2user ? username->user cmph -idx_gid2group len(group)*4*29/32 cmph->offset gid2group -idx_groupname2group len(group)*4*29/32 cmph->offset groupname2group -idx_uid2user len(user)*4*29/32 cmph->offset uid2user -idx_username2user len(user)*4*29/32 cmph->offset username2user +bdz_gid2group ? gid->group bdz +bdz_uid2user ? uid->user bdz +bdz_groupname2group ? groupname->group bdz +bdz_username2user ? username->user bdz +idx_gid2group len(group)*4*29/32 bdz->offset gid2group +idx_groupname2group len(group)*4*29/32 bdz->offset groupname2group +idx_uid2user len(user)*4*29/32 bdz->offset uid2user +idx_username2user len(user)*4*29/32 bdz->offset username2user ShellIndex len(shells)*2 Shell index array ShellBlob <= 4032 Shell data blob (max 63*64 bytes) Groups ? packed Group entries (8b padding) @@ -346,7 +349,7 @@ groupmembers ? 
per-group memberlist (32b padding) additional_gids ? per-user grouplist (8b padding) ``` -[git-subtrac]: https://github.com/apenwarr/git-subtrac/ +[git-subtrac]: https://apenwarr.ca/log/20191109 [cmph]: http://cmph.sourceforge.net/ [id]: https://linux.die.net/man/1/id [nsswitch]: https://linux.die.net/man/5/nsswitch.conf diff --git a/src/header.zig b/src/header.zig index ea55359..c49278d 100644 --- a/src/header.zig +++ b/src/header.zig @@ -8,7 +8,7 @@ const Bom = 0x1234; pub const SectionLength = 64; -const HeaderError = error{ +const InvalidHeader = error{ InvalidMagic, InvalidVersion, InvalidBom, @@ -23,16 +23,16 @@ const Header = packed struct { num_shells: u8, num_users: u32, num_groups: u32, - offset_cmph_uid2user: u32, - offset_cmph_groupname2group: u32, - offset_cmph_username2user: u32, + offset_bdz_uid2user: u32, + offset_bdz_groupname2group: u32, + offset_bdz_username2user: u32, offset_idx: u32, offset_groups: u32, offset_users: u32, offset_groupmembers: u32, offset_additional_gids: u32, - pub fn init(blob: [HeaderSize]u8) HeaderError!Header { + pub fn init(blob: [HeaderSize]u8) InvalidHeader!Header { const self = @bitCast(Header, blob); for (Magic) |item, index| { if (self.magic[index] != item) return error.InvalidMagic; @@ -48,9 +48,9 @@ const Header = packed struct { } const offsets = [_]u32{ - self.offset_cmph_uid2user, - self.offset_cmph_groupname2group, - self.offset_cmph_username2user, + self.offset_bdz_uid2user, + self.offset_bdz_groupname2group, + self.offset_bdz_username2user, self.offset_idx, self.offset_groups, self.offset_users, @@ -73,12 +73,15 @@ const Header = packed struct { const testing = std.testing; -test "constants and types are reasonable" { +test "header is byte-aligned" { try testing.expectEqual(HeaderSize * 8, @bitSizeOf(Header)); +} + +test "Section length is a power of two" { try testing.expect(std.math.isPowerOfTwo(SectionLength)); } -test "header pack and unpack" { +test "header pack, unpack and validation" { const goodHeader 
= Header{ .magic = Magic, .version = Version, @@ -86,9 +89,9 @@ test "header pack and unpack" { .num_shells = 0, .num_users = 0, .num_groups = 0, - .offset_cmph_uid2user = 0, - .offset_cmph_groupname2group = 0, - .offset_cmph_username2user = 0, + .offset_bdz_uid2user = 0, + .offset_bdz_groupname2group = 0, + .offset_bdz_username2user = 0, .offset_idx = 0, .offset_groups = 0, .offset_users = 0, @@ -96,8 +99,7 @@ test "header pack and unpack" { .offset_additional_gids = 0, }; - const blob = goodHeader.asArray(); - const gotHeader = try Header.init(blob); + const gotHeader = try Header.init(goodHeader.asArray()); try testing.expectEqual(goodHeader, gotHeader); { @@ -120,7 +122,7 @@ test "header pack and unpack" { { var header = goodHeader; - header.offset_cmph_uid2user = 65; + header.offset_bdz_uid2user = 65; try testing.expectError(error.InvalidOffset, Header.init(header.asArray())); } }