update user record
This commit is contained in:
parent
1c39007f6b
commit
84bb5f7fd7
101
README.md
101
README.md
|
@ -35,66 +35,28 @@ ID/s.
|
||||||
id(1) works as follows:
|
id(1) works as follows:
|
||||||
- lookup user by name.
|
- lookup user by name.
|
||||||
- get all additional gids (an array attached to a member).
|
- get all additional gids (an array attached to a member).
|
||||||
- for each additional gid, return the group name.
|
- for each additional gid, get the group name.
|
||||||
|
|
||||||
Assuming a member is in ~100 groups on average, that's 1M group lookups per
|
Assuming a member is in ~100 groups on average, that's 1M group lookups per
|
||||||
second. We need to convert gid to a group index quickly.
|
second (cmph can do 1M in <200ms). We need to convert gid to a group index
|
||||||
|
quickly.
|
||||||
|
|
||||||
Data structures
|
API
|
||||||
---------------
|
---
|
||||||
|
|
||||||
Basic data structures that allow efficient storage:
|
|
||||||
|
|
||||||
```lang=c
|
|
||||||
// reminder:
|
|
||||||
typedef uid_t uint32;
|
|
||||||
typedef gid_t uint32;
|
|
||||||
|
|
||||||
// 6*32b = 6*4B = 24B/user
|
|
||||||
typedef struct {
|
|
||||||
uid_t uid;
|
|
||||||
gid_t gid;
|
|
||||||
name_offset uint32; // offset into *usernames
|
|
||||||
gecos_offset uint32; // offset into *gecos
|
|
||||||
shell_offset uint32; // offset into *shells
|
|
||||||
additional_groups_offset uint32; // offset into additional_groups
|
|
||||||
} user;
|
|
||||||
|
|
||||||
const char* usernames; // all concatenated usernames, fsst-compressed
|
|
||||||
const char* gecoss; // all concatenated gecos, fsst-compressed
|
|
||||||
const char* shells; // all concatenated home directories, fsst-compressed
|
|
||||||
const uint8_t additional_groups; // all additional_groups, turbo compressed
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
gid_t gid;
|
|
||||||
name_offset uint32; // offset into *groupnames
|
|
||||||
members_offset uint32; // offset into members
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* groupnames; // all concatenated group names, fsst-compressed
|
|
||||||
const uint8_8 members; // all concatenated members, turbo compressed
|
|
||||||
```
|
|
||||||
|
|
||||||
"turbo compression" encodes a list of uids/gids with this algorithm:
|
|
||||||
1. sort ascending.
|
|
||||||
2. extract deltas and subtract 1: `awk '{diff=$0-prev; prev=$0; print
|
|
||||||
diff-1}'`.
|
|
||||||
3. varint-encode these deltas into a uint32, like protobuf or utf8.
|
|
||||||
|
|
||||||
With typical group memberships (as of writing) this requires ~1.3-1.5 bytes per
|
|
||||||
entry.
|
|
||||||
|
|
||||||
Indexes
|
|
||||||
-------
|
|
||||||
|
|
||||||
The following operations need to be fast, in order of importance:
|
The following operations need to be fast, in order of importance:
|
||||||
|
|
||||||
1. lookup gid -> group (this is on hot path in id).
|
1. lookup gid -> group (this is on hot path in id) with or without members (2
|
||||||
|
separate calls).
|
||||||
2. lookup uid -> user.
|
2. lookup uid -> user.
|
||||||
3. lookup groupname -> group.
|
3. lookup groupname -> group.
|
||||||
4. lookup username -> user.
|
4. lookup username -> user.
|
||||||
5. (optional) iterate users using a defined order (`getent passwd`).
|
5. lookup uid -> list of gids.
|
||||||
6. (optional) iterate groups using a defined order (`getent group`).
|
6. (optional) iterate users using a defined order (`getent passwd`).
|
||||||
|
7. (optional) iterate groups using a defined order (`getent group`).
|
||||||
|
|
||||||
|
Indexes
|
||||||
|
-------
|
||||||
|
|
||||||
Preliminary results of playing with [cmph][cmph]:
|
Preliminary results of playing with [cmph][cmph]:
|
||||||
|
|
||||||
|
@ -104,34 +66,31 @@ BDZ: tried b=3, b=7 (default), and b=10.
|
||||||
* Latency for 1M keys: (170ms, 180ms, 230ms).
|
* Latency for 1M keys: (170ms, 180ms, 230ms).
|
||||||
* Packed vs non-packed latency differences are not meaningful.
|
* Packed vs non-packed latency differences are not meaningful.
|
||||||
|
|
||||||
CHM retains order, however, 1M keys weigh 8MB. 10k keys are ~20x larger with
|
CHM retains order, however, 1M keys weigh 8MB. 10k keys are ~20x larger with
|
||||||
CHM than with BDZ, eliminating the benefit of preserved ordering.
|
CHM than with BDZ, eliminating the benefit of preserved ordering.
|
||||||
|
|
||||||
Full file structure
|
Full file structure
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
The file structure starts with the metadata field. All indexes are number of
|
The file structure starts with magic and version number, followed by a list of
|
||||||
bytes, relative to the beginning of the file.
|
User, Group records and their indices. All indices are number of bytes,
|
||||||
|
relative to the beginning of the file.
|
||||||
|
|
||||||
```
|
```
|
||||||
const Offsets = struct {
|
// /home/motiejusMotiejus.Jakstys is 16+30=46b for "my" record.
|
||||||
magic: [4]u32,
|
const User = struct {
|
||||||
version: u32,
|
uid: u32,
|
||||||
|
gid: u32,
|
||||||
num_users, size_num_users: u32,
|
additional_gids_offset: u29,
|
||||||
num_groups, size_num_groups: u32,
|
shell_here: u1, // whether it's stored "here" or in another place. Docs TBD
|
||||||
cmph_gid2group: u32,
|
shell_len: u6,
|
||||||
size_cmph_gid2group: u32,
|
home_len: u6,
|
||||||
cmph_uid2user, size_cmph_uid2user: u32,
|
username_len: u6,
|
||||||
cmph_groupname2group, size_cmph_groupname2group: u32,
|
gecos_len: u8,
|
||||||
cmph_username2user, size_cmph_username2user: u32,
|
// a variable-sized array that will be stored immediately after this
|
||||||
structs_group, size_structs_group: u32,
|
// struct.
|
||||||
structs_user, size_structs_user: u32,
|
stringdata []u8;
|
||||||
fsst_usernames_homes, size_fsst_usernames_homes: u32,
|
|
||||||
fsst_groupnames, size_fsst_usernames_homes: u32,
|
|
||||||
fsst_shells, size_fsst_shells: u32,
|
|
||||||
}
|
}
|
||||||
```
|
|
||||||
|
|
||||||
`magic` must be 0xf09fa4b7, and `version` must be `0x00`. The remaining fields
|
`magic` must be 0xf09fa4b7, and `version` must be `0x00`. The remaining fields
|
||||||
are indexes to further sections of the file with their sizes in bytes. All
|
are indexes to further sections of the file with their sizes in bytes. All
|
||||||
|
|
Loading…
Reference in New Issue