add utf8 validation; rework user iterator

This commit is contained in:
Motiejus Jakštys 2022-02-23 06:07:53 +02:00 committed by Motiejus Jakštys
parent 0e5365127a
commit bb4b5eba03
2 changed files with 102 additions and 87 deletions

View File

@ -69,7 +69,7 @@ Tight packing places some constraints on the underlying data:
- Permitted length of username and groupname: 1-32 bytes.
- Permitted length of shell and home: 1-64 bytes.
- Permitted comment ("gecos") length: 0-1023 bytes.
- User name, groupname and gecos must be utf8-encoded.
- User name, groupname, gecos and shell must be utf8-encoded.
Checking out and building

View File

@ -17,7 +17,7 @@ pub const PackedUser = packed struct {
gecos_len: u10,
padding: u3,
// blobSize returns the length of the blob storing string values.
// blobLength returns the length of the blob storing string values.
pub fn blobLength(self: *const PackedUser) usize {
var result: usize = self.realHomeLen();
if (!self.name_is_a_suffix) {
@ -69,7 +69,10 @@ pub const UserWriter = struct {
appendTo: *ArrayList(u8),
shellIndexFn: shellIndexFnType,
pub fn init(appendTo: *ArrayList(u8), shellIndexFn: shellIndexFnType) UserWriter {
pub fn init(
appendTo: *ArrayList(u8),
shellIndexFn: shellIndexFnType,
) UserWriter {
return UserWriter{
.appendTo = appendTo,
.shellIndexFn = shellIndexFn,
@ -84,12 +87,33 @@ pub const UserWriter = struct {
pub fn appendUser(self: *UserWriter, user: User) !void {
pub fn validateUtf8(s: []const u8) error{InvalidRecord}!void {
if (!std.unicode.utf8ValidateSlice(s)) {
return error.InvalidRecord;
// FIXME(motiejus) record validation should return a separate type. For the
// User case, it should be length-bound slices and utf8 codepoints instead of
// strings.
// zig has neither error contexts
// nor length-limited slices.
// (It does have bounded_array, but that preallocates the maximum length,
// which is not great for User records). So I am using those excuses to
// do the validation here. I may move it once I learn the language better.
const appendUserErr = error{InvalidRecord} || Allocator.Error;
pub fn appendUser(self: *UserWriter, user: User) appendUserErr!void {
const home_len = try downCast(u6, user.home.len - 1);
const name_len = try downCast(u5, - 1);
const shell_len = try downCast(u6, - 1);
const gecos_len = try downCast(u10, user.gecos.len);
try validateUtf8(user.home);
try validateUtf8(;
try validateUtf8(;
try validateUtf8(user.gecos);
var puser = PackedUser{
.uid = user.uid,
.gid = user.gid,
@ -123,92 +147,93 @@ pub const UserWriter = struct {
pub const UserReader = struct {
const shellIndexProto = fn (u6) []const u8;
blob: []u8,
section: []const u8,
shellIndex: shellIndexProto,
pub const PackedEntry = struct {
packed_user: *PackedUser,
blob: []const u8,
section: []const u8,
pub fn init(blob: []u8) UserReader {
pub fn init(section: []u8, shellIndex: shellIndexProto) UserReader {
return UserReader{
.blob = blob,
.section = section,
.shellIndex = shellIndex,
pub const PackedIterator = struct {
ur: *UserReader,
index: usize = 0,
pub const Entry = struct {
user: User,
nextOffset: usize,
pub fn next(it: *PackedIterator) ?PackedEntry {
if (it.index == it.ur.blob.len) return null;
assert(it.index < it.ur.blob.len);
const endUser = it.index + @sizeOf(PackedUser);
var packedUser = std.mem.bytesAsValue(
// atOffset returns the User stored at the given offset of the User section,
// together with the offset of the next user. Returns null when the offset
// is at the end of the section.
pub fn atOffset(self: *UserReader, index: usize) ?Entry {
if (index == self.section.len) return null;
assert(index < self.section.len);
const endUser = index + @sizeOf(PackedUser);
var u = std.mem.bytesAsValue(
const startBlob = endUser;
const endBlob = startBlob + packedUser.blobLength();
it.index = pad.roundUp(usize, PackedUserAlignmentBits, endBlob);
return PackedEntry{
.packed_user = packedUser,
.blob = it.ur.blob[startBlob..endBlob],
const endBlob = startBlob + u.blobLength();
const section = self.section[startBlob..endBlob];
pub fn packedIterator(self: *UserReader) PackedIterator {
return .{ .ur = self };
pub const Iterator = struct {
pit: PackedIterator,
shellIndex: shellIndexProto,
pub fn next(it: *Iterator) ?User {
const entry = orelse return null;
const u = entry.packed_user;
const home = entry.blob[0..u.realHomeLen()];
const home = section[0..u.realHomeLen()];
var name: []const u8 = undefined;
var pos: usize = undefined;
if (u.name_is_a_suffix) {
const name_start = u.realHomeLen() - u.realNameLen();
name = entry.blob[name_start..u.realHomeLen()];
name = section[name_start..u.realHomeLen()];
pos = u.realHomeLen();
} else {
const name_start = u.realHomeLen();
name = entry.blob[name_start .. name_start + u.realNameLen()];
name = section[name_start .. name_start + u.realNameLen()];
pos = name_start + u.realNameLen();
const gecos = entry.blob[pos .. pos + u.realGecosLen()];
const gecos = section[pos .. pos + u.realGecosLen()];
pos += u.realGecosLen();
var shell: []const u8 = undefined;
if (u.shell_here) {
shell = entry.blob[pos .. pos + u.realShellLen()];
shell = section[pos .. pos + u.realShellLen()];
} else {
shell = it.shellIndex(u.shell_len_or_idx);
shell = self.shellIndex(u.shell_len_or_idx);
return User{
return Entry{
.user = User{
.uid = u.uid,
.gid = u.gid,
.name = name,
.gecos = gecos,
.home = home,
.shell = shell,
.nextOffset = pad.roundUp(usize, PackedUserAlignmentBits, endBlob),
pub const Iterator = struct {
ur: *UserReader,
offset: usize = 0,
pub fn next(it: *Iterator) ?User {
if (it.ur.atOffset(it.offset)) |result| {
it.offset = result.nextOffset;
return result.user;
return null;
pub fn iterator(self: *UserReader, shellIndex: shellIndexProto) Iterator {
return .{
.pit = self.packedIterator(),
.shellIndex = shellIndex,
pub fn iterator(self: *UserReader) Iterator {
return Iterator{
.ur = self,
.offset = 0,
@ -236,7 +261,7 @@ fn testShell(index: u6) []const u8 {
test "construct PackedUser blob" {
test "construct PackedUser section" {
var buf = ArrayList(u8).init(testing.allocator);
defer buf.deinit();
@ -267,18 +292,9 @@ test "construct PackedUser blob" {
try writer.appendUser(user);
var rd = UserReader.init(buf.items);
var it = rd.packedIterator();
var i: u32 = 0;
while ( |entry| : (i += 1) {
try testing.expectEqual(users[i].uid, entry.packed_user.uid);
try testing.expectEqual(users[i].gid, entry.packed_user.gid);
var rd = UserReader.init(buf.items, testShell);
var it = rd.iterator(testShell);
var it = rd.iterator();
var i: u32 = 0;
while ( |user| : (i += 1) {
try testing.expectEqual(users[i].uid, user.uid);
@ -288,5 +304,4 @@ test "construct PackedUser blob" {
try testing.expectEqualStrings(users[i].home, user.home);
try testing.expectEqualStrings(users[i].shell,;