From bb4b5eba03fe9c2ebddca593eb403a8bbc163409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 23 Feb 2022 06:07:53 +0200 Subject: [PATCH] add utf8 validation; rework user iterator --- README.md | 2 +- src/user.zig | 187 ++++++++++++++++++++++++++++----------------------- 2 files changed, 102 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index 13b34d4..f14981d 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ Tight packing places some constraints on the underlying data: - Permitted length of username and groupname: 1-32 bytes. - Permitted length of shell and home: 1-64 bytes. - Permitted comment ("gecos") length: 0-1023 bytes. -- User name, groupname and gecos must be utf8-encoded. +- User name, groupname, gecos and shell must be utf8-encoded. Checking out and building ------------------------- diff --git a/src/user.zig b/src/user.zig index d55687c..c99e84e 100644 --- a/src/user.zig +++ b/src/user.zig @@ -17,7 +17,7 @@ pub const PackedUser = packed struct { gecos_len: u10, padding: u3, - // blobSize returns the length of the blob storing string values. + // blobLength returns the length of the blob storing string values. 
pub fn blobLength(self: *const PackedUser) usize { var result: usize = self.realHomeLen(); if (!self.name_is_a_suffix) { @@ -69,7 +69,10 @@ pub const UserWriter = struct { appendTo: *ArrayList(u8), shellIndexFn: shellIndexFnType, - pub fn init(appendTo: *ArrayList(u8), shellIndexFn: shellIndexFnType) UserWriter { + pub fn init( + appendTo: *ArrayList(u8), + shellIndexFn: shellIndexFnType, + ) UserWriter { return UserWriter{ .appendTo = appendTo, .shellIndexFn = shellIndexFn, @@ -84,12 +87,33 @@ pub const UserWriter = struct { }; } - pub fn appendUser(self: *UserWriter, user: User) !void { + pub fn validateUtf8(s: []const u8) error{InvalidRecord}!void { + if (!std.unicode.utf8ValidateSlice(s)) { + return error.InvalidRecord; + } + } + + // FIXME(motiejus) record validation should return a separate type. For User + // case, it should be length-bound slices and utf8-codepoints instead of + // strings. + // + // zig does not have error contexts + // (https://github.com/ziglang/zig/issues/2647) and length-limited slices. + // (It does have bounded_array, but that preallocates the maximum length, + // which is not great for User records). So I am using those excuses to + // do the validation here. I may move it once I learn the language better.
+ const appendUserErr = error{InvalidRecord} || Allocator.Error; + pub fn appendUser(self: *UserWriter, user: User) appendUserErr!void { const home_len = try downCast(u6, user.home.len - 1); const name_len = try downCast(u5, user.name.len - 1); const shell_len = try downCast(u6, user.shell.len - 1); const gecos_len = try downCast(u10, user.gecos.len); + try validateUtf8(user.home); + try validateUtf8(user.name); + try validateUtf8(user.shell); + try validateUtf8(user.gecos); + var puser = PackedUser{ .uid = user.uid, .gid = user.gid, @@ -123,92 +147,93 @@ pub const UserWriter = struct { pub const UserReader = struct { const shellIndexProto = fn (u6) []const u8; - blob: []u8, + section: []const u8, + shellIndex: shellIndexProto, pub const PackedEntry = struct { packed_user: *PackedUser, - blob: []const u8, + section: []const u8, }; - pub fn init(blob: []u8) UserReader { + pub fn init(section: []u8, shellIndex: shellIndexProto) UserReader { return UserReader{ - .blob = blob, + .section = section, + .shellIndex = shellIndex, }; } - pub const PackedIterator = struct { - ur: *UserReader, - index: usize = 0, - - pub fn next(it: *PackedIterator) ?PackedEntry { - if (it.index == it.ur.blob.len) return null; - assert(it.index < it.ur.blob.len); - - const endUser = it.index + @sizeOf(PackedUser); - var packedUser = std.mem.bytesAsValue( - PackedUser, - it.ur.blob[it.index..endUser][0..@sizeOf(PackedUser)], - ); - const startBlob = endUser; - const endBlob = startBlob + packedUser.blobLength(); - it.index = pad.roundUp(usize, PackedUserAlignmentBits, endBlob); - return PackedEntry{ - .packed_user = packedUser, - .blob = it.ur.blob[startBlob..endBlob], - }; - } + pub const Entry = struct { + user: User, + nextOffset: usize, }; - pub fn packedIterator(self: *UserReader) PackedIterator { - return .{ .ur = self }; - } + // atOffset returns the User at the given offset of the User section and + // the offset of the next user; null when offset is the section end.
+ pub fn atOffset(self: *UserReader, index: usize) ?Entry { + if (index == self.section.len) return null; + assert(index < self.section.len); + const endUser = index + @sizeOf(PackedUser); + var u = std.mem.bytesAsValue( + PackedUser, + self.section[index..endUser][0..@sizeOf(PackedUser)], + ); + const startBlob = endUser; + const endBlob = startBlob + u.blobLength(); + const section = self.section[startBlob..endBlob]; - pub const Iterator = struct { - pit: PackedIterator, - shellIndex: shellIndexProto, + const home = section[0..u.realHomeLen()]; + var name: []const u8 = undefined; + var pos: usize = undefined; - pub fn next(it: *Iterator) ?User { - const entry = it.pit.next() orelse return null; - const u = entry.packed_user; + if (u.name_is_a_suffix) { + const name_start = u.realHomeLen() - u.realNameLen(); + name = section[name_start..u.realHomeLen()]; + pos = u.realHomeLen(); + } else { + const name_start = u.realHomeLen(); + name = section[name_start .. name_start + u.realNameLen()]; + pos = name_start + u.realNameLen(); + } + const gecos = section[pos .. pos + u.realGecosLen()]; + pos += u.realGecosLen(); - const home = entry.blob[0..u.realHomeLen()]; - var name: []const u8 = undefined; - var pos: usize = undefined; + var shell: []const u8 = undefined; + if (u.shell_here) { + shell = section[pos .. pos + u.realShellLen()]; + } else { + shell = self.shellIndex(u.shell_len_or_idx); + } - if (u.name_is_a_suffix) { - const name_start = u.realHomeLen() - u.realNameLen(); - name = entry.blob[name_start..u.realHomeLen()]; - pos = u.realHomeLen(); - } else { - const name_start = u.realHomeLen(); - name = entry.blob[name_start .. name_start + u.realNameLen()]; - pos = name_start + u.realNameLen(); - } - const gecos = entry.blob[pos .. pos + u.realGecosLen()]; - pos += u.realGecosLen(); - - var shell: []const u8 = undefined; - if (u.shell_here) { - shell = entry.blob[pos .. 
pos + u.realShellLen()]; - } else { - shell = it.shellIndex(u.shell_len_or_idx); - } - - return User{ + return Entry{ + .user = User{ .uid = u.uid, .gid = u.gid, .name = name, .gecos = gecos, .home = home, .shell = shell, - }; + }, + .nextOffset = pad.roundUp(usize, PackedUserAlignmentBits, endBlob), + }; + } + + pub const Iterator = struct { + ur: *UserReader, + offset: usize = 0, + + pub fn next(it: *Iterator) ?User { + if (it.ur.atOffset(it.offset)) |result| { + it.offset = result.nextOffset; + return result.user; + } + return null; } }; - pub fn iterator(self: *UserReader, shellIndex: shellIndexProto) Iterator { - return .{ - .pit = self.packedIterator(), - .shellIndex = shellIndex, + pub fn iterator(self: *UserReader) Iterator { + return Iterator{ + .ur = self, + .offset = 0, }; } }; @@ -236,7 +261,7 @@ fn testShell(index: u6) []const u8 { }; } -test "construct PackedUser blob" { +test "construct PackedUser section" { var buf = ArrayList(u8).init(testing.allocator); defer buf.deinit(); @@ -267,26 +292,16 @@ test "construct PackedUser blob" { try writer.appendUser(user); } - var rd = UserReader.init(buf.items); - { - var it = rd.packedIterator(); - var i: u32 = 0; - while (it.next()) |entry| : (i += 1) { - try testing.expectEqual(users[i].uid, entry.packed_user.uid); - try testing.expectEqual(users[i].gid, entry.packed_user.gid); - } - } + var rd = UserReader.init(buf.items, testShell); - { - var it = rd.iterator(testShell); - var i: u32 = 0; - while (it.next()) |user| : (i += 1) { - try testing.expectEqual(users[i].uid, user.uid); - try testing.expectEqual(users[i].gid, user.gid); - try testing.expectEqualStrings(users[i].name, user.name); - try testing.expectEqualStrings(users[i].gecos, user.gecos); - try testing.expectEqualStrings(users[i].home, user.home); - try testing.expectEqualStrings(users[i].shell, user.shell); - } + var it = rd.iterator(); + var i: u32 = 0; + while (it.next()) |user| : (i += 1) { + try testing.expectEqual(users[i].uid, user.uid); + 
try testing.expectEqual(users[i].gid, user.gid); + try testing.expectEqualStrings(users[i].name, user.name); + try testing.expectEqualStrings(users[i].gecos, user.gecos); + try testing.expectEqualStrings(users[i].home, user.home); + try testing.expectEqualStrings(users[i].shell, user.shell); } }