1
Fork 0

add utf8 validation; rework user iterator

This commit is contained in:
Motiejus Jakštys 2022-02-23 06:07:53 +02:00 committed by Motiejus Jakštys
parent 0e5365127a
commit bb4b5eba03
2 changed files with 102 additions and 87 deletions

View File

@ -69,7 +69,7 @@ Tight packing places some constraints on the underlying data:
- Permitted length of username and groupname: 1-32 bytes. - Permitted length of username and groupname: 1-32 bytes.
- Permitted length of shell and home: 1-64 bytes. - Permitted length of shell and home: 1-64 bytes.
- Permitted comment ("gecos") length: 0-1023 bytes. - Permitted comment ("gecos") length: 0-1023 bytes.
- User name, groupname and gecos must be utf8-encoded. - User name, groupname, gecos and shell must be utf8-encoded.
Checking out and building Checking out and building
------------------------- -------------------------

View File

@ -17,7 +17,7 @@ pub const PackedUser = packed struct {
gecos_len: u10, gecos_len: u10,
padding: u3, padding: u3,
// blobSize returns the length of the blob storing string values. // blobLength returns the length of the blob storing string values.
pub fn blobLength(self: *const PackedUser) usize { pub fn blobLength(self: *const PackedUser) usize {
var result: usize = self.realHomeLen(); var result: usize = self.realHomeLen();
if (!self.name_is_a_suffix) { if (!self.name_is_a_suffix) {
@ -69,7 +69,10 @@ pub const UserWriter = struct {
appendTo: *ArrayList(u8), appendTo: *ArrayList(u8),
shellIndexFn: shellIndexFnType, shellIndexFn: shellIndexFnType,
pub fn init(appendTo: *ArrayList(u8), shellIndexFn: shellIndexFnType) UserWriter { pub fn init(
appendTo: *ArrayList(u8),
shellIndexFn: shellIndexFnType,
) UserWriter {
return UserWriter{ return UserWriter{
.appendTo = appendTo, .appendTo = appendTo,
.shellIndexFn = shellIndexFn, .shellIndexFn = shellIndexFn,
@ -84,12 +87,33 @@ pub const UserWriter = struct {
}; };
} }
pub fn appendUser(self: *UserWriter, user: User) !void { pub fn validateUtf8(s: []const u8) error{InvalidRecord}!void {
if (!std.unicode.utf8ValidateSlice(s)) {
return error.InvalidRecord;
}
}
// FIXME(motiejus) record validation should return a separate type. For User
// case, it should be length-bound slices and utf8-codepoints instead of
// strings.
//
// zig does not have error contexts
// (https://github.com/ziglang/zig/issues/2647) and length-limited slices.
// (It does have bounded_array, but that preallocates the maximum length,
// which is not great for User records). So I am using those excuses to
// do the validation here. I may move it once I learn the language better.
const appendUserErr = error{InvalidRecord} || Allocator.Error;
pub fn appendUser(self: *UserWriter, user: User) appendUserErr!void {
const home_len = try downCast(u6, user.home.len - 1); const home_len = try downCast(u6, user.home.len - 1);
const name_len = try downCast(u5, user.name.len - 1); const name_len = try downCast(u5, user.name.len - 1);
const shell_len = try downCast(u6, user.shell.len - 1); const shell_len = try downCast(u6, user.shell.len - 1);
const gecos_len = try downCast(u10, user.gecos.len); const gecos_len = try downCast(u10, user.gecos.len);
try validateUtf8(user.home);
try validateUtf8(user.name);
try validateUtf8(user.shell);
try validateUtf8(user.gecos);
var puser = PackedUser{ var puser = PackedUser{
.uid = user.uid, .uid = user.uid,
.gid = user.gid, .gid = user.gid,
@ -123,92 +147,93 @@ pub const UserWriter = struct {
pub const UserReader = struct { pub const UserReader = struct {
const shellIndexProto = fn (u6) []const u8; const shellIndexProto = fn (u6) []const u8;
blob: []u8, section: []const u8,
shellIndex: shellIndexProto,
pub const PackedEntry = struct { pub const PackedEntry = struct {
packed_user: *PackedUser, packed_user: *PackedUser,
blob: []const u8, section: []const u8,
}; };
pub fn init(blob: []u8) UserReader { pub fn init(section: []u8, shellIndex: shellIndexProto) UserReader {
return UserReader{ return UserReader{
.blob = blob, .section = section,
.shellIndex = shellIndex,
}; };
} }
pub const PackedIterator = struct { pub const Entry = struct {
ur: *UserReader, user: User,
index: usize = 0, nextOffset: usize,
};
pub fn next(it: *PackedIterator) ?PackedEntry { // atOffset returns the User stored at the given offset of the User section,
if (it.index == it.ur.blob.len) return null; // together with the offset of the next user, or null at the end of the section.
assert(it.index < it.ur.blob.len); pub fn atOffset(self: *UserReader, index: usize) ?Entry {
if (index == self.section.len) return null;
const endUser = it.index + @sizeOf(PackedUser); assert(index < self.section.len);
var packedUser = std.mem.bytesAsValue( const endUser = index + @sizeOf(PackedUser);
var u = std.mem.bytesAsValue(
PackedUser, PackedUser,
it.ur.blob[it.index..endUser][0..@sizeOf(PackedUser)], self.section[index..endUser][0..@sizeOf(PackedUser)],
); );
const startBlob = endUser; const startBlob = endUser;
const endBlob = startBlob + packedUser.blobLength(); const endBlob = startBlob + u.blobLength();
it.index = pad.roundUp(usize, PackedUserAlignmentBits, endBlob); const section = self.section[startBlob..endBlob];
return PackedEntry{
.packed_user = packedUser,
.blob = it.ur.blob[startBlob..endBlob],
};
}
};
pub fn packedIterator(self: *UserReader) PackedIterator { const home = section[0..u.realHomeLen()];
return .{ .ur = self };
}
pub const Iterator = struct {
pit: PackedIterator,
shellIndex: shellIndexProto,
pub fn next(it: *Iterator) ?User {
const entry = it.pit.next() orelse return null;
const u = entry.packed_user;
const home = entry.blob[0..u.realHomeLen()];
var name: []const u8 = undefined; var name: []const u8 = undefined;
var pos: usize = undefined; var pos: usize = undefined;
if (u.name_is_a_suffix) { if (u.name_is_a_suffix) {
const name_start = u.realHomeLen() - u.realNameLen(); const name_start = u.realHomeLen() - u.realNameLen();
name = entry.blob[name_start..u.realHomeLen()]; name = section[name_start..u.realHomeLen()];
pos = u.realHomeLen(); pos = u.realHomeLen();
} else { } else {
const name_start = u.realHomeLen(); const name_start = u.realHomeLen();
name = entry.blob[name_start .. name_start + u.realNameLen()]; name = section[name_start .. name_start + u.realNameLen()];
pos = name_start + u.realNameLen(); pos = name_start + u.realNameLen();
} }
const gecos = entry.blob[pos .. pos + u.realGecosLen()]; const gecos = section[pos .. pos + u.realGecosLen()];
pos += u.realGecosLen(); pos += u.realGecosLen();
var shell: []const u8 = undefined; var shell: []const u8 = undefined;
if (u.shell_here) { if (u.shell_here) {
shell = entry.blob[pos .. pos + u.realShellLen()]; shell = section[pos .. pos + u.realShellLen()];
} else { } else {
shell = it.shellIndex(u.shell_len_or_idx); shell = self.shellIndex(u.shell_len_or_idx);
} }
return User{ return Entry{
.user = User{
.uid = u.uid, .uid = u.uid,
.gid = u.gid, .gid = u.gid,
.name = name, .name = name,
.gecos = gecos, .gecos = gecos,
.home = home, .home = home,
.shell = shell, .shell = shell,
},
.nextOffset = pad.roundUp(usize, PackedUserAlignmentBits, endBlob),
}; };
} }
pub const Iterator = struct {
ur: *UserReader,
offset: usize = 0,
pub fn next(it: *Iterator) ?User {
if (it.ur.atOffset(it.offset)) |result| {
it.offset = result.nextOffset;
return result.user;
}
return null;
}
}; };
pub fn iterator(self: *UserReader, shellIndex: shellIndexProto) Iterator { pub fn iterator(self: *UserReader) Iterator {
return .{ return Iterator{
.pit = self.packedIterator(), .ur = self,
.shellIndex = shellIndex, .offset = 0,
}; };
} }
}; };
@ -236,7 +261,7 @@ fn testShell(index: u6) []const u8 {
}; };
} }
test "construct PackedUser blob" { test "construct PackedUser section" {
var buf = ArrayList(u8).init(testing.allocator); var buf = ArrayList(u8).init(testing.allocator);
defer buf.deinit(); defer buf.deinit();
@ -267,18 +292,9 @@ test "construct PackedUser blob" {
try writer.appendUser(user); try writer.appendUser(user);
} }
var rd = UserReader.init(buf.items); var rd = UserReader.init(buf.items, testShell);
{
var it = rd.packedIterator();
var i: u32 = 0;
while (it.next()) |entry| : (i += 1) {
try testing.expectEqual(users[i].uid, entry.packed_user.uid);
try testing.expectEqual(users[i].gid, entry.packed_user.gid);
}
}
{ var it = rd.iterator();
var it = rd.iterator(testShell);
var i: u32 = 0; var i: u32 = 0;
while (it.next()) |user| : (i += 1) { while (it.next()) |user| : (i += 1) {
try testing.expectEqual(users[i].uid, user.uid); try testing.expectEqual(users[i].uid, user.uid);
@ -288,5 +304,4 @@ test "construct PackedUser blob" {
try testing.expectEqualStrings(users[i].home, user.home); try testing.expectEqualStrings(users[i].home, user.home);
try testing.expectEqualStrings(users[i].shell, user.shell); try testing.expectEqualStrings(users[i].shell, user.shell);
} }
}
} }