From af7ba5edcf830b0eada27c7b8318ce026e8a8b9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?=
Date: Wed, 27 Jul 2022 21:00:18 -0700
Subject: [PATCH] add turbonss-makecorpus

---
 build.zig                   |   8 ++
 src/PackedUser.zig          |  11 +-
 src/test_all.zig            |   1 +
 src/turbonss-getent.zig     |  38 +++--
 src/turbonss-makecorpus.zig | 301 ++++++++++++++++++++++++++++++++++++
 src/turbonss-unix2db.zig    |   4 +-
 6 files changed, 338 insertions(+), 25 deletions(-)
 create mode 100644 src/turbonss-makecorpus.zig

diff --git a/build.zig b/build.zig
index 540ea23..98dc16d 100644
--- a/build.zig
+++ b/build.zig
@@ -80,6 +80,14 @@ pub fn build(b: *zbs.Builder) void {
         exe.install();
     }
 
+    {
+        const exe = b.addExecutable("turbonss-makecorpus", "src/turbonss-makecorpus.zig");
+        exe.strip = strip;
+        exe.setTarget(target);
+        exe.setBuildMode(mode);
+        exe.install();
+    }
+
     {
         const exe = b.addExecutable("turbonss-getent", "src/turbonss-getent.zig");
         exe.strip = strip;
diff --git a/src/PackedUser.zig b/src/PackedUser.zig
index c5b8a42..492c151 100644
--- a/src/PackedUser.zig
+++ b/src/PackedUser.zig
@@ -150,7 +150,6 @@ pub fn packTo(
     std.debug.assert(arr.items.len & 7 == 0);
     // function arguments are consts. We need to mutate the underlying
     // slice, so passing it via pointer instead.
-    //const home_len = try validate.downCast(u6, user.home.len - 1);
     const home_len = try validate.downCast(fieldInfo(Inner, .home_len).field_type, user.home.len - 1);
     const name_len = try validate.downCast(fieldInfo(Inner, .name_len).field_type, user.name.len - 1);
     const shell_len = try validate.downCast(fieldInfo(Inner, .shell_len_or_idx).field_type, user.shell.len - 1);
@@ -231,11 +230,15 @@ pub fn toUser(self: *const PackedUser, shell_reader: ShellReader) User {
     };
 }
 
+pub const max_home_len = math.maxInt(fieldInfo(Inner, .home_len).field_type) + 1;
+pub const max_name_len = math.maxInt(fieldInfo(Inner, .name_len).field_type) + 1;
+pub const max_gecos_len = math.maxInt(fieldInfo(Inner, .gecos_len).field_type);
+
 pub const max_str_len =
     math.maxInt(fieldInfo(Inner, .shell_len_or_idx).field_type) + 1 +
-    math.maxInt(fieldInfo(Inner, .home_len).field_type) + 1 +
-    math.maxInt(fieldInfo(Inner, .name_len).field_type) + 1 +
-    math.maxInt(fieldInfo(Inner, .gecos_len).field_type);
+    max_home_len +
+    max_name_len +
+    max_gecos_len;
 
 const testing = std.testing;
 
diff --git a/src/test_all.zig b/src/test_all.zig
index e22d532..dc9ae7a 100644
--- a/src/test_all.zig
+++ b/src/test_all.zig
@@ -19,4 +19,5 @@ test "turbonss test suite" {
     _ = @import("turbonss-getent.zig");
     _ = @import("turbonss-unix2db.zig");
     _ = @import("turbonss-analyze.zig");
+    _ = @import("turbonss-makecorpus.zig");
 }
diff --git a/src/turbonss-getent.zig b/src/turbonss-getent.zig
index bae2fab..58a85ea 100644
--- a/src/turbonss-getent.zig
+++ b/src/turbonss-getent.zig
@@ -202,26 +202,24 @@ test "turbonss-getent passwd" {
     var stderr = ArrayList(u8).init(testing.allocator);
     defer stderr.deinit();
 
-    {
-        const args = &[_][*:0]const u8{
-            "--db",
-            tf.path,
-            "passwd",
-            "root",
-            "doesnotexist",
-            "vidmantas",
-            "0",
-            "1",
-        };
-        const got = execute(stdout.writer(), stderr.writer(), args);
-        try testing.expectEqual(got, 2);
-        const want_root = "root:x:0:0::/root:/bin/bash\n";
-        try testing.expectEqualStrings(stdout.items[0..want_root.len], want_root);
-        const want_vidmantas = "vidmantas:x:128:128:Vidmantas Kaminskas:/home/vidmantas:/bin/bash\n";
-        var offset: usize = want_root.len + want_vidmantas.len;
-        try testing.expectEqualStrings(stdout.items[want_root.len..offset], want_vidmantas);
-        try testing.expectEqualStrings(stdout.items[offset..], want_root);
-    }
+    const args = &[_][*:0]const u8{
+        "--db",
+        tf.path,
+        "passwd",
+        "root",
+        "doesnotexist",
+        "vidmantas",
+        "0",
+        "1",
+    };
+    const got = execute(stdout.writer(), stderr.writer(), args);
+    try testing.expectEqual(got, 2);
+    const want_root = "root:x:0:0::/root:/bin/bash\n";
+    try testing.expectEqualStrings(stdout.items[0..want_root.len], want_root);
+    const want_vidmantas = "vidmantas:x:128:128:Vidmantas Kaminskas:/home/vidmantas:/bin/bash\n";
+    var offset: usize = want_root.len + want_vidmantas.len;
+    try testing.expectEqualStrings(stdout.items[want_root.len..offset], want_vidmantas);
+    try testing.expectEqualStrings(stdout.items[offset..], want_root);
 }
 
 test "turbonss-getent passwdAll" {
diff --git a/src/turbonss-makecorpus.zig b/src/turbonss-makecorpus.zig
new file mode 100644
index 0000000..f6ae845
--- /dev/null
+++ b/src/turbonss-makecorpus.zig
@@ -0,0 +1,301 @@
+const std = @import("std");
+const fs = std.fs;
+const io = std.io;
+const os = std.os;
+const fmt = std.fmt;
+const mem = std.mem;
+const heap = std.heap;
+const math = std.math;
+const meta = std.meta;
+const ArrayList = std.ArrayList;
+const Allocator = std.mem.Allocator;
+const BoundedArray = std.BoundedArray;
+
+const flags = @import("flags.zig");
+const User = @import("User.zig");
+const PackedUser = @import("PackedUser.zig");
+
+const usage =
+    \\usage: turbonss-makecorpus [OPTION]...
+    \\
+    \\Options:
+    \\ -h Print this help message and exit
+    \\ --directory Write files to given directory (default: .)
+    \\ --num-users Number of users (default: 20000)
+    \\ --num-groups Number of groups (default: 5000)
+    \\ --avg-members Average members per group (default: 20)
+    \\
+;
+
+const shells = &[_][]const u8{
+    "/bin/bash",
+    "/bin/sh",
+    "/bin/zsh",
+    "/usr/sbin/nologin",
+};
+
+pub fn main() void {
+    // This line is here because of https://github.com/ziglang/zig/issues/7807
+    const argv: []const [*:0]const u8 = os.argv;
+
+    const stderr = io.getStdErr().writer();
+    const stdout = io.getStdOut().writer();
+
+    execute(stdout, stderr, argv[1..]) catch |err| switch (err) {
+        error.User => os.exit(1),
+        error.IO => os.exit(3),
+    };
+}
+
+const options = &[_]flags.Flag{
+    .{ .name = "-h", .kind = .boolean },
+    .{ .name = "--directory", .kind = .arg },
+    .{ .name = "--num-users", .kind = .arg },
+    .{ .name = "--num-groups", .kind = .arg },
+    .{ .name = "--avg-members", .kind = .arg },
+};
+
+/// Parses `argv` and writes the `passwd` and `group` corpus files into the
+/// requested directory. Usage and diagnostics go to `stderr`; `-h` prints the
+/// usage to `stdout`. Returns error.User for bad flags or when a file cannot
+/// be created, error.IO when writing fails. A file that was created but not
+/// fully written is unlinked again before returning.
+fn execute(
+    stdout: anytype,
+    stderr: anytype,
+    argv: []const [*:0]const u8,
+) error{ User, IO }!void {
+    const myflags = flags.parse(argv, options) catch {
+        stderr.writeAll(usage) catch {};
+        return error.User;
+    };
+
+    if (myflags.boolFlag("-h")) {
+        stdout.writeAll(usage) catch return error.IO;
+        return;
+    }
+
+    if (myflags.args.len != 0) {
+        std.debug.print("args: {s}\n", .{myflags.args});
+        stderr.writeAll(usage) catch {};
+        return error.User;
+    }
+
+    const num_users = parseInt(stderr, myflags, "--num-users", 20000) orelse return error.User;
+    const num_groups = parseInt(stderr, myflags, "--num-groups", 5000) orelse return error.User;
+    const avg_members = parseInt(stderr, myflags, "--avg-members", 20) orelse return error.User;
+    std.debug.print("num_users={d}\n", .{num_users});
+
+    // longest possible path name: 16k? dunno.
+    // Each path lives in its own buffer: both slices stay referenced by the
+    // errdefers below until this function returns, so neither may be
+    // overwritten by the other.
+    var passwd_buf: [1 << 14]u8 = undefined;
+    var group_buf: [1 << 14]u8 = undefined;
+    var passwd_fixed = std.heap.FixedBufferAllocator.init(passwd_buf[0..]);
+    var group_fixed = std.heap.FixedBufferAllocator.init(group_buf[0..]);
+    const dir = myflags.argFlag("--directory") orelse ".";
+    const passwd_path = fs.path.join(passwd_fixed.allocator(), &[_][]const u8{ dir, "passwd" }) catch |err| switch (err) {
+        error.OutOfMemory => {
+            stderr.print("ERROR: --directory too long\n", .{}) catch {};
+            return error.User;
+        },
+    };
+    // "group" is shorter than "passwd": if passwd fit, group will too
+    const group_path = fs.path.join(group_fixed.allocator(), &[_][]const u8{ dir, "group" }) catch unreachable;
+
+    const open_flags = std.fs.File.CreateFlags{ .mode = 0o644, .exclusive = true };
+    var passwdf = fs.cwd().createFile(passwd_path, open_flags) catch |err| {
+        stderr.print("ERROR: create passwd: {s}\n", .{@errorName(err)}) catch {};
+        return error.User;
+    };
+    defer passwdf.close();
+    errdefer os.unlink(passwd_path) catch {};
+    try dump_passwd(passwdf.writer(), num_users);
+
+    var groupf = fs.cwd().createFile(group_path, open_flags) catch |err| {
+        stderr.print("ERROR: creating group: {s}\n", .{@errorName(err)}) catch {};
+        return error.User;
+    };
+    defer groupf.close();
+    errdefer os.unlink(group_path) catch {};
+    // dump_group buffers and flushes its output itself
+    try dump_group(groupf.writer(), num_users, num_groups, avg_members);
+
+    std.debug.print("\nwrote users={d} groups={d} avg-members={d} to {s}\n", .{
+        num_users,
+        num_groups,
+        avg_members,
+        dir,
+    });
+}
+
+/// Writes `num_users` passwd lines to `wr`. Users are named `u_<uid>` with
+/// uids counting up from 1000000; every user has gid 1000000 and shells are
+/// assigned round-robin from `shells`. Output is buffered internally and
+/// flushed before returning.
+fn dump_passwd(wr: anytype, num_users: u64) error{IO}!void {
+    var lw = io.bufferedWriter(wr);
+    const lwr = lw.writer();
+    var buf_gecos: [PackedUser.max_gecos_len]u8 = undefined;
+    var buf_name: [PackedUser.max_name_len]u8 = undefined;
+    var buf_home: [PackedUser.max_home_len]u8 = undefined;
+    var i: u32 = 1000000;
+    while (i < 1000000 + num_users) : (i += 1) {
+        const name = fmt.bufPrint(buf_name[0..], "u_{d}", .{i}) catch unreachable;
+        const gecos = fmt.bufPrint(buf_gecos[0..], "User {d}", .{i}) catch unreachable;
+        const home = fmt.bufPrint(buf_home[0..], "/home/{s}", .{name}) catch unreachable;
+        const user = User{
+            .uid = i,
+            .gid = 1000000,
+            .name = name,
+            .gecos = gecos,
+            .home = home,
+            .shell = shells[i % shells.len],
+        };
+        // writeAll, not write: a short write would silently truncate the line
+        lwr.writeAll(user.toLine().constSlice()) catch return error.IO;
+    }
+    lw.flush() catch return error.IO;
+}
+
+/// Writes `num_groups` group lines to `wr`. Groups are named `g_<gid>` with
+/// gids counting up from 1000000. Group `gid` gets `gid % avg_members`
+/// members (0 to `avg_members - 1`), taken consecutively from the `u_<uid>`
+/// names that dump_passwd generates for `num_users` users. When `num_users`
+/// or `avg_members` is 0 every group is empty.
+fn dump_group(wr: anytype, num_users: u64, num_groups: u64, avg_members: u32) error{IO}!void {
+    var lw = io.bufferedWriter(wr);
+    const lwr = lw.writer();
+    var i: u32 = 1000000;
+    while (i < 1000000 + num_groups) : (i += 1) {
+        lwr.print("g_{d}:x:{d}:", .{ i, i }) catch return error.IO;
+        // `and` short-circuits, so neither `%` below ever divides by zero
+        const num_members = if (avg_members > 0 and num_users > 0) i % avg_members else 0;
+        var j: usize = 0;
+        while (j < num_members) : (j += 1) {
+            const user_idx = (i + j - 1) % num_users;
+            if (j != 0)
+                lwr.writeByte(',') catch return error.IO;
+            lwr.print("u_{d}", .{1000000 + user_idx}) catch return error.IO;
+        }
+        lwr.writeByte('\n') catch return error.IO;
+    }
+    lw.flush() catch return error.IO;
+}
+
+/// Returns the value of the numeric flag `option`, or `default` when the flag
+/// was not given. A malformed value is reported on `stderr` and yields null.
+fn parseInt(stderr: anytype, myflags: flags.ParseResult(options), option: [:0]const u8, default: u32) ?u32 {
+    const arg = myflags.argFlag(option) orelse return default;
+    return std.fmt.parseUnsigned(u32, arg, 0) catch |err| {
+        stderr.print("unable to parse {s}={s}: {s}\n", .{ option, arg, @errorName(err) }) catch {};
+        return null;
+    };
+}
+
+const want_group =
+    \\g_1000000:x:1000000:
+    \\g_1000001:x:1000001:u_1000001
+    \\g_1000002:x:1000002:u_1000002,u_1000003
+    \\g_1000003:x:1000003:u_1000003,u_1000004,u_1000005
+    \\g_1000004:x:1000004:
+    \\g_1000005:x:1000005:u_1000005
+    \\g_1000006:x:1000006:u_1000006,u_1000000
+    \\g_1000007:x:1000007:u_1000000,u_1000001,u_1000002
+    \\
+;
+
+const want_passwd =
+    \\u_1000000:x:1000000:1000000:User 1000000:/home/u_1000000:/bin/bash
+    \\u_1000001:x:1000001:1000000:User 1000001:/home/u_1000001:/bin/sh
+    \\u_1000002:x:1000002:1000000:User 1000002:/home/u_1000002:/bin/zsh
+    \\u_1000003:x:1000003:1000000:User 1000003:/home/u_1000003:/usr/sbin/nologin
+    \\u_1000004:x:1000004:1000000:User 1000004:/home/u_1000004:/bin/bash
+    \\u_1000005:x:1000005:1000000:User 1000005:/home/u_1000005:/bin/sh
+    \\u_1000006:x:1000006:1000000:User 1000006:/home/u_1000006:/bin/zsh
+    \\
+;
+
+const testing = std.testing;
+
+test "turbonss-makecorpus: bad flag" {
+    var stdout = ArrayList(u8).init(testing.allocator);
+    defer stdout.deinit();
+    var stderr = ArrayList(u8).init(testing.allocator);
+    defer stderr.deinit();
+
+    const args = &[_][*:0]const u8{ "--num-users", "x" };
+
+    const got = execute(stdout.writer(), stderr.writer(), args);
+    try testing.expectError(error.User, got);
+    try testing.expectEqualStrings("unable to parse --num-users=x: InvalidCharacter\n", stderr.items);
+    try testing.expectEqualStrings("", stdout.items);
+}
+
+test "turbonss-makecorpus: simple OK case" {
+    const allocator = testing.allocator;
+    var stdout = ArrayList(u8).init(allocator);
+    defer stdout.deinit();
+    var stderr = ArrayList(u8).init(allocator);
+    defer stderr.deinit();
+
+    var tmp = testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const tmp_path = blk: {
+        const relative_path = try fs.path.join(allocator, &[_][]const u8{
+            "zig-cache",
+            "tmp",
+            tmp.sub_path[0..],
+        });
+        defer allocator.free(relative_path);
+        const real_path = try fs.realpathAlloc(allocator, relative_path);
+        const real_pathZ = try allocator.dupeZ(u8, real_path);
+        allocator.free(real_path);
+        break :blk real_pathZ;
+    };
+    defer allocator.free(tmp_path);
+
+    const args = &[_][*:0]const u8{
+        "--num-users",
+        "1",
+        "--num-groups",
+        "1",
+        "--avg-members",
+        "1",
+        "--directory",
+        tmp_path,
+    };
+
+    try execute(stdout.writer(), stderr.writer(), args);
+    try testing.expectEqualStrings("", stderr.items);
+    try testing.expectEqualStrings("", stdout.items);
+
+    const passwd = try tmp.dir.readFileAlloc(allocator, "passwd", 1024);
+    defer allocator.free(passwd);
+    try testing.expectEqualStrings(
+        "u_1000000:x:1000000:1000000:User 1000000:/home/u_1000000:/bin/bash\n",
+        passwd,
+    );
+
+    const group = try tmp.dir.readFileAlloc(allocator, "group", 1024);
+    defer allocator.free(group);
+    try testing.expectEqualStrings("g_1000000:x:1000000:\n", group);
+}
+
+test "turbonss-makecorpus: dump_passwd" {
+    var wr = ArrayList(u8).init(testing.allocator);
+    defer wr.deinit();
+
+    try dump_passwd(wr.writer(), 7);
+    try testing.expectEqualStrings(want_passwd, wr.items);
+}
+
+test "turbonss-makecorpus: dump_group" {
+    var wr = ArrayList(u8).init(testing.allocator);
+    defer wr.deinit();
+
+    try dump_group(wr.writer(), 7, 8, 4);
+    try testing.expectEqualStrings(want_group, wr.items);
+}
diff --git a/src/turbonss-unix2db.zig b/src/turbonss-unix2db.zig
index cbf8d02..257a4b9 100644
--- a/src/turbonss-unix2db.zig
+++ b/src/turbonss-unix2db.zig
@@ -15,8 +15,9 @@ const DB = @import("DB.zig");
 const ErrCtx = @import("ErrCtx.zig");
 
 const usage =
-    \\usage: turbonss-unix2db [options]
+    \\usage: turbonss-unix2db [OPTION]...
     \\
+    \\Options:
     \\ -h Print this help message and exit
     \\ --passwd Path to passwd file (default: passwd)
     \\ --group Path to group file (default: group)
@@ -185,6 +186,7 @@ test "turbonss-unix2db smoke test" {
     defer corpus.deinit();
 
     var tmp = testing.tmpDir(.{});
+    // TODO: defer
     errdefer tmp.cleanup();
 
     const tmp_path = blk: {