add turbonss-makecorpus

This commit is contained in:
Motiejus Jakštys 2022-07-27 21:00:18 -07:00
parent 8ec16474f7
commit af7ba5edcf
6 changed files with 313 additions and 25 deletions

View File

@ -80,6 +80,14 @@ pub fn build(b: *zbs.Builder) void {
exe.install(); exe.install();
} }
{
const exe = b.addExecutable("turbonss-makecorpus", "src/turbonss-makecorpus.zig");
exe.strip = strip;
exe.setTarget(target);
exe.setBuildMode(mode);
exe.install();
}
{ {
const exe = b.addExecutable("turbonss-getent", "src/turbonss-getent.zig"); const exe = b.addExecutable("turbonss-getent", "src/turbonss-getent.zig");
exe.strip = strip; exe.strip = strip;

View File

@ -150,7 +150,6 @@ pub fn packTo(
std.debug.assert(arr.items.len & 7 == 0); std.debug.assert(arr.items.len & 7 == 0);
// function arguments are consts. We need to mutate the underlying // function arguments are consts. We need to mutate the underlying
// slice, so passing it via pointer instead. // slice, so passing it via pointer instead.
//const home_len = try validate.downCast(u6, user.home.len - 1);
const home_len = try validate.downCast(fieldInfo(Inner, .home_len).field_type, user.home.len - 1); const home_len = try validate.downCast(fieldInfo(Inner, .home_len).field_type, user.home.len - 1);
const name_len = try validate.downCast(fieldInfo(Inner, .name_len).field_type, user.name.len - 1); const name_len = try validate.downCast(fieldInfo(Inner, .name_len).field_type, user.name.len - 1);
const shell_len = try validate.downCast(fieldInfo(Inner, .shell_len_or_idx).field_type, user.shell.len - 1); const shell_len = try validate.downCast(fieldInfo(Inner, .shell_len_or_idx).field_type, user.shell.len - 1);
@ -231,11 +230,15 @@ pub fn toUser(self: *const PackedUser, shell_reader: ShellReader) User {
}; };
} }
pub const max_home_len = math.maxInt(fieldInfo(Inner, .home_len).field_type) + 1;
pub const max_name_len = math.maxInt(fieldInfo(Inner, .name_len).field_type) + 1;
pub const max_gecos_len = math.maxInt(fieldInfo(Inner, .gecos_len).field_type);
pub const max_str_len = pub const max_str_len =
math.maxInt(fieldInfo(Inner, .shell_len_or_idx).field_type) + 1 + math.maxInt(fieldInfo(Inner, .shell_len_or_idx).field_type) + 1 +
math.maxInt(fieldInfo(Inner, .home_len).field_type) + 1 + max_home_len +
math.maxInt(fieldInfo(Inner, .name_len).field_type) + 1 + max_name_len +
math.maxInt(fieldInfo(Inner, .gecos_len).field_type); max_gecos_len;
const testing = std.testing; const testing = std.testing;

View File

@ -19,4 +19,5 @@ test "turbonss test suite" {
_ = @import("turbonss-getent.zig"); _ = @import("turbonss-getent.zig");
_ = @import("turbonss-unix2db.zig"); _ = @import("turbonss-unix2db.zig");
_ = @import("turbonss-analyze.zig"); _ = @import("turbonss-analyze.zig");
_ = @import("turbonss-makecorpus.zig");
} }

View File

@ -202,26 +202,24 @@ test "turbonss-getent passwd" {
var stderr = ArrayList(u8).init(testing.allocator); var stderr = ArrayList(u8).init(testing.allocator);
defer stderr.deinit(); defer stderr.deinit();
{ const args = &[_][*:0]const u8{
const args = &[_][*:0]const u8{ "--db",
"--db", tf.path,
tf.path, "passwd",
"passwd", "root",
"root", "doesnotexist",
"doesnotexist", "vidmantas",
"vidmantas", "0",
"0", "1",
"1", };
}; const got = execute(stdout.writer(), stderr.writer(), args);
const got = execute(stdout.writer(), stderr.writer(), args); try testing.expectEqual(got, 2);
try testing.expectEqual(got, 2); const want_root = "root:x:0:0::/root:/bin/bash\n";
const want_root = "root:x:0:0::/root:/bin/bash\n"; try testing.expectEqualStrings(stdout.items[0..want_root.len], want_root);
try testing.expectEqualStrings(stdout.items[0..want_root.len], want_root); const want_vidmantas = "vidmantas:x:128:128:Vidmantas Kaminskas:/home/vidmantas:/bin/bash\n";
const want_vidmantas = "vidmantas:x:128:128:Vidmantas Kaminskas:/home/vidmantas:/bin/bash\n"; var offset: usize = want_root.len + want_vidmantas.len;
var offset: usize = want_root.len + want_vidmantas.len; try testing.expectEqualStrings(stdout.items[want_root.len..offset], want_vidmantas);
try testing.expectEqualStrings(stdout.items[want_root.len..offset], want_vidmantas); try testing.expectEqualStrings(stdout.items[offset..], want_root);
try testing.expectEqualStrings(stdout.items[offset..], want_root);
}
} }
test "turbonss-getent passwdAll" { test "turbonss-getent passwdAll" {

276
src/turbonss-makecorpus.zig Normal file
View File

@ -0,0 +1,276 @@
const std = @import("std");
const fs = std.fs;
const io = std.io;
const os = std.os;
const fmt = std.fmt;
const mem = std.mem;
const heap = std.heap;
const math = std.math;
const meta = std.meta;
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
const BoundedArray = std.BoundedArray;
const flags = @import("flags.zig");
const User = @import("User.zig");
const PackedUser = @import("PackedUser.zig");
/// Help text for the command line tool. `execute` writes it to stdout when
/// `-h` is given and to stderr when flag parsing fails or unexpected
/// positional arguments are present.
const usage =
\\usage: turbonss-makecorpus [OPTION]...
\\
\\Options:
\\ -h Print this help message and exit
\\ --directory Write files to given directory (default: .)
\\ --num-users Number of users (default: 20000)
\\ --num-groups Number of groups (default: 5000)
\\ --avg-members Average members per group (default: 20)
\\
;
/// Login shells assigned to generated users. `dump_passwd` picks one per user
/// with `uid % shells.len`, so consecutive users cycle through this list.
const shells = &[_][]const u8{
"/bin/bash",
"/bin/sh",
"/bin/zsh",
"/usr/sbin/nologin",
};
/// Process entry point: forwards the command line (minus the program name) to
/// `execute` and translates its error into a process exit status.
pub fn main() void {
    // The explicit slice type works around
    // https://github.com/ziglang/zig/issues/7807
    const argv: []const [*:0]const u8 = os.argv;

    const stdout = io.getStdOut().writer();
    const stderr = io.getStdErr().writer();

    execute(stdout, stderr, argv[1..]) catch |err| {
        // Usage mistakes exit with 1, write failures with 3.
        const status: u8 = switch (err) {
            error.User => 1,
            error.IO => 3,
        };
        os.exit(status);
    };
}
/// Flags accepted by the tool, handed to `flags.parse` in `execute`. `-h` is a
/// boolean switch; every other flag takes one argument.
const options = &[_]flags.Flag{
.{ .name = "-h", .kind = .boolean },
.{ .name = "--directory", .kind = .arg },
.{ .name = "--num-users", .kind = .arg },
.{ .name = "--num-groups", .kind = .arg },
.{ .name = "--avg-members", .kind = .arg },
};
/// Parses `argv` and writes a synthetic `passwd` and `group` file into the
/// directory given by `--directory` (default: the current directory).
///
/// `stdout` only ever receives the usage text (for `-h`); `stderr` receives
/// usage and error messages. Both are `anytype` writers so tests can capture
/// them.
///
/// Errors:
/// - `error.User`: bad flags, unexpected positional arguments, an unparseable
///   number, a directory path that does not fit the path buffer, or a file
///   that could not be created (both files are opened with
///   `.exclusive = true`).
/// - `error.IO`: writing the usage text or the generated data failed.
///
/// If an error occurs after a file has been created, that file is unlinked
/// again on the way out.
fn execute(
    stdout: anytype,
    stderr: anytype,
    argv: []const [*:0]const u8,
) error{ User, IO }!void {
    const myflags = flags.parse(argv, options) catch {
        stderr.writeAll(usage) catch {};
        return error.User;
    };

    if (myflags.boolFlag("-h")) {
        stdout.writeAll(usage) catch return error.IO;
        return;
    }

    if (myflags.args.len != 0) {
        stderr.writeAll(usage) catch {};
        return error.User;
    }

    const num_users = parseInt(stderr, myflags, "--num-users", 20000) orelse return error.User;
    const num_groups = parseInt(stderr, myflags, "--num-groups", 5000) orelse return error.User;
    const avg_members = parseInt(stderr, myflags, "--avg-members", 20) orelse return error.User;

    // Both output paths are carved out of this buffer and must stay valid
    // until the function returns, because the `errdefer` unlinks below read
    // them on the error path. The allocator is therefore never reset; the
    // buffer is sized so that two paths of up to ~16k each fit.
    var buf: [1 << 15]u8 = undefined;
    var fixed = std.heap.FixedBufferAllocator.init(buf[0..]);
    const fixed_a = fixed.allocator();

    const dir = myflags.argFlag("--directory") orelse ".";
    const passwd_path = fs.path.join(fixed_a, &[_][]const u8{ dir, "passwd" }) catch |err| switch (err) {
        error.OutOfMemory => {
            stderr.print("ERROR: --directory too long\n", .{}) catch {};
            return error.User;
        },
    };
    const group_path = fs.path.join(fixed_a, &[_][]const u8{ dir, "group" }) catch |err| switch (err) {
        error.OutOfMemory => {
            stderr.print("ERROR: --directory too long\n", .{}) catch {};
            return error.User;
        },
    };

    const open_flags = std.fs.File.CreateFlags{ .mode = 0o644, .exclusive = true };

    var passwdf = fs.cwd().createFile(passwd_path, open_flags) catch |err| {
        stderr.print("ERROR: create passwd: {s}\n", .{@errorName(err)}) catch {};
        return error.User;
    };
    defer passwdf.close();
    errdefer os.unlink(passwd_path) catch {};

    // Buffer the passwd output so each user line is not its own syscall.
    var passwd_wr = io.bufferedWriter(passwdf.writer());
    try dump_passwd(passwd_wr.writer(), num_users);
    passwd_wr.flush() catch return error.IO;

    var groupf = fs.cwd().createFile(group_path, open_flags) catch |err| {
        stderr.print("ERROR: creating group: {s}\n", .{@errorName(err)}) catch {};
        return error.User;
    };
    defer groupf.close();
    errdefer os.unlink(group_path) catch {};

    var group_wr = io.bufferedWriter(groupf.writer());
    try dump_group(group_wr.writer(), num_users, num_groups, avg_members);
    group_wr.flush() catch return error.IO;

    // The summary goes straight to the process stderr rather than the
    // injected `stderr` writer, so callers capturing `stderr` see no output
    // on success.
    std.debug.print("\nwrote users={d} groups={d} avg-members={d} to {s}\n", .{
        num_users,
        num_groups,
        avg_members,
        dir,
    });
}
/// Writes `num_users` passwd lines to `wr`, one per user.
///
/// Users get consecutive uids starting at 1000000, the name `u_<uid>`, the
/// gecos `User <uid>`, the home `/home/u_<uid>`, gid 1000000 and a shell
/// picked from `shells` by `uid % shells.len`.
///
/// Returns `error.IO` if any write to `wr` fails.
///
/// NOTE(review): the uid counter is a `u32`, so a `num_users` large enough to
/// push the counter past `maxInt(u32)` overflows the increment.
fn dump_passwd(wr: anytype, num_users: u64) error{IO}!void {
    const first_uid: u32 = 1000000;

    var buf_gecos: [PackedUser.max_gecos_len]u8 = undefined;
    var buf_name: [PackedUser.max_name_len]u8 = undefined;
    var buf_home: [PackedUser.max_home_len]u8 = undefined;

    var uid: u32 = first_uid;
    while (uid < first_uid + num_users) : (uid += 1) {
        // The formatted strings are short and bounded by the width of a u32,
        // so the buffers are expected to be large enough — hence `unreachable`.
        const name = fmt.bufPrint(buf_name[0..], "u_{d}", .{uid}) catch unreachable;
        const gecos = fmt.bufPrint(buf_gecos[0..], "User {d}", .{uid}) catch unreachable;
        const home = fmt.bufPrint(buf_home[0..], "/home/{s}", .{name}) catch unreachable;
        const user = User{
            .uid = uid,
            .gid = first_uid,
            .name = name,
            .gecos = gecos,
            .home = home,
            .shell = shells[uid % shells.len],
        };
        // `writeAll` rather than `write`: a plain `write` may accept only part
        // of the line, which would silently truncate the output.
        wr.writeAll(user.toLine().constSlice()) catch return error.IO;
    }
}
/// Writes `num_groups` group lines of the form `g_<gid>:x:<gid>:<members>` to
/// `wr`, with gids counting up from 1000000.
///
/// Group `gid` receives `gid % avg_members` members, so member counts cycle
/// through `0..avg_members-1`. Members are the comma-separated names
/// `u_<1000000 + idx>`, where `idx` walks consecutive values modulo
/// `num_users` starting at `(gid - 1) % num_users`.
///
/// Groups are written without members when `avg_members` or `num_users` is
/// zero (there is nobody to pick from).
///
/// Output is buffered internally and flushed before returning. Returns
/// `error.IO` if any write or the final flush fails.
fn dump_group(wr: anytype, num_users: u64, num_groups: u64, avg_members: u32) error{IO}!void {
    const first_id: u32 = 1000000;

    var lw = io.bufferedWriter(wr);
    const lwr = lw.writer();

    var gid: u32 = first_id;
    while (gid < first_id + num_groups) : (gid += 1) {
        lwr.print("g_{d}:x:{d}:", .{ gid, gid }) catch return error.IO;

        // Guard both divisors: `% avg_members` here and `% num_users` below.
        const member_count: u64 = if (avg_members > 0 and num_users > 0)
            gid % avg_members
        else
            0;

        var j: u64 = 0;
        while (j < member_count) : (j += 1) {
            if (j != 0)
                lwr.writeAll(",") catch return error.IO;
            const user_idx = (gid + j - 1) % num_users;
            lwr.print("u_{d}", .{first_id + user_idx}) catch return error.IO;
        }

        lwr.writeAll("\n") catch return error.IO;
    }

    lw.flush() catch return error.IO;
}
/// Looks up the flag `option` in `myflags` and parses it as an unsigned
/// 32-bit integer (base chosen by `parseUnsigned` with base argument 0).
///
/// Returns `default` when the flag was not given. When the value cannot be
/// parsed, prints a diagnostic to `stderr` and returns `null`.
fn parseInt(stderr: anytype, myflags: flags.ParseResult(options), option: [:0]const u8, default: u32) ?u32 {
    const raw = myflags.argFlag(option) orelse return default;

    if (std.fmt.parseUnsigned(u32, raw, 0)) |value| {
        return value;
    } else |err| {
        // Best effort: a failing stderr must not mask the parse failure.
        stderr.print("unable to parse {s}={s}: {s}\n", .{ option, raw, @errorName(err) }) catch {};
        return null;
    }
}
/// Expected output of `dump_group` for 7 users, 8 groups and avg-members 4;
/// compared against in the "dump_group" test.
const want_group =
\\g_1000000:x:1000000:
\\g_1000001:x:1000001:u_1000001
\\g_1000002:x:1000002:u_1000002,u_1000003
\\g_1000003:x:1000003:u_1000003,u_1000004,u_1000005
\\g_1000004:x:1000004:
\\g_1000005:x:1000005:u_1000005
\\g_1000006:x:1000006:u_1000006,u_1000000
\\g_1000007:x:1000007:u_1000000,u_1000001,u_1000002
\\
;
/// Expected output of `dump_passwd` for 7 users; compared against in the
/// "dump_passwd" test. The shell column cycles through `shells`.
const want_passwd =
\\u_1000000:x:1000000:1000000:User 1000000:/home/u_1000000:/bin/bash
\\u_1000001:x:1000001:1000000:User 1000001:/home/u_1000001:/bin/sh
\\u_1000002:x:1000002:1000000:User 1000002:/home/u_1000002:/bin/zsh
\\u_1000003:x:1000003:1000000:User 1000003:/home/u_1000003:/usr/sbin/nologin
\\u_1000004:x:1000004:1000000:User 1000004:/home/u_1000004:/bin/bash
\\u_1000005:x:1000005:1000000:User 1000005:/home/u_1000005:/bin/sh
\\u_1000006:x:1000006:1000000:User 1000006:/home/u_1000006:/bin/zsh
\\
;
const testing = std.testing;
test "turbonss-makecorpus: bad flag" {
    // A non-numeric --num-users must be rejected with error.User, a single
    // diagnostic on stderr and nothing on stdout.
    var stdout = ArrayList(u8).init(testing.allocator);
    defer stdout.deinit();
    var stderr = ArrayList(u8).init(testing.allocator);
    defer stderr.deinit();

    const args = &[_][*:0]const u8{ "--num-users", "x" };
    const got = execute(stdout.writer(), stderr.writer(), args);

    // Expected value first, so a failure report labels the two sides correctly.
    try testing.expectError(error.User, got);
    try testing.expectEqualStrings("unable to parse --num-users=x: InvalidCharacter\n", stderr.items);
    try testing.expectEqualStrings("", stdout.items);
}
test "turbonss-makecorpus: simple OK case" {
    // End-to-end run into a temporary directory: execute must succeed
    // silently and leave both a passwd and a group file behind.
    const allocator = testing.allocator;
    var stdout = ArrayList(u8).init(allocator);
    defer stdout.deinit();
    var stderr = ArrayList(u8).init(allocator);
    defer stderr.deinit();

    var tmp = testing.tmpDir(.{});
    // Declared before the file handles below, so it runs after they are
    // closed (defers run in reverse order).
    defer tmp.cleanup();

    const tmp_path = blk: {
        const relative_path = try fs.path.join(allocator, &[_][]const u8{
            "zig-cache",
            "tmp",
            tmp.sub_path[0..],
        });
        defer allocator.free(relative_path);
        const real_path = try fs.realpathAlloc(allocator, relative_path);
        // Freed via defer so it does not leak if dupeZ fails.
        defer allocator.free(real_path);
        break :blk try allocator.dupeZ(u8, real_path);
    };
    defer allocator.free(tmp_path);

    const args = &[_][*:0]const u8{
        "--num-users",
        "1",
        "--num-groups",
        "1",
        "--avg-members",
        "1",
        "--directory",
        tmp_path,
    };
    try execute(stdout.writer(), stderr.writer(), args);
    try testing.expectEqualStrings("", stderr.items);
    try testing.expectEqualStrings("", stdout.items);

    // Opening each file is the existence check.
    const passwd_fname = try fs.path.join(allocator, &[_][]const u8{ tmp_path, "passwd" });
    defer allocator.free(passwd_fname);
    var passwd_file = try fs.cwd().openFile(passwd_fname, .{ .mode = .read_only });
    defer passwd_file.close();

    const group_fname = try fs.path.join(allocator, &[_][]const u8{ tmp_path, "group" });
    defer allocator.free(group_fname);
    var group_file = try fs.cwd().openFile(group_fname, .{ .mode = .read_only });
    defer group_file.close();
}
test "turbonss-makecorpus: dump_passwd" {
    // Seven generated users must render exactly as the `want_passwd` fixture.
    var output = ArrayList(u8).init(testing.allocator);
    defer output.deinit();

    const writer = output.writer();
    try dump_passwd(writer, 7);

    try testing.expectEqualStrings(want_passwd, output.items);
}
test "turbonss-makecorpus: dump_group" {
    // 7 users, 8 groups, avg-members 4 must render exactly as `want_group`.
    var output = ArrayList(u8).init(testing.allocator);
    defer output.deinit();

    const writer = output.writer();
    try dump_group(writer, 7, 8, 4);

    try testing.expectEqualStrings(want_group, output.items);
}

View File

@ -15,8 +15,9 @@ const DB = @import("DB.zig");
const ErrCtx = @import("ErrCtx.zig"); const ErrCtx = @import("ErrCtx.zig");
const usage = const usage =
\\usage: turbonss-unix2db [options] \\usage: turbonss-unix2db [OPTION]...
\\ \\
\\Options:
\\ -h Print this help message and exit \\ -h Print this help message and exit
\\ --passwd Path to passwd file (default: passwd) \\ --passwd Path to passwd file (default: passwd)
\\ --group Path to group file (default: group) \\ --group Path to group file (default: group)
@ -185,6 +186,7 @@ test "turbonss-unix2db smoke test" {
defer corpus.deinit(); defer corpus.deinit();
var tmp = testing.tmpDir(.{}); var tmp = testing.tmpDir(.{});
// TODO: defer
errdefer tmp.cleanup(); errdefer tmp.cleanup();
const tmp_path = blk: { const tmp_path = blk: {