diff --git a/README.md b/README.md
index aa72ea9..65e4d98 100644
--- a/README.md
+++ b/README.md
@@ -218,7 +218,7 @@ is `home_len`. The same logic applies to all the `stringdata` fields: there
 is a way to calculate their relative position from the length of the fields
 before them.
 
-PackedUser employs two "simple" compression techniques:
+PackedUser employs two data-oriented compression techniques:
 - shells are often shared across different users, see the "Shells" section.
 - `name` is frequently a suffix of `home`. For example, `/home/vidmantas` and
   `vidmantas`. In this case storing both name and home is wasteful. Therefore
@@ -331,10 +331,10 @@ will be pointing to a number `n ∈ [0,N-1]`, regardless whether the value was i
 the initial dictionary. Therefore one must always confirm, after calculating
 the hash, that the key matches what's been hashed.
 
-`idx_*` sections are of type `[]u32` and are pointing to the respective
-`Groups` and `Users` entries (from the beginning of the respective section).
-Since User and Group records are 8-byte aligned, the actual offset to the
-record is acquired by right-shifting this value by 3 bits.
+`idx_*` sections are of type `[]u32` and are pointing from `hash(key)` to the
+respective `Groups` and `Users` entries (from the beginning of the respective
+section). Since User and Group records are 8-byte aligned, the actual offset to
+the record is acquired by right-shifting this value by 3 bits.
 
 Database file structure
 -----------------------
diff --git a/src/DB.zig b/src/DB.zig
index 1c6accc..b81ea93 100644
--- a/src/DB.zig
+++ b/src/DB.zig
@@ -591,7 +591,7 @@ fn bdzIdx(
 }
 
 // nblocks_n returns how many blocks a given number of bytes will take
-fn nblocks_n(comptime T: type, nbytes: usize) T {
+pub fn nblocks_n(comptime T: type, nbytes: usize) T {
     const B = switch (T) {
         u8 => u14,
         u16 => u22,
diff --git a/src/analyze.zig b/src/analyze.zig
new file mode 100644
index 0000000..7242c25
--- /dev/null
+++ b/src/analyze.zig
@@ -0,0 +1,240 @@
+const std = @import("std");
+const fs = std.fs;
+const io = std.io;
+const mem = std.mem;
+const os = std.os;
+const heap = std.heap;
+const meta = std.meta;
+const ArrayList = std.ArrayList;
+const Allocator = std.mem.Allocator;
+
+const flags = @import("flags.zig");
+
+const DB = @import("DB.zig");
+const File = @import("File.zig");
+const PackedUser = @import("PackedUser.zig");
+const Header = @import("header.zig").Header;
+const section_length = @import("header.zig").section_length;
+
+const usage =
+    \\usage: turbo-analyze [options] [db.turbo]
+    \\
+    \\ -h          Print this help message and exit
+    \\ [db.turbo]  Path to the turbonss database file (default: ./db.turbo)
+    \\
+;
+
+const Info = struct {
+    fname: []const u8,
+    size_file: os.off_t,
+    version: meta.fieldInfo(Header, .version).field_type,
+    endian: meta.fieldInfo(Header, .endian).field_type,
+    ptr_size: meta.fieldInfo(Header, .ptr_size).field_type,
+    getgr_bufsize: meta.fieldInfo(Header, .getgr_bufsize).field_type,
+    getpw_bufsize: meta.fieldInfo(Header, .getpw_bufsize).field_type,
+    users: meta.fieldInfo(Header, .num_users).field_type,
+    groups: meta.fieldInfo(Header, .num_groups).field_type,
+    shells: meta.fieldInfo(Header, .num_shells).field_type,
+    nbytes_header: u64,
+    nbytes_bdz_gid: u64,
+    offset_bdz_gid: u64,
+    nbytes_bdz_groupname: u64,
+    offset_bdz_groupname: u64,
+    nbytes_bdz_uid: u64,
+    offset_bdz_uid: u64,
+    nbytes_bdz_username: u64,
+    offset_bdz_username: u64,
+    nbytes_idx_gid2group: u64,
+    offset_idx_gid2group: u64,
+    nbytes_idx_groupname2group: u64,
+    offset_idx_groupname2group: u64,
+    nbytes_idx_uid2user: u64,
+    offset_idx_uid2user: u64,
+    nbytes_idx_name2user: u64,
+    offset_idx_name2user: u64,
+    nbytes_shell_index: u64,
+    offset_shell_index: u64,
+    nbytes_shell_blob: u64,
+    offset_shell_blob: u64,
+    nbytes_groups: u64,
+    offset_groups: u64,
+    nbytes_users: u64,
+    offset_users: u64,
+    nbytes_additional_gids: u64,
+    offset_additional_gids: u64,
+};
+
+const template =
+    \\File: {[fname]s}
+    \\Size: {[size_file]:.2} ({[size_file]d} bytes)
+    \\Version: {[version]d}
+    \\Endian: {[endian]s}
+    \\Pointer size: {[ptr_size]d}
+    \\Buffer size in bytes for getgr: {[getgr_bufsize]d}
+    \\Buffer size in bytes for getpw: {[getpw_bufsize]d}
+    \\
+    \\Users: {[users]d}
+    \\Groups: {[groups]d}
+    \\Shells: {[shells]d}
+    \\Sections:
+    \\  Name                  Offset                Size
+    \\  header                0                     {[nbytes_header]:.2}
+    \\  bdz_gid               {[offset_bdz_gid]d:<21} {[nbytes_bdz_gid]:.2}
+    \\  bdz_groupname         {[offset_bdz_groupname]d:<21} {[nbytes_bdz_groupname]:.2}
+    \\  bdz_uid               {[offset_bdz_uid]d:<21} {[nbytes_bdz_uid]:.2}
+    \\  bdz_username          {[offset_bdz_username]d} {[nbytes_bdz_username]:.2}
+    \\  idx_gid2group         {[offset_idx_gid2group]d} {[nbytes_idx_gid2group]:.2}
+    \\  idx_groupname2group   {[offset_idx_groupname2group]d} {[nbytes_idx_groupname2group]:.2}
+    \\  idx_uid2user          {[offset_idx_uid2user]d} {[nbytes_idx_uid2user]:.2}
+    \\  idx_name2user         {[offset_idx_name2user]d} {[nbytes_idx_name2user]:.2}
+    \\  shell_index           {[offset_shell_index]d} {[nbytes_shell_index]:.2}
+    \\  shell_blob            {[offset_shell_blob]d} {[nbytes_shell_blob]:.2}
+    \\  groups                {[offset_groups]d} {[nbytes_groups]:.2}
+    \\  users                 {[offset_users]d} {[nbytes_users]:.2}
+    \\  additional_gids       {[offset_additional_gids]d} {[nbytes_additional_gids]:.2}
+;
+
+pub fn main() !void {
+    // This line is here because of https://github.com/ziglang/zig/issues/7807
+    const argv: []const [*:0]const u8 = os.argv;
+
+    const stderr = io.getStdErr().writer();
+    const stdout = io.getStdOut().writer();
+
+    const return_code = execute(stdout, stderr, argv[1..]);
+    os.exit(return_code);
+}
+
+fn execute(
+    stdout: anytype,
+    stderr: anytype,
+    argv: []const [*:0]const u8,
+) u8 {
+    _ = stdout;
+    const myflags = flags.parse(argv, &[_]flags.Flag{
+        .{ .name = "-h", .kind = .boolean },
+    }) catch {
+        stderr.writeAll(usage) catch {};
+        return 1;
+    };
+
+    if (myflags.boolFlag("-h")) {
+        stdout.writeAll(usage) catch return 1;
+        return 0;
+    }
+
+    const db_file = switch (myflags.args.len) {
+        0 => "./db.turbo",
+        1 => mem.span(myflags.args[0]),
+        else => {
+            stderr.print("ERROR: too many arguments\n", .{}) catch {};
+            stderr.writeAll(usage) catch {};
+            return 1;
+        },
+    };
+
+    const file_size_bytes = blk: {
+        const fd = os.open(db_file, os.O.RDONLY, 0) catch |err| {
+            stderr.print("ERROR: failed to open '{s}': {s}\n", .{
+                db_file,
+                @errorName(err),
+            }) catch {};
+            return 1;
+        };
+        defer os.close(fd);
+        const stat = os.fstat(fd) catch |err| {
+            stderr.print("ERROR: fstat '{s}': {s}\n", .{
+                db_file,
+                @errorName(err),
+            }) catch {};
+            return 1;
+        };
+        break :blk stat.size;
+    };
+
+    var file = File.open(db_file) catch |err| {
+        stderr.print(
+            "ERROR {s}: file '{s}' is corrupted or cannot be read\n",
+            .{ @errorName(err), db_file },
+        ) catch {};
+        return 1;
+    };
+    defer file.close();
+    const db = file.db;
+
+    // const offset_bdz_gid = @ptrToInt(db.bdz_gid.ptr) - @ptrToInt(db.header.ptr);
+    // const offset_bdz_groupname= @ptrToInt(db.bdz_groupname) - offset_bdz_gid;
+    // const offset_bdz_uid= @ptrToInt(db.bdz_uid) - offset_bdz_groupname;
+    // const offset_bdz_username= @ptrToInt(db.bdz_username) - offset_bdz_uid;
+    // const offset_idx_gid2group= @ptrToInt(db.idx_gid2group) - offset_bdz_username;
+    // const offset_idx_groupname2group= @ptrToInt(db.idx_groupname2group) - offset_idx_gid2group;
+    // const offset_idx_uid2user= @ptrToInt(db.idx_uid2user) - offset_idx_groupname2group;
+    // const offset_idx_name2user= @ptrToInt(db.idx_name2user) - offset_idx_uid2user;
+    // const offset_shell_index= @ptrToInt(db.shell_index) - offset_idx_name2user;
+    // const offset_shell_blob= @ptrToInt(db.shell_blob) - offset_shell_index;
+    // const offset_groups= @ptrToInt(db.groups) - offset_shell_blob;
+    // const offset_users= @ptrToInt(db.users) - offset_groups;
+    // const offset_groupmembers= @ptrToInt(db.groupmembers) - offset_users;
+    // const offset_additional_gids= @ptrToInt(db.additional_gids) - offset_groupmembers;
+
+    const info = Info{
+        .fname = db_file,
+        .size_file = file_size_bytes,
+        .version = db.header.version,
+        .endian = db.header.endian,
+        .ptr_size = db.header.ptr_size,
+        .getgr_bufsize = db.header.getgr_bufsize,
+        .getpw_bufsize = db.header.getpw_bufsize,
+        .users = db.header.num_users,
+        .groups = db.header.num_groups,
+        .shells = db.header.num_shells,
+        .nbytes_header = section_length * DB.nblocks_n(u64, @sizeOf(Header)),
+        .offset_bdz_gid = 0,
+        .nbytes_bdz_gid = 0,
+        .nbytes_bdz_groupname = 0,
+        .offset_bdz_groupname = 0,
+        .nbytes_bdz_uid = 0,
+        .offset_bdz_uid = 0,
+        .nbytes_bdz_username = 0,
+        .offset_bdz_username = 0,
+        .nbytes_idx_gid2group = 0,
+        .offset_idx_gid2group = 0,
+        .nbytes_idx_groupname2group = 0,
+        .offset_idx_groupname2group = 0,
+        .nbytes_idx_uid2user = 0,
+        .offset_idx_uid2user = 0,
+        .nbytes_idx_name2user = 0,
+        .offset_idx_name2user = 0,
+        .nbytes_shell_index = 0,
+        .offset_shell_index = 0,
+        .nbytes_shell_blob = 0,
+        .offset_shell_blob = 0,
+        .nbytes_groups = 0,
+        .offset_groups = 0,
+        .nbytes_users = 0,
+        .offset_users = 0,
+        .nbytes_additional_gids = 0,
+        .offset_additional_gids = 0,
+    };
+    _ = info;
+
+    return 0;
+}
+
+const testing = std.testing;
+
+test "trivial error: db file" {
+    const args = &[_][*:0]const u8{};
+    const allocator = testing.allocator;
+    var stderr = ArrayList(u8).init(allocator);
+    defer stderr.deinit();
+    var stdout = ArrayList(u8).init(allocator);
+    defer stdout.deinit();
+
+    const exit_code = execute(stdout.writer(), stderr.writer(), args[0..]);
+    try testing.expectEqual(@as(u8, 1), exit_code);
+    try testing.expectEqualStrings(
+        stderr.items,
+        "ERROR: failed to open './db.turbo': FileNotFound\n",
+    );
+}
diff --git a/src/header.zig b/src/header.zig
index 2cb878c..4e95919 100644
--- a/src/header.zig
+++ b/src/header.zig
@@ -79,7 +79,7 @@ test "Section length is a power of two" {
     try testing.expect(std.math.isPowerOfTwo(section_length));
 }
 
-test "Header fits into two section" {
+test "Header fits into two sections" {
     try testing.expect(@sizeOf(Header) == 2 * section_length);
 }
 
diff --git a/src/test_all.zig b/src/test_all.zig
index 0b441e2..5d82af2 100644
--- a/src/test_all.zig
+++ b/src/test_all.zig
@@ -17,4 +17,5 @@ test "turbonss test suite" {
 
     // main
     _ = @import("unix2db.zig");
+    _ = @import("analyze.zig");
 }
diff --git a/src/unix2db.zig b/src/unix2db.zig
index 05fbb92..7a64f6b 100644
--- a/src/unix2db.zig
+++ b/src/unix2db.zig
@@ -15,7 +15,7 @@ const DB = @import("DB.zig");
 const ErrCtx = @import("ErrCtx.zig");
 
 const usage =
-    \\usage: turbonss-unix2db [options]
+    \\usage: turbo-unix2db [options]
     \\
     \\ -h          Print this help message and exit
     \\ --passwd    Path to passwd file (default: ./passwd)
@@ -62,22 +62,22 @@ fn execute(
         return 1;
     }
 
-    const passwdFname = result.argFlag("--passwd") orelse "./passwd";
-    const groupFname = result.argFlag("--group") orelse "./group";
-    const outFile = result.argFlag("--output") orelse "./db.turbo";
+    const passwd_fname = result.argFlag("--passwd") orelse "./passwd";
+    const group_fname = result.argFlag("--group") orelse "./group";
+    const out_fname = result.argFlag("--output") orelse "./db.turbo";
 
     // to catch an error set file.OpenError, wait for
     // https://github.com/ziglang/zig/issues/2473
     var errc = ErrCtx{};
-    var passwdFile = fs.cwd().openFile(passwdFname, .{ .mode = .read_only }) catch |err|
-        return fail(errc.wrapf("open '{s}'", .{passwdFname}), stderr, err);
-    defer passwdFile.close();
+    var passwd_file = fs.cwd().openFile(passwd_fname, .{ .mode = .read_only }) catch |err|
+        return fail(errc.wrapf("open '{s}'", .{passwd_fname}), stderr, err);
+    defer passwd_file.close();
 
-    var groupFile = fs.cwd().openFile(groupFname, .{ .mode = .read_only }) catch |err|
-        return fail(errc.wrapf("open '{s}'", .{groupFname}), stderr, err);
-    defer groupFile.close();
+    var group_file = fs.cwd().openFile(group_fname, .{ .mode = .read_only }) catch |err|
+        return fail(errc.wrapf("open '{s}'", .{group_fname}), stderr, err);
+    defer group_file.close();
 
-    var passwdReader = io.bufferedReader(passwdFile.reader()).reader();
+    var passwdReader = io.bufferedReader(passwd_file.reader()).reader();
     var users = User.fromReader(allocator, &errc, passwdReader) catch |err|
         return fail(errc.wrap("read users"), stderr, err);
     defer {
@@ -85,7 +85,7 @@ fn execute(
         allocator.free(users);
     }
 
-    var groupReader = io.bufferedReader(groupFile.reader()).reader();
+    var groupReader = io.bufferedReader(group_file.reader()).reader();
     var groups = Group.fromReader(allocator, groupReader) catch |err|
         return fail(errc.wrap("read groups"), stderr, err);
     defer {
@@ -101,14 +101,14 @@ fn execute(
         return fail(errc.wrap("construct DB from corpus"), stderr, err);
     defer db.deinit(allocator);
 
-    const fd = os.open(outFile, os.O.WRONLY | os.O.TRUNC | os.O.CREAT, 0644) catch |err|
-        return fail(errc.wrapf("open for writing {s}", .{outFile}), stderr, err);
+    const fd = os.open(out_fname, os.O.WRONLY | os.O.TRUNC | os.O.CREAT, 0644) catch |err|
+        return fail(errc.wrapf("open for writing {s}", .{out_fname}), stderr, err);
     errdefer os.close(fd);
 
     const len = os.writev(fd, db.iov().constSlice()) catch |err|
-        return fail(errc.wrapf("writev to {s}", .{outFile}), stderr, err);
+        return fail(errc.wrapf("writev to {s}", .{out_fname}), stderr, err);
     os.fsync(fd) catch |err|
-        return fail(errc.wrapf("fsync {s}", .{outFile}), stderr, err);
+        return fail(errc.wrapf("fsync {s}", .{out_fname}), stderr, err);
     os.close(fd);
 
     stderr.print("total {d} bytes. groups={d} users={d}\n", .{