wip: turbo-analyze

This commit is contained in:
Motiejus Jakštys 2022-07-05 06:02:54 +03:00
parent 84eb2ca6f0
commit 65b0fa774d
6 changed files with 264 additions and 23 deletions


@@ -218,7 +218,7 @@ is `home_len`. The same logic applies to all the `stringdata` fields: there is
 a way to calculate their relative position from the length of the fields before
 them.
 
-PackedUser employs two "simple" compression techniques:
+PackedUser employs two data-oriented compression techniques:
 - shells are often shared across different users, see the "Shells" section.
 - `name` is frequently a suffix of `home`. For example, `/home/vidmantas` and
   `vidmantas`. In this case storing both name and home is wasteful. Therefore
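
(A toy sketch of the suffix rule above — not code from this commit; the helper name is hypothetical:)

const std = @import("std");

// If `name` ("vidmantas") is a suffix of `home` ("/home/vidmantas"),
// PackedUser can store `home` plus a small marker instead of both strings.
fn nameIsSuffixOfHome(name: []const u8, home: []const u8) bool {
    return std.mem.endsWith(u8, home, name);
}

test "suffix rule" {
    try std.testing.expect(nameIsSuffixOfHome("vidmantas", "/home/vidmantas"));
    try std.testing.expect(!nameIsSuffixOfHome("root", "/home/vidmantas"));
}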
@@ -331,10 +331,10 @@ will be pointing to a number `n ∈ [0,N-1]`, regardless whether the value was in
 the initial dictionary. Therefore one must always confirm, after calculating
 the hash, that the key matches what's been hashed.
 
-`idx_*` sections are of type `[]u32` and are pointing to the respective
-`Groups` and `Users` entries (from the beginning of the respective section).
-Since User and Group records are 8-byte aligned, the actual offset to the
-record is acquired by right-shifting this value by 3 bits.
+`idx_*` sections are of type `[]u32` and are pointing from `hash(key)` to the
+respective `Groups` and `Users` entries (from the beginning of the respective
+section). Since User and Group records are 8-byte aligned, the actual offset to
+the record is acquired by right-shifting this value by 3 bits.
 
 Database file structure
 -----------------------
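
(The verify-after-hash rule can be illustrated with a self-contained toy — a sketch, not turbonss code; `Entry`, `lookup`, and the Wyhash stand-in for the bdz perfect hash are hypothetical:)

const std = @import("std");

const Entry = struct { key: []const u8, uid: u32 };

// Any hash reduced to [0, N-1] returns *some* slot even for absent keys,
// so the slot's stored key must be compared against the query.
fn lookup(table: []const Entry, key: []const u8) ?u32 {
    const n = @intCast(usize, std.hash.Wyhash.hash(0, key) % table.len); // stand-in for bdz
    if (!std.mem.eql(u8, table[n].key, key)) return null; // key mismatch: absent
    return table[n].uid;
}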


@@ -591,7 +591,7 @@ fn bdzIdx(
 }
 
 // nblocks_n returns how many blocks a given number of bytes will take
-fn nblocks_n(comptime T: type, nbytes: usize) T {
+pub fn nblocks_n(comptime T: type, nbytes: usize) T {
     const B = switch (T) {
         u8 => u14,
         u16 => u22,
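
(`nblocks_n` is made pub because the new src/analyze.zig below calls it as `DB.nblocks_n(u64, @sizeOf(Header))` to compute the header section's size.)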

src/analyze.zig (new file, 240 additions)

@@ -0,0 +1,240 @@
const std = @import("std");
const fs = std.fs;
const io = std.io;
const mem = std.mem;
const os = std.os;
const heap = std.heap;
const meta = std.meta;
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
const flags = @import("flags.zig");
const DB = @import("DB.zig");
const File = @import("File.zig");
const PackedUser = @import("PackedUser.zig");
const Header = @import("header.zig").Header;
const section_length = @import("header.zig").section_length;
const usage =
    \\usage: turbo-analyze [options] [db.turbo]
    \\
    \\ -h          Print this help message and exit
    \\ [db.turbo]  Path to the turbonss database file (default: ./db.turbo)
    \\
;
const Info = struct {
    fname: []const u8,
    size_file: os.off_t,
    version: meta.fieldInfo(Header, .version).field_type,
    endian: meta.fieldInfo(Header, .endian).field_type,
    ptr_size: meta.fieldInfo(Header, .ptr_size).field_type,
    getgr_bufsize: meta.fieldInfo(Header, .getgr_bufsize).field_type,
    getpw_bufsize: meta.fieldInfo(Header, .getpw_bufsize).field_type,
    users: meta.fieldInfo(Header, .num_users).field_type,
    groups: meta.fieldInfo(Header, .num_groups).field_type,
    shells: meta.fieldInfo(Header, .num_shells).field_type,
    nbytes_header: u64,
    nbytes_bdz_gid: u64,
    offset_bdz_gid: u64,
    nbytes_bdz_groupname: u64,
    offset_bdz_groupname: u64,
    nbytes_bdz_uid: u64,
    offset_bdz_uid: u64,
    nbytes_bdz_username: u64,
    offset_bdz_username: u64,
    nbytes_idx_gid2group: u64,
    offset_idx_gid2group: u64,
    nbytes_idx_groupname2group: u64,
    offset_idx_groupname2group: u64,
    nbytes_idx_uid2user: u64,
    offset_idx_uid2user: u64,
    nbytes_idx_name2user: u64,
    offset_idx_name2user: u64,
    nbytes_shell_index: u64,
    offset_shell_index: u64,
    nbytes_shell_blob: u64,
    offset_shell_blob: u64,
    nbytes_groups: u64,
    offset_groups: u64,
    nbytes_users: u64,
    offset_users: u64,
    nbytes_additional_gids: u64,
    offset_additional_gids: u64,
};
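// Note: `template` below uses Zig's named-argument format syntax:
// `{[users]d}` reads the `users` field of the struct passed to `print`,
// and `d:<21` left-aligns the integer in a 21-column field.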
const template =
    \\File: {[fname]s}
    \\Size: {[size_file]:.2} ({[size_file]d} bytes)
    \\Version: {[version]d}
    \\Endian: {[endian]s}
    \\Pointer size: {[ptr_size]d}
    \\Buffer size in bytes for getgr: {[getgr_bufsize]d}
    \\Buffer size in bytes for getpw: {[getpw_bufsize]d}
    \\
    \\Users: {[users]d}
    \\Groups: {[groups]d}
    \\Shells: {[shells]d}
    \\Sections:
    \\  Name                 Offset                Size
    \\  header               0                     {[nbytes_header]:.2}
    \\  bdz_gid              {[offset_bdz_gid]d:<21} {[nbytes_bdz_gid]:.2}
    \\  bdz_groupname        {[offset_bdz_groupname]d:<21} {[nbytes_bdz_groupname]:.2}
    \\  bdz_uid              {[offset_bdz_uid]d:<21} {[nbytes_bdz_uid]:.2}
    \\  bdz_username         {[offset_bdz_username]d:<21} {[nbytes_bdz_username]:.2}
    \\  idx_gid2group        {[offset_idx_gid2group]d:<21} {[nbytes_idx_gid2group]:.2}
    \\  idx_groupname2group  {[offset_idx_groupname2group]d:<21} {[nbytes_idx_groupname2group]:.2}
    \\  idx_uid2user         {[offset_idx_uid2user]d:<21} {[nbytes_idx_uid2user]:.2}
    \\  idx_name2user        {[offset_idx_name2user]d:<21} {[nbytes_idx_name2user]:.2}
    \\  shell_index          {[offset_shell_index]d:<21} {[nbytes_shell_index]:.2}
    \\  shell_blob           {[offset_shell_blob]d:<21} {[nbytes_shell_blob]:.2}
    \\  groups               {[offset_groups]d:<21} {[nbytes_groups]:.2}
    \\  users                {[offset_users]d:<21} {[nbytes_users]:.2}
    \\  additional_gids      {[offset_additional_gids]d:<21} {[nbytes_additional_gids]:.2}
;
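// Rendering is then a single formatted write over `info`, e.g.
// `stdout.print(template ++ "\n", info)` (not yet wired up in this WIP).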
pub fn main() !void {
    // This line is here because of https://github.com/ziglang/zig/issues/7807
    const argv: []const [*:0]const u8 = os.argv;
    const stderr = io.getStdErr().writer();
    const stdout = io.getStdOut().writer();
    const return_code = execute(stdout, stderr, argv[1..]);
    os.exit(return_code);
}
fn execute(
    stdout: anytype,
    stderr: anytype,
    argv: []const [*:0]const u8,
) u8 {
    const myflags = flags.parse(argv, &[_]flags.Flag{
        .{ .name = "-h", .kind = .boolean },
    }) catch {
        stderr.writeAll(usage) catch {};
        return 1;
    };

    if (myflags.boolFlag("-h")) {
        stdout.writeAll(usage) catch return 1;
        return 0;
    }

    const db_file = switch (myflags.args.len) {
        0 => "./db.turbo",
        1 => mem.span(myflags.args[0]),
        else => {
            stderr.print("ERROR: too many arguments\n", .{}) catch {};
            stderr.writeAll(usage) catch {};
            return 1;
        },
    };
    const file_size_bytes = blk: {
        const fd = os.open(db_file, os.O.RDONLY, 0) catch |err| {
            stderr.print("ERROR: failed to open '{s}': {s}\n", .{
                db_file,
                @errorName(err),
            }) catch {};
            return 1;
        };
        defer os.close(fd);
        const stat = os.fstat(fd) catch |err| {
            stderr.print("ERROR: fstat '{s}': {s}\n", .{
                db_file,
                @errorName(err),
            }) catch {};
            return 1;
        };
        break :blk stat.size;
    };

    var file = File.open(db_file) catch |err| {
        stderr.print(
            "ERROR {s}: file '{s}' is corrupted or cannot be read\n",
            .{ @errorName(err), db_file },
        ) catch {};
        return 1;
    };
    defer file.close();
    const db = file.db;

    // const offset_bdz_gid = @ptrToInt(db.bdz_gid.ptr) - @ptrToInt(db.header.ptr);
    // const offset_bdz_groupname = @ptrToInt(db.bdz_groupname) - offset_bdz_gid;
    // const offset_bdz_uid = @ptrToInt(db.bdz_uid) - offset_bdz_groupname;
    // const offset_bdz_username = @ptrToInt(db.bdz_username) - offset_bdz_uid;
    // const offset_idx_gid2group = @ptrToInt(db.idx_gid2group) - offset_bdz_username;
    // const offset_idx_groupname2group = @ptrToInt(db.idx_groupname2group) - offset_idx_gid2group;
    // const offset_idx_uid2user = @ptrToInt(db.idx_uid2user) - offset_idx_groupname2group;
    // const offset_idx_name2user = @ptrToInt(db.idx_name2user) - offset_idx_uid2user;
    // const offset_shell_index = @ptrToInt(db.shell_index) - offset_idx_name2user;
    // const offset_shell_blob = @ptrToInt(db.shell_blob) - offset_shell_index;
    // const offset_groups = @ptrToInt(db.groups) - offset_shell_blob;
    // const offset_users = @ptrToInt(db.users) - offset_groups;
    // const offset_groupmembers = @ptrToInt(db.groupmembers) - offset_users;
    // const offset_additional_gids = @ptrToInt(db.additional_gids) - offset_groupmembers;
    const info = Info{
        .fname = db_file,
        .size_file = file_size_bytes,
        .version = db.header.version,
        .endian = db.header.endian,
        .ptr_size = db.header.ptr_size,
        .getgr_bufsize = db.header.getgr_bufsize,
        .getpw_bufsize = db.header.getpw_bufsize,
        .users = db.header.num_users,
        .groups = db.header.num_groups,
        .shells = db.header.num_shells,
        .nbytes_header = section_length * DB.nblocks_n(u64, @sizeOf(Header)),
        .offset_bdz_gid = 0,
        .nbytes_bdz_gid = 0,
        .nbytes_bdz_groupname = 0,
        .offset_bdz_groupname = 0,
        .nbytes_bdz_uid = 0,
        .offset_bdz_uid = 0,
        .nbytes_bdz_username = 0,
        .offset_bdz_username = 0,
        .nbytes_idx_gid2group = 0,
        .offset_idx_gid2group = 0,
        .nbytes_idx_groupname2group = 0,
        .offset_idx_groupname2group = 0,
        .nbytes_idx_uid2user = 0,
        .offset_idx_uid2user = 0,
        .nbytes_idx_name2user = 0,
        .offset_idx_name2user = 0,
        .nbytes_shell_index = 0,
        .offset_shell_index = 0,
        .nbytes_shell_blob = 0,
        .offset_shell_blob = 0,
        .nbytes_groups = 0,
        .offset_groups = 0,
        .nbytes_users = 0,
        .offset_users = 0,
        .nbytes_additional_gids = 0,
        .offset_additional_gids = 0,
    };
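    // TODO(wip): compute the remaining section sizes/offsets above and print
    // `template` formatted with `info` to stdout.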
    _ = info;
    return 0;
}
const testing = std.testing;

test "trivial error: db file" {
    const args = &[_][*:0]const u8{};
    const allocator = testing.allocator;
    var stderr = ArrayList(u8).init(allocator);
    defer stderr.deinit();
    var stdout = ArrayList(u8).init(allocator);
    defer stdout.deinit();

    const exit_code = execute(stdout.writer(), stderr.writer(), args[0..]);
    try testing.expectEqual(@as(u8, 1), exit_code);
    try testing.expectEqualStrings(
        stderr.items,
        "ERROR: failed to open './db.turbo': FileNotFound\n",
    );
}
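
(The test drives `execute` with in-memory ArrayList writers, so it exercises the real error path without touching the process's stdout or stderr.)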


@@ -79,7 +79,7 @@ test "Section length is a power of two" {
     try testing.expect(std.math.isPowerOfTwo(section_length));
 }
 
-test "Header fits into two section" {
+test "Header fits into two sections" {
     try testing.expect(@sizeOf(Header) == 2 * section_length);
 }


@@ -17,4 +17,5 @@ test "turbonss test suite" {
     // main
     _ = @import("unix2db.zig");
+    _ = @import("analyze.zig");
 }


@@ -15,7 +15,7 @@ const DB = @import("DB.zig");
 const ErrCtx = @import("ErrCtx.zig");
 
 const usage =
-    \\usage: turbonss-unix2db [options]
+    \\usage: turbo-unix2db [options]
     \\
    \\ -h          Print this help message and exit
    \\ --passwd    Path to passwd file (default: ./passwd)
@@ -62,22 +62,22 @@ fn execute(
         return 1;
     }
 
-    const passwdFname = result.argFlag("--passwd") orelse "./passwd";
-    const groupFname = result.argFlag("--group") orelse "./group";
-    const outFile = result.argFlag("--output") orelse "./db.turbo";
+    const passwd_fname = result.argFlag("--passwd") orelse "./passwd";
+    const group_fname = result.argFlag("--group") orelse "./group";
+    const out_fname = result.argFlag("--output") orelse "./db.turbo";
 
     // to catch an error set file.OpenError, wait for
     // https://github.com/ziglang/zig/issues/2473
     var errc = ErrCtx{};
-    var passwdFile = fs.cwd().openFile(passwdFname, .{ .mode = .read_only }) catch |err|
-        return fail(errc.wrapf("open '{s}'", .{passwdFname}), stderr, err);
-    defer passwdFile.close();
+    var passwd_file = fs.cwd().openFile(passwd_fname, .{ .mode = .read_only }) catch |err|
+        return fail(errc.wrapf("open '{s}'", .{passwd_fname}), stderr, err);
+    defer passwd_file.close();
 
-    var groupFile = fs.cwd().openFile(groupFname, .{ .mode = .read_only }) catch |err|
-        return fail(errc.wrapf("open '{s}'", .{groupFname}), stderr, err);
-    defer groupFile.close();
+    var group_file = fs.cwd().openFile(group_fname, .{ .mode = .read_only }) catch |err|
+        return fail(errc.wrapf("open '{s}'", .{group_fname}), stderr, err);
+    defer group_file.close();
 
-    var passwdReader = io.bufferedReader(passwdFile.reader()).reader();
+    var passwdReader = io.bufferedReader(passwd_file.reader()).reader();
     var users = User.fromReader(allocator, &errc, passwdReader) catch |err|
         return fail(errc.wrap("read users"), stderr, err);
     defer {
@@ -85,7 +85,7 @@ fn execute(
         allocator.free(users);
     }
 
-    var groupReader = io.bufferedReader(groupFile.reader()).reader();
+    var groupReader = io.bufferedReader(group_file.reader()).reader();
     var groups = Group.fromReader(allocator, groupReader) catch |err|
         return fail(errc.wrap("read groups"), stderr, err);
     defer {
@@ -101,14 +101,14 @@ fn execute(
         return fail(errc.wrap("construct DB from corpus"), stderr, err);
     defer db.deinit(allocator);
 
-    const fd = os.open(outFile, os.O.WRONLY | os.O.TRUNC | os.O.CREAT, 0644) catch |err|
-        return fail(errc.wrapf("open for writing {s}", .{outFile}), stderr, err);
+    const fd = os.open(out_fname, os.O.WRONLY | os.O.TRUNC | os.O.CREAT, 0644) catch |err|
+        return fail(errc.wrapf("open for writing {s}", .{out_fname}), stderr, err);
     errdefer os.close(fd);
 
     const len = os.writev(fd, db.iov().constSlice()) catch |err|
-        return fail(errc.wrapf("writev to {s}", .{outFile}), stderr, err);
+        return fail(errc.wrapf("writev to {s}", .{out_fname}), stderr, err);
     os.fsync(fd) catch |err|
-        return fail(errc.wrapf("fsync {s}", .{outFile}), stderr, err);
+        return fail(errc.wrapf("fsync {s}", .{out_fname}), stderr, err);
     os.close(fd);
 
     stderr.print("total {d} bytes. groups={d} users={d}\n", .{