1
Fork 0

wip: turbo-analyze

main
Motiejus Jakštys 2022-07-05 06:02:54 +03:00
parent 84eb2ca6f0
commit 65b0fa774d
6 changed files with 264 additions and 23 deletions

View File

@ -218,7 +218,7 @@ is `home_len`. The same logic applies to all the `stringdata` fields: there is
a way to calculate their relative position from the length of the fields before
them.
PackedUser employs two "simple" compression techniques:
PackedUser employs two data-oriented compression techniques:
- shells are often shared across different users, see the "Shells" section.
- `name` is frequently a suffix of `home`. For example, `/home/vidmantas` and
`vidmantas`. In this case storing both name and home is wasteful. Therefore
@ -331,10 +331,10 @@ will be pointing to a number `n ∈ [0,N-1]`, regardless whether the value was i
the initial dictionary. Therefore one must always confirm, after calculating
the hash, that the key matches what's been hashed.
`idx_*` sections are of type `[]u32` and are pointing to the respective
`Groups` and `Users` entries (from the beginning of the respective section).
Since User and Group records are 8-byte aligned, the actual offset to the
record is acquired by right-shifting this value by 3 bits.
`idx_*` sections are of type `[]u32` and are pointing from `hash(key)` to the
respective `Groups` and `Users` entries (from the beginning of the respective
section). Since User and Group records are 8-byte aligned, the actual offset to
the record is acquired by right-shifting this value by 3 bits.
Database file structure
-----------------------

View File

@ -591,7 +591,7 @@ fn bdzIdx(
}
// nblocks_n returns how many blocks a given number of bytes will take
fn nblocks_n(comptime T: type, nbytes: usize) T {
pub fn nblocks_n(comptime T: type, nbytes: usize) T {
const B = switch (T) {
u8 => u14,
u16 => u22,

240
src/analyze.zig Normal file
View File

@ -0,0 +1,240 @@
const std = @import("std");
const fs = std.fs;
const io = std.io;
const mem = std.mem;
const os = std.os;
const heap = std.heap;
const meta = std.meta;
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
const flags = @import("flags.zig");
const DB = @import("DB.zig");
const File = @import("File.zig");
const PackedUser = @import("PackedUser.zig");
const Header = @import("header.zig").Header;
const section_length = @import("header.zig").section_length;
// Help text for turbo-analyze. `execute` writes it to stdout when `-h` is
// given and to stderr when flag parsing fails or too many arguments are passed.
const usage =
\\usage: turbo-analyze [options] [db.turbo]
\\
\\ -h Print this help message and exit
\\ [db.turbo] Path to the turbonss database file (default: ./db.turbo)
\\
;
/// Values gathered about one database file. The field names match the named
/// placeholders used by `template`.
const Info = struct {
    // The file itself: path as given (or defaulted) and its size from fstat.
    fname: []const u8,
    size_file: os.off_t,

    // Copies of header values; each type follows the corresponding `Header`
    // field so the two cannot drift apart.
    version: HeaderField(.version),
    endian: HeaderField(.endian),
    ptr_size: HeaderField(.ptr_size),
    getgr_bufsize: HeaderField(.getgr_bufsize),
    getpw_bufsize: HeaderField(.getpw_bufsize),
    users: HeaderField(.num_users),
    groups: HeaderField(.num_groups),
    shells: HeaderField(.num_shells),

    // Size of the header section, followed by a (size, offset) pair for
    // every other section.
    nbytes_header: u64,

    nbytes_bdz_gid: u64,
    offset_bdz_gid: u64,
    nbytes_bdz_groupname: u64,
    offset_bdz_groupname: u64,
    nbytes_bdz_uid: u64,
    offset_bdz_uid: u64,
    nbytes_bdz_username: u64,
    offset_bdz_username: u64,

    nbytes_idx_gid2group: u64,
    offset_idx_gid2group: u64,
    nbytes_idx_groupname2group: u64,
    offset_idx_groupname2group: u64,
    nbytes_idx_uid2user: u64,
    offset_idx_uid2user: u64,
    nbytes_idx_name2user: u64,
    offset_idx_name2user: u64,

    nbytes_shell_index: u64,
    offset_shell_index: u64,
    nbytes_shell_blob: u64,
    offset_shell_blob: u64,

    nbytes_groups: u64,
    offset_groups: u64,
    nbytes_users: u64,
    offset_users: u64,
    nbytes_additional_gids: u64,
    offset_additional_gids: u64,

    /// Type of the named field of `Header`.
    fn HeaderField(comptime field: meta.FieldEnum(Header)) type {
        return meta.fieldInfo(Header, field).field_type;
    }
};
/// `std.fmt` format string for the analysis report. Every placeholder is a
/// named argument (`{[name]spec}`) and each name is a field of `Info`, so the
/// string is meant to be formatted with an `Info` value.
///
/// Placeholder syntax is `{[argument]specifier:fill-alignment-width.precision}`:
/// the specifier (`d`, `s`) sits inside the braces right after the argument
/// name, and alignment/width/precision must follow a `:`.
const template =
    \\File: {[fname]s}
    \\Size: {[size_file]:.2} ({[size_file]d} bytes)
    \\Version: {[version]d}
    \\Endian: {[endian]s}
    \\Pointer size: {[ptr_size]d}
    \\Buffer size in bytes for getgr: {[getgr_bufsize]d}
    \\Buffer size in bytes for getpw: {[getpw_bufsize]d}
    \\
    \\Users: {[users]d}
    \\Groups: {[groups]d}
    \\Shells: {[shells]d}
    \\Sections:
    \\ Name Offset Size
    \\ header 0 {[nbytes_header]:.2}
    \\ bdz_gid {[offset_bdz_gid]d:<21} {[nbytes_bdz_gid]:.2}
    \\ bdz_groupname {[offset_bdz_groupname]d:<21} {[nbytes_bdz_groupname]:.2}
    \\ bdz_uid {[offset_bdz_uid]d:<21} {[nbytes_bdz_uid]:.2}
    \\ bdz_username {[offset_bdz_username]d} {[nbytes_bdz_username]:.2}
    \\ idx_gid2group {[offset_idx_gid2group]d} {[nbytes_idx_gid2group]:.2}
    \\ idx_groupname2group {[offset_idx_groupname2group]d} {[nbytes_idx_groupname2group]:.2}
    \\ idx_uid2user {[offset_idx_uid2user]d} {[nbytes_idx_uid2user]:.2}
    \\ idx_name2user {[offset_idx_name2user]d} {[nbytes_idx_name2user]:.2}
    \\ shell_index {[offset_shell_index]d} {[nbytes_shell_index]:.2}
    \\ shell_blob {[offset_shell_blob]d} {[nbytes_shell_blob]:.2}
    \\ groups {[offset_groups]d} {[nbytes_groups]:.2}
    \\ users {[offset_users]d} {[nbytes_users]:.2}
    \\ additional_gids {[offset_additional_gids]d} {[nbytes_additional_gids]:.2}
;
/// Process entry point: hands the standard streams and the command-line
/// arguments (minus the program name) to `execute` and exits with the code
/// it returns.
pub fn main() !void {
    // The explicit type on this binding is a workaround for
    // https://github.com/ziglang/zig/issues/7807
    const args: []const [*:0]const u8 = os.argv;
    const out = io.getStdOut().writer();
    const err_out = io.getStdErr().writer();
    os.exit(execute(out, err_out, args[1..]));
}
/// Runs turbo-analyze with the given output streams and arguments.
///
/// Parameters:
///   stdout: writer that receives the usage text when `-h` is given.
///   stderr: writer that receives error messages; failures to write to it
///           are deliberately ignored (best effort).
///   argv:   command-line arguments without the program name. At most one
///           positional argument (the database path) is accepted; with none,
///           "./db.turbo" is used.
///
/// Returns the process exit code: 0 on success (including `-h`), 1 when the
/// flags are invalid, there are too many arguments, or the file cannot be
/// opened, stat'ed or loaded.
fn execute(
    stdout: anytype,
    stderr: anytype,
    argv: []const [*:0]const u8,
) u8 {
    const myflags = flags.parse(argv, &[_]flags.Flag{
        .{ .name = "-h", .kind = .boolean },
    }) catch {
        stderr.writeAll(usage) catch {};
        return 1;
    };

    if (myflags.boolFlag("-h")) {
        stdout.writeAll(usage) catch return 1;
        return 0;
    }

    const db_file = switch (myflags.args.len) {
        0 => "./db.turbo",
        1 => mem.span(myflags.args[0]),
        else => {
            stderr.print("ERROR: too many arguments\n", .{}) catch {};
            stderr.writeAll(usage) catch {};
            return 1;
        },
    };

    // The file size comes from a short-lived descriptor of its own; it is
    // closed when this block ends, before the database is loaded below.
    const file_size_bytes = blk: {
        const fd = os.open(db_file, os.O.RDONLY, 0) catch |err| {
            stderr.print("ERROR: failed to open '{s}': {s}\n", .{
                db_file,
                @errorName(err),
            }) catch {};
            return 1;
        };
        defer os.close(fd);

        const stat = os.fstat(fd) catch |err| {
            stderr.print("ERROR: fstat '{s}': {s}\n", .{
                db_file,
                @errorName(err),
            }) catch {};
            return 1;
        };
        break :blk stat.size;
    };

    var file = File.open(db_file) catch |err| {
        stderr.print(
            "ERROR {s}: file '{s}' is corrupted or cannot be read\n",
            .{ @errorName(err), db_file },
        ) catch {};
        return 1;
    };
    defer file.close();
    const db = file.db;

    // TODO: fill in the offset and size of every section from `db`. Until
    // that is done only the header size is computed and the rest stay zero.
    const info = Info{
        .fname = db_file,
        .size_file = file_size_bytes,
        .version = db.header.version,
        .endian = db.header.endian,
        .ptr_size = db.header.ptr_size,
        .getgr_bufsize = db.header.getgr_bufsize,
        .getpw_bufsize = db.header.getpw_bufsize,
        .users = db.header.num_users,
        .groups = db.header.num_groups,
        .shells = db.header.num_shells,
        .nbytes_header = section_length * DB.nblocks_n(u64, @sizeOf(Header)),
        .nbytes_bdz_gid = 0,
        .offset_bdz_gid = 0,
        .nbytes_bdz_groupname = 0,
        .offset_bdz_groupname = 0,
        .nbytes_bdz_uid = 0,
        .offset_bdz_uid = 0,
        .nbytes_bdz_username = 0,
        .offset_bdz_username = 0,
        .nbytes_idx_gid2group = 0,
        .offset_idx_gid2group = 0,
        .nbytes_idx_groupname2group = 0,
        .offset_idx_groupname2group = 0,
        .nbytes_idx_uid2user = 0,
        .offset_idx_uid2user = 0,
        .nbytes_idx_name2user = 0,
        .offset_idx_name2user = 0,
        .nbytes_shell_index = 0,
        .offset_shell_index = 0,
        .nbytes_shell_blob = 0,
        .offset_shell_blob = 0,
        .nbytes_groups = 0,
        .offset_groups = 0,
        .nbytes_users = 0,
        .offset_users = 0,
        .nbytes_additional_gids = 0,
        .offset_additional_gids = 0,
    };
    // TODO: write the report to stdout; nothing consumes `info` yet.
    _ = info;

    return 0;
}
const testing = std.testing;
test "trivial error: db file" {
    // With no positional argument `execute` falls back to "./db.turbo"; the
    // expected message assumes that file is absent from the working directory.
    const args = &[_][*:0]const u8{};
    const allocator = testing.allocator;

    var stderr = ArrayList(u8).init(allocator);
    defer stderr.deinit();
    var stdout = ArrayList(u8).init(allocator);
    defer stdout.deinit();

    const exit_code = execute(stdout.writer(), stderr.writer(), args[0..]);
    try testing.expectEqual(@as(u8, 1), exit_code);
    // expectEqualStrings takes (expected, actual); keeping that order makes
    // the failure output label the two strings correctly.
    try testing.expectEqualStrings(
        "ERROR: failed to open './db.turbo': FileNotFound\n",
        stderr.items,
    );
}

View File

@ -79,7 +79,7 @@ test "Section length is a power of two" {
try testing.expect(std.math.isPowerOfTwo(section_length));
}
test "Header fits into two section" {
// Checks that @sizeOf(Header) is exactly twice section_length.
test "Header fits into two sections" {
try testing.expect(@sizeOf(Header) == 2 * section_length);
}

View File

@ -17,4 +17,5 @@ test "turbonss test suite" {
// main
_ = @import("unix2db.zig");
_ = @import("analyze.zig");
}

View File

@ -15,7 +15,7 @@ const DB = @import("DB.zig");
const ErrCtx = @import("ErrCtx.zig");
const usage =
\\usage: turbonss-unix2db [options]
\\usage: turbo-unix2db [options]
\\
\\ -h Print this help message and exit
\\ --passwd Path to passwd file (default: ./passwd)
@ -62,22 +62,22 @@ fn execute(
return 1;
}
const passwdFname = result.argFlag("--passwd") orelse "./passwd";
const groupFname = result.argFlag("--group") orelse "./group";
const outFile = result.argFlag("--output") orelse "./db.turbo";
const passwd_fname = result.argFlag("--passwd") orelse "./passwd";
const group_fname = result.argFlag("--group") orelse "./group";
const out_fname = result.argFlag("--output") orelse "./db.turbo";
// to catch an error set file.OpenError, wait for
// https://github.com/ziglang/zig/issues/2473
var errc = ErrCtx{};
var passwdFile = fs.cwd().openFile(passwdFname, .{ .mode = .read_only }) catch |err|
return fail(errc.wrapf("open '{s}'", .{passwdFname}), stderr, err);
defer passwdFile.close();
var passwd_file = fs.cwd().openFile(passwd_fname, .{ .mode = .read_only }) catch |err|
return fail(errc.wrapf("open '{s}'", .{passwd_fname}), stderr, err);
defer passwd_file.close();
var groupFile = fs.cwd().openFile(groupFname, .{ .mode = .read_only }) catch |err|
return fail(errc.wrapf("open '{s}'", .{groupFname}), stderr, err);
defer groupFile.close();
var group_file = fs.cwd().openFile(group_fname, .{ .mode = .read_only }) catch |err|
return fail(errc.wrapf("open '{s}'", .{group_fname}), stderr, err);
defer group_file.close();
var passwdReader = io.bufferedReader(passwdFile.reader()).reader();
var passwdReader = io.bufferedReader(passwd_file.reader()).reader();
var users = User.fromReader(allocator, &errc, passwdReader) catch |err|
return fail(errc.wrap("read users"), stderr, err);
defer {
@ -85,7 +85,7 @@ fn execute(
allocator.free(users);
}
var groupReader = io.bufferedReader(groupFile.reader()).reader();
var groupReader = io.bufferedReader(group_file.reader()).reader();
var groups = Group.fromReader(allocator, groupReader) catch |err|
return fail(errc.wrap("read groups"), stderr, err);
defer {
@ -101,14 +101,14 @@ fn execute(
return fail(errc.wrap("construct DB from corpus"), stderr, err);
defer db.deinit(allocator);
const fd = os.open(outFile, os.O.WRONLY | os.O.TRUNC | os.O.CREAT, 0644) catch |err|
return fail(errc.wrapf("open for writing {s}", .{outFile}), stderr, err);
const fd = os.open(out_fname, os.O.WRONLY | os.O.TRUNC | os.O.CREAT, 0644) catch |err|
return fail(errc.wrapf("open for writing {s}", .{out_fname}), stderr, err);
errdefer os.close(fd);
const len = os.writev(fd, db.iov().constSlice()) catch |err|
return fail(errc.wrapf("writev to {s}", .{outFile}), stderr, err);
return fail(errc.wrapf("writev to {s}", .{out_fname}), stderr, err);
os.fsync(fd) catch |err|
return fail(errc.wrapf("fsync {s}", .{outFile}), stderr, err);
return fail(errc.wrapf("fsync {s}", .{out_fname}), stderr, err);
os.close(fd);
stderr.print("total {d} bytes. groups={d} users={d}\n", .{