weekend changes

- move main.zig to its own package, create lib/ - rename AllSections to DB, remove intermediate tuples - iovec does not allocate - remove error{Overflow} from almost everywhere
2022-03-22 08:57:57 +02:00
parent 886382d900
commit a8b45911aa
14 changed files with 131 additions and 146 deletions
--- a/build.zig
+++ b/build.zig
@@ -50,24 +50,19 @@ pub fn build(b: *zbs.Builder) void {
    cmph.addIncludeDir("deps/cmph/src");
    cmph.addIncludeDir("include/deps/cmph");

-    const exe = b.addExecutable("init-exe", "src/main.zig");
+    {
+        const exe = b.addExecutable("turbo-unix2db", "cli/unix2db/main.zig");
        exe.setTarget(target);
        exe.setBuildMode(mode);
        addCmphDeps(exe, cmph);
        exe.install();
-
-    {
-        const turbonss_test = b.addTest("src/test_main.zig");
-        addCmphDeps(turbonss_test, cmph);
-        const test_step = b.step("test", "Run the tests");
-        test_step.dependOn(&turbonss_test.step);
    }

    {
-        const run_cmd = exe.run();
-        run_cmd.step.dependOn(b.getInstallStep());
-        const run_step = b.step("run", "Run the app");
-        run_step.dependOn(&run_cmd.step);
+        const turbonss_test = b.addTest("lib/test_all.zig");
+        addCmphDeps(turbonss_test, cmph);
+        const test_step = b.step("test", "Run the tests");
+        test_step.dependOn(&turbonss_test.step);
    }
 }

--- a/cli/unix2db/main.zig
+++ b/cli/unix2db/main.zig
--- a/lib/bdz.zig
+++ b/lib/bdz.zig
--- a/lib/cmph.zig
+++ b/lib/cmph.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 const math = std.math;
 const sort = std.sort;
+const assert = std.debug.assert;

 const bdz = @import("bdz.zig");

@@ -33,9 +34,9 @@ extern fn cmph_destroy(mphf: [*]const u8) void;
 // pack packs cmph hashes for the given input and returns a slice ("cmph pack
 // minus first 4 bytes") for further storage. The slice must be freed by the
 // caller.
-pub const Error = error{ OutOfMemory, Overflow };
-pub fn pack(allocator: Allocator, input: [][*:0]const u8) Error![]const u8 {
-    const input_len = try math.cast(c_uint, input.len);
+pub fn pack(allocator: Allocator, input: [][*:0]const u8) error{OutOfMemory}![]const u8 {
+    assert(input.len <= math.maxInt(c_uint));
+    const input_len = @intCast(c_uint, input.len);
    var source = cmph_io_vector_adapter(input.ptr, input_len);
    defer cmph_io_vector_adapter_destroy(source);
    var config = cmph_config_new(source) orelse return error.OutOfMemory;
@@ -53,7 +54,7 @@ pub fn pack(allocator: Allocator, input: [][*:0]const u8) Error![]const u8 {
 }

 // perfect-hash a list of numbers and return the packed mphf
-pub fn packU32(allocator: Allocator, numbers: []const u32) Error![]const u8 {
+pub fn packU32(allocator: Allocator, numbers: []const u32) error{OutOfMemory}![]const u8 {
    var keys: [][6]u8 = try allocator.alloc([6]u8, numbers.len);
    defer allocator.free(keys);
    for (numbers) |n, i|
@@ -67,7 +68,7 @@ pub fn packU32(allocator: Allocator, numbers: []const u32) Error![]const u8 {
 }

 // perfect-hash a list of strings and return the packed mphf
-pub fn packStr(allocator: Allocator, strings: []const []const u8) Error![]const u8 {
+pub fn packStr(allocator: Allocator, strings: []const []const u8) error{OutOfMemory}![]const u8 {
    var arena = std.heap.ArenaAllocator.init(allocator);
    defer arena.deinit();
    var keys = try arena.allocator().alloc([*:0]const u8, strings.len);
--- a/lib/compress.zig
+++ b/lib/compress.zig
--- a/lib/group.zig
+++ b/lib/group.zig
@@ -115,11 +115,10 @@ pub const PackedGroup = struct {
        return self.groupdata;
    }

-    const packErr = validate.InvalidRecord || Allocator.Error || error{Overflow};
    pub fn packTo(
        arr: *ArrayList(u8),
        group: GroupStored,
-    ) packErr!void {
+    ) error{ InvalidRecord, OutOfMemory }!void {
        std.debug.assert(arr.items.len & 7 == 0);
        try validate.utf8(group.name);
        const len = try validate.downCast(u5, group.name.len - 1);
--- a/lib/header.zig
+++ b/lib/header.zig
--- a/lib/padding.zig
+++ b/lib/padding.zig
--- a/lib/sections.zig
+++ b/lib/sections.zig
@@ -3,6 +3,7 @@ const os = std.os;
 const fmt = std.fmt;
 const mem = std.mem;
 const math = std.math;
+const meta = std.meta;
 const sort = std.sort;
 const assert = std.debug.assert;
 const unicode = std.unicode;
@@ -14,6 +15,7 @@ const MultiArrayList = std.MultiArrayList;
 const StringHashMap = std.StringHashMap;
 const AutoHashMap = std.AutoHashMap;
 const BufSet = std.BufSet;
+const BoundedArray = std.BoundedArray;

 const pad = @import("padding.zig");
 const compress = @import("compress.zig");
@@ -51,7 +53,10 @@ const Corpus = struct {
        baseAllocator: Allocator,
        usersConst: []const User,
        groupsConst: []const Group,
-    ) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound }!Corpus {
+    ) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound, TooMany }!Corpus {
+        if (usersConst.len >= math.maxInt(u32)) return error.TooMany;
+        if (groupsConst.len >= math.maxInt(u32)) return error.TooMany;
+
        var arena = ArenaAllocator.init(baseAllocator);
        var allocator = arena.allocator();
        errdefer arena.deinit();
@@ -145,7 +150,7 @@ const Corpus = struct {
 pub fn shellSections(
    allocator: Allocator,
    corpus: *const Corpus,
-) error{ OutOfMemory, Overflow }!ShellSections {
+) error{OutOfMemory}!ShellSections {
    var popcon = ShellWriter.init(allocator);
    for (corpus.users.items(.shell)) |shell|
        try popcon.put(shell);
@@ -169,10 +174,10 @@ pub const AdditionalGids = struct {
    }
 };

-pub fn userGids(
+pub fn additionalGids(
    allocator: Allocator,
    corpus: *const Corpus,
-) error{ OutOfMemory, Overflow }!AdditionalGids {
+) error{OutOfMemory}!AdditionalGids {
    var blob = ArrayList(u8).init(allocator);
    errdefer blob.deinit();
    var idx2offset = try allocator.alloc(u64, corpus.users.len);
@@ -227,7 +232,7 @@ pub fn usersSection(
    corpus: *const Corpus,
    gids: *const AdditionalGids,
    shells: *const ShellSections,
-) error{ OutOfMemory, Overflow, InvalidRecord }!UsersSection {
+) error{ OutOfMemory, InvalidRecord, TooMany }!UsersSection {
    var idx2offset = try allocator.alloc(u32, corpus.users.len);
    errdefer allocator.free(idx2offset);
    // as of writing each user takes 12 bytes + blobs + padding, padded to
@@ -238,7 +243,9 @@ pub fn usersSection(
    while (i < corpus.users.len) : (i += 1) {
        // TODO: this is inefficient by calling `.slice()` on every iteration
        const user = corpus.users.get(i);
-        const user_offset = try math.cast(u35, blob.items.len);
+        const user_offset = math.cast(u35, blob.items.len) catch |err| switch (err) {
+            error.Overflow => return error.TooMany,
+        };
        assert(user_offset & 7 == 0);
        idx2offset[i] = @truncate(u32, user_offset >> 3);
        try PackedUser.packTo(
@@ -327,7 +334,7 @@ pub fn groupsSection(
    allocator: Allocator,
    corpus: *const Corpus,
    members_offset: []const u64,
-) error{ OutOfMemory, Overflow, InvalidRecord }!GroupsSection {
+) error{ OutOfMemory, InvalidRecord }!GroupsSection {
    var idx2offset = try allocator.alloc(u32, corpus.groups.len);
    errdefer allocator.free(idx2offset);

@@ -338,7 +345,7 @@ pub fn groupsSection(
    while (i < corpus.groups.len) : (i += 1) {
        // TODO: this is inefficient; it's calling `.slice()` on every iteration
        const group = corpus.groups.get(i);
-        const group_offset = try math.cast(u32, blob.items.len);
+        const group_offset = @intCast(u32, blob.items.len);
        assert(group_offset & 7 == 0);
        idx2offset[i] = @truncate(u32, group_offset >> 3);
        const group_stored = GroupStored{
@@ -416,29 +423,28 @@ fn nblocks(comptime T: type, arr: []const u8) T {
    return @truncate(T, upper >> 6);
 }

-pub const AllSections = struct {
-    allocator: Allocator,
-
+pub const DB = struct {
+    // All sections, as they end up in the DB. Order is important.
+    header: []const u8,
    bdz_gid: []const u8,
    bdz_groupname: []const u8,
    bdz_uid: []const u8,
    bdz_username: []const u8,
-    users: UsersSection,
-    shell_sections: ShellSections,
-    shell_reader: ShellReader,
-    additional_gids: AdditionalGids,
-    groupmembers: GroupMembers,
-    groups: GroupsSection,
    idx_gid2group: []const u32,
    idx_groupname2group: []const u32,
    idx_uid2user: []const u32,
    idx_name2user: []const u32,
-    header: []const u8,
+    shell_index: []const u16,
+    shell_blob: []const u8,
+    groups: []const u8,
+    users: []const u8,
+    groupmembers: []const u8,
+    additional_gids: []const u8,

-    pub fn init(
+    pub fn fromCorpus(
        allocator: Allocator,
        corpus: *const Corpus,
-    ) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections {
+    ) error{ OutOfMemory, InvalidRecord, TooMany }!DB {
        const gids = corpus.groups.items(.gid);
        const gnames = corpus.groups.items(.name);
        const uids = corpus.users.items(.uid);
@@ -457,30 +463,34 @@ pub const AllSections = struct {
        errdefer allocator.free(bdz_username);

        var shell = try shellSections(allocator, corpus);
-        errdefer shell.deinit();
+        defer shell.deinit();

-        var additional_gids = try userGids(allocator, corpus);
-        errdefer additional_gids.deinit(allocator);
+        var additional_gids = try additionalGids(allocator, corpus);
+        errdefer allocator.free(additional_gids.blob);

        var users = try usersSection(allocator, corpus, &additional_gids, &shell);
-        errdefer users.deinit(allocator);
+        allocator.free(additional_gids.idx2offset);
+        errdefer allocator.free(users.blob);

        var groupmembers = try groupMembers(allocator, corpus, users.idx2offset);
-        errdefer groupmembers.deinit(allocator);
+        errdefer allocator.free(groupmembers.blob);

        var groups = try groupsSection(allocator, corpus, groupmembers.idx2offset);
-        errdefer groups.deinit(allocator);
+        allocator.free(groupmembers.idx2offset);
+        errdefer allocator.free(groups.blob);

        var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset);
        errdefer allocator.free(idx_gid2group);

        var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset);
+        allocator.free(groups.idx2offset);
        errdefer allocator.free(idx_groupname2group);

        var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset);
        errdefer allocator.free(idx_uid2user);

        var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset);
+        allocator.free(users.idx2offset);
        errdefer allocator.free(idx_name2user);

        const header = Header{
@@ -498,59 +508,38 @@ pub const AllSections = struct {
            .nblocks_additional_gids = nblocks(u64, additional_gids.blob),
        };

-        return AllSections{
-            .allocator = allocator,
+        return DB{
+            .header = header.asBytes(),
            .bdz_gid = bdz_gid,
            .bdz_groupname = bdz_groupname,
            .bdz_uid = bdz_uid,
            .bdz_username = bdz_username,
-            .shell_sections = shell,
-            .shell_reader = ShellReader.init(
-                mem.sliceAsBytes(shell.index.constSlice()),
-                mem.sliceAsBytes(shell.blob.constSlice()),
-            ),
-            .additional_gids = additional_gids,
-            .users = users,
-            .groupmembers = groupmembers,
-            .groups = groups,
            .idx_gid2group = idx_gid2group,
            .idx_groupname2group = idx_groupname2group,
            .idx_uid2user = idx_uid2user,
            .idx_name2user = idx_name2user,
-            .header = header.asBytes(),
+            .shell_index = shell.index.constSlice(),
+            .shell_blob = shell.blob.constSlice(),
+            .groups = groups.blob,
+            .users = users.blob,
+            .groupmembers = groupmembers.blob,
+            .additional_gids = additional_gids.blob,
        };
    }

-    pub fn iov(self: *const AllSections) error{OutOfMemory}![]os.iovec_const {
-        const sections = &[_][]const u8{
-            self.header,
-            self.bdz_gid,
-            self.bdz_groupname,
-            self.bdz_uid,
-            self.bdz_username,
-            mem.sliceAsBytes(self.idx_gid2group),
-            mem.sliceAsBytes(self.idx_groupname2group),
-            mem.sliceAsBytes(self.idx_uid2user),
-            mem.sliceAsBytes(self.idx_name2user),
-            mem.sliceAsBytes(self.shell_sections.index.constSlice()),
-            mem.sliceAsBytes(self.shell_sections.blob.constSlice()),
-            self.groups.blob,
-            self.users.blob,
-            self.groupmembers.blob,
-            self.additional_gids.blob,
+    pub fn iov(self: *const DB) error{OutOfMemory}![]const os.iovec_const {
+        const fields = comptime meta.fieldNames(DB);
+        var result = BoundedArray(os.iovec_const, fields.len * 2).init(0) catch |err| switch (err) {
+            error.Overflow => unreachable,
        };
-        var result = try ArrayList(os.iovec_const).initCapacity(
-            self.allocator,
-            sections.len * 2,
-        );
-        errdefer result.deinit();

-        for (sections) |section| {
+        inline for (fields) |fname| {
+            const bytes = mem.sliceAsBytes(@field(self, fname));
            result.appendAssumeCapacity(os.iovec_const{
-                .iov_base = section.ptr,
-                .iov_len = section.len,
+                .iov_base = bytes.ptr,
+                .iov_len = bytes.len,
            });
-            const padding = pad.until(usize, section_length_bits, section.len);
+            const padding = pad.until(usize, section_length_bits, bytes.len);
            if (padding != 0)
                result.appendAssumeCapacity(.{
                    .iov_base = zeroes,
@@ -558,23 +547,22 @@ pub const AllSections = struct {
                });
        }

-        return result.toOwnedSlice();
+        return result.constSlice();
    }

-    pub fn deinit(self: *AllSections) void {
-        self.allocator.free(self.bdz_gid);
-        self.allocator.free(self.bdz_groupname);
-        self.allocator.free(self.bdz_uid);
-        self.allocator.free(self.bdz_username);
-        self.shell_sections.deinit();
-        self.additional_gids.deinit(self.allocator);
-        self.users.deinit(self.allocator);
-        self.groupmembers.deinit(self.allocator);
-        self.groups.deinit(self.allocator);
-        self.allocator.free(self.idx_gid2group);
-        self.allocator.free(self.idx_groupname2group);
-        self.allocator.free(self.idx_uid2user);
-        self.allocator.free(self.idx_name2user);
+    pub fn deinit(self: *DB, allocator: Allocator) void {
+        allocator.free(self.bdz_gid);
+        allocator.free(self.bdz_groupname);
+        allocator.free(self.bdz_uid);
+        allocator.free(self.bdz_username);
+        allocator.free(self.idx_gid2group);
+        allocator.free(self.idx_groupname2group);
+        allocator.free(self.idx_uid2user);
+        allocator.free(self.idx_name2user);
+        allocator.free(self.groups);
+        allocator.free(self.users);
+        allocator.free(self.groupmembers);
+        allocator.free(self.additional_gids);
        self.* = undefined;
    }
 };
@@ -704,46 +692,48 @@ test "test groups, group members and users" {
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

-    var sections = try AllSections.init(allocator, &corpus);
-    defer sections.deinit();
+    var db = try DB.fromCorpus(allocator, &corpus);
+    defer db.deinit(allocator);

-    const blob = sections.groupmembers.blob;
-    var i: usize = 0;
-    while (i < corpus.groups.len) : (i += 1) {
-        const offset = sections.groupmembers.idx2offset[i];
-        var vit = try compress.VarintSliceIterator(blob[offset..]);
-        var it = compress.DeltaDecompressionIterator(&vit);
-        for (corpus.group2users[i]) |user_idx| {
-            const got_user_offset = (try it.next()).?;
-            const want_user_offset = sections.users.idx2offset[user_idx];
-            try testing.expectEqual(got_user_offset, want_user_offset);
-        }
-        try testing.expectEqual(it.next(), null);
-    }
+    // TODO: replace with an integration test when high-level
+    // reader API is present
+    //const blob = sections.groupmembers.blob;
+    //var i: usize = 0;
+    //while (i < corpus.groups.len) : (i += 1) {
+    //const offset = sections.groupmembers.idx2offset[i];
+    //var vit = try compress.VarintSliceIterator(blob[offset..]);
+    //var it = compress.DeltaDecompressionIterator(&vit);
+    //for (corpus.group2users[i]) |user_idx| {
+    //    const got_user_offset = (try it.next()).?;
+    //    const want_user_offset = sections.users.idx2offset[user_idx];
+    //    try testing.expectEqual(got_user_offset, want_user_offset);
+    //}
+    //try testing.expectEqual(it.next(), null);
+    //}

-    var it = PackedUser.iterator(sections.users.blob, sections.shell_reader);
-    i = 0;
-    while (i < corpus.users.len) : (i += 1) {
-        const got = (try it.next()).?;
-        const user = corpus.users.get(i);
-        try testing.expectEqual(user.uid, got.uid());
-        try testing.expectEqual(user.gid, got.gid());
-        try testing.expectEqualStrings(user.name, got.name());
-        try testing.expectEqualStrings(user.gecos, got.gecos());
-        try testing.expectEqualStrings(user.home, got.home());
-        try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader));
-    }
+    //var it = PackedUser.iterator(sections.users.blob, sections.shell_reader);
+    //i = 0;
+    //while (i < corpus.users.len) : (i += 1) {
+    //    const got = (try it.next()).?;
+    //    const user = corpus.users.get(i);
+    //    try testing.expectEqual(user.uid, got.uid());
+    //    try testing.expectEqual(user.gid, got.gid());
+    //    try testing.expectEqualStrings(user.name, got.name());
+    //    try testing.expectEqualStrings(user.gecos, got.gecos());
+    //    try testing.expectEqualStrings(user.home, got.home());
+    //    try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader));
+    //}

-    var iovec = try sections.iov();
-    allocator.free(iovec);
+    var iovec = try db.iov();
+    _ = iovec;
 }

-test "userGids" {
+test "additionalGids" {
    const allocator = testing.allocator;
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

-    var additional_gids = try userGids(allocator, &corpus);
+    var additional_gids = try additionalGids(allocator, &corpus);
    defer additional_gids.deinit(allocator);

    var user_idx: usize = 0;
--- a/lib/shell.zig
+++ b/lib/shell.zig
@@ -5,6 +5,7 @@ const StringArrayHashMap = std.StringArrayHashMap;
 const StringHashMap = std.StringHashMap;
 const BoundedArray = std.BoundedArray;
 const StringContext = std.hash_map.StringContext;
+const assert = std.debug.assert;

 pub const max_shells = 255;
 pub const max_shell_len = 256;
@@ -55,11 +56,12 @@ pub const ShellWriter = struct {
        pub fn init(
            allocator: Allocator,
            shells: BoundedArray([]const u8, max_shells),
-        ) error{ Overflow, OutOfMemory }!ShellSections {
+        ) error{OutOfMemory}!ShellSections {
+            assert(shells.len <= max_shells);
            var self = ShellSections{
                .len = @intCast(u8, shells.len),
-                .index = try BoundedArray(u16, max_shells).init(shells.len),
-                .blob = try BoundedArray(u8, (max_shells + 1) * max_shell_len).init(0),
+                .index = BoundedArray(u16, max_shells).init(shells.len) catch unreachable,
+                .blob = BoundedArray(u8, (max_shells + 1) * max_shell_len).init(0) catch unreachable,
                .shell2idx = StringHashMap(u8).init(allocator),
            };
            if (shells.len == 0) return self;
@@ -68,11 +70,11 @@ pub const ShellWriter = struct {
            for (shells.constSlice()) |shell, idx| {
                const idx8 = @intCast(u8, idx);
                const offset = @intCast(u16, self.blob.len);
-                try self.blob.appendSlice(shell);
+                self.blob.appendSliceAssumeCapacity(shell);
                try self.shell2idx.put(self.blob.constSlice()[offset..], idx8);
                self.index.set(idx8, offset);
            }
-            try self.index.append(@intCast(u8, self.blob.len));
+            self.index.appendAssumeCapacity(@intCast(u8, self.blob.len));
            return self;
        }

@@ -126,10 +128,9 @@ pub const ShellWriter = struct {
    // toOwnedSections returns the analyzed ShellSections. Resets the shell
    // popularity contest. ShellSections memory is allocated by the ShellWriter
    // allocator, and must be deInit'ed by the caller.
-    pub fn toOwnedSections(
-        self: *ShellWriter,
-        limit: u10,
-    ) error{ Overflow, OutOfMemory }!ShellSections {
+    pub fn toOwnedSections(self: *ShellWriter, limit: u10) error{OutOfMemory}!ShellSections {
+        assert(limit <= max_shells);
+
        var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
        defer deque.deinit();

@@ -142,7 +143,9 @@ pub const ShellWriter = struct {
        }

        const total = std.math.min(deque.count(), limit);
-        var topShells = try BoundedArray([]const u8, max_shells).init(total);
+        var topShells = BoundedArray([]const u8, max_shells).init(total) catch |err| switch (err) {
+            error.Overflow => unreachable,
+        };

        var i: u32 = 0;
        while (i < total) : (i += 1)
--- a/lib/so.zig
+++ b/lib/so.zig
--- a/src/test_main.zig
+++ b/src/test_main.zig
@@ -1,5 +1,4 @@
 test "turbonss test suite" {
-    _ = @import("main.zig");
    _ = @import("header.zig");
    _ = @import("so.zig");
    _ = @import("sections.zig");
--- a/lib/user.zig
+++ b/lib/user.zig
--- a/lib/validate.zig
+++ b/lib/validate.zig
@@ -1,8 +1,6 @@
 const std = @import("std");

-pub const InvalidRecord = error{InvalidRecord};
-
-pub fn downCast(comptime T: type, n: u64) InvalidRecord!T {
+pub fn downCast(comptime T: type, n: u64) error{InvalidRecord}!T {
    return std.math.cast(T, n) catch |err| switch (err) {
        error.Overflow => {
            return error.InvalidRecord;
@@ -10,7 +8,7 @@ pub fn downCast(comptime T: type, n: u64) InvalidRecord!T {
    };
 }

-pub fn utf8(s: []const u8) InvalidRecord!void {
+pub fn utf8(s: []const u8) error{InvalidRecord}!void {
    if (!std.unicode.utf8ValidateSlice(s)) {
        return error.InvalidRecord;
    }