weekend changes

- move main.zig to its own package, create lib/
- rename AllSections to DB, remove intermediate tuples
- iovec does not allocate
- remove error{Overflow} from almost everywhere
This commit is contained in:
Motiejus Jakštys 2022-03-22 08:57:57 +02:00 committed by Motiejus Jakštys
parent 886382d900
commit a8b45911aa
14 changed files with 131 additions and 146 deletions

View File

@ -50,24 +50,19 @@ pub fn build(b: *zbs.Builder) void {
cmph.addIncludeDir("deps/cmph/src");
cmph.addIncludeDir("include/deps/cmph");
const exe = b.addExecutable("init-exe", "src/main.zig");
{
const exe = b.addExecutable("turbo-unix2db", "cli/unix2db/main.zig");
exe.setTarget(target);
exe.setBuildMode(mode);
addCmphDeps(exe, cmph);
exe.install();
{
const turbonss_test = b.addTest("src/test_main.zig");
addCmphDeps(turbonss_test, cmph);
const test_step = b.step("test", "Run the tests");
test_step.dependOn(&turbonss_test.step);
}
{
const run_cmd = exe.run();
run_cmd.step.dependOn(b.getInstallStep());
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
const turbonss_test = b.addTest("lib/test_all.zig");
addCmphDeps(turbonss_test, cmph);
const test_step = b.step("test", "Run the tests");
test_step.dependOn(&turbonss_test.step);
}
}

View File

@ -2,6 +2,7 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const math = std.math;
const sort = std.sort;
const assert = std.debug.assert;
const bdz = @import("bdz.zig");
@ -33,9 +34,9 @@ extern fn cmph_destroy(mphf: [*]const u8) void;
// pack packs cmph hashes for the given input and returns a slice ("cmph pack
// minus first 4 bytes") for further storage. The slice must be freed by the
// caller.
pub const Error = error{ OutOfMemory, Overflow };
pub fn pack(allocator: Allocator, input: [][*:0]const u8) Error![]const u8 {
const input_len = try math.cast(c_uint, input.len);
pub fn pack(allocator: Allocator, input: [][*:0]const u8) error{OutOfMemory}![]const u8 {
assert(input.len <= math.maxInt(c_uint));
const input_len = @intCast(c_uint, input.len);
var source = cmph_io_vector_adapter(input.ptr, input_len);
defer cmph_io_vector_adapter_destroy(source);
var config = cmph_config_new(source) orelse return error.OutOfMemory;
@ -53,7 +54,7 @@ pub fn pack(allocator: Allocator, input: [][*:0]const u8) Error![]const u8 {
}
// perfect-hash a list of numbers and return the packed mphf
pub fn packU32(allocator: Allocator, numbers: []const u32) Error![]const u8 {
pub fn packU32(allocator: Allocator, numbers: []const u32) error{OutOfMemory}![]const u8 {
var keys: [][6]u8 = try allocator.alloc([6]u8, numbers.len);
defer allocator.free(keys);
for (numbers) |n, i|
@ -67,7 +68,7 @@ pub fn packU32(allocator: Allocator, numbers: []const u32) Error![]const u8 {
}
// perfect-hash a list of strings and return the packed mphf
pub fn packStr(allocator: Allocator, strings: []const []const u8) Error![]const u8 {
pub fn packStr(allocator: Allocator, strings: []const []const u8) error{OutOfMemory}![]const u8 {
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
var keys = try arena.allocator().alloc([*:0]const u8, strings.len);

View File

@ -115,11 +115,10 @@ pub const PackedGroup = struct {
return self.groupdata;
}
const packErr = validate.InvalidRecord || Allocator.Error || error{Overflow};
pub fn packTo(
arr: *ArrayList(u8),
group: GroupStored,
) packErr!void {
) error{ InvalidRecord, OutOfMemory }!void {
std.debug.assert(arr.items.len & 7 == 0);
try validate.utf8(group.name);
const len = try validate.downCast(u5, group.name.len - 1);

View File

@ -3,6 +3,7 @@ const os = std.os;
const fmt = std.fmt;
const mem = std.mem;
const math = std.math;
const meta = std.meta;
const sort = std.sort;
const assert = std.debug.assert;
const unicode = std.unicode;
@ -14,6 +15,7 @@ const MultiArrayList = std.MultiArrayList;
const StringHashMap = std.StringHashMap;
const AutoHashMap = std.AutoHashMap;
const BufSet = std.BufSet;
const BoundedArray = std.BoundedArray;
const pad = @import("padding.zig");
const compress = @import("compress.zig");
@ -51,7 +53,10 @@ const Corpus = struct {
baseAllocator: Allocator,
usersConst: []const User,
groupsConst: []const Group,
) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound }!Corpus {
) error{ OutOfMemory, InvalidUtf8, Duplicate, NotFound, TooMany }!Corpus {
if (usersConst.len >= math.maxInt(u32)) return error.TooMany;
if (groupsConst.len >= math.maxInt(u32)) return error.TooMany;
var arena = ArenaAllocator.init(baseAllocator);
var allocator = arena.allocator();
errdefer arena.deinit();
@ -145,7 +150,7 @@ const Corpus = struct {
pub fn shellSections(
allocator: Allocator,
corpus: *const Corpus,
) error{ OutOfMemory, Overflow }!ShellSections {
) error{OutOfMemory}!ShellSections {
var popcon = ShellWriter.init(allocator);
for (corpus.users.items(.shell)) |shell|
try popcon.put(shell);
@ -169,10 +174,10 @@ pub const AdditionalGids = struct {
}
};
pub fn userGids(
pub fn additionalGids(
allocator: Allocator,
corpus: *const Corpus,
) error{ OutOfMemory, Overflow }!AdditionalGids {
) error{OutOfMemory}!AdditionalGids {
var blob = ArrayList(u8).init(allocator);
errdefer blob.deinit();
var idx2offset = try allocator.alloc(u64, corpus.users.len);
@ -227,7 +232,7 @@ pub fn usersSection(
corpus: *const Corpus,
gids: *const AdditionalGids,
shells: *const ShellSections,
) error{ OutOfMemory, Overflow, InvalidRecord }!UsersSection {
) error{ OutOfMemory, InvalidRecord, TooMany }!UsersSection {
var idx2offset = try allocator.alloc(u32, corpus.users.len);
errdefer allocator.free(idx2offset);
// as of writing each user takes 12 bytes + blobs + padding, padded to
@ -238,7 +243,9 @@ pub fn usersSection(
while (i < corpus.users.len) : (i += 1) {
// TODO: this is inefficient by calling `.slice()` on every iteration
const user = corpus.users.get(i);
const user_offset = try math.cast(u35, blob.items.len);
const user_offset = math.cast(u35, blob.items.len) catch |err| switch (err) {
error.Overflow => return error.TooMany,
};
assert(user_offset & 7 == 0);
idx2offset[i] = @truncate(u32, user_offset >> 3);
try PackedUser.packTo(
@ -327,7 +334,7 @@ pub fn groupsSection(
allocator: Allocator,
corpus: *const Corpus,
members_offset: []const u64,
) error{ OutOfMemory, Overflow, InvalidRecord }!GroupsSection {
) error{ OutOfMemory, InvalidRecord }!GroupsSection {
var idx2offset = try allocator.alloc(u32, corpus.groups.len);
errdefer allocator.free(idx2offset);
@ -338,7 +345,7 @@ pub fn groupsSection(
while (i < corpus.groups.len) : (i += 1) {
// TODO: this is inefficient; it's calling `.slice()` on every iteration
const group = corpus.groups.get(i);
const group_offset = try math.cast(u32, blob.items.len);
const group_offset = @intCast(u32, blob.items.len);
assert(group_offset & 7 == 0);
idx2offset[i] = @truncate(u32, group_offset >> 3);
const group_stored = GroupStored{
@ -416,29 +423,28 @@ fn nblocks(comptime T: type, arr: []const u8) T {
return @truncate(T, upper >> 6);
}
pub const AllSections = struct {
allocator: Allocator,
pub const DB = struct {
// All sections, as they end up in the DB. Order is important.
header: []const u8,
bdz_gid: []const u8,
bdz_groupname: []const u8,
bdz_uid: []const u8,
bdz_username: []const u8,
users: UsersSection,
shell_sections: ShellSections,
shell_reader: ShellReader,
additional_gids: AdditionalGids,
groupmembers: GroupMembers,
groups: GroupsSection,
idx_gid2group: []const u32,
idx_groupname2group: []const u32,
idx_uid2user: []const u32,
idx_name2user: []const u32,
header: []const u8,
shell_index: []const u16,
shell_blob: []const u8,
groups: []const u8,
users: []const u8,
groupmembers: []const u8,
additional_gids: []const u8,
pub fn init(
pub fn fromCorpus(
allocator: Allocator,
corpus: *const Corpus,
) error{ Overflow, OutOfMemory, InvalidRecord }!AllSections {
) error{ OutOfMemory, InvalidRecord, TooMany }!DB {
const gids = corpus.groups.items(.gid);
const gnames = corpus.groups.items(.name);
const uids = corpus.users.items(.uid);
@ -457,30 +463,34 @@ pub const AllSections = struct {
errdefer allocator.free(bdz_username);
var shell = try shellSections(allocator, corpus);
errdefer shell.deinit();
defer shell.deinit();
var additional_gids = try userGids(allocator, corpus);
errdefer additional_gids.deinit(allocator);
var additional_gids = try additionalGids(allocator, corpus);
errdefer allocator.free(additional_gids.blob);
var users = try usersSection(allocator, corpus, &additional_gids, &shell);
errdefer users.deinit(allocator);
allocator.free(additional_gids.idx2offset);
errdefer allocator.free(users.blob);
var groupmembers = try groupMembers(allocator, corpus, users.idx2offset);
errdefer groupmembers.deinit(allocator);
errdefer allocator.free(groupmembers.blob);
var groups = try groupsSection(allocator, corpus, groupmembers.idx2offset);
errdefer groups.deinit(allocator);
allocator.free(groupmembers.idx2offset);
errdefer allocator.free(groups.blob);
var idx_gid2group = try bdzIdx(u32, allocator, bdz_gid, gids, groups.idx2offset);
errdefer allocator.free(idx_gid2group);
var idx_groupname2group = try bdzIdx([]const u8, allocator, bdz_groupname, gnames, groups.idx2offset);
allocator.free(groups.idx2offset);
errdefer allocator.free(idx_groupname2group);
var idx_uid2user = try bdzIdx(u32, allocator, bdz_uid, uids, users.idx2offset);
errdefer allocator.free(idx_uid2user);
var idx_name2user = try bdzIdx([]const u8, allocator, bdz_username, unames, users.idx2offset);
allocator.free(users.idx2offset);
errdefer allocator.free(idx_name2user);
const header = Header{
@ -498,59 +508,38 @@ pub const AllSections = struct {
.nblocks_additional_gids = nblocks(u64, additional_gids.blob),
};
return AllSections{
.allocator = allocator,
return DB{
.header = header.asBytes(),
.bdz_gid = bdz_gid,
.bdz_groupname = bdz_groupname,
.bdz_uid = bdz_uid,
.bdz_username = bdz_username,
.shell_sections = shell,
.shell_reader = ShellReader.init(
mem.sliceAsBytes(shell.index.constSlice()),
mem.sliceAsBytes(shell.blob.constSlice()),
),
.additional_gids = additional_gids,
.users = users,
.groupmembers = groupmembers,
.groups = groups,
.idx_gid2group = idx_gid2group,
.idx_groupname2group = idx_groupname2group,
.idx_uid2user = idx_uid2user,
.idx_name2user = idx_name2user,
.header = header.asBytes(),
.shell_index = shell.index.constSlice(),
.shell_blob = shell.blob.constSlice(),
.groups = groups.blob,
.users = users.blob,
.groupmembers = groupmembers.blob,
.additional_gids = additional_gids.blob,
};
}
pub fn iov(self: *const AllSections) error{OutOfMemory}![]os.iovec_const {
const sections = &[_][]const u8{
self.header,
self.bdz_gid,
self.bdz_groupname,
self.bdz_uid,
self.bdz_username,
mem.sliceAsBytes(self.idx_gid2group),
mem.sliceAsBytes(self.idx_groupname2group),
mem.sliceAsBytes(self.idx_uid2user),
mem.sliceAsBytes(self.idx_name2user),
mem.sliceAsBytes(self.shell_sections.index.constSlice()),
mem.sliceAsBytes(self.shell_sections.blob.constSlice()),
self.groups.blob,
self.users.blob,
self.groupmembers.blob,
self.additional_gids.blob,
pub fn iov(self: *const DB) error{OutOfMemory}![]const os.iovec_const {
const fields = comptime meta.fieldNames(DB);
var result = BoundedArray(os.iovec_const, fields.len * 2).init(0) catch |err| switch (err) {
error.Overflow => unreachable,
};
var result = try ArrayList(os.iovec_const).initCapacity(
self.allocator,
sections.len * 2,
);
errdefer result.deinit();
for (sections) |section| {
inline for (fields) |fname| {
const bytes = mem.sliceAsBytes(@field(self, fname));
result.appendAssumeCapacity(os.iovec_const{
.iov_base = section.ptr,
.iov_len = section.len,
.iov_base = bytes.ptr,
.iov_len = bytes.len,
});
const padding = pad.until(usize, section_length_bits, section.len);
const padding = pad.until(usize, section_length_bits, bytes.len);
if (padding != 0)
result.appendAssumeCapacity(.{
.iov_base = zeroes,
@ -558,23 +547,22 @@ pub const AllSections = struct {
});
}
return result.toOwnedSlice();
return result.constSlice();
}
pub fn deinit(self: *AllSections) void {
self.allocator.free(self.bdz_gid);
self.allocator.free(self.bdz_groupname);
self.allocator.free(self.bdz_uid);
self.allocator.free(self.bdz_username);
self.shell_sections.deinit();
self.additional_gids.deinit(self.allocator);
self.users.deinit(self.allocator);
self.groupmembers.deinit(self.allocator);
self.groups.deinit(self.allocator);
self.allocator.free(self.idx_gid2group);
self.allocator.free(self.idx_groupname2group);
self.allocator.free(self.idx_uid2user);
self.allocator.free(self.idx_name2user);
pub fn deinit(self: *DB, allocator: Allocator) void {
allocator.free(self.bdz_gid);
allocator.free(self.bdz_groupname);
allocator.free(self.bdz_uid);
allocator.free(self.bdz_username);
allocator.free(self.idx_gid2group);
allocator.free(self.idx_groupname2group);
allocator.free(self.idx_uid2user);
allocator.free(self.idx_name2user);
allocator.free(self.groups);
allocator.free(self.users);
allocator.free(self.groupmembers);
allocator.free(self.additional_gids);
self.* = undefined;
}
};
@ -704,46 +692,48 @@ test "test groups, group members and users" {
var corpus = try testCorpus(allocator);
defer corpus.deinit();
var sections = try AllSections.init(allocator, &corpus);
defer sections.deinit();
var db = try DB.fromCorpus(allocator, &corpus);
defer db.deinit(allocator);
const blob = sections.groupmembers.blob;
var i: usize = 0;
while (i < corpus.groups.len) : (i += 1) {
const offset = sections.groupmembers.idx2offset[i];
var vit = try compress.VarintSliceIterator(blob[offset..]);
var it = compress.DeltaDecompressionIterator(&vit);
for (corpus.group2users[i]) |user_idx| {
const got_user_offset = (try it.next()).?;
const want_user_offset = sections.users.idx2offset[user_idx];
try testing.expectEqual(got_user_offset, want_user_offset);
}
try testing.expectEqual(it.next(), null);
// TODO: replace with an integration test when high-level
// reader API is present
//const blob = sections.groupmembers.blob;
//var i: usize = 0;
//while (i < corpus.groups.len) : (i += 1) {
//const offset = sections.groupmembers.idx2offset[i];
//var vit = try compress.VarintSliceIterator(blob[offset..]);
//var it = compress.DeltaDecompressionIterator(&vit);
//for (corpus.group2users[i]) |user_idx| {
// const got_user_offset = (try it.next()).?;
// const want_user_offset = sections.users.idx2offset[user_idx];
// try testing.expectEqual(got_user_offset, want_user_offset);
//}
//try testing.expectEqual(it.next(), null);
//}
//var it = PackedUser.iterator(sections.users.blob, sections.shell_reader);
//i = 0;
//while (i < corpus.users.len) : (i += 1) {
// const got = (try it.next()).?;
// const user = corpus.users.get(i);
// try testing.expectEqual(user.uid, got.uid());
// try testing.expectEqual(user.gid, got.gid());
// try testing.expectEqualStrings(user.name, got.name());
// try testing.expectEqualStrings(user.gecos, got.gecos());
// try testing.expectEqualStrings(user.home, got.home());
// try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader));
//}
var iovec = try db.iov();
_ = iovec;
}
var it = PackedUser.iterator(sections.users.blob, sections.shell_reader);
i = 0;
while (i < corpus.users.len) : (i += 1) {
const got = (try it.next()).?;
const user = corpus.users.get(i);
try testing.expectEqual(user.uid, got.uid());
try testing.expectEqual(user.gid, got.gid());
try testing.expectEqualStrings(user.name, got.name());
try testing.expectEqualStrings(user.gecos, got.gecos());
try testing.expectEqualStrings(user.home, got.home());
try testing.expectEqualStrings(user.shell, got.shell(sections.shell_reader));
}
var iovec = try sections.iov();
allocator.free(iovec);
}
test "userGids" {
test "additionalGids" {
const allocator = testing.allocator;
var corpus = try testCorpus(allocator);
defer corpus.deinit();
var additional_gids = try userGids(allocator, &corpus);
var additional_gids = try additionalGids(allocator, &corpus);
defer additional_gids.deinit(allocator);
var user_idx: usize = 0;

View File

@ -5,6 +5,7 @@ const StringArrayHashMap = std.StringArrayHashMap;
const StringHashMap = std.StringHashMap;
const BoundedArray = std.BoundedArray;
const StringContext = std.hash_map.StringContext;
const assert = std.debug.assert;
pub const max_shells = 255;
pub const max_shell_len = 256;
@ -55,11 +56,12 @@ pub const ShellWriter = struct {
pub fn init(
allocator: Allocator,
shells: BoundedArray([]const u8, max_shells),
) error{ Overflow, OutOfMemory }!ShellSections {
) error{OutOfMemory}!ShellSections {
assert(shells.len <= max_shells);
var self = ShellSections{
.len = @intCast(u8, shells.len),
.index = try BoundedArray(u16, max_shells).init(shells.len),
.blob = try BoundedArray(u8, (max_shells + 1) * max_shell_len).init(0),
.index = BoundedArray(u16, max_shells).init(shells.len) catch unreachable,
.blob = BoundedArray(u8, (max_shells + 1) * max_shell_len).init(0) catch unreachable,
.shell2idx = StringHashMap(u8).init(allocator),
};
if (shells.len == 0) return self;
@ -68,11 +70,11 @@ pub const ShellWriter = struct {
for (shells.constSlice()) |shell, idx| {
const idx8 = @intCast(u8, idx);
const offset = @intCast(u16, self.blob.len);
try self.blob.appendSlice(shell);
self.blob.appendSliceAssumeCapacity(shell);
try self.shell2idx.put(self.blob.constSlice()[offset..], idx8);
self.index.set(idx8, offset);
}
try self.index.append(@intCast(u8, self.blob.len));
self.index.appendAssumeCapacity(@intCast(u8, self.blob.len));
return self;
}
@ -126,10 +128,9 @@ pub const ShellWriter = struct {
// toOwnedSections returns the analyzed ShellSections. Resets the shell
// popularity contest. ShellSections memory is allocated by the ShellWriter
// allocator, and must be deInit'ed by the caller.
pub fn toOwnedSections(
self: *ShellWriter,
limit: u10,
) error{ Overflow, OutOfMemory }!ShellSections {
pub fn toOwnedSections(self: *ShellWriter, limit: u10) error{OutOfMemory}!ShellSections {
assert(limit <= max_shells);
var deque = PriorityDequeue(KV, void, cmpShells).init(self.allocator, {});
defer deque.deinit();
@ -142,7 +143,9 @@ pub const ShellWriter = struct {
}
const total = std.math.min(deque.count(), limit);
var topShells = try BoundedArray([]const u8, max_shells).init(total);
var topShells = BoundedArray([]const u8, max_shells).init(total) catch |err| switch (err) {
error.Overflow => unreachable,
};
var i: u32 = 0;
while (i < total) : (i += 1)

View File

@ -1,5 +1,4 @@
test "turbonss test suite" {
_ = @import("main.zig");
_ = @import("header.zig");
_ = @import("so.zig");
_ = @import("sections.zig");

View File

@ -1,8 +1,6 @@
const std = @import("std");
pub const InvalidRecord = error{InvalidRecord};
pub fn downCast(comptime T: type, n: u64) InvalidRecord!T {
pub fn downCast(comptime T: type, n: u64) error{InvalidRecord}!T {
return std.math.cast(T, n) catch |err| switch (err) {
error.Overflow => {
return error.InvalidRecord;
@ -10,7 +8,7 @@ pub fn downCast(comptime T: type, n: u64) InvalidRecord!T {
};
}
pub fn utf8(s: []const u8) InvalidRecord!void {
pub fn utf8(s: []const u8) error{InvalidRecord}!void {
if (!std.unicode.utf8ValidateSlice(s)) {
return error.InvalidRecord;
}