start with cmph.zig and bdz.zig

This commit is contained in:
Motiejus Jakštys 2022-02-23 15:25:55 +02:00 committed by Motiejus Jakštys
parent 6c386e720e
commit 4f1bada988
5 changed files with 87 additions and 68 deletions

View File

8 u32 num_users number of passwd entries
12 u32 num_groups number of group entries
16 u32 offset_bdz_uid2user
20 u32 offset_bdz_groupname2group
24 u32 offset_bdz_name2user
20 u32 offset_bdz_groupname2group
28 u32 offset_idx offset to the first idx_ section
32 u32 offset_groups
36 u32 offset_users
@ -180,7 +180,7 @@ referred by their byte offset in the `Users` and `Groups` section relative to
the beginning of the section.
const Group = struct {
const PackedGroup = struct {
gid: u32,
// Index into a separate structure that holds the list of members. The member
// list is always aligned to 2^5 (32) bytes; this field is an index into it.
@ -345,10 +345,10 @@ Each section is padded to 64 bytes.
✅ Header 48 see "Turbonss header" section
bdz_gid2group ? gid->group bdz
bdz_uid2user ? uid->user bdz
bdz_groupname2group ? groupname->group bdz
bdz_name2user ? username->user bdz
bdz_gid ? bdz(gid)
bdz_groupname ? bdz(groupname)
bdz_uid ? bdz(uid)
bdz_name ? bdz(username)
idx_gid2group len(group)*29/8 bdz->offset Groups
idx_groupname2group len(group)*29/8 bdz->offset Groups
idx_uid2user len(user)*29/8 bdz->offset Users

src/bdz.zig Normal file
View File

@ -0,0 +1,12 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const c = @cImport({
/// Looks up `key` in a packed hash blob by calling the C function
/// `bdz_search_packed`, and returns its result as a `u32`.
///
/// The pointer handed to C is the address of `packed_mphf[4]`, i.e. the first
/// four bytes of the blob are skipped. The slice must therefore hold at least
/// five bytes for the index expression to be in bounds.
/// NOTE(review): presumably those four bytes are a header that `cmph_pack`
/// writes ahead of the BDZ payload -- confirm against the cmph sources.
///
/// Returns `error.Overflow` when `key.len` does not fit in a `c_uint`.
pub fn search_packed(packed_mphf: []const u8, key: []const u8) error{Overflow}!u32 {
// Round-trip through an integer to obtain an untyped, non-const pointer from
// the `*const u8` element address.
const bdz_start = @intToPtr(?*anyopaque, @ptrToInt(&packed_mphf[4]));
// Checked narrowing: fails instead of silently truncating the key length.
const len = try std.math.cast(c_uint, key.len);
return @as(u32, c.bdz_search_packed(bdz_start, key.ptr, len));

src/cmph.zig Normal file
View File

@ -0,0 +1,68 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const bdz = @import("bdz.zig");
const c = @cImport({
// pack packs cmph hashes for the given input and returns a slice ("cmph
// userdata") for further storage. The slice must be freed by the caller.
//
// The returned buffer is allocated from `allocator`. Errors:
//   - error.Overflow when input.len does not fit in a c_uint;
//   - error.OutOfMemory when the allocation fails, or when cmph_config_new or
//     cmph_new return null.
const packErr = Allocator.Error || error{Overflow};
pub fn pack(allocator: Allocator, input: [][*:0]const u8) packErr![]const u8 {
// Reinterpret the slice of sentinel-terminated strings as the C
// pointer-to-pointer type that is passed to the vector adapter.
var cvector = @ptrCast([*c][*c]u8, input.ptr);
// Checked narrowing of the key count; fails rather than truncating.
const len = try std.math.cast(c_uint, input.len);
var source = c.cmph_io_vector_adapter(cvector, len);
// The adapter is released on every exit path, including the error returns
// further down.
defer c.cmph_io_vector_adapter_destroy(source);
// A null result from cmph is reported to the caller as OutOfMemory.
var config: *c.cmph_config_t = c.cmph_config_new(source) orelse return error.OutOfMemory;
// Select the BDZ algorithm and pass 7 to cmph_config_set_b.
// NOTE(review): the meaning of b = 7 is not visible here -- confirm in the
// cmph documentation.
c.cmph_config_set_algo(config, c.CMPH_BDZ);
c.cmph_config_set_b(config, 7);
var hash: *c.cmph_t = c.cmph_new(config) orelse return error.OutOfMemory;
// NOTE(review): no cmph_config_destroy(config) or cmph_destroy(hash) call is
// visible in this excerpt. Confirm that both objects are released somewhere;
// otherwise they leak on every call.
const size = c.cmph_packed_size(hash);
var buf = try allocator.alloc(u8, size);
// `&buf[0]` indexes the first byte, so this expects size > 0.
c.cmph_pack(hash, &buf[0]);
return buf;
const testing = std.testing;
const items = .{
const items_len = items.len;
// samplePack is a test helper: it copies the file-level `items` fixture into
// a temporary ArrayList and returns whatever `pack` produces for it. The
// temporary list is released before returning; the returned buffer is
// allocated by `pack` from the same `allocator`.
fn samplePack(allocator: Allocator) ![]const u8 {
var vector = std.ArrayList([*:0]const u8).init(allocator);
defer vector.deinit();
try vector.appendSlice(&items);
return pack(allocator, vector.items);
// Packs the sample keys, then checks that looking up every key marks every
// slot of a table that has exactly one slot per key.
test "basic pack/unpack" {
const buf = try samplePack(testing.allocator);
// NOTE(review): no free of `buf` is visible in this excerpt; confirm that it
// is released, since testing.allocator reports leaks.
try testing.expect(buf.len < 100);
// Slots start out undefined and are only ever written with `true` below, so
// the final loop relies on every slot having been hit by some key.
var used: [items_len]bool = undefined;
// `items` is iterated at compile time, one lookup per key.
inline for (items) |elem| {
const hashed = try bdz.search_packed(buf, elem);
used[hashed] = true;
for (used) |item| {
try testing.expect(item);

View File

@ -1,65 +1,3 @@
const std = @import("std");
const c = @cImport({
@cDefine("DEBUG", "1");
// Path of the file that the "simple cmph usage" test opens for writing and
// then reopens for reading.
const tempMph = "temp.mph";
// Empty entry point; it has no statements.
pub fn main() !void {}
// Builds a hash with the BRZ algorithm over a list of keys, dumps it to
// `tempMph`, loads it back from that file, and prints the id that cmph_search
// returns for each key.
test "simple cmph usage" {
// NOTE(review): no arena_instance.deinit() is visible in this excerpt.
var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
const arena = arena_instance.allocator();
// NOTE(review): the initializer of `stdout` is missing in this excerpt; the
// value is later used through `stdout.print`.
const stdout =;
var vector = std.ArrayList([*:0]const u8).init(arena);
try vector.appendSlice(&.{
// @truncate keeps only the low bits, so a length that does not fit in a
// c_uint would be shortened silently.
var nkeys = @truncate(c_uint, vector.items.len);
// NOTE(review): the fopen result is used without a null check.
var mphf_fd = c.fopen(tempMph, "w");
var cvector = @ptrCast([*c][*c]u8, vector.items.ptr);
var source = c.cmph_io_vector_adapter(cvector, nkeys);
defer c.cmph_io_vector_adapter_destroy(source);
var config: *c.cmph_config_t = c.cmph_config_new(source) orelse return error.OutOfMemory;
c.cmph_config_set_algo(config, c.CMPH_BRZ);
c.cmph_config_set_mphf_fd(config, mphf_fd);
var hash: *c.cmph_t = c.cmph_new(config) orelse return error.OutOfMemory;
// The return value of cmph_dump is discarded.
_ = c.cmph_dump(hash, mphf_fd);
try closeCFile(mphf_fd);
// Reopen the same path for reading and replace `hash` with the loaded copy.
mphf_fd = c.fopen(tempMph, "r");
defer _ = c.fclose(mphf_fd);
// A failed load is treated as impossible here.
hash = c.cmph_load(mphf_fd) orelse unreachable;
defer c.cmph_destroy(hash);
try stdout.print("\n", .{});
for (vector.items) |key| {
var id = c.cmph_search(hash, key, @truncate(c_uint, c.strlen(key)));
try stdout.print("key: {s}, id: {d}\n", .{ key, id });
// closeCFile closes a C FILE handle with fclose and returns an error when
// fclose reports a non-zero result.
// NOTE(review): the fclose return code itself is passed to std.os.errno;
// confirm that this yields the errno value set by the failed call.
fn closeCFile(f: *c.FILE) !void {
var close_code = c.fclose(f);
if (close_code != 0) {
return std.os.unexpectedErrno(std.os.errno(close_code));

View File

@ -6,4 +6,5 @@ test "turbonss test suite" {
_ = @import("group.zig");
_ = @import("padding.zig");
_ = @import("varint.zig");
_ = @import("cmph.zig");