From 4f1bada9883ac2cd263c8dce05c5fb1d9eabceac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 23 Feb 2022 15:25:55 +0200 Subject: [PATCH] start with cmph.zig and bdz.zig --- README.md | 12 ++++----- src/bdz.zig | 12 +++++++++ src/cmph.zig | 68 +++++++++++++++++++++++++++++++++++++++++++++++ src/main.zig | 62 ------------------------------------------ src/test_main.zig | 1 + 5 files changed, 87 insertions(+), 68 deletions(-) create mode 100644 src/bdz.zig create mode 100644 src/cmph.zig diff --git a/README.md b/README.md index 6ac700d..4b5de19 100644 --- a/README.md +++ b/README.md @@ -139,8 +139,8 @@ OFFSET TYPE NAME DESCRIPTION 8 u32 num_users number of passwd entries 12 u32 num_groups number of group entries 16 u32 offset_bdz_uid2user - 20 u32 offset_bdz_groupname2group 24 u32 offset_bdz_name2user + 20 u32 offset_bdz_groupname2group 28 u32 offset_idx offset to the first idx_ section 32 u32 offset_groups 36 u32 offset_users @@ -180,7 +180,7 @@ referred by their byte offset in the `Users` and `Groups` section relative to the beginning of the section. ``` -const Group = struct { +const PackedGroup = struct { gid: u32, // index to a separate structure with a list of members. The memberlist is // always 2^5-byte aligned (32b), this is an index there. @@ -345,10 +345,10 @@ Each section is padded to 64 bytes. ``` STATUS SECTION SIZE DESCRIPTION ✅ Header 48 see "Turbonss header" section - bdz_gid2group ? gid->group bdz - bdz_uid2user ? uid->user bdz - bdz_groupname2group ? groupname->group bdz - bdz_name2user ? username->user bdz + bdz_gid ? bdz(gid) + bdz_groupname ? bdz(groupname) + bdz_uid ? bdz(uid) + bdz_name ? 
bdz(username) idx_gid2group len(group)*29/8 bdz->offset Groups idx_groupname2group len(group)*29/8 bdz->offset Groups idx_uid2user len(user)*29/8 bdz->offset Users
diff --git a/src/bdz.zig b/src/bdz.zig
new file mode 100644
index 0000000..bc57205
--- /dev/null
+++ b/src/bdz.zig
@@ -0,0 +1,12 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+const c = @cImport({
+    @cInclude("bdz.h");
+});
+// search_packed returns the BDZ hash of key; packed_mphf must be longer than 4 bytes.
+pub fn search_packed(packed_mphf: []const u8, key: []const u8) error{Overflow}!u32 {
+    const payload = @intToPtr(?*anyopaque, @ptrToInt(&packed_mphf[4])); // bytes 0..3 skipped (presumably the cmph algo id; TODO confirm)
+    const key_len = try std.math.cast(c_uint, key.len); // error.Overflow when key.len does not fit c_uint
+    return @as(u32, c.bdz_search_packed(payload, key.ptr, key_len));
+}
diff --git a/src/cmph.zig b/src/cmph.zig
new file mode 100644
index 0000000..74f0d73
--- /dev/null
+++ b/src/cmph.zig
@@ -0,0 +1,68 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const bdz = @import("bdz.zig");
+
+const c = @cImport({
+    @cInclude("cmph.h");
+});
+
+// pack packs cmph hashes for the given input and returns a slice ("cmph
+// userdata") for further storage. The slice must be freed by the caller.
+const PackError = Allocator.Error || error{Overflow};
+pub fn pack(allocator: Allocator, input: [][*:0]const u8) PackError![]const u8 {
+    const cvector = @ptrCast([*c][*c]u8, input.ptr);
+    const len = try std.math.cast(c_uint, input.len);
+    const source = c.cmph_io_vector_adapter(cvector, len);
+    defer c.cmph_io_vector_adapter_destroy(source);
+    const config: *c.cmph_config_t = c.cmph_config_new(source) orelse return error.OutOfMemory;
+    defer c.cmph_config_destroy(config); // released on every exit, including a failed cmph_new
+    c.cmph_config_set_algo(config, c.CMPH_BDZ);
+    c.cmph_config_set_b(config, 7);
+    const hash: *c.cmph_t = c.cmph_new(config) orelse return error.OutOfMemory;
+    defer c.cmph_destroy(hash); // released on every exit, including a failed allocation
+
+    const size = c.cmph_packed_size(hash);
+    const buf = try allocator.alloc(u8, size);
+    c.cmph_pack(hash, &buf[0]);
+    return buf;
+}
+
+const testing = std.testing;
+
+const items = .{
+    "aaaaaaaaaa",
+    "bbbbbbbbbb",
+    "cccccccccc",
+    "dddddddddd",
+    "eeeeeeeeee",
+    "ffffffffff",
+    "gggggggggg",
+    "hhhhhhhhhh",
+    "iiiiiiiiii",
+    "jjjjjjjjjj",
+};
+const items_len = items.len;
+// samplePack packs the fixed test keys in items; the caller frees the returned slice.
+fn samplePack(allocator: Allocator) ![]const u8 {
+    var vector = std.ArrayList([*:0]const u8).init(allocator);
+    defer vector.deinit();
+    try vector.appendSlice(&items);
+    return pack(allocator, vector.items);
+}
+
+test "basic pack/unpack" {
+    const buf = try samplePack(testing.allocator);
+    defer testing.allocator.free(buf);
+    try testing.expect(buf.len < 100);
+
+    var used = [_]bool{false} ** items_len; // start all-false so a slot no key hits fails below
+
+    inline for (items) |elem| {
+        const hashed = try bdz.search_packed(buf, elem);
+        used[hashed] = true;
+    }
+
+    for (used) |item| {
+        try testing.expect(item);
+    }
+}
diff --git a/src/main.zig b/src/main.zig index a68ab3f..13ab026 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,65 +1,3 @@ const std = @import("std"); -const c = @cImport({ - @cDefine("DEBUG", "1"); - @cInclude("stdio.h"); - @cInclude("string.h"); - @cInclude("cmph.h"); -}); - -const tempMph = "temp.mph"; - pub fn main() !void {} - -test "simple cmph usage" { - var arena_instance = 
std.heap.ArenaAllocator.init(std.heap.page_allocator); - const arena = arena_instance.allocator(); - const stdout = std.io.getStdOut().writer(); - - var vector = std.ArrayList([*:0]const u8).init(arena); - try vector.appendSlice(&.{ - "aaaaaaaaaa", - "bbbbbbbbbb", - "cccccccccc", - "dddddddddd", - "eeeeeeeeee", - "ffffffffff", - "gggggggggg", - "hhhhhhhhhh", - "iiiiiiiiii", - "jjjjjjjjjj", - }); - var nkeys = @truncate(c_uint, vector.items.len); - - var mphf_fd = c.fopen(tempMph, "w"); - var cvector = @ptrCast([*c][*c]u8, vector.items.ptr); - var source = c.cmph_io_vector_adapter(cvector, nkeys); - defer c.cmph_io_vector_adapter_destroy(source); - var config: *c.cmph_config_t = c.cmph_config_new(source) orelse return error.OutOfMemory; - c.cmph_config_set_algo(config, c.CMPH_BRZ); - c.cmph_config_set_mphf_fd(config, mphf_fd); - var hash: *c.cmph_t = c.cmph_new(config) orelse return error.OutOfMemory; - c.cmph_config_destroy(config); - _ = c.cmph_dump(hash, mphf_fd); - c.cmph_destroy(hash); - try closeCFile(mphf_fd); - - mphf_fd = c.fopen(tempMph, "r"); - defer _ = c.fclose(mphf_fd); - - hash = c.cmph_load(mphf_fd) orelse unreachable; - defer c.cmph_destroy(hash); - - try stdout.print("\n", .{}); - for (vector.items) |key| { - var id = c.cmph_search(hash, key, @truncate(c_uint, c.strlen(key))); - try stdout.print("key: {s}, id: {d}\n", .{ key, id }); - } -} - -fn closeCFile(f: *c.FILE) !void { - var close_code = c.fclose(f); - if (close_code != 0) { - return std.os.unexpectedErrno(std.os.errno(close_code)); - } -} diff --git a/src/test_main.zig b/src/test_main.zig index 8aa20ff..72d9707 100644 --- a/src/test_main.zig +++ b/src/test_main.zig @@ -6,4 +6,5 @@ test "turbonss test suite" { _ = @import("group.zig"); _ = @import("padding.zig"); _ = @import("varint.zig"); + _ = @import("cmph.zig"); }