more robust bdz for numbers + helpers
This commit is contained in:
parent
4fc54e5b65
commit
e1bdb6c529
32
src/bdz.zig
32
src/bdz.zig
@ -1,12 +1,28 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const c = @cImport({
|
||||
@cInclude("bdz.h");
|
||||
});
|
||||
// C routine that `search` forwards to: it receives the packed mphf bytes, the
// key bytes and the key length, and returns a u32.
extern fn bdz_search_packed(packed_mphf: [*]const u8, key: [*]const u8, len: c_uint) u32;
|
||||
|
||||
pub fn search(packed_mphf: []const u8, key: []const u8) error{Overflow}!u32 {
|
||||
const bdz_start = @intToPtr(?*anyopaque, @ptrToInt(&packed_mphf[0]));
|
||||
const len = try std.math.cast(c_uint, key.len);
|
||||
return @as(u32, c.bdz_search_packed(bdz_start, key.ptr, len));
|
||||
// Looks up `key` in the packed mphf `packed_mphf` and returns whatever
// bdz_search_packed reports for it.
//
// The key length has to fit in a c_uint; a longer key is treated as
// unreachable.
pub fn search(packed_mphf: []const u8, key: []const u8) u32 {
    // The C side takes the length as c_uint, so narrow it from usize here.
    const key_len = std.math.cast(c_uint, key.len) catch unreachable;
    const hash: u32 = bdz_search_packed(packed_mphf.ptr, key.ptr, key_len);
    return hash;
}
|
||||
|
||||
// Looks up a numeric key: `key` is first encoded with unzero and the
// resulting 5 bytes are handed to search.
pub fn search_u32(packed_mphf: []const u8, key: u32) u32 {
    const encoded = unzero(key);
    return search(packed_mphf, encoded[0..]);
}
|
||||
|
||||
// Encodes a u32 into 5 bytes, none of which is '\0'.
//
// The 32 bits are split, most significant first, into groups of 7, 7, 6, 6
// and 6 bits. Each group is stored in the low bits of one output byte, and
// the top bit of every output byte is set, which keeps all bytes non-zero.
//
// TODO(motiejus) figure out how to use cmph_io_byte_vector_adapter, so cmph
// packing would accept zero bytes. For now we will be doing a dance of not
// passing zero bytes.
pub fn unzero(x: u32) [5]u8 {
    const high_bit: u8 = 0x80;
    return [5]u8{
        @truncate(u8, x >> 25) | high_bit,
        @truncate(u8, (x >> 18) & 0x7f) | high_bit,
        @truncate(u8, (x >> 12) & 0x3f) | high_bit,
        @truncate(u8, (x >> 6) & 0x3f) | high_bit,
        @truncate(u8, x & 0x3f) | high_bit,
    };
}
|
||||
|
85
src/cmph.zig
85
src/cmph.zig
@ -1,5 +1,8 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const math = std.math;
|
||||
const sort = std.sort;
|
||||
|
||||
const bdz = @import("bdz.zig");
|
||||
|
||||
const c = @cImport({
|
||||
@ -9,11 +12,12 @@ const c = @cImport({
|
||||
// pack packs cmph hashes for the given input and returns a slice ("cmph pack
|
||||
// minus first 4 bytes") for further storage. The slice must be freed by the
|
||||
// caller.
|
||||
const packErr = Allocator.Error || error{Overflow};
|
||||
pub fn pack(allocator: Allocator, input: [][*:0]const u8) packErr![]const u8 {
|
||||
var cvector = @ptrCast([*c][*c]u8, input.ptr);
|
||||
const len = try std.math.cast(c_uint, input.len);
|
||||
var source = c.cmph_io_vector_adapter(cvector, len);
|
||||
pub const Error = Allocator.Error || error{Overflow};
|
||||
pub fn pack(allocator: Allocator, input: [][*:0]const u8) Error![]const u8 {
|
||||
var source = c.cmph_io_vector_adapter(
|
||||
@ptrCast(*[*c]u8, input.ptr),
|
||||
try math.cast(c_uint, input.len),
|
||||
);
|
||||
defer c.cmph_io_vector_adapter_destroy(source);
|
||||
var config: *c.cmph_config_t = c.cmph_config_new(source) orelse
|
||||
return error.OutOfMemory;
|
||||
@ -29,6 +33,30 @@ pub fn pack(allocator: Allocator, input: [][*:0]const u8) packErr![]const u8 {
|
||||
return buf[4..];
|
||||
}
|
||||
|
||||
// Perfect-hashes a list of numbers and returns the packed mphf.
//
// Every number is encoded with unzeroZ into a 0-terminated 6-byte key and a
// pointer to each key is handed to pack. Both temporary buffers are freed
// before returning; the returned slice is whatever pack produced.
pub fn pack_u32(allocator: Allocator, numbers: []const u32) Error![]const u8 {
    const count = numbers.len;

    // Backing storage for the encoded keys.
    const encoded: [][6]u8 = try allocator.alloc([6]u8, count);
    defer allocator.free(encoded);
    // Pointers into `encoded`, in the shape pack takes.
    const key_ptrs = try allocator.alloc([*:0]const u8, count);
    defer allocator.free(key_ptrs);

    for (numbers) |number, i| {
        encoded[i] = unzeroZ(number);
        key_ptrs[i] = @ptrCast([*:0]const u8, &encoded[i]);
    }
    return pack(allocator, key_ptrs);
}
|
||||
|
||||
// Perfect-hashes a list of strings and returns the packed mphf.
//
// Each string is copied with a trailing 0 into an arena, because pack takes
// 0-terminated keys. The arena is released when this function returns; the
// returned slice is whatever pack produced using `allocator`.
pub fn pack_str(allocator: Allocator, strings: []const []const u8) Error![]const u8 {
    var arena = std.heap.ArenaAllocator.init(allocator);
    defer arena.deinit();
    const scratch = arena.allocator();

    const terminated = try scratch.alloc([*:0]const u8, strings.len);
    for (strings) |string, i| {
        terminated[i] = try scratch.dupeZ(u8, string);
    }
    return pack(allocator, terminated);
}
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
const items = .{
|
||||
@ -60,7 +88,7 @@ test "basic pack/unpack" {
|
||||
var used: [items_len]bool = undefined;
|
||||
|
||||
inline for (items) |elem| {
|
||||
const hashed = try bdz.search(buf, elem);
|
||||
const hashed = bdz.search(buf, elem);
|
||||
used[hashed] = true;
|
||||
}
|
||||
|
||||
@ -68,3 +96,48 @@ test "basic pack/unpack" {
|
||||
try testing.expect(item);
|
||||
}
|
||||
}
|
||||
|
||||
// Encodes a u32 into 6 bytes: the 5 bytes produced by bdz.unzero followed by
// a terminating '\0', so that only the last byte is a '\0'.
// This is useful for cmph-packing, where it accepts 0-terminated char*s.
pub fn unzeroZ(x: u32) [6]u8 {
    const encoded = bdz.unzero(x);
    var result: [6]u8 = undefined;
    for (encoded) |byte, i| {
        result[i] = byte;
    }
    result[5] = 0;
    return result;
}
|
||||
|
||||
test "unzeroZ" {
    // Input 0 has no payload bits set, so it is the case most likely to
    // produce a zero byte in the first five positions.
    const encoded = unzeroZ(0);
    for (encoded[0..5]) |byte| {
        try testing.expect(byte != 0);
    }
    // The sixth byte is the terminator.
    try testing.expect(encoded[5] == 0);
}
|
||||
|
||||
test "pack u32" {
    const numbers = &[_]u32{ 42, 1, math.maxInt(u32), 2 };
    const mphf = try pack_u32(testing.allocator, numbers);
    defer testing.allocator.free(mphf);

    // Hash every key, then check that the sorted hashes are exactly
    // 0, 1, ..., numbers.len - 1.
    var seen: [numbers.len]u32 = undefined;
    for (numbers) |number, i|
        seen[i] = bdz.search_u32(mphf, number);
    sort.sort(u32, seen[0..], {}, comptime sort.asc(u32));
    for (seen) |hash, expected| {
        try testing.expectEqual(expected, hash);
    }
}
|
||||
|
||||
test "pack str" {
    const strings = &[_][]const u8{ "foo", "bar", "baz", "1", "2", "3" };
    const mphf = try pack_str(testing.allocator, strings[0..]);
    defer testing.allocator.free(mphf);

    // Hash every key, then check that the sorted hashes are exactly
    // 0, 1, ..., strings.len - 1.
    var seen: [strings.len]u32 = undefined;
    for (strings) |string, i|
        seen[i] = bdz.search(mphf, string);
    sort.sort(u32, seen[0..], {}, comptime sort.asc(u32));
    for (seen) |hash, expected| {
        try testing.expectEqual(expected, hash);
    }
}
|
||||
|
@ -15,8 +15,7 @@ pub const Group = struct {
|
||||
members: BufSet,
|
||||
|
||||
pub fn clone(self: *const Group, allocator: Allocator) Allocator.Error!Group {
|
||||
var name = try allocator.alloc(u8, self.name.len);
|
||||
mem.copy(u8, name, self.name);
|
||||
var name = try allocator.dupe(u8, self.name);
|
||||
return Group{
|
||||
.gid = self.gid,
|
||||
.name = name,
|
||||
|
110
src/sections.zig
110
src/sections.zig
@ -1,9 +1,12 @@
|
||||
const std = @import("std");
|
||||
const unicode = std.unicode;
|
||||
const fmt = std.fmt;
|
||||
const math = std.math;
|
||||
const sort = std.sort;
|
||||
const unicode = std.unicode;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const ArrayListUnmanaged = std.ArrayListUnmanaged;
|
||||
const ArrayList = std.ArrayList;
|
||||
const MultiArrayList = std.MultiArrayList;
|
||||
const StringHashMap = std.StringHashMap;
|
||||
const AutoHashMap = std.AutoHashMap;
|
||||
const BufSet = std.BufSet;
|
||||
@ -12,6 +15,8 @@ const pad = @import("padding.zig");
|
||||
const compress = @import("compress.zig");
|
||||
const userImport = @import("user.zig");
|
||||
const groupImport = @import("group.zig");
|
||||
const cmph = @import("cmph.zig");
|
||||
const bdz = @import("bdz.zig");
|
||||
const User = userImport.User;
|
||||
const Group = groupImport.Group;
|
||||
|
||||
@ -23,6 +28,10 @@ const Corpus = struct {
|
||||
// sorted by gid
|
||||
groups: []Group,
|
||||
|
||||
// convenience users and groups by column
|
||||
usersMulti: MultiArrayList(User),
|
||||
groupsMulti: MultiArrayList(Group),
|
||||
|
||||
// pointing to `users` and `groups` slices above.
|
||||
name2user: StringHashMap(*const User),
|
||||
uid2user: AutoHashMap(u32, *const User),
|
||||
@ -54,6 +63,15 @@ const Corpus = struct {
|
||||
sort.sort(User, users, {}, cmpUser);
|
||||
sort.sort(Group, groups, {}, cmpGroup);
|
||||
|
||||
var usersMulti = MultiArrayList(User){};
|
||||
try usersMulti.ensureTotalCapacity(allocator, users.len);
|
||||
for (users) |user|
|
||||
usersMulti.appendAssumeCapacity(user);
|
||||
var groupsMulti = MultiArrayList(Group){};
|
||||
try groupsMulti.ensureTotalCapacity(allocator, groups.len);
|
||||
for (groups) |group|
|
||||
groupsMulti.appendAssumeCapacity(group);
|
||||
|
||||
var name2user = StringHashMap(*const User).init(allocator);
|
||||
var uid2user = AutoHashMap(u32, *const User).init(allocator);
|
||||
var name2group = StringHashMap(*const Group).init(allocator);
|
||||
@ -82,22 +100,22 @@ const Corpus = struct {
|
||||
res2.value_ptr.* = group;
|
||||
}
|
||||
|
||||
var groupname2users = StringHashMap(
|
||||
ArrayListUnmanaged(*const User),
|
||||
).init(baseAllocator);
|
||||
var groupname2users = StringHashMap([]*const User).init(allocator);
|
||||
|
||||
// uses baseAllocator, because it will be freed before
|
||||
// returning from this function. This keeps the arena clean.
|
||||
var username2groups = StringHashMap(
|
||||
ArrayListUnmanaged(*const Group),
|
||||
).init(baseAllocator);
|
||||
for (groups) |*group| {
|
||||
var members = try ArrayListUnmanaged(*const User).initCapacity(
|
||||
allocator,
|
||||
group.members.count(),
|
||||
);
|
||||
var members = try allocator.alloc(*const User, group.members.count());
|
||||
members.len = 0;
|
||||
|
||||
var it = group.members.iterator();
|
||||
while (it.next()) |memberName| {
|
||||
if (name2user.get(memberName.*)) |user| {
|
||||
members.appendAssumeCapacity(user);
|
||||
members.len += 1;
|
||||
members[members.len - 1] = user;
|
||||
} else {
|
||||
return error.NotFound;
|
||||
}
|
||||
@ -114,24 +132,14 @@ const Corpus = struct {
|
||||
result.value_ptr.* = members;
|
||||
}
|
||||
|
||||
{
|
||||
var it = groupname2users.valueIterator();
|
||||
while (it.next()) |groupUsers|
|
||||
sort.sort(*const User, groupUsers.items, {}, cmpUserPtr);
|
||||
var it1 = groupname2users.valueIterator();
|
||||
while (it1.next()) |groupUsers| {
|
||||
sort.sort(*const User, groupUsers.*, {}, cmpUserPtr);
|
||||
}
|
||||
|
||||
{
|
||||
var it = username2groups.valueIterator();
|
||||
while (it.next()) |userGroups|
|
||||
var it2 = username2groups.valueIterator();
|
||||
while (it2.next()) |userGroups|
|
||||
sort.sort(*const Group, userGroups.items, {}, cmpGroupPtr);
|
||||
}
|
||||
|
||||
var groupname2users_final = StringHashMap([]*const User).init(allocator);
|
||||
for (groups) |group| {
|
||||
const groupUsers = groupname2users.get(group.name).?.toOwnedSlice(allocator);
|
||||
try groupname2users_final.put(group.name, groupUsers);
|
||||
}
|
||||
groupname2users.deinit();
|
||||
|
||||
var username2groups_final = StringHashMap([]*const Group).init(allocator);
|
||||
for (users) |user| {
|
||||
@ -144,11 +152,13 @@ const Corpus = struct {
|
||||
.arena = arena,
|
||||
.users = users,
|
||||
.groups = groups,
|
||||
.usersMulti = usersMulti,
|
||||
.groupsMulti = groupsMulti,
|
||||
.name2user = name2user,
|
||||
.uid2user = uid2user,
|
||||
.name2group = name2group,
|
||||
.gid2group = gid2group,
|
||||
.groupname2users = groupname2users_final,
|
||||
.groupname2users = groupname2users,
|
||||
.username2groups = username2groups_final,
|
||||
};
|
||||
}
|
||||
@ -233,19 +243,17 @@ fn cmpGroupPtr(context: void, a: *const Group, b: *const Group) bool {
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
test "test corpus" {
|
||||
const allocator = testing.allocator;
|
||||
|
||||
fn testCorpus(allocator: Allocator) !Corpus {
|
||||
const users = [_]User{ User{
|
||||
.uid = 1000,
|
||||
.gid = 1000,
|
||||
.uid = 128,
|
||||
.gid = 128,
|
||||
.name = "vidmantas",
|
||||
.gecos = "Vidmantas Kaminskas",
|
||||
.home = "/home/vidmantas",
|
||||
.shell = "/bin/bash",
|
||||
}, User{
|
||||
.uid = 0,
|
||||
.gid = std.math.maxInt(u32),
|
||||
.gid = math.maxInt(u32),
|
||||
.name = "Name" ** 8,
|
||||
.gecos = "Gecos" ** 51,
|
||||
.home = "Home" ** 16,
|
||||
@ -278,7 +286,7 @@ test "test corpus" {
|
||||
defer members3.deinit();
|
||||
|
||||
const groups = [_]Group{ Group{
|
||||
.gid = 1000,
|
||||
.gid = 128,
|
||||
.name = "vidmantas",
|
||||
.members = members1,
|
||||
}, Group{
|
||||
@ -286,25 +294,30 @@ test "test corpus" {
|
||||
.name = "all",
|
||||
.members = members3,
|
||||
}, Group{
|
||||
.gid = 1234,
|
||||
.gid = 0,
|
||||
.name = "service-account",
|
||||
.members = members2,
|
||||
} };
|
||||
|
||||
var corpus = try Corpus.init(allocator, users[0..], groups[0..]);
|
||||
return try Corpus.init(allocator, users[0..], groups[0..]);
|
||||
}
|
||||
|
||||
test "test corpus" {
|
||||
var corpus = try testCorpus(testing.allocator);
|
||||
defer corpus.deinit();
|
||||
|
||||
try testing.expectEqualStrings(corpus.users[0].name, "Name" ** 8);
|
||||
try testing.expectEqualStrings(corpus.users[1].name, "svc-bar");
|
||||
try testing.expectEqualStrings(corpus.users[2].name, "vidmantas");
|
||||
|
||||
try testing.expectEqual(corpus.name2user.get("404"), null);
|
||||
try testing.expectEqual(corpus.name2user.get("vidmantas").?.uid, 1000);
|
||||
try testing.expectEqual(corpus.name2user.get("vidmantas").?.uid, 128);
|
||||
try testing.expectEqual(corpus.uid2user.get(42), null);
|
||||
try testing.expectEqual(corpus.uid2user.get(1000).?.gid, 1000);
|
||||
try testing.expectEqual(corpus.uid2user.get(128).?.gid, 128);
|
||||
try testing.expectEqual(corpus.name2group.get("404"), null);
|
||||
try testing.expectEqual(corpus.name2group.get("vidmantas").?.gid, 1000);
|
||||
try testing.expectEqual(corpus.name2group.get("vidmantas").?.gid, 128);
|
||||
try testing.expectEqual(corpus.gid2group.get(42), null);
|
||||
try testing.expectEqual(corpus.gid2group.get(1000).?.gid, 1000);
|
||||
try testing.expectEqual(corpus.gid2group.get(128).?.gid, 128);
|
||||
|
||||
const membersOfAll = corpus.groupname2users.get("all").?;
|
||||
try testing.expectEqualStrings(membersOfAll[0].name, "Name" ** 8);
|
||||
@ -313,12 +326,29 @@ test "test corpus" {
|
||||
try testing.expectEqual(corpus.groupname2users.get("404"), null);
|
||||
|
||||
const groupsOfVidmantas = corpus.username2groups.get("vidmantas").?;
|
||||
try testing.expectEqual(groupsOfVidmantas[0].gid, 1000);
|
||||
try testing.expectEqual(groupsOfVidmantas[1].gid, 1234);
|
||||
try testing.expectEqual(groupsOfVidmantas[0].gid, 0);
|
||||
try testing.expectEqual(groupsOfVidmantas[1].gid, 128);
|
||||
try testing.expectEqual(groupsOfVidmantas[2].gid, 9999);
|
||||
try testing.expectEqual(corpus.username2groups.get("404"), null);
|
||||
}
|
||||
|
||||
test "pack gids" {
    const allocator = testing.allocator;
    var corpus = try testCorpus(allocator);
    defer corpus.deinit();

    // Build the packed mphf over the gid column of the test corpus.
    const cmph_gid = try cmph.pack_u32(allocator, corpus.groupsMulti.items(.gid));
    defer allocator.free(cmph_gid);

    // Keep the hashes in a named, mutable array (as the other pack tests do)
    // so the in-place sort does not write through a pointer to an anonymous
    // array literal.
    var hashes = [_]u32{
        bdz.search_u32(cmph_gid, 0),
        bdz.search_u32(cmph_gid, 128),
        bdz.search_u32(cmph_gid, 9999),
    };
    sort.sort(u32, hashes[0..], {}, comptime sort.asc(u32));

    // After sorting, the hashes must be exactly 0, 1, 2.
    for (hashes) |hash, i|
        try testing.expectEqual(i, hash);
}
|
||||
|
||||
fn testUser(name: []const u8) User {
|
||||
var result = std.mem.zeroes(User);
|
||||
result.name = name;
|
||||
|
@ -119,10 +119,7 @@ pub const ShellWriter = struct {
|
||||
if (res.found_existing) {
|
||||
res.value_ptr.* += 1;
|
||||
} else {
|
||||
// TODO(motiejus): can we avoid `ourShell` variable here?
|
||||
const ourShell = try self.allocator.alloc(u8, shell.len);
|
||||
std.mem.copy(u8, ourShell, shell);
|
||||
res.key_ptr.* = ourShell;
|
||||
res.key_ptr.* = try self.allocator.dupe(u8, shell);
|
||||
res.value_ptr.* = 1;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user