cmph may return a larger hash than there are items.
This commit is contained in:
parent
2205d6c4b2
commit
1a01175c46
@ -275,6 +275,7 @@ fn getGroup(self: *const DB, group: PackedGroup, buf: *[]u8) error{OutOfMemory}!
|
|||||||
// get a CGroup entry by name.
|
// get a CGroup entry by name.
|
||||||
fn getgrnam(self: *const DB, name: []const u8, buf: *[]u8) error{OutOfMemory}!?CGroup {
|
fn getgrnam(self: *const DB, name: []const u8, buf: *[]u8) error{OutOfMemory}!?CGroup {
|
||||||
const idx = bdz.search(self.bdz_groupname, name);
|
const idx = bdz.search(self.bdz_groupname, name);
|
||||||
|
if (idx >= self.header.num_groups) return null;
|
||||||
const offset = self.idx_groupname2group[idx];
|
const offset = self.idx_groupname2group[idx];
|
||||||
const nbits = PackedGroup.alignment_bits;
|
const nbits = PackedGroup.alignment_bits;
|
||||||
const group = PackedGroup.fromBytes(self.groups[offset << nbits ..]).group;
|
const group = PackedGroup.fromBytes(self.groups[offset << nbits ..]).group;
|
||||||
@ -285,6 +286,7 @@ fn getgrnam(self: *const DB, name: []const u8, buf: *[]u8) error{OutOfMemory}!?C
|
|||||||
// get a CGroup entry by its gid.
|
// get a CGroup entry by its gid.
|
||||||
fn getgrgid(self: *const DB, gid: u32, buf: *[]u8) error{OutOfMemory}!?CGroup {
|
fn getgrgid(self: *const DB, gid: u32, buf: *[]u8) error{OutOfMemory}!?CGroup {
|
||||||
const idx = bdz.search_u32(self.bdz_gid, gid);
|
const idx = bdz.search_u32(self.bdz_gid, gid);
|
||||||
|
if (idx >= self.header.num_groups) return null;
|
||||||
const offset = self.idx_gid2group[idx];
|
const offset = self.idx_gid2group[idx];
|
||||||
const nbits = PackedGroup.alignment_bits;
|
const nbits = PackedGroup.alignment_bits;
|
||||||
const group = PackedGroup.fromBytes(self.groups[offset << nbits ..]).group;
|
const group = PackedGroup.fromBytes(self.groups[offset << nbits ..]).group;
|
||||||
@ -336,6 +338,8 @@ fn getUser(self: *const DB, user: PackedUser, buf: *[]u8) error{OutOfMemory}!CUs
|
|||||||
// get a CUser entry by name.
|
// get a CUser entry by name.
|
||||||
fn getpwnam(self: *const DB, name: []const u8, buf: *[]u8) error{OutOfMemory}!?CUser {
|
fn getpwnam(self: *const DB, name: []const u8, buf: *[]u8) error{OutOfMemory}!?CUser {
|
||||||
const idx = bdz.search(self.bdz_username, name);
|
const idx = bdz.search(self.bdz_username, name);
|
||||||
|
// bdz may return a hash that's bigger than the number of users
|
||||||
|
if (idx >= self.header.num_users) return null;
|
||||||
const offset = self.idx_name2user[idx];
|
const offset = self.idx_name2user[idx];
|
||||||
const nbits = PackedUser.alignment_bits;
|
const nbits = PackedUser.alignment_bits;
|
||||||
const user = PackedUser.fromBytes(self.users[offset << nbits ..]).user;
|
const user = PackedUser.fromBytes(self.users[offset << nbits ..]).user;
|
||||||
@ -346,6 +350,7 @@ fn getpwnam(self: *const DB, name: []const u8, buf: *[]u8) error{OutOfMemory}!?C
|
|||||||
// get a CUser entry by uid.
|
// get a CUser entry by uid.
|
||||||
fn getpwuid(self: *const DB, uid: u32, buf: *[]u8) error{OutOfMemory}!?CUser {
|
fn getpwuid(self: *const DB, uid: u32, buf: *[]u8) error{OutOfMemory}!?CUser {
|
||||||
const idx = bdz.search_u32(self.bdz_uid, uid);
|
const idx = bdz.search_u32(self.bdz_uid, uid);
|
||||||
|
if (idx >= self.header.num_users) return null;
|
||||||
const offset = self.idx_uid2user[idx];
|
const offset = self.idx_uid2user[idx];
|
||||||
const nbits = PackedUser.alignment_bits;
|
const nbits = PackedUser.alignment_bits;
|
||||||
const user = PackedUser.fromBytes(self.users[offset << nbits ..]).user;
|
const user = PackedUser.fromBytes(self.users[offset << nbits ..]).user;
|
||||||
|
42
lib/cmph.zig
42
lib/cmph.zig
@ -6,19 +6,9 @@ const assert = std.debug.assert;
|
|||||||
|
|
||||||
const bdz = @import("bdz.zig");
|
const bdz = @import("bdz.zig");
|
||||||
|
|
||||||
// must be kept in sync with the definition in cmph_types.h
|
const CMPH_BDZ = @cImport({
|
||||||
const CMPH_ALGO = enum(c_int) {
|
@cInclude("cmph_types.h");
|
||||||
CMPH_BMZ,
|
}).CMPH_BDZ;
|
||||||
CMPH_BMZ8,
|
|
||||||
CMPH_CHM,
|
|
||||||
CMPH_BRZ,
|
|
||||||
CMPH_FCH,
|
|
||||||
CMPH_BDZ,
|
|
||||||
CMPH_BDZ_PH,
|
|
||||||
CMPH_CHD_PH,
|
|
||||||
CMPH_CHD,
|
|
||||||
CMPH_COUNT,
|
|
||||||
};
|
|
||||||
|
|
||||||
extern fn cmph_io_vector_adapter(vector: [*]const [*:0]const u8, len: c_uint) [*]u8;
|
extern fn cmph_io_vector_adapter(vector: [*]const [*:0]const u8, len: c_uint) [*]u8;
|
||||||
extern fn cmph_io_vector_adapter_destroy(key_source: [*]u8) void;
|
extern fn cmph_io_vector_adapter_destroy(key_source: [*]u8) void;
|
||||||
@ -29,18 +19,17 @@ extern fn cmph_new(config: [*]const u8) ?[*]u8;
|
|||||||
extern fn cmph_config_destroy(mph: [*]u8) void;
|
extern fn cmph_config_destroy(mph: [*]u8) void;
|
||||||
extern fn cmph_packed_size(mphf: [*]const u8) u32;
|
extern fn cmph_packed_size(mphf: [*]const u8) u32;
|
||||||
extern fn cmph_pack(mphf: [*]const u8, packed_mphf: [*]u8) void;
|
extern fn cmph_pack(mphf: [*]const u8, packed_mphf: [*]u8) void;
|
||||||
extern fn cmph_destroy(mphf: [*]const u8) void;
|
extern fn cmph_destroy(mphf: [*]u8) void;
|
||||||
|
|
||||||
// pack packs cmph hashes for the given input and returns a slice ("cmph pack
|
// pack packs cmph hashes for the given input and returns a slice ("cmph pack
|
||||||
// minus first 4 bytes") for further storage. The slice must be freed by the
|
// minus first 4 bytes") for further storage. The slice must be freed by the
|
||||||
// caller.
|
// caller.
|
||||||
pub fn pack(allocator: Allocator, input: [][*:0]const u8) error{OutOfMemory}![]const u8 {
|
pub fn pack(allocator: Allocator, input: [][*:0]const u8) error{OutOfMemory}![]const u8 {
|
||||||
assert(input.len <= math.maxInt(c_uint));
|
|
||||||
const input_len = @intCast(c_uint, input.len);
|
const input_len = @intCast(c_uint, input.len);
|
||||||
var source = cmph_io_vector_adapter(input.ptr, input_len);
|
var source = cmph_io_vector_adapter(input.ptr, input_len);
|
||||||
defer cmph_io_vector_adapter_destroy(source);
|
defer cmph_io_vector_adapter_destroy(source);
|
||||||
var config = cmph_config_new(source) orelse return error.OutOfMemory;
|
var config = cmph_config_new(source) orelse return error.OutOfMemory;
|
||||||
cmph_config_set_algo(config, @enumToInt(CMPH_ALGO.CMPH_BDZ));
|
cmph_config_set_algo(config, CMPH_BDZ);
|
||||||
cmph_config_set_b(config, 7);
|
cmph_config_set_b(config, 7);
|
||||||
var mph = cmph_new(config) orelse return error.OutOfMemory;
|
var mph = cmph_new(config) orelse return error.OutOfMemory;
|
||||||
cmph_config_destroy(config);
|
cmph_config_destroy(config);
|
||||||
@ -104,24 +93,20 @@ test "basic pack/unpack" {
|
|||||||
const buf = try samplePack(testing.allocator);
|
const buf = try samplePack(testing.allocator);
|
||||||
defer testing.allocator.free(buf);
|
defer testing.allocator.free(buf);
|
||||||
try testing.expect(buf.len < 100);
|
try testing.expect(buf.len < 100);
|
||||||
|
|
||||||
var used: [items_len]bool = undefined;
|
var used: [items_len]bool = undefined;
|
||||||
|
|
||||||
inline for (items) |elem| {
|
inline for (items) |elem| {
|
||||||
const hashed = bdz.search(buf, elem);
|
const hashed = bdz.search(buf, elem);
|
||||||
used[hashed] = true;
|
used[hashed] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (used) |item| {
|
for (used) |item| try testing.expect(item);
|
||||||
try testing.expect(item);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// encodes a u32 into 6 bytes such that no byte except the last one is '\0'.
|
// encodes a u32 into 6 bytes such that no byte except the last one is '\0'.
|
||||||
// This is useful for cmph-packing, where it accepts 0-terminated char*s.
|
// This is useful for cmph-packing, where it accepts 0-terminated char*s.
|
||||||
pub fn unzeroZ(x: u32) [6]u8 {
|
pub fn unzeroZ(x: u32) [6]u8 {
|
||||||
var buf: [6]u8 = undefined;
|
var buf: [6]u8 = undefined;
|
||||||
std.mem.copy(u8, buf[0..], bdz.unzero(x)[0..]);
|
std.mem.copy(u8, &buf, &bdz.unzero(x));
|
||||||
buf[5] = 0;
|
buf[5] = 0;
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
@ -141,9 +126,8 @@ test "pack u32" {
|
|||||||
const packed_mphf = try packU32(testing.allocator, keys);
|
const packed_mphf = try packU32(testing.allocator, keys);
|
||||||
defer testing.allocator.free(packed_mphf);
|
defer testing.allocator.free(packed_mphf);
|
||||||
var hashes: [keys.len]u32 = undefined;
|
var hashes: [keys.len]u32 = undefined;
|
||||||
for (keys) |key, i| {
|
for (keys) |key, i|
|
||||||
hashes[i] = bdz.search_u32(packed_mphf, key);
|
hashes[i] = bdz.search_u32(packed_mphf, key);
|
||||||
}
|
|
||||||
sort.sort(u32, hashes[0..], {}, comptime sort.asc(u32));
|
sort.sort(u32, hashes[0..], {}, comptime sort.asc(u32));
|
||||||
for (hashes) |hash, i|
|
for (hashes) |hash, i|
|
||||||
try testing.expectEqual(i, hash);
|
try testing.expectEqual(i, hash);
|
||||||
@ -154,17 +138,9 @@ test "pack str" {
|
|||||||
const packed_mphf = try packStr(testing.allocator, keys[0..]);
|
const packed_mphf = try packStr(testing.allocator, keys[0..]);
|
||||||
defer testing.allocator.free(packed_mphf);
|
defer testing.allocator.free(packed_mphf);
|
||||||
var hashes: [keys.len]u32 = undefined;
|
var hashes: [keys.len]u32 = undefined;
|
||||||
for (keys) |key, i| {
|
for (keys) |key, i|
|
||||||
hashes[i] = bdz.search(packed_mphf, key);
|
hashes[i] = bdz.search(packed_mphf, key);
|
||||||
}
|
|
||||||
sort.sort(u32, hashes[0..], {}, comptime sort.asc(u32));
|
sort.sort(u32, hashes[0..], {}, comptime sort.asc(u32));
|
||||||
for (hashes) |hash, i|
|
for (hashes) |hash, i|
|
||||||
try testing.expectEqual(i, hash);
|
try testing.expectEqual(i, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "CMPH_ALGO.CMPH_BDZ is in sync with our definition" {
|
|
||||||
const c = @cImport({
|
|
||||||
@cInclude("cmph_types.h");
|
|
||||||
});
|
|
||||||
try testing.expectEqual(c.CMPH_BDZ, @enumToInt(CMPH_ALGO.CMPH_BDZ));
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user