From 752b0bac6f5996da2d601e3f5c33b27f5eb3f1c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?=
Date: Mon, 4 Jul 2022 07:44:20 +0300
Subject: [PATCH] pack differently

---
 README.md    | 7 +++++++
 src/bdz.zig  | 4 ++--
 src/cmph.zig | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 50634f6..1d127a5 100644
--- a/README.md
+++ b/README.md
@@ -371,6 +371,13 @@ Section creation order:
 1. ✅ `idx_*`. requires offsets to `groups` and `users`.
 1. ✅ Header.
 
+For v2
+------
+
+These are desired for the next DB format:
+- Compress strings with fsst.
+- Trim first 4 bytes from the cmph headers.
+
 [git-subtrac]: https://apenwarr.ca/log/20191109
 [cmph]: http://cmph.sourceforge.net/
 [id]: https://linux.die.net/man/1/id
diff --git a/src/bdz.zig b/src/bdz.zig
index 2701b88..40e8d17 100644
--- a/src/bdz.zig
+++ b/src/bdz.zig
@@ -4,13 +4,13 @@ extern fn bdz_search_packed(packed_mphf: [*]const u8, key: [*]const u8, len: c_u
 
 pub fn search(packed_mphf: []const u8, key: []const u8) u32 {
     const len = std.math.cast(c_uint, key.len).?;
-    return @as(u32, bdz_search_packed(packed_mphf.ptr, key.ptr, len));
+    return @as(u32, bdz_search_packed(packed_mphf[4..].ptr, key.ptr, len));
 }
 
 const u32len = 5;
 
 pub fn search_u32(packed_mphf: []const u8, key: u32) u32 {
-    return @as(u32, bdz_search_packed(packed_mphf.ptr, &unzero(key), u32len));
+    return @as(u32, bdz_search_packed(packed_mphf[4..].ptr, &unzero(key), u32len));
 }
 
 // encode a u32 to 5 bytes so no bytes is a '\0'.
diff --git a/src/cmph.zig b/src/cmph.zig
index b6a5f8b..965370a 100644
--- a/src/cmph.zig
+++ b/src/cmph.zig
@@ -39,7 +39,7 @@ pub fn pack(allocator: Allocator, input: [][*:0]const u8) error{OutOfMemory}![]c
     errdefer allocator.free(buf);
     cmph_pack(mph, buf.ptr);
     cmph_destroy(mph);
-    return buf[4..];
+    return buf;
 }
 
 // perfect-hash a list of numbers and return the packed mphf