2022-02-23 06:47:56 +02:00
|
|
|
//
// varint64 []const u8 variants
//
// Thanks to https://github.com/gsquire/zig-snappy/blob/master/snappy.zig and
// Go's varint implementation (encoding/binary).
|
2022-02-23 06:47:56 +02:00
|
|
|
const std = @import("std");
|
|
|
|
|
2023-02-02 17:04:21 +02:00
|
|
|
const ArrayListAligned = std.ArrayListAligned;
|
2022-03-04 10:37:07 +02:00
|
|
|
const Allocator = std.mem.Allocator;
|
2022-03-05 10:19:42 +02:00
|
|
|
const assert = std.debug.assert;
|
2022-03-06 06:29:16 +02:00
|
|
|
const math = std.math;
|
2022-03-04 10:37:07 +02:00
|
|
|
|
2022-02-28 06:02:16 +02:00
|
|
|
/// Delta-compresses a strictly increasing slice of integers, in place.
///
/// `elems[0]` is kept as-is; every following element is replaced with
/// `elems[i] - elems[i - 1] - 1`, i.e. the gap to its predecessor minus one.
/// Slices of length 0 or 1 are left unchanged.
///
/// Returns `error.NotSorted` if any element is not strictly greater than the
/// one before it. The whole slice is validated before anything is written,
/// so on error `elems` still holds the caller's original data.
pub fn deltaCompress(comptime T: type, elems: []T) error{NotSorted}!void {
    if (elems.len <= 1) return;

    // Validate first: compression overwrites the input, so bailing out
    // half-way would leave a mix of raw and compressed values behind.
    for (elems[1..], elems[0 .. elems.len - 1]) |cur, prev| {
        if (cur <= prev) return error.NotSorted;
    }

    var prev: T = elems[0];
    for (elems[1..]) |*elem| {
        const cur = elem.*;
        // cur > prev was established above, so this cannot underflow.
        elem.* = cur - prev - 1;
        prev = cur;
    }
}
|
|
|
|
|
2022-02-28 06:02:16 +02:00
|
|
|
/// Reverses `deltaCompress`, in place.
///
/// `elems[0]` is kept as-is; every following element becomes
/// `elems[i] + elems[i - 1] + 1`, computed against the already decompressed
/// predecessor. Slices of length 0 or 1 are left unchanged.
///
/// Returns `error.Overflow` if any reconstructed value does not fit in `T`.
/// The result is computed in a dry run before anything is written, so on
/// error `elems` still holds the caller's original data.
pub fn deltaDecompress(comptime T: type, elems: []T) error{Overflow}!void {
    if (elems.len <= 1) return;

    // Dry run: detect overflow without touching the input.
    var running: T = elems[0];
    for (elems[1..]) |delta| {
        const base = try math.add(T, running, 1);
        running = try math.add(T, delta, base);
    }

    // Every sum was proven to fit above, so plain arithmetic is safe here.
    running = elems[0];
    for (elems[1..]) |*elem| {
        running = elem.* + running + 1;
        elem.* = running;
    }
}
|
|
|
|
|
2022-02-23 06:47:56 +02:00
|
|
|
/// A decoded variable-length integer, together with the number of input
/// bytes that were consumed to decode it.
pub const Varint = struct {
    value: u64,
    bytes_read: usize,
};

/// Largest number of bytes a varint-encoded 64-bit integer may occupy.
pub const maxVarintLen64 = 10;

/// Decodes one unsigned varint from the start of `buf`.
/// Port of https://golang.org/pkg/encoding/binary/#Uvarint
///
/// Returns the value and how many bytes it occupied. If `buf` runs out
/// before a terminating byte (high bit clear) is seen -- which includes an
/// empty `buf` -- the result is `value == 0, bytes_read == 0`.
///
/// Returns `error.Overflow` when the encoding does not fit in 64 bits.
pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
    var acc: u64 = 0;
    var shift: u6 = 0;

    var pos: usize = 0;
    while (pos < buf.len) : (pos += 1) {
        // Catch byte reads past maxVarintLen64.
        // See issue https://golang.org/issues/41185
        if (pos == maxVarintLen64) return error.Overflow;

        const byte = buf[pos];
        const has_more = byte >= 0x80;
        if (has_more) {
            acc |= @as(u64, byte & 0x7f) << shift;
            shift = try math.add(u6, shift, 7);
            continue;
        }

        // Terminating byte. In the last permitted position only the single
        // remaining bit of a u64 may be set.
        if (pos == maxVarintLen64 - 1 and byte > 1) return error.Overflow;
        return Varint{
            .value = acc | (@as(u64, byte) << shift),
            .bytes_read = pos + 1,
        };
    }

    return Varint{ .value = 0, .bytes_read = 0 };
}
|
|
|
|
|
|
|
|
/// Encodes `x` as an unsigned varint into the start of `buf` and returns the
/// number of bytes written.
/// Port of https://golang.org/pkg/encoding/binary/#PutUvarint
///
/// `buf` is indexed without a prior length check, so it must be large enough
/// for the encoding.
pub fn putUvarint(buf: []u8, x: u64) usize {
    var rest = x;
    var n: usize = 0;

    // Emit 7 bits at a time, least significant group first; the high bit
    // marks "more bytes follow".
    while (rest >= 0x80) : (n += 1) {
        buf[n] = @as(u8, @truncate(rest)) | 0x80;
        rest >>= 7;
    }

    // Final group: high bit clear.
    buf[n] = @truncate(rest);
    return n + 1;
}
|
|
|
|
|
2022-07-11 15:38:32 +03:00
|
|
|
/// Iterates over a varint-encoded slice of numbers.
///
/// The first varint in the encoded data is the length of the slice, in
/// decoded numbers; `remaining` starts at that count and `idx` points just
/// past it.
pub const VarintSliceIterator = struct {
    /// How many numbers are still to be decoded.
    remaining: usize,
    /// The complete encoded input, including the leading count.
    arr: []const u8,
    /// Offset into `arr` of the next varint to decode.
    idx: usize,

    /// Decodes and returns the next number, or `null` once `remaining`
    /// numbers have been produced.
    ///
    /// Returns `error.Overflow` if the next varint does not fit in 64 bits,
    /// or if `arr` ends before the promised number of varints was read.
    /// On error the iterator state is left unchanged.
    pub fn next(self: *VarintSliceIterator) error{Overflow}!?u64 {
        if (self.remaining == 0) return null;

        const decoded = try uvarint(self.arr[self.idx..]);
        // uvarint reports bytes_read == 0 when the buffer ends before a
        // terminating byte. Without this check a truncated input would
        // yield 0 up to `remaining` times without ever advancing.
        if (decoded.bytes_read == 0) return error.Overflow;

        self.idx += decoded.bytes_read;
        self.remaining -= 1;
        return decoded.value;
    }

    /// Returns the number of remaining items. If called before the first
    /// next(), returns the length of the slice.
    pub fn remaining(self: *const VarintSliceIterator) usize {
        return self.remaining;
    }
};
|
|
|
|
|
2022-07-11 15:38:32 +03:00
|
|
|
/// Creates an iterator over the varint-encoded slice in `arr`.
///
/// The leading varint of `arr` is decoded as the number of elements that
/// follow. The returned iterator references `arr`; it does not copy it.
///
/// Returns `error.Overflow` if the leading varint does not fit in 64 bits,
/// or if the element count cannot be represented as `usize` on the target.
pub fn varintSliceIterator(arr: []const u8) error{Overflow}!VarintSliceIterator {
    const header = try uvarint(arr);
    // Checked narrowing: a plain u64 -> usize coercion only compiles on
    // targets where usize is at least 64 bits wide.
    const count = math.cast(usize, header.value) orelse return error.Overflow;
    return VarintSliceIterator{
        .remaining = count,
        .arr = arr,
        .idx = header.bytes_read,
    };
}
|
|
|
|
|
2022-07-09 13:00:45 +03:00
|
|
|
/// Decodes a delta-compressed sequence on the fly, pulling the raw deltas
/// from a `VarintSliceIterator` it points to (and therefore advances).
pub const DeltaDecompressionIterator = struct {
    /// Source of the encoded deltas; not owned by this iterator.
    vit: *VarintSliceIterator,
    /// The most recently returned value (0 before the first call).
    prev: u64,
    /// 0 for the first element, which is stored verbatim; 1 afterwards,
    /// undoing the "minus one" applied by delta compression.
    add_to_prev: u1,

    /// Returns the next decompressed value, or `null` when the underlying
    /// iterator is exhausted. Returns `error.Overflow` if the underlying
    /// iterator fails or the reconstructed value does not fit in a u64.
    pub fn next(self: *DeltaDecompressionIterator) error{Overflow}!?u64 {
        const delta = (try self.vit.next()) orelse return null;

        const base = try math.add(u64, self.prev, self.add_to_prev);
        const value = try math.add(u64, delta, base);

        self.prev = value;
        self.add_to_prev = 1;
        return value;
    }

    /// Returns the number of remaining items. If called before the first
    /// next(), returns the length of the slice.
    pub fn remaining(self: *const DeltaDecompressionIterator) usize {
        return self.vit.remaining;
    }
};
|
|
|
|
|
2022-07-11 15:38:32 +03:00
|
|
|
/// Wraps `vit` in a delta-decompressing iterator positioned before the first
/// element. Only the pointer is stored, so `vit` must stay valid (and at the
/// same address) for as long as the returned iterator is used.
pub fn deltaDecompressionIterator(vit: *VarintSliceIterator) DeltaDecompressionIterator {
    return .{
        .vit = vit,
        .prev = 0,
        .add_to_prev = 0,
    };
}
|
|
|
|
|
2023-02-02 17:04:21 +02:00
|
|
|
/// Appends the varint encoding of `x` to `arr`.
///
/// Returns an allocator error if `arr` cannot grow.
pub fn appendUvarint(arr: *ArrayListAligned(u8, 8), x: u64) Allocator.Error!void {
    // Encode into a stack buffer sized for the longest possible varint, then
    // copy only the bytes that were actually written.
    var scratch: [maxVarintLen64]u8 = undefined;
    const encoded = scratch[0..putUvarint(&scratch, x)];
    return arr.appendSlice(encoded);
}
|
|
|
|
|
2022-03-05 10:19:42 +02:00
|
|
|
const testing = std.testing;
|
|
|
|
|
2022-03-06 06:29:16 +02:00
|
|
|
// Sample values shared by the varint and delta-compression tests. The list
// is strictly increasing, so it is also valid input for deltaCompress.
const uvarint_tests = [_]u64{
    0,
    1,
    2,
    10,
    20,
    63,
    64,
    65,
    127,
    128,
    129,
    255,
    256,
    257,
    // Parenthesized on purpose: in Zig `-` binds tighter than `<<`, so the
    // Go-style spelling `1 << 63 - 1` would evaluate to `1 << 62`.
    (1 << 63) - 1,
};
|
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress putUvarint/uvarint" {
    // Round-trip every sample value through the encoder and the decoder.
    for (uvarint_tests) |want| {
        var scratch: [maxVarintLen64]u8 = undefined;
        const written = putUvarint(&scratch, want);
        const decoded = try uvarint(scratch[0..written]);

        try testing.expectEqual(want, decoded.value);
        try testing.expectEqual(written, decoded.bytes_read);
    }
}
|
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress varintSliceIterator" {
    // Encode "<count> <values...>", then read it back through the iterator.
    var buf = ArrayListAligned(u8, 8).init(testing.allocator);
    defer buf.deinit();
    try appendUvarint(&buf, uvarint_tests.len);
    for (uvarint_tests) |x|
        try appendUvarint(&buf, x);

    var it = try varintSliceIterator(buf.items);
    var i: usize = 0;
    while (try it.next()) |got| : (i += 1) {
        try testing.expectEqual(uvarint_tests[i], got);
    }
    // expectEqual takes (expected, actual); keeping that order makes a
    // failure message report the right side as "expected".
    try testing.expectEqual(uvarint_tests.len, i);
}
|
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress delta compress/decompress" {
    const tests = [_]struct { input: []const u8, want: []const u8 }{
        .{ .input = &[_]u8{}, .want = &[_]u8{} },
        .{ .input = &[_]u8{0}, .want = &[_]u8{0} },
        .{ .input = &[_]u8{10}, .want = &[_]u8{10} },
        .{ .input = &[_]u8{ 0, 1, 2 }, .want = &[_]u8{ 0, 0, 0 } },
        .{ .input = &[_]u8{ 10, 20, 30, 255 }, .want = &[_]u8{ 10, 9, 9, 224 } },
        .{ .input = &[_]u8{ 0, 254, 255 }, .want = &[_]u8{ 0, 253, 0 } },
    };
    for (tests) |t| {
        // Work on a mutable copy: both functions operate in place.
        var arr = try ArrayListAligned(u8, 8).initCapacity(
            testing.allocator,
            t.input.len,
        );
        defer arr.deinit();
        try arr.appendSlice(t.input);

        // expectEqualSlices takes (expected, actual); keeping that order
        // makes failure output label the two sides correctly.
        try deltaCompress(u8, arr.items);
        try testing.expectEqualSlices(u8, t.want, arr.items);

        try deltaDecompress(u8, arr.items);
        try testing.expectEqualSlices(u8, t.input, arr.items);
    }
}
|
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress delta compression with varint tests" {
    // Arrays are values in Zig, so plain initialization makes the mutable
    // copy; no need for the deprecated std.mem.copy.
    var scratch = uvarint_tests;

    // Compressing and then decompressing must reproduce the input.
    try deltaCompress(u64, &scratch);
    try deltaDecompress(u64, &scratch);
    try testing.expectEqualSlices(u64, &uvarint_tests, &scratch);
}
|
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress delta compression negative tests" {
    // Inputs that are not strictly increasing must be rejected.
    const unsorted_inputs = [_][]const u8{
        &[_]u8{ 0, 0 },
        &[_]u8{ 0, 1, 1 },
        &[_]u8{ 0, 1, 2, 1 },
    };
    for (unsorted_inputs) |input| {
        var list = try ArrayListAligned(u8, 8).initCapacity(testing.allocator, input.len);
        defer list.deinit();
        try list.appendSlice(input);

        const result = deltaCompress(u8, list.items);
        try testing.expectError(error.NotSorted, result);
    }
}
|
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress delta decompress overflow" {
    // Deltas whose running sum exceeds what a u8 can hold.
    const overflowing_inputs = [_][]const u8{
        &[_]u8{ 255, 0 },
        &[_]u8{ 0, 128, 127 },
    };
    for (overflowing_inputs) |input| {
        var list = try ArrayListAligned(u8, 8).initCapacity(testing.allocator, input.len);
        defer list.deinit();
        try list.appendSlice(input);

        const result = deltaDecompress(u8, list.items);
        try testing.expectError(error.Overflow, result);
    }
}
|
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress delta decompression with an iterator" {
    // Arrays are values in Zig, so plain initialization makes the mutable
    // copy; no need for the deprecated std.mem.copy.
    var compressed = uvarint_tests;
    try deltaCompress(u64, &compressed);

    // Serialize as "<count> <deltas...>".
    var buf = ArrayListAligned(u8, 8).init(testing.allocator);
    defer buf.deinit();
    try appendUvarint(&buf, compressed.len);
    for (compressed) |x|
        try appendUvarint(&buf, x);

    var vit = try varintSliceIterator(buf.items);
    var it = deltaDecompressionIterator(&vit);

    // expectEqual takes (expected, actual).
    try testing.expectEqual(uvarint_tests.len, it.remaining());

    var i: usize = 0;
    while (try it.next()) |got| : (i += 1) {
        try testing.expectEqual(uvarint_tests[i], got);
    }
    try testing.expectEqual(uvarint_tests.len, i);
}
|
2022-02-23 10:12:23 +02:00
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress appendUvarint" {
    // Whatever appendUvarint writes must decode back to the same value.
    for (uvarint_tests) |want| {
        var list = ArrayListAligned(u8, 8).init(testing.allocator);
        defer list.deinit();

        try appendUvarint(&list, want);
        const decoded = try uvarint(list.items);

        try testing.expectEqual(want, decoded.value);
    }
}
|
|
|
|
|
2022-07-12 12:59:47 +03:00
|
|
|
test "compress overflow" {
    // Encodings that do not fit in 64 bits must be rejected by uvarint.
    const too_long = [_][]const u8{
        &[_]u8{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2 },
        &[_]u8{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0 },
        &[_]u8{ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
    };
    for (too_long) |encoded| {
        const result = uvarint(encoded);
        try testing.expectError(error.Overflow, result);
    }
}
|
2022-07-12 12:44:42 +03:00
|
|
|
|
|
|
|
const compress = @This();
|
|
|
|
|
|
|
|
/// Bundles a varint iterator with the delta-decompressing iterator that
/// reads from it.
///
/// `it` reaches its input through a pointer to a `VarintSliceIterator`.
/// Because this struct is returned and copied by value, that pointer cannot
/// be set once at construction time: it has to refer to the `vit` field of
/// whichever copy is currently being used. Always advance through `next()`,
/// which rebinds the pointer first. Calling `it.next()` directly is only
/// valid after `it.vit` has been pointed at a live `VarintSliceIterator`.
const GroupMembersIter = struct {
    vit: compress.VarintSliceIterator,
    it: compress.DeltaDecompressionIterator,
    /// Number of members announced by the encoded data.
    total: usize,

    /// Returns the next decoded member, or `null` when exhausted.
    /// Returns `error.Overflow` if the encoded data is malformed.
    pub fn next(self: *GroupMembersIter) error{Overflow}!?u64 {
        // Re-anchor on every call so the iterator keeps working after the
        // struct has been moved or copied.
        self.it.vit = &self.vit;
        return self.it.next();
    }
};

/// Creates an iterator over the delta-compressed, varint-encoded members in
/// `members_slice`.
///
/// Returns `error.Overflow` if the leading element count is malformed.
pub fn groupMembersIter(members_slice: []const u8) error{Overflow}!GroupMembersIter {
    const vit = try compress.varintSliceIterator(members_slice);
    return GroupMembersIter{
        .vit = vit,
        // Deliberately left unbound. Pointing it at the local `vit` above
        // would hand the caller a pointer into this function's dead stack
        // frame. `GroupMembersIter.next` binds it to the live `vit` field.
        .it = .{ .vit = undefined, .prev = 0, .add_to_prev = 0 },
        .total = vit.remaining,
    };
}

test "compress: trying to repro pointer change of DB.groupMembersIter" {
    // Count 4, then deltas 0, 60, 2, 2; the trailing bytes are not consumed.
    const members_slice = &[_]u8{ 4, 0, 60, 2, 2, 2, 64, 2 };

    var members = try groupMembersIter(members_slice);
    try testing.expectEqual(@as(usize, 4), members.total);

    const want = [_]u64{ 0, 61, 64, 67 };
    var i: usize = 0;
    while (try members.next()) |member_offset| : (i += 1) {
        try testing.expectEqual(want[i], member_offset);
    }
    try testing.expectEqual(want.len, i);
}
|