turbonss/lib/compress.zig

336 lines
9.4 KiB
Zig
Raw Normal View History

2022-02-23 06:47:56 +02:00
//
// varint64 []const u8 variants
//
// Thanks to https://github.com/gsquire/zig-snappy/blob/master/snappy.zig and
// golang's varint implementation.
2022-02-23 06:47:56 +02:00
const std = @import("std");
2022-03-04 10:37:07 +02:00
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
2022-03-05 10:19:42 +02:00
const assert = std.debug.assert;
2022-03-06 06:29:16 +02:00
const math = std.math;
2022-03-04 10:37:07 +02:00
2022-02-28 06:02:16 +02:00
// Delta-compresses a strictly increasing slice of integers, in place.
//
// The first element is kept unchanged. Every following element is replaced
// by the gap to its predecessor minus one (`cur - prev - 1`), so a run of
// consecutive integers is stored as zeroes.
//
// Returns error.NotSorted when some element is not strictly greater than the
// element before it. The whole slice is validated before anything is
// written, so on error `elems` is left unmodified.
pub fn deltaCompress(comptime T: type, elems: []T) error{NotSorted}!void {
    if (elems.len <= 1) return;

    // Pass 1: check ordering without touching the data.
    var i: usize = 1;
    while (i < elems.len) : (i += 1) {
        if (elems[i] <= elems[i - 1]) return error.NotSorted;
    }

    // Pass 2: rewrite every element as its gap to the previous original value.
    var prev: T = elems[0];
    i = 1;
    while (i < elems.len) : (i += 1) {
        const cur = elems[i];
        // cur > prev was established in pass 1, so this cannot underflow.
        elems[i] = cur - prev - 1;
        prev = cur;
    }
}
2022-02-28 06:02:16 +02:00
// Reverses deltaCompress, in place: every element after the first becomes
// `elems[i] + elems[i - 1] + 1`, using the already-restored predecessor.
//
// Returns error.Overflow when a restored value does not fit in T. The sums
// are first computed in a checked dry run, so on error `elems` is left
// unmodified.
pub fn deltaDecompress(comptime T: type, elems: []T) error{Overflow}!void {
    if (elems.len <= 1) return;

    // Pass 1: checked dry run; nothing is written yet.
    var restored: T = elems[0];
    var i: usize = 1;
    while (i < elems.len) : (i += 1) {
        const base = try math.add(T, restored, 1);
        restored = try math.add(T, elems[i], base);
    }

    // Pass 2: commit. The same additions, in the same order, were just
    // verified not to overflow.
    i = 1;
    while (i < elems.len) : (i += 1) {
        elems[i] += elems[i - 1] + 1;
    }
}
2022-02-23 06:47:56 +02:00
// Result of decoding one variable-length integer from a byte slice: the
// decoded value together with how many bytes were read to decode it.
pub const Varint = struct {
    // The decoded integer.
    value: u64,
    // Number of input bytes consumed. uvarint sets this to 0 (with value 0)
    // when the input ends before a terminating byte is seen.
    bytes_read: usize,
};
// Byte limit for one varint-encoded 64-bit value. uvarint rejects input that
// runs past this many bytes, and appendUvarint sizes its scratch buffer with it.
pub const maxVarintLen64 = 10;
2022-02-23 06:47:56 +02:00
// Decodes an unsigned varint from the start of `buf`.
// Modelled on https://golang.org/pkg/encoding/binary/#Uvarint
//
// Each byte carries 7 payload bits, least-significant group first; a byte
// below 0x80 terminates the number.
//
// Returns the decoded value and the number of bytes consumed. If `buf` ends
// before a terminating byte is found (including an empty `buf`), the result
// is value 0 with bytes_read 0.
//
// Returns error.Overflow when the encoding does not fit in 64 bits.
pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
    var x: u64 = 0;
    var s: u6 = 0;
    var i: usize = 0;
    while (i < buf.len) : (i += 1) {
        const b = buf[i];
        // Catch byte reads past maxVarintLen64.
        // See issue https://golang.org/issues/41185
        if (i == maxVarintLen64) return error.Overflow;
        if (b < 0x80) {
            // By the last permitted byte 63 bits are already used, so it may
            // contribute at most one more bit.
            if (i == maxVarintLen64 - 1 and b > 1) return error.Overflow;
            return Varint{
                .value = x | (@as(u64, b) << s),
                .bytes_read = i + 1,
            };
        }
        x |= (@as(u64, b & 0x7f) << s);
        // The shift amount is a u6 (max 63); a continuation byte at the last
        // permitted position pushes it past that and surfaces as Overflow.
        s = try math.add(u6, s, 7);
    }
    return Varint{
        .value = 0,
        .bytes_read = 0,
    };
}
2022-04-08 15:05:56 +03:00
// Variant of uvarint for callers that know `buf` holds a valid encoding:
// an Overflow result is declared unreachable instead of being returned.
pub fn uvarintMust(buf: []const u8) Varint {
    return if (uvarint(buf)) |decoded|
        decoded
    else |err| switch (err) {
        error.Overflow => unreachable,
    };
}
2022-02-23 06:47:56 +02:00
// Encodes `x` as an unsigned varint into the start of `buf` and returns the
// number of bytes written.
// Modelled on https://golang.org/pkg/encoding/binary/#PutUvarint
//
// `buf` is indexed without a prior length check, so it must be large enough
// for the encoding.
pub fn putUvarint(buf: []u8, x: u64) usize {
    var pending = x;
    var written: usize = 0;
    while (true) : (written += 1) {
        if (pending < 0x80) {
            // Final group: high bit clear marks the end of the number.
            buf[written] = @truncate(u8, pending);
            return written + 1;
        }
        // Low 7 bits plus the continuation flag.
        buf[written] = @truncate(u8, pending) | 0x80;
        pending >>= 7;
    }
}
2022-03-05 10:19:42 +02:00
// Iterator over a varint-encoded slice of numbers.
// The first encoded element is the length of the slice, in decoded numbers;
// the values follow. Instances are created by VarintSliceIterator.
const varintSliceIterator = struct {
    // How many values next() has yet to return.
    remaining: usize,
    // The encoded bytes being iterated (the constructor stores the whole
    // input here, length prefix included).
    arr: []const u8,
    // Offset into `arr` of the next varint to decode.
    idx: usize,

    // Decodes and returns the next value, or null once `remaining` reaches
    // zero. Returns error.Overflow when uvarint rejects the encoding.
    //
    // If `arr` ends early, uvarint reports value 0 with bytes_read 0; that 0
    // is returned here as an ordinary element.
    pub fn next(self: *varintSliceIterator) error{Overflow}!?u64 {
        if (self.remaining == 0)
            return null;
        const value = try uvarint(self.arr[self.idx..]);
        self.idx += value.bytes_read;
        self.remaining -= 1;
        return value.value;
    }

    // Like next(), but an Overflow error is declared unreachable.
    pub fn nextMust(self: *varintSliceIterator) ?u64 {
        return self.next() catch |err| switch (err) {
            error.Overflow => unreachable,
        };
    }

    // returns the number of remaining items. If called before the first
    // next(), returns the length of the slice.
    // NOTE(review): this method shares its name with the `remaining` field;
    // confirm the targeted Zig version accepts that and resolves
    // `it.remaining()` to the method rather than the field.
    pub fn remaining(self: *const varintSliceIterator) usize {
        return self.remaining;
    }
};
// Creates an iterator over the varint-encoded slice in `arr`.
// The leading varint is decoded as the element count; iteration starts at
// the byte right after it. Returns error.Overflow if that leading varint is
// rejected by uvarint.
pub fn VarintSliceIterator(arr: []const u8) error{Overflow}!varintSliceIterator {
    const header = try uvarint(arr);
    const it = varintSliceIterator{
        .idx = header.bytes_read,
        .arr = arr,
        .remaining = header.value,
    };
    return it;
}
2022-04-07 12:23:50 +03:00
// Variant of VarintSliceIterator for input known to be valid: an Overflow
// while decoding the length prefix is declared unreachable.
pub fn VarintSliceIteratorMust(arr: []const u8) varintSliceIterator {
    return if (VarintSliceIterator(arr)) |it|
        it
    else |err| switch (err) {
        error.Overflow => unreachable,
    };
}
2022-03-06 06:29:16 +02:00
// Iterator that undoes delta compression on the fly: it pulls deltas from a
// varintSliceIterator and yields the running totals. Instances are created
// by DeltaDecompressionIterator.
const deltaDecompressionIterator = struct {
    // Source of the delta values.
    vit: *varintSliceIterator,
    // Last value returned by next(); 0 before the first call.
    prev: u64,
    // 0 for the first element (returned as-is), 1 afterwards, so later
    // elements are restored as `delta + prev + 1`.
    add_to_prev: u1,

    // Returns the next decompressed value, or null when the underlying
    // iterator is exhausted. Returns error.Overflow when the underlying
    // iterator fails or the restored value does not fit in a u64.
    pub fn next(self: *deltaDecompressionIterator) error{Overflow}!?u64 {
        const delta = (try self.vit.next()) orelse return null;
        const base = try math.add(u64, self.prev, self.add_to_prev);
        const result = try math.add(u64, delta, base);
        self.prev = result;
        self.add_to_prev = 1;
        return result;
    }

    // returns the number of remaining items. If called before the first
    // next(), returns the length of the slice.
    pub fn remaining(self: *const deltaDecompressionIterator) usize {
        return self.vit.remaining;
    }

    // Like next(), but an Overflow error is declared unreachable.
    pub fn nextMust(self: *deltaDecompressionIterator) ?u64 {
        return self.next() catch |err| switch (err) {
            error.Overflow => unreachable,
        };
    }
};
// Wraps `vit` in a delta-decompressing iterator. The wrapper stores the
// pointer, so `vit` must stay valid for as long as the result is used.
pub fn DeltaDecompressionIterator(vit: *varintSliceIterator) deltaDecompressionIterator {
    const fresh = deltaDecompressionIterator{
        .add_to_prev = 0,
        .prev = 0,
        .vit = vit,
    };
    return fresh;
}
2022-03-04 10:37:07 +02:00
// Appends the varint encoding of `x` to `arr`.
// The number is first encoded into a stack buffer of maxVarintLen64 bytes;
// only the bytes actually written are appended. Fails only if growing `arr`
// fails.
pub fn appendUvarint(arr: *ArrayList(u8), x: u64) Allocator.Error!void {
    var scratch: [maxVarintLen64]u8 = undefined;
    const encoded = scratch[0..putUvarint(scratch[0..], x)];
    try arr.appendSlice(encoded);
}
2022-03-05 10:19:42 +02:00
const testing = std.testing;
2022-03-06 06:29:16 +02:00
// Strictly increasing sample values shared by the varint and delta tests.
// They cluster around the 1-byte/2-byte encoding boundary (127/128) and the
// u8 boundary (255/256), and end with two very large values.
const uvarint_tests = [_]u64{
    0,
    1,
    2,
    10,
    20,
    63,
    64,
    65,
    127,
    128,
    129,
    255,
    256,
    257,
    // Parenthesised on purpose: in Zig `-` binds tighter than `<<`, so
    // `1 << 63 - 1` would be `1 << 62`.
    (1 << 63) - 1,
    // Largest u64; exercises the full 10-byte encoding.
    std.math.maxInt(u64),
};
// Round-trips every sample value through putUvarint and uvarint and checks
// both the decoded value and the reported byte count.
test "putUvarint/uvarint" {
    var idx: usize = 0;
    while (idx < uvarint_tests.len) : (idx += 1) {
        const want = uvarint_tests[idx];
        var scratch: [maxVarintLen64]u8 = undefined;
        const written = putUvarint(scratch[0..], want);
        const decoded = try uvarint(scratch[0..written]);
        try testing.expectEqual(want, decoded.value);
        try testing.expectEqual(written, decoded.bytes_read);
    }
}
// Encodes the sample values as a length-prefixed varint slice and checks
// that the iterator yields them back in order and then stops.
test "VarintSliceIterator" {
    var encoded = ArrayList(u8).init(testing.allocator);
    defer encoded.deinit();

    // Length prefix first, then the values themselves.
    try appendUvarint(&encoded, uvarint_tests.len);
    for (uvarint_tests) |sample| {
        try appendUvarint(&encoded, sample);
    }

    var it = try VarintSliceIterator(encoded.items);
    var seen: usize = 0;
    while (true) {
        const got = (try it.next()) orelse break;
        try testing.expectEqual(uvarint_tests[seen], got);
        seen += 1;
    }
    try testing.expectEqual(seen, uvarint_tests.len);
}
2022-03-05 10:19:42 +02:00
// Table-driven round trip: compress each input, compare with the expected
// deltas, then decompress and compare with the original input.
test "delta compress/decompress" {
    const Case = struct { input: []const u8, want: []const u8 };
    const cases = [_]Case{
        .{ .input = &[_]u8{}, .want = &[_]u8{} },
        .{ .input = &[_]u8{0}, .want = &[_]u8{0} },
        .{ .input = &[_]u8{10}, .want = &[_]u8{10} },
        .{ .input = &[_]u8{ 0, 1, 2 }, .want = &[_]u8{ 0, 0, 0 } },
        .{ .input = &[_]u8{ 10, 20, 30, 255 }, .want = &[_]u8{ 10, 9, 9, 224 } },
        .{ .input = &[_]u8{ 0, 254, 255 }, .want = &[_]u8{ 0, 253, 0 } },
    };
    for (cases) |case| {
        // Work on a mutable copy; both operations are in-place.
        var work = try ArrayList(u8).initCapacity(testing.allocator, case.input.len);
        defer work.deinit();
        try work.appendSlice(case.input);

        try deltaCompress(u8, work.items);
        try testing.expectEqualSlices(u8, work.items, case.want);

        try deltaDecompress(u8, work.items);
        try testing.expectEqualSlices(u8, work.items, case.input);
    }
}
2022-03-06 06:29:16 +02:00
// Compressing and then decompressing the sample values must give them back
// unchanged.
test "delta compression with varint tests" {
    // Arrays are values in Zig, so this initialisation is a mutable copy.
    var work: [uvarint_tests.len]u64 = uvarint_tests;
    try deltaCompress(u64, work[0..]);
    try deltaDecompress(u64, work[0..]);
    try testing.expectEqualSlices(u64, uvarint_tests[0..], work[0..]);
}
2022-03-05 10:19:42 +02:00
// Inputs that are not strictly increasing must be rejected with NotSorted.
test "delta compression negative tests" {
    const unsorted = [_][]const u8{
        &[_]u8{ 0, 0 },
        &[_]u8{ 0, 1, 1 },
        &[_]u8{ 0, 1, 2, 1 },
    };
    for (unsorted) |input| {
        var work = try ArrayList(u8).initCapacity(testing.allocator, input.len);
        defer work.deinit();
        try work.appendSlice(input);
        const outcome = deltaCompress(u8, work.items);
        try testing.expectError(error.NotSorted, outcome);
    }
}
// Delta sequences whose restored values exceed u8 must fail with Overflow.
test "delta decompress overflow" {
    const overflowing = [_][]const u8{
        &[_]u8{ 255, 0 },
        &[_]u8{ 0, 128, 127 },
    };
    for (overflowing) |input| {
        var work = try ArrayList(u8).initCapacity(testing.allocator, input.len);
        defer work.deinit();
        try work.appendSlice(input);
        const outcome = deltaDecompress(u8, work.items);
        try testing.expectError(error.Overflow, outcome);
    }
}
2022-03-06 06:29:16 +02:00
// Delta-compresses the sample values, stores them as a length-prefixed
// varint slice, and checks that the delta-decompression iterator restores
// the original sequence.
test "delta decompression with an iterator" {
    // Arrays are values in Zig, so this initialisation is a mutable copy.
    var compressed: [uvarint_tests.len]u64 = uvarint_tests;
    try deltaCompress(u64, compressed[0..]);

    var buf = ArrayList(u8).init(testing.allocator);
    defer buf.deinit();
    try appendUvarint(&buf, compressed.len);
    for (compressed) |x|
        try appendUvarint(&buf, x);

    // Keep the varint iterator in a named variable: the delta iterator holds
    // a pointer to it and mutates it on every next().
    var vit = try VarintSliceIterator(buf.items);
    var it = DeltaDecompressionIterator(&vit);
    var i: usize = 0;
    try testing.expectEqual(it.remaining(), uvarint_tests.len);
    while (try it.next()) |got| : (i += 1) {
        try testing.expectEqual(uvarint_tests[i], got);
    }
    try testing.expectEqual(i, uvarint_tests.len);
    try testing.expectEqual(it.remaining(), 0);
}
2022-02-23 10:12:23 +02:00
2022-03-04 10:37:07 +02:00
// Every sample value appended with appendUvarint must decode back to itself.
test "appendUvarint" {
    var idx: usize = 0;
    while (idx < uvarint_tests.len) : (idx += 1) {
        const want = uvarint_tests[idx];
        var encoded = ArrayList(u8).init(testing.allocator);
        defer encoded.deinit();
        try appendUvarint(&encoded, want);
        const decoded = try uvarint(encoded.items);
        try testing.expectEqual(want, decoded.value);
    }
}
2022-02-23 10:12:23 +02:00
// Encodings that do not fit in 64 bits must be rejected with Overflow.
test "overflow" {
    const malformed = [_][]const u8{
        &[_]u8{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2 },
        &[_]u8{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0 },
        &[_]u8{ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
    };
    for (malformed) |encoded| {
        const outcome = uvarint(encoded);
        try testing.expectError(error.Overflow, outcome);
    }
}