1
Fork 0
turbonss/src/compress.zig

152 lines
3.9 KiB
Zig
Raw Normal View History

2022-02-23 06:47:56 +02:00
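//
// Integer compression helpers: in-place delta compression of sorted
// integers, and varint64 encoding/decoding over []const u8.
//
// Thanks to https://github.com/gsquire/zig-snappy/blob/master/snappy.zig and Go's
// encoding/binary varint implementation.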
const std = @import("std");
2022-02-28 05:48:43 +02:00
// Delta-compresses a strictly increasing slice of integers. Compression is
// in-place: the first element is kept as is, and every following element is
// replaced by its distance to the previous original value, minus one (so
// consecutive integers encode as 0).
//
// Returns error.NotSorted as soon as an element is not strictly greater than
// its predecessor. Elements before the offending position have already been
// rewritten at that point; the slice is not restored.
pub fn deltaCompress(comptime T: type, elems: []T) error{NotSorted}!void {
    if (elems.len == 0) return;

    // Original (uncompressed) value of the element just before `slot`.
    var last: T = elems[0];
    for (elems[1..]) |*slot| {
        const value = slot.*;
        if (value <= last) return error.NotSorted;
        slot.* = value - last - 1;
        last = value;
    }
}
const testing = std.testing;
// Checks deltaCompress on strictly increasing inputs: the empty slice,
// single-element slices, consecutive values, and values at the top of the u8
// range.
test "delta compression positive tests" {
    const tests = [_]struct { input: []const u8, want: []const u8 }{
        .{ .input = &[_]u8{}, .want = &[_]u8{} },
        .{ .input = &[_]u8{0}, .want = &[_]u8{0} },
        .{ .input = &[_]u8{10}, .want = &[_]u8{10} },
        .{ .input = &[_]u8{ 0, 1, 2 }, .want = &[_]u8{ 0, 0, 0 } },
        .{ .input = &[_]u8{ 0, 254, 255 }, .want = &[_]u8{ 0, 253, 0 } },
    };
    for (tests) |t| {
        // deltaCompress rewrites its argument, so run it on a mutable copy
        // of the constant input.
        const got = try testing.allocator.dupe(u8, t.input);
        defer testing.allocator.free(got);

        try deltaCompress(u8, got);
        // Expected value goes first so that a failure report labels the two
        // sides correctly.
        try testing.expectEqualSlices(u8, t.want, got);
    }
}
// Inputs that contain a repeated or decreasing element must be rejected with
// error.NotSorted.
test "delta compression negative tests" {
    const unsorted_inputs = [_][]const u8{
        &[_]u8{ 0, 0 },
        &[_]u8{ 0, 1, 1 },
        &[_]u8{ 0, 1, 2, 1 },
    };
    for (unsorted_inputs) |input| {
        // deltaCompress needs a mutable slice; work on a heap copy.
        const scratch = try testing.allocator.dupe(u8, input);
        defer testing.allocator.free(scratch);

        const result = deltaCompress(u8, scratch);
        try testing.expectError(error.NotSorted, result);
    }
}
2022-02-23 06:47:56 +02:00
// Result of decoding one variable-length integer from a byte buffer: the
// decoded value together with the number of bytes that were consumed to
// decode it.
pub const Varint = struct {
// The decoded integer.
value: u64,
// Number of input bytes consumed. uvarint sets this to 0 (with value 0) when
// the buffer ends before the integer is complete.
bytesRead: usize,
};
2022-02-27 15:49:00 +02:00
// Maximum number of bytes in the varint encoding of a 64-bit value: every
// byte carries 7 payload bits, so 64 bits need at most ceil(64 / 7) = 10.
const maxVarintLen64 = 10;
2022-02-23 06:47:56 +02:00
// Decodes one unsigned base-128 varint from the start of buf: 7 payload bits
// per byte, least-significant group first, high bit set on every byte except
// the last. Modelled on https://golang.org/pkg/encoding/binary/#Uvarint
//
// Returns the decoded value and the number of bytes consumed. If buf ends
// before a terminating byte is seen (this includes an empty buf), the result
// has value == 0 and bytesRead == 0.
//
// Returns error.Overflow when the encoded integer does not fit in 64 bits.
pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
    var x: u64 = 0;
    // Bit position at which the next 7-bit group is placed.
    var s: u6 = 0;
    var i: usize = 0;
    while (i < buf.len) : (i += 1) {
        const b = buf[i];
        if (i == maxVarintLen64) {
            // Catch byte reads past maxVarintLen64.
            // See issue https://golang.org/issues/41185
            return error.Overflow;
        }
        if (b < 0x80) {
            // Terminating byte. In the last permitted position only bit 63
            // is still free, so the byte may be at most 1.
            if (i == maxVarintLen64 - 1 and b > 1) {
                return error.Overflow;
            }
            return Varint{ .value = x | (@as(u64, b) << s), .bytesRead = i + 1 };
        }
        x |= (@as(u64, b & 0x7f) << s);
        // A continuation byte at shift 63 would need a shift of 70; the
        // checked add turns that into error.Overflow.
        s = try std.math.add(u6, s, 7);
    }
    // Ran out of input before the terminating byte.
    return Varint{
        .value = 0,
        .bytesRead = 0,
    };
}
// Writes x into buf as a varint and returns the number of bytes written.
// Modelled on https://golang.org/pkg/encoding/binary/#PutUvarint
//
// Each output byte holds the next 7 low bits of x; every byte except the
// last has its high bit set. buf is indexed directly, so it must be large
// enough for the encoding (at most 10 bytes for a u64).
pub fn putUvarint(buf: []u8, x: u64) usize {
    var rest = x;
    var pos: usize = 0;
    // Emit continuation bytes while more than 7 bits remain.
    while (rest > 0x7f) : (pos += 1) {
        buf[pos] = 0x80 | @truncate(u8, rest);
        rest >>= 7;
    }
    // Final byte: high bit clear.
    buf[pos] = @truncate(u8, rest);
    return pos + 1;
}
// Round-trips a set of values through putUvarint and uvarint and checks that
// both the value and the encoded length survive.
test "uvarint" {
    const uvarint_tests = [_]u64{
        0,
        1,
        2,
        10,
        20,
        63,
        64,
        65,
        127,
        128,
        129,
        255,
        256,
        257,
        // In Zig `-` binds tighter than `<<`, so without the parentheses
        // this would be 1 << 62 rather than 2^63 - 1.
        (1 << 63) - 1,
        // Largest u64: the only kind of value that needs all
        // maxVarintLen64 bytes.
        std.math.maxInt(u64),
    };
    for (uvarint_tests) |x| {
        var buf: [maxVarintLen64]u8 = undefined;
        const n = putUvarint(buf[0..], x);
        const got = try uvarint(buf[0..n]);
        try testing.expectEqual(x, got.value);
        try testing.expectEqual(n, got.bytesRead);
    }
}
2022-02-23 10:12:23 +02:00
// Encodings that describe more than 64 bits must be rejected by uvarint with
// error.Overflow.
test "overflow" {
    const oversized = [_][]const u8{
        // Tenth byte terminates but carries more than the one remaining bit.
        &[_]u8{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2 },
        // Continuation bits run past the tenth byte.
        &[_]u8{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0 },
        &[_]u8{ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
    };
    for (oversized) |encoded| {
        const result = uvarint(encoded);
        try testing.expectError(error.Overflow, result);
    }
}