/// Delta-compresses a strictly increasing slice of integers in place.
///
/// The first element is kept unchanged. Every following element is replaced
/// by its distance to the previous original element minus one, i.e.
/// `elems[i] - elems[i - 1] - 1`. Because the input must be strictly
/// increasing, that value is never negative.
///
/// Returns `error.NotSorted` if any element is less than or equal to its
/// predecessor. The whole slice is validated before anything is written, so
/// `elems` is left untouched when an error is returned.
///
/// Note: the gap is computed in `T`. For signed `T` the difference between
/// two neighbours may not fit in `T`, which is an integer overflow; the
/// function is meant for unsigned integer types.
pub fn deltaCompress(comptime T: type, elems: []T) error{NotSorted}!void {
    // Zero or one element: nothing to validate and nothing to rewrite.
    if (elems.len < 2) return;

    // Pass 1: validate. Doing this up front guarantees a failing call never
    // leaves the caller with a half-compressed slice.
    var i: usize = 1;
    while (i < elems.len) : (i += 1) {
        if (elems[i] <= elems[i - 1]) return error.NotSorted;
    }

    // Pass 2: rewrite. `prev` tracks the *original* value of the previous
    // element, since the slot itself has already been overwritten.
    var prev: T = elems[0];
    i = 1;
    while (i < elems.len) : (i += 1) {
        const cur = elems[i];
        elems[i] = cur - prev - 1;
        prev = cur;
    }
}

const testing = std.testing;

test "delta compression positive tests" {
    const cases = [_]struct { input: []const u8, want: []const u8 }{
        .{ .input = &[_]u8{}, .want = &[_]u8{} },
        .{ .input = &[_]u8{0}, .want = &[_]u8{0} },
        .{ .input = &[_]u8{10}, .want = &[_]u8{10} },
        .{ .input = &[_]u8{ 0, 1, 2 }, .want = &[_]u8{ 0, 0, 0 } },
        .{ .input = &[_]u8{ 0, 254, 255 }, .want = &[_]u8{ 0, 253, 0 } },
    };
    for (cases) |case| {
        // The inputs are constants; compress a mutable heap copy instead.
        const buf = try testing.allocator.dupe(u8, case.input);
        defer testing.allocator.free(buf);

        try deltaCompress(u8, buf);
        try testing.expectEqualSlices(u8, case.want, buf);
    }
}

test "delta compression negative tests" {
    const cases = [_][]const u8{
        &[_]u8{ 0, 0 },
        &[_]u8{ 0, 1, 1 },
        &[_]u8{ 0, 1, 2, 1 },
    };
    for (cases) |input| {
        const buf = try testing.allocator.dupe(u8, input);
        defer testing.allocator.free(buf);

        try testing.expectError(error.NotSorted, deltaCompress(u8, buf));
        // A rejected input must come back unmodified.
        try testing.expectEqualSlices(u8, input, buf);
    }
}
read from a byte stream along // with how many bytes were read to decode it. pub const Varint = struct { @@ -58,28 +112,25 @@ pub fn putUvarint(buf: []u8, x: u64) usize { return i + 1; } -const testing = std.testing; - -const tests = [_]u64{ - 0, - 1, - 2, - 10, - 20, - 63, - 64, - 65, - 127, - 128, - 129, - 255, - 256, - 257, - 1 << 63 - 1, -}; - test "uvarint" { - for (tests) |x| { + const uvarint_tests = [_]u64{ + 0, + 1, + 2, + 10, + 20, + 63, + 64, + 65, + 127, + 128, + 129, + 255, + 256, + 257, + 1 << 63 - 1, + }; + for (uvarint_tests) |x| { var buf: [maxVarintLen64]u8 = undefined; const n = putUvarint(buf[0..], x); const got = try uvarint(buf[0..n]);