//
// varint64 []const u8 variants
//
// Thanks to https://github.com/gsquire/zig-snappy/blob/master/snappy.zig and
// golang's varint implementation.
const std = @import("std");

const ArrayListAligned = std.ArrayListAligned;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const math = std.math;

// Compresses a strictly increasing slice of integers using delta compression.
// Compression is in-place: the first element is kept as-is, every following
// element is replaced by `cur - prev - 1`.
//
// Returns error.NotSorted as soon as an element is not strictly greater than
// its predecessor. Elements before the offending one have already been
// rewritten at that point, so the slice must be considered clobbered.
pub fn deltaCompress(comptime T: type, elems: []T) error{NotSorted}!void {
    if (elems.len <= 1) return;

    var prev: T = elems[0];
    for (elems[1..]) |*elem| {
        const cur = elem.*;
        if (cur <= prev) return error.NotSorted;
        // cur > prev, so this cannot underflow.
        elem.* = cur - prev - 1;
        prev = cur;
    }
}

// Decompresses a slice compressed by deltaCompress. In-place.
//
// Returns error.Overflow when a reconstructed value does not fit into T.
// Elements before the overflowing one have already been rewritten.
pub fn deltaDecompress(comptime T: type, elems: []T) error{Overflow}!void {
    if (elems.len <= 1) return;

    var i: usize = 1;
    while (i < elems.len) : (i += 1) {
        // elems[i - 1] is already decompressed; undo `cur - prev - 1`.
        const base = try math.add(T, elems[i - 1], 1);
        elems[i] = try math.add(T, elems[i], base);
    }
}

// Represents a variable length integer that we read from a byte stream along
// with how many bytes were read to decode it.
pub const Varint = struct {
    value: u64,
    bytes_read: usize,
};

// Maximum number of bytes a varint-encoded u64 may occupy.
pub const maxVarintLen64 = 10;

// Decodes a u64 from the start of buf.
// https://golang.org/pkg/encoding/binary/#Uvarint
//
// Returns error.Overflow when the encoding does not fit into 64 bits.
// When buf ends before the terminating byte (including an empty buf), the
// result is `.value = 0, .bytes_read = 0`; callers distinguish this case by
// checking `bytes_read`.
pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
    var x: u64 = 0;
    var s: u6 = 0;

    for (buf, 0..) |b, i| {
        if (i == maxVarintLen64)
            // Catch byte reads past maxVarintLen64.
            // See issue https://golang.org/issues/41185
            return error.Overflow;

        if (b < 0x80) {
            // The 10th byte may only contribute the single top bit.
            if (i == maxVarintLen64 - 1 and b > 1) {
                return error.Overflow;
            }
            return Varint{
                .value = x | (@as(u64, b) << s),
                .bytes_read = i + 1,
            };
        }
        x |= (@as(u64, b & 0x7f) << s);
        // s is a u6: a continuation bit on the 10th byte overflows here.
        s = try math.add(u6, s, 7);
    }

    return Varint{
        .value = 0,
        .bytes_read = 0,
    };
}

// Like uvarint, but asserts that the encoding does not overflow.
pub fn uvarintMust(buf: []const u8) Varint {
    return uvarint(buf) catch |err| switch (err) {
        error.Overflow => unreachable,
    };
}

// Encodes x into buf and returns the number of bytes written.
// https://golang.org/pkg/encoding/binary/#PutUvarint
//
// buf is indexed without a length check: it must have room for the encoding
// (maxVarintLen64 bytes is always enough).
pub fn putUvarint(buf: []u8, x: u64) usize {
    var i: usize = 0;
    var rest = x;

    while (rest >= 0x80) {
        // Low 7 bits plus the continuation flag.
        buf[i] = @truncate(u8, rest) | 0x80;
        rest >>= 7;
        i += 1;
    }
    buf[i] = @truncate(u8, rest);

    return i + 1;
}

// VarintSliceIterator iterates over a varint-encoded slice.
// The first element is the length of the slice, in decoded numbers.
pub const VarintSliceIterator = struct {
    remaining: usize,
    arr: []const u8,
    idx: usize,

    // Decodes and returns the next number, or null once `remaining` numbers
    // have been returned. Propagates error.Overflow from uvarint.
    pub fn next(self: *VarintSliceIterator) error{Overflow}!?u64 {
        if (self.remaining == 0) return null;

        const decoded = try uvarint(self.arr[self.idx..]);
        self.idx += decoded.bytes_read;
        self.remaining -= 1;
        return decoded.value;
    }

    // Like next, but asserts that decoding does not overflow.
    pub fn nextMust(self: *VarintSliceIterator) ?u64 {
        return self.next() catch |err| switch (err) {
            error.Overflow => unreachable,
        };
    }

    // returns the number of remaining items. If called before the first
    // next(), returns the length of the slice.
    // NOTE(review): this declaration shares its name with the `remaining`
    // field; reading the field directly is the unambiguous way to get the
    // same value.
    pub fn remaining(self: *const VarintSliceIterator) usize {
        return self.remaining;
    }
};

// Creates an iterator over arr, whose first varint is the element count.
//
// Returns error.Overflow when the count is not a valid varint or does not fit
// into usize.
pub fn varintSliceIterator(arr: []const u8) error{Overflow}!VarintSliceIterator {
    const header = try uvarint(arr);
    // The count is a u64 on the wire; usize may be narrower on some targets.
    const count = math.cast(usize, header.value) orelse return error.Overflow;

    return VarintSliceIterator{
        .remaining = count,
        .arr = arr,
        .idx = header.bytes_read,
    };
}

// Like varintSliceIterator, but asserts that the header does not overflow.
pub fn varintSliceIteratorMust(arr: []const u8) VarintSliceIterator {
    return varintSliceIterator(arr) catch |err| switch (err) {
        error.Overflow => unreachable,
    };
}

// Iterates over a delta-compressed sequence (see deltaCompress) that is read
// through a VarintSliceIterator. The iterator only borrows `vit`: the
// pointee must stay alive and at the same address while iterating.
pub const DeltaDecompressionIterator = struct {
    vit: *VarintSliceIterator,
    prev: u64,
    // 0 before the first element (stored verbatim), 1 afterwards.
    add_to_prev: u1,

    // Returns the next decompressed number, or null when the underlying
    // iterator is exhausted. Returns error.Overflow if the reconstructed
    // value does not fit into u64 or the varint is malformed.
    pub fn next(self: *DeltaDecompressionIterator) error{Overflow}!?u64 {
        const delta = (try self.vit.next()) orelse return null;

        const base = try math.add(u64, self.prev, self.add_to_prev);
        const result = try math.add(u64, delta, base);
        self.prev = result;
        self.add_to_prev = 1;
        return result;
    }

    // returns the number of remaining items. If called before the first
    // next(), returns the length of the slice.
    pub fn remaining(self: *const DeltaDecompressionIterator) usize {
        return self.vit.remaining;
    }

    // Like next, but asserts that decoding does not overflow.
    pub fn nextMust(self: *DeltaDecompressionIterator) ?u64 {
        return self.next() catch |err| switch (err) {
            error.Overflow => unreachable,
        };
    }
};

// Wraps vit into a DeltaDecompressionIterator. vit is borrowed, not copied.
pub fn deltaDecompressionIterator(vit: *VarintSliceIterator) DeltaDecompressionIterator {
    return DeltaDecompressionIterator{
        .vit = vit,
        .prev = 0,
        .add_to_prev = 0,
    };
}

// Appends the varint encoding of x to arr.
pub fn appendUvarint(arr: *ArrayListAligned(u8, 8), x: u64) Allocator.Error!void {
    var buf: [maxVarintLen64]u8 = undefined;
    const n = putUvarint(&buf, x);
    try arr.appendSlice(buf[0..n]);
}

const testing = std.testing;

// Strictly increasing, so the same table can feed the delta tests.
const uvarint_tests = [_]u64{
    0,
    1,
    2,
    10,
    20,
    63,
    64,
    65,
    127,
    128,
    129,
    255,
    256,
    257,
    // Parenthesized: in Zig `-` binds tighter than `<<`.
    (1 << 63) - 1,
    // Needs all maxVarintLen64 bytes.
    math.maxInt(u64),
};

test "compress putUvarint/uvarint" {
    for (uvarint_tests) |x| {
        var buf: [maxVarintLen64]u8 = undefined;
        const n = putUvarint(buf[0..], x);
        const got = try uvarint(buf[0..n]);

        try testing.expectEqual(x, got.value);
        try testing.expectEqual(n, got.bytes_read);
    }
}

test "compress varintSliceIterator" {
    var buf = ArrayListAligned(u8, 8).init(testing.allocator);
    defer buf.deinit();
    try appendUvarint(&buf, uvarint_tests.len);
    for (uvarint_tests) |x|
        try appendUvarint(&buf, x);

    var it = try varintSliceIterator(buf.items);
    var i: usize = 0;
    while (try it.next()) |got| : (i += 1) {
        try testing.expectEqual(uvarint_tests[i], got);
    }
    try testing.expectEqual(uvarint_tests.len, i);
}

test "compress delta compress/decompress" {
    const tests = [_]struct { input: []const u8, want: []const u8 }{
        .{ .input = &[_]u8{}, .want = &[_]u8{} },
        .{ .input = &[_]u8{0}, .want = &[_]u8{0} },
        .{ .input = &[_]u8{10}, .want = &[_]u8{10} },
        .{ .input = &[_]u8{ 0, 1, 2 }, .want = &[_]u8{ 0, 0, 0 } },
        .{ .input = &[_]u8{ 10, 20, 30, 255 }, .want = &[_]u8{ 10, 9, 9, 224 } },
        .{ .input = &[_]u8{ 0, 254, 255 }, .want = &[_]u8{ 0, 253, 0 } },
    };
    for (tests) |t| {
        var arr = try ArrayListAligned(u8, 8).initCapacity(
            testing.allocator,
            t.input.len,
        );
        defer arr.deinit();
        try arr.appendSlice(t.input);

        try deltaCompress(u8, arr.items);
        try testing.expectEqualSlices(u8, t.want, arr.items);

        try deltaDecompress(u8, arr.items);
        try testing.expectEqualSlices(u8, t.input, arr.items);
    }
}

test "compress delta compression with varint tests" {
    var scratch: [uvarint_tests.len]u64 = undefined;
    std.mem.copy(u64, scratch[0..], uvarint_tests[0..]);
    try deltaCompress(u64, scratch[0..]);
    try deltaDecompress(u64, scratch[0..]);
    try testing.expectEqualSlices(u64, uvarint_tests[0..], scratch[0..]);
}

test "compress delta compression negative tests" {
    for ([_][]const u8{
        &[_]u8{ 0, 0 },
        &[_]u8{ 0, 1, 1 },
        &[_]u8{ 0, 1, 2, 1 },
    }) |t| {
        var arr = try ArrayListAligned(u8, 8).initCapacity(testing.allocator, t.len);
        defer arr.deinit();
        try arr.appendSlice(t);
        try testing.expectError(error.NotSorted, deltaCompress(u8, arr.items));
    }
}

test "compress delta decompress overflow" {
    for ([_][]const u8{
        &[_]u8{ 255, 0 },
        &[_]u8{ 0, 128, 127 },
    }) |t| {
        var arr = try ArrayListAligned(u8, 8).initCapacity(testing.allocator, t.len);
        defer arr.deinit();
        try arr.appendSlice(t);
        try testing.expectError(error.Overflow, deltaDecompress(u8, arr.items));
    }
}

test "compress delta decompression with an iterator" {
    var compressed: [uvarint_tests.len]u64 = undefined;
    std.mem.copy(u64, compressed[0..], uvarint_tests[0..]);
    try deltaCompress(u64, compressed[0..]);

    var buf = ArrayListAligned(u8, 8).init(testing.allocator);
    defer buf.deinit();
    try appendUvarint(&buf, compressed.len);
    for (compressed) |x|
        try appendUvarint(&buf, x);

    var vit = try varintSliceIterator(buf.items);
    var it = deltaDecompressionIterator(&vit);
    var i: usize = 0;
    try testing.expectEqual(uvarint_tests.len, it.remaining());
    while (try it.next()) |got| : (i += 1) {
        try testing.expectEqual(uvarint_tests[i], got);
    }
    try testing.expectEqual(uvarint_tests.len, i);
}

test "compress appendUvarint" {
    for (uvarint_tests) |x| {
        var buf = ArrayListAligned(u8, 8).init(testing.allocator);
        defer buf.deinit();

        try appendUvarint(&buf, x);
        const got = try uvarint(buf.items);

        try testing.expectEqual(x, got.value);
    }
}

test "compress overflow" {
    for ([_][]const u8{
        &[_]u8{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x2 },
        &[_]u8{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1, 0, 0 },
        &[_]u8{ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
    }) |t| {
        try testing.expectError(error.Overflow, uvarint(t));
    }
}

const compress = @This();

// Bundles a varint iterator with the delta decompressor that reads from it.
//
// `it` holds a pointer to a VarintSliceIterator. Because this struct is passed
// around by value, that pointer cannot be kept pointing at `vit` across
// copies. Always iterate through next()/nextMust() below, which rebind the
// pointer to this instance's `vit` first; do not call `it.next()` directly on
// a value obtained from groupMembersIter.
const GroupMembersIter = struct {
    vit: compress.VarintSliceIterator,
    it: compress.DeltaDecompressionIterator,
    total: usize,

    // Returns the next member, or null when exhausted.
    pub fn next(self: *GroupMembersIter) error{Overflow}!?u64 {
        self.it.vit = &self.vit;
        return self.it.next();
    }

    // Like next, but asserts that decoding does not overflow.
    pub fn nextMust(self: *GroupMembersIter) ?u64 {
        self.it.vit = &self.vit;
        return self.it.nextMust();
    }
};

// Builds an iterator over a delta-compressed, varint-encoded member list.
// Asserts that the length header of members_slice is a valid varint.
pub fn groupMembersIter(members_slice: []const u8) GroupMembersIter {
    var vit = compress.varintSliceIteratorMust(members_slice);
    return GroupMembersIter{
        .vit = vit,
        // This pointer refers to the local above and is stale once we
        // return; GroupMembersIter.next()/nextMust() rebind it before use.
        .it = compress.deltaDecompressionIterator(&vit),
        .total = vit.remaining,
    };
}

test "compress: trying to repro pointer change of DB.groupMembersIter" {
    // Count 4, then deltas 0, 60, 2, 2; the trailing bytes are not consumed.
    const members_slice = &[_]u8{ 4, 0, 60, 2, 2, 2, 64, 2 };
    const want = [_]u64{ 0, 61, 64, 67 };

    var members = groupMembersIter(members_slice);
    try testing.expectEqual(@as(usize, want.len), members.total);

    var i: usize = 0;
    while (members.nextMust()) |member_offset| : (i += 1) {
        try testing.expectEqual(want[i], member_offset);
    }
    try testing.expectEqual(want.len, i);
}