1
Fork 0

varint iterator

This commit is contained in:
Motiejus Jakštys 2022-03-05 10:19:42 +02:00 committed by Motiejus Jakštys
parent e17b2c9641
commit ec95b231fa
2 changed files with 127 additions and 65 deletions

View File

@ -7,6 +7,7 @@ const std = @import("std");
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
// Compresses a strictly increasing, sorted slice of integers using delta
// compression. Compression is done in place.
@ -39,6 +40,93 @@ pub fn deltaDecompress(comptime T: type, elems: []T) error{Overflow}!void {
}
}
// Represents a variable length integer that we read from a byte stream along
// with how many bytes were read to decode it.
pub const Varint = struct {
// The decoded integer.
value: u64,
// Number of input bytes consumed to decode `value`. uvarint reports 0 here
// (together with a value of 0) when the input ends before a terminating byte.
bytes_read: usize,
};
// Size limit, in bytes, for one encoded 64-bit varint: uvarint returns
// error.Overflow on reaching this index, and appendUvarint sizes its scratch
// buffer with it.
pub const maxVarintLen64 = 10;
// Decodes one unsigned varint from the front of `buf`.
//
// Every byte contributes its low 7 bits, least significant group first; a
// byte below 0x80 terminates the number. Returns the decoded value together
// with the number of bytes consumed.
//
// Returns error.Overflow when the encoding does not fit in a u64 (more than
// maxVarintLen64 bytes, or a final 10th byte greater than 1). When `buf` runs
// out before a terminating byte is seen (including an empty `buf`), returns a
// Varint with value 0 and bytes_read 0.
//
// https://golang.org/pkg/encoding/binary/#Uvarint
pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
    var acc: u64 = 0;
    var shift: u6 = 0;
    var pos: usize = 0;
    while (pos < buf.len) : (pos += 1) {
        // Catch byte reads past maxVarintLen64.
        // See issue https://golang.org/issues/41185
        if (pos == maxVarintLen64) return error.Overflow;

        const byte = buf[pos];
        if (byte >= 0x80) {
            // Continuation byte: fold in the 7 payload bits and move on.
            acc |= (@as(u64, byte & 0x7f) << shift);
            shift = try std.math.add(u6, shift, 7);
            continue;
        }

        // Terminating byte. In the last allowed position only a single
        // payload bit still fits into 64 bits.
        const in_last_slot = pos == maxVarintLen64 - 1;
        if (in_last_slot and byte > 1) return error.Overflow;

        return Varint{
            .value = acc | (@as(u64, byte) << shift),
            .bytes_read = pos + 1,
        };
    }

    // Ran out of input without seeing a terminating byte.
    return Varint{
        .value = 0,
        .bytes_read = 0,
    };
}
// Encodes `x` as a varint into the start of `buf` and returns how many bytes
// were written.
//
// Each output byte holds 7 bits of `x`, least significant group first, with
// the high bit set on every byte except the last. `buf` is indexed without a
// length check, so the caller must supply enough room.
//
// https://golang.org/pkg/encoding/binary/#PutUvarint
pub fn putUvarint(buf: []u8, x: u64) usize {
    var rest = x;
    var written: usize = 0;
    // Emit continuation bytes while more than 7 bits remain.
    while (rest > 0x7f) : (written += 1) {
        buf[written] = 0x80 | @truncate(u8, rest);
        rest >>= 7;
    }
    // Final byte: high bit clear marks the end of the number.
    buf[written] = @truncate(u8, rest);
    return written + 1;
}
// varintSliceIterator walks over a varint-encoded slice.
// The first element of the encoding is the number of values that follow;
// it is consumed when the iterator is constructed, so `remaining` starts at
// that count and `idx` points just past it.
const varintSliceIterator = struct {
    // How many values are still to be yielded.
    remaining: usize,
    // The whole encoded input, including the leading count.
    arr: []const u8,
    // Offset into `arr` of the next value to decode.
    idx: usize,

    // Decodes and returns the next value, or null once `remaining` values
    // have been yielded. Propagates error.Overflow from uvarint.
    //
    // When the bytes at `idx` end before a terminating byte, uvarint reports
    // value 0 with 0 bytes read; that 0 is returned here and `idx` does not
    // advance.
    pub fn next(it: *varintSliceIterator) error{Overflow}!?u64 {
        if (it.remaining > 0) {
            const decoded = try uvarint(it.arr[it.idx..]);
            it.idx += decoded.bytes_read;
            it.remaining -= 1;
            return decoded.value;
        }
        return null;
    }
};
// Creates an iterator over the varint-encoded slice `arr`.
//
// The first varint in `arr` is read as the number of values that follow; the
// returned iterator is positioned right after it. An empty `arr` decodes to a
// count of 0, so the iterator yields nothing.
//
// Returns error.Overflow if the leading varint is malformed, or if the count
// it encodes does not fit in a usize.
pub fn VarintSliceIterator(arr: []const u8) error{Overflow}!varintSliceIterator {
    const header = try uvarint(arr);
    // The count is decoded as u64 but the iterator counts in usize. Check the
    // range explicitly: an implicit u64 -> usize coercion only compiles on
    // targets where usize is 64 bits wide.
    if (header.value > std.math.maxInt(usize))
        return error.Overflow;
    return varintSliceIterator{
        .remaining = @intCast(usize, header.value),
        .arr = arr,
        .idx = header.bytes_read,
    };
}
// Appends the varint encoding of `x` to `arr`.
// The number is first encoded into a stack buffer of maxVarintLen64 bytes and
// then copied; the only possible failure is the list failing to grow.
pub fn appendUvarint(arr: *ArrayList(u8), x: u64) Allocator.Error!void {
    var scratch: [maxVarintLen64]u8 = undefined;
    const encoded_len = putUvarint(&scratch, x);
    const encoded = scratch[0..encoded_len];
    try arr.appendSlice(encoded);
}
const testing = std.testing;
test "delta compress/decompress" {
@ -91,66 +179,6 @@ test "delta decompress overflow" {
}
}
// Represents a variable length integer that we read from a byte stream along
// with how many bytes were read to decode it.
pub const Varint = struct {
// The decoded integer.
value: u64,
// Number of input bytes consumed to decode `value`. uvarint reports 0 here
// (together with a value of 0) when the input ends before a terminating byte.
bytesRead: usize,
};
// Size limit, in bytes, for one encoded 64-bit varint: uvarint returns
// error.Overflow on reaching this index, and appendUvarint sizes its scratch
// buffer with it.
pub const maxVarintLen64 = 10;
// Decodes one unsigned varint from the front of `buf`.
//
// Every byte contributes its low 7 bits, least significant group first; a
// byte below 0x80 terminates the number. Returns the decoded value together
// with the number of bytes consumed.
//
// Returns error.Overflow when the encoding does not fit in a u64 (more than
// maxVarintLen64 bytes, or a final 10th byte greater than 1). When `buf` runs
// out before a terminating byte is seen (including an empty `buf`), returns a
// Varint with value 0 and bytesRead 0.
//
// https://golang.org/pkg/encoding/binary/#Uvarint
pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
    var acc: u64 = 0;
    var shift: u6 = 0;
    var pos: usize = 0;
    while (pos < buf.len) : (pos += 1) {
        // Catch byte reads past maxVarintLen64.
        // See issue https://golang.org/issues/41185
        if (pos == maxVarintLen64) return error.Overflow;

        const byte = buf[pos];
        if (byte >= 0x80) {
            // Continuation byte: fold in the 7 payload bits and move on.
            acc |= (@as(u64, byte & 0x7f) << shift);
            shift = try std.math.add(u6, shift, 7);
            continue;
        }

        // Terminating byte. In the last allowed position only a single
        // payload bit still fits into 64 bits.
        const in_last_slot = pos == maxVarintLen64 - 1;
        if (in_last_slot and byte > 1) return error.Overflow;

        return Varint{
            .value = acc | (@as(u64, byte) << shift),
            .bytesRead = pos + 1,
        };
    }

    // Ran out of input without seeing a terminating byte.
    return Varint{
        .value = 0,
        .bytesRead = 0,
    };
}
// Encodes `x` as a varint into the start of `buf` and returns how many bytes
// were written.
//
// Each output byte holds 7 bits of `x`, least significant group first, with
// the high bit set on every byte except the last. `buf` is indexed without a
// length check, so the caller must supply enough room.
//
// https://golang.org/pkg/encoding/binary/#PutUvarint
pub fn putUvarint(buf: []u8, x: u64) usize {
    var pending = x;
    var count: usize = 0;
    // Emit continuation bytes while more than 7 bits remain.
    while (pending > 0x7f) : (count += 1) {
        buf[count] = 0x80 | @truncate(u8, pending);
        pending >>= 7;
    }
    // Final byte: high bit clear marks the end of the number.
    buf[count] = @truncate(u8, pending);
    return count + 1;
}
// Appends the varint encoding of `x` to `arr`.
// The number is first encoded into a stack buffer of maxVarintLen64 bytes and
// then copied; the only possible failure is the list failing to grow.
pub fn appendUvarint(arr: *ArrayList(u8), x: u64) Allocator.Error!void {
    var scratch: [maxVarintLen64]u8 = undefined;
    const encoded_len = putUvarint(&scratch, x);
    const encoded = scratch[0..encoded_len];
    try arr.appendSlice(encoded);
}
const uvarint_tests = [_]u64{
0,
1,
@ -176,10 +204,25 @@ test "putUvarint/uvarint" {
const got = try uvarint(buf[0..n]);
try testing.expectEqual(x, got.value);
try testing.expectEqual(n, got.bytesRead);
try testing.expectEqual(n, got.bytes_read);
}
}
// Round-trips uvarint_tests through the length-prefixed varint encoding and
// checks that the iterator yields every value, in order, and then stops.
test "VarintSliceIterator" {
    var buf = ArrayList(u8).init(testing.allocator);
    defer buf.deinit();

    // Leading varint: how many values follow.
    try appendUvarint(&buf, uvarint_tests.len);
    for (uvarint_tests) |x|
        try appendUvarint(&buf, x);

    var it = try VarintSliceIterator(buf.items);
    var i: usize = 0;
    while (try it.next()) |got| : (i += 1) {
        try testing.expectEqual(uvarint_tests[i], got);
    }
    // expectEqual takes (expected, actual); keeping that order makes a
    // failure report the expected count correctly.
    try testing.expectEqual(@as(usize, uvarint_tests.len), i);
}
test "appendUvarint" {
for (uvarint_tests) |x| {
var buf = ArrayList(u8).init(testing.allocator);

View File

@ -263,13 +263,15 @@ pub const usersSectionErr = Allocator.Error || error{Overflow};
// Builds the users section for `corpus` and returns it as an owned slice.
// In the lines shown here the function only reserves capacity (24 bytes per
// entry in `corpus.users`) and returns the buffer without appending to it;
// `gids` and `shells` are accepted but discarded.
// NOTE(review): `gids` and `shells` each appear twice in the parameter list
// (by value and as *const pointers), and `allocator`/`corpus` are discarded
// as well as used. This looks like the old and new sides of a change shown
// together -- confirm which lines are live before relying on this block.
pub fn usersSection(
allocator: Allocator,
corpus: *const Corpus,
gids: UserGids,
shells: ShellSections,
gids: *const UserGids,
shells: *const ShellSections,
) usersSectionErr![]const u8 {
_ = allocator;
_ = corpus;
// as of writing each user takes 15 bytes + strings + padding, padded to
// 8 bytes. 24 is a very optimistic lower bound.
var buf = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len);
_ = gids;
_ = shells;
return buf.toOwnedSlice();
}
pub fn groupMembers(allocator: Allocator, corpus: *const Corpus) Allocator.Error!void {
@ -447,6 +449,23 @@ test "test sections" {
var user_gids = try userGids(allocator, &corpus);
defer user_gids.deinit(allocator);
var users_section = try usersSection(
allocator,
&corpus,
&user_gids,
&shell_sections,
);
defer allocator.free(users_section);
}
// Smoke test: builds the user gids from the test corpus and releases them
// again. There are no explicit expectations; testing.allocator is used for
// every allocation.
test "userGids" {
    const gpa = testing.allocator;

    var corpus = try testCorpus(gpa);
    defer corpus.deinit();

    var user_gids = try userGids(gpa, &corpus);
    defer user_gids.deinit(gpa);
}
test "pack gids" {