varint iterator
This commit is contained in:
parent
e17b2c9641
commit
ec95b231fa
165
src/compress.zig
165
src/compress.zig
@ -7,6 +7,7 @@ const std = @import("std");
|
||||
|
||||
const ArrayList = std.ArrayList;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const assert = std.debug.assert;
|
||||
|
||||
// Compresses a strictly increasing, sorted slice of integers using delta
|
||||
// compression. Compression is in-place.
|
||||
@ -39,6 +40,93 @@ pub fn deltaDecompress(comptime T: type, elems: []T) error{Overflow}!void {
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of decoding one variable-length integer from a byte stream.
pub const Varint = struct {
    /// The decoded integer.
    value: u64,
    /// Number of bytes that were read from the stream to decode `value`.
    bytes_read: usize,
};
|
||||
|
||||
pub const maxVarintLen64 = 10;
|
||||
|
||||
/// Decodes one unsigned varint from the front of `buf`. Port of Go's
/// `encoding/binary.Uvarint`:
/// https://golang.org/pkg/encoding/binary/#Uvarint
///
/// Returns the decoded value together with the number of bytes consumed.
/// If `buf` runs out before a terminating byte (one below 0x80) is seen,
/// which includes an empty `buf`, the result is `.value = 0` and
/// `.bytes_read = 0`.
///
/// Returns `error.Overflow` when the encoding does not fit in 64 bits.
pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
    var acc: u64 = 0;
    var shift: u6 = 0;

    for (buf) |byte, pos| {
        // Catch byte reads past maxVarintLen64.
        // See issue https://golang.org/issues/41185
        if (pos == maxVarintLen64) return error.Overflow;

        if (byte >= 0x80) {
            // Continuation byte: keep the low 7 bits and move on. The checked
            // add fails with error.Overflow once the shift leaves u6 range.
            acc |= (@as(u64, byte & 0x7f) << shift);
            shift = try std.math.add(u6, shift, 7);
            continue;
        }

        // Terminating byte. In the last permitted position only a single
        // payload bit is left in a u64, so anything above 1 overflows.
        if (pos == maxVarintLen64 - 1 and byte > 1) return error.Overflow;

        return Varint{
            .value = acc | (@as(u64, byte) << shift),
            .bytes_read = pos + 1,
        };
    }

    // Ran out of input without seeing a terminating byte.
    return Varint{
        .value = 0,
        .bytes_read = 0,
    };
}
|
||||
|
||||
/// Encodes `x` as an unsigned varint into the front of `buf` and returns the
/// number of bytes written. Port of Go's `encoding/binary.PutUvarint`:
/// https://golang.org/pkg/encoding/binary/#PutUvarint
///
/// `buf` is indexed without a prior length check, so it must be large enough
/// for the encoding.
pub fn putUvarint(buf: []u8, x: u64) usize {
    var rest = x;
    var written: usize = 0;

    // Emit 7 payload bits per byte, low bits first; the high bit marks that
    // more bytes follow.
    while (rest >= 0x80) : (written += 1) {
        buf[written] = @truncate(u8, rest) | 0x80;
        rest >>= 7;
    }
    // Final byte: what is left is below 0x80, so the high bit stays clear.
    buf[written] = @truncate(u8, rest);

    return written + 1;
}
|
||||
|
||||
/// Iterates over a varint-encoded slice.
/// The first element is the length of the slice, in decoded numbers; the
/// encoded numbers follow it.
const varintSliceIterator = struct {
    /// How many values `next` has yet to return.
    remaining: usize,
    /// The encoded bytes being iterated over.
    arr: []const u8,
    /// Offset into `arr` of the next varint to decode.
    idx: usize,

    /// Decodes and returns the next value, or null once `remaining` values
    /// have been returned.
    ///
    /// Returns `error.Overflow` if the next varint does not fit in 64 bits,
    /// or if `arr` ends before the declared number of values was decoded.
    pub fn next(it: *varintSliceIterator) error{Overflow}!?u64 {
        if (it.remaining == 0)
            return null;

        const value = try uvarint(it.arr[it.idx..]);
        // uvarint signals "ran out of input" with bytes_read == 0 instead of
        // an error. Without this check a truncated buffer would make the
        // iterator hand out fabricated zeros, without advancing, until
        // `remaining` is exhausted.
        if (value.bytes_read == 0)
            return error.Overflow;

        it.idx += value.bytes_read;
        it.remaining -= 1;
        return value.value;
    }
};
|
||||
|
||||
/// Creates an iterator over the varint-encoded slice `arr`.
///
/// The leading varint of `arr` is decoded as the number of values the
/// iterator will yield; iteration starts at the byte right after it.
/// Returns `error.Overflow` if that leading varint does not fit in 64 bits.
pub fn VarintSliceIterator(arr: []const u8) error{Overflow}!varintSliceIterator {
    const header = try uvarint(arr);
    return varintSliceIterator{
        .arr = arr,
        .idx = header.bytes_read,
        .remaining = header.value,
    };
}
|
||||
|
||||
/// Appends the varint encoding of `x` to `arr`.
/// Fails with an `Allocator.Error` if `arr` cannot grow.
pub fn appendUvarint(arr: *ArrayList(u8), x: u64) Allocator.Error!void {
    // Encode into a stack buffer sized for the longest possible varint, then
    // copy only the bytes that were actually written.
    var scratch: [maxVarintLen64]u8 = undefined;
    const len = putUvarint(&scratch, x);
    const encoded = scratch[0..len];
    try arr.appendSlice(encoded);
}
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
test "delta compress/decompress" {
|
||||
@ -91,66 +179,6 @@ test "delta decompress overflow" {
|
||||
}
|
||||
}
|
||||
|
||||
// Represents a variable length integer that we read from a byte stream along
|
||||
// with how many bytes were read to decode it.
|
||||
pub const Varint = struct {
|
||||
value: u64,
|
||||
bytesRead: usize,
|
||||
};
|
||||
|
||||
pub const maxVarintLen64 = 10;
|
||||
|
||||
// https://golang.org/pkg/encoding/binary/#Uvarint
|
||||
pub fn uvarint(buf: []const u8) error{Overflow}!Varint {
|
||||
var x: u64 = 0;
|
||||
var s: u6 = 0;
|
||||
|
||||
for (buf) |b, i| {
|
||||
if (i == maxVarintLen64)
|
||||
// Catch byte reads past maxVarintLen64.
|
||||
// See issue https://golang.org/issues/41185
|
||||
return error.Overflow;
|
||||
|
||||
if (b < 0x80) {
|
||||
if (i == maxVarintLen64 - 1 and b > 1) {
|
||||
return error.Overflow;
|
||||
}
|
||||
return Varint{
|
||||
.value = x | (@as(u64, b) << s),
|
||||
.bytesRead = i + 1,
|
||||
};
|
||||
}
|
||||
x |= (@as(u64, b & 0x7f) << s);
|
||||
s = try std.math.add(u6, s, 7);
|
||||
}
|
||||
|
||||
return Varint{
|
||||
.value = 0,
|
||||
.bytesRead = 0,
|
||||
};
|
||||
}
|
||||
|
||||
// https://golang.org/pkg/encoding/binary/#PutUvarint
|
||||
pub fn putUvarint(buf: []u8, x: u64) usize {
|
||||
var i: usize = 0;
|
||||
var mutX = x;
|
||||
|
||||
while (mutX >= 0x80) {
|
||||
buf[i] = @truncate(u8, mutX) | 0x80;
|
||||
mutX >>= 7;
|
||||
i += 1;
|
||||
}
|
||||
buf[i] = @truncate(u8, mutX);
|
||||
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
pub fn appendUvarint(arr: *ArrayList(u8), x: u64) Allocator.Error!void {
|
||||
var buf: [maxVarintLen64]u8 = undefined;
|
||||
const n = putUvarint(&buf, x);
|
||||
try arr.appendSlice(buf[0..n]);
|
||||
}
|
||||
|
||||
const uvarint_tests = [_]u64{
|
||||
0,
|
||||
1,
|
||||
@ -176,10 +204,25 @@ test "putUvarint/uvarint" {
|
||||
const got = try uvarint(buf[0..n]);
|
||||
|
||||
try testing.expectEqual(x, got.value);
|
||||
try testing.expectEqual(n, got.bytesRead);
|
||||
try testing.expectEqual(n, got.bytes_read);
|
||||
}
|
||||
}
|
||||
|
||||
test "VarintSliceIterator" {
    // Build the encoded form: a length prefix followed by every test value.
    var buf = ArrayList(u8).init(testing.allocator);
    defer buf.deinit();
    try appendUvarint(&buf, uvarint_tests.len);
    for (uvarint_tests) |x|
        try appendUvarint(&buf, x);

    // Every decoded value must match its source, in order.
    var it = try VarintSliceIterator(buf.items);
    var i: usize = 0;
    while (try it.next()) |got| : (i += 1) {
        try testing.expectEqual(uvarint_tests[i], got);
    }
    // expectEqual takes (expected, actual); keeping that order makes the
    // failure message report the right side as "expected".
    try testing.expectEqual(uvarint_tests.len, i);
}
|
||||
|
||||
test "appendUvarint" {
|
||||
for (uvarint_tests) |x| {
|
||||
var buf = ArrayList(u8).init(testing.allocator);
|
||||
|
@ -263,13 +263,15 @@ pub const usersSectionErr = Allocator.Error || error{Overflow};
|
||||
pub fn usersSection(
|
||||
allocator: Allocator,
|
||||
corpus: *const Corpus,
|
||||
gids: UserGids,
|
||||
shells: ShellSections,
|
||||
gids: *const UserGids,
|
||||
shells: *const ShellSections,
|
||||
) usersSectionErr![]const u8 {
|
||||
_ = allocator;
|
||||
_ = corpus;
|
||||
// as of writing each user takes 15 bytes + strings + padding, padded to
|
||||
// 8 bytes. 24 is a very optimistic lower bound.
|
||||
var buf = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len);
|
||||
_ = gids;
|
||||
_ = shells;
|
||||
return buf.toOwnedSlice();
|
||||
}
|
||||
|
||||
pub fn groupMembers(allocator: Allocator, corpus: *const Corpus) Allocator.Error!void {
|
||||
@ -447,6 +449,23 @@ test "test sections" {
|
||||
|
||||
var user_gids = try userGids(allocator, &corpus);
|
||||
defer user_gids.deinit(allocator);
|
||||
|
||||
var users_section = try usersSection(
|
||||
allocator,
|
||||
&corpus,
|
||||
&user_gids,
|
||||
&shell_sections,
|
||||
);
|
||||
defer allocator.free(users_section);
|
||||
}
|
||||
|
||||
test "userGids" {
    // Smoke test: build the test corpus, compute the user gids from it and
    // release both again. No assertions are made on the result.
    var corpus = try testCorpus(testing.allocator);
    defer corpus.deinit();

    var user_gids = try userGids(testing.allocator, &corpus);
    defer user_gids.deinit(testing.allocator);
}
|
||||
|
||||
test "pack gids" {
|
||||
|
Loading…
Reference in New Issue
Block a user