From ec95b231fa7477c52a3e33fcf8f28b239badf2d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Sat, 5 Mar 2022 10:19:42 +0200 Subject: [PATCH] varint iterator --- src/compress.zig | 165 +++++++++++++++++++++++++++++------------------ src/sections.zig | 27 ++++++-- 2 files changed, 127 insertions(+), 65 deletions(-) diff --git a/src/compress.zig b/src/compress.zig index 4851d26..396a623 100644 --- a/src/compress.zig +++ b/src/compress.zig @@ -7,6 +7,7 @@ const std = @import("std"); const ArrayList = std.ArrayList; const Allocator = std.mem.Allocator; +const assert = std.debug.assert; // compresses a strictly incrementing sorted slice of integers using delta // compression. Compression is in-place. @@ -39,6 +40,93 @@ pub fn deltaDecompress(comptime T: type, elems: []T) error{Overflow}!void { } } +// Represents a variable length integer that we read from a byte stream along +// with how many bytes were read to decode it. +pub const Varint = struct { + value: u64, + bytes_read: usize, +}; + +pub const maxVarintLen64 = 10; + +// https://golang.org/pkg/encoding/binary/#Uvarint +pub fn uvarint(buf: []const u8) error{Overflow}!Varint { + var x: u64 = 0; + var s: u6 = 0; + + for (buf) |b, i| { + if (i == maxVarintLen64) + // Catch byte reads past maxVarintLen64. + // See issue https://golang.org/issues/41185 + return error.Overflow; + + if (b < 0x80) { + if (i == maxVarintLen64 - 1 and b > 1) { + return error.Overflow; + } + return Varint{ + .value = x | (@as(u64, b) << s), + .bytes_read = i + 1, + }; + } + x |= (@as(u64, b & 0x7f) << s); + s = try std.math.add(u6, s, 7); + } + + return Varint{ + .value = 0, + .bytes_read = 0, + }; +} + +// https://golang.org/pkg/encoding/binary/#PutUvarint +pub fn putUvarint(buf: []u8, x: u64) usize { + var i: usize = 0; + var mutX = x; + + while (mutX >= 0x80) { + buf[i] = @truncate(u8, mutX) | 0x80; + mutX >>= 7; + i += 1; + } + buf[i] = @truncate(u8, mutX); + + return i + 1; +} + +// VarintSliceIterator iterates over varint-encoded slice. +// The first element is the length of the slice, in decoded numbers. +const varintSliceIterator = struct { + remaining: usize, + arr: []const u8, + idx: usize, + + pub fn next(it: *varintSliceIterator) error{Overflow}!?u64 { + if (it.remaining == 0) + return null; + const value = try uvarint(it.arr[it.idx..]); + it.idx += value.bytes_read; + it.remaining -= 1; + return value.value; + } +}; + +pub fn VarintSliceIterator(arr: []const u8) error{Overflow}!varintSliceIterator { + const firstnumber = try uvarint(arr); + + return varintSliceIterator{ + .remaining = firstnumber.value, + .arr = arr, + .idx = firstnumber.bytes_read, + }; +} + +pub fn appendUvarint(arr: *ArrayList(u8), x: u64) Allocator.Error!void { + var buf: [maxVarintLen64]u8 = undefined; + const n = putUvarint(&buf, x); + try arr.appendSlice(buf[0..n]); +} + const testing = std.testing; test "delta compress/decompress" { @@ -91,66 +179,6 @@ test "delta decompress overflow" { } } -// Represents a variable length integer that we read from a byte stream along -// with how many bytes were read to decode it. -pub const Varint = struct { - value: u64, - bytesRead: usize, -}; - -pub const maxVarintLen64 = 10; - -// https://golang.org/pkg/encoding/binary/#Uvarint -pub fn uvarint(buf: []const u8) error{Overflow}!Varint { - var x: u64 = 0; - var s: u6 = 0; - - for (buf) |b, i| { - if (i == maxVarintLen64) - // Catch byte reads past maxVarintLen64. - // See issue https://golang.org/issues/41185 - return error.Overflow; - - if (b < 0x80) { - if (i == maxVarintLen64 - 1 and b > 1) { - return error.Overflow; - } - return Varint{ - .value = x | (@as(u64, b) << s), - .bytesRead = i + 1, - }; - } - x |= (@as(u64, b & 0x7f) << s); - s = try std.math.add(u6, s, 7); - } - - return Varint{ - .value = 0, - .bytesRead = 0, - }; -} - -// https://golang.org/pkg/encoding/binary/#PutUvarint -pub fn putUvarint(buf: []u8, x: u64) usize { - var i: usize = 0; - var mutX = x; - - while (mutX >= 0x80) { - buf[i] = @truncate(u8, mutX) | 0x80; - mutX >>= 7; - i += 1; - } - buf[i] = @truncate(u8, mutX); - - return i + 1; -} - -pub fn appendUvarint(arr: *ArrayList(u8), x: u64) Allocator.Error!void { - var buf: [maxVarintLen64]u8 = undefined; - const n = putUvarint(&buf, x); - try arr.appendSlice(buf[0..n]); -} - const uvarint_tests = [_]u64{ 0, 1, @@ -176,10 +204,25 @@ test "putUvarint/uvarint" { const got = try uvarint(buf[0..n]); try testing.expectEqual(x, got.value); - try testing.expectEqual(n, got.bytesRead); + try testing.expectEqual(n, got.bytes_read); } } +test "VarintSliceIterator" { + var buf = ArrayList(u8).init(testing.allocator); + defer buf.deinit(); + try appendUvarint(&buf, uvarint_tests.len); + for (uvarint_tests) |x| + try appendUvarint(&buf, x); + + var it = try VarintSliceIterator(buf.items); + var i: usize = 0; + while (try it.next()) |got| : (i += 1) { + try testing.expectEqual(uvarint_tests[i], got); + } + try testing.expectEqual(i, uvarint_tests.len); +} + test "appendUvarint" { for (uvarint_tests) |x| { var buf = ArrayList(u8).init(testing.allocator); diff --git a/src/sections.zig b/src/sections.zig index 1f148c5..015dced 100644 --- a/src/sections.zig +++ b/src/sections.zig @@ -263,13 +263,15 @@ pub const usersSectionErr = Allocator.Error || error{Overflow}; pub fn usersSection( allocator: Allocator, corpus: *const Corpus, - gids: UserGids, - shells: ShellSections, + gids: *const UserGids, + shells: *const ShellSections, ) usersSectionErr![]const u8 { - _ = allocator; - _ = corpus; + // as of writing each user takes 15 bytes + strings + padding, padded to + // 8 bytes. 24 is a very optimistic lower bound. + var buf = try ArrayList(u8).initCapacity(allocator, 24 * corpus.users.len); _ = gids; _ = shells; + return buf.toOwnedSlice(); } pub fn groupMembers(allocator: Allocator, corpus: *const Corpus) Allocator.Error!void { @@ -447,6 +449,23 @@ test "test sections" { var user_gids = try userGids(allocator, &corpus); defer user_gids.deinit(allocator); + + var users_section = try usersSection( + allocator, + &corpus, + &user_gids, + &shell_sections, + ); + defer allocator.free(users_section); +} + +test "userGids" { + const allocator = testing.allocator; + var corpus = try testCorpus(allocator); + defer corpus.deinit(); + + var user_gids = try userGids(allocator, &corpus); + defer user_gids.deinit(allocator); } test "pack gids" {