Merge pull request #18923 from ianic/add_flate
add deflate implemented from first principles
This commit is contained in:
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -4,6 +4,7 @@
|
||||
langref.html.in text eol=lf
|
||||
lib/std/compress/testdata/** binary
|
||||
lib/std/compress/deflate/testdata/** binary
|
||||
lib/std/compress/flate/testdata/** binary
|
||||
|
||||
lib/include/** linguist-vendored
|
||||
lib/libc/** linguist-vendored
|
||||
|
||||
@@ -150,7 +150,7 @@ pub fn build(b: *std.Build) !void {
|
||||
"rfc1951.txt",
|
||||
"rfc1952.txt",
|
||||
"rfc8478.txt",
|
||||
// exclude files from lib/std/compress/deflate/testdata
|
||||
// exclude files from lib/std/compress/flate/testdata
|
||||
".expect",
|
||||
".expect-noinput",
|
||||
".golden",
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
const std = @import("std.zig");
|
||||
|
||||
pub const deflate = @import("compress/deflate.zig");
|
||||
pub const flate = @import("compress/flate.zig");
|
||||
pub const gzip = @import("compress/gzip.zig");
|
||||
pub const zlib = @import("compress/zlib.zig");
|
||||
pub const lzma = @import("compress/lzma.zig");
|
||||
pub const lzma2 = @import("compress/lzma2.zig");
|
||||
pub const xz = @import("compress/xz.zig");
|
||||
pub const zlib = @import("compress/zlib.zig");
|
||||
pub const zstd = @import("compress/zstandard.zig");
|
||||
|
||||
pub fn HashedReader(
|
||||
@@ -69,11 +69,11 @@ pub fn hashedWriter(
|
||||
}
|
||||
|
||||
test {
|
||||
_ = deflate;
|
||||
_ = gzip;
|
||||
_ = lzma;
|
||||
_ = lzma2;
|
||||
_ = xz;
|
||||
_ = zlib;
|
||||
_ = zstd;
|
||||
_ = flate;
|
||||
_ = gzip;
|
||||
_ = zlib;
|
||||
}
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
//! The deflate package is a translation of the Go code of the compress/flate package from
|
||||
//! https://go.googlesource.com/go/+/refs/tags/go1.17/src/compress/flate/
|
||||
|
||||
const deflate = @import("deflate/compressor.zig");
|
||||
const inflate = @import("deflate/decompressor.zig");
|
||||
|
||||
pub const Compression = deflate.Compression;
|
||||
pub const CompressorOptions = deflate.CompressorOptions;
|
||||
pub const Compressor = deflate.Compressor;
|
||||
pub const Decompressor = inflate.Decompressor;
|
||||
|
||||
pub const compressor = deflate.compressor;
|
||||
pub const decompressor = inflate.decompressor;
|
||||
|
||||
/// Copies elements from a source `src` slice into a destination `dst` slice.
|
||||
/// The copy never returns an error but might not be complete if the destination is too small.
|
||||
/// Returns the number of elements copied, which will be the minimum of `src.len` and `dst.len`.
|
||||
/// TODO: remove this smelly function
|
||||
pub fn copy(dst: []u8, src: []const u8) usize {
|
||||
if (dst.len <= src.len) {
|
||||
@memcpy(dst, src[0..dst.len]);
|
||||
return dst.len;
|
||||
} else {
|
||||
@memcpy(dst[0..src.len], src);
|
||||
return src.len;
|
||||
}
|
||||
}
|
||||
|
||||
test {
|
||||
_ = @import("deflate/token.zig");
|
||||
_ = @import("deflate/bits_utils.zig");
|
||||
_ = @import("deflate/dict_decoder.zig");
|
||||
|
||||
_ = @import("deflate/huffman_code.zig");
|
||||
_ = @import("deflate/huffman_bit_writer.zig");
|
||||
|
||||
_ = @import("deflate/compressor.zig");
|
||||
_ = @import("deflate/compressor_test.zig");
|
||||
|
||||
_ = @import("deflate/deflate_fast.zig");
|
||||
_ = @import("deflate/deflate_fast_test.zig");
|
||||
|
||||
_ = @import("deflate/decompressor.zig");
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
const math = @import("std").math;
|
||||
|
||||
// Reverse bit-by-bit a N-bit code.
|
||||
pub fn bitReverse(comptime T: type, value: T, N: usize) T {
|
||||
const r = @bitReverse(value);
|
||||
return r >> @as(math.Log2Int(T), @intCast(@typeInfo(T).Int.bits - N));
|
||||
}
|
||||
|
||||
test "bitReverse" {
|
||||
const std = @import("std");
|
||||
|
||||
const ReverseBitsTest = struct {
|
||||
in: u16,
|
||||
bit_count: u5,
|
||||
out: u16,
|
||||
};
|
||||
|
||||
const reverse_bits_tests = [_]ReverseBitsTest{
|
||||
.{ .in = 1, .bit_count = 1, .out = 1 },
|
||||
.{ .in = 1, .bit_count = 2, .out = 2 },
|
||||
.{ .in = 1, .bit_count = 3, .out = 4 },
|
||||
.{ .in = 1, .bit_count = 4, .out = 8 },
|
||||
.{ .in = 1, .bit_count = 5, .out = 16 },
|
||||
.{ .in = 17, .bit_count = 5, .out = 17 },
|
||||
.{ .in = 257, .bit_count = 9, .out = 257 },
|
||||
.{ .in = 29, .bit_count = 5, .out = 23 },
|
||||
};
|
||||
|
||||
for (reverse_bits_tests) |h| {
|
||||
const v = bitReverse(u16, h.in, h.bit_count);
|
||||
try std.testing.expectEqual(h.out, v);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,531 +0,0 @@
|
||||
const std = @import("std");
|
||||
const expect = std.testing.expect;
|
||||
const fifo = std.fifo;
|
||||
const io = std.io;
|
||||
const math = std.math;
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
|
||||
const ArrayList = std.ArrayList;
|
||||
|
||||
const deflate = @import("compressor.zig");
|
||||
const inflate = @import("decompressor.zig");
|
||||
|
||||
const compressor = deflate.compressor;
|
||||
const decompressor = inflate.decompressor;
|
||||
const huffman_only = deflate.huffman_only;
|
||||
|
||||
fn testSync(level: deflate.Compression, input: []const u8) !void {
|
||||
if (input.len == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
var divided_buf = fifo
|
||||
.LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
|
||||
.init(testing.allocator);
|
||||
defer divided_buf.deinit();
|
||||
var whole_buf = std.ArrayList(u8).init(testing.allocator);
|
||||
defer whole_buf.deinit();
|
||||
|
||||
const multi_writer = io.multiWriter(.{
|
||||
divided_buf.writer(),
|
||||
whole_buf.writer(),
|
||||
}).writer();
|
||||
|
||||
var comp = try compressor(
|
||||
testing.allocator,
|
||||
multi_writer,
|
||||
.{ .level = level },
|
||||
);
|
||||
defer comp.deinit();
|
||||
|
||||
{
|
||||
var decomp = try decompressor(
|
||||
testing.allocator,
|
||||
divided_buf.reader(),
|
||||
null,
|
||||
);
|
||||
defer decomp.deinit();
|
||||
|
||||
// Write first half of the input and flush()
|
||||
const half: usize = (input.len + 1) / 2;
|
||||
var half_len: usize = half - 0;
|
||||
{
|
||||
_ = try comp.writer().writeAll(input[0..half]);
|
||||
|
||||
// Flush
|
||||
try comp.flush();
|
||||
|
||||
// Read back
|
||||
const decompressed = try testing.allocator.alloc(u8, half_len);
|
||||
defer testing.allocator.free(decompressed);
|
||||
|
||||
const read = try decomp.reader().readAll(decompressed); // read at least half
|
||||
try testing.expectEqual(half_len, read);
|
||||
try testing.expectEqualSlices(u8, input[0..half], decompressed);
|
||||
}
|
||||
|
||||
// Write last half of the input and close()
|
||||
half_len = input.len - half;
|
||||
{
|
||||
_ = try comp.writer().writeAll(input[half..]);
|
||||
|
||||
// Close
|
||||
try comp.close();
|
||||
|
||||
// Read back
|
||||
const decompressed = try testing.allocator.alloc(u8, half_len);
|
||||
defer testing.allocator.free(decompressed);
|
||||
|
||||
var read = try decomp.reader().readAll(decompressed);
|
||||
try testing.expectEqual(half_len, read);
|
||||
try testing.expectEqualSlices(u8, input[half..], decompressed);
|
||||
|
||||
// Extra read
|
||||
var final: [10]u8 = undefined;
|
||||
read = try decomp.reader().readAll(&final);
|
||||
try testing.expectEqual(@as(usize, 0), read); // expect ended stream to return 0 bytes
|
||||
|
||||
try decomp.close();
|
||||
}
|
||||
}
|
||||
|
||||
_ = try comp.writer().writeAll(input);
|
||||
try comp.close();
|
||||
|
||||
// stream should work for ordinary reader too (reading whole_buf in one go)
|
||||
const whole_buf_reader = io.fixedBufferStream(whole_buf.items).reader();
|
||||
var decomp = try decompressor(testing.allocator, whole_buf_reader, null);
|
||||
defer decomp.deinit();
|
||||
|
||||
const decompressed = try testing.allocator.alloc(u8, input.len);
|
||||
defer testing.allocator.free(decompressed);
|
||||
|
||||
_ = try decomp.reader().readAll(decompressed);
|
||||
try decomp.close();
|
||||
|
||||
try testing.expectEqualSlices(u8, input, decompressed);
|
||||
}
|
||||
|
||||
fn testToFromWithLevelAndLimit(level: deflate.Compression, input: []const u8, limit: u32) !void {
|
||||
var compressed = std.ArrayList(u8).init(testing.allocator);
|
||||
defer compressed.deinit();
|
||||
|
||||
var comp = try compressor(testing.allocator, compressed.writer(), .{ .level = level });
|
||||
defer comp.deinit();
|
||||
|
||||
try comp.writer().writeAll(input);
|
||||
try comp.close();
|
||||
|
||||
if (limit > 0) {
|
||||
try expect(compressed.items.len <= limit);
|
||||
}
|
||||
|
||||
var fib = io.fixedBufferStream(compressed.items);
|
||||
var decomp = try decompressor(testing.allocator, fib.reader(), null);
|
||||
defer decomp.deinit();
|
||||
|
||||
const decompressed = try testing.allocator.alloc(u8, input.len);
|
||||
defer testing.allocator.free(decompressed);
|
||||
|
||||
const read: usize = try decomp.reader().readAll(decompressed);
|
||||
try testing.expectEqual(input.len, read);
|
||||
try testing.expectEqualSlices(u8, input, decompressed);
|
||||
|
||||
if (false) {
|
||||
// TODO: this test has regressed
|
||||
try testSync(level, input);
|
||||
}
|
||||
}
|
||||
|
||||
fn testToFromWithLimit(input: []const u8, limit: [11]u32) !void {
|
||||
try testToFromWithLevelAndLimit(.no_compression, input, limit[0]);
|
||||
try testToFromWithLevelAndLimit(.best_speed, input, limit[1]);
|
||||
try testToFromWithLevelAndLimit(.level_2, input, limit[2]);
|
||||
try testToFromWithLevelAndLimit(.level_3, input, limit[3]);
|
||||
try testToFromWithLevelAndLimit(.level_4, input, limit[4]);
|
||||
try testToFromWithLevelAndLimit(.level_5, input, limit[5]);
|
||||
try testToFromWithLevelAndLimit(.level_6, input, limit[6]);
|
||||
try testToFromWithLevelAndLimit(.level_7, input, limit[7]);
|
||||
try testToFromWithLevelAndLimit(.level_8, input, limit[8]);
|
||||
try testToFromWithLevelAndLimit(.best_compression, input, limit[9]);
|
||||
try testToFromWithLevelAndLimit(.huffman_only, input, limit[10]);
|
||||
}
|
||||
|
||||
test "deflate/inflate" {
|
||||
const limits = [_]u32{0} ** 11;
|
||||
|
||||
var test0 = [_]u8{};
|
||||
var test1 = [_]u8{0x11};
|
||||
var test2 = [_]u8{ 0x11, 0x12 };
|
||||
var test3 = [_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 };
|
||||
var test4 = [_]u8{ 0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13 };
|
||||
|
||||
try testToFromWithLimit(&test0, limits);
|
||||
try testToFromWithLimit(&test1, limits);
|
||||
try testToFromWithLimit(&test2, limits);
|
||||
try testToFromWithLimit(&test3, limits);
|
||||
try testToFromWithLimit(&test4, limits);
|
||||
|
||||
var large_data_chunk = try testing.allocator.alloc(u8, 100_000);
|
||||
defer testing.allocator.free(large_data_chunk);
|
||||
// fill with random data
|
||||
for (large_data_chunk, 0..) |_, i| {
|
||||
large_data_chunk[i] = @as(u8, @truncate(i)) *% @as(u8, @truncate(i));
|
||||
}
|
||||
try testToFromWithLimit(large_data_chunk, limits);
|
||||
}
|
||||
|
||||
test "very long sparse chunk" {
|
||||
// A SparseReader returns a stream consisting of 0s ending with 65,536 (1<<16) 1s.
|
||||
// This tests missing hash references in a very large input.
|
||||
const SparseReader = struct {
|
||||
l: usize, // length
|
||||
cur: usize, // current position
|
||||
|
||||
const Self = @This();
|
||||
const Error = error{};
|
||||
|
||||
pub const Reader = io.Reader(*Self, Error, read);
|
||||
|
||||
pub fn reader(self: *Self) Reader {
|
||||
return .{ .context = self };
|
||||
}
|
||||
|
||||
fn read(s: *Self, b: []u8) Error!usize {
|
||||
var n: usize = 0; // amount read
|
||||
|
||||
if (s.cur >= s.l) {
|
||||
return 0;
|
||||
}
|
||||
n = b.len;
|
||||
var cur = s.cur + n;
|
||||
if (cur > s.l) {
|
||||
n -= cur - s.l;
|
||||
cur = s.l;
|
||||
}
|
||||
for (b[0..n], 0..) |_, i| {
|
||||
if (s.cur + i >= s.l -| (1 << 16)) {
|
||||
b[i] = 1;
|
||||
} else {
|
||||
b[i] = 0;
|
||||
}
|
||||
}
|
||||
s.cur = cur;
|
||||
return n;
|
||||
}
|
||||
};
|
||||
|
||||
var comp = try compressor(
|
||||
testing.allocator,
|
||||
io.null_writer,
|
||||
.{ .level = .best_speed },
|
||||
);
|
||||
defer comp.deinit();
|
||||
var writer = comp.writer();
|
||||
|
||||
var sparse = SparseReader{ .l = 0x23e8, .cur = 0 };
|
||||
var reader = sparse.reader();
|
||||
|
||||
var read: usize = 1;
|
||||
var written: usize = 0;
|
||||
while (read > 0) {
|
||||
var buf: [1 << 15]u8 = undefined; // 32,768 bytes buffer
|
||||
read = try reader.read(&buf);
|
||||
written += try writer.write(buf[0..read]);
|
||||
}
|
||||
try testing.expectEqual(@as(usize, 0x23e8), written);
|
||||
}
|
||||
|
||||
test "compressor reset" {
|
||||
for (std.enums.values(deflate.Compression)) |c| {
|
||||
try testWriterReset(c, null);
|
||||
try testWriterReset(c, "dict");
|
||||
try testWriterReset(c, "hello");
|
||||
}
|
||||
}
|
||||
|
||||
fn testWriterReset(level: deflate.Compression, dict: ?[]const u8) !void {
|
||||
const filler = struct {
|
||||
fn writeData(c: anytype) !void {
|
||||
const msg = "all your base are belong to us";
|
||||
try c.writer().writeAll(msg);
|
||||
try c.flush();
|
||||
|
||||
const hello = "hello world";
|
||||
var i: usize = 0;
|
||||
while (i < 1024) : (i += 1) {
|
||||
try c.writer().writeAll(hello);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (i < 65000) : (i += 1) {
|
||||
try c.writer().writeAll("x");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
var buf1 = ArrayList(u8).init(testing.allocator);
|
||||
defer buf1.deinit();
|
||||
var buf2 = ArrayList(u8).init(testing.allocator);
|
||||
defer buf2.deinit();
|
||||
|
||||
var comp = try compressor(
|
||||
testing.allocator,
|
||||
buf1.writer(),
|
||||
.{ .level = level, .dictionary = dict },
|
||||
);
|
||||
defer comp.deinit();
|
||||
|
||||
try filler.writeData(&comp);
|
||||
try comp.close();
|
||||
|
||||
comp.reset(buf2.writer());
|
||||
try filler.writeData(&comp);
|
||||
try comp.close();
|
||||
|
||||
try testing.expectEqualSlices(u8, buf1.items, buf2.items);
|
||||
}
|
||||
|
||||
test "decompressor dictionary" {
|
||||
const dict = "hello world"; // dictionary
|
||||
const text = "hello again world";
|
||||
|
||||
var compressed = fifo
|
||||
.LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
|
||||
.init(testing.allocator);
|
||||
defer compressed.deinit();
|
||||
|
||||
var comp = try compressor(
|
||||
testing.allocator,
|
||||
compressed.writer(),
|
||||
.{
|
||||
.level = .level_5,
|
||||
.dictionary = null, // no dictionary
|
||||
},
|
||||
);
|
||||
defer comp.deinit();
|
||||
|
||||
// imitate a compressor with a dictionary
|
||||
try comp.writer().writeAll(dict);
|
||||
try comp.flush();
|
||||
compressed.discard(compressed.readableLength()); // empty the output
|
||||
try comp.writer().writeAll(text);
|
||||
try comp.close();
|
||||
|
||||
const decompressed = try testing.allocator.alloc(u8, text.len);
|
||||
defer testing.allocator.free(decompressed);
|
||||
|
||||
var decomp = try decompressor(
|
||||
testing.allocator,
|
||||
compressed.reader(),
|
||||
dict,
|
||||
);
|
||||
defer decomp.deinit();
|
||||
|
||||
_ = try decomp.reader().readAll(decompressed);
|
||||
try testing.expectEqualSlices(u8, "hello again world", decompressed);
|
||||
}
|
||||
|
||||
test "compressor dictionary" {
|
||||
const dict = "hello world";
|
||||
const text = "hello again world";
|
||||
|
||||
var compressed_nd = fifo
|
||||
.LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
|
||||
.init(testing.allocator); // compressed with no dictionary
|
||||
defer compressed_nd.deinit();
|
||||
|
||||
var compressed_d = ArrayList(u8).init(testing.allocator); // compressed with a dictionary
|
||||
defer compressed_d.deinit();
|
||||
|
||||
// imitate a compressor with a dictionary
|
||||
var comp_nd = try compressor(
|
||||
testing.allocator,
|
||||
compressed_nd.writer(),
|
||||
.{
|
||||
.level = .level_5,
|
||||
.dictionary = null, // no dictionary
|
||||
},
|
||||
);
|
||||
defer comp_nd.deinit();
|
||||
try comp_nd.writer().writeAll(dict);
|
||||
try comp_nd.flush();
|
||||
compressed_nd.discard(compressed_nd.readableLength()); // empty the output
|
||||
try comp_nd.writer().writeAll(text);
|
||||
try comp_nd.close();
|
||||
|
||||
// use a compressor with a dictionary
|
||||
var comp_d = try compressor(
|
||||
testing.allocator,
|
||||
compressed_d.writer(),
|
||||
.{
|
||||
.level = .level_5,
|
||||
.dictionary = dict, // with a dictionary
|
||||
},
|
||||
);
|
||||
defer comp_d.deinit();
|
||||
try comp_d.writer().writeAll(text);
|
||||
try comp_d.close();
|
||||
|
||||
try testing.expectEqualSlices(u8, compressed_d.items, compressed_nd.readableSlice(0));
|
||||
}
|
||||
|
||||
// Update the hash for best_speed only if d.index < d.maxInsertIndex
|
||||
// See https://golang.org/issue/2508
|
||||
test "Go non-regression test for 2508" {
|
||||
var comp = try compressor(
|
||||
testing.allocator,
|
||||
io.null_writer,
|
||||
.{ .level = .best_speed },
|
||||
);
|
||||
defer comp.deinit();
|
||||
|
||||
var buf = [_]u8{0} ** 1024;
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < 131_072) : (i += 1) {
|
||||
try comp.writer().writeAll(&buf);
|
||||
try comp.close();
|
||||
}
|
||||
}
|
||||
|
||||
test "deflate/inflate string" {
|
||||
const StringTest = struct {
|
||||
filename: []const u8,
|
||||
limit: [11]u32,
|
||||
};
|
||||
|
||||
const deflate_inflate_string_tests = [_]StringTest{
|
||||
.{
|
||||
.filename = "compress-e.txt",
|
||||
.limit = [11]u32{
|
||||
100_018, // no_compression
|
||||
50_650, // best_speed
|
||||
50_960, // 2
|
||||
51_150, // 3
|
||||
50_930, // 4
|
||||
50_790, // 5
|
||||
50_790, // 6
|
||||
50_790, // 7
|
||||
50_790, // 8
|
||||
50_790, // best_compression
|
||||
43_683, // huffman_only
|
||||
},
|
||||
},
|
||||
.{
|
||||
.filename = "rfc1951.txt",
|
||||
.limit = [11]u32{
|
||||
36_954, // no_compression
|
||||
12_952, // best_speed
|
||||
12_228, // 2
|
||||
12_016, // 3
|
||||
11_466, // 4
|
||||
11_191, // 5
|
||||
11_129, // 6
|
||||
11_120, // 7
|
||||
11_112, // 8
|
||||
11_109, // best_compression
|
||||
20_273, // huffman_only
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
inline for (deflate_inflate_string_tests) |t| {
|
||||
const golden = @embedFile("testdata/" ++ t.filename);
|
||||
try testToFromWithLimit(golden, t.limit);
|
||||
}
|
||||
}
|
||||
|
||||
test "inflate reset" {
|
||||
const strings = [_][]const u8{
|
||||
"lorem ipsum izzle fo rizzle",
|
||||
"the quick brown fox jumped over",
|
||||
};
|
||||
|
||||
var compressed_strings = [_]ArrayList(u8){
|
||||
ArrayList(u8).init(testing.allocator),
|
||||
ArrayList(u8).init(testing.allocator),
|
||||
};
|
||||
defer compressed_strings[0].deinit();
|
||||
defer compressed_strings[1].deinit();
|
||||
|
||||
for (strings, 0..) |s, i| {
|
||||
var comp = try compressor(
|
||||
testing.allocator,
|
||||
compressed_strings[i].writer(),
|
||||
.{ .level = .level_6 },
|
||||
);
|
||||
defer comp.deinit();
|
||||
|
||||
try comp.writer().writeAll(s);
|
||||
try comp.close();
|
||||
}
|
||||
|
||||
var fib = io.fixedBufferStream(compressed_strings[0].items);
|
||||
var decomp = try decompressor(testing.allocator, fib.reader(), null);
|
||||
defer decomp.deinit();
|
||||
|
||||
const decompressed_0: []u8 = try decomp.reader()
|
||||
.readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||
defer testing.allocator.free(decompressed_0);
|
||||
|
||||
fib = io.fixedBufferStream(compressed_strings[1].items);
|
||||
try decomp.reset(fib.reader(), null);
|
||||
|
||||
const decompressed_1: []u8 = try decomp.reader()
|
||||
.readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||
defer testing.allocator.free(decompressed_1);
|
||||
|
||||
try decomp.close();
|
||||
|
||||
try testing.expectEqualSlices(u8, strings[0], decompressed_0);
|
||||
try testing.expectEqualSlices(u8, strings[1], decompressed_1);
|
||||
}
|
||||
|
||||
test "inflate reset dictionary" {
|
||||
const dict = "the lorem fox";
|
||||
const strings = [_][]const u8{
|
||||
"lorem ipsum izzle fo rizzle",
|
||||
"the quick brown fox jumped over",
|
||||
};
|
||||
|
||||
var compressed_strings = [_]ArrayList(u8){
|
||||
ArrayList(u8).init(testing.allocator),
|
||||
ArrayList(u8).init(testing.allocator),
|
||||
};
|
||||
defer compressed_strings[0].deinit();
|
||||
defer compressed_strings[1].deinit();
|
||||
|
||||
for (strings, 0..) |s, i| {
|
||||
var comp = try compressor(
|
||||
testing.allocator,
|
||||
compressed_strings[i].writer(),
|
||||
.{ .level = .level_6 },
|
||||
);
|
||||
defer comp.deinit();
|
||||
|
||||
try comp.writer().writeAll(s);
|
||||
try comp.close();
|
||||
}
|
||||
|
||||
var fib = io.fixedBufferStream(compressed_strings[0].items);
|
||||
var decomp = try decompressor(testing.allocator, fib.reader(), dict);
|
||||
defer decomp.deinit();
|
||||
|
||||
const decompressed_0: []u8 = try decomp.reader()
|
||||
.readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||
defer testing.allocator.free(decompressed_0);
|
||||
|
||||
fib = io.fixedBufferStream(compressed_strings[1].items);
|
||||
try decomp.reset(fib.reader(), dict);
|
||||
|
||||
const decompressed_1: []u8 = try decomp.reader()
|
||||
.readAllAlloc(testing.allocator, math.maxInt(usize));
|
||||
defer testing.allocator.free(decompressed_1);
|
||||
|
||||
try decomp.close();
|
||||
|
||||
try testing.expectEqualSlices(u8, strings[0], decompressed_0);
|
||||
try testing.expectEqualSlices(u8, strings[1], decompressed_1);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,28 +0,0 @@
|
||||
// Deflate
|
||||
|
||||
// Biggest block size for uncompressed block.
|
||||
pub const max_store_block_size = 65535;
|
||||
// The special code used to mark the end of a block.
|
||||
pub const end_block_marker = 256;
|
||||
|
||||
// LZ77
|
||||
|
||||
// The smallest match length per the RFC section 3.2.5
|
||||
pub const base_match_length = 3;
|
||||
// The smallest match offset.
|
||||
pub const base_match_offset = 1;
|
||||
// The largest match length.
|
||||
pub const max_match_length = 258;
|
||||
// The largest match offset.
|
||||
pub const max_match_offset = 1 << 15;
|
||||
|
||||
// Huffman Codes
|
||||
|
||||
// The largest offset code.
|
||||
pub const offset_code_count = 30;
|
||||
// Max number of frequencies used for a Huffman Code
|
||||
// Possible lengths are codegenCodeCount (19), offset_code_count (30) and max_num_lit (286).
|
||||
// The largest of these is max_num_lit.
|
||||
pub const max_num_frequencies = max_num_lit;
|
||||
// Maximum number of literals.
|
||||
pub const max_num_lit = 286;
|
||||
@@ -1,728 +0,0 @@
|
||||
// This encoding algorithm, which prioritizes speed over output size, is
|
||||
// based on Snappy's LZ77-style encoder: github.com/golang/snappy
|
||||
|
||||
const std = @import("std");
|
||||
const math = std.math;
|
||||
const mem = std.mem;
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const deflate_const = @import("deflate_const.zig");
|
||||
const deflate = @import("compressor.zig");
|
||||
const token = @import("token.zig");
|
||||
|
||||
const base_match_length = deflate_const.base_match_length;
|
||||
const base_match_offset = deflate_const.base_match_offset;
|
||||
const max_match_length = deflate_const.max_match_length;
|
||||
const max_match_offset = deflate_const.max_match_offset;
|
||||
const max_store_block_size = deflate_const.max_store_block_size;
|
||||
|
||||
const table_bits = 14; // Bits used in the table.
|
||||
const table_mask = table_size - 1; // Mask for table indices. Redundant, but can eliminate bounds checks.
|
||||
const table_shift = 32 - table_bits; // Right-shift to get the table_bits most significant bits of a uint32.
|
||||
const table_size = 1 << table_bits; // Size of the table.
|
||||
|
||||
// Reset the buffer offset when reaching this.
|
||||
// Offsets are stored between blocks as i32 values.
|
||||
// Since the offset we are checking against is at the beginning
|
||||
// of the buffer, we need to subtract the current and input
|
||||
// buffer to not risk overflowing the i32.
|
||||
const buffer_reset = math.maxInt(i32) - max_store_block_size * 2;
|
||||
|
||||
fn load32(b: []u8, i: i32) u32 {
|
||||
const s = b[@as(usize, @intCast(i)) .. @as(usize, @intCast(i)) + 4];
|
||||
return @as(u32, @intCast(s[0])) |
|
||||
@as(u32, @intCast(s[1])) << 8 |
|
||||
@as(u32, @intCast(s[2])) << 16 |
|
||||
@as(u32, @intCast(s[3])) << 24;
|
||||
}
|
||||
|
||||
fn load64(b: []u8, i: i32) u64 {
|
||||
const s = b[@as(usize, @intCast(i))..@as(usize, @intCast(i + 8))];
|
||||
return @as(u64, @intCast(s[0])) |
|
||||
@as(u64, @intCast(s[1])) << 8 |
|
||||
@as(u64, @intCast(s[2])) << 16 |
|
||||
@as(u64, @intCast(s[3])) << 24 |
|
||||
@as(u64, @intCast(s[4])) << 32 |
|
||||
@as(u64, @intCast(s[5])) << 40 |
|
||||
@as(u64, @intCast(s[6])) << 48 |
|
||||
@as(u64, @intCast(s[7])) << 56;
|
||||
}
|
||||
|
||||
fn hash(u: u32) u32 {
|
||||
return (u *% 0x1e35a7bd) >> table_shift;
|
||||
}
|
||||
|
||||
// These constants are defined by the Snappy implementation so that its
|
||||
// assembly implementation can fast-path some 16-bytes-at-a-time copies.
|
||||
// They aren't necessary in the pure Go implementation, and may not be
|
||||
// necessary in Zig, but using the same thresholds doesn't really hurt.
|
||||
const input_margin = 16 - 1;
|
||||
const min_non_literal_block_size = 1 + 1 + input_margin;
|
||||
|
||||
const TableEntry = struct {
|
||||
val: u32, // Value at destination
|
||||
offset: i32,
|
||||
};
|
||||
|
||||
pub fn deflateFast() DeflateFast {
|
||||
return DeflateFast{
|
||||
.table = [_]TableEntry{.{ .val = 0, .offset = 0 }} ** table_size,
|
||||
.prev = undefined,
|
||||
.prev_len = 0,
|
||||
.cur = max_store_block_size,
|
||||
.allocator = undefined,
|
||||
};
|
||||
}
|
||||
|
||||
// DeflateFast maintains the table for matches,
|
||||
// and the previous byte block for cross block matching.
|
||||
pub const DeflateFast = struct {
|
||||
table: [table_size]TableEntry,
|
||||
prev: []u8, // Previous block, zero length if unknown.
|
||||
prev_len: u32, // Previous block length
|
||||
cur: i32, // Current match offset.
|
||||
allocator: Allocator,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn init(self: *Self, allocator: Allocator) !void {
|
||||
self.allocator = allocator;
|
||||
self.prev = try allocator.alloc(u8, max_store_block_size);
|
||||
self.prev_len = 0;
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
self.allocator.free(self.prev);
|
||||
self.prev_len = 0;
|
||||
}
|
||||
|
||||
// Encodes a block given in `src` and appends tokens to `dst` and returns the result.
|
||||
pub fn encode(self: *Self, dst: []token.Token, tokens_count: *u16, src: []u8) void {
|
||||
|
||||
// Ensure that self.cur doesn't wrap.
|
||||
if (self.cur >= buffer_reset) {
|
||||
self.shiftOffsets();
|
||||
}
|
||||
|
||||
// This check isn't in the Snappy implementation, but there, the caller
|
||||
// instead of the callee handles this case.
|
||||
if (src.len < min_non_literal_block_size) {
|
||||
self.cur += max_store_block_size;
|
||||
self.prev_len = 0;
|
||||
emitLiteral(dst, tokens_count, src);
|
||||
return;
|
||||
}
|
||||
|
||||
// s_limit is when to stop looking for offset/length copies. The input_margin
|
||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||
// looking for copies.
|
||||
const s_limit = @as(i32, @intCast(src.len - input_margin));
|
||||
|
||||
// next_emit is where in src the next emitLiteral should start from.
|
||||
var next_emit: i32 = 0;
|
||||
var s: i32 = 0;
|
||||
var cv: u32 = load32(src, s);
|
||||
var next_hash: u32 = hash(cv);
|
||||
|
||||
outer: while (true) {
|
||||
// Copied from the C++ snappy implementation:
|
||||
//
|
||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
// found, start looking only at every other byte. If 32 more bytes are
|
||||
// scanned (or skipped), look at every third byte, etc.. When a match
|
||||
// is found, immediately go back to looking at every byte. This is a
|
||||
// small loss (~5% performance, ~0.1% density) for compressible data
|
||||
// due to more bookkeeping, but for non-compressible data (such as
|
||||
// JPEG) it's a huge win since the compressor quickly "realizes" the
|
||||
// data is incompressible and doesn't bother looking for matches
|
||||
// everywhere.
|
||||
//
|
||||
// The "skip" variable keeps track of how many bytes there are since
|
||||
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
||||
// the number of bytes to move ahead for each iteration.
|
||||
var skip: i32 = 32;
|
||||
|
||||
var next_s: i32 = s;
|
||||
var candidate: TableEntry = undefined;
|
||||
while (true) {
|
||||
s = next_s;
|
||||
const bytes_between_hash_lookups = skip >> 5;
|
||||
next_s = s + bytes_between_hash_lookups;
|
||||
skip += bytes_between_hash_lookups;
|
||||
if (next_s > s_limit) {
|
||||
break :outer;
|
||||
}
|
||||
candidate = self.table[next_hash & table_mask];
|
||||
const now = load32(src, next_s);
|
||||
self.table[next_hash & table_mask] = .{ .offset = s + self.cur, .val = cv };
|
||||
next_hash = hash(now);
|
||||
|
||||
const offset = s - (candidate.offset - self.cur);
|
||||
if (offset > max_match_offset or cv != candidate.val) {
|
||||
// Out of range or not matched.
|
||||
cv = now;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||
// match. But, prior to the match, src[next_emit..s] are unmatched. Emit
|
||||
// them as literal bytes.
|
||||
emitLiteral(dst, tokens_count, src[@as(usize, @intCast(next_emit))..@as(usize, @intCast(s))]);
|
||||
|
||||
// Call emitCopy, and then see if another emitCopy could be our next
|
||||
// move. Repeat until we find no match for the input immediately after
|
||||
// what was consumed by the last emitCopy call.
|
||||
//
|
||||
// If we exit this loop normally then we need to call emitLiteral next,
|
||||
// though we don't yet know how big the literal will be. We handle that
|
||||
// by proceeding to the next iteration of the main loop. We also can
|
||||
// exit this loop via goto if we get close to exhausting the input.
|
||||
while (true) {
|
||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||
// literal bytes prior to s.
|
||||
|
||||
// Extend the 4-byte match as long as possible.
|
||||
//
|
||||
s += 4;
|
||||
const t = candidate.offset - self.cur + 4;
|
||||
const l = self.matchLen(s, t, src);
|
||||
|
||||
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
|
||||
dst[tokens_count.*] = token.matchToken(
|
||||
@as(u32, @intCast(l + 4 - base_match_length)),
|
||||
@as(u32, @intCast(s - t - base_match_offset)),
|
||||
);
|
||||
tokens_count.* += 1;
|
||||
s += l;
|
||||
next_emit = s;
|
||||
if (s >= s_limit) {
|
||||
break :outer;
|
||||
}
|
||||
|
||||
// We could immediately start working at s now, but to improve
|
||||
// compression we first update the hash table at s-1 and at s. If
|
||||
// another emitCopy is not our next move, also calculate next_hash
|
||||
// at s+1. At least on amd64 architecture, these three hash calculations
|
||||
// are faster as one load64 call (with some shifts) instead of
|
||||
// three load32 calls.
|
||||
var x = load64(src, s - 1);
|
||||
const prev_hash = hash(@as(u32, @truncate(x)));
|
||||
self.table[prev_hash & table_mask] = TableEntry{
|
||||
.offset = self.cur + s - 1,
|
||||
.val = @as(u32, @truncate(x)),
|
||||
};
|
||||
x >>= 8;
|
||||
const curr_hash = hash(@as(u32, @truncate(x)));
|
||||
candidate = self.table[curr_hash & table_mask];
|
||||
self.table[curr_hash & table_mask] = TableEntry{
|
||||
.offset = self.cur + s,
|
||||
.val = @as(u32, @truncate(x)),
|
||||
};
|
||||
|
||||
const offset = s - (candidate.offset - self.cur);
|
||||
if (offset > max_match_offset or @as(u32, @truncate(x)) != candidate.val) {
|
||||
cv = @as(u32, @truncate(x >> 8));
|
||||
next_hash = hash(cv);
|
||||
s += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (@as(u32, @intCast(next_emit)) < src.len) {
|
||||
emitLiteral(dst, tokens_count, src[@as(usize, @intCast(next_emit))..]);
|
||||
}
|
||||
self.cur += @as(i32, @intCast(src.len));
|
||||
self.prev_len = @as(u32, @intCast(src.len));
|
||||
@memcpy(self.prev[0..self.prev_len], src);
|
||||
return;
|
||||
}
|
||||
|
||||
fn emitLiteral(dst: []token.Token, tokens_count: *u16, lit: []u8) void {
|
||||
for (lit) |v| {
|
||||
dst[tokens_count.*] = token.literalToken(@as(u32, @intCast(v)));
|
||||
tokens_count.* += 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// matchLen returns the match length between src[s..] and src[t..].
|
||||
// t can be negative to indicate the match is starting in self.prev.
|
||||
// We assume that src[s-4 .. s] and src[t-4 .. t] already match.
|
||||
fn matchLen(self: *Self, s: i32, t: i32, src: []u8) i32 {
|
||||
var s1 = @as(u32, @intCast(s)) + max_match_length - 4;
|
||||
if (s1 > src.len) {
|
||||
s1 = @as(u32, @intCast(src.len));
|
||||
}
|
||||
|
||||
// If we are inside the current block
|
||||
if (t >= 0) {
|
||||
var b = src[@as(usize, @intCast(t))..];
|
||||
const a = src[@as(usize, @intCast(s))..@as(usize, @intCast(s1))];
|
||||
b = b[0..a.len];
|
||||
// Extend the match to be as long as possible.
|
||||
for (a, 0..) |_, i| {
|
||||
if (a[i] != b[i]) {
|
||||
return @as(i32, @intCast(i));
|
||||
}
|
||||
}
|
||||
return @as(i32, @intCast(a.len));
|
||||
}
|
||||
|
||||
// We found a match in the previous block.
|
||||
const tp = @as(i32, @intCast(self.prev_len)) + t;
|
||||
if (tp < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Extend the match to be as long as possible.
|
||||
var a = src[@as(usize, @intCast(s))..@as(usize, @intCast(s1))];
|
||||
var b = self.prev[@as(usize, @intCast(tp))..@as(usize, @intCast(self.prev_len))];
|
||||
if (b.len > a.len) {
|
||||
b = b[0..a.len];
|
||||
}
|
||||
a = a[0..b.len];
|
||||
for (b, 0..) |_, i| {
|
||||
if (a[i] != b[i]) {
|
||||
return @as(i32, @intCast(i));
|
||||
}
|
||||
}
|
||||
|
||||
// If we reached our limit, we matched everything we are
|
||||
// allowed to in the previous block and we return.
|
||||
const n = @as(i32, @intCast(b.len));
|
||||
if (@as(u32, @intCast(s + n)) == s1) {
|
||||
return n;
|
||||
}
|
||||
|
||||
// Continue looking for more matches in the current block.
|
||||
a = src[@as(usize, @intCast(s + n))..@as(usize, @intCast(s1))];
|
||||
b = src[0..a.len];
|
||||
for (a, 0..) |_, i| {
|
||||
if (a[i] != b[i]) {
|
||||
return @as(i32, @intCast(i)) + n;
|
||||
}
|
||||
}
|
||||
return @as(i32, @intCast(a.len)) + n;
|
||||
}
|
||||
|
||||
// Reset resets the encoding history.
|
||||
// This ensures that no matches are made to the previous block.
|
||||
pub fn reset(self: *Self) void {
|
||||
self.prev_len = 0;
|
||||
// Bump the offset, so all matches will fail distance check.
|
||||
// Nothing should be >= self.cur in the table.
|
||||
self.cur += max_match_offset;
|
||||
|
||||
// Protect against self.cur wraparound.
|
||||
if (self.cur >= buffer_reset) {
|
||||
self.shiftOffsets();
|
||||
}
|
||||
}
|
||||
|
||||
// shiftOffsets will shift down all match offset.
|
||||
// This is only called in rare situations to prevent integer overflow.
|
||||
//
|
||||
// See https://golang.org/issue/18636 and https://golang.org/issues/34121.
|
||||
fn shiftOffsets(self: *Self) void {
|
||||
if (self.prev_len == 0) {
|
||||
// We have no history; just clear the table.
|
||||
for (self.table, 0..) |_, i| {
|
||||
self.table[i] = TableEntry{ .val = 0, .offset = 0 };
|
||||
}
|
||||
self.cur = max_match_offset + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
// Shift down everything in the table that isn't already too far away.
|
||||
for (self.table, 0..) |_, i| {
|
||||
var v = self.table[i].offset - self.cur + max_match_offset + 1;
|
||||
if (v < 0) {
|
||||
// We want to reset self.cur to max_match_offset + 1, so we need to shift
|
||||
// all table entries down by (self.cur - (max_match_offset + 1)).
|
||||
// Because we ignore matches > max_match_offset, we can cap
|
||||
// any negative offsets at 0.
|
||||
v = 0;
|
||||
}
|
||||
self.table[i].offset = v;
|
||||
}
|
||||
self.cur = max_match_offset + 1;
|
||||
}
|
||||
};
|
||||
|
||||
test "best speed match 1/3" {
|
||||
if (@import("builtin").os.tag == .wasi) {
|
||||
// https://github.com/ziglang/zig/issues/18885
|
||||
return error.SkipZigTest;
|
||||
}
|
||||
const expectEqual = std.testing.expectEqual;
|
||||
|
||||
{
|
||||
var previous = [_]u8{ 0, 0, 0, 1, 2 };
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 3, 4, 5, 0, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(3, -3, ¤t);
|
||||
try expectEqual(@as(i32, 6), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{ 0, 0, 0, 1, 2 };
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 2, 4, 5, 0, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(3, -3, ¤t);
|
||||
try expectEqual(@as(i32, 3), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{ 0, 0, 0, 1, 1 };
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 3, 4, 5, 0, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(3, -3, ¤t);
|
||||
try expectEqual(@as(i32, 2), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{ 0, 0, 0, 1, 2 };
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(0, -1, ¤t);
|
||||
try expectEqual(@as(i32, 4), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{ 0, 0, 0, 1, 2, 3, 4, 5, 2, 2 };
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(4, -7, ¤t);
|
||||
try expectEqual(@as(i32, 5), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{ 9, 9, 9, 9, 9 };
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(0, -1, ¤t);
|
||||
try expectEqual(@as(i32, 0), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{ 9, 9, 9, 9, 9 };
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(1, 0, ¤t);
|
||||
try expectEqual(@as(i32, 0), got);
|
||||
}
|
||||
}
|
||||
|
||||
test "best speed match 2/3" {
|
||||
if (@import("builtin").os.tag == .wasi) {
|
||||
// https://github.com/ziglang/zig/issues/18885
|
||||
return error.SkipZigTest;
|
||||
}
|
||||
const expectEqual = std.testing.expectEqual;
|
||||
|
||||
{
|
||||
var previous = [_]u8{};
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(1, -5, ¤t);
|
||||
try expectEqual(@as(i32, 0), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{};
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(1, -1, ¤t);
|
||||
try expectEqual(@as(i32, 0), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{};
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(1, 0, ¤t);
|
||||
try expectEqual(@as(i32, 3), got);
|
||||
}
|
||||
{
|
||||
var previous = [_]u8{ 3, 4, 5 };
|
||||
var e = DeflateFast{
|
||||
.prev = &previous,
|
||||
.prev_len = previous.len,
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
var current = [_]u8{ 3, 4, 5 };
|
||||
const got: i32 = e.matchLen(0, -3, ¤t);
|
||||
try expectEqual(@as(i32, 3), got);
|
||||
}
|
||||
}
|
||||
|
||||
test "best speed match 2/2" {
|
||||
const testing = std.testing;
|
||||
const expectEqual = testing.expectEqual;
|
||||
|
||||
const Case = struct {
|
||||
previous: u32,
|
||||
current: u32,
|
||||
s: i32,
|
||||
t: i32,
|
||||
expected: i32,
|
||||
};
|
||||
|
||||
const cases = [_]Case{
|
||||
.{
|
||||
.previous = 1000,
|
||||
.current = 1000,
|
||||
.s = 0,
|
||||
.t = -1000,
|
||||
.expected = max_match_length - 4,
|
||||
},
|
||||
.{
|
||||
.previous = 200,
|
||||
.s = 0,
|
||||
.t = -200,
|
||||
.current = 500,
|
||||
.expected = max_match_length - 4,
|
||||
},
|
||||
.{
|
||||
.previous = 200,
|
||||
.s = 1,
|
||||
.t = 0,
|
||||
.current = 500,
|
||||
.expected = max_match_length - 4,
|
||||
},
|
||||
.{
|
||||
.previous = max_match_length - 4,
|
||||
.s = 0,
|
||||
.t = -(max_match_length - 4),
|
||||
.current = 500,
|
||||
.expected = max_match_length - 4,
|
||||
},
|
||||
.{
|
||||
.previous = 200,
|
||||
.s = 400,
|
||||
.t = -200,
|
||||
.current = 500,
|
||||
.expected = 100,
|
||||
},
|
||||
.{
|
||||
.previous = 10,
|
||||
.s = 400,
|
||||
.t = 200,
|
||||
.current = 500,
|
||||
.expected = 100,
|
||||
},
|
||||
};
|
||||
|
||||
for (cases) |c| {
|
||||
const previous = try testing.allocator.alloc(u8, c.previous);
|
||||
defer testing.allocator.free(previous);
|
||||
@memset(previous, 0);
|
||||
|
||||
const current = try testing.allocator.alloc(u8, c.current);
|
||||
defer testing.allocator.free(current);
|
||||
@memset(current, 0);
|
||||
|
||||
var e = DeflateFast{
|
||||
.prev = previous,
|
||||
.prev_len = @as(u32, @intCast(previous.len)),
|
||||
.table = undefined,
|
||||
.allocator = undefined,
|
||||
.cur = 0,
|
||||
};
|
||||
const got: i32 = e.matchLen(c.s, c.t, current);
|
||||
try expectEqual(@as(i32, c.expected), got);
|
||||
}
|
||||
}
|
||||
|
||||
test "best speed shift offsets" {
|
||||
const testing = std.testing;
|
||||
const expect = std.testing.expect;
|
||||
|
||||
// Test if shiftoffsets properly preserves matches and resets out-of-range matches
|
||||
// seen in https://github.com/golang/go/issues/4142
|
||||
var enc = deflateFast();
|
||||
try enc.init(testing.allocator);
|
||||
defer enc.deinit();
|
||||
|
||||
// test_data may not generate internal matches.
|
||||
var test_data = [32]u8{
|
||||
0xf5, 0x25, 0xf2, 0x55, 0xf6, 0xc1, 0x1f, 0x0b, 0x10, 0xa1,
|
||||
0xd0, 0x77, 0x56, 0x38, 0xf1, 0x9c, 0x7f, 0x85, 0xc5, 0xbd,
|
||||
0x16, 0x28, 0xd4, 0xf9, 0x03, 0xd4, 0xc0, 0xa1, 0x1e, 0x58,
|
||||
0x5b, 0xc9,
|
||||
};
|
||||
|
||||
var tokens = [_]token.Token{0} ** 32;
|
||||
var tokens_count: u16 = 0;
|
||||
|
||||
// Encode the testdata with clean state.
|
||||
// Second part should pick up matches from the first block.
|
||||
tokens_count = 0;
|
||||
enc.encode(&tokens, &tokens_count, &test_data);
|
||||
const want_first_tokens = tokens_count;
|
||||
tokens_count = 0;
|
||||
enc.encode(&tokens, &tokens_count, &test_data);
|
||||
const want_second_tokens = tokens_count;
|
||||
|
||||
try expect(want_first_tokens > want_second_tokens);
|
||||
|
||||
// Forward the current indicator to before wraparound.
|
||||
enc.cur = buffer_reset - @as(i32, @intCast(test_data.len));
|
||||
|
||||
// Part 1 before wrap, should match clean state.
|
||||
tokens_count = 0;
|
||||
enc.encode(&tokens, &tokens_count, &test_data);
|
||||
var got = tokens_count;
|
||||
try testing.expectEqual(want_first_tokens, got);
|
||||
|
||||
// Verify we are about to wrap.
|
||||
try testing.expectEqual(@as(i32, buffer_reset), enc.cur);
|
||||
|
||||
// Part 2 should match clean state as well even if wrapped.
|
||||
tokens_count = 0;
|
||||
enc.encode(&tokens, &tokens_count, &test_data);
|
||||
got = tokens_count;
|
||||
try testing.expectEqual(want_second_tokens, got);
|
||||
|
||||
// Verify that we wrapped.
|
||||
try expect(enc.cur < buffer_reset);
|
||||
|
||||
// Forward the current buffer, leaving the matches at the bottom.
|
||||
enc.cur = buffer_reset;
|
||||
enc.shiftOffsets();
|
||||
|
||||
// Ensure that no matches were picked up.
|
||||
tokens_count = 0;
|
||||
enc.encode(&tokens, &tokens_count, &test_data);
|
||||
got = tokens_count;
|
||||
try testing.expectEqual(want_first_tokens, got);
|
||||
}
|
||||
|
||||
test "best speed reset" {
|
||||
// test that encoding is consistent across a warparound of the table offset.
|
||||
// See https://github.com/golang/go/issues/34121
|
||||
const fmt = std.fmt;
|
||||
const testing = std.testing;
|
||||
|
||||
const ArrayList = std.ArrayList;
|
||||
|
||||
const input_size = 65536;
|
||||
const input = try testing.allocator.alloc(u8, input_size);
|
||||
defer testing.allocator.free(input);
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < input_size) : (i += 1) {
|
||||
_ = try fmt.bufPrint(input, "asdfasdfasdfasdf{d}{d}fghfgujyut{d}yutyu\n", .{ i, i, i });
|
||||
}
|
||||
// This is specific to level 1 (best_speed).
|
||||
const level = .best_speed;
|
||||
const offset: usize = 1;
|
||||
|
||||
// We do an encode with a clean buffer to compare.
|
||||
var want = ArrayList(u8).init(testing.allocator);
|
||||
defer want.deinit();
|
||||
var clean_comp = try deflate.compressor(
|
||||
testing.allocator,
|
||||
want.writer(),
|
||||
.{ .level = level },
|
||||
);
|
||||
defer clean_comp.deinit();
|
||||
|
||||
// Write 3 times, close.
|
||||
try clean_comp.writer().writeAll(input);
|
||||
try clean_comp.writer().writeAll(input);
|
||||
try clean_comp.writer().writeAll(input);
|
||||
try clean_comp.close();
|
||||
|
||||
var o = offset;
|
||||
while (o <= 256) : (o *= 2) {
|
||||
var discard = ArrayList(u8).init(testing.allocator);
|
||||
defer discard.deinit();
|
||||
|
||||
var comp = try deflate.compressor(
|
||||
testing.allocator,
|
||||
discard.writer(),
|
||||
.{ .level = level },
|
||||
);
|
||||
defer comp.deinit();
|
||||
|
||||
// Reset until we are right before the wraparound.
|
||||
// Each reset adds max_match_offset to the offset.
|
||||
i = 0;
|
||||
const limit = (buffer_reset - input.len - o - max_match_offset) / max_match_offset;
|
||||
while (i < limit) : (i += 1) {
|
||||
// skip ahead to where we are close to wrap around...
|
||||
comp.reset(discard.writer());
|
||||
}
|
||||
var got = ArrayList(u8).init(testing.allocator);
|
||||
defer got.deinit();
|
||||
comp.reset(got.writer());
|
||||
|
||||
// Write 3 times, close.
|
||||
try comp.writer().writeAll(input);
|
||||
try comp.writer().writeAll(input);
|
||||
try comp.writer().writeAll(input);
|
||||
try comp.close();
|
||||
|
||||
// output must match at wraparound
|
||||
try testing.expectEqualSlices(u8, want.items, got.items);
|
||||
}
|
||||
}
|
||||
@@ -1,160 +0,0 @@
|
||||
const std = @import("std");
|
||||
const expect = std.testing.expect;
|
||||
const io = std.io;
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
|
||||
const ArrayList = std.ArrayList;
|
||||
|
||||
const deflate = @import("compressor.zig");
|
||||
const inflate = @import("decompressor.zig");
|
||||
const deflate_const = @import("deflate_const.zig");
|
||||
|
||||
test "best speed" {
|
||||
// Tests that round-tripping through deflate and then inflate recovers the original input.
|
||||
// The Write sizes are near the thresholds in the compressor.encSpeed method (0, 16, 128), as well
|
||||
// as near `deflate_const.max_store_block_size` (65535).
|
||||
|
||||
var abcabc = try testing.allocator.alloc(u8, 131_072);
|
||||
defer testing.allocator.free(abcabc);
|
||||
|
||||
for (abcabc, 0..) |_, i| {
|
||||
abcabc[i] = @as(u8, @intCast(i % 128));
|
||||
}
|
||||
|
||||
var tc_01 = [_]u32{ 65536, 0 };
|
||||
var tc_02 = [_]u32{ 65536, 1 };
|
||||
var tc_03 = [_]u32{ 65536, 1, 256 };
|
||||
var tc_04 = [_]u32{ 65536, 1, 65536 };
|
||||
var tc_05 = [_]u32{ 65536, 14 };
|
||||
var tc_06 = [_]u32{ 65536, 15 };
|
||||
var tc_07 = [_]u32{ 65536, 16 };
|
||||
var tc_08 = [_]u32{ 65536, 16, 256 };
|
||||
var tc_09 = [_]u32{ 65536, 16, 65536 };
|
||||
var tc_10 = [_]u32{ 65536, 127 };
|
||||
var tc_11 = [_]u32{ 65536, 127 };
|
||||
var tc_12 = [_]u32{ 65536, 128 };
|
||||
var tc_13 = [_]u32{ 65536, 128, 256 };
|
||||
var tc_14 = [_]u32{ 65536, 128, 65536 };
|
||||
var tc_15 = [_]u32{ 65536, 129 };
|
||||
var tc_16 = [_]u32{ 65536, 65536, 256 };
|
||||
var tc_17 = [_]u32{ 65536, 65536, 65536 };
|
||||
const test_cases = [_][]u32{
|
||||
&tc_01, &tc_02, &tc_03, &tc_04, &tc_05, &tc_06, &tc_07, &tc_08, &tc_09, &tc_10,
|
||||
&tc_11, &tc_12, &tc_13, &tc_14, &tc_15, &tc_16, &tc_17,
|
||||
};
|
||||
|
||||
for (test_cases) |tc| {
|
||||
const firsts = [_]u32{ 1, 65534, 65535, 65536, 65537, 131072 };
|
||||
|
||||
for (firsts) |first_n| {
|
||||
tc[0] = first_n;
|
||||
|
||||
const to_flush = [_]bool{ false, true };
|
||||
for (to_flush) |flush| {
|
||||
var compressed = ArrayList(u8).init(testing.allocator);
|
||||
defer compressed.deinit();
|
||||
|
||||
var want = ArrayList(u8).init(testing.allocator);
|
||||
defer want.deinit();
|
||||
|
||||
var comp = try deflate.compressor(
|
||||
testing.allocator,
|
||||
compressed.writer(),
|
||||
.{ .level = .best_speed },
|
||||
);
|
||||
defer comp.deinit();
|
||||
|
||||
for (tc) |n| {
|
||||
try want.appendSlice(abcabc[0..n]);
|
||||
try comp.writer().writeAll(abcabc[0..n]);
|
||||
if (flush) {
|
||||
try comp.flush();
|
||||
}
|
||||
}
|
||||
|
||||
try comp.close();
|
||||
|
||||
const decompressed = try testing.allocator.alloc(u8, want.items.len);
|
||||
defer testing.allocator.free(decompressed);
|
||||
|
||||
var fib = io.fixedBufferStream(compressed.items);
|
||||
var decomp = try inflate.decompressor(testing.allocator, fib.reader(), null);
|
||||
defer decomp.deinit();
|
||||
|
||||
const read = try decomp.reader().readAll(decompressed);
|
||||
try decomp.close();
|
||||
|
||||
try testing.expectEqual(want.items.len, read);
|
||||
try testing.expectEqualSlices(u8, want.items, decompressed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "best speed max match offset" {
|
||||
const abc = "abcdefgh";
|
||||
const xyz = "stuvwxyz";
|
||||
const input_margin = 16 - 1;
|
||||
|
||||
const match_before = [_]bool{ false, true };
|
||||
for (match_before) |do_match_before| {
|
||||
const extras = [_]u32{
|
||||
0,
|
||||
input_margin - 1,
|
||||
input_margin,
|
||||
input_margin + 1,
|
||||
2 * input_margin,
|
||||
};
|
||||
for (extras) |extra| {
|
||||
var offset_adj: i32 = -5;
|
||||
while (offset_adj <= 5) : (offset_adj += 1) {
|
||||
const offset = deflate_const.max_match_offset + offset_adj;
|
||||
|
||||
// Make src to be a []u8 of the form
|
||||
// fmt("{s}{s}{s}{s}{s}", .{abc, zeros0, xyzMaybe, abc, zeros1})
|
||||
// where:
|
||||
// zeros0 is approximately max_match_offset zeros.
|
||||
// xyzMaybe is either xyz or the empty string.
|
||||
// zeros1 is between 0 and 30 zeros.
|
||||
// The difference between the two abc's will be offset, which
|
||||
// is max_match_offset plus or minus a small adjustment.
|
||||
const src_len: usize = @as(usize, @intCast(offset + @as(i32, abc.len) + @as(i32, @intCast(extra))));
|
||||
var src = try testing.allocator.alloc(u8, src_len);
|
||||
defer testing.allocator.free(src);
|
||||
|
||||
@memcpy(src[0..abc.len], abc);
|
||||
if (!do_match_before) {
|
||||
const src_offset: usize = @as(usize, @intCast(offset - @as(i32, xyz.len)));
|
||||
@memcpy(src[src_offset..][0..xyz.len], xyz);
|
||||
}
|
||||
const src_offset: usize = @as(usize, @intCast(offset));
|
||||
@memcpy(src[src_offset..][0..abc.len], abc);
|
||||
|
||||
var compressed = ArrayList(u8).init(testing.allocator);
|
||||
defer compressed.deinit();
|
||||
|
||||
var comp = try deflate.compressor(
|
||||
testing.allocator,
|
||||
compressed.writer(),
|
||||
.{ .level = .best_speed },
|
||||
);
|
||||
defer comp.deinit();
|
||||
try comp.writer().writeAll(src);
|
||||
_ = try comp.close();
|
||||
|
||||
const decompressed = try testing.allocator.alloc(u8, src.len);
|
||||
defer testing.allocator.free(decompressed);
|
||||
|
||||
var fib = io.fixedBufferStream(compressed.items);
|
||||
var decomp = try inflate.decompressor(testing.allocator, fib.reader(), null);
|
||||
defer decomp.deinit();
|
||||
const read = try decomp.reader().readAll(decompressed);
|
||||
try decomp.close();
|
||||
|
||||
try testing.expectEqual(src.len, read);
|
||||
try testing.expectEqualSlices(u8, src, decompressed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,423 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const mem = std.mem;
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
// Implements the LZ77 sliding dictionary as used in decompression.
|
||||
// LZ77 decompresses data through sequences of two forms of commands:
|
||||
//
|
||||
// * Literal insertions: Runs of one or more symbols are inserted into the data
|
||||
// stream as is. This is accomplished through the writeByte method for a
|
||||
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
|
||||
// Any valid stream must start with a literal insertion if no preset dictionary
|
||||
// is used.
|
||||
//
|
||||
// * Backward copies: Runs of one or more symbols are copied from previously
|
||||
// emitted data. Backward copies come as the tuple (dist, length) where dist
|
||||
// determines how far back in the stream to copy from and length determines how
|
||||
// many bytes to copy. Note that it is valid for the length to be greater than
|
||||
// the distance. Since LZ77 uses forward copies, that situation is used to
|
||||
// perform a form of run-length encoding on repeated runs of symbols.
|
||||
// The writeCopy and tryWriteCopy are used to implement this command.
|
||||
//
|
||||
// For performance reasons, this implementation performs little to no sanity
|
||||
// checks about the arguments. As such, the invariants documented for each
|
||||
// method call must be respected.
|
||||
pub const DictDecoder = struct {
|
||||
const Self = @This();
|
||||
|
||||
allocator: Allocator = undefined,
|
||||
|
||||
hist: []u8 = undefined, // Sliding window history
|
||||
|
||||
// Invariant: 0 <= rd_pos <= wr_pos <= hist.len
|
||||
wr_pos: u32 = 0, // Current output position in buffer
|
||||
rd_pos: u32 = 0, // Have emitted hist[0..rd_pos] already
|
||||
full: bool = false, // Has a full window length been written yet?
|
||||
|
||||
// init initializes DictDecoder to have a sliding window dictionary of the given
|
||||
// size. If a preset dict is provided, it will initialize the dictionary with
|
||||
// the contents of dict.
|
||||
pub fn init(self: *Self, allocator: Allocator, size: u32, dict: ?[]const u8) !void {
|
||||
self.allocator = allocator;
|
||||
|
||||
self.hist = try allocator.alloc(u8, size);
|
||||
|
||||
self.wr_pos = 0;
|
||||
|
||||
if (dict != null) {
|
||||
const src = dict.?[dict.?.len -| self.hist.len..];
|
||||
@memcpy(self.hist[0..src.len], src);
|
||||
self.wr_pos = @as(u32, @intCast(dict.?.len));
|
||||
}
|
||||
|
||||
if (self.wr_pos == self.hist.len) {
|
||||
self.wr_pos = 0;
|
||||
self.full = true;
|
||||
}
|
||||
self.rd_pos = self.wr_pos;
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
self.allocator.free(self.hist);
|
||||
}
|
||||
|
||||
// Reports the total amount of historical data in the dictionary.
|
||||
pub fn histSize(self: *Self) u32 {
|
||||
if (self.full) {
|
||||
return @as(u32, @intCast(self.hist.len));
|
||||
}
|
||||
return self.wr_pos;
|
||||
}
|
||||
|
||||
// Reports the number of bytes that can be flushed by readFlush.
|
||||
pub fn availRead(self: *Self) u32 {
|
||||
return self.wr_pos - self.rd_pos;
|
||||
}
|
||||
|
||||
// Reports the available amount of output buffer space.
|
||||
pub fn availWrite(self: *Self) u32 {
|
||||
return @as(u32, @intCast(self.hist.len - self.wr_pos));
|
||||
}
|
||||
|
||||
// Returns a slice of the available buffer to write data to.
|
||||
//
|
||||
// This invariant will be kept: s.len <= availWrite()
|
||||
pub fn writeSlice(self: *Self) []u8 {
|
||||
return self.hist[self.wr_pos..];
|
||||
}
|
||||
|
||||
// Advances the writer pointer by `count`.
|
||||
//
|
||||
// This invariant must be kept: 0 <= count <= availWrite()
|
||||
pub fn writeMark(self: *Self, count: u32) void {
|
||||
assert(0 <= count and count <= self.availWrite());
|
||||
self.wr_pos += count;
|
||||
}
|
||||
|
||||
// Writes a single byte to the dictionary.
|
||||
//
|
||||
// This invariant must be kept: 0 < availWrite()
|
||||
pub fn writeByte(self: *Self, byte: u8) void {
|
||||
self.hist[self.wr_pos] = byte;
|
||||
self.wr_pos += 1;
|
||||
}
|
||||
|
||||
/// TODO: eliminate this function because the callsites should care about whether
|
||||
/// or not their arguments alias and then they should directly call `@memcpy` or
|
||||
/// `mem.copyForwards`.
|
||||
fn copy(dst: []u8, src: []const u8) u32 {
|
||||
if (src.len > dst.len) {
|
||||
mem.copyForwards(u8, dst, src[0..dst.len]);
|
||||
return @as(u32, @intCast(dst.len));
|
||||
}
|
||||
mem.copyForwards(u8, dst[0..src.len], src);
|
||||
return @as(u32, @intCast(src.len));
|
||||
}
|
||||
|
||||
// Copies a string at a given (dist, length) to the output.
|
||||
// This returns the number of bytes copied and may be less than the requested
|
||||
// length if the available space in the output buffer is too small.
|
||||
//
|
||||
// This invariant must be kept: 0 < dist <= histSize()
|
||||
pub fn writeCopy(self: *Self, dist: u32, length: u32) u32 {
|
||||
assert(0 < dist and dist <= self.histSize());
|
||||
const dst_base = self.wr_pos;
|
||||
var dst_pos = dst_base;
|
||||
var src_pos: i32 = @as(i32, @intCast(dst_pos)) - @as(i32, @intCast(dist));
|
||||
var end_pos = dst_pos + length;
|
||||
if (end_pos > self.hist.len) {
|
||||
end_pos = @as(u32, @intCast(self.hist.len));
|
||||
}
|
||||
|
||||
// Copy non-overlapping section after destination position.
|
||||
//
|
||||
// This section is non-overlapping in that the copy length for this section
|
||||
// is always less than or equal to the backwards distance. This can occur
|
||||
// if a distance refers to data that wraps-around in the buffer.
|
||||
// Thus, a backwards copy is performed here; that is, the exact bytes in
|
||||
// the source prior to the copy is placed in the destination.
|
||||
if (src_pos < 0) {
|
||||
src_pos += @as(i32, @intCast(self.hist.len));
|
||||
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@as(usize, @intCast(src_pos))..]);
|
||||
src_pos = 0;
|
||||
}
|
||||
|
||||
// Copy possibly overlapping section before destination position.
|
||||
//
|
||||
// This section can overlap if the copy length for this section is larger
|
||||
// than the backwards distance. This is allowed by LZ77 so that repeated
|
||||
// strings can be succinctly represented using (dist, length) pairs.
|
||||
// Thus, a forwards copy is performed here; that is, the bytes copied is
|
||||
// possibly dependent on the resulting bytes in the destination as the copy
|
||||
// progresses along. This is functionally equivalent to the following:
|
||||
//
|
||||
// var i = 0;
|
||||
// while(i < end_pos - dst_pos) : (i+=1) {
|
||||
// self.hist[dst_pos+i] = self.hist[src_pos+i];
|
||||
// }
|
||||
// dst_pos = end_pos;
|
||||
//
|
||||
while (dst_pos < end_pos) {
|
||||
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@as(usize, @intCast(src_pos))..dst_pos]);
|
||||
}
|
||||
|
||||
self.wr_pos = dst_pos;
|
||||
return dst_pos - dst_base;
|
||||
}
|
||||
|
||||
// Tries to copy a string at a given (distance, length) to the
|
||||
// output. This specialized version is optimized for short distances.
|
||||
//
|
||||
// This method is designed to be inlined for performance reasons.
|
||||
//
|
||||
// This invariant must be kept: 0 < dist <= histSize()
|
||||
pub fn tryWriteCopy(self: *Self, dist: u32, length: u32) u32 {
|
||||
var dst_pos = self.wr_pos;
|
||||
const end_pos = dst_pos + length;
|
||||
if (dst_pos < dist or end_pos > self.hist.len) {
|
||||
return 0;
|
||||
}
|
||||
const dst_base = dst_pos;
|
||||
const src_pos = dst_pos - dist;
|
||||
|
||||
// Copy possibly overlapping section before destination position.
|
||||
while (dst_pos < end_pos) {
|
||||
dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[src_pos..dst_pos]);
|
||||
}
|
||||
|
||||
self.wr_pos = dst_pos;
|
||||
return dst_pos - dst_base;
|
||||
}
|
||||
|
||||
// Returns a slice of the historical buffer that is ready to be
|
||||
// emitted to the user. The data returned by readFlush must be fully consumed
|
||||
// before calling any other DictDecoder methods.
|
||||
pub fn readFlush(self: *Self) []u8 {
|
||||
const to_read = self.hist[self.rd_pos..self.wr_pos];
|
||||
self.rd_pos = self.wr_pos;
|
||||
if (self.wr_pos == self.hist.len) {
|
||||
self.wr_pos = 0;
|
||||
self.rd_pos = 0;
|
||||
self.full = true;
|
||||
}
|
||||
return to_read;
|
||||
}
|
||||
};
|
||||
|
||||
// tests
|
||||
|
||||
test "dictionary decoder" {
|
||||
const ArrayList = std.ArrayList;
|
||||
const testing = std.testing;
|
||||
|
||||
const abc = "ABC\n";
|
||||
const fox = "The quick brown fox jumped over the lazy dog!\n";
|
||||
const poem: []const u8 =
|
||||
\\The Road Not Taken
|
||||
\\Robert Frost
|
||||
\\
|
||||
\\Two roads diverged in a yellow wood,
|
||||
\\And sorry I could not travel both
|
||||
\\And be one traveler, long I stood
|
||||
\\And looked down one as far as I could
|
||||
\\To where it bent in the undergrowth;
|
||||
\\
|
||||
\\Then took the other, as just as fair,
|
||||
\\And having perhaps the better claim,
|
||||
\\Because it was grassy and wanted wear;
|
||||
\\Though as for that the passing there
|
||||
\\Had worn them really about the same,
|
||||
\\
|
||||
\\And both that morning equally lay
|
||||
\\In leaves no step had trodden black.
|
||||
\\Oh, I kept the first for another day!
|
||||
\\Yet knowing how way leads on to way,
|
||||
\\I doubted if I should ever come back.
|
||||
\\
|
||||
\\I shall be telling this with a sigh
|
||||
\\Somewhere ages and ages hence:
|
||||
\\Two roads diverged in a wood, and I-
|
||||
\\I took the one less traveled by,
|
||||
\\And that has made all the difference.
|
||||
\\
|
||||
;
|
||||
|
||||
const uppercase: []const u8 =
|
||||
\\THE ROAD NOT TAKEN
|
||||
\\ROBERT FROST
|
||||
\\
|
||||
\\TWO ROADS DIVERGED IN A YELLOW WOOD,
|
||||
\\AND SORRY I COULD NOT TRAVEL BOTH
|
||||
\\AND BE ONE TRAVELER, LONG I STOOD
|
||||
\\AND LOOKED DOWN ONE AS FAR AS I COULD
|
||||
\\TO WHERE IT BENT IN THE UNDERGROWTH;
|
||||
\\
|
||||
\\THEN TOOK THE OTHER, AS JUST AS FAIR,
|
||||
\\AND HAVING PERHAPS THE BETTER CLAIM,
|
||||
\\BECAUSE IT WAS GRASSY AND WANTED WEAR;
|
||||
\\THOUGH AS FOR THAT THE PASSING THERE
|
||||
\\HAD WORN THEM REALLY ABOUT THE SAME,
|
||||
\\
|
||||
\\AND BOTH THAT MORNING EQUALLY LAY
|
||||
\\IN LEAVES NO STEP HAD TRODDEN BLACK.
|
||||
\\OH, I KEPT THE FIRST FOR ANOTHER DAY!
|
||||
\\YET KNOWING HOW WAY LEADS ON TO WAY,
|
||||
\\I DOUBTED IF I SHOULD EVER COME BACK.
|
||||
\\
|
||||
\\I SHALL BE TELLING THIS WITH A SIGH
|
||||
\\SOMEWHERE AGES AND AGES HENCE:
|
||||
\\TWO ROADS DIVERGED IN A WOOD, AND I-
|
||||
\\I TOOK THE ONE LESS TRAVELED BY,
|
||||
\\AND THAT HAS MADE ALL THE DIFFERENCE.
|
||||
\\
|
||||
;
|
||||
|
||||
const PoemRefs = struct {
|
||||
dist: u32, // Backward distance (0 if this is an insertion)
|
||||
length: u32, // Length of copy or insertion
|
||||
};
|
||||
|
||||
const poem_refs = [_]PoemRefs{
|
||||
.{ .dist = 0, .length = 38 }, .{ .dist = 33, .length = 3 }, .{ .dist = 0, .length = 48 },
|
||||
.{ .dist = 79, .length = 3 }, .{ .dist = 0, .length = 11 }, .{ .dist = 34, .length = 5 },
|
||||
.{ .dist = 0, .length = 6 }, .{ .dist = 23, .length = 7 }, .{ .dist = 0, .length = 8 },
|
||||
.{ .dist = 50, .length = 3 }, .{ .dist = 0, .length = 2 }, .{ .dist = 69, .length = 3 },
|
||||
.{ .dist = 34, .length = 5 }, .{ .dist = 0, .length = 4 }, .{ .dist = 97, .length = 3 },
|
||||
.{ .dist = 0, .length = 4 }, .{ .dist = 43, .length = 5 }, .{ .dist = 0, .length = 6 },
|
||||
.{ .dist = 7, .length = 4 }, .{ .dist = 88, .length = 7 }, .{ .dist = 0, .length = 12 },
|
||||
.{ .dist = 80, .length = 3 }, .{ .dist = 0, .length = 2 }, .{ .dist = 141, .length = 4 },
|
||||
.{ .dist = 0, .length = 1 }, .{ .dist = 196, .length = 3 }, .{ .dist = 0, .length = 3 },
|
||||
.{ .dist = 157, .length = 3 }, .{ .dist = 0, .length = 6 }, .{ .dist = 181, .length = 3 },
|
||||
.{ .dist = 0, .length = 2 }, .{ .dist = 23, .length = 3 }, .{ .dist = 77, .length = 3 },
|
||||
.{ .dist = 28, .length = 5 }, .{ .dist = 128, .length = 3 }, .{ .dist = 110, .length = 4 },
|
||||
.{ .dist = 70, .length = 3 }, .{ .dist = 0, .length = 4 }, .{ .dist = 85, .length = 6 },
|
||||
.{ .dist = 0, .length = 2 }, .{ .dist = 182, .length = 6 }, .{ .dist = 0, .length = 4 },
|
||||
.{ .dist = 133, .length = 3 }, .{ .dist = 0, .length = 7 }, .{ .dist = 47, .length = 5 },
|
||||
.{ .dist = 0, .length = 20 }, .{ .dist = 112, .length = 5 }, .{ .dist = 0, .length = 1 },
|
||||
.{ .dist = 58, .length = 3 }, .{ .dist = 0, .length = 8 }, .{ .dist = 59, .length = 3 },
|
||||
.{ .dist = 0, .length = 4 }, .{ .dist = 173, .length = 3 }, .{ .dist = 0, .length = 5 },
|
||||
.{ .dist = 114, .length = 3 }, .{ .dist = 0, .length = 4 }, .{ .dist = 92, .length = 5 },
|
||||
.{ .dist = 0, .length = 2 }, .{ .dist = 71, .length = 3 }, .{ .dist = 0, .length = 2 },
|
||||
.{ .dist = 76, .length = 5 }, .{ .dist = 0, .length = 1 }, .{ .dist = 46, .length = 3 },
|
||||
.{ .dist = 96, .length = 4 }, .{ .dist = 130, .length = 4 }, .{ .dist = 0, .length = 3 },
|
||||
.{ .dist = 360, .length = 3 }, .{ .dist = 0, .length = 3 }, .{ .dist = 178, .length = 5 },
|
||||
.{ .dist = 0, .length = 7 }, .{ .dist = 75, .length = 3 }, .{ .dist = 0, .length = 3 },
|
||||
.{ .dist = 45, .length = 6 }, .{ .dist = 0, .length = 6 }, .{ .dist = 299, .length = 6 },
|
||||
.{ .dist = 180, .length = 3 }, .{ .dist = 70, .length = 6 }, .{ .dist = 0, .length = 1 },
|
||||
.{ .dist = 48, .length = 3 }, .{ .dist = 66, .length = 4 }, .{ .dist = 0, .length = 3 },
|
||||
.{ .dist = 47, .length = 5 }, .{ .dist = 0, .length = 9 }, .{ .dist = 325, .length = 3 },
|
||||
.{ .dist = 0, .length = 1 }, .{ .dist = 359, .length = 3 }, .{ .dist = 318, .length = 3 },
|
||||
.{ .dist = 0, .length = 2 }, .{ .dist = 199, .length = 3 }, .{ .dist = 0, .length = 1 },
|
||||
.{ .dist = 344, .length = 3 }, .{ .dist = 0, .length = 3 }, .{ .dist = 248, .length = 3 },
|
||||
.{ .dist = 0, .length = 10 }, .{ .dist = 310, .length = 3 }, .{ .dist = 0, .length = 3 },
|
||||
.{ .dist = 93, .length = 6 }, .{ .dist = 0, .length = 3 }, .{ .dist = 252, .length = 3 },
|
||||
.{ .dist = 157, .length = 4 }, .{ .dist = 0, .length = 2 }, .{ .dist = 273, .length = 5 },
|
||||
.{ .dist = 0, .length = 14 }, .{ .dist = 99, .length = 4 }, .{ .dist = 0, .length = 1 },
|
||||
.{ .dist = 464, .length = 4 }, .{ .dist = 0, .length = 2 }, .{ .dist = 92, .length = 4 },
|
||||
.{ .dist = 495, .length = 3 }, .{ .dist = 0, .length = 1 }, .{ .dist = 322, .length = 4 },
|
||||
.{ .dist = 16, .length = 4 }, .{ .dist = 0, .length = 3 }, .{ .dist = 402, .length = 3 },
|
||||
.{ .dist = 0, .length = 2 }, .{ .dist = 237, .length = 4 }, .{ .dist = 0, .length = 2 },
|
||||
.{ .dist = 432, .length = 4 }, .{ .dist = 0, .length = 1 }, .{ .dist = 483, .length = 5 },
|
||||
.{ .dist = 0, .length = 2 }, .{ .dist = 294, .length = 4 }, .{ .dist = 0, .length = 2 },
|
||||
.{ .dist = 306, .length = 3 }, .{ .dist = 113, .length = 5 }, .{ .dist = 0, .length = 1 },
|
||||
.{ .dist = 26, .length = 4 }, .{ .dist = 164, .length = 3 }, .{ .dist = 488, .length = 4 },
|
||||
.{ .dist = 0, .length = 1 }, .{ .dist = 542, .length = 3 }, .{ .dist = 248, .length = 6 },
|
||||
.{ .dist = 0, .length = 5 }, .{ .dist = 205, .length = 3 }, .{ .dist = 0, .length = 8 },
|
||||
.{ .dist = 48, .length = 3 }, .{ .dist = 449, .length = 6 }, .{ .dist = 0, .length = 2 },
|
||||
.{ .dist = 192, .length = 3 }, .{ .dist = 328, .length = 4 }, .{ .dist = 9, .length = 5 },
|
||||
.{ .dist = 433, .length = 3 }, .{ .dist = 0, .length = 3 }, .{ .dist = 622, .length = 25 },
|
||||
.{ .dist = 615, .length = 5 }, .{ .dist = 46, .length = 5 }, .{ .dist = 0, .length = 2 },
|
||||
.{ .dist = 104, .length = 3 }, .{ .dist = 475, .length = 10 }, .{ .dist = 549, .length = 3 },
|
||||
.{ .dist = 0, .length = 4 }, .{ .dist = 597, .length = 8 }, .{ .dist = 314, .length = 3 },
|
||||
.{ .dist = 0, .length = 1 }, .{ .dist = 473, .length = 6 }, .{ .dist = 317, .length = 5 },
|
||||
.{ .dist = 0, .length = 1 }, .{ .dist = 400, .length = 3 }, .{ .dist = 0, .length = 3 },
|
||||
.{ .dist = 109, .length = 3 }, .{ .dist = 151, .length = 3 }, .{ .dist = 48, .length = 4 },
|
||||
.{ .dist = 0, .length = 4 }, .{ .dist = 125, .length = 3 }, .{ .dist = 108, .length = 3 },
|
||||
.{ .dist = 0, .length = 2 },
|
||||
};
|
||||
|
||||
var got_list = ArrayList(u8).init(testing.allocator);
|
||||
defer got_list.deinit();
|
||||
var got = got_list.writer();
|
||||
|
||||
var want_list = ArrayList(u8).init(testing.allocator);
|
||||
defer want_list.deinit();
|
||||
var want = want_list.writer();
|
||||
|
||||
var dd = DictDecoder{};
|
||||
try dd.init(testing.allocator, 1 << 11, null);
|
||||
defer dd.deinit();
|
||||
|
||||
const util = struct {
|
||||
fn writeCopy(dst_dd: *DictDecoder, dst: anytype, dist: u32, length: u32) !void {
|
||||
var len = length;
|
||||
while (len > 0) {
|
||||
var n = dst_dd.tryWriteCopy(dist, len);
|
||||
if (n == 0) {
|
||||
n = dst_dd.writeCopy(dist, len);
|
||||
}
|
||||
|
||||
len -= n;
|
||||
if (dst_dd.availWrite() == 0) {
|
||||
_ = try dst.write(dst_dd.readFlush());
|
||||
}
|
||||
}
|
||||
}
|
||||
fn writeString(dst_dd: *DictDecoder, dst: anytype, str: []const u8) !void {
|
||||
var string = str;
|
||||
while (string.len > 0) {
|
||||
const cnt = DictDecoder.copy(dst_dd.writeSlice(), string);
|
||||
dst_dd.writeMark(cnt);
|
||||
string = string[cnt..];
|
||||
if (dst_dd.availWrite() == 0) {
|
||||
_ = try dst.write(dst_dd.readFlush());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
try util.writeString(&dd, got, ".");
|
||||
_ = try want.write(".");
|
||||
|
||||
var str = poem;
|
||||
for (poem_refs, 0..) |ref, i| {
|
||||
_ = i;
|
||||
if (ref.dist == 0) {
|
||||
try util.writeString(&dd, got, str[0..ref.length]);
|
||||
} else {
|
||||
try util.writeCopy(&dd, got, ref.dist, ref.length);
|
||||
}
|
||||
str = str[ref.length..];
|
||||
}
|
||||
_ = try want.write(poem);
|
||||
|
||||
try util.writeCopy(&dd, got, dd.histSize(), 33);
|
||||
_ = try want.write(want_list.items[0..33]);
|
||||
|
||||
try util.writeString(&dd, got, abc);
|
||||
try util.writeCopy(&dd, got, abc.len, 59 * abc.len);
|
||||
_ = try want.write(abc ** 60);
|
||||
|
||||
try util.writeString(&dd, got, fox);
|
||||
try util.writeCopy(&dd, got, fox.len, 9 * fox.len);
|
||||
_ = try want.write(fox ** 10);
|
||||
|
||||
try util.writeString(&dd, got, ".");
|
||||
try util.writeCopy(&dd, got, 1, 9);
|
||||
_ = try want.write("." ** 10);
|
||||
|
||||
try util.writeString(&dd, got, uppercase);
|
||||
try util.writeCopy(&dd, got, uppercase.len, 7 * uppercase.len);
|
||||
var i: u8 = 0;
|
||||
while (i < 8) : (i += 1) {
|
||||
_ = try want.write(uppercase);
|
||||
}
|
||||
|
||||
try util.writeCopy(&dd, got, dd.histSize(), 10);
|
||||
_ = try want.write(want_list.items[want_list.items.len - dd.histSize() ..][0..10]);
|
||||
|
||||
_ = try got.write(dd.readFlush());
|
||||
try testing.expectEqualSlices(u8, want_list.items, got_list.items);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,432 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const math = std.math;
|
||||
const mem = std.mem;
|
||||
const sort = std.sort;
|
||||
const testing = std.testing;
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const bu = @import("bits_utils.zig");
|
||||
const deflate_const = @import("deflate_const.zig");
|
||||
|
||||
const max_bits_limit = 16;
|
||||
|
||||
const LiteralNode = struct {
|
||||
literal: u16,
|
||||
freq: u16,
|
||||
};
|
||||
|
||||
// Describes the state of the constructed tree for a given depth.
|
||||
const LevelInfo = struct {
|
||||
// Our level. for better printing
|
||||
level: u32,
|
||||
|
||||
// The frequency of the last node at this level
|
||||
last_freq: u32,
|
||||
|
||||
// The frequency of the next character to add to this level
|
||||
next_char_freq: u32,
|
||||
|
||||
// The frequency of the next pair (from level below) to add to this level.
|
||||
// Only valid if the "needed" value of the next lower level is 0.
|
||||
next_pair_freq: u32,
|
||||
|
||||
// The number of chains remaining to generate for this level before moving
|
||||
// up to the next level
|
||||
needed: u32,
|
||||
};
|
||||
|
||||
// hcode is a huffman code with a bit code and bit length.
|
||||
pub const HuffCode = struct {
|
||||
code: u16 = 0,
|
||||
len: u16 = 0,
|
||||
|
||||
// set sets the code and length of an hcode.
|
||||
fn set(self: *HuffCode, code: u16, length: u16) void {
|
||||
self.len = length;
|
||||
self.code = code;
|
||||
}
|
||||
};
|
||||
|
||||
pub const HuffmanEncoder = struct {
|
||||
codes: []HuffCode,
|
||||
freq_cache: []LiteralNode = undefined,
|
||||
bit_count: [17]u32 = undefined,
|
||||
lns: []LiteralNode = undefined, // sorted by literal, stored to avoid repeated allocation in generate
|
||||
lfs: []LiteralNode = undefined, // sorted by frequency, stored to avoid repeated allocation in generate
|
||||
allocator: Allocator,
|
||||
|
||||
pub fn deinit(self: *HuffmanEncoder) void {
|
||||
self.allocator.free(self.codes);
|
||||
self.allocator.free(self.freq_cache);
|
||||
}
|
||||
|
||||
// Update this Huffman Code object to be the minimum code for the specified frequency count.
|
||||
//
|
||||
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
|
||||
// max_bits The maximum number of bits to use for any literal.
|
||||
pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void {
|
||||
var list = self.freq_cache[0 .. freq.len + 1];
|
||||
// Number of non-zero literals
|
||||
var count: u32 = 0;
|
||||
// Set list to be the set of all non-zero literals and their frequencies
|
||||
for (freq, 0..) |f, i| {
|
||||
if (f != 0) {
|
||||
list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
|
||||
count += 1;
|
||||
} else {
|
||||
list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
|
||||
self.codes[i].len = 0;
|
||||
}
|
||||
}
|
||||
list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
|
||||
|
||||
list = list[0..count];
|
||||
if (count <= 2) {
|
||||
// Handle the small cases here, because they are awkward for the general case code. With
|
||||
// two or fewer literals, everything has bit length 1.
|
||||
for (list, 0..) |node, i| {
|
||||
// "list" is in order of increasing literal value.
|
||||
self.codes[node.literal].set(@as(u16, @intCast(i)), 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
self.lfs = list;
|
||||
mem.sort(LiteralNode, self.lfs, {}, byFreq);
|
||||
|
||||
// Get the number of literals for each bit count
|
||||
const bit_count = self.bitCounts(list, max_bits);
|
||||
// And do the assignment
|
||||
self.assignEncodingAndSize(bit_count, list);
|
||||
}
|
||||
|
||||
pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 {
|
||||
var total: u32 = 0;
|
||||
for (freq, 0..) |f, i| {
|
||||
if (f != 0) {
|
||||
total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
// Return the number of literals assigned to each bit size in the Huffman encoding
|
||||
//
|
||||
// This method is only called when list.len >= 3
|
||||
// The cases of 0, 1, and 2 literals are handled by special case code.
|
||||
//
|
||||
// list: An array of the literals with non-zero frequencies
|
||||
// and their associated frequencies. The array is in order of increasing
|
||||
// frequency, and has as its last element a special element with frequency
|
||||
// std.math.maxInt(i32)
|
||||
//
|
||||
// max_bits: The maximum number of bits that should be used to encode any literal.
|
||||
// Must be less than 16.
|
||||
//
|
||||
// Returns an integer array in which array[i] indicates the number of literals
|
||||
// that should be encoded in i bits.
|
||||
fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 {
|
||||
var max_bits = max_bits_to_use;
|
||||
const n = list.len;
|
||||
|
||||
assert(max_bits < max_bits_limit);
|
||||
|
||||
// The tree can't have greater depth than n - 1, no matter what. This
|
||||
// saves a little bit of work in some small cases
|
||||
max_bits = @min(max_bits, n - 1);
|
||||
|
||||
// Create information about each of the levels.
|
||||
// A bogus "Level 0" whose sole purpose is so that
|
||||
// level1.prev.needed == 0. This makes level1.next_pair_freq
|
||||
// be a legitimate value that never gets chosen.
|
||||
var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo);
|
||||
// leaf_counts[i] counts the number of literals at the left
|
||||
// of ancestors of the rightmost node at level i.
|
||||
// leaf_counts[i][j] is the number of literals at the left
|
||||
// of the level j ancestor.
|
||||
var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32);
|
||||
|
||||
{
|
||||
var level = @as(u32, 1);
|
||||
while (level <= max_bits) : (level += 1) {
|
||||
// For every level, the first two items are the first two characters.
|
||||
// We initialize the levels as if we had already figured this out.
|
||||
levels[level] = LevelInfo{
|
||||
.level = level,
|
||||
.last_freq = list[1].freq,
|
||||
.next_char_freq = list[2].freq,
|
||||
.next_pair_freq = list[0].freq + list[1].freq,
|
||||
.needed = 0,
|
||||
};
|
||||
leaf_counts[level][level] = 2;
|
||||
if (level == 1) {
|
||||
levels[level].next_pair_freq = math.maxInt(i32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We need a total of 2*n - 2 items at top level and have already generated 2.
|
||||
levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;
|
||||
|
||||
{
|
||||
var level = max_bits;
|
||||
while (true) {
|
||||
var l = &levels[level];
|
||||
if (l.next_pair_freq == math.maxInt(i32) and l.next_char_freq == math.maxInt(i32)) {
|
||||
// We've run out of both leafs and pairs.
|
||||
// End all calculations for this level.
|
||||
// To make sure we never come back to this level or any lower level,
|
||||
// set next_pair_freq impossibly large.
|
||||
l.needed = 0;
|
||||
levels[level + 1].next_pair_freq = math.maxInt(i32);
|
||||
level += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
const prev_freq = l.last_freq;
|
||||
if (l.next_char_freq < l.next_pair_freq) {
|
||||
// The next item on this row is a leaf node.
|
||||
const next = leaf_counts[level][level] + 1;
|
||||
l.last_freq = l.next_char_freq;
|
||||
// Lower leaf_counts are the same of the previous node.
|
||||
leaf_counts[level][level] = next;
|
||||
if (next >= list.len) {
|
||||
l.next_char_freq = maxNode().freq;
|
||||
} else {
|
||||
l.next_char_freq = list[next].freq;
|
||||
}
|
||||
} else {
|
||||
// The next item on this row is a pair from the previous row.
|
||||
// next_pair_freq isn't valid until we generate two
|
||||
// more values in the level below
|
||||
l.last_freq = l.next_pair_freq;
|
||||
// Take leaf counts from the lower level, except counts[level] remains the same.
|
||||
@memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
|
||||
levels[l.level - 1].needed = 2;
|
||||
}
|
||||
|
||||
l.needed -= 1;
|
||||
if (l.needed == 0) {
|
||||
// We've done everything we need to do for this level.
|
||||
// Continue calculating one level up. Fill in next_pair_freq
|
||||
// of that level with the sum of the two nodes we've just calculated on
|
||||
// this level.
|
||||
if (l.level == max_bits) {
|
||||
// All done!
|
||||
break;
|
||||
}
|
||||
levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
|
||||
level += 1;
|
||||
} else {
|
||||
// If we stole from below, move down temporarily to replenish it.
|
||||
while (levels[level - 1].needed > 0) {
|
||||
level -= 1;
|
||||
if (level == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Somethings is wrong if at the end, the top level is null or hasn't used
|
||||
// all of the leaves.
|
||||
assert(leaf_counts[max_bits][max_bits] == n);
|
||||
|
||||
var bit_count = self.bit_count[0 .. max_bits + 1];
|
||||
var bits: u32 = 1;
|
||||
const counts = &leaf_counts[max_bits];
|
||||
{
|
||||
var level = max_bits;
|
||||
while (level > 0) : (level -= 1) {
|
||||
// counts[level] gives the number of literals requiring at least "bits"
|
||||
// bits to encode.
|
||||
bit_count[bits] = counts[level] - counts[level - 1];
|
||||
bits += 1;
|
||||
if (level == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bit_count;
|
||||
}
|
||||
|
||||
// Look at the leaves and assign them a bit count and an encoding as specified
|
||||
// in RFC 1951 3.2.2
|
||||
fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void {
|
||||
var code = @as(u16, 0);
|
||||
var list = list_arg;
|
||||
|
||||
for (bit_count, 0..) |bits, n| {
|
||||
code <<= 1;
|
||||
if (n == 0 or bits == 0) {
|
||||
continue;
|
||||
}
|
||||
// The literals list[list.len-bits] .. list[list.len-bits]
|
||||
// are encoded using "bits" bits, and get the values
|
||||
// code, code + 1, .... The code values are
|
||||
// assigned in literal order (not frequency order).
|
||||
const chunk = list[list.len - @as(u32, @intCast(bits)) ..];
|
||||
|
||||
self.lns = chunk;
|
||||
mem.sort(LiteralNode, self.lns, {}, byLiteral);
|
||||
|
||||
for (chunk) |node| {
|
||||
self.codes[node.literal] = HuffCode{
|
||||
.code = bu.bitReverse(u16, code, @as(u5, @intCast(n))),
|
||||
.len = @as(u16, @intCast(n)),
|
||||
};
|
||||
code += 1;
|
||||
}
|
||||
list = list[0 .. list.len - @as(u32, @intCast(bits))];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
fn maxNode() LiteralNode {
|
||||
return LiteralNode{
|
||||
.literal = math.maxInt(u16),
|
||||
.freq = math.maxInt(u16),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn newHuffmanEncoder(allocator: Allocator, size: u32) !HuffmanEncoder {
|
||||
return HuffmanEncoder{
|
||||
.codes = try allocator.alloc(HuffCode, size),
|
||||
// Allocate a reusable buffer with the longest possible frequency table.
|
||||
// (deflate_const.max_num_frequencies).
|
||||
.freq_cache = try allocator.alloc(LiteralNode, deflate_const.max_num_frequencies + 1),
|
||||
.allocator = allocator,
|
||||
};
|
||||
}
|
||||
|
||||
// Generates a HuffmanCode corresponding to the fixed literal table
|
||||
pub fn generateFixedLiteralEncoding(allocator: Allocator) !HuffmanEncoder {
|
||||
const h = try newHuffmanEncoder(allocator, deflate_const.max_num_frequencies);
|
||||
var codes = h.codes;
|
||||
var ch: u16 = 0;
|
||||
|
||||
while (ch < deflate_const.max_num_frequencies) : (ch += 1) {
|
||||
var bits: u16 = undefined;
|
||||
var size: u16 = undefined;
|
||||
switch (ch) {
|
||||
0...143 => {
|
||||
// size 8, 000110000 .. 10111111
|
||||
bits = ch + 48;
|
||||
size = 8;
|
||||
},
|
||||
144...255 => {
|
||||
// size 9, 110010000 .. 111111111
|
||||
bits = ch + 400 - 144;
|
||||
size = 9;
|
||||
},
|
||||
256...279 => {
|
||||
// size 7, 0000000 .. 0010111
|
||||
bits = ch - 256;
|
||||
size = 7;
|
||||
},
|
||||
else => {
|
||||
// size 8, 11000000 .. 11000111
|
||||
bits = ch + 192 - 280;
|
||||
size = 8;
|
||||
},
|
||||
}
|
||||
codes[ch] = HuffCode{ .code = bu.bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
pub fn generateFixedOffsetEncoding(allocator: Allocator) !HuffmanEncoder {
|
||||
const h = try newHuffmanEncoder(allocator, 30);
|
||||
var codes = h.codes;
|
||||
for (codes, 0..) |_, ch| {
|
||||
codes[ch] = HuffCode{ .code = bu.bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
|
||||
_ = context;
|
||||
return a.literal < b.literal;
|
||||
}
|
||||
|
||||
fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
|
||||
_ = context;
|
||||
if (a.freq == b.freq) {
|
||||
return a.literal < b.literal;
|
||||
}
|
||||
return a.freq < b.freq;
|
||||
}
|
||||
|
||||
test "generate a Huffman code from an array of frequencies" {
|
||||
var freqs: [19]u16 = [_]u16{
|
||||
8, // 0
|
||||
1, // 1
|
||||
1, // 2
|
||||
2, // 3
|
||||
5, // 4
|
||||
10, // 5
|
||||
9, // 6
|
||||
1, // 7
|
||||
0, // 8
|
||||
0, // 9
|
||||
0, // 10
|
||||
0, // 11
|
||||
0, // 12
|
||||
0, // 13
|
||||
0, // 14
|
||||
0, // 15
|
||||
1, // 16
|
||||
3, // 17
|
||||
5, // 18
|
||||
};
|
||||
|
||||
var enc = try newHuffmanEncoder(testing.allocator, freqs.len);
|
||||
defer enc.deinit();
|
||||
enc.generate(freqs[0..], 7);
|
||||
|
||||
try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
|
||||
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[0].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[1].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[2].len);
|
||||
try testing.expectEqual(@as(usize, 5), enc.codes[3].len);
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[4].len);
|
||||
try testing.expectEqual(@as(usize, 2), enc.codes[5].len);
|
||||
try testing.expectEqual(@as(usize, 2), enc.codes[6].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[7].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[8].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[9].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[10].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[11].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[12].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[13].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[14].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[15].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[16].len);
|
||||
try testing.expectEqual(@as(usize, 5), enc.codes[17].len);
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[18].len);
|
||||
|
||||
try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code);
|
||||
try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code);
|
||||
try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code);
|
||||
try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code);
|
||||
try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code);
|
||||
try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code);
|
||||
try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code);
|
||||
try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code);
|
||||
try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code);
|
||||
try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
|
||||
try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
|
||||
}
|
||||
|
||||
test "generate a Huffman code for the fixed literal table specific to Deflate" {
|
||||
var enc = try generateFixedLiteralEncoding(testing.allocator);
|
||||
defer enc.deinit();
|
||||
}
|
||||
|
||||
test "generate a Huffman code for the 30 possible relative offsets (LZ77 distances) of Deflate" {
|
||||
var enc = try generateFixedOffsetEncoding(testing.allocator);
|
||||
defer enc.deinit();
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -1,29 +0,0 @@
|
||||
Four score and seven years ago our fathers brought forth on
|
||||
this continent, a new nation, conceived in Liberty, and dedicated
|
||||
to the proposition that all men are created equal.
|
||||
Now we are engaged in a great Civil War, testing whether that
|
||||
nation, or any nation so conceived and so dedicated, can long
|
||||
endure.
|
||||
We are met on a great battle-field of that war.
|
||||
We have come to dedicate a portion of that field, as a final
|
||||
resting place for those who here gave their lives that that
|
||||
nation might live. It is altogether fitting and proper that
|
||||
we should do this.
|
||||
But, in a larger sense, we can not dedicate - we can not
|
||||
consecrate - we can not hallow - this ground.
|
||||
The brave men, living and dead, who struggled here, have
|
||||
consecrated it, far above our poor power to add or detract.
|
||||
The world will little note, nor long remember what we say here,
|
||||
but it can never forget what they did here.
|
||||
It is for us the living, rather, to be dedicated here to the
|
||||
unfinished work which they who fought here have thus far so
|
||||
nobly advanced. It is rather for us to be here dedicated to
|
||||
the great task remaining before us - that from these honored
|
||||
dead we take increased devotion to that cause for which they
|
||||
gave the last full measure of devotion -
|
||||
that we here highly resolve that these dead shall not have
|
||||
died in vain - that this nation, under God, shall have a new
|
||||
birth of freedom - and that government of the people, by the
|
||||
people, for the people, shall not perish from this earth.
|
||||
|
||||
Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania
|
||||
File diff suppressed because one or more lines are too long
@@ -1,103 +0,0 @@
|
||||
// 2 bits: type, can be 0 (literal), 1 (EOF), 2 (Match) or 3 (Unused).
|
||||
// 8 bits: xlength (length - MIN_MATCH_LENGTH).
|
||||
// 22 bits: xoffset (offset - MIN_OFFSET_SIZE), or literal.
|
||||
const length_shift = 22;
|
||||
const offset_mask = (1 << length_shift) - 1; // 4_194_303
|
||||
const literal_type = 0 << 30; // 0
|
||||
pub const match_type = 1 << 30; // 1_073_741_824
|
||||
|
||||
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
|
||||
// is length_codes[length - MIN_MATCH_LENGTH]
|
||||
var length_codes = [_]u32{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
|
||||
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
|
||||
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
|
||||
15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
|
||||
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
|
||||
22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
|
||||
23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||
26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 27, 27, 28,
|
||||
};
|
||||
|
||||
var offset_codes = [_]u32{
|
||||
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
};
|
||||
|
||||
pub const Token = u32;
|
||||
|
||||
// Convert a literal into a literal token.
|
||||
pub fn literalToken(lit: u32) Token {
|
||||
return literal_type + lit;
|
||||
}
|
||||
|
||||
// Convert a < xlength, xoffset > pair into a match token.
|
||||
pub fn matchToken(xlength: u32, xoffset: u32) Token {
|
||||
return match_type + (xlength << length_shift) + xoffset;
|
||||
}
|
||||
|
||||
// Returns the literal of a literal token
|
||||
pub fn literal(t: Token) u32 {
|
||||
return @as(u32, @intCast(t - literal_type));
|
||||
}
|
||||
|
||||
// Returns the extra offset of a match token
|
||||
pub fn offset(t: Token) u32 {
|
||||
return @as(u32, @intCast(t)) & offset_mask;
|
||||
}
|
||||
|
||||
pub fn length(t: Token) u32 {
|
||||
return @as(u32, @intCast((t - match_type) >> length_shift));
|
||||
}
|
||||
|
||||
pub fn lengthCode(len: u32) u32 {
|
||||
return length_codes[len];
|
||||
}
|
||||
|
||||
// Returns the offset code corresponding to a specific offset
|
||||
pub fn offsetCode(off: u32) u32 {
|
||||
if (off < @as(u32, @intCast(offset_codes.len))) {
|
||||
return offset_codes[off];
|
||||
}
|
||||
if (off >> 7 < @as(u32, @intCast(offset_codes.len))) {
|
||||
return offset_codes[off >> 7] + 14;
|
||||
}
|
||||
return offset_codes[off >> 14] + 28;
|
||||
}
|
||||
|
||||
test {
|
||||
const std = @import("std");
|
||||
try std.testing.expectEqual(@as(Token, 3_401_581_099), matchToken(555, 555));
|
||||
}
|
||||
481
lib/std/compress/flate.zig
Normal file
481
lib/std/compress/flate.zig
Normal file
@@ -0,0 +1,481 @@
|
||||
/// Deflate is a lossless data compression file format that uses a combination
|
||||
/// of LZ77 and Huffman coding.
|
||||
pub const deflate = @import("flate/deflate.zig");
|
||||
|
||||
/// Inflate is the decoding process that takes a Deflate bitstream for
|
||||
/// decompression and correctly produces the original full-size data or file.
|
||||
pub const inflate = @import("flate/inflate.zig");
|
||||
|
||||
/// Decompress compressed data from reader and write plain data to the writer.
|
||||
pub fn decompress(reader: anytype, writer: anytype) !void {
|
||||
try inflate.decompress(.raw, reader, writer);
|
||||
}
|
||||
|
||||
/// Decompressor type
|
||||
pub fn Decompressor(comptime ReaderType: type) type {
|
||||
return inflate.Inflate(.raw, ReaderType);
|
||||
}
|
||||
|
||||
/// Create Decompressor which will read compressed data from reader.
|
||||
pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
|
||||
return inflate.decompressor(.raw, reader);
|
||||
}
|
||||
|
||||
/// Compression level, trades between speed and compression size.
pub const Options = deflate.Options;

/// Compress plain data from reader and write compressed data to the writer.
/// Emits a raw deflate bit stream (no gzip/zlib framing).
pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
    try deflate.compress(.raw, reader, writer, options);
}

/// Compressor type producing a raw deflate stream on `WriterType`.
pub fn Compressor(comptime WriterType: type) type {
    return deflate.Compressor(.raw, WriterType);
}

/// Create Compressor which outputs compressed data to the writer.
pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
    return try deflate.compressor(.raw, writer, options);
}
|
||||
|
||||
/// Huffman only compression. Without Lempel-Ziv match searching. Faster
/// compression, less memory requirements but bigger compressed sizes.
pub const huffman = struct {
    /// One-shot Huffman-only compression from reader to writer (raw stream).
    pub fn compress(reader: anytype, writer: anytype) !void {
        try deflate.huffman.compress(.raw, reader, writer);
    }

    /// Huffman-only compressor type for `WriterType` (raw stream).
    pub fn Compressor(comptime WriterType: type) type {
        return deflate.huffman.Compressor(.raw, WriterType);
    }

    /// Create a Huffman-only compressor which outputs to the writer.
    pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
        return deflate.huffman.compressor(.raw, writer);
    }
};
|
||||
|
||||
/// No compression store only. Compressed size is slightly bigger than plain.
pub const store = struct {
    /// One-shot stored-block "compression" from reader to writer (raw stream).
    pub fn compress(reader: anytype, writer: anytype) !void {
        try deflate.store.compress(.raw, reader, writer);
    }

    /// Stored-block compressor type for `WriterType` (raw stream).
    pub fn Compressor(comptime WriterType: type) type {
        return deflate.store.Compressor(.raw, WriterType);
    }

    /// Create a stored-block compressor which outputs to the writer.
    pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
        return deflate.store.compressor(.raw, writer);
    }
};
|
||||
|
||||
/// Container defines header/footer around deflate bit stream. Gzip and zlib
/// compression algorithms are containers around deflate bit stream body.
|
||||
const Container = @import("flate/container.zig").Container;
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const fixedBufferStream = std.io.fixedBufferStream;
|
||||
const print = std.debug.print;
|
||||
const builtin = @import("builtin");
|
||||
|
||||
test "flate" {
    // Reference the subpackages so their tests are included in the test run.
    _ = @import("flate/deflate.zig");
    _ = @import("flate/inflate.zig");
}
|
||||
|
||||
// Round-trips several corpora through every compression level and container
// wrapping, pinning the exact compressed sizes so output regressions are caught.
test "flate compress/decompress" {
    if (builtin.target.cpu.arch == .wasm32) return error.SkipZigTest;

    var cmp_buf: [64 * 1024]u8 = undefined; // compressed data buffer
    var dcm_buf: [64 * 1024]u8 = undefined; // decompressed data buffer

    const levels = [_]deflate.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
    const cases = [_]struct {
        data: []const u8, // uncompressed content
        // compressed data sizes per level 4-9
        gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len,
        huffman_only_size: usize = 0,
        store_size: usize = 0,
    }{
        .{
            .data = @embedFile("flate/testdata/rfc1951.txt"),
            .gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 },
            .huffman_only_size = 20287,
            .store_size = 36967,
        },
        .{
            .data = @embedFile("flate/testdata/fuzz/roundtrip1.input"),
            .gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 },
            .huffman_only_size = 393,
            .store_size = 393,
        },
        .{
            .data = @embedFile("flate/testdata/fuzz/roundtrip2.input"),
            .gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 },
            .huffman_only_size = 394,
            .store_size = 394,
        },
        .{
            .data = @embedFile("flate/testdata/fuzz/deflate-stream.expect"),
            .gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 },
            .huffman_only_size = 498,
            .store_size = 747,
        },
    };

    for (cases, 0..) |case, case_no| { // for each case
        const data = case.data;

        for (levels, 0..) |level, i| { // for each compression level

            inline for (Container.list) |container| { // for each wrapping
                // Expected sizes are recorded for gzip; adjust for this
                // container's header/footer overhead. 0 means "unknown, learn it".
                var compressed_size: usize = if (case.gzip_sizes[i] > 0)
                    case.gzip_sizes[i] - Container.gzip.size() + container.size()
                else
                    0;

                // compress original stream to compressed stream
                {
                    var original = fixedBufferStream(data);
                    var compressed = fixedBufferStream(&cmp_buf);
                    try deflate.compress(container, original.reader(), compressed.writer(), .{ .level = level });
                    if (compressed_size == 0) {
                        if (container == .gzip)
                            print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos });
                        compressed_size = compressed.pos;
                    }
                    try testing.expectEqual(compressed_size, compressed.pos);
                }
                // decompress compressed stream to decompressed stream
                {
                    var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
                    var decompressed = fixedBufferStream(&dcm_buf);
                    try inflate.decompress(container, compressed.reader(), decompressed.writer());
                    try testing.expectEqualSlices(u8, data, decompressed.getWritten());
                }

                // compressor writer interface
                {
                    var compressed = fixedBufferStream(&cmp_buf);
                    var cmp = try deflate.compressor(container, compressed.writer(), .{ .level = level });
                    var cmp_wrt = cmp.writer();
                    try cmp_wrt.writeAll(data);
                    try cmp.finish();

                    try testing.expectEqual(compressed_size, compressed.pos);
                }
                // decompressor reader interface
                {
                    var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
                    var dcm = inflate.decompressor(container, compressed.reader());
                    var dcm_rdr = dcm.reader();
                    const n = try dcm_rdr.readAll(&dcm_buf);
                    try testing.expectEqual(data.len, n);
                    try testing.expectEqualSlices(u8, data, dcm_buf[0..n]);
                }
            }
        }
        // huffman only compression
        {
            inline for (Container.list) |container| { // for each wrapping
                var compressed_size: usize = if (case.huffman_only_size > 0)
                    case.huffman_only_size - Container.gzip.size() + container.size()
                else
                    0;

                // compress original stream to compressed stream
                {
                    var original = fixedBufferStream(data);
                    var compressed = fixedBufferStream(&cmp_buf);
                    var cmp = try deflate.huffman.compressor(container, compressed.writer());
                    try cmp.compress(original.reader());
                    try cmp.finish();
                    if (compressed_size == 0) {
                        if (container == .gzip)
                            print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos });
                        compressed_size = compressed.pos;
                    }
                    try testing.expectEqual(compressed_size, compressed.pos);
                }
                // decompress compressed stream to decompressed stream
                {
                    var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
                    var decompressed = fixedBufferStream(&dcm_buf);
                    try inflate.decompress(container, compressed.reader(), decompressed.writer());
                    try testing.expectEqualSlices(u8, data, decompressed.getWritten());
                }
            }
        }

        // store only
        {
            inline for (Container.list) |container| { // for each wrapping
                var compressed_size: usize = if (case.store_size > 0)
                    case.store_size - Container.gzip.size() + container.size()
                else
                    0;

                // compress original stream to compressed stream
                {
                    var original = fixedBufferStream(data);
                    var compressed = fixedBufferStream(&cmp_buf);
                    var cmp = try deflate.store.compressor(container, compressed.writer());
                    try cmp.compress(original.reader());
                    try cmp.finish();
                    if (compressed_size == 0) {
                        if (container == .gzip)
                            print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos });
                        compressed_size = compressed.pos;
                    }

                    try testing.expectEqual(compressed_size, compressed.pos);
                }
                // decompress compressed stream to decompressed stream
                {
                    var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
                    var decompressed = fixedBufferStream(&dcm_buf);
                    try inflate.decompress(container, compressed.reader(), decompressed.writer());
                    try testing.expectEqualSlices(u8, data, decompressed.getWritten());
                }
            }
        }
    }
}
|
||||
|
||||
/// Inflate `compressed` with the given container framing and assert the
/// result equals `expected_plain`. Uses the testing allocator, so leaks fail.
fn testDecompress(comptime container: Container, compressed: []const u8, expected_plain: []const u8) !void {
    var input = fixedBufferStream(compressed);
    var output = std.ArrayList(u8).init(testing.allocator);
    defer output.deinit();

    try inflate.decompress(container, input.reader(), output.writer());
    try testing.expectEqualSlices(u8, expected_plain, output.items);
}
|
||||
|
||||
// Decompressing must stop exactly at the end of the zlib stream and not
// consume trailing bytes from the reader.
test "flate don't read past deflate stream's end" {
    try testDecompress(.zlib, &[_]u8{
        0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
        0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
        0x83, 0x95, 0x0b, 0xf5,
    }, &[_]u8{
        0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
        0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
        0x00, 0x00, 0xff, 0xff, 0xff,
    });
}
|
||||
|
||||
// Malformed zlib header/footer variants must surface the expected errors.
test "flate zlib header" {
    // Truncated header
    try testing.expectError(
        error.EndOfStream,
        testDecompress(.zlib, &[_]u8{0x78}, ""),
    );
    // Wrong CM
    try testing.expectError(
        error.BadZlibHeader,
        testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""),
    );
    // Wrong CINFO
    try testing.expectError(
        error.BadZlibHeader,
        testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""),
    );
    // Wrong checksum
    try testing.expectError(
        error.WrongZlibChecksum,
        testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
    );
    // Truncated checksum
    try testing.expectError(
        error.EndOfStream,
        testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
    );
}
|
||||
|
||||
// Malformed gzip header/footer variants must surface the expected errors;
// a valid stream with an FHCRC header checksum must decompress cleanly.
test "flate gzip header" {
    // Truncated header
    try testing.expectError(
        error.EndOfStream,
        testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined),
    );
    // Wrong CM
    try testing.expectError(
        error.BadGzipHeader,
        testDecompress(.gzip, &[_]u8{
            0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x03,
        }, undefined),
    );

    // Wrong checksum
    try testing.expectError(
        error.WrongGzipChecksum,
        testDecompress(.gzip, &[_]u8{
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
            0x00, 0x00, 0x00, 0x00,
        }, undefined),
    );
    // Truncated checksum
    try testing.expectError(
        error.EndOfStream,
        testDecompress(.gzip, &[_]u8{
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
        }, undefined),
    );
    // Wrong initial size
    try testing.expectError(
        error.WrongGzipSize,
        testDecompress(.gzip, &[_]u8{
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x01,
        }, undefined),
    );
    // Truncated initial size field
    try testing.expectError(
        error.EndOfStream,
        testDecompress(.gzip, &[_]u8{
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00,
        }, undefined),
    );

    try testDecompress(.gzip, &[_]u8{
        // GZIP header
        0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
        // header.FHCRC (should cover entire header)
        0x99, 0xd6,
        // GZIP data
        0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    }, "");
}
|
||||
|
||||
// Exercises the shared public API shape (decompress/compress/compressor/
// decompressor/huffman/store) of the flate, gzip and zlib packages against
// hand-built streams of the same payload.
test "flate public interface" {
    if (builtin.target.cpu.arch == .wasm32) return error.SkipZigTest;

    const plain_data = [_]u8{ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a };

    // deflate final stored block, header + plain (stored) data
    const deflate_block = [_]u8{
        0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
    } ++ plain_data;

    // gzip header/footer + deflate block
    const gzip_data =
        [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 } ++ // gzip header (10 bytes)
        deflate_block ++
        [_]u8{ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00 }; // gzip footer checksum (4 bytes), size (4 bytes)

    // zlib header/footer + deflate block
    const zlib_data = [_]u8{ 0x78, 0b10_0_11100 } ++ // zlib header (2 bytes)
        deflate_block ++
        [_]u8{ 0x1c, 0xf2, 0x04, 0x47 }; // zlib footer: checksum

    const gzip = @import("gzip.zig");
    const zlib = @import("zlib.zig");
    const flate = @This();

    try testInterface(gzip, &gzip_data, &plain_data);
    try testInterface(zlib, &zlib_data, &plain_data);
    try testInterface(flate, &deflate_block, &plain_data);
}
|
||||
|
||||
/// Drives every public entry point of `pkg` (flate, gzip or zlib): one-shot
/// decompress, one-shot compress round-trip, compressor/decompressor objects,
/// and the huffman and store variants. `gzip_data` is a pre-built compressed
/// stream of `plain_data` in that package's framing.
fn testInterface(comptime pkg: type, gzip_data: []const u8, plain_data: []const u8) !void {
    var buffer1: [64]u8 = undefined;
    var buffer2: [64]u8 = undefined;

    var compressed = fixedBufferStream(&buffer1);
    var plain = fixedBufferStream(&buffer2);

    // decompress
    {
        var in = fixedBufferStream(gzip_data);
        try pkg.decompress(in.reader(), plain.writer());
        try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
    }
    plain.reset();
    compressed.reset();

    // compress/decompress
    {
        var in = fixedBufferStream(plain_data);
        try pkg.compress(in.reader(), compressed.writer(), .{});
        compressed.reset();
        try pkg.decompress(compressed.reader(), plain.writer());
        try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
    }
    plain.reset();
    compressed.reset();

    // compressor/decompressor
    {
        var in = fixedBufferStream(plain_data);
        var cmp = try pkg.compressor(compressed.writer(), .{});
        try cmp.compress(in.reader());
        try cmp.finish();

        compressed.reset();
        var dcp = pkg.decompressor(compressed.reader());
        try dcp.decompress(plain.writer());
        try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
    }
    plain.reset();
    compressed.reset();

    // huffman
    {
        // huffman compress/decompress
        {
            var in = fixedBufferStream(plain_data);
            try pkg.huffman.compress(in.reader(), compressed.writer());
            compressed.reset();
            try pkg.decompress(compressed.reader(), plain.writer());
            try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
        }
        plain.reset();
        compressed.reset();

        // huffman compressor/decompressor
        {
            var in = fixedBufferStream(plain_data);
            var cmp = try pkg.huffman.compressor(compressed.writer());
            try cmp.compress(in.reader());
            try cmp.finish();

            compressed.reset();
            try pkg.decompress(compressed.reader(), plain.writer());
            try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
        }
    }
    plain.reset();
    compressed.reset();

    // store
    {
        // store compress/decompress
        {
            var in = fixedBufferStream(plain_data);
            try pkg.store.compress(in.reader(), compressed.writer());
            compressed.reset();
            try pkg.decompress(compressed.reader(), plain.writer());
            try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
        }
        plain.reset();
        compressed.reset();

        // store compressor/decompressor
        {
            var in = fixedBufferStream(plain_data);
            var cmp = try pkg.store.compressor(compressed.writer());
            try cmp.compress(in.reader());
            try cmp.finish();

            compressed.reset();
            try pkg.decompress(compressed.reader(), plain.writer());
            try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
        }
    }
}
|
||||
234
lib/std/compress/flate/CircularBuffer.zig
Normal file
234
lib/std/compress/flate/CircularBuffer.zig
Normal file
@@ -0,0 +1,234 @@
|
||||
//! 64K buffer of uncompressed data created in inflate (decompression). Has enough
|
||||
//! history to support writing match<length, distance>; copying length of bytes
|
||||
//! from the position distance backward from current.
|
||||
//!
|
||||
//! Reads can return less than available bytes if they are spread across
|
||||
//! different circles. So reads should repeat until get required number of bytes
|
||||
//! or until returned slice is zero length.
|
||||
//!
|
||||
//! Note on deflate limits:
|
||||
//! * non-compressible block is limited to 65,535 bytes.
|
||||
//! * backward pointer is limited in distance to 32K bytes and in length to 258 bytes.
|
||||
//!
|
||||
//! Whole non-compressed block can be written without overlap. We always have
//! history of up to 64K, more than the 32K needed.
|
||||
//!
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
const consts = @import("consts.zig").match;
|
||||
|
||||
const mask = 0xffff; // 64K - 1
|
||||
const buffer_len = mask + 1; // 64K buffer
|
||||
|
||||
const Self = @This();
|
||||
|
||||
buffer: [buffer_len]u8 = undefined,
|
||||
wp: usize = 0, // write position
|
||||
rp: usize = 0, // read position
|
||||
|
||||
/// Write every byte of `buf` as a literal.
fn writeAll(self: *Self, buf: []const u8) void {
    for (buf) |c| self.write(c);
}

/// Write literal.
pub fn write(self: *Self, b: u8) void {
    // Caller must ensure free space (see `full`); overwriting unread data is a bug.
    assert(self.wp - self.rp < mask);
    self.buffer[self.wp & mask] = b;
    self.wp += 1;
}
|
||||
|
||||
/// Write match (back-reference to the same data slice) starting at `distance`
/// back from current write position, and `length` of bytes.
/// Returns error.InvalidMatch when the match violates deflate limits or
/// points before any written data.
pub fn writeMatch(self: *Self, length: u16, distance: u16) !void {
    if (self.wp < distance or
        length < consts.base_length or length > consts.max_length or
        distance < consts.min_distance or distance > consts.max_distance)
    {
        return error.InvalidMatch;
    }
    assert(self.wp - self.rp < mask);

    var from: usize = self.wp - distance;
    const from_end: usize = from + length;
    var to: usize = self.wp;
    const to_end: usize = to + length;

    self.wp += length;

    // Fast path using memcpy
    if (length <= distance and // no overlapping buffers
        (from >> 16 == from_end >> 16) and // source starts and ends in the same circle
        (to >> 16 == to_end >> 16)) // destination too
    {
        @memcpy(self.buffer[to & mask .. to_end & mask], self.buffer[from & mask .. from_end & mask]);
        return;
    }

    // Slow path: byte by byte, correct for overlapping ranges and wrap-around.
    while (to < to_end) {
        self.buffer[to & mask] = self.buffer[from & mask];
        to += 1;
        from += 1;
    }
}
|
||||
|
||||
/// Returns writable part of the internal buffer of size `n` at most. Advances
/// write pointer, assumes that returned buffer will be filled with data.
pub fn getWritable(self: *Self, n: usize) []u8 {
    const wp = self.wp & mask;
    // Never hand out a slice that wraps past the physical end of the buffer.
    const len = @min(n, buffer_len - wp);
    self.wp += len;
    return self.buffer[wp .. wp + len];
}

/// Read available data. Can return part of the available data if it is
/// spread across two circles. So read until this returns zero length.
pub fn read(self: *Self) []const u8 {
    return self.readAtMost(buffer_len);
}

/// Read part of available data. Can return less than max even if there are
/// more than max decoded data.
pub fn readAtMost(self: *Self, limit: usize) []const u8 {
    // limit == 0 means "no limit"
    const rb = self.readBlock(if (limit == 0) buffer_len else limit);
    defer self.rp += rb.len;
    return self.buffer[rb.head..rb.tail];
}

// A contiguous readable range [head, tail) of `len` bytes in the backing buffer.
const ReadBlock = struct {
    head: usize,
    tail: usize,
    len: usize,
};

/// Returns position of continuous read block data.
fn readBlock(self: *Self, max: usize) ReadBlock {
    const r = self.rp & mask;
    const w = self.wp & mask;
    const n = @min(
        max,
        // If writer has wrapped past reader, stop at the physical buffer end.
        if (w >= r) w - r else buffer_len - r,
    );
    return .{
        .head = r,
        .tail = r + n,
        .len = n,
    };
}

/// Number of free bytes for write.
pub fn free(self: *Self) usize {
    return buffer_len - (self.wp - self.rp);
}

/// Full if largest match can't fit. 258 is largest match length. That many
/// bytes can be produced in single decode step.
pub fn full(self: *Self) bool {
    return self.free() < 258 + 1;
}
|
||||
|
||||
// example from: https://youtu.be/SJPvNi4HrWQ?t=3558
test "flate.CircularBuffer writeMatch" {
    var cb: Self = .{};

    cb.writeAll("a salad; ");
    try cb.writeMatch(5, 9); // copies "a sal"
    try cb.writeMatch(3, 3); // copies "sal" (overlapping source/dest)

    try testing.expectEqualStrings("a salad; a salsal", cb.read());
}

test "flate.CircularBuffer writeMatch overlap" {
    var cb: Self = .{};

    cb.writeAll("a b c ");
    try cb.writeMatch(8, 4); // length > distance: source repeats as it is copied
    cb.write('d');

    try testing.expectEqualStrings("a b c b c b c d", cb.read());
}

test "flate.CircularBuffer readAtMost" {
    var cb: Self = .{};

    cb.writeAll("0123456789");
    try cb.writeMatch(50, 10); // replicate the 10 bytes five more times

    try testing.expectEqualStrings("0123456789" ** 6, cb.buffer[cb.rp..cb.wp]);
    for (0..6) |i| {
        try testing.expectEqual(i * 10, cb.rp);
        try testing.expectEqualStrings("0123456789", cb.readAtMost(10));
    }
    try testing.expectEqualStrings("", cb.readAtMost(10));
    try testing.expectEqualStrings("", cb.read());
}

test "flate.CircularBuffer" {
    var cb: Self = .{};

    const data = "0123456789abcdef" ** (1024 / 16);
    cb.writeAll(data);
    try testing.expectEqual(@as(usize, 0), cb.rp);
    try testing.expectEqual(@as(usize, 1024), cb.wp);
    try testing.expectEqual(@as(usize, 1024 * 63), cb.free());

    for (0..62 * 4) |_|
        try cb.writeMatch(256, 1024); // write 62K

    try testing.expectEqual(@as(usize, 0), cb.rp);
    try testing.expectEqual(@as(usize, 63 * 1024), cb.wp);
    try testing.expectEqual(@as(usize, 1024), cb.free());

    cb.writeAll(data[0..200]);
    _ = cb.readAtMost(1024); // make some space
    cb.writeAll(data); // overflows write position
    try testing.expectEqual(@as(usize, 200 + 65536), cb.wp);
    try testing.expectEqual(@as(usize, 1024), cb.rp);
    try testing.expectEqual(@as(usize, 1024 - 200), cb.free());

    const rb = cb.readBlock(Self.buffer_len);
    try testing.expectEqual(@as(usize, 65536 - 1024), rb.len);
    try testing.expectEqual(@as(usize, 1024), rb.head);
    try testing.expectEqual(@as(usize, 65536), rb.tail);

    try testing.expectEqual(@as(usize, 65536 - 1024), cb.read().len); // read to the end of the buffer
    try testing.expectEqual(@as(usize, 200 + 65536), cb.wp);
    try testing.expectEqual(@as(usize, 65536), cb.rp);
    try testing.expectEqual(@as(usize, 65536 - 200), cb.free());

    try testing.expectEqual(@as(usize, 200), cb.read().len); // read the rest
}

test "flate.CircularBuffer write overlap" {
    var cb: Self = .{};
    // Start 15 bytes before the physical end so writes wrap around.
    cb.wp = cb.buffer.len - 15;
    cb.rp = cb.wp;

    cb.writeAll("0123456789");
    cb.writeAll("abcdefghij");

    try testing.expectEqual(cb.buffer.len + 5, cb.wp);
    try testing.expectEqual(cb.buffer.len - 15, cb.rp);

    // First read stops at the physical buffer end, second returns the wrapped tail.
    try testing.expectEqualStrings("0123456789abcde", cb.read());
    try testing.expectEqualStrings("fghij", cb.read());

    try testing.expect(cb.wp == cb.rp);
}

test "flate.CircularBuffer writeMatch/read overlap" {
    var cb: Self = .{};
    cb.wp = cb.buffer.len - 15;
    cb.rp = cb.wp;

    cb.writeAll("0123456789");
    try cb.writeMatch(15, 5); // repeats "56789" across the wrap point

    try testing.expectEqualStrings("012345678956789", cb.read());
    try testing.expectEqualStrings("5678956789", cb.read());

    try cb.writeMatch(20, 25);
    try testing.expectEqualStrings("01234567895678956789", cb.read());
}
|
||||
125
lib/std/compress/flate/Lookup.zig
Normal file
125
lib/std/compress/flate/Lookup.zig
Normal file
@@ -0,0 +1,125 @@
|
||||
/// Lookup of the previous locations for the same 4 byte data. Works on hash of
|
||||
/// 4 bytes data. Head contains position of the first match for each hash. Chain
|
||||
/// points to the previous position of the same hash given the current location.
|
||||
///
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const expect = testing.expect;
|
||||
const consts = @import("consts.zig");
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761
|
||||
const chain_len = 2 * consts.history.len;
|
||||
|
||||
// Maps hash => first position
|
||||
head: [consts.lookup.len]u16 = [_]u16{0} ** consts.lookup.len,
|
||||
// Maps position => previous positions for the same hash value
|
||||
chain: [chain_len]u16 = [_]u16{0} ** (chain_len),
|
||||
|
||||
// Calculates hash of the 4 bytes from data.
// Inserts `pos` position of that hash in the lookup tables.
// Returns previous location with the same hash value.
pub fn add(self: *Self, data: []const u8, pos: u16) u16 {
    if (data.len < 4) return 0; // not enough bytes to form a hash
    const h = hash(data[0..4]);
    return self.set(h, pos);
}

// Returns previous location with the same hash value given the current
// position.
pub fn prev(self: *Self, pos: u16) u16 {
    return self.chain[pos];
}

// Make `pos` the new head for hash `h`, chaining to the old head.
// Returns the old head (previous position with the same hash).
fn set(self: *Self, h: u32, pos: u16) u16 {
    const p = self.head[h];
    self.head[h] = pos;
    self.chain[pos] = p;
    return p;
}

// Slide all positions in head and chain for `n`
pub fn slide(self: *Self, n: u16) void {
    for (&self.head) |*v| {
        // saturating: positions that slid out of the window collapse to 0
        v.* -|= n;
    }
    var i: usize = 0;
    // NOTE(review): only chain[0..n] is rewritten here; entries above n keep
    // stale values. Presumably harmless because candidate matches are always
    // re-verified against the window data — confirm against callers.
    while (i < n) : (i += 1) {
        self.chain[i] = self.chain[i + n] -| n;
    }
}
|
||||
|
||||
// Add `len` 4 bytes hashes from `data` into lookup.
// Position of the first byte is `pos`.
pub fn bulkAdd(self: *Self, data: []const u8, len: u16, pos: u16) void {
    if (len == 0 or data.len < consts.match.min_length) {
        return;
    }
    // Seed the rolling 4-byte big-endian window from the first bytes.
    var hb =
        @as(u32, data[3]) |
        @as(u32, data[2]) << 8 |
        @as(u32, data[1]) << 16 |
        @as(u32, data[0]) << 24;
    _ = self.set(hashu(hb), pos);

    var i = pos;
    // Roll the window one byte at a time; insert each successive position.
    for (4..@min(len + 3, data.len)) |j| {
        hb = (hb << 8) | @as(u32, data[j]);
        i += 1;
        _ = self.set(hashu(hb), i);
    }
}

// Calculates hash of the first 4 bytes of `b` (read big-endian).
fn hash(b: *const [4]u8) u32 {
    return hashu(@as(u32, b[3]) |
        @as(u32, b[2]) << 8 |
        @as(u32, b[1]) << 16 |
        @as(u32, b[0]) << 24);
}

// Fibonacci-style multiplicative hash, shifted down to the head-table index width.
fn hashu(v: u32) u32 {
    return @intCast((v *% prime4) >> consts.lookup.shift);
}
|
||||
|
||||
test "flate.Lookup add/prev" {
    // Pattern repeats every 8 bytes, so each position's previous match is 8 back.
    const data = [_]u8{
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03,
    };

    var h: Self = .{};
    for (data, 0..) |_, i| {
        const p = h.add(data[i..], @intCast(i));
        if (i >= 8 and i < 24) {
            try expect(p == i - 8);
        } else {
            try expect(p == 0);
        }
    }

    // Chain for the hash at offset 2 links positions 18 -> 10 -> 2.
    const v = Self.hash(data[2 .. 2 + 4]);
    try expect(h.head[v] == 2 + 16);
    try expect(h.chain[2 + 16] == 2 + 8);
    try expect(h.chain[2 + 8] == 2);
}

test "flate.Lookup bulkAdd" {
    const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";

    // one by one
    var h: Self = .{};
    for (data, 0..) |_, i| {
        _ = h.add(data[i..], @intCast(i));
    }

    // in bulk
    var bh: Self = .{};
    bh.bulkAdd(data, data.len, 0);

    // Both insertion orders must build identical tables.
    try testing.expectEqualSlices(u16, &h.head, &bh.head);
    try testing.expectEqualSlices(u16, &h.chain, &bh.chain);
}
|
||||
160
lib/std/compress/flate/SlidingWindow.zig
Normal file
160
lib/std/compress/flate/SlidingWindow.zig
Normal file
@@ -0,0 +1,160 @@
|
||||
//! Used in deflate (compression), holds uncompressed data from which Tokens are
//! produced. In combination with Lookup it is used to find matches in history data.
|
||||
//!
|
||||
const std = @import("std");
|
||||
const consts = @import("consts.zig");
|
||||
|
||||
const expect = testing.expect;
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
const hist_len = consts.history.len;
|
||||
const buffer_len = 2 * hist_len;
|
||||
const min_lookahead = consts.match.min_length + consts.match.max_length;
|
||||
const max_rp = buffer_len - min_lookahead;
|
||||
|
||||
const Self = @This();
|
||||
|
||||
buffer: [buffer_len]u8 = undefined,
|
||||
wp: usize = 0, // write position
|
||||
rp: usize = 0, // read position
|
||||
fp: isize = 0, // last flush position, tokens are build from fp..rp
|
||||
|
||||
/// Returns number of bytes written, or 0 if buffer is full and need to slide.
pub fn write(self: *Self, buf: []const u8) usize {
    if (self.rp >= max_rp) return 0; // need to slide

    const n = @min(buf.len, buffer_len - self.wp);
    @memcpy(self.buffer[self.wp .. self.wp + n], buf[0..n]);
    self.wp += n;
    return n;
}

/// Slide buffer for hist_len.
/// Drops old history, preserves between hist_len and hist_len - min_lookahead.
/// Returns number of bytes removed.
pub fn slide(self: *Self) u16 {
    assert(self.rp >= max_rp and self.wp >= self.rp);
    const n = self.wp - hist_len;
    @memcpy(self.buffer[0..n], self.buffer[hist_len..self.wp]);
    // All positions shift down by hist_len together.
    self.rp -= hist_len;
    self.wp -= hist_len;
    self.fp -= hist_len;
    return @intCast(n);
}
|
||||
|
||||
/// Data from the current position (read position). Those part of the buffer is
|
||||
/// not converted to tokens yet.
|
||||
fn lookahead(self: *Self) []const u8 {
|
||||
assert(self.wp >= self.rp);
|
||||
return self.buffer[self.rp..self.wp];
|
||||
}
|
||||
|
||||
/// Returns part of the lookahead buffer. If should_flush is set no lookahead is
|
||||
/// preserved otherwise preserves enough data for the longest match. Returns
|
||||
/// null if there is not enough data.
|
||||
pub fn activeLookahead(self: *Self, should_flush: bool) ?[]const u8 {
|
||||
const min: usize = if (should_flush) 0 else min_lookahead;
|
||||
const lh = self.lookahead();
|
||||
return if (lh.len > min) lh else null;
|
||||
}
|
||||
|
||||
/// Advances read position by n bytes, shrinking the lookahead.
pub fn advance(self: *Self, n: u16) void {
    const new_rp = self.rp + n;
    assert(new_rp <= self.wp);
    self.rp = new_rp;
}
|
||||
|
||||
/// Returns writable part of the buffer, where new uncompressed data can be
/// written.
pub fn writable(self: *Self) []u8 {
    return self.buffer[self.wp..];
}

/// Notification of how many bytes of the writable part were filled with data.
pub fn written(self: *Self, n: usize) void {
    self.wp += n;
}
|
||||
|
||||
/// Finds match length between previous and current position.
/// Returns the match length (>= consts.match.min_length) or 0 for no match.
/// Used in hot path!
pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 {
    // Never compare past the write position or the maximum deflate match.
    const max_len: usize = @min(self.wp - curr_pos, consts.match.max_length);
    // lookahead buffers from previous and current positions
    const prev_lh = self.buffer[prev_pos..][0..max_len];
    const curr_lh = self.buffer[curr_pos..][0..max_len];

    // If we already have a match (min_len > 0),
    // test the first byte above the previous len (a[min_len] != b[min_len])
    // and then all the bytes from that position down to zero.
    // These are more likely positions to find a difference than looping
    // from the first byte; a match shorter than min_len is worthless.
    var i: usize = min_len;
    if (i > 0) {
        if (max_len <= i) return 0;
        while (true) {
            if (prev_lh[i] != curr_lh[i]) return 0;
            if (i == 0) break;
            i -= 1;
        }
        i = min_len;
    }
    // Extend the match forward as far as the bytes agree.
    while (i < max_len) : (i += 1)
        if (prev_lh[i] != curr_lh[i]) break;
    return if (i >= consts.match.min_length) @intCast(i) else 0;
}
|
||||
|
||||
/// Current position of non-compressed data. Data before rp is already
/// converted to tokens.
pub fn pos(self: *Self) u16 {
    return @intCast(self.rp);
}

/// Notification that the token list is cleared; subsequent tokens are built
/// from the current read position.
pub fn flush(self: *Self) void {
    self.fp = @intCast(self.rp);
}

/// Part of the buffer since last flush, or null if there was a slide in
/// between (so fp became negative).
pub fn tokensBuffer(self: *Self) ?[]const u8 {
    assert(self.fp <= self.rp);
    if (self.fp < 0) return null;
    return self.buffer[@intCast(self.fp)..self.rp];
}
|
||||
|
||||
test "flate.SlidingWindow match" {
    const data = "Blah blah blah blah blah!";
    var win: Self = .{};
    try expect(win.write(data) == data.len);
    try expect(win.wp == data.len);
    try expect(win.rp == 0);

    // length between l symbols (overlapping matches are allowed in deflate)
    try expect(win.match(1, 6, 0) == 18);
    try expect(win.match(1, 11, 0) == 13);
    try expect(win.match(1, 16, 0) == 8);
    try expect(win.match(1, 21, 0) == 0);

    // position 15 = "blah blah!"
    // position 20 = "blah!"
    try expect(win.match(15, 20, 0) == 4);
    try expect(win.match(15, 20, 3) == 4);
    // a match not longer than min_len is reported as no match
    try expect(win.match(15, 20, 4) == 0);
}

test "flate.SlidingWindow slide" {
    var win: Self = .{};
    win.wp = Self.buffer_len - 11;
    win.rp = Self.buffer_len - 111;
    win.buffer[win.rp] = 0xab;
    try expect(win.lookahead().len == 100);
    try expect(win.tokensBuffer().?.len == win.rp);

    // slide drops hist_len bytes and shifts all positions down
    const n = win.slide();
    try expect(n == 32757);
    try expect(win.buffer[win.rp] == 0xab);
    try expect(win.rp == Self.hist_len - 111);
    try expect(win.wp == Self.hist_len - 11);
    try expect(win.lookahead().len == 100);
    // fp went negative, so the tokens buffer is no longer available
    try expect(win.tokensBuffer() == null);
}
|
||||
327
lib/std/compress/flate/Token.zig
Normal file
327
lib/std/compress/flate/Token.zig
Normal file
@@ -0,0 +1,327 @@
|
||||
//! Token can be literal: single byte of data or match; reference to the slice of
|
||||
//! data in the same stream represented with <length, distance>. Where length
|
||||
//! can be 3 - 258 bytes, and distance 1 - 32768 bytes.
|
||||
//!
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const print = std.debug.print;
|
||||
const expect = std.testing.expect;
|
||||
const consts = @import("consts.zig").match;
|
||||
|
||||
const Token = @This();
|
||||
|
||||
/// A token is either a single literal byte or a <length, distance> match.
pub const Kind = enum(u1) {
    literal,
    match,
};

// Distance range 1 - 32768, stored in dist as 0 - 32767 (fits u15)
dist: u15 = 0,
// Length range 3 - 258, stored in len_lit as 0 - 255 (fits u8);
// for a literal token this field holds the literal byte itself.
len_lit: u8 = 0,
kind: Kind = .literal,
|
||||
|
||||
/// Literal byte value; meaningful when kind == .literal.
pub fn literal(t: Token) u8 {
    return t.len_lit;
}

/// Match distance in range 1 - 32768; meaningful when kind == .match.
pub fn distance(t: Token) u16 {
    return @as(u16, t.dist) + consts.min_distance;
}

/// Match length in range 3 - 258; meaningful when kind == .match.
pub fn length(t: Token) u16 {
    return @as(u16, t.len_lit) + consts.base_length;
}

/// Creates a literal token for a single byte.
pub fn initLiteral(lit: u8) Token {
    return .{ .kind = .literal, .len_lit = lit };
}
|
||||
|
||||
/// Creates a match token. Asserts the arguments are in valid deflate ranges:
/// distance range 1 - 32768, stored in dist as 0 - 32767 (u15);
/// length range 3 - 258, stored in len_lit as 0 - 255 (u8).
pub fn initMatch(dist: u16, len: u16) Token {
    assert(len >= consts.min_length and len <= consts.max_length);
    assert(dist >= consts.min_distance and dist <= consts.max_distance);
    return .{
        .kind = .match,
        .dist = @intCast(dist - consts.min_distance),
        .len_lit = @intCast(len - consts.base_length),
    };
}
|
||||
|
||||
/// Returns true when both tokens have identical kind, distance and length.
pub fn eql(t: Token, o: Token) bool {
    if (t.kind != o.kind) return false;
    return t.dist == o.dist and t.len_lit == o.len_lit;
}
|
||||
|
||||
/// Huffman length code (257-285) for this token's match length.
pub fn lengthCode(t: Token) u16 {
    return match_lengths[match_lengths_index[t.len_lit]].code;
}

/// Length table entry with the extra_length bits filled in for this token.
pub fn lengthEncoding(t: Token) MatchLength {
    var c = match_lengths[match_lengths_index[t.len_lit]];
    c.extra_length = t.len_lit - c.base_scaled;
    return c;
}

// Returns the distance code corresponding to a specific distance.
// Distance code is in range: 0 - 29.
// Distances >= 256 are looked up after shifting by 7 bits (codes 14-27),
// and again by 7 for the largest distances (codes 28-29).
pub fn distanceCode(t: Token) u8 {
    var dist: u16 = t.dist;
    if (dist < match_distances_index.len) {
        return match_distances_index[dist];
    }
    dist >>= 7;
    if (dist < match_distances_index.len) {
        return match_distances_index[dist] + 14;
    }
    dist >>= 7;
    return match_distances_index[dist] + 28;
}

/// Distance table entry with the extra_distance bits filled in for this token.
pub fn distanceEncoding(t: Token) MatchDistance {
    var c = match_distances[t.distanceCode()];
    c.extra_distance = t.dist - c.base_scaled;
    return c;
}
|
||||
|
||||
/// Number of extra bits for the given length code (257-285).
pub fn lengthExtraBits(code: u32) u8 {
    return match_lengths[code - length_codes_start].extra_bits;
}

/// Length table entry for a zero-based length code index (0-28).
pub fn matchLength(code: u8) MatchLength {
    return match_lengths[code];
}

/// Distance table entry for a distance code (0-29).
pub fn matchDistance(code: u8) MatchDistance {
    return match_distances[code];
}

/// Number of extra bits for the given distance code (0-29).
pub fn distanceExtraBits(code: u32) u8 {
    return match_distances[code].extra_bits;
}

// Debug helper: prints the token to stderr.
pub fn show(t: Token) void {
    if (t.kind == .literal) {
        print("L('{c}'), ", .{t.literal()});
    } else {
        print("M({d}, {d}), ", .{ t.distance(), t.length() });
    }
}
|
||||
|
||||
// Returns index in match_lengths table for each length in range 0-255.
const match_lengths_index = [_]u8{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
    9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
    13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
    17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
    18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
    19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
    22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
    22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
    23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
    25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    27, 27, 27, 27, 27, 28,
};

// One row of the length code table.
const MatchLength = struct {
    code: u16,
    base_scaled: u8, // base - 3, scaled to fit into u8 (0-255), same as lit_len field in Token.
    base: u16, // 3-258
    extra_length: u8 = 0,
    extra_bits: u4,
};

// match_lengths represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12)
//
//                   Extra               Extra               Extra
//              Code Bits Length(s) Code Bits Lengths   Code Bits Length(s)
//              ---- ---- ------     ---- ---- -------   ---- ---- -------
//               257   0     3       267   1   15,16     277   4   67-82
//               258   0     4       268   1   17,18     278   4   83-98
//               259   0     5       269   2   19-22     279   4   99-114
//               260   0     6       270   2   23-26     280   4  115-130
//               261   0     7       271   2   27-30     281   5  131-162
//               262   0     8       272   2   31-34     282   5  163-194
//               263   0     9       273   3   35-42     283   5  195-226
//               264   0    10       274   3   43-50     284   5  227-257
//               265   1  11,12      275   3   51-58     285   0    258
//               266   1  13,14      276   3   59-66
//
pub const length_codes_start = 257;

const match_lengths = [_]MatchLength{
    .{ .extra_bits = 0, .base_scaled = 0, .base = 3, .code = 257 },
    .{ .extra_bits = 0, .base_scaled = 1, .base = 4, .code = 258 },
    .{ .extra_bits = 0, .base_scaled = 2, .base = 5, .code = 259 },
    .{ .extra_bits = 0, .base_scaled = 3, .base = 6, .code = 260 },
    .{ .extra_bits = 0, .base_scaled = 4, .base = 7, .code = 261 },
    .{ .extra_bits = 0, .base_scaled = 5, .base = 8, .code = 262 },
    .{ .extra_bits = 0, .base_scaled = 6, .base = 9, .code = 263 },
    .{ .extra_bits = 0, .base_scaled = 7, .base = 10, .code = 264 },
    .{ .extra_bits = 1, .base_scaled = 8, .base = 11, .code = 265 },
    .{ .extra_bits = 1, .base_scaled = 10, .base = 13, .code = 266 },
    .{ .extra_bits = 1, .base_scaled = 12, .base = 15, .code = 267 },
    .{ .extra_bits = 1, .base_scaled = 14, .base = 17, .code = 268 },
    .{ .extra_bits = 2, .base_scaled = 16, .base = 19, .code = 269 },
    .{ .extra_bits = 2, .base_scaled = 20, .base = 23, .code = 270 },
    .{ .extra_bits = 2, .base_scaled = 24, .base = 27, .code = 271 },
    .{ .extra_bits = 2, .base_scaled = 28, .base = 31, .code = 272 },
    .{ .extra_bits = 3, .base_scaled = 32, .base = 35, .code = 273 },
    .{ .extra_bits = 3, .base_scaled = 40, .base = 43, .code = 274 },
    .{ .extra_bits = 3, .base_scaled = 48, .base = 51, .code = 275 },
    .{ .extra_bits = 3, .base_scaled = 56, .base = 59, .code = 276 },
    .{ .extra_bits = 4, .base_scaled = 64, .base = 67, .code = 277 },
    .{ .extra_bits = 4, .base_scaled = 80, .base = 83, .code = 278 },
    .{ .extra_bits = 4, .base_scaled = 96, .base = 99, .code = 279 },
    .{ .extra_bits = 4, .base_scaled = 112, .base = 115, .code = 280 },
    .{ .extra_bits = 5, .base_scaled = 128, .base = 131, .code = 281 },
    .{ .extra_bits = 5, .base_scaled = 160, .base = 163, .code = 282 },
    .{ .extra_bits = 5, .base_scaled = 192, .base = 195, .code = 283 },
    .{ .extra_bits = 5, .base_scaled = 224, .base = 227, .code = 284 },
    .{ .extra_bits = 0, .base_scaled = 255, .base = 258, .code = 285 },
};
|
||||
|
||||
// Used in distanceCode fn to get index in match_distance table for each distance in range 0-32767.
const match_distances_index = [_]u8{
    0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
    8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
};

// One row of the distance code table.
const MatchDistance = struct {
    base_scaled: u16, // base - 1, same as Token dist field
    base: u16,
    extra_distance: u16 = 0,
    code: u8,
    extra_bits: u4,
};

// match_distances represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12)
//
//                 Extra           Extra                Extra
//            Code Bits Dist  Code Bits   Dist     Code Bits Distance
//            ---- ---- ----  ---- ----  ------    ---- ---- --------
//              0   0    1     10   4     33-48    20    9   1025-1536
//              1   0    2     11   4     49-64    21    9   1537-2048
//              2   0    3     12   5     65-96    22   10   2049-3072
//              3   0    4     13   5     97-128   23   10   3073-4096
//              4   1   5,6    14   6    129-192   24   11   4097-6144
//              5   1   7,8    15   6    193-256   25   11   6145-8192
//              6   2   9-12   16   7    257-384   26   12  8193-12288
//              7   2  13-16   17   7    385-512   27   12 12289-16384
//              8   3  17-24   18   8    513-768   28   13 16385-24576
//              9   3  25-32   19   8   769-1024   29   13 24577-32768
//
const match_distances = [_]MatchDistance{
    .{ .extra_bits = 0, .base_scaled = 0x0000, .code = 0, .base = 1 },
    .{ .extra_bits = 0, .base_scaled = 0x0001, .code = 1, .base = 2 },
    .{ .extra_bits = 0, .base_scaled = 0x0002, .code = 2, .base = 3 },
    .{ .extra_bits = 0, .base_scaled = 0x0003, .code = 3, .base = 4 },
    .{ .extra_bits = 1, .base_scaled = 0x0004, .code = 4, .base = 5 },
    .{ .extra_bits = 1, .base_scaled = 0x0006, .code = 5, .base = 7 },
    .{ .extra_bits = 2, .base_scaled = 0x0008, .code = 6, .base = 9 },
    .{ .extra_bits = 2, .base_scaled = 0x000c, .code = 7, .base = 13 },
    .{ .extra_bits = 3, .base_scaled = 0x0010, .code = 8, .base = 17 },
    .{ .extra_bits = 3, .base_scaled = 0x0018, .code = 9, .base = 25 },
    .{ .extra_bits = 4, .base_scaled = 0x0020, .code = 10, .base = 33 },
    .{ .extra_bits = 4, .base_scaled = 0x0030, .code = 11, .base = 49 },
    .{ .extra_bits = 5, .base_scaled = 0x0040, .code = 12, .base = 65 },
    .{ .extra_bits = 5, .base_scaled = 0x0060, .code = 13, .base = 97 },
    .{ .extra_bits = 6, .base_scaled = 0x0080, .code = 14, .base = 129 },
    .{ .extra_bits = 6, .base_scaled = 0x00c0, .code = 15, .base = 193 },
    .{ .extra_bits = 7, .base_scaled = 0x0100, .code = 16, .base = 257 },
    .{ .extra_bits = 7, .base_scaled = 0x0180, .code = 17, .base = 385 },
    .{ .extra_bits = 8, .base_scaled = 0x0200, .code = 18, .base = 513 },
    .{ .extra_bits = 8, .base_scaled = 0x0300, .code = 19, .base = 769 },
    .{ .extra_bits = 9, .base_scaled = 0x0400, .code = 20, .base = 1025 },
    .{ .extra_bits = 9, .base_scaled = 0x0600, .code = 21, .base = 1537 },
    .{ .extra_bits = 10, .base_scaled = 0x0800, .code = 22, .base = 2049 },
    .{ .extra_bits = 10, .base_scaled = 0x0c00, .code = 23, .base = 3073 },
    .{ .extra_bits = 11, .base_scaled = 0x1000, .code = 24, .base = 4097 },
    .{ .extra_bits = 11, .base_scaled = 0x1800, .code = 25, .base = 6145 },
    .{ .extra_bits = 12, .base_scaled = 0x2000, .code = 26, .base = 8193 },
    .{ .extra_bits = 12, .base_scaled = 0x3000, .code = 27, .base = 12289 },
    .{ .extra_bits = 13, .base_scaled = 0x4000, .code = 28, .base = 16385 },
    .{ .extra_bits = 13, .base_scaled = 0x6000, .code = 29, .base = 24577 },
};
|
||||
|
||||
test "flate.Token size" {
    try expect(@sizeOf(Token) == 4);
}

// testing table https://datatracker.ietf.org/doc/html/rfc1951#page-12
test "flate.Token MatchLength" {
    var c = Token.initMatch(1, 4).lengthEncoding();
    try expect(c.code == 258);
    try expect(c.extra_bits == 0);
    try expect(c.extra_length == 0);

    c = Token.initMatch(1, 11).lengthEncoding();
    try expect(c.code == 265);
    try expect(c.extra_bits == 1);
    try expect(c.extra_length == 0);

    c = Token.initMatch(1, 12).lengthEncoding();
    try expect(c.code == 265);
    try expect(c.extra_bits == 1);
    try expect(c.extra_length == 1);

    c = Token.initMatch(1, 130).lengthEncoding();
    try expect(c.code == 280);
    try expect(c.extra_bits == 4);
    try expect(c.extra_length == 130 - 115);
}

test "flate.Token MatchDistance" {
    var c = Token.initMatch(1, 4).distanceEncoding();
    try expect(c.code == 0);
    try expect(c.extra_bits == 0);
    try expect(c.extra_distance == 0);

    c = Token.initMatch(192, 4).distanceEncoding();
    try expect(c.code == 14);
    try expect(c.extra_bits == 6);
    try expect(c.extra_distance == 192 - 129);
}

// Checks internal consistency of both the length and the distance tables.
test "flate.Token match_lengths" {
    for (match_lengths, 0..) |ml, i| {
        try expect(@as(u16, ml.base_scaled) + 3 == ml.base);
        try expect(i + 257 == ml.code);
    }

    for (match_distances, 0..) |mo, i| {
        try expect(mo.base_scaled + 1 == mo.base);
        try expect(i == mo.code);
    }
}
|
||||
333
lib/std/compress/flate/bit_reader.zig
Normal file
333
lib/std/compress/flate/bit_reader.zig
Normal file
@@ -0,0 +1,333 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
/// Creates a BitReader wrapping the given underlying reader.
pub fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) {
    return BitReader(@TypeOf(reader)).init(reader);
}
|
||||
|
||||
/// Bit reader used during inflate (decompression). Has internal buffer of 64
/// bits which shifts right after bits are consumed. Uses forward_reader to fill
/// that internal buffer when needed.
///
/// readF is the core function. Supports a few different ways of getting bits
/// controlled by flags. In hot path we try to avoid checking whether we need to
/// fill buffer from forward_reader by calling fill in advance and readF with
/// buffered flag set.
///
pub fn BitReader(comptime ReaderType: type) type {
    return struct {
        // Underlying reader used for filling internal bits buffer
        forward_reader: ReaderType = undefined,
        // Internal buffer of 64 bits
        bits: u64 = 0,
        // Number of bits in the buffer
        nbits: u32 = 0,

        const Self = @This();

        pub const Error = ReaderType.Error || error{EndOfStream};

        pub fn init(rdr: ReaderType) Self {
            var self = Self{ .forward_reader = rdr };
            // Best-effort prefill; an empty stream surfaces later as EndOfStream.
            self.fill(1) catch {};
            return self;
        }

        /// Try to have `nice` bits available in buffer. Reads from
        /// forward reader if there are no `nice` bits in buffer. Returns error
        /// if end of forward stream is reached and internal buffer is empty.
        /// It will not error if less than `nice` bits are in buffer, only when
        /// all bits are exhausted. During inflate we usually know what is the
        /// maximum bits for the next step but usually that step will need less
        /// bits to decode. So `nice` is not a hard limit, it will just try to
        /// have that number of bits available. If end of forward stream is
        /// reached there may be some extra zero bits in buffer.
        pub inline fn fill(self: *Self, nice: u6) !void {
            if (self.nbits >= nice) {
                return; // We have enough bits
            }
            // Read more bits from forward reader

            // Number of empty bytes in bits, round nbits to whole bytes.
            const empty_bytes =
                @as(u8, if (self.nbits & 0x7 == 0) 8 else 7) - // 8 for 8, 16, 24..., 7 otherwise
                (self.nbits >> 3); // 0 for 0-7, 1 for 8-16, ... same as / 8

            var buf: [8]u8 = [_]u8{0} ** 8;
            const bytes_read = self.forward_reader.readAll(buf[0..empty_bytes]) catch 0;
            if (bytes_read > 0) {
                const u: u64 = std.mem.readInt(u64, buf[0..8], .little);
                self.bits |= u << @as(u6, @intCast(self.nbits));
                self.nbits += 8 * @as(u8, @intCast(bytes_read));
                return;
            }

            if (self.nbits == 0)
                return error.EndOfStream;
        }

        /// Read exactly buf.len bytes into buf.
        pub fn readAll(self: *Self, buf: []u8) !void {
            assert(self.alignBits() == 0); // internal bits must be at byte boundary

            // First read from internal bits buffer.
            var n: usize = 0;
            while (self.nbits > 0 and n < buf.len) {
                buf[n] = try self.readF(u8, flag.buffered);
                n += 1;
            }
            // Then use forward reader for all other bytes.
            try self.forward_reader.readNoEof(buf[n..]);
        }

        pub const flag = struct {
            pub const peek: u3 = 0b001; // don't advance internal buffer, just get bits, leave them in buffer
            pub const buffered: u3 = 0b010; // assume that there is no need to fill, fill should be called before
            pub const reverse: u3 = 0b100; // bit reverse the read bits
        };

        /// Alias for readF(U, 0).
        pub fn read(self: *Self, comptime U: type) !U {
            return self.readF(U, 0);
        }

        /// Alias for readF with flag.peek set.
        pub inline fn peekF(self: *Self, comptime U: type, comptime how: u3) !U {
            return self.readF(U, how | flag.peek)
;
        }

        /// Read with flags provided.
        pub fn readF(self: *Self, comptime U: type, comptime how: u3) !U {
            const n: u6 = @bitSizeOf(U);
            switch (how) {
                0 => { // `normal` read
                    try self.fill(n); // ensure that there are n bits in the buffer
                    const u: U = @truncate(self.bits); // get n bits
                    try self.shift(n); // advance buffer for n
                    return u;
                },
                (flag.peek) => { // no shift, leave bits in the buffer
                    try self.fill(n);
                    return @truncate(self.bits);
                },
                flag.buffered => { // no fill, assume that buffer has enough bits
                    const u: U = @truncate(self.bits);
                    try self.shift(n);
                    return u;
                },
                (flag.reverse) => { // same as 0 with bit reverse
                    try self.fill(n);
                    const u: U = @truncate(self.bits);
                    try self.shift(n);
                    return @bitReverse(u);
                },
                (flag.peek | flag.reverse) => {
                    try self.fill(n);
                    return @bitReverse(@as(U, @truncate(self.bits)));
                },
                (flag.buffered | flag.reverse) => {
                    const u: U = @truncate(self.bits);
                    try self.shift(n);
                    return @bitReverse(u);
                },
                (flag.peek | flag.buffered) => {
                    return @truncate(self.bits);
                },
                (flag.peek | flag.buffered | flag.reverse) => {
                    return @bitReverse(@as(U, @truncate(self.bits)));
                },
            }
        }

        /// Read n number of bits.
        /// Only buffered flag can be used in how.
        pub fn readN(self: *Self, n: u4, comptime how: u3) !u16 {
            switch (how) {
                0 => {
                    try self.fill(n);
                },
                flag.buffered => {},
                else => unreachable,
            }
            const mask: u16 = (@as(u16, 1) << n) - 1;
            const u: u16 = @as(u16, @truncate(self.bits)) & mask;
            try self.shift(n);
            return u;
        }

        /// Advance buffer for n bits.
        pub fn shift(self: *Self, n: u6) !void {
            if (n > self.nbits) return error.EndOfStream;
            self.bits >>= n;
            self.nbits -= n;
        }

        /// Skip n bytes.
        pub fn skipBytes(self: *Self, n: u16) !void {
            for (0..n) |_| {
                try self.fill(8);
                try self.shift(8);
            }
        }

        // Number of bits to align stream to the byte boundary.
        fn alignBits(self: *Self) u3 {
            return @intCast(self.nbits & 0x7);
        }

        /// Align stream to the byte boundary.
        pub fn alignToByte(self: *Self) void {
            const ab = self.alignBits();
            if (ab > 0) self.shift(ab) catch unreachable;
        }

        /// Skip zero terminated string.
        pub fn skipStringZ(self: *Self) !void {
            while (true) {
                if (try self.readF(u8, 0) == 0) break;
            }
        }

        /// Read deflate fixed code.
        /// Reads first 7 bits, and then maybe 1 or 2 more to get full 7, 8 or 9 bit code.
        /// ref: https://datatracker.ietf.org/doc/html/rfc1951#page-12
        ///         Lit Value    Bits        Codes
        ///          ---------    ----        -----
        ///            0 - 143     8          00110000 through
        ///                                   10111111
        ///          144 - 255     9          110010000 through
        ///                                   111111111
        ///          256 - 279     7          0000000 through
        ///                                   0010111
        ///          280 - 287     8          11000000 through
        ///                                   11000111
        pub fn readFixedCode(self: *Self) !u16 {
            try self.fill(7 + 2);
            const code7 = try self.readF(u7, flag.buffered | flag.reverse);
            if (code7 <= 0b0010_111) { // 7 bits, 256-279, codes 0000_000 - 0010_111
                return @as(u16, code7) + 256;
            } else if (code7 <= 0b1011_111) { // 8 bits, 0-143, codes 0011_0000 through 1011_1111
                return (@as(u16, code7) << 1) + @as(u16, try self.readF(u1, flag.buffered)) - 0b0011_0000;
            } else if (code7 <= 0b1100_011) { // 8 bit, 280-287, codes 1100_0000 - 1100_0111
                return (@as(u16, code7 - 0b1100000) << 1) + try self.readF(u1, flag.buffered) + 280;
            } else { // 9 bit, 144-255, codes 1_1001_0000 - 1_1111_1111
                return (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, try self.readF(u2, flag.buffered | flag.reverse)) + 144;
            }
        }
    };
}
|
||||
|
||||
test "flate.BitReader" {
    var fbs = std.io.fixedBufferStream(&[_]u8{ 0xf3, 0x48, 0xcd, 0xc9, 0x00, 0x00 });
    var br = bitReader(fbs.reader());
    const F = BitReader(@TypeOf(fbs.reader())).flag;

    // init prefills the internal buffer with all 6 bytes (48 bits)
    try testing.expectEqual(@as(u8, 48), br.nbits);
    try testing.expectEqual(@as(u64, 0xc9cd48f3), br.bits);

    try testing.expect(try br.readF(u1, 0) == 0b0000_0001);
    try testing.expect(try br.readF(u2, 0) == 0b0000_0001);
    try testing.expectEqual(@as(u8, 48 - 3), br.nbits);
    try testing.expectEqual(@as(u3, 5), br.alignBits());

    // peek does not consume bits
    try testing.expect(try br.readF(u8, F.peek) == 0b0001_1110);
    try testing.expect(try br.readF(u9, F.peek) == 0b1_0001_1110);
    try br.shift(9);
    try testing.expectEqual(@as(u8, 36), br.nbits);
    try testing.expectEqual(@as(u3, 4), br.alignBits());

    try testing.expect(try br.readF(u4, 0) == 0b0100);
    try testing.expectEqual(@as(u8, 32), br.nbits);
    try testing.expectEqual(@as(u3, 0), br.alignBits());

    try br.shift(1);
    try testing.expectEqual(@as(u3, 7), br.alignBits());
    try br.shift(1);
    try testing.expectEqual(@as(u3, 6), br.alignBits());
    br.alignToByte();
    try testing.expectEqual(@as(u3, 0), br.alignBits());

    try testing.expectEqual(@as(u64, 0xc9), br.bits);
    try testing.expectEqual(@as(u16, 0x9), try br.readN(4, 0));
    try testing.expectEqual(@as(u16, 0xc), try br.readN(4, 0));
}

test "flate.BitReader read block type 1 data" {
    const data = [_]u8{
        0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
        0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
        0x0c, 0x01, 0x02, 0x03, //
        0xaa, 0xbb, 0xcc, 0xdd,
    };
    var fbs = std.io.fixedBufferStream(&data);
    var br = bitReader(fbs.reader());
    const F = BitReader(@TypeOf(fbs.reader())).flag;

    try testing.expectEqual(@as(u1, 1), try br.readF(u1, 0)); // bfinal
    try testing.expectEqual(@as(u2, 1), try br.readF(u2, 0)); // block_type

    // fixed-code literals are stored bit-reversed with a 0x30 offset
    for ("Hello world\n") |c| {
        try testing.expectEqual(@as(u8, c), try br.readF(u8, F.reverse) - 0x30);
    }
    try testing.expectEqual(@as(u7, 0), try br.readF(u7, 0)); // end of block
    br.alignToByte();
    try testing.expectEqual(@as(u32, 0x0302010c), try br.readF(u32, 0));
    try testing.expectEqual(@as(u16, 0xbbaa), try br.readF(u16, 0));
    try testing.expectEqual(@as(u16, 0xddcc), try br.readF(u16, 0));
}

test "flate.BitReader init" {
    const data = [_]u8{
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
    };
    var fbs = std.io.fixedBufferStream(&data);
    var br = bitReader(fbs.reader());

    try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits);
    try br.shift(8);
    try testing.expectEqual(@as(u64, 0x00_08_07_06_05_04_03_02), br.bits);
    try br.fill(60); // fill with 1 byte
    try testing.expectEqual(@as(u64, 0x01_08_07_06_05_04_03_02), br.bits);
    try br.shift(8 * 4 + 4);
    try testing.expectEqual(@as(u64, 0x00_00_00_00_00_10_80_70), br.bits);

    try br.fill(60); // fill with 4 bytes (shift by 4)
    try testing.expectEqual(@as(u64, 0x00_50_40_30_20_10_80_70), br.bits);
    try testing.expectEqual(@as(u8, 8 * 7 + 4), br.nbits);

    try br.shift(@intCast(br.nbits)); // clear buffer
    try br.fill(8); // refill with the rest of the bytes
    try testing.expectEqual(@as(u64, 0x00_00_00_00_00_08_07_06), br.bits);
}

test "flate.BitReader readAll" {
    const data = [_]u8{
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
    };
    var fbs = std.io.fixedBufferStream(&data);
    var br = bitReader(fbs.reader());

    try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits);

    var out: [16]u8 = undefined;
    try br.readAll(out[0..]);
    try testing.expect(br.nbits == 0);
    try testing.expect(br.bits == 0);

    try testing.expectEqualSlices(u8, data[0..16], &out);
}

test "flate.BitReader readFixedCode" {
    const fixed_codes = @import("huffman_encoder.zig").fixed_codes;

    var fbs = std.io.fixedBufferStream(&fixed_codes);
    var rdr = bitReader(fbs.reader());

    for (0..286) |c| {
        try testing.expectEqual(c, try rdr.readFixedCode());
    }
    try testing.expect(rdr.nbits == 0);
}
|
||||
99
lib/std/compress/flate/bit_writer.zig
Normal file
99
lib/std/compress/flate/bit_writer.zig
Normal file
@@ -0,0 +1,99 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
/// Bit writer for use in deflate (compression).
///
/// Has an internal bits buffer of 64 bits and an internal bytes buffer of 248
/// bytes. When we accumulate 48 bits, 6 bytes are moved to the bytes buffer.
/// When we accumulate 240 bytes they are flushed to the underlying
/// inner_writer.
///
pub fn BitWriter(comptime WriterType: type) type {
    // buffer_flush_size indicates the buffer size
    // after which bytes are flushed to the writer.
    // Should preferably be a multiple of 6, since
    // we accumulate 6 bytes between writes to the buffer.
    const buffer_flush_size = 240;

    // buffer_size is the actual output byte buffer size.
    // It must have additional headroom for a flush
    // which can contain up to 8 bytes.
    const buffer_size = buffer_flush_size + 8;

    return struct {
        inner_writer: WriterType,

        // Data waiting to be written is bytes[0 .. nbytes]
        // and then the low nbits of bits. Data is always written
        // sequentially into the bytes array.
        bits: u64 = 0,
        nbits: u32 = 0, // number of bits
        bytes: [buffer_size]u8 = undefined,
        nbytes: u32 = 0, // number of bytes

        const Self = @This();

        pub const Error = WriterType.Error || error{UnfinishedBits};

        pub fn init(writer: WriterType) Self {
            return .{ .inner_writer = writer };
        }

        /// Replace the underlying writer, allowing this writer to be reused.
        /// Buffered state is not checked here (see the commented assert), so
        /// callers should flush before switching writers.
        pub fn setWriter(self: *Self, new_writer: WriterType) void {
            //assert(self.bits == 0 and self.nbits == 0 and self.nbytes == 0);
            self.inner_writer = new_writer;
        }

        /// Write all pending full and partial bytes to the inner writer and
        /// reset the internal buffers. A trailing partial byte is zero-padded
        /// in its high bits.
        pub fn flush(self: *Self) Error!void {
            var n = self.nbytes;
            while (self.nbits != 0) {
                self.bytes[n] = @as(u8, @truncate(self.bits));
                self.bits >>= 8;
                if (self.nbits > 8) { // Avoid underflow
                    self.nbits -= 8;
                } else {
                    self.nbits = 0;
                }
                n += 1;
            }
            self.bits = 0;
            _ = try self.inner_writer.write(self.bytes[0..n]);
            self.nbytes = 0;
        }

        /// Append the low `nb` bits of `b` to the bit accumulator (LSB-first,
        /// as required by deflate). Once 48 or more bits are accumulated,
        /// 6 bytes are moved to the bytes buffer, which in turn is written
        /// out when it reaches buffer_flush_size.
        pub fn writeBits(self: *Self, b: u32, nb: u32) Error!void {
            self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits));
            self.nbits += nb;
            if (self.nbits < 48)
                return;

            var n = self.nbytes;
            // Write all 8 accumulator bytes; only 6 are consumed. The extra
            // 2 land in the headroom and are overwritten on the next store.
            std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little);
            n += 6;
            if (n >= buffer_flush_size) {
                _ = try self.inner_writer.write(self.bytes[0..n]);
                n = 0;
            }
            self.nbytes = n;
            self.bits >>= 48;
            self.nbits -= 48;
        }

        /// Write whole bytes directly to the stream. The bit stream must be
        /// byte-aligned, otherwise error.UnfinishedBits is returned. Any
        /// buffered bytes are flushed first so output order is preserved.
        pub fn writeBytes(self: *Self, bytes: []const u8) Error!void {
            var n = self.nbytes;
            if (self.nbits & 7 != 0) {
                return error.UnfinishedBits;
            }
            // Drain whole bytes remaining in the bit accumulator.
            while (self.nbits != 0) {
                self.bytes[n] = @as(u8, @truncate(self.bits));
                self.bits >>= 8;
                self.nbits -= 8;
                n += 1;
            }
            if (n != 0) {
                _ = try self.inner_writer.write(self.bytes[0..n]);
            }
            self.nbytes = 0;
            _ = try self.inner_writer.write(bytes);
        }
    };
}
|
||||
706
lib/std/compress/flate/block_writer.zig
Normal file
706
lib/std/compress/flate/block_writer.zig
Normal file
@@ -0,0 +1,706 @@
|
||||
const std = @import("std");
|
||||
const io = std.io;
|
||||
const assert = std.debug.assert;
|
||||
|
||||
const hc = @import("huffman_encoder.zig");
|
||||
const consts = @import("consts.zig").huffman;
|
||||
const Token = @import("Token.zig");
|
||||
const BitWriter = @import("bit_writer.zig").BitWriter;
|
||||
|
||||
/// Creates a BlockWriter for the given writer, inferring its type.
pub fn blockWriter(writer: anytype) BlockWriter(@TypeOf(writer)) {
    return BlockWriter(@TypeOf(writer)).init(writer);
}
|
||||
|
||||
/// Accepts list of tokens, decides what is best block type to write. What block
/// type will provide best compression. Writes header and body of the block.
///
pub fn BlockWriter(comptime WriterType: type) type {
    const BitWriterType = BitWriter(WriterType);
    return struct {
        const codegen_order = consts.codegen_order;
        // Marker byte terminating the codegen array; never a valid code size.
        const end_code_mark = 255;
        const Self = @This();

        pub const Error = BitWriterType.Error;
        bit_writer: BitWriterType,

        // Frequency tables and encoders, reused across blocks.
        codegen_freq: [consts.codegen_code_count]u16 = undefined,
        literal_freq: [consts.max_num_lit]u16 = undefined,
        distance_freq: [consts.distance_code_count]u16 = undefined,
        codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined,
        literal_encoding: hc.LiteralEncoder = .{},
        distance_encoding: hc.DistanceEncoder = .{},
        codegen_encoding: hc.CodegenEncoder = .{},
        fixed_literal_encoding: hc.LiteralEncoder,
        fixed_distance_encoding: hc.DistanceEncoder,
        huff_distance: hc.DistanceEncoder,

        pub fn init(writer: WriterType) Self {
            return .{
                .bit_writer = BitWriterType.init(writer),
                .fixed_literal_encoding = hc.fixedLiteralEncoder(),
                .fixed_distance_encoding = hc.fixedDistanceEncoder(),
                .huff_distance = hc.huffmanDistanceEncoder(),
            };
        }

        /// Flush internal bit buffer to the writer.
        /// Should be called only when bit stream is at byte boundary.
        ///
        /// That is after final block; when last byte could be incomplete or
        /// after stored block; which is aligned to the byte boundary (it has x
        /// padding bits after first 3 bits).
        pub fn flush(self: *Self) Error!void {
            try self.bit_writer.flush();
        }

        /// Replace the underlying writer (see BitWriter.setWriter).
        pub fn setWriter(self: *Self, new_writer: WriterType) void {
            self.bit_writer.setWriter(new_writer);
        }

        // Write a single Huffman code (code bits, LSB-first).
        fn writeCode(self: *Self, c: hc.HuffCode) Error!void {
            try self.bit_writer.writeBits(c.code, c.len);
        }

        // RFC 1951 3.2.7 specifies a special run-length encoding for specifying
        // the literal and distance lengths arrays (which are concatenated into a single
        // array). This method generates that run-length encoding.
        //
        // The result is written into the codegen array, and the frequencies
        // of each code is written into the codegen_freq array.
        // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
        // information. Code bad_code is an end marker.
        //
        // num_literals: The number of literals in literal_encoding
        // num_distances: The number of distances in distance_encoding
        // lit_enc: The literal encoder to use
        // dist_enc: The distance encoder to use
        fn generateCodegen(
            self: *Self,
            num_literals: u32,
            num_distances: u32,
            lit_enc: *hc.LiteralEncoder,
            dist_enc: *hc.DistanceEncoder,
        ) void {
            for (self.codegen_freq, 0..) |_, i| {
                self.codegen_freq[i] = 0;
            }

            // Note that we are using codegen both as a temporary variable for holding
            // a copy of the frequencies, and as the place where we put the result.
            // This is fine because the output is always shorter than the input used
            // so far.
            var codegen = &self.codegen; // cache
            // Copy the concatenated code sizes to codegen. Put a marker at the end.
            var cgnl = codegen[0..num_literals];
            for (cgnl, 0..) |_, i| {
                cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
            }

            cgnl = codegen[num_literals .. num_literals + num_distances];
            for (cgnl, 0..) |_, i| {
                cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
            }
            codegen[num_literals + num_distances] = end_code_mark;

            var size = codegen[0];
            var count: i32 = 1;
            var out_index: u32 = 0;
            var in_index: u32 = 1;
            while (size != end_code_mark) : (in_index += 1) {
                // INVARIANT: We have seen "count" copies of size that have not yet
                // had output generated for them.
                const next_size = codegen[in_index];
                if (next_size == size) {
                    count += 1;
                    continue;
                }
                // We need to generate codegen indicating "count" of size.
                if (size != 0) {
                    codegen[out_index] = size;
                    out_index += 1;
                    self.codegen_freq[size] += 1;
                    count -= 1;
                    // Code 16 repeats the previous code 3..6 times.
                    while (count >= 3) {
                        var n: i32 = 6;
                        if (n > count) {
                            n = count;
                        }
                        codegen[out_index] = 16;
                        out_index += 1;
                        codegen[out_index] = @as(u8, @intCast(n - 3));
                        out_index += 1;
                        self.codegen_freq[16] += 1;
                        count -= n;
                    }
                } else {
                    // Code 18 emits 11..138 zeros.
                    while (count >= 11) {
                        var n: i32 = 138;
                        if (n > count) {
                            n = count;
                        }
                        codegen[out_index] = 18;
                        out_index += 1;
                        codegen[out_index] = @as(u8, @intCast(n - 11));
                        out_index += 1;
                        self.codegen_freq[18] += 1;
                        count -= n;
                    }
                    // Code 17 emits 3..10 zeros.
                    if (count >= 3) {
                        // 3 <= count <= 10
                        codegen[out_index] = 17;
                        out_index += 1;
                        codegen[out_index] = @as(u8, @intCast(count - 3));
                        out_index += 1;
                        self.codegen_freq[17] += 1;
                        count = 0;
                    }
                }
                count -= 1;
                // Remaining short runs are emitted literally.
                while (count >= 0) : (count -= 1) {
                    codegen[out_index] = size;
                    out_index += 1;
                    self.codegen_freq[size] += 1;
                }
                // Set up invariant for next time through the loop.
                size = next_size;
                count = 1;
            }
            // Marker indicating the end of the codegen.
            codegen[out_index] = end_code_mark;
        }

        const DynamicSize = struct {
            size: u32,
            num_codegens: u32,
        };

        // dynamicSize returns the size of dynamically encoded data in bits.
        fn dynamicSize(
            self: *Self,
            lit_enc: *hc.LiteralEncoder, // literal encoder
            dist_enc: *hc.DistanceEncoder, // distance encoder
            extra_bits: u32,
        ) DynamicSize {
            // Trailing zero-frequency codegen codes (in the RFC's odd order)
            // can be omitted from the header; at least 4 must be kept.
            var num_codegens = self.codegen_freq.len;
            while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
                num_codegens -= 1;
            }
            const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
                self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
                self.codegen_freq[16] * 2 +
                self.codegen_freq[17] * 3 +
                self.codegen_freq[18] * 7;
            const size = header +
                lit_enc.bitLength(&self.literal_freq) +
                dist_enc.bitLength(&self.distance_freq) +
                extra_bits;

            return DynamicSize{
                .size = @as(u32, @intCast(size)),
                .num_codegens = @as(u32, @intCast(num_codegens)),
            };
        }

        // fixedSize returns the size of fixed-Huffman encoded data in bits.
        fn fixedSize(self: *Self, extra_bits: u32) u32 {
            return 3 +
                self.fixed_literal_encoding.bitLength(&self.literal_freq) +
                self.fixed_distance_encoding.bitLength(&self.distance_freq) +
                extra_bits;
        }

        const StoredSize = struct {
            size: u32,
            storable: bool,
        };

        // storedSizeFits calculates the stored size, including header.
        // The function returns the size in bits and whether the block
        // fits inside a single block.
        fn storedSizeFits(in: ?[]const u8) StoredSize {
            if (in == null) {
                return .{ .size = 0, .storable = false };
            }
            if (in.?.len <= consts.max_store_block_size) {
                // 5 header bytes: 3-bit header rounded up + LEN + NLEN.
                return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true };
            }
            return .{ .size = 0, .storable = false };
        }

        // Write the header of a dynamic Huffman block to the output stream.
        //
        // num_literals: The number of literals specified in codegen
        // num_distances: The number of distances specified in codegen
        // num_codegens: The number of codegens used in codegen
        // eof: Is it the end-of-file? (end of stream)
        fn dynamicHeader(
            self: *Self,
            num_literals: u32,
            num_distances: u32,
            num_codegens: u32,
            eof: bool,
        ) Error!void {
            const first_bits: u32 = if (eof) 5 else 4; // BFINAL bit + BTYPE=10 (dynamic)
            try self.bit_writer.writeBits(first_bits, 3);
            try self.bit_writer.writeBits(num_literals - 257, 5); // HLIT
            try self.bit_writer.writeBits(num_distances - 1, 5); // HDIST
            try self.bit_writer.writeBits(num_codegens - 4, 4); // HCLEN

            // Code-length code sizes, 3 bits each, in codegen_order.
            var i: u32 = 0;
            while (i < num_codegens) : (i += 1) {
                const value = self.codegen_encoding.codes[codegen_order[i]].len;
                try self.bit_writer.writeBits(value, 3);
            }

            // The run-length-encoded code lengths themselves.
            i = 0;
            while (true) {
                const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
                i += 1;
                if (code_word == end_code_mark) {
                    break;
                }
                try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);

                // Codes 16/17/18 carry extra repeat-count bits.
                switch (code_word) {
                    16 => {
                        try self.bit_writer.writeBits(self.codegen[i], 2);
                        i += 1;
                    },
                    17 => {
                        try self.bit_writer.writeBits(self.codegen[i], 3);
                        i += 1;
                    },
                    18 => {
                        try self.bit_writer.writeBits(self.codegen[i], 7);
                        i += 1;
                    },
                    else => {},
                }
            }
        }

        // Write a stored (uncompressed) block header: 3 header bits, padding
        // to a byte boundary, then LEN and its one's complement (RFC 1951 3.2.4).
        fn storedHeader(self: *Self, length: usize, eof: bool) Error!void {
            assert(length <= 65535);
            const flag: u32 = if (eof) 1 else 0; // BFINAL bit + BTYPE=00 (stored)
            try self.bit_writer.writeBits(flag, 3);
            try self.flush();
            const l: u16 = @intCast(length);
            try self.bit_writer.writeBits(l, 16);
            try self.bit_writer.writeBits(~l, 16);
        }

        fn fixedHeader(self: *Self, eof: bool) Error!void {
            // Indicate that we are a fixed Huffman block
            var value: u32 = 2; // BFINAL bit + BTYPE=01 (fixed)
            if (eof) {
                value = 3;
            }
            try self.bit_writer.writeBits(value, 3);
        }

        // Write a block of tokens with the smallest encoding. Will choose block type.
        // The original input can be supplied, and if the huffman encoded data
        // is larger than the original bytes, the data will be written as a
        // stored block.
        // If the input is null, the tokens will always be Huffman encoded.
        pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) Error!void {
            const lit_and_dist = self.indexTokens(tokens);
            const num_literals = lit_and_dist.num_literals;
            const num_distances = lit_and_dist.num_distances;

            var extra_bits: u32 = 0;
            const ret = storedSizeFits(input);
            const stored_size = ret.size;
            const storable = ret.storable;

            if (storable) {
                // We only bother calculating the costs of the extra bits required by
                // the length of distance fields (which will be the same for both fixed
                // and dynamic encoding), if we need to compare those two encodings
                // against stored encoding.
                var length_code: u16 = Token.length_codes_start + 8;
                while (length_code < num_literals) : (length_code += 1) {
                    // First eight length codes have extra size = 0.
                    extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
                        @as(u32, @intCast(Token.lengthExtraBits(length_code)));
                }
                var distance_code: u16 = 4;
                while (distance_code < num_distances) : (distance_code += 1) {
                    // First four distance codes have extra size = 0.
                    extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
                        @as(u32, @intCast(Token.distanceExtraBits(distance_code)));
                }
            }

            // Figure out smallest code.
            // Fixed Huffman baseline.
            var literal_encoding = &self.fixed_literal_encoding;
            var distance_encoding = &self.fixed_distance_encoding;
            var size = self.fixedSize(extra_bits);

            // Dynamic Huffman?
            var num_codegens: u32 = 0;

            // Generate codegen and codegenFrequencies, which indicates how to encode
            // the literal_encoding and the distance_encoding.
            self.generateCodegen(
                num_literals,
                num_distances,
                &self.literal_encoding,
                &self.distance_encoding,
            );
            self.codegen_encoding.generate(self.codegen_freq[0..], 7);
            const dynamic_size = self.dynamicSize(
                &self.literal_encoding,
                &self.distance_encoding,
                extra_bits,
            );
            const dyn_size = dynamic_size.size;
            num_codegens = dynamic_size.num_codegens;

            if (dyn_size < size) {
                size = dyn_size;
                literal_encoding = &self.literal_encoding;
                distance_encoding = &self.distance_encoding;
            }

            // Stored bytes?
            if (storable and stored_size < size) {
                try self.storedBlock(input.?, eof);
                return;
            }

            // Huffman.
            if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
                try self.fixedHeader(eof);
            } else {
                try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
            }

            // Write the tokens.
            try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
        }

        /// Write `input` as a stored (uncompressed) block.
        pub fn storedBlock(self: *Self, input: []const u8, eof: bool) Error!void {
            try self.storedHeader(input.len, eof);
            try self.bit_writer.writeBytes(input);
        }

        // writeBlockDynamic encodes a block using a dynamic Huffman table.
        // This should be used if the symbols used have a disproportionate
        // histogram distribution.
        // If input is supplied and the compression savings are below 1/16th of the
        // input size the block is stored.
        fn dynamicBlock(
            self: *Self,
            tokens: []const Token,
            eof: bool,
            input: ?[]const u8,
        ) Error!void {
            const total_tokens = self.indexTokens(tokens);
            const num_literals = total_tokens.num_literals;
            const num_distances = total_tokens.num_distances;

            // Generate codegen and codegenFrequencies, which indicates how to encode
            // the literal_encoding and the distance_encoding.
            self.generateCodegen(
                num_literals,
                num_distances,
                &self.literal_encoding,
                &self.distance_encoding,
            );
            self.codegen_encoding.generate(self.codegen_freq[0..], 7);
            const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
            const size = dynamic_size.size;
            const num_codegens = dynamic_size.num_codegens;

            // Store bytes, if we don't get a reasonable improvement.

            const stored_size = storedSizeFits(input);
            const ssize = stored_size.size;
            const storable = stored_size.storable;
            if (storable and ssize < (size + (size >> 4))) {
                try self.storedBlock(input.?, eof);
                return;
            }

            // Write Huffman table.
            try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);

            // Write the tokens.
            try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
        }

        const TotalIndexedTokens = struct {
            num_literals: u32,
            num_distances: u32,
        };

        // Indexes a slice of tokens followed by an end_block_marker, and updates
        // literal_freq and distance_freq, and generates literal_encoding
        // and distance_encoding.
        // The number of literal and distance tokens is returned.
        fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens {
            var num_literals: u32 = 0;
            var num_distances: u32 = 0;

            for (self.literal_freq, 0..) |_, i| {
                self.literal_freq[i] = 0;
            }
            for (self.distance_freq, 0..) |_, i| {
                self.distance_freq[i] = 0;
            }

            for (tokens) |t| {
                if (t.kind == Token.Kind.literal) {
                    self.literal_freq[t.literal()] += 1;
                    continue;
                }
                self.literal_freq[t.lengthCode()] += 1;
                self.distance_freq[t.distanceCode()] += 1;
            }
            // add end_block_marker token at the end
            self.literal_freq[consts.end_block_marker] += 1;

            // get the number of literals
            num_literals = @as(u32, @intCast(self.literal_freq.len));
            while (self.literal_freq[num_literals - 1] == 0) {
                num_literals -= 1;
            }
            // get the number of distances
            num_distances = @as(u32, @intCast(self.distance_freq.len));
            while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
                num_distances -= 1;
            }
            if (num_distances == 0) {
                // We haven't found a single match. If we want to go with the dynamic encoding,
                // we should count at least one distance to be sure that the distance huffman tree could be encoded.
                self.distance_freq[0] = 1;
                num_distances = 1;
            }
            self.literal_encoding.generate(&self.literal_freq, 15);
            self.distance_encoding.generate(&self.distance_freq, 15);
            return TotalIndexedTokens{
                .num_literals = num_literals,
                .num_distances = num_distances,
            };
        }

        // Writes a slice of tokens to the output followed by an end_block_marker.
        // Codes for literal and distance encoding must be supplied.
        fn writeTokens(
            self: *Self,
            tokens: []const Token,
            le_codes: []hc.HuffCode,
            oe_codes: []hc.HuffCode,
        ) Error!void {
            for (tokens) |t| {
                if (t.kind == Token.Kind.literal) {
                    try self.writeCode(le_codes[t.literal()]);
                    continue;
                }

                // Write the length
                const le = t.lengthEncoding();
                try self.writeCode(le_codes[le.code]);
                if (le.extra_bits > 0) {
                    try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
                }

                // Write the distance
                const oe = t.distanceEncoding();
                try self.writeCode(oe_codes[oe.code]);
                if (oe.extra_bits > 0) {
                    try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
                }
            }
            // add end_block_marker at the end
            try self.writeCode(le_codes[consts.end_block_marker]);
        }

        // Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes
        // if the results only gains very little from compression.
        pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) Error!void {
            // Add everything as literals
            histogram(input, &self.literal_freq);

            self.literal_freq[consts.end_block_marker] = 1;

            const num_literals = consts.end_block_marker + 1;
            self.distance_freq[0] = 1;
            const num_distances = 1;

            self.literal_encoding.generate(&self.literal_freq, 15);

            // Figure out smallest code.
            // Always use dynamic Huffman or Store
            var num_codegens: u32 = 0;

            // Generate codegen and codegenFrequencies, which indicates how to encode
            // the literal_encoding and the distance_encoding.
            self.generateCodegen(
                num_literals,
                num_distances,
                &self.literal_encoding,
                &self.huff_distance,
            );
            self.codegen_encoding.generate(self.codegen_freq[0..], 7);
            const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
            const size = dynamic_size.size;
            num_codegens = dynamic_size.num_codegens;

            // Store bytes, if we don't get a reasonable improvement.
            const stored_size_ret = storedSizeFits(input);
            const ssize = stored_size_ret.size;
            const storable = stored_size_ret.storable;

            if (storable and ssize < (size + (size >> 4))) {
                try self.storedBlock(input, eof);
                return;
            }

            // Huffman.
            try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
            const encoding = self.literal_encoding.codes[0..257];

            for (input) |t| {
                const c = encoding[t];
                try self.bit_writer.writeBits(c.code, c.len);
            }
            try self.writeCode(encoding[consts.end_block_marker]);
        }

        // histogram accumulates a histogram of b in h.
        fn histogram(b: []const u8, h: *[286]u16) void {
            // Clear histogram
            for (h, 0..) |_, i| {
                h[i] = 0;
            }

            // Only byte values (0..255) can occur; length/end codes stay 0.
            var lh = h.*[0..256];
            for (b) |t| {
                lh[t] += 1;
            }
        }
    };
}
|
||||
|
||||
// tests
|
||||
const expect = std.testing.expect;
|
||||
const fmt = std.fmt;
|
||||
const testing = std.testing;
|
||||
const ArrayList = std.ArrayList;
|
||||
|
||||
const TestCase = @import("testdata/block_writer.zig").TestCase;
|
||||
const testCases = @import("testdata/block_writer.zig").testCases;
|
||||
|
||||
// tests if the writeBlock encoding has changed.
test "flate.BlockWriter write" {
    // Compare each reference case against the pre-recorded expected output.
    inline for (0..testCases.len) |i| {
        try testBlock(testCases[i], .write_block);
    }
}
|
||||
|
||||
// tests if the writeBlockDynamic encoding has changed.
test "flate.BlockWriter dynamicBlock" {
    // Compare each reference case against the pre-recorded expected output.
    inline for (0..testCases.len) |i| {
        try testBlock(testCases[i], .write_dyn_block);
    }
}
|
||||
|
||||
test "flate.BlockWriter huffmanBlock" {
    inline for (0..testCases.len) |i| {
        try testBlock(testCases[i], .write_huffman_block);
    }
    // Extra case: random data, which stresses the store-vs-huffman decision.
    try testBlock(.{
        .tokens = &[_]Token{},
        .input = "huffman-rand-max.input",
        .want = "huffman-rand-max.{s}.expect",
    }, .write_huffman_block);
}
|
||||
|
||||
// Selects which BlockWriter entry point a test case exercises.
const TestFn = enum {
    write_block,
    write_dyn_block, // write dynamic block
    write_huffman_block,

    // Short name used to build expected-output file names.
    fn to_s(self: TestFn) []const u8 {
        return switch (self) {
            .write_block => "wb",
            .write_dyn_block => "dyn",
            .write_huffman_block => "huff",
        };
    }

    // Dispatches to the selected BlockWriter method and flushes the result.
    fn write(
        comptime self: TestFn,
        bw: anytype,
        tok: []const Token,
        input: ?[]const u8,
        final: bool,
    ) !void {
        switch (self) {
            .write_block => try bw.write(tok, final, input),
            .write_dyn_block => try bw.dynamicBlock(tok, final, input),
            .write_huffman_block => try bw.huffmanBlock(input.?, final),
        }
        try bw.flush();
    }
};
|
||||
|
||||
// testBlock tests a block against its references
//
// size
//  64K [file-name].input - input non compressed file
// 8.1K [file-name].golden -
//   78 [file-name].dyn.expect - output with writeBlockDynamic
//   78 [file-name].wb.expect - output with writeBlock
// 8.1K [file-name].huff.expect - output with writeBlockHuff
//   78 [file-name].dyn.expect-noinput - output with writeBlockDynamic when input is null
//   78 [file-name].wb.expect-noinput - output with writeBlock when input is null
//
//  wb - writeBlock
// dyn - writeBlockDynamic
// huff - writeBlockHuff
//
fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void {
    if (tc.input.len != 0 and tc.want.len != 0) {
        // Expected-output file name is parameterized by the writer kind.
        const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()});
        const input = @embedFile("testdata/block_writer/" ++ tc.input);
        const want = @embedFile("testdata/block_writer/" ++ want_name);
        try testWriteBlock(tfn, input, want, tc.tokens);
    }

    // huffmanBlock requires input; there is no "no input" variant for it.
    if (tfn == .write_huffman_block) {
        return;
    }

    const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()});
    const want = @embedFile("testdata/block_writer/" ++ want_name_no_input);
    try testWriteBlock(tfn, null, want, tc.tokens);
}
|
||||
|
||||
// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output.
//
// Runs the writer twice — once with bfinal unset and once, after a
// setWriter reset, with bfinal set — and checks both outputs against `want`.
fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void {
    var buf = ArrayList(u8).init(testing.allocator);
    // Single deferred deinit covers both passes. The previous version
    // deinit-ed and re-created the list manually, leaking the first buffer
    // if any expectation below failed before the manual deinit ran.
    defer buf.deinit();
    var bw = blockWriter(buf.writer());
    try tfn.write(&bw, tokens, input, false);
    var got = buf.items;
    try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
    try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set
    //
    // Test if the writer produces the same output after reset.
    buf.clearRetainingCapacity();
    bw.setWriter(buf.writer());

    try tfn.write(&bw, tokens, input, true);
    try bw.flush();
    got = buf.items;

    try expect(got[0] & 1 == 1); // bfinal is set
    buf.items[0] &= 0b1111_1110; // remove bfinal bit, so we can run test slices
    try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
}
|
||||
49
lib/std/compress/flate/consts.zig
Normal file
49
lib/std/compress/flate/consts.zig
Normal file
@@ -0,0 +1,49 @@
|
||||
pub const deflate = struct {
    // Number of tokens to accumulate in deflate before starting block encoding.
    //
    // In zlib this depends on memlevel: 6 + memlevel, where default memlevel is
    // 8 and max 9 that gives 14 or 15 bits.
    pub const tokens = 1 << 15;
};
|
||||
|
||||
// Match (back-reference) length and distance limits.
pub const match = struct {
    pub const base_length = 3; // smallest match length per the RFC section 3.2.5
    pub const min_length = 4; // min length used in this algorithm
    pub const max_length = 258; // largest match length per the RFC

    pub const min_distance = 1; // distance range per the RFC is 1..32768
    pub const max_distance = 32768;
};
|
||||
|
||||
pub const history = struct {
    // Sliding-window history size: equal to the maximum match distance.
    pub const len = match.max_distance;
};
|
||||
|
||||
// Hash lookup table parameters used by the match finder.
pub const lookup = struct {
    pub const bits = 15; // table index width in bits
    pub const len = 1 << bits;
    pub const shift = 32 - bits; // shift that keeps the top `bits` of a 32-bit hash
};
|
||||
|
||||
pub const huffman = struct {
    // The odd order in which the codegen code sizes are written.
    pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
    // The number of codegen codes.
    pub const codegen_code_count = 19;

    // The largest distance code.
    pub const distance_code_count = 30;

    // Maximum number of literals.
    pub const max_num_lit = 286;

    // Max number of frequencies used for a Huffman Code
    // Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
    // The largest of these is max_num_lit.
    pub const max_num_frequencies = max_num_lit;

    // Biggest block size for uncompressed block.
    pub const max_store_block_size = 65535;
    // The special code used to mark the end of a block.
    pub const end_block_marker = 256;
};
|
||||
207
lib/std/compress/flate/container.zig
Normal file
207
lib/std/compress/flate/container.zig
Normal file
@@ -0,0 +1,207 @@
|
||||
//! Container of the deflate bit stream body. Container adds header before
|
||||
//! deflate bit stream and footer after. It can be gzip, zlib or raw (no header,
|
||||
//! no footer, raw bit stream).
|
||||
//!
|
||||
//! Zlib format is defined in rfc 1950. Header has 2 bytes and footer a 4-byte
//! Adler-32 checksum.
|
||||
//!
|
||||
//! Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes
|
||||
//! crc32 checksum and 4 bytes of uncompressed data length.
|
||||
//!
|
||||
//!
|
||||
//! rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
|
||||
//! rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
|
||||
//!
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
pub const Container = enum {
|
||||
raw, // no header or footer
|
||||
gzip, // gzip header and footer
|
||||
zlib, // zlib header and footer
|
||||
|
||||
    /// Total number of wrapper bytes (header + footer) this container adds.
    pub fn size(w: Container) usize {
        return headerSize(w) + footerSize(w);
    }
|
||||
|
||||
    /// Number of header bytes written before the deflate stream.
    pub fn headerSize(w: Container) usize {
        return switch (w) {
            .gzip => 10,
            .zlib => 2,
            .raw => 0,
        };
    }
|
||||
|
||||
    /// Number of footer bytes written after the deflate stream:
    /// gzip = crc32 + input size (4 + 4), zlib = Adler-32 (4).
    pub fn footerSize(w: Container) usize {
        return switch (w) {
            .gzip => 8,
            .zlib => 4,
            .raw => 0,
        };
    }
|
||||
|
||||
pub const list = [_]Container{ .raw, .gzip, .zlib };
|
||||
|
||||
pub const Error = error{
|
||||
BadGzipHeader,
|
||||
BadZlibHeader,
|
||||
WrongGzipChecksum,
|
||||
WrongGzipSize,
|
||||
WrongZlibChecksum,
|
||||
};
|
||||
|
||||
    /// Write the container header (if any) for the chosen wrapper format.
    pub fn writeHeader(comptime wrap: Container, writer: anytype) !void {
        switch (wrap) {
            .gzip => {
                // GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
                //  - ID1 (IDentification 1), always 0x1f
                //  - ID2 (IDentification 2), always 0x8b
                //  - CM (Compression Method), always 8 = deflate
                //  - FLG (Flags), all set to 0
                //  - 4 bytes, MTIME (Modification time), not used, all set to zero
                //  - XFL (eXtra FLags), all set to zero
                //  - OS (Operating System), 03 = Unix
                const gzipHeader = [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 };
                try writer.writeAll(&gzipHeader);
            },
            .zlib => {
                // ZLIB has a two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
                // 1st byte:
                //  - First four bits is the CINFO (compression info), which is 7 for the default deflate window size.
                //  - The next four bits is the CM (compression method), which is 8 for deflate.
                // 2nd byte:
                //  - Two bits is the FLEVEL (compression level). Values are: 0=fastest, 1=fast, 2=default, 3=best.
                //  - The next bit, FDICT, is set if a dictionary is given.
                //  - The final five FCHECK bits form a mod-31 checksum.
                //
                // CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
                const zlibHeader = [_]u8{ 0x78, 0b10_0_11100 };
                try writer.writeAll(&zlibHeader);
            },
            .raw => {},
        }
    }
|
||||
|
||||
pub fn writeFooter(comptime wrap: Container, hasher: *Hasher(wrap), writer: anytype) !void {
|
||||
var bits: [4]u8 = undefined;
|
||||
switch (wrap) {
|
||||
.gzip => {
|
||||
// GZIP 8 bytes footer
|
||||
// - 4 bytes, CRC32 (CRC-32)
|
||||
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
|
||||
std.mem.writeInt(u32, &bits, hasher.chksum(), .little);
|
||||
try writer.writeAll(&bits);
|
||||
|
||||
std.mem.writeInt(u32, &bits, hasher.bytesRead(), .little);
|
||||
try writer.writeAll(&bits);
|
||||
},
|
||||
.zlib => {
|
||||
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
|
||||
// 4 bytes of ADLER32 (Adler-32 checksum)
|
||||
// Checksum value of the uncompressed data (excluding any
|
||||
// dictionary data) computed according to Adler-32
|
||||
// algorithm.
|
||||
std.mem.writeInt(u32, &bits, hasher.chksum(), .big);
|
||||
try writer.writeAll(&bits);
|
||||
},
|
||||
.raw => {},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parseHeader(comptime wrap: Container, reader: anytype) !void {
|
||||
switch (wrap) {
|
||||
.gzip => try parseGzipHeader(reader),
|
||||
.zlib => try parseZlibHeader(reader),
|
||||
.raw => {},
|
||||
}
|
||||
}
|
||||
|
||||
fn parseGzipHeader(reader: anytype) !void {
|
||||
const magic1 = try reader.read(u8);
|
||||
const magic2 = try reader.read(u8);
|
||||
const method = try reader.read(u8);
|
||||
const flags = try reader.read(u8);
|
||||
try reader.skipBytes(6); // mtime(4), xflags, os
|
||||
if (magic1 != 0x1f or magic2 != 0x8b or method != 0x08)
|
||||
return error.BadGzipHeader;
|
||||
// Flags description: https://www.rfc-editor.org/rfc/rfc1952.html#page-5
|
||||
if (flags != 0) {
|
||||
if (flags & 0b0000_0100 != 0) { // FEXTRA
|
||||
const extra_len = try reader.read(u16);
|
||||
try reader.skipBytes(extra_len);
|
||||
}
|
||||
if (flags & 0b0000_1000 != 0) { // FNAME
|
||||
try reader.skipStringZ();
|
||||
}
|
||||
if (flags & 0b0001_0000 != 0) { // FCOMMENT
|
||||
try reader.skipStringZ();
|
||||
}
|
||||
if (flags & 0b0000_0010 != 0) { // FHCRC
|
||||
try reader.skipBytes(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parseZlibHeader(reader: anytype) !void {
|
||||
const cm = try reader.read(u4);
|
||||
const cinfo = try reader.read(u4);
|
||||
_ = try reader.read(u8);
|
||||
if (cm != 8 or cinfo > 7) {
|
||||
return error.BadZlibHeader;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: anytype) !void {
|
||||
switch (wrap) {
|
||||
.gzip => {
|
||||
if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum;
|
||||
if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize;
|
||||
},
|
||||
.zlib => {
|
||||
const chksum: u32 = @byteSwap(hasher.chksum());
|
||||
if (try reader.read(u32) != chksum) return error.WrongZlibChecksum;
|
||||
},
|
||||
.raw => {},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn Hasher(comptime wrap: Container) type {
|
||||
const HasherType = switch (wrap) {
|
||||
.gzip => std.hash.Crc32,
|
||||
.zlib => std.hash.Adler32,
|
||||
.raw => struct {
|
||||
pub fn init() @This() {
|
||||
return .{};
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
return struct {
|
||||
hasher: HasherType = HasherType.init(),
|
||||
bytes: usize = 0,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn update(self: *Self, buf: []const u8) void {
|
||||
switch (wrap) {
|
||||
.raw => {},
|
||||
else => {
|
||||
self.hasher.update(buf);
|
||||
self.bytes += buf.len;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chksum(self: *Self) u32 {
|
||||
switch (wrap) {
|
||||
.raw => return 0,
|
||||
else => return self.hasher.final(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bytesRead(self: *Self) u32 {
|
||||
return @truncate(self.bytes);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
748
lib/std/compress/flate/deflate.zig
Normal file
748
lib/std/compress/flate/deflate.zig
Normal file
@@ -0,0 +1,748 @@
|
||||
const std = @import("std");
|
||||
const io = std.io;
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
const expect = testing.expect;
|
||||
const print = std.debug.print;
|
||||
|
||||
const Token = @import("Token.zig");
|
||||
const consts = @import("consts.zig");
|
||||
const BlockWriter = @import("block_writer.zig").BlockWriter;
|
||||
const Container = @import("container.zig").Container;
|
||||
const SlidingWindow = @import("SlidingWindow.zig");
|
||||
const Lookup = @import("Lookup.zig");
|
||||
|
||||
/// Compressor options.
pub const Options = struct {
    level: Level = .default, // speed/size trade-off, see `Level`
};
|
||||
|
||||
/// Trades between speed and compression size.
/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43)
/// levels 1-3 are using different algorithm to perform faster but with less
/// compression. That is not implemented here.
pub const Level = enum(u4) {
    // Named aliases (fast/default/best) use distinct tag values (0xb-0xd) so
    // they can coexist with the numeric levels in the same enum.
    // zig fmt: off
    fast = 0xb, level_4 = 4,
                level_5 = 5,
    default = 0xc, level_6 = 6,
                level_7 = 7,
                level_8 = 8,
    best = 0xd, level_9 = 9,
    // zig fmt: on
};
|
||||
|
||||
/// Algorithm knobs for each level.
const LevelArgs = struct {
    good: u16, // Do less lookups if we already have match of this length.
    nice: u16, // Stop looking for better match if we found match with at least this length.
    lazy: u16, // Don't do lazy match find if got match with at least this length.
    chain: u16, // How many lookups for previous match to perform.

    /// Maps a compression level (including its named alias) to its tuning knobs.
    pub fn get(level: Level) LevelArgs {
        // zig fmt: off
        return switch (level) {
            .fast,    .level_4 => .{ .good =  4, .lazy =   4, .nice =  16, .chain =   16 },
                      .level_5 => .{ .good =  8, .lazy =  16, .nice =  32, .chain =   32 },
            .default, .level_6 => .{ .good =  8, .lazy =  16, .nice = 128, .chain =  128 },
                      .level_7 => .{ .good =  8, .lazy =  32, .nice = 128, .chain =  256 },
                      .level_8 => .{ .good = 32, .lazy = 128, .nice = 258, .chain = 1024 },
            .best,    .level_9 => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 },
        };
        // zig fmt: on
    }
};
|
||||
|
||||
/// Compress plain data from reader into compressed stream written to writer.
/// Convenience one-shot wrapper: creates a compressor, streams all input,
/// and finalizes the deflate stream (including the container footer).
pub fn compress(comptime container: Container, reader: anytype, writer: anytype, options: Options) !void {
    var cmp = try compressor(container, writer, options);
    try cmp.compress(reader);
    try cmp.finish();
}
|
||||
|
||||
/// Create compressor for writer type.
/// Writes the container header to `writer` as part of initialization.
pub fn compressor(comptime container: Container, writer: anytype, options: Options) !Compressor(container, @TypeOf(writer)) {
    const C = Compressor(container, @TypeOf(writer));
    return try C.init(writer, options);
}
|
||||
|
||||
/// Compressor type.
/// The default compressor pairs the Deflate tokenizer with a BlockWriter
/// that encodes tokens into deflate blocks.
pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
    return Deflate(container, WriterType, BlockWriter(WriterType));
}
|
||||
|
||||
/// Default compression algorithm. Has two steps: tokenization and token
|
||||
/// encoding.
|
||||
///
|
||||
/// Tokenization takes uncompressed input stream and produces list of tokens.
|
||||
/// Each token can be literal (byte of data) or match (backreference to previous
|
||||
/// data with length and distance). Tokenization accumulates 32K tokens, when
|
||||
/// full or `flush` is called tokens are passed to the `block_writer`. Level
|
||||
/// defines how hard (how slow) it tries to find match.
|
||||
///
|
||||
/// Block writer will decide which type of deflate block to write (stored, fixed,
|
||||
/// dynamic) and encode tokens to the output byte stream. Client has to call
|
||||
/// `finish` to write block with the final bit set.
|
||||
///
|
||||
/// Container defines type of header and footer which can be gzip, zlib or raw.
|
||||
/// They all share same deflate body. Raw has no header or footer just deflate
|
||||
/// body.
|
||||
///
|
||||
/// Compression algorithm explained in rfc-1951 (slightly edited for this case):
|
||||
///
|
||||
/// The compressor uses a chained hash table `lookup` to find duplicated
|
||||
/// strings, using a hash function that operates on 4-byte sequences. At any
|
||||
/// given point during compression, let XYZW be the next 4 input bytes
|
||||
/// (lookahead) to be examined (not necessarily all different, of course).
|
||||
/// First, the compressor examines the hash chain for XYZW. If the chain is
|
||||
/// empty, the compressor simply writes out X as a literal byte and advances
|
||||
/// one byte in the input. If the hash chain is not empty, indicating that the
|
||||
/// sequence XYZW (or, if we are unlucky, some other 4 bytes with the same
|
||||
/// hash function value) has occurred recently, the compressor compares all
|
||||
/// strings on the XYZW hash chain with the actual input data sequence
|
||||
/// starting at the current point, and selects the longest match.
|
||||
///
|
||||
/// To improve overall compression, the compressor defers the selection of
|
||||
/// matches ("lazy matching"): after a match of length N has been found, the
|
||||
/// compressor searches for a longer match starting at the next input byte. If
|
||||
/// it finds a longer match, it truncates the previous match to a length of
|
||||
/// one (thus producing a single literal byte) and then emits the longer
|
||||
/// match. Otherwise, it emits the original match, and, as described above,
|
||||
/// advances N bytes before continuing.
|
||||
///
|
||||
///
|
||||
/// Allocates statically ~400K (192K lookup, 128K tokens, 64K window).
|
||||
///
|
||||
/// Deflate function accepts BlockWriterType so we can change that in test to test
|
||||
/// just tokenization part.
|
||||
///
|
||||
fn Deflate(comptime container: Container, comptime WriterType: type, comptime BlockWriterType: type) type {
    return struct {
        lookup: Lookup = .{}, // chained hash table of previous 4-byte sequences
        win: SlidingWindow = .{}, // input history plus lookahead buffer
        tokens: Tokens = .{}, // accumulated tokens, flushed to block_writer when full
        wrt: WriterType,
        block_writer: BlockWriterType,
        level: LevelArgs, // tuning knobs derived from Options.level
        hasher: container.Hasher() = .{}, // checksum/size for the container footer

        // Match and literal at the previous position.
        // Used for lazy match finding in processWindow.
        prev_match: ?Token = null,
        prev_literal: ?u8 = null,

        const Self = @This();

        /// Initializes the compressor and immediately writes the container
        /// header to `wrt`.
        pub fn init(wrt: WriterType, options: Options) !Self {
            const self = Self{
                .wrt = wrt,
                .block_writer = BlockWriterType.init(wrt),
                .level = LevelArgs.get(options.level),
            };
            try container.writeHeader(self.wrt);
            return self;
        }

        const FlushOption = enum { none, flush, final };

        // Process data in window and create tokens. If token buffer is full
        // flush tokens to the token writer. In the case of `flush` or `final`
        // option it will process all data from the window. In the `none` case
        // it will preserve some data for the next match.
        fn tokenize(self: *Self, flush_opt: FlushOption) !void {
            // flush - process all data from window
            const should_flush = (flush_opt != .none);

            // While there is data in active lookahead buffer.
            while (self.win.activeLookahead(should_flush)) |lh| {
                var step: u16 = 1; // 1 in the case of literal, match length otherwise
                const pos: u16 = self.win.pos();
                const literal = lh[0]; // literal at current position
                const min_len: u16 = if (self.prev_match) |m| m.length() else 0;

                // Try to find match at least min_len long.
                if (self.findMatch(pos, lh, min_len)) |match| {
                    // Found better match than previous.
                    try self.addPrevLiteral();

                    // Is found match length good enough?
                    if (match.length() >= self.level.lazy) {
                        // Don't try to lazy find better match, use this.
                        step = try self.addMatch(match);
                    } else {
                        // Store this match for the lazy comparison on the next byte.
                        self.prev_literal = literal;
                        self.prev_match = match;
                    }
                } else {
                    // There is no better match at current pos than the previous one.
                    // Write previous match or literal.
                    if (self.prev_match) |m| {
                        // Write match from previous position.
                        step = try self.addMatch(m) - 1; // we already advanced 1 from previous position
                    } else {
                        // No match at previous position.
                        // Write previous literal if any, and remember this literal.
                        try self.addPrevLiteral();
                        self.prev_literal = literal;
                    }
                }
                // Advance window and add hashes.
                self.windowAdvance(step, lh, pos);
            }

            if (should_flush) {
                // In the case of flushing, last few lookahead buffers were smaller than min match len.
                // So only last literal can be unwritten.
                assert(self.prev_match == null);
                try self.addPrevLiteral();
                self.prev_literal = null;

                try self.flushTokens(flush_opt);
            }
        }

        // Advances the sliding window by `step` bytes, hashing the skipped positions.
        fn windowAdvance(self: *Self, step: u16, lh: []const u8, pos: u16) void {
            // current position is already added in findMatch
            self.lookup.bulkAdd(lh[1..], step - 1, pos + 1);
            self.win.advance(step);
        }

        // Add previous literal (if any) to the tokens list.
        fn addPrevLiteral(self: *Self) !void {
            if (self.prev_literal) |l| try self.addToken(Token.initLiteral(l));
        }

        // Add match to the tokens list, reset prev pointers.
        // Returns length of the added match.
        fn addMatch(self: *Self, m: Token) !u16 {
            try self.addToken(m);
            self.prev_literal = null;
            self.prev_match = null;
            return m.length();
        }

        // Appends a token; writes out a block when the token buffer fills.
        fn addToken(self: *Self, token: Token) !void {
            self.tokens.add(token);
            if (self.tokens.full()) try self.flushTokens(.none);
        }

        // Finds largest match in the history window with the data at current pos.
        // Returns null unless a match longer than `min_len` is found.
        fn findMatch(self: *Self, pos: u16, lh: []const u8, min_len: u16) ?Token {
            var len: u16 = min_len;
            // Previous location with the same hash (same 4 bytes).
            var prev_pos = self.lookup.add(lh, pos);
            // Last found match.
            var match: ?Token = null;

            // How much back-references to try, performance knob.
            var chain: usize = self.level.chain;
            if (len >= self.level.good) {
                // If we've got a match that's good enough, only look in 1/4 the chain.
                chain >>= 2;
            }

            // Hot path loop!
            while (prev_pos > 0 and chain > 0) : (chain -= 1) {
                const distance = pos - prev_pos;
                if (distance > consts.match.max_distance)
                    break;

                const new_len = self.win.match(prev_pos, pos, len);
                if (new_len > len) {
                    match = Token.initMatch(@intCast(distance), new_len);
                    if (new_len >= self.level.nice) {
                        // The match is good enough that we don't try to find a better one.
                        return match;
                    }
                    len = new_len;
                }
                prev_pos = self.lookup.prev(prev_pos);
            }

            return match;
        }

        // Encodes accumulated tokens into the output; on flush/final also
        // aligns the bit stream and resets internal buffers.
        fn flushTokens(self: *Self, flush_opt: FlushOption) !void {
            // Pass tokens to the token writer
            try self.block_writer.write(self.tokens.tokens(), flush_opt == .final, self.win.tokensBuffer());
            // Stored block ensures byte alignment.
            // It has 3 bits (final, block_type) and then padding until byte boundary.
            // After that everything is aligned to the boundary in the stored block.
            // Empty stored block is 0b000 + (0-7) bits of padding + 0x00 0x00 0xFF 0xFF.
            // Last 4 bytes are byte aligned.
            if (flush_opt == .flush) {
                try self.block_writer.storedBlock("", false);
            }
            if (flush_opt != .none) {
                // Safe to call only when byte aligned or it is OK to add
                // padding bits (on last byte of the final block).
                try self.block_writer.flush();
            }
            // Reset internal tokens store.
            self.tokens.reset();
            // Notify win that tokens are flushed.
            self.win.flush();
        }

        // Slide win and if needed lookup tables.
        fn slide(self: *Self) void {
            const n = self.win.slide();
            self.lookup.slide(n);
        }

        /// Compresses as much data as possible, stops when the reader becomes
        /// empty. It will introduce some output latency (reading input without
        /// producing all output) because some data are still in internal
        /// buffers.
        ///
        /// It is up to the caller to call flush (if needed) or finish (required)
        /// when there is a need to output any pending data or to complete the
        /// stream.
        ///
        pub fn compress(self: *Self, reader: anytype) !void {
            while (true) {
                // Fill window from reader
                const buf = self.win.writable();
                if (buf.len == 0) {
                    // Window full: tokenize what we have, then slide to make room.
                    try self.tokenize(.none);
                    self.slide();
                    continue;
                }
                const n = try reader.readAll(buf);
                self.hasher.update(buf[0..n]);
                self.win.written(n);
                // Process window
                try self.tokenize(.none);
                // Exit when no more data in reader
                if (n < buf.len) break;
            }
        }

        /// Flushes internal buffers to the output writer. Outputs empty stored
        /// block to sync bit stream to the byte boundary, so that the
        /// decompressor can get all input data available so far.
        ///
        /// It is useful mainly in compressed network protocols, to ensure that
        /// deflate bit stream can be used as byte stream. May degrade
        /// compression so it should be used only when necessary.
        ///
        /// Completes the current deflate block and follows it with an empty
        /// stored block that is three zero bits plus filler bits to the next
        /// byte, followed by four bytes (00 00 ff ff).
        ///
        pub fn flush(self: *Self) !void {
            try self.tokenize(.flush);
        }

        /// Completes deflate bit stream by writing any pending data as the
        /// final deflate block. HAS to be called once all data are written to
        /// the compressor as a signal that the next block has to have the
        /// final bit set.
        ///
        pub fn finish(self: *Self) !void {
            try self.tokenize(.final);
            try container.writeFooter(&self.hasher, self.wrt);
        }

        /// Use another writer while preserving history. Most probably flush
        /// should be called on old writer before setting new.
        pub fn setWriter(self: *Self, new_writer: WriterType) void {
            self.block_writer.setWriter(new_writer);
            self.wrt = new_writer;
        }

        // Writer interface

        pub const Writer = io.Writer(*Self, Error, write);
        pub const Error = BlockWriterType.Error;

        /// Write `input` of uncompressed data.
        /// See compress.
        pub fn write(self: *Self, input: []const u8) !usize {
            var fbs = io.fixedBufferStream(input);
            try self.compress(fbs.reader());
            return input.len;
        }

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }
    };
}
|
||||
|
||||
// Tokens store: fixed-capacity accumulator of tokens for one deflate block.
const Tokens = struct {
    list: [consts.deflate.tokens]Token = undefined,
    pos: usize = 0, // number of tokens currently in `list`

    // Appends a token. Caller is responsible for checking `full` (see
    // Deflate.addToken) so the buffer never overflows.
    fn add(self: *Tokens, t: Token) void {
        self.list[self.pos] = t;
        self.pos += 1;
    }

    // True when no more tokens can be added.
    fn full(self: *Tokens) bool {
        return self.pos == self.list.len;
    }

    // Empties the store (capacity is reused).
    fn reset(self: *Tokens) void {
        self.pos = 0;
    }

    // Slice of the tokens accumulated so far.
    fn tokens(self: *Tokens) []const Token {
        return self.list[0..self.pos];
    }
};
|
||||
|
||||
/// Creates huffman only deflate blocks. Disables Lempel-Ziv match searching and
/// only performs Huffman entropy encoding. Results in faster compression, much
/// less memory requirements during compression but bigger compressed sizes.
pub const huffman = struct {
    /// One-shot: compress everything from `reader` to `writer` and finalize the stream.
    pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void {
        var c = try huffman.compressor(container, writer);
        try c.compress(reader);
        try c.finish();
    }

    /// Huffman-only compressor type for the given container and writer type.
    pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
        return SimpleCompressor(.huffman, container, WriterType);
    }

    /// Creates a huffman-only compressor; writes the container header immediately.
    pub fn compressor(comptime container: Container, writer: anytype) !huffman.Compressor(container, @TypeOf(writer)) {
        return try huffman.Compressor(container, @TypeOf(writer)).init(writer);
    }
};
|
||||
|
||||
/// Creates store blocks only. Data are not compressed only packed into deflate
/// store blocks. That adds 9 bytes of header for each block. Max stored block
/// size is 64K. Block is emitted when flush is called or on finish.
pub const store = struct {
    /// One-shot: pack everything from `reader` into stored blocks written to `writer`.
    pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void {
        var c = try store.compressor(container, writer);
        try c.compress(reader);
        try c.finish();
    }

    /// Store-only compressor type for the given container and writer type.
    pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
        return SimpleCompressor(.store, container, WriterType);
    }

    /// Creates a store-only compressor; writes the container header immediately.
    pub fn compressor(comptime container: Container, writer: anytype) !store.Compressor(container, @TypeOf(writer)) {
        return try store.Compressor(container, @TypeOf(writer)).init(writer);
    }
};
|
||||
|
||||
// Selects which deflate block type SimpleCompressor emits.
const SimpleCompressorKind = enum {
    huffman,
    store,
};
|
||||
|
||||
// Generic constructor shared by the huffman and store public namespaces.
fn simpleCompressor(
    comptime kind: SimpleCompressorKind,
    comptime container: Container,
    writer: anytype,
) !SimpleCompressor(kind, container, @TypeOf(writer)) {
    return try SimpleCompressor(kind, container, @TypeOf(writer)).init(writer);
}
|
||||
|
||||
// Buffering compressor without Lempel-Ziv match searching: accumulates up to
// 64K of input and emits it as a single huffman-only or stored block.
fn SimpleCompressor(
    comptime kind: SimpleCompressorKind,
    comptime container: Container,
    comptime WriterType: type,
) type {
    const BlockWriterType = BlockWriter(WriterType);
    return struct {
        buffer: [65535]u8 = undefined, // because store blocks are limited to 65535 bytes
        wp: usize = 0, // write position in `buffer`

        wrt: WriterType,
        block_writer: BlockWriterType,
        hasher: container.Hasher() = .{}, // checksum/size for the container footer

        const Self = @This();

        /// Initializes the compressor and immediately writes the container header.
        pub fn init(wrt: WriterType) !Self {
            const self = Self{
                .wrt = wrt,
                .block_writer = BlockWriterType.init(wrt),
            };
            try container.writeHeader(self.wrt);
            return self;
        }

        /// Emits buffered data as a non-final block, then an empty stored
        /// block to align the bit stream to a byte boundary.
        pub fn flush(self: *Self) !void {
            try self.flushBuffer(false);
            try self.block_writer.storedBlock("", false);
            try self.block_writer.flush();
        }

        /// Emits buffered data as the final block and writes the container footer.
        pub fn finish(self: *Self) !void {
            try self.flushBuffer(true);
            try self.block_writer.flush();
            try container.writeFooter(&self.hasher, self.wrt);
        }

        // Encodes buffered bytes as one block (huffman or stored, per `kind`)
        // and resets the buffer.
        fn flushBuffer(self: *Self, final: bool) !void {
            const buf = self.buffer[0..self.wp];
            switch (kind) {
                .huffman => try self.block_writer.huffmanBlock(buf, final),
                .store => try self.block_writer.storedBlock(buf, final),
            }
            self.wp = 0;
        }

        // Writes all data from the input reader of uncompressed data.
        // It is up to the caller to call flush or finish if there is need to
        // output compressed blocks.
        pub fn compress(self: *Self, reader: anytype) !void {
            while (true) {
                // read from rdr into buffer
                const buf = self.buffer[self.wp..];
                if (buf.len == 0) {
                    // Buffer full: emit a non-final block and keep reading.
                    try self.flushBuffer(false);
                    continue;
                }
                const n = try reader.readAll(buf);
                self.hasher.update(buf[0..n]);
                self.wp += n;
                if (n < buf.len) break; // no more data in reader
            }
        }

        // Writer interface

        pub const Writer = io.Writer(*Self, Error, write);
        pub const Error = BlockWriterType.Error;

        // Write `input` of uncompressed data.
        pub fn write(self: *Self, input: []const u8) !usize {
            var fbs = io.fixedBufferStream(input);
            try self.compress(fbs.reader());
            return input.len;
        }

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }
    };
}
|
||||
|
||||
const builtin = @import("builtin");
|
||||
|
||||
// Verifies the tokenizer output (literal/match token sequence) for small
// inputs, and that header/footer sizes match the chosen container.
test "flate.Deflate tokenization" {
    if (builtin.target.cpu.arch == .wasm32) return error.SkipZigTest;

    const L = Token.initLiteral;
    const M = Token.initMatch;

    const cases = [_]struct {
        data: []const u8,
        tokens: []const Token,
    }{
        .{
            .data = "Blah blah blah blah blah!",
            .tokens = &[_]Token{ L('B'), L('l'), L('a'), L('h'), L(' '), L('b'), M(5, 18), L('!') },
        },
        .{
            .data = "ABCDEABCD ABCDEABCD",
            .tokens = &[_]Token{
                L('A'), L('B'), L('C'), L('D'), L('E'), L('A'), L('B'), L('C'), L('D'), L(' '),
                L('A'), M(10, 8),
            },
        },
    };

    for (cases) |c| {
        inline for (Container.list) |container| { // for each wrapping

            var cw = io.countingWriter(io.null_writer);
            const cww = cw.writer();
            var df = try Deflate(container, @TypeOf(cww), TestTokenWriter).init(cww, .{});

            _ = try df.write(c.data);
            try df.flush();

            // df.token_writer.show();
            try expect(df.block_writer.pos == c.tokens.len); // number of tokens written
            try testing.expectEqualSlices(Token, df.block_writer.get(), c.tokens); // tokens match

            // Before finish only the container header has reached the writer.
            try testing.expectEqual(container.headerSize(), cw.bytes_written);
            try df.finish();
            try testing.expectEqual(container.size(), cw.bytes_written);
        }
    }
}
|
||||
|
||||
// Tests that tokens written are equal to expected token list.
|
||||
// Stand-in block writer that records tokens instead of encoding them, so
// tests can inspect the tokenizer output directly.
const TestTokenWriter = struct {
    const Self = @This();

    pos: usize = 0, // number of tokens captured
    actual: [128]Token = undefined, // captured tokens (tests stay well under 128)

    pub fn init(_: anytype) Self {
        return .{};
    }
    // Records each token; final flag and stored-buffer argument are ignored.
    pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
        for (tokens) |t| {
            self.actual[self.pos] = t;
            self.pos += 1;
        }
    }

    pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}

    // Tokens captured so far.
    pub fn get(self: *Self) []Token {
        return self.actual[0..self.pos];
    }

    // Debug helper: prints captured tokens to stderr.
    pub fn show(self: *Self) void {
        print("\n", .{});
        for (self.get()) |t| {
            t.show();
        }
    }

    pub fn flush(_: *Self) !void {}
};
|
||||
|
||||
// Round-trips test files through the tokenizer at every compression level:
// checks the produced token counts and that decoding the tokens reproduces
// the original bytes exactly.
test "flate deflate file tokenization" {
    if (builtin.target.cpu.arch == .wasm32) return error.SkipZigTest;

    const levels = [_]Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
    const cases = [_]struct {
        data: []const u8, // uncompressed content
        // expected number of tokens produced in deflate tokenization
        tokens_count: [levels.len]usize = .{0} ** levels.len,
    }{
        .{
            .data = @embedFile("testdata/rfc1951.txt"),
            .tokens_count = .{ 7675, 7672, 7599, 7594, 7598, 7599 },
        },

        .{
            .data = @embedFile("testdata/block_writer/huffman-null-max.input"),
            .tokens_count = .{ 257, 257, 257, 257, 257, 257 },
        },
        .{
            .data = @embedFile("testdata/block_writer/huffman-pi.input"),
            .tokens_count = .{ 2570, 2564, 2564, 2564, 2564, 2564 },
        },
        .{
            .data = @embedFile("testdata/block_writer/huffman-text.input"),
            .tokens_count = .{ 235, 234, 234, 234, 234, 234 },
        },
        .{
            .data = @embedFile("testdata/fuzz/roundtrip1.input"),
            .tokens_count = .{ 333, 331, 331, 331, 331, 331 },
        },
        .{
            .data = @embedFile("testdata/fuzz/roundtrip2.input"),
            .tokens_count = .{ 334, 334, 334, 334, 334, 334 },
        },
    };

    for (cases) |case| { // for each case
        const data = case.data;

        for (levels, 0..) |level, i| { // for each compression level
            var original = io.fixedBufferStream(data);

            // buffer for decompressed data
            var al = std.ArrayList(u8).init(testing.allocator);
            defer al.deinit();
            const writer = al.writer();

            // create compressor
            const WriterType = @TypeOf(writer);
            const TokenWriter = TokenDecoder(@TypeOf(writer));
            var cmp = try Deflate(.raw, WriterType, TokenWriter).init(writer, .{ .level = level });

            // Stream uncompressed `original` data to the compressor. It will
            // produce tokens list and pass that list to the TokenDecoder. This
            // TokenDecoder uses CircularBuffer from inflate to convert list of
            // tokens back to the uncompressed stream.
            try cmp.compress(original.reader());
            try cmp.flush();
            const expected_count = case.tokens_count[i];
            const actual = cmp.block_writer.tokens_count;
            if (expected_count == 0) {
                // Placeholder expectation: print the count so it can be recorded.
                print("actual token count {d}\n", .{actual});
            } else {
                try testing.expectEqual(expected_count, actual);
            }

            try testing.expectEqual(data.len, al.items.len);
            try testing.expectEqualSlices(u8, data, al.items);
        }
    }
}
|
||||
|
||||
// Test helper: replays a token stream through the inflate CircularBuffer to
// reconstruct the original bytes, counting tokens along the way.
fn TokenDecoder(comptime WriterType: type) type {
    return struct {
        const CircularBuffer = @import("CircularBuffer.zig");
        hist: CircularBuffer = .{},
        wrt: WriterType,
        tokens_count: usize = 0, // total number of tokens received

        const Self = @This();

        pub fn init(wrt: WriterType) Self {
            return .{ .wrt = wrt };
        }

        // Decodes each literal/match into the history buffer, draining to the
        // writer as the buffer fills.
        pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
            self.tokens_count += tokens.len;
            for (tokens) |t| {
                switch (t.kind) {
                    .literal => self.hist.write(t.literal()),
                    .match => try self.hist.writeMatch(t.length(), t.distance()),
                }
                // Flush when free space drops below 285 — presumably headroom
                // for the longest output a single token can produce; TODO confirm margin.
                if (self.hist.free() < 285) try self.flushWin();
            }
            try self.flushWin();
        }

        pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}

        // Drains all currently readable bytes from the history buffer.
        fn flushWin(self: *Self) !void {
            while (true) {
                const buf = self.hist.read();
                if (buf.len == 0) break;
                try self.wrt.writeAll(buf);
            }
        }

        pub fn flush(_: *Self) !void {}
    };
}
|
||||
|
||||
test "flate.Deflate store simple compressor" {
    const data = "Hello world!";
    // Expected raw-deflate output: a single final stored (type 0) block.
    const expected = [_]u8{
        0x1, // block type 0, final bit set
        0xc, 0x0, // len = 12
        0xf3, 0xff, // ~len (one's complement of len)
        'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', //
    };

    var fbs = std.io.fixedBufferStream(data);
    var al = std.ArrayList(u8).init(testing.allocator);
    defer al.deinit();

    var cmp = try store.compressor(.raw, al.writer());
    try cmp.compress(fbs.reader());
    try cmp.finish();
    try testing.expectEqualSlices(u8, &expected, al.items);

    fbs.reset();
    try al.resize(0);

    // Huffman-only compressor will also emit a stored block for this small sample.
    var hc = try huffman.compressor(.raw, al.writer());
    try hc.compress(fbs.reader());
    try hc.finish();
    try testing.expectEqualSlices(u8, &expected, al.items);
}
|
||||
308
lib/std/compress/flate/huffman_decoder.zig
Normal file
308
lib/std/compress/flate/huffman_decoder.zig
Normal file
@@ -0,0 +1,308 @@
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
|
||||
/// Decoded Huffman symbol plus the metadata needed during table-driven decoding.
pub const Symbol = packed struct {
    pub const Kind = enum(u2) {
        literal,
        end_of_block,
        match,
    };

    symbol: u8 = 0, // symbol from alphabet
    code_bits: u4 = 0, // number of bits in code 0-15
    kind: Kind = .literal,

    code: u16 = 0, // huffman code of the symbol
    next: u16 = 0, // pointer to the next symbol in linked list
    // it is safe to use 0 as null pointer, when sorted 0 has shortest code and fits into lookup

    // Sorting less-than: orders by code_bits, then kind, then symbol value.
    pub fn asc(_: void, a: Symbol, b: Symbol) bool {
        if (a.code_bits == b.code_bits) {
            if (a.kind == b.kind) {
                return a.symbol < b.symbol;
            }
            return @intFromEnum(a.kind) < @intFromEnum(b.kind);
        }
        return a.code_bits < b.code_bits;
    }
};
|
||||
|
||||
/// Decoder for the literal/length alphabet: 286 symbols, codes up to 15 bits,
/// 9-bit primary lookup table.
pub const LiteralDecoder = HuffmanDecoder(286, 15, 9);
/// Decoder for the distance alphabet: 30 symbols, codes up to 15 bits.
pub const DistanceDecoder = HuffmanDecoder(30, 15, 9);
/// Decoder for the code-length (codegen) alphabet: 19 symbols, codes up to
/// 7 bits; the lookup table covers every possible code.
pub const CodegenDecoder = HuffmanDecoder(19, 7, 7);

/// Errors raised while building a Huffman tree or looking up a code.
pub const Error = error{
    InvalidCode,
    OversubscribedHuffmanTree,
    IncompleteHuffmanTree,
    MissingEndOfBlockCode,
};
|
||||
|
||||
/// Creates huffman tree codes from list of code lengths (in `generate`).
///
/// `find` then finds symbol for code bits. Code can be any length between 1 and
/// 15 bits. When calling `find` we don't know how many bits will be used to
/// find symbol. When symbol is returned it has code_bits field which defines
/// how much we should advance in bit stream.
///
/// Lookup table is used to map 15 bit int to symbol. Same symbol is written
/// many times in this table; 32K places for 286 (at most) symbols.
/// Small lookup table is optimization for faster search.
/// It is variation of the algorithm explained in [zlib](https://github.com/madler/zlib/blob/643e17b7498d12ab8d15565662880579692f769d/doc/algorithm.txt#L92)
/// with difference that we here use statically allocated arrays.
///
fn HuffmanDecoder(
    comptime alphabet_size: u16,
    comptime max_code_bits: u4,
    comptime lookup_bits: u4,
) type {
    // Number of high bits of a left-aligned code that index the lookup table.
    const lookup_shift = max_code_bits - lookup_bits;

    return struct {
        // All symbols in alphabet, sorted by code_bits, then symbol.
        symbols: [alphabet_size]Symbol = undefined,
        // Lookup table: top lookup_bits of a code -> symbol.
        lookup: [1 << lookup_bits]Symbol = undefined,

        const Self = @This();

        /// Generates symbols and lookup tables from list of code lens for each symbol.
        /// Errors if `lens` does not describe a valid canonical Huffman code.
        pub fn generate(self: *Self, lens: []const u4) !void {
            try checkCompletnes(lens);

            // Init alphabet with code_bits; symbols beyond lens.len get length 0 (unused).
            for (self.symbols, 0..) |_, i| {
                const cb: u4 = if (i < lens.len) lens[i] else 0;
                self.symbols[i] = if (i < 256)
                    .{ .kind = .literal, .symbol = @intCast(i), .code_bits = cb }
                else if (i == 256)
                    .{ .kind = .end_of_block, .symbol = 0xff, .code_bits = cb }
                else
                    .{ .kind = .match, .symbol = @intCast(i - 257), .code_bits = cb };
            }
            std.sort.heap(Symbol, &self.symbols, {}, Symbol.asc);

            // Reset lookup table to default (zero) symbols.
            for (0..self.lookup.len) |i| {
                self.lookup[i] = .{};
            }

            // Assign canonical codes to symbols in sorted order.
            // reference: https://youtu.be/9_YEGLe33NA?list=PLU4IQLU9e_OrY8oASHx0u3IXAL9TOdidm&t=2639
            var code: u16 = 0;
            var idx: u16 = 0;
            for (&self.symbols, 0..) |*sym, pos| {
                if (sym.code_bits == 0) continue; // skip unused
                sym.code = code;

                // Next code, kept left-aligned to max_code_bits.
                const next_code = code + (@as(u16, 1) << (max_code_bits - sym.code_bits));
                const next_idx = next_code >> lookup_shift;

                if (next_idx > self.lookup.len or idx >= self.lookup.len) break;
                if (sym.code_bits <= lookup_bits) {
                    // Short code: fill every lookup slot sharing this code's prefix.
                    for (idx..next_idx) |j|
                        self.lookup[j] = sym.*;
                } else {
                    // Long code: push onto the linked list rooted at the lookup slot.
                    const root = &self.lookup[idx];
                    const root_next = root.next;
                    root.next = @intCast(pos);
                    sym.next = root_next;
                }

                idx = next_idx;
                code = next_code;
            }
        }

        /// Given the list of code lengths check that it represents a canonical
        /// Huffman code for n symbols.
        ///
        /// Reference: https://github.com/madler/zlib/blob/5c42a230b7b468dff011f444161c0145b5efae59/contrib/puff/puff.c#L340
        fn checkCompletnes(lens: []const u4) !void {
            // The literal/length alphabet must define the end-of-block code (symbol 256).
            if (alphabet_size == 286)
                if (lens[256] == 0) return error.MissingEndOfBlockCode;

            var count = [_]u16{0} ** (@as(usize, max_code_bits) + 1);
            var max: usize = 0;
            for (lens) |n| {
                if (n == 0) continue;
                if (n > max) max = n;
                count[n] += 1;
            }
            if (max == 0) // empty tree
                return;

            // check for an over-subscribed or incomplete set of lengths
            var left: usize = 1; // one possible code of zero length
            for (1..count.len) |len| {
                left <<= 1; // one more bit, double codes left
                if (count[len] > left)
                    return error.OversubscribedHuffmanTree;
                left -= count[len]; // deduct count from possible codes
            }
            if (left > 0) { // left > 0 means incomplete
                // incomplete code ok only for single length 1 code
                if (max_code_bits > 7 and max == count[0] + count[1]) return;
                return error.IncompleteHuffmanTree;
            }
        }

        /// Finds symbol for lookup table code. `code` carries max_code_bits bits;
        /// the returned symbol's code_bits tells how many were actually consumed.
        pub fn find(self: *Self, code: u16) !Symbol {
            // try to find in lookup table
            const idx = code >> lookup_shift;
            const sym = self.lookup[idx];
            if (sym.code_bits != 0) return sym;
            // if not, use linked list of symbols with the same prefix
            return self.findLinked(code, sym.next);
        }

        inline fn findLinked(self: *Self, code: u16, start: u16) !Symbol {
            var pos = start;
            while (pos > 0) {
                const sym = self.symbols[pos];
                const shift = max_code_bits - sym.code_bits;
                // compare code_bits number of upper bits
                if ((code ^ sym.code) >> shift == 0) return sym;
                pos = sym.next;
            }
            return error.InvalidCode;
        }
    };
}
|
||||
|
||||
test "flate.HuffmanDecoder init/find" {
|
||||
// example data from: https://youtu.be/SJPvNi4HrWQ?t=8423
|
||||
const code_lens = [_]u4{ 4, 3, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2 };
|
||||
var h: CodegenDecoder = .{};
|
||||
try h.generate(&code_lens);
|
||||
|
||||
const expected = [_]struct {
|
||||
sym: Symbol,
|
||||
code: u16,
|
||||
}{
|
||||
.{
|
||||
.code = 0b00_00000,
|
||||
.sym = .{ .symbol = 3, .code_bits = 2 },
|
||||
},
|
||||
.{
|
||||
.code = 0b01_00000,
|
||||
.sym = .{ .symbol = 18, .code_bits = 2 },
|
||||
},
|
||||
.{
|
||||
.code = 0b100_0000,
|
||||
.sym = .{ .symbol = 1, .code_bits = 3 },
|
||||
},
|
||||
.{
|
||||
.code = 0b101_0000,
|
||||
.sym = .{ .symbol = 4, .code_bits = 3 },
|
||||
},
|
||||
.{
|
||||
.code = 0b110_0000,
|
||||
.sym = .{ .symbol = 17, .code_bits = 3 },
|
||||
},
|
||||
.{
|
||||
.code = 0b1110_000,
|
||||
.sym = .{ .symbol = 0, .code_bits = 4 },
|
||||
},
|
||||
.{
|
||||
.code = 0b1111_000,
|
||||
.sym = .{ .symbol = 16, .code_bits = 4 },
|
||||
},
|
||||
};
|
||||
|
||||
// unused symbols
|
||||
for (0..12) |i| {
|
||||
try testing.expectEqual(0, h.symbols[i].code_bits);
|
||||
}
|
||||
// used, from index 12
|
||||
for (expected, 12..) |e, i| {
|
||||
try testing.expectEqual(e.sym.symbol, h.symbols[i].symbol);
|
||||
try testing.expectEqual(e.sym.code_bits, h.symbols[i].code_bits);
|
||||
const sym_from_code = try h.find(e.code);
|
||||
try testing.expectEqual(e.sym.symbol, sym_from_code.symbol);
|
||||
}
|
||||
|
||||
// All possible codes for each symbol.
|
||||
// Lookup table has 126 elements, to cover all possible 7 bit codes.
|
||||
for (0b0000_000..0b0100_000) |c| // 0..32 (32)
|
||||
try testing.expectEqual(3, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b0100_000..0b1000_000) |c| // 32..64 (32)
|
||||
try testing.expectEqual(18, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1000_000..0b1010_000) |c| // 64..80 (16)
|
||||
try testing.expectEqual(1, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1010_000..0b1100_000) |c| // 80..96 (16)
|
||||
try testing.expectEqual(4, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1100_000..0b1110_000) |c| // 96..112 (16)
|
||||
try testing.expectEqual(17, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1110_000..0b1111_000) |c| // 112..120 (8)
|
||||
try testing.expectEqual(0, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1111_000..0b1_0000_000) |c| // 120...128 (8)
|
||||
try testing.expectEqual(16, (try h.find(@intCast(c))).symbol);
|
||||
}
|
||||
|
||||
const print = std.debug.print;
|
||||
const assert = std.debug.assert;
|
||||
const expect = std.testing.expect;
|
||||
|
||||
test "flate.HuffmanDecoder encode/decode literals" {
    const LiteralEncoder = @import("huffman_encoder.zig").LiteralEncoder;

    // Round-trip: encoder codes -> code lengths -> decoder, for many tree shapes.
    for (1..286) |j| { // for all different number of codes
        var enc: LiteralEncoder = .{};
        // create frequencies
        var freq = [_]u16{0} ** 286;
        freq[256] = 1; // ensure we have end of block code
        for (&freq, 1..) |*f, i| {
            if (i % j == 0)
                f.* = @intCast(i);
        }

        // encoder from frequencies
        enc.generate(&freq, 15);

        // get code_lens from encoder
        var code_lens = [_]u4{0} ** 286;
        for (code_lens, 0..) |_, i| {
            code_lens[i] = @intCast(enc.codes[i].len);
        }
        // generate decoder from code lens
        var dec: LiteralDecoder = .{};
        try dec.generate(&code_lens);

        // expect decoder code to match original encoder code
        // (encoder stores codes bit-reversed relative to the decoder)
        for (dec.symbols) |s| {
            if (s.code_bits == 0) continue;
            const c_code: u16 = @bitReverse(@as(u15, @intCast(s.code)));
            const symbol: u16 = switch (s.kind) {
                .literal => s.symbol,
                .end_of_block => 256,
                .match => @as(u16, s.symbol) + 257,
            };

            const c = enc.codes[symbol];
            try expect(c.code == c_code);
        }

        // find each symbol by code
        for (enc.codes) |c| {
            if (c.len == 0) continue;

            const s_code: u15 = @bitReverse(@as(u15, @intCast(c.code)));
            const s = try dec.find(s_code);
            try expect(s.code == s_code);
            try expect(s.code_bits == c.len);
        }
    }
}
|
||||
536
lib/std/compress/flate/huffman_encoder.zig
Normal file
536
lib/std/compress/flate/huffman_encoder.zig
Normal file
@@ -0,0 +1,536 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const math = std.math;
|
||||
const mem = std.mem;
|
||||
const sort = std.sort;
|
||||
const testing = std.testing;
|
||||
|
||||
const consts = @import("consts.zig").huffman;
|
||||
|
||||
// A leaf of the Huffman tree under construction: a symbol and its frequency.
const LiteralNode = struct {
    literal: u16,
    freq: u16,
};
|
||||
|
||||
// Describes the state of the constructed tree for a given depth.
// Used only inside bitCounts.
const LevelInfo = struct {
    // Our level. for better printing
    level: u32,

    // The frequency of the last node at this level
    last_freq: u32,

    // The frequency of the next character to add to this level
    next_char_freq: u32,

    // The frequency of the next pair (from level below) to add to this level.
    // Only valid if the "needed" value of the next lower level is 0.
    next_pair_freq: u32,

    // The number of chains remaining to generate for this level before moving
    // up to the next level
    needed: u32,
};
|
||||
|
||||
// A single Huffman code: the bit pattern and the number of bits it occupies.
pub const HuffCode = struct {
    code: u16 = 0,
    len: u16 = 0,

    // Store the given bit pattern and its length.
    fn set(self: *HuffCode, code: u16, length: u16) void {
        self.code = code;
        self.len = length;
    }
};
|
||||
|
||||
/// Builds minimum-redundancy (Huffman) codes for an alphabet of `size` symbols.
/// Port of Go's compress/flate huffman_code.go; all buffers are statically
/// allocated so no allocator is needed.
pub fn HuffmanEncoder(comptime size: usize) type {
    return struct {
        codes: [size]HuffCode = undefined,
        // Reusable buffer with the longest possible frequency table.
        freq_cache: [consts.max_num_frequencies + 1]LiteralNode = undefined,
        // Scratch for bitCounts results; index is the bit length.
        bit_count: [17]u32 = undefined,
        lns: []LiteralNode = undefined, // sorted by literal, stored to avoid repeated allocation in generate
        lfs: []LiteralNode = undefined, // sorted by frequency, stored to avoid repeated allocation in generate

        const Self = @This();

        // Update this Huffman Code object to be the minimum code for the specified frequency count.
        //
        // freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
        // max_bits The maximum number of bits to use for any literal.
        pub fn generate(self: *Self, freq: []u16, max_bits: u32) void {
            var list = self.freq_cache[0 .. freq.len + 1];
            // Number of non-zero literals
            var count: u32 = 0;
            // Set list to be the set of all non-zero literals and their frequencies
            for (freq, 0..) |f, i| {
                if (f != 0) {
                    list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
                    count += 1;
                } else {
                    // Unused symbol: zero out any stale code length from a prior run.
                    list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
                    self.codes[i].len = 0;
                }
            }
            list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };

            list = list[0..count];
            if (count <= 2) {
                // Handle the small cases here, because they are awkward for the general case code. With
                // two or fewer literals, everything has bit length 1.
                for (list, 0..) |node, i| {
                    // "list" is in order of increasing literal value.
                    self.codes[node.literal].set(@as(u16, @intCast(i)), 1);
                }
                return;
            }
            self.lfs = list;
            mem.sort(LiteralNode, self.lfs, {}, byFreq);

            // Get the number of literals for each bit count
            const bit_count = self.bitCounts(list, max_bits);
            // And do the assignment
            self.assignEncodingAndSize(bit_count, list);
        }

        /// Total number of bits the current codes need to encode `freq`.
        pub fn bitLength(self: *Self, freq: []u16) u32 {
            var total: u32 = 0;
            for (freq, 0..) |f, i| {
                if (f != 0) {
                    total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
                }
            }
            return total;
        }

        // Return the number of literals assigned to each bit size in the Huffman encoding
        //
        // This method is only called when list.len >= 3
        // The cases of 0, 1, and 2 literals are handled by special case code.
        //
        // list: An array of the literals with non-zero frequencies
        // and their associated frequencies. The array is in order of increasing
        // frequency, and has as its last element a special element with frequency
        // std.math.maxInt(i32)
        //
        // max_bits: The maximum number of bits that should be used to encode any literal.
        // Must be less than 16.
        //
        // Returns an integer array in which array[i] indicates the number of literals
        // that should be encoded in i bits.
        fn bitCounts(self: *Self, list: []LiteralNode, max_bits_to_use: usize) []u32 {
            var max_bits = max_bits_to_use;
            const n = list.len;
            const max_bits_limit = 16;

            assert(max_bits < max_bits_limit);

            // The tree can't have greater depth than n - 1, no matter what. This
            // saves a little bit of work in some small cases
            max_bits = @min(max_bits, n - 1);

            // Create information about each of the levels.
            // A bogus "Level 0" whose sole purpose is so that
            // level1.prev.needed == 0. This makes level1.next_pair_freq
            // be a legitimate value that never gets chosen.
            var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo);
            // leaf_counts[i] counts the number of literals at the left
            // of ancestors of the rightmost node at level i.
            // leaf_counts[i][j] is the number of literals at the left
            // of the level j ancestor.
            var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32);

            {
                var level = @as(u32, 1);
                while (level <= max_bits) : (level += 1) {
                    // For every level, the first two items are the first two characters.
                    // We initialize the levels as if we had already figured this out.
                    levels[level] = LevelInfo{
                        .level = level,
                        .last_freq = list[1].freq,
                        .next_char_freq = list[2].freq,
                        .next_pair_freq = list[0].freq + list[1].freq,
                        .needed = 0,
                    };
                    leaf_counts[level][level] = 2;
                    if (level == 1) {
                        levels[level].next_pair_freq = math.maxInt(i32);
                    }
                }
            }

            // We need a total of 2*n - 2 items at top level and have already generated 2.
            levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;

            {
                var level = max_bits;
                while (true) {
                    var l = &levels[level];
                    if (l.next_pair_freq == math.maxInt(i32) and l.next_char_freq == math.maxInt(i32)) {
                        // We've run out of both leafs and pairs.
                        // End all calculations for this level.
                        // To make sure we never come back to this level or any lower level,
                        // set next_pair_freq impossibly large.
                        l.needed = 0;
                        levels[level + 1].next_pair_freq = math.maxInt(i32);
                        level += 1;
                        continue;
                    }

                    const prev_freq = l.last_freq;
                    if (l.next_char_freq < l.next_pair_freq) {
                        // The next item on this row is a leaf node.
                        const next = leaf_counts[level][level] + 1;
                        l.last_freq = l.next_char_freq;
                        // Lower leaf_counts are the same of the previous node.
                        leaf_counts[level][level] = next;
                        if (next >= list.len) {
                            l.next_char_freq = maxNode().freq;
                        } else {
                            l.next_char_freq = list[next].freq;
                        }
                    } else {
                        // The next item on this row is a pair from the previous row.
                        // next_pair_freq isn't valid until we generate two
                        // more values in the level below
                        l.last_freq = l.next_pair_freq;
                        // Take leaf counts from the lower level, except counts[level] remains the same.
                        @memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
                        levels[l.level - 1].needed = 2;
                    }

                    l.needed -= 1;
                    if (l.needed == 0) {
                        // We've done everything we need to do for this level.
                        // Continue calculating one level up. Fill in next_pair_freq
                        // of that level with the sum of the two nodes we've just calculated on
                        // this level.
                        if (l.level == max_bits) {
                            // All done!
                            break;
                        }
                        levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
                        level += 1;
                    } else {
                        // If we stole from below, move down temporarily to replenish it.
                        while (levels[level - 1].needed > 0) {
                            level -= 1;
                            if (level == 0) {
                                break;
                            }
                        }
                    }
                }
            }

            // Something is wrong if at the end, the top level is null or hasn't used
            // all of the leaves.
            assert(leaf_counts[max_bits][max_bits] == n);

            var bit_count = self.bit_count[0 .. max_bits + 1];
            var bits: u32 = 1;
            const counts = &leaf_counts[max_bits];
            {
                var level = max_bits;
                while (level > 0) : (level -= 1) {
                    // counts[level] gives the number of literals requiring at least "bits"
                    // bits to encode.
                    bit_count[bits] = counts[level] - counts[level - 1];
                    bits += 1;
                    // NOTE(review): unreachable — the loop condition already
                    // guarantees level > 0 here; kept to match the Go original.
                    if (level == 0) {
                        break;
                    }
                }
            }
            return bit_count;
        }

        // Look at the leaves and assign them a bit count and an encoding as specified
        // in RFC 1951 3.2.2
        fn assignEncodingAndSize(self: *Self, bit_count: []u32, list_arg: []LiteralNode) void {
            var code = @as(u16, 0);
            var list = list_arg;

            for (bit_count, 0..) |bits, n| {
                code <<= 1;
                if (n == 0 or bits == 0) {
                    continue;
                }
                // The literals list[list.len-bits] .. list[list.len-1]
                // are encoded using "bits" bits, and get the values
                // code, code + 1, .... The code values are
                // assigned in literal order (not frequency order).
                const chunk = list[list.len - @as(u32, @intCast(bits)) ..];

                self.lns = chunk;
                mem.sort(LiteralNode, self.lns, {}, byLiteral);

                for (chunk) |node| {
                    self.codes[node.literal] = HuffCode{
                        .code = bitReverse(u16, code, @as(u5, @intCast(n))),
                        .len = @as(u16, @intCast(n)),
                    };
                    code += 1;
                }
                list = list[0 .. list.len - @as(u32, @intCast(bits))];
            }
        }
    };
}
|
||||
|
||||
// Sentinel node whose frequency compares at least as large as any real node's.
fn maxNode() LiteralNode {
    return .{
        .literal = math.maxInt(u16),
        .freq = math.maxInt(u16),
    };
}
|
||||
|
||||
/// Returns a fresh (uninitialized-codes) Huffman encoder for `size` symbols.
pub fn huffmanEncoder(comptime size: u32) HuffmanEncoder(size) {
    return .{};
}
|
||||
|
||||
// Encoder over the literal/length alphabet.
pub const LiteralEncoder = HuffmanEncoder(consts.max_num_frequencies);
// Encoder over the distance alphabet.
pub const DistanceEncoder = HuffmanEncoder(consts.distance_code_count);
// Encoder over the 19-symbol code-length (codegen) alphabet.
pub const CodegenEncoder = HuffmanEncoder(19);
|
||||
|
||||
// Generates a HuffmanCode corresponding to the fixed literal table
// defined in RFC 1951 3.2.6. Codes are stored bit-reversed, ready to be
// emitted LSB-first by the bit writer.
pub fn fixedLiteralEncoder() LiteralEncoder {
    var h: LiteralEncoder = undefined;
    var ch: u16 = 0;

    while (ch < consts.max_num_frequencies) : (ch += 1) {
        var bits: u16 = undefined;
        var size: u16 = undefined;
        switch (ch) {
            0...143 => {
                // size 8, 00110000 .. 10111111
                bits = ch + 48;
                size = 8;
            },
            144...255 => {
                // size 9, 110010000 .. 111111111
                bits = ch + 400 - 144;
                size = 9;
            },
            256...279 => {
                // size 7, 0000000 .. 0010111
                bits = ch - 256;
                size = 7;
            },
            else => {
                // size 8, 11000000 .. 11000111
                bits = ch + 192 - 280;
                size = 8;
            },
        }
        h.codes[ch] = HuffCode{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
    }
    return h;
}
|
||||
|
||||
// Builds the fixed distance table of RFC 1951: every distance code is
// exactly 5 bits; codes are stored bit-reversed for LSB-first emission.
pub fn fixedDistanceEncoder() DistanceEncoder {
    var encoder: DistanceEncoder = undefined;
    var ch: u16 = 0;
    while (ch < encoder.codes.len) : (ch += 1) {
        encoder.codes[ch] = .{ .code = bitReverse(u16, ch, 5), .len = 5 };
    }
    return encoder;
}
|
||||
|
||||
/// Builds a static distance encoder for huffman-only compression, generated
/// from a frequency table where only distance code 0 occurs.
pub fn huffmanDistanceEncoder() DistanceEncoder {
    var distance_freq = [1]u16{0} ** consts.distance_code_count;
    distance_freq[0] = 1;
    // huff_distance is a static distance encoder used for huffman only encoding.
    // It can be reused since we will not be encoding distance values.
    var h: DistanceEncoder = .{};
    h.generate(distance_freq[0..], 15);
    return h;
}
|
||||
|
||||
// Less-than for mem.sort: orders nodes by ascending literal value.
fn byLiteral(_: void, a: LiteralNode, b: LiteralNode) bool {
    return a.literal < b.literal;
}
|
||||
|
||||
// Less-than for mem.sort: orders nodes by ascending frequency,
// breaking ties by literal value.
fn byFreq(_: void, a: LiteralNode, b: LiteralNode) bool {
    if (a.freq == b.freq) return a.literal < b.literal;
    return a.freq < b.freq;
}
|
||||
|
||||
test "flate.HuffmanEncoder generate a Huffman code from an array of frequencies" {
    var freqs: [19]u16 = [_]u16{
        8, // 0
        1, // 1
        1, // 2
        2, // 3
        5, // 4
        10, // 5
        9, // 6
        1, // 7
        0, // 8
        0, // 9
        0, // 10
        0, // 11
        0, // 12
        0, // 13
        0, // 14
        0, // 15
        1, // 16
        3, // 17
        5, // 18
    };

    var enc = huffmanEncoder(19);
    enc.generate(freqs[0..], 7);

    // Total encoded length = sum over symbols of freq * code length.
    try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));

    // Code lengths: more frequent symbols get shorter codes; zero-frequency
    // symbols get length 0.
    try testing.expectEqual(@as(usize, 3), enc.codes[0].len);
    try testing.expectEqual(@as(usize, 6), enc.codes[1].len);
    try testing.expectEqual(@as(usize, 6), enc.codes[2].len);
    try testing.expectEqual(@as(usize, 5), enc.codes[3].len);
    try testing.expectEqual(@as(usize, 3), enc.codes[4].len);
    try testing.expectEqual(@as(usize, 2), enc.codes[5].len);
    try testing.expectEqual(@as(usize, 2), enc.codes[6].len);
    try testing.expectEqual(@as(usize, 6), enc.codes[7].len);
    try testing.expectEqual(@as(usize, 0), enc.codes[8].len);
    try testing.expectEqual(@as(usize, 0), enc.codes[9].len);
    try testing.expectEqual(@as(usize, 0), enc.codes[10].len);
    try testing.expectEqual(@as(usize, 0), enc.codes[11].len);
    try testing.expectEqual(@as(usize, 0), enc.codes[12].len);
    try testing.expectEqual(@as(usize, 0), enc.codes[13].len);
    try testing.expectEqual(@as(usize, 0), enc.codes[14].len);
    try testing.expectEqual(@as(usize, 0), enc.codes[15].len);
    try testing.expectEqual(@as(usize, 6), enc.codes[16].len);
    try testing.expectEqual(@as(usize, 5), enc.codes[17].len);
    try testing.expectEqual(@as(usize, 3), enc.codes[18].len);

    // Code bit patterns (stored bit-reversed for LSB-first emission).
    try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code);
    try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code);
    try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code);
    try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code);
    try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code);
    try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code);
    try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code);
    try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code);
    try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code);
    try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
    try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
}
|
||||
|
||||
// Verifies the fixed literal table against the ranges in RFC 1951 3.2.6:
//   7-bit codes: 0000000..0010111
//   8-bit codes: 00110000..10111111 and 11000000..11000111
//   9-bit codes: 110010000..111111111
test "flate.HuffmanEncoder generate a Huffman code for the fixed literal table specific to Deflate" {
    const enc = fixedLiteralEncoder();
    for (enc.codes) |c| {
        switch (c.len) {
            7 => {
                const v = @bitReverse(@as(u7, @intCast(c.code)));
                try testing.expect(v <= 0b0010111);
            },
            8 => {
                const v = @bitReverse(@as(u8, @intCast(c.code)));
                // Fixed: the original compared against decimal 11000111 (missing
                // the 0b prefix), which made the upper-range check vacuously true.
                try testing.expect((v >= 0b00110000 and v <= 0b10111111) or
                    (v >= 0b11000000 and v <= 0b11000111));
            },
            9 => {
                const v = @bitReverse(@as(u9, @intCast(c.code)));
                try testing.expect(v >= 0b110010000 and v <= 0b111111111);
            },
            else => unreachable,
        }
    }
}
|
||||
|
||||
test "flate.HuffmanEncoder generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" {
    const enc = fixedDistanceEncoder();
    // Every distance code is 5 bits; bit-reversing recovers the code value 0..29.
    for (enc.codes) |c| {
        const v = @bitReverse(@as(u5, @intCast(c.code)));
        try testing.expect(v <= 29);
        try testing.expect(c.len == 5);
    }
}
|
||||
|
||||
// Reverse the low `n` bits of `value`; the remaining high bits are dropped.
fn bitReverse(comptime T: type, value: T, n: usize) T {
    const reversed_all = @bitReverse(value);
    const unused_bits = @typeInfo(T).Int.bits - n;
    return reversed_all >> @as(math.Log2Int(T), @intCast(unused_bits));
}
|
||||
|
||||
test "flate bitReverse" {
    // Table-driven check of bitReverse against known (value, width) pairs.
    const ReverseBitsTest = struct {
        in: u16,
        bit_count: u5,
        out: u16,
    };

    const reverse_bits_tests = [_]ReverseBitsTest{
        .{ .in = 1, .bit_count = 1, .out = 1 },
        .{ .in = 1, .bit_count = 2, .out = 2 },
        .{ .in = 1, .bit_count = 3, .out = 4 },
        .{ .in = 1, .bit_count = 4, .out = 8 },
        .{ .in = 1, .bit_count = 5, .out = 16 },
        .{ .in = 17, .bit_count = 5, .out = 17 }, // palindromes map to themselves
        .{ .in = 257, .bit_count = 9, .out = 257 },
        .{ .in = 29, .bit_count = 5, .out = 23 },
    };

    for (reverse_bits_tests) |h| {
        const v = bitReverse(u16, h.in, h.bit_count);
        try std.testing.expectEqual(h.out, v);
    }
}
|
||||
|
||||
test "flate.HuffmanEncoder fixedLiteralEncoder codes" {
    // Serialize every fixed literal code LSB-first and compare against the
    // precomputed fixed_codes byte table.
    var al = std.ArrayList(u8).init(testing.allocator);
    defer al.deinit();
    var bw = std.io.bitWriter(.little, al.writer());

    const f = fixedLiteralEncoder();
    for (f.codes) |c| {
        try bw.writeBits(c.code, c.len);
    }
    try testing.expectEqualSlices(u8, &fixed_codes, al.items);
}
|
||||
|
||||
/// Byte stream produced by writing every fixed (static) Huffman
/// literal/length code, in symbol order, LSB-first with a little-endian bit
/// writer. Serves as the expected output for the fixedLiteralEncoder test
/// above.
pub const fixed_codes = [_]u8{
    0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100,
    0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100,
    0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010,
    0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010,
    0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010,
    0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010,
    0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110,
    0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110,
    0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110,
    0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110,
    0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001,
    0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001,
    0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001,
    0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001,
    0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101,
    0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101,
    0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101,
    0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101,
    0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100,
    0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110,
    0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100,
    0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101,
    0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010,
    0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110,
    0b11011010, 0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110,
    0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111,
    0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011,
    0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101,
    0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110,
    0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100,
    0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101,
    0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010,
    0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110,
    0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000,
    0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010,
    0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100,
    0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011,
    0b10100011,
};
|
||||
529
lib/std/compress/flate/inflate.zig
Normal file
529
lib/std/compress/flate/inflate.zig
Normal file
@@ -0,0 +1,529 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
const hfd = @import("huffman_decoder.zig");
|
||||
const BitReader = @import("bit_reader.zig").BitReader;
|
||||
const CircularBuffer = @import("CircularBuffer.zig");
|
||||
const Container = @import("container.zig").Container;
|
||||
const Token = @import("Token.zig");
|
||||
const codegen_order = @import("consts.zig").huffman.codegen_order;
|
||||
|
||||
/// Decompresses deflate bit stream `reader` and writes uncompressed data to
/// the `writer` stream.
pub fn decompress(comptime container: Container, reader: anytype, writer: anytype) !void {
    var inflate = Inflate(container, @TypeOf(reader)).init(reader);
    try inflate.decompress(writer);
}
|
||||
|
||||
/// Creates an inflate decompressor over the given reader.
pub fn decompressor(comptime container: Container, reader: anytype) Inflate(container, @TypeOf(reader)) {
    const InflateType = Inflate(container, @TypeOf(reader));
    return InflateType.init(reader);
}
|
||||
|
||||
/// Inflate decompresses deflate bit stream. Reads compressed data from reader
/// provided in init. Decompressed data are stored in internal hist buffer and
/// can be accessed via the iterator (`next`) or reader interface.
///
/// Container defines header/footer wrapper around deflate bit stream. Can be
/// gzip or zlib.
///
/// Deflate bit stream consists of multiple blocks. Block can be one of three types:
///   * stored, non compressed, max 64k in size
///   * fixed, huffman codes are predefined
///   * dynamic, huffman code tables are encoded at the block start
///
/// `step` function runs decoder until internal `hist` buffer is full. Client
/// then needs to read that data in order to proceed with decoding.
///
/// Allocates 74.5K of internal buffers, most important are:
///   * 64K for history (CircularBuffer)
///   * ~10K huffman decoders (Literal and DistanceDecoder)
///
pub fn Inflate(comptime container: Container, comptime ReaderType: type) type {
    return struct {
        const BitReaderType = BitReader(ReaderType);
        const F = BitReaderType.flag;

        bits: BitReaderType = .{},
        hist: CircularBuffer = .{},
        // Produces checksum of uncompressed data for the gzip/zlib footer.
        hasher: container.Hasher() = .{},

        // dynamic block huffman code decoders
        lit_dec: hfd.LiteralDecoder = .{}, // literals
        dst_dec: hfd.DistanceDecoder = .{}, // distances

        // current read state
        bfinal: u1 = 0,
        // 0b11 is not a valid deflate block type; marks "no block read yet".
        block_type: u2 = 0b11,
        state: ReadState = .protocol_header,

        const ReadState = enum {
            protocol_header,
            block_header,
            block,
            protocol_footer,
            end,
        };

        const Self = @This();

        pub const Error = BitReaderType.Error || Container.Error || hfd.Error || error{
            InvalidCode,
            InvalidMatch,
            InvalidBlockType,
            WrongStoredBlockNlen,
            InvalidDynamicBlockHeader,
        };

        pub fn init(rt: ReaderType) Self {
            return .{ .bits = BitReaderType.init(rt) };
        }

        // Reads the 3-bit deflate block header: 1 bit final flag, 2 bits type.
        fn blockHeader(self: *Self) !void {
            self.bfinal = try self.bits.read(u1);
            self.block_type = try self.bits.read(u2);
        }

        // Copies a stored (non compressed) block into the history buffer.
        // Returns true: a stored block is always fully consumed in one call.
        fn storedBlock(self: *Self) !bool {
            self.bits.alignToByte(); // skip padding until byte boundary
            // everything after this is byte aligned in stored block
            var len = try self.bits.read(u16);
            const nlen = try self.bits.read(u16);
            // nlen is the one's complement of len (RFC 1951, 3.2.4)
            if (len != ~nlen) return error.WrongStoredBlockNlen;

            while (len > 0) {
                // getWritable may return less than len when near the buffer wrap point
                const buf = self.hist.getWritable(len);
                try self.bits.readAll(buf);
                len -= @intCast(buf.len);
            }
            return true;
        }

        // Decodes a block that uses the predefined (fixed) Huffman codes.
        // Returns true when the end-of-block symbol (256) is reached, false
        // when the history buffer filled up before that (call again after
        // draining hist).
        fn fixedBlock(self: *Self) !bool {
            while (!self.hist.full()) {
                const code = try self.bits.readFixedCode();
                switch (code) {
                    0...255 => self.hist.write(@intCast(code)),
                    256 => return true, // end of block
                    257...285 => try self.fixedDistanceCode(@intCast(code - 257)),
                    else => return error.InvalidCode,
                }
            }
            return false;
        }

        // Handles fixed block non literal (length) code.
        // Length code is followed by 5 bits of distance code.
        fn fixedDistanceCode(self: *Self, code: u8) !void {
            // 5 length extra bits + 5 distance code bits + 13 distance extra bits
            try self.bits.fill(5 + 5 + 13);
            const length = try self.decodeLength(code);
            const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered | F.reverse));
            try self.hist.writeMatch(length, distance);
        }

        // Converts a length code (0-28) to the actual match length, reading
        // any extra bits the code requires. Caller must have filled the bit
        // buffer (reads use F.buffered).
        inline fn decodeLength(self: *Self, code: u8) !u16 {
            if (code > 28) return error.InvalidCode;
            const ml = Token.matchLength(code);
            return if (ml.extra_bits == 0) // 0 - 5 extra bits
                ml.base
            else
                ml.base + try self.bits.readN(ml.extra_bits, F.buffered);
        }

        // Converts a distance code (0-29) to the actual match distance,
        // reading any extra bits the code requires.
        fn decodeDistance(self: *Self, code: u8) !u16 {
            if (code > 29) return error.InvalidCode;
            const md = Token.matchDistance(code);
            return if (md.extra_bits == 0) // 0 - 13 extra bits
                md.base
            else
                md.base + try self.bits.readN(md.extra_bits, F.buffered);
        }

        // Parses a dynamic block header (RFC 1951, 3.2.7): reads the
        // code-length code table, then uses it to decode the literal/length
        // and distance code length lists, and builds both Huffman decoders.
        fn dynamicBlockHeader(self: *Self) !void {
            const hlit: u16 = @as(u16, try self.bits.read(u5)) + 257; // number of ll code entries present - 257
            const hdist: u16 = @as(u16, try self.bits.read(u5)) + 1; // number of distance code entries - 1
            const hclen: u8 = @as(u8, try self.bits.read(u4)) + 4; // hclen + 4 code lengths are encoded

            if (hlit > 286 or hdist > 30)
                return error.InvalidDynamicBlockHeader;

            // lengths for code lengths, stored in the fixed codegen_order
            var cl_lens = [_]u4{0} ** 19;
            for (0..hclen) |i| {
                cl_lens[codegen_order[i]] = try self.bits.read(u3);
            }
            var cl_dec: hfd.CodegenDecoder = .{};
            try cl_dec.generate(&cl_lens);

            // literal code lengths
            var lit_lens = [_]u4{0} ** (286);
            var pos: usize = 0;
            while (pos < hlit) {
                const sym = try cl_dec.find(try self.bits.peekF(u7, F.reverse));
                try self.bits.shift(sym.code_bits);
                pos += try self.dynamicCodeLength(sym.symbol, &lit_lens, pos);
            }
            // a repeat code must not run past the declared count
            if (pos > hlit)
                return error.InvalidDynamicBlockHeader;

            // distance code lengths
            var dst_lens = [_]u4{0} ** (30);
            pos = 0;
            while (pos < hdist) {
                const sym = try cl_dec.find(try self.bits.peekF(u7, F.reverse));
                try self.bits.shift(sym.code_bits);
                pos += try self.dynamicCodeLength(sym.symbol, &dst_lens, pos);
            }
            if (pos > hdist)
                return error.InvalidDynamicBlockHeader;

            try self.lit_dec.generate(&lit_lens);
            try self.dst_dec.generate(&dst_lens);
        }

        // Decode code length symbol to code length. Writes decoded length into
        // lens slice starting at position pos. Returns number of positions
        // advanced.
        fn dynamicCodeLength(self: *Self, code: u16, lens: []u4, pos: usize) !usize {
            if (pos >= lens.len)
                return error.InvalidDynamicBlockHeader;

            switch (code) {
                0...15 => {
                    // Represent code lengths of 0 - 15
                    lens[pos] = @intCast(code);
                    return 1;
                },
                16 => {
                    // Copy the previous code length 3 - 6 times.
                    // The next 2 bits indicate repeat length
                    const n: u8 = @as(u8, try self.bits.read(u2)) + 3;
                    if (pos == 0 or pos + n > lens.len)
                        return error.InvalidDynamicBlockHeader;
                    for (0..n) |i| {
                        lens[pos + i] = lens[pos + i - 1];
                    }
                    return n;
                },
                // Repeat a code length of 0 for 3 - 10 times. (3 bits of length)
                // lens is zero-initialized, so only the position advances.
                17 => return @as(u8, try self.bits.read(u3)) + 3,
                // Repeat a code length of 0 for 11 - 138 times (7 bits of length)
                18 => return @as(u8, try self.bits.read(u7)) + 11,
                else => return error.InvalidDynamicBlockHeader,
            }
        }

        // In larger archives most blocks are usually dynamic, so decompression
        // performance depends on this function. Returns true on end of block,
        // false when hist filled up first.
        fn dynamicBlock(self: *Self) !bool {
            // Hot path loop!
            while (!self.hist.full()) {
                try self.bits.fill(15); // optimization so other bit reads can be buffered (avoiding one `if` in hot path)
                const sym = try self.decodeSymbol(&self.lit_dec);

                switch (sym.kind) {
                    .literal => self.hist.write(sym.symbol),
                    .match => { // Decode match backreference <length, distance>
                        try self.bits.fill(5 + 15 + 13); // so we can use buffered reads
                        const length = try self.decodeLength(sym.symbol);
                        const dsm = try self.decodeSymbol(&self.dst_dec);
                        const distance = try self.decodeDistance(dsm.symbol);
                        try self.hist.writeMatch(length, distance);
                    },
                    .end_of_block => return true,
                }
            }
            return false;
        }

        // Peek 15 bits from bits reader (maximum code len is 15 bits). Use
        // decoder to find symbol for that code. We then know how many bits is
        // used. Shift bit reader for that much bits, those bits are used. And
        // return symbol.
        fn decodeSymbol(self: *Self, decoder: anytype) !hfd.Symbol {
            const sym = try decoder.find(try self.bits.peekF(u15, F.buffered | F.reverse));
            try self.bits.shift(sym.code_bits);
            return sym;
        }

        // Advances the state machine by one step: header -> blocks -> footer.
        // Each .block step decodes until hist is full or the block ends.
        fn step(self: *Self) !void {
            switch (self.state) {
                .protocol_header => {
                    try container.parseHeader(&self.bits);
                    self.state = .block_header;
                },
                .block_header => {
                    try self.blockHeader();
                    self.state = .block;
                    // dynamic block: the code tables must be parsed before decoding data
                    if (self.block_type == 2) try self.dynamicBlockHeader();
                },
                .block => {
                    const done = switch (self.block_type) {
                        0 => try self.storedBlock(),
                        1 => try self.fixedBlock(),
                        2 => try self.dynamicBlock(),
                        else => return error.InvalidBlockType,
                    };
                    if (done) {
                        self.state = if (self.bfinal == 1) .protocol_footer else .block_header;
                    }
                },
                .protocol_footer => {
                    self.bits.alignToByte();
                    try container.parseFooter(&self.hasher, &self.bits);
                    self.state = .end;
                },
                .end => {},
            }
        }

        /// Replaces the inner reader with new reader.
        /// If the previous stream was fully processed, resets the state so a
        /// new stream can be decompressed with the same instance.
        pub fn setReader(self: *Self, new_reader: ReaderType) void {
            self.bits.forward_reader = new_reader;
            if (self.state == .end or self.state == .protocol_footer) {
                self.state = .protocol_header;
            }
        }

        // Reads all compressed data from the internal reader and outputs plain
        // (uncompressed) data to the provided writer.
        pub fn decompress(self: *Self, writer: anytype) !void {
            while (try self.next()) |buf| {
                try writer.writeAll(buf);
            }
        }

        // Iterator interface

        /// Can be used in iterator like loop without memcpy to another buffer:
        ///   while (try inflate.next()) |buf| { ... }
        /// Returns null at end of stream.
        pub fn next(self: *Self) Error!?[]const u8 {
            const out = try self.get(0);
            if (out.len == 0) return null;
            return out;
        }

        /// Returns decompressed data from internal sliding window buffer.
        /// Returned buffer can be any length between 0 and `limit` bytes. 0
        /// returned bytes means end of stream reached. With limit=0 returns as
        /// much data as it can. It never will be more than 65536 bytes, which
        /// is the size of the internal buffer.
        pub fn get(self: *Self, limit: usize) Error![]const u8 {
            while (true) {
                const out = self.hist.readAtMost(limit);
                if (out.len > 0) {
                    // update the running checksum with every byte handed out
                    self.hasher.update(out);
                    return out;
                }
                if (self.state == .end) return out;
                try self.step();
            }
        }

        // Reader interface

        pub const Reader = std.io.Reader(*Self, Error, read);

        /// Returns the number of bytes read. It may be less than buffer.len.
        /// If the number of bytes read is 0, it means end of stream.
        /// End of stream is not an error condition.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            const out = try self.get(buffer.len);
            @memcpy(buffer[0..out.len], out);
            return out.len;
        }

        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }
    };
}
|
||||
|
||||
test "flate.Inflate decompress" {
    // One raw deflate stream per block type, each decompressing to a known string.
    const Case = struct {
        in: []const u8,
        out: []const u8,
    };
    const cases = [_]Case{
        // non compressed block (type 0)
        .{
            .in = &[_]u8{
                0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
                'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
            },
            .out = "Hello world\n",
        },
        // fixed code block (type 1)
        .{
            .in = &[_]u8{
                0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
                0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
            },
            .out = "Hello world\n",
        },
        // dynamic block (type 2)
        .{
            .in = &[_]u8{
                0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2
                0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38,
                0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01,
            },
            .out = "ABCDEABCD ABCDEABCD",
        },
    };

    for (cases) |case| {
        var input = std.io.fixedBufferStream(case.in);
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();

        try decompress(.raw, input.reader(), output.writer());
        try testing.expectEqualStrings(case.out, output.items);
    }
}
|
||||
|
||||
test "flate.Inflate gzip decompress" {
    // Same deflate payloads as the raw test, wrapped in gzip header/footer,
    // plus one stream carrying an original file name in the header.
    const Case = struct {
        in: []const u8,
        out: []const u8,
    };
    const cases = [_]Case{
        // non compressed block (type 0)
        .{
            .in = &[_]u8{
                0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes)
                0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
                'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
                0xd5, 0xe0, 0x39, 0xb7, // gzip footer: checksum
                0x0c, 0x00, 0x00, 0x00, // gzip footer: size
            },
            .out = "Hello world\n",
        },
        // fixed code block (type 1)
        .{
            .in = &[_]u8{
                0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, // gzip header (10 bytes)
                0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
                0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
                0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00, // gzip footer (chksum, len)
            },
            .out = "Hello world\n",
        },
        // dynamic block (type 2)
        .{
            .in = &[_]u8{
                0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes)
                0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2
                0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38,
                0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01,
                0x17, 0x1c, 0x39, 0xb4, 0x13, 0x00, 0x00, 0x00, // gzip footer (chksum, len)
            },
            .out = "ABCDEABCD ABCDEABCD",
        },
        // gzip header with name
        .{
            .in = &[_]u8{
                0x1f, 0x8b, 0x08, 0x08, 0xe5, 0x70, 0xb1, 0x65, 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
                0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
                0x02, 0x00, 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00,
            },
            .out = "Hello world\n",
        },
    };

    for (cases) |case| {
        var input = std.io.fixedBufferStream(case.in);
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();

        try decompress(.gzip, input.reader(), output.writer());
        try testing.expectEqualStrings(case.out, output.items);
    }
}
|
||||
|
||||
test "flate.Inflate zlib decompress" {
    // Stored deflate block wrapped in a zlib header and Adler-32 footer.
    const Case = struct {
        in: []const u8,
        out: []const u8,
    };
    const cases = [_]Case{
        // non compressed block (type 0)
        .{
            .in = &[_]u8{
                0x78, 0b10_0_11100, // zlib header (2 bytes)
                0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
                'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
                0x1c, 0xf2, 0x04, 0x47, // zlib footer: checksum
            },
            .out = "Hello world\n",
        },
    };

    for (cases) |case| {
        var input = std.io.fixedBufferStream(case.in);
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();

        try decompress(.zlib, input.reader(), output.writer());
        try testing.expectEqualStrings(case.out, output.items);
    }
}
|
||||
|
||||
test "flate.Inflate fuzzing tests" {
    // Malformed and edge-case streams collected from fuzzers and Mark
    // Adler's puff test suite; each either decompresses to `out` or fails
    // with the expected error.
    const Case = struct {
        input: []const u8,
        out: []const u8 = "",
        err: ?anyerror = null,
    };
    const cases = [_]Case{
        .{ .input = "deflate-stream", .out = @embedFile("testdata/fuzz/deflate-stream.expect") }, // 0
        .{ .input = "empty-distance-alphabet01" },
        .{ .input = "empty-distance-alphabet02" },
        .{ .input = "end-of-stream", .err = error.EndOfStream },
        .{ .input = "invalid-distance", .err = error.InvalidMatch },
        .{ .input = "invalid-tree01", .err = error.IncompleteHuffmanTree }, // 5
        .{ .input = "invalid-tree02", .err = error.IncompleteHuffmanTree },
        .{ .input = "invalid-tree03", .err = error.IncompleteHuffmanTree },
        .{ .input = "lengths-overflow", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "out-of-codes", .err = error.InvalidCode },
        .{ .input = "puff01", .err = error.WrongStoredBlockNlen }, // 10
        .{ .input = "puff02", .err = error.EndOfStream },
        .{ .input = "puff03", .out = &[_]u8{0xa} },
        .{ .input = "puff04", .err = error.InvalidCode },
        .{ .input = "puff05", .err = error.EndOfStream },
        .{ .input = "puff06", .err = error.EndOfStream },
        .{ .input = "puff08", .err = error.InvalidCode },
        .{ .input = "puff09", .out = "P" },
        .{ .input = "puff10", .err = error.InvalidCode },
        .{ .input = "puff11", .err = error.InvalidMatch },
        .{ .input = "puff12", .err = error.InvalidDynamicBlockHeader }, // 20
        .{ .input = "puff13", .err = error.IncompleteHuffmanTree },
        .{ .input = "puff14", .err = error.EndOfStream },
        .{ .input = "puff15", .err = error.IncompleteHuffmanTree },
        .{ .input = "puff16", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "puff17", .err = error.InvalidDynamicBlockHeader }, // 25
        .{ .input = "fuzz1", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "fuzz2", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "fuzz3", .err = error.InvalidMatch },
        .{ .input = "fuzz4", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff18", .err = error.OversubscribedHuffmanTree }, // 30
        .{ .input = "puff19", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff20", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff21", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff22", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff23", .err = error.InvalidDynamicBlockHeader }, // 35
        .{ .input = "puff24", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "puff25", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff26", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "puff27", .err = error.InvalidDynamicBlockHeader },
    };

    // inline: the case index feeds the comptime @embedFile path concatenation
    inline for (cases, 0..) |tc, idx| {
        var input = std.io.fixedBufferStream(@embedFile("testdata/fuzz/" ++ tc.input ++ ".input"));
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();
        errdefer std.debug.print("test case failed {}\n", .{idx});

        if (tc.err == null) {
            try decompress(.raw, input.reader(), output.writer());
            try testing.expectEqualStrings(tc.out, output.items);
        } else {
            try testing.expectError(tc.err.?, decompress(.raw, input.reader(), output.writer()));
        }
    }
}
|
||||
606
lib/std/compress/flate/testdata/block_writer.zig
vendored
Normal file
606
lib/std/compress/flate/testdata/block_writer.zig
vendored
Normal file
@@ -0,0 +1,606 @@
|
||||
const Token = @import("../Token.zig");
|
||||
|
||||
/// Block-writer test fixture: a token stream plus the names of the files
/// holding the expected encoder output (`{s}` in the names is a format
/// placeholder, presumably filled with the block type — TODO confirm against
/// the test runner).
pub const TestCase = struct {
    tokens: []const Token,
    input: []const u8 = "", // File name of input data matching the tokens.
    want: []const u8 = "", // File name of data with the expected output with input available.
    want_no_input: []const u8 = "", // File name of the expected output when no input is available.
};
|
||||
|
||||
pub const testCases = blk: {
|
||||
@setEvalBranchQuota(4096 * 2);
|
||||
|
||||
const L = Token.initLiteral;
|
||||
const M = Token.initMatch;
|
||||
const ml = M(1, 258); // Maximum length token. Used to reduce the size of writeBlockTests
|
||||
|
||||
break :blk &[_]TestCase{
|
||||
TestCase{
|
||||
.input = "huffman-null-max.input",
|
||||
.want = "huffman-null-max.{s}.expect",
|
||||
.want_no_input = "huffman-null-max.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, L(0x0), L(0x0),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-pi.input",
|
||||
.want = "huffman-pi.{s}.expect",
|
||||
.want_no_input = "huffman-pi.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L('3'), L('.'), L('1'), L('4'), L('1'), L('5'), L('9'), L('2'),
|
||||
L('6'), L('5'), L('3'), L('5'), L('8'), L('9'), L('7'), L('9'),
|
||||
L('3'), L('2'), L('3'), L('8'), L('4'), L('6'), L('2'), L('6'),
|
||||
L('4'), L('3'), L('3'), L('8'), L('3'), L('2'), L('7'), L('9'),
|
||||
L('5'), L('0'), L('2'), L('8'), L('8'), L('4'), L('1'), L('9'),
|
||||
L('7'), L('1'), L('6'), L('9'), L('3'), L('9'), L('9'), L('3'),
|
||||
L('7'), L('5'), L('1'), L('0'), L('5'), L('8'), L('2'), L('0'),
|
||||
L('9'), L('7'), L('4'), L('9'), L('4'), L('4'), L('5'), L('9'),
|
||||
L('2'), L('3'), L('0'), L('7'), L('8'), L('1'), L('6'), L('4'),
|
||||
L('0'), L('6'), L('2'), L('8'), L('6'), L('2'), L('0'), L('8'),
|
||||
L('9'), L('9'), L('8'), L('6'), L('2'), L('8'), L('0'), L('3'),
|
||||
L('4'), L('8'), L('2'), L('5'), L('3'), L('4'), L('2'), L('1'),
|
||||
L('1'), L('7'), L('0'), L('6'), L('7'), L('9'), L('8'), L('2'),
|
||||
L('1'), L('4'), L('8'), L('0'), L('8'), L('6'), L('5'), L('1'),
|
||||
L('3'), L('2'), L('8'), L('2'), L('3'), L('0'), L('6'), L('6'),
|
||||
L('4'), L('7'), L('0'), L('9'), L('3'), L('8'), L('4'), L('4'),
|
||||
L('6'), L('0'), L('9'), L('5'), L('5'), L('0'), L('5'), L('8'),
|
||||
L('2'), L('2'), L('3'), L('1'), L('7'), L('2'), L('5'), L('3'),
|
||||
L('5'), L('9'), L('4'), L('0'), L('8'), L('1'), L('2'), L('8'),
|
||||
L('4'), L('8'), L('1'), L('1'), L('1'), L('7'), L('4'), M(127, 4),
|
||||
L('4'), L('1'), L('0'), L('2'), L('7'), L('0'), L('1'), L('9'),
|
||||
L('3'), L('8'), L('5'), L('2'), L('1'), L('1'), L('0'), L('5'),
|
||||
L('5'), L('5'), L('9'), L('6'), L('4'), L('4'), L('6'), L('2'),
|
||||
L('2'), L('9'), L('4'), L('8'), L('9'), L('5'), L('4'), L('9'),
|
||||
L('3'), L('0'), L('3'), L('8'), L('1'), M(19, 4), L('2'), L('8'),
|
||||
L('8'), L('1'), L('0'), L('9'), L('7'), L('5'), L('6'), L('6'),
|
||||
L('5'), L('9'), L('3'), L('3'), L('4'), L('4'), L('6'), M(72, 4),
|
||||
L('7'), L('5'), L('6'), L('4'), L('8'), L('2'), L('3'), L('3'),
|
||||
L('7'), L('8'), L('6'), L('7'), L('8'), L('3'), L('1'), L('6'),
|
||||
L('5'), L('2'), L('7'), L('1'), L('2'), L('0'), L('1'), L('9'),
|
||||
L('0'), L('9'), L('1'), L('4'), M(27, 4), L('5'), L('6'), L('6'),
|
||||
L('9'), L('2'), L('3'), L('4'), L('6'), M(179, 4), L('6'), L('1'),
|
||||
L('0'), L('4'), L('5'), L('4'), L('3'), L('2'), L('6'), M(51, 4),
|
||||
L('1'), L('3'), L('3'), L('9'), L('3'), L('6'), L('0'), L('7'),
|
||||
L('2'), L('6'), L('0'), L('2'), L('4'), L('9'), L('1'), L('4'),
|
||||
L('1'), L('2'), L('7'), L('3'), L('7'), L('2'), L('4'), L('5'),
|
||||
L('8'), L('7'), L('0'), L('0'), L('6'), L('6'), L('0'), L('6'),
|
||||
L('3'), L('1'), L('5'), L('5'), L('8'), L('8'), L('1'), L('7'),
|
||||
L('4'), L('8'), L('8'), L('1'), L('5'), L('2'), L('0'), L('9'),
|
||||
L('2'), L('0'), L('9'), L('6'), L('2'), L('8'), L('2'), L('9'),
|
||||
L('2'), L('5'), L('4'), L('0'), L('9'), L('1'), L('7'), L('1'),
|
||||
L('5'), L('3'), L('6'), L('4'), L('3'), L('6'), L('7'), L('8'),
|
||||
L('9'), L('2'), L('5'), L('9'), L('0'), L('3'), L('6'), L('0'),
|
||||
L('0'), L('1'), L('1'), L('3'), L('3'), L('0'), L('5'), L('3'),
|
||||
L('0'), L('5'), L('4'), L('8'), L('8'), L('2'), L('0'), L('4'),
|
||||
L('6'), L('6'), L('5'), L('2'), L('1'), L('3'), L('8'), L('4'),
|
||||
L('1'), L('4'), L('6'), L('9'), L('5'), L('1'), L('9'), L('4'),
|
||||
L('1'), L('5'), L('1'), L('1'), L('6'), L('0'), L('9'), L('4'),
|
||||
L('3'), L('3'), L('0'), L('5'), L('7'), L('2'), L('7'), L('0'),
|
||||
L('3'), L('6'), L('5'), L('7'), L('5'), L('9'), L('5'), L('9'),
|
||||
L('1'), L('9'), L('5'), L('3'), L('0'), L('9'), L('2'), L('1'),
|
||||
L('8'), L('6'), L('1'), L('1'), L('7'), M(234, 4), L('3'), L('2'),
|
||||
M(10, 4), L('9'), L('3'), L('1'), L('0'), L('5'), L('1'), L('1'),
|
||||
L('8'), L('5'), L('4'), L('8'), L('0'), L('7'), M(271, 4), L('3'),
|
||||
L('7'), L('9'), L('9'), L('6'), L('2'), L('7'), L('4'), L('9'),
|
||||
L('5'), L('6'), L('7'), L('3'), L('5'), L('1'), L('8'), L('8'),
|
||||
L('5'), L('7'), L('5'), L('2'), L('7'), L('2'), L('4'), L('8'),
|
||||
L('9'), L('1'), L('2'), L('2'), L('7'), L('9'), L('3'), L('8'),
|
||||
L('1'), L('8'), L('3'), L('0'), L('1'), L('1'), L('9'), L('4'),
|
||||
L('9'), L('1'), L('2'), L('9'), L('8'), L('3'), L('3'), L('6'),
|
||||
L('7'), L('3'), L('3'), L('6'), L('2'), L('4'), L('4'), L('0'),
|
||||
L('6'), L('5'), L('6'), L('6'), L('4'), L('3'), L('0'), L('8'),
|
||||
L('6'), L('0'), L('2'), L('1'), L('3'), L('9'), L('4'), L('9'),
|
||||
L('4'), L('6'), L('3'), L('9'), L('5'), L('2'), L('2'), L('4'),
|
||||
L('7'), L('3'), L('7'), L('1'), L('9'), L('0'), L('7'), L('0'),
|
||||
L('2'), L('1'), L('7'), L('9'), L('8'), M(154, 5), L('7'), L('0'),
|
||||
L('2'), L('7'), L('7'), L('0'), L('5'), L('3'), L('9'), L('2'),
|
||||
L('1'), L('7'), L('1'), L('7'), L('6'), L('2'), L('9'), L('3'),
|
||||
L('1'), L('7'), L('6'), L('7'), L('5'), M(563, 5), L('7'), L('4'),
|
||||
L('8'), L('1'), M(7, 4), L('6'), L('6'), L('9'), L('4'), L('0'),
|
||||
M(488, 4), L('0'), L('0'), L('0'), L('5'), L('6'), L('8'), L('1'),
|
||||
L('2'), L('7'), L('1'), L('4'), L('5'), L('2'), L('6'), L('3'),
|
||||
L('5'), L('6'), L('0'), L('8'), L('2'), L('7'), L('7'), L('8'),
|
||||
L('5'), L('7'), L('7'), L('1'), L('3'), L('4'), L('2'), L('7'),
|
||||
L('5'), L('7'), L('7'), L('8'), L('9'), L('6'), M(298, 4), L('3'),
|
||||
L('6'), L('3'), L('7'), L('1'), L('7'), L('8'), L('7'), L('2'),
|
||||
L('1'), L('4'), L('6'), L('8'), L('4'), L('4'), L('0'), L('9'),
|
||||
L('0'), L('1'), L('2'), L('2'), L('4'), L('9'), L('5'), L('3'),
|
||||
L('4'), L('3'), L('0'), L('1'), L('4'), L('6'), L('5'), L('4'),
|
||||
L('9'), L('5'), L('8'), L('5'), L('3'), L('7'), L('1'), L('0'),
|
||||
L('5'), L('0'), L('7'), L('9'), M(203, 4), L('6'), M(340, 4), L('8'),
|
||||
L('9'), L('2'), L('3'), L('5'), L('4'), M(458, 4), L('9'), L('5'),
|
||||
L('6'), L('1'), L('1'), L('2'), L('1'), L('2'), L('9'), L('0'),
|
||||
L('2'), L('1'), L('9'), L('6'), L('0'), L('8'), L('6'), L('4'),
|
||||
L('0'), L('3'), L('4'), L('4'), L('1'), L('8'), L('1'), L('5'),
|
||||
L('9'), L('8'), L('1'), L('3'), L('6'), L('2'), L('9'), L('7'),
|
||||
L('7'), L('4'), M(117, 4), L('0'), L('9'), L('9'), L('6'), L('0'),
|
||||
L('5'), L('1'), L('8'), L('7'), L('0'), L('7'), L('2'), L('1'),
|
||||
L('1'), L('3'), L('4'), L('9'), M(1, 5), L('8'), L('3'), L('7'),
|
||||
L('2'), L('9'), L('7'), L('8'), L('0'), L('4'), L('9'), L('9'),
|
||||
M(731, 4), L('9'), L('7'), L('3'), L('1'), L('7'), L('3'), L('2'),
|
||||
L('8'), M(395, 4), L('6'), L('3'), L('1'), L('8'), L('5'), M(770, 4),
|
||||
M(745, 4), L('4'), L('5'), L('5'), L('3'), L('4'), L('6'), L('9'),
|
||||
L('0'), L('8'), L('3'), L('0'), L('2'), L('6'), L('4'), L('2'),
|
||||
L('5'), L('2'), L('2'), L('3'), L('0'), M(740, 4), M(616, 4), L('8'),
|
||||
L('5'), L('0'), L('3'), L('5'), L('2'), L('6'), L('1'), L('9'),
|
||||
L('3'), L('1'), L('1'), M(531, 4), L('1'), L('0'), L('1'), L('0'),
|
||||
L('0'), L('0'), L('3'), L('1'), L('3'), L('7'), L('8'), L('3'),
|
||||
L('8'), L('7'), L('5'), L('2'), L('8'), L('8'), L('6'), L('5'),
|
||||
L('8'), L('7'), L('5'), L('3'), L('3'), L('2'), L('0'), L('8'),
|
||||
L('3'), L('8'), L('1'), L('4'), L('2'), L('0'), L('6'), M(321, 4),
|
||||
M(300, 4), L('1'), L('4'), L('7'), L('3'), L('0'), L('3'), L('5'),
|
||||
L('9'), M(815, 5), L('9'), L('0'), L('4'), L('2'), L('8'), L('7'),
|
||||
L('5'), L('5'), L('4'), L('6'), L('8'), L('7'), L('3'), L('1'),
|
||||
L('1'), L('5'), L('9'), L('5'), M(854, 4), L('3'), L('8'), L('8'),
|
||||
L('2'), L('3'), L('5'), L('3'), L('7'), L('8'), L('7'), L('5'),
|
||||
M(896, 5), L('9'), M(315, 4), L('1'), M(329, 4), L('8'), L('0'), L('5'),
|
||||
L('3'), M(395, 4), L('2'), L('2'), L('6'), L('8'), L('0'), L('6'),
|
||||
L('6'), L('1'), L('3'), L('0'), L('0'), L('1'), L('9'), L('2'),
|
||||
L('7'), L('8'), L('7'), L('6'), L('6'), L('1'), L('1'), L('1'),
|
||||
L('9'), L('5'), L('9'), M(568, 4), L('6'), M(293, 5), L('8'), L('9'),
|
||||
L('3'), L('8'), L('0'), L('9'), L('5'), L('2'), L('5'), L('7'),
|
||||
L('2'), L('0'), L('1'), L('0'), L('6'), L('5'), L('4'), L('8'),
|
||||
L('5'), L('8'), L('6'), L('3'), L('2'), L('7'), M(155, 4), L('9'),
|
||||
L('3'), L('6'), L('1'), L('5'), L('3'), M(545, 4), M(349, 5), L('2'),
|
||||
L('3'), L('0'), L('3'), L('0'), L('1'), L('9'), L('5'), L('2'),
|
||||
L('0'), L('3'), L('5'), L('3'), L('0'), L('1'), L('8'), L('5'),
|
||||
L('2'), M(370, 4), M(118, 4), L('3'), L('6'), L('2'), L('2'), L('5'),
|
||||
L('9'), L('9'), L('4'), L('1'), L('3'), M(597, 4), L('4'), L('9'),
|
||||
L('7'), L('2'), L('1'), L('7'), M(223, 4), L('3'), L('4'), L('7'),
|
||||
L('9'), L('1'), L('3'), L('1'), L('5'), L('1'), L('5'), L('5'),
|
||||
L('7'), L('4'), L('8'), L('5'), L('7'), L('2'), L('4'), L('2'),
|
||||
L('4'), L('5'), L('4'), L('1'), L('5'), L('0'), L('6'), L('9'),
|
||||
M(320, 4), L('8'), L('2'), L('9'), L('5'), L('3'), L('3'), L('1'),
|
||||
L('1'), L('6'), L('8'), L('6'), L('1'), L('7'), L('2'), L('7'),
|
||||
L('8'), M(824, 4), L('9'), L('0'), L('7'), L('5'), L('0'), L('9'),
|
||||
M(270, 4), L('7'), L('5'), L('4'), L('6'), L('3'), L('7'), L('4'),
|
||||
L('6'), L('4'), L('9'), L('3'), L('9'), L('3'), L('1'), L('9'),
|
||||
L('2'), L('5'), L('5'), L('0'), L('6'), L('0'), L('4'), L('0'),
|
||||
L('0'), L('9'), M(620, 4), L('1'), L('6'), L('7'), L('1'), L('1'),
|
||||
L('3'), L('9'), L('0'), L('0'), L('9'), L('8'), M(822, 4), L('4'),
|
||||
L('0'), L('1'), L('2'), L('8'), L('5'), L('8'), L('3'), L('6'),
|
||||
L('1'), L('6'), L('0'), L('3'), L('5'), L('6'), L('3'), L('7'),
|
||||
L('0'), L('7'), L('6'), L('6'), L('0'), L('1'), L('0'), L('4'),
|
||||
M(371, 4), L('8'), L('1'), L('9'), L('4'), L('2'), L('9'), M(1055, 5),
|
||||
M(240, 4), M(652, 4), L('7'), L('8'), L('3'), L('7'), L('4'), M(1193, 4),
|
||||
L('8'), L('2'), L('5'), L('5'), L('3'), L('7'), M(522, 5), L('2'),
|
||||
L('6'), L('8'), M(47, 4), L('4'), L('0'), L('4'), L('7'), M(466, 4),
|
||||
L('4'), M(1206, 4), M(910, 4), L('8'), L('4'), M(937, 4), L('6'), M(800, 6),
|
||||
L('3'), L('3'), L('1'), L('3'), L('6'), L('7'), L('7'), L('0'),
|
||||
L('2'), L('8'), L('9'), L('8'), L('9'), L('1'), L('5'), L('2'),
|
||||
M(99, 4), L('5'), L('2'), L('1'), L('6'), L('2'), L('0'), L('5'),
|
||||
L('6'), L('9'), L('6'), M(1042, 4), L('0'), L('5'), L('8'), M(1144, 4),
|
||||
L('5'), M(1177, 4), L('5'), L('1'), L('1'), M(522, 4), L('8'), L('2'),
|
||||
L('4'), L('3'), L('0'), L('0'), L('3'), L('5'), L('5'), L('8'),
|
||||
L('7'), L('6'), L('4'), L('0'), L('2'), L('4'), L('7'), L('4'),
|
||||
L('9'), L('6'), L('4'), L('7'), L('3'), L('2'), L('6'), L('3'),
|
||||
M(1087, 4), L('9'), L('9'), L('2'), M(1100, 4), L('4'), L('2'), L('6'),
|
||||
L('9'), M(710, 6), L('7'), M(471, 4), L('4'), M(1342, 4), M(1054, 4), L('9'),
|
||||
L('3'), L('4'), L('1'), L('7'), M(430, 4), L('1'), L('2'), M(43, 4),
|
||||
L('4'), M(415, 4), L('1'), L('5'), L('0'), L('3'), L('0'), L('2'),
|
||||
L('8'), L('6'), L('1'), L('8'), L('2'), L('9'), L('7'), L('4'),
|
||||
L('5'), L('5'), L('5'), L('7'), L('0'), L('6'), L('7'), L('4'),
|
||||
M(310, 4), L('5'), L('0'), L('5'), L('4'), L('9'), L('4'), L('5'),
|
||||
L('8'), M(454, 4), L('9'), M(82, 4), L('5'), L('6'), M(493, 4), L('7'),
|
||||
L('2'), L('1'), L('0'), L('7'), L('9'), M(346, 4), L('3'), L('0'),
|
||||
M(267, 4), L('3'), L('2'), L('1'), L('1'), L('6'), L('5'), L('3'),
|
||||
L('4'), L('4'), L('9'), L('8'), L('7'), L('2'), L('0'), L('2'),
|
||||
L('7'), M(284, 4), L('0'), L('2'), L('3'), L('6'), L('4'), M(559, 4),
|
||||
L('5'), L('4'), L('9'), L('9'), L('1'), L('1'), L('9'), L('8'),
|
||||
M(1049, 4), L('4'), M(284, 4), L('5'), L('3'), L('5'), L('6'), L('6'),
|
||||
L('3'), L('6'), L('9'), M(1105, 4), L('2'), L('6'), L('5'), M(741, 4),
|
||||
L('7'), L('8'), L('6'), L('2'), L('5'), L('5'), L('1'), M(987, 4),
|
||||
L('1'), L('7'), L('5'), L('7'), L('4'), L('6'), L('7'), L('2'),
|
||||
L('8'), L('9'), L('0'), L('9'), L('7'), L('7'), L('7'), L('7'),
|
||||
M(1108, 5), L('0'), L('0'), L('0'), M(1534, 4), L('7'), L('0'), M(1248, 4),
|
||||
L('6'), M(1002, 4), L('4'), L('9'), L('1'), M(1055, 4), M(664, 4), L('2'),
|
||||
L('1'), L('4'), L('7'), L('7'), L('2'), L('3'), L('5'), L('0'),
|
||||
L('1'), L('4'), L('1'), L('4'), M(1604, 4), L('3'), L('5'), L('6'),
|
||||
M(1200, 4), L('1'), L('6'), L('1'), L('3'), L('6'), L('1'), L('1'),
|
||||
L('5'), L('7'), L('3'), L('5'), L('2'), L('5'), M(1285, 4), L('3'),
|
||||
L('4'), M(92, 4), L('1'), L('8'), M(1148, 4), L('8'), L('4'), M(1512, 4),
|
||||
L('3'), L('3'), L('2'), L('3'), L('9'), L('0'), L('7'), L('3'),
|
||||
L('9'), L('4'), L('1'), L('4'), L('3'), L('3'), L('3'), L('4'),
|
||||
L('5'), L('4'), L('7'), L('7'), L('6'), L('2'), L('4'), M(579, 4),
|
||||
L('2'), L('5'), L('1'), L('8'), L('9'), L('8'), L('3'), L('5'),
|
||||
L('6'), L('9'), L('4'), L('8'), L('5'), L('5'), L('6'), L('2'),
|
||||
L('0'), L('9'), L('9'), L('2'), L('1'), L('9'), L('2'), L('2'),
|
||||
L('2'), L('1'), L('8'), L('4'), L('2'), L('7'), M(575, 4), L('2'),
|
||||
M(187, 4), L('6'), L('8'), L('8'), L('7'), L('6'), L('7'), L('1'),
|
||||
L('7'), L('9'), L('0'), M(86, 4), L('0'), M(263, 5), L('6'), L('6'),
|
||||
M(1000, 4), L('8'), L('8'), L('6'), L('2'), L('7'), L('2'), M(1757, 4),
|
||||
L('1'), L('7'), L('8'), L('6'), L('0'), L('8'), L('5'), L('7'),
|
||||
M(116, 4), L('3'), M(765, 5), L('7'), L('9'), L('7'), L('6'), L('6'),
|
||||
L('8'), L('1'), M(702, 4), L('0'), L('0'), L('9'), L('5'), L('3'),
|
||||
L('8'), L('8'), M(1593, 4), L('3'), M(1702, 4), L('0'), L('6'), L('8'),
|
||||
L('0'), L('0'), L('6'), L('4'), L('2'), L('2'), L('5'), L('1'),
|
||||
L('2'), L('5'), L('2'), M(1404, 4), L('7'), L('3'), L('9'), L('2'),
|
||||
M(664, 4), M(1141, 4), L('4'), M(1716, 5), L('8'), L('6'), L('2'), L('6'),
|
||||
L('9'), L('4'), L('5'), M(486, 4), L('4'), L('1'), L('9'), L('6'),
|
||||
L('5'), L('2'), L('8'), L('5'), L('0'), M(154, 4), M(925, 4), L('1'),
|
||||
L('8'), L('6'), L('3'), M(447, 4), L('4'), M(341, 5), L('2'), L('0'),
|
||||
L('3'), L('9'), M(1420, 4), L('4'), L('5'), M(701, 4), L('2'), L('3'),
|
||||
L('7'), M(1069, 4), L('6'), M(1297, 4), L('5'), L('6'), M(1593, 4), L('7'),
|
||||
L('1'), L('9'), L('1'), L('7'), L('2'), L('8'), M(370, 4), L('7'),
|
||||
L('6'), L('4'), L('6'), L('5'), L('7'), L('5'), L('7'), L('3'),
|
||||
L('9'), M(258, 4), L('3'), L('8'), L('9'), M(1865, 4), L('8'), L('3'),
|
||||
L('2'), L('6'), L('4'), L('5'), L('9'), L('9'), L('5'), L('8'),
|
||||
M(1704, 4), L('0'), L('4'), L('7'), L('8'), M(479, 4), M(809, 4), L('9'),
|
||||
M(46, 4), L('6'), L('4'), L('0'), L('7'), L('8'), L('9'), L('5'),
|
||||
L('1'), M(143, 4), L('6'), L('8'), L('3'), M(304, 4), L('2'), L('5'),
|
||||
L('9'), L('5'), L('7'), L('0'), M(1129, 4), L('8'), L('2'), L('2'),
|
||||
M(713, 4), L('2'), M(1564, 4), L('4'), L('0'), L('7'), L('7'), L('2'),
|
||||
L('6'), L('7'), L('1'), L('9'), L('4'), L('7'), L('8'), M(794, 4),
|
||||
L('8'), L('2'), L('6'), L('0'), L('1'), L('4'), L('7'), L('6'),
|
||||
L('9'), L('9'), L('0'), L('9'), M(1257, 4), L('0'), L('1'), L('3'),
|
||||
L('6'), L('3'), L('9'), L('4'), L('4'), L('3'), M(640, 4), L('3'),
|
||||
L('0'), M(262, 4), L('2'), L('0'), L('3'), L('4'), L('9'), L('6'),
|
||||
L('2'), L('5'), L('2'), L('4'), L('5'), L('1'), L('7'), M(950, 4),
|
||||
L('9'), L('6'), L('5'), L('1'), L('4'), L('3'), L('1'), L('4'),
|
||||
L('2'), L('9'), L('8'), L('0'), L('9'), L('1'), L('9'), L('0'),
|
||||
L('6'), L('5'), L('9'), L('2'), M(643, 4), L('7'), L('2'), L('2'),
|
||||
L('1'), L('6'), L('9'), L('6'), L('4'), L('6'), M(1050, 4), M(123, 4),
|
||||
L('5'), M(1295, 4), L('4'), M(1382, 5), L('8'), M(1370, 4), L('9'), L('7'),
|
||||
M(1404, 4), L('5'), L('4'), M(1182, 4), M(575, 4), L('7'), M(1627, 4), L('8'),
|
||||
L('4'), L('6'), L('8'), L('1'), L('3'), M(141, 4), L('6'), L('8'),
|
||||
L('3'), L('8'), L('6'), L('8'), L('9'), L('4'), L('2'), L('7'),
|
||||
L('7'), L('4'), L('1'), L('5'), L('5'), L('9'), L('9'), L('1'),
|
||||
L('8'), L('5'), M(91, 4), L('2'), L('4'), L('5'), L('9'), L('5'),
|
||||
L('3'), L('9'), L('5'), L('9'), L('4'), L('3'), L('1'), M(1464, 4),
|
||||
L('7'), M(19, 4), L('6'), L('8'), L('0'), L('8'), L('4'), L('5'),
|
||||
M(744, 4), L('7'), L('3'), M(2079, 4), L('9'), L('5'), L('8'), L('4'),
|
||||
L('8'), L('6'), L('5'), L('3'), L('8'), M(1769, 4), L('6'), L('2'),
|
||||
M(243, 4), L('6'), L('0'), L('9'), M(1207, 4), L('6'), L('0'), L('8'),
|
||||
L('0'), L('5'), L('1'), L('2'), L('4'), L('3'), L('8'), L('8'),
|
||||
L('4'), M(315, 4), M(12, 4), L('4'), L('1'), L('3'), M(784, 4), L('7'),
|
||||
L('6'), L('2'), L('7'), L('8'), M(834, 4), L('7'), L('1'), L('5'),
|
||||
M(1436, 4), L('3'), L('5'), L('9'), L('9'), L('7'), L('7'), L('0'),
|
||||
L('0'), L('1'), L('2'), L('9'), M(1139, 4), L('8'), L('9'), L('4'),
|
||||
L('4'), L('1'), M(632, 4), L('6'), L('8'), L('5'), L('5'), M(96, 4),
|
||||
L('4'), L('0'), L('6'), L('3'), M(2279, 4), L('2'), L('0'), L('7'),
|
||||
L('2'), L('2'), M(345, 4), M(516, 5), L('4'), L('8'), L('1'), L('5'),
|
||||
L('8'), M(518, 4), M(511, 4), M(635, 4), M(665, 4), L('3'), L('9'), L('4'),
|
||||
L('5'), L('2'), L('2'), L('6'), L('7'), M(1175, 6), L('8'), M(1419, 4),
|
||||
L('2'), L('1'), M(747, 4), L('2'), M(904, 4), L('5'), L('4'), L('6'),
|
||||
L('6'), L('6'), M(1308, 4), L('2'), L('3'), L('9'), L('8'), L('6'),
|
||||
L('4'), L('5'), L('6'), M(1221, 4), L('1'), L('6'), L('3'), L('5'),
|
||||
M(596, 5), M(2066, 4), L('7'), M(2222, 4), L('9'), L('8'), M(1119, 4), L('9'),
|
||||
L('3'), L('6'), L('3'), L('4'), M(1884, 4), L('7'), L('4'), L('3'),
|
||||
L('2'), L('4'), M(1148, 4), L('1'), L('5'), L('0'), L('7'), L('6'),
|
||||
M(1212, 4), L('7'), L('9'), L('4'), L('5'), L('1'), L('0'), L('9'),
|
||||
M(63, 4), L('0'), L('9'), L('4'), L('0'), M(1703, 4), L('8'), L('8'),
|
||||
L('7'), L('9'), L('7'), L('1'), L('0'), L('8'), L('9'), L('3'),
|
||||
M(2289, 4), L('6'), L('9'), L('1'), L('3'), L('6'), L('8'), L('6'),
|
||||
L('7'), L('2'), M(604, 4), M(511, 4), L('5'), M(1344, 4), M(1129, 4), M(2050, 4),
|
||||
L('1'), L('7'), L('9'), L('2'), L('8'), L('6'), L('8'), M(2253, 4),
|
||||
L('8'), L('7'), L('4'), L('7'), M(1951, 5), L('8'), L('2'), L('4'),
|
||||
M(2427, 4), L('8'), M(604, 4), L('7'), L('1'), L('4'), L('9'), L('0'),
|
||||
L('9'), L('6'), L('7'), L('5'), L('9'), L('8'), M(1776, 4), L('3'),
|
||||
L('6'), L('5'), M(309, 4), L('8'), L('1'), M(93, 4), M(1862, 4), M(2359, 4),
|
||||
L('6'), L('8'), L('2'), L('9'), M(1407, 4), L('8'), L('7'), L('2'),
|
||||
L('2'), L('6'), L('5'), L('8'), L('8'), L('0'), M(1554, 4), L('5'),
|
||||
M(586, 4), L('4'), L('2'), L('7'), L('0'), L('4'), L('7'), L('7'),
|
||||
L('5'), L('5'), M(2079, 4), L('3'), L('7'), L('9'), L('6'), L('4'),
|
||||
L('1'), L('4'), L('5'), L('1'), L('5'), L('2'), M(1534, 4), L('2'),
|
||||
L('3'), L('4'), L('3'), L('6'), L('4'), L('5'), L('4'), M(1503, 4),
|
||||
L('4'), L('4'), L('4'), L('7'), L('9'), L('5'), M(61, 4), M(1316, 4),
|
||||
M(2279, 5), L('4'), L('1'), M(1323, 4), L('3'), M(773, 4), L('5'), L('2'),
|
||||
L('3'), L('1'), M(2114, 5), L('1'), L('6'), L('6'), L('1'), M(2227, 4),
|
||||
L('5'), L('9'), L('6'), L('9'), L('5'), L('3'), L('6'), L('2'),
|
||||
L('3'), L('1'), L('4'), M(1536, 4), L('2'), L('4'), L('8'), L('4'),
|
||||
L('9'), L('3'), L('7'), L('1'), L('8'), L('7'), L('1'), L('1'),
|
||||
L('0'), L('1'), L('4'), L('5'), L('7'), L('6'), L('5'), L('4'),
|
||||
M(1890, 4), L('0'), L('2'), L('7'), L('9'), L('9'), L('3'), L('4'),
|
||||
L('4'), L('0'), L('3'), L('7'), L('4'), L('2'), L('0'), L('0'),
|
||||
L('7'), M(2368, 4), L('7'), L('8'), L('5'), L('3'), L('9'), L('0'),
|
||||
L('6'), L('2'), L('1'), L('9'), M(666, 5), M(838, 4), L('8'), L('4'),
|
||||
L('7'), M(979, 5), L('8'), L('3'), L('3'), L('2'), L('1'), L('4'),
|
||||
L('4'), L('5'), L('7'), L('1'), M(645, 4), M(1911, 4), L('4'), L('3'),
|
||||
L('5'), L('0'), M(2345, 4), M(1129, 4), L('5'), L('3'), L('1'), L('9'),
|
||||
L('1'), L('0'), L('4'), L('8'), L('4'), L('8'), L('1'), L('0'),
|
||||
L('0'), L('5'), L('3'), L('7'), L('0'), L('6'), M(2237, 4), M(1438, 5),
|
||||
M(1922, 5), L('1'), M(1370, 4), L('7'), M(796, 4), L('5'), M(2029, 4), M(1037, 4),
|
||||
L('6'), L('3'), M(2013, 5), L('4'), M(2418, 4), M(847, 5), M(1014, 5), L('8'),
|
||||
M(1326, 5), M(2184, 5), L('9'), M(392, 4), L('9'), L('1'), M(2255, 4), L('8'),
|
||||
L('1'), L('4'), L('6'), L('7'), L('5'), L('1'), M(1580, 4), L('1'),
|
||||
L('2'), L('3'), L('9'), M(426, 6), L('9'), L('0'), L('7'), L('1'),
|
||||
L('8'), L('6'), L('4'), L('9'), L('4'), L('2'), L('3'), L('1'),
|
||||
L('9'), L('6'), L('1'), L('5'), L('6'), M(493, 4), M(1725, 4), L('9'),
|
||||
L('5'), M(2343, 4), M(1130, 4), M(284, 4), L('6'), L('0'), L('3'), L('8'),
|
||||
M(2598, 4), M(368, 4), M(901, 4), L('6'), L('2'), M(1115, 4), L('5'), M(2125, 4),
|
||||
L('6'), L('3'), L('8'), L('9'), L('3'), L('7'), L('7'), L('8'),
|
||||
L('7'), M(2246, 4), M(249, 4), L('9'), L('7'), L('9'), L('2'), L('0'),
|
||||
L('7'), L('7'), L('3'), M(1496, 4), L('2'), L('1'), L('8'), L('2'),
|
||||
L('5'), L('6'), M(2016, 4), L('6'), L('6'), M(1751, 4), L('4'), L('2'),
|
||||
M(1663, 5), L('6'), M(1767, 4), L('4'), L('4'), M(37, 4), L('5'), L('4'),
|
||||
L('9'), L('2'), L('0'), L('2'), L('6'), L('0'), L('5'), M(2740, 4),
|
||||
M(997, 5), L('2'), L('0'), L('1'), L('4'), L('9'), M(1235, 4), L('8'),
|
||||
L('5'), L('0'), L('7'), L('3'), M(1434, 4), L('6'), L('6'), L('6'),
|
||||
L('0'), M(405, 4), L('2'), L('4'), L('3'), L('4'), L('0'), M(136, 4),
|
||||
L('0'), M(1900, 4), L('8'), L('6'), L('3'), M(2391, 4), M(2021, 4), M(1068, 4),
|
||||
M(373, 4), L('5'), L('7'), L('9'), L('6'), L('2'), L('6'), L('8'),
|
||||
L('5'), L('6'), M(321, 4), L('5'), L('0'), L('8'), M(1316, 4), L('5'),
|
||||
L('8'), L('7'), L('9'), L('6'), L('9'), L('9'), M(1810, 4), L('5'),
|
||||
L('7'), L('4'), M(2585, 4), L('8'), L('4'), L('0'), M(2228, 4), L('1'),
|
||||
L('4'), L('5'), L('9'), L('1'), M(1933, 4), L('7'), L('0'), M(565, 4),
|
||||
L('0'), L('1'), M(3048, 4), L('1'), L('2'), M(3189, 4), L('0'), M(964, 4),
|
||||
L('3'), L('9'), M(2859, 4), M(275, 4), L('7'), L('1'), L('5'), M(945, 4),
|
||||
L('4'), L('2'), L('0'), M(3059, 5), L('9'), M(3011, 4), L('0'), L('7'),
|
||||
M(834, 4), M(1942, 4), M(2736, 4), M(3171, 4), L('2'), L('1'), M(2401, 4), L('2'),
|
||||
L('5'), L('1'), M(1404, 4), M(2373, 4), L('9'), L('2'), M(435, 4), L('8'),
|
||||
L('2'), L('6'), M(2919, 4), L('2'), M(633, 4), L('3'), L('2'), L('1'),
|
||||
L('5'), L('7'), L('9'), L('1'), L('9'), L('8'), L('4'), L('1'),
|
||||
L('4'), M(2172, 5), L('9'), L('1'), L('6'), L('4'), M(1769, 5), L('9'),
|
||||
M(2905, 5), M(2268, 4), L('7'), L('2'), L('2'), M(802, 4), L('5'), M(2213, 4),
|
||||
M(322, 4), L('9'), L('1'), L('0'), M(189, 4), M(3164, 4), L('5'), L('2'),
|
||||
L('8'), L('0'), L('1'), L('7'), M(562, 4), L('7'), L('1'), L('2'),
|
||||
M(2325, 4), L('8'), L('3'), L('2'), M(884, 4), L('1'), M(1418, 4), L('0'),
|
||||
L('9'), L('3'), L('5'), L('3'), L('9'), L('6'), L('5'), L('7'),
|
||||
M(1612, 4), L('1'), L('0'), L('8'), L('3'), M(106, 4), L('5'), L('1'),
|
||||
M(1915, 4), M(3419, 4), L('1'), L('4'), L('4'), L('4'), L('2'), L('1'),
|
||||
L('0'), L('0'), M(515, 4), L('0'), L('3'), M(413, 4), L('1'), L('1'),
|
||||
L('0'), L('3'), M(3202, 4), M(10, 4), M(39, 4), M(1539, 6), L('5'), L('1'),
|
||||
L('6'), M(1498, 4), M(2180, 5), M(2347, 4), L('5'), M(3139, 5), L('8'), L('5'),
|
||||
L('1'), L('7'), L('1'), L('4'), L('3'), L('7'), M(1542, 4), M(110, 4),
|
||||
L('1'), L('5'), L('5'), L('6'), L('5'), L('0'), L('8'), L('8'),
|
||||
M(954, 4), L('9'), L('8'), L('9'), L('8'), L('5'), L('9'), L('9'),
|
||||
L('8'), L('2'), L('3'), L('8'), M(464, 4), M(2491, 4), L('3'), M(365, 4),
|
||||
M(1087, 4), M(2500, 4), L('8'), M(3590, 5), L('3'), L('2'), M(264, 4), L('5'),
|
||||
M(774, 4), L('3'), M(459, 4), L('9'), M(1052, 4), L('9'), L('8'), M(2174, 4),
|
||||
L('4'), M(3257, 4), L('7'), M(1612, 4), L('0'), L('7'), M(230, 4), L('4'),
|
||||
L('8'), L('1'), L('4'), L('1'), M(1338, 4), L('8'), L('5'), L('9'),
|
||||
L('4'), L('6'), L('1'), M(3018, 4), L('8'), L('0'),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-rand-1k.input",
|
||||
.want = "huffman-rand-1k.{s}.expect",
|
||||
.want_no_input = "huffman-rand-1k.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xd), L(0x85), L(0x94), L(0x25), L(0x80), L(0xaf), L(0xc2), L(0xfe), L(0x8d),
|
||||
L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde), L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b),
|
||||
L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73), L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4),
|
||||
L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15), L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde),
|
||||
L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9), L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14),
|
||||
L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xd), L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c),
|
||||
L(0x99), L(0xdf), L(0xfd), L(0x70), L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5),
|
||||
L(0xd4), L(0x80), L(0x59), L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20),
|
||||
L(0x1b), L(0x46), L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4),
|
||||
L(0x7b), L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28),
|
||||
L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f), L(0xa0),
|
||||
L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4), L(0x75), L(0xda),
|
||||
L(0xea), L(0x9b), L(0xa), L(0x64), L(0xb), L(0xe0), L(0x23), L(0x29), L(0xbd), L(0xf7), L(0xe7), L(0x83), L(0x3c), L(0xfb),
|
||||
L(0xdf), L(0xb3), L(0xae), L(0x4f), L(0xa4), L(0x47), L(0x55), L(0x99), L(0xde), L(0x2f), L(0x96), L(0x6e), L(0x1c), L(0x43),
|
||||
L(0x4c), L(0x87), L(0xe2), L(0x7c), L(0xd9), L(0x5f), L(0x4c), L(0x7c), L(0xe8), L(0x90), L(0x3), L(0xdb), L(0x30), L(0x95),
|
||||
L(0xd6), L(0x22), L(0xc), L(0x47), L(0xb8), L(0x4d), L(0x6b), L(0xbd), L(0x24), L(0x11), L(0xab), L(0x2c), L(0xd7), L(0xbe),
|
||||
L(0x6e), L(0x7a), L(0xd6), L(0x8), L(0xa3), L(0x98), L(0xd8), L(0xdd), L(0x15), L(0x6a), L(0xfa), L(0x93), L(0x30), L(0x1),
|
||||
L(0x25), L(0x1d), L(0xa2), L(0x74), L(0x86), L(0x4b), L(0x6a), L(0x95), L(0xe8), L(0xe1), L(0x4e), L(0xe), L(0x76), L(0xb9),
|
||||
L(0x49), L(0xa9), L(0x5f), L(0xa0), L(0xa6), L(0x63), L(0x3c), L(0x7e), L(0x7e), L(0x20), L(0x13), L(0x4f), L(0xbb), L(0x66),
|
||||
L(0x92), L(0xb8), L(0x2e), L(0xa4), L(0xfa), L(0x48), L(0xcb), L(0xae), L(0xb9), L(0x3c), L(0xaf), L(0xd3), L(0x1f), L(0xe1),
|
||||
L(0xd5), L(0x8d), L(0x42), L(0x6d), L(0xf0), L(0xfc), L(0x8c), L(0xc), L(0x0), L(0xde), L(0x40), L(0xab), L(0x8b), L(0x47),
|
||||
L(0x97), L(0x4e), L(0xa8), L(0xcf), L(0x8e), L(0xdb), L(0xa6), L(0x8b), L(0x20), L(0x9), L(0x84), L(0x7a), L(0x66), L(0xe5),
|
||||
L(0x98), L(0x29), L(0x2), L(0x95), L(0xe6), L(0x38), L(0x32), L(0x60), L(0x3), L(0xe3), L(0x9a), L(0x1e), L(0x54), L(0xe8),
|
||||
L(0x63), L(0x80), L(0x48), L(0x9c), L(0xe7), L(0x63), L(0x33), L(0x6e), L(0xa0), L(0x65), L(0x83), L(0xfa), L(0xc6), L(0xba),
|
||||
L(0x7a), L(0x43), L(0x71), L(0x5), L(0xf5), L(0x68), L(0x69), L(0x85), L(0x9c), L(0xba), L(0x45), L(0xcd), L(0x6b), L(0xb),
|
||||
L(0x19), L(0xd1), L(0xbb), L(0x7f), L(0x70), L(0x85), L(0x92), L(0xd1), L(0xb4), L(0x64), L(0x82), L(0xb1), L(0xe4), L(0x62),
|
||||
L(0xc5), L(0x3c), L(0x46), L(0x1f), L(0x92), L(0x31), L(0x1c), L(0x4e), L(0x41), L(0x77), L(0xf7), L(0xe7), L(0x87), L(0xa2),
|
||||
L(0xf), L(0x6e), L(0xe8), L(0x92), L(0x3), L(0x6b), L(0xa), L(0xe7), L(0xa9), L(0x3b), L(0x11), L(0xda), L(0x66), L(0x8a),
|
||||
L(0x29), L(0xda), L(0x79), L(0xe1), L(0x64), L(0x8d), L(0xe3), L(0x54), L(0xd4), L(0xf5), L(0xef), L(0x64), L(0x87), L(0x3b),
|
||||
L(0xf4), L(0xc2), L(0xf4), L(0x71), L(0x13), L(0xa9), L(0xe9), L(0xe0), L(0xa2), L(0x6), L(0x14), L(0xab), L(0x5d), L(0xa7),
|
||||
L(0x96), L(0x0), L(0xd6), L(0xc3), L(0xcc), L(0x57), L(0xed), L(0x39), L(0x6a), L(0x25), L(0xcd), L(0x76), L(0xea), L(0xba),
|
||||
L(0x3a), L(0xf2), L(0xa1), L(0x95), L(0x5d), L(0xe5), L(0x71), L(0xcf), L(0x9c), L(0x62), L(0x9e), L(0x6a), L(0xfa), L(0xd5),
|
||||
L(0x31), L(0xd1), L(0xa8), L(0x66), L(0x30), L(0x33), L(0xaa), L(0x51), L(0x17), L(0x13), L(0x82), L(0x99), L(0xc8), L(0x14),
|
||||
L(0x60), L(0x9f), L(0x4d), L(0x32), L(0x6d), L(0xda), L(0x19), L(0x26), L(0x21), L(0xdc), L(0x7e), L(0x2e), L(0x25), L(0x67),
|
||||
L(0x72), L(0xca), L(0xf), L(0x92), L(0xcd), L(0xf6), L(0xd6), L(0xcb), L(0x97), L(0x8a), L(0x33), L(0x58), L(0x73), L(0x70),
|
||||
L(0x91), L(0x1d), L(0xbf), L(0x28), L(0x23), L(0xa3), L(0xc), L(0xf1), L(0x83), L(0xc3), L(0xc8), L(0x56), L(0x77), L(0x68),
|
||||
L(0xe3), L(0x82), L(0xba), L(0xb9), L(0x57), L(0x56), L(0x57), L(0x9c), L(0xc3), L(0xd6), L(0x14), L(0x5), L(0x3c), L(0xb1),
|
||||
L(0xaf), L(0x93), L(0xc8), L(0x8a), L(0x57), L(0x7f), L(0x53), L(0xfa), L(0x2f), L(0xaa), L(0x6e), L(0x66), L(0x83), L(0xfa),
|
||||
L(0x33), L(0xd1), L(0x21), L(0xab), L(0x1b), L(0x71), L(0xb4), L(0x7c), L(0xda), L(0xfd), L(0xfb), L(0x7f), L(0x20), L(0xab),
|
||||
L(0x5e), L(0xd5), L(0xca), L(0xfd), L(0xdd), L(0xe0), L(0xee), L(0xda), L(0xba), L(0xa8), L(0x27), L(0x99), L(0x97), L(0x69),
|
||||
L(0xc1), L(0x3c), L(0x82), L(0x8c), L(0xa), L(0x5c), L(0x2d), L(0x5b), L(0x88), L(0x3e), L(0x34), L(0x35), L(0x86), L(0x37),
|
||||
L(0x46), L(0x79), L(0xe1), L(0xaa), L(0x19), L(0xfb), L(0xaa), L(0xde), L(0x15), L(0x9), L(0xd), L(0x1a), L(0x57), L(0xff),
|
||||
L(0xb5), L(0xf), L(0xf3), L(0x2b), L(0x5a), L(0x6a), L(0x4d), L(0x19), L(0x77), L(0x71), L(0x45), L(0xdf), L(0x4f), L(0xb3),
|
||||
L(0xec), L(0xf1), L(0xeb), L(0x18), L(0x53), L(0x3e), L(0x3b), L(0x47), L(0x8), L(0x9a), L(0x73), L(0xa0), L(0x5c), L(0x8c),
|
||||
L(0x5f), L(0xeb), L(0xf), L(0x3a), L(0xc2), L(0x43), L(0x67), L(0xb4), L(0x66), L(0x67), L(0x80), L(0x58), L(0xe), L(0xc1),
|
||||
L(0xec), L(0x40), L(0xd4), L(0x22), L(0x94), L(0xca), L(0xf9), L(0xe8), L(0x92), L(0xe4), L(0x69), L(0x38), L(0xbe), L(0x67),
|
||||
L(0x64), L(0xca), L(0x50), L(0xc7), L(0x6), L(0x67), L(0x42), L(0x6e), L(0xa3), L(0xf0), L(0xb7), L(0x6c), L(0xf2), L(0xe8),
|
||||
L(0x5f), L(0xb1), L(0xaf), L(0xe7), L(0xdb), L(0xbb), L(0x77), L(0xb5), L(0xf8), L(0xcb), L(0x8), L(0xc4), L(0x75), L(0x7e),
|
||||
L(0xc0), L(0xf9), L(0x1c), L(0x7f), L(0x3c), L(0x89), L(0x2f), L(0xd2), L(0x58), L(0x3a), L(0xe2), L(0xf8), L(0x91), L(0xb6),
|
||||
L(0x7b), L(0x24), L(0x27), L(0xe9), L(0xae), L(0x84), L(0x8b), L(0xde), L(0x74), L(0xac), L(0xfd), L(0xd9), L(0xb7), L(0x69),
|
||||
L(0x2a), L(0xec), L(0x32), L(0x6f), L(0xf0), L(0x92), L(0x84), L(0xf1), L(0x40), L(0xc), L(0x8a), L(0xbc), L(0x39), L(0x6e),
|
||||
L(0x2e), L(0x73), L(0xd4), L(0x6e), L(0x8a), L(0x74), L(0x2a), L(0xdc), L(0x60), L(0x1f), L(0xa3), L(0x7), L(0xde), L(0x75),
|
||||
L(0x8b), L(0x74), L(0xc8), L(0xfe), L(0x63), L(0x75), L(0xf6), L(0x3d), L(0x63), L(0xac), L(0x33), L(0x89), L(0xc3), L(0xf0),
|
||||
L(0xf8), L(0x2d), L(0x6b), L(0xb4), L(0x9e), L(0x74), L(0x8b), L(0x5c), L(0x33), L(0xb4), L(0xca), L(0xa8), L(0xe4), L(0x99),
|
||||
L(0xb6), L(0x90), L(0xa1), L(0xef), L(0xf), L(0xd3), L(0x61), L(0xb2), L(0xc6), L(0x1a), L(0x94), L(0x7c), L(0x44), L(0x55),
|
||||
L(0xf4), L(0x45), L(0xff), L(0x9e), L(0xa5), L(0x5a), L(0xc6), L(0xa0), L(0xe8), L(0x2a), L(0xc1), L(0x8d), L(0x6f), L(0x34),
|
||||
L(0x11), L(0xb9), L(0xbe), L(0x4e), L(0xd9), L(0x87), L(0x97), L(0x73), L(0xcf), L(0x3d), L(0x23), L(0xae), L(0xd5), L(0x1a),
|
||||
L(0x5e), L(0xae), L(0x5d), L(0x6a), L(0x3), L(0xf9), L(0x22), L(0xd), L(0x10), L(0xd9), L(0x47), L(0x69), L(0x15), L(0x3f),
|
||||
L(0xee), L(0x52), L(0xa3), L(0x8), L(0xd2), L(0x3c), L(0x51), L(0xf4), L(0xf8), L(0x9d), L(0xe4), L(0x98), L(0x89), L(0xc8),
|
||||
L(0x67), L(0x39), L(0xd5), L(0x5e), L(0x35), L(0x78), L(0x27), L(0xe8), L(0x3c), L(0x80), L(0xae), L(0x79), L(0x71), L(0xd2),
|
||||
L(0x93), L(0xf4), L(0xaa), L(0x51), L(0x12), L(0x1c), L(0x4b), L(0x1b), L(0xe5), L(0x6e), L(0x15), L(0x6f), L(0xe4), L(0xbb),
|
||||
L(0x51), L(0x9b), L(0x45), L(0x9f), L(0xf9), L(0xc4), L(0x8c), L(0x2a), L(0xfb), L(0x1a), L(0xdf), L(0x55), L(0xd3), L(0x48),
|
||||
L(0x93), L(0x27), L(0x1), L(0x26), L(0xc2), L(0x6b), L(0x55), L(0x6d), L(0xa2), L(0xfb), L(0x84), L(0x8b), L(0xc9), L(0x9e),
|
||||
L(0x28), L(0xc2), L(0xef), L(0x1a), L(0x24), L(0xec), L(0x9b), L(0xae), L(0xbd), L(0x60), L(0xe9), L(0x15), L(0x35), L(0xee),
|
||||
L(0x42), L(0xa4), L(0x33), L(0x5b), L(0xfa), L(0xf), L(0xb6), L(0xf7), L(0x1), L(0xa6), L(0x2), L(0x4c), L(0xca), L(0x90),
|
||||
L(0x58), L(0x3a), L(0x96), L(0x41), L(0xe7), L(0xcb), L(0x9), L(0x8c), L(0xdb), L(0x85), L(0x4d), L(0xa8), L(0x89), L(0xf3),
|
||||
L(0xb5), L(0x8e), L(0xfd), L(0x75), L(0x5b), L(0x4f), L(0xed), L(0xde), L(0x3f), L(0xeb), L(0x38), L(0xa3), L(0xbe), L(0xb0),
|
||||
L(0x73), L(0xfc), L(0xb8), L(0x54), L(0xf7), L(0x4c), L(0x30), L(0x67), L(0x2e), L(0x38), L(0xa2), L(0x54), L(0x18), L(0xba),
|
||||
L(0x8), L(0xbf), L(0xf2), L(0x39), L(0xd5), L(0xfe), L(0xa5), L(0x41), L(0xc6), L(0x66), L(0x66), L(0xba), L(0x81), L(0xef),
|
||||
L(0x67), L(0xe4), L(0xe6), L(0x3c), L(0xc), L(0xca), L(0xa4), L(0xa), L(0x79), L(0xb3), L(0x57), L(0x8b), L(0x8a), L(0x75),
|
||||
L(0x98), L(0x18), L(0x42), L(0x2f), L(0x29), L(0xa3), L(0x82), L(0xef), L(0x9f), L(0x86), L(0x6), L(0x23), L(0xe1), L(0x75),
|
||||
L(0xfa), L(0x8), L(0xb1), L(0xde), L(0x17), L(0x4a),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-rand-limit.input",
|
||||
.want = "huffman-rand-limit.{s}.expect",
|
||||
.want_no_input = "huffman-rand-limit.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L(0x61), M(1, 74), L(0xa), L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xa), L(0x85), L(0x94), L(0x25), L(0x80),
|
||||
L(0xaf), L(0xc2), L(0xfe), L(0x8d), L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde),
|
||||
L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b), L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73),
|
||||
L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4), L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15),
|
||||
L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde), L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9),
|
||||
L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14), L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xa),
|
||||
L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c), L(0x99), L(0xdf), L(0xfd), L(0x70),
|
||||
L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5), L(0xd4), L(0x80), L(0x59),
|
||||
L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20), L(0x1b), L(0x46),
|
||||
L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4), L(0x7b),
|
||||
L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28),
|
||||
L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f),
|
||||
L(0xa0), L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4),
|
||||
L(0x75), L(0xda), L(0xea), L(0x9b), L(0xa),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-shifts.input",
|
||||
.want = "huffman-shifts.{s}.expect",
|
||||
.want_no_input = "huffman-shifts.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L('1'), L('0'), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258),
|
||||
M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258),
|
||||
M(2, 258), M(2, 76), L(0xd), L(0xa), L('2'), L('3'), M(2, 258), M(2, 258),
|
||||
M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 256),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-text-shift.input",
|
||||
.want = "huffman-text-shift.{s}.expect",
|
||||
.want_no_input = "huffman-text-shift.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L('/'), L('/'), L('C'), L('o'), L('p'), L('y'), L('r'), L('i'),
|
||||
L('g'), L('h'), L('t'), L('2'), L('0'), L('0'), L('9'), L('T'),
|
||||
L('h'), L('G'), L('o'), L('A'), L('u'), L('t'), L('h'), L('o'),
|
||||
L('r'), L('.'), L('A'), L('l'), L('l'), M(23, 5), L('r'), L('r'),
|
||||
L('v'), L('d'), L('.'), L(0xd), L(0xa), L('/'), L('/'), L('U'),
|
||||
L('o'), L('f'), L('t'), L('h'), L('i'), L('o'), L('u'), L('r'),
|
||||
L('c'), L('c'), L('o'), L('d'), L('i'), L('g'), L('o'), L('v'),
|
||||
L('r'), L('n'), L('d'), L('b'), L('y'), L('B'), L('S'), L('D'),
|
||||
L('-'), L('t'), L('y'), L('l'), M(33, 4), L('l'), L('i'), L('c'),
|
||||
L('n'), L('t'), L('h'), L('t'), L('c'), L('n'), L('b'), L('f'),
|
||||
L('o'), L('u'), L('n'), L('d'), L('i'), L('n'), L('t'), L('h'),
|
||||
L('L'), L('I'), L('C'), L('E'), L('N'), L('S'), L('E'), L('f'),
|
||||
L('i'), L('l'), L('.'), L(0xd), L(0xa), L(0xd), L(0xa), L('p'),
|
||||
L('c'), L('k'), L('g'), L('m'), L('i'), L('n'), M(11, 4), L('i'),
|
||||
L('m'), L('p'), L('o'), L('r'), L('t'), L('"'), L('o'), L('"'),
|
||||
M(13, 4), L('f'), L('u'), L('n'), L('c'), L('m'), L('i'), L('n'),
|
||||
L('('), L(')'), L('{'), L(0xd), L(0xa), L(0x9), L('v'), L('r'),
|
||||
L('b'), L('='), L('m'), L('k'), L('('), L('['), L(']'), L('b'),
|
||||
L('y'), L('t'), L(','), L('6'), L('5'), L('5'), L('3'), L('5'),
|
||||
L(')'), L(0xd), L(0xa), L(0x9), L('f'), L(','), L('_'), L(':'),
|
||||
L('='), L('o'), L('.'), L('C'), L('r'), L('t'), L('('), L('"'),
|
||||
L('h'), L('u'), L('f'), L('f'), L('m'), L('n'), L('-'), L('n'),
|
||||
L('u'), L('l'), L('l'), L('-'), L('m'), L('x'), L('.'), L('i'),
|
||||
L('n'), L('"'), M(34, 5), L('.'), L('W'), L('r'), L('i'), L('t'),
|
||||
L('('), L('b'), L(')'), L(0xd), L(0xa), L('}'), L(0xd), L(0xa),
|
||||
L('A'), L('B'), L('C'), L('D'), L('E'), L('F'), L('G'), L('H'),
|
||||
L('I'), L('J'), L('K'), L('L'), L('M'), L('N'), L('O'), L('P'),
|
||||
L('Q'), L('R'), L('S'), L('T'), L('U'), L('V'), L('X'), L('x'),
|
||||
L('y'), L('z'), L('!'), L('"'), L('#'), L(0xc2), L(0xa4), L('%'),
|
||||
L('&'), L('/'), L('?'), L('"'),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-text.input",
|
||||
.want = "huffman-text.{s}.expect",
|
||||
.want_no_input = "huffman-text.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L('/'), L('/'), L(' '), L('z'), L('i'), L('g'), L(' '), L('v'),
|
||||
L('0'), L('.'), L('1'), L('0'), L('.'), L('0'), L(0xa), L('/'),
|
||||
L('/'), L(' '), L('c'), L('r'), L('e'), L('a'), L('t'), L('e'),
|
||||
L(' '), L('a'), L(' '), L('f'), L('i'), L('l'), L('e'), M(5, 4),
|
||||
L('l'), L('e'), L('d'), L(' '), L('w'), L('i'), L('t'), L('h'),
|
||||
L(' '), L('0'), L('x'), L('0'), L('0'), L(0xa), L('c'), L('o'),
|
||||
L('n'), L('s'), L('t'), L(' '), L('s'), L('t'), L('d'), L(' '),
|
||||
L('='), L(' '), L('@'), L('i'), L('m'), L('p'), L('o'), L('r'),
|
||||
L('t'), L('('), L('"'), L('s'), L('t'), L('d'), L('"'), L(')'),
|
||||
L(';'), L(0xa), L(0xa), L('p'), L('u'), L('b'), L(' '), L('f'),
|
||||
L('n'), L(' '), L('m'), L('a'), L('i'), L('n'), L('('), L(')'),
|
||||
L(' '), L('!'), L('v'), L('o'), L('i'), L('d'), L(' '), L('{'),
|
||||
L(0xa), L(' '), L(' '), L(' '), L(' '), L('v'), L('a'), L('r'),
|
||||
L(' '), L('b'), L(' '), L('='), L(' '), L('['), L('1'), L(']'),
|
||||
L('u'), L('8'), L('{'), L('0'), L('}'), L(' '), L('*'), L('*'),
|
||||
L(' '), L('6'), L('5'), L('5'), L('3'), L('5'), L(';'), M(31, 5),
|
||||
M(86, 6), L('f'), L(' '), L('='), L(' '), L('t'), L('r'), L('y'),
|
||||
M(94, 4), L('.'), L('f'), L('s'), L('.'), L('c'), L('w'), L('d'),
|
||||
L('('), L(')'), L('.'), M(144, 6), L('F'), L('i'), L('l'), L('e'),
|
||||
L('('), M(43, 5), M(1, 4), L('"'), L('h'), L('u'), L('f'), L('f'),
|
||||
L('m'), L('a'), L('n'), L('-'), L('n'), L('u'), L('l'), L('l'),
|
||||
L('-'), L('m'), L('a'), L('x'), L('.'), L('i'), L('n'), L('"'),
|
||||
L(','), M(31, 9), L('.'), L('{'), L(' '), L('.'), L('r'), L('e'),
|
||||
L('a'), L('d'), M(79, 5), L('u'), L('e'), L(' '), L('}'), M(27, 6),
|
||||
L(')'), M(108, 6), L('d'), L('e'), L('f'), L('e'), L('r'), L(' '),
|
||||
L('f'), L('.'), L('c'), L('l'), L('o'), L('s'), L('e'), L('('),
|
||||
M(183, 4), M(22, 4), L('_'), M(124, 7), L('f'), L('.'), L('w'), L('r'),
|
||||
L('i'), L('t'), L('e'), L('A'), L('l'), L('l'), L('('), L('b'),
|
||||
L('['), L('0'), L('.'), L('.'), L(']'), L(')'), L(';'), L(0xa),
|
||||
L('}'), L(0xa),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-zero.input",
|
||||
.want = "huffman-zero.{s}.expect",
|
||||
.want_no_input = "huffman-zero.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{ L(0x30), ml, M(1, 49) },
|
||||
},
|
||||
TestCase{
|
||||
.input = "",
|
||||
.want = "",
|
||||
.want_no_input = "null-long-match.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, M(1, 8),
|
||||
},
|
||||
},
|
||||
};
|
||||
};
|
||||
22
lib/std/compress/flate/testdata/fuzz/deflate-stream.expect
vendored
Normal file
22
lib/std/compress/flate/testdata/fuzz/deflate-stream.expect
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
[
|
||||
{ id: "brieflz",
|
||||
name: "BriefLZ",
|
||||
libraryUrl: "https://github.com/jibsen/brieflz",
|
||||
license: "MIT",
|
||||
revision: "bcaa6a1ee7ccf005512b5c23aa92b40cf75f9ed1",
|
||||
codecs: [ { name: "brieflz" } ], },
|
||||
{ id: "brotli",
|
||||
name: "Brotli",
|
||||
libraryUrl: "https://github.com/google/brotli",
|
||||
license: "Apache 2.0",
|
||||
revision: "1dd66ef114fd244778d9dcb5da09c28b49a0df33",
|
||||
codecs: [ { name: "brotli",
|
||||
levels: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
|
||||
streaming: true } ], },
|
||||
{ id: "bsc",
|
||||
name: "bsc",
|
||||
libraryUrl: "http://libbsc.com/",
|
||||
license: "Apache 2.0",
|
||||
revision: "b2b07421381b19b2fada8b291f3cdead10578abc",
|
||||
codecs: [ { name: "bsc" } ] }
|
||||
]
|
||||
3
lib/std/compress/flate/testdata/fuzz/deflate-stream.input
vendored
Normal file
3
lib/std/compress/flate/testdata/fuzz/deflate-stream.input
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
•<EFBFBD>=oÂ0†çò+Ṉ<>„ÄÙÚR»µKƒ}>W!AI@j+þ{
|
||||
¨|ˆª–¼<çóÝóÎFÎx[ÀØ´ž\õ9f;Pë%ü0à§·#®¼iuûñÚV¡¸èûUWDQéûÅÚL°YFïÞtTGç¿U©î†Ÿ_Ž´¥<C2B4>ï|SãQë©D¢ã<M…4)ÊXk%MÂÑe©SdűKØ0û]Cas[vª×ô•¿´;¡É•MSV™ó®ƒÛýJã‚@Nø5EaítJNˆÄY™$Y–[eѤVs…27‰Òܺ8¾¸}wÓ†ª¡.H1ƒ„AÊ`Ê c<>3PW̺¾%½ôuY@ß®éjNž‡ô]$
|
||||
,<ÙôïLŒ4<K¤ˆsa„2Òi«s#•p1ZÒVð4˵Á›™„ýØŽæ£o
|
||||
BIN
lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet01.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet01.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet02.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet02.input
vendored
Normal file
Binary file not shown.
1
lib/std/compress/flate/testdata/fuzz/end-of-stream.input
vendored
Normal file
1
lib/std/compress/flate/testdata/fuzz/end-of-stream.input
vendored
Normal file
@@ -0,0 +1 @@
|
||||
武=oツ0<10>0
|
||||
BIN
lib/std/compress/flate/testdata/fuzz/fuzz1.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/fuzz1.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/flate/testdata/fuzz/fuzz2.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/fuzz2.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/flate/testdata/fuzz/fuzz3.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/fuzz3.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/flate/testdata/fuzz/fuzz4.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/fuzz4.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/flate/testdata/fuzz/invalid-distance.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/invalid-distance.input
vendored
Normal file
Binary file not shown.
1
lib/std/compress/flate/testdata/fuzz/invalid-tree01.input
vendored
Normal file
1
lib/std/compress/flate/testdata/fuzz/invalid-tree01.input
vendored
Normal file
@@ -0,0 +1 @@
|
||||
000
|
||||
BIN
lib/std/compress/flate/testdata/fuzz/invalid-tree02.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/invalid-tree02.input
vendored
Normal file
Binary file not shown.
BIN
lib/std/compress/flate/testdata/fuzz/invalid-tree03.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/invalid-tree03.input
vendored
Normal file
Binary file not shown.
1
lib/std/compress/flate/testdata/fuzz/lengths-overflow.input
vendored
Normal file
1
lib/std/compress/flate/testdata/fuzz/lengths-overflow.input
vendored
Normal file
@@ -0,0 +1 @@
|
||||
<EFBFBD>$<24><><EFBFBD>9
|
||||
BIN
lib/std/compress/flate/testdata/fuzz/out-of-codes.input
vendored
Normal file
BIN
lib/std/compress/flate/testdata/fuzz/out-of-codes.input
vendored
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user