Merge pull request #24614 from ziglang/flate
std.compress.flate: rework decompression and delete compression
This commit is contained in:
@@ -438,8 +438,6 @@ pub fn GenericWriter(
|
||||
pub const AnyReader = @import("Io/DeprecatedReader.zig");
|
||||
/// Deprecated in favor of `Writer`.
|
||||
pub const AnyWriter = @import("Io/DeprecatedWriter.zig");
|
||||
/// Deprecated in favor of `File.Reader` and `File.Writer`.
|
||||
pub const SeekableStream = @import("Io/seekable_stream.zig").SeekableStream;
|
||||
/// Deprecated in favor of `Writer`.
|
||||
pub const BufferedWriter = @import("Io/buffered_writer.zig").BufferedWriter;
|
||||
/// Deprecated in favor of `Writer`.
|
||||
@@ -467,12 +465,6 @@ pub const CountingReader = @import("Io/counting_reader.zig").CountingReader;
|
||||
/// Deprecated with no replacement; inefficient pattern
|
||||
pub const countingReader = @import("Io/counting_reader.zig").countingReader;
|
||||
|
||||
pub const BitReader = @import("Io/bit_reader.zig").BitReader;
|
||||
pub const bitReader = @import("Io/bit_reader.zig").bitReader;
|
||||
|
||||
pub const BitWriter = @import("Io/bit_writer.zig").BitWriter;
|
||||
pub const bitWriter = @import("Io/bit_writer.zig").bitWriter;
|
||||
|
||||
pub const tty = @import("Io/tty.zig");
|
||||
|
||||
/// Deprecated in favor of `Writer.Discarding`.
|
||||
@@ -948,16 +940,12 @@ pub fn PollFiles(comptime StreamEnum: type) type {
|
||||
|
||||
test {
|
||||
_ = Reader;
|
||||
_ = Reader.Limited;
|
||||
_ = Writer;
|
||||
_ = BitReader;
|
||||
_ = BitWriter;
|
||||
_ = BufferedReader;
|
||||
_ = BufferedWriter;
|
||||
_ = CountingWriter;
|
||||
_ = CountingReader;
|
||||
_ = FixedBufferStream;
|
||||
_ = SeekableStream;
|
||||
_ = tty;
|
||||
_ = @import("Io/test.zig");
|
||||
}
|
||||
|
||||
@@ -74,6 +74,10 @@ pub const VTable = struct {
|
||||
///
|
||||
/// `data` may not contain an alias to `Reader.buffer`.
|
||||
///
|
||||
/// `data` is mutable because the implementation may to temporarily modify
|
||||
/// the fields in order to handle partial reads. Implementations must
|
||||
/// restore the original value before returning.
|
||||
///
|
||||
/// Implementations may ignore `data`, writing directly to `Reader.buffer`,
|
||||
/// modifying `seek` and `end` accordingly, and returning 0 from this
|
||||
/// function. Implementations are encouraged to take advantage of this if
|
||||
@@ -81,7 +85,7 @@ pub const VTable = struct {
|
||||
///
|
||||
/// The default implementation calls `stream` with either `data[0]` or
|
||||
/// `Reader.buffer`, whichever is bigger.
|
||||
readVec: *const fn (r: *Reader, data: []const []u8) Error!usize = defaultReadVec,
|
||||
readVec: *const fn (r: *Reader, data: [][]u8) Error!usize = defaultReadVec,
|
||||
|
||||
/// Ensures `capacity` more data can be buffered without rebasing.
|
||||
///
|
||||
@@ -262,8 +266,7 @@ pub fn streamRemaining(r: *Reader, w: *Writer) StreamRemainingError!usize {
|
||||
/// number of bytes discarded.
|
||||
pub fn discardRemaining(r: *Reader) ShortError!usize {
|
||||
var offset: usize = r.end - r.seek;
|
||||
r.seek = 0;
|
||||
r.end = 0;
|
||||
r.seek = r.end;
|
||||
while (true) {
|
||||
offset += r.vtable.discard(r, .unlimited) catch |err| switch (err) {
|
||||
error.EndOfStream => return offset,
|
||||
@@ -417,7 +420,7 @@ pub fn readVec(r: *Reader, data: [][]u8) Error!usize {
|
||||
}
|
||||
|
||||
/// Writes to `Reader.buffer` or `data`, whichever has larger capacity.
|
||||
pub fn defaultReadVec(r: *Reader, data: []const []u8) Error!usize {
|
||||
pub fn defaultReadVec(r: *Reader, data: [][]u8) Error!usize {
|
||||
assert(r.seek == r.end);
|
||||
r.seek = 0;
|
||||
r.end = 0;
|
||||
@@ -438,23 +441,6 @@ pub fn defaultReadVec(r: *Reader, data: []const []u8) Error!usize {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Always writes to `Reader.buffer` and returns 0.
|
||||
pub fn indirectReadVec(r: *Reader, data: []const []u8) Error!usize {
|
||||
_ = data;
|
||||
assert(r.seek == r.end);
|
||||
var writer: Writer = .{
|
||||
.buffer = r.buffer,
|
||||
.end = r.end,
|
||||
.vtable = &.{ .drain = Writer.fixedDrain },
|
||||
};
|
||||
const limit: Limit = .limited(writer.buffer.len - writer.end);
|
||||
r.end += r.vtable.stream(r, &writer, limit) catch |err| switch (err) {
|
||||
error.WriteFailed => unreachable,
|
||||
else => |e| return e,
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
pub fn buffered(r: *Reader) []u8 {
|
||||
return r.buffer[r.seek..r.end];
|
||||
}
|
||||
@@ -463,8 +449,8 @@ pub fn bufferedLen(r: *const Reader) usize {
|
||||
return r.end - r.seek;
|
||||
}
|
||||
|
||||
pub fn hashed(r: *Reader, hasher: anytype) Hashed(@TypeOf(hasher)) {
|
||||
return .{ .in = r, .hasher = hasher };
|
||||
pub fn hashed(r: *Reader, hasher: anytype, buffer: []u8) Hashed(@TypeOf(hasher)) {
|
||||
return .init(r, hasher, buffer);
|
||||
}
|
||||
|
||||
pub fn readVecAll(r: *Reader, data: [][]u8) Error!void {
|
||||
@@ -539,8 +525,7 @@ pub fn toss(r: *Reader, n: usize) void {
|
||||
|
||||
/// Equivalent to `toss(r.bufferedLen())`.
|
||||
pub fn tossBuffered(r: *Reader) void {
|
||||
r.seek = 0;
|
||||
r.end = 0;
|
||||
r.seek = r.end;
|
||||
}
|
||||
|
||||
/// Equivalent to `peek` followed by `toss`.
|
||||
@@ -627,8 +612,7 @@ pub fn discardShort(r: *Reader, n: usize) ShortError!usize {
|
||||
return n;
|
||||
}
|
||||
var remaining = n - (r.end - r.seek);
|
||||
r.end = 0;
|
||||
r.seek = 0;
|
||||
r.seek = r.end;
|
||||
while (true) {
|
||||
const discard_len = r.vtable.discard(r, .limited(remaining)) catch |err| switch (err) {
|
||||
error.EndOfStream => return n - remaining,
|
||||
@@ -1678,7 +1662,7 @@ fn endingStream(r: *Reader, w: *Writer, limit: Limit) StreamError!usize {
|
||||
return error.EndOfStream;
|
||||
}
|
||||
|
||||
fn endingReadVec(r: *Reader, data: []const []u8) Error!usize {
|
||||
fn endingReadVec(r: *Reader, data: [][]u8) Error!usize {
|
||||
_ = r;
|
||||
_ = data;
|
||||
return error.EndOfStream;
|
||||
@@ -1709,6 +1693,15 @@ fn failingDiscard(r: *Reader, limit: Limit) Error!usize {
|
||||
return error.ReadFailed;
|
||||
}
|
||||
|
||||
pub fn adaptToOldInterface(r: *Reader) std.Io.AnyReader {
|
||||
return .{ .context = r, .readFn = derpRead };
|
||||
}
|
||||
|
||||
fn derpRead(context: *const anyopaque, buffer: []u8) anyerror!usize {
|
||||
const r: *Reader = @constCast(@alignCast(@ptrCast(context)));
|
||||
return r.readSliceShort(buffer);
|
||||
}
|
||||
|
||||
test "readAlloc when the backing reader provides one byte at a time" {
|
||||
const str = "This is a test";
|
||||
var tiny_buffer: [1]u8 = undefined;
|
||||
@@ -1772,15 +1765,16 @@ pub fn Hashed(comptime Hasher: type) type {
|
||||
return struct {
|
||||
in: *Reader,
|
||||
hasher: Hasher,
|
||||
interface: Reader,
|
||||
reader: Reader,
|
||||
|
||||
pub fn init(in: *Reader, hasher: Hasher, buffer: []u8) @This() {
|
||||
return .{
|
||||
.in = in,
|
||||
.hasher = hasher,
|
||||
.interface = .{
|
||||
.reader = .{
|
||||
.vtable = &.{
|
||||
.read = @This().read,
|
||||
.stream = @This().stream,
|
||||
.readVec = @This().readVec,
|
||||
.discard = @This().discard,
|
||||
},
|
||||
.buffer = buffer,
|
||||
@@ -1790,33 +1784,39 @@ pub fn Hashed(comptime Hasher: type) type {
|
||||
};
|
||||
}
|
||||
|
||||
fn read(r: *Reader, w: *Writer, limit: Limit) StreamError!usize {
|
||||
const this: *@This() = @alignCast(@fieldParentPtr("interface", r));
|
||||
const data = w.writableVector(limit);
|
||||
fn stream(r: *Reader, w: *Writer, limit: Limit) StreamError!usize {
|
||||
const this: *@This() = @alignCast(@fieldParentPtr("reader", r));
|
||||
const data = limit.slice(try w.writableSliceGreedy(1));
|
||||
var vec: [1][]u8 = .{data};
|
||||
const n = try this.in.readVec(&vec);
|
||||
this.hasher.update(data[0..n]);
|
||||
w.advance(n);
|
||||
return n;
|
||||
}
|
||||
|
||||
fn readVec(r: *Reader, data: [][]u8) Error!usize {
|
||||
const this: *@This() = @alignCast(@fieldParentPtr("reader", r));
|
||||
const n = try this.in.readVec(data);
|
||||
const result = w.advanceVector(n);
|
||||
var remaining: usize = n;
|
||||
for (data) |slice| {
|
||||
if (remaining < slice.len) {
|
||||
this.hasher.update(slice[0..remaining]);
|
||||
return result;
|
||||
return n;
|
||||
} else {
|
||||
remaining -= slice.len;
|
||||
this.hasher.update(slice);
|
||||
}
|
||||
}
|
||||
assert(remaining == 0);
|
||||
return result;
|
||||
return n;
|
||||
}
|
||||
|
||||
fn discard(r: *Reader, limit: Limit) Error!usize {
|
||||
const this: *@This() = @alignCast(@fieldParentPtr("interface", r));
|
||||
var w = this.hasher.writer(&.{});
|
||||
const n = this.in.stream(&w, limit) catch |err| switch (err) {
|
||||
error.WriteFailed => unreachable,
|
||||
else => |e| return e,
|
||||
};
|
||||
return n;
|
||||
const this: *@This() = @alignCast(@fieldParentPtr("reader", r));
|
||||
const peeked = limit.slice(try this.in.peekGreedy(1));
|
||||
this.hasher.update(peeked);
|
||||
this.in.toss(peeked.len);
|
||||
return peeked.len;
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -1874,3 +1874,7 @@ pub fn writableVectorWsa(
|
||||
}
|
||||
return .{ i, n };
|
||||
}
|
||||
|
||||
test {
|
||||
_ = Limited;
|
||||
}
|
||||
|
||||
@@ -2266,7 +2266,7 @@ pub fn fixedDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usiz
|
||||
const pattern = data[data.len - 1];
|
||||
const dest = w.buffer[w.end..];
|
||||
switch (pattern.len) {
|
||||
0 => return w.end,
|
||||
0 => return 0,
|
||||
1 => {
|
||||
assert(splat >= dest.len);
|
||||
@memset(dest, pattern[0]);
|
||||
@@ -2286,6 +2286,13 @@ pub fn fixedDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usiz
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unreachableDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usize {
|
||||
_ = w;
|
||||
_ = data;
|
||||
_ = splat;
|
||||
unreachable;
|
||||
}
|
||||
|
||||
/// Provides a `Writer` implementation based on calling `Hasher.update`, sending
|
||||
/// all data also to an underlying `Writer`.
|
||||
///
|
||||
@@ -2296,6 +2303,8 @@ pub fn fixedDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usiz
|
||||
/// generic. A better solution will involve creating a writer for each hash
|
||||
/// function, where the splat buffer can be tailored to the hash implementation
|
||||
/// details.
|
||||
///
|
||||
/// Contrast with `Hashing` which terminates the stream pipeline.
|
||||
pub fn Hashed(comptime Hasher: type) type {
|
||||
return struct {
|
||||
out: *Writer,
|
||||
@@ -2341,7 +2350,7 @@ pub fn Hashed(comptime Hasher: type) type {
|
||||
this.hasher.update(slice);
|
||||
}
|
||||
const pattern = data[data.len - 1];
|
||||
assert(remaining == splat * pattern.len);
|
||||
assert(remaining <= splat * pattern.len);
|
||||
switch (pattern.len) {
|
||||
0 => {
|
||||
assert(remaining == 0);
|
||||
@@ -2368,6 +2377,52 @@ pub fn Hashed(comptime Hasher: type) type {
|
||||
};
|
||||
}
|
||||
|
||||
/// Provides a `Writer` implementation based on calling `Hasher.update`,
|
||||
/// discarding all data.
|
||||
///
|
||||
/// This implementation makes suboptimal buffering decisions due to being
|
||||
/// generic. A better solution will involve creating a writer for each hash
|
||||
/// function, where the splat buffer can be tailored to the hash implementation
|
||||
/// details.
|
||||
///
|
||||
/// The total number of bytes written is stored in `hasher`.
|
||||
///
|
||||
/// Contrast with `Hashed` which also passes the data to an underlying stream.
|
||||
pub fn Hashing(comptime Hasher: type) type {
|
||||
return struct {
|
||||
hasher: Hasher,
|
||||
writer: Writer,
|
||||
|
||||
pub fn init(buffer: []u8) @This() {
|
||||
return .initHasher(.init(.{}), buffer);
|
||||
}
|
||||
|
||||
pub fn initHasher(hasher: Hasher, buffer: []u8) @This() {
|
||||
return .{
|
||||
.hasher = hasher,
|
||||
.writer = .{
|
||||
.buffer = buffer,
|
||||
.vtable = &.{ .drain = @This().drain },
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
fn drain(w: *Writer, data: []const []const u8, splat: usize) Error!usize {
|
||||
const this: *@This() = @alignCast(@fieldParentPtr("writer", w));
|
||||
const hasher = &this.hasher;
|
||||
hasher.update(w.buffered());
|
||||
w.end = 0;
|
||||
var n: usize = 0;
|
||||
for (data[0 .. data.len - 1]) |slice| {
|
||||
hasher.update(slice);
|
||||
n += slice.len;
|
||||
}
|
||||
for (0..splat) |_| hasher.update(data[data.len - 1]);
|
||||
return n + splat * data[data.len - 1].len;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Maintains `Writer` state such that it writes to the unused capacity of an
|
||||
/// array list, filling it up completely before making a call through the
|
||||
/// vtable, causing a resize. Consequently, the same, optimized, non-generic
|
||||
|
||||
@@ -1,238 +0,0 @@
|
||||
const std = @import("../std.zig");
|
||||
|
||||
//General note on endianess:
|
||||
//Big endian is packed starting in the most significant part of the byte and subsequent
|
||||
// bytes contain less significant bits. Thus we always take bits from the high
|
||||
// end and place them below existing bits in our output.
|
||||
//Little endian is packed starting in the least significant part of the byte and
|
||||
// subsequent bytes contain more significant bits. Thus we always take bits from
|
||||
// the low end and place them above existing bits in our output.
|
||||
//Regardless of endianess, within any given byte the bits are always in most
|
||||
// to least significant order.
|
||||
//Also regardless of endianess, the buffer always aligns bits to the low end
|
||||
// of the byte.
|
||||
|
||||
/// Creates a bit reader which allows for reading bits from an underlying standard reader
|
||||
pub fn BitReader(comptime endian: std.builtin.Endian, comptime Reader: type) type {
|
||||
return struct {
|
||||
reader: Reader,
|
||||
bits: u8 = 0,
|
||||
count: u4 = 0,
|
||||
|
||||
const low_bit_mask = [9]u8{
|
||||
0b00000000,
|
||||
0b00000001,
|
||||
0b00000011,
|
||||
0b00000111,
|
||||
0b00001111,
|
||||
0b00011111,
|
||||
0b00111111,
|
||||
0b01111111,
|
||||
0b11111111,
|
||||
};
|
||||
|
||||
fn Bits(comptime T: type) type {
|
||||
return struct {
|
||||
T,
|
||||
u16,
|
||||
};
|
||||
}
|
||||
|
||||
fn initBits(comptime T: type, out: anytype, num: u16) Bits(T) {
|
||||
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
|
||||
return .{
|
||||
@bitCast(@as(UT, @intCast(out))),
|
||||
num,
|
||||
};
|
||||
}
|
||||
|
||||
/// Reads `bits` bits from the reader and returns a specified type
|
||||
/// containing them in the least significant end, returning an error if the
|
||||
/// specified number of bits could not be read.
|
||||
pub fn readBitsNoEof(self: *@This(), comptime T: type, num: u16) !T {
|
||||
const b, const c = try self.readBitsTuple(T, num);
|
||||
if (c < num) return error.EndOfStream;
|
||||
return b;
|
||||
}
|
||||
|
||||
/// Reads `bits` bits from the reader and returns a specified type
|
||||
/// containing them in the least significant end. The number of bits successfully
|
||||
/// read is placed in `out_bits`, as reaching the end of the stream is not an error.
|
||||
pub fn readBits(self: *@This(), comptime T: type, num: u16, out_bits: *u16) !T {
|
||||
const b, const c = try self.readBitsTuple(T, num);
|
||||
out_bits.* = c;
|
||||
return b;
|
||||
}
|
||||
|
||||
/// Reads `bits` bits from the reader and returns a tuple of the specified type
|
||||
/// containing them in the least significant end, and the number of bits successfully
|
||||
/// read. Reaching the end of the stream is not an error.
|
||||
pub fn readBitsTuple(self: *@This(), comptime T: type, num: u16) !Bits(T) {
|
||||
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
|
||||
const U = if (@bitSizeOf(T) < 8) u8 else UT; //it is a pain to work with <u8
|
||||
|
||||
//dump any bits in our buffer first
|
||||
if (num <= self.count) return initBits(T, self.removeBits(@intCast(num)), num);
|
||||
|
||||
var out_count: u16 = self.count;
|
||||
var out: U = self.removeBits(self.count);
|
||||
|
||||
//grab all the full bytes we need and put their
|
||||
//bits where they belong
|
||||
const full_bytes_left = (num - out_count) / 8;
|
||||
|
||||
for (0..full_bytes_left) |_| {
|
||||
const byte = self.reader.readByte() catch |err| switch (err) {
|
||||
error.EndOfStream => return initBits(T, out, out_count),
|
||||
else => |e| return e,
|
||||
};
|
||||
|
||||
switch (endian) {
|
||||
.big => {
|
||||
if (U == u8) out = 0 else out <<= 8; //shifting u8 by 8 is illegal in Zig
|
||||
out |= byte;
|
||||
},
|
||||
.little => {
|
||||
const pos = @as(U, byte) << @intCast(out_count);
|
||||
out |= pos;
|
||||
},
|
||||
}
|
||||
out_count += 8;
|
||||
}
|
||||
|
||||
const bits_left = num - out_count;
|
||||
const keep = 8 - bits_left;
|
||||
|
||||
if (bits_left == 0) return initBits(T, out, out_count);
|
||||
|
||||
const final_byte = self.reader.readByte() catch |err| switch (err) {
|
||||
error.EndOfStream => return initBits(T, out, out_count),
|
||||
else => |e| return e,
|
||||
};
|
||||
|
||||
switch (endian) {
|
||||
.big => {
|
||||
out <<= @intCast(bits_left);
|
||||
out |= final_byte >> @intCast(keep);
|
||||
self.bits = final_byte & low_bit_mask[keep];
|
||||
},
|
||||
.little => {
|
||||
const pos = @as(U, final_byte & low_bit_mask[bits_left]) << @intCast(out_count);
|
||||
out |= pos;
|
||||
self.bits = final_byte >> @intCast(bits_left);
|
||||
},
|
||||
}
|
||||
|
||||
self.count = @intCast(keep);
|
||||
return initBits(T, out, num);
|
||||
}
|
||||
|
||||
//convenience function for removing bits from
|
||||
//the appropriate part of the buffer based on
|
||||
//endianess.
|
||||
fn removeBits(self: *@This(), num: u4) u8 {
|
||||
if (num == 8) {
|
||||
self.count = 0;
|
||||
return self.bits;
|
||||
}
|
||||
|
||||
const keep = self.count - num;
|
||||
const bits = switch (endian) {
|
||||
.big => self.bits >> @intCast(keep),
|
||||
.little => self.bits & low_bit_mask[num],
|
||||
};
|
||||
switch (endian) {
|
||||
.big => self.bits &= low_bit_mask[keep],
|
||||
.little => self.bits >>= @intCast(num),
|
||||
}
|
||||
|
||||
self.count = keep;
|
||||
return bits;
|
||||
}
|
||||
|
||||
pub fn alignToByte(self: *@This()) void {
|
||||
self.bits = 0;
|
||||
self.count = 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub fn bitReader(comptime endian: std.builtin.Endian, reader: anytype) BitReader(endian, @TypeOf(reader)) {
|
||||
return .{ .reader = reader };
|
||||
}
|
||||
|
||||
///////////////////////////////
|
||||
|
||||
test "api coverage" {
|
||||
const mem_be = [_]u8{ 0b11001101, 0b00001011 };
|
||||
const mem_le = [_]u8{ 0b00011101, 0b10010101 };
|
||||
|
||||
var mem_in_be = std.io.fixedBufferStream(&mem_be);
|
||||
var bit_stream_be = bitReader(.big, mem_in_be.reader());
|
||||
|
||||
var out_bits: u16 = undefined;
|
||||
|
||||
const expect = std.testing.expect;
|
||||
const expectError = std.testing.expectError;
|
||||
|
||||
try expect(1 == try bit_stream_be.readBits(u2, 1, &out_bits));
|
||||
try expect(out_bits == 1);
|
||||
try expect(2 == try bit_stream_be.readBits(u5, 2, &out_bits));
|
||||
try expect(out_bits == 2);
|
||||
try expect(3 == try bit_stream_be.readBits(u128, 3, &out_bits));
|
||||
try expect(out_bits == 3);
|
||||
try expect(4 == try bit_stream_be.readBits(u8, 4, &out_bits));
|
||||
try expect(out_bits == 4);
|
||||
try expect(5 == try bit_stream_be.readBits(u9, 5, &out_bits));
|
||||
try expect(out_bits == 5);
|
||||
try expect(1 == try bit_stream_be.readBits(u1, 1, &out_bits));
|
||||
try expect(out_bits == 1);
|
||||
|
||||
mem_in_be.pos = 0;
|
||||
bit_stream_be.count = 0;
|
||||
try expect(0b110011010000101 == try bit_stream_be.readBits(u15, 15, &out_bits));
|
||||
try expect(out_bits == 15);
|
||||
|
||||
mem_in_be.pos = 0;
|
||||
bit_stream_be.count = 0;
|
||||
try expect(0b1100110100001011 == try bit_stream_be.readBits(u16, 16, &out_bits));
|
||||
try expect(out_bits == 16);
|
||||
|
||||
_ = try bit_stream_be.readBits(u0, 0, &out_bits);
|
||||
|
||||
try expect(0 == try bit_stream_be.readBits(u1, 1, &out_bits));
|
||||
try expect(out_bits == 0);
|
||||
try expectError(error.EndOfStream, bit_stream_be.readBitsNoEof(u1, 1));
|
||||
|
||||
var mem_in_le = std.io.fixedBufferStream(&mem_le);
|
||||
var bit_stream_le = bitReader(.little, mem_in_le.reader());
|
||||
|
||||
try expect(1 == try bit_stream_le.readBits(u2, 1, &out_bits));
|
||||
try expect(out_bits == 1);
|
||||
try expect(2 == try bit_stream_le.readBits(u5, 2, &out_bits));
|
||||
try expect(out_bits == 2);
|
||||
try expect(3 == try bit_stream_le.readBits(u128, 3, &out_bits));
|
||||
try expect(out_bits == 3);
|
||||
try expect(4 == try bit_stream_le.readBits(u8, 4, &out_bits));
|
||||
try expect(out_bits == 4);
|
||||
try expect(5 == try bit_stream_le.readBits(u9, 5, &out_bits));
|
||||
try expect(out_bits == 5);
|
||||
try expect(1 == try bit_stream_le.readBits(u1, 1, &out_bits));
|
||||
try expect(out_bits == 1);
|
||||
|
||||
mem_in_le.pos = 0;
|
||||
bit_stream_le.count = 0;
|
||||
try expect(0b001010100011101 == try bit_stream_le.readBits(u15, 15, &out_bits));
|
||||
try expect(out_bits == 15);
|
||||
|
||||
mem_in_le.pos = 0;
|
||||
bit_stream_le.count = 0;
|
||||
try expect(0b1001010100011101 == try bit_stream_le.readBits(u16, 16, &out_bits));
|
||||
try expect(out_bits == 16);
|
||||
|
||||
_ = try bit_stream_le.readBits(u0, 0, &out_bits);
|
||||
|
||||
try expect(0 == try bit_stream_le.readBits(u1, 1, &out_bits));
|
||||
try expect(out_bits == 0);
|
||||
try expectError(error.EndOfStream, bit_stream_le.readBitsNoEof(u1, 1));
|
||||
}
|
||||
@@ -1,179 +0,0 @@
|
||||
const std = @import("../std.zig");
|
||||
|
||||
//General note on endianess:
|
||||
//Big endian is packed starting in the most significant part of the byte and subsequent
|
||||
// bytes contain less significant bits. Thus we write out bits from the high end
|
||||
// of our input first.
|
||||
//Little endian is packed starting in the least significant part of the byte and
|
||||
// subsequent bytes contain more significant bits. Thus we write out bits from
|
||||
// the low end of our input first.
|
||||
//Regardless of endianess, within any given byte the bits are always in most
|
||||
// to least significant order.
|
||||
//Also regardless of endianess, the buffer always aligns bits to the low end
|
||||
// of the byte.
|
||||
|
||||
/// Creates a bit writer which allows for writing bits to an underlying standard writer
|
||||
pub fn BitWriter(comptime endian: std.builtin.Endian, comptime Writer: type) type {
|
||||
return struct {
|
||||
writer: Writer,
|
||||
bits: u8 = 0,
|
||||
count: u4 = 0,
|
||||
|
||||
const low_bit_mask = [9]u8{
|
||||
0b00000000,
|
||||
0b00000001,
|
||||
0b00000011,
|
||||
0b00000111,
|
||||
0b00001111,
|
||||
0b00011111,
|
||||
0b00111111,
|
||||
0b01111111,
|
||||
0b11111111,
|
||||
};
|
||||
|
||||
/// Write the specified number of bits to the writer from the least significant bits of
|
||||
/// the specified value. Bits will only be written to the writer when there
|
||||
/// are enough to fill a byte.
|
||||
pub fn writeBits(self: *@This(), value: anytype, num: u16) !void {
|
||||
const T = @TypeOf(value);
|
||||
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
|
||||
const U = if (@bitSizeOf(T) < 8) u8 else UT; //<u8 is a pain to work with
|
||||
|
||||
var in: U = @as(UT, @bitCast(value));
|
||||
var in_count: u16 = num;
|
||||
|
||||
if (self.count > 0) {
|
||||
//if we can't fill the buffer, add what we have
|
||||
const bits_free = 8 - self.count;
|
||||
if (num < bits_free) {
|
||||
self.addBits(@truncate(in), @intCast(num));
|
||||
return;
|
||||
}
|
||||
|
||||
//finish filling the buffer and flush it
|
||||
if (num == bits_free) {
|
||||
self.addBits(@truncate(in), @intCast(num));
|
||||
return self.flushBits();
|
||||
}
|
||||
|
||||
switch (endian) {
|
||||
.big => {
|
||||
const bits = in >> @intCast(in_count - bits_free);
|
||||
self.addBits(@truncate(bits), bits_free);
|
||||
},
|
||||
.little => {
|
||||
self.addBits(@truncate(in), bits_free);
|
||||
in >>= @intCast(bits_free);
|
||||
},
|
||||
}
|
||||
in_count -= bits_free;
|
||||
try self.flushBits();
|
||||
}
|
||||
|
||||
//write full bytes while we can
|
||||
const full_bytes_left = in_count / 8;
|
||||
for (0..full_bytes_left) |_| {
|
||||
switch (endian) {
|
||||
.big => {
|
||||
const bits = in >> @intCast(in_count - 8);
|
||||
try self.writer.writeByte(@truncate(bits));
|
||||
},
|
||||
.little => {
|
||||
try self.writer.writeByte(@truncate(in));
|
||||
if (U == u8) in = 0 else in >>= 8;
|
||||
},
|
||||
}
|
||||
in_count -= 8;
|
||||
}
|
||||
|
||||
//save the remaining bits in the buffer
|
||||
self.addBits(@truncate(in), @intCast(in_count));
|
||||
}
|
||||
|
||||
//convenience funciton for adding bits to the buffer
|
||||
//in the appropriate position based on endianess
|
||||
fn addBits(self: *@This(), bits: u8, num: u4) void {
|
||||
if (num == 8) self.bits = bits else switch (endian) {
|
||||
.big => {
|
||||
self.bits <<= @intCast(num);
|
||||
self.bits |= bits & low_bit_mask[num];
|
||||
},
|
||||
.little => {
|
||||
const pos = bits << @intCast(self.count);
|
||||
self.bits |= pos;
|
||||
},
|
||||
}
|
||||
self.count += num;
|
||||
}
|
||||
|
||||
/// Flush any remaining bits to the writer, filling
|
||||
/// unused bits with 0s.
|
||||
pub fn flushBits(self: *@This()) !void {
|
||||
if (self.count == 0) return;
|
||||
if (endian == .big) self.bits <<= @intCast(8 - self.count);
|
||||
try self.writer.writeByte(self.bits);
|
||||
self.bits = 0;
|
||||
self.count = 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub fn bitWriter(comptime endian: std.builtin.Endian, writer: anytype) BitWriter(endian, @TypeOf(writer)) {
|
||||
return .{ .writer = writer };
|
||||
}
|
||||
|
||||
///////////////////////////////
|
||||
|
||||
test "api coverage" {
|
||||
var mem_be = [_]u8{0} ** 2;
|
||||
var mem_le = [_]u8{0} ** 2;
|
||||
|
||||
var mem_out_be = std.io.fixedBufferStream(&mem_be);
|
||||
var bit_stream_be = bitWriter(.big, mem_out_be.writer());
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
try bit_stream_be.writeBits(@as(u2, 1), 1);
|
||||
try bit_stream_be.writeBits(@as(u5, 2), 2);
|
||||
try bit_stream_be.writeBits(@as(u128, 3), 3);
|
||||
try bit_stream_be.writeBits(@as(u8, 4), 4);
|
||||
try bit_stream_be.writeBits(@as(u9, 5), 5);
|
||||
try bit_stream_be.writeBits(@as(u1, 1), 1);
|
||||
|
||||
try testing.expect(mem_be[0] == 0b11001101 and mem_be[1] == 0b00001011);
|
||||
|
||||
mem_out_be.pos = 0;
|
||||
|
||||
try bit_stream_be.writeBits(@as(u15, 0b110011010000101), 15);
|
||||
try bit_stream_be.flushBits();
|
||||
try testing.expect(mem_be[0] == 0b11001101 and mem_be[1] == 0b00001010);
|
||||
|
||||
mem_out_be.pos = 0;
|
||||
try bit_stream_be.writeBits(@as(u32, 0b110011010000101), 16);
|
||||
try testing.expect(mem_be[0] == 0b01100110 and mem_be[1] == 0b10000101);
|
||||
|
||||
try bit_stream_be.writeBits(@as(u0, 0), 0);
|
||||
|
||||
var mem_out_le = std.io.fixedBufferStream(&mem_le);
|
||||
var bit_stream_le = bitWriter(.little, mem_out_le.writer());
|
||||
|
||||
try bit_stream_le.writeBits(@as(u2, 1), 1);
|
||||
try bit_stream_le.writeBits(@as(u5, 2), 2);
|
||||
try bit_stream_le.writeBits(@as(u128, 3), 3);
|
||||
try bit_stream_le.writeBits(@as(u8, 4), 4);
|
||||
try bit_stream_le.writeBits(@as(u9, 5), 5);
|
||||
try bit_stream_le.writeBits(@as(u1, 1), 1);
|
||||
|
||||
try testing.expect(mem_le[0] == 0b00011101 and mem_le[1] == 0b10010101);
|
||||
|
||||
mem_out_le.pos = 0;
|
||||
try bit_stream_le.writeBits(@as(u15, 0b110011010000101), 15);
|
||||
try bit_stream_le.flushBits();
|
||||
try testing.expect(mem_le[0] == 0b10000101 and mem_le[1] == 0b01100110);
|
||||
|
||||
mem_out_le.pos = 0;
|
||||
try bit_stream_le.writeBits(@as(u32, 0b1100110100001011), 16);
|
||||
try testing.expect(mem_le[0] == 0b00001011 and mem_le[1] == 0b11001101);
|
||||
|
||||
try bit_stream_le.writeBits(@as(u0, 0), 0);
|
||||
}
|
||||
@@ -4,8 +4,7 @@ const testing = std.testing;
|
||||
const mem = std.mem;
|
||||
const assert = std.debug.assert;
|
||||
|
||||
/// This turns a byte buffer into an `io.GenericWriter`, `io.GenericReader`, or `io.SeekableStream`.
|
||||
/// If the supplied byte buffer is const, then `io.GenericWriter` is not available.
|
||||
/// Deprecated in favor of `std.Io.Reader.fixed` and `std.Io.Writer.fixed`.
|
||||
pub fn FixedBufferStream(comptime Buffer: type) type {
|
||||
return struct {
|
||||
/// `Buffer` is either a `[]u8` or `[]const u8`.
|
||||
@@ -20,16 +19,6 @@ pub fn FixedBufferStream(comptime Buffer: type) type {
|
||||
pub const Reader = io.GenericReader(*Self, ReadError, read);
|
||||
pub const Writer = io.GenericWriter(*Self, WriteError, write);
|
||||
|
||||
pub const SeekableStream = io.SeekableStream(
|
||||
*Self,
|
||||
SeekError,
|
||||
GetSeekPosError,
|
||||
seekTo,
|
||||
seekBy,
|
||||
getPos,
|
||||
getEndPos,
|
||||
);
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn reader(self: *Self) Reader {
|
||||
@@ -40,10 +29,6 @@ pub fn FixedBufferStream(comptime Buffer: type) type {
|
||||
return .{ .context = self };
|
||||
}
|
||||
|
||||
pub fn seekableStream(self: *Self) SeekableStream {
|
||||
return .{ .context = self };
|
||||
}
|
||||
|
||||
pub fn read(self: *Self, dest: []u8) ReadError!usize {
|
||||
const size = @min(dest.len, self.buffer.len - self.pos);
|
||||
const end = self.pos + size;
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
const std = @import("../std.zig");
|
||||
|
||||
pub fn SeekableStream(
|
||||
comptime Context: type,
|
||||
comptime SeekErrorType: type,
|
||||
comptime GetSeekPosErrorType: type,
|
||||
comptime seekToFn: fn (context: Context, pos: u64) SeekErrorType!void,
|
||||
comptime seekByFn: fn (context: Context, pos: i64) SeekErrorType!void,
|
||||
comptime getPosFn: fn (context: Context) GetSeekPosErrorType!u64,
|
||||
comptime getEndPosFn: fn (context: Context) GetSeekPosErrorType!u64,
|
||||
) type {
|
||||
return struct {
|
||||
context: Context,
|
||||
|
||||
const Self = @This();
|
||||
pub const SeekError = SeekErrorType;
|
||||
pub const GetSeekPosError = GetSeekPosErrorType;
|
||||
|
||||
pub fn seekTo(self: Self, pos: u64) SeekError!void {
|
||||
return seekToFn(self.context, pos);
|
||||
}
|
||||
|
||||
pub fn seekBy(self: Self, amt: i64) SeekError!void {
|
||||
return seekByFn(self.context, amt);
|
||||
}
|
||||
|
||||
pub fn getEndPos(self: Self) GetSeekPosError!u64 {
|
||||
return getEndPosFn(self.context);
|
||||
}
|
||||
|
||||
pub fn getPos(self: Self) GetSeekPosError!u64 {
|
||||
return getPosFn(self.context);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -57,51 +57,6 @@ test "write a file, read it, then delete it" {
|
||||
try tmp.dir.deleteFile(tmp_file_name);
|
||||
}
|
||||
|
||||
test "BitStreams with File Stream" {
|
||||
var tmp = tmpDir(.{});
|
||||
defer tmp.cleanup();
|
||||
|
||||
const tmp_file_name = "temp_test_file.txt";
|
||||
{
|
||||
var file = try tmp.dir.createFile(tmp_file_name, .{});
|
||||
defer file.close();
|
||||
|
||||
var bit_stream = io.bitWriter(native_endian, file.deprecatedWriter());
|
||||
|
||||
try bit_stream.writeBits(@as(u2, 1), 1);
|
||||
try bit_stream.writeBits(@as(u5, 2), 2);
|
||||
try bit_stream.writeBits(@as(u128, 3), 3);
|
||||
try bit_stream.writeBits(@as(u8, 4), 4);
|
||||
try bit_stream.writeBits(@as(u9, 5), 5);
|
||||
try bit_stream.writeBits(@as(u1, 1), 1);
|
||||
try bit_stream.flushBits();
|
||||
}
|
||||
{
|
||||
var file = try tmp.dir.openFile(tmp_file_name, .{});
|
||||
defer file.close();
|
||||
|
||||
var bit_stream = io.bitReader(native_endian, file.deprecatedReader());
|
||||
|
||||
var out_bits: u16 = undefined;
|
||||
|
||||
try expect(1 == try bit_stream.readBits(u2, 1, &out_bits));
|
||||
try expect(out_bits == 1);
|
||||
try expect(2 == try bit_stream.readBits(u5, 2, &out_bits));
|
||||
try expect(out_bits == 2);
|
||||
try expect(3 == try bit_stream.readBits(u128, 3, &out_bits));
|
||||
try expect(out_bits == 3);
|
||||
try expect(4 == try bit_stream.readBits(u8, 4, &out_bits));
|
||||
try expect(out_bits == 4);
|
||||
try expect(5 == try bit_stream.readBits(u9, 5, &out_bits));
|
||||
try expect(out_bits == 5);
|
||||
try expect(1 == try bit_stream.readBits(u1, 1, &out_bits));
|
||||
try expect(out_bits == 1);
|
||||
|
||||
try expectError(error.EndOfStream, bit_stream.readBitsNoEof(u1, 1));
|
||||
}
|
||||
try tmp.dir.deleteFile(tmp_file_name);
|
||||
}
|
||||
|
||||
test "File seek ops" {
|
||||
var tmp = tmpDir(.{});
|
||||
defer tmp.cleanup();
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//! Compression algorithms.
|
||||
|
||||
/// gzip and zlib are here.
|
||||
pub const flate = @import("compress/flate.zig");
|
||||
pub const gzip = @import("compress/gzip.zig");
|
||||
pub const zlib = @import("compress/zlib.zig");
|
||||
pub const lzma = @import("compress/lzma.zig");
|
||||
pub const lzma2 = @import("compress/lzma2.zig");
|
||||
pub const xz = @import("compress/xz.zig");
|
||||
@@ -14,6 +13,4 @@ test {
|
||||
_ = lzma2;
|
||||
_ = xz;
|
||||
_ = zstd;
|
||||
_ = gzip;
|
||||
_ = zlib;
|
||||
}
|
||||
|
||||
@@ -1,477 +1,180 @@
|
||||
const std = @import("../std.zig");
|
||||
|
||||
/// When decompressing, the output buffer is used as the history window, so
|
||||
/// less than this may result in failure to decompress streams that were
|
||||
/// compressed with a larger window.
|
||||
pub const max_window_len = history_len * 2;
|
||||
|
||||
pub const history_len = 32768;
|
||||
|
||||
/// Deflate is a lossless data compression file format that uses a combination
|
||||
/// of LZ77 and Huffman coding.
|
||||
pub const deflate = @import("flate/deflate.zig");
|
||||
pub const Compress = @import("flate/Compress.zig");
|
||||
|
||||
/// Inflate is the decoding process that takes a Deflate bitstream for
|
||||
/// decompression and correctly produces the original full-size data or file.
|
||||
pub const inflate = @import("flate/inflate.zig");
|
||||
/// Inflate is the decoding process that consumes a Deflate bitstream and
|
||||
/// produces the original full-size data.
|
||||
pub const Decompress = @import("flate/Decompress.zig");
|
||||
|
||||
/// Decompress compressed data from reader and write plain data to the writer.
|
||||
pub fn decompress(reader: anytype, writer: anytype) !void {
|
||||
try inflate.decompress(.raw, reader, writer);
|
||||
}
|
||||
/// Compression without Lempel-Ziv match searching. Faster compression, less
|
||||
/// memory requirements but bigger compressed sizes.
|
||||
pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");
|
||||
|
||||
/// Decompressor type
|
||||
pub fn Decompressor(comptime ReaderType: type) type {
|
||||
return inflate.Decompressor(.raw, ReaderType);
|
||||
}
|
||||
/// Container of the deflate bit stream body. Container adds header before
|
||||
/// deflate bit stream and footer after. It can bi gzip, zlib or raw (no header,
|
||||
/// no footer, raw bit stream).
|
||||
///
|
||||
/// Zlib format is defined in rfc 1950. Header has 2 bytes and footer 4 bytes
|
||||
/// addler 32 checksum.
|
||||
///
|
||||
/// Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes
|
||||
/// crc32 checksum and 4 bytes of uncompressed data length.
|
||||
///
|
||||
/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
|
||||
/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
|
||||
pub const Container = enum {
|
||||
raw, // no header or footer
|
||||
gzip, // gzip header and footer
|
||||
zlib, // zlib header and footer
|
||||
|
||||
/// Create Decompressor which will read compressed data from reader.
|
||||
pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
|
||||
return inflate.decompressor(.raw, reader);
|
||||
}
|
||||
|
||||
/// Compression level, trades between speed and compression size.
|
||||
pub const Options = deflate.Options;
|
||||
|
||||
/// Compress plain data from reader and write compressed data to the writer.
|
||||
pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
|
||||
try deflate.compress(.raw, reader, writer, options);
|
||||
}
|
||||
|
||||
/// Compressor type
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.Compressor(.raw, WriterType);
|
||||
}
|
||||
|
||||
/// Create Compressor which outputs compressed data to the writer.
|
||||
pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
|
||||
return try deflate.compressor(.raw, writer, options);
|
||||
}
|
||||
|
||||
/// Huffman only compression. Without Lempel-Ziv match searching. Faster
|
||||
/// compression, less memory requirements but bigger compressed sizes.
|
||||
pub const huffman = struct {
|
||||
pub fn compress(reader: anytype, writer: anytype) !void {
|
||||
try deflate.huffman.compress(.raw, reader, writer);
|
||||
pub fn size(w: Container) usize {
|
||||
return headerSize(w) + footerSize(w);
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.huffman.Compressor(.raw, WriterType);
|
||||
pub fn headerSize(w: Container) usize {
|
||||
return header(w).len;
|
||||
}
|
||||
|
||||
pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
|
||||
return deflate.huffman.compressor(.raw, writer);
|
||||
}
|
||||
};
|
||||
|
||||
// No compression store only. Compressed size is slightly bigger than plain.
|
||||
pub const store = struct {
|
||||
pub fn compress(reader: anytype, writer: anytype) !void {
|
||||
try deflate.store.compress(.raw, reader, writer);
|
||||
pub fn footerSize(w: Container) usize {
|
||||
return switch (w) {
|
||||
.gzip => 8,
|
||||
.zlib => 4,
|
||||
.raw => 0,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.store.Compressor(.raw, WriterType);
|
||||
}
|
||||
pub const list = [_]Container{ .raw, .gzip, .zlib };
|
||||
|
||||
pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
|
||||
return deflate.store.compressor(.raw, writer);
|
||||
}
|
||||
};
|
||||
|
||||
/// Container defines header/footer around deflate bit stream. Gzip and zlib
|
||||
/// compression algorithms are containers around deflate bit stream body.
|
||||
const Container = @import("flate/container.zig").Container;
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const fixedBufferStream = std.io.fixedBufferStream;
|
||||
const print = std.debug.print;
|
||||
const builtin = @import("builtin");
|
||||
|
||||
test {
|
||||
_ = deflate;
|
||||
_ = inflate;
|
||||
}
|
||||
|
||||
test "compress/decompress" {
|
||||
var cmp_buf: [64 * 1024]u8 = undefined; // compressed data buffer
|
||||
var dcm_buf: [64 * 1024]u8 = undefined; // decompressed data buffer
|
||||
|
||||
const levels = [_]deflate.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
|
||||
const cases = [_]struct {
|
||||
data: []const u8, // uncompressed content
|
||||
// compressed data sizes per level 4-9
|
||||
gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len,
|
||||
huffman_only_size: usize = 0,
|
||||
store_size: usize = 0,
|
||||
}{
|
||||
.{
|
||||
.data = @embedFile("flate/testdata/rfc1951.txt"),
|
||||
.gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 },
|
||||
.huffman_only_size = 20287,
|
||||
.store_size = 36967,
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("flate/testdata/fuzz/roundtrip1.input"),
|
||||
.gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 },
|
||||
.huffman_only_size = 393,
|
||||
.store_size = 393,
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("flate/testdata/fuzz/roundtrip2.input"),
|
||||
.gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 },
|
||||
.huffman_only_size = 394,
|
||||
.store_size = 394,
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("flate/testdata/fuzz/deflate-stream.expect"),
|
||||
.gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 },
|
||||
.huffman_only_size = 498,
|
||||
.store_size = 747,
|
||||
},
|
||||
pub const Error = error{
|
||||
BadGzipHeader,
|
||||
BadZlibHeader,
|
||||
WrongGzipChecksum,
|
||||
WrongGzipSize,
|
||||
WrongZlibChecksum,
|
||||
};
|
||||
|
||||
for (cases, 0..) |case, case_no| { // for each case
|
||||
const data = case.data;
|
||||
pub fn header(container: Container) []const u8 {
|
||||
return switch (container) {
|
||||
// GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
|
||||
// - ID1 (IDentification 1), always 0x1f
|
||||
// - ID2 (IDentification 2), always 0x8b
|
||||
// - CM (Compression Method), always 8 = deflate
|
||||
// - FLG (Flags), all set to 0
|
||||
// - 4 bytes, MTIME (Modification time), not used, all set to zero
|
||||
// - XFL (eXtra FLags), all set to zero
|
||||
// - OS (Operating System), 03 = Unix
|
||||
.gzip => &[_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 },
|
||||
// ZLIB has a two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
|
||||
// 1st byte:
|
||||
// - First four bits is the CINFO (compression info), which is 7 for the default deflate window size.
|
||||
// - The next four bits is the CM (compression method), which is 8 for deflate.
|
||||
// 2nd byte:
|
||||
// - Two bits is the FLEVEL (compression level). Values are: 0=fastest, 1=fast, 2=default, 3=best.
|
||||
// - The next bit, FDICT, is set if a dictionary is given.
|
||||
// - The final five FCHECK bits form a mod-31 checksum.
|
||||
//
|
||||
// CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
|
||||
.zlib => &[_]u8{ 0x78, 0b10_0_11100 },
|
||||
.raw => &.{},
|
||||
};
|
||||
}
|
||||
|
||||
for (levels, 0..) |level, i| { // for each compression level
|
||||
pub const Hasher = union(Container) {
|
||||
raw: void,
|
||||
gzip: struct {
|
||||
crc: std.hash.Crc32 = .init(),
|
||||
count: u32 = 0,
|
||||
},
|
||||
zlib: std.hash.Adler32,
|
||||
|
||||
inline for (Container.list) |container| { // for each wrapping
|
||||
var compressed_size: usize = if (case.gzip_sizes[i] > 0)
|
||||
case.gzip_sizes[i] - Container.gzip.size() + container.size()
|
||||
else
|
||||
0;
|
||||
|
||||
// compress original stream to compressed stream
|
||||
{
|
||||
var original = fixedBufferStream(data);
|
||||
var compressed = fixedBufferStream(&cmp_buf);
|
||||
try deflate.compress(container, original.reader(), compressed.writer(), .{ .level = level });
|
||||
if (compressed_size == 0) {
|
||||
if (container == .gzip)
|
||||
print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos });
|
||||
compressed_size = compressed.pos;
|
||||
}
|
||||
try testing.expectEqual(compressed_size, compressed.pos);
|
||||
}
|
||||
// decompress compressed stream to decompressed stream
|
||||
{
|
||||
var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
|
||||
var decompressed = fixedBufferStream(&dcm_buf);
|
||||
try inflate.decompress(container, compressed.reader(), decompressed.writer());
|
||||
try testing.expectEqualSlices(u8, data, decompressed.getWritten());
|
||||
}
|
||||
|
||||
// compressor writer interface
|
||||
{
|
||||
var compressed = fixedBufferStream(&cmp_buf);
|
||||
var cmp = try deflate.compressor(container, compressed.writer(), .{ .level = level });
|
||||
var cmp_wrt = cmp.writer();
|
||||
try cmp_wrt.writeAll(data);
|
||||
try cmp.finish();
|
||||
|
||||
try testing.expectEqual(compressed_size, compressed.pos);
|
||||
}
|
||||
// decompressor reader interface
|
||||
{
|
||||
var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
|
||||
var dcm = inflate.decompressor(container, compressed.reader());
|
||||
var dcm_rdr = dcm.reader();
|
||||
const n = try dcm_rdr.readAll(&dcm_buf);
|
||||
try testing.expectEqual(data.len, n);
|
||||
try testing.expectEqualSlices(u8, data, dcm_buf[0..n]);
|
||||
}
|
||||
}
|
||||
pub fn init(containter: Container) Hasher {
|
||||
return switch (containter) {
|
||||
.gzip => .{ .gzip = .{} },
|
||||
.zlib => .{ .zlib = .{} },
|
||||
.raw => .raw,
|
||||
};
|
||||
}
|
||||
// huffman only compression
|
||||
{
|
||||
inline for (Container.list) |container| { // for each wrapping
|
||||
var compressed_size: usize = if (case.huffman_only_size > 0)
|
||||
case.huffman_only_size - Container.gzip.size() + container.size()
|
||||
else
|
||||
0;
|
||||
|
||||
// compress original stream to compressed stream
|
||||
{
|
||||
var original = fixedBufferStream(data);
|
||||
var compressed = fixedBufferStream(&cmp_buf);
|
||||
var cmp = try deflate.huffman.compressor(container, compressed.writer());
|
||||
try cmp.compress(original.reader());
|
||||
try cmp.finish();
|
||||
if (compressed_size == 0) {
|
||||
if (container == .gzip)
|
||||
print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos });
|
||||
compressed_size = compressed.pos;
|
||||
}
|
||||
try testing.expectEqual(compressed_size, compressed.pos);
|
||||
}
|
||||
// decompress compressed stream to decompressed stream
|
||||
{
|
||||
var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
|
||||
var decompressed = fixedBufferStream(&dcm_buf);
|
||||
try inflate.decompress(container, compressed.reader(), decompressed.writer());
|
||||
try testing.expectEqualSlices(u8, data, decompressed.getWritten());
|
||||
}
|
||||
pub fn container(h: Hasher) Container {
|
||||
return h;
|
||||
}
|
||||
|
||||
pub fn update(h: *Hasher, buf: []const u8) void {
|
||||
switch (h.*) {
|
||||
.raw => {},
|
||||
.gzip => |*gzip| {
|
||||
gzip.update(buf);
|
||||
gzip.count +%= buf.len;
|
||||
},
|
||||
.zlib => |*zlib| {
|
||||
zlib.update(buf);
|
||||
},
|
||||
inline .gzip, .zlib => |*x| x.update(buf),
|
||||
}
|
||||
}
|
||||
|
||||
// store only
|
||||
{
|
||||
inline for (Container.list) |container| { // for each wrapping
|
||||
var compressed_size: usize = if (case.store_size > 0)
|
||||
case.store_size - Container.gzip.size() + container.size()
|
||||
else
|
||||
0;
|
||||
pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
|
||||
var bits: [4]u8 = undefined;
|
||||
switch (hasher.*) {
|
||||
.gzip => |*gzip| {
|
||||
// GZIP 8 bytes footer
|
||||
// - 4 bytes, CRC32 (CRC-32)
|
||||
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
|
||||
std.mem.writeInt(u32, &bits, gzip.final(), .little);
|
||||
try writer.writeAll(&bits);
|
||||
|
||||
// compress original stream to compressed stream
|
||||
{
|
||||
var original = fixedBufferStream(data);
|
||||
var compressed = fixedBufferStream(&cmp_buf);
|
||||
var cmp = try deflate.store.compressor(container, compressed.writer());
|
||||
try cmp.compress(original.reader());
|
||||
try cmp.finish();
|
||||
if (compressed_size == 0) {
|
||||
if (container == .gzip)
|
||||
print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos });
|
||||
compressed_size = compressed.pos;
|
||||
}
|
||||
|
||||
try testing.expectEqual(compressed_size, compressed.pos);
|
||||
}
|
||||
// decompress compressed stream to decompressed stream
|
||||
{
|
||||
var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
|
||||
var decompressed = fixedBufferStream(&dcm_buf);
|
||||
try inflate.decompress(container, compressed.reader(), decompressed.writer());
|
||||
try testing.expectEqualSlices(u8, data, decompressed.getWritten());
|
||||
}
|
||||
std.mem.writeInt(u32, &bits, gzip.bytes_read, .little);
|
||||
try writer.writeAll(&bits);
|
||||
},
|
||||
.zlib => |*zlib| {
|
||||
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
|
||||
// 4 bytes of ADLER32 (Adler-32 checksum)
|
||||
// Checksum value of the uncompressed data (excluding any
|
||||
// dictionary data) computed according to Adler-32
|
||||
// algorithm.
|
||||
std.mem.writeInt(u32, &bits, zlib.final, .big);
|
||||
try writer.writeAll(&bits);
|
||||
},
|
||||
.raw => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
fn testDecompress(comptime container: Container, compressed: []const u8, expected_plain: []const u8) !void {
|
||||
var in = fixedBufferStream(compressed);
|
||||
var out = std.ArrayList(u8).init(testing.allocator);
|
||||
defer out.deinit();
|
||||
pub const Metadata = union(Container) {
|
||||
raw: void,
|
||||
gzip: struct {
|
||||
crc: u32 = 0,
|
||||
count: u32 = 0,
|
||||
},
|
||||
zlib: struct {
|
||||
adler: u32 = 0,
|
||||
},
|
||||
|
||||
try inflate.decompress(container, in.reader(), out.writer());
|
||||
try testing.expectEqualSlices(u8, expected_plain, out.items);
|
||||
}
|
||||
|
||||
test "don't read past deflate stream's end" {
|
||||
try testDecompress(.zlib, &[_]u8{
|
||||
0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
|
||||
0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
|
||||
0x83, 0x95, 0x0b, 0xf5,
|
||||
}, &[_]u8{
|
||||
0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
|
||||
0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
|
||||
0x00, 0x00, 0xff, 0xff, 0xff,
|
||||
});
|
||||
}
|
||||
|
||||
test "zlib header" {
|
||||
// Truncated header
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.zlib, &[_]u8{0x78}, ""),
|
||||
);
|
||||
// Wrong CM
|
||||
try testing.expectError(
|
||||
error.BadZlibHeader,
|
||||
testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""),
|
||||
);
|
||||
// Wrong CINFO
|
||||
try testing.expectError(
|
||||
error.BadZlibHeader,
|
||||
testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""),
|
||||
);
|
||||
// Wrong checksum
|
||||
try testing.expectError(
|
||||
error.WrongZlibChecksum,
|
||||
testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
|
||||
);
|
||||
// Truncated checksum
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
|
||||
);
|
||||
}
|
||||
|
||||
test "gzip header" {
|
||||
// Truncated header
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined),
|
||||
);
|
||||
// Wrong CM
|
||||
try testing.expectError(
|
||||
error.BadGzipHeader,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03,
|
||||
}, undefined),
|
||||
);
|
||||
|
||||
// Wrong checksum
|
||||
try testing.expectError(
|
||||
error.WrongGzipChecksum,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
// Truncated checksum
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
// Wrong initial size
|
||||
try testing.expectError(
|
||||
error.WrongGzipSize,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x01,
|
||||
}, undefined),
|
||||
);
|
||||
// Truncated initial size field
|
||||
try testing.expectError(
|
||||
error.EndOfStream,
|
||||
testDecompress(.gzip, &[_]u8{
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00,
|
||||
}, undefined),
|
||||
);
|
||||
|
||||
try testDecompress(.gzip, &[_]u8{
|
||||
// GZIP header
|
||||
0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
|
||||
// header.FHCRC (should cover entire header)
|
||||
0x99, 0xd6,
|
||||
// GZIP data
|
||||
0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
}, "");
|
||||
}
|
||||
|
||||
test "public interface" {
|
||||
const plain_data = [_]u8{ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a };
|
||||
|
||||
// deflate final stored block, header + plain (stored) data
|
||||
const deflate_block = [_]u8{
|
||||
0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
|
||||
} ++ plain_data;
|
||||
|
||||
// gzip header/footer + deflate block
|
||||
const gzip_data =
|
||||
[_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 } ++ // gzip header (10 bytes)
|
||||
deflate_block ++
|
||||
[_]u8{ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00 }; // gzip footer checksum (4 byte), size (4 bytes)
|
||||
|
||||
// zlib header/footer + deflate block
|
||||
const zlib_data = [_]u8{ 0x78, 0b10_0_11100 } ++ // zlib header (2 bytes)}
|
||||
deflate_block ++
|
||||
[_]u8{ 0x1c, 0xf2, 0x04, 0x47 }; // zlib footer: checksum
|
||||
|
||||
const gzip = @import("gzip.zig");
|
||||
const zlib = @import("zlib.zig");
|
||||
const flate = @This();
|
||||
|
||||
try testInterface(gzip, &gzip_data, &plain_data);
|
||||
try testInterface(zlib, &zlib_data, &plain_data);
|
||||
try testInterface(flate, &deflate_block, &plain_data);
|
||||
}
|
||||
|
||||
fn testInterface(comptime pkg: type, gzip_data: []const u8, plain_data: []const u8) !void {
|
||||
var buffer1: [64]u8 = undefined;
|
||||
var buffer2: [64]u8 = undefined;
|
||||
|
||||
var compressed = fixedBufferStream(&buffer1);
|
||||
var plain = fixedBufferStream(&buffer2);
|
||||
|
||||
// decompress
|
||||
{
|
||||
var in = fixedBufferStream(gzip_data);
|
||||
try pkg.decompress(in.reader(), plain.writer());
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
|
||||
}
|
||||
plain.reset();
|
||||
compressed.reset();
|
||||
|
||||
// compress/decompress
|
||||
{
|
||||
var in = fixedBufferStream(plain_data);
|
||||
try pkg.compress(in.reader(), compressed.writer(), .{});
|
||||
compressed.reset();
|
||||
try pkg.decompress(compressed.reader(), plain.writer());
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
|
||||
}
|
||||
plain.reset();
|
||||
compressed.reset();
|
||||
|
||||
// compressor/decompressor
|
||||
{
|
||||
var in = fixedBufferStream(plain_data);
|
||||
var cmp = try pkg.compressor(compressed.writer(), .{});
|
||||
try cmp.compress(in.reader());
|
||||
try cmp.finish();
|
||||
|
||||
compressed.reset();
|
||||
var dcp = pkg.decompressor(compressed.reader());
|
||||
try dcp.decompress(plain.writer());
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
|
||||
}
|
||||
plain.reset();
|
||||
compressed.reset();
|
||||
|
||||
// huffman
|
||||
{
|
||||
// huffman compress/decompress
|
||||
{
|
||||
var in = fixedBufferStream(plain_data);
|
||||
try pkg.huffman.compress(in.reader(), compressed.writer());
|
||||
compressed.reset();
|
||||
try pkg.decompress(compressed.reader(), plain.writer());
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
|
||||
pub fn init(containter: Container) Metadata {
|
||||
return switch (containter) {
|
||||
.gzip => .{ .gzip = .{} },
|
||||
.zlib => .{ .zlib = .{} },
|
||||
.raw => .raw,
|
||||
};
|
||||
}
|
||||
plain.reset();
|
||||
compressed.reset();
|
||||
|
||||
// huffman compressor/decompressor
|
||||
{
|
||||
var in = fixedBufferStream(plain_data);
|
||||
var cmp = try pkg.huffman.compressor(compressed.writer());
|
||||
try cmp.compress(in.reader());
|
||||
try cmp.finish();
|
||||
|
||||
compressed.reset();
|
||||
try pkg.decompress(compressed.reader(), plain.writer());
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
|
||||
pub fn container(m: Metadata) Container {
|
||||
return m;
|
||||
}
|
||||
}
|
||||
plain.reset();
|
||||
compressed.reset();
|
||||
};
|
||||
};
|
||||
|
||||
// store
|
||||
{
|
||||
// store compress/decompress
|
||||
{
|
||||
var in = fixedBufferStream(plain_data);
|
||||
try pkg.store.compress(in.reader(), compressed.writer());
|
||||
compressed.reset();
|
||||
try pkg.decompress(compressed.reader(), plain.writer());
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
|
||||
}
|
||||
plain.reset();
|
||||
compressed.reset();
|
||||
|
||||
// store compressor/decompressor
|
||||
{
|
||||
var in = fixedBufferStream(plain_data);
|
||||
var cmp = try pkg.store.compressor(compressed.writer());
|
||||
try cmp.compress(in.reader());
|
||||
try cmp.finish();
|
||||
|
||||
compressed.reset();
|
||||
try pkg.decompress(compressed.reader(), plain.writer());
|
||||
try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
|
||||
}
|
||||
}
|
||||
test {
|
||||
_ = HuffmanEncoder;
|
||||
_ = Compress;
|
||||
_ = Decompress;
|
||||
}
|
||||
|
||||
592
lib/std/compress/flate/BlockWriter.zig
Normal file
592
lib/std/compress/flate/BlockWriter.zig
Normal file
@@ -0,0 +1,592 @@
|
||||
//! Accepts list of tokens, decides what is best block type to write. What block
|
||||
//! type will provide best compression. Writes header and body of the block.
|
||||
const std = @import("std");
|
||||
const io = std.io;
|
||||
const assert = std.debug.assert;
|
||||
const Writer = std.io.Writer;
|
||||
|
||||
const BlockWriter = @This();
|
||||
const flate = @import("../flate.zig");
|
||||
const Compress = flate.Compress;
|
||||
const HuffmanEncoder = flate.HuffmanEncoder;
|
||||
const Token = @import("Token.zig");
|
||||
|
||||
const codegen_order = HuffmanEncoder.codegen_order;
|
||||
const end_code_mark = 255;
|
||||
|
||||
output: *Writer,
|
||||
|
||||
codegen_freq: [HuffmanEncoder.codegen_code_count]u16,
|
||||
literal_freq: [HuffmanEncoder.max_num_lit]u16,
|
||||
distance_freq: [HuffmanEncoder.distance_code_count]u16,
|
||||
codegen: [HuffmanEncoder.max_num_lit + HuffmanEncoder.distance_code_count + 1]u8,
|
||||
literal_encoding: HuffmanEncoder,
|
||||
distance_encoding: HuffmanEncoder,
|
||||
codegen_encoding: HuffmanEncoder,
|
||||
fixed_literal_encoding: HuffmanEncoder,
|
||||
fixed_distance_encoding: HuffmanEncoder,
|
||||
huff_distance: HuffmanEncoder,
|
||||
|
||||
fixed_literal_codes: [HuffmanEncoder.max_num_frequencies]HuffmanEncoder.Code,
|
||||
fixed_distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
|
||||
distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code,
|
||||
|
||||
pub fn init(output: *Writer) BlockWriter {
|
||||
return .{
|
||||
.output = output,
|
||||
.codegen_freq = undefined,
|
||||
.literal_freq = undefined,
|
||||
.distance_freq = undefined,
|
||||
.codegen = undefined,
|
||||
.literal_encoding = undefined,
|
||||
.distance_encoding = undefined,
|
||||
.codegen_encoding = undefined,
|
||||
.fixed_literal_encoding = undefined,
|
||||
.fixed_distance_encoding = undefined,
|
||||
.huff_distance = undefined,
|
||||
.fixed_literal_codes = undefined,
|
||||
.fixed_distance_codes = undefined,
|
||||
.distance_codes = undefined,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn initBuffers(bw: *BlockWriter) void {
|
||||
bw.fixed_literal_encoding = .fixedLiteralEncoder(&bw.fixed_literal_codes);
|
||||
bw.fixed_distance_encoding = .fixedDistanceEncoder(&bw.fixed_distance_codes);
|
||||
bw.huff_distance = .huffmanDistanceEncoder(&bw.distance_codes);
|
||||
}
|
||||
|
||||
/// Flush intrenal bit buffer to the writer.
|
||||
/// Should be called only when bit stream is at byte boundary.
|
||||
///
|
||||
/// That is after final block; when last byte could be incomplete or
|
||||
/// after stored block; which is aligned to the byte boundary (it has x
|
||||
/// padding bits after first 3 bits).
|
||||
pub fn flush(self: *BlockWriter) Writer.Error!void {
|
||||
try self.bit_writer.flush();
|
||||
}
|
||||
|
||||
fn writeCode(self: *BlockWriter, c: Compress.HuffCode) Writer.Error!void {
|
||||
try self.bit_writer.writeBits(c.code, c.len);
|
||||
}
|
||||
|
||||
/// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
|
||||
/// the literal and distance lengths arrays (which are concatenated into a single
|
||||
/// array). This method generates that run-length encoding.
|
||||
///
|
||||
/// The result is written into the codegen array, and the frequencies
|
||||
/// of each code is written into the codegen_freq array.
|
||||
/// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
|
||||
/// information. Code bad_code is an end marker
|
||||
///
|
||||
/// num_literals: The number of literals in literal_encoding
|
||||
/// num_distances: The number of distances in distance_encoding
|
||||
/// lit_enc: The literal encoder to use
|
||||
/// dist_enc: The distance encoder to use
|
||||
fn generateCodegen(
|
||||
self: *BlockWriter,
|
||||
num_literals: u32,
|
||||
num_distances: u32,
|
||||
lit_enc: *Compress.LiteralEncoder,
|
||||
dist_enc: *Compress.DistanceEncoder,
|
||||
) void {
|
||||
for (self.codegen_freq, 0..) |_, i| {
|
||||
self.codegen_freq[i] = 0;
|
||||
}
|
||||
|
||||
// Note that we are using codegen both as a temporary variable for holding
|
||||
// a copy of the frequencies, and as the place where we put the result.
|
||||
// This is fine because the output is always shorter than the input used
|
||||
// so far.
|
||||
var codegen = &self.codegen; // cache
|
||||
// Copy the concatenated code sizes to codegen. Put a marker at the end.
|
||||
var cgnl = codegen[0..num_literals];
|
||||
for (cgnl, 0..) |_, i| {
|
||||
cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
|
||||
}
|
||||
|
||||
cgnl = codegen[num_literals .. num_literals + num_distances];
|
||||
for (cgnl, 0..) |_, i| {
|
||||
cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
|
||||
}
|
||||
codegen[num_literals + num_distances] = end_code_mark;
|
||||
|
||||
var size = codegen[0];
|
||||
var count: i32 = 1;
|
||||
var out_index: u32 = 0;
|
||||
var in_index: u32 = 1;
|
||||
while (size != end_code_mark) : (in_index += 1) {
|
||||
// INVARIANT: We have seen "count" copies of size that have not yet
|
||||
// had output generated for them.
|
||||
const next_size = codegen[in_index];
|
||||
if (next_size == size) {
|
||||
count += 1;
|
||||
continue;
|
||||
}
|
||||
// We need to generate codegen indicating "count" of size.
|
||||
if (size != 0) {
|
||||
codegen[out_index] = size;
|
||||
out_index += 1;
|
||||
self.codegen_freq[size] += 1;
|
||||
count -= 1;
|
||||
while (count >= 3) {
|
||||
var n: i32 = 6;
|
||||
if (n > count) {
|
||||
n = count;
|
||||
}
|
||||
codegen[out_index] = 16;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(n - 3));
|
||||
out_index += 1;
|
||||
self.codegen_freq[16] += 1;
|
||||
count -= n;
|
||||
}
|
||||
} else {
|
||||
while (count >= 11) {
|
||||
var n: i32 = 138;
|
||||
if (n > count) {
|
||||
n = count;
|
||||
}
|
||||
codegen[out_index] = 18;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(n - 11));
|
||||
out_index += 1;
|
||||
self.codegen_freq[18] += 1;
|
||||
count -= n;
|
||||
}
|
||||
if (count >= 3) {
|
||||
// 3 <= count <= 10
|
||||
codegen[out_index] = 17;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(count - 3));
|
||||
out_index += 1;
|
||||
self.codegen_freq[17] += 1;
|
||||
count = 0;
|
||||
}
|
||||
}
|
||||
count -= 1;
|
||||
while (count >= 0) : (count -= 1) {
|
||||
codegen[out_index] = size;
|
||||
out_index += 1;
|
||||
self.codegen_freq[size] += 1;
|
||||
}
|
||||
// Set up invariant for next time through the loop.
|
||||
size = next_size;
|
||||
count = 1;
|
||||
}
|
||||
// Marker indicating the end of the codegen.
|
||||
codegen[out_index] = end_code_mark;
|
||||
}
|
||||
|
||||
const DynamicSize = struct {
|
||||
size: u32,
|
||||
num_codegens: u32,
|
||||
};
|
||||
|
||||
/// Returns the size of dynamically encoded data in bits, along with the
/// number of codegen codes needed for the header.
fn dynamicSize(
    self: *BlockWriter,
    lit_enc: *Compress.LiteralEncoder, // literal encoder
    dist_enc: *Compress.DistanceEncoder, // distance encoder
    extra_bits: u32,
) DynamicSize {
    // Trim trailing zero-frequency codegen codes; at least 4 must remain.
    var num_codegens = self.codegen_freq.len;
    while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0)
        num_codegens -= 1;

    // Header cost: 3 block bits + HLIT(5) + HDIST(5) + HCLEN(4) + 3 bits per
    // transmitted codegen code length, plus the encoded code lengths and the
    // extra bits carried by repeat codes 16/17/18.
    const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
        self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
        self.codegen_freq[16] * 2 +
        self.codegen_freq[17] * 3 +
        self.codegen_freq[18] * 7;

    const total = header +
        lit_enc.bitLength(&self.literal_freq) +
        dist_enc.bitLength(&self.distance_freq) +
        extra_bits;

    return .{
        .size = @intCast(total),
        .num_codegens = @intCast(num_codegens),
    };
}
|
||||
|
||||
/// Returns the size of fixed-Huffman encoded data in bits.
fn fixedSize(self: *BlockWriter, extra_bits: u32) u32 {
    const literal_bits = self.fixed_literal_encoding.bitLength(&self.literal_freq);
    const distance_bits = self.fixed_distance_encoding.bitLength(&self.distance_freq);
    // 3 bits of block header plus both code streams and their extra bits.
    return 3 + literal_bits + distance_bits + extra_bits;
}
|
||||
|
||||
/// Result of `storedSizeFits`: size of the stored block in bits and whether
/// the input fits inside a single stored block at all.
const StoredSize = struct {
    /// Stored size in bits, including the header; 0 when not storable.
    size: u32,
    /// Whether the input fits in a single stored block.
    storable: bool,
};
|
||||
|
||||
/// storedSizeFits calculates the stored size, including header.
/// The function returns the size in bits and whether the block
/// fits inside a single block.
fn storedSizeFits(in: ?[]const u8) StoredSize {
    // Idiomatic optional handling: capture via `orelse` instead of the
    // original `== null` check followed by `.?` unwraps.
    const bytes = in orelse return .{ .size = 0, .storable = false };
    if (bytes.len > HuffmanEncoder.max_store_block_size)
        return .{ .size = 0, .storable = false };
    // 5 header bytes (1 type byte + LEN + NLEN) plus the payload, in bits.
    return .{ .size = @intCast((bytes.len + 5) * 8), .storable = true };
}
|
||||
|
||||
/// Write the header of a dynamic Huffman block to the output stream.
///
/// num_literals: The number of literals specified in codegen
/// num_distances: The number of distances specified in codegen
/// num_codegens: The number of codegens used in codegen
/// eof: Is it the end-of-file? (end of stream)
fn dynamicHeader(
    self: *BlockWriter,
    num_literals: u32,
    num_distances: u32,
    num_codegens: u32,
    eof: bool,
) Writer.Error!void {
    // Block type plus final-block flag in the low 3 bits.
    const first_bits: u32 = if (eof) 5 else 4;
    try self.bit_writer.writeBits(first_bits, 3);
    try self.bit_writer.writeBits(num_literals - 257, 5);
    try self.bit_writer.writeBits(num_distances - 1, 5);
    try self.bit_writer.writeBits(num_codegens - 4, 4);

    // Code lengths of the codegen alphabet, in the fixed transmission order.
    for (0..num_codegens) |idx| {
        const code_len = self.codegen_encoding.codes[codegen_order[idx]].len;
        try self.bit_writer.writeBits(code_len, 3);
    }

    // Emit the codegen sequence itself; repeat codes 16/17/18 carry extra
    // repeat-count bits (2, 3 and 7 bits respectively).
    var pos: u32 = 0;
    while (true) {
        const code_word: u32 = self.codegen[pos];
        pos += 1;
        if (code_word == end_code_mark) break;

        try self.writeCode(self.codegen_encoding.codes[code_word]);

        switch (code_word) {
            16 => {
                try self.bit_writer.writeBits(self.codegen[pos], 2);
                pos += 1;
            },
            17 => {
                try self.bit_writer.writeBits(self.codegen[pos], 3);
                pos += 1;
            },
            18 => {
                try self.bit_writer.writeBits(self.codegen[pos], 7);
                pos += 1;
            },
            else => {},
        }
    }
}
|
||||
|
||||
/// Writes a stored (uncompressed) block header: block type bits, byte
/// alignment, then LEN and its one's complement NLEN.
fn storedHeader(self: *BlockWriter, length: usize, eof: bool) Writer.Error!void {
    // Stored blocks are limited to 65535 bytes.
    assert(length <= 65535);
    const type_bits: u32 = if (eof) 1 else 0;
    try self.bit_writer.writeBits(type_bits, 3);
    // Align to a byte boundary before the length fields.
    try self.flush();
    const len: u16 = @intCast(length);
    try self.bit_writer.writeBits(len, 16);
    try self.bit_writer.writeBits(~len, 16);
}
|
||||
|
||||
/// Writes the 3-bit header of a fixed Huffman block, with the final-block
/// flag set when `eof` is true.
fn fixedHeader(self: *BlockWriter, eof: bool) Writer.Error!void {
    // Indicate that we are a fixed Huffman block. Expression form replaces
    // the original mutable `var` + `if` reassignment.
    const value: u32 = if (eof) 3 else 2;
    try self.bit_writer.writeBits(value, 3);
}
|
||||
|
||||
/// Write a block of tokens with the smallest encoding. Will choose block type.
/// The original input can be supplied, and if the huffman encoded data
/// is larger than the original bytes, the data will be written as a
/// stored block.
/// If the input is null, the tokens will always be Huffman encoded.
pub fn write(self: *BlockWriter, tokens: []const Token, eof: bool, input: ?[]const u8) Writer.Error!void {
    const lit_and_dist = self.indexTokens(tokens);
    const num_literals = lit_and_dist.num_literals;
    const num_distances = lit_and_dist.num_distances;

    const stored = storedSizeFits(input);

    var extra_bits: u32 = 0;
    if (stored.storable) {
        // We only bother calculating the costs of the extra bits required by
        // the length of distance fields (which will be the same for both fixed
        // and dynamic encoding), if we need to compare those two encodings
        // against stored encoding.
        var length_code: u16 = Token.length_codes_start + 8;
        while (length_code < num_literals) : (length_code += 1) {
            // First eight length codes have extra size = 0.
            extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
                @as(u32, @intCast(Token.lengthExtraBits(length_code)));
        }
        var distance_code: u16 = 4;
        while (distance_code < num_distances) : (distance_code += 1) {
            // First four distance codes have extra size = 0.
            extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
                @as(u32, @intCast(Token.distanceExtraBits(distance_code)));
        }
    }

    // Figure out smallest code.
    // Fixed Huffman baseline.
    var literal_encoding = &self.fixed_literal_encoding;
    var distance_encoding = &self.fixed_distance_encoding;
    var size = self.fixedSize(extra_bits);

    // Generate codegen and codegenFrequencies, which indicates how to encode
    // the literal_encoding and the distance_encoding.
    self.generateCodegen(
        num_literals,
        num_distances,
        &self.literal_encoding,
        &self.distance_encoding,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(
        &self.literal_encoding,
        &self.distance_encoding,
        extra_bits,
    );
    // Taken directly from the computation; the original used a mutable
    // variable dead-initialized to 0.
    const num_codegens = dynamic_size.num_codegens;

    if (dynamic_size.size < size) {
        size = dynamic_size.size;
        literal_encoding = &self.literal_encoding;
        distance_encoding = &self.distance_encoding;
    }

    // Stored bytes?
    if (stored.storable and stored.size < size) {
        try self.storedBlock(input.?, eof);
        return;
    }

    // Huffman. Direct pointer equality replaces the original
    // `@intFromPtr(...) == @intFromPtr(...)` round-trip.
    if (literal_encoding == &self.fixed_literal_encoding) {
        try self.fixedHeader(eof);
    } else {
        try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
    }

    // Write the tokens.
    try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
}
|
||||
|
||||
/// Writes `input` as a stored (uncompressed) block: header with byte
/// alignment followed by the raw bytes.
pub fn storedBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void {
    try self.storedHeader(input.len, eof);
    try self.bit_writer.writeBytes(input);
}
|
||||
|
||||
/// Encodes a block using a dynamic Huffman table.
/// This should be used if the symbols used have a disproportionate
/// histogram distribution.
/// If input is supplied and the compression savings are below 1/16th of the
/// input size the block is stored.
fn dynamicBlock(
    self: *BlockWriter,
    tokens: []const Token,
    eof: bool,
    input: ?[]const u8,
) Writer.Error!void {
    const indexed = self.indexTokens(tokens);

    // Generate codegen and codegen frequencies, which indicate how to encode
    // the literal_encoding and the distance_encoding.
    self.generateCodegen(
        indexed.num_literals,
        indexed.num_distances,
        &self.literal_encoding,
        &self.distance_encoding,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);

    // Store bytes if compression does not save at least 1/16th of the size.
    const stored = storedSizeFits(input);
    if (stored.storable and stored.size < (dynamic_size.size + (dynamic_size.size >> 4))) {
        try self.storedBlock(input.?, eof);
        return;
    }

    // Write the Huffman table, then the tokens.
    try self.dynamicHeader(
        indexed.num_literals,
        indexed.num_distances,
        dynamic_size.num_codegens,
        eof,
    );
    try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
}
|
||||
|
||||
/// Result of `indexTokens`: counts of literal and distance codes actually
/// used by the indexed token stream.
const TotalIndexedTokens = struct {
    /// Number of literal/length codes in use (including the end-block marker).
    num_literals: u32,
    /// Number of distance codes in use (at least 1).
    num_distances: u32,
};
|
||||
|
||||
/// Indexes a slice of tokens followed by an end_block_marker, and updates
/// literal_freq and distance_freq, and generates literal_encoding
/// and distance_encoding.
/// The number of literal and distance tokens is returned.
fn indexTokens(self: *BlockWriter, tokens: []const Token) TotalIndexedTokens {
    // `@memset` replaces the original element-by-element clearing loops
    // (which also iterated the arrays by value just to obtain indices).
    @memset(&self.literal_freq, 0);
    @memset(&self.distance_freq, 0);

    for (tokens) |t| {
        if (t.kind == Token.Kind.literal) {
            self.literal_freq[t.literal()] += 1;
            continue;
        }
        self.literal_freq[t.lengthCode()] += 1;
        self.distance_freq[t.distanceCode()] += 1;
    }
    // add end_block_marker token at the end
    self.literal_freq[HuffmanEncoder.end_block_marker] += 1;

    // Trim trailing zero-frequency literal codes; the end-block marker
    // guarantees at least one nonzero entry.
    var num_literals: u32 = @intCast(self.literal_freq.len);
    while (self.literal_freq[num_literals - 1] == 0) {
        num_literals -= 1;
    }
    // Trim trailing zero-frequency distance codes.
    var num_distances: u32 = @intCast(self.distance_freq.len);
    while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
        num_distances -= 1;
    }
    if (num_distances == 0) {
        // We haven't found a single match. If we want to go with the dynamic encoding,
        // we should count at least one distance to be sure that the distance huffman tree could be encoded.
        self.distance_freq[0] = 1;
        num_distances = 1;
    }
    self.literal_encoding.generate(&self.literal_freq, 15);
    self.distance_encoding.generate(&self.distance_freq, 15);
    return .{
        .num_literals = num_literals,
        .num_distances = num_distances,
    };
}
|
||||
|
||||
/// Writes a slice of tokens to the output followed by an end_block_marker.
/// Codes for literal and distance encoding must be supplied.
fn writeTokens(
    self: *BlockWriter,
    tokens: []const Token,
    le_codes: []Compress.HuffCode,
    oe_codes: []Compress.HuffCode,
) Writer.Error!void {
    for (tokens) |token| {
        // Plain literal: one code, no extra bits.
        if (token.kind == Token.Kind.literal) {
            try self.writeCode(le_codes[token.literal()]);
            continue;
        }

        // Match: length code plus optional extra length bits...
        const length_enc = token.lengthEncoding();
        try self.writeCode(le_codes[length_enc.code]);
        if (length_enc.extra_bits > 0)
            try self.bit_writer.writeBits(length_enc.extra_length, length_enc.extra_bits);

        // ...then distance code plus optional extra distance bits.
        const dist_enc = token.distanceEncoding();
        try self.writeCode(oe_codes[dist_enc.code]);
        if (dist_enc.extra_bits > 0)
            try self.bit_writer.writeBits(dist_enc.extra_distance, dist_enc.extra_bits);
    }
    // add end_block_marker at the end
    try self.writeCode(le_codes[HuffmanEncoder.end_block_marker]);
}
|
||||
|
||||
/// Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes
/// if the result only gains very little from compression.
pub fn huffmanBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void {
    // Add everything as literals.
    histogram(input, &self.literal_freq);

    self.literal_freq[HuffmanEncoder.end_block_marker] = 1;

    const num_literals = HuffmanEncoder.end_block_marker + 1;
    // One dummy distance code so the distance tree remains encodable.
    self.distance_freq[0] = 1;
    const num_distances = 1;

    self.literal_encoding.generate(&self.literal_freq, 15);

    // Figure out smallest code.
    // Always use dynamic Huffman or Store.
    // Generate codegen and codegenFrequencies, which indicates how to encode
    // the literal_encoding and the distance_encoding.
    self.generateCodegen(
        num_literals,
        num_distances,
        &self.literal_encoding,
        &self.huff_distance,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
    const size = dynamic_size.size;
    // Taken directly from the computation; the original used a mutable
    // variable dead-initialized to 0.
    const num_codegens = dynamic_size.num_codegens;

    // Store bytes, if we don't get a reasonable improvement.
    const stored = storedSizeFits(input);
    if (stored.storable and stored.size < (size + (size >> 4))) {
        try self.storedBlock(input, eof);
        return;
    }

    // Huffman.
    try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
    const encoding = self.literal_encoding.codes[0..257];

    for (input) |b| {
        const c = encoding[b];
        try self.bit_writer.writeBits(c.code, c.len);
    }
    try self.writeCode(encoding[HuffmanEncoder.end_block_marker]);
}
|
||||
|
||||
/// Counts byte frequencies of `b` into `h`. Only the first 256 entries can
/// receive counts; the remaining entries are cleared to zero.
fn histogram(b: []const u8, h: *[286]u16) void {
    // `@memset` replaces the original element-by-element clearing loop.
    @memset(h, 0);

    // The original bound `var lh` was never reassigned (a compile error in
    // modern Zig); bind it as const.
    const lh = h[0..256];
    for (b) |t| {
        lh[t] += 1;
    }
}
|
||||
@@ -1,240 +0,0 @@
|
||||
//! 64K buffer of uncompressed data created in inflate (decompression). Has enough
|
||||
//! history to support writing match<length, distance>; copying length of bytes
|
||||
//! from the position distance backward from current.
|
||||
//!
|
||||
//! Reads can return less than available bytes if they are spread across
|
||||
//! different circles. So reads should repeat until they get the required number of bytes
|
||||
//! or until returned slice is zero length.
|
||||
//!
|
||||
//! Note on deflate limits:
|
||||
//! * non-compressible block is limited to 65,535 bytes.
|
||||
//! * backward pointer is limited in distance to 32K bytes and in length to 258 bytes.
|
||||
//!
|
||||
//! Whole non-compressed block can be written without overlap. We always have
|
||||
//! history of up to 64K, more than the 32K needed.
|
||||
//!
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
const consts = @import("consts.zig").match;
|
||||
|
||||
const mask = 0xffff; // 64K - 1
|
||||
const buffer_len = mask + 1; // 64K buffer
|
||||
|
||||
const Self = @This();
|
||||
|
||||
buffer: [buffer_len]u8 = undefined,
|
||||
wp: usize = 0, // write position
|
||||
rp: usize = 0, // read position
|
||||
|
||||
/// Appends every byte of `buf`, routing through `write` so the wrap-around
/// bookkeeping lives in one place.
fn writeAll(self: *Self, buf: []const u8) void {
    for (buf) |b| self.write(b);
}
|
||||
|
||||
/// Write literal. Stores one byte at the current write position and
/// advances the write pointer; asserts the buffer is not full.
pub fn write(self: *Self, b: u8) void {
    assert(self.wp - self.rp < mask);
    self.buffer[self.wp & mask] = b;
    self.wp += 1;
}
|
||||
|
||||
/// Write match (back-reference to the same data slice) starting at `distance`
/// back from current write position, and `length` of bytes.
pub fn writeMatch(self: *Self, length: u16, distance: u16) !void {
    // Reject matches outside the deflate limits or reaching before the
    // start of written data (condition inverted via De Morgan).
    const valid = self.wp >= distance and
        length >= consts.base_length and length <= consts.max_length and
        distance >= consts.min_distance and distance <= consts.max_distance;
    if (!valid) return error.InvalidMatch;
    assert(self.wp - self.rp < mask);

    var src: usize = (self.wp - distance) & mask;
    const src_end: usize = src + length;
    var dst: usize = self.wp & mask;
    const dst_end: usize = dst + length;

    self.wp += length;

    // Fast path: source and destination ranges both stay inside a single
    // circle, so memcpy can be used. Overlap (distance < length) is handled
    // by copying in chunks that double in size, always from already-written
    // output.
    if (src_end < buffer_len and dst_end < buffer_len) {
        var chunk: usize = distance;
        var remaining: usize = length;
        while (chunk < remaining) {
            @memcpy(self.buffer[dst..][0..chunk], self.buffer[src..][0..chunk]);
            dst += chunk;
            remaining -= chunk;
            chunk *= 2;
        }
        @memcpy(self.buffer[dst..][0..remaining], self.buffer[src..][0..remaining]);
        return;
    }

    // Slow path: byte by byte across the wrap-around.
    while (dst < dst_end) {
        self.buffer[dst & mask] = self.buffer[src & mask];
        dst += 1;
        src += 1;
    }
}
|
||||
|
||||
/// Returns writable part of the internal buffer of size `n` at most. Advances
/// write pointer, assumes that returned buffer will be filled with data.
pub fn getWritable(self: *Self, n: usize) []u8 {
    const pos = self.wp & mask;
    // Clamp to the end of the current circle; the caller repeats if needed.
    const avail = @min(n, buffer_len - pos);
    self.wp += avail;
    return self.buffer[pos .. pos + avail];
}
|
||||
|
||||
/// Read available data. Can return part of the available data if it is
/// spread across two circles. So read until this returns zero length.
pub fn read(self: *Self) []const u8 {
    return self.readAtMost(buffer_len);
}
|
||||
|
||||
/// Read part of available data. Can return less than max even if there are
/// more than max decoded data.
pub fn readAtMost(self: *Self, limit: usize) []const u8 {
    // A zero limit means "no limit".
    const effective_limit = if (limit == 0) buffer_len else limit;
    const rb = self.readBlock(effective_limit);
    // Consume the block after computing the returned slice.
    defer self.rp += rb.len;
    return self.buffer[rb.head..rb.tail];
}
|
||||
|
||||
/// A contiguous readable region of the buffer, as produced by `readBlock`.
const ReadBlock = struct {
    /// Start index into `buffer`.
    head: usize,
    /// One-past-the-end index into `buffer`.
    tail: usize,
    /// Number of bytes in the region (`tail - head`).
    len: usize,
};
|
||||
|
||||
/// Returns position of continuous read block data.
fn readBlock(self: *Self, max: usize) ReadBlock {
    const r = self.rp & mask;
    const w = self.wp & mask;
    // Readable run without wrapping: up to the write position, or to the
    // end of the buffer when the data wraps around.
    const contiguous = if (w >= r) w - r else buffer_len - r;
    const n = @min(max, contiguous);
    return .{
        .head = r,
        .tail = r + n,
        .len = n,
    };
}
|
||||
|
||||
/// Number of free bytes for write.
pub fn free(self: *Self) usize {
    const used = self.wp - self.rp;
    return buffer_len - used;
}
|
||||
|
||||
/// Full if largest match can't fit. 258 is largest match length. That much
/// bytes can be produced in single decode step.
pub fn full(self: *Self) bool {
    return self.free() < 258 + 1;
}
|
||||
|
||||
// example from: https://youtu.be/SJPvNi4HrWQ?t=3558
test writeMatch {
    var buf: Self = .{};

    buf.writeAll("a salad; ");
    // Back-reference to "a sal", then an overlapping 3-byte match.
    try buf.writeMatch(5, 9);
    try buf.writeMatch(3, 3);

    try testing.expectEqualStrings("a salad; a salsal", buf.read());
}
|
||||
|
||||
test "writeMatch overlap" {
    var buf: Self = .{};

    buf.writeAll("a b c ");
    // length > distance: the match repeats its own freshly-written output.
    try buf.writeMatch(8, 4);
    buf.write('d');

    try testing.expectEqualStrings("a b c b c b c d", buf.read());
}
|
||||
|
||||
test readAtMost {
    var buf: Self = .{};

    buf.writeAll("0123456789");
    try buf.writeMatch(50, 10);

    try testing.expectEqualStrings("0123456789" ** 6, buf.buffer[buf.rp..buf.wp]);
    // Drain in fixed-size chunks; the read pointer advances per chunk.
    for (0..6) |i| {
        try testing.expectEqual(i * 10, buf.rp);
        try testing.expectEqualStrings("0123456789", buf.readAtMost(10));
    }
    try testing.expectEqualStrings("", buf.readAtMost(10));
    try testing.expectEqualStrings("", buf.read());
}
|
||||
|
||||
test Self {
    var buf: Self = .{};

    const data = "0123456789abcdef" ** (1024 / 16);
    buf.writeAll(data);
    try testing.expectEqual(@as(usize, 0), buf.rp);
    try testing.expectEqual(@as(usize, 1024), buf.wp);
    try testing.expectEqual(@as(usize, 1024 * 63), buf.free());

    // Fill another 62K via matches.
    for (0..62 * 4) |_|
        try buf.writeMatch(256, 1024);

    try testing.expectEqual(@as(usize, 0), buf.rp);
    try testing.expectEqual(@as(usize, 63 * 1024), buf.wp);
    try testing.expectEqual(@as(usize, 1024), buf.free());

    buf.writeAll(data[0..200]);
    _ = buf.readAtMost(1024); // make some space
    buf.writeAll(data); // overflows write position
    try testing.expectEqual(@as(usize, 200 + 65536), buf.wp);
    try testing.expectEqual(@as(usize, 1024), buf.rp);
    try testing.expectEqual(@as(usize, 1024 - 200), buf.free());

    const rb = buf.readBlock(Self.buffer_len);
    try testing.expectEqual(@as(usize, 65536 - 1024), rb.len);
    try testing.expectEqual(@as(usize, 1024), rb.head);
    try testing.expectEqual(@as(usize, 65536), rb.tail);

    // Read to the end of the buffer (first circle).
    try testing.expectEqual(@as(usize, 65536 - 1024), buf.read().len);
    try testing.expectEqual(@as(usize, 200 + 65536), buf.wp);
    try testing.expectEqual(@as(usize, 65536), buf.rp);
    try testing.expectEqual(@as(usize, 65536 - 200), buf.free());

    // Read the remainder from the second circle.
    try testing.expectEqual(@as(usize, 200), buf.read().len);
}
|
||||
|
||||
test "write overlap" {
    var buf: Self = .{};
    // Position both pointers near the end so writes wrap around.
    buf.wp = buf.buffer.len - 15;
    buf.rp = buf.wp;

    buf.writeAll("0123456789");
    buf.writeAll("abcdefghij");

    try testing.expectEqual(buf.buffer.len + 5, buf.wp);
    try testing.expectEqual(buf.buffer.len - 15, buf.rp);

    // First read stops at the physical end of the buffer.
    try testing.expectEqualStrings("0123456789abcde", buf.read());
    try testing.expectEqualStrings("fghij", buf.read());

    try testing.expect(buf.wp == buf.rp);
}
|
||||
|
||||
test "writeMatch/read overlap" {
    var buf: Self = .{};
    // Position both pointers near the end so the match wraps around.
    buf.wp = buf.buffer.len - 15;
    buf.rp = buf.wp;

    buf.writeAll("0123456789");
    try buf.writeMatch(15, 5);

    try testing.expectEqualStrings("012345678956789", buf.read());
    try testing.expectEqualStrings("5678956789", buf.read());

    try buf.writeMatch(20, 25);
    try testing.expectEqualStrings("01234567895678956789", buf.read());
}
|
||||
332
lib/std/compress/flate/Compress.zig
Normal file
332
lib/std/compress/flate/Compress.zig
Normal file
@@ -0,0 +1,332 @@
|
||||
//! Default compression algorithm. Has two steps: tokenization and token
|
||||
//! encoding.
|
||||
//!
|
||||
//! Tokenization takes uncompressed input stream and produces list of tokens.
|
||||
//! Each token can be a literal (byte of data) or a match (back-reference to previous
|
||||
//! data with length and distance). Tokenization accumulates 32K tokens; when
|
||||
//! full or `flush` is called tokens are passed to the `block_writer`. Level
|
||||
//! defines how hard (how slow) it tries to find match.
|
||||
//!
|
||||
//! Block writer will decide which type of deflate block to write (stored, fixed,
|
||||
//! dynamic) and encode tokens to the output byte stream. Client has to call
|
||||
//! `finish` to write block with the final bit set.
|
||||
//!
|
||||
//! Container defines type of header and footer which can be gzip, zlib or raw.
|
||||
//! They all share same deflate body. Raw has no header or footer just deflate
|
||||
//! body.
|
||||
//!
|
||||
//! Compression algorithm explained in rfc-1951 (slightly edited for this case):
|
||||
//!
|
||||
//! The compressor uses a chained hash table `lookup` to find duplicated
|
||||
//! strings, using a hash function that operates on 4-byte sequences. At any
|
||||
//! given point during compression, let XYZW be the next 4 input bytes
|
||||
//! (lookahead) to be examined (not necessarily all different, of course).
|
||||
//! First, the compressor examines the hash chain for XYZW. If the chain is
|
||||
//! empty, the compressor simply writes out X as a literal byte and advances
|
||||
//! one byte in the input. If the hash chain is not empty, indicating that the
|
||||
//! sequence XYZW (or, if we are unlucky, some other 4 bytes with the same
|
||||
//! hash function value) has occurred recently, the compressor compares all
|
||||
//! strings on the XYZW hash chain with the actual input data sequence
|
||||
//! starting at the current point, and selects the longest match.
|
||||
//!
|
||||
//! To improve overall compression, the compressor defers the selection of
|
||||
//! matches ("lazy matching"): after a match of length N has been found, the
|
||||
//! compressor searches for a longer match starting at the next input byte. If
|
||||
//! it finds a longer match, it truncates the previous match to a length of
|
||||
//! one (thus producing a single literal byte) and then emits the longer
|
||||
//! match. Otherwise, it emits the original match, and, as described above,
|
||||
//! advances N bytes before continuing.
|
||||
//!
|
||||
//!
|
||||
//! Allocates statically ~400K (192K lookup, 128K tokens, 64K window).
|
||||
|
||||
const builtin = @import("builtin");
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
const expect = testing.expect;
|
||||
const mem = std.mem;
|
||||
const math = std.math;
|
||||
const Writer = std.Io.Writer;
|
||||
|
||||
const Compress = @This();
|
||||
const Token = @import("Token.zig");
|
||||
const BlockWriter = @import("BlockWriter.zig");
|
||||
const flate = @import("../flate.zig");
|
||||
const Container = flate.Container;
|
||||
const Lookup = @import("Lookup.zig");
|
||||
const HuffmanEncoder = flate.HuffmanEncoder;
|
||||
const LiteralNode = HuffmanEncoder.LiteralNode;
|
||||
|
||||
/// Chained hash table used to find matches in the history window.
lookup: Lookup = .{},
/// Accumulated tokens awaiting block encoding.
tokens: Tokens = .{},
/// Encodes finished token runs into deflate blocks.
block_writer: BlockWriter,
/// Algorithm knobs derived from the configured `Level`.
level: LevelArgs,
/// Container checksum/length state (gzip CRC32, zlib Adler-32, or none).
hasher: Container.Hasher,
/// The `std.Io.Writer` interface clients write uncompressed data into.
writer: Writer,
state: State,

// Match and literal at the previous position.
// Used for lazy match finding in processWindow.
prev_match: ?Token = null,
prev_literal: ?u8 = null,

pub const State = enum { header, middle, ended };
|
||||
|
||||
/// Trades between speed and compression size.
/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43)
/// levels 1-3 are using different algorithm to perform faster but with less
/// compression. That is not implemented here.
pub const Level = enum(u4) {
    level_4 = 4,
    level_5 = 5,
    level_6 = 6,
    level_7 = 7,
    level_8 = 8,
    level_9 = 9,

    /// Alias mapped to the same knobs as `level_4`.
    fast = 0xb,
    /// Alias mapped to the same knobs as `level_6`.
    default = 0xc,
    /// Alias mapped to the same knobs as `level_9`.
    best = 0xd,
};
|
||||
|
||||
/// Number of tokens to accumulate in deflate before starting block encoding.
///
/// In zlib this depends on memlevel: 6 + memlevel, where the default
/// memlevel of 8 (max 9) gives 14 or 15 bits.
pub const n_tokens = 1 << 15;
|
||||
|
||||
/// Algorithm knobs for each level.
const LevelArgs = struct {
    /// Do fewer lookups if we already have a match of this length.
    good: u16,
    /// Stop looking for a better match once one of at least this length is found.
    nice: u16,
    /// Skip lazy match finding if the current match has at least this length.
    lazy: u16,
    /// How many lookups for a previous match to perform.
    chain: u16,

    pub fn get(level: Level) LevelArgs {
        return switch (level) {
            .fast, .level_4 => .{ .good = 4, .lazy = 4, .nice = 16, .chain = 16 },
            .level_5 => .{ .good = 8, .lazy = 16, .nice = 32, .chain = 32 },
            .default, .level_6 => .{ .good = 8, .lazy = 16, .nice = 128, .chain = 128 },
            .level_7 => .{ .good = 8, .lazy = 32, .nice = 128, .chain = 256 },
            .level_8 => .{ .good = 32, .lazy = 128, .nice = 258, .chain = 1024 },
            .best, .level_9 => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 },
        };
    }
};
|
||||
|
||||
/// Compression options accepted by `init`.
pub const Options = struct {
    /// Speed/size trade-off; see `Level`.
    level: Level = .default,
    /// Output framing: gzip, zlib, or raw deflate.
    container: Container = .raw,
};
|
||||
|
||||
/// Initializes a compressor that encodes into `output`, using `buffer` as
/// the staging area for the client-facing `writer` interface.
pub fn init(output: *Writer, buffer: []u8, options: Options) Compress {
    return .{
        .writer = .{
            .buffer = buffer,
            .vtable = &.{ .drain = drain },
        },
        .block_writer = .init(output),
        .hasher = .init(options.container),
        .level = .get(options.level),
        .state = .header,
    };
}
|
||||
|
||||
/// Fixed-capacity store for tokens awaiting block encoding.
const Tokens = struct {
    list: [n_tokens]Token = undefined,
    pos: usize = 0,

    /// Appends one token; caller checks `full` beforehand.
    fn add(t: *Tokens, token: Token) void {
        t.list[t.pos] = token;
        t.pos += 1;
    }

    /// True when capacity is exhausted.
    fn full(t: *Tokens) bool {
        return t.pos == t.list.len;
    }

    /// Discards all accumulated tokens.
    fn reset(t: *Tokens) void {
        t.pos = 0;
    }

    /// Slice of the tokens accumulated so far.
    fn tokens(t: *Tokens) []const Token {
        return t.list[0..t.pos];
    }
};
|
||||
|
||||
/// `Writer.VTable.drain` implementation. On first call emits the container
/// header; afterwards waits until enough history plus lookahead is buffered
/// before tokenizing (tokenization itself is still TODO).
fn drain(me: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
    _ = data;
    _ = splat;
    const c: *Compress = @fieldParentPtr("writer", me);
    const out = c.block_writer.output;
    switch (c.state) {
        .ended => unreachable,
        .middle => {},
        .header => {
            c.state = .middle;
            const header = c.hasher.container().header();
            try out.writeAll(header);
            return header.len;
        },
    }

    // Need a full history window plus the minimal lookahead before matching.
    const buffered = me.buffered();
    const min_lookahead = Token.min_length + Token.max_length;
    if (buffered.len < flate.history_len + min_lookahead) return 0;
    const lookahead = buffered[flate.history_len..];

    // TODO tokenize
    _ = lookahead;
    //c.hasher.update(lookahead[0..n]);
    @panic("TODO");
}
|
||||
|
||||
/// Finishes the compressed stream (footer included) and flushes the
/// underlying output writer.
pub fn end(c: *Compress) !void {
    try endUnflushed(c);
    try c.block_writer.output.flush();
}
|
||||
|
||||
/// Drains any data still buffered in `writer`, then writes the container
/// footer — without flushing the underlying output writer.
pub fn endUnflushed(c: *Compress) !void {
    while (c.writer.end != 0) _ = try drain(&c.writer, &.{""}, 1);
    c.state = .ended;

    const out = c.block_writer.output;

    // TODO flush tokens

    switch (c.hasher) {
        .raw => {},
        .gzip => |*gzip| {
            // GZIP 8 bytes footer
            // - 4 bytes, CRC32 (CRC-32)
            // - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
            const footer = try out.writableArray(8);
            std.mem.writeInt(u32, footer[0..4], gzip.crc.final(), .little);
            std.mem.writeInt(u32, footer[4..8], @truncate(gzip.count), .little);
        },
        .zlib => |*zlib| {
            // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
            // 4 bytes of ADLER32 (Adler-32 checksum)
            // Checksum value of the uncompressed data (excluding any
            // dictionary data) computed according to Adler-32
            // algorithm.
            std.mem.writeInt(u32, try out.writableArray(4), zlib.adler, .big);
        },
    }
}
|
||||
|
||||
pub const Simple = struct {
|
||||
/// Note that store blocks are limited to 65535 bytes.
|
||||
buffer: []u8,
|
||||
wp: usize,
|
||||
block_writer: BlockWriter,
|
||||
hasher: Container.Hasher,
|
||||
strategy: Strategy,
|
||||
|
||||
pub const Strategy = enum { huffman, store };
|
||||
|
||||
pub fn init(output: *Writer, buffer: []u8, container: Container, strategy: Strategy) !Simple {
|
||||
const header = container.header();
|
||||
try output.writeAll(header);
|
||||
return .{
|
||||
.buffer = buffer,
|
||||
.wp = 0,
|
||||
.block_writer = .init(output),
|
||||
.hasher = .init(container),
|
||||
.strategy = strategy,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn flush(self: *Simple) !void {
|
||||
try self.flushBuffer(false);
|
||||
try self.block_writer.storedBlock("", false);
|
||||
try self.block_writer.flush();
|
||||
}
|
||||
|
||||
pub fn finish(self: *Simple) !void {
|
||||
try self.flushBuffer(true);
|
||||
try self.block_writer.flush();
|
||||
try self.hasher.container().writeFooter(&self.hasher, self.block_writer.output);
|
||||
}
|
||||
|
||||
fn flushBuffer(self: *Simple, final: bool) !void {
|
||||
const buf = self.buffer[0..self.wp];
|
||||
switch (self.strategy) {
|
||||
.huffman => try self.block_writer.huffmanBlock(buf, final),
|
||||
.store => try self.block_writer.storedBlock(buf, final),
|
||||
}
|
||||
self.wp = 0;
|
||||
}
|
||||
};
|
||||
|
||||
test "generate a Huffman code from an array of frequencies" {
|
||||
var freqs: [19]u16 = [_]u16{
|
||||
8, // 0
|
||||
1, // 1
|
||||
1, // 2
|
||||
2, // 3
|
||||
5, // 4
|
||||
10, // 5
|
||||
9, // 6
|
||||
1, // 7
|
||||
0, // 8
|
||||
0, // 9
|
||||
0, // 10
|
||||
0, // 11
|
||||
0, // 12
|
||||
0, // 13
|
||||
0, // 14
|
||||
0, // 15
|
||||
1, // 16
|
||||
3, // 17
|
||||
5, // 18
|
||||
};
|
||||
|
||||
var codes: [19]HuffmanEncoder.Code = undefined;
|
||||
var enc: HuffmanEncoder = .{
|
||||
.codes = &codes,
|
||||
.freq_cache = undefined,
|
||||
.bit_count = undefined,
|
||||
.lns = undefined,
|
||||
.lfs = undefined,
|
||||
};
|
||||
enc.generate(freqs[0..], 7);
|
||||
|
||||
try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
|
||||
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[0].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[1].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[2].len);
|
||||
try testing.expectEqual(@as(usize, 5), enc.codes[3].len);
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[4].len);
|
||||
try testing.expectEqual(@as(usize, 2), enc.codes[5].len);
|
||||
try testing.expectEqual(@as(usize, 2), enc.codes[6].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[7].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[8].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[9].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[10].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[11].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[12].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[13].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[14].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[15].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[16].len);
|
||||
try testing.expectEqual(@as(usize, 5), enc.codes[17].len);
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[18].len);
|
||||
|
||||
try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code);
|
||||
try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code);
|
||||
try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code);
|
||||
try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code);
|
||||
try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code);
|
||||
try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code);
|
||||
try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code);
|
||||
try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code);
|
||||
try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code);
|
||||
try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
|
||||
try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
|
||||
}
|
||||
1270
lib/std/compress/flate/Decompress.zig
Normal file
1270
lib/std/compress/flate/Decompress.zig
Normal file
File diff suppressed because it is too large
Load Diff
463
lib/std/compress/flate/HuffmanEncoder.zig
Normal file
463
lib/std/compress/flate/HuffmanEncoder.zig
Normal file
@@ -0,0 +1,463 @@
|
||||
const HuffmanEncoder = @This();
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
codes: []Code,
|
||||
// Reusable buffer with the longest possible frequency table.
|
||||
freq_cache: [max_num_frequencies + 1]LiteralNode,
|
||||
bit_count: [17]u32,
|
||||
lns: []LiteralNode, // sorted by literal, stored to avoid repeated allocation in generate
|
||||
lfs: []LiteralNode, // sorted by frequency, stored to avoid repeated allocation in generate
|
||||
|
||||
pub const LiteralNode = struct {
|
||||
literal: u16,
|
||||
freq: u16,
|
||||
|
||||
pub fn max() LiteralNode {
|
||||
return .{
|
||||
.literal = std.math.maxInt(u16),
|
||||
.freq = std.math.maxInt(u16),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
pub const Code = struct {
|
||||
code: u16 = 0,
|
||||
len: u16 = 0,
|
||||
};
|
||||
|
||||
/// The odd order in which the codegen code sizes are written.
|
||||
pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
|
||||
/// The number of codegen codes.
|
||||
pub const codegen_code_count = 19;
|
||||
|
||||
/// The largest distance code.
|
||||
pub const distance_code_count = 30;
|
||||
|
||||
/// Maximum number of literals.
|
||||
pub const max_num_lit = 286;
|
||||
|
||||
/// Max number of frequencies used for a Huffman Code
|
||||
/// Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
|
||||
/// The largest of these is max_num_lit.
|
||||
pub const max_num_frequencies = max_num_lit;
|
||||
|
||||
/// Biggest block size for uncompressed block.
|
||||
pub const max_store_block_size = 65535;
|
||||
/// The special code used to mark the end of a block.
|
||||
pub const end_block_marker = 256;
|
||||
|
||||
/// Update this Huffman Code object to be the minimum code for the specified frequency count.
|
||||
///
|
||||
/// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
|
||||
/// max_bits The maximum number of bits to use for any literal.
|
||||
pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void {
|
||||
var list = self.freq_cache[0 .. freq.len + 1];
|
||||
// Number of non-zero literals
|
||||
var count: u32 = 0;
|
||||
// Set list to be the set of all non-zero literals and their frequencies
|
||||
for (freq, 0..) |f, i| {
|
||||
if (f != 0) {
|
||||
list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
|
||||
count += 1;
|
||||
} else {
|
||||
list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
|
||||
self.codes[i].len = 0;
|
||||
}
|
||||
}
|
||||
list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
|
||||
|
||||
list = list[0..count];
|
||||
if (count <= 2) {
|
||||
// Handle the small cases here, because they are awkward for the general case code. With
|
||||
// two or fewer literals, everything has bit length 1.
|
||||
for (list, 0..) |node, i| {
|
||||
// "list" is in order of increasing literal value.
|
||||
self.codes[node.literal] = .{
|
||||
.code = @intCast(i),
|
||||
.len = 1,
|
||||
};
|
||||
}
|
||||
return;
|
||||
}
|
||||
self.lfs = list;
|
||||
std.mem.sort(LiteralNode, self.lfs, {}, byFreq);
|
||||
|
||||
// Get the number of literals for each bit count
|
||||
const bit_count = self.bitCounts(list, max_bits);
|
||||
// And do the assignment
|
||||
self.assignEncodingAndSize(bit_count, list);
|
||||
}
|
||||
|
||||
pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 {
|
||||
var total: u32 = 0;
|
||||
for (freq, 0..) |f, i| {
|
||||
if (f != 0) {
|
||||
total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
/// Return the number of literals assigned to each bit size in the Huffman encoding
|
||||
///
|
||||
/// This method is only called when list.len >= 3
|
||||
/// The cases of 0, 1, and 2 literals are handled by special case code.
|
||||
///
|
||||
/// list: An array of the literals with non-zero frequencies
|
||||
/// and their associated frequencies. The array is in order of increasing
|
||||
/// frequency, and has as its last element a special element with frequency
|
||||
/// `math.maxInt(i32)`
|
||||
///
|
||||
/// max_bits: The maximum number of bits that should be used to encode any literal.
|
||||
/// Must be less than 16.
|
||||
///
|
||||
/// Returns an integer array in which array[i] indicates the number of literals
|
||||
/// that should be encoded in i bits.
|
||||
fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 {
|
||||
var max_bits = max_bits_to_use;
|
||||
const n = list.len;
|
||||
const max_bits_limit = 16;
|
||||
|
||||
assert(max_bits < max_bits_limit);
|
||||
|
||||
// The tree can't have greater depth than n - 1, no matter what. This
|
||||
// saves a little bit of work in some small cases
|
||||
max_bits = @min(max_bits, n - 1);
|
||||
|
||||
// Create information about each of the levels.
|
||||
// A bogus "Level 0" whose sole purpose is so that
|
||||
// level1.prev.needed == 0. This makes level1.next_pair_freq
|
||||
// be a legitimate value that never gets chosen.
|
||||
var levels: [max_bits_limit]LevelInfo = std.mem.zeroes([max_bits_limit]LevelInfo);
|
||||
// leaf_counts[i] counts the number of literals at the left
|
||||
// of ancestors of the rightmost node at level i.
|
||||
// leaf_counts[i][j] is the number of literals at the left
|
||||
// of the level j ancestor.
|
||||
var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(@splat(0));
|
||||
|
||||
{
|
||||
var level = @as(u32, 1);
|
||||
while (level <= max_bits) : (level += 1) {
|
||||
// For every level, the first two items are the first two characters.
|
||||
// We initialize the levels as if we had already figured this out.
|
||||
levels[level] = LevelInfo{
|
||||
.level = level,
|
||||
.last_freq = list[1].freq,
|
||||
.next_char_freq = list[2].freq,
|
||||
.next_pair_freq = list[0].freq + list[1].freq,
|
||||
.needed = 0,
|
||||
};
|
||||
leaf_counts[level][level] = 2;
|
||||
if (level == 1) {
|
||||
levels[level].next_pair_freq = std.math.maxInt(i32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We need a total of 2*n - 2 items at top level and have already generated 2.
|
||||
levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;
|
||||
|
||||
{
|
||||
var level = max_bits;
|
||||
while (true) {
|
||||
var l = &levels[level];
|
||||
if (l.next_pair_freq == std.math.maxInt(i32) and l.next_char_freq == std.math.maxInt(i32)) {
|
||||
// We've run out of both leaves and pairs.
|
||||
// End all calculations for this level.
|
||||
// To make sure we never come back to this level or any lower level,
|
||||
// set next_pair_freq impossibly large.
|
||||
l.needed = 0;
|
||||
levels[level + 1].next_pair_freq = std.math.maxInt(i32);
|
||||
level += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
const prev_freq = l.last_freq;
|
||||
if (l.next_char_freq < l.next_pair_freq) {
|
||||
// The next item on this row is a leaf node.
|
||||
const next = leaf_counts[level][level] + 1;
|
||||
l.last_freq = l.next_char_freq;
|
||||
// Lower leaf_counts are the same of the previous node.
|
||||
leaf_counts[level][level] = next;
|
||||
if (next >= list.len) {
|
||||
l.next_char_freq = LiteralNode.max().freq;
|
||||
} else {
|
||||
l.next_char_freq = list[next].freq;
|
||||
}
|
||||
} else {
|
||||
// The next item on this row is a pair from the previous row.
|
||||
// next_pair_freq isn't valid until we generate two
|
||||
// more values in the level below
|
||||
l.last_freq = l.next_pair_freq;
|
||||
// Take leaf counts from the lower level, except counts[level] remains the same.
|
||||
@memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
|
||||
levels[l.level - 1].needed = 2;
|
||||
}
|
||||
|
||||
l.needed -= 1;
|
||||
if (l.needed == 0) {
|
||||
// We've done everything we need to do for this level.
|
||||
// Continue calculating one level up. Fill in next_pair_freq
|
||||
// of that level with the sum of the two nodes we've just calculated on
|
||||
// this level.
|
||||
if (l.level == max_bits) {
|
||||
// All done!
|
||||
break;
|
||||
}
|
||||
levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
|
||||
level += 1;
|
||||
} else {
|
||||
// If we stole from below, move down temporarily to replenish it.
|
||||
while (levels[level - 1].needed > 0) {
|
||||
level -= 1;
|
||||
if (level == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Somethings is wrong if at the end, the top level is null or hasn't used
|
||||
// all of the leaves.
|
||||
assert(leaf_counts[max_bits][max_bits] == n);
|
||||
|
||||
var bit_count = self.bit_count[0 .. max_bits + 1];
|
||||
var bits: u32 = 1;
|
||||
const counts = &leaf_counts[max_bits];
|
||||
{
|
||||
var level = max_bits;
|
||||
while (level > 0) : (level -= 1) {
|
||||
// counts[level] gives the number of literals requiring at least "bits"
|
||||
// bits to encode.
|
||||
bit_count[bits] = counts[level] - counts[level - 1];
|
||||
bits += 1;
|
||||
if (level == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bit_count;
|
||||
}
|
||||
|
||||
/// Look at the leaves and assign them a bit count and an encoding as specified
|
||||
/// in RFC 1951 3.2.2
|
||||
fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void {
|
||||
var code = @as(u16, 0);
|
||||
var list = list_arg;
|
||||
|
||||
for (bit_count, 0..) |bits, n| {
|
||||
code <<= 1;
|
||||
if (n == 0 or bits == 0) {
|
||||
continue;
|
||||
}
|
||||
// The literals list[list.len-bits] .. list[list.len-bits]
|
||||
// are encoded using "bits" bits, and get the values
|
||||
// code, code + 1, .... The code values are
|
||||
// assigned in literal order (not frequency order).
|
||||
const chunk = list[list.len - @as(u32, @intCast(bits)) ..];
|
||||
|
||||
self.lns = chunk;
|
||||
std.mem.sort(LiteralNode, self.lns, {}, byLiteral);
|
||||
|
||||
for (chunk) |node| {
|
||||
self.codes[node.literal] = .{
|
||||
.code = bitReverse(u16, code, @as(u5, @intCast(n))),
|
||||
.len = @as(u16, @intCast(n)),
|
||||
};
|
||||
code += 1;
|
||||
}
|
||||
list = list[0 .. list.len - @as(u32, @intCast(bits))];
|
||||
}
|
||||
}
|
||||
|
||||
fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
|
||||
_ = context;
|
||||
if (a.freq == b.freq) {
|
||||
return a.literal < b.literal;
|
||||
}
|
||||
return a.freq < b.freq;
|
||||
}
|
||||
|
||||
/// Describes the state of the constructed tree for a given depth.
|
||||
const LevelInfo = struct {
|
||||
/// Our level. for better printing
|
||||
level: u32,
|
||||
/// The frequency of the last node at this level
|
||||
last_freq: u32,
|
||||
/// The frequency of the next character to add to this level
|
||||
next_char_freq: u32,
|
||||
/// The frequency of the next pair (from level below) to add to this level.
|
||||
/// Only valid if the "needed" value of the next lower level is 0.
|
||||
next_pair_freq: u32,
|
||||
/// The number of chains remaining to generate for this level before moving
|
||||
/// up to the next level
|
||||
needed: u32,
|
||||
};
|
||||
|
||||
fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
|
||||
_ = context;
|
||||
return a.literal < b.literal;
|
||||
}
|
||||
|
||||
/// Reverse bit-by-bit a N-bit code.
|
||||
fn bitReverse(comptime T: type, value: T, n: usize) T {
|
||||
const r = @bitReverse(value);
|
||||
return r >> @as(std.math.Log2Int(T), @intCast(@typeInfo(T).int.bits - n));
|
||||
}
|
||||
|
||||
test bitReverse {
|
||||
const ReverseBitsTest = struct {
|
||||
in: u16,
|
||||
bit_count: u5,
|
||||
out: u16,
|
||||
};
|
||||
|
||||
const reverse_bits_tests = [_]ReverseBitsTest{
|
||||
.{ .in = 1, .bit_count = 1, .out = 1 },
|
||||
.{ .in = 1, .bit_count = 2, .out = 2 },
|
||||
.{ .in = 1, .bit_count = 3, .out = 4 },
|
||||
.{ .in = 1, .bit_count = 4, .out = 8 },
|
||||
.{ .in = 1, .bit_count = 5, .out = 16 },
|
||||
.{ .in = 17, .bit_count = 5, .out = 17 },
|
||||
.{ .in = 257, .bit_count = 9, .out = 257 },
|
||||
.{ .in = 29, .bit_count = 5, .out = 23 },
|
||||
};
|
||||
|
||||
for (reverse_bits_tests) |h| {
|
||||
const v = bitReverse(u16, h.in, h.bit_count);
|
||||
try std.testing.expectEqual(h.out, v);
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a HuffmanCode corresponding to the fixed literal table
|
||||
pub fn fixedLiteralEncoder(codes: *[max_num_frequencies]Code) HuffmanEncoder {
|
||||
var h: HuffmanEncoder = undefined;
|
||||
h.codes = codes;
|
||||
var ch: u16 = 0;
|
||||
|
||||
while (ch < max_num_frequencies) : (ch += 1) {
|
||||
var bits: u16 = undefined;
|
||||
var size: u16 = undefined;
|
||||
switch (ch) {
|
||||
0...143 => {
|
||||
// size 8, 000110000 .. 10111111
|
||||
bits = ch + 48;
|
||||
size = 8;
|
||||
},
|
||||
144...255 => {
|
||||
// size 9, 110010000 .. 111111111
|
||||
bits = ch + 400 - 144;
|
||||
size = 9;
|
||||
},
|
||||
256...279 => {
|
||||
// size 7, 0000000 .. 0010111
|
||||
bits = ch - 256;
|
||||
size = 7;
|
||||
},
|
||||
else => {
|
||||
// size 8, 11000000 .. 11000111
|
||||
bits = ch + 192 - 280;
|
||||
size = 8;
|
||||
},
|
||||
}
|
||||
h.codes[ch] = .{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
pub fn fixedDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder {
|
||||
var h: HuffmanEncoder = undefined;
|
||||
h.codes = codes;
|
||||
for (h.codes, 0..) |_, ch| {
|
||||
h.codes[ch] = .{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
pub fn huffmanDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder {
|
||||
var distance_freq: [distance_code_count]u16 = @splat(0);
|
||||
distance_freq[0] = 1;
|
||||
// huff_distance is a static distance encoder used for huffman only encoding.
|
||||
// It can be reused since we will not be encoding distance values.
|
||||
var h: HuffmanEncoder = .{};
|
||||
h.codes = codes;
|
||||
h.generate(distance_freq[0..], 15);
|
||||
return h;
|
||||
}
|
||||
|
||||
test "generate a Huffman code for the fixed literal table specific to Deflate" {
|
||||
var codes: [max_num_frequencies]Code = undefined;
|
||||
const enc: HuffmanEncoder = .fixedLiteralEncoder(&codes);
|
||||
for (enc.codes) |c| {
|
||||
switch (c.len) {
|
||||
7 => {
|
||||
const v = @bitReverse(@as(u7, @intCast(c.code)));
|
||||
try testing.expect(v <= 0b0010111);
|
||||
},
|
||||
8 => {
|
||||
const v = @bitReverse(@as(u8, @intCast(c.code)));
|
||||
try testing.expect((v >= 0b000110000 and v <= 0b10111111) or
|
||||
(v >= 0b11000000 and v <= 11000111));
|
||||
},
|
||||
9 => {
|
||||
const v = @bitReverse(@as(u9, @intCast(c.code)));
|
||||
try testing.expect(v >= 0b110010000 and v <= 0b111111111);
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" {
|
||||
var codes: [distance_code_count]Code = undefined;
|
||||
const enc = fixedDistanceEncoder(&codes);
|
||||
for (enc.codes) |c| {
|
||||
const v = @bitReverse(@as(u5, @intCast(c.code)));
|
||||
try testing.expect(v <= 29);
|
||||
try testing.expect(c.len == 5);
|
||||
}
|
||||
}
|
||||
|
||||
pub const fixed_codes = [_]u8{
|
||||
0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100,
|
||||
0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100,
|
||||
0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010,
|
||||
0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010,
|
||||
0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010,
|
||||
0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010,
|
||||
0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110,
|
||||
0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110,
|
||||
0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110,
|
||||
0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110,
|
||||
0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001,
|
||||
0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001,
|
||||
0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001,
|
||||
0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001,
|
||||
0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101,
|
||||
0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101,
|
||||
0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101,
|
||||
0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101,
|
||||
0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100,
|
||||
0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110,
|
||||
0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100,
|
||||
0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101,
|
||||
0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010,
|
||||
0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110,
|
||||
0b11011010, 0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110,
|
||||
0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111,
|
||||
0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011,
|
||||
0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101,
|
||||
0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110,
|
||||
0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100,
|
||||
0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101,
|
||||
0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010,
|
||||
0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110,
|
||||
0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000,
|
||||
0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010,
|
||||
0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100,
|
||||
0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011,
|
||||
0b10100011,
|
||||
};
|
||||
@@ -5,22 +5,27 @@
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const expect = testing.expect;
|
||||
const consts = @import("consts.zig");
|
||||
const flate = @import("../flate.zig");
|
||||
const Token = @import("Token.zig");
|
||||
|
||||
const Self = @This();
|
||||
const Lookup = @This();
|
||||
|
||||
const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761
|
||||
const chain_len = 2 * consts.history.len;
|
||||
const chain_len = 2 * flate.history_len;
|
||||
|
||||
pub const bits = 15;
|
||||
pub const len = 1 << bits;
|
||||
pub const shift = 32 - bits;
|
||||
|
||||
// Maps hash => first position
|
||||
head: [consts.lookup.len]u16 = [_]u16{0} ** consts.lookup.len,
|
||||
head: [len]u16 = [_]u16{0} ** len,
|
||||
// Maps position => previous positions for the same hash value
|
||||
chain: [chain_len]u16 = [_]u16{0} ** (chain_len),
|
||||
|
||||
// Calculates hash of the 4 bytes from data.
|
||||
// Inserts `pos` position of that hash in the lookup tables.
|
||||
// Returns previous location with the same hash value.
|
||||
pub fn add(self: *Self, data: []const u8, pos: u16) u16 {
|
||||
pub fn add(self: *Lookup, data: []const u8, pos: u16) u16 {
|
||||
if (data.len < 4) return 0;
|
||||
const h = hash(data[0..4]);
|
||||
return self.set(h, pos);
|
||||
@@ -28,11 +33,11 @@ pub fn add(self: *Self, data: []const u8, pos: u16) u16 {
|
||||
|
||||
// Returns previous location with the same hash value given the current
|
||||
// position.
|
||||
pub fn prev(self: *Self, pos: u16) u16 {
|
||||
pub fn prev(self: *Lookup, pos: u16) u16 {
|
||||
return self.chain[pos];
|
||||
}
|
||||
|
||||
fn set(self: *Self, h: u32, pos: u16) u16 {
|
||||
fn set(self: *Lookup, h: u32, pos: u16) u16 {
|
||||
const p = self.head[h];
|
||||
self.head[h] = pos;
|
||||
self.chain[pos] = p;
|
||||
@@ -40,7 +45,7 @@ fn set(self: *Self, h: u32, pos: u16) u16 {
|
||||
}
|
||||
|
||||
// Slide all positions in head and chain for `n`
|
||||
pub fn slide(self: *Self, n: u16) void {
|
||||
pub fn slide(self: *Lookup, n: u16) void {
|
||||
for (&self.head) |*v| {
|
||||
v.* -|= n;
|
||||
}
|
||||
@@ -52,8 +57,8 @@ pub fn slide(self: *Self, n: u16) void {
|
||||
|
||||
// Add `len` 4 bytes hashes from `data` into lookup.
|
||||
// Position of the first byte is `pos`.
|
||||
pub fn bulkAdd(self: *Self, data: []const u8, len: u16, pos: u16) void {
|
||||
if (len == 0 or data.len < consts.match.min_length) {
|
||||
pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void {
|
||||
if (length == 0 or data.len < Token.min_length) {
|
||||
return;
|
||||
}
|
||||
var hb =
|
||||
@@ -64,7 +69,7 @@ pub fn bulkAdd(self: *Self, data: []const u8, len: u16, pos: u16) void {
|
||||
_ = self.set(hashu(hb), pos);
|
||||
|
||||
var i = pos;
|
||||
for (4..@min(len + 3, data.len)) |j| {
|
||||
for (4..@min(length + 3, data.len)) |j| {
|
||||
hb = (hb << 8) | @as(u32, data[j]);
|
||||
i += 1;
|
||||
_ = self.set(hashu(hb), i);
|
||||
@@ -80,7 +85,7 @@ fn hash(b: *const [4]u8) u32 {
|
||||
}
|
||||
|
||||
fn hashu(v: u32) u32 {
|
||||
return @intCast((v *% prime4) >> consts.lookup.shift);
|
||||
return @intCast((v *% prime4) >> shift);
|
||||
}
|
||||
|
||||
test add {
|
||||
@@ -91,7 +96,7 @@ test add {
|
||||
0x01, 0x02, 0x03,
|
||||
};
|
||||
|
||||
var h: Self = .{};
|
||||
var h: Lookup = .{};
|
||||
for (data, 0..) |_, i| {
|
||||
const p = h.add(data[i..], @intCast(i));
|
||||
if (i >= 8 and i < 24) {
|
||||
@@ -101,7 +106,7 @@ test add {
|
||||
}
|
||||
}
|
||||
|
||||
const v = Self.hash(data[2 .. 2 + 4]);
|
||||
const v = Lookup.hash(data[2 .. 2 + 4]);
|
||||
try expect(h.head[v] == 2 + 16);
|
||||
try expect(h.chain[2 + 16] == 2 + 8);
|
||||
try expect(h.chain[2 + 8] == 2);
|
||||
@@ -111,13 +116,13 @@ test bulkAdd {
|
||||
const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
|
||||
|
||||
// one by one
|
||||
var h: Self = .{};
|
||||
var h: Lookup = .{};
|
||||
for (data, 0..) |_, i| {
|
||||
_ = h.add(data[i..], @intCast(i));
|
||||
}
|
||||
|
||||
// in bulk
|
||||
var bh: Self = .{};
|
||||
var bh: Lookup = .{};
|
||||
bh.bulkAdd(data, data.len, 0);
|
||||
|
||||
try testing.expectEqualSlices(u16, &h.head, &bh.head);
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
//! Used in deflate (compression), holds uncompressed data form which Tokens are
|
||||
//! produces. In combination with Lookup it is used to find matches in history data.
|
||||
//!
|
||||
const std = @import("std");
|
||||
const consts = @import("consts.zig");
|
||||
|
||||
const expect = testing.expect;
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
const hist_len = consts.history.len;
|
||||
const buffer_len = 2 * hist_len;
|
||||
const min_lookahead = consts.match.min_length + consts.match.max_length;
|
||||
const max_rp = buffer_len - min_lookahead;
|
||||
|
||||
const Self = @This();
|
||||
|
||||
buffer: [buffer_len]u8 = undefined,
|
||||
wp: usize = 0, // write position
|
||||
rp: usize = 0, // read position
|
||||
fp: isize = 0, // last flush position, tokens are build from fp..rp
|
||||
|
||||
/// Returns number of bytes written, or 0 if buffer is full and need to slide.
|
||||
pub fn write(self: *Self, buf: []const u8) usize {
|
||||
if (self.rp >= max_rp) return 0; // need to slide
|
||||
|
||||
const n = @min(buf.len, buffer_len - self.wp);
|
||||
@memcpy(self.buffer[self.wp .. self.wp + n], buf[0..n]);
|
||||
self.wp += n;
|
||||
return n;
|
||||
}
|
||||
|
||||
/// Slide buffer for hist_len.
|
||||
/// Drops old history, preserves between hist_len and hist_len - min_lookahead.
|
||||
/// Returns number of bytes removed.
|
||||
pub fn slide(self: *Self) u16 {
|
||||
assert(self.rp >= max_rp and self.wp >= self.rp);
|
||||
const n = self.wp - hist_len;
|
||||
@memcpy(self.buffer[0..n], self.buffer[hist_len..self.wp]);
|
||||
self.rp -= hist_len;
|
||||
self.wp -= hist_len;
|
||||
self.fp -= hist_len;
|
||||
return @intCast(n);
|
||||
}
|
||||
|
||||
/// Data from the current position (read position). Those part of the buffer is
|
||||
/// not converted to tokens yet.
|
||||
fn lookahead(self: *Self) []const u8 {
|
||||
assert(self.wp >= self.rp);
|
||||
return self.buffer[self.rp..self.wp];
|
||||
}
|
||||
|
||||
/// Returns part of the lookahead buffer. If should_flush is set no lookahead is
|
||||
/// preserved otherwise preserves enough data for the longest match. Returns
|
||||
/// null if there is not enough data.
|
||||
pub fn activeLookahead(self: *Self, should_flush: bool) ?[]const u8 {
|
||||
const min: usize = if (should_flush) 0 else min_lookahead;
|
||||
const lh = self.lookahead();
|
||||
return if (lh.len > min) lh else null;
|
||||
}
|
||||
|
||||
/// Advances read position, shrinks lookahead.
|
||||
pub fn advance(self: *Self, n: u16) void {
|
||||
assert(self.wp >= self.rp + n);
|
||||
self.rp += n;
|
||||
}
|
||||
|
||||
/// Returns writable part of the buffer, where new uncompressed data can be
|
||||
/// written.
|
||||
pub fn writable(self: *Self) []u8 {
|
||||
return self.buffer[self.wp..];
|
||||
}
|
||||
|
||||
/// Notification of what part of writable buffer is filled with data.
|
||||
pub fn written(self: *Self, n: usize) void {
|
||||
self.wp += n;
|
||||
}
|
||||
|
||||
/// Finds match length between previous and current position.
|
||||
/// Used in hot path!
|
||||
pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 {
|
||||
const max_len: usize = @min(self.wp - curr_pos, consts.match.max_length);
|
||||
// lookahead buffers from previous and current positions
|
||||
const prev_lh = self.buffer[prev_pos..][0..max_len];
|
||||
const curr_lh = self.buffer[curr_pos..][0..max_len];
|
||||
|
||||
// If we already have match (min_len > 0),
|
||||
// test the first byte above previous len a[min_len] != b[min_len]
|
||||
// and then all the bytes from that position to zero.
|
||||
// That is likely positions to find difference than looping from first bytes.
|
||||
var i: usize = min_len;
|
||||
if (i > 0) {
|
||||
if (max_len <= i) return 0;
|
||||
while (true) {
|
||||
if (prev_lh[i] != curr_lh[i]) return 0;
|
||||
if (i == 0) break;
|
||||
i -= 1;
|
||||
}
|
||||
i = min_len;
|
||||
}
|
||||
while (i < max_len) : (i += 1)
|
||||
if (prev_lh[i] != curr_lh[i]) break;
|
||||
return if (i >= consts.match.min_length) @intCast(i) else 0;
|
||||
}
|
||||
|
||||
/// Current position of non-compressed data. Data before rp are already converted
|
||||
/// to tokens.
|
||||
pub fn pos(self: *Self) u16 {
|
||||
return @intCast(self.rp);
|
||||
}
|
||||
|
||||
/// Notification that token list is cleared.
|
||||
pub fn flush(self: *Self) void {
|
||||
self.fp = @intCast(self.rp);
|
||||
}
|
||||
|
||||
/// Part of the buffer since last flush or null if there was slide in between (so
|
||||
/// fp becomes negative).
|
||||
pub fn tokensBuffer(self: *Self) ?[]const u8 {
|
||||
assert(self.fp <= self.rp);
|
||||
if (self.fp < 0) return null;
|
||||
return self.buffer[@intCast(self.fp)..self.rp];
|
||||
}
|
||||
|
||||
test match {
|
||||
const data = "Blah blah blah blah blah!";
|
||||
var win: Self = .{};
|
||||
try expect(win.write(data) == data.len);
|
||||
try expect(win.wp == data.len);
|
||||
try expect(win.rp == 0);
|
||||
|
||||
// length between l symbols
|
||||
try expect(win.match(1, 6, 0) == 18);
|
||||
try expect(win.match(1, 11, 0) == 13);
|
||||
try expect(win.match(1, 16, 0) == 8);
|
||||
try expect(win.match(1, 21, 0) == 0);
|
||||
|
||||
// position 15 = "blah blah!"
|
||||
// position 20 = "blah!"
|
||||
try expect(win.match(15, 20, 0) == 4);
|
||||
try expect(win.match(15, 20, 3) == 4);
|
||||
try expect(win.match(15, 20, 4) == 0);
|
||||
}
|
||||
|
||||
test slide {
|
||||
var win: Self = .{};
|
||||
win.wp = Self.buffer_len - 11;
|
||||
win.rp = Self.buffer_len - 111;
|
||||
win.buffer[win.rp] = 0xab;
|
||||
try expect(win.lookahead().len == 100);
|
||||
try expect(win.tokensBuffer().?.len == win.rp);
|
||||
|
||||
const n = win.slide();
|
||||
try expect(n == 32757);
|
||||
try expect(win.buffer[win.rp] == 0xab);
|
||||
try expect(win.rp == Self.hist_len - 111);
|
||||
try expect(win.wp == Self.hist_len - 11);
|
||||
try expect(win.lookahead().len == 100);
|
||||
try expect(win.tokensBuffer() == null);
|
||||
}
|
||||
@@ -6,7 +6,6 @@ const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const print = std.debug.print;
|
||||
const expect = std.testing.expect;
|
||||
const consts = @import("consts.zig").match;
|
||||
|
||||
const Token = @This();
|
||||
|
||||
@@ -21,16 +20,23 @@ dist: u15 = 0,
|
||||
len_lit: u8 = 0,
|
||||
kind: Kind = .literal,
|
||||
|
||||
pub const base_length = 3; // smallest match length per the RFC section 3.2.5
|
||||
pub const min_length = 4; // min length used in this algorithm
|
||||
pub const max_length = 258;
|
||||
|
||||
pub const min_distance = 1;
|
||||
pub const max_distance = std.compress.flate.history_len;
|
||||
|
||||
pub fn literal(t: Token) u8 {
|
||||
return t.len_lit;
|
||||
}
|
||||
|
||||
pub fn distance(t: Token) u16 {
|
||||
return @as(u16, t.dist) + consts.min_distance;
|
||||
return @as(u16, t.dist) + min_distance;
|
||||
}
|
||||
|
||||
pub fn length(t: Token) u16 {
|
||||
return @as(u16, t.len_lit) + consts.base_length;
|
||||
return @as(u16, t.len_lit) + base_length;
|
||||
}
|
||||
|
||||
pub fn initLiteral(lit: u8) Token {
|
||||
@@ -40,12 +46,12 @@ pub fn initLiteral(lit: u8) Token {
|
||||
// distance range 1 - 32768, stored in dist as 0 - 32767 (u15)
|
||||
// length range 3 - 258, stored in len_lit as 0 - 255 (u8)
|
||||
pub fn initMatch(dist: u16, len: u16) Token {
|
||||
assert(len >= consts.min_length and len <= consts.max_length);
|
||||
assert(dist >= consts.min_distance and dist <= consts.max_distance);
|
||||
assert(len >= min_length and len <= max_length);
|
||||
assert(dist >= min_distance and dist <= max_distance);
|
||||
return .{
|
||||
.kind = .match,
|
||||
.dist = @intCast(dist - consts.min_distance),
|
||||
.len_lit = @intCast(len - consts.base_length),
|
||||
.dist = @intCast(dist - min_distance),
|
||||
.len_lit = @intCast(len - base_length),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,422 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
pub fn bitReader(comptime T: type, reader: anytype) BitReader(T, @TypeOf(reader)) {
|
||||
return BitReader(T, @TypeOf(reader)).init(reader);
|
||||
}
|
||||
|
||||
pub fn BitReader64(comptime ReaderType: type) type {
|
||||
return BitReader(u64, ReaderType);
|
||||
}
|
||||
|
||||
pub fn BitReader32(comptime ReaderType: type) type {
|
||||
return BitReader(u32, ReaderType);
|
||||
}
|
||||
|
||||
/// Bit reader used during inflate (decompression). Has internal buffer of 64
|
||||
/// bits which shifts right after bits are consumed. Uses forward_reader to fill
|
||||
/// that internal buffer when needed.
|
||||
///
|
||||
/// readF is the core function. Supports few different ways of getting bits
|
||||
/// controlled by flags. In hot path we try to avoid checking whether we need to
|
||||
/// fill buffer from forward_reader by calling fill in advance and readF with
|
||||
/// buffered flag set.
|
||||
///
|
||||
pub fn BitReader(comptime T: type, comptime ReaderType: type) type {
|
||||
assert(T == u32 or T == u64);
|
||||
const t_bytes: usize = @sizeOf(T);
|
||||
const Tshift = if (T == u64) u6 else u5;
|
||||
|
||||
return struct {
|
||||
// Underlying reader used for filling internal bits buffer
|
||||
forward_reader: ReaderType = undefined,
|
||||
// Internal buffer of 64 bits
|
||||
bits: T = 0,
|
||||
// Number of bits in the buffer
|
||||
nbits: u32 = 0,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub const Error = ReaderType.Error || error{EndOfStream};
|
||||
|
||||
pub fn init(rdr: ReaderType) Self {
|
||||
var self = Self{ .forward_reader = rdr };
|
||||
self.fill(1) catch {};
|
||||
return self;
|
||||
}
|
||||
|
||||
/// Try to have `nice` bits are available in buffer. Reads from
|
||||
/// forward reader if there is no `nice` bits in buffer. Returns error
|
||||
/// if end of forward stream is reached and internal buffer is empty.
|
||||
/// It will not error if less than `nice` bits are in buffer, only when
|
||||
/// all bits are exhausted. During inflate we usually know what is the
|
||||
/// maximum bits for the next step but usually that step will need less
|
||||
/// bits to decode. So `nice` is not hard limit, it will just try to have
|
||||
/// that number of bits available. If end of forward stream is reached
|
||||
/// it may be some extra zero bits in buffer.
|
||||
pub inline fn fill(self: *Self, nice: u6) !void {
|
||||
if (self.nbits >= nice and nice != 0) {
|
||||
return; // We have enough bits
|
||||
}
|
||||
// Read more bits from forward reader
|
||||
|
||||
// Number of empty bytes in bits, round nbits to whole bytes.
|
||||
const empty_bytes =
|
||||
@as(u8, if (self.nbits & 0x7 == 0) t_bytes else t_bytes - 1) - // 8 for 8, 16, 24..., 7 otherwise
|
||||
(self.nbits >> 3); // 0 for 0-7, 1 for 8-16, ... same as / 8
|
||||
|
||||
var buf: [t_bytes]u8 = [_]u8{0} ** t_bytes;
|
||||
const bytes_read = self.forward_reader.readAll(buf[0..empty_bytes]) catch 0;
|
||||
if (bytes_read > 0) {
|
||||
const u: T = std.mem.readInt(T, buf[0..t_bytes], .little);
|
||||
self.bits |= u << @as(Tshift, @intCast(self.nbits));
|
||||
self.nbits += 8 * @as(u8, @intCast(bytes_read));
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.nbits == 0)
|
||||
return error.EndOfStream;
|
||||
}
|
||||
|
||||
/// Read exactly buf.len bytes into buf.
|
||||
pub fn readAll(self: *Self, buf: []u8) !void {
|
||||
assert(self.alignBits() == 0); // internal bits must be at byte boundary
|
||||
|
||||
// First read from internal bits buffer.
|
||||
var n: usize = 0;
|
||||
while (self.nbits > 0 and n < buf.len) {
|
||||
buf[n] = try self.readF(u8, flag.buffered);
|
||||
n += 1;
|
||||
}
|
||||
// Then use forward reader for all other bytes.
|
||||
try self.forward_reader.readNoEof(buf[n..]);
|
||||
}
|
||||
|
||||
pub const flag = struct {
|
||||
pub const peek: u3 = 0b001; // dont advance internal buffer, just get bits, leave them in buffer
|
||||
pub const buffered: u3 = 0b010; // assume that there is no need to fill, fill should be called before
|
||||
pub const reverse: u3 = 0b100; // bit reverse read bits
|
||||
};
|
||||
|
||||
/// Alias for readF(U, 0).
|
||||
pub fn read(self: *Self, comptime U: type) !U {
|
||||
return self.readF(U, 0);
|
||||
}
|
||||
|
||||
/// Alias for readF with flag.peak set.
|
||||
pub inline fn peekF(self: *Self, comptime U: type, comptime how: u3) !U {
|
||||
return self.readF(U, how | flag.peek);
|
||||
}
|
||||
|
||||
/// Read with flags provided.
|
||||
pub fn readF(self: *Self, comptime U: type, comptime how: u3) !U {
|
||||
if (U == T) {
|
||||
assert(how == 0);
|
||||
assert(self.alignBits() == 0);
|
||||
try self.fill(@bitSizeOf(T));
|
||||
if (self.nbits != @bitSizeOf(T)) return error.EndOfStream;
|
||||
const v = self.bits;
|
||||
self.nbits = 0;
|
||||
self.bits = 0;
|
||||
return v;
|
||||
}
|
||||
const n: Tshift = @bitSizeOf(U);
|
||||
switch (how) {
|
||||
0 => { // `normal` read
|
||||
try self.fill(n); // ensure that there are n bits in the buffer
|
||||
const u: U = @truncate(self.bits); // get n bits
|
||||
try self.shift(n); // advance buffer for n
|
||||
return u;
|
||||
},
|
||||
(flag.peek) => { // no shift, leave bits in the buffer
|
||||
try self.fill(n);
|
||||
return @truncate(self.bits);
|
||||
},
|
||||
flag.buffered => { // no fill, assume that buffer has enough bits
|
||||
const u: U = @truncate(self.bits);
|
||||
try self.shift(n);
|
||||
return u;
|
||||
},
|
||||
(flag.reverse) => { // same as 0 with bit reverse
|
||||
try self.fill(n);
|
||||
const u: U = @truncate(self.bits);
|
||||
try self.shift(n);
|
||||
return @bitReverse(u);
|
||||
},
|
||||
(flag.peek | flag.reverse) => {
|
||||
try self.fill(n);
|
||||
return @bitReverse(@as(U, @truncate(self.bits)));
|
||||
},
|
||||
(flag.buffered | flag.reverse) => {
|
||||
const u: U = @truncate(self.bits);
|
||||
try self.shift(n);
|
||||
return @bitReverse(u);
|
||||
},
|
||||
(flag.peek | flag.buffered) => {
|
||||
return @truncate(self.bits);
|
||||
},
|
||||
(flag.peek | flag.buffered | flag.reverse) => {
|
||||
return @bitReverse(@as(U, @truncate(self.bits)));
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Read n number of bits.
|
||||
/// Only buffered flag can be used in how.
|
||||
pub fn readN(self: *Self, n: u4, comptime how: u3) !u16 {
|
||||
switch (how) {
|
||||
0 => {
|
||||
try self.fill(n);
|
||||
},
|
||||
flag.buffered => {},
|
||||
else => unreachable,
|
||||
}
|
||||
const mask: u16 = (@as(u16, 1) << n) - 1;
|
||||
const u: u16 = @as(u16, @truncate(self.bits)) & mask;
|
||||
try self.shift(n);
|
||||
return u;
|
||||
}
|
||||
|
||||
/// Advance buffer for n bits.
|
||||
pub fn shift(self: *Self, n: Tshift) !void {
|
||||
if (n > self.nbits) return error.EndOfStream;
|
||||
self.bits >>= n;
|
||||
self.nbits -= n;
|
||||
}
|
||||
|
||||
/// Skip n bytes.
|
||||
pub fn skipBytes(self: *Self, n: u16) !void {
|
||||
for (0..n) |_| {
|
||||
try self.fill(8);
|
||||
try self.shift(8);
|
||||
}
|
||||
}
|
||||
|
||||
// Number of bits to align stream to the byte boundary.
|
||||
fn alignBits(self: *Self) u3 {
|
||||
return @intCast(self.nbits & 0x7);
|
||||
}
|
||||
|
||||
/// Align stream to the byte boundary.
|
||||
pub fn alignToByte(self: *Self) void {
|
||||
const ab = self.alignBits();
|
||||
if (ab > 0) self.shift(ab) catch unreachable;
|
||||
}
|
||||
|
||||
/// Skip zero terminated string.
|
||||
pub fn skipStringZ(self: *Self) !void {
|
||||
while (true) {
|
||||
if (try self.readF(u8, 0) == 0) break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Read deflate fixed fixed code.
|
||||
/// Reads first 7 bits, and then maybe 1 or 2 more to get full 7,8 or 9 bit code.
|
||||
/// ref: https://datatracker.ietf.org/doc/html/rfc1951#page-12
|
||||
/// Lit Value Bits Codes
|
||||
/// --------- ---- -----
|
||||
/// 0 - 143 8 00110000 through
|
||||
/// 10111111
|
||||
/// 144 - 255 9 110010000 through
|
||||
/// 111111111
|
||||
/// 256 - 279 7 0000000 through
|
||||
/// 0010111
|
||||
/// 280 - 287 8 11000000 through
|
||||
/// 11000111
|
||||
pub fn readFixedCode(self: *Self) !u16 {
|
||||
try self.fill(7 + 2);
|
||||
const code7 = try self.readF(u7, flag.buffered | flag.reverse);
|
||||
if (code7 <= 0b0010_111) { // 7 bits, 256-279, codes 0000_000 - 0010_111
|
||||
return @as(u16, code7) + 256;
|
||||
} else if (code7 <= 0b1011_111) { // 8 bits, 0-143, codes 0011_0000 through 1011_1111
|
||||
return (@as(u16, code7) << 1) + @as(u16, try self.readF(u1, flag.buffered)) - 0b0011_0000;
|
||||
} else if (code7 <= 0b1100_011) { // 8 bit, 280-287, codes 1100_0000 - 1100_0111
|
||||
return (@as(u16, code7 - 0b1100000) << 1) + try self.readF(u1, flag.buffered) + 280;
|
||||
} else { // 9 bit, 144-255, codes 1_1001_0000 - 1_1111_1111
|
||||
return (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, try self.readF(u2, flag.buffered | flag.reverse)) + 144;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test "readF" {
|
||||
var fbs = std.io.fixedBufferStream(&[_]u8{ 0xf3, 0x48, 0xcd, 0xc9, 0x00, 0x00 });
|
||||
var br = bitReader(u64, fbs.reader());
|
||||
const F = BitReader64(@TypeOf(fbs.reader())).flag;
|
||||
|
||||
try testing.expectEqual(@as(u8, 48), br.nbits);
|
||||
try testing.expectEqual(@as(u64, 0xc9cd48f3), br.bits);
|
||||
|
||||
try testing.expect(try br.readF(u1, 0) == 0b0000_0001);
|
||||
try testing.expect(try br.readF(u2, 0) == 0b0000_0001);
|
||||
try testing.expectEqual(@as(u8, 48 - 3), br.nbits);
|
||||
try testing.expectEqual(@as(u3, 5), br.alignBits());
|
||||
|
||||
try testing.expect(try br.readF(u8, F.peek) == 0b0001_1110);
|
||||
try testing.expect(try br.readF(u9, F.peek) == 0b1_0001_1110);
|
||||
try br.shift(9);
|
||||
try testing.expectEqual(@as(u8, 36), br.nbits);
|
||||
try testing.expectEqual(@as(u3, 4), br.alignBits());
|
||||
|
||||
try testing.expect(try br.readF(u4, 0) == 0b0100);
|
||||
try testing.expectEqual(@as(u8, 32), br.nbits);
|
||||
try testing.expectEqual(@as(u3, 0), br.alignBits());
|
||||
|
||||
try br.shift(1);
|
||||
try testing.expectEqual(@as(u3, 7), br.alignBits());
|
||||
try br.shift(1);
|
||||
try testing.expectEqual(@as(u3, 6), br.alignBits());
|
||||
br.alignToByte();
|
||||
try testing.expectEqual(@as(u3, 0), br.alignBits());
|
||||
|
||||
try testing.expectEqual(@as(u64, 0xc9), br.bits);
|
||||
try testing.expectEqual(@as(u16, 0x9), try br.readN(4, 0));
|
||||
try testing.expectEqual(@as(u16, 0xc), try br.readN(4, 0));
|
||||
}
|
||||
|
||||
test "read block type 1 data" {
|
||||
inline for ([_]type{ u64, u32 }) |T| {
|
||||
const data = [_]u8{
|
||||
0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
|
||||
0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
|
||||
0x0c, 0x01, 0x02, 0x03, //
|
||||
0xaa, 0xbb, 0xcc, 0xdd,
|
||||
};
|
||||
var fbs = std.io.fixedBufferStream(&data);
|
||||
var br = bitReader(T, fbs.reader());
|
||||
const F = BitReader(T, @TypeOf(fbs.reader())).flag;
|
||||
|
||||
try testing.expectEqual(@as(u1, 1), try br.readF(u1, 0)); // bfinal
|
||||
try testing.expectEqual(@as(u2, 1), try br.readF(u2, 0)); // block_type
|
||||
|
||||
for ("Hello world\n") |c| {
|
||||
try testing.expectEqual(@as(u8, c), try br.readF(u8, F.reverse) - 0x30);
|
||||
}
|
||||
try testing.expectEqual(@as(u7, 0), try br.readF(u7, 0)); // end of block
|
||||
br.alignToByte();
|
||||
try testing.expectEqual(@as(u32, 0x0302010c), try br.readF(u32, 0));
|
||||
try testing.expectEqual(@as(u16, 0xbbaa), try br.readF(u16, 0));
|
||||
try testing.expectEqual(@as(u16, 0xddcc), try br.readF(u16, 0));
|
||||
}
|
||||
}
|
||||
|
||||
test "shift/fill" {
|
||||
const data = [_]u8{
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
||||
};
|
||||
var fbs = std.io.fixedBufferStream(&data);
|
||||
var br = bitReader(u64, fbs.reader());
|
||||
|
||||
try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits);
|
||||
try br.shift(8);
|
||||
try testing.expectEqual(@as(u64, 0x00_08_07_06_05_04_03_02), br.bits);
|
||||
try br.fill(60); // fill with 1 byte
|
||||
try testing.expectEqual(@as(u64, 0x01_08_07_06_05_04_03_02), br.bits);
|
||||
try br.shift(8 * 4 + 4);
|
||||
try testing.expectEqual(@as(u64, 0x00_00_00_00_00_10_80_70), br.bits);
|
||||
|
||||
try br.fill(60); // fill with 4 bytes (shift by 4)
|
||||
try testing.expectEqual(@as(u64, 0x00_50_40_30_20_10_80_70), br.bits);
|
||||
try testing.expectEqual(@as(u8, 8 * 7 + 4), br.nbits);
|
||||
|
||||
try br.shift(@intCast(br.nbits)); // clear buffer
|
||||
try br.fill(8); // refill with the rest of the bytes
|
||||
try testing.expectEqual(@as(u64, 0x00_00_00_00_00_08_07_06), br.bits);
|
||||
}
|
||||
|
||||
test "readAll" {
|
||||
inline for ([_]type{ u64, u32 }) |T| {
|
||||
const data = [_]u8{
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
||||
};
|
||||
var fbs = std.io.fixedBufferStream(&data);
|
||||
var br = bitReader(T, fbs.reader());
|
||||
|
||||
switch (T) {
|
||||
u64 => try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits),
|
||||
u32 => try testing.expectEqual(@as(u32, 0x04_03_02_01), br.bits),
|
||||
else => unreachable,
|
||||
}
|
||||
|
||||
var out: [16]u8 = undefined;
|
||||
try br.readAll(out[0..]);
|
||||
try testing.expect(br.nbits == 0);
|
||||
try testing.expect(br.bits == 0);
|
||||
|
||||
try testing.expectEqualSlices(u8, data[0..16], &out);
|
||||
}
|
||||
}
|
||||
|
||||
test "readFixedCode" {
|
||||
inline for ([_]type{ u64, u32 }) |T| {
|
||||
const fixed_codes = @import("huffman_encoder.zig").fixed_codes;
|
||||
|
||||
var fbs = std.io.fixedBufferStream(&fixed_codes);
|
||||
var rdr = bitReader(T, fbs.reader());
|
||||
|
||||
for (0..286) |c| {
|
||||
try testing.expectEqual(c, try rdr.readFixedCode());
|
||||
}
|
||||
try testing.expect(rdr.nbits == 0);
|
||||
}
|
||||
}
|
||||
|
||||
test "u32 leaves no bits on u32 reads" {
|
||||
const data = [_]u8{
|
||||
0xff, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
};
|
||||
var fbs = std.io.fixedBufferStream(&data);
|
||||
var br = bitReader(u32, fbs.reader());
|
||||
|
||||
_ = try br.read(u3);
|
||||
try testing.expectEqual(29, br.nbits);
|
||||
br.alignToByte();
|
||||
try testing.expectEqual(24, br.nbits);
|
||||
try testing.expectEqual(0x04_03_02_01, try br.read(u32));
|
||||
try testing.expectEqual(0, br.nbits);
|
||||
try testing.expectEqual(0x08_07_06_05, try br.read(u32));
|
||||
try testing.expectEqual(0, br.nbits);
|
||||
|
||||
_ = try br.read(u9);
|
||||
try testing.expectEqual(23, br.nbits);
|
||||
br.alignToByte();
|
||||
try testing.expectEqual(16, br.nbits);
|
||||
try testing.expectEqual(0x0e_0d_0c_0b, try br.read(u32));
|
||||
try testing.expectEqual(0, br.nbits);
|
||||
}
|
||||
|
||||
test "u64 need fill after alignToByte" {
|
||||
const data = [_]u8{
|
||||
0xff, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
};
|
||||
|
||||
// without fill
|
||||
var fbs = std.io.fixedBufferStream(&data);
|
||||
var br = bitReader(u64, fbs.reader());
|
||||
_ = try br.read(u23);
|
||||
try testing.expectEqual(41, br.nbits);
|
||||
br.alignToByte();
|
||||
try testing.expectEqual(40, br.nbits);
|
||||
try testing.expectEqual(0x06_05_04_03, try br.read(u32));
|
||||
try testing.expectEqual(8, br.nbits);
|
||||
try testing.expectEqual(0x0a_09_08_07, try br.read(u32));
|
||||
try testing.expectEqual(32, br.nbits);
|
||||
|
||||
// fill after align ensures all bits filled
|
||||
fbs.reset();
|
||||
br = bitReader(u64, fbs.reader());
|
||||
_ = try br.read(u23);
|
||||
try testing.expectEqual(41, br.nbits);
|
||||
br.alignToByte();
|
||||
try br.fill(0);
|
||||
try testing.expectEqual(64, br.nbits);
|
||||
try testing.expectEqual(0x06_05_04_03, try br.read(u32));
|
||||
try testing.expectEqual(32, br.nbits);
|
||||
try testing.expectEqual(0x0a_09_08_07, try br.read(u32));
|
||||
try testing.expectEqual(0, br.nbits);
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
/// Bit writer for use in deflate (compression).
|
||||
///
|
||||
/// Has internal bits buffer of 64 bits and internal bytes buffer of 248 bytes.
|
||||
/// When we accumulate 48 bits 6 bytes are moved to the bytes buffer. When we
|
||||
/// accumulate 240 bytes they are flushed to the underlying inner_writer.
|
||||
///
|
||||
pub fn BitWriter(comptime WriterType: type) type {
|
||||
// buffer_flush_size indicates the buffer size
|
||||
// after which bytes are flushed to the writer.
|
||||
// Should preferably be a multiple of 6, since
|
||||
// we accumulate 6 bytes between writes to the buffer.
|
||||
const buffer_flush_size = 240;
|
||||
|
||||
// buffer_size is the actual output byte buffer size.
|
||||
// It must have additional headroom for a flush
|
||||
// which can contain up to 8 bytes.
|
||||
const buffer_size = buffer_flush_size + 8;
|
||||
|
||||
return struct {
|
||||
inner_writer: WriterType,
|
||||
|
||||
// Data waiting to be written is bytes[0 .. nbytes]
|
||||
// and then the low nbits of bits. Data is always written
|
||||
// sequentially into the bytes array.
|
||||
bits: u64 = 0,
|
||||
nbits: u32 = 0, // number of bits
|
||||
bytes: [buffer_size]u8 = undefined,
|
||||
nbytes: u32 = 0, // number of bytes
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub const Error = WriterType.Error || error{UnfinishedBits};
|
||||
|
||||
pub fn init(writer: WriterType) Self {
|
||||
return .{ .inner_writer = writer };
|
||||
}
|
||||
|
||||
pub fn setWriter(self: *Self, new_writer: WriterType) void {
|
||||
//assert(self.bits == 0 and self.nbits == 0 and self.nbytes == 0);
|
||||
self.inner_writer = new_writer;
|
||||
}
|
||||
|
||||
pub fn flush(self: *Self) Error!void {
|
||||
var n = self.nbytes;
|
||||
while (self.nbits != 0) {
|
||||
self.bytes[n] = @as(u8, @truncate(self.bits));
|
||||
self.bits >>= 8;
|
||||
if (self.nbits > 8) { // Avoid underflow
|
||||
self.nbits -= 8;
|
||||
} else {
|
||||
self.nbits = 0;
|
||||
}
|
||||
n += 1;
|
||||
}
|
||||
self.bits = 0;
|
||||
_ = try self.inner_writer.write(self.bytes[0..n]);
|
||||
self.nbytes = 0;
|
||||
}
|
||||
|
||||
pub fn writeBits(self: *Self, b: u32, nb: u32) Error!void {
|
||||
self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits));
|
||||
self.nbits += nb;
|
||||
if (self.nbits < 48)
|
||||
return;
|
||||
|
||||
var n = self.nbytes;
|
||||
std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little);
|
||||
n += 6;
|
||||
if (n >= buffer_flush_size) {
|
||||
_ = try self.inner_writer.write(self.bytes[0..n]);
|
||||
n = 0;
|
||||
}
|
||||
self.nbytes = n;
|
||||
self.bits >>= 48;
|
||||
self.nbits -= 48;
|
||||
}
|
||||
|
||||
pub fn writeBytes(self: *Self, bytes: []const u8) Error!void {
|
||||
var n = self.nbytes;
|
||||
if (self.nbits & 7 != 0) {
|
||||
return error.UnfinishedBits;
|
||||
}
|
||||
while (self.nbits != 0) {
|
||||
self.bytes[n] = @as(u8, @truncate(self.bits));
|
||||
self.bits >>= 8;
|
||||
self.nbits -= 8;
|
||||
n += 1;
|
||||
}
|
||||
if (n != 0) {
|
||||
_ = try self.inner_writer.write(self.bytes[0..n]);
|
||||
}
|
||||
self.nbytes = 0;
|
||||
_ = try self.inner_writer.write(bytes);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -1,706 +0,0 @@
|
||||
const std = @import("std");
|
||||
const io = std.io;
|
||||
const assert = std.debug.assert;
|
||||
|
||||
const hc = @import("huffman_encoder.zig");
|
||||
const consts = @import("consts.zig").huffman;
|
||||
const Token = @import("Token.zig");
|
||||
const BitWriter = @import("bit_writer.zig").BitWriter;
|
||||
|
||||
pub fn blockWriter(writer: anytype) BlockWriter(@TypeOf(writer)) {
|
||||
return BlockWriter(@TypeOf(writer)).init(writer);
|
||||
}
|
||||
|
||||
/// Accepts list of tokens, decides what is best block type to write. What block
|
||||
/// type will provide best compression. Writes header and body of the block.
|
||||
///
|
||||
pub fn BlockWriter(comptime WriterType: type) type {
|
||||
const BitWriterType = BitWriter(WriterType);
|
||||
return struct {
|
||||
const codegen_order = consts.codegen_order;
|
||||
const end_code_mark = 255;
|
||||
const Self = @This();
|
||||
|
||||
pub const Error = BitWriterType.Error;
|
||||
bit_writer: BitWriterType,
|
||||
|
||||
codegen_freq: [consts.codegen_code_count]u16 = undefined,
|
||||
literal_freq: [consts.max_num_lit]u16 = undefined,
|
||||
distance_freq: [consts.distance_code_count]u16 = undefined,
|
||||
codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined,
|
||||
literal_encoding: hc.LiteralEncoder = .{},
|
||||
distance_encoding: hc.DistanceEncoder = .{},
|
||||
codegen_encoding: hc.CodegenEncoder = .{},
|
||||
fixed_literal_encoding: hc.LiteralEncoder,
|
||||
fixed_distance_encoding: hc.DistanceEncoder,
|
||||
huff_distance: hc.DistanceEncoder,
|
||||
|
||||
pub fn init(writer: WriterType) Self {
|
||||
return .{
|
||||
.bit_writer = BitWriterType.init(writer),
|
||||
.fixed_literal_encoding = hc.fixedLiteralEncoder(),
|
||||
.fixed_distance_encoding = hc.fixedDistanceEncoder(),
|
||||
.huff_distance = hc.huffmanDistanceEncoder(),
|
||||
};
|
||||
}
|
||||
|
||||
/// Flush intrenal bit buffer to the writer.
|
||||
/// Should be called only when bit stream is at byte boundary.
|
||||
///
|
||||
/// That is after final block; when last byte could be incomplete or
|
||||
/// after stored block; which is aligned to the byte boundary (it has x
|
||||
/// padding bits after first 3 bits).
|
||||
pub fn flush(self: *Self) Error!void {
|
||||
try self.bit_writer.flush();
|
||||
}
|
||||
|
||||
pub fn setWriter(self: *Self, new_writer: WriterType) void {
|
||||
self.bit_writer.setWriter(new_writer);
|
||||
}
|
||||
|
||||
fn writeCode(self: *Self, c: hc.HuffCode) Error!void {
|
||||
try self.bit_writer.writeBits(c.code, c.len);
|
||||
}
|
||||
|
||||
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
|
||||
// the literal and distance lengths arrays (which are concatenated into a single
|
||||
// array). This method generates that run-length encoding.
|
||||
//
|
||||
// The result is written into the codegen array, and the frequencies
|
||||
// of each code is written into the codegen_freq array.
|
||||
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
|
||||
// information. Code bad_code is an end marker
|
||||
//
|
||||
// num_literals: The number of literals in literal_encoding
|
||||
// num_distances: The number of distances in distance_encoding
|
||||
// lit_enc: The literal encoder to use
|
||||
// dist_enc: The distance encoder to use
|
||||
fn generateCodegen(
|
||||
self: *Self,
|
||||
num_literals: u32,
|
||||
num_distances: u32,
|
||||
lit_enc: *hc.LiteralEncoder,
|
||||
dist_enc: *hc.DistanceEncoder,
|
||||
) void {
|
||||
for (self.codegen_freq, 0..) |_, i| {
|
||||
self.codegen_freq[i] = 0;
|
||||
}
|
||||
|
||||
// Note that we are using codegen both as a temporary variable for holding
|
||||
// a copy of the frequencies, and as the place where we put the result.
|
||||
// This is fine because the output is always shorter than the input used
|
||||
// so far.
|
||||
var codegen = &self.codegen; // cache
|
||||
// Copy the concatenated code sizes to codegen. Put a marker at the end.
|
||||
var cgnl = codegen[0..num_literals];
|
||||
for (cgnl, 0..) |_, i| {
|
||||
cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
|
||||
}
|
||||
|
||||
cgnl = codegen[num_literals .. num_literals + num_distances];
|
||||
for (cgnl, 0..) |_, i| {
|
||||
cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
|
||||
}
|
||||
codegen[num_literals + num_distances] = end_code_mark;
|
||||
|
||||
var size = codegen[0];
|
||||
var count: i32 = 1;
|
||||
var out_index: u32 = 0;
|
||||
var in_index: u32 = 1;
|
||||
while (size != end_code_mark) : (in_index += 1) {
|
||||
// INVARIANT: We have seen "count" copies of size that have not yet
|
||||
// had output generated for them.
|
||||
const next_size = codegen[in_index];
|
||||
if (next_size == size) {
|
||||
count += 1;
|
||||
continue;
|
||||
}
|
||||
// We need to generate codegen indicating "count" of size.
|
||||
if (size != 0) {
|
||||
codegen[out_index] = size;
|
||||
out_index += 1;
|
||||
self.codegen_freq[size] += 1;
|
||||
count -= 1;
|
||||
while (count >= 3) {
|
||||
var n: i32 = 6;
|
||||
if (n > count) {
|
||||
n = count;
|
||||
}
|
||||
codegen[out_index] = 16;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(n - 3));
|
||||
out_index += 1;
|
||||
self.codegen_freq[16] += 1;
|
||||
count -= n;
|
||||
}
|
||||
} else {
|
||||
while (count >= 11) {
|
||||
var n: i32 = 138;
|
||||
if (n > count) {
|
||||
n = count;
|
||||
}
|
||||
codegen[out_index] = 18;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(n - 11));
|
||||
out_index += 1;
|
||||
self.codegen_freq[18] += 1;
|
||||
count -= n;
|
||||
}
|
||||
if (count >= 3) {
|
||||
// 3 <= count <= 10
|
||||
codegen[out_index] = 17;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(count - 3));
|
||||
out_index += 1;
|
||||
self.codegen_freq[17] += 1;
|
||||
count = 0;
|
||||
}
|
||||
}
|
||||
count -= 1;
|
||||
while (count >= 0) : (count -= 1) {
|
||||
codegen[out_index] = size;
|
||||
out_index += 1;
|
||||
self.codegen_freq[size] += 1;
|
||||
}
|
||||
// Set up invariant for next time through the loop.
|
||||
size = next_size;
|
||||
count = 1;
|
||||
}
|
||||
// Marker indicating the end of the codegen.
|
||||
codegen[out_index] = end_code_mark;
|
||||
}
|
||||
|
||||
const DynamicSize = struct {
|
||||
size: u32,
|
||||
num_codegens: u32,
|
||||
};
|
||||
|
||||
// dynamicSize returns the size of dynamically encoded data in bits.
|
||||
fn dynamicSize(
|
||||
self: *Self,
|
||||
lit_enc: *hc.LiteralEncoder, // literal encoder
|
||||
dist_enc: *hc.DistanceEncoder, // distance encoder
|
||||
extra_bits: u32,
|
||||
) DynamicSize {
|
||||
var num_codegens = self.codegen_freq.len;
|
||||
while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
|
||||
num_codegens -= 1;
|
||||
}
|
||||
const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
|
||||
self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
|
||||
self.codegen_freq[16] * 2 +
|
||||
self.codegen_freq[17] * 3 +
|
||||
self.codegen_freq[18] * 7;
|
||||
const size = header +
|
||||
lit_enc.bitLength(&self.literal_freq) +
|
||||
dist_enc.bitLength(&self.distance_freq) +
|
||||
extra_bits;
|
||||
|
||||
return DynamicSize{
|
||||
.size = @as(u32, @intCast(size)),
|
||||
.num_codegens = @as(u32, @intCast(num_codegens)),
|
||||
};
|
||||
}
|
||||
|
||||
// fixedSize returns the size of dynamically encoded data in bits.
|
||||
fn fixedSize(self: *Self, extra_bits: u32) u32 {
|
||||
return 3 +
|
||||
self.fixed_literal_encoding.bitLength(&self.literal_freq) +
|
||||
self.fixed_distance_encoding.bitLength(&self.distance_freq) +
|
||||
extra_bits;
|
||||
}
|
||||
|
||||
const StoredSize = struct {
|
||||
size: u32,
|
||||
storable: bool,
|
||||
};
|
||||
|
||||
// storedSizeFits calculates the stored size, including header.
|
||||
// The function returns the size in bits and whether the block
|
||||
// fits inside a single block.
|
||||
fn storedSizeFits(in: ?[]const u8) StoredSize {
|
||||
if (in == null) {
|
||||
return .{ .size = 0, .storable = false };
|
||||
}
|
||||
if (in.?.len <= consts.max_store_block_size) {
|
||||
return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true };
|
||||
}
|
||||
return .{ .size = 0, .storable = false };
|
||||
}
|
||||
|
||||
// Write the header of a dynamic Huffman block to the output stream.
|
||||
//
|
||||
// num_literals: The number of literals specified in codegen
|
||||
// num_distances: The number of distances specified in codegen
|
||||
// num_codegens: The number of codegens used in codegen
|
||||
// eof: Is it the end-of-file? (end of stream)
|
||||
fn dynamicHeader(
|
||||
self: *Self,
|
||||
num_literals: u32,
|
||||
num_distances: u32,
|
||||
num_codegens: u32,
|
||||
eof: bool,
|
||||
) Error!void {
|
||||
const first_bits: u32 = if (eof) 5 else 4;
|
||||
try self.bit_writer.writeBits(first_bits, 3);
|
||||
try self.bit_writer.writeBits(num_literals - 257, 5);
|
||||
try self.bit_writer.writeBits(num_distances - 1, 5);
|
||||
try self.bit_writer.writeBits(num_codegens - 4, 4);
|
||||
|
||||
var i: u32 = 0;
|
||||
while (i < num_codegens) : (i += 1) {
|
||||
const value = self.codegen_encoding.codes[codegen_order[i]].len;
|
||||
try self.bit_writer.writeBits(value, 3);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (true) {
|
||||
const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
|
||||
i += 1;
|
||||
if (code_word == end_code_mark) {
|
||||
break;
|
||||
}
|
||||
try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);
|
||||
|
||||
switch (code_word) {
|
||||
16 => {
|
||||
try self.bit_writer.writeBits(self.codegen[i], 2);
|
||||
i += 1;
|
||||
},
|
||||
17 => {
|
||||
try self.bit_writer.writeBits(self.codegen[i], 3);
|
||||
i += 1;
|
||||
},
|
||||
18 => {
|
||||
try self.bit_writer.writeBits(self.codegen[i], 7);
|
||||
i += 1;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn storedHeader(self: *Self, length: usize, eof: bool) Error!void {
|
||||
assert(length <= 65535);
|
||||
const flag: u32 = if (eof) 1 else 0;
|
||||
try self.bit_writer.writeBits(flag, 3);
|
||||
try self.flush();
|
||||
const l: u16 = @intCast(length);
|
||||
try self.bit_writer.writeBits(l, 16);
|
||||
try self.bit_writer.writeBits(~l, 16);
|
||||
}
|
||||
|
||||
fn fixedHeader(self: *Self, eof: bool) Error!void {
|
||||
// Indicate that we are a fixed Huffman block
|
||||
var value: u32 = 2;
|
||||
if (eof) {
|
||||
value = 3;
|
||||
}
|
||||
try self.bit_writer.writeBits(value, 3);
|
||||
}
|
||||
|
||||
// Write a block of tokens with the smallest encoding. Will choose block type.
|
||||
// The original input can be supplied, and if the huffman encoded data
|
||||
// is larger than the original bytes, the data will be written as a
|
||||
// stored block.
|
||||
// If the input is null, the tokens will always be Huffman encoded.
|
||||
pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) Error!void {
|
||||
const lit_and_dist = self.indexTokens(tokens);
|
||||
const num_literals = lit_and_dist.num_literals;
|
||||
const num_distances = lit_and_dist.num_distances;
|
||||
|
||||
var extra_bits: u32 = 0;
|
||||
const ret = storedSizeFits(input);
|
||||
const stored_size = ret.size;
|
||||
const storable = ret.storable;
|
||||
|
||||
if (storable) {
|
||||
// We only bother calculating the costs of the extra bits required by
|
||||
// the length of distance fields (which will be the same for both fixed
|
||||
// and dynamic encoding), if we need to compare those two encodings
|
||||
// against stored encoding.
|
||||
var length_code: u16 = Token.length_codes_start + 8;
|
||||
while (length_code < num_literals) : (length_code += 1) {
|
||||
// First eight length codes have extra size = 0.
|
||||
extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
|
||||
@as(u32, @intCast(Token.lengthExtraBits(length_code)));
|
||||
}
|
||||
var distance_code: u16 = 4;
|
||||
while (distance_code < num_distances) : (distance_code += 1) {
|
||||
// First four distance codes have extra size = 0.
|
||||
extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
|
||||
@as(u32, @intCast(Token.distanceExtraBits(distance_code)));
|
||||
}
|
||||
}
|
||||
|
||||
// Figure out smallest code.
|
||||
// Fixed Huffman baseline.
|
||||
var literal_encoding = &self.fixed_literal_encoding;
|
||||
var distance_encoding = &self.fixed_distance_encoding;
|
||||
var size = self.fixedSize(extra_bits);
|
||||
|
||||
// Dynamic Huffman?
|
||||
var num_codegens: u32 = 0;
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literal_encoding and the distance_encoding.
|
||||
self.generateCodegen(
|
||||
num_literals,
|
||||
num_distances,
|
||||
&self.literal_encoding,
|
||||
&self.distance_encoding,
|
||||
);
|
||||
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
|
||||
const dynamic_size = self.dynamicSize(
|
||||
&self.literal_encoding,
|
||||
&self.distance_encoding,
|
||||
extra_bits,
|
||||
);
|
||||
const dyn_size = dynamic_size.size;
|
||||
num_codegens = dynamic_size.num_codegens;
|
||||
|
||||
if (dyn_size < size) {
|
||||
size = dyn_size;
|
||||
literal_encoding = &self.literal_encoding;
|
||||
distance_encoding = &self.distance_encoding;
|
||||
}
|
||||
|
||||
// Stored bytes?
|
||||
if (storable and stored_size < size) {
|
||||
try self.storedBlock(input.?, eof);
|
||||
return;
|
||||
}
|
||||
|
||||
// Huffman.
|
||||
if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
|
||||
try self.fixedHeader(eof);
|
||||
} else {
|
||||
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
|
||||
}
|
||||
|
||||
// Write the tokens.
|
||||
try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
|
||||
}
|
||||
|
||||
pub fn storedBlock(self: *Self, input: []const u8, eof: bool) Error!void {
|
||||
try self.storedHeader(input.len, eof);
|
||||
try self.bit_writer.writeBytes(input);
|
||||
}
|
||||
|
||||
// writeBlockDynamic encodes a block using a dynamic Huffman table.
|
||||
// This should be used if the symbols used have a disproportionate
|
||||
// histogram distribution.
|
||||
// If input is supplied and the compression savings are below 1/16th of the
|
||||
// input size the block is stored.
|
||||
fn dynamicBlock(
|
||||
self: *Self,
|
||||
tokens: []const Token,
|
||||
eof: bool,
|
||||
input: ?[]const u8,
|
||||
) Error!void {
|
||||
const total_tokens = self.indexTokens(tokens);
|
||||
const num_literals = total_tokens.num_literals;
|
||||
const num_distances = total_tokens.num_distances;
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literal_encoding and the distance_encoding.
|
||||
self.generateCodegen(
|
||||
num_literals,
|
||||
num_distances,
|
||||
&self.literal_encoding,
|
||||
&self.distance_encoding,
|
||||
);
|
||||
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
|
||||
const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
|
||||
const size = dynamic_size.size;
|
||||
const num_codegens = dynamic_size.num_codegens;
|
||||
|
||||
// Store bytes, if we don't get a reasonable improvement.
|
||||
|
||||
const stored_size = storedSizeFits(input);
|
||||
const ssize = stored_size.size;
|
||||
const storable = stored_size.storable;
|
||||
if (storable and ssize < (size + (size >> 4))) {
|
||||
try self.storedBlock(input.?, eof);
|
||||
return;
|
||||
}
|
||||
|
||||
// Write Huffman table.
|
||||
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
|
||||
|
||||
// Write the tokens.
|
||||
try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
|
||||
}
|
||||
|
||||
const TotalIndexedTokens = struct {
|
||||
num_literals: u32,
|
||||
num_distances: u32,
|
||||
};
|
||||
|
||||
// Indexes a slice of tokens followed by an end_block_marker, and updates
|
||||
// literal_freq and distance_freq, and generates literal_encoding
|
||||
// and distance_encoding.
|
||||
// The number of literal and distance tokens is returned.
|
||||
fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens {
|
||||
var num_literals: u32 = 0;
|
||||
var num_distances: u32 = 0;
|
||||
|
||||
for (self.literal_freq, 0..) |_, i| {
|
||||
self.literal_freq[i] = 0;
|
||||
}
|
||||
for (self.distance_freq, 0..) |_, i| {
|
||||
self.distance_freq[i] = 0;
|
||||
}
|
||||
|
||||
for (tokens) |t| {
|
||||
if (t.kind == Token.Kind.literal) {
|
||||
self.literal_freq[t.literal()] += 1;
|
||||
continue;
|
||||
}
|
||||
self.literal_freq[t.lengthCode()] += 1;
|
||||
self.distance_freq[t.distanceCode()] += 1;
|
||||
}
|
||||
// add end_block_marker token at the end
|
||||
self.literal_freq[consts.end_block_marker] += 1;
|
||||
|
||||
// get the number of literals
|
||||
num_literals = @as(u32, @intCast(self.literal_freq.len));
|
||||
while (self.literal_freq[num_literals - 1] == 0) {
|
||||
num_literals -= 1;
|
||||
}
|
||||
// get the number of distances
|
||||
num_distances = @as(u32, @intCast(self.distance_freq.len));
|
||||
while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
|
||||
num_distances -= 1;
|
||||
}
|
||||
if (num_distances == 0) {
|
||||
// We haven't found a single match. If we want to go with the dynamic encoding,
|
||||
// we should count at least one distance to be sure that the distance huffman tree could be encoded.
|
||||
self.distance_freq[0] = 1;
|
||||
num_distances = 1;
|
||||
}
|
||||
self.literal_encoding.generate(&self.literal_freq, 15);
|
||||
self.distance_encoding.generate(&self.distance_freq, 15);
|
||||
return TotalIndexedTokens{
|
||||
.num_literals = num_literals,
|
||||
.num_distances = num_distances,
|
||||
};
|
||||
}
|
||||
|
||||
// Writes a slice of tokens to the output followed by and end_block_marker.
|
||||
// codes for literal and distance encoding must be supplied.
|
||||
fn writeTokens(
|
||||
self: *Self,
|
||||
tokens: []const Token,
|
||||
le_codes: []hc.HuffCode,
|
||||
oe_codes: []hc.HuffCode,
|
||||
) Error!void {
|
||||
for (tokens) |t| {
|
||||
if (t.kind == Token.Kind.literal) {
|
||||
try self.writeCode(le_codes[t.literal()]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Write the length
|
||||
const le = t.lengthEncoding();
|
||||
try self.writeCode(le_codes[le.code]);
|
||||
if (le.extra_bits > 0) {
|
||||
try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
|
||||
}
|
||||
|
||||
// Write the distance
|
||||
const oe = t.distanceEncoding();
|
||||
try self.writeCode(oe_codes[oe.code]);
|
||||
if (oe.extra_bits > 0) {
|
||||
try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
|
||||
}
|
||||
}
|
||||
// add end_block_marker at the end
|
||||
try self.writeCode(le_codes[consts.end_block_marker]);
|
||||
}
|
||||
|
||||
// Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes
|
||||
// if the results only gains very little from compression.
|
||||
pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) Error!void {
|
||||
// Add everything as literals
|
||||
histogram(input, &self.literal_freq);
|
||||
|
||||
self.literal_freq[consts.end_block_marker] = 1;
|
||||
|
||||
const num_literals = consts.end_block_marker + 1;
|
||||
self.distance_freq[0] = 1;
|
||||
const num_distances = 1;
|
||||
|
||||
self.literal_encoding.generate(&self.literal_freq, 15);
|
||||
|
||||
// Figure out smallest code.
|
||||
// Always use dynamic Huffman or Store
|
||||
var num_codegens: u32 = 0;
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literal_encoding and the distance_encoding.
|
||||
self.generateCodegen(
|
||||
num_literals,
|
||||
num_distances,
|
||||
&self.literal_encoding,
|
||||
&self.huff_distance,
|
||||
);
|
||||
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
|
||||
const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
|
||||
const size = dynamic_size.size;
|
||||
num_codegens = dynamic_size.num_codegens;
|
||||
|
||||
// Store bytes, if we don't get a reasonable improvement.
|
||||
const stored_size_ret = storedSizeFits(input);
|
||||
const ssize = stored_size_ret.size;
|
||||
const storable = stored_size_ret.storable;
|
||||
|
||||
if (storable and ssize < (size + (size >> 4))) {
|
||||
try self.storedBlock(input, eof);
|
||||
return;
|
||||
}
|
||||
|
||||
// Huffman.
|
||||
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
|
||||
const encoding = self.literal_encoding.codes[0..257];
|
||||
|
||||
for (input) |t| {
|
||||
const c = encoding[t];
|
||||
try self.bit_writer.writeBits(c.code, c.len);
|
||||
}
|
||||
try self.writeCode(encoding[consts.end_block_marker]);
|
||||
}
|
||||
|
||||
// histogram accumulates a histogram of b in h.
|
||||
fn histogram(b: []const u8, h: *[286]u16) void {
|
||||
// Clear histogram
|
||||
for (h, 0..) |_, i| {
|
||||
h[i] = 0;
|
||||
}
|
||||
|
||||
var lh = h.*[0..256];
|
||||
for (b) |t| {
|
||||
lh[t] += 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// tests
|
||||
const expect = std.testing.expect;
|
||||
const fmt = std.fmt;
|
||||
const testing = std.testing;
|
||||
const ArrayList = std.ArrayList;
|
||||
|
||||
const TestCase = @import("testdata/block_writer.zig").TestCase;
|
||||
const testCases = @import("testdata/block_writer.zig").testCases;
|
||||
|
||||
// tests if the writeBlock encoding has changed.
|
||||
test "write" {
|
||||
inline for (0..testCases.len) |i| {
|
||||
try testBlock(testCases[i], .write_block);
|
||||
}
|
||||
}
|
||||
|
||||
// tests if the writeBlockDynamic encoding has changed.
|
||||
test "dynamicBlock" {
|
||||
inline for (0..testCases.len) |i| {
|
||||
try testBlock(testCases[i], .write_dyn_block);
|
||||
}
|
||||
}
|
||||
|
||||
test "huffmanBlock" {
|
||||
inline for (0..testCases.len) |i| {
|
||||
try testBlock(testCases[i], .write_huffman_block);
|
||||
}
|
||||
try testBlock(.{
|
||||
.tokens = &[_]Token{},
|
||||
.input = "huffman-rand-max.input",
|
||||
.want = "huffman-rand-max.{s}.expect",
|
||||
}, .write_huffman_block);
|
||||
}
|
||||
|
||||
const TestFn = enum {
|
||||
write_block,
|
||||
write_dyn_block, // write dynamic block
|
||||
write_huffman_block,
|
||||
|
||||
fn to_s(self: TestFn) []const u8 {
|
||||
return switch (self) {
|
||||
.write_block => "wb",
|
||||
.write_dyn_block => "dyn",
|
||||
.write_huffman_block => "huff",
|
||||
};
|
||||
}
|
||||
|
||||
fn write(
|
||||
comptime self: TestFn,
|
||||
bw: anytype,
|
||||
tok: []const Token,
|
||||
input: ?[]const u8,
|
||||
final: bool,
|
||||
) !void {
|
||||
switch (self) {
|
||||
.write_block => try bw.write(tok, final, input),
|
||||
.write_dyn_block => try bw.dynamicBlock(tok, final, input),
|
||||
.write_huffman_block => try bw.huffmanBlock(input.?, final),
|
||||
}
|
||||
try bw.flush();
|
||||
}
|
||||
};
|
||||
|
||||
// testBlock tests a block against its references
|
||||
//
|
||||
// size
|
||||
// 64K [file-name].input - input non compressed file
|
||||
// 8.1K [file-name].golden -
|
||||
// 78 [file-name].dyn.expect - output with writeBlockDynamic
|
||||
// 78 [file-name].wb.expect - output with writeBlock
|
||||
// 8.1K [file-name].huff.expect - output with writeBlockHuff
|
||||
// 78 [file-name].dyn.expect-noinput - output with writeBlockDynamic when input is null
|
||||
// 78 [file-name].wb.expect-noinput - output with writeBlock when input is null
|
||||
//
|
||||
// wb - writeBlock
|
||||
// dyn - writeBlockDynamic
|
||||
// huff - writeBlockHuff
|
||||
//
|
||||
fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void {
|
||||
if (tc.input.len != 0 and tc.want.len != 0) {
|
||||
const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()});
|
||||
const input = @embedFile("testdata/block_writer/" ++ tc.input);
|
||||
const want = @embedFile("testdata/block_writer/" ++ want_name);
|
||||
try testWriteBlock(tfn, input, want, tc.tokens);
|
||||
}
|
||||
|
||||
if (tfn == .write_huffman_block) {
|
||||
return;
|
||||
}
|
||||
|
||||
const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()});
|
||||
const want = @embedFile("testdata/block_writer/" ++ want_name_no_input);
|
||||
try testWriteBlock(tfn, null, want, tc.tokens);
|
||||
}
|
||||
|
||||
// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output.
|
||||
fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void {
|
||||
var buf = ArrayList(u8).init(testing.allocator);
|
||||
var bw = blockWriter(buf.writer());
|
||||
try tfn.write(&bw, tokens, input, false);
|
||||
var got = buf.items;
|
||||
try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
|
||||
try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set
|
||||
//
|
||||
// Test if the writer produces the same output after reset.
|
||||
buf.deinit();
|
||||
buf = ArrayList(u8).init(testing.allocator);
|
||||
defer buf.deinit();
|
||||
bw.setWriter(buf.writer());
|
||||
|
||||
try tfn.write(&bw, tokens, input, true);
|
||||
try bw.flush();
|
||||
got = buf.items;
|
||||
|
||||
try expect(got[0] & 1 == 1); // bfinal is set
|
||||
buf.items[0] &= 0b1111_1110; // remove bfinal bit, so we can run test slices
|
||||
try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
pub const deflate = struct {
|
||||
// Number of tokens to accumulate in deflate before starting block encoding.
|
||||
//
|
||||
// In zlib this depends on memlevel: 6 + memlevel, where default memlevel is
|
||||
// 8 and max 9 that gives 14 or 15 bits.
|
||||
pub const tokens = 1 << 15;
|
||||
};
|
||||
|
||||
pub const match = struct {
|
||||
pub const base_length = 3; // smallest match length per the RFC section 3.2.5
|
||||
pub const min_length = 4; // min length used in this algorithm
|
||||
pub const max_length = 258;
|
||||
|
||||
pub const min_distance = 1;
|
||||
pub const max_distance = 32768;
|
||||
};
|
||||
|
||||
pub const history = struct {
|
||||
pub const len = match.max_distance;
|
||||
};
|
||||
|
||||
pub const lookup = struct {
|
||||
pub const bits = 15;
|
||||
pub const len = 1 << bits;
|
||||
pub const shift = 32 - bits;
|
||||
};
|
||||
|
||||
pub const huffman = struct {
|
||||
// The odd order in which the codegen code sizes are written.
|
||||
pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
|
||||
// The number of codegen codes.
|
||||
pub const codegen_code_count = 19;
|
||||
|
||||
// The largest distance code.
|
||||
pub const distance_code_count = 30;
|
||||
|
||||
// Maximum number of literals.
|
||||
pub const max_num_lit = 286;
|
||||
|
||||
// Max number of frequencies used for a Huffman Code
|
||||
// Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
|
||||
// The largest of these is max_num_lit.
|
||||
pub const max_num_frequencies = max_num_lit;
|
||||
|
||||
// Biggest block size for uncompressed block.
|
||||
pub const max_store_block_size = 65535;
|
||||
// The special code used to mark the end of a block.
|
||||
pub const end_block_marker = 256;
|
||||
};
|
||||
@@ -1,208 +0,0 @@
|
||||
//! Container of the deflate bit stream body. Container adds header before
|
||||
//! deflate bit stream and footer after. It can bi gzip, zlib or raw (no header,
|
||||
//! no footer, raw bit stream).
|
||||
//!
|
||||
//! Zlib format is defined in rfc 1950. Header has 2 bytes and footer 4 bytes
|
||||
//! addler 32 checksum.
|
||||
//!
|
||||
//! Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes
|
||||
//! crc32 checksum and 4 bytes of uncompressed data length.
|
||||
//!
|
||||
//!
|
||||
//! rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
|
||||
//! rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
|
||||
//!
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
pub const Container = enum {
|
||||
raw, // no header or footer
|
||||
gzip, // gzip header and footer
|
||||
zlib, // zlib header and footer
|
||||
|
||||
pub fn size(w: Container) usize {
|
||||
return headerSize(w) + footerSize(w);
|
||||
}
|
||||
|
||||
pub fn headerSize(w: Container) usize {
|
||||
return switch (w) {
|
||||
.gzip => 10,
|
||||
.zlib => 2,
|
||||
.raw => 0,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn footerSize(w: Container) usize {
|
||||
return switch (w) {
|
||||
.gzip => 8,
|
||||
.zlib => 4,
|
||||
.raw => 0,
|
||||
};
|
||||
}
|
||||
|
||||
pub const list = [_]Container{ .raw, .gzip, .zlib };
|
||||
|
||||
pub const Error = error{
|
||||
BadGzipHeader,
|
||||
BadZlibHeader,
|
||||
WrongGzipChecksum,
|
||||
WrongGzipSize,
|
||||
WrongZlibChecksum,
|
||||
};
|
||||
|
||||
pub fn writeHeader(comptime wrap: Container, writer: anytype) !void {
|
||||
switch (wrap) {
|
||||
.gzip => {
|
||||
// GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
|
||||
// - ID1 (IDentification 1), always 0x1f
|
||||
// - ID2 (IDentification 2), always 0x8b
|
||||
// - CM (Compression Method), always 8 = deflate
|
||||
// - FLG (Flags), all set to 0
|
||||
// - 4 bytes, MTIME (Modification time), not used, all set to zero
|
||||
// - XFL (eXtra FLags), all set to zero
|
||||
// - OS (Operating System), 03 = Unix
|
||||
const gzipHeader = [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 };
|
||||
try writer.writeAll(&gzipHeader);
|
||||
},
|
||||
.zlib => {
|
||||
// ZLIB has a two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
|
||||
// 1st byte:
|
||||
// - First four bits is the CINFO (compression info), which is 7 for the default deflate window size.
|
||||
// - The next four bits is the CM (compression method), which is 8 for deflate.
|
||||
// 2nd byte:
|
||||
// - Two bits is the FLEVEL (compression level). Values are: 0=fastest, 1=fast, 2=default, 3=best.
|
||||
// - The next bit, FDICT, is set if a dictionary is given.
|
||||
// - The final five FCHECK bits form a mod-31 checksum.
|
||||
//
|
||||
// CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
|
||||
const zlibHeader = [_]u8{ 0x78, 0b10_0_11100 };
|
||||
try writer.writeAll(&zlibHeader);
|
||||
},
|
||||
.raw => {},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn writeFooter(comptime wrap: Container, hasher: *Hasher(wrap), writer: anytype) !void {
|
||||
var bits: [4]u8 = undefined;
|
||||
switch (wrap) {
|
||||
.gzip => {
|
||||
// GZIP 8 bytes footer
|
||||
// - 4 bytes, CRC32 (CRC-32)
|
||||
// - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
|
||||
std.mem.writeInt(u32, &bits, hasher.chksum(), .little);
|
||||
try writer.writeAll(&bits);
|
||||
|
||||
std.mem.writeInt(u32, &bits, hasher.bytesRead(), .little);
|
||||
try writer.writeAll(&bits);
|
||||
},
|
||||
.zlib => {
|
||||
// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
|
||||
// 4 bytes of ADLER32 (Adler-32 checksum)
|
||||
// Checksum value of the uncompressed data (excluding any
|
||||
// dictionary data) computed according to Adler-32
|
||||
// algorithm.
|
||||
std.mem.writeInt(u32, &bits, hasher.chksum(), .big);
|
||||
try writer.writeAll(&bits);
|
||||
},
|
||||
.raw => {},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parseHeader(comptime wrap: Container, reader: anytype) !void {
|
||||
switch (wrap) {
|
||||
.gzip => try parseGzipHeader(reader),
|
||||
.zlib => try parseZlibHeader(reader),
|
||||
.raw => {},
|
||||
}
|
||||
}
|
||||
|
||||
fn parseGzipHeader(reader: anytype) !void {
|
||||
const magic1 = try reader.read(u8);
|
||||
const magic2 = try reader.read(u8);
|
||||
const method = try reader.read(u8);
|
||||
const flags = try reader.read(u8);
|
||||
try reader.skipBytes(6); // mtime(4), xflags, os
|
||||
if (magic1 != 0x1f or magic2 != 0x8b or method != 0x08)
|
||||
return error.BadGzipHeader;
|
||||
// Flags description: https://www.rfc-editor.org/rfc/rfc1952.html#page-5
|
||||
if (flags != 0) {
|
||||
if (flags & 0b0000_0100 != 0) { // FEXTRA
|
||||
const extra_len = try reader.read(u16);
|
||||
try reader.skipBytes(extra_len);
|
||||
}
|
||||
if (flags & 0b0000_1000 != 0) { // FNAME
|
||||
try reader.skipStringZ();
|
||||
}
|
||||
if (flags & 0b0001_0000 != 0) { // FCOMMENT
|
||||
try reader.skipStringZ();
|
||||
}
|
||||
if (flags & 0b0000_0010 != 0) { // FHCRC
|
||||
try reader.skipBytes(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parseZlibHeader(reader: anytype) !void {
|
||||
const cm = try reader.read(u4);
|
||||
const cinfo = try reader.read(u4);
|
||||
_ = try reader.read(u8);
|
||||
if (cm != 8 or cinfo > 7) {
|
||||
return error.BadZlibHeader;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: anytype) !void {
|
||||
switch (wrap) {
|
||||
.gzip => {
|
||||
try reader.fill(0);
|
||||
if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum;
|
||||
if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize;
|
||||
},
|
||||
.zlib => {
|
||||
const chksum: u32 = @byteSwap(hasher.chksum());
|
||||
if (try reader.read(u32) != chksum) return error.WrongZlibChecksum;
|
||||
},
|
||||
.raw => {},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn Hasher(comptime wrap: Container) type {
|
||||
const HasherType = switch (wrap) {
|
||||
.gzip => std.hash.Crc32,
|
||||
.zlib => std.hash.Adler32,
|
||||
.raw => struct {
|
||||
pub fn init() @This() {
|
||||
return .{};
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
return struct {
|
||||
hasher: HasherType = HasherType.init(),
|
||||
bytes: usize = 0,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn update(self: *Self, buf: []const u8) void {
|
||||
switch (wrap) {
|
||||
.raw => {},
|
||||
else => {
|
||||
self.hasher.update(buf);
|
||||
self.bytes += buf.len;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chksum(self: *Self) u32 {
|
||||
switch (wrap) {
|
||||
.raw => return 0,
|
||||
else => return self.hasher.final(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bytesRead(self: *Self) u32 {
|
||||
return @truncate(self.bytes);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
@@ -1,744 +0,0 @@
|
||||
const std = @import("std");
|
||||
const io = std.io;
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
const expect = testing.expect;
|
||||
const print = std.debug.print;
|
||||
|
||||
const Token = @import("Token.zig");
|
||||
const consts = @import("consts.zig");
|
||||
const BlockWriter = @import("block_writer.zig").BlockWriter;
|
||||
const Container = @import("container.zig").Container;
|
||||
const SlidingWindow = @import("SlidingWindow.zig");
|
||||
const Lookup = @import("Lookup.zig");
|
||||
|
||||
pub const Options = struct {
|
||||
level: Level = .default,
|
||||
};
|
||||
|
||||
/// Trades between speed and compression size.
|
||||
/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43)
|
||||
/// levels 1-3 are using different algorithm to perform faster but with less
|
||||
/// compression. That is not implemented here.
|
||||
pub const Level = enum(u4) {
|
||||
// zig fmt: off
|
||||
fast = 0xb, level_4 = 4,
|
||||
level_5 = 5,
|
||||
default = 0xc, level_6 = 6,
|
||||
level_7 = 7,
|
||||
level_8 = 8,
|
||||
best = 0xd, level_9 = 9,
|
||||
// zig fmt: on
|
||||
};
|
||||
|
||||
/// Algorithm knobs for each level.
|
||||
const LevelArgs = struct {
|
||||
good: u16, // Do less lookups if we already have match of this length.
|
||||
nice: u16, // Stop looking for better match if we found match with at least this length.
|
||||
lazy: u16, // Don't do lazy match find if got match with at least this length.
|
||||
chain: u16, // How many lookups for previous match to perform.
|
||||
|
||||
pub fn get(level: Level) LevelArgs {
|
||||
// zig fmt: off
|
||||
return switch (level) {
|
||||
.fast, .level_4 => .{ .good = 4, .lazy = 4, .nice = 16, .chain = 16 },
|
||||
.level_5 => .{ .good = 8, .lazy = 16, .nice = 32, .chain = 32 },
|
||||
.default, .level_6 => .{ .good = 8, .lazy = 16, .nice = 128, .chain = 128 },
|
||||
.level_7 => .{ .good = 8, .lazy = 32, .nice = 128, .chain = 256 },
|
||||
.level_8 => .{ .good = 32, .lazy = 128, .nice = 258, .chain = 1024 },
|
||||
.best, .level_9 => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 },
|
||||
};
|
||||
// zig fmt: on
|
||||
}
|
||||
};
|
||||
|
||||
/// Compress plain data from reader into compressed stream written to writer.
|
||||
pub fn compress(comptime container: Container, reader: anytype, writer: anytype, options: Options) !void {
|
||||
var c = try compressor(container, writer, options);
|
||||
try c.compress(reader);
|
||||
try c.finish();
|
||||
}
|
||||
|
||||
/// Create compressor for writer type.
|
||||
pub fn compressor(comptime container: Container, writer: anytype, options: Options) !Compressor(
|
||||
container,
|
||||
@TypeOf(writer),
|
||||
) {
|
||||
return try Compressor(container, @TypeOf(writer)).init(writer, options);
|
||||
}
|
||||
|
||||
/// Compressor type.
|
||||
pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
|
||||
const TokenWriterType = BlockWriter(WriterType);
|
||||
return Deflate(container, WriterType, TokenWriterType);
|
||||
}
|
||||
|
||||
/// Default compression algorithm. Has two steps: tokenization and token
|
||||
/// encoding.
|
||||
///
|
||||
/// Tokenization takes uncompressed input stream and produces list of tokens.
|
||||
/// Each token can be literal (byte of data) or match (backrefernce to previous
|
||||
/// data with length and distance). Tokenization accumulators 32K tokens, when
|
||||
/// full or `flush` is called tokens are passed to the `block_writer`. Level
|
||||
/// defines how hard (how slow) it tries to find match.
|
||||
///
|
||||
/// Block writer will decide which type of deflate block to write (stored, fixed,
|
||||
/// dynamic) and encode tokens to the output byte stream. Client has to call
|
||||
/// `finish` to write block with the final bit set.
|
||||
///
|
||||
/// Container defines type of header and footer which can be gzip, zlib or raw.
|
||||
/// They all share same deflate body. Raw has no header or footer just deflate
|
||||
/// body.
|
||||
///
|
||||
/// Compression algorithm explained in rfc-1951 (slightly edited for this case):
|
||||
///
|
||||
/// The compressor uses a chained hash table `lookup` to find duplicated
|
||||
/// strings, using a hash function that operates on 4-byte sequences. At any
|
||||
/// given point during compression, let XYZW be the next 4 input bytes
|
||||
/// (lookahead) to be examined (not necessarily all different, of course).
|
||||
/// First, the compressor examines the hash chain for XYZW. If the chain is
|
||||
/// empty, the compressor simply writes out X as a literal byte and advances
|
||||
/// one byte in the input. If the hash chain is not empty, indicating that the
|
||||
/// sequence XYZW (or, if we are unlucky, some other 4 bytes with the same
|
||||
/// hash function value) has occurred recently, the compressor compares all
|
||||
/// strings on the XYZW hash chain with the actual input data sequence
|
||||
/// starting at the current point, and selects the longest match.
|
||||
///
|
||||
/// To improve overall compression, the compressor defers the selection of
|
||||
/// matches ("lazy matching"): after a match of length N has been found, the
|
||||
/// compressor searches for a longer match starting at the next input byte. If
|
||||
/// it finds a longer match, it truncates the previous match to a length of
|
||||
/// one (thus producing a single literal byte) and then emits the longer
|
||||
/// match. Otherwise, it emits the original match, and, as described above,
|
||||
/// advances N bytes before continuing.
|
||||
///
|
||||
///
|
||||
/// Allocates statically ~400K (192K lookup, 128K tokens, 64K window).
|
||||
///
|
||||
/// Deflate function accepts BlockWriterType so we can change that in test to test
|
||||
/// just tokenization part.
|
||||
///
|
||||
fn Deflate(comptime container: Container, comptime WriterType: type, comptime BlockWriterType: type) type {
|
||||
return struct {
|
||||
lookup: Lookup = .{},
|
||||
win: SlidingWindow = .{},
|
||||
tokens: Tokens = .{},
|
||||
wrt: WriterType,
|
||||
block_writer: BlockWriterType,
|
||||
level: LevelArgs,
|
||||
hasher: container.Hasher() = .{},
|
||||
|
||||
// Match and literal at the previous position.
|
||||
// Used for lazy match finding in processWindow.
|
||||
prev_match: ?Token = null,
|
||||
prev_literal: ?u8 = null,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn init(wrt: WriterType, options: Options) !Self {
|
||||
const self = Self{
|
||||
.wrt = wrt,
|
||||
.block_writer = BlockWriterType.init(wrt),
|
||||
.level = LevelArgs.get(options.level),
|
||||
};
|
||||
try container.writeHeader(self.wrt);
|
||||
return self;
|
||||
}
|
||||
|
||||
const FlushOption = enum { none, flush, final };
|
||||
|
||||
// Process data in window and create tokens. If token buffer is full
|
||||
// flush tokens to the token writer. In the case of `flush` or `final`
|
||||
// option it will process all data from the window. In the `none` case
|
||||
// it will preserve some data for the next match.
|
||||
fn tokenize(self: *Self, flush_opt: FlushOption) !void {
|
||||
// flush - process all data from window
|
||||
const should_flush = (flush_opt != .none);
|
||||
|
||||
// While there is data in active lookahead buffer.
|
||||
while (self.win.activeLookahead(should_flush)) |lh| {
|
||||
var step: u16 = 1; // 1 in the case of literal, match length otherwise
|
||||
const pos: u16 = self.win.pos();
|
||||
const literal = lh[0]; // literal at current position
|
||||
const min_len: u16 = if (self.prev_match) |m| m.length() else 0;
|
||||
|
||||
// Try to find match at least min_len long.
|
||||
if (self.findMatch(pos, lh, min_len)) |match| {
|
||||
// Found better match than previous.
|
||||
try self.addPrevLiteral();
|
||||
|
||||
// Is found match length good enough?
|
||||
if (match.length() >= self.level.lazy) {
|
||||
// Don't try to lazy find better match, use this.
|
||||
step = try self.addMatch(match);
|
||||
} else {
|
||||
// Store this match.
|
||||
self.prev_literal = literal;
|
||||
self.prev_match = match;
|
||||
}
|
||||
} else {
|
||||
// There is no better match at current pos then it was previous.
|
||||
// Write previous match or literal.
|
||||
if (self.prev_match) |m| {
|
||||
// Write match from previous position.
|
||||
step = try self.addMatch(m) - 1; // we already advanced 1 from previous position
|
||||
} else {
|
||||
// No match at previous position.
|
||||
// Write previous literal if any, and remember this literal.
|
||||
try self.addPrevLiteral();
|
||||
self.prev_literal = literal;
|
||||
}
|
||||
}
|
||||
// Advance window and add hashes.
|
||||
self.windowAdvance(step, lh, pos);
|
||||
}
|
||||
|
||||
if (should_flush) {
|
||||
// In the case of flushing, last few lookahead buffers were smaller then min match len.
|
||||
// So only last literal can be unwritten.
|
||||
assert(self.prev_match == null);
|
||||
try self.addPrevLiteral();
|
||||
self.prev_literal = null;
|
||||
|
||||
try self.flushTokens(flush_opt);
|
||||
}
|
||||
}
|
||||
|
||||
fn windowAdvance(self: *Self, step: u16, lh: []const u8, pos: u16) void {
|
||||
// current position is already added in findMatch
|
||||
self.lookup.bulkAdd(lh[1..], step - 1, pos + 1);
|
||||
self.win.advance(step);
|
||||
}
|
||||
|
||||
// Add previous literal (if any) to the tokens list.
|
||||
fn addPrevLiteral(self: *Self) !void {
|
||||
if (self.prev_literal) |l| try self.addToken(Token.initLiteral(l));
|
||||
}
|
||||
|
||||
// Add match to the tokens list, reset prev pointers.
|
||||
// Returns length of the added match.
|
||||
fn addMatch(self: *Self, m: Token) !u16 {
|
||||
try self.addToken(m);
|
||||
self.prev_literal = null;
|
||||
self.prev_match = null;
|
||||
return m.length();
|
||||
}
|
||||
|
||||
fn addToken(self: *Self, token: Token) !void {
|
||||
self.tokens.add(token);
|
||||
if (self.tokens.full()) try self.flushTokens(.none);
|
||||
}
|
||||
|
||||
// Finds largest match in the history window with the data at current pos.
|
||||
fn findMatch(self: *Self, pos: u16, lh: []const u8, min_len: u16) ?Token {
|
||||
var len: u16 = min_len;
|
||||
// Previous location with the same hash (same 4 bytes).
|
||||
var prev_pos = self.lookup.add(lh, pos);
|
||||
// Last found match.
|
||||
var match: ?Token = null;
|
||||
|
||||
// How much back-references to try, performance knob.
|
||||
var chain: usize = self.level.chain;
|
||||
if (len >= self.level.good) {
|
||||
// If we've got a match that's good enough, only look in 1/4 the chain.
|
||||
chain >>= 2;
|
||||
}
|
||||
|
||||
// Hot path loop!
|
||||
while (prev_pos > 0 and chain > 0) : (chain -= 1) {
|
||||
const distance = pos - prev_pos;
|
||||
if (distance > consts.match.max_distance)
|
||||
break;
|
||||
|
||||
const new_len = self.win.match(prev_pos, pos, len);
|
||||
if (new_len > len) {
|
||||
match = Token.initMatch(@intCast(distance), new_len);
|
||||
if (new_len >= self.level.nice) {
|
||||
// The match is good enough that we don't try to find a better one.
|
||||
return match;
|
||||
}
|
||||
len = new_len;
|
||||
}
|
||||
prev_pos = self.lookup.prev(prev_pos);
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
fn flushTokens(self: *Self, flush_opt: FlushOption) !void {
|
||||
// Pass tokens to the token writer
|
||||
try self.block_writer.write(self.tokens.tokens(), flush_opt == .final, self.win.tokensBuffer());
|
||||
// Stored block ensures byte alignment.
|
||||
// It has 3 bits (final, block_type) and then padding until byte boundary.
|
||||
// After that everything is aligned to the boundary in the stored block.
|
||||
// Empty stored block is Ob000 + (0-7) bits of padding + 0x00 0x00 0xFF 0xFF.
|
||||
// Last 4 bytes are byte aligned.
|
||||
if (flush_opt == .flush) {
|
||||
try self.block_writer.storedBlock("", false);
|
||||
}
|
||||
if (flush_opt != .none) {
|
||||
// Safe to call only when byte aligned or it is OK to add
|
||||
// padding bits (on last byte of the final block).
|
||||
try self.block_writer.flush();
|
||||
}
|
||||
// Reset internal tokens store.
|
||||
self.tokens.reset();
|
||||
// Notify win that tokens are flushed.
|
||||
self.win.flush();
|
||||
}
|
||||
|
||||
// Slide win and if needed lookup tables.
|
||||
fn slide(self: *Self) void {
|
||||
const n = self.win.slide();
|
||||
self.lookup.slide(n);
|
||||
}
|
||||
|
||||
/// Compresses as much data as possible, stops when the reader becomes
|
||||
/// empty. It will introduce some output latency (reading input without
|
||||
/// producing all output) because some data are still in internal
|
||||
/// buffers.
|
||||
///
|
||||
/// It is up to the caller to call flush (if needed) or finish (required)
|
||||
/// when is need to output any pending data or complete stream.
|
||||
///
|
||||
pub fn compress(self: *Self, reader: anytype) !void {
|
||||
while (true) {
|
||||
// Fill window from reader
|
||||
const buf = self.win.writable();
|
||||
if (buf.len == 0) {
|
||||
try self.tokenize(.none);
|
||||
self.slide();
|
||||
continue;
|
||||
}
|
||||
const n = try reader.readAll(buf);
|
||||
self.hasher.update(buf[0..n]);
|
||||
self.win.written(n);
|
||||
// Process window
|
||||
try self.tokenize(.none);
|
||||
// Exit when no more data in reader
|
||||
if (n < buf.len) break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Flushes internal buffers to the output writer. Outputs empty stored
|
||||
/// block to sync bit stream to the byte boundary, so that the
|
||||
/// decompressor can get all input data available so far.
|
||||
///
|
||||
/// It is useful mainly in compressed network protocols, to ensure that
|
||||
/// deflate bit stream can be used as byte stream. May degrade
|
||||
/// compression so it should be used only when necessary.
|
||||
///
|
||||
/// Completes the current deflate block and follows it with an empty
|
||||
/// stored block that is three zero bits plus filler bits to the next
|
||||
/// byte, followed by four bytes (00 00 ff ff).
|
||||
///
|
||||
pub fn flush(self: *Self) !void {
|
||||
try self.tokenize(.flush);
|
||||
}
|
||||
|
||||
/// Completes deflate bit stream by writing any pending data as deflate
|
||||
/// final deflate block. HAS to be called once all data are written to
|
||||
/// the compressor as a signal that next block has to have final bit
|
||||
/// set.
|
||||
///
|
||||
pub fn finish(self: *Self) !void {
|
||||
try self.tokenize(.final);
|
||||
try container.writeFooter(&self.hasher, self.wrt);
|
||||
}
|
||||
|
||||
/// Use another writer while preserving history. Most probably flush
|
||||
/// should be called on old writer before setting new.
|
||||
pub fn setWriter(self: *Self, new_writer: WriterType) void {
|
||||
self.block_writer.setWriter(new_writer);
|
||||
self.wrt = new_writer;
|
||||
}
|
||||
|
||||
// Writer interface
|
||||
|
||||
pub const Writer = io.GenericWriter(*Self, Error, write);
|
||||
pub const Error = BlockWriterType.Error;
|
||||
|
||||
/// Write `input` of uncompressed data.
|
||||
/// See compress.
|
||||
pub fn write(self: *Self, input: []const u8) !usize {
|
||||
var fbs = io.fixedBufferStream(input);
|
||||
try self.compress(fbs.reader());
|
||||
return input.len;
|
||||
}
|
||||
|
||||
pub fn writer(self: *Self) Writer {
|
||||
return .{ .context = self };
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Tokens store
|
||||
const Tokens = struct {
|
||||
list: [consts.deflate.tokens]Token = undefined,
|
||||
pos: usize = 0,
|
||||
|
||||
fn add(self: *Tokens, t: Token) void {
|
||||
self.list[self.pos] = t;
|
||||
self.pos += 1;
|
||||
}
|
||||
|
||||
fn full(self: *Tokens) bool {
|
||||
return self.pos == self.list.len;
|
||||
}
|
||||
|
||||
fn reset(self: *Tokens) void {
|
||||
self.pos = 0;
|
||||
}
|
||||
|
||||
fn tokens(self: *Tokens) []const Token {
|
||||
return self.list[0..self.pos];
|
||||
}
|
||||
};
|
||||
|
||||
/// Creates huffman only deflate blocks. Disables Lempel-Ziv match searching and
|
||||
/// only performs Huffman entropy encoding. Results in faster compression, much
|
||||
/// less memory requirements during compression but bigger compressed sizes.
|
||||
pub const huffman = struct {
|
||||
pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void {
|
||||
var c = try huffman.compressor(container, writer);
|
||||
try c.compress(reader);
|
||||
try c.finish();
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
|
||||
return SimpleCompressor(.huffman, container, WriterType);
|
||||
}
|
||||
|
||||
pub fn compressor(comptime container: Container, writer: anytype) !huffman.Compressor(container, @TypeOf(writer)) {
|
||||
return try huffman.Compressor(container, @TypeOf(writer)).init(writer);
|
||||
}
|
||||
};
|
||||
|
||||
/// Creates store blocks only. Data are not compressed only packed into deflate
|
||||
/// store blocks. That adds 9 bytes of header for each block. Max stored block
|
||||
/// size is 64K. Block is emitted when flush is called on on finish.
|
||||
pub const store = struct {
|
||||
pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void {
|
||||
var c = try store.compressor(container, writer);
|
||||
try c.compress(reader);
|
||||
try c.finish();
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
|
||||
return SimpleCompressor(.store, container, WriterType);
|
||||
}
|
||||
|
||||
pub fn compressor(comptime container: Container, writer: anytype) !store.Compressor(container, @TypeOf(writer)) {
|
||||
return try store.Compressor(container, @TypeOf(writer)).init(writer);
|
||||
}
|
||||
};
|
||||
|
||||
const SimpleCompressorKind = enum {
|
||||
huffman,
|
||||
store,
|
||||
};
|
||||
|
||||
fn simpleCompressor(
|
||||
comptime kind: SimpleCompressorKind,
|
||||
comptime container: Container,
|
||||
writer: anytype,
|
||||
) !SimpleCompressor(kind, container, @TypeOf(writer)) {
|
||||
return try SimpleCompressor(kind, container, @TypeOf(writer)).init(writer);
|
||||
}
|
||||
|
||||
fn SimpleCompressor(
|
||||
comptime kind: SimpleCompressorKind,
|
||||
comptime container: Container,
|
||||
comptime WriterType: type,
|
||||
) type {
|
||||
const BlockWriterType = BlockWriter(WriterType);
|
||||
return struct {
|
||||
buffer: [65535]u8 = undefined, // because store blocks are limited to 65535 bytes
|
||||
wp: usize = 0,
|
||||
|
||||
wrt: WriterType,
|
||||
block_writer: BlockWriterType,
|
||||
hasher: container.Hasher() = .{},
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn init(wrt: WriterType) !Self {
|
||||
const self = Self{
|
||||
.wrt = wrt,
|
||||
.block_writer = BlockWriterType.init(wrt),
|
||||
};
|
||||
try container.writeHeader(self.wrt);
|
||||
return self;
|
||||
}
|
||||
|
||||
pub fn flush(self: *Self) !void {
|
||||
try self.flushBuffer(false);
|
||||
try self.block_writer.storedBlock("", false);
|
||||
try self.block_writer.flush();
|
||||
}
|
||||
|
||||
pub fn finish(self: *Self) !void {
|
||||
try self.flushBuffer(true);
|
||||
try self.block_writer.flush();
|
||||
try container.writeFooter(&self.hasher, self.wrt);
|
||||
}
|
||||
|
||||
fn flushBuffer(self: *Self, final: bool) !void {
|
||||
const buf = self.buffer[0..self.wp];
|
||||
switch (kind) {
|
||||
.huffman => try self.block_writer.huffmanBlock(buf, final),
|
||||
.store => try self.block_writer.storedBlock(buf, final),
|
||||
}
|
||||
self.wp = 0;
|
||||
}
|
||||
|
||||
// Writes all data from the input reader of uncompressed data.
|
||||
// It is up to the caller to call flush or finish if there is need to
|
||||
// output compressed blocks.
|
||||
pub fn compress(self: *Self, reader: anytype) !void {
|
||||
while (true) {
|
||||
// read from rdr into buffer
|
||||
const buf = self.buffer[self.wp..];
|
||||
if (buf.len == 0) {
|
||||
try self.flushBuffer(false);
|
||||
continue;
|
||||
}
|
||||
const n = try reader.readAll(buf);
|
||||
self.hasher.update(buf[0..n]);
|
||||
self.wp += n;
|
||||
if (n < buf.len) break; // no more data in reader
|
||||
}
|
||||
}
|
||||
|
||||
// Writer interface
|
||||
|
||||
pub const Writer = io.GenericWriter(*Self, Error, write);
|
||||
pub const Error = BlockWriterType.Error;
|
||||
|
||||
// Write `input` of uncompressed data.
|
||||
pub fn write(self: *Self, input: []const u8) !usize {
|
||||
var fbs = io.fixedBufferStream(input);
|
||||
try self.compress(fbs.reader());
|
||||
return input.len;
|
||||
}
|
||||
|
||||
pub fn writer(self: *Self) Writer {
|
||||
return .{ .context = self };
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
const builtin = @import("builtin");
|
||||
|
||||
test "tokenization" {
|
||||
const L = Token.initLiteral;
|
||||
const M = Token.initMatch;
|
||||
|
||||
const cases = [_]struct {
|
||||
data: []const u8,
|
||||
tokens: []const Token,
|
||||
}{
|
||||
.{
|
||||
.data = "Blah blah blah blah blah!",
|
||||
.tokens = &[_]Token{ L('B'), L('l'), L('a'), L('h'), L(' '), L('b'), M(5, 18), L('!') },
|
||||
},
|
||||
.{
|
||||
.data = "ABCDEABCD ABCDEABCD",
|
||||
.tokens = &[_]Token{
|
||||
L('A'), L('B'), L('C'), L('D'), L('E'), L('A'), L('B'), L('C'), L('D'), L(' '),
|
||||
L('A'), M(10, 8),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
for (cases) |c| {
|
||||
inline for (Container.list) |container| { // for each wrapping
|
||||
|
||||
var cw = io.countingWriter(io.null_writer);
|
||||
const cww = cw.writer();
|
||||
var df = try Deflate(container, @TypeOf(cww), TestTokenWriter).init(cww, .{});
|
||||
|
||||
_ = try df.write(c.data);
|
||||
try df.flush();
|
||||
|
||||
// df.token_writer.show();
|
||||
try expect(df.block_writer.pos == c.tokens.len); // number of tokens written
|
||||
try testing.expectEqualSlices(Token, df.block_writer.get(), c.tokens); // tokens match
|
||||
|
||||
try testing.expectEqual(container.headerSize(), cw.bytes_written);
|
||||
try df.finish();
|
||||
try testing.expectEqual(container.size(), cw.bytes_written);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tests that tokens written are equal to expected token list.
|
||||
const TestTokenWriter = struct {
|
||||
const Self = @This();
|
||||
|
||||
pos: usize = 0,
|
||||
actual: [128]Token = undefined,
|
||||
|
||||
pub fn init(_: anytype) Self {
|
||||
return .{};
|
||||
}
|
||||
pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
|
||||
for (tokens) |t| {
|
||||
self.actual[self.pos] = t;
|
||||
self.pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}
|
||||
|
||||
pub fn get(self: *Self) []Token {
|
||||
return self.actual[0..self.pos];
|
||||
}
|
||||
|
||||
pub fn show(self: *Self) void {
|
||||
print("\n", .{});
|
||||
for (self.get()) |t| {
|
||||
t.show();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn flush(_: *Self) !void {}
|
||||
};
|
||||
|
||||
test "file tokenization" {
|
||||
const levels = [_]Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
|
||||
const cases = [_]struct {
|
||||
data: []const u8, // uncompressed content
|
||||
// expected number of tokens producet in deflate tokenization
|
||||
tokens_count: [levels.len]usize = .{0} ** levels.len,
|
||||
}{
|
||||
.{
|
||||
.data = @embedFile("testdata/rfc1951.txt"),
|
||||
.tokens_count = .{ 7675, 7672, 7599, 7594, 7598, 7599 },
|
||||
},
|
||||
|
||||
.{
|
||||
.data = @embedFile("testdata/block_writer/huffman-null-max.input"),
|
||||
.tokens_count = .{ 257, 257, 257, 257, 257, 257 },
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("testdata/block_writer/huffman-pi.input"),
|
||||
.tokens_count = .{ 2570, 2564, 2564, 2564, 2564, 2564 },
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("testdata/block_writer/huffman-text.input"),
|
||||
.tokens_count = .{ 235, 234, 234, 234, 234, 234 },
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("testdata/fuzz/roundtrip1.input"),
|
||||
.tokens_count = .{ 333, 331, 331, 331, 331, 331 },
|
||||
},
|
||||
.{
|
||||
.data = @embedFile("testdata/fuzz/roundtrip2.input"),
|
||||
.tokens_count = .{ 334, 334, 334, 334, 334, 334 },
|
||||
},
|
||||
};
|
||||
|
||||
for (cases) |case| { // for each case
|
||||
const data = case.data;
|
||||
|
||||
for (levels, 0..) |level, i| { // for each compression level
|
||||
var original = io.fixedBufferStream(data);
|
||||
|
||||
// buffer for decompressed data
|
||||
var al = std.ArrayList(u8).init(testing.allocator);
|
||||
defer al.deinit();
|
||||
const writer = al.writer();
|
||||
|
||||
// create compressor
|
||||
const WriterType = @TypeOf(writer);
|
||||
const TokenWriter = TokenDecoder(@TypeOf(writer));
|
||||
var cmp = try Deflate(.raw, WriterType, TokenWriter).init(writer, .{ .level = level });
|
||||
|
||||
// Stream uncompressed `original` data to the compressor. It will
|
||||
// produce tokens list and pass that list to the TokenDecoder. This
|
||||
// TokenDecoder uses CircularBuffer from inflate to convert list of
|
||||
// tokens back to the uncompressed stream.
|
||||
try cmp.compress(original.reader());
|
||||
try cmp.flush();
|
||||
const expected_count = case.tokens_count[i];
|
||||
const actual = cmp.block_writer.tokens_count;
|
||||
if (expected_count == 0) {
|
||||
print("actual token count {d}\n", .{actual});
|
||||
} else {
|
||||
try testing.expectEqual(expected_count, actual);
|
||||
}
|
||||
|
||||
try testing.expectEqual(data.len, al.items.len);
|
||||
try testing.expectEqualSlices(u8, data, al.items);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn TokenDecoder(comptime WriterType: type) type {
|
||||
return struct {
|
||||
const CircularBuffer = @import("CircularBuffer.zig");
|
||||
hist: CircularBuffer = .{},
|
||||
wrt: WriterType,
|
||||
tokens_count: usize = 0,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn init(wrt: WriterType) Self {
|
||||
return .{ .wrt = wrt };
|
||||
}
|
||||
|
||||
pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
|
||||
self.tokens_count += tokens.len;
|
||||
for (tokens) |t| {
|
||||
switch (t.kind) {
|
||||
.literal => self.hist.write(t.literal()),
|
||||
.match => try self.hist.writeMatch(t.length(), t.distance()),
|
||||
}
|
||||
if (self.hist.free() < 285) try self.flushWin();
|
||||
}
|
||||
try self.flushWin();
|
||||
}
|
||||
|
||||
pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}
|
||||
|
||||
fn flushWin(self: *Self) !void {
|
||||
while (true) {
|
||||
const buf = self.hist.read();
|
||||
if (buf.len == 0) break;
|
||||
try self.wrt.writeAll(buf);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn flush(_: *Self) !void {}
|
||||
};
|
||||
}
|
||||
|
||||
test "store simple compressor" {
|
||||
const data = "Hello world!";
|
||||
const expected = [_]u8{
|
||||
0x1, // block type 0, final bit set
|
||||
0xc, 0x0, // len = 12
|
||||
0xf3, 0xff, // ~len
|
||||
'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', //
|
||||
//0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21,
|
||||
};
|
||||
|
||||
var fbs = std.io.fixedBufferStream(data);
|
||||
var al = std.ArrayList(u8).init(testing.allocator);
|
||||
defer al.deinit();
|
||||
|
||||
var cmp = try store.compressor(.raw, al.writer());
|
||||
try cmp.compress(fbs.reader());
|
||||
try cmp.finish();
|
||||
try testing.expectEqualSlices(u8, &expected, al.items);
|
||||
|
||||
fbs.reset();
|
||||
try al.resize(0);
|
||||
|
||||
// huffman only compresoor will also emit store block for this small sample
|
||||
var hc = try huffman.compressor(.raw, al.writer());
|
||||
try hc.compress(fbs.reader());
|
||||
try hc.finish();
|
||||
try testing.expectEqualSlices(u8, &expected, al.items);
|
||||
}
|
||||
@@ -1,302 +0,0 @@
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
|
||||
pub const Symbol = packed struct {
|
||||
pub const Kind = enum(u2) {
|
||||
literal,
|
||||
end_of_block,
|
||||
match,
|
||||
};
|
||||
|
||||
symbol: u8 = 0, // symbol from alphabet
|
||||
code_bits: u4 = 0, // number of bits in code 0-15
|
||||
kind: Kind = .literal,
|
||||
|
||||
code: u16 = 0, // huffman code of the symbol
|
||||
next: u16 = 0, // pointer to the next symbol in linked list
|
||||
// it is safe to use 0 as null pointer, when sorted 0 has shortest code and fits into lookup
|
||||
|
||||
// Sorting less than function.
|
||||
pub fn asc(_: void, a: Symbol, b: Symbol) bool {
|
||||
if (a.code_bits == b.code_bits) {
|
||||
if (a.kind == b.kind) {
|
||||
return a.symbol < b.symbol;
|
||||
}
|
||||
return @intFromEnum(a.kind) < @intFromEnum(b.kind);
|
||||
}
|
||||
return a.code_bits < b.code_bits;
|
||||
}
|
||||
};
|
||||
|
||||
pub const LiteralDecoder = HuffmanDecoder(286, 15, 9);
|
||||
pub const DistanceDecoder = HuffmanDecoder(30, 15, 9);
|
||||
pub const CodegenDecoder = HuffmanDecoder(19, 7, 7);
|
||||
|
||||
pub const Error = error{
|
||||
InvalidCode,
|
||||
OversubscribedHuffmanTree,
|
||||
IncompleteHuffmanTree,
|
||||
MissingEndOfBlockCode,
|
||||
};
|
||||
|
||||
/// Creates huffman tree codes from list of code lengths (in `build`).
|
||||
///
|
||||
/// `find` then finds symbol for code bits. Code can be any length between 1 and
|
||||
/// 15 bits. When calling `find` we don't know how many bits will be used to
|
||||
/// find symbol. When symbol is returned it has code_bits field which defines
|
||||
/// how much we should advance in bit stream.
|
||||
///
|
||||
/// Lookup table is used to map 15 bit int to symbol. Same symbol is written
|
||||
/// many times in this table; 32K places for 286 (at most) symbols.
|
||||
/// Small lookup table is optimization for faster search.
|
||||
/// It is variation of the algorithm explained in [zlib](https://github.com/madler/zlib/blob/643e17b7498d12ab8d15565662880579692f769d/doc/algorithm.txt#L92)
|
||||
/// with difference that we here use statically allocated arrays.
|
||||
///
|
||||
fn HuffmanDecoder(
|
||||
comptime alphabet_size: u16,
|
||||
comptime max_code_bits: u4,
|
||||
comptime lookup_bits: u4,
|
||||
) type {
|
||||
const lookup_shift = max_code_bits - lookup_bits;
|
||||
|
||||
return struct {
|
||||
// all symbols in alaphabet, sorted by code_len, symbol
|
||||
symbols: [alphabet_size]Symbol = undefined,
|
||||
// lookup table code -> symbol
|
||||
lookup: [1 << lookup_bits]Symbol = undefined,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Generates symbols and lookup tables from list of code lens for each symbol.
|
||||
pub fn generate(self: *Self, lens: []const u4) !void {
|
||||
try checkCompleteness(lens);
|
||||
|
||||
// init alphabet with code_bits
|
||||
for (self.symbols, 0..) |_, i| {
|
||||
const cb: u4 = if (i < lens.len) lens[i] else 0;
|
||||
self.symbols[i] = if (i < 256)
|
||||
.{ .kind = .literal, .symbol = @intCast(i), .code_bits = cb }
|
||||
else if (i == 256)
|
||||
.{ .kind = .end_of_block, .symbol = 0xff, .code_bits = cb }
|
||||
else
|
||||
.{ .kind = .match, .symbol = @intCast(i - 257), .code_bits = cb };
|
||||
}
|
||||
std.sort.heap(Symbol, &self.symbols, {}, Symbol.asc);
|
||||
|
||||
// reset lookup table
|
||||
for (0..self.lookup.len) |i| {
|
||||
self.lookup[i] = .{};
|
||||
}
|
||||
|
||||
// assign code to symbols
|
||||
// reference: https://youtu.be/9_YEGLe33NA?list=PLU4IQLU9e_OrY8oASHx0u3IXAL9TOdidm&t=2639
|
||||
var code: u16 = 0;
|
||||
var idx: u16 = 0;
|
||||
for (&self.symbols, 0..) |*sym, pos| {
|
||||
if (sym.code_bits == 0) continue; // skip unused
|
||||
sym.code = code;
|
||||
|
||||
const next_code = code + (@as(u16, 1) << (max_code_bits - sym.code_bits));
|
||||
const next_idx = next_code >> lookup_shift;
|
||||
|
||||
if (next_idx > self.lookup.len or idx >= self.lookup.len) break;
|
||||
if (sym.code_bits <= lookup_bits) {
|
||||
// fill small lookup table
|
||||
for (idx..next_idx) |j|
|
||||
self.lookup[j] = sym.*;
|
||||
} else {
|
||||
// insert into linked table starting at root
|
||||
const root = &self.lookup[idx];
|
||||
const root_next = root.next;
|
||||
root.next = @intCast(pos);
|
||||
sym.next = root_next;
|
||||
}
|
||||
|
||||
idx = next_idx;
|
||||
code = next_code;
|
||||
}
|
||||
}
|
||||
|
||||
/// Given the list of code lengths check that it represents a canonical
|
||||
/// Huffman code for n symbols.
|
||||
///
|
||||
/// Reference: https://github.com/madler/zlib/blob/5c42a230b7b468dff011f444161c0145b5efae59/contrib/puff/puff.c#L340
|
||||
fn checkCompleteness(lens: []const u4) !void {
|
||||
if (alphabet_size == 286)
|
||||
if (lens[256] == 0) return error.MissingEndOfBlockCode;
|
||||
|
||||
var count = [_]u16{0} ** (@as(usize, max_code_bits) + 1);
|
||||
var max: usize = 0;
|
||||
for (lens) |n| {
|
||||
if (n == 0) continue;
|
||||
if (n > max) max = n;
|
||||
count[n] += 1;
|
||||
}
|
||||
if (max == 0) // empty tree
|
||||
return;
|
||||
|
||||
// check for an over-subscribed or incomplete set of lengths
|
||||
var left: usize = 1; // one possible code of zero length
|
||||
for (1..count.len) |len| {
|
||||
left <<= 1; // one more bit, double codes left
|
||||
if (count[len] > left)
|
||||
return error.OversubscribedHuffmanTree;
|
||||
left -= count[len]; // deduct count from possible codes
|
||||
}
|
||||
if (left > 0) { // left > 0 means incomplete
|
||||
// incomplete code ok only for single length 1 code
|
||||
if (max_code_bits > 7 and max == count[0] + count[1]) return;
|
||||
return error.IncompleteHuffmanTree;
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds symbol for lookup table code.
|
||||
pub fn find(self: *Self, code: u16) !Symbol {
|
||||
// try to find in lookup table
|
||||
const idx = code >> lookup_shift;
|
||||
const sym = self.lookup[idx];
|
||||
if (sym.code_bits != 0) return sym;
|
||||
// if not use linked list of symbols with same prefix
|
||||
return self.findLinked(code, sym.next);
|
||||
}
|
||||
|
||||
inline fn findLinked(self: *Self, code: u16, start: u16) !Symbol {
|
||||
var pos = start;
|
||||
while (pos > 0) {
|
||||
const sym = self.symbols[pos];
|
||||
const shift = max_code_bits - sym.code_bits;
|
||||
// compare code_bits number of upper bits
|
||||
if ((code ^ sym.code) >> shift == 0) return sym;
|
||||
pos = sym.next;
|
||||
}
|
||||
return error.InvalidCode;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test "init/find" {
|
||||
// example data from: https://youtu.be/SJPvNi4HrWQ?t=8423
|
||||
const code_lens = [_]u4{ 4, 3, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2 };
|
||||
var h: CodegenDecoder = .{};
|
||||
try h.generate(&code_lens);
|
||||
|
||||
const expected = [_]struct {
|
||||
sym: Symbol,
|
||||
code: u16,
|
||||
}{
|
||||
.{
|
||||
.code = 0b00_00000,
|
||||
.sym = .{ .symbol = 3, .code_bits = 2 },
|
||||
},
|
||||
.{
|
||||
.code = 0b01_00000,
|
||||
.sym = .{ .symbol = 18, .code_bits = 2 },
|
||||
},
|
||||
.{
|
||||
.code = 0b100_0000,
|
||||
.sym = .{ .symbol = 1, .code_bits = 3 },
|
||||
},
|
||||
.{
|
||||
.code = 0b101_0000,
|
||||
.sym = .{ .symbol = 4, .code_bits = 3 },
|
||||
},
|
||||
.{
|
||||
.code = 0b110_0000,
|
||||
.sym = .{ .symbol = 17, .code_bits = 3 },
|
||||
},
|
||||
.{
|
||||
.code = 0b1110_000,
|
||||
.sym = .{ .symbol = 0, .code_bits = 4 },
|
||||
},
|
||||
.{
|
||||
.code = 0b1111_000,
|
||||
.sym = .{ .symbol = 16, .code_bits = 4 },
|
||||
},
|
||||
};
|
||||
|
||||
// unused symbols
|
||||
for (0..12) |i| {
|
||||
try testing.expectEqual(0, h.symbols[i].code_bits);
|
||||
}
|
||||
// used, from index 12
|
||||
for (expected, 12..) |e, i| {
|
||||
try testing.expectEqual(e.sym.symbol, h.symbols[i].symbol);
|
||||
try testing.expectEqual(e.sym.code_bits, h.symbols[i].code_bits);
|
||||
const sym_from_code = try h.find(e.code);
|
||||
try testing.expectEqual(e.sym.symbol, sym_from_code.symbol);
|
||||
}
|
||||
|
||||
// All possible codes for each symbol.
|
||||
// Lookup table has 126 elements, to cover all possible 7 bit codes.
|
||||
for (0b0000_000..0b0100_000) |c| // 0..32 (32)
|
||||
try testing.expectEqual(3, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b0100_000..0b1000_000) |c| // 32..64 (32)
|
||||
try testing.expectEqual(18, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1000_000..0b1010_000) |c| // 64..80 (16)
|
||||
try testing.expectEqual(1, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1010_000..0b1100_000) |c| // 80..96 (16)
|
||||
try testing.expectEqual(4, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1100_000..0b1110_000) |c| // 96..112 (16)
|
||||
try testing.expectEqual(17, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1110_000..0b1111_000) |c| // 112..120 (8)
|
||||
try testing.expectEqual(0, (try h.find(@intCast(c))).symbol);
|
||||
|
||||
for (0b1111_000..0b1_0000_000) |c| // 120...128 (8)
|
||||
try testing.expectEqual(16, (try h.find(@intCast(c))).symbol);
|
||||
}
|
||||
|
||||
test "encode/decode literals" {
|
||||
const LiteralEncoder = @import("huffman_encoder.zig").LiteralEncoder;
|
||||
|
||||
for (1..286) |j| { // for all different number of codes
|
||||
var enc: LiteralEncoder = .{};
|
||||
// create frequencies
|
||||
var freq = [_]u16{0} ** 286;
|
||||
freq[256] = 1; // ensure we have end of block code
|
||||
for (&freq, 1..) |*f, i| {
|
||||
if (i % j == 0)
|
||||
f.* = @intCast(i);
|
||||
}
|
||||
|
||||
// encoder from frequencies
|
||||
enc.generate(&freq, 15);
|
||||
|
||||
// get code_lens from encoder
|
||||
var code_lens = [_]u4{0} ** 286;
|
||||
for (code_lens, 0..) |_, i| {
|
||||
code_lens[i] = @intCast(enc.codes[i].len);
|
||||
}
|
||||
// generate decoder from code lens
|
||||
var dec: LiteralDecoder = .{};
|
||||
try dec.generate(&code_lens);
|
||||
|
||||
// expect decoder code to match original encoder code
|
||||
for (dec.symbols) |s| {
|
||||
if (s.code_bits == 0) continue;
|
||||
const c_code: u16 = @bitReverse(@as(u15, @intCast(s.code)));
|
||||
const symbol: u16 = switch (s.kind) {
|
||||
.literal => s.symbol,
|
||||
.end_of_block => 256,
|
||||
.match => @as(u16, s.symbol) + 257,
|
||||
};
|
||||
|
||||
const c = enc.codes[symbol];
|
||||
try testing.expect(c.code == c_code);
|
||||
}
|
||||
|
||||
// find each symbol by code
|
||||
for (enc.codes) |c| {
|
||||
if (c.len == 0) continue;
|
||||
|
||||
const s_code: u15 = @bitReverse(@as(u15, @intCast(c.code)));
|
||||
const s = try dec.find(s_code);
|
||||
try testing.expect(s.code == s_code);
|
||||
try testing.expect(s.code_bits == c.len);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,536 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const math = std.math;
|
||||
const mem = std.mem;
|
||||
const sort = std.sort;
|
||||
const testing = std.testing;
|
||||
|
||||
const consts = @import("consts.zig").huffman;
|
||||
|
||||
const LiteralNode = struct {
|
||||
literal: u16,
|
||||
freq: u16,
|
||||
};
|
||||
|
||||
// Describes the state of the constructed tree for a given depth.
|
||||
const LevelInfo = struct {
|
||||
// Our level. for better printing
|
||||
level: u32,
|
||||
|
||||
// The frequency of the last node at this level
|
||||
last_freq: u32,
|
||||
|
||||
// The frequency of the next character to add to this level
|
||||
next_char_freq: u32,
|
||||
|
||||
// The frequency of the next pair (from level below) to add to this level.
|
||||
// Only valid if the "needed" value of the next lower level is 0.
|
||||
next_pair_freq: u32,
|
||||
|
||||
// The number of chains remaining to generate for this level before moving
|
||||
// up to the next level
|
||||
needed: u32,
|
||||
};
|
||||
|
||||
// hcode is a huffman code with a bit code and bit length.
|
||||
pub const HuffCode = struct {
|
||||
code: u16 = 0,
|
||||
len: u16 = 0,
|
||||
|
||||
// set sets the code and length of an hcode.
|
||||
fn set(self: *HuffCode, code: u16, length: u16) void {
|
||||
self.len = length;
|
||||
self.code = code;
|
||||
}
|
||||
};
|
||||
|
||||
pub fn HuffmanEncoder(comptime size: usize) type {
|
||||
return struct {
|
||||
codes: [size]HuffCode = undefined,
|
||||
// Reusable buffer with the longest possible frequency table.
|
||||
freq_cache: [consts.max_num_frequencies + 1]LiteralNode = undefined,
|
||||
bit_count: [17]u32 = undefined,
|
||||
lns: []LiteralNode = undefined, // sorted by literal, stored to avoid repeated allocation in generate
|
||||
lfs: []LiteralNode = undefined, // sorted by frequency, stored to avoid repeated allocation in generate
|
||||
|
||||
const Self = @This();
|
||||
|
||||
// Update this Huffman Code object to be the minimum code for the specified frequency count.
|
||||
//
|
||||
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
|
||||
// max_bits The maximum number of bits to use for any literal.
|
||||
pub fn generate(self: *Self, freq: []u16, max_bits: u32) void {
|
||||
var list = self.freq_cache[0 .. freq.len + 1];
|
||||
// Number of non-zero literals
|
||||
var count: u32 = 0;
|
||||
// Set list to be the set of all non-zero literals and their frequencies
|
||||
for (freq, 0..) |f, i| {
|
||||
if (f != 0) {
|
||||
list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
|
||||
count += 1;
|
||||
} else {
|
||||
list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
|
||||
self.codes[i].len = 0;
|
||||
}
|
||||
}
|
||||
list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
|
||||
|
||||
list = list[0..count];
|
||||
if (count <= 2) {
|
||||
// Handle the small cases here, because they are awkward for the general case code. With
|
||||
// two or fewer literals, everything has bit length 1.
|
||||
for (list, 0..) |node, i| {
|
||||
// "list" is in order of increasing literal value.
|
||||
self.codes[node.literal].set(@as(u16, @intCast(i)), 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
self.lfs = list;
|
||||
mem.sort(LiteralNode, self.lfs, {}, byFreq);
|
||||
|
||||
// Get the number of literals for each bit count
|
||||
const bit_count = self.bitCounts(list, max_bits);
|
||||
// And do the assignment
|
||||
self.assignEncodingAndSize(bit_count, list);
|
||||
}
|
||||
|
||||
pub fn bitLength(self: *Self, freq: []u16) u32 {
|
||||
var total: u32 = 0;
|
||||
for (freq, 0..) |f, i| {
|
||||
if (f != 0) {
|
||||
total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
// Return the number of literals assigned to each bit size in the Huffman encoding
|
||||
//
|
||||
// This method is only called when list.len >= 3
|
||||
// The cases of 0, 1, and 2 literals are handled by special case code.
|
||||
//
|
||||
// list: An array of the literals with non-zero frequencies
|
||||
// and their associated frequencies. The array is in order of increasing
|
||||
// frequency, and has as its last element a special element with frequency
|
||||
// std.math.maxInt(i32)
|
||||
//
|
||||
// max_bits: The maximum number of bits that should be used to encode any literal.
|
||||
// Must be less than 16.
|
||||
//
|
||||
// Returns an integer array in which array[i] indicates the number of literals
|
||||
// that should be encoded in i bits.
|
||||
fn bitCounts(self: *Self, list: []LiteralNode, max_bits_to_use: usize) []u32 {
|
||||
var max_bits = max_bits_to_use;
|
||||
const n = list.len;
|
||||
const max_bits_limit = 16;
|
||||
|
||||
assert(max_bits < max_bits_limit);
|
||||
|
||||
// The tree can't have greater depth than n - 1, no matter what. This
|
||||
// saves a little bit of work in some small cases
|
||||
max_bits = @min(max_bits, n - 1);
|
||||
|
||||
// Create information about each of the levels.
|
||||
// A bogus "Level 0" whose sole purpose is so that
|
||||
// level1.prev.needed == 0. This makes level1.next_pair_freq
|
||||
// be a legitimate value that never gets chosen.
|
||||
var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo);
|
||||
// leaf_counts[i] counts the number of literals at the left
|
||||
// of ancestors of the rightmost node at level i.
|
||||
// leaf_counts[i][j] is the number of literals at the left
|
||||
// of the level j ancestor.
|
||||
var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32);
|
||||
|
||||
{
|
||||
var level = @as(u32, 1);
|
||||
while (level <= max_bits) : (level += 1) {
|
||||
// For every level, the first two items are the first two characters.
|
||||
// We initialize the levels as if we had already figured this out.
|
||||
levels[level] = LevelInfo{
|
||||
.level = level,
|
||||
.last_freq = list[1].freq,
|
||||
.next_char_freq = list[2].freq,
|
||||
.next_pair_freq = list[0].freq + list[1].freq,
|
||||
.needed = 0,
|
||||
};
|
||||
leaf_counts[level][level] = 2;
|
||||
if (level == 1) {
|
||||
levels[level].next_pair_freq = math.maxInt(i32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We need a total of 2*n - 2 items at top level and have already generated 2.
|
||||
levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;
|
||||
|
||||
{
|
||||
var level = max_bits;
|
||||
while (true) {
|
||||
var l = &levels[level];
|
||||
if (l.next_pair_freq == math.maxInt(i32) and l.next_char_freq == math.maxInt(i32)) {
|
||||
// We've run out of both leaves and pairs.
|
||||
// End all calculations for this level.
|
||||
// To make sure we never come back to this level or any lower level,
|
||||
// set next_pair_freq impossibly large.
|
||||
l.needed = 0;
|
||||
levels[level + 1].next_pair_freq = math.maxInt(i32);
|
||||
level += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
const prev_freq = l.last_freq;
|
||||
if (l.next_char_freq < l.next_pair_freq) {
|
||||
// The next item on this row is a leaf node.
|
||||
const next = leaf_counts[level][level] + 1;
|
||||
l.last_freq = l.next_char_freq;
|
||||
// Lower leaf_counts are the same of the previous node.
|
||||
leaf_counts[level][level] = next;
|
||||
if (next >= list.len) {
|
||||
l.next_char_freq = maxNode().freq;
|
||||
} else {
|
||||
l.next_char_freq = list[next].freq;
|
||||
}
|
||||
} else {
|
||||
// The next item on this row is a pair from the previous row.
|
||||
// next_pair_freq isn't valid until we generate two
|
||||
// more values in the level below
|
||||
l.last_freq = l.next_pair_freq;
|
||||
// Take leaf counts from the lower level, except counts[level] remains the same.
|
||||
@memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
|
||||
levels[l.level - 1].needed = 2;
|
||||
}
|
||||
|
||||
l.needed -= 1;
|
||||
if (l.needed == 0) {
|
||||
// We've done everything we need to do for this level.
|
||||
// Continue calculating one level up. Fill in next_pair_freq
|
||||
// of that level with the sum of the two nodes we've just calculated on
|
||||
// this level.
|
||||
if (l.level == max_bits) {
|
||||
// All done!
|
||||
break;
|
||||
}
|
||||
levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
|
||||
level += 1;
|
||||
} else {
|
||||
// If we stole from below, move down temporarily to replenish it.
|
||||
while (levels[level - 1].needed > 0) {
|
||||
level -= 1;
|
||||
if (level == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Somethings is wrong if at the end, the top level is null or hasn't used
|
||||
// all of the leaves.
|
||||
assert(leaf_counts[max_bits][max_bits] == n);
|
||||
|
||||
var bit_count = self.bit_count[0 .. max_bits + 1];
|
||||
var bits: u32 = 1;
|
||||
const counts = &leaf_counts[max_bits];
|
||||
{
|
||||
var level = max_bits;
|
||||
while (level > 0) : (level -= 1) {
|
||||
// counts[level] gives the number of literals requiring at least "bits"
|
||||
// bits to encode.
|
||||
bit_count[bits] = counts[level] - counts[level - 1];
|
||||
bits += 1;
|
||||
if (level == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bit_count;
|
||||
}
|
||||
|
||||
// Look at the leaves and assign them a bit count and an encoding as specified
|
||||
// in RFC 1951 3.2.2
|
||||
fn assignEncodingAndSize(self: *Self, bit_count: []u32, list_arg: []LiteralNode) void {
|
||||
var code = @as(u16, 0);
|
||||
var list = list_arg;
|
||||
|
||||
for (bit_count, 0..) |bits, n| {
|
||||
code <<= 1;
|
||||
if (n == 0 or bits == 0) {
|
||||
continue;
|
||||
}
|
||||
// The literals list[list.len-bits] .. list[list.len-bits]
|
||||
// are encoded using "bits" bits, and get the values
|
||||
// code, code + 1, .... The code values are
|
||||
// assigned in literal order (not frequency order).
|
||||
const chunk = list[list.len - @as(u32, @intCast(bits)) ..];
|
||||
|
||||
self.lns = chunk;
|
||||
mem.sort(LiteralNode, self.lns, {}, byLiteral);
|
||||
|
||||
for (chunk) |node| {
|
||||
self.codes[node.literal] = HuffCode{
|
||||
.code = bitReverse(u16, code, @as(u5, @intCast(n))),
|
||||
.len = @as(u16, @intCast(n)),
|
||||
};
|
||||
code += 1;
|
||||
}
|
||||
list = list[0 .. list.len - @as(u32, @intCast(bits))];
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn maxNode() LiteralNode {
|
||||
return LiteralNode{
|
||||
.literal = math.maxInt(u16),
|
||||
.freq = math.maxInt(u16),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn huffmanEncoder(comptime size: u32) HuffmanEncoder(size) {
|
||||
return .{};
|
||||
}
|
||||
|
||||
pub const LiteralEncoder = HuffmanEncoder(consts.max_num_frequencies);
|
||||
pub const DistanceEncoder = HuffmanEncoder(consts.distance_code_count);
|
||||
pub const CodegenEncoder = HuffmanEncoder(19);
|
||||
|
||||
// Generates a HuffmanCode corresponding to the fixed literal table
|
||||
pub fn fixedLiteralEncoder() LiteralEncoder {
|
||||
var h: LiteralEncoder = undefined;
|
||||
var ch: u16 = 0;
|
||||
|
||||
while (ch < consts.max_num_frequencies) : (ch += 1) {
|
||||
var bits: u16 = undefined;
|
||||
var size: u16 = undefined;
|
||||
switch (ch) {
|
||||
0...143 => {
|
||||
// size 8, 000110000 .. 10111111
|
||||
bits = ch + 48;
|
||||
size = 8;
|
||||
},
|
||||
144...255 => {
|
||||
// size 9, 110010000 .. 111111111
|
||||
bits = ch + 400 - 144;
|
||||
size = 9;
|
||||
},
|
||||
256...279 => {
|
||||
// size 7, 0000000 .. 0010111
|
||||
bits = ch - 256;
|
||||
size = 7;
|
||||
},
|
||||
else => {
|
||||
// size 8, 11000000 .. 11000111
|
||||
bits = ch + 192 - 280;
|
||||
size = 8;
|
||||
},
|
||||
}
|
||||
h.codes[ch] = HuffCode{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
pub fn fixedDistanceEncoder() DistanceEncoder {
|
||||
var h: DistanceEncoder = undefined;
|
||||
for (h.codes, 0..) |_, ch| {
|
||||
h.codes[ch] = HuffCode{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
pub fn huffmanDistanceEncoder() DistanceEncoder {
|
||||
var distance_freq = [1]u16{0} ** consts.distance_code_count;
|
||||
distance_freq[0] = 1;
|
||||
// huff_distance is a static distance encoder used for huffman only encoding.
|
||||
// It can be reused since we will not be encoding distance values.
|
||||
var h: DistanceEncoder = .{};
|
||||
h.generate(distance_freq[0..], 15);
|
||||
return h;
|
||||
}
|
||||
|
||||
fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
|
||||
_ = context;
|
||||
return a.literal < b.literal;
|
||||
}
|
||||
|
||||
fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
|
||||
_ = context;
|
||||
if (a.freq == b.freq) {
|
||||
return a.literal < b.literal;
|
||||
}
|
||||
return a.freq < b.freq;
|
||||
}
|
||||
|
||||
test "generate a Huffman code from an array of frequencies" {
|
||||
var freqs: [19]u16 = [_]u16{
|
||||
8, // 0
|
||||
1, // 1
|
||||
1, // 2
|
||||
2, // 3
|
||||
5, // 4
|
||||
10, // 5
|
||||
9, // 6
|
||||
1, // 7
|
||||
0, // 8
|
||||
0, // 9
|
||||
0, // 10
|
||||
0, // 11
|
||||
0, // 12
|
||||
0, // 13
|
||||
0, // 14
|
||||
0, // 15
|
||||
1, // 16
|
||||
3, // 17
|
||||
5, // 18
|
||||
};
|
||||
|
||||
var enc = huffmanEncoder(19);
|
||||
enc.generate(freqs[0..], 7);
|
||||
|
||||
try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
|
||||
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[0].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[1].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[2].len);
|
||||
try testing.expectEqual(@as(usize, 5), enc.codes[3].len);
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[4].len);
|
||||
try testing.expectEqual(@as(usize, 2), enc.codes[5].len);
|
||||
try testing.expectEqual(@as(usize, 2), enc.codes[6].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[7].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[8].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[9].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[10].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[11].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[12].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[13].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[14].len);
|
||||
try testing.expectEqual(@as(usize, 0), enc.codes[15].len);
|
||||
try testing.expectEqual(@as(usize, 6), enc.codes[16].len);
|
||||
try testing.expectEqual(@as(usize, 5), enc.codes[17].len);
|
||||
try testing.expectEqual(@as(usize, 3), enc.codes[18].len);
|
||||
|
||||
try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code);
|
||||
try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code);
|
||||
try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code);
|
||||
try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code);
|
||||
try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code);
|
||||
try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code);
|
||||
try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code);
|
||||
try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code);
|
||||
try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code);
|
||||
try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
|
||||
try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
|
||||
}
|
||||
|
||||
test "generate a Huffman code for the fixed literal table specific to Deflate" {
|
||||
const enc = fixedLiteralEncoder();
|
||||
for (enc.codes) |c| {
|
||||
switch (c.len) {
|
||||
7 => {
|
||||
const v = @bitReverse(@as(u7, @intCast(c.code)));
|
||||
try testing.expect(v <= 0b0010111);
|
||||
},
|
||||
8 => {
|
||||
const v = @bitReverse(@as(u8, @intCast(c.code)));
|
||||
try testing.expect((v >= 0b000110000 and v <= 0b10111111) or
|
||||
(v >= 0b11000000 and v <= 11000111));
|
||||
},
|
||||
9 => {
|
||||
const v = @bitReverse(@as(u9, @intCast(c.code)));
|
||||
try testing.expect(v >= 0b110010000 and v <= 0b111111111);
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test "generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" {
|
||||
const enc = fixedDistanceEncoder();
|
||||
for (enc.codes) |c| {
|
||||
const v = @bitReverse(@as(u5, @intCast(c.code)));
|
||||
try testing.expect(v <= 29);
|
||||
try testing.expect(c.len == 5);
|
||||
}
|
||||
}
|
||||
|
||||
// Reverse bit-by-bit a N-bit code.
|
||||
fn bitReverse(comptime T: type, value: T, n: usize) T {
|
||||
const r = @bitReverse(value);
|
||||
return r >> @as(math.Log2Int(T), @intCast(@typeInfo(T).int.bits - n));
|
||||
}
|
||||
|
||||
test bitReverse {
|
||||
const ReverseBitsTest = struct {
|
||||
in: u16,
|
||||
bit_count: u5,
|
||||
out: u16,
|
||||
};
|
||||
|
||||
const reverse_bits_tests = [_]ReverseBitsTest{
|
||||
.{ .in = 1, .bit_count = 1, .out = 1 },
|
||||
.{ .in = 1, .bit_count = 2, .out = 2 },
|
||||
.{ .in = 1, .bit_count = 3, .out = 4 },
|
||||
.{ .in = 1, .bit_count = 4, .out = 8 },
|
||||
.{ .in = 1, .bit_count = 5, .out = 16 },
|
||||
.{ .in = 17, .bit_count = 5, .out = 17 },
|
||||
.{ .in = 257, .bit_count = 9, .out = 257 },
|
||||
.{ .in = 29, .bit_count = 5, .out = 23 },
|
||||
};
|
||||
|
||||
for (reverse_bits_tests) |h| {
|
||||
const v = bitReverse(u16, h.in, h.bit_count);
|
||||
try std.testing.expectEqual(h.out, v);
|
||||
}
|
||||
}
|
||||
|
||||
test "fixedLiteralEncoder codes" {
|
||||
var al = std.ArrayList(u8).init(testing.allocator);
|
||||
defer al.deinit();
|
||||
var bw = std.io.bitWriter(.little, al.writer());
|
||||
|
||||
const f = fixedLiteralEncoder();
|
||||
for (f.codes) |c| {
|
||||
try bw.writeBits(c.code, c.len);
|
||||
}
|
||||
try testing.expectEqualSlices(u8, &fixed_codes, al.items);
|
||||
}
|
||||
|
||||
pub const fixed_codes = [_]u8{
|
||||
0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100,
|
||||
0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100,
|
||||
0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010,
|
||||
0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010,
|
||||
0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010,
|
||||
0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010,
|
||||
0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110,
|
||||
0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110,
|
||||
0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110,
|
||||
0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110,
|
||||
0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001,
|
||||
0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001,
|
||||
0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001,
|
||||
0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001,
|
||||
0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101,
|
||||
0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101,
|
||||
0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101,
|
||||
0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101,
|
||||
0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100,
|
||||
0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110,
|
||||
0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100,
|
||||
0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101,
|
||||
0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010,
|
||||
0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110,
|
||||
0b11011010, 0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110,
|
||||
0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111,
|
||||
0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011,
|
||||
0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101,
|
||||
0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110,
|
||||
0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100,
|
||||
0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101,
|
||||
0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010,
|
||||
0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110,
|
||||
0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000,
|
||||
0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010,
|
||||
0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100,
|
||||
0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011,
|
||||
0b10100011,
|
||||
};
|
||||
@@ -1,570 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
const hfd = @import("huffman_decoder.zig");
|
||||
const BitReader = @import("bit_reader.zig").BitReader;
|
||||
const CircularBuffer = @import("CircularBuffer.zig");
|
||||
const Container = @import("container.zig").Container;
|
||||
const Token = @import("Token.zig");
|
||||
const codegen_order = @import("consts.zig").huffman.codegen_order;
|
||||
|
||||
/// Decompresses deflate bit stream `reader` and writes uncompressed data to the
|
||||
/// `writer` stream.
|
||||
pub fn decompress(comptime container: Container, reader: anytype, writer: anytype) !void {
|
||||
var d = decompressor(container, reader);
|
||||
try d.decompress(writer);
|
||||
}
|
||||
|
||||
/// Inflate decompressor for the reader type.
|
||||
pub fn decompressor(comptime container: Container, reader: anytype) Decompressor(container, @TypeOf(reader)) {
|
||||
return Decompressor(container, @TypeOf(reader)).init(reader);
|
||||
}
|
||||
|
||||
pub fn Decompressor(comptime container: Container, comptime ReaderType: type) type {
|
||||
// zlib has 4 bytes footer, lookahead of 4 bytes ensures that we will not overshoot.
|
||||
// gzip has 8 bytes footer so we will not overshoot even with 8 bytes of lookahead.
|
||||
// For raw deflate there is always possibility of overshot so we use 8 bytes lookahead.
|
||||
const lookahead: type = if (container == .zlib) u32 else u64;
|
||||
return Inflate(container, lookahead, ReaderType);
|
||||
}
|
||||
|
||||
/// Inflate decompresses deflate bit stream. Reads compressed data from reader
|
||||
/// provided in init. Decompressed data are stored in internal hist buffer and
|
||||
/// can be accesses iterable `next` or reader interface.
|
||||
///
|
||||
/// Container defines header/footer wrapper around deflate bit stream. Can be
|
||||
/// gzip or zlib.
|
||||
///
|
||||
/// Deflate bit stream consists of multiple blocks. Block can be one of three types:
|
||||
/// * stored, non compressed, max 64k in size
|
||||
/// * fixed, huffman codes are predefined
|
||||
/// * dynamic, huffman code tables are encoded at the block start
|
||||
///
|
||||
/// `step` function runs decoder until internal `hist` buffer is full. Client
|
||||
/// than needs to read that data in order to proceed with decoding.
|
||||
///
|
||||
/// Allocates 74.5K of internal buffers, most important are:
|
||||
/// * 64K for history (CircularBuffer)
|
||||
/// * ~10K huffman decoders (Literal and DistanceDecoder)
|
||||
///
|
||||
pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comptime ReaderType: type) type {
|
||||
assert(LookaheadType == u32 or LookaheadType == u64);
|
||||
const BitReaderType = BitReader(LookaheadType, ReaderType);
|
||||
|
||||
return struct {
|
||||
//const BitReaderType = BitReader(ReaderType);
|
||||
const F = BitReaderType.flag;
|
||||
|
||||
bits: BitReaderType = .{},
|
||||
hist: CircularBuffer = .{},
|
||||
// Hashes, produces checkusm, of uncompressed data for gzip/zlib footer.
|
||||
hasher: container.Hasher() = .{},
|
||||
|
||||
// dynamic block huffman code decoders
|
||||
lit_dec: hfd.LiteralDecoder = .{}, // literals
|
||||
dst_dec: hfd.DistanceDecoder = .{}, // distances
|
||||
|
||||
// current read state
|
||||
bfinal: u1 = 0,
|
||||
block_type: u2 = 0b11,
|
||||
state: ReadState = .protocol_header,
|
||||
|
||||
const ReadState = enum {
|
||||
protocol_header,
|
||||
block_header,
|
||||
block,
|
||||
protocol_footer,
|
||||
end,
|
||||
};
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub const Error = BitReaderType.Error || Container.Error || hfd.Error || error{
|
||||
InvalidCode,
|
||||
InvalidMatch,
|
||||
InvalidBlockType,
|
||||
WrongStoredBlockNlen,
|
||||
InvalidDynamicBlockHeader,
|
||||
};
|
||||
|
||||
pub fn init(rt: ReaderType) Self {
|
||||
return .{ .bits = BitReaderType.init(rt) };
|
||||
}
|
||||
|
||||
fn blockHeader(self: *Self) !void {
|
||||
self.bfinal = try self.bits.read(u1);
|
||||
self.block_type = try self.bits.read(u2);
|
||||
}
|
||||
|
||||
fn storedBlock(self: *Self) !bool {
|
||||
self.bits.alignToByte(); // skip padding until byte boundary
|
||||
// everything after this is byte aligned in stored block
|
||||
var len = try self.bits.read(u16);
|
||||
const nlen = try self.bits.read(u16);
|
||||
if (len != ~nlen) return error.WrongStoredBlockNlen;
|
||||
|
||||
while (len > 0) {
|
||||
const buf = self.hist.getWritable(len);
|
||||
try self.bits.readAll(buf);
|
||||
len -= @intCast(buf.len);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
fn fixedBlock(self: *Self) !bool {
|
||||
while (!self.hist.full()) {
|
||||
const code = try self.bits.readFixedCode();
|
||||
switch (code) {
|
||||
0...255 => self.hist.write(@intCast(code)),
|
||||
256 => return true, // end of block
|
||||
257...285 => try self.fixedDistanceCode(@intCast(code - 257)),
|
||||
else => return error.InvalidCode,
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Handles fixed block non literal (length) code.
|
||||
// Length code is followed by 5 bits of distance code.
|
||||
fn fixedDistanceCode(self: *Self, code: u8) !void {
|
||||
try self.bits.fill(5 + 5 + 13);
|
||||
const length = try self.decodeLength(code);
|
||||
const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered | F.reverse));
|
||||
try self.hist.writeMatch(length, distance);
|
||||
}
|
||||
|
||||
inline fn decodeLength(self: *Self, code: u8) !u16 {
|
||||
if (code > 28) return error.InvalidCode;
|
||||
const ml = Token.matchLength(code);
|
||||
return if (ml.extra_bits == 0) // 0 - 5 extra bits
|
||||
ml.base
|
||||
else
|
||||
ml.base + try self.bits.readN(ml.extra_bits, F.buffered);
|
||||
}
|
||||
|
||||
fn decodeDistance(self: *Self, code: u8) !u16 {
|
||||
if (code > 29) return error.InvalidCode;
|
||||
const md = Token.matchDistance(code);
|
||||
return if (md.extra_bits == 0) // 0 - 13 extra bits
|
||||
md.base
|
||||
else
|
||||
md.base + try self.bits.readN(md.extra_bits, F.buffered);
|
||||
}
|
||||
|
||||
fn dynamicBlockHeader(self: *Self) !void {
|
||||
const hlit: u16 = @as(u16, try self.bits.read(u5)) + 257; // number of ll code entries present - 257
|
||||
const hdist: u16 = @as(u16, try self.bits.read(u5)) + 1; // number of distance code entries - 1
|
||||
const hclen: u8 = @as(u8, try self.bits.read(u4)) + 4; // hclen + 4 code lengths are encoded
|
||||
|
||||
if (hlit > 286 or hdist > 30)
|
||||
return error.InvalidDynamicBlockHeader;
|
||||
|
||||
// lengths for code lengths
|
||||
var cl_lens = [_]u4{0} ** 19;
|
||||
for (0..hclen) |i| {
|
||||
cl_lens[codegen_order[i]] = try self.bits.read(u3);
|
||||
}
|
||||
var cl_dec: hfd.CodegenDecoder = .{};
|
||||
try cl_dec.generate(&cl_lens);
|
||||
|
||||
// decoded code lengths
|
||||
var dec_lens = [_]u4{0} ** (286 + 30);
|
||||
var pos: usize = 0;
|
||||
while (pos < hlit + hdist) {
|
||||
const sym = try cl_dec.find(try self.bits.peekF(u7, F.reverse));
|
||||
try self.bits.shift(sym.code_bits);
|
||||
pos += try self.dynamicCodeLength(sym.symbol, &dec_lens, pos);
|
||||
}
|
||||
if (pos > hlit + hdist) {
|
||||
return error.InvalidDynamicBlockHeader;
|
||||
}
|
||||
|
||||
// literal code lengths to literal decoder
|
||||
try self.lit_dec.generate(dec_lens[0..hlit]);
|
||||
|
||||
// distance code lengths to distance decoder
|
||||
try self.dst_dec.generate(dec_lens[hlit .. hlit + hdist]);
|
||||
}
|
||||
|
||||
// Decode code length symbol to code length. Writes decoded length into
|
||||
// lens slice starting at position pos. Returns number of positions
|
||||
// advanced.
|
||||
fn dynamicCodeLength(self: *Self, code: u16, lens: []u4, pos: usize) !usize {
|
||||
if (pos >= lens.len)
|
||||
return error.InvalidDynamicBlockHeader;
|
||||
|
||||
switch (code) {
|
||||
0...15 => {
|
||||
// Represent code lengths of 0 - 15
|
||||
lens[pos] = @intCast(code);
|
||||
return 1;
|
||||
},
|
||||
16 => {
|
||||
// Copy the previous code length 3 - 6 times.
|
||||
// The next 2 bits indicate repeat length
|
||||
const n: u8 = @as(u8, try self.bits.read(u2)) + 3;
|
||||
if (pos == 0 or pos + n > lens.len)
|
||||
return error.InvalidDynamicBlockHeader;
|
||||
for (0..n) |i| {
|
||||
lens[pos + i] = lens[pos + i - 1];
|
||||
}
|
||||
return n;
|
||||
},
|
||||
// Repeat a code length of 0 for 3 - 10 times. (3 bits of length)
|
||||
17 => return @as(u8, try self.bits.read(u3)) + 3,
|
||||
// Repeat a code length of 0 for 11 - 138 times (7 bits of length)
|
||||
18 => return @as(u8, try self.bits.read(u7)) + 11,
|
||||
else => return error.InvalidDynamicBlockHeader,
|
||||
}
|
||||
}
|
||||
|
||||
// In larger archives most blocks are usually dynamic, so decompression
|
||||
// performance depends on this function.
|
||||
fn dynamicBlock(self: *Self) !bool {
|
||||
// Hot path loop!
|
||||
while (!self.hist.full()) {
|
||||
try self.bits.fill(15); // optimization so other bit reads can be buffered (avoiding one `if` in hot path)
|
||||
const sym = try self.decodeSymbol(&self.lit_dec);
|
||||
|
||||
switch (sym.kind) {
|
||||
.literal => self.hist.write(sym.symbol),
|
||||
.match => { // Decode match backreference <length, distance>
|
||||
// fill so we can use buffered reads
|
||||
if (LookaheadType == u32)
|
||||
try self.bits.fill(5 + 15)
|
||||
else
|
||||
try self.bits.fill(5 + 15 + 13);
|
||||
const length = try self.decodeLength(sym.symbol);
|
||||
const dsm = try self.decodeSymbol(&self.dst_dec);
|
||||
if (LookaheadType == u32) try self.bits.fill(13);
|
||||
const distance = try self.decodeDistance(dsm.symbol);
|
||||
try self.hist.writeMatch(length, distance);
|
||||
},
|
||||
.end_of_block => return true,
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Peek 15 bits from the bit reader (the maximum Huffman code length is
// 15 bits), ask the decoder for the symbol matching that code, then
// advance the bit reader by exactly the number of bits the matched code
// consumed, and return the symbol.
fn decodeSymbol(self: *Self, decoder: anytype) !hfd.Symbol {
    const found = try decoder.find(try self.bits.peekF(u15, F.buffered | F.reverse));
    try self.bits.shift(found.code_bits);
    return found;
}
|
||||
|
||||
// Advances the decompressor state machine by one step: parse the
// container header, a deflate block header, (part of) a block body, or
// the container footer.
fn step(self: *Self) !void {
    switch (self.state) {
        .protocol_header => {
            // Container (e.g. gzip/zlib) header precedes any deflate data.
            try container.parseHeader(&self.bits);
            self.state = .block_header;
        },
        .block_header => {
            try self.blockHeader();
            self.state = .block;
            // Dynamic blocks carry their Huffman code descriptions up front.
            if (self.block_type == 2) try self.dynamicBlockHeader();
        },
        .block => {
            const block_done = switch (self.block_type) {
                0 => try self.storedBlock(),
                1 => try self.fixedBlock(),
                2 => try self.dynamicBlock(),
                else => return error.InvalidBlockType,
            };
            if (block_done) {
                // bfinal marks the last deflate block in the stream.
                self.state = if (self.bfinal == 1) .protocol_footer else .block_header;
            }
        },
        .protocol_footer => {
            self.bits.alignToByte();
            try container.parseFooter(&self.hasher, &self.bits);
            self.state = .end;
        },
        .end => {},
    }
}
|
||||
|
||||
/// Replaces the inner reader with new reader.
pub fn setReader(self: *Self, new_reader: ReaderType) void {
    self.bits.forward_reader = new_reader;
    switch (self.state) {
        // A finished (or footer-parsing) stream restarts on the new input.
        .protocol_footer, .end => self.state = .protocol_header,
        else => {},
    }
}
|
||||
|
||||
// Reads all compressed data from the internal reader and writes the
// plain (uncompressed) bytes to the provided writer.
pub fn decompress(self: *Self, writer: anytype) !void {
    while (try self.next()) |chunk| {
        try writer.writeAll(chunk);
    }
}
|
||||
|
||||
/// Returns the number of bytes that have been read from the internal
/// reader but not yet consumed by the decompressor.
pub fn unreadBytes(self: Self) usize {
    // Round the buffered bit count up to whole bytes. This cannot fail:
    // the divisor is a nonzero constant and the unsigned division cannot
    // overflow.
    return std.math.divCeil(usize, self.bits.nbits, 8) catch unreachable;
}
|
||||
|
||||
// Iterator interface

/// Can be used in an iterator-like loop without copying into another buffer:
///   while (try inflate.next()) |buf| { ... }
/// Returns null when the end of the stream is reached.
pub fn next(self: *Self) Error!?[]const u8 {
    const out = try self.get(0);
    return if (out.len == 0) null else out;
}
|
||||
|
||||
/// Returns decompressed data from the internal sliding window buffer.
/// The returned slice is between 0 and `limit` bytes long; an empty
/// slice means the end of the stream was reached. With limit=0 as much
/// data as possible is returned, but never more than 65536 bytes (the
/// size of the internal buffer).
pub fn get(self: *Self, limit: usize) Error![]const u8 {
    while (true) {
        const chunk = self.hist.readAtMost(limit);
        if (chunk.len > 0) {
            // The container checksum is maintained over the plain output.
            self.hasher.update(chunk);
            return chunk;
        }
        if (self.state == .end) return chunk; // empty slice: stream exhausted
        try self.step();
    }
}
|
||||
|
||||
// Reader interface

pub const Reader = std.io.GenericReader(*Self, Error, read);

/// Returns the number of bytes read; it may be less than buffer.len.
/// A return value of 0 means end of stream, which is not an error
/// condition.
pub fn read(self: *Self, buffer: []u8) Error!usize {
    if (buffer.len == 0) return 0;
    const produced = try self.get(buffer.len);
    @memcpy(buffer[0..produced.len], produced);
    return produced.len;
}

pub fn reader(self: *Self) Reader {
    return .{ .context = self };
}
|
||||
};
|
||||
}
|
||||
|
||||
test "decompress" {
    // Table-driven: one raw deflate stream per block type, all decoding
    // to a known plain-text result.
    const cases = [_]struct {
        in: []const u8,
        out: []const u8,
    }{
        // non compressed block (type 0)
        .{
            .in = &[_]u8{
                0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: bfinal/type, len, nlen
                'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // raw bytes
            },
            .out = "Hello world\n",
        },
        // fixed code block (type 1)
        .{
            .in = &[_]u8{
                0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
                0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
            },
            .out = "Hello world\n",
        },
        // dynamic block (type 2)
        .{
            .in = &[_]u8{
                0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2
                0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38,
                0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01,
            },
            .out = "ABCDEABCD ABCDEABCD",
        },
    };
    for (cases) |case| {
        var input_stream = std.io.fixedBufferStream(case.in);
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();

        try decompress(.raw, input_stream.reader(), output.writer());
        try testing.expectEqualStrings(case.out, output.items);
    }
}
|
||||
|
||||
test "gzip decompress" {
    // Same deflate payloads as the raw test, wrapped in a gzip container
    // (10-byte header, 8-byte footer: crc32 + size).
    const cases = [_]struct {
        in: []const u8,
        out: []const u8,
    }{
        // non compressed block (type 0)
        .{
            .in = &[_]u8{
                0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes)
                0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: len, nlen
                'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // raw bytes
                0xd5, 0xe0, 0x39, 0xb7, // gzip footer: checksum
                0x0c, 0x00, 0x00, 0x00, // gzip footer: size
            },
            .out = "Hello world\n",
        },
        // fixed code block (type 1)
        .{
            .in = &[_]u8{
                0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, // gzip header (10 bytes)
                0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
                0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
                0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00, // gzip footer (chksum, len)
            },
            .out = "Hello world\n",
        },
        // dynamic block (type 2)
        .{
            .in = &[_]u8{
                0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes)
                0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2
                0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38,
                0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01,
                0x17, 0x1c, 0x39, 0xb4, 0x13, 0x00, 0x00, 0x00, // gzip footer (chksum, len)
            },
            .out = "ABCDEABCD ABCDEABCD",
        },
        // gzip header with the optional FNAME ("hello.txt") field present
        .{
            .in = &[_]u8{
                0x1f, 0x8b, 0x08, 0x08, 0xe5, 0x70, 0xb1, 0x65, 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
                0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
                0x02, 0x00, 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00,
            },
            .out = "Hello world\n",
        },
    };
    for (cases) |case| {
        var input_stream = std.io.fixedBufferStream(case.in);
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();

        try decompress(.gzip, input_stream.reader(), output.writer());
        try testing.expectEqualStrings(case.out, output.items);
    }
}
|
||||
|
||||
test "zlib decompress" {
    // Stored deflate block wrapped in a zlib container (2-byte header,
    // 4-byte adler32 footer).
    const cases = [_]struct {
        in: []const u8,
        out: []const u8,
    }{
        // non compressed block (type 0)
        .{
            .in = &[_]u8{
                0x78, 0b10_0_11100, // zlib header (2 bytes)
                0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: len, nlen
                'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // raw bytes
                0x1c, 0xf2, 0x04, 0x47, // zlib footer: checksum
            },
            .out = "Hello world\n",
        },
    };
    for (cases) |case| {
        var input_stream = std.io.fixedBufferStream(case.in);
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();

        try decompress(.zlib, input_stream.reader(), output.writer());
        try testing.expectEqualStrings(case.out, output.items);
    }
}
|
||||
|
||||
test "fuzzing tests" {
    // Regression corpus from fuzzing: each named input either decodes to
    // `out` or must fail with exactly `err`.
    const cases = [_]struct {
        input: []const u8,
        out: []const u8 = "",
        err: ?anyerror = null,
    }{
        .{ .input = "deflate-stream", .out = @embedFile("testdata/fuzz/deflate-stream.expect") }, // 0
        .{ .input = "empty-distance-alphabet01" },
        .{ .input = "empty-distance-alphabet02" },
        .{ .input = "end-of-stream", .err = error.EndOfStream },
        .{ .input = "invalid-distance", .err = error.InvalidMatch },
        .{ .input = "invalid-tree01", .err = error.IncompleteHuffmanTree }, // 5
        .{ .input = "invalid-tree02", .err = error.IncompleteHuffmanTree },
        .{ .input = "invalid-tree03", .err = error.IncompleteHuffmanTree },
        .{ .input = "lengths-overflow", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "out-of-codes", .err = error.InvalidCode },
        .{ .input = "puff01", .err = error.WrongStoredBlockNlen }, // 10
        .{ .input = "puff02", .err = error.EndOfStream },
        .{ .input = "puff03", .out = &[_]u8{0xa} },
        .{ .input = "puff04", .err = error.InvalidCode },
        .{ .input = "puff05", .err = error.EndOfStream },
        .{ .input = "puff06", .err = error.EndOfStream },
        .{ .input = "puff08", .err = error.InvalidCode },
        .{ .input = "puff09", .out = "P" },
        .{ .input = "puff10", .err = error.InvalidCode },
        .{ .input = "puff11", .err = error.InvalidMatch },
        .{ .input = "puff12", .err = error.InvalidDynamicBlockHeader }, // 20
        .{ .input = "puff13", .err = error.IncompleteHuffmanTree },
        .{ .input = "puff14", .err = error.EndOfStream },
        .{ .input = "puff15", .err = error.IncompleteHuffmanTree },
        .{ .input = "puff16", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "puff17", .err = error.MissingEndOfBlockCode }, // 25
        .{ .input = "fuzz1", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "fuzz2", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "fuzz3", .err = error.InvalidMatch },
        .{ .input = "fuzz4", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff18", .err = error.OversubscribedHuffmanTree }, // 30
        .{ .input = "puff19", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff20", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff21", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff22", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff23", .err = error.OversubscribedHuffmanTree }, // 35
        .{ .input = "puff24", .err = error.IncompleteHuffmanTree },
        .{ .input = "puff25", .err = error.OversubscribedHuffmanTree },
        .{ .input = "puff26", .err = error.InvalidDynamicBlockHeader },
        .{ .input = "puff27", .err = error.InvalidDynamicBlockHeader },
    };

    // inline: the embedded file path is built at comptime per case.
    inline for (cases, 0..) |case, case_no| {
        var input_stream = std.io.fixedBufferStream(@embedFile("testdata/fuzz/" ++ case.input ++ ".input"));
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();
        errdefer std.debug.print("test case failed {}\n", .{case_no});

        if (case.err) |expected_err| {
            try testing.expectError(expected_err, decompress(.raw, input_stream.reader(), output.writer()));
        } else {
            try decompress(.raw, input_stream.reader(), output.writer());
            try testing.expectEqualStrings(case.out, output.items);
        }
    }
}
|
||||
|
||||
test "bug 18966" {
    // Regression test: gzip stream from issue #18966 must round-trip to
    // the recorded expected output.
    const input = @embedFile("testdata/fuzz/bug_18966.input");
    const expect = @embedFile("testdata/fuzz/bug_18966.expect");

    var input_stream = std.io.fixedBufferStream(input);
    var output = std.ArrayList(u8).init(testing.allocator);
    defer output.deinit();

    try decompress(.gzip, input_stream.reader(), output.writer());
    try testing.expectEqualStrings(expect, output.items);
}
|
||||
|
||||
test "bug 19895" {
    // Regression test: reading into a zero-length buffer must report 0
    // bytes read instead of misbehaving.
    const input = &[_]u8{
        0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // stored block header: len, nlen
        'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // raw bytes
    };
    var input_stream = std.io.fixedBufferStream(input);
    var decomp = decompressor(.raw, input_stream.reader());
    var buf: [0]u8 = undefined;
    try testing.expectEqual(0, try decomp.read(&buf));
}
|
||||
606
lib/std/compress/flate/testdata/block_writer.zig
vendored
606
lib/std/compress/flate/testdata/block_writer.zig
vendored
@@ -1,606 +0,0 @@
|
||||
const Token = @import("../Token.zig");
|
||||
|
||||
/// One block-writer test vector: a token stream plus the names of the
/// fixture files holding the expected encoded output.
pub const TestCase = struct {
    tokens: []const Token,
    // File name of input data matching the tokens.
    input: []const u8 = "",
    // File name of the expected output when the input file is available.
    want: []const u8 = "",
    // File name of the expected output when no input is available.
    want_no_input: []const u8 = "",
};
|
||||
|
||||
pub const testCases = blk: {
|
||||
@setEvalBranchQuota(4096 * 2);
|
||||
|
||||
const L = Token.initLiteral;
|
||||
const M = Token.initMatch;
|
||||
const ml = M(1, 258); // Maximum length token. Used to reduce the size of writeBlockTests
|
||||
|
||||
break :blk &[_]TestCase{
|
||||
TestCase{
|
||||
.input = "huffman-null-max.input",
|
||||
.want = "huffman-null-max.{s}.expect",
|
||||
.want_no_input = "huffman-null-max.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, L(0x0), L(0x0),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-pi.input",
|
||||
.want = "huffman-pi.{s}.expect",
|
||||
.want_no_input = "huffman-pi.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L('3'), L('.'), L('1'), L('4'), L('1'), L('5'), L('9'), L('2'),
|
||||
L('6'), L('5'), L('3'), L('5'), L('8'), L('9'), L('7'), L('9'),
|
||||
L('3'), L('2'), L('3'), L('8'), L('4'), L('6'), L('2'), L('6'),
|
||||
L('4'), L('3'), L('3'), L('8'), L('3'), L('2'), L('7'), L('9'),
|
||||
L('5'), L('0'), L('2'), L('8'), L('8'), L('4'), L('1'), L('9'),
|
||||
L('7'), L('1'), L('6'), L('9'), L('3'), L('9'), L('9'), L('3'),
|
||||
L('7'), L('5'), L('1'), L('0'), L('5'), L('8'), L('2'), L('0'),
|
||||
L('9'), L('7'), L('4'), L('9'), L('4'), L('4'), L('5'), L('9'),
|
||||
L('2'), L('3'), L('0'), L('7'), L('8'), L('1'), L('6'), L('4'),
|
||||
L('0'), L('6'), L('2'), L('8'), L('6'), L('2'), L('0'), L('8'),
|
||||
L('9'), L('9'), L('8'), L('6'), L('2'), L('8'), L('0'), L('3'),
|
||||
L('4'), L('8'), L('2'), L('5'), L('3'), L('4'), L('2'), L('1'),
|
||||
L('1'), L('7'), L('0'), L('6'), L('7'), L('9'), L('8'), L('2'),
|
||||
L('1'), L('4'), L('8'), L('0'), L('8'), L('6'), L('5'), L('1'),
|
||||
L('3'), L('2'), L('8'), L('2'), L('3'), L('0'), L('6'), L('6'),
|
||||
L('4'), L('7'), L('0'), L('9'), L('3'), L('8'), L('4'), L('4'),
|
||||
L('6'), L('0'), L('9'), L('5'), L('5'), L('0'), L('5'), L('8'),
|
||||
L('2'), L('2'), L('3'), L('1'), L('7'), L('2'), L('5'), L('3'),
|
||||
L('5'), L('9'), L('4'), L('0'), L('8'), L('1'), L('2'), L('8'),
|
||||
L('4'), L('8'), L('1'), L('1'), L('1'), L('7'), L('4'), M(127, 4),
|
||||
L('4'), L('1'), L('0'), L('2'), L('7'), L('0'), L('1'), L('9'),
|
||||
L('3'), L('8'), L('5'), L('2'), L('1'), L('1'), L('0'), L('5'),
|
||||
L('5'), L('5'), L('9'), L('6'), L('4'), L('4'), L('6'), L('2'),
|
||||
L('2'), L('9'), L('4'), L('8'), L('9'), L('5'), L('4'), L('9'),
|
||||
L('3'), L('0'), L('3'), L('8'), L('1'), M(19, 4), L('2'), L('8'),
|
||||
L('8'), L('1'), L('0'), L('9'), L('7'), L('5'), L('6'), L('6'),
|
||||
L('5'), L('9'), L('3'), L('3'), L('4'), L('4'), L('6'), M(72, 4),
|
||||
L('7'), L('5'), L('6'), L('4'), L('8'), L('2'), L('3'), L('3'),
|
||||
L('7'), L('8'), L('6'), L('7'), L('8'), L('3'), L('1'), L('6'),
|
||||
L('5'), L('2'), L('7'), L('1'), L('2'), L('0'), L('1'), L('9'),
|
||||
L('0'), L('9'), L('1'), L('4'), M(27, 4), L('5'), L('6'), L('6'),
|
||||
L('9'), L('2'), L('3'), L('4'), L('6'), M(179, 4), L('6'), L('1'),
|
||||
L('0'), L('4'), L('5'), L('4'), L('3'), L('2'), L('6'), M(51, 4),
|
||||
L('1'), L('3'), L('3'), L('9'), L('3'), L('6'), L('0'), L('7'),
|
||||
L('2'), L('6'), L('0'), L('2'), L('4'), L('9'), L('1'), L('4'),
|
||||
L('1'), L('2'), L('7'), L('3'), L('7'), L('2'), L('4'), L('5'),
|
||||
L('8'), L('7'), L('0'), L('0'), L('6'), L('6'), L('0'), L('6'),
|
||||
L('3'), L('1'), L('5'), L('5'), L('8'), L('8'), L('1'), L('7'),
|
||||
L('4'), L('8'), L('8'), L('1'), L('5'), L('2'), L('0'), L('9'),
|
||||
L('2'), L('0'), L('9'), L('6'), L('2'), L('8'), L('2'), L('9'),
|
||||
L('2'), L('5'), L('4'), L('0'), L('9'), L('1'), L('7'), L('1'),
|
||||
L('5'), L('3'), L('6'), L('4'), L('3'), L('6'), L('7'), L('8'),
|
||||
L('9'), L('2'), L('5'), L('9'), L('0'), L('3'), L('6'), L('0'),
|
||||
L('0'), L('1'), L('1'), L('3'), L('3'), L('0'), L('5'), L('3'),
|
||||
L('0'), L('5'), L('4'), L('8'), L('8'), L('2'), L('0'), L('4'),
|
||||
L('6'), L('6'), L('5'), L('2'), L('1'), L('3'), L('8'), L('4'),
|
||||
L('1'), L('4'), L('6'), L('9'), L('5'), L('1'), L('9'), L('4'),
|
||||
L('1'), L('5'), L('1'), L('1'), L('6'), L('0'), L('9'), L('4'),
|
||||
L('3'), L('3'), L('0'), L('5'), L('7'), L('2'), L('7'), L('0'),
|
||||
L('3'), L('6'), L('5'), L('7'), L('5'), L('9'), L('5'), L('9'),
|
||||
L('1'), L('9'), L('5'), L('3'), L('0'), L('9'), L('2'), L('1'),
|
||||
L('8'), L('6'), L('1'), L('1'), L('7'), M(234, 4), L('3'), L('2'),
|
||||
M(10, 4), L('9'), L('3'), L('1'), L('0'), L('5'), L('1'), L('1'),
|
||||
L('8'), L('5'), L('4'), L('8'), L('0'), L('7'), M(271, 4), L('3'),
|
||||
L('7'), L('9'), L('9'), L('6'), L('2'), L('7'), L('4'), L('9'),
|
||||
L('5'), L('6'), L('7'), L('3'), L('5'), L('1'), L('8'), L('8'),
|
||||
L('5'), L('7'), L('5'), L('2'), L('7'), L('2'), L('4'), L('8'),
|
||||
L('9'), L('1'), L('2'), L('2'), L('7'), L('9'), L('3'), L('8'),
|
||||
L('1'), L('8'), L('3'), L('0'), L('1'), L('1'), L('9'), L('4'),
|
||||
L('9'), L('1'), L('2'), L('9'), L('8'), L('3'), L('3'), L('6'),
|
||||
L('7'), L('3'), L('3'), L('6'), L('2'), L('4'), L('4'), L('0'),
|
||||
L('6'), L('5'), L('6'), L('6'), L('4'), L('3'), L('0'), L('8'),
|
||||
L('6'), L('0'), L('2'), L('1'), L('3'), L('9'), L('4'), L('9'),
|
||||
L('4'), L('6'), L('3'), L('9'), L('5'), L('2'), L('2'), L('4'),
|
||||
L('7'), L('3'), L('7'), L('1'), L('9'), L('0'), L('7'), L('0'),
|
||||
L('2'), L('1'), L('7'), L('9'), L('8'), M(154, 5), L('7'), L('0'),
|
||||
L('2'), L('7'), L('7'), L('0'), L('5'), L('3'), L('9'), L('2'),
|
||||
L('1'), L('7'), L('1'), L('7'), L('6'), L('2'), L('9'), L('3'),
|
||||
L('1'), L('7'), L('6'), L('7'), L('5'), M(563, 5), L('7'), L('4'),
|
||||
L('8'), L('1'), M(7, 4), L('6'), L('6'), L('9'), L('4'), L('0'),
|
||||
M(488, 4), L('0'), L('0'), L('0'), L('5'), L('6'), L('8'), L('1'),
|
||||
L('2'), L('7'), L('1'), L('4'), L('5'), L('2'), L('6'), L('3'),
|
||||
L('5'), L('6'), L('0'), L('8'), L('2'), L('7'), L('7'), L('8'),
|
||||
L('5'), L('7'), L('7'), L('1'), L('3'), L('4'), L('2'), L('7'),
|
||||
L('5'), L('7'), L('7'), L('8'), L('9'), L('6'), M(298, 4), L('3'),
|
||||
L('6'), L('3'), L('7'), L('1'), L('7'), L('8'), L('7'), L('2'),
|
||||
L('1'), L('4'), L('6'), L('8'), L('4'), L('4'), L('0'), L('9'),
|
||||
L('0'), L('1'), L('2'), L('2'), L('4'), L('9'), L('5'), L('3'),
|
||||
L('4'), L('3'), L('0'), L('1'), L('4'), L('6'), L('5'), L('4'),
|
||||
L('9'), L('5'), L('8'), L('5'), L('3'), L('7'), L('1'), L('0'),
|
||||
L('5'), L('0'), L('7'), L('9'), M(203, 4), L('6'), M(340, 4), L('8'),
|
||||
L('9'), L('2'), L('3'), L('5'), L('4'), M(458, 4), L('9'), L('5'),
|
||||
L('6'), L('1'), L('1'), L('2'), L('1'), L('2'), L('9'), L('0'),
|
||||
L('2'), L('1'), L('9'), L('6'), L('0'), L('8'), L('6'), L('4'),
|
||||
L('0'), L('3'), L('4'), L('4'), L('1'), L('8'), L('1'), L('5'),
|
||||
L('9'), L('8'), L('1'), L('3'), L('6'), L('2'), L('9'), L('7'),
|
||||
L('7'), L('4'), M(117, 4), L('0'), L('9'), L('9'), L('6'), L('0'),
|
||||
L('5'), L('1'), L('8'), L('7'), L('0'), L('7'), L('2'), L('1'),
|
||||
L('1'), L('3'), L('4'), L('9'), M(1, 5), L('8'), L('3'), L('7'),
|
||||
L('2'), L('9'), L('7'), L('8'), L('0'), L('4'), L('9'), L('9'),
|
||||
M(731, 4), L('9'), L('7'), L('3'), L('1'), L('7'), L('3'), L('2'),
|
||||
L('8'), M(395, 4), L('6'), L('3'), L('1'), L('8'), L('5'), M(770, 4),
|
||||
M(745, 4), L('4'), L('5'), L('5'), L('3'), L('4'), L('6'), L('9'),
|
||||
L('0'), L('8'), L('3'), L('0'), L('2'), L('6'), L('4'), L('2'),
|
||||
L('5'), L('2'), L('2'), L('3'), L('0'), M(740, 4), M(616, 4), L('8'),
|
||||
L('5'), L('0'), L('3'), L('5'), L('2'), L('6'), L('1'), L('9'),
|
||||
L('3'), L('1'), L('1'), M(531, 4), L('1'), L('0'), L('1'), L('0'),
|
||||
L('0'), L('0'), L('3'), L('1'), L('3'), L('7'), L('8'), L('3'),
|
||||
L('8'), L('7'), L('5'), L('2'), L('8'), L('8'), L('6'), L('5'),
|
||||
L('8'), L('7'), L('5'), L('3'), L('3'), L('2'), L('0'), L('8'),
|
||||
L('3'), L('8'), L('1'), L('4'), L('2'), L('0'), L('6'), M(321, 4),
|
||||
M(300, 4), L('1'), L('4'), L('7'), L('3'), L('0'), L('3'), L('5'),
|
||||
L('9'), M(815, 5), L('9'), L('0'), L('4'), L('2'), L('8'), L('7'),
|
||||
L('5'), L('5'), L('4'), L('6'), L('8'), L('7'), L('3'), L('1'),
|
||||
L('1'), L('5'), L('9'), L('5'), M(854, 4), L('3'), L('8'), L('8'),
|
||||
L('2'), L('3'), L('5'), L('3'), L('7'), L('8'), L('7'), L('5'),
|
||||
M(896, 5), L('9'), M(315, 4), L('1'), M(329, 4), L('8'), L('0'), L('5'),
|
||||
L('3'), M(395, 4), L('2'), L('2'), L('6'), L('8'), L('0'), L('6'),
|
||||
L('6'), L('1'), L('3'), L('0'), L('0'), L('1'), L('9'), L('2'),
|
||||
L('7'), L('8'), L('7'), L('6'), L('6'), L('1'), L('1'), L('1'),
|
||||
L('9'), L('5'), L('9'), M(568, 4), L('6'), M(293, 5), L('8'), L('9'),
|
||||
L('3'), L('8'), L('0'), L('9'), L('5'), L('2'), L('5'), L('7'),
|
||||
L('2'), L('0'), L('1'), L('0'), L('6'), L('5'), L('4'), L('8'),
|
||||
L('5'), L('8'), L('6'), L('3'), L('2'), L('7'), M(155, 4), L('9'),
|
||||
L('3'), L('6'), L('1'), L('5'), L('3'), M(545, 4), M(349, 5), L('2'),
|
||||
L('3'), L('0'), L('3'), L('0'), L('1'), L('9'), L('5'), L('2'),
|
||||
L('0'), L('3'), L('5'), L('3'), L('0'), L('1'), L('8'), L('5'),
|
||||
L('2'), M(370, 4), M(118, 4), L('3'), L('6'), L('2'), L('2'), L('5'),
|
||||
L('9'), L('9'), L('4'), L('1'), L('3'), M(597, 4), L('4'), L('9'),
|
||||
L('7'), L('2'), L('1'), L('7'), M(223, 4), L('3'), L('4'), L('7'),
|
||||
L('9'), L('1'), L('3'), L('1'), L('5'), L('1'), L('5'), L('5'),
|
||||
L('7'), L('4'), L('8'), L('5'), L('7'), L('2'), L('4'), L('2'),
|
||||
L('4'), L('5'), L('4'), L('1'), L('5'), L('0'), L('6'), L('9'),
|
||||
M(320, 4), L('8'), L('2'), L('9'), L('5'), L('3'), L('3'), L('1'),
|
||||
L('1'), L('6'), L('8'), L('6'), L('1'), L('7'), L('2'), L('7'),
|
||||
L('8'), M(824, 4), L('9'), L('0'), L('7'), L('5'), L('0'), L('9'),
|
||||
M(270, 4), L('7'), L('5'), L('4'), L('6'), L('3'), L('7'), L('4'),
|
||||
L('6'), L('4'), L('9'), L('3'), L('9'), L('3'), L('1'), L('9'),
|
||||
L('2'), L('5'), L('5'), L('0'), L('6'), L('0'), L('4'), L('0'),
|
||||
L('0'), L('9'), M(620, 4), L('1'), L('6'), L('7'), L('1'), L('1'),
|
||||
L('3'), L('9'), L('0'), L('0'), L('9'), L('8'), M(822, 4), L('4'),
|
||||
L('0'), L('1'), L('2'), L('8'), L('5'), L('8'), L('3'), L('6'),
|
||||
L('1'), L('6'), L('0'), L('3'), L('5'), L('6'), L('3'), L('7'),
|
||||
L('0'), L('7'), L('6'), L('6'), L('0'), L('1'), L('0'), L('4'),
|
||||
M(371, 4), L('8'), L('1'), L('9'), L('4'), L('2'), L('9'), M(1055, 5),
|
||||
M(240, 4), M(652, 4), L('7'), L('8'), L('3'), L('7'), L('4'), M(1193, 4),
|
||||
L('8'), L('2'), L('5'), L('5'), L('3'), L('7'), M(522, 5), L('2'),
|
||||
L('6'), L('8'), M(47, 4), L('4'), L('0'), L('4'), L('7'), M(466, 4),
|
||||
L('4'), M(1206, 4), M(910, 4), L('8'), L('4'), M(937, 4), L('6'), M(800, 6),
|
||||
L('3'), L('3'), L('1'), L('3'), L('6'), L('7'), L('7'), L('0'),
|
||||
L('2'), L('8'), L('9'), L('8'), L('9'), L('1'), L('5'), L('2'),
|
||||
M(99, 4), L('5'), L('2'), L('1'), L('6'), L('2'), L('0'), L('5'),
|
||||
L('6'), L('9'), L('6'), M(1042, 4), L('0'), L('5'), L('8'), M(1144, 4),
|
||||
L('5'), M(1177, 4), L('5'), L('1'), L('1'), M(522, 4), L('8'), L('2'),
|
||||
L('4'), L('3'), L('0'), L('0'), L('3'), L('5'), L('5'), L('8'),
|
||||
L('7'), L('6'), L('4'), L('0'), L('2'), L('4'), L('7'), L('4'),
|
||||
L('9'), L('6'), L('4'), L('7'), L('3'), L('2'), L('6'), L('3'),
|
||||
M(1087, 4), L('9'), L('9'), L('2'), M(1100, 4), L('4'), L('2'), L('6'),
|
||||
L('9'), M(710, 6), L('7'), M(471, 4), L('4'), M(1342, 4), M(1054, 4), L('9'),
|
||||
L('3'), L('4'), L('1'), L('7'), M(430, 4), L('1'), L('2'), M(43, 4),
|
||||
L('4'), M(415, 4), L('1'), L('5'), L('0'), L('3'), L('0'), L('2'),
|
||||
L('8'), L('6'), L('1'), L('8'), L('2'), L('9'), L('7'), L('4'),
|
||||
L('5'), L('5'), L('5'), L('7'), L('0'), L('6'), L('7'), L('4'),
|
||||
M(310, 4), L('5'), L('0'), L('5'), L('4'), L('9'), L('4'), L('5'),
|
||||
L('8'), M(454, 4), L('9'), M(82, 4), L('5'), L('6'), M(493, 4), L('7'),
|
||||
L('2'), L('1'), L('0'), L('7'), L('9'), M(346, 4), L('3'), L('0'),
|
||||
M(267, 4), L('3'), L('2'), L('1'), L('1'), L('6'), L('5'), L('3'),
|
||||
L('4'), L('4'), L('9'), L('8'), L('7'), L('2'), L('0'), L('2'),
|
||||
L('7'), M(284, 4), L('0'), L('2'), L('3'), L('6'), L('4'), M(559, 4),
|
||||
L('5'), L('4'), L('9'), L('9'), L('1'), L('1'), L('9'), L('8'),
|
||||
M(1049, 4), L('4'), M(284, 4), L('5'), L('3'), L('5'), L('6'), L('6'),
|
||||
L('3'), L('6'), L('9'), M(1105, 4), L('2'), L('6'), L('5'), M(741, 4),
|
||||
L('7'), L('8'), L('6'), L('2'), L('5'), L('5'), L('1'), M(987, 4),
|
||||
L('1'), L('7'), L('5'), L('7'), L('4'), L('6'), L('7'), L('2'),
|
||||
L('8'), L('9'), L('0'), L('9'), L('7'), L('7'), L('7'), L('7'),
|
||||
M(1108, 5), L('0'), L('0'), L('0'), M(1534, 4), L('7'), L('0'), M(1248, 4),
|
||||
L('6'), M(1002, 4), L('4'), L('9'), L('1'), M(1055, 4), M(664, 4), L('2'),
|
||||
L('1'), L('4'), L('7'), L('7'), L('2'), L('3'), L('5'), L('0'),
|
||||
L('1'), L('4'), L('1'), L('4'), M(1604, 4), L('3'), L('5'), L('6'),
|
||||
M(1200, 4), L('1'), L('6'), L('1'), L('3'), L('6'), L('1'), L('1'),
|
||||
L('5'), L('7'), L('3'), L('5'), L('2'), L('5'), M(1285, 4), L('3'),
|
||||
L('4'), M(92, 4), L('1'), L('8'), M(1148, 4), L('8'), L('4'), M(1512, 4),
|
||||
L('3'), L('3'), L('2'), L('3'), L('9'), L('0'), L('7'), L('3'),
|
||||
L('9'), L('4'), L('1'), L('4'), L('3'), L('3'), L('3'), L('4'),
|
||||
L('5'), L('4'), L('7'), L('7'), L('6'), L('2'), L('4'), M(579, 4),
|
||||
L('2'), L('5'), L('1'), L('8'), L('9'), L('8'), L('3'), L('5'),
|
||||
L('6'), L('9'), L('4'), L('8'), L('5'), L('5'), L('6'), L('2'),
|
||||
L('0'), L('9'), L('9'), L('2'), L('1'), L('9'), L('2'), L('2'),
|
||||
L('2'), L('1'), L('8'), L('4'), L('2'), L('7'), M(575, 4), L('2'),
|
||||
M(187, 4), L('6'), L('8'), L('8'), L('7'), L('6'), L('7'), L('1'),
|
||||
L('7'), L('9'), L('0'), M(86, 4), L('0'), M(263, 5), L('6'), L('6'),
|
||||
M(1000, 4), L('8'), L('8'), L('6'), L('2'), L('7'), L('2'), M(1757, 4),
|
||||
L('1'), L('7'), L('8'), L('6'), L('0'), L('8'), L('5'), L('7'),
|
||||
M(116, 4), L('3'), M(765, 5), L('7'), L('9'), L('7'), L('6'), L('6'),
|
||||
L('8'), L('1'), M(702, 4), L('0'), L('0'), L('9'), L('5'), L('3'),
|
||||
L('8'), L('8'), M(1593, 4), L('3'), M(1702, 4), L('0'), L('6'), L('8'),
|
||||
L('0'), L('0'), L('6'), L('4'), L('2'), L('2'), L('5'), L('1'),
|
||||
L('2'), L('5'), L('2'), M(1404, 4), L('7'), L('3'), L('9'), L('2'),
|
||||
M(664, 4), M(1141, 4), L('4'), M(1716, 5), L('8'), L('6'), L('2'), L('6'),
|
||||
L('9'), L('4'), L('5'), M(486, 4), L('4'), L('1'), L('9'), L('6'),
|
||||
L('5'), L('2'), L('8'), L('5'), L('0'), M(154, 4), M(925, 4), L('1'),
|
||||
L('8'), L('6'), L('3'), M(447, 4), L('4'), M(341, 5), L('2'), L('0'),
|
||||
L('3'), L('9'), M(1420, 4), L('4'), L('5'), M(701, 4), L('2'), L('3'),
|
||||
L('7'), M(1069, 4), L('6'), M(1297, 4), L('5'), L('6'), M(1593, 4), L('7'),
|
||||
L('1'), L('9'), L('1'), L('7'), L('2'), L('8'), M(370, 4), L('7'),
|
||||
L('6'), L('4'), L('6'), L('5'), L('7'), L('5'), L('7'), L('3'),
|
||||
L('9'), M(258, 4), L('3'), L('8'), L('9'), M(1865, 4), L('8'), L('3'),
|
||||
L('2'), L('6'), L('4'), L('5'), L('9'), L('9'), L('5'), L('8'),
|
||||
M(1704, 4), L('0'), L('4'), L('7'), L('8'), M(479, 4), M(809, 4), L('9'),
|
||||
M(46, 4), L('6'), L('4'), L('0'), L('7'), L('8'), L('9'), L('5'),
|
||||
L('1'), M(143, 4), L('6'), L('8'), L('3'), M(304, 4), L('2'), L('5'),
|
||||
L('9'), L('5'), L('7'), L('0'), M(1129, 4), L('8'), L('2'), L('2'),
|
||||
M(713, 4), L('2'), M(1564, 4), L('4'), L('0'), L('7'), L('7'), L('2'),
|
||||
L('6'), L('7'), L('1'), L('9'), L('4'), L('7'), L('8'), M(794, 4),
|
||||
L('8'), L('2'), L('6'), L('0'), L('1'), L('4'), L('7'), L('6'),
|
||||
L('9'), L('9'), L('0'), L('9'), M(1257, 4), L('0'), L('1'), L('3'),
|
||||
L('6'), L('3'), L('9'), L('4'), L('4'), L('3'), M(640, 4), L('3'),
|
||||
L('0'), M(262, 4), L('2'), L('0'), L('3'), L('4'), L('9'), L('6'),
|
||||
L('2'), L('5'), L('2'), L('4'), L('5'), L('1'), L('7'), M(950, 4),
|
||||
L('9'), L('6'), L('5'), L('1'), L('4'), L('3'), L('1'), L('4'),
|
||||
L('2'), L('9'), L('8'), L('0'), L('9'), L('1'), L('9'), L('0'),
|
||||
L('6'), L('5'), L('9'), L('2'), M(643, 4), L('7'), L('2'), L('2'),
|
||||
L('1'), L('6'), L('9'), L('6'), L('4'), L('6'), M(1050, 4), M(123, 4),
|
||||
L('5'), M(1295, 4), L('4'), M(1382, 5), L('8'), M(1370, 4), L('9'), L('7'),
|
||||
M(1404, 4), L('5'), L('4'), M(1182, 4), M(575, 4), L('7'), M(1627, 4), L('8'),
|
||||
L('4'), L('6'), L('8'), L('1'), L('3'), M(141, 4), L('6'), L('8'),
|
||||
L('3'), L('8'), L('6'), L('8'), L('9'), L('4'), L('2'), L('7'),
|
||||
L('7'), L('4'), L('1'), L('5'), L('5'), L('9'), L('9'), L('1'),
|
||||
L('8'), L('5'), M(91, 4), L('2'), L('4'), L('5'), L('9'), L('5'),
|
||||
L('3'), L('9'), L('5'), L('9'), L('4'), L('3'), L('1'), M(1464, 4),
|
||||
L('7'), M(19, 4), L('6'), L('8'), L('0'), L('8'), L('4'), L('5'),
|
||||
M(744, 4), L('7'), L('3'), M(2079, 4), L('9'), L('5'), L('8'), L('4'),
|
||||
L('8'), L('6'), L('5'), L('3'), L('8'), M(1769, 4), L('6'), L('2'),
|
||||
M(243, 4), L('6'), L('0'), L('9'), M(1207, 4), L('6'), L('0'), L('8'),
|
||||
L('0'), L('5'), L('1'), L('2'), L('4'), L('3'), L('8'), L('8'),
|
||||
L('4'), M(315, 4), M(12, 4), L('4'), L('1'), L('3'), M(784, 4), L('7'),
|
||||
L('6'), L('2'), L('7'), L('8'), M(834, 4), L('7'), L('1'), L('5'),
|
||||
M(1436, 4), L('3'), L('5'), L('9'), L('9'), L('7'), L('7'), L('0'),
|
||||
L('0'), L('1'), L('2'), L('9'), M(1139, 4), L('8'), L('9'), L('4'),
|
||||
L('4'), L('1'), M(632, 4), L('6'), L('8'), L('5'), L('5'), M(96, 4),
|
||||
L('4'), L('0'), L('6'), L('3'), M(2279, 4), L('2'), L('0'), L('7'),
|
||||
L('2'), L('2'), M(345, 4), M(516, 5), L('4'), L('8'), L('1'), L('5'),
|
||||
L('8'), M(518, 4), M(511, 4), M(635, 4), M(665, 4), L('3'), L('9'), L('4'),
|
||||
L('5'), L('2'), L('2'), L('6'), L('7'), M(1175, 6), L('8'), M(1419, 4),
|
||||
L('2'), L('1'), M(747, 4), L('2'), M(904, 4), L('5'), L('4'), L('6'),
|
||||
L('6'), L('6'), M(1308, 4), L('2'), L('3'), L('9'), L('8'), L('6'),
|
||||
L('4'), L('5'), L('6'), M(1221, 4), L('1'), L('6'), L('3'), L('5'),
|
||||
M(596, 5), M(2066, 4), L('7'), M(2222, 4), L('9'), L('8'), M(1119, 4), L('9'),
|
||||
L('3'), L('6'), L('3'), L('4'), M(1884, 4), L('7'), L('4'), L('3'),
|
||||
L('2'), L('4'), M(1148, 4), L('1'), L('5'), L('0'), L('7'), L('6'),
|
||||
M(1212, 4), L('7'), L('9'), L('4'), L('5'), L('1'), L('0'), L('9'),
|
||||
M(63, 4), L('0'), L('9'), L('4'), L('0'), M(1703, 4), L('8'), L('8'),
|
||||
L('7'), L('9'), L('7'), L('1'), L('0'), L('8'), L('9'), L('3'),
|
||||
M(2289, 4), L('6'), L('9'), L('1'), L('3'), L('6'), L('8'), L('6'),
|
||||
L('7'), L('2'), M(604, 4), M(511, 4), L('5'), M(1344, 4), M(1129, 4), M(2050, 4),
|
||||
L('1'), L('7'), L('9'), L('2'), L('8'), L('6'), L('8'), M(2253, 4),
|
||||
L('8'), L('7'), L('4'), L('7'), M(1951, 5), L('8'), L('2'), L('4'),
|
||||
M(2427, 4), L('8'), M(604, 4), L('7'), L('1'), L('4'), L('9'), L('0'),
|
||||
L('9'), L('6'), L('7'), L('5'), L('9'), L('8'), M(1776, 4), L('3'),
|
||||
L('6'), L('5'), M(309, 4), L('8'), L('1'), M(93, 4), M(1862, 4), M(2359, 4),
|
||||
L('6'), L('8'), L('2'), L('9'), M(1407, 4), L('8'), L('7'), L('2'),
|
||||
L('2'), L('6'), L('5'), L('8'), L('8'), L('0'), M(1554, 4), L('5'),
|
||||
M(586, 4), L('4'), L('2'), L('7'), L('0'), L('4'), L('7'), L('7'),
|
||||
L('5'), L('5'), M(2079, 4), L('3'), L('7'), L('9'), L('6'), L('4'),
|
||||
L('1'), L('4'), L('5'), L('1'), L('5'), L('2'), M(1534, 4), L('2'),
|
||||
L('3'), L('4'), L('3'), L('6'), L('4'), L('5'), L('4'), M(1503, 4),
|
||||
L('4'), L('4'), L('4'), L('7'), L('9'), L('5'), M(61, 4), M(1316, 4),
|
||||
M(2279, 5), L('4'), L('1'), M(1323, 4), L('3'), M(773, 4), L('5'), L('2'),
|
||||
L('3'), L('1'), M(2114, 5), L('1'), L('6'), L('6'), L('1'), M(2227, 4),
|
||||
L('5'), L('9'), L('6'), L('9'), L('5'), L('3'), L('6'), L('2'),
|
||||
L('3'), L('1'), L('4'), M(1536, 4), L('2'), L('4'), L('8'), L('4'),
|
||||
L('9'), L('3'), L('7'), L('1'), L('8'), L('7'), L('1'), L('1'),
|
||||
L('0'), L('1'), L('4'), L('5'), L('7'), L('6'), L('5'), L('4'),
|
||||
M(1890, 4), L('0'), L('2'), L('7'), L('9'), L('9'), L('3'), L('4'),
|
||||
L('4'), L('0'), L('3'), L('7'), L('4'), L('2'), L('0'), L('0'),
|
||||
L('7'), M(2368, 4), L('7'), L('8'), L('5'), L('3'), L('9'), L('0'),
|
||||
L('6'), L('2'), L('1'), L('9'), M(666, 5), M(838, 4), L('8'), L('4'),
|
||||
L('7'), M(979, 5), L('8'), L('3'), L('3'), L('2'), L('1'), L('4'),
|
||||
L('4'), L('5'), L('7'), L('1'), M(645, 4), M(1911, 4), L('4'), L('3'),
|
||||
L('5'), L('0'), M(2345, 4), M(1129, 4), L('5'), L('3'), L('1'), L('9'),
|
||||
L('1'), L('0'), L('4'), L('8'), L('4'), L('8'), L('1'), L('0'),
|
||||
L('0'), L('5'), L('3'), L('7'), L('0'), L('6'), M(2237, 4), M(1438, 5),
|
||||
M(1922, 5), L('1'), M(1370, 4), L('7'), M(796, 4), L('5'), M(2029, 4), M(1037, 4),
|
||||
L('6'), L('3'), M(2013, 5), L('4'), M(2418, 4), M(847, 5), M(1014, 5), L('8'),
|
||||
M(1326, 5), M(2184, 5), L('9'), M(392, 4), L('9'), L('1'), M(2255, 4), L('8'),
|
||||
L('1'), L('4'), L('6'), L('7'), L('5'), L('1'), M(1580, 4), L('1'),
|
||||
L('2'), L('3'), L('9'), M(426, 6), L('9'), L('0'), L('7'), L('1'),
|
||||
L('8'), L('6'), L('4'), L('9'), L('4'), L('2'), L('3'), L('1'),
|
||||
L('9'), L('6'), L('1'), L('5'), L('6'), M(493, 4), M(1725, 4), L('9'),
|
||||
L('5'), M(2343, 4), M(1130, 4), M(284, 4), L('6'), L('0'), L('3'), L('8'),
|
||||
M(2598, 4), M(368, 4), M(901, 4), L('6'), L('2'), M(1115, 4), L('5'), M(2125, 4),
|
||||
L('6'), L('3'), L('8'), L('9'), L('3'), L('7'), L('7'), L('8'),
|
||||
L('7'), M(2246, 4), M(249, 4), L('9'), L('7'), L('9'), L('2'), L('0'),
|
||||
L('7'), L('7'), L('3'), M(1496, 4), L('2'), L('1'), L('8'), L('2'),
|
||||
L('5'), L('6'), M(2016, 4), L('6'), L('6'), M(1751, 4), L('4'), L('2'),
|
||||
M(1663, 5), L('6'), M(1767, 4), L('4'), L('4'), M(37, 4), L('5'), L('4'),
|
||||
L('9'), L('2'), L('0'), L('2'), L('6'), L('0'), L('5'), M(2740, 4),
|
||||
M(997, 5), L('2'), L('0'), L('1'), L('4'), L('9'), M(1235, 4), L('8'),
|
||||
L('5'), L('0'), L('7'), L('3'), M(1434, 4), L('6'), L('6'), L('6'),
|
||||
L('0'), M(405, 4), L('2'), L('4'), L('3'), L('4'), L('0'), M(136, 4),
|
||||
L('0'), M(1900, 4), L('8'), L('6'), L('3'), M(2391, 4), M(2021, 4), M(1068, 4),
|
||||
M(373, 4), L('5'), L('7'), L('9'), L('6'), L('2'), L('6'), L('8'),
|
||||
L('5'), L('6'), M(321, 4), L('5'), L('0'), L('8'), M(1316, 4), L('5'),
|
||||
L('8'), L('7'), L('9'), L('6'), L('9'), L('9'), M(1810, 4), L('5'),
|
||||
L('7'), L('4'), M(2585, 4), L('8'), L('4'), L('0'), M(2228, 4), L('1'),
|
||||
L('4'), L('5'), L('9'), L('1'), M(1933, 4), L('7'), L('0'), M(565, 4),
|
||||
L('0'), L('1'), M(3048, 4), L('1'), L('2'), M(3189, 4), L('0'), M(964, 4),
|
||||
L('3'), L('9'), M(2859, 4), M(275, 4), L('7'), L('1'), L('5'), M(945, 4),
|
||||
L('4'), L('2'), L('0'), M(3059, 5), L('9'), M(3011, 4), L('0'), L('7'),
|
||||
M(834, 4), M(1942, 4), M(2736, 4), M(3171, 4), L('2'), L('1'), M(2401, 4), L('2'),
|
||||
L('5'), L('1'), M(1404, 4), M(2373, 4), L('9'), L('2'), M(435, 4), L('8'),
|
||||
L('2'), L('6'), M(2919, 4), L('2'), M(633, 4), L('3'), L('2'), L('1'),
|
||||
L('5'), L('7'), L('9'), L('1'), L('9'), L('8'), L('4'), L('1'),
|
||||
L('4'), M(2172, 5), L('9'), L('1'), L('6'), L('4'), M(1769, 5), L('9'),
|
||||
M(2905, 5), M(2268, 4), L('7'), L('2'), L('2'), M(802, 4), L('5'), M(2213, 4),
|
||||
M(322, 4), L('9'), L('1'), L('0'), M(189, 4), M(3164, 4), L('5'), L('2'),
|
||||
L('8'), L('0'), L('1'), L('7'), M(562, 4), L('7'), L('1'), L('2'),
|
||||
M(2325, 4), L('8'), L('3'), L('2'), M(884, 4), L('1'), M(1418, 4), L('0'),
|
||||
L('9'), L('3'), L('5'), L('3'), L('9'), L('6'), L('5'), L('7'),
|
||||
M(1612, 4), L('1'), L('0'), L('8'), L('3'), M(106, 4), L('5'), L('1'),
|
||||
M(1915, 4), M(3419, 4), L('1'), L('4'), L('4'), L('4'), L('2'), L('1'),
|
||||
L('0'), L('0'), M(515, 4), L('0'), L('3'), M(413, 4), L('1'), L('1'),
|
||||
L('0'), L('3'), M(3202, 4), M(10, 4), M(39, 4), M(1539, 6), L('5'), L('1'),
|
||||
L('6'), M(1498, 4), M(2180, 5), M(2347, 4), L('5'), M(3139, 5), L('8'), L('5'),
|
||||
L('1'), L('7'), L('1'), L('4'), L('3'), L('7'), M(1542, 4), M(110, 4),
|
||||
L('1'), L('5'), L('5'), L('6'), L('5'), L('0'), L('8'), L('8'),
|
||||
M(954, 4), L('9'), L('8'), L('9'), L('8'), L('5'), L('9'), L('9'),
|
||||
L('8'), L('2'), L('3'), L('8'), M(464, 4), M(2491, 4), L('3'), M(365, 4),
|
||||
M(1087, 4), M(2500, 4), L('8'), M(3590, 5), L('3'), L('2'), M(264, 4), L('5'),
|
||||
M(774, 4), L('3'), M(459, 4), L('9'), M(1052, 4), L('9'), L('8'), M(2174, 4),
|
||||
L('4'), M(3257, 4), L('7'), M(1612, 4), L('0'), L('7'), M(230, 4), L('4'),
|
||||
L('8'), L('1'), L('4'), L('1'), M(1338, 4), L('8'), L('5'), L('9'),
|
||||
L('4'), L('6'), L('1'), M(3018, 4), L('8'), L('0'),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-rand-1k.input",
|
||||
.want = "huffman-rand-1k.{s}.expect",
|
||||
.want_no_input = "huffman-rand-1k.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xd), L(0x85), L(0x94), L(0x25), L(0x80), L(0xaf), L(0xc2), L(0xfe), L(0x8d),
|
||||
L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde), L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b),
|
||||
L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73), L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4),
|
||||
L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15), L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde),
|
||||
L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9), L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14),
|
||||
L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xd), L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c),
|
||||
L(0x99), L(0xdf), L(0xfd), L(0x70), L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5),
|
||||
L(0xd4), L(0x80), L(0x59), L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20),
|
||||
L(0x1b), L(0x46), L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4),
|
||||
L(0x7b), L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28),
|
||||
L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f), L(0xa0),
|
||||
L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4), L(0x75), L(0xda),
|
||||
L(0xea), L(0x9b), L(0xa), L(0x64), L(0xb), L(0xe0), L(0x23), L(0x29), L(0xbd), L(0xf7), L(0xe7), L(0x83), L(0x3c), L(0xfb),
|
||||
L(0xdf), L(0xb3), L(0xae), L(0x4f), L(0xa4), L(0x47), L(0x55), L(0x99), L(0xde), L(0x2f), L(0x96), L(0x6e), L(0x1c), L(0x43),
|
||||
L(0x4c), L(0x87), L(0xe2), L(0x7c), L(0xd9), L(0x5f), L(0x4c), L(0x7c), L(0xe8), L(0x90), L(0x3), L(0xdb), L(0x30), L(0x95),
|
||||
L(0xd6), L(0x22), L(0xc), L(0x47), L(0xb8), L(0x4d), L(0x6b), L(0xbd), L(0x24), L(0x11), L(0xab), L(0x2c), L(0xd7), L(0xbe),
|
||||
L(0x6e), L(0x7a), L(0xd6), L(0x8), L(0xa3), L(0x98), L(0xd8), L(0xdd), L(0x15), L(0x6a), L(0xfa), L(0x93), L(0x30), L(0x1),
|
||||
L(0x25), L(0x1d), L(0xa2), L(0x74), L(0x86), L(0x4b), L(0x6a), L(0x95), L(0xe8), L(0xe1), L(0x4e), L(0xe), L(0x76), L(0xb9),
|
||||
L(0x49), L(0xa9), L(0x5f), L(0xa0), L(0xa6), L(0x63), L(0x3c), L(0x7e), L(0x7e), L(0x20), L(0x13), L(0x4f), L(0xbb), L(0x66),
|
||||
L(0x92), L(0xb8), L(0x2e), L(0xa4), L(0xfa), L(0x48), L(0xcb), L(0xae), L(0xb9), L(0x3c), L(0xaf), L(0xd3), L(0x1f), L(0xe1),
|
||||
L(0xd5), L(0x8d), L(0x42), L(0x6d), L(0xf0), L(0xfc), L(0x8c), L(0xc), L(0x0), L(0xde), L(0x40), L(0xab), L(0x8b), L(0x47),
|
||||
L(0x97), L(0x4e), L(0xa8), L(0xcf), L(0x8e), L(0xdb), L(0xa6), L(0x8b), L(0x20), L(0x9), L(0x84), L(0x7a), L(0x66), L(0xe5),
|
||||
L(0x98), L(0x29), L(0x2), L(0x95), L(0xe6), L(0x38), L(0x32), L(0x60), L(0x3), L(0xe3), L(0x9a), L(0x1e), L(0x54), L(0xe8),
|
||||
L(0x63), L(0x80), L(0x48), L(0x9c), L(0xe7), L(0x63), L(0x33), L(0x6e), L(0xa0), L(0x65), L(0x83), L(0xfa), L(0xc6), L(0xba),
|
||||
L(0x7a), L(0x43), L(0x71), L(0x5), L(0xf5), L(0x68), L(0x69), L(0x85), L(0x9c), L(0xba), L(0x45), L(0xcd), L(0x6b), L(0xb),
|
||||
L(0x19), L(0xd1), L(0xbb), L(0x7f), L(0x70), L(0x85), L(0x92), L(0xd1), L(0xb4), L(0x64), L(0x82), L(0xb1), L(0xe4), L(0x62),
|
||||
L(0xc5), L(0x3c), L(0x46), L(0x1f), L(0x92), L(0x31), L(0x1c), L(0x4e), L(0x41), L(0x77), L(0xf7), L(0xe7), L(0x87), L(0xa2),
|
||||
L(0xf), L(0x6e), L(0xe8), L(0x92), L(0x3), L(0x6b), L(0xa), L(0xe7), L(0xa9), L(0x3b), L(0x11), L(0xda), L(0x66), L(0x8a),
|
||||
L(0x29), L(0xda), L(0x79), L(0xe1), L(0x64), L(0x8d), L(0xe3), L(0x54), L(0xd4), L(0xf5), L(0xef), L(0x64), L(0x87), L(0x3b),
|
||||
L(0xf4), L(0xc2), L(0xf4), L(0x71), L(0x13), L(0xa9), L(0xe9), L(0xe0), L(0xa2), L(0x6), L(0x14), L(0xab), L(0x5d), L(0xa7),
|
||||
L(0x96), L(0x0), L(0xd6), L(0xc3), L(0xcc), L(0x57), L(0xed), L(0x39), L(0x6a), L(0x25), L(0xcd), L(0x76), L(0xea), L(0xba),
|
||||
L(0x3a), L(0xf2), L(0xa1), L(0x95), L(0x5d), L(0xe5), L(0x71), L(0xcf), L(0x9c), L(0x62), L(0x9e), L(0x6a), L(0xfa), L(0xd5),
|
||||
L(0x31), L(0xd1), L(0xa8), L(0x66), L(0x30), L(0x33), L(0xaa), L(0x51), L(0x17), L(0x13), L(0x82), L(0x99), L(0xc8), L(0x14),
|
||||
L(0x60), L(0x9f), L(0x4d), L(0x32), L(0x6d), L(0xda), L(0x19), L(0x26), L(0x21), L(0xdc), L(0x7e), L(0x2e), L(0x25), L(0x67),
|
||||
L(0x72), L(0xca), L(0xf), L(0x92), L(0xcd), L(0xf6), L(0xd6), L(0xcb), L(0x97), L(0x8a), L(0x33), L(0x58), L(0x73), L(0x70),
|
||||
L(0x91), L(0x1d), L(0xbf), L(0x28), L(0x23), L(0xa3), L(0xc), L(0xf1), L(0x83), L(0xc3), L(0xc8), L(0x56), L(0x77), L(0x68),
|
||||
L(0xe3), L(0x82), L(0xba), L(0xb9), L(0x57), L(0x56), L(0x57), L(0x9c), L(0xc3), L(0xd6), L(0x14), L(0x5), L(0x3c), L(0xb1),
|
||||
L(0xaf), L(0x93), L(0xc8), L(0x8a), L(0x57), L(0x7f), L(0x53), L(0xfa), L(0x2f), L(0xaa), L(0x6e), L(0x66), L(0x83), L(0xfa),
|
||||
L(0x33), L(0xd1), L(0x21), L(0xab), L(0x1b), L(0x71), L(0xb4), L(0x7c), L(0xda), L(0xfd), L(0xfb), L(0x7f), L(0x20), L(0xab),
|
||||
L(0x5e), L(0xd5), L(0xca), L(0xfd), L(0xdd), L(0xe0), L(0xee), L(0xda), L(0xba), L(0xa8), L(0x27), L(0x99), L(0x97), L(0x69),
|
||||
L(0xc1), L(0x3c), L(0x82), L(0x8c), L(0xa), L(0x5c), L(0x2d), L(0x5b), L(0x88), L(0x3e), L(0x34), L(0x35), L(0x86), L(0x37),
|
||||
L(0x46), L(0x79), L(0xe1), L(0xaa), L(0x19), L(0xfb), L(0xaa), L(0xde), L(0x15), L(0x9), L(0xd), L(0x1a), L(0x57), L(0xff),
|
||||
L(0xb5), L(0xf), L(0xf3), L(0x2b), L(0x5a), L(0x6a), L(0x4d), L(0x19), L(0x77), L(0x71), L(0x45), L(0xdf), L(0x4f), L(0xb3),
|
||||
L(0xec), L(0xf1), L(0xeb), L(0x18), L(0x53), L(0x3e), L(0x3b), L(0x47), L(0x8), L(0x9a), L(0x73), L(0xa0), L(0x5c), L(0x8c),
|
||||
L(0x5f), L(0xeb), L(0xf), L(0x3a), L(0xc2), L(0x43), L(0x67), L(0xb4), L(0x66), L(0x67), L(0x80), L(0x58), L(0xe), L(0xc1),
|
||||
L(0xec), L(0x40), L(0xd4), L(0x22), L(0x94), L(0xca), L(0xf9), L(0xe8), L(0x92), L(0xe4), L(0x69), L(0x38), L(0xbe), L(0x67),
|
||||
L(0x64), L(0xca), L(0x50), L(0xc7), L(0x6), L(0x67), L(0x42), L(0x6e), L(0xa3), L(0xf0), L(0xb7), L(0x6c), L(0xf2), L(0xe8),
|
||||
L(0x5f), L(0xb1), L(0xaf), L(0xe7), L(0xdb), L(0xbb), L(0x77), L(0xb5), L(0xf8), L(0xcb), L(0x8), L(0xc4), L(0x75), L(0x7e),
|
||||
L(0xc0), L(0xf9), L(0x1c), L(0x7f), L(0x3c), L(0x89), L(0x2f), L(0xd2), L(0x58), L(0x3a), L(0xe2), L(0xf8), L(0x91), L(0xb6),
|
||||
L(0x7b), L(0x24), L(0x27), L(0xe9), L(0xae), L(0x84), L(0x8b), L(0xde), L(0x74), L(0xac), L(0xfd), L(0xd9), L(0xb7), L(0x69),
|
||||
L(0x2a), L(0xec), L(0x32), L(0x6f), L(0xf0), L(0x92), L(0x84), L(0xf1), L(0x40), L(0xc), L(0x8a), L(0xbc), L(0x39), L(0x6e),
|
||||
L(0x2e), L(0x73), L(0xd4), L(0x6e), L(0x8a), L(0x74), L(0x2a), L(0xdc), L(0x60), L(0x1f), L(0xa3), L(0x7), L(0xde), L(0x75),
|
||||
L(0x8b), L(0x74), L(0xc8), L(0xfe), L(0x63), L(0x75), L(0xf6), L(0x3d), L(0x63), L(0xac), L(0x33), L(0x89), L(0xc3), L(0xf0),
|
||||
L(0xf8), L(0x2d), L(0x6b), L(0xb4), L(0x9e), L(0x74), L(0x8b), L(0x5c), L(0x33), L(0xb4), L(0xca), L(0xa8), L(0xe4), L(0x99),
|
||||
L(0xb6), L(0x90), L(0xa1), L(0xef), L(0xf), L(0xd3), L(0x61), L(0xb2), L(0xc6), L(0x1a), L(0x94), L(0x7c), L(0x44), L(0x55),
|
||||
L(0xf4), L(0x45), L(0xff), L(0x9e), L(0xa5), L(0x5a), L(0xc6), L(0xa0), L(0xe8), L(0x2a), L(0xc1), L(0x8d), L(0x6f), L(0x34),
|
||||
L(0x11), L(0xb9), L(0xbe), L(0x4e), L(0xd9), L(0x87), L(0x97), L(0x73), L(0xcf), L(0x3d), L(0x23), L(0xae), L(0xd5), L(0x1a),
|
||||
L(0x5e), L(0xae), L(0x5d), L(0x6a), L(0x3), L(0xf9), L(0x22), L(0xd), L(0x10), L(0xd9), L(0x47), L(0x69), L(0x15), L(0x3f),
|
||||
L(0xee), L(0x52), L(0xa3), L(0x8), L(0xd2), L(0x3c), L(0x51), L(0xf4), L(0xf8), L(0x9d), L(0xe4), L(0x98), L(0x89), L(0xc8),
|
||||
L(0x67), L(0x39), L(0xd5), L(0x5e), L(0x35), L(0x78), L(0x27), L(0xe8), L(0x3c), L(0x80), L(0xae), L(0x79), L(0x71), L(0xd2),
|
||||
L(0x93), L(0xf4), L(0xaa), L(0x51), L(0x12), L(0x1c), L(0x4b), L(0x1b), L(0xe5), L(0x6e), L(0x15), L(0x6f), L(0xe4), L(0xbb),
|
||||
L(0x51), L(0x9b), L(0x45), L(0x9f), L(0xf9), L(0xc4), L(0x8c), L(0x2a), L(0xfb), L(0x1a), L(0xdf), L(0x55), L(0xd3), L(0x48),
|
||||
L(0x93), L(0x27), L(0x1), L(0x26), L(0xc2), L(0x6b), L(0x55), L(0x6d), L(0xa2), L(0xfb), L(0x84), L(0x8b), L(0xc9), L(0x9e),
|
||||
L(0x28), L(0xc2), L(0xef), L(0x1a), L(0x24), L(0xec), L(0x9b), L(0xae), L(0xbd), L(0x60), L(0xe9), L(0x15), L(0x35), L(0xee),
|
||||
L(0x42), L(0xa4), L(0x33), L(0x5b), L(0xfa), L(0xf), L(0xb6), L(0xf7), L(0x1), L(0xa6), L(0x2), L(0x4c), L(0xca), L(0x90),
|
||||
L(0x58), L(0x3a), L(0x96), L(0x41), L(0xe7), L(0xcb), L(0x9), L(0x8c), L(0xdb), L(0x85), L(0x4d), L(0xa8), L(0x89), L(0xf3),
|
||||
L(0xb5), L(0x8e), L(0xfd), L(0x75), L(0x5b), L(0x4f), L(0xed), L(0xde), L(0x3f), L(0xeb), L(0x38), L(0xa3), L(0xbe), L(0xb0),
|
||||
L(0x73), L(0xfc), L(0xb8), L(0x54), L(0xf7), L(0x4c), L(0x30), L(0x67), L(0x2e), L(0x38), L(0xa2), L(0x54), L(0x18), L(0xba),
|
||||
L(0x8), L(0xbf), L(0xf2), L(0x39), L(0xd5), L(0xfe), L(0xa5), L(0x41), L(0xc6), L(0x66), L(0x66), L(0xba), L(0x81), L(0xef),
|
||||
L(0x67), L(0xe4), L(0xe6), L(0x3c), L(0xc), L(0xca), L(0xa4), L(0xa), L(0x79), L(0xb3), L(0x57), L(0x8b), L(0x8a), L(0x75),
|
||||
L(0x98), L(0x18), L(0x42), L(0x2f), L(0x29), L(0xa3), L(0x82), L(0xef), L(0x9f), L(0x86), L(0x6), L(0x23), L(0xe1), L(0x75),
|
||||
L(0xfa), L(0x8), L(0xb1), L(0xde), L(0x17), L(0x4a),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-rand-limit.input",
|
||||
.want = "huffman-rand-limit.{s}.expect",
|
||||
.want_no_input = "huffman-rand-limit.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L(0x61), M(1, 74), L(0xa), L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xa), L(0x85), L(0x94), L(0x25), L(0x80),
|
||||
L(0xaf), L(0xc2), L(0xfe), L(0x8d), L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde),
|
||||
L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b), L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73),
|
||||
L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4), L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15),
|
||||
L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde), L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9),
|
||||
L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14), L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xa),
|
||||
L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c), L(0x99), L(0xdf), L(0xfd), L(0x70),
|
||||
L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5), L(0xd4), L(0x80), L(0x59),
|
||||
L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20), L(0x1b), L(0x46),
|
||||
L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4), L(0x7b),
|
||||
L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28),
|
||||
L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f),
|
||||
L(0xa0), L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4),
|
||||
L(0x75), L(0xda), L(0xea), L(0x9b), L(0xa),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-shifts.input",
|
||||
.want = "huffman-shifts.{s}.expect",
|
||||
.want_no_input = "huffman-shifts.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L('1'), L('0'), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258),
|
||||
M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258),
|
||||
M(2, 258), M(2, 76), L(0xd), L(0xa), L('2'), L('3'), M(2, 258), M(2, 258),
|
||||
M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 256),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-text-shift.input",
|
||||
.want = "huffman-text-shift.{s}.expect",
|
||||
.want_no_input = "huffman-text-shift.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L('/'), L('/'), L('C'), L('o'), L('p'), L('y'), L('r'), L('i'),
|
||||
L('g'), L('h'), L('t'), L('2'), L('0'), L('0'), L('9'), L('T'),
|
||||
L('h'), L('G'), L('o'), L('A'), L('u'), L('t'), L('h'), L('o'),
|
||||
L('r'), L('.'), L('A'), L('l'), L('l'), M(23, 5), L('r'), L('r'),
|
||||
L('v'), L('d'), L('.'), L(0xd), L(0xa), L('/'), L('/'), L('U'),
|
||||
L('o'), L('f'), L('t'), L('h'), L('i'), L('o'), L('u'), L('r'),
|
||||
L('c'), L('c'), L('o'), L('d'), L('i'), L('g'), L('o'), L('v'),
|
||||
L('r'), L('n'), L('d'), L('b'), L('y'), L('B'), L('S'), L('D'),
|
||||
L('-'), L('t'), L('y'), L('l'), M(33, 4), L('l'), L('i'), L('c'),
|
||||
L('n'), L('t'), L('h'), L('t'), L('c'), L('n'), L('b'), L('f'),
|
||||
L('o'), L('u'), L('n'), L('d'), L('i'), L('n'), L('t'), L('h'),
|
||||
L('L'), L('I'), L('C'), L('E'), L('N'), L('S'), L('E'), L('f'),
|
||||
L('i'), L('l'), L('.'), L(0xd), L(0xa), L(0xd), L(0xa), L('p'),
|
||||
L('c'), L('k'), L('g'), L('m'), L('i'), L('n'), M(11, 4), L('i'),
|
||||
L('m'), L('p'), L('o'), L('r'), L('t'), L('"'), L('o'), L('"'),
|
||||
M(13, 4), L('f'), L('u'), L('n'), L('c'), L('m'), L('i'), L('n'),
|
||||
L('('), L(')'), L('{'), L(0xd), L(0xa), L(0x9), L('v'), L('r'),
|
||||
L('b'), L('='), L('m'), L('k'), L('('), L('['), L(']'), L('b'),
|
||||
L('y'), L('t'), L(','), L('6'), L('5'), L('5'), L('3'), L('5'),
|
||||
L(')'), L(0xd), L(0xa), L(0x9), L('f'), L(','), L('_'), L(':'),
|
||||
L('='), L('o'), L('.'), L('C'), L('r'), L('t'), L('('), L('"'),
|
||||
L('h'), L('u'), L('f'), L('f'), L('m'), L('n'), L('-'), L('n'),
|
||||
L('u'), L('l'), L('l'), L('-'), L('m'), L('x'), L('.'), L('i'),
|
||||
L('n'), L('"'), M(34, 5), L('.'), L('W'), L('r'), L('i'), L('t'),
|
||||
L('('), L('b'), L(')'), L(0xd), L(0xa), L('}'), L(0xd), L(0xa),
|
||||
L('A'), L('B'), L('C'), L('D'), L('E'), L('F'), L('G'), L('H'),
|
||||
L('I'), L('J'), L('K'), L('L'), L('M'), L('N'), L('O'), L('P'),
|
||||
L('Q'), L('R'), L('S'), L('T'), L('U'), L('V'), L('X'), L('x'),
|
||||
L('y'), L('z'), L('!'), L('"'), L('#'), L(0xc2), L(0xa4), L('%'),
|
||||
L('&'), L('/'), L('?'), L('"'),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-text.input",
|
||||
.want = "huffman-text.{s}.expect",
|
||||
.want_no_input = "huffman-text.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L('/'), L('/'), L(' '), L('z'), L('i'), L('g'), L(' '), L('v'),
|
||||
L('0'), L('.'), L('1'), L('0'), L('.'), L('0'), L(0xa), L('/'),
|
||||
L('/'), L(' '), L('c'), L('r'), L('e'), L('a'), L('t'), L('e'),
|
||||
L(' '), L('a'), L(' '), L('f'), L('i'), L('l'), L('e'), M(5, 4),
|
||||
L('l'), L('e'), L('d'), L(' '), L('w'), L('i'), L('t'), L('h'),
|
||||
L(' '), L('0'), L('x'), L('0'), L('0'), L(0xa), L('c'), L('o'),
|
||||
L('n'), L('s'), L('t'), L(' '), L('s'), L('t'), L('d'), L(' '),
|
||||
L('='), L(' '), L('@'), L('i'), L('m'), L('p'), L('o'), L('r'),
|
||||
L('t'), L('('), L('"'), L('s'), L('t'), L('d'), L('"'), L(')'),
|
||||
L(';'), L(0xa), L(0xa), L('p'), L('u'), L('b'), L(' '), L('f'),
|
||||
L('n'), L(' '), L('m'), L('a'), L('i'), L('n'), L('('), L(')'),
|
||||
L(' '), L('!'), L('v'), L('o'), L('i'), L('d'), L(' '), L('{'),
|
||||
L(0xa), L(' '), L(' '), L(' '), L(' '), L('v'), L('a'), L('r'),
|
||||
L(' '), L('b'), L(' '), L('='), L(' '), L('['), L('1'), L(']'),
|
||||
L('u'), L('8'), L('{'), L('0'), L('}'), L(' '), L('*'), L('*'),
|
||||
L(' '), L('6'), L('5'), L('5'), L('3'), L('5'), L(';'), M(31, 5),
|
||||
M(86, 6), L('f'), L(' '), L('='), L(' '), L('t'), L('r'), L('y'),
|
||||
M(94, 4), L('.'), L('f'), L('s'), L('.'), L('c'), L('w'), L('d'),
|
||||
L('('), L(')'), L('.'), M(144, 6), L('F'), L('i'), L('l'), L('e'),
|
||||
L('('), M(43, 5), M(1, 4), L('"'), L('h'), L('u'), L('f'), L('f'),
|
||||
L('m'), L('a'), L('n'), L('-'), L('n'), L('u'), L('l'), L('l'),
|
||||
L('-'), L('m'), L('a'), L('x'), L('.'), L('i'), L('n'), L('"'),
|
||||
L(','), M(31, 9), L('.'), L('{'), L(' '), L('.'), L('r'), L('e'),
|
||||
L('a'), L('d'), M(79, 5), L('u'), L('e'), L(' '), L('}'), M(27, 6),
|
||||
L(')'), M(108, 6), L('d'), L('e'), L('f'), L('e'), L('r'), L(' '),
|
||||
L('f'), L('.'), L('c'), L('l'), L('o'), L('s'), L('e'), L('('),
|
||||
M(183, 4), M(22, 4), L('_'), M(124, 7), L('f'), L('.'), L('w'), L('r'),
|
||||
L('i'), L('t'), L('e'), L('A'), L('l'), L('l'), L('('), L('b'),
|
||||
L('['), L('0'), L('.'), L('.'), L(']'), L(')'), L(';'), L(0xa),
|
||||
L('}'), L(0xa),
|
||||
},
|
||||
},
|
||||
TestCase{
|
||||
.input = "huffman-zero.input",
|
||||
.want = "huffman-zero.{s}.expect",
|
||||
.want_no_input = "huffman-zero.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{ L(0x30), ml, M(1, 49) },
|
||||
},
|
||||
TestCase{
|
||||
.input = "",
|
||||
.want = "",
|
||||
.want_no_input = "null-long-match.{s}.expect-noinput",
|
||||
.tokens = &[_]Token{
|
||||
L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
|
||||
ml, ml, ml, M(1, 8),
|
||||
},
|
||||
},
|
||||
};
|
||||
};
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590
099465764078951269468398352595709825822620522489407726719478268482601476990902640136394437455305068203496252451749399651431429809190659250937221696461515709858387410597885959772975498930161753928468138268683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244136549762780797715691435997700129616089441694868555848406353422072225828488648158456028506016842739452267467678895252138522549954666727823986456596116354886230577456498035593634568174324112515076069479451096596094025228879710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821682998948722658804857564014270477555132379641451523746234364542858444795265867821051141354735739523113427166102135969536231442952484937187110145765403590279934403742007310578539062198387447808478489683321445713868751943506430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675142691239748940907186494231961567945208095146550225231603881930142093762137855956638937787083039069792077346722182562599661501421503068038447734549202605414665925201497442850732518666002132434088190710486331734649651453905796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007230558763176359421873125147120532928191826186125867321579198414848829164470609575270695722091756711672291098169091528017350671274858322287183520935396572512108357915136988209144421006751033467110314126711136990865851639831501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,4 +0,0 @@
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
ř‹–vH
|
||||
…”%€ŻÂţŤč ë†É·ĹŢę}‹ç>Úß˙lsŢĚçmŤIGH°čžň1YŢ4´[ĺŕ 0Â<30>[|]o#©
|
||||
Ľ-#ľŮíul™ßýpfćîٱžn<C5BE>YŐÔ€Y<E282AC>w‰C8ÉŻ02š F=gn×ržN!OĆŕÔ{ŤĄö›kÜ*“w(ý´bÚ ç«kQC9/ ’lu>ô5ýC.÷¤uÚę›
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,2 +0,0 @@
|
||||
10101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
1010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
|
||||
23232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323
2323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,14 +0,0 @@
|
||||
//Copyright2009ThGoAuthor.Allrightrrvd.
|
||||
//UofthiourccodigovrndbyBSD-tyl
|
||||
//licnthtcnbfoundinthLICENSEfil.
|
||||
|
||||
pckgmin
|
||||
|
||||
import"o"
|
||||
|
||||
funcmin(){
|
||||
vrb=mk([]byt,65535)
|
||||
f,_:=o.Crt("huffmn-null-mx.in")
|
||||
f.Writ(b)
|
||||
}
|
||||
ABCDEFGHIJKLMNOPQRSTUVXxyz!"#¤%&/?"
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,14 +0,0 @@
|
||||
// zig v0.10.0
|
||||
// create a file filled with 0x00
|
||||
const std = @import("std");
|
||||
|
||||
pub fn main() !void {
|
||||
var b = [1]u8{0} ** 65535;
|
||||
const f = try std.fs.cwd().createFile(
|
||||
"huffman-null-max.in",
|
||||
.{ .read = true },
|
||||
);
|
||||
defer f.close();
|
||||
|
||||
_ = try f.writeAll(b[0..]);
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
||||
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,66 +0,0 @@
|
||||
const deflate = @import("flate/deflate.zig");
|
||||
const inflate = @import("flate/inflate.zig");
|
||||
|
||||
/// Decompress compressed data from reader and write plain data to the writer.
|
||||
pub fn decompress(reader: anytype, writer: anytype) !void {
|
||||
try inflate.decompress(.gzip, reader, writer);
|
||||
}
|
||||
|
||||
/// Decompressor type
|
||||
pub fn Decompressor(comptime ReaderType: type) type {
|
||||
return inflate.Decompressor(.gzip, ReaderType);
|
||||
}
|
||||
|
||||
/// Create Decompressor which will read compressed data from reader.
|
||||
pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
|
||||
return inflate.decompressor(.gzip, reader);
|
||||
}
|
||||
|
||||
/// Compression level, trades between speed and compression size.
|
||||
pub const Options = deflate.Options;
|
||||
|
||||
/// Compress plain data from reader and write compressed data to the writer.
|
||||
pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
|
||||
try deflate.compress(.gzip, reader, writer, options);
|
||||
}
|
||||
|
||||
/// Compressor type
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.Compressor(.gzip, WriterType);
|
||||
}
|
||||
|
||||
/// Create Compressor which outputs compressed data to the writer.
|
||||
pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
|
||||
return try deflate.compressor(.gzip, writer, options);
|
||||
}
|
||||
|
||||
/// Huffman only compression. Without Lempel-Ziv match searching. Faster
|
||||
/// compression, less memory requirements but bigger compressed sizes.
|
||||
pub const huffman = struct {
|
||||
pub fn compress(reader: anytype, writer: anytype) !void {
|
||||
try deflate.huffman.compress(.gzip, reader, writer);
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.huffman.Compressor(.gzip, WriterType);
|
||||
}
|
||||
|
||||
pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
|
||||
return deflate.huffman.compressor(.gzip, writer);
|
||||
}
|
||||
};
|
||||
|
||||
// No compression store only. Compressed size is slightly bigger than plain.
|
||||
pub const store = struct {
|
||||
pub fn compress(reader: anytype, writer: anytype) !void {
|
||||
try deflate.store.compress(.gzip, reader, writer);
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.store.Compressor(.gzip, WriterType);
|
||||
}
|
||||
|
||||
pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
|
||||
return deflate.store.compressor(.gzip, writer);
|
||||
}
|
||||
};
|
||||
@@ -1,101 +0,0 @@
|
||||
const deflate = @import("flate/deflate.zig");
|
||||
const inflate = @import("flate/inflate.zig");
|
||||
|
||||
/// Decompress compressed data from reader and write plain data to the writer.
|
||||
pub fn decompress(reader: anytype, writer: anytype) !void {
|
||||
try inflate.decompress(.zlib, reader, writer);
|
||||
}
|
||||
|
||||
/// Decompressor type
|
||||
pub fn Decompressor(comptime ReaderType: type) type {
|
||||
return inflate.Decompressor(.zlib, ReaderType);
|
||||
}
|
||||
|
||||
/// Create Decompressor which will read compressed data from reader.
|
||||
pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
|
||||
return inflate.decompressor(.zlib, reader);
|
||||
}
|
||||
|
||||
/// Compression level, trades between speed and compression size.
|
||||
pub const Options = deflate.Options;
|
||||
|
||||
/// Compress plain data from reader and write compressed data to the writer.
|
||||
pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
|
||||
try deflate.compress(.zlib, reader, writer, options);
|
||||
}
|
||||
|
||||
/// Compressor type
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.Compressor(.zlib, WriterType);
|
||||
}
|
||||
|
||||
/// Create Compressor which outputs compressed data to the writer.
|
||||
pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
|
||||
return try deflate.compressor(.zlib, writer, options);
|
||||
}
|
||||
|
||||
/// Huffman only compression. Without Lempel-Ziv match searching. Faster
|
||||
/// compression, less memory requirements but bigger compressed sizes.
|
||||
pub const huffman = struct {
|
||||
pub fn compress(reader: anytype, writer: anytype) !void {
|
||||
try deflate.huffman.compress(.zlib, reader, writer);
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.huffman.Compressor(.zlib, WriterType);
|
||||
}
|
||||
|
||||
pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
|
||||
return deflate.huffman.compressor(.zlib, writer);
|
||||
}
|
||||
};
|
||||
|
||||
// No compression store only. Compressed size is slightly bigger than plain.
|
||||
pub const store = struct {
|
||||
pub fn compress(reader: anytype, writer: anytype) !void {
|
||||
try deflate.store.compress(.zlib, reader, writer);
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.store.Compressor(.zlib, WriterType);
|
||||
}
|
||||
|
||||
pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
|
||||
return deflate.store.compressor(.zlib, writer);
|
||||
}
|
||||
};
|
||||
|
||||
test "should not overshoot" {
|
||||
const std = @import("std");
|
||||
|
||||
// Compressed zlib data with extra 4 bytes at the end.
|
||||
const data = [_]u8{
|
||||
0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9,
|
||||
0x49, 0xcd, 0x55, 0x28, 0x4b, 0xcc, 0x53, 0x08, 0x4e, 0xce, 0x48, 0xcc, 0xcc, 0xd6, 0x51, 0x08,
|
||||
0xce, 0xcc, 0x4b, 0x4f, 0x2c, 0xc8, 0x2f, 0x4a, 0x55, 0x30, 0xb4, 0xb4, 0x34, 0xd5, 0xb5, 0x34,
|
||||
0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12,
|
||||
};
|
||||
|
||||
var stream = std.io.fixedBufferStream(data[0..]);
|
||||
const reader = stream.reader();
|
||||
|
||||
var dcp = decompressor(reader);
|
||||
var out: [128]u8 = undefined;
|
||||
|
||||
// Decompress
|
||||
var n = try dcp.reader().readAll(out[0..]);
|
||||
|
||||
// Expected decompressed data
|
||||
try std.testing.expectEqual(46, n);
|
||||
try std.testing.expectEqualStrings("Copyright Willem van Schaik, Singapore 1995-96", out[0..n]);
|
||||
|
||||
// Decompressor don't overshoot underlying reader.
|
||||
// It is leaving it at the end of compressed data chunk.
|
||||
try std.testing.expectEqual(data.len - 4, stream.getPos());
|
||||
try std.testing.expectEqual(0, dcp.unreadBytes());
|
||||
|
||||
// 4 bytes after compressed chunk are available in reader.
|
||||
n = try reader.readAll(out[0..]);
|
||||
try std.testing.expectEqual(n, 4);
|
||||
try std.testing.expectEqualSlices(u8, data[data.len - 4 .. data.len], out[0..n]);
|
||||
}
|
||||
@@ -89,7 +89,7 @@ pub fn init(input: *Reader, buffer: []u8, options: Options) Decompress {
|
||||
.stream = stream,
|
||||
.rebase = rebase,
|
||||
.discard = discard,
|
||||
.readVec = Reader.indirectReadVec,
|
||||
.readVec = readVec,
|
||||
},
|
||||
.buffer = buffer,
|
||||
.seek = 0,
|
||||
@@ -109,10 +109,24 @@ fn rebase(r: *Reader, capacity: usize) Reader.RebaseError!void {
|
||||
r.seek -= discard_n;
|
||||
}
|
||||
|
||||
fn discard(r: *Reader, limit: Limit) Reader.Error!usize {
|
||||
r.rebase(zstd.block_size_max) catch unreachable;
|
||||
var d: Writer.Discarding = .init(r.buffer);
|
||||
const n = r.stream(&d.writer, limit) catch |err| switch (err) {
|
||||
/// This could be improved so that when an amount is discarded that includes an
|
||||
/// entire frame, skip decoding that frame.
|
||||
fn discard(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
|
||||
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
|
||||
r.rebase(d.window_len) catch unreachable;
|
||||
var writer: Writer = .{
|
||||
.vtable = &.{
|
||||
.drain = std.Io.Writer.Discarding.drain,
|
||||
.sendFile = std.Io.Writer.Discarding.sendFile,
|
||||
},
|
||||
.buffer = r.buffer,
|
||||
.end = r.end,
|
||||
};
|
||||
defer {
|
||||
r.end = writer.end;
|
||||
r.seek = r.end;
|
||||
}
|
||||
const n = r.stream(&writer, limit) catch |err| switch (err) {
|
||||
error.WriteFailed => unreachable,
|
||||
error.ReadFailed => return error.ReadFailed,
|
||||
error.EndOfStream => return error.EndOfStream,
|
||||
@@ -121,6 +135,23 @@ fn discard(r: *Reader, limit: Limit) Reader.Error!usize {
|
||||
return n;
|
||||
}
|
||||
|
||||
fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
|
||||
_ = data;
|
||||
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
|
||||
assert(r.seek == r.end);
|
||||
r.rebase(d.window_len) catch unreachable;
|
||||
var writer: Writer = .{
|
||||
.buffer = r.buffer,
|
||||
.end = r.end,
|
||||
.vtable = &.{ .drain = Writer.fixedDrain },
|
||||
};
|
||||
r.end += r.vtable.stream(r, &writer, .limited(writer.buffer.len - writer.end)) catch |err| switch (err) {
|
||||
error.WriteFailed => unreachable,
|
||||
else => |e| return e,
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
fn stream(r: *Reader, w: *Writer, limit: Limit) Reader.StreamError!usize {
|
||||
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
|
||||
const in = d.input;
|
||||
|
||||
@@ -2019,10 +2019,14 @@ pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 {
|
||||
/// This function is to make it handy to comment out the return and make it
|
||||
/// into a crash when working on this file.
|
||||
pub fn bad() error{InvalidDebugInfo} {
|
||||
if (debug_debug_mode) @panic("bad dwarf");
|
||||
invalidDebugInfoDetected();
|
||||
return error.InvalidDebugInfo;
|
||||
}
|
||||
|
||||
fn invalidDebugInfoDetected() void {
|
||||
if (debug_debug_mode) @panic("bad dwarf");
|
||||
}
|
||||
|
||||
fn missing() error{MissingDebugInfo} {
|
||||
if (debug_debug_mode) @panic("missing dwarf");
|
||||
return error.MissingDebugInfo;
|
||||
@@ -2235,21 +2239,23 @@ pub const ElfModule = struct {
|
||||
|
||||
const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
|
||||
sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: {
|
||||
var section_stream = std.io.fixedBufferStream(section_bytes);
|
||||
const section_reader = section_stream.reader();
|
||||
const chdr = section_reader.readStruct(elf.Chdr) catch continue;
|
||||
var section_reader: std.Io.Reader = .fixed(section_bytes);
|
||||
const chdr = section_reader.takeStruct(elf.Chdr, endian) catch continue;
|
||||
if (chdr.ch_type != .ZLIB) continue;
|
||||
|
||||
var zlib_stream = std.compress.zlib.decompressor(section_reader);
|
||||
|
||||
const decompressed_section = try gpa.alloc(u8, chdr.ch_size);
|
||||
errdefer gpa.free(decompressed_section);
|
||||
|
||||
const read = zlib_stream.reader().readAll(decompressed_section) catch continue;
|
||||
assert(read == decompressed_section.len);
|
||||
|
||||
var decompress: std.compress.flate.Decompress = .init(§ion_reader, .zlib, &.{});
|
||||
var decompressed_section: std.ArrayListUnmanaged(u8) = .empty;
|
||||
defer decompressed_section.deinit(gpa);
|
||||
decompress.reader.appendRemainingUnlimited(gpa, null, &decompressed_section, std.compress.flate.history_len) catch {
|
||||
invalidDebugInfoDetected();
|
||||
continue;
|
||||
};
|
||||
if (chdr.ch_size != decompressed_section.items.len) {
|
||||
invalidDebugInfoDetected();
|
||||
continue;
|
||||
}
|
||||
break :blk .{
|
||||
.data = decompressed_section,
|
||||
.data = try decompressed_section.toOwnedSlice(gpa),
|
||||
.virtual_address = shdr.sh_addr,
|
||||
.owned = true,
|
||||
};
|
||||
|
||||
@@ -1105,22 +1105,6 @@ pub fn deprecatedWriter(file: File) DeprecatedWriter {
|
||||
return .{ .context = file };
|
||||
}
|
||||
|
||||
/// Deprecated in favor of `Reader` and `Writer`.
|
||||
pub const SeekableStream = io.SeekableStream(
|
||||
File,
|
||||
SeekError,
|
||||
GetSeekPosError,
|
||||
seekTo,
|
||||
seekBy,
|
||||
getPos,
|
||||
getEndPos,
|
||||
);
|
||||
|
||||
/// Deprecated in favor of `Reader` and `Writer`.
|
||||
pub fn seekableStream(file: File) SeekableStream {
|
||||
return .{ .context = file };
|
||||
}
|
||||
|
||||
/// Memoizes key information about a file handle such as:
|
||||
/// * The size from calling stat, or the error that occurred therein.
|
||||
/// * The current seek position.
|
||||
@@ -1321,7 +1305,7 @@ pub const Reader = struct {
|
||||
}
|
||||
}
|
||||
|
||||
fn readVec(io_reader: *std.Io.Reader, data: []const []u8) std.Io.Reader.Error!usize {
|
||||
fn readVec(io_reader: *std.Io.Reader, data: [][]u8) std.Io.Reader.Error!usize {
|
||||
const r: *Reader = @alignCast(@fieldParentPtr("interface", io_reader));
|
||||
switch (r.mode) {
|
||||
.positional, .positional_reading => {
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
const adler = @import("hash/adler.zig");
|
||||
pub const Adler32 = adler.Adler32;
|
||||
pub const Adler32 = @import("hash/Adler32.zig");
|
||||
|
||||
const auto_hash = @import("hash/auto_hash.zig");
|
||||
pub const autoHash = auto_hash.autoHash;
|
||||
@@ -116,7 +115,7 @@ test int {
|
||||
}
|
||||
|
||||
test {
|
||||
_ = adler;
|
||||
_ = Adler32;
|
||||
_ = auto_hash;
|
||||
_ = crc;
|
||||
_ = fnv;
|
||||
|
||||
117
lib/std/hash/Adler32.zig
Normal file
117
lib/std/hash/Adler32.zig
Normal file
@@ -0,0 +1,117 @@
|
||||
//! https://tools.ietf.org/html/rfc1950#section-9
|
||||
//! https://github.com/madler/zlib/blob/master/adler32.c
|
||||
|
||||
const Adler32 = @This();
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
|
||||
adler: u32 = 1,
|
||||
|
||||
pub fn permute(state: u32, input: []const u8) u32 {
|
||||
const base = 65521;
|
||||
const nmax = 5552;
|
||||
|
||||
var s1 = state & 0xffff;
|
||||
var s2 = (state >> 16) & 0xffff;
|
||||
|
||||
if (input.len == 1) {
|
||||
s1 +%= input[0];
|
||||
if (s1 >= base) {
|
||||
s1 -= base;
|
||||
}
|
||||
s2 +%= s1;
|
||||
if (s2 >= base) {
|
||||
s2 -= base;
|
||||
}
|
||||
} else if (input.len < 16) {
|
||||
for (input) |b| {
|
||||
s1 +%= b;
|
||||
s2 +%= s1;
|
||||
}
|
||||
if (s1 >= base) {
|
||||
s1 -= base;
|
||||
}
|
||||
|
||||
s2 %= base;
|
||||
} else {
|
||||
const n = nmax / 16; // note: 16 | nmax
|
||||
|
||||
var i: usize = 0;
|
||||
|
||||
while (i + nmax <= input.len) {
|
||||
var rounds: usize = 0;
|
||||
while (rounds < n) : (rounds += 1) {
|
||||
comptime var j: usize = 0;
|
||||
inline while (j < 16) : (j += 1) {
|
||||
s1 +%= input[i + j];
|
||||
s2 +%= s1;
|
||||
}
|
||||
i += 16;
|
||||
}
|
||||
|
||||
s1 %= base;
|
||||
s2 %= base;
|
||||
}
|
||||
|
||||
if (i < input.len) {
|
||||
while (i + 16 <= input.len) : (i += 16) {
|
||||
comptime var j: usize = 0;
|
||||
inline while (j < 16) : (j += 1) {
|
||||
s1 +%= input[i + j];
|
||||
s2 +%= s1;
|
||||
}
|
||||
}
|
||||
while (i < input.len) : (i += 1) {
|
||||
s1 +%= input[i];
|
||||
s2 +%= s1;
|
||||
}
|
||||
|
||||
s1 %= base;
|
||||
s2 %= base;
|
||||
}
|
||||
}
|
||||
|
||||
return s1 | (s2 << 16);
|
||||
}
|
||||
|
||||
pub fn update(a: *Adler32, input: []const u8) void {
|
||||
a.adler = permute(a.adler, input);
|
||||
}
|
||||
|
||||
pub fn hash(input: []const u8) u32 {
|
||||
return permute(1, input);
|
||||
}
|
||||
|
||||
test "sanity" {
|
||||
try testing.expectEqual(@as(u32, 0x620062), hash("a"));
|
||||
try testing.expectEqual(@as(u32, 0xbc002ed), hash("example"));
|
||||
}
|
||||
|
||||
test "long" {
|
||||
const long1 = [_]u8{1} ** 1024;
|
||||
try testing.expectEqual(@as(u32, 0x06780401), hash(long1[0..]));
|
||||
|
||||
const long2 = [_]u8{1} ** 1025;
|
||||
try testing.expectEqual(@as(u32, 0x0a7a0402), hash(long2[0..]));
|
||||
}
|
||||
|
||||
test "very long" {
|
||||
const long = [_]u8{1} ** 5553;
|
||||
try testing.expectEqual(@as(u32, 0x707f15b2), hash(long[0..]));
|
||||
}
|
||||
|
||||
test "very long with variation" {
|
||||
const long = comptime blk: {
|
||||
@setEvalBranchQuota(7000);
|
||||
var result: [6000]u8 = undefined;
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < result.len) : (i += 1) {
|
||||
result[i] = @as(u8, @truncate(i));
|
||||
}
|
||||
|
||||
break :blk result;
|
||||
};
|
||||
|
||||
try testing.expectEqual(@as(u32, 0x5af38d6e), hash(long[0..]));
|
||||
}
|
||||
@@ -1,134 +0,0 @@
|
||||
// Adler32 checksum.
|
||||
//
|
||||
// https://tools.ietf.org/html/rfc1950#section-9
|
||||
// https://github.com/madler/zlib/blob/master/adler32.c
|
||||
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
|
||||
pub const Adler32 = struct {
|
||||
const base = 65521;
|
||||
const nmax = 5552;
|
||||
|
||||
adler: u32,
|
||||
|
||||
pub fn init() Adler32 {
|
||||
return Adler32{ .adler = 1 };
|
||||
}
|
||||
|
||||
// This fast variant is taken from zlib. It reduces the required modulos and unrolls longer
|
||||
// buffer inputs and should be much quicker.
|
||||
pub fn update(self: *Adler32, input: []const u8) void {
|
||||
var s1 = self.adler & 0xffff;
|
||||
var s2 = (self.adler >> 16) & 0xffff;
|
||||
|
||||
if (input.len == 1) {
|
||||
s1 +%= input[0];
|
||||
if (s1 >= base) {
|
||||
s1 -= base;
|
||||
}
|
||||
s2 +%= s1;
|
||||
if (s2 >= base) {
|
||||
s2 -= base;
|
||||
}
|
||||
} else if (input.len < 16) {
|
||||
for (input) |b| {
|
||||
s1 +%= b;
|
||||
s2 +%= s1;
|
||||
}
|
||||
if (s1 >= base) {
|
||||
s1 -= base;
|
||||
}
|
||||
|
||||
s2 %= base;
|
||||
} else {
|
||||
const n = nmax / 16; // note: 16 | nmax
|
||||
|
||||
var i: usize = 0;
|
||||
|
||||
while (i + nmax <= input.len) {
|
||||
var rounds: usize = 0;
|
||||
while (rounds < n) : (rounds += 1) {
|
||||
comptime var j: usize = 0;
|
||||
inline while (j < 16) : (j += 1) {
|
||||
s1 +%= input[i + j];
|
||||
s2 +%= s1;
|
||||
}
|
||||
i += 16;
|
||||
}
|
||||
|
||||
s1 %= base;
|
||||
s2 %= base;
|
||||
}
|
||||
|
||||
if (i < input.len) {
|
||||
while (i + 16 <= input.len) : (i += 16) {
|
||||
comptime var j: usize = 0;
|
||||
inline while (j < 16) : (j += 1) {
|
||||
s1 +%= input[i + j];
|
||||
s2 +%= s1;
|
||||
}
|
||||
}
|
||||
while (i < input.len) : (i += 1) {
|
||||
s1 +%= input[i];
|
||||
s2 +%= s1;
|
||||
}
|
||||
|
||||
s1 %= base;
|
||||
s2 %= base;
|
||||
}
|
||||
}
|
||||
|
||||
self.adler = s1 | (s2 << 16);
|
||||
}
|
||||
|
||||
pub fn final(self: *Adler32) u32 {
|
||||
return self.adler;
|
||||
}
|
||||
|
||||
pub fn hash(input: []const u8) u32 {
|
||||
var c = Adler32.init();
|
||||
c.update(input);
|
||||
return c.final();
|
||||
}
|
||||
};
|
||||
|
||||
test "adler32 sanity" {
|
||||
try testing.expectEqual(@as(u32, 0x620062), Adler32.hash("a"));
|
||||
try testing.expectEqual(@as(u32, 0xbc002ed), Adler32.hash("example"));
|
||||
}
|
||||
|
||||
test "adler32 long" {
|
||||
const long1 = [_]u8{1} ** 1024;
|
||||
try testing.expectEqual(@as(u32, 0x06780401), Adler32.hash(long1[0..]));
|
||||
|
||||
const long2 = [_]u8{1} ** 1025;
|
||||
try testing.expectEqual(@as(u32, 0x0a7a0402), Adler32.hash(long2[0..]));
|
||||
}
|
||||
|
||||
test "adler32 very long" {
|
||||
const long = [_]u8{1} ** 5553;
|
||||
try testing.expectEqual(@as(u32, 0x707f15b2), Adler32.hash(long[0..]));
|
||||
}
|
||||
|
||||
test "adler32 very long with variation" {
|
||||
const long = comptime blk: {
|
||||
@setEvalBranchQuota(7000);
|
||||
var result: [6000]u8 = undefined;
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < result.len) : (i += 1) {
|
||||
result[i] = @as(u8, @truncate(i));
|
||||
}
|
||||
|
||||
break :blk result;
|
||||
};
|
||||
|
||||
try testing.expectEqual(@as(u32, 0x5af38d6e), std.hash.Adler32.hash(long[0..]));
|
||||
}
|
||||
|
||||
const verify = @import("verify.zig");
|
||||
|
||||
test "adler32 iterative" {
|
||||
try verify.iterativeApi(Adler32);
|
||||
}
|
||||
@@ -45,7 +45,7 @@ pub fn smhasher(comptime hash_fn: anytype) u32 {
|
||||
|
||||
pub fn iterativeApi(comptime Hash: anytype) !void {
|
||||
// Sum(1..32) = 528
|
||||
var buf: [528]u8 = [_]u8{0} ** 528;
|
||||
var buf: [528]u8 = @splat(0);
|
||||
var len: usize = 0;
|
||||
const seed = 0;
|
||||
|
||||
|
||||
@@ -405,13 +405,8 @@ pub const RequestTransfer = union(enum) {
|
||||
|
||||
/// The decompressor for response messages.
|
||||
pub const Compression = union(enum) {
|
||||
pub const DeflateDecompressor = std.compress.zlib.Decompressor(Request.TransferReader);
|
||||
pub const GzipDecompressor = std.compress.gzip.Decompressor(Request.TransferReader);
|
||||
// https://github.com/ziglang/zig/issues/18937
|
||||
//pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Request.TransferReader, .{});
|
||||
|
||||
deflate: DeflateDecompressor,
|
||||
gzip: GzipDecompressor,
|
||||
//deflate: std.compress.flate.Decompress,
|
||||
//gzip: std.compress.flate.Decompress,
|
||||
// https://github.com/ziglang/zig/issues/18937
|
||||
//zstd: ZstdDecompressor,
|
||||
none: void,
|
||||
@@ -1079,12 +1074,10 @@ pub const Request = struct {
|
||||
switch (req.response.transfer_compression) {
|
||||
.identity => req.response.compression = .none,
|
||||
.compress, .@"x-compress" => return error.CompressionUnsupported,
|
||||
.deflate => req.response.compression = .{
|
||||
.deflate = std.compress.zlib.decompressor(req.transferReader()),
|
||||
},
|
||||
.gzip, .@"x-gzip" => req.response.compression = .{
|
||||
.gzip = std.compress.gzip.decompressor(req.transferReader()),
|
||||
},
|
||||
// I'm about to upstream my http.Client rewrite
|
||||
.deflate => return error.CompressionUnsupported,
|
||||
// I'm about to upstream my http.Client rewrite
|
||||
.gzip, .@"x-gzip" => return error.CompressionUnsupported,
|
||||
// https://github.com/ziglang/zig/issues/18937
|
||||
//.zstd => req.response.compression = .{
|
||||
// .zstd = std.compress.zstd.decompressStream(req.client.allocator, req.transferReader()),
|
||||
@@ -1110,8 +1103,9 @@ pub const Request = struct {
|
||||
/// Reads data from the response body. Must be called after `wait`.
|
||||
pub fn read(req: *Request, buffer: []u8) ReadError!usize {
|
||||
const out_index = switch (req.response.compression) {
|
||||
.deflate => |*deflate| deflate.read(buffer) catch return error.DecompressionFailure,
|
||||
.gzip => |*gzip| gzip.read(buffer) catch return error.DecompressionFailure,
|
||||
// I'm about to upstream my http client rewrite
|
||||
//.deflate => |*deflate| deflate.readSlice(buffer) catch return error.DecompressionFailure,
|
||||
//.gzip => |*gzip| gzip.read(buffer) catch return error.DecompressionFailure,
|
||||
// https://github.com/ziglang/zig/issues/18937
|
||||
//.zstd => |*zstd| zstd.read(buffer) catch return error.DecompressionFailure,
|
||||
else => try req.transferRead(buffer),
|
||||
|
||||
@@ -130,8 +130,8 @@ pub const Request = struct {
|
||||
pub const DeflateDecompressor = std.compress.zlib.Decompressor(std.io.AnyReader);
|
||||
pub const GzipDecompressor = std.compress.gzip.Decompressor(std.io.AnyReader);
|
||||
|
||||
deflate: DeflateDecompressor,
|
||||
gzip: GzipDecompressor,
|
||||
deflate: std.compress.flate.Decompress,
|
||||
gzip: std.compress.flate.Decompress,
|
||||
zstd: std.compress.zstd.Decompress,
|
||||
none: void,
|
||||
};
|
||||
|
||||
@@ -1973,12 +1973,13 @@ pub const Stream = struct {
|
||||
|
||||
fn stream(io_r: *Io.Reader, io_w: *Io.Writer, limit: Io.Limit) Io.Reader.StreamError!usize {
|
||||
const dest = limit.slice(try io_w.writableSliceGreedy(1));
|
||||
const n = try readVec(io_r, &.{dest});
|
||||
var bufs: [1][]u8 = .{dest};
|
||||
const n = try readVec(io_r, &bufs);
|
||||
io_w.advance(n);
|
||||
return n;
|
||||
}
|
||||
|
||||
fn readVec(io_r: *std.Io.Reader, data: []const []u8) Io.Reader.Error!usize {
|
||||
fn readVec(io_r: *std.Io.Reader, data: [][]u8) Io.Reader.Error!usize {
|
||||
const r: *Reader = @alignCast(@fieldParentPtr("interface_state", io_r));
|
||||
var iovecs: [max_buffers_len]windows.ws2_32.WSABUF = undefined;
|
||||
const bufs_n, const data_size = try io_r.writableVectorWsa(&iovecs, data);
|
||||
|
||||
1001
lib/std/zip.zig
1001
lib/std/zip.zig
File diff suppressed because it is too large
Load Diff
@@ -1,298 +0,0 @@
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const zip = @import("../zip.zig");
|
||||
const maxInt = std.math.maxInt;
|
||||
|
||||
pub const File = struct {
|
||||
name: []const u8,
|
||||
content: []const u8,
|
||||
compression: zip.CompressionMethod,
|
||||
};
|
||||
|
||||
pub fn expectFiles(
|
||||
test_files: []const File,
|
||||
dir: std.fs.Dir,
|
||||
opt: struct {
|
||||
strip_prefix: ?[]const u8 = null,
|
||||
},
|
||||
) !void {
|
||||
for (test_files) |test_file| {
|
||||
var normalized_sub_path_buf: [std.fs.max_path_bytes]u8 = undefined;
|
||||
|
||||
const name = blk: {
|
||||
if (opt.strip_prefix) |strip_prefix| {
|
||||
try testing.expect(test_file.name.len >= strip_prefix.len);
|
||||
try testing.expectEqualStrings(strip_prefix, test_file.name[0..strip_prefix.len]);
|
||||
break :blk test_file.name[strip_prefix.len..];
|
||||
}
|
||||
break :blk test_file.name;
|
||||
};
|
||||
const normalized_sub_path = normalized_sub_path_buf[0..name.len];
|
||||
@memcpy(normalized_sub_path, name);
|
||||
std.mem.replaceScalar(u8, normalized_sub_path, '\\', '/');
|
||||
var file = try dir.openFile(normalized_sub_path, .{});
|
||||
defer file.close();
|
||||
var content_buf: [4096]u8 = undefined;
|
||||
const n = try file.deprecatedReader().readAll(&content_buf);
|
||||
try testing.expectEqualStrings(test_file.content, content_buf[0..n]);
|
||||
}
|
||||
}
|
||||
|
||||
// Used to store any data from writing a file to the zip archive that's needed
|
||||
// when writing the corresponding central directory record.
|
||||
pub const FileStore = struct {
|
||||
compression: zip.CompressionMethod,
|
||||
file_offset: u64,
|
||||
crc32: u32,
|
||||
compressed_size: u32,
|
||||
uncompressed_size: usize,
|
||||
};
|
||||
|
||||
pub fn makeZip(
|
||||
buf: []u8,
|
||||
comptime files: []const File,
|
||||
options: WriteZipOptions,
|
||||
) !std.io.FixedBufferStream([]u8) {
|
||||
var store: [files.len]FileStore = undefined;
|
||||
return try makeZipWithStore(buf, files, options, &store);
|
||||
}
|
||||
|
||||
pub fn makeZipWithStore(
|
||||
buf: []u8,
|
||||
files: []const File,
|
||||
options: WriteZipOptions,
|
||||
store: []FileStore,
|
||||
) !std.io.FixedBufferStream([]u8) {
|
||||
var fbs = std.io.fixedBufferStream(buf);
|
||||
try writeZip(fbs.writer(), files, store, options);
|
||||
return std.io.fixedBufferStream(buf[0..fbs.pos]);
|
||||
}
|
||||
|
||||
pub const WriteZipOptions = struct {
|
||||
end: ?EndRecordOptions = null,
|
||||
local_header: ?LocalHeaderOptions = null,
|
||||
};
|
||||
pub const LocalHeaderOptions = struct {
|
||||
zip64: ?LocalHeaderZip64Options = null,
|
||||
compressed_size: ?u32 = null,
|
||||
uncompressed_size: ?u32 = null,
|
||||
extra_len: ?u16 = null,
|
||||
};
|
||||
pub const LocalHeaderZip64Options = struct {
|
||||
data_size: ?u16 = null,
|
||||
};
|
||||
pub const EndRecordOptions = struct {
|
||||
zip64: ?Zip64Options = null,
|
||||
sig: ?[4]u8 = null,
|
||||
disk_number: ?u16 = null,
|
||||
central_directory_disk_number: ?u16 = null,
|
||||
record_count_disk: ?u16 = null,
|
||||
record_count_total: ?u16 = null,
|
||||
central_directory_size: ?u32 = null,
|
||||
central_directory_offset: ?u32 = null,
|
||||
comment_len: ?u16 = null,
|
||||
comment: ?[]const u8 = null,
|
||||
};
|
||||
pub const Zip64Options = struct {
|
||||
locator_sig: ?[4]u8 = null,
|
||||
locator_zip64_disk_count: ?u32 = null,
|
||||
locator_record_file_offset: ?u64 = null,
|
||||
locator_total_disk_count: ?u32 = null,
|
||||
//record_size: ?u64 = null,
|
||||
central_directory_size: ?u64 = null,
|
||||
};
|
||||
|
||||
pub fn writeZip(
|
||||
writer: anytype,
|
||||
files: []const File,
|
||||
store: []FileStore,
|
||||
options: WriteZipOptions,
|
||||
) !void {
|
||||
if (store.len < files.len) return error.FileStoreTooSmall;
|
||||
var zipper = initZipper(writer);
|
||||
for (files, 0..) |file, i| {
|
||||
store[i] = try zipper.writeFile(.{
|
||||
.name = file.name,
|
||||
.content = file.content,
|
||||
.compression = file.compression,
|
||||
.write_options = options,
|
||||
});
|
||||
}
|
||||
for (files, 0..) |file, i| {
|
||||
try zipper.writeCentralRecord(store[i], .{
|
||||
.name = file.name,
|
||||
});
|
||||
}
|
||||
try zipper.writeEndRecord(if (options.end) |e| e else .{});
|
||||
}
|
||||
|
||||
pub fn initZipper(writer: anytype) Zipper(@TypeOf(writer)) {
|
||||
return .{ .counting_writer = std.io.countingWriter(writer) };
|
||||
}
|
||||
|
||||
/// Provides methods to format and write the contents of a zip archive
|
||||
/// to the underlying Writer.
|
||||
pub fn Zipper(comptime Writer: type) type {
|
||||
return struct {
|
||||
counting_writer: std.io.CountingWriter(Writer),
|
||||
central_count: u64 = 0,
|
||||
first_central_offset: ?u64 = null,
|
||||
last_central_limit: ?u64 = null,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn writeFile(
|
||||
self: *Self,
|
||||
opt: struct {
|
||||
name: []const u8,
|
||||
content: []const u8,
|
||||
compression: zip.CompressionMethod,
|
||||
write_options: WriteZipOptions,
|
||||
},
|
||||
) !FileStore {
|
||||
const writer = self.counting_writer.writer();
|
||||
|
||||
const file_offset: u64 = @intCast(self.counting_writer.bytes_written);
|
||||
const crc32 = std.hash.Crc32.hash(opt.content);
|
||||
|
||||
const header_options = opt.write_options.local_header;
|
||||
{
|
||||
var compressed_size: u32 = 0;
|
||||
var uncompressed_size: u32 = 0;
|
||||
var extra_len: u16 = 0;
|
||||
if (header_options) |hdr_options| {
|
||||
compressed_size = if (hdr_options.compressed_size) |size| size else 0;
|
||||
uncompressed_size = if (hdr_options.uncompressed_size) |size| size else @intCast(opt.content.len);
|
||||
extra_len = if (hdr_options.extra_len) |len| len else 0;
|
||||
}
|
||||
const hdr: zip.LocalFileHeader = .{
|
||||
.signature = zip.local_file_header_sig,
|
||||
.version_needed_to_extract = 10,
|
||||
.flags = .{ .encrypted = false, ._ = 0 },
|
||||
.compression_method = opt.compression,
|
||||
.last_modification_time = 0,
|
||||
.last_modification_date = 0,
|
||||
.crc32 = crc32,
|
||||
.compressed_size = compressed_size,
|
||||
.uncompressed_size = uncompressed_size,
|
||||
.filename_len = @intCast(opt.name.len),
|
||||
.extra_len = extra_len,
|
||||
};
|
||||
try writer.writeStructEndian(hdr, .little);
|
||||
}
|
||||
try writer.writeAll(opt.name);
|
||||
|
||||
if (header_options) |hdr| {
|
||||
if (hdr.zip64) |options| {
|
||||
try writer.writeInt(u16, 0x0001, .little);
|
||||
const data_size = if (options.data_size) |size| size else 8;
|
||||
try writer.writeInt(u16, data_size, .little);
|
||||
try writer.writeInt(u64, 0, .little);
|
||||
try writer.writeInt(u64, @intCast(opt.content.len), .little);
|
||||
}
|
||||
}
|
||||
|
||||
var compressed_size: u32 = undefined;
|
||||
switch (opt.compression) {
|
||||
.store => {
|
||||
try writer.writeAll(opt.content);
|
||||
compressed_size = @intCast(opt.content.len);
|
||||
},
|
||||
.deflate => {
|
||||
const offset = self.counting_writer.bytes_written;
|
||||
var fbs = std.io.fixedBufferStream(opt.content);
|
||||
try std.compress.flate.deflate.compress(.raw, fbs.reader(), writer, .{});
|
||||
std.debug.assert(fbs.pos == opt.content.len);
|
||||
compressed_size = @intCast(self.counting_writer.bytes_written - offset);
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
return .{
|
||||
.compression = opt.compression,
|
||||
.file_offset = file_offset,
|
||||
.crc32 = crc32,
|
||||
.compressed_size = compressed_size,
|
||||
.uncompressed_size = opt.content.len,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn writeCentralRecord(
|
||||
self: *Self,
|
||||
store: FileStore,
|
||||
opt: struct {
|
||||
name: []const u8,
|
||||
version_needed_to_extract: u16 = 10,
|
||||
},
|
||||
) !void {
|
||||
if (self.first_central_offset == null) {
|
||||
self.first_central_offset = self.counting_writer.bytes_written;
|
||||
}
|
||||
self.central_count += 1;
|
||||
|
||||
const hdr: zip.CentralDirectoryFileHeader = .{
|
||||
.signature = zip.central_file_header_sig,
|
||||
.version_made_by = 0,
|
||||
.version_needed_to_extract = opt.version_needed_to_extract,
|
||||
.flags = .{ .encrypted = false, ._ = 0 },
|
||||
.compression_method = store.compression,
|
||||
.last_modification_time = 0,
|
||||
.last_modification_date = 0,
|
||||
.crc32 = store.crc32,
|
||||
.compressed_size = store.compressed_size,
|
||||
.uncompressed_size = @intCast(store.uncompressed_size),
|
||||
.filename_len = @intCast(opt.name.len),
|
||||
.extra_len = 0,
|
||||
.comment_len = 0,
|
||||
.disk_number = 0,
|
||||
.internal_file_attributes = 0,
|
||||
.external_file_attributes = 0,
|
||||
.local_file_header_offset = @intCast(store.file_offset),
|
||||
};
|
||||
try self.counting_writer.writer().writeStructEndian(hdr, .little);
|
||||
try self.counting_writer.writer().writeAll(opt.name);
|
||||
self.last_central_limit = self.counting_writer.bytes_written;
|
||||
}
|
||||
|
||||
pub fn writeEndRecord(self: *Self, opt: EndRecordOptions) !void {
|
||||
const cd_offset = self.first_central_offset orelse 0;
|
||||
const cd_end = self.last_central_limit orelse 0;
|
||||
|
||||
if (opt.zip64) |zip64| {
|
||||
const end64_off = cd_end;
|
||||
const fixed: zip.EndRecord64 = .{
|
||||
.signature = zip.end_record64_sig,
|
||||
.end_record_size = @sizeOf(zip.EndRecord64) - 12,
|
||||
.version_made_by = 0,
|
||||
.version_needed_to_extract = 45,
|
||||
.disk_number = 0,
|
||||
.central_directory_disk_number = 0,
|
||||
.record_count_disk = @intCast(self.central_count),
|
||||
.record_count_total = @intCast(self.central_count),
|
||||
.central_directory_size = @intCast(cd_end - cd_offset),
|
||||
.central_directory_offset = @intCast(cd_offset),
|
||||
};
|
||||
try self.counting_writer.writer().writeStructEndian(fixed, .little);
|
||||
const locator: zip.EndLocator64 = .{
|
||||
.signature = if (zip64.locator_sig) |s| s else zip.end_locator64_sig,
|
||||
.zip64_disk_count = if (zip64.locator_zip64_disk_count) |c| c else 0,
|
||||
.record_file_offset = if (zip64.locator_record_file_offset) |o| o else @intCast(end64_off),
|
||||
.total_disk_count = if (zip64.locator_total_disk_count) |c| c else 1,
|
||||
};
|
||||
try self.counting_writer.writer().writeStructEndian(locator, .little);
|
||||
}
|
||||
const hdr: zip.EndRecord = .{
|
||||
.signature = if (opt.sig) |s| s else zip.end_record_sig,
|
||||
.disk_number = if (opt.disk_number) |n| n else 0,
|
||||
.central_directory_disk_number = if (opt.central_directory_disk_number) |n| n else 0,
|
||||
.record_count_disk = if (opt.record_count_disk) |c| c else @intCast(self.central_count),
|
||||
.record_count_total = if (opt.record_count_total) |c| c else @intCast(self.central_count),
|
||||
.central_directory_size = if (opt.central_directory_size) |s| s else @intCast(cd_end - cd_offset),
|
||||
.central_directory_offset = if (opt.central_directory_offset) |o| o else @intCast(cd_offset),
|
||||
.comment_len = if (opt.comment_len) |l| l else (if (opt.comment) |c| @as(u16, @intCast(c.len)) else 0),
|
||||
};
|
||||
try self.counting_writer.writer().writeStructEndian(hdr, .little);
|
||||
if (opt.comment) |c|
|
||||
try self.counting_writer.writer().writeAll(c);
|
||||
}
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user